{ "best_metric": 11.667974472045898, "best_model_checkpoint": "miner_id_24/checkpoint-55000", "epoch": 1.1513020179184459, "eval_steps": 1000, "global_step": 55000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0932763962153564e-05, "grad_norm": 0.056203052401542664, "learning_rate": 6.666666666666667e-06, "loss": 11.7602, "step": 1 }, { "epoch": 2.0932763962153564e-05, "eval_loss": 11.762771606445312, "eval_runtime": 34.0426, "eval_samples_per_second": 28.229, "eval_steps_per_second": 7.079, "step": 1 }, { "epoch": 4.186552792430713e-05, "grad_norm": 0.05756784975528717, "learning_rate": 1.3333333333333333e-05, "loss": 11.7631, "step": 2 }, { "epoch": 6.279829188646068e-05, "grad_norm": 0.04162173345685005, "learning_rate": 2e-05, "loss": 11.7627, "step": 3 }, { "epoch": 8.373105584861426e-05, "grad_norm": 0.0596429705619812, "learning_rate": 2.6666666666666667e-05, "loss": 11.7716, "step": 4 }, { "epoch": 0.00010466381981076781, "grad_norm": 0.06288924068212509, "learning_rate": 3.3333333333333335e-05, "loss": 11.7643, "step": 5 }, { "epoch": 0.00012559658377292137, "grad_norm": 0.07149013131856918, "learning_rate": 4e-05, "loss": 11.7624, "step": 6 }, { "epoch": 0.00014652934773507493, "grad_norm": 0.035834964364767075, "learning_rate": 4.666666666666667e-05, "loss": 11.7613, "step": 7 }, { "epoch": 0.0001674621116972285, "grad_norm": 0.04928908124566078, "learning_rate": 5.333333333333333e-05, "loss": 11.7624, "step": 8 }, { "epoch": 0.00018839487565938207, "grad_norm": 0.04172995686531067, "learning_rate": 6e-05, "loss": 11.764, "step": 9 }, { "epoch": 0.00020932763962153562, "grad_norm": 0.04076745733618736, "learning_rate": 6.666666666666667e-05, "loss": 11.7672, "step": 10 }, { "epoch": 0.00023026040358368918, "grad_norm": 0.04949198663234711, "learning_rate": 7.333333333333333e-05, "loss": 11.7603, "step": 11 }, { "epoch": 0.00025119316754584274, "grad_norm": 0.07872851192951202, "learning_rate": 8e-05, "loss": 11.7627, "step": 12 }, { "epoch": 0.0002721259315079963, "grad_norm": 0.07041776925325394, "learning_rate": 8.666666666666667e-05, "loss": 11.765, "step": 13 }, { "epoch": 0.00029305869547014985, "grad_norm": 0.04996146634221077, "learning_rate": 9.333333333333334e-05, "loss": 11.7537, "step": 14 }, { "epoch": 0.00031399145943230346, "grad_norm": 0.05254559963941574, "learning_rate": 0.0001, "loss": 11.7617, "step": 15 }, { "epoch": 0.000334924223394457, "grad_norm": 0.04735461249947548, "learning_rate": 0.00010666666666666667, "loss": 11.7595, "step": 16 }, { "epoch": 0.0003558569873566106, "grad_norm": 0.051286254078149796, "learning_rate": 0.00011333333333333334, "loss": 11.7648, "step": 17 }, { "epoch": 0.00037678975131876413, "grad_norm": 0.05645868927240372, "learning_rate": 0.00012, "loss": 11.7603, "step": 18 }, { "epoch": 0.0003977225152809177, "grad_norm": 0.036114685237407684, "learning_rate": 0.00012666666666666666, "loss": 11.7643, "step": 19 }, { "epoch": 0.00041865527924307125, "grad_norm": 0.03756109997630119, "learning_rate": 0.00013333333333333334, "loss": 11.7626, "step": 20 }, { "epoch": 0.0004395880432052248, "grad_norm": 0.06092577055096626, "learning_rate": 0.00014, "loss": 11.7615, "step": 21 }, { "epoch": 0.00046052080716737836, "grad_norm": 0.0471370629966259, "learning_rate": 0.00014666666666666666, "loss": 11.7703, "step": 22 }, { "epoch": 0.0004814535711295319, "grad_norm": 0.061720602214336395, "learning_rate": 0.00015333333333333334, "loss": 11.7617, "step": 23 }, { "epoch": 0.0005023863350916855, "grad_norm": 0.05533916875720024, "learning_rate": 0.00016, "loss": 11.761, "step": 24 }, { "epoch": 0.0005233190990538391, "grad_norm": 0.06437263637781143, "learning_rate": 0.0001666666666666667, "loss": 11.7541, "step": 25 }, { "epoch": 0.0005442518630159926, "grad_norm": 0.0632123351097107, "learning_rate": 0.00017333333333333334, "loss": 11.7531, "step": 26 }, { "epoch": 0.0005651846269781462, "grad_norm": 0.05920521914958954, "learning_rate": 0.00018, "loss": 11.7571, "step": 27 }, { "epoch": 0.0005861173909402997, "grad_norm": 0.05806617811322212, "learning_rate": 0.0001866666666666667, "loss": 11.7666, "step": 28 }, { "epoch": 0.0006070501549024533, "grad_norm": 0.049883805215358734, "learning_rate": 0.00019333333333333333, "loss": 11.7608, "step": 29 }, { "epoch": 0.0006279829188646069, "grad_norm": 0.08594968914985657, "learning_rate": 0.0002, "loss": 11.7562, "step": 30 }, { "epoch": 0.0006489156828267604, "grad_norm": 0.05677652731537819, "learning_rate": 0.00019999999997596398, "loss": 11.7573, "step": 31 }, { "epoch": 0.000669848446788914, "grad_norm": 0.0587715320289135, "learning_rate": 0.000199999999903856, "loss": 11.7556, "step": 32 }, { "epoch": 0.0006907812107510675, "grad_norm": 0.07861647009849548, "learning_rate": 0.00019999999978367598, "loss": 11.7576, "step": 33 }, { "epoch": 0.0007117139747132212, "grad_norm": 0.0815449208021164, "learning_rate": 0.00019999999961542399, "loss": 11.7592, "step": 34 }, { "epoch": 0.0007326467386753747, "grad_norm": 0.10315592586994171, "learning_rate": 0.00019999999939909996, "loss": 11.757, "step": 35 }, { "epoch": 0.0007535795026375283, "grad_norm": 0.055126458406448364, "learning_rate": 0.00019999999913470395, "loss": 11.7516, "step": 36 }, { "epoch": 0.0007745122665996818, "grad_norm": 0.06587603688240051, "learning_rate": 0.0001999999988222359, "loss": 11.7627, "step": 37 }, { "epoch": 0.0007954450305618354, "grad_norm": 0.1005813255906105, "learning_rate": 0.00019999999846169588, "loss": 11.7557, "step": 38 }, { "epoch": 0.000816377794523989, "grad_norm": 0.0986056849360466, "learning_rate": 0.00019999999805308386, "loss": 11.7575, "step": 39 }, { "epoch": 0.0008373105584861425, "grad_norm": 0.08490145206451416, "learning_rate": 0.0001999999975963998, "loss": 11.7569, "step": 40 }, { "epoch": 0.0008582433224482961, "grad_norm": 0.14445467293262482, "learning_rate": 0.00019999999709164378, "loss": 11.7561, "step": 41 }, { "epoch": 0.0008791760864104496, "grad_norm": 0.08076098561286926, "learning_rate": 0.00019999999653881574, "loss": 11.7519, "step": 42 }, { "epoch": 0.0009001088503726032, "grad_norm": 0.10718613117933273, "learning_rate": 0.0001999999959379157, "loss": 11.7564, "step": 43 }, { "epoch": 0.0009210416143347567, "grad_norm": 0.05650531128048897, "learning_rate": 0.00019999999528894368, "loss": 11.762, "step": 44 }, { "epoch": 0.0009419743782969103, "grad_norm": 0.11559094488620758, "learning_rate": 0.0001999999945918996, "loss": 11.7548, "step": 45 }, { "epoch": 0.0009629071422590638, "grad_norm": 0.10773409157991409, "learning_rate": 0.00019999999384678356, "loss": 11.7482, "step": 46 }, { "epoch": 0.0009838399062212174, "grad_norm": 0.11440612375736237, "learning_rate": 0.00019999999305359548, "loss": 11.7538, "step": 47 }, { "epoch": 0.001004772670183371, "grad_norm": 0.10271722823381424, "learning_rate": 0.00019999999221233545, "loss": 11.7528, "step": 48 }, { "epoch": 0.0010257054341455247, "grad_norm": 0.12739768624305725, "learning_rate": 0.00019999999132300342, "loss": 11.7567, "step": 49 }, { "epoch": 0.0010466381981076782, "grad_norm": 0.12475908547639847, "learning_rate": 0.00019999999038559934, "loss": 11.7588, "step": 50 }, { "epoch": 0.0010675709620698317, "grad_norm": 0.10844914615154266, "learning_rate": 0.0001999999894001233, "loss": 11.7545, "step": 51 }, { "epoch": 0.0010885037260319852, "grad_norm": 0.1250046193599701, "learning_rate": 0.00019999998836657526, "loss": 11.7377, "step": 52 }, { "epoch": 0.001109436489994139, "grad_norm": 0.11641913652420044, "learning_rate": 0.00019999998728495523, "loss": 11.7449, "step": 53 }, { "epoch": 0.0011303692539562924, "grad_norm": 0.071332186460495, "learning_rate": 0.00019999998615526316, "loss": 11.755, "step": 54 }, { "epoch": 0.001151302017918446, "grad_norm": 0.10034549236297607, "learning_rate": 0.00019999998497749913, "loss": 11.7507, "step": 55 }, { "epoch": 0.0011722347818805994, "grad_norm": 0.12090073525905609, "learning_rate": 0.00019999998375166308, "loss": 11.7495, "step": 56 }, { "epoch": 0.0011931675458427531, "grad_norm": 0.11055192351341248, "learning_rate": 0.00019999998247775507, "loss": 11.7531, "step": 57 }, { "epoch": 0.0012141003098049066, "grad_norm": 0.10260391980409622, "learning_rate": 0.00019999998115577503, "loss": 11.7592, "step": 58 }, { "epoch": 0.0012350330737670601, "grad_norm": 0.15169058740139008, "learning_rate": 0.000199999979785723, "loss": 11.7474, "step": 59 }, { "epoch": 0.0012559658377292139, "grad_norm": 0.14092567563056946, "learning_rate": 0.00019999997836759898, "loss": 11.7443, "step": 60 }, { "epoch": 0.0012768986016913674, "grad_norm": 0.126256063580513, "learning_rate": 0.00019999997690140297, "loss": 11.7355, "step": 61 }, { "epoch": 0.0012978313656535209, "grad_norm": 0.09273310005664825, "learning_rate": 0.00019999997538713496, "loss": 11.7499, "step": 62 }, { "epoch": 0.0013187641296156744, "grad_norm": 0.12557393312454224, "learning_rate": 0.00019999997382479497, "loss": 11.733, "step": 63 }, { "epoch": 0.001339696893577828, "grad_norm": 0.14238058030605316, "learning_rate": 0.00019999997221438297, "loss": 11.7499, "step": 64 }, { "epoch": 0.0013606296575399816, "grad_norm": 0.1341496855020523, "learning_rate": 0.000199999970555899, "loss": 11.7483, "step": 65 }, { "epoch": 0.001381562421502135, "grad_norm": 0.1680867224931717, "learning_rate": 0.00019999996884934303, "loss": 11.7479, "step": 66 }, { "epoch": 0.0014024951854642888, "grad_norm": 0.16098076105117798, "learning_rate": 0.00019999996709471507, "loss": 11.7445, "step": 67 }, { "epoch": 0.0014234279494264423, "grad_norm": 0.141373410820961, "learning_rate": 0.00019999996529201512, "loss": 11.7309, "step": 68 }, { "epoch": 0.0014443607133885958, "grad_norm": 0.17527581751346588, "learning_rate": 0.0001999999634412432, "loss": 11.7545, "step": 69 }, { "epoch": 0.0014652934773507493, "grad_norm": 0.09394515305757523, "learning_rate": 0.00019999996154239927, "loss": 11.7395, "step": 70 }, { "epoch": 0.001486226241312903, "grad_norm": 0.1119910329580307, "learning_rate": 0.00019999995959548334, "loss": 11.752, "step": 71 }, { "epoch": 0.0015071590052750565, "grad_norm": 0.15359100699424744, "learning_rate": 0.00019999995760049545, "loss": 11.7459, "step": 72 }, { "epoch": 0.00152809176923721, "grad_norm": 0.1098518893122673, "learning_rate": 0.00019999995555743558, "loss": 11.7386, "step": 73 }, { "epoch": 0.0015490245331993635, "grad_norm": 0.1201215460896492, "learning_rate": 0.00019999995346630373, "loss": 11.7493, "step": 74 }, { "epoch": 0.0015699572971615173, "grad_norm": 0.11554825305938721, "learning_rate": 0.00019999995132709988, "loss": 11.7459, "step": 75 }, { "epoch": 0.0015908900611236708, "grad_norm": 0.0982440784573555, "learning_rate": 0.00019999994913982408, "loss": 11.74, "step": 76 }, { "epoch": 0.0016118228250858243, "grad_norm": 0.11309687048196793, "learning_rate": 0.00019999994690447626, "loss": 11.7473, "step": 77 }, { "epoch": 0.001632755589047978, "grad_norm": 0.17983326315879822, "learning_rate": 0.0001999999446210565, "loss": 11.7452, "step": 78 }, { "epoch": 0.0016536883530101315, "grad_norm": 0.09072911739349365, "learning_rate": 0.00019999994228956473, "loss": 11.7533, "step": 79 }, { "epoch": 0.001674621116972285, "grad_norm": 0.1536559760570526, "learning_rate": 0.00019999993991000098, "loss": 11.7382, "step": 80 }, { "epoch": 0.0016955538809344385, "grad_norm": 0.08702988177537918, "learning_rate": 0.0001999999374823653, "loss": 11.7399, "step": 81 }, { "epoch": 0.0017164866448965922, "grad_norm": 0.12257847934961319, "learning_rate": 0.00019999993500665762, "loss": 11.749, "step": 82 }, { "epoch": 0.0017374194088587457, "grad_norm": 0.12803927063941956, "learning_rate": 0.00019999993248287794, "loss": 11.7445, "step": 83 }, { "epoch": 0.0017583521728208992, "grad_norm": 0.14351588487625122, "learning_rate": 0.00019999992991102632, "loss": 11.7449, "step": 84 }, { "epoch": 0.001779284936783053, "grad_norm": 0.10697727650403976, "learning_rate": 0.00019999992729110273, "loss": 11.7472, "step": 85 }, { "epoch": 0.0018002177007452064, "grad_norm": 0.13407211005687714, "learning_rate": 0.00019999992462310716, "loss": 11.7344, "step": 86 }, { "epoch": 0.00182115046470736, "grad_norm": 0.11424198746681213, "learning_rate": 0.00019999992190703963, "loss": 11.7444, "step": 87 }, { "epoch": 0.0018420832286695134, "grad_norm": 0.12634237110614777, "learning_rate": 0.00019999991914290013, "loss": 11.7448, "step": 88 }, { "epoch": 0.0018630159926316672, "grad_norm": 0.44085782766342163, "learning_rate": 0.00019999991633068868, "loss": 11.7027, "step": 89 }, { "epoch": 0.0018839487565938207, "grad_norm": 0.14115838706493378, "learning_rate": 0.00019999991347040524, "loss": 11.7448, "step": 90 }, { "epoch": 0.0019048815205559742, "grad_norm": 0.10738669335842133, "learning_rate": 0.00019999991056204985, "loss": 11.7422, "step": 91 }, { "epoch": 0.0019258142845181277, "grad_norm": 0.13752935826778412, "learning_rate": 0.0001999999076056225, "loss": 11.7373, "step": 92 }, { "epoch": 0.0019467470484802814, "grad_norm": 0.13737276196479797, "learning_rate": 0.0001999999046011232, "loss": 11.7568, "step": 93 }, { "epoch": 0.001967679812442435, "grad_norm": 0.11772209405899048, "learning_rate": 0.0001999999015485519, "loss": 11.7478, "step": 94 }, { "epoch": 0.0019886125764045884, "grad_norm": 0.13583049178123474, "learning_rate": 0.0001999998984479087, "loss": 11.7392, "step": 95 }, { "epoch": 0.002009545340366742, "grad_norm": 0.2474967986345291, "learning_rate": 0.0001999998952991935, "loss": 11.7381, "step": 96 }, { "epoch": 0.0020304781043288954, "grad_norm": 0.112013079226017, "learning_rate": 0.00019999989210240637, "loss": 11.7368, "step": 97 }, { "epoch": 0.0020514108682910493, "grad_norm": 0.10719872266054153, "learning_rate": 0.0001999998888575473, "loss": 11.75, "step": 98 }, { "epoch": 0.002072343632253203, "grad_norm": 0.0996989831328392, "learning_rate": 0.00019999988556461626, "loss": 11.7466, "step": 99 }, { "epoch": 0.0020932763962153563, "grad_norm": 0.11389109492301941, "learning_rate": 0.00019999988222361325, "loss": 11.7464, "step": 100 }, { "epoch": 0.00211420916017751, "grad_norm": 0.1585276573896408, "learning_rate": 0.0001999998788345383, "loss": 11.7523, "step": 101 }, { "epoch": 0.0021351419241396634, "grad_norm": 0.11120504885911942, "learning_rate": 0.00019999987539739145, "loss": 11.734, "step": 102 }, { "epoch": 0.002156074688101817, "grad_norm": 0.13900424540042877, "learning_rate": 0.0001999998719121726, "loss": 11.737, "step": 103 }, { "epoch": 0.0021770074520639704, "grad_norm": 0.11519607901573181, "learning_rate": 0.00019999986837888184, "loss": 11.7382, "step": 104 }, { "epoch": 0.0021979402160261243, "grad_norm": 0.10889256000518799, "learning_rate": 0.00019999986479751915, "loss": 11.7467, "step": 105 }, { "epoch": 0.002218872979988278, "grad_norm": 0.09722790867090225, "learning_rate": 0.00019999986116808448, "loss": 11.7349, "step": 106 }, { "epoch": 0.0022398057439504313, "grad_norm": 0.13690216839313507, "learning_rate": 0.0001999998574905779, "loss": 11.7382, "step": 107 }, { "epoch": 0.002260738507912585, "grad_norm": 0.1278126835823059, "learning_rate": 0.00019999985376499942, "loss": 11.7505, "step": 108 }, { "epoch": 0.0022816712718747383, "grad_norm": 0.11255389451980591, "learning_rate": 0.00019999984999134895, "loss": 11.7437, "step": 109 }, { "epoch": 0.002302604035836892, "grad_norm": 0.1530066579580307, "learning_rate": 0.00019999984616962655, "loss": 11.7311, "step": 110 }, { "epoch": 0.0023235367997990453, "grad_norm": 0.11648288369178772, "learning_rate": 0.00019999984229983225, "loss": 11.7473, "step": 111 }, { "epoch": 0.002344469563761199, "grad_norm": 0.10153660923242569, "learning_rate": 0.00019999983838196598, "loss": 11.7524, "step": 112 }, { "epoch": 0.0023654023277233528, "grad_norm": 0.1269916594028473, "learning_rate": 0.00019999983441602783, "loss": 11.7515, "step": 113 }, { "epoch": 0.0023863350916855063, "grad_norm": 0.11910085380077362, "learning_rate": 0.00019999983040201773, "loss": 11.7503, "step": 114 }, { "epoch": 0.0024072678556476598, "grad_norm": 0.10632143169641495, "learning_rate": 0.00019999982633993573, "loss": 11.7417, "step": 115 }, { "epoch": 0.0024282006196098133, "grad_norm": 0.09389708191156387, "learning_rate": 0.00019999982222978179, "loss": 11.7479, "step": 116 }, { "epoch": 0.0024491333835719668, "grad_norm": 0.1156199499964714, "learning_rate": 0.0001999998180715559, "loss": 11.7232, "step": 117 }, { "epoch": 0.0024700661475341203, "grad_norm": 0.13299009203910828, "learning_rate": 0.00019999981386525817, "loss": 11.7403, "step": 118 }, { "epoch": 0.0024909989114962738, "grad_norm": 0.13472118973731995, "learning_rate": 0.00019999980961088847, "loss": 11.7427, "step": 119 }, { "epoch": 0.0025119316754584277, "grad_norm": 0.11129526793956757, "learning_rate": 0.00019999980530844687, "loss": 11.736, "step": 120 }, { "epoch": 0.002532864439420581, "grad_norm": 0.08245090395212173, "learning_rate": 0.00019999980095793335, "loss": 11.738, "step": 121 }, { "epoch": 0.0025537972033827347, "grad_norm": 0.09626433998346329, "learning_rate": 0.00019999979655934796, "loss": 11.7394, "step": 122 }, { "epoch": 0.002574729967344888, "grad_norm": 0.12189431488513947, "learning_rate": 0.00019999979211269062, "loss": 11.7461, "step": 123 }, { "epoch": 0.0025956627313070417, "grad_norm": 0.11410100013017654, "learning_rate": 0.00019999978761796138, "loss": 11.7412, "step": 124 }, { "epoch": 0.0026165954952691952, "grad_norm": 0.4139077663421631, "learning_rate": 0.00019999978307516026, "loss": 11.7413, "step": 125 }, { "epoch": 0.0026375282592313487, "grad_norm": 0.14012619853019714, "learning_rate": 0.00019999977848428723, "loss": 11.7481, "step": 126 }, { "epoch": 0.0026584610231935027, "grad_norm": 0.09883134812116623, "learning_rate": 0.0001999997738453423, "loss": 11.7433, "step": 127 }, { "epoch": 0.002679393787155656, "grad_norm": 0.09854042530059814, "learning_rate": 0.00019999976915832549, "loss": 11.746, "step": 128 }, { "epoch": 0.0027003265511178097, "grad_norm": 0.09005728363990784, "learning_rate": 0.00019999976442323675, "loss": 11.7365, "step": 129 }, { "epoch": 0.002721259315079963, "grad_norm": 0.18067948520183563, "learning_rate": 0.00019999975964007615, "loss": 11.7423, "step": 130 }, { "epoch": 0.0027421920790421167, "grad_norm": 0.12251310050487518, "learning_rate": 0.00019999975480884365, "loss": 11.7316, "step": 131 }, { "epoch": 0.00276312484300427, "grad_norm": 0.15963079035282135, "learning_rate": 0.00019999974992953928, "loss": 11.7369, "step": 132 }, { "epoch": 0.0027840576069664237, "grad_norm": 0.10016746073961258, "learning_rate": 0.00019999974500216304, "loss": 11.7377, "step": 133 }, { "epoch": 0.0028049903709285776, "grad_norm": 0.08738070726394653, "learning_rate": 0.00019999974002671487, "loss": 11.7393, "step": 134 }, { "epoch": 0.002825923134890731, "grad_norm": 0.11423972994089127, "learning_rate": 0.00019999973500319486, "loss": 11.7493, "step": 135 }, { "epoch": 0.0028468558988528846, "grad_norm": 0.16274306178092957, "learning_rate": 0.00019999972993160295, "loss": 11.736, "step": 136 }, { "epoch": 0.002867788662815038, "grad_norm": 0.14793366193771362, "learning_rate": 0.00019999972481193917, "loss": 11.7404, "step": 137 }, { "epoch": 0.0028887214267771916, "grad_norm": 0.14891433715820312, "learning_rate": 0.00019999971964420352, "loss": 11.7483, "step": 138 }, { "epoch": 0.002909654190739345, "grad_norm": 0.11780198663473129, "learning_rate": 0.000199999714428396, "loss": 11.7395, "step": 139 }, { "epoch": 0.0029305869547014986, "grad_norm": 0.11535870283842087, "learning_rate": 0.00019999970916451663, "loss": 11.7362, "step": 140 }, { "epoch": 0.0029515197186636526, "grad_norm": 0.15338483452796936, "learning_rate": 0.00019999970385256536, "loss": 11.7362, "step": 141 }, { "epoch": 0.002972452482625806, "grad_norm": 0.17951445281505585, "learning_rate": 0.00019999969849254226, "loss": 11.7534, "step": 142 }, { "epoch": 0.0029933852465879596, "grad_norm": 0.11512784659862518, "learning_rate": 0.0001999996930844473, "loss": 11.7366, "step": 143 }, { "epoch": 0.003014318010550113, "grad_norm": 0.15918485820293427, "learning_rate": 0.00019999968762828045, "loss": 11.7449, "step": 144 }, { "epoch": 0.0030352507745122666, "grad_norm": 0.10364490002393723, "learning_rate": 0.0001999996821240418, "loss": 11.7394, "step": 145 }, { "epoch": 0.00305618353847442, "grad_norm": 0.08146369457244873, "learning_rate": 0.00019999967657173128, "loss": 11.744, "step": 146 }, { "epoch": 0.0030771163024365736, "grad_norm": 0.1341237872838974, "learning_rate": 0.0001999996709713489, "loss": 11.7445, "step": 147 }, { "epoch": 0.003098049066398727, "grad_norm": 0.1221223771572113, "learning_rate": 0.00019999966532289464, "loss": 11.7402, "step": 148 }, { "epoch": 0.003118981830360881, "grad_norm": 0.14955729246139526, "learning_rate": 0.00019999965962636858, "loss": 11.7496, "step": 149 }, { "epoch": 0.0031399145943230345, "grad_norm": 0.12166628241539001, "learning_rate": 0.00019999965388177066, "loss": 11.7453, "step": 150 }, { "epoch": 0.003160847358285188, "grad_norm": 0.10833936929702759, "learning_rate": 0.00019999964808910093, "loss": 11.7278, "step": 151 }, { "epoch": 0.0031817801222473415, "grad_norm": 0.09060709178447723, "learning_rate": 0.00019999964224835933, "loss": 11.7379, "step": 152 }, { "epoch": 0.003202712886209495, "grad_norm": 0.10563063621520996, "learning_rate": 0.00019999963635954592, "loss": 11.7427, "step": 153 }, { "epoch": 0.0032236456501716485, "grad_norm": 0.15035150945186615, "learning_rate": 0.0001999996304226607, "loss": 11.7337, "step": 154 }, { "epoch": 0.003244578414133802, "grad_norm": 0.15323328971862793, "learning_rate": 0.0001999996244377036, "loss": 11.7455, "step": 155 }, { "epoch": 0.003265511178095956, "grad_norm": 0.083664670586586, "learning_rate": 0.0001999996184046747, "loss": 11.7307, "step": 156 }, { "epoch": 0.0032864439420581095, "grad_norm": 0.09082930535078049, "learning_rate": 0.00019999961232357403, "loss": 11.7355, "step": 157 }, { "epoch": 0.003307376706020263, "grad_norm": 0.10267471522092819, "learning_rate": 0.00019999960619440147, "loss": 11.742, "step": 158 }, { "epoch": 0.0033283094699824165, "grad_norm": 0.11284851282835007, "learning_rate": 0.00019999960001715713, "loss": 11.7298, "step": 159 }, { "epoch": 0.00334924223394457, "grad_norm": 0.08979658782482147, "learning_rate": 0.000199999593791841, "loss": 11.7432, "step": 160 }, { "epoch": 0.0033701749979067235, "grad_norm": 0.09228113293647766, "learning_rate": 0.000199999587518453, "loss": 11.7354, "step": 161 }, { "epoch": 0.003391107761868877, "grad_norm": 0.10374166071414948, "learning_rate": 0.00019999958119699325, "loss": 11.7391, "step": 162 }, { "epoch": 0.003412040525831031, "grad_norm": 0.15176832675933838, "learning_rate": 0.00019999957482746167, "loss": 11.7249, "step": 163 }, { "epoch": 0.0034329732897931844, "grad_norm": 0.11695821583271027, "learning_rate": 0.0001999995684098583, "loss": 11.7363, "step": 164 }, { "epoch": 0.003453906053755338, "grad_norm": 0.12398631870746613, "learning_rate": 0.00019999956194418313, "loss": 11.7356, "step": 165 }, { "epoch": 0.0034748388177174914, "grad_norm": 0.09747215360403061, "learning_rate": 0.00019999955543043615, "loss": 11.7413, "step": 166 }, { "epoch": 0.003495771581679645, "grad_norm": 0.1510889083147049, "learning_rate": 0.0001999995488686174, "loss": 11.7386, "step": 167 }, { "epoch": 0.0035167043456417984, "grad_norm": 0.13680781424045563, "learning_rate": 0.00019999954225872685, "loss": 11.7324, "step": 168 }, { "epoch": 0.003537637109603952, "grad_norm": 0.11985515803098679, "learning_rate": 0.00019999953560076455, "loss": 11.7199, "step": 169 }, { "epoch": 0.003558569873566106, "grad_norm": 0.11844564974308014, "learning_rate": 0.00019999952889473044, "loss": 11.7439, "step": 170 }, { "epoch": 0.0035795026375282594, "grad_norm": 0.08239074796438217, "learning_rate": 0.00019999952214062457, "loss": 11.7339, "step": 171 }, { "epoch": 0.003600435401490413, "grad_norm": 0.10775577276945114, "learning_rate": 0.0001999995153384469, "loss": 11.7383, "step": 172 }, { "epoch": 0.0036213681654525664, "grad_norm": 0.1204642504453659, "learning_rate": 0.00019999950848819745, "loss": 11.7315, "step": 173 }, { "epoch": 0.00364230092941472, "grad_norm": 0.13413850963115692, "learning_rate": 0.00019999950158987626, "loss": 11.7441, "step": 174 }, { "epoch": 0.0036632336933768734, "grad_norm": 0.12815482914447784, "learning_rate": 0.0001999994946434833, "loss": 11.7409, "step": 175 }, { "epoch": 0.003684166457339027, "grad_norm": 0.1482478678226471, "learning_rate": 0.0001999994876490186, "loss": 11.7355, "step": 176 }, { "epoch": 0.0037050992213011804, "grad_norm": 0.11938630044460297, "learning_rate": 0.0001999994806064821, "loss": 11.7347, "step": 177 }, { "epoch": 0.0037260319852633343, "grad_norm": 0.07974015176296234, "learning_rate": 0.00019999947351587386, "loss": 11.7275, "step": 178 }, { "epoch": 0.003746964749225488, "grad_norm": 0.11754369735717773, "learning_rate": 0.00019999946637719388, "loss": 11.7217, "step": 179 }, { "epoch": 0.0037678975131876413, "grad_norm": 0.10364191234111786, "learning_rate": 0.00019999945919044217, "loss": 11.7321, "step": 180 }, { "epoch": 0.003788830277149795, "grad_norm": 0.09475631266832352, "learning_rate": 0.00019999945195561868, "loss": 11.733, "step": 181 }, { "epoch": 0.0038097630411119483, "grad_norm": 0.6879712343215942, "learning_rate": 0.00019999944467272344, "loss": 11.7558, "step": 182 }, { "epoch": 0.003830695805074102, "grad_norm": 0.10866375267505646, "learning_rate": 0.0001999994373417565, "loss": 11.729, "step": 183 }, { "epoch": 0.0038516285690362554, "grad_norm": 0.1405775547027588, "learning_rate": 0.00019999942996271782, "loss": 11.7394, "step": 184 }, { "epoch": 0.0038725613329984093, "grad_norm": 0.11082305759191513, "learning_rate": 0.0001999994225356074, "loss": 11.7262, "step": 185 }, { "epoch": 0.003893494096960563, "grad_norm": 0.14604198932647705, "learning_rate": 0.00019999941506042526, "loss": 11.7361, "step": 186 }, { "epoch": 0.003914426860922716, "grad_norm": 0.10906744748353958, "learning_rate": 0.0001999994075371714, "loss": 11.7268, "step": 187 }, { "epoch": 0.00393535962488487, "grad_norm": 0.15845496952533722, "learning_rate": 0.0001999993999658458, "loss": 11.7366, "step": 188 }, { "epoch": 0.003956292388847023, "grad_norm": 0.10984986275434494, "learning_rate": 0.00019999939234644848, "loss": 11.7363, "step": 189 }, { "epoch": 0.003977225152809177, "grad_norm": 0.12190600484609604, "learning_rate": 0.00019999938467897947, "loss": 11.7402, "step": 190 }, { "epoch": 0.00399815791677133, "grad_norm": 0.13145731389522552, "learning_rate": 0.00019999937696343878, "loss": 11.7348, "step": 191 }, { "epoch": 0.004019090680733484, "grad_norm": 0.08458924293518066, "learning_rate": 0.00019999936919982633, "loss": 11.7318, "step": 192 }, { "epoch": 0.004040023444695637, "grad_norm": 0.12550973892211914, "learning_rate": 0.00019999936138814222, "loss": 11.7392, "step": 193 }, { "epoch": 0.004060956208657791, "grad_norm": 0.12640567123889923, "learning_rate": 0.00019999935352838638, "loss": 11.7339, "step": 194 }, { "epoch": 0.004081888972619944, "grad_norm": 0.11851721256971359, "learning_rate": 0.00019999934562055886, "loss": 11.732, "step": 195 }, { "epoch": 0.004102821736582099, "grad_norm": 0.17242524027824402, "learning_rate": 0.00019999933766465965, "loss": 11.7342, "step": 196 }, { "epoch": 0.004123754500544252, "grad_norm": 0.12765124440193176, "learning_rate": 0.00019999932966068874, "loss": 11.7441, "step": 197 }, { "epoch": 0.004144687264506406, "grad_norm": 0.10178928822278976, "learning_rate": 0.0001999993216086462, "loss": 11.7338, "step": 198 }, { "epoch": 0.004165620028468559, "grad_norm": 0.10227273404598236, "learning_rate": 0.00019999931350853193, "loss": 11.7194, "step": 199 }, { "epoch": 0.004186552792430713, "grad_norm": 0.1342269480228424, "learning_rate": 0.00019999930536034603, "loss": 11.7242, "step": 200 }, { "epoch": 0.004207485556392866, "grad_norm": 0.08597622811794281, "learning_rate": 0.00019999929716408841, "loss": 11.7271, "step": 201 }, { "epoch": 0.00422841832035502, "grad_norm": 0.12731815874576569, "learning_rate": 0.00019999928891975915, "loss": 11.7389, "step": 202 }, { "epoch": 0.004249351084317173, "grad_norm": 0.11810269206762314, "learning_rate": 0.00019999928062735823, "loss": 11.7292, "step": 203 }, { "epoch": 0.004270283848279327, "grad_norm": 0.10169169306755066, "learning_rate": 0.00019999927228688563, "loss": 11.7232, "step": 204 }, { "epoch": 0.00429121661224148, "grad_norm": 0.13938811421394348, "learning_rate": 0.0001999992638983414, "loss": 11.7206, "step": 205 }, { "epoch": 0.004312149376203634, "grad_norm": 0.11436536908149719, "learning_rate": 0.0001999992554617255, "loss": 11.7355, "step": 206 }, { "epoch": 0.004333082140165787, "grad_norm": 0.09243818372488022, "learning_rate": 0.000199999246977038, "loss": 11.7271, "step": 207 }, { "epoch": 0.004354014904127941, "grad_norm": 0.10453025251626968, "learning_rate": 0.0001999992384442788, "loss": 11.7345, "step": 208 }, { "epoch": 0.004374947668090094, "grad_norm": 0.19366972148418427, "learning_rate": 0.000199999229863448, "loss": 11.7376, "step": 209 }, { "epoch": 0.004395880432052249, "grad_norm": 0.08968038111925125, "learning_rate": 0.00019999922123454554, "loss": 11.7272, "step": 210 }, { "epoch": 0.004416813196014402, "grad_norm": 0.09770773351192474, "learning_rate": 0.00019999921255757145, "loss": 11.7151, "step": 211 }, { "epoch": 0.004437745959976556, "grad_norm": 0.11220710724592209, "learning_rate": 0.00019999920383252578, "loss": 11.7275, "step": 212 }, { "epoch": 0.004458678723938709, "grad_norm": 0.19314682483673096, "learning_rate": 0.00019999919505940845, "loss": 11.7312, "step": 213 }, { "epoch": 0.004479611487900863, "grad_norm": 0.11207528412342072, "learning_rate": 0.00019999918623821953, "loss": 11.7365, "step": 214 }, { "epoch": 0.004500544251863016, "grad_norm": 0.16487692296504974, "learning_rate": 0.00019999917736895895, "loss": 11.7391, "step": 215 }, { "epoch": 0.00452147701582517, "grad_norm": 0.09422063082456589, "learning_rate": 0.00019999916845162677, "loss": 11.7211, "step": 216 }, { "epoch": 0.004542409779787323, "grad_norm": 0.12668757140636444, "learning_rate": 0.00019999915948622302, "loss": 11.7264, "step": 217 }, { "epoch": 0.004563342543749477, "grad_norm": 0.09620699286460876, "learning_rate": 0.00019999915047274765, "loss": 11.7294, "step": 218 }, { "epoch": 0.00458427530771163, "grad_norm": 0.11569945514202118, "learning_rate": 0.0001999991414112007, "loss": 11.7274, "step": 219 }, { "epoch": 0.004605208071673784, "grad_norm": 0.08343329280614853, "learning_rate": 0.00019999913230158215, "loss": 11.7228, "step": 220 }, { "epoch": 0.004626140835635937, "grad_norm": 0.1368834376335144, "learning_rate": 0.000199999123143892, "loss": 11.7299, "step": 221 }, { "epoch": 0.004647073599598091, "grad_norm": 0.11829771101474762, "learning_rate": 0.00019999911393813027, "loss": 11.7438, "step": 222 }, { "epoch": 0.004668006363560244, "grad_norm": 0.16703495383262634, "learning_rate": 0.00019999910468429698, "loss": 11.7272, "step": 223 }, { "epoch": 0.004688939127522398, "grad_norm": 0.11212864518165588, "learning_rate": 0.0001999990953823921, "loss": 11.7301, "step": 224 }, { "epoch": 0.004709871891484552, "grad_norm": 0.1128857284784317, "learning_rate": 0.00019999908603241567, "loss": 11.7171, "step": 225 }, { "epoch": 0.0047308046554467055, "grad_norm": 0.11656668037176132, "learning_rate": 0.00019999907663436766, "loss": 11.7373, "step": 226 }, { "epoch": 0.004751737419408859, "grad_norm": 0.11369594186544418, "learning_rate": 0.00019999906718824807, "loss": 11.7271, "step": 227 }, { "epoch": 0.0047726701833710125, "grad_norm": 0.12390772253274918, "learning_rate": 0.00019999905769405696, "loss": 11.7256, "step": 228 }, { "epoch": 0.004793602947333166, "grad_norm": 0.10221050679683685, "learning_rate": 0.00019999904815179429, "loss": 11.7295, "step": 229 }, { "epoch": 0.0048145357112953195, "grad_norm": 0.12125889211893082, "learning_rate": 0.00019999903856146006, "loss": 11.7157, "step": 230 }, { "epoch": 0.004835468475257473, "grad_norm": 0.09916162490844727, "learning_rate": 0.00019999902892305432, "loss": 11.7225, "step": 231 }, { "epoch": 0.0048564012392196265, "grad_norm": 0.14593826234340668, "learning_rate": 0.000199999019236577, "loss": 11.7203, "step": 232 }, { "epoch": 0.00487733400318178, "grad_norm": 0.1189965009689331, "learning_rate": 0.00019999900950202815, "loss": 11.7422, "step": 233 }, { "epoch": 0.0048982667671439335, "grad_norm": 0.13066788017749786, "learning_rate": 0.0001999989997194078, "loss": 11.7342, "step": 234 }, { "epoch": 0.004919199531106087, "grad_norm": 0.15421953797340393, "learning_rate": 0.00019999898988871592, "loss": 11.7254, "step": 235 }, { "epoch": 0.0049401322950682405, "grad_norm": 0.17863306403160095, "learning_rate": 0.0001999989800099525, "loss": 11.7274, "step": 236 }, { "epoch": 0.004961065059030394, "grad_norm": 0.10821910947561264, "learning_rate": 0.0001999989700831176, "loss": 11.7306, "step": 237 }, { "epoch": 0.0049819978229925475, "grad_norm": 0.1044483482837677, "learning_rate": 0.00019999896010821117, "loss": 11.7375, "step": 238 }, { "epoch": 0.005002930586954702, "grad_norm": 0.12779177725315094, "learning_rate": 0.00019999895008523323, "loss": 11.7411, "step": 239 }, { "epoch": 0.005023863350916855, "grad_norm": 0.1219925805926323, "learning_rate": 0.00019999894001418382, "loss": 11.722, "step": 240 }, { "epoch": 0.005044796114879009, "grad_norm": 0.12229987233877182, "learning_rate": 0.0001999989298950629, "loss": 11.7308, "step": 241 }, { "epoch": 0.005065728878841162, "grad_norm": 0.1428784281015396, "learning_rate": 0.00019999891972787047, "loss": 11.7118, "step": 242 }, { "epoch": 0.005086661642803316, "grad_norm": 0.12454595416784286, "learning_rate": 0.0001999989095126066, "loss": 11.7268, "step": 243 }, { "epoch": 0.005107594406765469, "grad_norm": 0.2082051783800125, "learning_rate": 0.0001999988992492712, "loss": 11.7348, "step": 244 }, { "epoch": 0.005128527170727623, "grad_norm": 0.10442791879177094, "learning_rate": 0.00019999888893786435, "loss": 11.7294, "step": 245 }, { "epoch": 0.005149459934689776, "grad_norm": 0.12092941254377365, "learning_rate": 0.00019999887857838605, "loss": 11.7354, "step": 246 }, { "epoch": 0.00517039269865193, "grad_norm": 0.13045713305473328, "learning_rate": 0.00019999886817083625, "loss": 11.7211, "step": 247 }, { "epoch": 0.005191325462614083, "grad_norm": 0.09891767054796219, "learning_rate": 0.000199998857715215, "loss": 11.734, "step": 248 }, { "epoch": 0.005212258226576237, "grad_norm": 0.09108012169599533, "learning_rate": 0.0001999988472115223, "loss": 11.7218, "step": 249 }, { "epoch": 0.0052331909905383904, "grad_norm": 0.17259055376052856, "learning_rate": 0.00019999883665975814, "loss": 11.7351, "step": 250 }, { "epoch": 0.005254123754500544, "grad_norm": 0.10633157938718796, "learning_rate": 0.00019999882605992254, "loss": 11.7309, "step": 251 }, { "epoch": 0.0052750565184626974, "grad_norm": 0.15078407526016235, "learning_rate": 0.0001999988154120155, "loss": 11.7144, "step": 252 }, { "epoch": 0.005295989282424851, "grad_norm": 0.14342956244945526, "learning_rate": 0.00019999880471603706, "loss": 11.7358, "step": 253 }, { "epoch": 0.005316922046387005, "grad_norm": 0.13007716834545135, "learning_rate": 0.00019999879397198712, "loss": 11.7339, "step": 254 }, { "epoch": 0.005337854810349159, "grad_norm": 0.12723809480667114, "learning_rate": 0.0001999987831798658, "loss": 11.7203, "step": 255 }, { "epoch": 0.005358787574311312, "grad_norm": 0.11550663411617279, "learning_rate": 0.00019999877233967307, "loss": 11.7328, "step": 256 }, { "epoch": 0.005379720338273466, "grad_norm": 0.11513753235340118, "learning_rate": 0.0001999987614514089, "loss": 11.7217, "step": 257 }, { "epoch": 0.005400653102235619, "grad_norm": 0.11104996502399445, "learning_rate": 0.00019999875051507333, "loss": 11.727, "step": 258 }, { "epoch": 0.005421585866197773, "grad_norm": 0.11175264418125153, "learning_rate": 0.00019999873953066637, "loss": 11.7271, "step": 259 }, { "epoch": 0.005442518630159926, "grad_norm": 0.15607057511806488, "learning_rate": 0.000199998728498188, "loss": 11.7184, "step": 260 }, { "epoch": 0.00546345139412208, "grad_norm": 0.11607147008180618, "learning_rate": 0.00019999871741763824, "loss": 11.7362, "step": 261 }, { "epoch": 0.005484384158084233, "grad_norm": 0.10691114515066147, "learning_rate": 0.00019999870628901708, "loss": 11.7222, "step": 262 }, { "epoch": 0.005505316922046387, "grad_norm": 0.10972463339567184, "learning_rate": 0.00019999869511232456, "loss": 11.7317, "step": 263 }, { "epoch": 0.00552624968600854, "grad_norm": 0.11182203888893127, "learning_rate": 0.00019999868388756065, "loss": 11.7241, "step": 264 }, { "epoch": 0.005547182449970694, "grad_norm": 0.11121466010808945, "learning_rate": 0.00019999867261472537, "loss": 11.7251, "step": 265 }, { "epoch": 0.005568115213932847, "grad_norm": 0.1265299767255783, "learning_rate": 0.00019999866129381873, "loss": 11.7257, "step": 266 }, { "epoch": 0.005589047977895001, "grad_norm": 0.10071392357349396, "learning_rate": 0.00019999864992484073, "loss": 11.7336, "step": 267 }, { "epoch": 0.005609980741857155, "grad_norm": 0.16548822820186615, "learning_rate": 0.00019999863850779138, "loss": 11.7238, "step": 268 }, { "epoch": 0.005630913505819309, "grad_norm": 0.1492125391960144, "learning_rate": 0.00019999862704267067, "loss": 11.7254, "step": 269 }, { "epoch": 0.005651846269781462, "grad_norm": 0.14246973395347595, "learning_rate": 0.0001999986155294786, "loss": 11.7365, "step": 270 }, { "epoch": 0.005672779033743616, "grad_norm": 0.1154094710946083, "learning_rate": 0.0001999986039682152, "loss": 11.7173, "step": 271 }, { "epoch": 0.005693711797705769, "grad_norm": 0.09431154280900955, "learning_rate": 0.0001999985923588805, "loss": 11.7294, "step": 272 }, { "epoch": 0.005714644561667923, "grad_norm": 0.12854285538196564, "learning_rate": 0.00019999858070147446, "loss": 11.7232, "step": 273 }, { "epoch": 0.005735577325630076, "grad_norm": 0.13421416282653809, "learning_rate": 0.00019999856899599708, "loss": 11.7198, "step": 274 }, { "epoch": 0.00575651008959223, "grad_norm": 0.08885840326547623, "learning_rate": 0.00019999855724244837, "loss": 11.7286, "step": 275 }, { "epoch": 0.005777442853554383, "grad_norm": 0.1374874860048294, "learning_rate": 0.00019999854544082836, "loss": 11.7217, "step": 276 }, { "epoch": 0.005798375617516537, "grad_norm": 0.12132551521062851, "learning_rate": 0.00019999853359113707, "loss": 11.7194, "step": 277 }, { "epoch": 0.00581930838147869, "grad_norm": 0.11318861693143845, "learning_rate": 0.00019999852169337449, "loss": 11.7253, "step": 278 }, { "epoch": 0.005840241145440844, "grad_norm": 0.15406005084514618, "learning_rate": 0.00019999850974754057, "loss": 11.7218, "step": 279 }, { "epoch": 0.005861173909402997, "grad_norm": 0.12698601186275482, "learning_rate": 0.00019999849775363538, "loss": 11.727, "step": 280 }, { "epoch": 0.005882106673365151, "grad_norm": 0.1401127278804779, "learning_rate": 0.00019999848571165894, "loss": 11.7333, "step": 281 }, { "epoch": 0.005903039437327305, "grad_norm": 0.10699697583913803, "learning_rate": 0.00019999847362161117, "loss": 11.7141, "step": 282 }, { "epoch": 0.005923972201289459, "grad_norm": 0.11419758945703506, "learning_rate": 0.00019999846148349216, "loss": 11.7231, "step": 283 }, { "epoch": 0.005944904965251612, "grad_norm": 0.12719473242759705, "learning_rate": 0.0001999984492973019, "loss": 11.7243, "step": 284 }, { "epoch": 0.005965837729213766, "grad_norm": 0.12289194017648697, "learning_rate": 0.00019999843706304036, "loss": 11.7234, "step": 285 }, { "epoch": 0.005986770493175919, "grad_norm": 0.14601583778858185, "learning_rate": 0.00019999842478070755, "loss": 11.7179, "step": 286 }, { "epoch": 0.006007703257138073, "grad_norm": 0.16009001433849335, "learning_rate": 0.00019999841245030352, "loss": 11.7157, "step": 287 }, { "epoch": 0.006028636021100226, "grad_norm": 0.15164048969745636, "learning_rate": 0.00019999840007182824, "loss": 11.7247, "step": 288 }, { "epoch": 0.00604956878506238, "grad_norm": 0.14845716953277588, "learning_rate": 0.00019999838764528174, "loss": 11.7192, "step": 289 }, { "epoch": 0.006070501549024533, "grad_norm": 0.11396089941263199, "learning_rate": 0.000199998375170664, "loss": 11.7285, "step": 290 }, { "epoch": 0.006091434312986687, "grad_norm": 0.12016797810792923, "learning_rate": 0.00019999836264797502, "loss": 11.7246, "step": 291 }, { "epoch": 0.00611236707694884, "grad_norm": 0.15306958556175232, "learning_rate": 0.00019999835007721486, "loss": 11.7129, "step": 292 }, { "epoch": 0.006133299840910994, "grad_norm": 0.12025389820337296, "learning_rate": 0.00019999833745838345, "loss": 11.7255, "step": 293 }, { "epoch": 0.006154232604873147, "grad_norm": 0.14016832411289215, "learning_rate": 0.00019999832479148085, "loss": 11.7303, "step": 294 }, { "epoch": 0.006175165368835301, "grad_norm": 0.12113378196954727, "learning_rate": 0.00019999831207650708, "loss": 11.7214, "step": 295 }, { "epoch": 0.006196098132797454, "grad_norm": 0.12439002096652985, "learning_rate": 0.0001999982993134621, "loss": 11.7257, "step": 296 }, { "epoch": 0.0062170308967596085, "grad_norm": 0.12968777120113373, "learning_rate": 0.00019999828650234591, "loss": 11.7253, "step": 297 }, { "epoch": 0.006237963660721762, "grad_norm": 0.15094883739948273, "learning_rate": 0.00019999827364315854, "loss": 11.7128, "step": 298 }, { "epoch": 0.0062588964246839155, "grad_norm": 0.22616416215896606, "learning_rate": 0.00019999826073590003, "loss": 11.7299, "step": 299 }, { "epoch": 0.006279829188646069, "grad_norm": 0.1618216186761856, "learning_rate": 0.00019999824778057032, "loss": 11.7172, "step": 300 }, { "epoch": 0.0063007619526082225, "grad_norm": 0.12861861288547516, "learning_rate": 0.00019999823477716947, "loss": 11.7207, "step": 301 }, { "epoch": 0.006321694716570376, "grad_norm": 0.1206851378083229, "learning_rate": 0.00019999822172569746, "loss": 11.7194, "step": 302 }, { "epoch": 0.0063426274805325296, "grad_norm": 0.157434344291687, "learning_rate": 0.0001999982086261543, "loss": 11.7332, "step": 303 }, { "epoch": 0.006363560244494683, "grad_norm": 0.13538406789302826, "learning_rate": 0.00019999819547854, "loss": 11.716, "step": 304 }, { "epoch": 0.0063844930084568366, "grad_norm": 0.14746001362800598, "learning_rate": 0.00019999818228285453, "loss": 11.7297, "step": 305 }, { "epoch": 0.00640542577241899, "grad_norm": 0.12979772686958313, "learning_rate": 0.000199998169039098, "loss": 11.7183, "step": 306 }, { "epoch": 0.0064263585363811436, "grad_norm": 0.0935865119099617, "learning_rate": 0.00019999815574727028, "loss": 11.7158, "step": 307 }, { "epoch": 0.006447291300343297, "grad_norm": 0.11043119430541992, "learning_rate": 0.00019999814240737146, "loss": 11.7178, "step": 308 }, { "epoch": 0.006468224064305451, "grad_norm": 0.12452530860900879, "learning_rate": 0.00019999812901940156, "loss": 11.7181, "step": 309 }, { "epoch": 0.006489156828267604, "grad_norm": 0.12689433991909027, "learning_rate": 0.00019999811558336051, "loss": 11.7313, "step": 310 }, { "epoch": 0.0065100895922297584, "grad_norm": 0.1276911348104477, "learning_rate": 0.00019999810209924839, "loss": 11.7122, "step": 311 }, { "epoch": 0.006531022356191912, "grad_norm": 0.1381811946630478, "learning_rate": 0.00019999808856706517, "loss": 11.7182, "step": 312 }, { "epoch": 0.0065519551201540654, "grad_norm": 0.14837948977947235, "learning_rate": 0.00019999807498681087, "loss": 11.736, "step": 313 }, { "epoch": 0.006572887884116219, "grad_norm": 0.13823091983795166, "learning_rate": 0.0001999980613584855, "loss": 11.7321, "step": 314 }, { "epoch": 0.0065938206480783725, "grad_norm": 0.09496866166591644, "learning_rate": 0.00019999804768208902, "loss": 11.7175, "step": 315 }, { "epoch": 0.006614753412040526, "grad_norm": 0.12531335651874542, "learning_rate": 0.00019999803395762152, "loss": 11.7151, "step": 316 }, { "epoch": 0.0066356861760026795, "grad_norm": 0.1311008185148239, "learning_rate": 0.00019999802018508294, "loss": 11.7297, "step": 317 }, { "epoch": 0.006656618939964833, "grad_norm": 0.11202789098024368, "learning_rate": 0.0001999980063644733, "loss": 11.7237, "step": 318 }, { "epoch": 0.0066775517039269865, "grad_norm": 0.11284607648849487, "learning_rate": 0.0001999979924957926, "loss": 11.7151, "step": 319 }, { "epoch": 0.00669848446788914, "grad_norm": 0.11950270086526871, "learning_rate": 0.0001999979785790409, "loss": 11.7186, "step": 320 }, { "epoch": 0.0067194172318512935, "grad_norm": 0.14908497035503387, "learning_rate": 0.00019999796461421816, "loss": 11.7187, "step": 321 }, { "epoch": 0.006740349995813447, "grad_norm": 0.12809623777866364, "learning_rate": 0.00019999795060132437, "loss": 11.7204, "step": 322 }, { "epoch": 0.0067612827597756005, "grad_norm": 0.13998238742351532, "learning_rate": 0.00019999793654035958, "loss": 11.7135, "step": 323 }, { "epoch": 0.006782215523737754, "grad_norm": 0.15981042385101318, "learning_rate": 0.00019999792243132378, "loss": 11.7236, "step": 324 }, { "epoch": 0.0068031482876999075, "grad_norm": 0.13574174046516418, "learning_rate": 0.00019999790827421698, "loss": 11.7245, "step": 325 }, { "epoch": 0.006824081051662062, "grad_norm": 0.11066830158233643, "learning_rate": 0.00019999789406903914, "loss": 11.7061, "step": 326 }, { "epoch": 0.006845013815624215, "grad_norm": 0.12673094868659973, "learning_rate": 0.00019999787981579036, "loss": 11.7306, "step": 327 }, { "epoch": 0.006865946579586369, "grad_norm": 0.13206607103347778, "learning_rate": 0.00019999786551447058, "loss": 11.7229, "step": 328 }, { "epoch": 0.006886879343548522, "grad_norm": 0.1523435413837433, "learning_rate": 0.00019999785116507984, "loss": 11.7182, "step": 329 }, { "epoch": 0.006907812107510676, "grad_norm": 0.1070917546749115, "learning_rate": 0.00019999783676761808, "loss": 11.7257, "step": 330 }, { "epoch": 0.006928744871472829, "grad_norm": 0.105750672519207, "learning_rate": 0.0001999978223220854, "loss": 11.7209, "step": 331 }, { "epoch": 0.006949677635434983, "grad_norm": 0.13904407620429993, "learning_rate": 0.00019999780782848175, "loss": 11.7256, "step": 332 }, { "epoch": 0.006970610399397136, "grad_norm": 0.12350620329380035, "learning_rate": 0.0001999977932868071, "loss": 11.723, "step": 333 }, { "epoch": 0.00699154316335929, "grad_norm": 0.15382295846939087, "learning_rate": 0.00019999777869706157, "loss": 11.7264, "step": 334 }, { "epoch": 0.007012475927321443, "grad_norm": 0.14271296560764313, "learning_rate": 0.0001999977640592451, "loss": 11.7123, "step": 335 }, { "epoch": 0.007033408691283597, "grad_norm": 0.14750760793685913, "learning_rate": 0.00019999774937335768, "loss": 11.7336, "step": 336 }, { "epoch": 0.00705434145524575, "grad_norm": 0.11037807166576385, "learning_rate": 0.00019999773463939935, "loss": 11.72, "step": 337 }, { "epoch": 0.007075274219207904, "grad_norm": 0.1511078029870987, "learning_rate": 0.0001999977198573701, "loss": 11.7234, "step": 338 }, { "epoch": 0.007096206983170057, "grad_norm": 0.1403762698173523, "learning_rate": 0.00019999770502726993, "loss": 11.7175, "step": 339 }, { "epoch": 0.007117139747132212, "grad_norm": 0.10984516888856888, "learning_rate": 0.00019999769014909888, "loss": 11.7112, "step": 340 }, { "epoch": 0.007138072511094365, "grad_norm": 0.13330620527267456, "learning_rate": 0.00019999767522285692, "loss": 11.7282, "step": 341 }, { "epoch": 0.007159005275056519, "grad_norm": 0.15846803784370422, "learning_rate": 0.00019999766024854406, "loss": 11.7068, "step": 342 }, { "epoch": 0.007179938039018672, "grad_norm": 0.11744634062051773, "learning_rate": 0.00019999764522616036, "loss": 11.719, "step": 343 }, { "epoch": 0.007200870802980826, "grad_norm": 0.13380278646945953, "learning_rate": 0.00019999763015570576, "loss": 11.7118, "step": 344 }, { "epoch": 0.007221803566942979, "grad_norm": 0.10805917531251907, "learning_rate": 0.0001999976150371803, "loss": 11.7212, "step": 345 }, { "epoch": 0.007242736330905133, "grad_norm": 0.15449413657188416, "learning_rate": 0.000199997599870584, "loss": 11.7328, "step": 346 }, { "epoch": 0.007263669094867286, "grad_norm": 0.11984492093324661, "learning_rate": 0.00019999758465591682, "loss": 11.7197, "step": 347 }, { "epoch": 0.00728460185882944, "grad_norm": 0.10446450859308243, "learning_rate": 0.0001999975693931788, "loss": 11.7235, "step": 348 }, { "epoch": 0.007305534622791593, "grad_norm": 0.14321835339069366, "learning_rate": 0.00019999755408236998, "loss": 11.7162, "step": 349 }, { "epoch": 0.007326467386753747, "grad_norm": 0.11513587087392807, "learning_rate": 0.00019999753872349029, "loss": 11.7311, "step": 350 }, { "epoch": 0.0073474001507159, "grad_norm": 0.17049960792064667, "learning_rate": 0.00019999752331653983, "loss": 11.7166, "step": 351 }, { "epoch": 0.007368332914678054, "grad_norm": 0.14883778989315033, "learning_rate": 0.0001999975078615185, "loss": 11.7117, "step": 352 }, { "epoch": 0.007389265678640207, "grad_norm": 0.127522274851799, "learning_rate": 0.0001999974923584264, "loss": 11.719, "step": 353 }, { "epoch": 0.007410198442602361, "grad_norm": 0.13675598800182343, "learning_rate": 0.00019999747680726347, "loss": 11.7201, "step": 354 }, { "epoch": 0.007431131206564515, "grad_norm": 0.11587342619895935, "learning_rate": 0.0001999974612080298, "loss": 11.7178, "step": 355 }, { "epoch": 0.007452063970526669, "grad_norm": 0.11556491255760193, "learning_rate": 0.0001999974455607253, "loss": 11.7242, "step": 356 }, { "epoch": 0.007472996734488822, "grad_norm": 0.15339012444019318, "learning_rate": 0.00019999742986535007, "loss": 11.7306, "step": 357 }, { "epoch": 0.007493929498450976, "grad_norm": 0.10457748174667358, "learning_rate": 0.000199997414121904, "loss": 11.7126, "step": 358 }, { "epoch": 0.007514862262413129, "grad_norm": 0.11597443372011185, "learning_rate": 0.00019999739833038725, "loss": 11.7167, "step": 359 }, { "epoch": 0.007535795026375283, "grad_norm": 0.14202773571014404, "learning_rate": 0.00019999738249079968, "loss": 11.7314, "step": 360 }, { "epoch": 0.007556727790337436, "grad_norm": 0.1350574940443039, "learning_rate": 0.0001999973666031414, "loss": 11.7167, "step": 361 }, { "epoch": 0.00757766055429959, "grad_norm": 0.11307045072317123, "learning_rate": 0.00019999735066741236, "loss": 11.7298, "step": 362 }, { "epoch": 0.007598593318261743, "grad_norm": 0.12380598485469818, "learning_rate": 0.0001999973346836126, "loss": 11.7228, "step": 363 }, { "epoch": 0.007619526082223897, "grad_norm": 0.17077882587909698, "learning_rate": 0.00019999731865174213, "loss": 11.7148, "step": 364 }, { "epoch": 0.00764045884618605, "grad_norm": 0.2453930824995041, "learning_rate": 0.00019999730257180094, "loss": 11.7197, "step": 365 }, { "epoch": 0.007661391610148204, "grad_norm": 0.12945899367332458, "learning_rate": 0.00019999728644378903, "loss": 11.7131, "step": 366 }, { "epoch": 0.007682324374110357, "grad_norm": 0.14711779356002808, "learning_rate": 0.00019999727026770642, "loss": 11.7335, "step": 367 }, { "epoch": 0.007703257138072511, "grad_norm": 0.12876446545124054, "learning_rate": 0.00019999725404355314, "loss": 11.7186, "step": 368 }, { "epoch": 0.007724189902034665, "grad_norm": 0.17758320271968842, "learning_rate": 0.00019999723777132915, "loss": 11.7187, "step": 369 }, { "epoch": 0.007745122665996819, "grad_norm": 0.13317854702472687, "learning_rate": 0.00019999722145103453, "loss": 11.7246, "step": 370 }, { "epoch": 0.007766055429958972, "grad_norm": 0.16571950912475586, "learning_rate": 0.00019999720508266918, "loss": 11.7109, "step": 371 }, { "epoch": 0.007786988193921126, "grad_norm": 0.14521849155426025, "learning_rate": 0.0001999971886662332, "loss": 11.7147, "step": 372 }, { "epoch": 0.007807920957883279, "grad_norm": 0.1862119734287262, "learning_rate": 0.00019999717220172655, "loss": 11.7249, "step": 373 }, { "epoch": 0.007828853721845433, "grad_norm": 0.1525663584470749, "learning_rate": 0.0001999971556891493, "loss": 11.7247, "step": 374 }, { "epoch": 0.007849786485807586, "grad_norm": 0.14184141159057617, "learning_rate": 0.00019999713912850138, "loss": 11.7286, "step": 375 }, { "epoch": 0.00787071924976974, "grad_norm": 0.13390614092350006, "learning_rate": 0.00019999712251978283, "loss": 11.7231, "step": 376 }, { "epoch": 0.007891652013731893, "grad_norm": 0.13468481600284576, "learning_rate": 0.00019999710586299365, "loss": 11.7105, "step": 377 }, { "epoch": 0.007912584777694047, "grad_norm": 0.1435643583536148, "learning_rate": 0.00019999708915813388, "loss": 11.7244, "step": 378 }, { "epoch": 0.0079335175416562, "grad_norm": 0.1099109873175621, "learning_rate": 0.0001999970724052035, "loss": 11.7024, "step": 379 }, { "epoch": 0.007954450305618354, "grad_norm": 0.11935173720121384, "learning_rate": 0.00019999705560420252, "loss": 11.7276, "step": 380 }, { "epoch": 0.007975383069580507, "grad_norm": 0.14090566337108612, "learning_rate": 0.00019999703875513096, "loss": 11.7172, "step": 381 }, { "epoch": 0.00799631583354266, "grad_norm": 0.13572712242603302, "learning_rate": 0.0001999970218579888, "loss": 11.7185, "step": 382 }, { "epoch": 0.008017248597504814, "grad_norm": 0.12949134409427643, "learning_rate": 0.0001999970049127761, "loss": 11.7277, "step": 383 }, { "epoch": 0.008038181361466968, "grad_norm": 0.12604030966758728, "learning_rate": 0.00019999698791949277, "loss": 11.7125, "step": 384 }, { "epoch": 0.008059114125429121, "grad_norm": 0.11403421312570572, "learning_rate": 0.00019999697087813893, "loss": 11.7126, "step": 385 }, { "epoch": 0.008080046889391275, "grad_norm": 0.1483432501554489, "learning_rate": 0.00019999695378871457, "loss": 11.7214, "step": 386 }, { "epoch": 0.008100979653353428, "grad_norm": 0.11562389135360718, "learning_rate": 0.00019999693665121964, "loss": 11.7075, "step": 387 }, { "epoch": 0.008121912417315582, "grad_norm": 0.1695457547903061, "learning_rate": 0.00019999691946565417, "loss": 11.7138, "step": 388 }, { "epoch": 0.008142845181277735, "grad_norm": 0.12327993661165237, "learning_rate": 0.00019999690223201818, "loss": 11.7207, "step": 389 }, { "epoch": 0.008163777945239889, "grad_norm": 0.16508711874485016, "learning_rate": 0.00019999688495031168, "loss": 11.7111, "step": 390 }, { "epoch": 0.008184710709202044, "grad_norm": 0.1749548614025116, "learning_rate": 0.0001999968676205347, "loss": 11.7243, "step": 391 }, { "epoch": 0.008205643473164197, "grad_norm": 0.13734187185764313, "learning_rate": 0.00019999685024268718, "loss": 11.717, "step": 392 }, { "epoch": 0.008226576237126351, "grad_norm": 0.18016208708286285, "learning_rate": 0.0001999968328167692, "loss": 11.7233, "step": 393 }, { "epoch": 0.008247509001088504, "grad_norm": 0.1413753628730774, "learning_rate": 0.0001999968153427807, "loss": 11.712, "step": 394 }, { "epoch": 0.008268441765050658, "grad_norm": 0.14903607964515686, "learning_rate": 0.00019999679782072176, "loss": 11.7209, "step": 395 }, { "epoch": 0.008289374529012811, "grad_norm": 0.17239217460155487, "learning_rate": 0.00019999678025059235, "loss": 11.7095, "step": 396 }, { "epoch": 0.008310307292974965, "grad_norm": 0.11457118391990662, "learning_rate": 0.00019999676263239246, "loss": 11.7145, "step": 397 }, { "epoch": 0.008331240056937118, "grad_norm": 0.16116507351398468, "learning_rate": 0.00019999674496612216, "loss": 11.7246, "step": 398 }, { "epoch": 0.008352172820899272, "grad_norm": 0.14528557658195496, "learning_rate": 0.0001999967272517814, "loss": 11.7048, "step": 399 }, { "epoch": 0.008373105584861425, "grad_norm": 0.1997375339269638, "learning_rate": 0.00019999670948937022, "loss": 11.7085, "step": 400 }, { "epoch": 0.008394038348823579, "grad_norm": 0.13732677698135376, "learning_rate": 0.0001999966916788886, "loss": 11.7183, "step": 401 }, { "epoch": 0.008414971112785732, "grad_norm": 0.14167188107967377, "learning_rate": 0.00019999667382033658, "loss": 11.7109, "step": 402 }, { "epoch": 0.008435903876747886, "grad_norm": 0.13820306956768036, "learning_rate": 0.00019999665591371416, "loss": 11.7224, "step": 403 }, { "epoch": 0.00845683664071004, "grad_norm": 0.16800788044929504, "learning_rate": 0.00019999663795902133, "loss": 11.7209, "step": 404 }, { "epoch": 0.008477769404672193, "grad_norm": 0.1236567348241806, "learning_rate": 0.00019999661995625812, "loss": 11.7175, "step": 405 }, { "epoch": 0.008498702168634346, "grad_norm": 0.13569577038288116, "learning_rate": 0.00019999660190542454, "loss": 11.7152, "step": 406 }, { "epoch": 0.0085196349325965, "grad_norm": 0.12360803037881851, "learning_rate": 0.00019999658380652057, "loss": 11.7083, "step": 407 }, { "epoch": 0.008540567696558653, "grad_norm": 0.12019111216068268, "learning_rate": 0.00019999656565954625, "loss": 11.7183, "step": 408 }, { "epoch": 0.008561500460520807, "grad_norm": 0.13323119282722473, "learning_rate": 0.00019999654746450155, "loss": 11.7162, "step": 409 }, { "epoch": 0.00858243322448296, "grad_norm": 0.11168857663869858, "learning_rate": 0.00019999652922138655, "loss": 11.7146, "step": 410 }, { "epoch": 0.008603365988445114, "grad_norm": 0.1847880631685257, "learning_rate": 0.0001999965109302012, "loss": 11.6885, "step": 411 }, { "epoch": 0.008624298752407267, "grad_norm": 0.13330583274364471, "learning_rate": 0.00019999649259094552, "loss": 11.715, "step": 412 }, { "epoch": 0.008645231516369421, "grad_norm": 0.1324671357870102, "learning_rate": 0.00019999647420361951, "loss": 11.7235, "step": 413 }, { "epoch": 0.008666164280331574, "grad_norm": 0.11592907458543777, "learning_rate": 0.00019999645576822318, "loss": 11.7012, "step": 414 }, { "epoch": 0.008687097044293728, "grad_norm": 0.12120091170072556, "learning_rate": 0.00019999643728475656, "loss": 11.7108, "step": 415 }, { "epoch": 0.008708029808255881, "grad_norm": 0.1283930242061615, "learning_rate": 0.00019999641875321968, "loss": 11.7083, "step": 416 }, { "epoch": 0.008728962572218035, "grad_norm": 0.16387324035167694, "learning_rate": 0.0001999964001736125, "loss": 11.6996, "step": 417 }, { "epoch": 0.008749895336180188, "grad_norm": 0.10809442400932312, "learning_rate": 0.00019999638154593507, "loss": 11.7076, "step": 418 }, { "epoch": 0.008770828100142342, "grad_norm": 0.1386757642030716, "learning_rate": 0.0001999963628701873, "loss": 11.7063, "step": 419 }, { "epoch": 0.008791760864104497, "grad_norm": 0.12644200026988983, "learning_rate": 0.00019999634414636933, "loss": 11.7123, "step": 420 }, { "epoch": 0.00881269362806665, "grad_norm": 0.14850223064422607, "learning_rate": 0.00019999632537448113, "loss": 11.7186, "step": 421 }, { "epoch": 0.008833626392028804, "grad_norm": 0.14020271599292755, "learning_rate": 0.00019999630655452263, "loss": 11.7074, "step": 422 }, { "epoch": 0.008854559155990958, "grad_norm": 0.13681097328662872, "learning_rate": 0.00019999628768649393, "loss": 11.7221, "step": 423 }, { "epoch": 0.008875491919953111, "grad_norm": 0.13779480755329132, "learning_rate": 0.00019999626877039505, "loss": 11.7138, "step": 424 }, { "epoch": 0.008896424683915265, "grad_norm": 0.19677479565143585, "learning_rate": 0.00019999624980622594, "loss": 11.7229, "step": 425 }, { "epoch": 0.008917357447877418, "grad_norm": 0.16233675181865692, "learning_rate": 0.00019999623079398662, "loss": 11.7216, "step": 426 }, { "epoch": 0.008938290211839572, "grad_norm": 0.16977085173130035, "learning_rate": 0.00019999621173367711, "loss": 11.7065, "step": 427 }, { "epoch": 0.008959222975801725, "grad_norm": 0.13839226961135864, "learning_rate": 0.0001999961926252974, "loss": 11.7159, "step": 428 }, { "epoch": 0.008980155739763879, "grad_norm": 0.11970621347427368, "learning_rate": 0.00019999617346884752, "loss": 11.7117, "step": 429 }, { "epoch": 0.009001088503726032, "grad_norm": 0.1311412751674652, "learning_rate": 0.00019999615426432747, "loss": 11.7133, "step": 430 }, { "epoch": 0.009022021267688186, "grad_norm": 0.15262964367866516, "learning_rate": 0.00019999613501173728, "loss": 11.7126, "step": 431 }, { "epoch": 0.00904295403165034, "grad_norm": 0.165923610329628, "learning_rate": 0.00019999611571107698, "loss": 11.7033, "step": 432 }, { "epoch": 0.009063886795612493, "grad_norm": 0.7762564420700073, "learning_rate": 0.0001999960963623465, "loss": 11.7721, "step": 433 }, { "epoch": 0.009084819559574646, "grad_norm": 0.17553529143333435, "learning_rate": 0.00019999607696554586, "loss": 11.7117, "step": 434 }, { "epoch": 0.0091057523235368, "grad_norm": 0.1588762104511261, "learning_rate": 0.00019999605752067518, "loss": 11.7224, "step": 435 }, { "epoch": 0.009126685087498953, "grad_norm": 0.13175202906131744, "learning_rate": 0.00019999603802773434, "loss": 11.721, "step": 436 }, { "epoch": 0.009147617851461107, "grad_norm": 0.1516256332397461, "learning_rate": 0.00019999601848672344, "loss": 11.7154, "step": 437 }, { "epoch": 0.00916855061542326, "grad_norm": 0.23667733371257782, "learning_rate": 0.0001999959988976424, "loss": 11.7034, "step": 438 }, { "epoch": 0.009189483379385414, "grad_norm": 0.1424240916967392, "learning_rate": 0.00019999597926049128, "loss": 11.6958, "step": 439 }, { "epoch": 0.009210416143347567, "grad_norm": 1.0300225019454956, "learning_rate": 0.00019999595957527014, "loss": 11.7071, "step": 440 }, { "epoch": 0.00923134890730972, "grad_norm": 0.1564256101846695, "learning_rate": 0.00019999593984197889, "loss": 11.707, "step": 441 }, { "epoch": 0.009252281671271874, "grad_norm": 0.15526652336120605, "learning_rate": 0.00019999592006061763, "loss": 11.7134, "step": 442 }, { "epoch": 0.009273214435234028, "grad_norm": 0.13327212631702423, "learning_rate": 0.0001999959002311863, "loss": 11.7178, "step": 443 }, { "epoch": 0.009294147199196181, "grad_norm": 0.14373227953910828, "learning_rate": 0.00019999588035368493, "loss": 11.7092, "step": 444 }, { "epoch": 0.009315079963158335, "grad_norm": 0.16695018112659454, "learning_rate": 0.00019999586042811354, "loss": 11.7188, "step": 445 }, { "epoch": 0.009336012727120488, "grad_norm": 0.16400621831417084, "learning_rate": 0.00019999584045447214, "loss": 11.7014, "step": 446 }, { "epoch": 0.009356945491082642, "grad_norm": 0.1664522886276245, "learning_rate": 0.00019999582043276075, "loss": 11.7183, "step": 447 }, { "epoch": 0.009377878255044795, "grad_norm": 0.14348964393138885, "learning_rate": 0.00019999580036297935, "loss": 11.7079, "step": 448 }, { "epoch": 0.00939881101900695, "grad_norm": 0.1544647365808487, "learning_rate": 0.00019999578024512796, "loss": 11.7157, "step": 449 }, { "epoch": 0.009419743782969104, "grad_norm": 0.18010537326335907, "learning_rate": 0.00019999576007920662, "loss": 11.7228, "step": 450 }, { "epoch": 0.009440676546931258, "grad_norm": 0.10718479752540588, "learning_rate": 0.0001999957398652153, "loss": 11.7104, "step": 451 }, { "epoch": 0.009461609310893411, "grad_norm": 0.12379198521375656, "learning_rate": 0.00019999571960315401, "loss": 11.7105, "step": 452 }, { "epoch": 0.009482542074855565, "grad_norm": 0.12455327808856964, "learning_rate": 0.0001999956992930228, "loss": 11.725, "step": 453 }, { "epoch": 0.009503474838817718, "grad_norm": 0.13162051141262054, "learning_rate": 0.00019999567893482162, "loss": 11.7122, "step": 454 }, { "epoch": 0.009524407602779872, "grad_norm": 0.22567923367023468, "learning_rate": 0.00019999565852855055, "loss": 11.72, "step": 455 }, { "epoch": 0.009545340366742025, "grad_norm": 0.1267073154449463, "learning_rate": 0.00019999563807420953, "loss": 11.7123, "step": 456 }, { "epoch": 0.009566273130704179, "grad_norm": 0.1284102201461792, "learning_rate": 0.00019999561757179864, "loss": 11.7051, "step": 457 }, { "epoch": 0.009587205894666332, "grad_norm": 0.16903997957706451, "learning_rate": 0.00019999559702131784, "loss": 11.7093, "step": 458 }, { "epoch": 0.009608138658628486, "grad_norm": 0.16179943084716797, "learning_rate": 0.00019999557642276714, "loss": 11.7136, "step": 459 }, { "epoch": 0.009629071422590639, "grad_norm": 0.14929792284965515, "learning_rate": 0.00019999555577614655, "loss": 11.7177, "step": 460 }, { "epoch": 0.009650004186552793, "grad_norm": 0.16276401281356812, "learning_rate": 0.0001999955350814561, "loss": 11.7238, "step": 461 }, { "epoch": 0.009670936950514946, "grad_norm": 0.12645259499549866, "learning_rate": 0.0001999955143386958, "loss": 11.7007, "step": 462 }, { "epoch": 0.0096918697144771, "grad_norm": 0.16855791211128235, "learning_rate": 0.00019999549354786567, "loss": 11.7151, "step": 463 }, { "epoch": 0.009712802478439253, "grad_norm": 0.15756118297576904, "learning_rate": 0.00019999547270896568, "loss": 11.7248, "step": 464 }, { "epoch": 0.009733735242401407, "grad_norm": 0.1333039551973343, "learning_rate": 0.00019999545182199587, "loss": 11.7161, "step": 465 }, { "epoch": 0.00975466800636356, "grad_norm": 0.6762552261352539, "learning_rate": 0.00019999543088695623, "loss": 11.7227, "step": 466 }, { "epoch": 0.009775600770325714, "grad_norm": 0.13654425740242004, "learning_rate": 0.0001999954099038468, "loss": 11.7147, "step": 467 }, { "epoch": 0.009796533534287867, "grad_norm": 0.11954280734062195, "learning_rate": 0.00019999538887266756, "loss": 11.6952, "step": 468 }, { "epoch": 0.00981746629825002, "grad_norm": 0.1410980522632599, "learning_rate": 0.00019999536779341854, "loss": 11.7148, "step": 469 }, { "epoch": 0.009838399062212174, "grad_norm": 0.1474582552909851, "learning_rate": 0.00019999534666609973, "loss": 11.7173, "step": 470 }, { "epoch": 0.009859331826174328, "grad_norm": 0.18310676515102386, "learning_rate": 0.00019999532549071116, "loss": 11.7106, "step": 471 }, { "epoch": 0.009880264590136481, "grad_norm": 0.1618664264678955, "learning_rate": 0.00019999530426725284, "loss": 11.7031, "step": 472 }, { "epoch": 0.009901197354098635, "grad_norm": 0.1323370337486267, "learning_rate": 0.0001999952829957248, "loss": 11.7122, "step": 473 }, { "epoch": 0.009922130118060788, "grad_norm": 0.16826941072940826, "learning_rate": 0.00019999526167612696, "loss": 11.7168, "step": 474 }, { "epoch": 0.009943062882022942, "grad_norm": 0.13184814155101776, "learning_rate": 0.00019999524030845942, "loss": 11.7105, "step": 475 }, { "epoch": 0.009963995645985095, "grad_norm": 0.1440298855304718, "learning_rate": 0.00019999521889272218, "loss": 11.7058, "step": 476 }, { "epoch": 0.009984928409947249, "grad_norm": 0.13943423330783844, "learning_rate": 0.00019999519742891524, "loss": 11.713, "step": 477 }, { "epoch": 0.010005861173909404, "grad_norm": 0.12997035682201385, "learning_rate": 0.00019999517591703857, "loss": 11.7018, "step": 478 }, { "epoch": 0.010026793937871557, "grad_norm": 0.11289665102958679, "learning_rate": 0.00019999515435709225, "loss": 11.7107, "step": 479 }, { "epoch": 0.01004772670183371, "grad_norm": 0.17949357628822327, "learning_rate": 0.00019999513274907623, "loss": 11.7061, "step": 480 }, { "epoch": 0.010068659465795864, "grad_norm": 0.19587932527065277, "learning_rate": 0.00019999511109299056, "loss": 11.7125, "step": 481 }, { "epoch": 0.010089592229758018, "grad_norm": 0.16009117662906647, "learning_rate": 0.00019999508938883524, "loss": 11.7138, "step": 482 }, { "epoch": 0.010110524993720171, "grad_norm": 0.16868789494037628, "learning_rate": 0.00019999506763661025, "loss": 11.7285, "step": 483 }, { "epoch": 0.010131457757682325, "grad_norm": 0.1484278440475464, "learning_rate": 0.00019999504583631566, "loss": 11.7041, "step": 484 }, { "epoch": 0.010152390521644478, "grad_norm": 0.13887837529182434, "learning_rate": 0.00019999502398795142, "loss": 11.7089, "step": 485 }, { "epoch": 0.010173323285606632, "grad_norm": 0.13413088023662567, "learning_rate": 0.0001999950020915176, "loss": 11.7167, "step": 486 }, { "epoch": 0.010194256049568785, "grad_norm": 0.14391814172267914, "learning_rate": 0.00019999498014701414, "loss": 11.6933, "step": 487 }, { "epoch": 0.010215188813530939, "grad_norm": 0.14174379408359528, "learning_rate": 0.00019999495815444112, "loss": 11.7111, "step": 488 }, { "epoch": 0.010236121577493092, "grad_norm": 0.14895471930503845, "learning_rate": 0.00019999493611379848, "loss": 11.7134, "step": 489 }, { "epoch": 0.010257054341455246, "grad_norm": 0.15512877702713013, "learning_rate": 0.00019999491402508633, "loss": 11.6988, "step": 490 }, { "epoch": 0.0102779871054174, "grad_norm": 0.20825432240962982, "learning_rate": 0.0001999948918883046, "loss": 11.7044, "step": 491 }, { "epoch": 0.010298919869379553, "grad_norm": 0.15204904973506927, "learning_rate": 0.00019999486970345332, "loss": 11.7006, "step": 492 }, { "epoch": 0.010319852633341706, "grad_norm": 0.23436708748340607, "learning_rate": 0.00019999484747053247, "loss": 11.7381, "step": 493 }, { "epoch": 0.01034078539730386, "grad_norm": 0.14375825226306915, "learning_rate": 0.00019999482518954213, "loss": 11.7127, "step": 494 }, { "epoch": 0.010361718161266013, "grad_norm": 0.20636612176895142, "learning_rate": 0.00019999480286048225, "loss": 11.7178, "step": 495 }, { "epoch": 0.010382650925228167, "grad_norm": 0.1531464159488678, "learning_rate": 0.00019999478048335288, "loss": 11.7203, "step": 496 }, { "epoch": 0.01040358368919032, "grad_norm": 0.1956414133310318, "learning_rate": 0.000199994758058154, "loss": 11.708, "step": 497 }, { "epoch": 0.010424516453152474, "grad_norm": 0.1292150467634201, "learning_rate": 0.00019999473558488564, "loss": 11.7033, "step": 498 }, { "epoch": 0.010445449217114627, "grad_norm": 0.15768878161907196, "learning_rate": 0.00019999471306354782, "loss": 11.7169, "step": 499 }, { "epoch": 0.010466381981076781, "grad_norm": 0.1590561866760254, "learning_rate": 0.0001999946904941405, "loss": 11.725, "step": 500 }, { "epoch": 0.010487314745038934, "grad_norm": 0.1294384002685547, "learning_rate": 0.0001999946678766638, "loss": 11.702, "step": 501 }, { "epoch": 0.010508247509001088, "grad_norm": 0.1838153600692749, "learning_rate": 0.0001999946452111176, "loss": 11.7074, "step": 502 }, { "epoch": 0.010529180272963241, "grad_norm": 0.16923578083515167, "learning_rate": 0.000199994622497502, "loss": 11.7165, "step": 503 }, { "epoch": 0.010550113036925395, "grad_norm": 0.1314014047384262, "learning_rate": 0.00019999459973581697, "loss": 11.7063, "step": 504 }, { "epoch": 0.010571045800887548, "grad_norm": 0.15853461623191833, "learning_rate": 0.0001999945769260625, "loss": 11.7158, "step": 505 }, { "epoch": 0.010591978564849702, "grad_norm": 0.1837603598833084, "learning_rate": 0.0001999945540682387, "loss": 11.7031, "step": 506 }, { "epoch": 0.010612911328811857, "grad_norm": 0.1396726816892624, "learning_rate": 0.00019999453116234547, "loss": 11.7003, "step": 507 }, { "epoch": 0.01063384409277401, "grad_norm": 0.1476178616285324, "learning_rate": 0.00019999450820838287, "loss": 11.7128, "step": 508 }, { "epoch": 0.010654776856736164, "grad_norm": 0.15123625099658966, "learning_rate": 0.0001999944852063509, "loss": 11.7067, "step": 509 }, { "epoch": 0.010675709620698318, "grad_norm": 0.12716086208820343, "learning_rate": 0.0001999944621562496, "loss": 11.7072, "step": 510 }, { "epoch": 0.010696642384660471, "grad_norm": 0.1660926192998886, "learning_rate": 0.00019999443905807893, "loss": 11.7118, "step": 511 }, { "epoch": 0.010717575148622625, "grad_norm": 0.19784855842590332, "learning_rate": 0.00019999441591183893, "loss": 11.7134, "step": 512 }, { "epoch": 0.010738507912584778, "grad_norm": 0.16173794865608215, "learning_rate": 0.00019999439271752963, "loss": 11.7109, "step": 513 }, { "epoch": 0.010759440676546932, "grad_norm": 0.1459168940782547, "learning_rate": 0.00019999436947515102, "loss": 11.7109, "step": 514 }, { "epoch": 0.010780373440509085, "grad_norm": 0.1459716111421585, "learning_rate": 0.0001999943461847031, "loss": 11.7154, "step": 515 }, { "epoch": 0.010801306204471239, "grad_norm": 0.18990248441696167, "learning_rate": 0.0001999943228461859, "loss": 11.7182, "step": 516 }, { "epoch": 0.010822238968433392, "grad_norm": 0.14020995795726776, "learning_rate": 0.00019999429945959945, "loss": 11.7005, "step": 517 }, { "epoch": 0.010843171732395546, "grad_norm": 0.22783033549785614, "learning_rate": 0.0001999942760249437, "loss": 11.7098, "step": 518 }, { "epoch": 0.0108641044963577, "grad_norm": 0.16499322652816772, "learning_rate": 0.00019999425254221871, "loss": 11.7024, "step": 519 }, { "epoch": 0.010885037260319853, "grad_norm": 0.16663461923599243, "learning_rate": 0.00019999422901142446, "loss": 11.7066, "step": 520 }, { "epoch": 0.010905970024282006, "grad_norm": 0.15631665289402008, "learning_rate": 0.00019999420543256101, "loss": 11.7091, "step": 521 }, { "epoch": 0.01092690278824416, "grad_norm": 0.15544551610946655, "learning_rate": 0.00019999418180562836, "loss": 11.7231, "step": 522 }, { "epoch": 0.010947835552206313, "grad_norm": 0.19466981291770935, "learning_rate": 0.00019999415813062646, "loss": 11.7119, "step": 523 }, { "epoch": 0.010968768316168467, "grad_norm": 0.14686711132526398, "learning_rate": 0.00019999413440755537, "loss": 11.7145, "step": 524 }, { "epoch": 0.01098970108013062, "grad_norm": 0.17283287644386292, "learning_rate": 0.00019999411063641512, "loss": 11.7292, "step": 525 }, { "epoch": 0.011010633844092774, "grad_norm": 0.12148035317659378, "learning_rate": 0.00019999408681720566, "loss": 11.7113, "step": 526 }, { "epoch": 0.011031566608054927, "grad_norm": 0.14297200739383698, "learning_rate": 0.00019999406294992706, "loss": 11.7081, "step": 527 }, { "epoch": 0.01105249937201708, "grad_norm": 0.19606558978557587, "learning_rate": 0.0001999940390345793, "loss": 11.709, "step": 528 }, { "epoch": 0.011073432135979234, "grad_norm": 0.15110373497009277, "learning_rate": 0.0001999940150711624, "loss": 11.7055, "step": 529 }, { "epoch": 0.011094364899941388, "grad_norm": 0.14474008977413177, "learning_rate": 0.00019999399105967642, "loss": 11.709, "step": 530 }, { "epoch": 0.011115297663903541, "grad_norm": 0.12483978271484375, "learning_rate": 0.00019999396700012126, "loss": 11.7049, "step": 531 }, { "epoch": 0.011136230427865695, "grad_norm": 0.1883251965045929, "learning_rate": 0.00019999394289249705, "loss": 11.7114, "step": 532 }, { "epoch": 0.011157163191827848, "grad_norm": 0.13288095593452454, "learning_rate": 0.0001999939187368037, "loss": 11.7094, "step": 533 }, { "epoch": 0.011178095955790002, "grad_norm": 0.12556421756744385, "learning_rate": 0.0001999938945330413, "loss": 11.7085, "step": 534 }, { "epoch": 0.011199028719752157, "grad_norm": 0.14493146538734436, "learning_rate": 0.00019999387028120982, "loss": 11.7118, "step": 535 }, { "epoch": 0.01121996148371431, "grad_norm": 0.19461014866828918, "learning_rate": 0.0001999938459813093, "loss": 11.7091, "step": 536 }, { "epoch": 0.011240894247676464, "grad_norm": 0.15682515501976013, "learning_rate": 0.00019999382163333971, "loss": 11.71, "step": 537 }, { "epoch": 0.011261827011638617, "grad_norm": 0.11528632044792175, "learning_rate": 0.00019999379723730109, "loss": 11.7165, "step": 538 }, { "epoch": 0.011282759775600771, "grad_norm": 0.1838659793138504, "learning_rate": 0.00019999377279319346, "loss": 11.7065, "step": 539 }, { "epoch": 0.011303692539562924, "grad_norm": 0.14353227615356445, "learning_rate": 0.0001999937483010168, "loss": 11.6985, "step": 540 }, { "epoch": 0.011324625303525078, "grad_norm": 0.14260989427566528, "learning_rate": 0.00019999372376077116, "loss": 11.7082, "step": 541 }, { "epoch": 0.011345558067487231, "grad_norm": 0.15231996774673462, "learning_rate": 0.00019999369917245654, "loss": 11.698, "step": 542 }, { "epoch": 0.011366490831449385, "grad_norm": 0.13365525007247925, "learning_rate": 0.00019999367453607292, "loss": 11.7068, "step": 543 }, { "epoch": 0.011387423595411538, "grad_norm": 0.1633104383945465, "learning_rate": 0.00019999364985162036, "loss": 11.7033, "step": 544 }, { "epoch": 0.011408356359373692, "grad_norm": 0.15705107152462006, "learning_rate": 0.00019999362511909885, "loss": 11.7112, "step": 545 }, { "epoch": 0.011429289123335845, "grad_norm": 0.17499345541000366, "learning_rate": 0.00019999360033850838, "loss": 11.7101, "step": 546 }, { "epoch": 0.011450221887297999, "grad_norm": 0.19347117841243744, "learning_rate": 0.00019999357550984899, "loss": 11.7157, "step": 547 }, { "epoch": 0.011471154651260152, "grad_norm": 0.1695474088191986, "learning_rate": 0.00019999355063312065, "loss": 11.708, "step": 548 }, { "epoch": 0.011492087415222306, "grad_norm": 0.16803288459777832, "learning_rate": 0.00019999352570832345, "loss": 11.7084, "step": 549 }, { "epoch": 0.01151302017918446, "grad_norm": 0.1354203224182129, "learning_rate": 0.00019999350073545736, "loss": 11.7213, "step": 550 }, { "epoch": 0.011533952943146613, "grad_norm": 0.17980393767356873, "learning_rate": 0.00019999347571452242, "loss": 11.7109, "step": 551 }, { "epoch": 0.011554885707108766, "grad_norm": 0.13012365996837616, "learning_rate": 0.00019999345064551855, "loss": 11.6979, "step": 552 }, { "epoch": 0.01157581847107092, "grad_norm": 0.17307870090007782, "learning_rate": 0.00019999342552844582, "loss": 11.704, "step": 553 }, { "epoch": 0.011596751235033073, "grad_norm": 0.1387382596731186, "learning_rate": 0.00019999340036330428, "loss": 11.7018, "step": 554 }, { "epoch": 0.011617683998995227, "grad_norm": 0.18332621455192566, "learning_rate": 0.0001999933751500939, "loss": 11.7147, "step": 555 }, { "epoch": 0.01163861676295738, "grad_norm": 0.17456652224063873, "learning_rate": 0.00019999334988881473, "loss": 11.7172, "step": 556 }, { "epoch": 0.011659549526919534, "grad_norm": 0.17732249200344086, "learning_rate": 0.00019999332457946672, "loss": 11.7231, "step": 557 }, { "epoch": 0.011680482290881688, "grad_norm": 0.1371173858642578, "learning_rate": 0.0001999932992220499, "loss": 11.7184, "step": 558 }, { "epoch": 0.011701415054843841, "grad_norm": 0.16780251264572144, "learning_rate": 0.00019999327381656432, "loss": 11.7179, "step": 559 }, { "epoch": 0.011722347818805995, "grad_norm": 0.1294446438550949, "learning_rate": 0.00019999324836301, "loss": 11.7101, "step": 560 }, { "epoch": 0.011743280582768148, "grad_norm": 0.13770076632499695, "learning_rate": 0.00019999322286138688, "loss": 11.7041, "step": 561 }, { "epoch": 0.011764213346730302, "grad_norm": 0.15395502746105194, "learning_rate": 0.00019999319731169499, "loss": 11.7087, "step": 562 }, { "epoch": 0.011785146110692455, "grad_norm": 0.16340944170951843, "learning_rate": 0.00019999317171393442, "loss": 11.7054, "step": 563 }, { "epoch": 0.01180607887465461, "grad_norm": 0.13309942185878754, "learning_rate": 0.00019999314606810507, "loss": 11.704, "step": 564 }, { "epoch": 0.011827011638616764, "grad_norm": 0.19077539443969727, "learning_rate": 0.00019999312037420708, "loss": 11.7122, "step": 565 }, { "epoch": 0.011847944402578917, "grad_norm": 0.15048560500144958, "learning_rate": 0.00019999309463224037, "loss": 11.6964, "step": 566 }, { "epoch": 0.01186887716654107, "grad_norm": 0.14711757004261017, "learning_rate": 0.00019999306884220497, "loss": 11.7078, "step": 567 }, { "epoch": 0.011889809930503224, "grad_norm": 0.1322021335363388, "learning_rate": 0.00019999304300410086, "loss": 11.7062, "step": 568 }, { "epoch": 0.011910742694465378, "grad_norm": 0.11796886473894119, "learning_rate": 0.00019999301711792815, "loss": 11.7127, "step": 569 }, { "epoch": 0.011931675458427531, "grad_norm": 0.14588934183120728, "learning_rate": 0.00019999299118368677, "loss": 11.7066, "step": 570 }, { "epoch": 0.011952608222389685, "grad_norm": 0.17259015142917633, "learning_rate": 0.00019999296520137674, "loss": 11.7081, "step": 571 }, { "epoch": 0.011973540986351838, "grad_norm": 0.1595371812582016, "learning_rate": 0.0001999929391709981, "loss": 11.7165, "step": 572 }, { "epoch": 0.011994473750313992, "grad_norm": 0.16232545673847198, "learning_rate": 0.00019999291309255086, "loss": 11.6937, "step": 573 }, { "epoch": 0.012015406514276145, "grad_norm": 0.13484421372413635, "learning_rate": 0.000199992886966035, "loss": 11.7099, "step": 574 }, { "epoch": 0.012036339278238299, "grad_norm": 0.1561281532049179, "learning_rate": 0.00019999286079145056, "loss": 11.7113, "step": 575 }, { "epoch": 0.012057272042200452, "grad_norm": 0.15970993041992188, "learning_rate": 0.00019999283456879758, "loss": 11.7083, "step": 576 }, { "epoch": 0.012078204806162606, "grad_norm": 0.16457797586917877, "learning_rate": 0.000199992808298076, "loss": 11.7101, "step": 577 }, { "epoch": 0.01209913757012476, "grad_norm": 0.2137790024280548, "learning_rate": 0.0001999927819792859, "loss": 11.7026, "step": 578 }, { "epoch": 0.012120070334086913, "grad_norm": 0.13843238353729248, "learning_rate": 0.00019999275561242725, "loss": 11.7037, "step": 579 }, { "epoch": 0.012141003098049066, "grad_norm": 0.12962833046913147, "learning_rate": 0.00019999272919750006, "loss": 11.6976, "step": 580 }, { "epoch": 0.01216193586201122, "grad_norm": 0.1226603090763092, "learning_rate": 0.00019999270273450442, "loss": 11.7176, "step": 581 }, { "epoch": 0.012182868625973373, "grad_norm": 0.1637372076511383, "learning_rate": 0.00019999267622344025, "loss": 11.6988, "step": 582 }, { "epoch": 0.012203801389935527, "grad_norm": 0.25087255239486694, "learning_rate": 0.00019999264966430759, "loss": 11.7072, "step": 583 }, { "epoch": 0.01222473415389768, "grad_norm": 0.16174356639385223, "learning_rate": 0.00019999262305710647, "loss": 11.7241, "step": 584 }, { "epoch": 0.012245666917859834, "grad_norm": 0.27947014570236206, "learning_rate": 0.0001999925964018369, "loss": 11.73, "step": 585 }, { "epoch": 0.012266599681821987, "grad_norm": 0.13587793707847595, "learning_rate": 0.00019999256969849887, "loss": 11.7084, "step": 586 }, { "epoch": 0.01228753244578414, "grad_norm": 0.1856508105993271, "learning_rate": 0.0001999925429470924, "loss": 11.7101, "step": 587 }, { "epoch": 0.012308465209746294, "grad_norm": 0.17331886291503906, "learning_rate": 0.0001999925161476175, "loss": 11.707, "step": 588 }, { "epoch": 0.012329397973708448, "grad_norm": 0.16303750872612, "learning_rate": 0.00019999248930007425, "loss": 11.7031, "step": 589 }, { "epoch": 0.012350330737670601, "grad_norm": 0.14591027796268463, "learning_rate": 0.00019999246240446253, "loss": 11.7106, "step": 590 }, { "epoch": 0.012371263501632755, "grad_norm": 0.1704089194536209, "learning_rate": 0.0001999924354607825, "loss": 11.6933, "step": 591 }, { "epoch": 0.012392196265594908, "grad_norm": 0.14561085402965546, "learning_rate": 0.00019999240846903406, "loss": 11.7029, "step": 592 }, { "epoch": 0.012413129029557064, "grad_norm": 0.16846534609794617, "learning_rate": 0.00019999238142921728, "loss": 11.7071, "step": 593 }, { "epoch": 0.012434061793519217, "grad_norm": 0.1957116574048996, "learning_rate": 0.00019999235434133217, "loss": 11.7144, "step": 594 }, { "epoch": 0.01245499455748137, "grad_norm": 0.16167974472045898, "learning_rate": 0.0001999923272053787, "loss": 11.7046, "step": 595 }, { "epoch": 0.012475927321443524, "grad_norm": 0.1564483642578125, "learning_rate": 0.00019999230002135695, "loss": 11.7139, "step": 596 }, { "epoch": 0.012496860085405678, "grad_norm": 0.18674443662166595, "learning_rate": 0.00019999227278926686, "loss": 11.704, "step": 597 }, { "epoch": 0.012517792849367831, "grad_norm": 0.18800251185894012, "learning_rate": 0.0001999922455091085, "loss": 11.7244, "step": 598 }, { "epoch": 0.012538725613329985, "grad_norm": 0.2216104120016098, "learning_rate": 0.0001999922181808819, "loss": 11.7122, "step": 599 }, { "epoch": 0.012559658377292138, "grad_norm": 0.1686348170042038, "learning_rate": 0.00019999219080458697, "loss": 11.7129, "step": 600 }, { "epoch": 0.012580591141254292, "grad_norm": 0.1935407519340515, "learning_rate": 0.00019999216338022384, "loss": 11.6916, "step": 601 }, { "epoch": 0.012601523905216445, "grad_norm": 0.13294126093387604, "learning_rate": 0.00019999213590779244, "loss": 11.7017, "step": 602 }, { "epoch": 0.012622456669178599, "grad_norm": 0.1447775512933731, "learning_rate": 0.00019999210838729284, "loss": 11.6918, "step": 603 }, { "epoch": 0.012643389433140752, "grad_norm": 0.1777428537607193, "learning_rate": 0.00019999208081872502, "loss": 11.7037, "step": 604 }, { "epoch": 0.012664322197102906, "grad_norm": 0.14959150552749634, "learning_rate": 0.000199992053202089, "loss": 11.7052, "step": 605 }, { "epoch": 0.012685254961065059, "grad_norm": 0.1751081347465515, "learning_rate": 0.0001999920255373848, "loss": 11.7071, "step": 606 }, { "epoch": 0.012706187725027213, "grad_norm": 0.15320254862308502, "learning_rate": 0.00019999199782461244, "loss": 11.7059, "step": 607 }, { "epoch": 0.012727120488989366, "grad_norm": 0.15281184017658234, "learning_rate": 0.00019999197006377194, "loss": 11.702, "step": 608 }, { "epoch": 0.01274805325295152, "grad_norm": 0.18549305200576782, "learning_rate": 0.00019999194225486325, "loss": 11.7094, "step": 609 }, { "epoch": 0.012768986016913673, "grad_norm": 0.14644160866737366, "learning_rate": 0.00019999191439788643, "loss": 11.7079, "step": 610 }, { "epoch": 0.012789918780875827, "grad_norm": 0.13986854255199432, "learning_rate": 0.00019999188649284154, "loss": 11.7118, "step": 611 }, { "epoch": 0.01281085154483798, "grad_norm": 0.14214912056922913, "learning_rate": 0.00019999185853972852, "loss": 11.7112, "step": 612 }, { "epoch": 0.012831784308800134, "grad_norm": 0.13702741265296936, "learning_rate": 0.0001999918305385474, "loss": 11.7069, "step": 613 }, { "epoch": 0.012852717072762287, "grad_norm": 0.12380351126194, "learning_rate": 0.00019999180248929821, "loss": 11.704, "step": 614 }, { "epoch": 0.01287364983672444, "grad_norm": 0.1542462557554245, "learning_rate": 0.00019999177439198094, "loss": 11.6994, "step": 615 }, { "epoch": 0.012894582600686594, "grad_norm": 0.164563849568367, "learning_rate": 0.00019999174624659564, "loss": 11.7169, "step": 616 }, { "epoch": 0.012915515364648748, "grad_norm": 0.16642114520072937, "learning_rate": 0.00019999171805314229, "loss": 11.7234, "step": 617 }, { "epoch": 0.012936448128610901, "grad_norm": 0.16128210723400116, "learning_rate": 0.00019999168981162094, "loss": 11.7106, "step": 618 }, { "epoch": 0.012957380892573055, "grad_norm": 0.1321592628955841, "learning_rate": 0.00019999166152203157, "loss": 11.6978, "step": 619 }, { "epoch": 0.012978313656535208, "grad_norm": 0.14743541181087494, "learning_rate": 0.0001999916331843742, "loss": 11.6971, "step": 620 }, { "epoch": 0.012999246420497362, "grad_norm": 0.19882157444953918, "learning_rate": 0.00019999160479864886, "loss": 11.716, "step": 621 }, { "epoch": 0.013020179184459517, "grad_norm": 0.15972036123275757, "learning_rate": 0.00019999157636485553, "loss": 11.7029, "step": 622 }, { "epoch": 0.01304111194842167, "grad_norm": 0.1661757528781891, "learning_rate": 0.00019999154788299427, "loss": 11.6993, "step": 623 }, { "epoch": 0.013062044712383824, "grad_norm": 0.14810843765735626, "learning_rate": 0.00019999151935306503, "loss": 11.7109, "step": 624 }, { "epoch": 0.013082977476345977, "grad_norm": 0.15023276209831238, "learning_rate": 0.00019999149077506791, "loss": 11.7005, "step": 625 }, { "epoch": 0.013103910240308131, "grad_norm": 0.14334997534751892, "learning_rate": 0.00019999146214900285, "loss": 11.7144, "step": 626 }, { "epoch": 0.013124843004270284, "grad_norm": 0.1685383915901184, "learning_rate": 0.0001999914334748699, "loss": 11.7063, "step": 627 }, { "epoch": 0.013145775768232438, "grad_norm": 0.2551115155220032, "learning_rate": 0.00019999140475266907, "loss": 11.7355, "step": 628 }, { "epoch": 0.013166708532194591, "grad_norm": 0.15514720976352692, "learning_rate": 0.00019999137598240035, "loss": 11.705, "step": 629 }, { "epoch": 0.013187641296156745, "grad_norm": 0.23777569830417633, "learning_rate": 0.0001999913471640638, "loss": 11.7054, "step": 630 }, { "epoch": 0.013208574060118898, "grad_norm": 0.15580017864704132, "learning_rate": 0.0001999913182976594, "loss": 11.7143, "step": 631 }, { "epoch": 0.013229506824081052, "grad_norm": 0.15035337209701538, "learning_rate": 0.00019999128938318715, "loss": 11.7108, "step": 632 }, { "epoch": 0.013250439588043205, "grad_norm": 0.17705366015434265, "learning_rate": 0.00019999126042064707, "loss": 11.7107, "step": 633 }, { "epoch": 0.013271372352005359, "grad_norm": 0.2041730284690857, "learning_rate": 0.00019999123141003922, "loss": 11.7044, "step": 634 }, { "epoch": 0.013292305115967512, "grad_norm": 0.14013952016830444, "learning_rate": 0.00019999120235136356, "loss": 11.7116, "step": 635 }, { "epoch": 0.013313237879929666, "grad_norm": 0.1465970277786255, "learning_rate": 0.00019999117324462012, "loss": 11.7026, "step": 636 }, { "epoch": 0.01333417064389182, "grad_norm": 0.1653396040201187, "learning_rate": 0.00019999114408980894, "loss": 11.713, "step": 637 }, { "epoch": 0.013355103407853973, "grad_norm": 0.15308861434459686, "learning_rate": 0.00019999111488693, "loss": 11.7157, "step": 638 }, { "epoch": 0.013376036171816126, "grad_norm": 0.15993134677410126, "learning_rate": 0.00019999108563598333, "loss": 11.7016, "step": 639 }, { "epoch": 0.01339696893577828, "grad_norm": 0.18917667865753174, "learning_rate": 0.00019999105633696895, "loss": 11.7081, "step": 640 }, { "epoch": 0.013417901699740433, "grad_norm": 0.15229469537734985, "learning_rate": 0.00019999102698988686, "loss": 11.7001, "step": 641 }, { "epoch": 0.013438834463702587, "grad_norm": 0.18926985561847687, "learning_rate": 0.00019999099759473708, "loss": 11.7055, "step": 642 }, { "epoch": 0.01345976722766474, "grad_norm": 0.1461210399866104, "learning_rate": 0.0001999909681515196, "loss": 11.6917, "step": 643 }, { "epoch": 0.013480699991626894, "grad_norm": 0.13374581933021545, "learning_rate": 0.00019999093866023452, "loss": 11.707, "step": 644 }, { "epoch": 0.013501632755589047, "grad_norm": 0.20823268592357635, "learning_rate": 0.00019999090912088173, "loss": 11.7156, "step": 645 }, { "epoch": 0.013522565519551201, "grad_norm": 0.17236679792404175, "learning_rate": 0.00019999087953346135, "loss": 11.6925, "step": 646 }, { "epoch": 0.013543498283513354, "grad_norm": 0.18850982189178467, "learning_rate": 0.00019999084989797333, "loss": 11.7104, "step": 647 }, { "epoch": 0.013564431047475508, "grad_norm": 0.13187666237354279, "learning_rate": 0.0001999908202144177, "loss": 11.7021, "step": 648 }, { "epoch": 0.013585363811437661, "grad_norm": 0.17236214876174927, "learning_rate": 0.00019999079048279444, "loss": 11.704, "step": 649 }, { "epoch": 0.013606296575399815, "grad_norm": 0.18265970051288605, "learning_rate": 0.00019999076070310366, "loss": 11.7062, "step": 650 }, { "epoch": 0.01362722933936197, "grad_norm": 0.15806344151496887, "learning_rate": 0.0001999907308753453, "loss": 11.7022, "step": 651 }, { "epoch": 0.013648162103324124, "grad_norm": 0.14573799073696136, "learning_rate": 0.0001999907009995194, "loss": 11.7035, "step": 652 }, { "epoch": 0.013669094867286277, "grad_norm": 0.19578687846660614, "learning_rate": 0.00019999067107562596, "loss": 11.7191, "step": 653 }, { "epoch": 0.01369002763124843, "grad_norm": 0.17786675691604614, "learning_rate": 0.00019999064110366502, "loss": 11.7064, "step": 654 }, { "epoch": 0.013710960395210584, "grad_norm": 0.17356985807418823, "learning_rate": 0.00019999061108363653, "loss": 11.6988, "step": 655 }, { "epoch": 0.013731893159172738, "grad_norm": 0.17184384167194366, "learning_rate": 0.0001999905810155406, "loss": 11.7057, "step": 656 }, { "epoch": 0.013752825923134891, "grad_norm": 0.1842612624168396, "learning_rate": 0.00019999055089937718, "loss": 11.6936, "step": 657 }, { "epoch": 0.013773758687097045, "grad_norm": 0.16072674095630646, "learning_rate": 0.0001999905207351463, "loss": 11.7027, "step": 658 }, { "epoch": 0.013794691451059198, "grad_norm": 0.15793514251708984, "learning_rate": 0.00019999049052284794, "loss": 11.7025, "step": 659 }, { "epoch": 0.013815624215021352, "grad_norm": 0.24522794783115387, "learning_rate": 0.00019999046026248217, "loss": 11.705, "step": 660 }, { "epoch": 0.013836556978983505, "grad_norm": 0.15827132761478424, "learning_rate": 0.00019999042995404897, "loss": 11.6971, "step": 661 }, { "epoch": 0.013857489742945659, "grad_norm": 0.14108148217201233, "learning_rate": 0.00019999039959754837, "loss": 11.7146, "step": 662 }, { "epoch": 0.013878422506907812, "grad_norm": 0.180585578083992, "learning_rate": 0.0001999903691929804, "loss": 11.7016, "step": 663 }, { "epoch": 0.013899355270869966, "grad_norm": 0.13705173134803772, "learning_rate": 0.00019999033874034503, "loss": 11.7008, "step": 664 }, { "epoch": 0.01392028803483212, "grad_norm": 0.19263479113578796, "learning_rate": 0.00019999030823964232, "loss": 11.6924, "step": 665 }, { "epoch": 0.013941220798794273, "grad_norm": 0.1702137440443039, "learning_rate": 0.00019999027769087224, "loss": 11.7074, "step": 666 }, { "epoch": 0.013962153562756426, "grad_norm": 0.15209704637527466, "learning_rate": 0.00019999024709403486, "loss": 11.7048, "step": 667 }, { "epoch": 0.01398308632671858, "grad_norm": 0.17773820459842682, "learning_rate": 0.00019999021644913017, "loss": 11.7142, "step": 668 }, { "epoch": 0.014004019090680733, "grad_norm": 0.1440756916999817, "learning_rate": 0.00019999018575615814, "loss": 11.7066, "step": 669 }, { "epoch": 0.014024951854642887, "grad_norm": 0.13951919972896576, "learning_rate": 0.00019999015501511887, "loss": 11.7064, "step": 670 }, { "epoch": 0.01404588461860504, "grad_norm": 0.12141372263431549, "learning_rate": 0.00019999012422601228, "loss": 11.7026, "step": 671 }, { "epoch": 0.014066817382567194, "grad_norm": 0.1556934267282486, "learning_rate": 0.00019999009338883848, "loss": 11.7098, "step": 672 }, { "epoch": 0.014087750146529347, "grad_norm": 0.17390969395637512, "learning_rate": 0.0001999900625035974, "loss": 11.6904, "step": 673 }, { "epoch": 0.0141086829104915, "grad_norm": 0.1587255746126175, "learning_rate": 0.0001999900315702891, "loss": 11.7153, "step": 674 }, { "epoch": 0.014129615674453654, "grad_norm": 0.18750225007534027, "learning_rate": 0.00019999000058891362, "loss": 11.7219, "step": 675 }, { "epoch": 0.014150548438415808, "grad_norm": 0.1573847234249115, "learning_rate": 0.0001999899695594709, "loss": 11.7012, "step": 676 }, { "epoch": 0.014171481202377961, "grad_norm": 0.14716270565986633, "learning_rate": 0.000199989938481961, "loss": 11.6988, "step": 677 }, { "epoch": 0.014192413966340115, "grad_norm": 0.1719796508550644, "learning_rate": 0.00019998990735638396, "loss": 11.6968, "step": 678 }, { "epoch": 0.014213346730302268, "grad_norm": 0.1584298014640808, "learning_rate": 0.00019998987618273978, "loss": 11.7069, "step": 679 }, { "epoch": 0.014234279494264424, "grad_norm": 0.1570943146944046, "learning_rate": 0.00019998984496102842, "loss": 11.7136, "step": 680 }, { "epoch": 0.014255212258226577, "grad_norm": 0.1894170194864273, "learning_rate": 0.00019998981369124996, "loss": 11.696, "step": 681 }, { "epoch": 0.01427614502218873, "grad_norm": 0.15661349892616272, "learning_rate": 0.0001999897823734044, "loss": 11.6791, "step": 682 }, { "epoch": 0.014297077786150884, "grad_norm": 0.13515502214431763, "learning_rate": 0.00019998975100749174, "loss": 11.7033, "step": 683 }, { "epoch": 0.014318010550113038, "grad_norm": 0.20707789063453674, "learning_rate": 0.000199989719593512, "loss": 11.7018, "step": 684 }, { "epoch": 0.014338943314075191, "grad_norm": 0.13254906237125397, "learning_rate": 0.00019998968813146522, "loss": 11.7217, "step": 685 }, { "epoch": 0.014359876078037345, "grad_norm": 0.15336821973323822, "learning_rate": 0.00019998965662135137, "loss": 11.7066, "step": 686 }, { "epoch": 0.014380808841999498, "grad_norm": 0.17244276404380798, "learning_rate": 0.00019998962506317047, "loss": 11.7108, "step": 687 }, { "epoch": 0.014401741605961652, "grad_norm": 0.16399376094341278, "learning_rate": 0.00019998959345692257, "loss": 11.722, "step": 688 }, { "epoch": 0.014422674369923805, "grad_norm": 0.1773606389760971, "learning_rate": 0.0001999895618026077, "loss": 11.6875, "step": 689 }, { "epoch": 0.014443607133885959, "grad_norm": 0.16713005304336548, "learning_rate": 0.00019998953010022579, "loss": 11.7102, "step": 690 }, { "epoch": 0.014464539897848112, "grad_norm": 0.16627603769302368, "learning_rate": 0.00019998949834977694, "loss": 11.7068, "step": 691 }, { "epoch": 0.014485472661810266, "grad_norm": 0.15680205821990967, "learning_rate": 0.00019998946655126114, "loss": 11.7128, "step": 692 }, { "epoch": 0.014506405425772419, "grad_norm": 0.12759080529212952, "learning_rate": 0.0001999894347046784, "loss": 11.6954, "step": 693 }, { "epoch": 0.014527338189734573, "grad_norm": 0.21448414027690887, "learning_rate": 0.0001999894028100287, "loss": 11.6994, "step": 694 }, { "epoch": 0.014548270953696726, "grad_norm": 0.15465885400772095, "learning_rate": 0.00019998937086731212, "loss": 11.6982, "step": 695 }, { "epoch": 0.01456920371765888, "grad_norm": 0.1589699238538742, "learning_rate": 0.00019998933887652864, "loss": 11.7133, "step": 696 }, { "epoch": 0.014590136481621033, "grad_norm": 0.1708512306213379, "learning_rate": 0.00019998930683767828, "loss": 11.7085, "step": 697 }, { "epoch": 0.014611069245583187, "grad_norm": 0.18354716897010803, "learning_rate": 0.00019998927475076107, "loss": 11.687, "step": 698 }, { "epoch": 0.01463200200954534, "grad_norm": 0.14606605470180511, "learning_rate": 0.000199989242615777, "loss": 11.7011, "step": 699 }, { "epoch": 0.014652934773507494, "grad_norm": 0.15173842012882233, "learning_rate": 0.0001999892104327261, "loss": 11.7084, "step": 700 }, { "epoch": 0.014673867537469647, "grad_norm": 0.19360613822937012, "learning_rate": 0.0001999891782016084, "loss": 11.6991, "step": 701 }, { "epoch": 0.0146948003014318, "grad_norm": 0.1611892133951187, "learning_rate": 0.00019998914592242386, "loss": 11.7082, "step": 702 }, { "epoch": 0.014715733065393954, "grad_norm": 0.15539419651031494, "learning_rate": 0.00019998911359517255, "loss": 11.7099, "step": 703 }, { "epoch": 0.014736665829356108, "grad_norm": 0.20048236846923828, "learning_rate": 0.00019998908121985446, "loss": 11.7136, "step": 704 }, { "epoch": 0.014757598593318261, "grad_norm": 0.2706267535686493, "learning_rate": 0.00019998904879646966, "loss": 11.6948, "step": 705 }, { "epoch": 0.014778531357280415, "grad_norm": 0.14981651306152344, "learning_rate": 0.00019998901632501808, "loss": 11.7024, "step": 706 }, { "epoch": 0.014799464121242568, "grad_norm": 0.15470349788665771, "learning_rate": 0.00019998898380549975, "loss": 11.7089, "step": 707 }, { "epoch": 0.014820396885204722, "grad_norm": 0.16429296135902405, "learning_rate": 0.00019998895123791475, "loss": 11.6959, "step": 708 }, { "epoch": 0.014841329649166877, "grad_norm": 0.13618430495262146, "learning_rate": 0.00019998891862226307, "loss": 11.7032, "step": 709 }, { "epoch": 0.01486226241312903, "grad_norm": 0.1802181899547577, "learning_rate": 0.00019998888595854468, "loss": 11.7138, "step": 710 }, { "epoch": 0.014883195177091184, "grad_norm": 0.14702081680297852, "learning_rate": 0.00019998885324675963, "loss": 11.7101, "step": 711 }, { "epoch": 0.014904127941053337, "grad_norm": 0.19412992894649506, "learning_rate": 0.00019998882048690797, "loss": 11.7005, "step": 712 }, { "epoch": 0.01492506070501549, "grad_norm": 0.16142699122428894, "learning_rate": 0.00019998878767898964, "loss": 11.6988, "step": 713 }, { "epoch": 0.014945993468977644, "grad_norm": 0.19573938846588135, "learning_rate": 0.00019998875482300473, "loss": 11.7011, "step": 714 }, { "epoch": 0.014966926232939798, "grad_norm": 0.1531166434288025, "learning_rate": 0.0001999887219189532, "loss": 11.7072, "step": 715 }, { "epoch": 0.014987858996901951, "grad_norm": 0.17004945874214172, "learning_rate": 0.00019998868896683512, "loss": 11.6991, "step": 716 }, { "epoch": 0.015008791760864105, "grad_norm": 0.23429304361343384, "learning_rate": 0.00019998865596665045, "loss": 11.7057, "step": 717 }, { "epoch": 0.015029724524826258, "grad_norm": 0.1228628009557724, "learning_rate": 0.0001999886229183992, "loss": 11.7086, "step": 718 }, { "epoch": 0.015050657288788412, "grad_norm": 0.16445472836494446, "learning_rate": 0.00019998858982208143, "loss": 11.6995, "step": 719 }, { "epoch": 0.015071590052750565, "grad_norm": 0.1523524820804596, "learning_rate": 0.00019998855667769717, "loss": 11.7075, "step": 720 }, { "epoch": 0.015092522816712719, "grad_norm": 0.15620747208595276, "learning_rate": 0.00019998852348524637, "loss": 11.7001, "step": 721 }, { "epoch": 0.015113455580674872, "grad_norm": 0.25572070479393005, "learning_rate": 0.00019998849024472913, "loss": 11.709, "step": 722 }, { "epoch": 0.015134388344637026, "grad_norm": 0.19857439398765564, "learning_rate": 0.00019998845695614538, "loss": 11.6955, "step": 723 }, { "epoch": 0.01515532110859918, "grad_norm": 0.13409873843193054, "learning_rate": 0.00019998842361949518, "loss": 11.6969, "step": 724 }, { "epoch": 0.015176253872561333, "grad_norm": 0.1768866926431656, "learning_rate": 0.00019998839023477856, "loss": 11.7067, "step": 725 }, { "epoch": 0.015197186636523486, "grad_norm": 0.15390121936798096, "learning_rate": 0.00019998835680199548, "loss": 11.7056, "step": 726 }, { "epoch": 0.01521811940048564, "grad_norm": 0.2134755551815033, "learning_rate": 0.00019998832332114604, "loss": 11.713, "step": 727 }, { "epoch": 0.015239052164447793, "grad_norm": 0.13181369006633759, "learning_rate": 0.00019998828979223018, "loss": 11.7138, "step": 728 }, { "epoch": 0.015259984928409947, "grad_norm": 0.17596621811389923, "learning_rate": 0.00019998825621524795, "loss": 11.7126, "step": 729 }, { "epoch": 0.0152809176923721, "grad_norm": 0.15703652799129486, "learning_rate": 0.00019998822259019935, "loss": 11.7031, "step": 730 }, { "epoch": 0.015301850456334254, "grad_norm": 0.14515189826488495, "learning_rate": 0.00019998818891708443, "loss": 11.7166, "step": 731 }, { "epoch": 0.015322783220296407, "grad_norm": 0.15489645302295685, "learning_rate": 0.00019998815519590318, "loss": 11.7011, "step": 732 }, { "epoch": 0.015343715984258561, "grad_norm": 0.1709805727005005, "learning_rate": 0.0001999881214266556, "loss": 11.696, "step": 733 }, { "epoch": 0.015364648748220714, "grad_norm": 0.18854182958602905, "learning_rate": 0.00019998808760934175, "loss": 11.7087, "step": 734 }, { "epoch": 0.015385581512182868, "grad_norm": 0.14447306096553802, "learning_rate": 0.0001999880537439616, "loss": 11.6844, "step": 735 }, { "epoch": 0.015406514276145021, "grad_norm": 0.1415441781282425, "learning_rate": 0.0001999880198305152, "loss": 11.7012, "step": 736 }, { "epoch": 0.015427447040107175, "grad_norm": 0.18136052787303925, "learning_rate": 0.00019998798586900259, "loss": 11.726, "step": 737 }, { "epoch": 0.01544837980406933, "grad_norm": 0.1885782927274704, "learning_rate": 0.0001999879518594237, "loss": 11.7026, "step": 738 }, { "epoch": 0.015469312568031484, "grad_norm": 0.16951718926429749, "learning_rate": 0.00019998791780177864, "loss": 11.7042, "step": 739 }, { "epoch": 0.015490245331993637, "grad_norm": 0.16527386009693146, "learning_rate": 0.00019998788369606735, "loss": 11.6996, "step": 740 }, { "epoch": 0.01551117809595579, "grad_norm": 0.1641307920217514, "learning_rate": 0.0001999878495422899, "loss": 11.7035, "step": 741 }, { "epoch": 0.015532110859917944, "grad_norm": 0.17737933993339539, "learning_rate": 0.00019998781534044625, "loss": 11.7006, "step": 742 }, { "epoch": 0.015553043623880098, "grad_norm": 0.34348779916763306, "learning_rate": 0.00019998778109053648, "loss": 11.7134, "step": 743 }, { "epoch": 0.015573976387842251, "grad_norm": 0.13654763996601105, "learning_rate": 0.0001999877467925606, "loss": 11.7149, "step": 744 }, { "epoch": 0.015594909151804405, "grad_norm": 0.2958082854747772, "learning_rate": 0.0001999877124465186, "loss": 11.7035, "step": 745 }, { "epoch": 0.015615841915766558, "grad_norm": 0.1897699534893036, "learning_rate": 0.00019998767805241046, "loss": 11.7172, "step": 746 }, { "epoch": 0.01563677467972871, "grad_norm": 1.1002278327941895, "learning_rate": 0.00019998764361023628, "loss": 11.7548, "step": 747 }, { "epoch": 0.015657707443690865, "grad_norm": 0.1756860613822937, "learning_rate": 0.00019998760911999603, "loss": 11.7116, "step": 748 }, { "epoch": 0.015678640207653017, "grad_norm": 0.1693435162305832, "learning_rate": 0.00019998757458168973, "loss": 11.7006, "step": 749 }, { "epoch": 0.015699572971615172, "grad_norm": 0.1902988702058792, "learning_rate": 0.00019998753999531742, "loss": 11.7081, "step": 750 }, { "epoch": 0.015720505735577324, "grad_norm": 0.17927370965480804, "learning_rate": 0.00019998750536087903, "loss": 11.7027, "step": 751 }, { "epoch": 0.01574143849953948, "grad_norm": 0.2108737826347351, "learning_rate": 0.0001999874706783747, "loss": 11.7102, "step": 752 }, { "epoch": 0.015762371263501634, "grad_norm": 0.1593363881111145, "learning_rate": 0.00019998743594780438, "loss": 11.7085, "step": 753 }, { "epoch": 0.015783304027463786, "grad_norm": 0.19384174048900604, "learning_rate": 0.00019998740116916808, "loss": 11.7159, "step": 754 }, { "epoch": 0.01580423679142594, "grad_norm": 0.18705740571022034, "learning_rate": 0.00019998736634246585, "loss": 11.6971, "step": 755 }, { "epoch": 0.015825169555388093, "grad_norm": 0.1587515026330948, "learning_rate": 0.00019998733146769769, "loss": 11.7178, "step": 756 }, { "epoch": 0.01584610231935025, "grad_norm": 0.20166048407554626, "learning_rate": 0.0001999872965448636, "loss": 11.7056, "step": 757 }, { "epoch": 0.0158670350833124, "grad_norm": 0.20914575457572937, "learning_rate": 0.0001999872615739636, "loss": 11.6976, "step": 758 }, { "epoch": 0.015887967847274555, "grad_norm": 0.1624010056257248, "learning_rate": 0.00019998722655499777, "loss": 11.7215, "step": 759 }, { "epoch": 0.015908900611236707, "grad_norm": 0.22633717954158783, "learning_rate": 0.00019998719148796602, "loss": 11.7036, "step": 760 }, { "epoch": 0.015929833375198862, "grad_norm": 0.22637107968330383, "learning_rate": 0.00019998715637286848, "loss": 11.7169, "step": 761 }, { "epoch": 0.015950766139161014, "grad_norm": 0.16041788458824158, "learning_rate": 0.00019998712120970508, "loss": 11.7059, "step": 762 }, { "epoch": 0.01597169890312317, "grad_norm": 0.13034027814865112, "learning_rate": 0.00019998708599847586, "loss": 11.7116, "step": 763 }, { "epoch": 0.01599263166708532, "grad_norm": 0.16735072433948517, "learning_rate": 0.00019998705073918084, "loss": 11.7122, "step": 764 }, { "epoch": 0.016013564431047476, "grad_norm": 0.18907016515731812, "learning_rate": 0.00019998701543182007, "loss": 11.7127, "step": 765 }, { "epoch": 0.016034497195009628, "grad_norm": 0.14998988807201385, "learning_rate": 0.00019998698007639353, "loss": 11.6975, "step": 766 }, { "epoch": 0.016055429958971783, "grad_norm": 0.2028358429670334, "learning_rate": 0.00019998694467290122, "loss": 11.6909, "step": 767 }, { "epoch": 0.016076362722933935, "grad_norm": 0.19424614310264587, "learning_rate": 0.00019998690922134319, "loss": 11.7048, "step": 768 }, { "epoch": 0.01609729548689609, "grad_norm": 0.14254124462604523, "learning_rate": 0.00019998687372171946, "loss": 11.7004, "step": 769 }, { "epoch": 0.016118228250858242, "grad_norm": 0.16286201775074005, "learning_rate": 0.00019998683817403003, "loss": 11.716, "step": 770 }, { "epoch": 0.016139161014820397, "grad_norm": 0.1728399097919464, "learning_rate": 0.00019998680257827492, "loss": 11.6978, "step": 771 }, { "epoch": 0.01616009377878255, "grad_norm": 0.16934949159622192, "learning_rate": 0.00019998676693445418, "loss": 11.7081, "step": 772 }, { "epoch": 0.016181026542744704, "grad_norm": 0.2093725949525833, "learning_rate": 0.00019998673124256775, "loss": 11.694, "step": 773 }, { "epoch": 0.016201959306706856, "grad_norm": 0.1377018839120865, "learning_rate": 0.00019998669550261574, "loss": 11.6996, "step": 774 }, { "epoch": 0.01622289207066901, "grad_norm": 0.16073481738567352, "learning_rate": 0.00019998665971459806, "loss": 11.6893, "step": 775 }, { "epoch": 0.016243824834631163, "grad_norm": 0.15985512733459473, "learning_rate": 0.00019998662387851485, "loss": 11.6885, "step": 776 }, { "epoch": 0.01626475759859332, "grad_norm": 0.17269478738307953, "learning_rate": 0.00019998658799436604, "loss": 11.6989, "step": 777 }, { "epoch": 0.01628569036255547, "grad_norm": 0.14955151081085205, "learning_rate": 0.0001999865520621517, "loss": 11.7117, "step": 778 }, { "epoch": 0.016306623126517625, "grad_norm": 0.17543110251426697, "learning_rate": 0.00019998651608187177, "loss": 11.6981, "step": 779 }, { "epoch": 0.016327555890479777, "grad_norm": 0.14624078571796417, "learning_rate": 0.00019998648005352637, "loss": 11.7024, "step": 780 }, { "epoch": 0.016348488654441932, "grad_norm": 0.16980721056461334, "learning_rate": 0.00019998644397711544, "loss": 11.7031, "step": 781 }, { "epoch": 0.016369421418404088, "grad_norm": 0.1608070582151413, "learning_rate": 0.000199986407852639, "loss": 11.7031, "step": 782 }, { "epoch": 0.01639035418236624, "grad_norm": 0.20412540435791016, "learning_rate": 0.00019998637168009713, "loss": 11.7084, "step": 783 }, { "epoch": 0.016411286946328395, "grad_norm": 0.19375893473625183, "learning_rate": 0.00019998633545948978, "loss": 11.6967, "step": 784 }, { "epoch": 0.016432219710290547, "grad_norm": 0.1394992619752884, "learning_rate": 0.000199986299190817, "loss": 11.6955, "step": 785 }, { "epoch": 0.016453152474252702, "grad_norm": 0.15449926257133484, "learning_rate": 0.00019998626287407882, "loss": 11.7003, "step": 786 }, { "epoch": 0.016474085238214854, "grad_norm": 0.17081768810749054, "learning_rate": 0.00019998622650927524, "loss": 11.7136, "step": 787 }, { "epoch": 0.01649501800217701, "grad_norm": 0.1551727056503296, "learning_rate": 0.00019998619009640626, "loss": 11.6926, "step": 788 }, { "epoch": 0.01651595076613916, "grad_norm": 0.20602062344551086, "learning_rate": 0.00019998615363547192, "loss": 11.7201, "step": 789 }, { "epoch": 0.016536883530101316, "grad_norm": 0.17147627472877502, "learning_rate": 0.0001999861171264722, "loss": 11.7049, "step": 790 }, { "epoch": 0.016557816294063468, "grad_norm": 0.1556428074836731, "learning_rate": 0.0001999860805694072, "loss": 11.6967, "step": 791 }, { "epoch": 0.016578749058025623, "grad_norm": 0.20166954398155212, "learning_rate": 0.0001999860439642769, "loss": 11.704, "step": 792 }, { "epoch": 0.016599681821987775, "grad_norm": 0.16374927759170532, "learning_rate": 0.00019998600731108127, "loss": 11.7138, "step": 793 }, { "epoch": 0.01662061458594993, "grad_norm": 0.17646358907222748, "learning_rate": 0.00019998597060982034, "loss": 11.697, "step": 794 }, { "epoch": 0.01664154734991208, "grad_norm": 0.14611704647541046, "learning_rate": 0.0001999859338604942, "loss": 11.6997, "step": 795 }, { "epoch": 0.016662480113874237, "grad_norm": 0.17282985150814056, "learning_rate": 0.00019998589706310277, "loss": 11.6878, "step": 796 }, { "epoch": 0.01668341287783639, "grad_norm": 0.1939070075750351, "learning_rate": 0.00019998586021764613, "loss": 11.6976, "step": 797 }, { "epoch": 0.016704345641798544, "grad_norm": 0.1448133885860443, "learning_rate": 0.0001999858233241243, "loss": 11.7185, "step": 798 }, { "epoch": 0.016725278405760696, "grad_norm": 0.14624924957752228, "learning_rate": 0.00019998578638253728, "loss": 11.7113, "step": 799 }, { "epoch": 0.01674621116972285, "grad_norm": 0.1951896697282791, "learning_rate": 0.00019998574939288508, "loss": 11.7001, "step": 800 }, { "epoch": 0.016767143933685003, "grad_norm": 0.18494369089603424, "learning_rate": 0.00019998571235516774, "loss": 11.7066, "step": 801 }, { "epoch": 0.016788076697647158, "grad_norm": 0.1842448115348816, "learning_rate": 0.00019998567526938527, "loss": 11.7005, "step": 802 }, { "epoch": 0.01680900946160931, "grad_norm": 0.18399931490421295, "learning_rate": 0.00019998563813553765, "loss": 11.7005, "step": 803 }, { "epoch": 0.016829942225571465, "grad_norm": 0.19374924898147583, "learning_rate": 0.00019998560095362496, "loss": 11.7002, "step": 804 }, { "epoch": 0.016850874989533617, "grad_norm": 0.15924106538295746, "learning_rate": 0.00019998556372364715, "loss": 11.7085, "step": 805 }, { "epoch": 0.016871807753495772, "grad_norm": 0.154887393116951, "learning_rate": 0.0001999855264456043, "loss": 11.7102, "step": 806 }, { "epoch": 0.016892740517457924, "grad_norm": 0.16725878417491913, "learning_rate": 0.00019998548911949641, "loss": 11.6987, "step": 807 }, { "epoch": 0.01691367328142008, "grad_norm": 0.20706622302532196, "learning_rate": 0.0001999854517453235, "loss": 11.7068, "step": 808 }, { "epoch": 0.01693460604538223, "grad_norm": 0.21807022392749786, "learning_rate": 0.00019998541432308554, "loss": 11.7025, "step": 809 }, { "epoch": 0.016955538809344386, "grad_norm": 0.18136905133724213, "learning_rate": 0.0001999853768527826, "loss": 11.7015, "step": 810 }, { "epoch": 0.01697647157330654, "grad_norm": 0.1893421858549118, "learning_rate": 0.00019998533933441472, "loss": 11.6997, "step": 811 }, { "epoch": 0.016997404337268693, "grad_norm": 0.16596820950508118, "learning_rate": 0.00019998530176798188, "loss": 11.7129, "step": 812 }, { "epoch": 0.017018337101230848, "grad_norm": 0.15769556164741516, "learning_rate": 0.00019998526415348407, "loss": 11.7004, "step": 813 }, { "epoch": 0.017039269865193, "grad_norm": 0.15541183948516846, "learning_rate": 0.00019998522649092136, "loss": 11.7175, "step": 814 }, { "epoch": 0.017060202629155155, "grad_norm": 0.20915593206882477, "learning_rate": 0.00019998518878029374, "loss": 11.7107, "step": 815 }, { "epoch": 0.017081135393117307, "grad_norm": 0.1666242629289627, "learning_rate": 0.00019998515102160122, "loss": 11.7094, "step": 816 }, { "epoch": 0.017102068157079462, "grad_norm": 0.17674404382705688, "learning_rate": 0.00019998511321484385, "loss": 11.6959, "step": 817 }, { "epoch": 0.017123000921041614, "grad_norm": 0.18413372337818146, "learning_rate": 0.00019998507536002165, "loss": 11.6774, "step": 818 }, { "epoch": 0.01714393368500377, "grad_norm": 0.1599770188331604, "learning_rate": 0.0001999850374571346, "loss": 11.7073, "step": 819 }, { "epoch": 0.01716486644896592, "grad_norm": 0.1530405580997467, "learning_rate": 0.00019998499950618274, "loss": 11.7082, "step": 820 }, { "epoch": 0.017185799212928076, "grad_norm": 0.16235214471817017, "learning_rate": 0.0001999849615071661, "loss": 11.6989, "step": 821 }, { "epoch": 0.017206731976890228, "grad_norm": 0.1834850013256073, "learning_rate": 0.00019998492346008466, "loss": 11.7124, "step": 822 }, { "epoch": 0.017227664740852383, "grad_norm": 0.18458889424800873, "learning_rate": 0.00019998488536493848, "loss": 11.6981, "step": 823 }, { "epoch": 0.017248597504814535, "grad_norm": 0.16364791989326477, "learning_rate": 0.00019998484722172756, "loss": 11.7024, "step": 824 }, { "epoch": 0.01726953026877669, "grad_norm": 0.21170352399349213, "learning_rate": 0.00019998480903045193, "loss": 11.6881, "step": 825 }, { "epoch": 0.017290463032738842, "grad_norm": 0.15602168440818787, "learning_rate": 0.0001999847707911116, "loss": 11.7057, "step": 826 }, { "epoch": 0.017311395796700997, "grad_norm": 0.15238279104232788, "learning_rate": 0.00019998473250370656, "loss": 11.6814, "step": 827 }, { "epoch": 0.01733232856066315, "grad_norm": 0.15655578672885895, "learning_rate": 0.0001999846941682369, "loss": 11.6998, "step": 828 }, { "epoch": 0.017353261324625304, "grad_norm": 0.14346180856227875, "learning_rate": 0.00019998465578470253, "loss": 11.6948, "step": 829 }, { "epoch": 0.017374194088587456, "grad_norm": 0.17557720839977264, "learning_rate": 0.0001999846173531036, "loss": 11.7092, "step": 830 }, { "epoch": 0.01739512685254961, "grad_norm": 0.16048817336559296, "learning_rate": 0.00019998457887344002, "loss": 11.7097, "step": 831 }, { "epoch": 0.017416059616511763, "grad_norm": 0.15570907294750214, "learning_rate": 0.00019998454034571186, "loss": 11.6956, "step": 832 }, { "epoch": 0.017436992380473918, "grad_norm": 0.20007149875164032, "learning_rate": 0.00019998450176991913, "loss": 11.6943, "step": 833 }, { "epoch": 0.01745792514443607, "grad_norm": 0.18763534724712372, "learning_rate": 0.00019998446314606184, "loss": 11.6959, "step": 834 }, { "epoch": 0.017478857908398225, "grad_norm": 0.1533135324716568, "learning_rate": 0.00019998442447414, "loss": 11.6904, "step": 835 }, { "epoch": 0.017499790672360377, "grad_norm": 0.16243652999401093, "learning_rate": 0.0001999843857541537, "loss": 11.6899, "step": 836 }, { "epoch": 0.017520723436322532, "grad_norm": 0.1969839483499527, "learning_rate": 0.00019998434698610283, "loss": 11.729, "step": 837 }, { "epoch": 0.017541656200284684, "grad_norm": 0.15563151240348816, "learning_rate": 0.00019998430816998752, "loss": 11.693, "step": 838 }, { "epoch": 0.01756258896424684, "grad_norm": 0.23957078158855438, "learning_rate": 0.00019998426930580774, "loss": 11.7127, "step": 839 }, { "epoch": 0.017583521728208994, "grad_norm": 0.1776970773935318, "learning_rate": 0.00019998423039356352, "loss": 11.6998, "step": 840 }, { "epoch": 0.017604454492171146, "grad_norm": 0.17794205248355865, "learning_rate": 0.00019998419143325488, "loss": 11.688, "step": 841 }, { "epoch": 0.0176253872561333, "grad_norm": 0.21738791465759277, "learning_rate": 0.00019998415242488183, "loss": 11.6972, "step": 842 }, { "epoch": 0.017646320020095453, "grad_norm": 0.13905678689479828, "learning_rate": 0.0001999841133684444, "loss": 11.7008, "step": 843 }, { "epoch": 0.01766725278405761, "grad_norm": 0.20729321241378784, "learning_rate": 0.0001999840742639426, "loss": 11.6978, "step": 844 }, { "epoch": 0.01768818554801976, "grad_norm": 0.15093335509300232, "learning_rate": 0.00019998403511137645, "loss": 11.7124, "step": 845 }, { "epoch": 0.017709118311981915, "grad_norm": 0.1482487916946411, "learning_rate": 0.00019998399591074595, "loss": 11.7101, "step": 846 }, { "epoch": 0.017730051075944067, "grad_norm": 0.15393370389938354, "learning_rate": 0.00019998395666205116, "loss": 11.698, "step": 847 }, { "epoch": 0.017750983839906222, "grad_norm": 0.19782131910324097, "learning_rate": 0.0001999839173652921, "loss": 11.708, "step": 848 }, { "epoch": 0.017771916603868374, "grad_norm": 0.1478702276945114, "learning_rate": 0.00019998387802046874, "loss": 11.7116, "step": 849 }, { "epoch": 0.01779284936783053, "grad_norm": 0.18336798250675201, "learning_rate": 0.00019998383862758116, "loss": 11.7, "step": 850 }, { "epoch": 0.01781378213179268, "grad_norm": 0.12869687378406525, "learning_rate": 0.0001999837991866293, "loss": 11.703, "step": 851 }, { "epoch": 0.017834714895754836, "grad_norm": 0.18600551784038544, "learning_rate": 0.00019998375969761326, "loss": 11.6995, "step": 852 }, { "epoch": 0.017855647659716988, "grad_norm": 0.16646328568458557, "learning_rate": 0.00019998372016053297, "loss": 11.7061, "step": 853 }, { "epoch": 0.017876580423679143, "grad_norm": 0.19500832259655, "learning_rate": 0.00019998368057538854, "loss": 11.714, "step": 854 }, { "epoch": 0.017897513187641295, "grad_norm": 0.21590706706047058, "learning_rate": 0.00019998364094217996, "loss": 11.7123, "step": 855 }, { "epoch": 0.01791844595160345, "grad_norm": 0.1823892444372177, "learning_rate": 0.00019998360126090723, "loss": 11.7131, "step": 856 }, { "epoch": 0.017939378715565602, "grad_norm": 0.16544105112552643, "learning_rate": 0.0001999835615315704, "loss": 11.7074, "step": 857 }, { "epoch": 0.017960311479527757, "grad_norm": 0.18759918212890625, "learning_rate": 0.00019998352175416942, "loss": 11.6973, "step": 858 }, { "epoch": 0.01798124424348991, "grad_norm": 0.20668047666549683, "learning_rate": 0.0001999834819287044, "loss": 11.7064, "step": 859 }, { "epoch": 0.018002177007452064, "grad_norm": 0.1900876760482788, "learning_rate": 0.0001999834420551753, "loss": 11.6988, "step": 860 }, { "epoch": 0.018023109771414216, "grad_norm": 0.13624826073646545, "learning_rate": 0.00019998340213358214, "loss": 11.6963, "step": 861 }, { "epoch": 0.01804404253537637, "grad_norm": 0.23192426562309265, "learning_rate": 0.00019998336216392497, "loss": 11.6944, "step": 862 }, { "epoch": 0.018064975299338523, "grad_norm": 0.1777576059103012, "learning_rate": 0.0001999833221462038, "loss": 11.7096, "step": 863 }, { "epoch": 0.01808590806330068, "grad_norm": 0.16478341817855835, "learning_rate": 0.00019998328208041863, "loss": 11.7005, "step": 864 }, { "epoch": 0.01810684082726283, "grad_norm": 0.1352301388978958, "learning_rate": 0.0001999832419665695, "loss": 11.7052, "step": 865 }, { "epoch": 0.018127773591224985, "grad_norm": 0.1549212485551834, "learning_rate": 0.00019998320180465644, "loss": 11.6846, "step": 866 }, { "epoch": 0.018148706355187137, "grad_norm": 0.15651361644268036, "learning_rate": 0.0001999831615946794, "loss": 11.7038, "step": 867 }, { "epoch": 0.018169639119149292, "grad_norm": 0.28529900312423706, "learning_rate": 0.0001999831213366385, "loss": 11.7025, "step": 868 }, { "epoch": 0.018190571883111448, "grad_norm": 0.16945534944534302, "learning_rate": 0.0001999830810305337, "loss": 11.7098, "step": 869 }, { "epoch": 0.0182115046470736, "grad_norm": 0.175813689827919, "learning_rate": 0.000199983040676365, "loss": 11.7113, "step": 870 }, { "epoch": 0.018232437411035755, "grad_norm": 0.15069805085659027, "learning_rate": 0.00019998300027413248, "loss": 11.7112, "step": 871 }, { "epoch": 0.018253370174997906, "grad_norm": 0.15362383425235748, "learning_rate": 0.00019998295982383613, "loss": 11.6848, "step": 872 }, { "epoch": 0.01827430293896006, "grad_norm": 0.19621339440345764, "learning_rate": 0.00019998291932547596, "loss": 11.7091, "step": 873 }, { "epoch": 0.018295235702922213, "grad_norm": 0.15924721956253052, "learning_rate": 0.000199982878779052, "loss": 11.7034, "step": 874 }, { "epoch": 0.01831616846688437, "grad_norm": 0.16641011834144592, "learning_rate": 0.00019998283818456426, "loss": 11.6935, "step": 875 }, { "epoch": 0.01833710123084652, "grad_norm": 0.1951269805431366, "learning_rate": 0.00019998279754201276, "loss": 11.6919, "step": 876 }, { "epoch": 0.018358033994808676, "grad_norm": 0.21898117661476135, "learning_rate": 0.00019998275685139752, "loss": 11.6975, "step": 877 }, { "epoch": 0.018378966758770827, "grad_norm": 0.16402214765548706, "learning_rate": 0.0001999827161127186, "loss": 11.6938, "step": 878 }, { "epoch": 0.018399899522732983, "grad_norm": 0.20022498071193695, "learning_rate": 0.00019998267532597596, "loss": 11.7025, "step": 879 }, { "epoch": 0.018420832286695134, "grad_norm": 0.1449958086013794, "learning_rate": 0.00019998263449116962, "loss": 11.6958, "step": 880 }, { "epoch": 0.01844176505065729, "grad_norm": 0.17439968883991241, "learning_rate": 0.00019998259360829966, "loss": 11.7005, "step": 881 }, { "epoch": 0.01846269781461944, "grad_norm": 0.16291141510009766, "learning_rate": 0.00019998255267736608, "loss": 11.7021, "step": 882 }, { "epoch": 0.018483630578581597, "grad_norm": 0.16254299879074097, "learning_rate": 0.00019998251169836887, "loss": 11.6953, "step": 883 }, { "epoch": 0.01850456334254375, "grad_norm": 0.14521032571792603, "learning_rate": 0.00019998247067130805, "loss": 11.7089, "step": 884 }, { "epoch": 0.018525496106505904, "grad_norm": 0.1163296326994896, "learning_rate": 0.00019998242959618363, "loss": 11.7006, "step": 885 }, { "epoch": 0.018546428870468055, "grad_norm": 0.1608823835849762, "learning_rate": 0.00019998238847299566, "loss": 11.7072, "step": 886 }, { "epoch": 0.01856736163443021, "grad_norm": 0.19494546949863434, "learning_rate": 0.0001999823473017442, "loss": 11.6905, "step": 887 }, { "epoch": 0.018588294398392362, "grad_norm": 0.1950160712003708, "learning_rate": 0.00019998230608242915, "loss": 11.6854, "step": 888 }, { "epoch": 0.018609227162354518, "grad_norm": 0.13033221662044525, "learning_rate": 0.00019998226481505066, "loss": 11.6876, "step": 889 }, { "epoch": 0.01863015992631667, "grad_norm": 0.15218666195869446, "learning_rate": 0.00019998222349960866, "loss": 11.7175, "step": 890 }, { "epoch": 0.018651092690278825, "grad_norm": 0.15342533588409424, "learning_rate": 0.00019998218213610323, "loss": 11.6849, "step": 891 }, { "epoch": 0.018672025454240977, "grad_norm": 0.15680034458637238, "learning_rate": 0.00019998214072453437, "loss": 11.7039, "step": 892 }, { "epoch": 0.018692958218203132, "grad_norm": 0.16289082169532776, "learning_rate": 0.00019998209926490207, "loss": 11.7057, "step": 893 }, { "epoch": 0.018713890982165284, "grad_norm": 0.16401173174381256, "learning_rate": 0.00019998205775720632, "loss": 11.7151, "step": 894 }, { "epoch": 0.01873482374612744, "grad_norm": 0.15543116629123688, "learning_rate": 0.00019998201620144727, "loss": 11.7113, "step": 895 }, { "epoch": 0.01875575651008959, "grad_norm": 0.14165939390659332, "learning_rate": 0.00019998197459762483, "loss": 11.6948, "step": 896 }, { "epoch": 0.018776689274051746, "grad_norm": 0.15494556725025177, "learning_rate": 0.00019998193294573904, "loss": 11.7007, "step": 897 }, { "epoch": 0.0187976220380139, "grad_norm": 0.21393045783042908, "learning_rate": 0.00019998189124578993, "loss": 11.7138, "step": 898 }, { "epoch": 0.018818554801976053, "grad_norm": 0.15385615825653076, "learning_rate": 0.00019998184949777755, "loss": 11.6902, "step": 899 }, { "epoch": 0.018839487565938208, "grad_norm": 0.2013304978609085, "learning_rate": 0.0001999818077017019, "loss": 11.698, "step": 900 }, { "epoch": 0.01886042032990036, "grad_norm": 0.16990555822849274, "learning_rate": 0.00019998176585756295, "loss": 11.691, "step": 901 }, { "epoch": 0.018881353093862515, "grad_norm": 0.1558927446603775, "learning_rate": 0.00019998172396536079, "loss": 11.6886, "step": 902 }, { "epoch": 0.018902285857824667, "grad_norm": 0.15963707864284515, "learning_rate": 0.0001999816820250954, "loss": 11.7199, "step": 903 }, { "epoch": 0.018923218621786822, "grad_norm": 0.1706528663635254, "learning_rate": 0.0001999816400367668, "loss": 11.7056, "step": 904 }, { "epoch": 0.018944151385748974, "grad_norm": 0.15211763978004456, "learning_rate": 0.00019998159800037503, "loss": 11.6906, "step": 905 }, { "epoch": 0.01896508414971113, "grad_norm": 0.13555821776390076, "learning_rate": 0.0001999815559159201, "loss": 11.6907, "step": 906 }, { "epoch": 0.01898601691367328, "grad_norm": 0.13949450850486755, "learning_rate": 0.00019998151378340203, "loss": 11.6987, "step": 907 }, { "epoch": 0.019006949677635436, "grad_norm": 0.15860077738761902, "learning_rate": 0.00019998147160282085, "loss": 11.6966, "step": 908 }, { "epoch": 0.019027882441597588, "grad_norm": 0.1747034341096878, "learning_rate": 0.00019998142937417658, "loss": 11.7, "step": 909 }, { "epoch": 0.019048815205559743, "grad_norm": 0.18448902666568756, "learning_rate": 0.00019998138709746924, "loss": 11.7057, "step": 910 }, { "epoch": 0.019069747969521895, "grad_norm": 0.1536865085363388, "learning_rate": 0.00019998134477269884, "loss": 11.7106, "step": 911 }, { "epoch": 0.01909068073348405, "grad_norm": 0.23219656944274902, "learning_rate": 0.00019998130239986538, "loss": 11.6893, "step": 912 }, { "epoch": 0.019111613497446202, "grad_norm": 0.1809292435646057, "learning_rate": 0.00019998125997896892, "loss": 11.7058, "step": 913 }, { "epoch": 0.019132546261408357, "grad_norm": 0.2039773166179657, "learning_rate": 0.00019998121751000947, "loss": 11.7013, "step": 914 }, { "epoch": 0.01915347902537051, "grad_norm": 0.1461554914712906, "learning_rate": 0.00019998117499298704, "loss": 11.6965, "step": 915 }, { "epoch": 0.019174411789332664, "grad_norm": 0.16975124180316925, "learning_rate": 0.00019998113242790163, "loss": 11.699, "step": 916 }, { "epoch": 0.019195344553294816, "grad_norm": 0.2013310194015503, "learning_rate": 0.00019998108981475335, "loss": 11.6993, "step": 917 }, { "epoch": 0.01921627731725697, "grad_norm": 0.1503031849861145, "learning_rate": 0.00019998104715354212, "loss": 11.6833, "step": 918 }, { "epoch": 0.019237210081219123, "grad_norm": 0.1922810822725296, "learning_rate": 0.00019998100444426796, "loss": 11.709, "step": 919 }, { "epoch": 0.019258142845181278, "grad_norm": 0.26118695735931396, "learning_rate": 0.00019998096168693098, "loss": 11.6986, "step": 920 }, { "epoch": 0.01927907560914343, "grad_norm": 0.17497487366199493, "learning_rate": 0.00019998091888153113, "loss": 11.6983, "step": 921 }, { "epoch": 0.019300008373105585, "grad_norm": 0.1844528764486313, "learning_rate": 0.00019998087602806847, "loss": 11.7147, "step": 922 }, { "epoch": 0.019320941137067737, "grad_norm": 0.17007096111774445, "learning_rate": 0.00019998083312654295, "loss": 11.692, "step": 923 }, { "epoch": 0.019341873901029892, "grad_norm": 0.17393669486045837, "learning_rate": 0.00019998079017695468, "loss": 11.6974, "step": 924 }, { "epoch": 0.019362806664992044, "grad_norm": 0.18683063983917236, "learning_rate": 0.0001999807471793036, "loss": 11.6923, "step": 925 }, { "epoch": 0.0193837394289542, "grad_norm": 0.15583468973636627, "learning_rate": 0.0001999807041335898, "loss": 11.6948, "step": 926 }, { "epoch": 0.019404672192916354, "grad_norm": 0.1963982880115509, "learning_rate": 0.0001999806610398133, "loss": 11.6975, "step": 927 }, { "epoch": 0.019425604956878506, "grad_norm": 0.15666286647319794, "learning_rate": 0.00019998061789797404, "loss": 11.6961, "step": 928 }, { "epoch": 0.01944653772084066, "grad_norm": 0.1347261220216751, "learning_rate": 0.0001999805747080721, "loss": 11.6962, "step": 929 }, { "epoch": 0.019467470484802813, "grad_norm": 0.267464280128479, "learning_rate": 0.00019998053147010753, "loss": 11.706, "step": 930 }, { "epoch": 0.01948840324876497, "grad_norm": 0.18038363754749298, "learning_rate": 0.0001999804881840803, "loss": 11.69, "step": 931 }, { "epoch": 0.01950933601272712, "grad_norm": 0.2019745111465454, "learning_rate": 0.00019998044484999047, "loss": 11.7027, "step": 932 }, { "epoch": 0.019530268776689275, "grad_norm": 0.16682034730911255, "learning_rate": 0.000199980401467838, "loss": 11.6992, "step": 933 }, { "epoch": 0.019551201540651427, "grad_norm": 0.1739402413368225, "learning_rate": 0.00019998035803762293, "loss": 11.7015, "step": 934 }, { "epoch": 0.019572134304613582, "grad_norm": 0.17859800159931183, "learning_rate": 0.00019998031455934533, "loss": 11.6962, "step": 935 }, { "epoch": 0.019593067068575734, "grad_norm": 0.1480196863412857, "learning_rate": 0.0001999802710330052, "loss": 11.7013, "step": 936 }, { "epoch": 0.01961399983253789, "grad_norm": 0.169717937707901, "learning_rate": 0.0001999802274586025, "loss": 11.7059, "step": 937 }, { "epoch": 0.01963493259650004, "grad_norm": 0.1998303085565567, "learning_rate": 0.00019998018383613736, "loss": 11.6925, "step": 938 }, { "epoch": 0.019655865360462196, "grad_norm": 0.1459021419286728, "learning_rate": 0.00019998014016560972, "loss": 11.6957, "step": 939 }, { "epoch": 0.019676798124424348, "grad_norm": 0.19998681545257568, "learning_rate": 0.0001999800964470196, "loss": 11.712, "step": 940 }, { "epoch": 0.019697730888386503, "grad_norm": 0.18273337185382843, "learning_rate": 0.00019998005268036706, "loss": 11.7053, "step": 941 }, { "epoch": 0.019718663652348655, "grad_norm": 0.2005223035812378, "learning_rate": 0.0001999800088656521, "loss": 11.6999, "step": 942 }, { "epoch": 0.01973959641631081, "grad_norm": 0.1749740093946457, "learning_rate": 0.00019997996500287476, "loss": 11.6998, "step": 943 }, { "epoch": 0.019760529180272962, "grad_norm": 0.18626663088798523, "learning_rate": 0.00019997992109203506, "loss": 11.7134, "step": 944 }, { "epoch": 0.019781461944235117, "grad_norm": 0.1926407516002655, "learning_rate": 0.00019997987713313298, "loss": 11.706, "step": 945 }, { "epoch": 0.01980239470819727, "grad_norm": 0.13833896815776825, "learning_rate": 0.0001999798331261686, "loss": 11.7013, "step": 946 }, { "epoch": 0.019823327472159424, "grad_norm": 0.2110389918088913, "learning_rate": 0.00019997978907114185, "loss": 11.6908, "step": 947 }, { "epoch": 0.019844260236121576, "grad_norm": 0.1771744042634964, "learning_rate": 0.00019997974496805288, "loss": 11.7132, "step": 948 }, { "epoch": 0.01986519300008373, "grad_norm": 0.15671727061271667, "learning_rate": 0.0001999797008169016, "loss": 11.709, "step": 949 }, { "epoch": 0.019886125764045883, "grad_norm": 0.15989014506340027, "learning_rate": 0.0001999796566176881, "loss": 11.7062, "step": 950 }, { "epoch": 0.01990705852800804, "grad_norm": 0.24345427751541138, "learning_rate": 0.00019997961237041236, "loss": 11.6998, "step": 951 }, { "epoch": 0.01992799129197019, "grad_norm": 0.2066100835800171, "learning_rate": 0.0001999795680750744, "loss": 11.7152, "step": 952 }, { "epoch": 0.019948924055932345, "grad_norm": 0.15039436519145966, "learning_rate": 0.0001999795237316743, "loss": 11.7002, "step": 953 }, { "epoch": 0.019969856819894497, "grad_norm": 0.19376160204410553, "learning_rate": 0.000199979479340212, "loss": 11.7038, "step": 954 }, { "epoch": 0.019990789583856652, "grad_norm": 0.1517266482114792, "learning_rate": 0.0001999794349006876, "loss": 11.6929, "step": 955 }, { "epoch": 0.020011722347818808, "grad_norm": 0.19444838166236877, "learning_rate": 0.00019997939041310105, "loss": 11.6934, "step": 956 }, { "epoch": 0.02003265511178096, "grad_norm": 0.23419980704784393, "learning_rate": 0.00019997934587745242, "loss": 11.7089, "step": 957 }, { "epoch": 0.020053587875743115, "grad_norm": 0.19042707979679108, "learning_rate": 0.0001999793012937417, "loss": 11.6962, "step": 958 }, { "epoch": 0.020074520639705266, "grad_norm": 0.15607891976833344, "learning_rate": 0.00019997925666196894, "loss": 11.6913, "step": 959 }, { "epoch": 0.02009545340366742, "grad_norm": 0.20929940044879913, "learning_rate": 0.00019997921198213416, "loss": 11.6983, "step": 960 }, { "epoch": 0.020116386167629573, "grad_norm": 0.19349117577075958, "learning_rate": 0.00019997916725423735, "loss": 11.7088, "step": 961 }, { "epoch": 0.02013731893159173, "grad_norm": 0.18409620225429535, "learning_rate": 0.00019997912247827855, "loss": 11.6992, "step": 962 }, { "epoch": 0.02015825169555388, "grad_norm": 0.18365547060966492, "learning_rate": 0.00019997907765425778, "loss": 11.7144, "step": 963 }, { "epoch": 0.020179184459516036, "grad_norm": 0.15799643099308014, "learning_rate": 0.00019997903278217506, "loss": 11.702, "step": 964 }, { "epoch": 0.020200117223478187, "grad_norm": 0.18564024567604065, "learning_rate": 0.00019997898786203045, "loss": 11.6946, "step": 965 }, { "epoch": 0.020221049987440343, "grad_norm": 0.19801867008209229, "learning_rate": 0.0001999789428938239, "loss": 11.7071, "step": 966 }, { "epoch": 0.020241982751402494, "grad_norm": 0.2023898959159851, "learning_rate": 0.00019997889787755547, "loss": 11.7023, "step": 967 }, { "epoch": 0.02026291551536465, "grad_norm": 0.1553599238395691, "learning_rate": 0.0001999788528132252, "loss": 11.6951, "step": 968 }, { "epoch": 0.0202838482793268, "grad_norm": 0.14382024109363556, "learning_rate": 0.00019997880770083307, "loss": 11.7003, "step": 969 }, { "epoch": 0.020304781043288957, "grad_norm": 0.18725532293319702, "learning_rate": 0.00019997876254037916, "loss": 11.7008, "step": 970 }, { "epoch": 0.02032571380725111, "grad_norm": 0.20322394371032715, "learning_rate": 0.0001999787173318634, "loss": 11.7231, "step": 971 }, { "epoch": 0.020346646571213264, "grad_norm": 0.1911594122648239, "learning_rate": 0.0001999786720752859, "loss": 11.6954, "step": 972 }, { "epoch": 0.020367579335175415, "grad_norm": 0.21205362677574158, "learning_rate": 0.00019997862677064666, "loss": 11.6946, "step": 973 }, { "epoch": 0.02038851209913757, "grad_norm": 0.19715462625026703, "learning_rate": 0.00019997858141794565, "loss": 11.6954, "step": 974 }, { "epoch": 0.020409444863099722, "grad_norm": 0.1788211166858673, "learning_rate": 0.000199978536017183, "loss": 11.6921, "step": 975 }, { "epoch": 0.020430377627061878, "grad_norm": 0.24851027131080627, "learning_rate": 0.0001999784905683586, "loss": 11.7073, "step": 976 }, { "epoch": 0.02045131039102403, "grad_norm": 0.1872161477804184, "learning_rate": 0.00019997844507147256, "loss": 11.7061, "step": 977 }, { "epoch": 0.020472243154986185, "grad_norm": 0.18256351351737976, "learning_rate": 0.0001999783995265249, "loss": 11.7002, "step": 978 }, { "epoch": 0.020493175918948336, "grad_norm": 0.17453864216804504, "learning_rate": 0.00019997835393351557, "loss": 11.6933, "step": 979 }, { "epoch": 0.02051410868291049, "grad_norm": 0.17748704552650452, "learning_rate": 0.00019997830829244468, "loss": 11.7056, "step": 980 }, { "epoch": 0.020535041446872643, "grad_norm": 0.13580776751041412, "learning_rate": 0.0001999782626033122, "loss": 11.6938, "step": 981 }, { "epoch": 0.0205559742108348, "grad_norm": 0.1767847090959549, "learning_rate": 0.00019997821686611818, "loss": 11.7002, "step": 982 }, { "epoch": 0.02057690697479695, "grad_norm": 0.2057088315486908, "learning_rate": 0.0001999781710808626, "loss": 11.6947, "step": 983 }, { "epoch": 0.020597839738759106, "grad_norm": 0.15431362390518188, "learning_rate": 0.00019997812524754553, "loss": 11.6942, "step": 984 }, { "epoch": 0.02061877250272126, "grad_norm": 0.20394116640090942, "learning_rate": 0.00019997807936616697, "loss": 11.6993, "step": 985 }, { "epoch": 0.020639705266683413, "grad_norm": 0.1713804304599762, "learning_rate": 0.00019997803343672695, "loss": 11.7059, "step": 986 }, { "epoch": 0.020660638030645568, "grad_norm": 0.17471183836460114, "learning_rate": 0.00019997798745922544, "loss": 11.7018, "step": 987 }, { "epoch": 0.02068157079460772, "grad_norm": 0.2009122222661972, "learning_rate": 0.00019997794143366258, "loss": 11.6901, "step": 988 }, { "epoch": 0.020702503558569875, "grad_norm": 0.19536864757537842, "learning_rate": 0.00019997789536003825, "loss": 11.6948, "step": 989 }, { "epoch": 0.020723436322532027, "grad_norm": 0.1828080415725708, "learning_rate": 0.0001999778492383526, "loss": 11.7048, "step": 990 }, { "epoch": 0.020744369086494182, "grad_norm": 0.15354710817337036, "learning_rate": 0.00019997780306860557, "loss": 11.7104, "step": 991 }, { "epoch": 0.020765301850456334, "grad_norm": 0.20696932077407837, "learning_rate": 0.00019997775685079718, "loss": 11.7158, "step": 992 }, { "epoch": 0.02078623461441849, "grad_norm": 0.16085761785507202, "learning_rate": 0.00019997771058492751, "loss": 11.6949, "step": 993 }, { "epoch": 0.02080716737838064, "grad_norm": 0.15260130167007446, "learning_rate": 0.00019997766427099655, "loss": 11.7063, "step": 994 }, { "epoch": 0.020828100142342796, "grad_norm": 0.2703154683113098, "learning_rate": 0.00019997761790900432, "loss": 11.6974, "step": 995 }, { "epoch": 0.020849032906304948, "grad_norm": 0.21753056347370148, "learning_rate": 0.00019997757149895086, "loss": 11.6999, "step": 996 }, { "epoch": 0.020869965670267103, "grad_norm": 0.19911636412143707, "learning_rate": 0.00019997752504083617, "loss": 11.7002, "step": 997 }, { "epoch": 0.020890898434229255, "grad_norm": 0.16457553207874298, "learning_rate": 0.00019997747853466025, "loss": 11.6836, "step": 998 }, { "epoch": 0.02091183119819141, "grad_norm": 0.2213161140680313, "learning_rate": 0.0001999774319804232, "loss": 11.6945, "step": 999 }, { "epoch": 0.020932763962153562, "grad_norm": 0.1550571322441101, "learning_rate": 0.00019997738537812495, "loss": 11.7028, "step": 1000 }, { "epoch": 0.020932763962153562, "eval_loss": 11.700238227844238, "eval_runtime": 34.35, "eval_samples_per_second": 27.977, "eval_steps_per_second": 7.016, "step": 1000 }, { "epoch": 0.020953696726115717, "grad_norm": 0.1847425252199173, "learning_rate": 0.00019997733872776562, "loss": 11.6939, "step": 1001 }, { "epoch": 0.02097462949007787, "grad_norm": 0.19353288412094116, "learning_rate": 0.00019997729202934512, "loss": 11.6753, "step": 1002 }, { "epoch": 0.020995562254040024, "grad_norm": 0.14577724039554596, "learning_rate": 0.00019997724528286357, "loss": 11.7014, "step": 1003 }, { "epoch": 0.021016495018002176, "grad_norm": 0.15795031189918518, "learning_rate": 0.00019997719848832094, "loss": 11.7014, "step": 1004 }, { "epoch": 0.02103742778196433, "grad_norm": 0.18189410865306854, "learning_rate": 0.00019997715164571728, "loss": 11.6975, "step": 1005 }, { "epoch": 0.021058360545926483, "grad_norm": 0.21598045527935028, "learning_rate": 0.0001999771047550526, "loss": 11.7076, "step": 1006 }, { "epoch": 0.021079293309888638, "grad_norm": 0.22664694488048553, "learning_rate": 0.00019997705781632687, "loss": 11.7024, "step": 1007 }, { "epoch": 0.02110022607385079, "grad_norm": 0.1941751390695572, "learning_rate": 0.00019997701082954023, "loss": 11.692, "step": 1008 }, { "epoch": 0.021121158837812945, "grad_norm": 0.18436592817306519, "learning_rate": 0.0001999769637946926, "loss": 11.6966, "step": 1009 }, { "epoch": 0.021142091601775097, "grad_norm": 0.15862447023391724, "learning_rate": 0.00019997691671178408, "loss": 11.7076, "step": 1010 }, { "epoch": 0.021163024365737252, "grad_norm": 0.17718182504177094, "learning_rate": 0.00019997686958081462, "loss": 11.6842, "step": 1011 }, { "epoch": 0.021183957129699404, "grad_norm": 0.17097558081150055, "learning_rate": 0.00019997682240178427, "loss": 11.7103, "step": 1012 }, { "epoch": 0.02120488989366156, "grad_norm": 0.1414138823747635, "learning_rate": 0.00019997677517469308, "loss": 11.6915, "step": 1013 }, { "epoch": 0.021225822657623714, "grad_norm": 0.15777809917926788, "learning_rate": 0.00019997672789954103, "loss": 11.7088, "step": 1014 }, { "epoch": 0.021246755421585866, "grad_norm": 0.1807602345943451, "learning_rate": 0.00019997668057632816, "loss": 11.7044, "step": 1015 }, { "epoch": 0.02126768818554802, "grad_norm": 0.167560875415802, "learning_rate": 0.00019997663320505454, "loss": 11.711, "step": 1016 }, { "epoch": 0.021288620949510173, "grad_norm": 0.1750788390636444, "learning_rate": 0.0001999765857857201, "loss": 11.6977, "step": 1017 }, { "epoch": 0.02130955371347233, "grad_norm": 0.20804347097873688, "learning_rate": 0.00019997653831832493, "loss": 11.7152, "step": 1018 }, { "epoch": 0.02133048647743448, "grad_norm": 0.22055847942829132, "learning_rate": 0.00019997649080286903, "loss": 11.7193, "step": 1019 }, { "epoch": 0.021351419241396635, "grad_norm": 0.1954394429922104, "learning_rate": 0.00019997644323935246, "loss": 11.6919, "step": 1020 }, { "epoch": 0.021372352005358787, "grad_norm": 0.23779913783073425, "learning_rate": 0.00019997639562777518, "loss": 11.706, "step": 1021 }, { "epoch": 0.021393284769320942, "grad_norm": 0.23851127922534943, "learning_rate": 0.00019997634796813723, "loss": 11.7055, "step": 1022 }, { "epoch": 0.021414217533283094, "grad_norm": 0.18577134609222412, "learning_rate": 0.00019997630026043868, "loss": 11.6973, "step": 1023 }, { "epoch": 0.02143515029724525, "grad_norm": 0.15301842987537384, "learning_rate": 0.0001999762525046795, "loss": 11.6854, "step": 1024 }, { "epoch": 0.0214560830612074, "grad_norm": 0.17495673894882202, "learning_rate": 0.0001999762047008597, "loss": 11.7168, "step": 1025 }, { "epoch": 0.021477015825169556, "grad_norm": 0.19405409693717957, "learning_rate": 0.0001999761568489794, "loss": 11.7061, "step": 1026 }, { "epoch": 0.021497948589131708, "grad_norm": 0.16130000352859497, "learning_rate": 0.00019997610894903852, "loss": 11.6886, "step": 1027 }, { "epoch": 0.021518881353093863, "grad_norm": 0.23663005232810974, "learning_rate": 0.0001999760610010371, "loss": 11.6823, "step": 1028 }, { "epoch": 0.021539814117056015, "grad_norm": 0.20157739520072937, "learning_rate": 0.00019997601300497523, "loss": 11.7137, "step": 1029 }, { "epoch": 0.02156074688101817, "grad_norm": 0.20285652577877045, "learning_rate": 0.00019997596496085286, "loss": 11.7005, "step": 1030 }, { "epoch": 0.021581679644980322, "grad_norm": 0.16280870139598846, "learning_rate": 0.00019997591686867007, "loss": 11.7062, "step": 1031 }, { "epoch": 0.021602612408942477, "grad_norm": 0.1936010867357254, "learning_rate": 0.0001999758687284268, "loss": 11.6916, "step": 1032 }, { "epoch": 0.02162354517290463, "grad_norm": 0.18533635139465332, "learning_rate": 0.00019997582054012317, "loss": 11.697, "step": 1033 }, { "epoch": 0.021644477936866784, "grad_norm": 0.14579841494560242, "learning_rate": 0.00019997577230375913, "loss": 11.705, "step": 1034 }, { "epoch": 0.021665410700828936, "grad_norm": 0.18790528178215027, "learning_rate": 0.00019997572401933477, "loss": 11.6899, "step": 1035 }, { "epoch": 0.02168634346479109, "grad_norm": 0.15071547031402588, "learning_rate": 0.00019997567568685005, "loss": 11.708, "step": 1036 }, { "epoch": 0.021707276228753243, "grad_norm": 0.17357146739959717, "learning_rate": 0.00019997562730630504, "loss": 11.69, "step": 1037 }, { "epoch": 0.0217282089927154, "grad_norm": 0.2547093629837036, "learning_rate": 0.0001999755788776997, "loss": 11.7093, "step": 1038 }, { "epoch": 0.02174914175667755, "grad_norm": 0.16617847979068756, "learning_rate": 0.00019997553040103415, "loss": 11.6959, "step": 1039 }, { "epoch": 0.021770074520639705, "grad_norm": 0.19802433252334595, "learning_rate": 0.00019997548187630833, "loss": 11.6854, "step": 1040 }, { "epoch": 0.021791007284601857, "grad_norm": 0.20531640946865082, "learning_rate": 0.0001999754333035223, "loss": 11.7052, "step": 1041 }, { "epoch": 0.021811940048564012, "grad_norm": 0.1753583401441574, "learning_rate": 0.00019997538468267604, "loss": 11.6949, "step": 1042 }, { "epoch": 0.021832872812526168, "grad_norm": 0.16503417491912842, "learning_rate": 0.00019997533601376965, "loss": 11.7042, "step": 1043 }, { "epoch": 0.02185380557648832, "grad_norm": 0.17633679509162903, "learning_rate": 0.0001999752872968031, "loss": 11.6847, "step": 1044 }, { "epoch": 0.021874738340450475, "grad_norm": 0.17018334567546844, "learning_rate": 0.00019997523853177644, "loss": 11.7087, "step": 1045 }, { "epoch": 0.021895671104412626, "grad_norm": 0.2030055969953537, "learning_rate": 0.00019997518971868967, "loss": 11.6884, "step": 1046 }, { "epoch": 0.02191660386837478, "grad_norm": 0.18922306597232819, "learning_rate": 0.0001999751408575428, "loss": 11.7052, "step": 1047 }, { "epoch": 0.021937536632336933, "grad_norm": 0.23137404024600983, "learning_rate": 0.0001999750919483359, "loss": 11.6979, "step": 1048 }, { "epoch": 0.02195846939629909, "grad_norm": 0.18254490196704865, "learning_rate": 0.00019997504299106898, "loss": 11.6872, "step": 1049 }, { "epoch": 0.02197940216026124, "grad_norm": 0.1641779989004135, "learning_rate": 0.00019997499398574206, "loss": 11.7008, "step": 1050 }, { "epoch": 0.022000334924223396, "grad_norm": 0.18786939978599548, "learning_rate": 0.00019997494493235508, "loss": 11.6899, "step": 1051 }, { "epoch": 0.022021267688185547, "grad_norm": 0.15974467992782593, "learning_rate": 0.0001999748958309082, "loss": 11.6893, "step": 1052 }, { "epoch": 0.022042200452147703, "grad_norm": 0.16774313151836395, "learning_rate": 0.0001999748466814014, "loss": 11.7004, "step": 1053 }, { "epoch": 0.022063133216109854, "grad_norm": 0.16744175553321838, "learning_rate": 0.00019997479748383467, "loss": 11.7109, "step": 1054 }, { "epoch": 0.02208406598007201, "grad_norm": 0.21853011846542358, "learning_rate": 0.00019997474823820803, "loss": 11.6966, "step": 1055 }, { "epoch": 0.02210499874403416, "grad_norm": 0.1863621175289154, "learning_rate": 0.00019997469894452157, "loss": 11.697, "step": 1056 }, { "epoch": 0.022125931507996317, "grad_norm": 0.16950352489948273, "learning_rate": 0.00019997464960277522, "loss": 11.6817, "step": 1057 }, { "epoch": 0.02214686427195847, "grad_norm": 0.17900004982948303, "learning_rate": 0.00019997460021296907, "loss": 11.6981, "step": 1058 }, { "epoch": 0.022167797035920624, "grad_norm": 0.20174522697925568, "learning_rate": 0.00019997455077510313, "loss": 11.6905, "step": 1059 }, { "epoch": 0.022188729799882775, "grad_norm": 0.16731804609298706, "learning_rate": 0.00019997450128917744, "loss": 11.689, "step": 1060 }, { "epoch": 0.02220966256384493, "grad_norm": 0.1580362468957901, "learning_rate": 0.000199974451755192, "loss": 11.7097, "step": 1061 }, { "epoch": 0.022230595327807082, "grad_norm": 0.2182919979095459, "learning_rate": 0.00019997440217314682, "loss": 11.7029, "step": 1062 }, { "epoch": 0.022251528091769238, "grad_norm": 0.24874500930309296, "learning_rate": 0.00019997435254304194, "loss": 11.7164, "step": 1063 }, { "epoch": 0.02227246085573139, "grad_norm": 0.1778092086315155, "learning_rate": 0.00019997430286487736, "loss": 11.6916, "step": 1064 }, { "epoch": 0.022293393619693545, "grad_norm": 0.17992615699768066, "learning_rate": 0.00019997425313865317, "loss": 11.7072, "step": 1065 }, { "epoch": 0.022314326383655696, "grad_norm": 0.19874025881290436, "learning_rate": 0.00019997420336436935, "loss": 11.697, "step": 1066 }, { "epoch": 0.02233525914761785, "grad_norm": 0.22520942986011505, "learning_rate": 0.00019997415354202592, "loss": 11.7055, "step": 1067 }, { "epoch": 0.022356191911580003, "grad_norm": 0.22192923724651337, "learning_rate": 0.00019997410367162292, "loss": 11.6898, "step": 1068 }, { "epoch": 0.02237712467554216, "grad_norm": 0.19710032641887665, "learning_rate": 0.00019997405375316035, "loss": 11.7108, "step": 1069 }, { "epoch": 0.022398057439504314, "grad_norm": 0.16029557585716248, "learning_rate": 0.00019997400378663825, "loss": 11.6951, "step": 1070 }, { "epoch": 0.022418990203466466, "grad_norm": 0.20297512412071228, "learning_rate": 0.00019997395377205663, "loss": 11.6972, "step": 1071 }, { "epoch": 0.02243992296742862, "grad_norm": 0.14963079988956451, "learning_rate": 0.00019997390370941553, "loss": 11.694, "step": 1072 }, { "epoch": 0.022460855731390773, "grad_norm": 0.16788026690483093, "learning_rate": 0.000199973853598715, "loss": 11.7006, "step": 1073 }, { "epoch": 0.022481788495352928, "grad_norm": 0.16980008780956268, "learning_rate": 0.00019997380343995502, "loss": 11.7022, "step": 1074 }, { "epoch": 0.02250272125931508, "grad_norm": 0.17561820149421692, "learning_rate": 0.0001999737532331356, "loss": 11.7017, "step": 1075 }, { "epoch": 0.022523654023277235, "grad_norm": 0.2373284101486206, "learning_rate": 0.00019997370297825687, "loss": 11.7039, "step": 1076 }, { "epoch": 0.022544586787239387, "grad_norm": 0.149038165807724, "learning_rate": 0.0001999736526753187, "loss": 11.6996, "step": 1077 }, { "epoch": 0.022565519551201542, "grad_norm": 0.19479840993881226, "learning_rate": 0.00019997360232432124, "loss": 11.6839, "step": 1078 }, { "epoch": 0.022586452315163694, "grad_norm": 0.1674925535917282, "learning_rate": 0.00019997355192526443, "loss": 11.6905, "step": 1079 }, { "epoch": 0.02260738507912585, "grad_norm": 0.22119998931884766, "learning_rate": 0.00019997350147814836, "loss": 11.6948, "step": 1080 }, { "epoch": 0.022628317843088, "grad_norm": 0.1942969262599945, "learning_rate": 0.000199973450982973, "loss": 11.6997, "step": 1081 }, { "epoch": 0.022649250607050156, "grad_norm": 0.2108752578496933, "learning_rate": 0.0001999734004397384, "loss": 11.7053, "step": 1082 }, { "epoch": 0.022670183371012308, "grad_norm": 0.14080247282981873, "learning_rate": 0.00019997334984844458, "loss": 11.6953, "step": 1083 }, { "epoch": 0.022691116134974463, "grad_norm": 0.2101058065891266, "learning_rate": 0.0001999732992090916, "loss": 11.6922, "step": 1084 }, { "epoch": 0.022712048898936615, "grad_norm": 0.24466846883296967, "learning_rate": 0.00019997324852167944, "loss": 11.6903, "step": 1085 }, { "epoch": 0.02273298166289877, "grad_norm": 0.21341440081596375, "learning_rate": 0.0001999731977862081, "loss": 11.6986, "step": 1086 }, { "epoch": 0.02275391442686092, "grad_norm": 0.21377091109752655, "learning_rate": 0.00019997314700267768, "loss": 11.6974, "step": 1087 }, { "epoch": 0.022774847190823077, "grad_norm": 0.181711807847023, "learning_rate": 0.00019997309617108815, "loss": 11.6952, "step": 1088 }, { "epoch": 0.02279577995478523, "grad_norm": 0.19817927479743958, "learning_rate": 0.00019997304529143954, "loss": 11.708, "step": 1089 }, { "epoch": 0.022816712718747384, "grad_norm": 0.17127831280231476, "learning_rate": 0.0001999729943637319, "loss": 11.6939, "step": 1090 }, { "epoch": 0.022837645482709536, "grad_norm": 0.20509803295135498, "learning_rate": 0.00019997294338796524, "loss": 11.6866, "step": 1091 }, { "epoch": 0.02285857824667169, "grad_norm": 0.18517731130123138, "learning_rate": 0.00019997289236413956, "loss": 11.6938, "step": 1092 }, { "epoch": 0.022879511010633843, "grad_norm": 0.2178380936384201, "learning_rate": 0.00019997284129225494, "loss": 11.6974, "step": 1093 }, { "epoch": 0.022900443774595998, "grad_norm": 0.3145143389701843, "learning_rate": 0.00019997279017231137, "loss": 11.7033, "step": 1094 }, { "epoch": 0.02292137653855815, "grad_norm": 0.17865079641342163, "learning_rate": 0.00019997273900430884, "loss": 11.6916, "step": 1095 }, { "epoch": 0.022942309302520305, "grad_norm": 0.22002850472927094, "learning_rate": 0.00019997268778824742, "loss": 11.7078, "step": 1096 }, { "epoch": 0.022963242066482457, "grad_norm": 0.1587982475757599, "learning_rate": 0.00019997263652412714, "loss": 11.7065, "step": 1097 }, { "epoch": 0.022984174830444612, "grad_norm": 0.177576944231987, "learning_rate": 0.00019997258521194802, "loss": 11.698, "step": 1098 }, { "epoch": 0.023005107594406767, "grad_norm": 0.20049042999744415, "learning_rate": 0.00019997253385171005, "loss": 11.7116, "step": 1099 }, { "epoch": 0.02302604035836892, "grad_norm": 0.240153506398201, "learning_rate": 0.00019997248244341332, "loss": 11.7039, "step": 1100 }, { "epoch": 0.023046973122331074, "grad_norm": 0.16244842112064362, "learning_rate": 0.00019997243098705776, "loss": 11.6932, "step": 1101 }, { "epoch": 0.023067905886293226, "grad_norm": 0.1575298309326172, "learning_rate": 0.0001999723794826435, "loss": 11.6872, "step": 1102 }, { "epoch": 0.02308883865025538, "grad_norm": 0.17444388568401337, "learning_rate": 0.00019997232793017046, "loss": 11.6898, "step": 1103 }, { "epoch": 0.023109771414217533, "grad_norm": 0.24566158652305603, "learning_rate": 0.00019997227632963875, "loss": 11.7026, "step": 1104 }, { "epoch": 0.023130704178179688, "grad_norm": 0.1963285207748413, "learning_rate": 0.00019997222468104835, "loss": 11.6968, "step": 1105 }, { "epoch": 0.02315163694214184, "grad_norm": 0.20787453651428223, "learning_rate": 0.00019997217298439932, "loss": 11.7139, "step": 1106 }, { "epoch": 0.023172569706103995, "grad_norm": 0.16847942769527435, "learning_rate": 0.00019997212123969166, "loss": 11.691, "step": 1107 }, { "epoch": 0.023193502470066147, "grad_norm": 0.171690434217453, "learning_rate": 0.00019997206944692537, "loss": 11.7038, "step": 1108 }, { "epoch": 0.023214435234028302, "grad_norm": 0.1954004317522049, "learning_rate": 0.00019997201760610053, "loss": 11.6835, "step": 1109 }, { "epoch": 0.023235367997990454, "grad_norm": 0.1941322535276413, "learning_rate": 0.00019997196571721712, "loss": 11.6999, "step": 1110 }, { "epoch": 0.02325630076195261, "grad_norm": 0.14905127882957458, "learning_rate": 0.0001999719137802752, "loss": 11.6953, "step": 1111 }, { "epoch": 0.02327723352591476, "grad_norm": 0.16230347752571106, "learning_rate": 0.00019997186179527478, "loss": 11.6858, "step": 1112 }, { "epoch": 0.023298166289876916, "grad_norm": 0.16935808956623077, "learning_rate": 0.00019997180976221585, "loss": 11.7025, "step": 1113 }, { "epoch": 0.023319099053839068, "grad_norm": 0.1914507895708084, "learning_rate": 0.0001999717576810985, "loss": 11.7027, "step": 1114 }, { "epoch": 0.023340031817801223, "grad_norm": 0.22039154171943665, "learning_rate": 0.0001999717055519227, "loss": 11.7082, "step": 1115 }, { "epoch": 0.023360964581763375, "grad_norm": 0.20121058821678162, "learning_rate": 0.00019997165337468854, "loss": 11.7071, "step": 1116 }, { "epoch": 0.02338189734572553, "grad_norm": 0.2671426236629486, "learning_rate": 0.00019997160114939596, "loss": 11.7144, "step": 1117 }, { "epoch": 0.023402830109687682, "grad_norm": 0.21498633921146393, "learning_rate": 0.00019997154887604502, "loss": 11.7083, "step": 1118 }, { "epoch": 0.023423762873649837, "grad_norm": 0.16016358137130737, "learning_rate": 0.00019997149655463577, "loss": 11.7135, "step": 1119 }, { "epoch": 0.02344469563761199, "grad_norm": 0.20323394238948822, "learning_rate": 0.00019997144418516823, "loss": 11.6951, "step": 1120 }, { "epoch": 0.023465628401574144, "grad_norm": 0.18430167436599731, "learning_rate": 0.0001999713917676424, "loss": 11.7071, "step": 1121 }, { "epoch": 0.023486561165536296, "grad_norm": 0.16897861659526825, "learning_rate": 0.00019997133930205833, "loss": 11.7054, "step": 1122 }, { "epoch": 0.02350749392949845, "grad_norm": 0.1812838912010193, "learning_rate": 0.00019997128678841603, "loss": 11.6967, "step": 1123 }, { "epoch": 0.023528426693460603, "grad_norm": 0.21950113773345947, "learning_rate": 0.00019997123422671554, "loss": 11.6969, "step": 1124 }, { "epoch": 0.02354935945742276, "grad_norm": 0.1918695867061615, "learning_rate": 0.00019997118161695684, "loss": 11.6945, "step": 1125 }, { "epoch": 0.02357029222138491, "grad_norm": 0.16065643727779388, "learning_rate": 0.00019997112895914004, "loss": 11.6945, "step": 1126 }, { "epoch": 0.023591224985347065, "grad_norm": 0.14797846972942352, "learning_rate": 0.00019997107625326508, "loss": 11.7028, "step": 1127 }, { "epoch": 0.02361215774930922, "grad_norm": 0.1935836672782898, "learning_rate": 0.00019997102349933202, "loss": 11.7193, "step": 1128 }, { "epoch": 0.023633090513271372, "grad_norm": 0.18433146178722382, "learning_rate": 0.0001999709706973409, "loss": 11.6973, "step": 1129 }, { "epoch": 0.023654023277233528, "grad_norm": 0.17831404507160187, "learning_rate": 0.0001999709178472917, "loss": 11.6957, "step": 1130 }, { "epoch": 0.02367495604119568, "grad_norm": 0.2022370994091034, "learning_rate": 0.00019997086494918449, "loss": 11.6907, "step": 1131 }, { "epoch": 0.023695888805157835, "grad_norm": 0.16922712326049805, "learning_rate": 0.0001999708120030193, "loss": 11.6901, "step": 1132 }, { "epoch": 0.023716821569119986, "grad_norm": 0.2557711601257324, "learning_rate": 0.0001999707590087961, "loss": 11.693, "step": 1133 }, { "epoch": 0.02373775433308214, "grad_norm": 0.21932435035705566, "learning_rate": 0.000199970705966515, "loss": 11.6933, "step": 1134 }, { "epoch": 0.023758687097044293, "grad_norm": 0.15976637601852417, "learning_rate": 0.00019997065287617594, "loss": 11.6919, "step": 1135 }, { "epoch": 0.02377961986100645, "grad_norm": 0.16138583421707153, "learning_rate": 0.00019997059973777902, "loss": 11.6893, "step": 1136 }, { "epoch": 0.0238005526249686, "grad_norm": 0.24211587011814117, "learning_rate": 0.00019997054655132424, "loss": 11.6896, "step": 1137 }, { "epoch": 0.023821485388930756, "grad_norm": 0.232025608420372, "learning_rate": 0.00019997049331681158, "loss": 11.701, "step": 1138 }, { "epoch": 0.023842418152892907, "grad_norm": 0.20010793209075928, "learning_rate": 0.00019997044003424108, "loss": 11.7092, "step": 1139 }, { "epoch": 0.023863350916855063, "grad_norm": 0.2207615077495575, "learning_rate": 0.00019997038670361281, "loss": 11.712, "step": 1140 }, { "epoch": 0.023884283680817214, "grad_norm": 0.19864033162593842, "learning_rate": 0.0001999703333249268, "loss": 11.7005, "step": 1141 }, { "epoch": 0.02390521644477937, "grad_norm": 0.20690365135669708, "learning_rate": 0.00019997027989818303, "loss": 11.6938, "step": 1142 }, { "epoch": 0.02392614920874152, "grad_norm": 0.1447480469942093, "learning_rate": 0.0001999702264233815, "loss": 11.697, "step": 1143 }, { "epoch": 0.023947081972703677, "grad_norm": 0.18564239144325256, "learning_rate": 0.00019997017290052235, "loss": 11.6833, "step": 1144 }, { "epoch": 0.02396801473666583, "grad_norm": 0.1427335888147354, "learning_rate": 0.00019997011932960547, "loss": 11.6978, "step": 1145 }, { "epoch": 0.023988947500627984, "grad_norm": 0.16855014860630035, "learning_rate": 0.00019997006571063097, "loss": 11.7002, "step": 1146 }, { "epoch": 0.024009880264590135, "grad_norm": 0.174174964427948, "learning_rate": 0.00019997001204359886, "loss": 11.6948, "step": 1147 }, { "epoch": 0.02403081302855229, "grad_norm": 0.2388385534286499, "learning_rate": 0.00019996995832850917, "loss": 11.7193, "step": 1148 }, { "epoch": 0.024051745792514442, "grad_norm": 0.19885513186454773, "learning_rate": 0.00019996990456536194, "loss": 11.7132, "step": 1149 }, { "epoch": 0.024072678556476598, "grad_norm": 0.16261599957942963, "learning_rate": 0.00019996985075415713, "loss": 11.687, "step": 1150 }, { "epoch": 0.02409361132043875, "grad_norm": 0.18326467275619507, "learning_rate": 0.0001999697968948948, "loss": 11.7031, "step": 1151 }, { "epoch": 0.024114544084400905, "grad_norm": 0.17073954641819, "learning_rate": 0.00019996974298757504, "loss": 11.7045, "step": 1152 }, { "epoch": 0.024135476848363056, "grad_norm": 0.20432952046394348, "learning_rate": 0.0001999696890321978, "loss": 11.6876, "step": 1153 }, { "epoch": 0.02415640961232521, "grad_norm": 0.19262272119522095, "learning_rate": 0.00019996963502876312, "loss": 11.7, "step": 1154 }, { "epoch": 0.024177342376287363, "grad_norm": 0.16175657510757446, "learning_rate": 0.00019996958097727102, "loss": 11.683, "step": 1155 }, { "epoch": 0.02419827514024952, "grad_norm": 0.1789063662290573, "learning_rate": 0.00019996952687772156, "loss": 11.699, "step": 1156 }, { "epoch": 0.024219207904211674, "grad_norm": 0.2125629335641861, "learning_rate": 0.00019996947273011472, "loss": 11.7224, "step": 1157 }, { "epoch": 0.024240140668173826, "grad_norm": 0.17360547184944153, "learning_rate": 0.00019996941853445057, "loss": 11.7004, "step": 1158 }, { "epoch": 0.02426107343213598, "grad_norm": 0.17319616675376892, "learning_rate": 0.00019996936429072913, "loss": 11.7, "step": 1159 }, { "epoch": 0.024282006196098133, "grad_norm": 0.21006879210472107, "learning_rate": 0.00019996930999895037, "loss": 11.6915, "step": 1160 }, { "epoch": 0.024302938960060288, "grad_norm": 0.1792927086353302, "learning_rate": 0.00019996925565911439, "loss": 11.7032, "step": 1161 }, { "epoch": 0.02432387172402244, "grad_norm": 0.2094022035598755, "learning_rate": 0.0001999692012712212, "loss": 11.6984, "step": 1162 }, { "epoch": 0.024344804487984595, "grad_norm": 0.20072242617607117, "learning_rate": 0.00019996914683527079, "loss": 11.7008, "step": 1163 }, { "epoch": 0.024365737251946747, "grad_norm": 0.17084772884845734, "learning_rate": 0.0001999690923512632, "loss": 11.6948, "step": 1164 }, { "epoch": 0.024386670015908902, "grad_norm": 0.20122705399990082, "learning_rate": 0.0001999690378191985, "loss": 11.7001, "step": 1165 }, { "epoch": 0.024407602779871054, "grad_norm": 0.19356320798397064, "learning_rate": 0.00019996898323907666, "loss": 11.6983, "step": 1166 }, { "epoch": 0.02442853554383321, "grad_norm": 0.1641995906829834, "learning_rate": 0.0001999689286108977, "loss": 11.697, "step": 1167 }, { "epoch": 0.02444946830779536, "grad_norm": 0.18804945051670074, "learning_rate": 0.0001999688739346617, "loss": 11.7044, "step": 1168 }, { "epoch": 0.024470401071757516, "grad_norm": 0.16214299201965332, "learning_rate": 0.00019996881921036864, "loss": 11.6895, "step": 1169 }, { "epoch": 0.024491333835719668, "grad_norm": 0.18204395473003387, "learning_rate": 0.0001999687644380186, "loss": 11.7059, "step": 1170 }, { "epoch": 0.024512266599681823, "grad_norm": 0.19600464403629303, "learning_rate": 0.00019996870961761155, "loss": 11.6923, "step": 1171 }, { "epoch": 0.024533199363643975, "grad_norm": 0.22878287732601166, "learning_rate": 0.00019996865474914753, "loss": 11.7102, "step": 1172 }, { "epoch": 0.02455413212760613, "grad_norm": 0.16591079533100128, "learning_rate": 0.00019996859983262657, "loss": 11.6981, "step": 1173 }, { "epoch": 0.02457506489156828, "grad_norm": 0.18505047261714935, "learning_rate": 0.00019996854486804873, "loss": 11.6846, "step": 1174 }, { "epoch": 0.024595997655530437, "grad_norm": 0.22090743482112885, "learning_rate": 0.00019996848985541398, "loss": 11.7015, "step": 1175 }, { "epoch": 0.02461693041949259, "grad_norm": 0.1707501858472824, "learning_rate": 0.00019996843479472238, "loss": 11.7026, "step": 1176 }, { "epoch": 0.024637863183454744, "grad_norm": 0.2113986313343048, "learning_rate": 0.00019996837968597398, "loss": 11.7005, "step": 1177 }, { "epoch": 0.024658795947416896, "grad_norm": 0.17155861854553223, "learning_rate": 0.00019996832452916872, "loss": 11.6836, "step": 1178 }, { "epoch": 0.02467972871137905, "grad_norm": 0.17677387595176697, "learning_rate": 0.00019996826932430672, "loss": 11.6984, "step": 1179 }, { "epoch": 0.024700661475341203, "grad_norm": 0.17593799531459808, "learning_rate": 0.00019996821407138797, "loss": 11.7089, "step": 1180 }, { "epoch": 0.024721594239303358, "grad_norm": 0.17323938012123108, "learning_rate": 0.00019996815877041247, "loss": 11.698, "step": 1181 }, { "epoch": 0.02474252700326551, "grad_norm": 0.18405568599700928, "learning_rate": 0.0001999681034213803, "loss": 11.7103, "step": 1182 }, { "epoch": 0.024763459767227665, "grad_norm": 0.18859431147575378, "learning_rate": 0.00019996804802429146, "loss": 11.7065, "step": 1183 }, { "epoch": 0.024784392531189817, "grad_norm": 0.21949560940265656, "learning_rate": 0.00019996799257914597, "loss": 11.6968, "step": 1184 }, { "epoch": 0.024805325295151972, "grad_norm": 0.2050994634628296, "learning_rate": 0.00019996793708594384, "loss": 11.705, "step": 1185 }, { "epoch": 0.024826258059114127, "grad_norm": 0.17869950830936432, "learning_rate": 0.00019996788154468515, "loss": 11.6956, "step": 1186 }, { "epoch": 0.02484719082307628, "grad_norm": 0.21866777539253235, "learning_rate": 0.00019996782595536989, "loss": 11.6868, "step": 1187 }, { "epoch": 0.024868123587038434, "grad_norm": 0.1932854801416397, "learning_rate": 0.00019996777031799806, "loss": 11.6966, "step": 1188 }, { "epoch": 0.024889056351000586, "grad_norm": 0.21253670752048492, "learning_rate": 0.00019996771463256976, "loss": 11.6964, "step": 1189 }, { "epoch": 0.02490998911496274, "grad_norm": 0.1708359569311142, "learning_rate": 0.00019996765889908496, "loss": 11.6954, "step": 1190 }, { "epoch": 0.024930921878924893, "grad_norm": 0.17852917313575745, "learning_rate": 0.0001999676031175437, "loss": 11.7001, "step": 1191 }, { "epoch": 0.024951854642887048, "grad_norm": 0.17546957731246948, "learning_rate": 0.00019996754728794603, "loss": 11.6916, "step": 1192 }, { "epoch": 0.0249727874068492, "grad_norm": 0.15599198639392853, "learning_rate": 0.00019996749141029194, "loss": 11.6944, "step": 1193 }, { "epoch": 0.024993720170811355, "grad_norm": 0.17726938426494598, "learning_rate": 0.00019996743548458148, "loss": 11.6887, "step": 1194 }, { "epoch": 0.025014652934773507, "grad_norm": 0.16118651628494263, "learning_rate": 0.00019996737951081465, "loss": 11.6985, "step": 1195 }, { "epoch": 0.025035585698735662, "grad_norm": 0.15739260613918304, "learning_rate": 0.0001999673234889915, "loss": 11.6873, "step": 1196 }, { "epoch": 0.025056518462697814, "grad_norm": 0.16334125399589539, "learning_rate": 0.0001999672674191121, "loss": 11.6901, "step": 1197 }, { "epoch": 0.02507745122665997, "grad_norm": 0.19842614233493805, "learning_rate": 0.0001999672113011764, "loss": 11.705, "step": 1198 }, { "epoch": 0.02509838399062212, "grad_norm": 0.18797922134399414, "learning_rate": 0.00019996715513518443, "loss": 11.6924, "step": 1199 }, { "epoch": 0.025119316754584276, "grad_norm": 0.17608796060085297, "learning_rate": 0.0001999670989211363, "loss": 11.7086, "step": 1200 }, { "epoch": 0.025140249518546428, "grad_norm": 0.20391087234020233, "learning_rate": 0.00019996704265903194, "loss": 11.7074, "step": 1201 }, { "epoch": 0.025161182282508583, "grad_norm": 0.14455842971801758, "learning_rate": 0.00019996698634887144, "loss": 11.6965, "step": 1202 }, { "epoch": 0.025182115046470735, "grad_norm": 0.1934814602136612, "learning_rate": 0.00019996692999065481, "loss": 11.7093, "step": 1203 }, { "epoch": 0.02520304781043289, "grad_norm": 0.15760773420333862, "learning_rate": 0.00019996687358438204, "loss": 11.6988, "step": 1204 }, { "epoch": 0.025223980574395042, "grad_norm": 0.19226527214050293, "learning_rate": 0.00019996681713005323, "loss": 11.6938, "step": 1205 }, { "epoch": 0.025244913338357197, "grad_norm": 0.17513759434223175, "learning_rate": 0.00019996676062766834, "loss": 11.6953, "step": 1206 }, { "epoch": 0.02526584610231935, "grad_norm": 0.15389826893806458, "learning_rate": 0.00019996670407722744, "loss": 11.6831, "step": 1207 }, { "epoch": 0.025286778866281504, "grad_norm": 0.2091352492570877, "learning_rate": 0.00019996664747873056, "loss": 11.7035, "step": 1208 }, { "epoch": 0.025307711630243656, "grad_norm": 0.18334104120731354, "learning_rate": 0.0001999665908321777, "loss": 11.6991, "step": 1209 }, { "epoch": 0.02532864439420581, "grad_norm": 0.16134686768054962, "learning_rate": 0.00019996653413756885, "loss": 11.6934, "step": 1210 }, { "epoch": 0.025349577158167963, "grad_norm": 0.18809078633785248, "learning_rate": 0.00019996647739490411, "loss": 11.6986, "step": 1211 }, { "epoch": 0.025370509922130118, "grad_norm": 0.18799595534801483, "learning_rate": 0.0001999664206041835, "loss": 11.6998, "step": 1212 }, { "epoch": 0.02539144268609227, "grad_norm": 0.17006881535053253, "learning_rate": 0.000199966363765407, "loss": 11.7006, "step": 1213 }, { "epoch": 0.025412375450054425, "grad_norm": 0.17001774907112122, "learning_rate": 0.0001999663068785747, "loss": 11.7058, "step": 1214 }, { "epoch": 0.02543330821401658, "grad_norm": 0.21449489891529083, "learning_rate": 0.00019996624994368656, "loss": 11.6996, "step": 1215 }, { "epoch": 0.025454240977978732, "grad_norm": 0.1504162847995758, "learning_rate": 0.00019996619296074267, "loss": 11.6992, "step": 1216 }, { "epoch": 0.025475173741940887, "grad_norm": 0.19606773555278778, "learning_rate": 0.00019996613592974304, "loss": 11.6986, "step": 1217 }, { "epoch": 0.02549610650590304, "grad_norm": 0.18913662433624268, "learning_rate": 0.00019996607885068763, "loss": 11.7174, "step": 1218 }, { "epoch": 0.025517039269865194, "grad_norm": 0.20175834000110626, "learning_rate": 0.00019996602172357654, "loss": 11.7019, "step": 1219 }, { "epoch": 0.025537972033827346, "grad_norm": 0.22518137097358704, "learning_rate": 0.0001999659645484098, "loss": 11.7101, "step": 1220 }, { "epoch": 0.0255589047977895, "grad_norm": 0.19221939146518707, "learning_rate": 0.0001999659073251874, "loss": 11.7121, "step": 1221 }, { "epoch": 0.025579837561751653, "grad_norm": 0.28996527194976807, "learning_rate": 0.0001999658500539094, "loss": 11.6979, "step": 1222 }, { "epoch": 0.02560077032571381, "grad_norm": 0.24220263957977295, "learning_rate": 0.00019996579273457582, "loss": 11.6967, "step": 1223 }, { "epoch": 0.02562170308967596, "grad_norm": 0.15755604207515717, "learning_rate": 0.00019996573536718666, "loss": 11.6866, "step": 1224 }, { "epoch": 0.025642635853638115, "grad_norm": 0.22710680961608887, "learning_rate": 0.00019996567795174197, "loss": 11.6969, "step": 1225 }, { "epoch": 0.025663568617600267, "grad_norm": 0.22239451110363007, "learning_rate": 0.00019996562048824176, "loss": 11.6988, "step": 1226 }, { "epoch": 0.025684501381562422, "grad_norm": 0.2076595574617386, "learning_rate": 0.0001999655629766861, "loss": 11.6824, "step": 1227 }, { "epoch": 0.025705434145524574, "grad_norm": 0.17132611572742462, "learning_rate": 0.000199965505417075, "loss": 11.7002, "step": 1228 }, { "epoch": 0.02572636690948673, "grad_norm": 0.17289838194847107, "learning_rate": 0.00019996544780940845, "loss": 11.706, "step": 1229 }, { "epoch": 0.02574729967344888, "grad_norm": 0.2147035300731659, "learning_rate": 0.0001999653901536865, "loss": 11.7109, "step": 1230 }, { "epoch": 0.025768232437411037, "grad_norm": 0.21891385316848755, "learning_rate": 0.0001999653324499092, "loss": 11.6915, "step": 1231 }, { "epoch": 0.02578916520137319, "grad_norm": 0.22288744151592255, "learning_rate": 0.00019996527469807657, "loss": 11.7029, "step": 1232 }, { "epoch": 0.025810097965335344, "grad_norm": 0.1823716014623642, "learning_rate": 0.00019996521689818862, "loss": 11.6972, "step": 1233 }, { "epoch": 0.025831030729297495, "grad_norm": 0.19381316006183624, "learning_rate": 0.0001999651590502454, "loss": 11.6991, "step": 1234 }, { "epoch": 0.02585196349325965, "grad_norm": 0.20789851248264313, "learning_rate": 0.0001999651011542469, "loss": 11.6961, "step": 1235 }, { "epoch": 0.025872896257221802, "grad_norm": 0.1965898722410202, "learning_rate": 0.0001999650432101932, "loss": 11.7016, "step": 1236 }, { "epoch": 0.025893829021183958, "grad_norm": 0.12890346348285675, "learning_rate": 0.00019996498521808426, "loss": 11.6972, "step": 1237 }, { "epoch": 0.02591476178514611, "grad_norm": 0.188777893781662, "learning_rate": 0.0001999649271779202, "loss": 11.6986, "step": 1238 }, { "epoch": 0.025935694549108265, "grad_norm": 0.1867390275001526, "learning_rate": 0.00019996486908970096, "loss": 11.6958, "step": 1239 }, { "epoch": 0.025956627313070416, "grad_norm": 0.2246929556131363, "learning_rate": 0.00019996481095342663, "loss": 11.6922, "step": 1240 }, { "epoch": 0.02597756007703257, "grad_norm": 0.20296800136566162, "learning_rate": 0.00019996475276909718, "loss": 11.6963, "step": 1241 }, { "epoch": 0.025998492840994723, "grad_norm": 0.2889523506164551, "learning_rate": 0.00019996469453671266, "loss": 11.7015, "step": 1242 }, { "epoch": 0.02601942560495688, "grad_norm": 0.1940385401248932, "learning_rate": 0.00019996463625627318, "loss": 11.709, "step": 1243 }, { "epoch": 0.026040358368919034, "grad_norm": 0.1875561773777008, "learning_rate": 0.00019996457792777862, "loss": 11.6971, "step": 1244 }, { "epoch": 0.026061291132881186, "grad_norm": 0.18660476803779602, "learning_rate": 0.00019996451955122912, "loss": 11.7073, "step": 1245 }, { "epoch": 0.02608222389684334, "grad_norm": 0.23951095342636108, "learning_rate": 0.00019996446112662466, "loss": 11.7031, "step": 1246 }, { "epoch": 0.026103156660805493, "grad_norm": 0.17592336237430573, "learning_rate": 0.0001999644026539653, "loss": 11.6985, "step": 1247 }, { "epoch": 0.026124089424767648, "grad_norm": 0.24165302515029907, "learning_rate": 0.00019996434413325101, "loss": 11.7064, "step": 1248 }, { "epoch": 0.0261450221887298, "grad_norm": 0.5300272107124329, "learning_rate": 0.0001999642855644819, "loss": 11.7302, "step": 1249 }, { "epoch": 0.026165954952691955, "grad_norm": 0.1640656441450119, "learning_rate": 0.00019996422694765794, "loss": 11.6929, "step": 1250 }, { "epoch": 0.026186887716654107, "grad_norm": 0.25639963150024414, "learning_rate": 0.00019996416828277916, "loss": 11.699, "step": 1251 }, { "epoch": 0.026207820480616262, "grad_norm": 0.18579310178756714, "learning_rate": 0.0001999641095698456, "loss": 11.703, "step": 1252 }, { "epoch": 0.026228753244578414, "grad_norm": 0.16533511877059937, "learning_rate": 0.00019996405080885732, "loss": 11.704, "step": 1253 }, { "epoch": 0.02624968600854057, "grad_norm": 0.22132480144500732, "learning_rate": 0.0001999639919998143, "loss": 11.7033, "step": 1254 }, { "epoch": 0.02627061877250272, "grad_norm": 0.1992231011390686, "learning_rate": 0.00019996393314271658, "loss": 11.6915, "step": 1255 }, { "epoch": 0.026291551536464876, "grad_norm": 0.20529380440711975, "learning_rate": 0.0001999638742375642, "loss": 11.6856, "step": 1256 }, { "epoch": 0.026312484300427028, "grad_norm": 0.16664712131023407, "learning_rate": 0.00019996381528435717, "loss": 11.7081, "step": 1257 }, { "epoch": 0.026333417064389183, "grad_norm": 0.16832786798477173, "learning_rate": 0.00019996375628309553, "loss": 11.698, "step": 1258 }, { "epoch": 0.026354349828351335, "grad_norm": 0.19437238574028015, "learning_rate": 0.00019996369723377933, "loss": 11.7042, "step": 1259 }, { "epoch": 0.02637528259231349, "grad_norm": 0.22625769674777985, "learning_rate": 0.00019996363813640854, "loss": 11.6906, "step": 1260 }, { "epoch": 0.02639621535627564, "grad_norm": 0.19776643812656403, "learning_rate": 0.00019996357899098327, "loss": 11.7008, "step": 1261 }, { "epoch": 0.026417148120237797, "grad_norm": 0.17610356211662292, "learning_rate": 0.00019996351979750348, "loss": 11.6867, "step": 1262 }, { "epoch": 0.02643808088419995, "grad_norm": 0.16193528473377228, "learning_rate": 0.00019996346055596923, "loss": 11.7033, "step": 1263 }, { "epoch": 0.026459013648162104, "grad_norm": 0.1987382471561432, "learning_rate": 0.00019996340126638055, "loss": 11.7021, "step": 1264 }, { "epoch": 0.026479946412124256, "grad_norm": 0.20331211388111115, "learning_rate": 0.00019996334192873744, "loss": 11.7063, "step": 1265 }, { "epoch": 0.02650087917608641, "grad_norm": 1.347020149230957, "learning_rate": 0.00019996328254303996, "loss": 11.7125, "step": 1266 }, { "epoch": 0.026521811940048563, "grad_norm": 0.19065259397029877, "learning_rate": 0.00019996322310928812, "loss": 11.699, "step": 1267 }, { "epoch": 0.026542744704010718, "grad_norm": 0.19683486223220825, "learning_rate": 0.00019996316362748195, "loss": 11.7012, "step": 1268 }, { "epoch": 0.02656367746797287, "grad_norm": 0.17272211611270905, "learning_rate": 0.0001999631040976215, "loss": 11.6905, "step": 1269 }, { "epoch": 0.026584610231935025, "grad_norm": 0.16042844951152802, "learning_rate": 0.00019996304451970676, "loss": 11.6846, "step": 1270 }, { "epoch": 0.026605542995897177, "grad_norm": 0.25821053981781006, "learning_rate": 0.0001999629848937378, "loss": 11.6926, "step": 1271 }, { "epoch": 0.026626475759859332, "grad_norm": 0.2370392084121704, "learning_rate": 0.00019996292521971465, "loss": 11.697, "step": 1272 }, { "epoch": 0.026647408523821487, "grad_norm": 0.1633588820695877, "learning_rate": 0.0001999628654976373, "loss": 11.6967, "step": 1273 }, { "epoch": 0.02666834128778364, "grad_norm": 0.17440295219421387, "learning_rate": 0.0001999628057275058, "loss": 11.687, "step": 1274 }, { "epoch": 0.026689274051745794, "grad_norm": 0.23700101673603058, "learning_rate": 0.00019996274590932016, "loss": 11.6942, "step": 1275 }, { "epoch": 0.026710206815707946, "grad_norm": 0.21940161287784576, "learning_rate": 0.00019996268604308043, "loss": 11.6993, "step": 1276 }, { "epoch": 0.0267311395796701, "grad_norm": 0.1715882420539856, "learning_rate": 0.00019996262612878663, "loss": 11.6996, "step": 1277 }, { "epoch": 0.026752072343632253, "grad_norm": 0.21134750545024872, "learning_rate": 0.0001999625661664388, "loss": 11.7146, "step": 1278 }, { "epoch": 0.026773005107594408, "grad_norm": 0.17268136143684387, "learning_rate": 0.00019996250615603699, "loss": 11.6903, "step": 1279 }, { "epoch": 0.02679393787155656, "grad_norm": 0.16326722502708435, "learning_rate": 0.00019996244609758114, "loss": 11.6996, "step": 1280 }, { "epoch": 0.026814870635518715, "grad_norm": 0.2649374306201935, "learning_rate": 0.0001999623859910714, "loss": 11.6922, "step": 1281 }, { "epoch": 0.026835803399480867, "grad_norm": 0.2095048725605011, "learning_rate": 0.00019996232583650766, "loss": 11.69, "step": 1282 }, { "epoch": 0.026856736163443022, "grad_norm": 0.18605728447437286, "learning_rate": 0.0001999622656338901, "loss": 11.6995, "step": 1283 }, { "epoch": 0.026877668927405174, "grad_norm": 0.16606475412845612, "learning_rate": 0.00019996220538321863, "loss": 11.6978, "step": 1284 }, { "epoch": 0.02689860169136733, "grad_norm": 0.18916712701320648, "learning_rate": 0.0001999621450844934, "loss": 11.6965, "step": 1285 }, { "epoch": 0.02691953445532948, "grad_norm": 0.19225908815860748, "learning_rate": 0.00019996208473771428, "loss": 11.697, "step": 1286 }, { "epoch": 0.026940467219291636, "grad_norm": 0.19713501632213593, "learning_rate": 0.0001999620243428814, "loss": 11.7141, "step": 1287 }, { "epoch": 0.026961399983253788, "grad_norm": 0.1848166435956955, "learning_rate": 0.00019996196389999478, "loss": 11.6928, "step": 1288 }, { "epoch": 0.026982332747215943, "grad_norm": 0.24175281822681427, "learning_rate": 0.00019996190340905445, "loss": 11.713, "step": 1289 }, { "epoch": 0.027003265511178095, "grad_norm": 0.1786693036556244, "learning_rate": 0.00019996184287006044, "loss": 11.705, "step": 1290 }, { "epoch": 0.02702419827514025, "grad_norm": 0.18836678564548492, "learning_rate": 0.00019996178228301275, "loss": 11.7077, "step": 1291 }, { "epoch": 0.027045131039102402, "grad_norm": 0.17859140038490295, "learning_rate": 0.0001999617216479114, "loss": 11.7016, "step": 1292 }, { "epoch": 0.027066063803064557, "grad_norm": 0.18227091431617737, "learning_rate": 0.0001999616609647565, "loss": 11.7049, "step": 1293 }, { "epoch": 0.02708699656702671, "grad_norm": 0.2089729607105255, "learning_rate": 0.000199961600233548, "loss": 11.6923, "step": 1294 }, { "epoch": 0.027107929330988864, "grad_norm": 0.1533697247505188, "learning_rate": 0.00019996153945428595, "loss": 11.6874, "step": 1295 }, { "epoch": 0.027128862094951016, "grad_norm": 0.21059860289096832, "learning_rate": 0.00019996147862697038, "loss": 11.6885, "step": 1296 }, { "epoch": 0.02714979485891317, "grad_norm": 0.1813645213842392, "learning_rate": 0.00019996141775160134, "loss": 11.6971, "step": 1297 }, { "epoch": 0.027170727622875323, "grad_norm": 0.22645173966884613, "learning_rate": 0.00019996135682817884, "loss": 11.6955, "step": 1298 }, { "epoch": 0.027191660386837478, "grad_norm": 0.15300443768501282, "learning_rate": 0.00019996129585670293, "loss": 11.7034, "step": 1299 }, { "epoch": 0.02721259315079963, "grad_norm": 0.20166002213954926, "learning_rate": 0.0001999612348371736, "loss": 11.6874, "step": 1300 }, { "epoch": 0.027233525914761785, "grad_norm": 0.1845194697380066, "learning_rate": 0.0001999611737695909, "loss": 11.6924, "step": 1301 }, { "epoch": 0.02725445867872394, "grad_norm": 0.1762097179889679, "learning_rate": 0.00019996111265395487, "loss": 11.7071, "step": 1302 }, { "epoch": 0.027275391442686092, "grad_norm": 0.19759176671504974, "learning_rate": 0.00019996105149026555, "loss": 11.6966, "step": 1303 }, { "epoch": 0.027296324206648247, "grad_norm": 0.321618914604187, "learning_rate": 0.0001999609902785229, "loss": 11.6958, "step": 1304 }, { "epoch": 0.0273172569706104, "grad_norm": 0.17264817655086517, "learning_rate": 0.00019996092901872702, "loss": 11.6961, "step": 1305 }, { "epoch": 0.027338189734572554, "grad_norm": 0.22750535607337952, "learning_rate": 0.00019996086771087793, "loss": 11.6927, "step": 1306 }, { "epoch": 0.027359122498534706, "grad_norm": 0.18396812677383423, "learning_rate": 0.00019996080635497566, "loss": 11.6928, "step": 1307 }, { "epoch": 0.02738005526249686, "grad_norm": 0.17547260224819183, "learning_rate": 0.00019996074495102017, "loss": 11.7106, "step": 1308 }, { "epoch": 0.027400988026459013, "grad_norm": 0.1826314628124237, "learning_rate": 0.0001999606834990116, "loss": 11.6953, "step": 1309 }, { "epoch": 0.02742192079042117, "grad_norm": 0.17360717058181763, "learning_rate": 0.0001999606219989499, "loss": 11.6945, "step": 1310 }, { "epoch": 0.02744285355438332, "grad_norm": 0.17172861099243164, "learning_rate": 0.00019996056045083516, "loss": 11.7009, "step": 1311 }, { "epoch": 0.027463786318345475, "grad_norm": 0.25730013847351074, "learning_rate": 0.00019996049885466735, "loss": 11.6969, "step": 1312 }, { "epoch": 0.027484719082307627, "grad_norm": 0.1767657846212387, "learning_rate": 0.00019996043721044653, "loss": 11.6932, "step": 1313 }, { "epoch": 0.027505651846269782, "grad_norm": 0.18758073449134827, "learning_rate": 0.0001999603755181727, "loss": 11.6991, "step": 1314 }, { "epoch": 0.027526584610231934, "grad_norm": 0.16713957488536835, "learning_rate": 0.00019996031377784594, "loss": 11.7176, "step": 1315 }, { "epoch": 0.02754751737419409, "grad_norm": 0.18329432606697083, "learning_rate": 0.00019996025198946624, "loss": 11.696, "step": 1316 }, { "epoch": 0.02756845013815624, "grad_norm": 0.30154284834861755, "learning_rate": 0.00019996019015303368, "loss": 11.6959, "step": 1317 }, { "epoch": 0.027589382902118396, "grad_norm": 0.25122737884521484, "learning_rate": 0.0001999601282685482, "loss": 11.6991, "step": 1318 }, { "epoch": 0.027610315666080548, "grad_norm": 0.19841210544109344, "learning_rate": 0.00019996006633600993, "loss": 11.7038, "step": 1319 }, { "epoch": 0.027631248430042703, "grad_norm": 0.16938063502311707, "learning_rate": 0.00019996000435541883, "loss": 11.6973, "step": 1320 }, { "epoch": 0.027652181194004855, "grad_norm": 0.2004789561033249, "learning_rate": 0.00019995994232677495, "loss": 11.6941, "step": 1321 }, { "epoch": 0.02767311395796701, "grad_norm": 0.16003303229808807, "learning_rate": 0.00019995988025007833, "loss": 11.694, "step": 1322 }, { "epoch": 0.027694046721929162, "grad_norm": 0.16727976500988007, "learning_rate": 0.000199959818125329, "loss": 11.702, "step": 1323 }, { "epoch": 0.027714979485891317, "grad_norm": 0.26990532875061035, "learning_rate": 0.00019995975595252696, "loss": 11.6928, "step": 1324 }, { "epoch": 0.02773591224985347, "grad_norm": 0.19287078082561493, "learning_rate": 0.00019995969373167227, "loss": 11.6765, "step": 1325 }, { "epoch": 0.027756845013815624, "grad_norm": 0.18504565954208374, "learning_rate": 0.00019995963146276499, "loss": 11.6932, "step": 1326 }, { "epoch": 0.027777777777777776, "grad_norm": 0.21529622375965118, "learning_rate": 0.00019995956914580504, "loss": 11.6957, "step": 1327 }, { "epoch": 0.02779871054173993, "grad_norm": 0.22592516243457794, "learning_rate": 0.00019995950678079258, "loss": 11.688, "step": 1328 }, { "epoch": 0.027819643305702083, "grad_norm": 0.20662175118923187, "learning_rate": 0.00019995944436772755, "loss": 11.6926, "step": 1329 }, { "epoch": 0.02784057606966424, "grad_norm": 0.22462479770183563, "learning_rate": 0.00019995938190661003, "loss": 11.6748, "step": 1330 }, { "epoch": 0.027861508833626394, "grad_norm": 0.22371095418930054, "learning_rate": 0.00019995931939744002, "loss": 11.6987, "step": 1331 }, { "epoch": 0.027882441597588545, "grad_norm": 0.15320302546024323, "learning_rate": 0.00019995925684021757, "loss": 11.6954, "step": 1332 }, { "epoch": 0.0279033743615507, "grad_norm": 0.17657490074634552, "learning_rate": 0.0001999591942349427, "loss": 11.7018, "step": 1333 }, { "epoch": 0.027924307125512852, "grad_norm": 0.18902769684791565, "learning_rate": 0.00019995913158161544, "loss": 11.6875, "step": 1334 }, { "epoch": 0.027945239889475008, "grad_norm": 0.31634655594825745, "learning_rate": 0.00019995906888023584, "loss": 11.7083, "step": 1335 }, { "epoch": 0.02796617265343716, "grad_norm": 0.15627723932266235, "learning_rate": 0.00019995900613080386, "loss": 11.7125, "step": 1336 }, { "epoch": 0.027987105417399315, "grad_norm": 0.21034105122089386, "learning_rate": 0.00019995894333331963, "loss": 11.6961, "step": 1337 }, { "epoch": 0.028008038181361467, "grad_norm": 0.20232272148132324, "learning_rate": 0.00019995888048778313, "loss": 11.6931, "step": 1338 }, { "epoch": 0.028028970945323622, "grad_norm": 0.30428677797317505, "learning_rate": 0.00019995881759419439, "loss": 11.6839, "step": 1339 }, { "epoch": 0.028049903709285774, "grad_norm": 0.19729310274124146, "learning_rate": 0.00019995875465255342, "loss": 11.6996, "step": 1340 }, { "epoch": 0.02807083647324793, "grad_norm": 0.18270406126976013, "learning_rate": 0.00019995869166286029, "loss": 11.6909, "step": 1341 }, { "epoch": 0.02809176923721008, "grad_norm": 0.24786806106567383, "learning_rate": 0.00019995862862511504, "loss": 11.7029, "step": 1342 }, { "epoch": 0.028112702001172236, "grad_norm": 0.16628898680210114, "learning_rate": 0.00019995856553931763, "loss": 11.6954, "step": 1343 }, { "epoch": 0.028133634765134388, "grad_norm": 0.17468412220478058, "learning_rate": 0.00019995850240546817, "loss": 11.6906, "step": 1344 }, { "epoch": 0.028154567529096543, "grad_norm": 0.1715509593486786, "learning_rate": 0.00019995843922356664, "loss": 11.6923, "step": 1345 }, { "epoch": 0.028175500293058695, "grad_norm": 0.24447685480117798, "learning_rate": 0.00019995837599361306, "loss": 11.6946, "step": 1346 }, { "epoch": 0.02819643305702085, "grad_norm": 0.18721388280391693, "learning_rate": 0.00019995831271560753, "loss": 11.7019, "step": 1347 }, { "epoch": 0.028217365820983, "grad_norm": 0.18155066668987274, "learning_rate": 0.00019995824938955, "loss": 11.6995, "step": 1348 }, { "epoch": 0.028238298584945157, "grad_norm": 0.17603498697280884, "learning_rate": 0.00019995818601544057, "loss": 11.6933, "step": 1349 }, { "epoch": 0.02825923134890731, "grad_norm": 0.20190761983394623, "learning_rate": 0.00019995812259327922, "loss": 11.688, "step": 1350 }, { "epoch": 0.028280164112869464, "grad_norm": 0.21724218130111694, "learning_rate": 0.00019995805912306598, "loss": 11.6975, "step": 1351 }, { "epoch": 0.028301096876831616, "grad_norm": 0.21850459277629852, "learning_rate": 0.0001999579956048009, "loss": 11.7035, "step": 1352 }, { "epoch": 0.02832202964079377, "grad_norm": 0.15663015842437744, "learning_rate": 0.00019995793203848403, "loss": 11.6969, "step": 1353 }, { "epoch": 0.028342962404755923, "grad_norm": 0.1770308017730713, "learning_rate": 0.00019995786842411537, "loss": 11.6954, "step": 1354 }, { "epoch": 0.028363895168718078, "grad_norm": 0.19338594377040863, "learning_rate": 0.00019995780476169496, "loss": 11.7147, "step": 1355 }, { "epoch": 0.02838482793268023, "grad_norm": 0.18834681808948517, "learning_rate": 0.00019995774105122284, "loss": 11.6886, "step": 1356 }, { "epoch": 0.028405760696642385, "grad_norm": 0.2191622257232666, "learning_rate": 0.000199957677292699, "loss": 11.6978, "step": 1357 }, { "epoch": 0.028426693460604537, "grad_norm": 0.16816993057727814, "learning_rate": 0.00019995761348612354, "loss": 11.6923, "step": 1358 }, { "epoch": 0.028447626224566692, "grad_norm": 0.21188212931156158, "learning_rate": 0.0001999575496314964, "loss": 11.6973, "step": 1359 }, { "epoch": 0.028468558988528847, "grad_norm": 0.21211962401866913, "learning_rate": 0.00019995748572881772, "loss": 11.6943, "step": 1360 }, { "epoch": 0.028489491752491, "grad_norm": 0.18201389908790588, "learning_rate": 0.00019995742177808746, "loss": 11.6976, "step": 1361 }, { "epoch": 0.028510424516453154, "grad_norm": 0.20761659741401672, "learning_rate": 0.00019995735777930565, "loss": 11.6809, "step": 1362 }, { "epoch": 0.028531357280415306, "grad_norm": 0.20528990030288696, "learning_rate": 0.00019995729373247233, "loss": 11.6973, "step": 1363 }, { "epoch": 0.02855229004437746, "grad_norm": 0.16561351716518402, "learning_rate": 0.00019995722963758757, "loss": 11.7013, "step": 1364 }, { "epoch": 0.028573222808339613, "grad_norm": 0.1940583735704422, "learning_rate": 0.0001999571654946513, "loss": 11.7112, "step": 1365 }, { "epoch": 0.028594155572301768, "grad_norm": 0.17424635589122772, "learning_rate": 0.00019995710130366368, "loss": 11.6934, "step": 1366 }, { "epoch": 0.02861508833626392, "grad_norm": 0.19202820956707, "learning_rate": 0.00019995703706462465, "loss": 11.6803, "step": 1367 }, { "epoch": 0.028636021100226075, "grad_norm": 0.20115762948989868, "learning_rate": 0.0001999569727775343, "loss": 11.6954, "step": 1368 }, { "epoch": 0.028656953864188227, "grad_norm": 0.19098924100399017, "learning_rate": 0.0001999569084423926, "loss": 11.696, "step": 1369 }, { "epoch": 0.028677886628150382, "grad_norm": 0.21136583387851715, "learning_rate": 0.0001999568440591996, "loss": 11.6905, "step": 1370 }, { "epoch": 0.028698819392112534, "grad_norm": 0.20109957456588745, "learning_rate": 0.00019995677962795537, "loss": 11.7064, "step": 1371 }, { "epoch": 0.02871975215607469, "grad_norm": 0.17626671493053436, "learning_rate": 0.00019995671514865988, "loss": 11.6909, "step": 1372 }, { "epoch": 0.02874068492003684, "grad_norm": 0.1890535205602646, "learning_rate": 0.00019995665062131322, "loss": 11.6965, "step": 1373 }, { "epoch": 0.028761617683998996, "grad_norm": 0.15672802925109863, "learning_rate": 0.00019995658604591538, "loss": 11.6882, "step": 1374 }, { "epoch": 0.028782550447961148, "grad_norm": 0.17791080474853516, "learning_rate": 0.00019995652142246642, "loss": 11.6953, "step": 1375 }, { "epoch": 0.028803483211923303, "grad_norm": 0.17970192432403564, "learning_rate": 0.00019995645675096636, "loss": 11.6847, "step": 1376 }, { "epoch": 0.028824415975885455, "grad_norm": 0.20395781099796295, "learning_rate": 0.00019995639203141522, "loss": 11.7176, "step": 1377 }, { "epoch": 0.02884534873984761, "grad_norm": 0.2226274013519287, "learning_rate": 0.00019995632726381305, "loss": 11.7005, "step": 1378 }, { "epoch": 0.028866281503809762, "grad_norm": 0.2037922441959381, "learning_rate": 0.00019995626244815985, "loss": 11.6992, "step": 1379 }, { "epoch": 0.028887214267771917, "grad_norm": 0.15658579766750336, "learning_rate": 0.00019995619758445568, "loss": 11.7217, "step": 1380 }, { "epoch": 0.02890814703173407, "grad_norm": 0.1732609122991562, "learning_rate": 0.00019995613267270057, "loss": 11.6931, "step": 1381 }, { "epoch": 0.028929079795696224, "grad_norm": 0.1707267463207245, "learning_rate": 0.00019995606771289456, "loss": 11.6926, "step": 1382 }, { "epoch": 0.028950012559658376, "grad_norm": 0.1647636890411377, "learning_rate": 0.00019995600270503763, "loss": 11.6831, "step": 1383 }, { "epoch": 0.02897094532362053, "grad_norm": 0.22603926062583923, "learning_rate": 0.00019995593764912984, "loss": 11.7056, "step": 1384 }, { "epoch": 0.028991878087582683, "grad_norm": 0.18743278086185455, "learning_rate": 0.00019995587254517124, "loss": 11.7073, "step": 1385 }, { "epoch": 0.029012810851544838, "grad_norm": 0.20472770929336548, "learning_rate": 0.00019995580739316188, "loss": 11.6939, "step": 1386 }, { "epoch": 0.02903374361550699, "grad_norm": 0.2100651115179062, "learning_rate": 0.00019995574219310174, "loss": 11.6834, "step": 1387 }, { "epoch": 0.029054676379469145, "grad_norm": 0.18032029271125793, "learning_rate": 0.00019995567694499087, "loss": 11.7035, "step": 1388 }, { "epoch": 0.0290756091434313, "grad_norm": 0.16660019755363464, "learning_rate": 0.0001999556116488293, "loss": 11.6979, "step": 1389 }, { "epoch": 0.029096541907393452, "grad_norm": 0.19017304480075836, "learning_rate": 0.00019995554630461708, "loss": 11.701, "step": 1390 }, { "epoch": 0.029117474671355607, "grad_norm": 0.21152648329734802, "learning_rate": 0.0001999554809123542, "loss": 11.6991, "step": 1391 }, { "epoch": 0.02913840743531776, "grad_norm": 0.21837297081947327, "learning_rate": 0.00019995541547204073, "loss": 11.6953, "step": 1392 }, { "epoch": 0.029159340199279914, "grad_norm": 0.23849892616271973, "learning_rate": 0.00019995534998367668, "loss": 11.687, "step": 1393 }, { "epoch": 0.029180272963242066, "grad_norm": 0.20190490782260895, "learning_rate": 0.0001999552844472621, "loss": 11.6922, "step": 1394 }, { "epoch": 0.02920120572720422, "grad_norm": 0.1979132890701294, "learning_rate": 0.00019995521886279703, "loss": 11.6922, "step": 1395 }, { "epoch": 0.029222138491166373, "grad_norm": 0.15641696751117706, "learning_rate": 0.00019995515323028144, "loss": 11.6952, "step": 1396 }, { "epoch": 0.02924307125512853, "grad_norm": 0.18532080948352814, "learning_rate": 0.00019995508754971542, "loss": 11.6956, "step": 1397 }, { "epoch": 0.02926400401909068, "grad_norm": 0.21177910268306732, "learning_rate": 0.000199955021821099, "loss": 11.6936, "step": 1398 }, { "epoch": 0.029284936783052835, "grad_norm": 0.14861153066158295, "learning_rate": 0.0001999549560444322, "loss": 11.6841, "step": 1399 }, { "epoch": 0.029305869547014987, "grad_norm": 0.18581946194171906, "learning_rate": 0.00019995489021971502, "loss": 11.6921, "step": 1400 }, { "epoch": 0.029326802310977142, "grad_norm": 0.16882263123989105, "learning_rate": 0.00019995482434694757, "loss": 11.6899, "step": 1401 }, { "epoch": 0.029347735074939294, "grad_norm": 0.15766413509845734, "learning_rate": 0.00019995475842612978, "loss": 11.6921, "step": 1402 }, { "epoch": 0.02936866783890145, "grad_norm": 0.188850998878479, "learning_rate": 0.00019995469245726177, "loss": 11.6885, "step": 1403 }, { "epoch": 0.0293896006028636, "grad_norm": 0.16362819075584412, "learning_rate": 0.00019995462644034352, "loss": 11.7091, "step": 1404 }, { "epoch": 0.029410533366825756, "grad_norm": 0.2626252770423889, "learning_rate": 0.0001999545603753751, "loss": 11.7135, "step": 1405 }, { "epoch": 0.029431466130787908, "grad_norm": 0.31273430585861206, "learning_rate": 0.00019995449426235648, "loss": 11.7041, "step": 1406 }, { "epoch": 0.029452398894750063, "grad_norm": 0.21324528753757477, "learning_rate": 0.00019995442810128778, "loss": 11.6879, "step": 1407 }, { "epoch": 0.029473331658712215, "grad_norm": 0.18481804430484772, "learning_rate": 0.00019995436189216897, "loss": 11.6889, "step": 1408 }, { "epoch": 0.02949426442267437, "grad_norm": 0.18080171942710876, "learning_rate": 0.00019995429563500006, "loss": 11.6905, "step": 1409 }, { "epoch": 0.029515197186636522, "grad_norm": 0.22225596010684967, "learning_rate": 0.00019995422932978115, "loss": 11.6857, "step": 1410 }, { "epoch": 0.029536129950598677, "grad_norm": 0.20202431082725525, "learning_rate": 0.0001999541629765122, "loss": 11.6797, "step": 1411 }, { "epoch": 0.02955706271456083, "grad_norm": 0.1639149934053421, "learning_rate": 0.00019995409657519333, "loss": 11.6872, "step": 1412 }, { "epoch": 0.029577995478522984, "grad_norm": 0.15333840250968933, "learning_rate": 0.0001999540301258245, "loss": 11.6866, "step": 1413 }, { "epoch": 0.029598928242485136, "grad_norm": 0.180169478058815, "learning_rate": 0.00019995396362840578, "loss": 11.707, "step": 1414 }, { "epoch": 0.02961986100644729, "grad_norm": 0.21084469556808472, "learning_rate": 0.00019995389708293715, "loss": 11.6971, "step": 1415 }, { "epoch": 0.029640793770409443, "grad_norm": 0.19406509399414062, "learning_rate": 0.00019995383048941872, "loss": 11.6979, "step": 1416 }, { "epoch": 0.0296617265343716, "grad_norm": 0.1803949922323227, "learning_rate": 0.00019995376384785045, "loss": 11.6903, "step": 1417 }, { "epoch": 0.029682659298333754, "grad_norm": 0.16922912001609802, "learning_rate": 0.00019995369715823243, "loss": 11.699, "step": 1418 }, { "epoch": 0.029703592062295905, "grad_norm": 0.1837594360113144, "learning_rate": 0.00019995363042056466, "loss": 11.6964, "step": 1419 }, { "epoch": 0.02972452482625806, "grad_norm": 0.19552063941955566, "learning_rate": 0.00019995356363484716, "loss": 11.7072, "step": 1420 }, { "epoch": 0.029745457590220212, "grad_norm": 0.21379628777503967, "learning_rate": 0.00019995349680107998, "loss": 11.7107, "step": 1421 }, { "epoch": 0.029766390354182368, "grad_norm": 0.19001305103302002, "learning_rate": 0.00019995342991926314, "loss": 11.6898, "step": 1422 }, { "epoch": 0.02978732311814452, "grad_norm": 0.2838584780693054, "learning_rate": 0.0001999533629893967, "loss": 11.6897, "step": 1423 }, { "epoch": 0.029808255882106675, "grad_norm": 0.24202527105808258, "learning_rate": 0.00019995329601148068, "loss": 11.6881, "step": 1424 }, { "epoch": 0.029829188646068826, "grad_norm": 0.18146908283233643, "learning_rate": 0.0001999532289855151, "loss": 11.6952, "step": 1425 }, { "epoch": 0.02985012141003098, "grad_norm": 0.198717400431633, "learning_rate": 0.0001999531619115, "loss": 11.6826, "step": 1426 }, { "epoch": 0.029871054173993133, "grad_norm": 0.20577967166900635, "learning_rate": 0.00019995309478943542, "loss": 11.6862, "step": 1427 }, { "epoch": 0.02989198693795529, "grad_norm": 0.18713931739330292, "learning_rate": 0.00019995302761932138, "loss": 11.69, "step": 1428 }, { "epoch": 0.02991291970191744, "grad_norm": 0.1982475221157074, "learning_rate": 0.00019995296040115792, "loss": 11.6916, "step": 1429 }, { "epoch": 0.029933852465879596, "grad_norm": 0.20682808756828308, "learning_rate": 0.00019995289313494508, "loss": 11.6968, "step": 1430 }, { "epoch": 0.029954785229841747, "grad_norm": 0.20390555262565613, "learning_rate": 0.00019995282582068285, "loss": 11.7028, "step": 1431 }, { "epoch": 0.029975717993803903, "grad_norm": 0.20461975038051605, "learning_rate": 0.00019995275845837132, "loss": 11.701, "step": 1432 }, { "epoch": 0.029996650757766054, "grad_norm": 0.19270044565200806, "learning_rate": 0.00019995269104801047, "loss": 11.6936, "step": 1433 }, { "epoch": 0.03001758352172821, "grad_norm": 0.2060215175151825, "learning_rate": 0.00019995262358960035, "loss": 11.6932, "step": 1434 }, { "epoch": 0.03003851628569036, "grad_norm": 0.18003436923027039, "learning_rate": 0.00019995255608314105, "loss": 11.6925, "step": 1435 }, { "epoch": 0.030059449049652517, "grad_norm": 0.15480515360832214, "learning_rate": 0.00019995248852863252, "loss": 11.6959, "step": 1436 }, { "epoch": 0.03008038181361467, "grad_norm": 0.187699556350708, "learning_rate": 0.00019995242092607485, "loss": 11.6975, "step": 1437 }, { "epoch": 0.030101314577576824, "grad_norm": 0.25764134526252747, "learning_rate": 0.00019995235327546803, "loss": 11.6889, "step": 1438 }, { "epoch": 0.030122247341538975, "grad_norm": 0.234032541513443, "learning_rate": 0.0001999522855768121, "loss": 11.6915, "step": 1439 }, { "epoch": 0.03014318010550113, "grad_norm": 0.20094440877437592, "learning_rate": 0.0001999522178301071, "loss": 11.7043, "step": 1440 }, { "epoch": 0.030164112869463282, "grad_norm": 0.1742088496685028, "learning_rate": 0.00019995215003535308, "loss": 11.7033, "step": 1441 }, { "epoch": 0.030185045633425438, "grad_norm": 0.1595737487077713, "learning_rate": 0.00019995208219255006, "loss": 11.6935, "step": 1442 }, { "epoch": 0.03020597839738759, "grad_norm": 0.20237675309181213, "learning_rate": 0.0001999520143016981, "loss": 11.6948, "step": 1443 }, { "epoch": 0.030226911161349745, "grad_norm": 0.18916276097297668, "learning_rate": 0.00019995194636279715, "loss": 11.7016, "step": 1444 }, { "epoch": 0.030247843925311897, "grad_norm": 0.2126976102590561, "learning_rate": 0.00019995187837584731, "loss": 11.6937, "step": 1445 }, { "epoch": 0.030268776689274052, "grad_norm": 0.176852285861969, "learning_rate": 0.00019995181034084862, "loss": 11.6913, "step": 1446 }, { "epoch": 0.030289709453236207, "grad_norm": 0.18597927689552307, "learning_rate": 0.0001999517422578011, "loss": 11.6977, "step": 1447 }, { "epoch": 0.03031064221719836, "grad_norm": 0.1923484355211258, "learning_rate": 0.00019995167412670476, "loss": 11.7002, "step": 1448 }, { "epoch": 0.030331574981160514, "grad_norm": 0.28877583146095276, "learning_rate": 0.00019995160594755965, "loss": 11.7098, "step": 1449 }, { "epoch": 0.030352507745122666, "grad_norm": 0.17530372738838196, "learning_rate": 0.00019995153772036576, "loss": 11.6882, "step": 1450 }, { "epoch": 0.03037344050908482, "grad_norm": 0.2104862630367279, "learning_rate": 0.0001999514694451232, "loss": 11.6865, "step": 1451 }, { "epoch": 0.030394373273046973, "grad_norm": 0.17054864764213562, "learning_rate": 0.00019995140112183198, "loss": 11.7003, "step": 1452 }, { "epoch": 0.030415306037009128, "grad_norm": 0.19483444094657898, "learning_rate": 0.0001999513327504921, "loss": 11.6856, "step": 1453 }, { "epoch": 0.03043623880097128, "grad_norm": 0.17936775088310242, "learning_rate": 0.00019995126433110358, "loss": 11.6956, "step": 1454 }, { "epoch": 0.030457171564933435, "grad_norm": 0.2066003680229187, "learning_rate": 0.00019995119586366654, "loss": 11.6938, "step": 1455 }, { "epoch": 0.030478104328895587, "grad_norm": 0.186809703707695, "learning_rate": 0.00019995112734818093, "loss": 11.6924, "step": 1456 }, { "epoch": 0.030499037092857742, "grad_norm": 0.170927956700325, "learning_rate": 0.0001999510587846468, "loss": 11.7007, "step": 1457 }, { "epoch": 0.030519969856819894, "grad_norm": 0.17313505709171295, "learning_rate": 0.0001999509901730642, "loss": 11.6789, "step": 1458 }, { "epoch": 0.03054090262078205, "grad_norm": 0.18527500331401825, "learning_rate": 0.00019995092151343314, "loss": 11.6968, "step": 1459 }, { "epoch": 0.0305618353847442, "grad_norm": 0.1966073364019394, "learning_rate": 0.00019995085280575373, "loss": 11.6922, "step": 1460 }, { "epoch": 0.030582768148706356, "grad_norm": 0.1976875215768814, "learning_rate": 0.00019995078405002587, "loss": 11.6904, "step": 1461 }, { "epoch": 0.030603700912668508, "grad_norm": 0.1861024647951126, "learning_rate": 0.00019995071524624974, "loss": 11.6943, "step": 1462 }, { "epoch": 0.030624633676630663, "grad_norm": 0.17140217125415802, "learning_rate": 0.00019995064639442522, "loss": 11.6998, "step": 1463 }, { "epoch": 0.030645566440592815, "grad_norm": 0.17453287541866302, "learning_rate": 0.00019995057749455249, "loss": 11.6975, "step": 1464 }, { "epoch": 0.03066649920455497, "grad_norm": 0.17984740436077118, "learning_rate": 0.00019995050854663146, "loss": 11.6931, "step": 1465 }, { "epoch": 0.030687431968517122, "grad_norm": 0.2298465520143509, "learning_rate": 0.00019995043955066223, "loss": 11.6919, "step": 1466 }, { "epoch": 0.030708364732479277, "grad_norm": 0.1813271939754486, "learning_rate": 0.00019995037050664482, "loss": 11.6905, "step": 1467 }, { "epoch": 0.03072929749644143, "grad_norm": 0.18939471244812012, "learning_rate": 0.00019995030141457928, "loss": 11.7001, "step": 1468 }, { "epoch": 0.030750230260403584, "grad_norm": 0.22023260593414307, "learning_rate": 0.00019995023227446562, "loss": 11.683, "step": 1469 }, { "epoch": 0.030771163024365736, "grad_norm": 0.199149027466774, "learning_rate": 0.00019995016308630388, "loss": 11.6961, "step": 1470 }, { "epoch": 0.03079209578832789, "grad_norm": 0.186441570520401, "learning_rate": 0.0001999500938500941, "loss": 11.688, "step": 1471 }, { "epoch": 0.030813028552290043, "grad_norm": 0.18470601737499237, "learning_rate": 0.0001999500245658363, "loss": 11.7034, "step": 1472 }, { "epoch": 0.030833961316252198, "grad_norm": 0.15991438925266266, "learning_rate": 0.00019994995523353053, "loss": 11.6873, "step": 1473 }, { "epoch": 0.03085489408021435, "grad_norm": 0.190536767244339, "learning_rate": 0.0001999498858531768, "loss": 11.7011, "step": 1474 }, { "epoch": 0.030875826844176505, "grad_norm": 0.1499222069978714, "learning_rate": 0.00019994981642477516, "loss": 11.6881, "step": 1475 }, { "epoch": 0.03089675960813866, "grad_norm": 0.20265500247478485, "learning_rate": 0.0001999497469483256, "loss": 11.6929, "step": 1476 }, { "epoch": 0.030917692372100812, "grad_norm": 0.22579540312290192, "learning_rate": 0.00019994967742382826, "loss": 11.6972, "step": 1477 }, { "epoch": 0.030938625136062967, "grad_norm": 0.183686301112175, "learning_rate": 0.00019994960785128308, "loss": 11.6905, "step": 1478 }, { "epoch": 0.03095955790002512, "grad_norm": 0.15536482632160187, "learning_rate": 0.00019994953823069013, "loss": 11.7007, "step": 1479 }, { "epoch": 0.030980490663987274, "grad_norm": 0.24775893986225128, "learning_rate": 0.00019994946856204942, "loss": 11.7057, "step": 1480 }, { "epoch": 0.031001423427949426, "grad_norm": 0.14421498775482178, "learning_rate": 0.000199949398845361, "loss": 11.6783, "step": 1481 }, { "epoch": 0.03102235619191158, "grad_norm": 0.1729133278131485, "learning_rate": 0.0001999493290806249, "loss": 11.695, "step": 1482 }, { "epoch": 0.031043288955873733, "grad_norm": 0.19083106517791748, "learning_rate": 0.00019994925926784118, "loss": 11.6956, "step": 1483 }, { "epoch": 0.03106422171983589, "grad_norm": 0.1772618442773819, "learning_rate": 0.00019994918940700984, "loss": 11.6905, "step": 1484 }, { "epoch": 0.03108515448379804, "grad_norm": 0.17342720925807953, "learning_rate": 0.0001999491194981309, "loss": 11.701, "step": 1485 }, { "epoch": 0.031106087247760195, "grad_norm": 0.1753249317407608, "learning_rate": 0.0001999490495412044, "loss": 11.6922, "step": 1486 }, { "epoch": 0.031127020011722347, "grad_norm": 0.20226557552814484, "learning_rate": 0.00019994897953623042, "loss": 11.6866, "step": 1487 }, { "epoch": 0.031147952775684502, "grad_norm": 0.20717860758304596, "learning_rate": 0.00019994890948320896, "loss": 11.7001, "step": 1488 }, { "epoch": 0.031168885539646654, "grad_norm": 0.166544571518898, "learning_rate": 0.00019994883938214005, "loss": 11.7059, "step": 1489 }, { "epoch": 0.03118981830360881, "grad_norm": 0.1893533319234848, "learning_rate": 0.00019994876923302375, "loss": 11.6772, "step": 1490 }, { "epoch": 0.03121075106757096, "grad_norm": 0.17745643854141235, "learning_rate": 0.00019994869903586004, "loss": 11.6885, "step": 1491 }, { "epoch": 0.031231683831533116, "grad_norm": 0.17435693740844727, "learning_rate": 0.000199948628790649, "loss": 11.6869, "step": 1492 }, { "epoch": 0.03125261659549527, "grad_norm": 0.1525789499282837, "learning_rate": 0.00019994855849739067, "loss": 11.6975, "step": 1493 }, { "epoch": 0.03127354935945742, "grad_norm": 0.1969016045331955, "learning_rate": 0.00019994848815608506, "loss": 11.6879, "step": 1494 }, { "epoch": 0.03129448212341958, "grad_norm": 0.15303033590316772, "learning_rate": 0.00019994841776673216, "loss": 11.6967, "step": 1495 }, { "epoch": 0.03131541488738173, "grad_norm": 0.2933902442455292, "learning_rate": 0.00019994834732933213, "loss": 11.7184, "step": 1496 }, { "epoch": 0.03133634765134388, "grad_norm": 0.20411129295825958, "learning_rate": 0.00019994827684388487, "loss": 11.6941, "step": 1497 }, { "epoch": 0.031357280415306034, "grad_norm": 0.18967966735363007, "learning_rate": 0.0001999482063103905, "loss": 11.6986, "step": 1498 }, { "epoch": 0.03137821317926819, "grad_norm": 0.18530108034610748, "learning_rate": 0.00019994813572884898, "loss": 11.6919, "step": 1499 }, { "epoch": 0.031399145943230344, "grad_norm": 0.21265734732151031, "learning_rate": 0.00019994806509926043, "loss": 11.7097, "step": 1500 }, { "epoch": 0.031420078707192496, "grad_norm": 0.17610208690166473, "learning_rate": 0.00019994799442162484, "loss": 11.6956, "step": 1501 }, { "epoch": 0.03144101147115465, "grad_norm": 0.16219930350780487, "learning_rate": 0.00019994792369594223, "loss": 11.7005, "step": 1502 }, { "epoch": 0.03146194423511681, "grad_norm": 0.2099585235118866, "learning_rate": 0.00019994785292221266, "loss": 11.7052, "step": 1503 }, { "epoch": 0.03148287699907896, "grad_norm": 0.1934911459684372, "learning_rate": 0.00019994778210043616, "loss": 11.6851, "step": 1504 }, { "epoch": 0.03150380976304111, "grad_norm": 0.1954706907272339, "learning_rate": 0.00019994771123061272, "loss": 11.6765, "step": 1505 }, { "epoch": 0.03152474252700327, "grad_norm": 0.20284681022167206, "learning_rate": 0.00019994764031274246, "loss": 11.6811, "step": 1506 }, { "epoch": 0.03154567529096542, "grad_norm": 0.1594969630241394, "learning_rate": 0.00019994756934682534, "loss": 11.7055, "step": 1507 }, { "epoch": 0.03156660805492757, "grad_norm": 0.2060549259185791, "learning_rate": 0.00019994749833286143, "loss": 11.6948, "step": 1508 }, { "epoch": 0.031587540818889724, "grad_norm": 0.17116251587867737, "learning_rate": 0.00019994742727085077, "loss": 11.6923, "step": 1509 }, { "epoch": 0.03160847358285188, "grad_norm": 0.18504184484481812, "learning_rate": 0.00019994735616079336, "loss": 11.6819, "step": 1510 }, { "epoch": 0.031629406346814035, "grad_norm": 0.15002542734146118, "learning_rate": 0.00019994728500268924, "loss": 11.7076, "step": 1511 }, { "epoch": 0.031650339110776186, "grad_norm": 0.17257532477378845, "learning_rate": 0.00019994721379653849, "loss": 11.7027, "step": 1512 }, { "epoch": 0.03167127187473834, "grad_norm": 0.15315508842468262, "learning_rate": 0.0001999471425423411, "loss": 11.7043, "step": 1513 }, { "epoch": 0.0316922046387005, "grad_norm": 0.17811420559883118, "learning_rate": 0.0001999470712400971, "loss": 11.7012, "step": 1514 }, { "epoch": 0.03171313740266265, "grad_norm": 0.21308167278766632, "learning_rate": 0.00019994699988980653, "loss": 11.6999, "step": 1515 }, { "epoch": 0.0317340701666248, "grad_norm": 0.1527872234582901, "learning_rate": 0.00019994692849146945, "loss": 11.6962, "step": 1516 }, { "epoch": 0.03175500293058695, "grad_norm": 0.23543477058410645, "learning_rate": 0.0001999468570450859, "loss": 11.6894, "step": 1517 }, { "epoch": 0.03177593569454911, "grad_norm": 0.2471383810043335, "learning_rate": 0.00019994678555065586, "loss": 11.6931, "step": 1518 }, { "epoch": 0.03179686845851126, "grad_norm": 0.25395965576171875, "learning_rate": 0.00019994671400817943, "loss": 11.6828, "step": 1519 }, { "epoch": 0.031817801222473414, "grad_norm": 0.2357519418001175, "learning_rate": 0.0001999466424176566, "loss": 11.6931, "step": 1520 }, { "epoch": 0.031838733986435566, "grad_norm": 0.2758145332336426, "learning_rate": 0.0001999465707790874, "loss": 11.7149, "step": 1521 }, { "epoch": 0.031859666750397725, "grad_norm": 0.1949504017829895, "learning_rate": 0.00019994649909247189, "loss": 11.6868, "step": 1522 }, { "epoch": 0.03188059951435988, "grad_norm": 0.20208008587360382, "learning_rate": 0.00019994642735781008, "loss": 11.6853, "step": 1523 }, { "epoch": 0.03190153227832203, "grad_norm": 0.211217999458313, "learning_rate": 0.00019994635557510205, "loss": 11.6947, "step": 1524 }, { "epoch": 0.03192246504228418, "grad_norm": 0.22543227672576904, "learning_rate": 0.0001999462837443478, "loss": 11.6847, "step": 1525 }, { "epoch": 0.03194339780624634, "grad_norm": 0.19164687395095825, "learning_rate": 0.00019994621186554735, "loss": 11.6897, "step": 1526 }, { "epoch": 0.03196433057020849, "grad_norm": 0.16132104396820068, "learning_rate": 0.00019994613993870074, "loss": 11.6939, "step": 1527 }, { "epoch": 0.03198526333417064, "grad_norm": 0.22546960413455963, "learning_rate": 0.00019994606796380806, "loss": 11.6944, "step": 1528 }, { "epoch": 0.032006196098132794, "grad_norm": 0.22030499577522278, "learning_rate": 0.00019994599594086926, "loss": 11.7053, "step": 1529 }, { "epoch": 0.03202712886209495, "grad_norm": 0.18826083838939667, "learning_rate": 0.00019994592386988444, "loss": 11.688, "step": 1530 }, { "epoch": 0.032048061626057105, "grad_norm": 0.22918984293937683, "learning_rate": 0.00019994585175085364, "loss": 11.6995, "step": 1531 }, { "epoch": 0.032068994390019256, "grad_norm": 0.17122040688991547, "learning_rate": 0.0001999457795837768, "loss": 11.6931, "step": 1532 }, { "epoch": 0.032089927153981415, "grad_norm": 0.2272617369890213, "learning_rate": 0.00019994570736865406, "loss": 11.7011, "step": 1533 }, { "epoch": 0.03211085991794357, "grad_norm": 0.2156057059764862, "learning_rate": 0.0001999456351054854, "loss": 11.6904, "step": 1534 }, { "epoch": 0.03213179268190572, "grad_norm": 0.20138731598854065, "learning_rate": 0.0001999455627942709, "loss": 11.677, "step": 1535 }, { "epoch": 0.03215272544586787, "grad_norm": 0.20294278860092163, "learning_rate": 0.00019994549043501054, "loss": 11.694, "step": 1536 }, { "epoch": 0.03217365820983003, "grad_norm": 0.18489599227905273, "learning_rate": 0.0001999454180277044, "loss": 11.7036, "step": 1537 }, { "epoch": 0.03219459097379218, "grad_norm": 0.18159188330173492, "learning_rate": 0.00019994534557235248, "loss": 11.6788, "step": 1538 }, { "epoch": 0.03221552373775433, "grad_norm": 0.15764297544956207, "learning_rate": 0.00019994527306895483, "loss": 11.6797, "step": 1539 }, { "epoch": 0.032236456501716484, "grad_norm": 0.20385892689228058, "learning_rate": 0.00019994520051751146, "loss": 11.6978, "step": 1540 }, { "epoch": 0.03225738926567864, "grad_norm": 0.19727252423763275, "learning_rate": 0.00019994512791802246, "loss": 11.6905, "step": 1541 }, { "epoch": 0.032278322029640795, "grad_norm": 0.1876211166381836, "learning_rate": 0.00019994505527048782, "loss": 11.6872, "step": 1542 }, { "epoch": 0.03229925479360295, "grad_norm": 0.2079821527004242, "learning_rate": 0.0001999449825749076, "loss": 11.6923, "step": 1543 }, { "epoch": 0.0323201875575651, "grad_norm": 0.1420307457447052, "learning_rate": 0.00019994490983128182, "loss": 11.689, "step": 1544 }, { "epoch": 0.03234112032152726, "grad_norm": 0.1503608375787735, "learning_rate": 0.0001999448370396105, "loss": 11.6938, "step": 1545 }, { "epoch": 0.03236205308548941, "grad_norm": 0.2660914659500122, "learning_rate": 0.00019994476419989374, "loss": 11.7015, "step": 1546 }, { "epoch": 0.03238298584945156, "grad_norm": 0.18880899250507355, "learning_rate": 0.0001999446913121315, "loss": 11.6867, "step": 1547 }, { "epoch": 0.03240391861341371, "grad_norm": 0.2397981584072113, "learning_rate": 0.00019994461837632383, "loss": 11.6902, "step": 1548 }, { "epoch": 0.03242485137737587, "grad_norm": 0.19281452894210815, "learning_rate": 0.00019994454539247082, "loss": 11.6854, "step": 1549 }, { "epoch": 0.03244578414133802, "grad_norm": 0.15443897247314453, "learning_rate": 0.00019994447236057245, "loss": 11.6816, "step": 1550 }, { "epoch": 0.032466716905300175, "grad_norm": 0.22456936538219452, "learning_rate": 0.00019994439928062874, "loss": 11.7056, "step": 1551 }, { "epoch": 0.032487649669262327, "grad_norm": 0.25113368034362793, "learning_rate": 0.00019994432615263978, "loss": 11.7003, "step": 1552 }, { "epoch": 0.032508582433224485, "grad_norm": 0.19384656846523285, "learning_rate": 0.00019994425297660556, "loss": 11.7031, "step": 1553 }, { "epoch": 0.03252951519718664, "grad_norm": 0.18439625203609467, "learning_rate": 0.00019994417975252615, "loss": 11.6922, "step": 1554 }, { "epoch": 0.03255044796114879, "grad_norm": 0.23280379176139832, "learning_rate": 0.00019994410648040156, "loss": 11.6967, "step": 1555 }, { "epoch": 0.03257138072511094, "grad_norm": 0.19248747825622559, "learning_rate": 0.00019994403316023185, "loss": 11.694, "step": 1556 }, { "epoch": 0.0325923134890731, "grad_norm": 0.21915528178215027, "learning_rate": 0.000199943959792017, "loss": 11.692, "step": 1557 }, { "epoch": 0.03261324625303525, "grad_norm": 0.26945266127586365, "learning_rate": 0.00019994388637575714, "loss": 11.6995, "step": 1558 }, { "epoch": 0.0326341790169974, "grad_norm": 0.19191817939281464, "learning_rate": 0.00019994381291145224, "loss": 11.6782, "step": 1559 }, { "epoch": 0.032655111780959555, "grad_norm": 0.21872460842132568, "learning_rate": 0.00019994373939910234, "loss": 11.717, "step": 1560 }, { "epoch": 0.03267604454492171, "grad_norm": 0.22265934944152832, "learning_rate": 0.00019994366583870745, "loss": 11.6987, "step": 1561 }, { "epoch": 0.032696977308883865, "grad_norm": 0.21101737022399902, "learning_rate": 0.00019994359223026765, "loss": 11.7045, "step": 1562 }, { "epoch": 0.03271791007284602, "grad_norm": 0.17944662272930145, "learning_rate": 0.00019994351857378302, "loss": 11.6978, "step": 1563 }, { "epoch": 0.032738842836808175, "grad_norm": 0.21047823131084442, "learning_rate": 0.00019994344486925348, "loss": 11.6923, "step": 1564 }, { "epoch": 0.03275977560077033, "grad_norm": 0.2082463949918747, "learning_rate": 0.00019994337111667914, "loss": 11.6996, "step": 1565 }, { "epoch": 0.03278070836473248, "grad_norm": 0.18759185075759888, "learning_rate": 0.00019994329731606002, "loss": 11.6834, "step": 1566 }, { "epoch": 0.03280164112869463, "grad_norm": 0.35474199056625366, "learning_rate": 0.00019994322346739612, "loss": 11.6856, "step": 1567 }, { "epoch": 0.03282257389265679, "grad_norm": 0.2555352449417114, "learning_rate": 0.00019994314957068755, "loss": 11.7022, "step": 1568 }, { "epoch": 0.03284350665661894, "grad_norm": 0.23101544380187988, "learning_rate": 0.00019994307562593431, "loss": 11.698, "step": 1569 }, { "epoch": 0.03286443942058109, "grad_norm": 0.19691893458366394, "learning_rate": 0.00019994300163313642, "loss": 11.7119, "step": 1570 }, { "epoch": 0.032885372184543245, "grad_norm": 0.1888582557439804, "learning_rate": 0.0001999429275922939, "loss": 11.7022, "step": 1571 }, { "epoch": 0.032906304948505403, "grad_norm": 0.1646772027015686, "learning_rate": 0.00019994285350340683, "loss": 11.6772, "step": 1572 }, { "epoch": 0.032927237712467555, "grad_norm": 0.19809472560882568, "learning_rate": 0.00019994277936647526, "loss": 11.6881, "step": 1573 }, { "epoch": 0.03294817047642971, "grad_norm": 0.1910250335931778, "learning_rate": 0.00019994270518149912, "loss": 11.6982, "step": 1574 }, { "epoch": 0.03296910324039186, "grad_norm": 0.16075068712234497, "learning_rate": 0.0001999426309484786, "loss": 11.6978, "step": 1575 }, { "epoch": 0.03299003600435402, "grad_norm": 0.25057339668273926, "learning_rate": 0.0001999425566674136, "loss": 11.7067, "step": 1576 }, { "epoch": 0.03301096876831617, "grad_norm": 0.2313249260187149, "learning_rate": 0.00019994248233830422, "loss": 11.6948, "step": 1577 }, { "epoch": 0.03303190153227832, "grad_norm": 0.1590743213891983, "learning_rate": 0.0001999424079611505, "loss": 11.6781, "step": 1578 }, { "epoch": 0.03305283429624047, "grad_norm": 0.3071581721305847, "learning_rate": 0.00019994233353595243, "loss": 11.6944, "step": 1579 }, { "epoch": 0.03307376706020263, "grad_norm": 0.2000260055065155, "learning_rate": 0.0001999422590627101, "loss": 11.688, "step": 1580 }, { "epoch": 0.03309469982416478, "grad_norm": 0.1639111042022705, "learning_rate": 0.00019994218454142356, "loss": 11.686, "step": 1581 }, { "epoch": 0.033115632588126935, "grad_norm": 0.20757228136062622, "learning_rate": 0.00019994210997209276, "loss": 11.6891, "step": 1582 }, { "epoch": 0.03313656535208909, "grad_norm": 0.19255663454532623, "learning_rate": 0.00019994203535471781, "loss": 11.6792, "step": 1583 }, { "epoch": 0.033157498116051246, "grad_norm": 0.15836937725543976, "learning_rate": 0.0001999419606892987, "loss": 11.6881, "step": 1584 }, { "epoch": 0.0331784308800134, "grad_norm": 0.2144034057855606, "learning_rate": 0.0001999418859758355, "loss": 11.6914, "step": 1585 }, { "epoch": 0.03319936364397555, "grad_norm": 0.20291905105113983, "learning_rate": 0.00019994181121432822, "loss": 11.6979, "step": 1586 }, { "epoch": 0.0332202964079377, "grad_norm": 0.16239841282367706, "learning_rate": 0.00019994173640477692, "loss": 11.6932, "step": 1587 }, { "epoch": 0.03324122917189986, "grad_norm": 0.25237882137298584, "learning_rate": 0.00019994166154718165, "loss": 11.7085, "step": 1588 }, { "epoch": 0.03326216193586201, "grad_norm": 0.22903376817703247, "learning_rate": 0.00019994158664154237, "loss": 11.6865, "step": 1589 }, { "epoch": 0.03328309469982416, "grad_norm": 0.18472155928611755, "learning_rate": 0.0001999415116878592, "loss": 11.6866, "step": 1590 }, { "epoch": 0.03330402746378632, "grad_norm": 0.31898102164268494, "learning_rate": 0.00019994143668613214, "loss": 11.7053, "step": 1591 }, { "epoch": 0.033324960227748474, "grad_norm": 0.18640947341918945, "learning_rate": 0.00019994136163636119, "loss": 11.686, "step": 1592 }, { "epoch": 0.033345892991710625, "grad_norm": 0.18376120924949646, "learning_rate": 0.00019994128653854647, "loss": 11.691, "step": 1593 }, { "epoch": 0.03336682575567278, "grad_norm": 0.20429208874702454, "learning_rate": 0.00019994121139268794, "loss": 11.7075, "step": 1594 }, { "epoch": 0.033387758519634936, "grad_norm": 0.2409278303384781, "learning_rate": 0.0001999411361987857, "loss": 11.6907, "step": 1595 }, { "epoch": 0.03340869128359709, "grad_norm": 0.2511284649372101, "learning_rate": 0.0001999410609568397, "loss": 11.6977, "step": 1596 }, { "epoch": 0.03342962404755924, "grad_norm": 0.1684260070323944, "learning_rate": 0.00019994098566685007, "loss": 11.6915, "step": 1597 }, { "epoch": 0.03345055681152139, "grad_norm": 0.16723021864891052, "learning_rate": 0.0001999409103288168, "loss": 11.6985, "step": 1598 }, { "epoch": 0.03347148957548355, "grad_norm": 0.16482962667942047, "learning_rate": 0.00019994083494273993, "loss": 11.6918, "step": 1599 }, { "epoch": 0.0334924223394457, "grad_norm": 0.20297501981258392, "learning_rate": 0.0001999407595086195, "loss": 11.7013, "step": 1600 }, { "epoch": 0.03351335510340785, "grad_norm": 0.20053932070732117, "learning_rate": 0.00019994068402645553, "loss": 11.6945, "step": 1601 }, { "epoch": 0.033534287867370005, "grad_norm": 0.17548927664756775, "learning_rate": 0.00019994060849624809, "loss": 11.6959, "step": 1602 }, { "epoch": 0.033555220631332164, "grad_norm": 0.20200134813785553, "learning_rate": 0.00019994053291799718, "loss": 11.6896, "step": 1603 }, { "epoch": 0.033576153395294316, "grad_norm": 0.173550084233284, "learning_rate": 0.00019994045729170286, "loss": 11.6996, "step": 1604 }, { "epoch": 0.03359708615925647, "grad_norm": 0.21248577535152435, "learning_rate": 0.0001999403816173652, "loss": 11.7098, "step": 1605 }, { "epoch": 0.03361801892321862, "grad_norm": 0.2073383331298828, "learning_rate": 0.00019994030589498413, "loss": 11.6907, "step": 1606 }, { "epoch": 0.03363895168718078, "grad_norm": 0.2012162059545517, "learning_rate": 0.00019994023012455978, "loss": 11.6839, "step": 1607 }, { "epoch": 0.03365988445114293, "grad_norm": 0.192624032497406, "learning_rate": 0.00019994015430609217, "loss": 11.7131, "step": 1608 }, { "epoch": 0.03368081721510508, "grad_norm": 0.19335423409938812, "learning_rate": 0.00019994007843958131, "loss": 11.6949, "step": 1609 }, { "epoch": 0.03370174997906723, "grad_norm": 0.24078363180160522, "learning_rate": 0.00019994000252502724, "loss": 11.6935, "step": 1610 }, { "epoch": 0.03372268274302939, "grad_norm": 0.29374435544013977, "learning_rate": 0.00019993992656243002, "loss": 11.7028, "step": 1611 }, { "epoch": 0.033743615506991544, "grad_norm": 0.18524251878261566, "learning_rate": 0.00019993985055178967, "loss": 11.6976, "step": 1612 }, { "epoch": 0.033764548270953695, "grad_norm": 0.1704345941543579, "learning_rate": 0.00019993977449310626, "loss": 11.693, "step": 1613 }, { "epoch": 0.03378548103491585, "grad_norm": 0.1746574342250824, "learning_rate": 0.00019993969838637979, "loss": 11.7017, "step": 1614 }, { "epoch": 0.033806413798878006, "grad_norm": 0.18428853154182434, "learning_rate": 0.00019993962223161028, "loss": 11.703, "step": 1615 }, { "epoch": 0.03382734656284016, "grad_norm": 0.16744263470172882, "learning_rate": 0.00019993954602879778, "loss": 11.6821, "step": 1616 }, { "epoch": 0.03384827932680231, "grad_norm": 0.2129075676202774, "learning_rate": 0.00019993946977794237, "loss": 11.6848, "step": 1617 }, { "epoch": 0.03386921209076446, "grad_norm": 0.1733381599187851, "learning_rate": 0.00019993939347904405, "loss": 11.6934, "step": 1618 }, { "epoch": 0.03389014485472662, "grad_norm": 0.19218666851520538, "learning_rate": 0.00019993931713210286, "loss": 11.6907, "step": 1619 }, { "epoch": 0.03391107761868877, "grad_norm": 0.1871860921382904, "learning_rate": 0.00019993924073711882, "loss": 11.6876, "step": 1620 }, { "epoch": 0.03393201038265092, "grad_norm": 0.16435852646827698, "learning_rate": 0.00019993916429409197, "loss": 11.6822, "step": 1621 }, { "epoch": 0.03395294314661308, "grad_norm": 0.2190190553665161, "learning_rate": 0.00019993908780302242, "loss": 11.6844, "step": 1622 }, { "epoch": 0.033973875910575234, "grad_norm": 0.15037928521633148, "learning_rate": 0.0001999390112639101, "loss": 11.6786, "step": 1623 }, { "epoch": 0.033994808674537386, "grad_norm": 0.238820418715477, "learning_rate": 0.0001999389346767551, "loss": 11.7004, "step": 1624 }, { "epoch": 0.03401574143849954, "grad_norm": 0.18700572848320007, "learning_rate": 0.00019993885804155747, "loss": 11.6973, "step": 1625 }, { "epoch": 0.034036674202461696, "grad_norm": 0.19775007665157318, "learning_rate": 0.0001999387813583172, "loss": 11.7001, "step": 1626 }, { "epoch": 0.03405760696642385, "grad_norm": 0.20971694588661194, "learning_rate": 0.00019993870462703438, "loss": 11.7008, "step": 1627 }, { "epoch": 0.034078539730386, "grad_norm": 0.1727292388677597, "learning_rate": 0.00019993862784770903, "loss": 11.688, "step": 1628 }, { "epoch": 0.03409947249434815, "grad_norm": 0.19454215466976166, "learning_rate": 0.00019993855102034113, "loss": 11.6817, "step": 1629 }, { "epoch": 0.03412040525831031, "grad_norm": 0.15772849321365356, "learning_rate": 0.00019993847414493082, "loss": 11.694, "step": 1630 }, { "epoch": 0.03414133802227246, "grad_norm": 0.19280454516410828, "learning_rate": 0.00019993839722147808, "loss": 11.6877, "step": 1631 }, { "epoch": 0.034162270786234614, "grad_norm": 0.20138253271579742, "learning_rate": 0.0001999383202499829, "loss": 11.6945, "step": 1632 }, { "epoch": 0.034183203550196765, "grad_norm": 0.17524699866771698, "learning_rate": 0.0001999382432304454, "loss": 11.7006, "step": 1633 }, { "epoch": 0.034204136314158924, "grad_norm": 0.17605315148830414, "learning_rate": 0.0001999381661628656, "loss": 11.6814, "step": 1634 }, { "epoch": 0.034225069078121076, "grad_norm": 0.23150673508644104, "learning_rate": 0.0001999380890472435, "loss": 11.6986, "step": 1635 }, { "epoch": 0.03424600184208323, "grad_norm": 0.20708498358726501, "learning_rate": 0.00019993801188357915, "loss": 11.7, "step": 1636 }, { "epoch": 0.03426693460604538, "grad_norm": 0.18795958161354065, "learning_rate": 0.0001999379346718726, "loss": 11.6885, "step": 1637 }, { "epoch": 0.03428786737000754, "grad_norm": 0.17478413879871368, "learning_rate": 0.00019993785741212388, "loss": 11.6948, "step": 1638 }, { "epoch": 0.03430880013396969, "grad_norm": 0.25596487522125244, "learning_rate": 0.00019993778010433304, "loss": 11.6921, "step": 1639 }, { "epoch": 0.03432973289793184, "grad_norm": 0.17061986029148102, "learning_rate": 0.0001999377027485001, "loss": 11.6902, "step": 1640 }, { "epoch": 0.03435066566189399, "grad_norm": 0.25285857915878296, "learning_rate": 0.0001999376253446251, "loss": 11.6869, "step": 1641 }, { "epoch": 0.03437159842585615, "grad_norm": 0.2147378772497177, "learning_rate": 0.00019993754789270808, "loss": 11.6806, "step": 1642 }, { "epoch": 0.034392531189818304, "grad_norm": 0.18485470116138458, "learning_rate": 0.0001999374703927491, "loss": 11.7023, "step": 1643 }, { "epoch": 0.034413463953780456, "grad_norm": 0.17598317563533783, "learning_rate": 0.00019993739284474815, "loss": 11.6961, "step": 1644 }, { "epoch": 0.03443439671774261, "grad_norm": 0.23737365007400513, "learning_rate": 0.0001999373152487053, "loss": 11.693, "step": 1645 }, { "epoch": 0.034455329481704766, "grad_norm": 0.22466854751110077, "learning_rate": 0.0001999372376046206, "loss": 11.7142, "step": 1646 }, { "epoch": 0.03447626224566692, "grad_norm": 0.15630686283111572, "learning_rate": 0.00019993715991249405, "loss": 11.6901, "step": 1647 }, { "epoch": 0.03449719500962907, "grad_norm": 0.19162636995315552, "learning_rate": 0.00019993708217232569, "loss": 11.6903, "step": 1648 }, { "epoch": 0.03451812777359123, "grad_norm": 0.22654467821121216, "learning_rate": 0.0001999370043841156, "loss": 11.7011, "step": 1649 }, { "epoch": 0.03453906053755338, "grad_norm": 0.21147307753562927, "learning_rate": 0.00019993692654786375, "loss": 11.69, "step": 1650 }, { "epoch": 0.03455999330151553, "grad_norm": 0.1744055449962616, "learning_rate": 0.00019993684866357025, "loss": 11.6756, "step": 1651 }, { "epoch": 0.034580926065477684, "grad_norm": 0.21075735986232758, "learning_rate": 0.00019993677073123508, "loss": 11.6991, "step": 1652 }, { "epoch": 0.03460185882943984, "grad_norm": 0.1985132098197937, "learning_rate": 0.00019993669275085834, "loss": 11.6765, "step": 1653 }, { "epoch": 0.034622791593401994, "grad_norm": 0.18572424352169037, "learning_rate": 0.00019993661472244, "loss": 11.707, "step": 1654 }, { "epoch": 0.034643724357364146, "grad_norm": 0.1878645420074463, "learning_rate": 0.00019993653664598013, "loss": 11.7002, "step": 1655 }, { "epoch": 0.0346646571213263, "grad_norm": 0.17713505029678345, "learning_rate": 0.00019993645852147877, "loss": 11.7137, "step": 1656 }, { "epoch": 0.034685589885288456, "grad_norm": 0.1931058168411255, "learning_rate": 0.00019993638034893595, "loss": 11.6988, "step": 1657 }, { "epoch": 0.03470652264925061, "grad_norm": 0.1964341700077057, "learning_rate": 0.0001999363021283517, "loss": 11.6747, "step": 1658 }, { "epoch": 0.03472745541321276, "grad_norm": 0.1958533078432083, "learning_rate": 0.0001999362238597261, "loss": 11.6928, "step": 1659 }, { "epoch": 0.03474838817717491, "grad_norm": 0.32993239164352417, "learning_rate": 0.0001999361455430591, "loss": 11.6844, "step": 1660 }, { "epoch": 0.03476932094113707, "grad_norm": 0.22663593292236328, "learning_rate": 0.00019993606717835083, "loss": 11.6894, "step": 1661 }, { "epoch": 0.03479025370509922, "grad_norm": 0.1732775866985321, "learning_rate": 0.00019993598876560129, "loss": 11.6965, "step": 1662 }, { "epoch": 0.034811186469061374, "grad_norm": 0.1854775995016098, "learning_rate": 0.0001999359103048105, "loss": 11.684, "step": 1663 }, { "epoch": 0.034832119233023526, "grad_norm": 0.22227901220321655, "learning_rate": 0.00019993583179597852, "loss": 11.714, "step": 1664 }, { "epoch": 0.034853051996985684, "grad_norm": 0.1504851132631302, "learning_rate": 0.00019993575323910538, "loss": 11.6795, "step": 1665 }, { "epoch": 0.034873984760947836, "grad_norm": 0.21111278235912323, "learning_rate": 0.0001999356746341911, "loss": 11.6965, "step": 1666 }, { "epoch": 0.03489491752490999, "grad_norm": 0.18016056716442108, "learning_rate": 0.00019993559598123581, "loss": 11.6835, "step": 1667 }, { "epoch": 0.03491585028887214, "grad_norm": 0.23609957098960876, "learning_rate": 0.00019993551728023944, "loss": 11.6978, "step": 1668 }, { "epoch": 0.0349367830528343, "grad_norm": 0.17127245664596558, "learning_rate": 0.00019993543853120204, "loss": 11.6792, "step": 1669 }, { "epoch": 0.03495771581679645, "grad_norm": 0.20461292564868927, "learning_rate": 0.00019993535973412366, "loss": 11.6648, "step": 1670 }, { "epoch": 0.0349786485807586, "grad_norm": 0.1809781938791275, "learning_rate": 0.0001999352808890044, "loss": 11.694, "step": 1671 }, { "epoch": 0.034999581344720754, "grad_norm": 0.16290394961833954, "learning_rate": 0.00019993520199584422, "loss": 11.6928, "step": 1672 }, { "epoch": 0.03502051410868291, "grad_norm": 0.15953557193279266, "learning_rate": 0.00019993512305464318, "loss": 11.705, "step": 1673 }, { "epoch": 0.035041446872645064, "grad_norm": 0.26029297709465027, "learning_rate": 0.00019993504406540135, "loss": 11.7159, "step": 1674 }, { "epoch": 0.035062379636607216, "grad_norm": 0.15838609635829926, "learning_rate": 0.00019993496502811872, "loss": 11.6979, "step": 1675 }, { "epoch": 0.03508331240056937, "grad_norm": 0.20803837478160858, "learning_rate": 0.00019993488594279536, "loss": 11.7001, "step": 1676 }, { "epoch": 0.035104245164531526, "grad_norm": 0.2670416235923767, "learning_rate": 0.00019993480680943128, "loss": 11.6754, "step": 1677 }, { "epoch": 0.03512517792849368, "grad_norm": 0.18490368127822876, "learning_rate": 0.00019993472762802657, "loss": 11.6985, "step": 1678 }, { "epoch": 0.03514611069245583, "grad_norm": 0.20965176820755005, "learning_rate": 0.0001999346483985812, "loss": 11.6953, "step": 1679 }, { "epoch": 0.03516704345641799, "grad_norm": 0.19995367527008057, "learning_rate": 0.00019993456912109525, "loss": 11.6865, "step": 1680 }, { "epoch": 0.03518797622038014, "grad_norm": 0.18324652314186096, "learning_rate": 0.00019993448979556874, "loss": 11.7, "step": 1681 }, { "epoch": 0.03520890898434229, "grad_norm": 0.16024358570575714, "learning_rate": 0.00019993441042200175, "loss": 11.6908, "step": 1682 }, { "epoch": 0.035229841748304444, "grad_norm": 0.21891634166240692, "learning_rate": 0.00019993433100039428, "loss": 11.7014, "step": 1683 }, { "epoch": 0.0352507745122666, "grad_norm": 0.19301582872867584, "learning_rate": 0.00019993425153074635, "loss": 11.6694, "step": 1684 }, { "epoch": 0.035271707276228755, "grad_norm": 0.1467723846435547, "learning_rate": 0.00019993417201305804, "loss": 11.6958, "step": 1685 }, { "epoch": 0.035292640040190906, "grad_norm": 0.17191551625728607, "learning_rate": 0.00019993409244732936, "loss": 11.6991, "step": 1686 }, { "epoch": 0.03531357280415306, "grad_norm": 0.1851450651884079, "learning_rate": 0.00019993401283356036, "loss": 11.6996, "step": 1687 }, { "epoch": 0.03533450556811522, "grad_norm": 0.2137315273284912, "learning_rate": 0.0001999339331717511, "loss": 11.7085, "step": 1688 }, { "epoch": 0.03535543833207737, "grad_norm": 0.1937512904405594, "learning_rate": 0.00019993385346190156, "loss": 11.6891, "step": 1689 }, { "epoch": 0.03537637109603952, "grad_norm": 0.18129418790340424, "learning_rate": 0.00019993377370401184, "loss": 11.6953, "step": 1690 }, { "epoch": 0.03539730386000167, "grad_norm": 0.17325811088085175, "learning_rate": 0.00019993369389808194, "loss": 11.6939, "step": 1691 }, { "epoch": 0.03541823662396383, "grad_norm": 0.2260933518409729, "learning_rate": 0.00019993361404411193, "loss": 11.7039, "step": 1692 }, { "epoch": 0.03543916938792598, "grad_norm": 0.17860619723796844, "learning_rate": 0.00019993353414210182, "loss": 11.6964, "step": 1693 }, { "epoch": 0.035460102151888134, "grad_norm": 0.1997687816619873, "learning_rate": 0.00019993345419205163, "loss": 11.6941, "step": 1694 }, { "epoch": 0.035481034915850286, "grad_norm": 0.24926868081092834, "learning_rate": 0.00019993337419396145, "loss": 11.6987, "step": 1695 }, { "epoch": 0.035501967679812445, "grad_norm": 0.31266501545906067, "learning_rate": 0.0001999332941478313, "loss": 11.6992, "step": 1696 }, { "epoch": 0.0355229004437746, "grad_norm": 0.1953706592321396, "learning_rate": 0.0001999332140536612, "loss": 11.7108, "step": 1697 }, { "epoch": 0.03554383320773675, "grad_norm": 0.1912543624639511, "learning_rate": 0.0001999331339114512, "loss": 11.6914, "step": 1698 }, { "epoch": 0.0355647659716989, "grad_norm": 0.15441913902759552, "learning_rate": 0.00019993305372120135, "loss": 11.7012, "step": 1699 }, { "epoch": 0.03558569873566106, "grad_norm": 0.1894526183605194, "learning_rate": 0.00019993297348291165, "loss": 11.7016, "step": 1700 }, { "epoch": 0.03560663149962321, "grad_norm": 0.20395459234714508, "learning_rate": 0.0001999328931965822, "loss": 11.6981, "step": 1701 }, { "epoch": 0.03562756426358536, "grad_norm": 0.17641642689704895, "learning_rate": 0.000199932812862213, "loss": 11.7012, "step": 1702 }, { "epoch": 0.035648497027547514, "grad_norm": 0.22820016741752625, "learning_rate": 0.0001999327324798041, "loss": 11.7161, "step": 1703 }, { "epoch": 0.03566942979150967, "grad_norm": 0.2001202255487442, "learning_rate": 0.0001999326520493555, "loss": 11.6809, "step": 1704 }, { "epoch": 0.035690362555471825, "grad_norm": 0.177347332239151, "learning_rate": 0.00019993257157086732, "loss": 11.6719, "step": 1705 }, { "epoch": 0.035711295319433976, "grad_norm": 0.16470126807689667, "learning_rate": 0.00019993249104433952, "loss": 11.704, "step": 1706 }, { "epoch": 0.035732228083396135, "grad_norm": 0.2714090943336487, "learning_rate": 0.00019993241046977218, "loss": 11.6919, "step": 1707 }, { "epoch": 0.03575316084735829, "grad_norm": 0.16876628994941711, "learning_rate": 0.0001999323298471653, "loss": 11.6785, "step": 1708 }, { "epoch": 0.03577409361132044, "grad_norm": 0.24535316228866577, "learning_rate": 0.000199932249176519, "loss": 11.6971, "step": 1709 }, { "epoch": 0.03579502637528259, "grad_norm": 0.21297471225261688, "learning_rate": 0.0001999321684578332, "loss": 11.6859, "step": 1710 }, { "epoch": 0.03581595913924475, "grad_norm": 0.2422109693288803, "learning_rate": 0.00019993208769110806, "loss": 11.6863, "step": 1711 }, { "epoch": 0.0358368919032069, "grad_norm": 0.2128925919532776, "learning_rate": 0.00019993200687634354, "loss": 11.6886, "step": 1712 }, { "epoch": 0.03585782466716905, "grad_norm": 0.1781654953956604, "learning_rate": 0.0001999319260135397, "loss": 11.6852, "step": 1713 }, { "epoch": 0.035878757431131204, "grad_norm": 0.1967514604330063, "learning_rate": 0.00019993184510269657, "loss": 11.6844, "step": 1714 }, { "epoch": 0.03589969019509336, "grad_norm": 0.23676513135433197, "learning_rate": 0.00019993176414381423, "loss": 11.6884, "step": 1715 }, { "epoch": 0.035920622959055515, "grad_norm": 0.14875227212905884, "learning_rate": 0.00019993168313689266, "loss": 11.685, "step": 1716 }, { "epoch": 0.03594155572301767, "grad_norm": 0.22977805137634277, "learning_rate": 0.00019993160208193193, "loss": 11.6952, "step": 1717 }, { "epoch": 0.03596248848697982, "grad_norm": 0.15839052200317383, "learning_rate": 0.0001999315209789321, "loss": 11.677, "step": 1718 }, { "epoch": 0.03598342125094198, "grad_norm": 0.19465769827365875, "learning_rate": 0.00019993143982789316, "loss": 11.6939, "step": 1719 }, { "epoch": 0.03600435401490413, "grad_norm": 0.19243896007537842, "learning_rate": 0.00019993135862881518, "loss": 11.6931, "step": 1720 }, { "epoch": 0.03602528677886628, "grad_norm": 0.14789530634880066, "learning_rate": 0.00019993127738169818, "loss": 11.6748, "step": 1721 }, { "epoch": 0.03604621954282843, "grad_norm": 0.2054479569196701, "learning_rate": 0.00019993119608654225, "loss": 11.6964, "step": 1722 }, { "epoch": 0.03606715230679059, "grad_norm": 0.17100295424461365, "learning_rate": 0.00019993111474334735, "loss": 11.7057, "step": 1723 }, { "epoch": 0.03608808507075274, "grad_norm": 0.20872926712036133, "learning_rate": 0.0001999310333521136, "loss": 11.7086, "step": 1724 }, { "epoch": 0.036109017834714895, "grad_norm": 0.19237899780273438, "learning_rate": 0.00019993095191284097, "loss": 11.6979, "step": 1725 }, { "epoch": 0.036129950598677046, "grad_norm": 0.24476522207260132, "learning_rate": 0.00019993087042552955, "loss": 11.707, "step": 1726 }, { "epoch": 0.036150883362639205, "grad_norm": 0.19381417334079742, "learning_rate": 0.00019993078889017933, "loss": 11.6898, "step": 1727 }, { "epoch": 0.03617181612660136, "grad_norm": 0.17042936384677887, "learning_rate": 0.00019993070730679039, "loss": 11.6858, "step": 1728 }, { "epoch": 0.03619274889056351, "grad_norm": 0.21337485313415527, "learning_rate": 0.00019993062567536275, "loss": 11.6902, "step": 1729 }, { "epoch": 0.03621368165452566, "grad_norm": 0.20024484395980835, "learning_rate": 0.00019993054399589647, "loss": 11.6994, "step": 1730 }, { "epoch": 0.03623461441848782, "grad_norm": 0.17976604402065277, "learning_rate": 0.00019993046226839158, "loss": 11.7003, "step": 1731 }, { "epoch": 0.03625554718244997, "grad_norm": 0.17421992123126984, "learning_rate": 0.0001999303804928481, "loss": 11.6902, "step": 1732 }, { "epoch": 0.03627647994641212, "grad_norm": 0.18283824622631073, "learning_rate": 0.0001999302986692661, "loss": 11.6889, "step": 1733 }, { "epoch": 0.036297412710374274, "grad_norm": 0.19885757565498352, "learning_rate": 0.00019993021679764558, "loss": 11.7067, "step": 1734 }, { "epoch": 0.03631834547433643, "grad_norm": 0.20485636591911316, "learning_rate": 0.00019993013487798662, "loss": 11.7026, "step": 1735 }, { "epoch": 0.036339278238298585, "grad_norm": 0.16433335840702057, "learning_rate": 0.00019993005291028923, "loss": 11.687, "step": 1736 }, { "epoch": 0.03636021100226074, "grad_norm": 0.1664646416902542, "learning_rate": 0.00019992997089455345, "loss": 11.6817, "step": 1737 }, { "epoch": 0.036381143766222895, "grad_norm": 0.14878371357917786, "learning_rate": 0.00019992988883077935, "loss": 11.6904, "step": 1738 }, { "epoch": 0.03640207653018505, "grad_norm": 0.2352319359779358, "learning_rate": 0.00019992980671896694, "loss": 11.6965, "step": 1739 }, { "epoch": 0.0364230092941472, "grad_norm": 0.21640585362911224, "learning_rate": 0.0001999297245591163, "loss": 11.6964, "step": 1740 }, { "epoch": 0.03644394205810935, "grad_norm": 0.2166300266981125, "learning_rate": 0.0001999296423512274, "loss": 11.6905, "step": 1741 }, { "epoch": 0.03646487482207151, "grad_norm": 0.18862859904766083, "learning_rate": 0.00019992956009530036, "loss": 11.6783, "step": 1742 }, { "epoch": 0.03648580758603366, "grad_norm": 0.19671311974525452, "learning_rate": 0.00019992947779133512, "loss": 11.6887, "step": 1743 }, { "epoch": 0.03650674034999581, "grad_norm": 0.21473325788974762, "learning_rate": 0.0001999293954393318, "loss": 11.6885, "step": 1744 }, { "epoch": 0.036527673113957965, "grad_norm": 0.23741932213306427, "learning_rate": 0.00019992931303929043, "loss": 11.705, "step": 1745 }, { "epoch": 0.03654860587792012, "grad_norm": 0.1580580174922943, "learning_rate": 0.00019992923059121106, "loss": 11.6809, "step": 1746 }, { "epoch": 0.036569538641882275, "grad_norm": 0.22073006629943848, "learning_rate": 0.00019992914809509365, "loss": 11.6797, "step": 1747 }, { "epoch": 0.03659047140584443, "grad_norm": 0.20351414382457733, "learning_rate": 0.00019992906555093832, "loss": 11.6968, "step": 1748 }, { "epoch": 0.03661140416980658, "grad_norm": 0.19644592702388763, "learning_rate": 0.0001999289829587451, "loss": 11.6881, "step": 1749 }, { "epoch": 0.03663233693376874, "grad_norm": 0.15681444108486176, "learning_rate": 0.00019992890031851398, "loss": 11.681, "step": 1750 }, { "epoch": 0.03665326969773089, "grad_norm": 0.16514728963375092, "learning_rate": 0.0001999288176302451, "loss": 11.6826, "step": 1751 }, { "epoch": 0.03667420246169304, "grad_norm": 0.21384908258914948, "learning_rate": 0.00019992873489393836, "loss": 11.6862, "step": 1752 }, { "epoch": 0.03669513522565519, "grad_norm": 0.21787546575069427, "learning_rate": 0.00019992865210959393, "loss": 11.6913, "step": 1753 }, { "epoch": 0.03671606798961735, "grad_norm": 0.19558075070381165, "learning_rate": 0.00019992856927721176, "loss": 11.6936, "step": 1754 }, { "epoch": 0.0367370007535795, "grad_norm": 0.20743155479431152, "learning_rate": 0.00019992848639679194, "loss": 11.6829, "step": 1755 }, { "epoch": 0.036757933517541655, "grad_norm": 0.2362898290157318, "learning_rate": 0.0001999284034683345, "loss": 11.6767, "step": 1756 }, { "epoch": 0.03677886628150381, "grad_norm": 0.17062269151210785, "learning_rate": 0.00019992832049183946, "loss": 11.6718, "step": 1757 }, { "epoch": 0.036799799045465965, "grad_norm": 0.19699332118034363, "learning_rate": 0.00019992823746730687, "loss": 11.6829, "step": 1758 }, { "epoch": 0.03682073180942812, "grad_norm": 0.20409934222698212, "learning_rate": 0.0001999281543947368, "loss": 11.6886, "step": 1759 }, { "epoch": 0.03684166457339027, "grad_norm": 0.2692883312702179, "learning_rate": 0.00019992807127412925, "loss": 11.7021, "step": 1760 }, { "epoch": 0.03686259733735242, "grad_norm": 0.20494525134563446, "learning_rate": 0.00019992798810548426, "loss": 11.6883, "step": 1761 }, { "epoch": 0.03688353010131458, "grad_norm": 0.19636845588684082, "learning_rate": 0.00019992790488880188, "loss": 11.6912, "step": 1762 }, { "epoch": 0.03690446286527673, "grad_norm": 0.2637939751148224, "learning_rate": 0.00019992782162408216, "loss": 11.6967, "step": 1763 }, { "epoch": 0.03692539562923888, "grad_norm": 0.19999371469020844, "learning_rate": 0.00019992773831132516, "loss": 11.6946, "step": 1764 }, { "epoch": 0.03694632839320104, "grad_norm": 0.19004333019256592, "learning_rate": 0.00019992765495053085, "loss": 11.6841, "step": 1765 }, { "epoch": 0.03696726115716319, "grad_norm": 0.21184423565864563, "learning_rate": 0.00019992757154169936, "loss": 11.71, "step": 1766 }, { "epoch": 0.036988193921125345, "grad_norm": 0.20238201320171356, "learning_rate": 0.00019992748808483066, "loss": 11.6974, "step": 1767 }, { "epoch": 0.0370091266850875, "grad_norm": 0.6438148021697998, "learning_rate": 0.0001999274045799248, "loss": 11.6792, "step": 1768 }, { "epoch": 0.037030059449049656, "grad_norm": 0.2072240710258484, "learning_rate": 0.00019992732102698185, "loss": 11.695, "step": 1769 }, { "epoch": 0.03705099221301181, "grad_norm": 0.16324518620967865, "learning_rate": 0.00019992723742600184, "loss": 11.6849, "step": 1770 }, { "epoch": 0.03707192497697396, "grad_norm": 0.17736802995204926, "learning_rate": 0.0001999271537769848, "loss": 11.6866, "step": 1771 }, { "epoch": 0.03709285774093611, "grad_norm": 0.2318897843360901, "learning_rate": 0.00019992707007993076, "loss": 11.6793, "step": 1772 }, { "epoch": 0.03711379050489827, "grad_norm": 0.20480111241340637, "learning_rate": 0.0001999269863348398, "loss": 11.698, "step": 1773 }, { "epoch": 0.03713472326886042, "grad_norm": 0.19569580256938934, "learning_rate": 0.00019992690254171192, "loss": 11.6927, "step": 1774 }, { "epoch": 0.03715565603282257, "grad_norm": 0.19065183401107788, "learning_rate": 0.00019992681870054717, "loss": 11.688, "step": 1775 }, { "epoch": 0.037176588796784725, "grad_norm": 0.19697697460651398, "learning_rate": 0.0001999267348113456, "loss": 11.6813, "step": 1776 }, { "epoch": 0.037197521560746884, "grad_norm": 0.2109706699848175, "learning_rate": 0.00019992665087410725, "loss": 11.6968, "step": 1777 }, { "epoch": 0.037218454324709035, "grad_norm": 0.1905667781829834, "learning_rate": 0.00019992656688883216, "loss": 11.687, "step": 1778 }, { "epoch": 0.03723938708867119, "grad_norm": 0.2251368910074234, "learning_rate": 0.00019992648285552036, "loss": 11.6965, "step": 1779 }, { "epoch": 0.03726031985263334, "grad_norm": 0.21572542190551758, "learning_rate": 0.00019992639877417192, "loss": 11.7123, "step": 1780 }, { "epoch": 0.0372812526165955, "grad_norm": 0.20870710909366608, "learning_rate": 0.00019992631464478682, "loss": 11.6899, "step": 1781 }, { "epoch": 0.03730218538055765, "grad_norm": 0.29544833302497864, "learning_rate": 0.00019992623046736517, "loss": 11.7013, "step": 1782 }, { "epoch": 0.0373231181445198, "grad_norm": 0.21319548785686493, "learning_rate": 0.00019992614624190694, "loss": 11.698, "step": 1783 }, { "epoch": 0.03734405090848195, "grad_norm": 0.19996292889118195, "learning_rate": 0.00019992606196841224, "loss": 11.6856, "step": 1784 }, { "epoch": 0.03736498367244411, "grad_norm": 0.1779024749994278, "learning_rate": 0.00019992597764688106, "loss": 11.6787, "step": 1785 }, { "epoch": 0.037385916436406263, "grad_norm": 0.1708696037530899, "learning_rate": 0.00019992589327731347, "loss": 11.694, "step": 1786 }, { "epoch": 0.037406849200368415, "grad_norm": 0.1843353807926178, "learning_rate": 0.00019992580885970954, "loss": 11.7078, "step": 1787 }, { "epoch": 0.03742778196433057, "grad_norm": 0.1650540679693222, "learning_rate": 0.00019992572439406922, "loss": 11.6983, "step": 1788 }, { "epoch": 0.037448714728292726, "grad_norm": 0.24350932240486145, "learning_rate": 0.0001999256398803926, "loss": 11.6954, "step": 1789 }, { "epoch": 0.03746964749225488, "grad_norm": 0.20756042003631592, "learning_rate": 0.00019992555531867973, "loss": 11.7161, "step": 1790 }, { "epoch": 0.03749058025621703, "grad_norm": 0.24108368158340454, "learning_rate": 0.00019992547070893065, "loss": 11.6954, "step": 1791 }, { "epoch": 0.03751151302017918, "grad_norm": 0.1909073293209076, "learning_rate": 0.00019992538605114542, "loss": 11.6923, "step": 1792 }, { "epoch": 0.03753244578414134, "grad_norm": 0.22796183824539185, "learning_rate": 0.00019992530134532401, "loss": 11.6958, "step": 1793 }, { "epoch": 0.03755337854810349, "grad_norm": 0.18242551386356354, "learning_rate": 0.00019992521659146652, "loss": 11.6989, "step": 1794 }, { "epoch": 0.03757431131206564, "grad_norm": 0.20197971165180206, "learning_rate": 0.00019992513178957298, "loss": 11.6841, "step": 1795 }, { "epoch": 0.0375952440760278, "grad_norm": 0.19269485771656036, "learning_rate": 0.0001999250469396434, "loss": 11.6937, "step": 1796 }, { "epoch": 0.037616176839989954, "grad_norm": 0.1665971875190735, "learning_rate": 0.00019992496204167793, "loss": 11.6923, "step": 1797 }, { "epoch": 0.037637109603952106, "grad_norm": 0.2751714587211609, "learning_rate": 0.00019992487709567645, "loss": 11.6903, "step": 1798 }, { "epoch": 0.03765804236791426, "grad_norm": 0.1829904168844223, "learning_rate": 0.00019992479210163911, "loss": 11.6864, "step": 1799 }, { "epoch": 0.037678975131876416, "grad_norm": 0.18379132449626923, "learning_rate": 0.0001999247070595659, "loss": 11.6853, "step": 1800 }, { "epoch": 0.03769990789583857, "grad_norm": 0.23973116278648376, "learning_rate": 0.0001999246219694569, "loss": 11.6864, "step": 1801 }, { "epoch": 0.03772084065980072, "grad_norm": 0.20343375205993652, "learning_rate": 0.0001999245368313121, "loss": 11.6954, "step": 1802 }, { "epoch": 0.03774177342376287, "grad_norm": 0.23641742765903473, "learning_rate": 0.00019992445164513162, "loss": 11.6978, "step": 1803 }, { "epoch": 0.03776270618772503, "grad_norm": 0.18318425118923187, "learning_rate": 0.00019992436641091542, "loss": 11.6951, "step": 1804 }, { "epoch": 0.03778363895168718, "grad_norm": 0.19029732048511505, "learning_rate": 0.0001999242811286636, "loss": 11.6935, "step": 1805 }, { "epoch": 0.037804571715649334, "grad_norm": 0.17006796598434448, "learning_rate": 0.00019992419579837613, "loss": 11.6957, "step": 1806 }, { "epoch": 0.037825504479611485, "grad_norm": 0.21806849539279938, "learning_rate": 0.00019992411042005315, "loss": 11.7126, "step": 1807 }, { "epoch": 0.037846437243573644, "grad_norm": 0.20563872158527374, "learning_rate": 0.00019992402499369462, "loss": 11.6895, "step": 1808 }, { "epoch": 0.037867370007535796, "grad_norm": 0.22162999212741852, "learning_rate": 0.00019992393951930062, "loss": 11.7012, "step": 1809 }, { "epoch": 0.03788830277149795, "grad_norm": 0.16604885458946228, "learning_rate": 0.00019992385399687117, "loss": 11.6847, "step": 1810 }, { "epoch": 0.0379092355354601, "grad_norm": 0.19409433007240295, "learning_rate": 0.00019992376842640632, "loss": 11.6895, "step": 1811 }, { "epoch": 0.03793016829942226, "grad_norm": 0.2079227715730667, "learning_rate": 0.00019992368280790613, "loss": 11.6885, "step": 1812 }, { "epoch": 0.03795110106338441, "grad_norm": 0.19192327558994293, "learning_rate": 0.0001999235971413706, "loss": 11.6717, "step": 1813 }, { "epoch": 0.03797203382734656, "grad_norm": 0.1637592762708664, "learning_rate": 0.0001999235114267998, "loss": 11.6924, "step": 1814 }, { "epoch": 0.03799296659130871, "grad_norm": 0.16383333504199982, "learning_rate": 0.00019992342566419374, "loss": 11.7018, "step": 1815 }, { "epoch": 0.03801389935527087, "grad_norm": 0.1724095493555069, "learning_rate": 0.00019992333985355254, "loss": 11.7002, "step": 1816 }, { "epoch": 0.038034832119233024, "grad_norm": 0.18651637434959412, "learning_rate": 0.00019992325399487615, "loss": 11.683, "step": 1817 }, { "epoch": 0.038055764883195176, "grad_norm": 0.1623365879058838, "learning_rate": 0.00019992316808816466, "loss": 11.6902, "step": 1818 }, { "epoch": 0.03807669764715733, "grad_norm": 0.1774895191192627, "learning_rate": 0.0001999230821334181, "loss": 11.6967, "step": 1819 }, { "epoch": 0.038097630411119486, "grad_norm": 0.2165115624666214, "learning_rate": 0.00019992299613063653, "loss": 11.6896, "step": 1820 }, { "epoch": 0.03811856317508164, "grad_norm": 0.17606817185878754, "learning_rate": 0.00019992291007981996, "loss": 11.6992, "step": 1821 }, { "epoch": 0.03813949593904379, "grad_norm": 0.1652824878692627, "learning_rate": 0.00019992282398096845, "loss": 11.6887, "step": 1822 }, { "epoch": 0.03816042870300595, "grad_norm": 0.15361852943897247, "learning_rate": 0.00019992273783408203, "loss": 11.6887, "step": 1823 }, { "epoch": 0.0381813614669681, "grad_norm": 0.191130131483078, "learning_rate": 0.00019992265163916074, "loss": 11.7033, "step": 1824 }, { "epoch": 0.03820229423093025, "grad_norm": 0.18915283679962158, "learning_rate": 0.00019992256539620466, "loss": 11.6988, "step": 1825 }, { "epoch": 0.038223226994892404, "grad_norm": 0.2086610198020935, "learning_rate": 0.00019992247910521376, "loss": 11.6959, "step": 1826 }, { "epoch": 0.03824415975885456, "grad_norm": 0.17582343518733978, "learning_rate": 0.00019992239276618815, "loss": 11.6884, "step": 1827 }, { "epoch": 0.038265092522816714, "grad_norm": 0.17567095160484314, "learning_rate": 0.0001999223063791278, "loss": 11.6873, "step": 1828 }, { "epoch": 0.038286025286778866, "grad_norm": 0.21099252998828888, "learning_rate": 0.00019992221994403285, "loss": 11.6963, "step": 1829 }, { "epoch": 0.03830695805074102, "grad_norm": 0.1585899144411087, "learning_rate": 0.00019992213346090325, "loss": 11.6871, "step": 1830 }, { "epoch": 0.038327890814703176, "grad_norm": 0.178876593708992, "learning_rate": 0.0001999220469297391, "loss": 11.6906, "step": 1831 }, { "epoch": 0.03834882357866533, "grad_norm": 0.18448799848556519, "learning_rate": 0.0001999219603505404, "loss": 11.6713, "step": 1832 }, { "epoch": 0.03836975634262748, "grad_norm": 0.21442317962646484, "learning_rate": 0.0001999218737233072, "loss": 11.6967, "step": 1833 }, { "epoch": 0.03839068910658963, "grad_norm": 0.1743822693824768, "learning_rate": 0.0001999217870480396, "loss": 11.696, "step": 1834 }, { "epoch": 0.03841162187055179, "grad_norm": 0.24022020399570465, "learning_rate": 0.00019992170032473756, "loss": 11.6989, "step": 1835 }, { "epoch": 0.03843255463451394, "grad_norm": 0.21692633628845215, "learning_rate": 0.00019992161355340116, "loss": 11.6962, "step": 1836 }, { "epoch": 0.038453487398476094, "grad_norm": 0.2182351052761078, "learning_rate": 0.00019992152673403043, "loss": 11.7086, "step": 1837 }, { "epoch": 0.038474420162438246, "grad_norm": 0.19106128811836243, "learning_rate": 0.00019992143986662545, "loss": 11.686, "step": 1838 }, { "epoch": 0.038495352926400404, "grad_norm": 0.19116170704364777, "learning_rate": 0.00019992135295118622, "loss": 11.6946, "step": 1839 }, { "epoch": 0.038516285690362556, "grad_norm": 0.26012352108955383, "learning_rate": 0.00019992126598771277, "loss": 11.6922, "step": 1840 }, { "epoch": 0.03853721845432471, "grad_norm": 0.19290542602539062, "learning_rate": 0.00019992117897620518, "loss": 11.6869, "step": 1841 }, { "epoch": 0.03855815121828686, "grad_norm": 0.1866251528263092, "learning_rate": 0.00019992109191666347, "loss": 11.7002, "step": 1842 }, { "epoch": 0.03857908398224902, "grad_norm": 0.17865796387195587, "learning_rate": 0.0001999210048090877, "loss": 11.6968, "step": 1843 }, { "epoch": 0.03860001674621117, "grad_norm": 0.24708999693393707, "learning_rate": 0.0001999209176534779, "loss": 11.6891, "step": 1844 }, { "epoch": 0.03862094951017332, "grad_norm": 0.19699278473854065, "learning_rate": 0.0001999208304498341, "loss": 11.693, "step": 1845 }, { "epoch": 0.038641882274135474, "grad_norm": 0.24771390855312347, "learning_rate": 0.00019992074319815636, "loss": 11.7047, "step": 1846 }, { "epoch": 0.03866281503809763, "grad_norm": 0.18583358824253082, "learning_rate": 0.00019992065589844474, "loss": 11.6976, "step": 1847 }, { "epoch": 0.038683747802059784, "grad_norm": 0.2007349729537964, "learning_rate": 0.00019992056855069925, "loss": 11.6871, "step": 1848 }, { "epoch": 0.038704680566021936, "grad_norm": 0.1870001256465912, "learning_rate": 0.0001999204811549199, "loss": 11.6926, "step": 1849 }, { "epoch": 0.03872561332998409, "grad_norm": 0.2080266922712326, "learning_rate": 0.00019992039371110682, "loss": 11.6824, "step": 1850 }, { "epoch": 0.038746546093946246, "grad_norm": 0.2261780947446823, "learning_rate": 0.00019992030621926, "loss": 11.681, "step": 1851 }, { "epoch": 0.0387674788579084, "grad_norm": 0.2733144760131836, "learning_rate": 0.00019992021867937948, "loss": 11.7059, "step": 1852 }, { "epoch": 0.03878841162187055, "grad_norm": 0.18795974552631378, "learning_rate": 0.0001999201310914653, "loss": 11.6861, "step": 1853 }, { "epoch": 0.03880934438583271, "grad_norm": 0.21094940602779388, "learning_rate": 0.0001999200434555175, "loss": 11.698, "step": 1854 }, { "epoch": 0.03883027714979486, "grad_norm": 0.25098124146461487, "learning_rate": 0.00019991995577153612, "loss": 11.6944, "step": 1855 }, { "epoch": 0.03885120991375701, "grad_norm": 0.18977421522140503, "learning_rate": 0.00019991986803952127, "loss": 11.6878, "step": 1856 }, { "epoch": 0.038872142677719164, "grad_norm": 0.2595282793045044, "learning_rate": 0.0001999197802594729, "loss": 11.6965, "step": 1857 }, { "epoch": 0.03889307544168132, "grad_norm": 0.19280178844928741, "learning_rate": 0.00019991969243139107, "loss": 11.6892, "step": 1858 }, { "epoch": 0.038914008205643474, "grad_norm": 0.24019765853881836, "learning_rate": 0.00019991960455527588, "loss": 11.6865, "step": 1859 }, { "epoch": 0.038934940969605626, "grad_norm": 0.2101883888244629, "learning_rate": 0.00019991951663112732, "loss": 11.6914, "step": 1860 }, { "epoch": 0.03895587373356778, "grad_norm": 0.160960391163826, "learning_rate": 0.00019991942865894545, "loss": 11.6843, "step": 1861 }, { "epoch": 0.03897680649752994, "grad_norm": 0.18627232313156128, "learning_rate": 0.0001999193406387303, "loss": 11.6935, "step": 1862 }, { "epoch": 0.03899773926149209, "grad_norm": 0.207243874669075, "learning_rate": 0.00019991925257048193, "loss": 11.6754, "step": 1863 }, { "epoch": 0.03901867202545424, "grad_norm": 0.2135990709066391, "learning_rate": 0.00019991916445420035, "loss": 11.6891, "step": 1864 }, { "epoch": 0.03903960478941639, "grad_norm": 0.1821281462907791, "learning_rate": 0.00019991907628988566, "loss": 11.687, "step": 1865 }, { "epoch": 0.03906053755337855, "grad_norm": 0.19504328072071075, "learning_rate": 0.00019991898807753783, "loss": 11.6886, "step": 1866 }, { "epoch": 0.0390814703173407, "grad_norm": 0.29728859663009644, "learning_rate": 0.00019991889981715698, "loss": 11.6954, "step": 1867 }, { "epoch": 0.039102403081302854, "grad_norm": 0.16318964958190918, "learning_rate": 0.0001999188115087431, "loss": 11.6775, "step": 1868 }, { "epoch": 0.039123335845265006, "grad_norm": 0.17562679946422577, "learning_rate": 0.00019991872315229623, "loss": 11.6905, "step": 1869 }, { "epoch": 0.039144268609227165, "grad_norm": 0.17269589006900787, "learning_rate": 0.00019991863474781642, "loss": 11.6901, "step": 1870 }, { "epoch": 0.039165201373189316, "grad_norm": 0.2615796625614166, "learning_rate": 0.00019991854629530375, "loss": 11.7055, "step": 1871 }, { "epoch": 0.03918613413715147, "grad_norm": 0.20445355772972107, "learning_rate": 0.0001999184577947582, "loss": 11.6881, "step": 1872 }, { "epoch": 0.03920706690111362, "grad_norm": 0.1796661913394928, "learning_rate": 0.00019991836924617985, "loss": 11.6818, "step": 1873 }, { "epoch": 0.03922799966507578, "grad_norm": 0.2287951111793518, "learning_rate": 0.0001999182806495688, "loss": 11.687, "step": 1874 }, { "epoch": 0.03924893242903793, "grad_norm": 0.2076094001531601, "learning_rate": 0.00019991819200492495, "loss": 11.7039, "step": 1875 }, { "epoch": 0.03926986519300008, "grad_norm": 0.21726880967617035, "learning_rate": 0.00019991810331224845, "loss": 11.6839, "step": 1876 }, { "epoch": 0.039290797956962234, "grad_norm": 0.16321152448654175, "learning_rate": 0.0001999180145715393, "loss": 11.6955, "step": 1877 }, { "epoch": 0.03931173072092439, "grad_norm": 0.21664297580718994, "learning_rate": 0.0001999179257827976, "loss": 11.6948, "step": 1878 }, { "epoch": 0.039332663484886544, "grad_norm": 0.2162819802761078, "learning_rate": 0.00019991783694602334, "loss": 11.6992, "step": 1879 }, { "epoch": 0.039353596248848696, "grad_norm": 0.2023153454065323, "learning_rate": 0.00019991774806121654, "loss": 11.6907, "step": 1880 }, { "epoch": 0.039374529012810855, "grad_norm": 0.17707067728042603, "learning_rate": 0.0001999176591283773, "loss": 11.6979, "step": 1881 }, { "epoch": 0.03939546177677301, "grad_norm": 0.19622434675693512, "learning_rate": 0.00019991757014750565, "loss": 11.68, "step": 1882 }, { "epoch": 0.03941639454073516, "grad_norm": 0.15399570763111115, "learning_rate": 0.00019991748111860161, "loss": 11.7018, "step": 1883 }, { "epoch": 0.03943732730469731, "grad_norm": 0.18279927968978882, "learning_rate": 0.00019991739204166522, "loss": 11.6958, "step": 1884 }, { "epoch": 0.03945826006865947, "grad_norm": 0.21408303081989288, "learning_rate": 0.00019991730291669656, "loss": 11.6963, "step": 1885 }, { "epoch": 0.03947919283262162, "grad_norm": 0.207020565867424, "learning_rate": 0.00019991721374369563, "loss": 11.7011, "step": 1886 }, { "epoch": 0.03950012559658377, "grad_norm": 0.22083938121795654, "learning_rate": 0.0001999171245226625, "loss": 11.6925, "step": 1887 }, { "epoch": 0.039521058360545924, "grad_norm": 0.1965465247631073, "learning_rate": 0.0001999170352535972, "loss": 11.6833, "step": 1888 }, { "epoch": 0.03954199112450808, "grad_norm": 0.36581045389175415, "learning_rate": 0.0001999169459364998, "loss": 11.7042, "step": 1889 }, { "epoch": 0.039562923888470235, "grad_norm": 0.18853703141212463, "learning_rate": 0.00019991685657137033, "loss": 11.6851, "step": 1890 }, { "epoch": 0.039583856652432386, "grad_norm": 0.21098266541957855, "learning_rate": 0.00019991676715820877, "loss": 11.6969, "step": 1891 }, { "epoch": 0.03960478941639454, "grad_norm": 0.1913221776485443, "learning_rate": 0.00019991667769701525, "loss": 11.6637, "step": 1892 }, { "epoch": 0.0396257221803567, "grad_norm": 0.22160738706588745, "learning_rate": 0.0001999165881877898, "loss": 11.6882, "step": 1893 }, { "epoch": 0.03964665494431885, "grad_norm": 0.23496975004673004, "learning_rate": 0.00019991649863053241, "loss": 11.6975, "step": 1894 }, { "epoch": 0.039667587708281, "grad_norm": 0.2199891209602356, "learning_rate": 0.0001999164090252432, "loss": 11.6898, "step": 1895 }, { "epoch": 0.03968852047224315, "grad_norm": 0.20431359112262726, "learning_rate": 0.0001999163193719221, "loss": 11.697, "step": 1896 }, { "epoch": 0.03970945323620531, "grad_norm": 0.18066637217998505, "learning_rate": 0.00019991622967056925, "loss": 11.6839, "step": 1897 }, { "epoch": 0.03973038600016746, "grad_norm": 0.20156770944595337, "learning_rate": 0.0001999161399211847, "loss": 11.6798, "step": 1898 }, { "epoch": 0.039751318764129615, "grad_norm": 0.21006059646606445, "learning_rate": 0.00019991605012376846, "loss": 11.6933, "step": 1899 }, { "epoch": 0.039772251528091766, "grad_norm": 0.1865173578262329, "learning_rate": 0.00019991596027832054, "loss": 11.6922, "step": 1900 }, { "epoch": 0.039793184292053925, "grad_norm": 0.2636772096157074, "learning_rate": 0.00019991587038484102, "loss": 11.6986, "step": 1901 }, { "epoch": 0.03981411705601608, "grad_norm": 0.22092464566230774, "learning_rate": 0.00019991578044332994, "loss": 11.6956, "step": 1902 }, { "epoch": 0.03983504981997823, "grad_norm": 0.24868132174015045, "learning_rate": 0.00019991569045378738, "loss": 11.6843, "step": 1903 }, { "epoch": 0.03985598258394038, "grad_norm": 0.17647439241409302, "learning_rate": 0.0001999156004162133, "loss": 11.7076, "step": 1904 }, { "epoch": 0.03987691534790254, "grad_norm": 0.15046581625938416, "learning_rate": 0.0001999155103306078, "loss": 11.7006, "step": 1905 }, { "epoch": 0.03989784811186469, "grad_norm": 0.17011085152626038, "learning_rate": 0.0001999154201969709, "loss": 11.7041, "step": 1906 }, { "epoch": 0.03991878087582684, "grad_norm": 0.20463213324546814, "learning_rate": 0.00019991533001530268, "loss": 11.6733, "step": 1907 }, { "epoch": 0.039939713639788994, "grad_norm": 0.17431162297725677, "learning_rate": 0.00019991523978560313, "loss": 11.6858, "step": 1908 }, { "epoch": 0.03996064640375115, "grad_norm": 0.1848105788230896, "learning_rate": 0.00019991514950787236, "loss": 11.6965, "step": 1909 }, { "epoch": 0.039981579167713305, "grad_norm": 0.19757667183876038, "learning_rate": 0.00019991505918211036, "loss": 11.7002, "step": 1910 }, { "epoch": 0.04000251193167546, "grad_norm": 0.2215588241815567, "learning_rate": 0.00019991496880831716, "loss": 11.69, "step": 1911 }, { "epoch": 0.040023444695637615, "grad_norm": 0.19145286083221436, "learning_rate": 0.00019991487838649287, "loss": 11.6829, "step": 1912 }, { "epoch": 0.04004437745959977, "grad_norm": 0.16752086579799652, "learning_rate": 0.00019991478791663746, "loss": 11.6859, "step": 1913 }, { "epoch": 0.04006531022356192, "grad_norm": 0.17015370726585388, "learning_rate": 0.00019991469739875104, "loss": 11.6847, "step": 1914 }, { "epoch": 0.04008624298752407, "grad_norm": 0.14879444241523743, "learning_rate": 0.00019991460683283364, "loss": 11.6948, "step": 1915 }, { "epoch": 0.04010717575148623, "grad_norm": 0.20531146228313446, "learning_rate": 0.00019991451621888525, "loss": 11.6931, "step": 1916 }, { "epoch": 0.04012810851544838, "grad_norm": 0.2719517946243286, "learning_rate": 0.00019991442555690597, "loss": 11.7058, "step": 1917 }, { "epoch": 0.04014904127941053, "grad_norm": 0.2785519063472748, "learning_rate": 0.0001999143348468958, "loss": 11.6975, "step": 1918 }, { "epoch": 0.040169974043372685, "grad_norm": 0.29052823781967163, "learning_rate": 0.00019991424408885482, "loss": 11.6804, "step": 1919 }, { "epoch": 0.04019090680733484, "grad_norm": 0.1930067092180252, "learning_rate": 0.00019991415328278307, "loss": 11.6921, "step": 1920 }, { "epoch": 0.040211839571296995, "grad_norm": 0.1709919571876526, "learning_rate": 0.00019991406242868056, "loss": 11.6859, "step": 1921 }, { "epoch": 0.04023277233525915, "grad_norm": 0.22693264484405518, "learning_rate": 0.00019991397152654736, "loss": 11.7011, "step": 1922 }, { "epoch": 0.0402537050992213, "grad_norm": 0.16379515826702118, "learning_rate": 0.00019991388057638353, "loss": 11.6856, "step": 1923 }, { "epoch": 0.04027463786318346, "grad_norm": 0.18979093432426453, "learning_rate": 0.0001999137895781891, "loss": 11.6979, "step": 1924 }, { "epoch": 0.04029557062714561, "grad_norm": 0.2536502182483673, "learning_rate": 0.0001999136985319641, "loss": 11.6876, "step": 1925 }, { "epoch": 0.04031650339110776, "grad_norm": 0.23173007369041443, "learning_rate": 0.00019991360743770858, "loss": 11.7028, "step": 1926 }, { "epoch": 0.04033743615506991, "grad_norm": 0.2077675312757492, "learning_rate": 0.00019991351629542256, "loss": 11.6878, "step": 1927 }, { "epoch": 0.04035836891903207, "grad_norm": 0.17110708355903625, "learning_rate": 0.00019991342510510617, "loss": 11.6853, "step": 1928 }, { "epoch": 0.04037930168299422, "grad_norm": 0.24986688792705536, "learning_rate": 0.00019991333386675937, "loss": 11.713, "step": 1929 }, { "epoch": 0.040400234446956375, "grad_norm": 0.16923150420188904, "learning_rate": 0.0001999132425803822, "loss": 11.6868, "step": 1930 }, { "epoch": 0.04042116721091853, "grad_norm": 0.1472962200641632, "learning_rate": 0.00019991315124597475, "loss": 11.6845, "step": 1931 }, { "epoch": 0.040442099974880685, "grad_norm": 0.1836709976196289, "learning_rate": 0.00019991305986353706, "loss": 11.6968, "step": 1932 }, { "epoch": 0.04046303273884284, "grad_norm": 0.20905019342899323, "learning_rate": 0.00019991296843306912, "loss": 11.6912, "step": 1933 }, { "epoch": 0.04048396550280499, "grad_norm": 0.20521332323551178, "learning_rate": 0.00019991287695457106, "loss": 11.6866, "step": 1934 }, { "epoch": 0.04050489826676714, "grad_norm": 0.19200633466243744, "learning_rate": 0.00019991278542804287, "loss": 11.677, "step": 1935 }, { "epoch": 0.0405258310307293, "grad_norm": 0.24926282465457916, "learning_rate": 0.0001999126938534846, "loss": 11.6935, "step": 1936 }, { "epoch": 0.04054676379469145, "grad_norm": 0.21226300299167633, "learning_rate": 0.00019991260223089626, "loss": 11.6965, "step": 1937 }, { "epoch": 0.0405676965586536, "grad_norm": 0.23050031065940857, "learning_rate": 0.00019991251056027798, "loss": 11.6969, "step": 1938 }, { "epoch": 0.04058862932261576, "grad_norm": 0.19238509237766266, "learning_rate": 0.0001999124188416297, "loss": 11.7024, "step": 1939 }, { "epoch": 0.04060956208657791, "grad_norm": 0.18348908424377441, "learning_rate": 0.00019991232707495157, "loss": 11.6865, "step": 1940 }, { "epoch": 0.040630494850540065, "grad_norm": 0.21106940507888794, "learning_rate": 0.00019991223526024356, "loss": 11.6937, "step": 1941 }, { "epoch": 0.04065142761450222, "grad_norm": 0.24800235033035278, "learning_rate": 0.00019991214339750574, "loss": 11.7048, "step": 1942 }, { "epoch": 0.040672360378464376, "grad_norm": 0.21363219618797302, "learning_rate": 0.00019991205148673814, "loss": 11.7036, "step": 1943 }, { "epoch": 0.04069329314242653, "grad_norm": 0.1778796911239624, "learning_rate": 0.00019991195952794084, "loss": 11.6871, "step": 1944 }, { "epoch": 0.04071422590638868, "grad_norm": 0.19651195406913757, "learning_rate": 0.00019991186752111383, "loss": 11.7082, "step": 1945 }, { "epoch": 0.04073515867035083, "grad_norm": 0.23070184886455536, "learning_rate": 0.0001999117754662572, "loss": 11.7217, "step": 1946 }, { "epoch": 0.04075609143431299, "grad_norm": 0.1876429319381714, "learning_rate": 0.00019991168336337095, "loss": 11.685, "step": 1947 }, { "epoch": 0.04077702419827514, "grad_norm": 0.21715889871120453, "learning_rate": 0.00019991159121245518, "loss": 11.6785, "step": 1948 }, { "epoch": 0.04079795696223729, "grad_norm": 0.2126368135213852, "learning_rate": 0.0001999114990135099, "loss": 11.6948, "step": 1949 }, { "epoch": 0.040818889726199445, "grad_norm": 0.1770307570695877, "learning_rate": 0.00019991140676653516, "loss": 11.6906, "step": 1950 }, { "epoch": 0.040839822490161604, "grad_norm": 0.1840764433145523, "learning_rate": 0.000199911314471531, "loss": 11.691, "step": 1951 }, { "epoch": 0.040860755254123755, "grad_norm": 0.1890905648469925, "learning_rate": 0.00019991122212849747, "loss": 11.6886, "step": 1952 }, { "epoch": 0.04088168801808591, "grad_norm": 0.23662446439266205, "learning_rate": 0.00019991112973743463, "loss": 11.6932, "step": 1953 }, { "epoch": 0.04090262078204806, "grad_norm": 0.19402433931827545, "learning_rate": 0.0001999110372983425, "loss": 11.6924, "step": 1954 }, { "epoch": 0.04092355354601022, "grad_norm": 0.19005492329597473, "learning_rate": 0.00019991094481122112, "loss": 11.6947, "step": 1955 }, { "epoch": 0.04094448630997237, "grad_norm": 0.30281057953834534, "learning_rate": 0.00019991085227607055, "loss": 11.6951, "step": 1956 }, { "epoch": 0.04096541907393452, "grad_norm": 0.2309803068637848, "learning_rate": 0.00019991075969289082, "loss": 11.6786, "step": 1957 }, { "epoch": 0.04098635183789667, "grad_norm": 0.19095179438591003, "learning_rate": 0.000199910667061682, "loss": 11.6948, "step": 1958 }, { "epoch": 0.04100728460185883, "grad_norm": 0.21261349320411682, "learning_rate": 0.00019991057438244412, "loss": 11.6905, "step": 1959 }, { "epoch": 0.04102821736582098, "grad_norm": 0.20563682913780212, "learning_rate": 0.00019991048165517722, "loss": 11.6913, "step": 1960 }, { "epoch": 0.041049150129783135, "grad_norm": 0.20768354833126068, "learning_rate": 0.00019991038887988137, "loss": 11.6823, "step": 1961 }, { "epoch": 0.04107008289374529, "grad_norm": 0.1865091472864151, "learning_rate": 0.00019991029605655655, "loss": 11.6909, "step": 1962 }, { "epoch": 0.041091015657707446, "grad_norm": 0.18984158337116241, "learning_rate": 0.00019991020318520288, "loss": 11.6873, "step": 1963 }, { "epoch": 0.0411119484216696, "grad_norm": 0.20225374400615692, "learning_rate": 0.00019991011026582038, "loss": 11.6943, "step": 1964 }, { "epoch": 0.04113288118563175, "grad_norm": 0.20397986471652985, "learning_rate": 0.00019991001729840905, "loss": 11.6842, "step": 1965 }, { "epoch": 0.0411538139495939, "grad_norm": 0.20764239132404327, "learning_rate": 0.00019990992428296902, "loss": 11.6849, "step": 1966 }, { "epoch": 0.04117474671355606, "grad_norm": 0.2110804170370102, "learning_rate": 0.00019990983121950025, "loss": 11.696, "step": 1967 }, { "epoch": 0.04119567947751821, "grad_norm": 0.2194211632013321, "learning_rate": 0.00019990973810800285, "loss": 11.6858, "step": 1968 }, { "epoch": 0.04121661224148036, "grad_norm": 0.14861978590488434, "learning_rate": 0.00019990964494847683, "loss": 11.6823, "step": 1969 }, { "epoch": 0.04123754500544252, "grad_norm": 0.1747942566871643, "learning_rate": 0.00019990955174092222, "loss": 11.6918, "step": 1970 }, { "epoch": 0.041258477769404674, "grad_norm": 0.20299527049064636, "learning_rate": 0.00019990945848533912, "loss": 11.7188, "step": 1971 }, { "epoch": 0.041279410533366825, "grad_norm": 0.17580600082874298, "learning_rate": 0.0001999093651817275, "loss": 11.7072, "step": 1972 }, { "epoch": 0.04130034329732898, "grad_norm": 0.20460742712020874, "learning_rate": 0.0001999092718300875, "loss": 11.6828, "step": 1973 }, { "epoch": 0.041321276061291136, "grad_norm": 0.2506440579891205, "learning_rate": 0.00019990917843041905, "loss": 11.6953, "step": 1974 }, { "epoch": 0.04134220882525329, "grad_norm": 0.22308965027332306, "learning_rate": 0.0001999090849827223, "loss": 11.6865, "step": 1975 }, { "epoch": 0.04136314158921544, "grad_norm": 0.196555495262146, "learning_rate": 0.00019990899148699724, "loss": 11.6901, "step": 1976 }, { "epoch": 0.04138407435317759, "grad_norm": 0.19013893604278564, "learning_rate": 0.00019990889794324393, "loss": 11.6808, "step": 1977 }, { "epoch": 0.04140500711713975, "grad_norm": 0.21117405593395233, "learning_rate": 0.00019990880435146239, "loss": 11.6925, "step": 1978 }, { "epoch": 0.0414259398811019, "grad_norm": 0.18939225375652313, "learning_rate": 0.0001999087107116527, "loss": 11.6786, "step": 1979 }, { "epoch": 0.04144687264506405, "grad_norm": 0.1620718091726303, "learning_rate": 0.00019990861702381488, "loss": 11.691, "step": 1980 }, { "epoch": 0.041467805409026205, "grad_norm": 0.2793099880218506, "learning_rate": 0.000199908523287949, "loss": 11.6982, "step": 1981 }, { "epoch": 0.041488738172988364, "grad_norm": 0.19830739498138428, "learning_rate": 0.0001999084295040551, "loss": 11.7049, "step": 1982 }, { "epoch": 0.041509670936950516, "grad_norm": 0.17789191007614136, "learning_rate": 0.00019990833567213318, "loss": 11.6884, "step": 1983 }, { "epoch": 0.04153060370091267, "grad_norm": 0.20679813623428345, "learning_rate": 0.00019990824179218336, "loss": 11.6892, "step": 1984 }, { "epoch": 0.04155153646487482, "grad_norm": 0.1798635870218277, "learning_rate": 0.00019990814786420563, "loss": 11.687, "step": 1985 }, { "epoch": 0.04157246922883698, "grad_norm": 0.22747057676315308, "learning_rate": 0.00019990805388820003, "loss": 11.6914, "step": 1986 }, { "epoch": 0.04159340199279913, "grad_norm": 0.18832553923130035, "learning_rate": 0.00019990795986416665, "loss": 11.6969, "step": 1987 }, { "epoch": 0.04161433475676128, "grad_norm": 0.17242088913917542, "learning_rate": 0.0001999078657921055, "loss": 11.6977, "step": 1988 }, { "epoch": 0.04163526752072343, "grad_norm": 0.21465781331062317, "learning_rate": 0.00019990777167201663, "loss": 11.7071, "step": 1989 }, { "epoch": 0.04165620028468559, "grad_norm": 0.19739125669002533, "learning_rate": 0.0001999076775039001, "loss": 11.6979, "step": 1990 }, { "epoch": 0.041677133048647744, "grad_norm": 0.22503745555877686, "learning_rate": 0.000199907583287756, "loss": 11.7098, "step": 1991 }, { "epoch": 0.041698065812609895, "grad_norm": 0.22234907746315002, "learning_rate": 0.00019990748902358422, "loss": 11.6869, "step": 1992 }, { "epoch": 0.04171899857657205, "grad_norm": 0.22169961035251617, "learning_rate": 0.00019990739471138498, "loss": 11.6847, "step": 1993 }, { "epoch": 0.041739931340534206, "grad_norm": 0.21918493509292603, "learning_rate": 0.00019990730035115823, "loss": 11.6871, "step": 1994 }, { "epoch": 0.04176086410449636, "grad_norm": 0.20794105529785156, "learning_rate": 0.00019990720594290404, "loss": 11.6934, "step": 1995 }, { "epoch": 0.04178179686845851, "grad_norm": 0.22862665355205536, "learning_rate": 0.0001999071114866225, "loss": 11.6957, "step": 1996 }, { "epoch": 0.04180272963242067, "grad_norm": 0.21029870212078094, "learning_rate": 0.00019990701698231354, "loss": 11.688, "step": 1997 }, { "epoch": 0.04182366239638282, "grad_norm": 0.28777968883514404, "learning_rate": 0.0001999069224299773, "loss": 11.6883, "step": 1998 }, { "epoch": 0.04184459516034497, "grad_norm": 0.1791917383670807, "learning_rate": 0.0001999068278296138, "loss": 11.7023, "step": 1999 }, { "epoch": 0.041865527924307123, "grad_norm": 0.23200559616088867, "learning_rate": 0.00019990673318122308, "loss": 11.7008, "step": 2000 }, { "epoch": 0.041865527924307123, "eval_loss": 11.691899299621582, "eval_runtime": 34.3641, "eval_samples_per_second": 27.965, "eval_steps_per_second": 7.013, "step": 2000 }, { "epoch": 0.04188646068826928, "grad_norm": 0.1926249861717224, "learning_rate": 0.00019990663848480522, "loss": 11.6872, "step": 2001 }, { "epoch": 0.041907393452231434, "grad_norm": 0.2421025037765503, "learning_rate": 0.0001999065437403602, "loss": 11.695, "step": 2002 }, { "epoch": 0.041928326216193586, "grad_norm": 0.18050441145896912, "learning_rate": 0.00019990644894788814, "loss": 11.6948, "step": 2003 }, { "epoch": 0.04194925898015574, "grad_norm": 0.18859733641147614, "learning_rate": 0.00019990635410738902, "loss": 11.696, "step": 2004 }, { "epoch": 0.041970191744117896, "grad_norm": 0.18509113788604736, "learning_rate": 0.00019990625921886292, "loss": 11.7109, "step": 2005 }, { "epoch": 0.04199112450808005, "grad_norm": 0.2028629034757614, "learning_rate": 0.00019990616428230986, "loss": 11.6973, "step": 2006 }, { "epoch": 0.0420120572720422, "grad_norm": 0.2675866186618805, "learning_rate": 0.00019990606929772993, "loss": 11.7052, "step": 2007 }, { "epoch": 0.04203299003600435, "grad_norm": 0.1827852427959442, "learning_rate": 0.00019990597426512317, "loss": 11.7007, "step": 2008 }, { "epoch": 0.04205392279996651, "grad_norm": 0.2643209397792816, "learning_rate": 0.00019990587918448957, "loss": 11.7039, "step": 2009 }, { "epoch": 0.04207485556392866, "grad_norm": 0.22506923973560333, "learning_rate": 0.0001999057840558292, "loss": 11.6944, "step": 2010 }, { "epoch": 0.042095788327890814, "grad_norm": 0.21118344366550446, "learning_rate": 0.00019990568887914213, "loss": 11.6993, "step": 2011 }, { "epoch": 0.042116721091852966, "grad_norm": 0.24563150107860565, "learning_rate": 0.00019990559365442844, "loss": 11.6821, "step": 2012 }, { "epoch": 0.042137653855815124, "grad_norm": 0.22892585396766663, "learning_rate": 0.00019990549838168806, "loss": 11.7001, "step": 2013 }, { "epoch": 0.042158586619777276, "grad_norm": 0.1760919690132141, "learning_rate": 0.00019990540306092112, "loss": 11.6896, "step": 2014 }, { "epoch": 0.04217951938373943, "grad_norm": 0.22039225697517395, "learning_rate": 0.0001999053076921277, "loss": 11.6905, "step": 2015 }, { "epoch": 0.04220045214770158, "grad_norm": 0.21134738624095917, "learning_rate": 0.00019990521227530774, "loss": 11.7047, "step": 2016 }, { "epoch": 0.04222138491166374, "grad_norm": 0.19934573769569397, "learning_rate": 0.00019990511681046137, "loss": 11.6823, "step": 2017 }, { "epoch": 0.04224231767562589, "grad_norm": 0.15831471979618073, "learning_rate": 0.00019990502129758862, "loss": 11.6679, "step": 2018 }, { "epoch": 0.04226325043958804, "grad_norm": 0.20102305710315704, "learning_rate": 0.0001999049257366895, "loss": 11.6834, "step": 2019 }, { "epoch": 0.042284183203550194, "grad_norm": 0.2602069675922394, "learning_rate": 0.0001999048301277641, "loss": 11.6849, "step": 2020 }, { "epoch": 0.04230511596751235, "grad_norm": 0.2875570058822632, "learning_rate": 0.00019990473447081245, "loss": 11.6896, "step": 2021 }, { "epoch": 0.042326048731474504, "grad_norm": 0.16458892822265625, "learning_rate": 0.00019990463876583454, "loss": 11.6938, "step": 2022 }, { "epoch": 0.042346981495436656, "grad_norm": 0.2332230508327484, "learning_rate": 0.00019990454301283052, "loss": 11.7026, "step": 2023 }, { "epoch": 0.04236791425939881, "grad_norm": 0.20414690673351288, "learning_rate": 0.00019990444721180035, "loss": 11.7021, "step": 2024 }, { "epoch": 0.042388847023360966, "grad_norm": 0.1825198382139206, "learning_rate": 0.00019990435136274415, "loss": 11.6773, "step": 2025 }, { "epoch": 0.04240977978732312, "grad_norm": 0.18461336195468903, "learning_rate": 0.0001999042554656619, "loss": 11.6857, "step": 2026 }, { "epoch": 0.04243071255128527, "grad_norm": 0.17724277079105377, "learning_rate": 0.00019990415952055366, "loss": 11.6949, "step": 2027 }, { "epoch": 0.04245164531524743, "grad_norm": 0.19955292344093323, "learning_rate": 0.0001999040635274195, "loss": 11.6949, "step": 2028 }, { "epoch": 0.04247257807920958, "grad_norm": 0.20618894696235657, "learning_rate": 0.00019990396748625946, "loss": 11.697, "step": 2029 }, { "epoch": 0.04249351084317173, "grad_norm": 0.17658090591430664, "learning_rate": 0.0001999038713970736, "loss": 11.6818, "step": 2030 }, { "epoch": 0.042514443607133884, "grad_norm": 0.21773891150951385, "learning_rate": 0.00019990377525986192, "loss": 11.6959, "step": 2031 }, { "epoch": 0.04253537637109604, "grad_norm": 0.25529250502586365, "learning_rate": 0.0001999036790746245, "loss": 11.6946, "step": 2032 }, { "epoch": 0.042556309135058194, "grad_norm": 0.22659973800182343, "learning_rate": 0.00019990358284136138, "loss": 11.6851, "step": 2033 }, { "epoch": 0.042577241899020346, "grad_norm": 0.2168387472629547, "learning_rate": 0.00019990348656007261, "loss": 11.693, "step": 2034 }, { "epoch": 0.0425981746629825, "grad_norm": 0.20992520451545715, "learning_rate": 0.00019990339023075824, "loss": 11.6852, "step": 2035 }, { "epoch": 0.04261910742694466, "grad_norm": 0.18874591588974, "learning_rate": 0.00019990329385341827, "loss": 11.6945, "step": 2036 }, { "epoch": 0.04264004019090681, "grad_norm": 0.2071966528892517, "learning_rate": 0.0001999031974280528, "loss": 11.6906, "step": 2037 }, { "epoch": 0.04266097295486896, "grad_norm": 0.17918837070465088, "learning_rate": 0.0001999031009546619, "loss": 11.6868, "step": 2038 }, { "epoch": 0.04268190571883111, "grad_norm": 0.29140445590019226, "learning_rate": 0.00019990300443324555, "loss": 11.7063, "step": 2039 }, { "epoch": 0.04270283848279327, "grad_norm": 0.20870216190814972, "learning_rate": 0.0001999029078638038, "loss": 11.6991, "step": 2040 }, { "epoch": 0.04272377124675542, "grad_norm": 0.1817610114812851, "learning_rate": 0.00019990281124633677, "loss": 11.6835, "step": 2041 }, { "epoch": 0.042744704010717574, "grad_norm": 0.19880527257919312, "learning_rate": 0.0001999027145808444, "loss": 11.7015, "step": 2042 }, { "epoch": 0.042765636774679726, "grad_norm": 0.2196541279554367, "learning_rate": 0.0001999026178673268, "loss": 11.6917, "step": 2043 }, { "epoch": 0.042786569538641885, "grad_norm": 0.19103428721427917, "learning_rate": 0.00019990252110578408, "loss": 11.6932, "step": 2044 }, { "epoch": 0.042807502302604036, "grad_norm": 0.16572946310043335, "learning_rate": 0.00019990242429621615, "loss": 11.6905, "step": 2045 }, { "epoch": 0.04282843506656619, "grad_norm": 0.2169056534767151, "learning_rate": 0.00019990232743862313, "loss": 11.679, "step": 2046 }, { "epoch": 0.04284936783052834, "grad_norm": 0.24378043413162231, "learning_rate": 0.00019990223053300508, "loss": 11.675, "step": 2047 }, { "epoch": 0.0428703005944905, "grad_norm": 0.21816086769104004, "learning_rate": 0.000199902133579362, "loss": 11.7006, "step": 2048 }, { "epoch": 0.04289123335845265, "grad_norm": 0.20578965544700623, "learning_rate": 0.00019990203657769398, "loss": 11.6924, "step": 2049 }, { "epoch": 0.0429121661224148, "grad_norm": 0.2734138071537018, "learning_rate": 0.00019990193952800105, "loss": 11.7009, "step": 2050 }, { "epoch": 0.042933098886376954, "grad_norm": 0.18753376603126526, "learning_rate": 0.00019990184243028324, "loss": 11.6832, "step": 2051 }, { "epoch": 0.04295403165033911, "grad_norm": 0.17774860560894012, "learning_rate": 0.00019990174528454065, "loss": 11.6948, "step": 2052 }, { "epoch": 0.042974964414301264, "grad_norm": 0.2659785747528076, "learning_rate": 0.00019990164809077326, "loss": 11.6948, "step": 2053 }, { "epoch": 0.042995897178263416, "grad_norm": 0.22773435711860657, "learning_rate": 0.00019990155084898112, "loss": 11.6785, "step": 2054 }, { "epoch": 0.043016829942225575, "grad_norm": 0.18319858610630035, "learning_rate": 0.00019990145355916434, "loss": 11.6986, "step": 2055 }, { "epoch": 0.04303776270618773, "grad_norm": 0.1894085705280304, "learning_rate": 0.00019990135622132294, "loss": 11.6834, "step": 2056 }, { "epoch": 0.04305869547014988, "grad_norm": 0.19811289012432098, "learning_rate": 0.00019990125883545692, "loss": 11.6915, "step": 2057 }, { "epoch": 0.04307962823411203, "grad_norm": 0.18629254400730133, "learning_rate": 0.0001999011614015664, "loss": 11.6798, "step": 2058 }, { "epoch": 0.04310056099807419, "grad_norm": 0.20501954853534698, "learning_rate": 0.00019990106391965136, "loss": 11.6891, "step": 2059 }, { "epoch": 0.04312149376203634, "grad_norm": 0.17077085375785828, "learning_rate": 0.00019990096638971189, "loss": 11.682, "step": 2060 }, { "epoch": 0.04314242652599849, "grad_norm": 0.17544899880886078, "learning_rate": 0.000199900868811748, "loss": 11.6948, "step": 2061 }, { "epoch": 0.043163359289960644, "grad_norm": 0.20619076490402222, "learning_rate": 0.0001999007711857598, "loss": 11.6882, "step": 2062 }, { "epoch": 0.0431842920539228, "grad_norm": 0.18304923176765442, "learning_rate": 0.00019990067351174725, "loss": 11.704, "step": 2063 }, { "epoch": 0.043205224817884955, "grad_norm": 0.2043222188949585, "learning_rate": 0.00019990057578971046, "loss": 11.6953, "step": 2064 }, { "epoch": 0.043226157581847106, "grad_norm": 0.17939594388008118, "learning_rate": 0.00019990047801964948, "loss": 11.6853, "step": 2065 }, { "epoch": 0.04324709034580926, "grad_norm": 0.21067652106285095, "learning_rate": 0.00019990038020156433, "loss": 11.6909, "step": 2066 }, { "epoch": 0.04326802310977142, "grad_norm": 0.20347560942173004, "learning_rate": 0.00019990028233545507, "loss": 11.6933, "step": 2067 }, { "epoch": 0.04328895587373357, "grad_norm": 0.1693362295627594, "learning_rate": 0.00019990018442132175, "loss": 11.6946, "step": 2068 }, { "epoch": 0.04330988863769572, "grad_norm": 0.3141755759716034, "learning_rate": 0.00019990008645916438, "loss": 11.6918, "step": 2069 }, { "epoch": 0.04333082140165787, "grad_norm": 0.23367606103420258, "learning_rate": 0.00019989998844898308, "loss": 11.6768, "step": 2070 }, { "epoch": 0.04335175416562003, "grad_norm": 0.23011039197444916, "learning_rate": 0.0001998998903907778, "loss": 11.702, "step": 2071 }, { "epoch": 0.04337268692958218, "grad_norm": 0.18738499283790588, "learning_rate": 0.0001998997922845487, "loss": 11.6859, "step": 2072 }, { "epoch": 0.043393619693544334, "grad_norm": 0.16890814900398254, "learning_rate": 0.00019989969413029571, "loss": 11.7038, "step": 2073 }, { "epoch": 0.043414552457506486, "grad_norm": 0.18939828872680664, "learning_rate": 0.00019989959592801895, "loss": 11.6704, "step": 2074 }, { "epoch": 0.043435485221468645, "grad_norm": 0.20424848794937134, "learning_rate": 0.00019989949767771847, "loss": 11.692, "step": 2075 }, { "epoch": 0.0434564179854308, "grad_norm": 0.1622440665960312, "learning_rate": 0.0001998993993793943, "loss": 11.6928, "step": 2076 }, { "epoch": 0.04347735074939295, "grad_norm": 0.19158895313739777, "learning_rate": 0.00019989930103304646, "loss": 11.6955, "step": 2077 }, { "epoch": 0.0434982835133551, "grad_norm": 0.19535431265830994, "learning_rate": 0.00019989920263867506, "loss": 11.688, "step": 2078 }, { "epoch": 0.04351921627731726, "grad_norm": 0.21397823095321655, "learning_rate": 0.0001998991041962801, "loss": 11.7016, "step": 2079 }, { "epoch": 0.04354014904127941, "grad_norm": 0.32056736946105957, "learning_rate": 0.0001998990057058616, "loss": 11.7039, "step": 2080 }, { "epoch": 0.04356108180524156, "grad_norm": 0.21669965982437134, "learning_rate": 0.0001998989071674197, "loss": 11.6899, "step": 2081 }, { "epoch": 0.043582014569203714, "grad_norm": 0.18255376815795898, "learning_rate": 0.00019989880858095437, "loss": 11.6776, "step": 2082 }, { "epoch": 0.04360294733316587, "grad_norm": 0.19071033596992493, "learning_rate": 0.00019989870994646566, "loss": 11.7012, "step": 2083 }, { "epoch": 0.043623880097128025, "grad_norm": 0.1759682446718216, "learning_rate": 0.00019989861126395368, "loss": 11.6907, "step": 2084 }, { "epoch": 0.043644812861090176, "grad_norm": 0.1911759376525879, "learning_rate": 0.00019989851253341843, "loss": 11.7006, "step": 2085 }, { "epoch": 0.043665745625052335, "grad_norm": 0.2084493190050125, "learning_rate": 0.00019989841375485994, "loss": 11.6953, "step": 2086 }, { "epoch": 0.04368667838901449, "grad_norm": 0.21097096800804138, "learning_rate": 0.00019989831492827828, "loss": 11.691, "step": 2087 }, { "epoch": 0.04370761115297664, "grad_norm": 0.20818141102790833, "learning_rate": 0.00019989821605367352, "loss": 11.6852, "step": 2088 }, { "epoch": 0.04372854391693879, "grad_norm": 0.18231505155563354, "learning_rate": 0.00019989811713104567, "loss": 11.6902, "step": 2089 }, { "epoch": 0.04374947668090095, "grad_norm": 0.17765365540981293, "learning_rate": 0.0001998980181603948, "loss": 11.694, "step": 2090 }, { "epoch": 0.0437704094448631, "grad_norm": 0.19760869443416595, "learning_rate": 0.00019989791914172095, "loss": 11.6831, "step": 2091 }, { "epoch": 0.04379134220882525, "grad_norm": 0.257284939289093, "learning_rate": 0.00019989782007502415, "loss": 11.6823, "step": 2092 }, { "epoch": 0.043812274972787404, "grad_norm": 0.1908496618270874, "learning_rate": 0.0001998977209603045, "loss": 11.6895, "step": 2093 }, { "epoch": 0.04383320773674956, "grad_norm": 0.1977318972349167, "learning_rate": 0.00019989762179756197, "loss": 11.6897, "step": 2094 }, { "epoch": 0.043854140500711715, "grad_norm": 0.19211864471435547, "learning_rate": 0.00019989752258679669, "loss": 11.6801, "step": 2095 }, { "epoch": 0.04387507326467387, "grad_norm": 0.18775737285614014, "learning_rate": 0.00019989742332800865, "loss": 11.6927, "step": 2096 }, { "epoch": 0.04389600602863602, "grad_norm": 0.20666900277137756, "learning_rate": 0.0001998973240211979, "loss": 11.6885, "step": 2097 }, { "epoch": 0.04391693879259818, "grad_norm": 0.179963156580925, "learning_rate": 0.00019989722466636455, "loss": 11.6894, "step": 2098 }, { "epoch": 0.04393787155656033, "grad_norm": 0.19678618013858795, "learning_rate": 0.0001998971252635086, "loss": 11.6855, "step": 2099 }, { "epoch": 0.04395880432052248, "grad_norm": 0.21039412915706635, "learning_rate": 0.00019989702581263007, "loss": 11.6959, "step": 2100 }, { "epoch": 0.04397973708448463, "grad_norm": 0.20637358725070953, "learning_rate": 0.00019989692631372903, "loss": 11.6914, "step": 2101 }, { "epoch": 0.04400066984844679, "grad_norm": 0.20533156394958496, "learning_rate": 0.00019989682676680554, "loss": 11.6877, "step": 2102 }, { "epoch": 0.04402160261240894, "grad_norm": 0.17706432938575745, "learning_rate": 0.00019989672717185964, "loss": 11.6932, "step": 2103 }, { "epoch": 0.044042535376371095, "grad_norm": 0.1997889131307602, "learning_rate": 0.00019989662752889142, "loss": 11.6938, "step": 2104 }, { "epoch": 0.044063468140333246, "grad_norm": 0.163868248462677, "learning_rate": 0.00019989652783790085, "loss": 11.6884, "step": 2105 }, { "epoch": 0.044084400904295405, "grad_norm": 0.20794302225112915, "learning_rate": 0.00019989642809888804, "loss": 11.6919, "step": 2106 }, { "epoch": 0.04410533366825756, "grad_norm": 0.2105337679386139, "learning_rate": 0.00019989632831185302, "loss": 11.6897, "step": 2107 }, { "epoch": 0.04412626643221971, "grad_norm": 0.22137956321239471, "learning_rate": 0.0001998962284767958, "loss": 11.6808, "step": 2108 }, { "epoch": 0.04414719919618186, "grad_norm": 0.2083338499069214, "learning_rate": 0.00019989612859371648, "loss": 11.6836, "step": 2109 }, { "epoch": 0.04416813196014402, "grad_norm": 0.18747936189174652, "learning_rate": 0.0001998960286626151, "loss": 11.6988, "step": 2110 }, { "epoch": 0.04418906472410617, "grad_norm": 0.23167335987091064, "learning_rate": 0.0001998959286834917, "loss": 11.6887, "step": 2111 }, { "epoch": 0.04420999748806832, "grad_norm": 0.18372222781181335, "learning_rate": 0.0001998958286563463, "loss": 11.6927, "step": 2112 }, { "epoch": 0.04423093025203048, "grad_norm": 0.23363164067268372, "learning_rate": 0.00019989572858117899, "loss": 11.6983, "step": 2113 }, { "epoch": 0.04425186301599263, "grad_norm": 0.25024935603141785, "learning_rate": 0.0001998956284579898, "loss": 11.6882, "step": 2114 }, { "epoch": 0.044272795779954785, "grad_norm": 0.18535417318344116, "learning_rate": 0.00019989552828677878, "loss": 11.6727, "step": 2115 }, { "epoch": 0.04429372854391694, "grad_norm": 0.17848168313503265, "learning_rate": 0.00019989542806754597, "loss": 11.6958, "step": 2116 }, { "epoch": 0.044314661307879095, "grad_norm": 0.20962601900100708, "learning_rate": 0.00019989532780029142, "loss": 11.6775, "step": 2117 }, { "epoch": 0.04433559407184125, "grad_norm": 0.3050377368927002, "learning_rate": 0.0001998952274850152, "loss": 11.7039, "step": 2118 }, { "epoch": 0.0443565268358034, "grad_norm": 0.24818937480449677, "learning_rate": 0.00019989512712171735, "loss": 11.691, "step": 2119 }, { "epoch": 0.04437745959976555, "grad_norm": 0.214211106300354, "learning_rate": 0.00019989502671039787, "loss": 11.6989, "step": 2120 }, { "epoch": 0.04439839236372771, "grad_norm": 0.1791163682937622, "learning_rate": 0.00019989492625105691, "loss": 11.6726, "step": 2121 }, { "epoch": 0.04441932512768986, "grad_norm": 0.24609428644180298, "learning_rate": 0.0001998948257436944, "loss": 11.694, "step": 2122 }, { "epoch": 0.04444025789165201, "grad_norm": 0.25669294595718384, "learning_rate": 0.00019989472518831046, "loss": 11.6996, "step": 2123 }, { "epoch": 0.044461190655614165, "grad_norm": 0.1745741367340088, "learning_rate": 0.00019989462458490517, "loss": 11.6904, "step": 2124 }, { "epoch": 0.044482123419576323, "grad_norm": 0.2009766846895218, "learning_rate": 0.00019989452393347844, "loss": 11.6823, "step": 2125 }, { "epoch": 0.044503056183538475, "grad_norm": 0.1860952079296112, "learning_rate": 0.00019989442323403047, "loss": 11.6966, "step": 2126 }, { "epoch": 0.04452398894750063, "grad_norm": 0.18486204743385315, "learning_rate": 0.00019989432248656127, "loss": 11.692, "step": 2127 }, { "epoch": 0.04454492171146278, "grad_norm": 0.24851523339748383, "learning_rate": 0.00019989422169107082, "loss": 11.6941, "step": 2128 }, { "epoch": 0.04456585447542494, "grad_norm": 0.252393901348114, "learning_rate": 0.00019989412084755922, "loss": 11.6963, "step": 2129 }, { "epoch": 0.04458678723938709, "grad_norm": 0.2456841915845871, "learning_rate": 0.00019989401995602654, "loss": 11.7064, "step": 2130 }, { "epoch": 0.04460772000334924, "grad_norm": 0.21431134641170502, "learning_rate": 0.00019989391901647282, "loss": 11.687, "step": 2131 }, { "epoch": 0.04462865276731139, "grad_norm": 0.17053639888763428, "learning_rate": 0.00019989381802889807, "loss": 11.6836, "step": 2132 }, { "epoch": 0.04464958553127355, "grad_norm": 0.35788068175315857, "learning_rate": 0.00019989371699330235, "loss": 11.7058, "step": 2133 }, { "epoch": 0.0446705182952357, "grad_norm": 0.18493513762950897, "learning_rate": 0.00019989361590968573, "loss": 11.6914, "step": 2134 }, { "epoch": 0.044691451059197855, "grad_norm": 0.1945054680109024, "learning_rate": 0.0001998935147780482, "loss": 11.7029, "step": 2135 }, { "epoch": 0.04471238382316001, "grad_norm": 0.17933407425880432, "learning_rate": 0.00019989341359838992, "loss": 11.696, "step": 2136 }, { "epoch": 0.044733316587122166, "grad_norm": 0.2596132755279541, "learning_rate": 0.00019989331237071082, "loss": 11.7019, "step": 2137 }, { "epoch": 0.04475424935108432, "grad_norm": 0.2366141378879547, "learning_rate": 0.00019989321109501105, "loss": 11.6868, "step": 2138 }, { "epoch": 0.04477518211504647, "grad_norm": 0.20210860669612885, "learning_rate": 0.0001998931097712906, "loss": 11.6974, "step": 2139 }, { "epoch": 0.04479611487900863, "grad_norm": 0.17652416229248047, "learning_rate": 0.0001998930083995495, "loss": 11.689, "step": 2140 }, { "epoch": 0.04481704764297078, "grad_norm": 0.1527976542711258, "learning_rate": 0.00019989290697978787, "loss": 11.6889, "step": 2141 }, { "epoch": 0.04483798040693293, "grad_norm": 0.21039028465747833, "learning_rate": 0.0001998928055120057, "loss": 11.6835, "step": 2142 }, { "epoch": 0.04485891317089508, "grad_norm": 0.17594504356384277, "learning_rate": 0.00019989270399620307, "loss": 11.6723, "step": 2143 }, { "epoch": 0.04487984593485724, "grad_norm": 0.21689572930335999, "learning_rate": 0.00019989260243237999, "loss": 11.7009, "step": 2144 }, { "epoch": 0.044900778698819394, "grad_norm": 0.2186063677072525, "learning_rate": 0.00019989250082053653, "loss": 11.6975, "step": 2145 }, { "epoch": 0.044921711462781545, "grad_norm": 0.20465019345283508, "learning_rate": 0.00019989239916067277, "loss": 11.6912, "step": 2146 }, { "epoch": 0.0449426442267437, "grad_norm": 0.20454446971416473, "learning_rate": 0.00019989229745278874, "loss": 11.7002, "step": 2147 }, { "epoch": 0.044963576990705856, "grad_norm": 0.28617915511131287, "learning_rate": 0.00019989219569688444, "loss": 11.7018, "step": 2148 }, { "epoch": 0.04498450975466801, "grad_norm": 0.16860440373420715, "learning_rate": 0.00019989209389296, "loss": 11.6828, "step": 2149 }, { "epoch": 0.04500544251863016, "grad_norm": 0.19384998083114624, "learning_rate": 0.0001998919920410154, "loss": 11.7054, "step": 2150 }, { "epoch": 0.04502637528259231, "grad_norm": 0.20638011395931244, "learning_rate": 0.00019989189014105071, "loss": 11.7146, "step": 2151 }, { "epoch": 0.04504730804655447, "grad_norm": 0.19172360002994537, "learning_rate": 0.00019989178819306602, "loss": 11.6954, "step": 2152 }, { "epoch": 0.04506824081051662, "grad_norm": 0.19583064317703247, "learning_rate": 0.0001998916861970613, "loss": 11.7023, "step": 2153 }, { "epoch": 0.04508917357447877, "grad_norm": 0.23495638370513916, "learning_rate": 0.0001998915841530367, "loss": 11.683, "step": 2154 }, { "epoch": 0.045110106338440925, "grad_norm": 0.1994386613368988, "learning_rate": 0.0001998914820609922, "loss": 11.6781, "step": 2155 }, { "epoch": 0.045131039102403084, "grad_norm": 0.3011615574359894, "learning_rate": 0.00019989137992092783, "loss": 11.7157, "step": 2156 }, { "epoch": 0.045151971866365236, "grad_norm": 0.17901566624641418, "learning_rate": 0.0001998912777328437, "loss": 11.7005, "step": 2157 }, { "epoch": 0.04517290463032739, "grad_norm": 0.20235444605350494, "learning_rate": 0.0001998911754967398, "loss": 11.6819, "step": 2158 }, { "epoch": 0.04519383739428954, "grad_norm": 0.17987075448036194, "learning_rate": 0.00019989107321261625, "loss": 11.6922, "step": 2159 }, { "epoch": 0.0452147701582517, "grad_norm": 0.17730575799942017, "learning_rate": 0.00019989097088047303, "loss": 11.6883, "step": 2160 }, { "epoch": 0.04523570292221385, "grad_norm": 0.22105906903743744, "learning_rate": 0.00019989086850031026, "loss": 11.6761, "step": 2161 }, { "epoch": 0.045256635686176, "grad_norm": 0.17211657762527466, "learning_rate": 0.00019989076607212792, "loss": 11.6817, "step": 2162 }, { "epoch": 0.04527756845013815, "grad_norm": 0.2019694447517395, "learning_rate": 0.00019989066359592608, "loss": 11.6972, "step": 2163 }, { "epoch": 0.04529850121410031, "grad_norm": 0.2332383245229721, "learning_rate": 0.0001998905610717048, "loss": 11.7013, "step": 2164 }, { "epoch": 0.045319433978062464, "grad_norm": 0.23124106228351593, "learning_rate": 0.00019989045849946413, "loss": 11.6795, "step": 2165 }, { "epoch": 0.045340366742024615, "grad_norm": 0.4456845223903656, "learning_rate": 0.0001998903558792041, "loss": 11.6997, "step": 2166 }, { "epoch": 0.04536129950598677, "grad_norm": 0.18659748136997223, "learning_rate": 0.00019989025321092481, "loss": 11.6939, "step": 2167 }, { "epoch": 0.045382232269948926, "grad_norm": 0.16388724744319916, "learning_rate": 0.00019989015049462624, "loss": 11.6904, "step": 2168 }, { "epoch": 0.04540316503391108, "grad_norm": 0.20726820826530457, "learning_rate": 0.00019989004773030846, "loss": 11.6835, "step": 2169 }, { "epoch": 0.04542409779787323, "grad_norm": 0.19452884793281555, "learning_rate": 0.00019988994491797156, "loss": 11.6894, "step": 2170 }, { "epoch": 0.04544503056183539, "grad_norm": 0.207498237490654, "learning_rate": 0.00019988984205761555, "loss": 11.6862, "step": 2171 }, { "epoch": 0.04546596332579754, "grad_norm": 0.24847517907619476, "learning_rate": 0.0001998897391492405, "loss": 11.704, "step": 2172 }, { "epoch": 0.04548689608975969, "grad_norm": 0.27656951546669006, "learning_rate": 0.00019988963619284645, "loss": 11.701, "step": 2173 }, { "epoch": 0.04550782885372184, "grad_norm": 0.24380703270435333, "learning_rate": 0.00019988953318843345, "loss": 11.6893, "step": 2174 }, { "epoch": 0.045528761617684, "grad_norm": 0.19690468907356262, "learning_rate": 0.00019988943013600157, "loss": 11.6983, "step": 2175 }, { "epoch": 0.045549694381646154, "grad_norm": 0.1993691623210907, "learning_rate": 0.00019988932703555082, "loss": 11.6825, "step": 2176 }, { "epoch": 0.045570627145608306, "grad_norm": 0.17589829862117767, "learning_rate": 0.00019988922388708123, "loss": 11.6842, "step": 2177 }, { "epoch": 0.04559155990957046, "grad_norm": 0.22092176973819733, "learning_rate": 0.00019988912069059293, "loss": 11.7072, "step": 2178 }, { "epoch": 0.045612492673532616, "grad_norm": 0.24413315951824188, "learning_rate": 0.00019988901744608594, "loss": 11.7006, "step": 2179 }, { "epoch": 0.04563342543749477, "grad_norm": 0.1917635202407837, "learning_rate": 0.00019988891415356026, "loss": 11.692, "step": 2180 }, { "epoch": 0.04565435820145692, "grad_norm": 0.20753297209739685, "learning_rate": 0.000199888810813016, "loss": 11.6911, "step": 2181 }, { "epoch": 0.04567529096541907, "grad_norm": 0.20730896294116974, "learning_rate": 0.00019988870742445317, "loss": 11.6875, "step": 2182 }, { "epoch": 0.04569622372938123, "grad_norm": 0.1771773099899292, "learning_rate": 0.00019988860398787188, "loss": 11.6909, "step": 2183 }, { "epoch": 0.04571715649334338, "grad_norm": 0.17794892191886902, "learning_rate": 0.0001998885005032721, "loss": 11.6986, "step": 2184 }, { "epoch": 0.045738089257305534, "grad_norm": 0.2299412041902542, "learning_rate": 0.00019988839697065392, "loss": 11.6763, "step": 2185 }, { "epoch": 0.045759022021267685, "grad_norm": 0.2508960962295532, "learning_rate": 0.00019988829339001737, "loss": 11.6977, "step": 2186 }, { "epoch": 0.045779954785229844, "grad_norm": 0.17720121145248413, "learning_rate": 0.00019988818976136256, "loss": 11.689, "step": 2187 }, { "epoch": 0.045800887549191996, "grad_norm": 0.22473281621932983, "learning_rate": 0.00019988808608468946, "loss": 11.7076, "step": 2188 }, { "epoch": 0.04582182031315415, "grad_norm": 0.25827643275260925, "learning_rate": 0.00019988798235999816, "loss": 11.6685, "step": 2189 }, { "epoch": 0.0458427530771163, "grad_norm": 0.1808309108018875, "learning_rate": 0.00019988787858728872, "loss": 11.6831, "step": 2190 }, { "epoch": 0.04586368584107846, "grad_norm": 0.17685091495513916, "learning_rate": 0.00019988777476656115, "loss": 11.6846, "step": 2191 }, { "epoch": 0.04588461860504061, "grad_norm": 0.1922205686569214, "learning_rate": 0.00019988767089781554, "loss": 11.6935, "step": 2192 }, { "epoch": 0.04590555136900276, "grad_norm": 0.23031841218471527, "learning_rate": 0.00019988756698105192, "loss": 11.6826, "step": 2193 }, { "epoch": 0.04592648413296491, "grad_norm": 0.18399420380592346, "learning_rate": 0.00019988746301627034, "loss": 11.6897, "step": 2194 }, { "epoch": 0.04594741689692707, "grad_norm": 0.2183176726102829, "learning_rate": 0.00019988735900347088, "loss": 11.685, "step": 2195 }, { "epoch": 0.045968349660889224, "grad_norm": 0.17137539386749268, "learning_rate": 0.00019988725494265354, "loss": 11.6975, "step": 2196 }, { "epoch": 0.045989282424851376, "grad_norm": 0.2077958881855011, "learning_rate": 0.00019988715083381838, "loss": 11.6882, "step": 2197 }, { "epoch": 0.046010215188813534, "grad_norm": 0.24343743920326233, "learning_rate": 0.0001998870466769655, "loss": 11.697, "step": 2198 }, { "epoch": 0.046031147952775686, "grad_norm": 0.24534259736537933, "learning_rate": 0.00019988694247209492, "loss": 11.6846, "step": 2199 }, { "epoch": 0.04605208071673784, "grad_norm": 0.19025522470474243, "learning_rate": 0.00019988683821920667, "loss": 11.6941, "step": 2200 }, { "epoch": 0.04607301348069999, "grad_norm": 0.20978179574012756, "learning_rate": 0.0001998867339183008, "loss": 11.699, "step": 2201 }, { "epoch": 0.04609394624466215, "grad_norm": 0.21021275222301483, "learning_rate": 0.0001998866295693774, "loss": 11.6969, "step": 2202 }, { "epoch": 0.0461148790086243, "grad_norm": 0.17464786767959595, "learning_rate": 0.0001998865251724365, "loss": 11.6943, "step": 2203 }, { "epoch": 0.04613581177258645, "grad_norm": 0.21943403780460358, "learning_rate": 0.00019988642072747812, "loss": 11.7011, "step": 2204 }, { "epoch": 0.046156744536548604, "grad_norm": 0.1891343742609024, "learning_rate": 0.00019988631623450235, "loss": 11.6876, "step": 2205 }, { "epoch": 0.04617767730051076, "grad_norm": 0.18391567468643188, "learning_rate": 0.00019988621169350922, "loss": 11.6958, "step": 2206 }, { "epoch": 0.046198610064472914, "grad_norm": 0.2185075879096985, "learning_rate": 0.00019988610710449875, "loss": 11.7036, "step": 2207 }, { "epoch": 0.046219542828435066, "grad_norm": 0.2096470147371292, "learning_rate": 0.00019988600246747108, "loss": 11.7071, "step": 2208 }, { "epoch": 0.04624047559239722, "grad_norm": 0.1965436041355133, "learning_rate": 0.0001998858977824262, "loss": 11.6861, "step": 2209 }, { "epoch": 0.046261408356359376, "grad_norm": 0.19003348052501678, "learning_rate": 0.00019988579304936415, "loss": 11.6931, "step": 2210 }, { "epoch": 0.04628234112032153, "grad_norm": 0.43466082215309143, "learning_rate": 0.000199885688268285, "loss": 11.6788, "step": 2211 }, { "epoch": 0.04630327388428368, "grad_norm": 0.18592506647109985, "learning_rate": 0.0001998855834391888, "loss": 11.6842, "step": 2212 }, { "epoch": 0.04632420664824583, "grad_norm": 0.23819592595100403, "learning_rate": 0.00019988547856207562, "loss": 11.6851, "step": 2213 }, { "epoch": 0.04634513941220799, "grad_norm": 0.18776065111160278, "learning_rate": 0.00019988537363694545, "loss": 11.6773, "step": 2214 }, { "epoch": 0.04636607217617014, "grad_norm": 0.23819957673549652, "learning_rate": 0.00019988526866379842, "loss": 11.6826, "step": 2215 }, { "epoch": 0.046387004940132294, "grad_norm": 0.2108408659696579, "learning_rate": 0.00019988516364263453, "loss": 11.7054, "step": 2216 }, { "epoch": 0.046407937704094446, "grad_norm": 0.22367067635059357, "learning_rate": 0.0001998850585734538, "loss": 11.6868, "step": 2217 }, { "epoch": 0.046428870468056604, "grad_norm": 0.27231496572494507, "learning_rate": 0.00019988495345625636, "loss": 11.6899, "step": 2218 }, { "epoch": 0.046449803232018756, "grad_norm": 0.18960095942020416, "learning_rate": 0.00019988484829104223, "loss": 11.6775, "step": 2219 }, { "epoch": 0.04647073599598091, "grad_norm": 0.18668821454048157, "learning_rate": 0.0001998847430778114, "loss": 11.6995, "step": 2220 }, { "epoch": 0.04649166875994306, "grad_norm": 0.20700259506702423, "learning_rate": 0.00019988463781656402, "loss": 11.7038, "step": 2221 }, { "epoch": 0.04651260152390522, "grad_norm": 0.20639774203300476, "learning_rate": 0.00019988453250730008, "loss": 11.6786, "step": 2222 }, { "epoch": 0.04653353428786737, "grad_norm": 0.20485393702983856, "learning_rate": 0.00019988442715001962, "loss": 11.7106, "step": 2223 }, { "epoch": 0.04655446705182952, "grad_norm": 0.2415439337491989, "learning_rate": 0.00019988432174472274, "loss": 11.6769, "step": 2224 }, { "epoch": 0.046575399815791674, "grad_norm": 0.19361166656017303, "learning_rate": 0.00019988421629140945, "loss": 11.7039, "step": 2225 }, { "epoch": 0.04659633257975383, "grad_norm": 0.27892574667930603, "learning_rate": 0.00019988411079007985, "loss": 11.6834, "step": 2226 }, { "epoch": 0.046617265343715984, "grad_norm": 0.16679105162620544, "learning_rate": 0.00019988400524073395, "loss": 11.6854, "step": 2227 }, { "epoch": 0.046638198107678136, "grad_norm": 0.222456157207489, "learning_rate": 0.00019988389964337175, "loss": 11.688, "step": 2228 }, { "epoch": 0.046659130871640295, "grad_norm": 0.28292304277420044, "learning_rate": 0.00019988379399799342, "loss": 11.6957, "step": 2229 }, { "epoch": 0.046680063635602446, "grad_norm": 0.21001379191875458, "learning_rate": 0.0001998836883045989, "loss": 11.6995, "step": 2230 }, { "epoch": 0.0467009963995646, "grad_norm": 0.17871032655239105, "learning_rate": 0.00019988358256318832, "loss": 11.6927, "step": 2231 }, { "epoch": 0.04672192916352675, "grad_norm": 0.23156127333641052, "learning_rate": 0.0001998834767737617, "loss": 11.6922, "step": 2232 }, { "epoch": 0.04674286192748891, "grad_norm": 0.21493172645568848, "learning_rate": 0.00019988337093631908, "loss": 11.6868, "step": 2233 }, { "epoch": 0.04676379469145106, "grad_norm": 0.16766142845153809, "learning_rate": 0.00019988326505086056, "loss": 11.6803, "step": 2234 }, { "epoch": 0.04678472745541321, "grad_norm": 0.2765376567840576, "learning_rate": 0.0001998831591173861, "loss": 11.7182, "step": 2235 }, { "epoch": 0.046805660219375364, "grad_norm": 0.1992664486169815, "learning_rate": 0.0001998830531358958, "loss": 11.6936, "step": 2236 }, { "epoch": 0.04682659298333752, "grad_norm": 0.2589881420135498, "learning_rate": 0.00019988294710638976, "loss": 11.7117, "step": 2237 }, { "epoch": 0.046847525747299675, "grad_norm": 0.21725034713745117, "learning_rate": 0.00019988284102886796, "loss": 11.6846, "step": 2238 }, { "epoch": 0.046868458511261826, "grad_norm": 0.2629028260707855, "learning_rate": 0.0001998827349033305, "loss": 11.6872, "step": 2239 }, { "epoch": 0.04688939127522398, "grad_norm": 0.18945392966270447, "learning_rate": 0.00019988262872977738, "loss": 11.6976, "step": 2240 }, { "epoch": 0.04691032403918614, "grad_norm": 0.2646414339542389, "learning_rate": 0.00019988252250820868, "loss": 11.6978, "step": 2241 }, { "epoch": 0.04693125680314829, "grad_norm": 0.2078169733285904, "learning_rate": 0.00019988241623862445, "loss": 11.6998, "step": 2242 }, { "epoch": 0.04695218956711044, "grad_norm": 0.22707703709602356, "learning_rate": 0.00019988230992102474, "loss": 11.6934, "step": 2243 }, { "epoch": 0.04697312233107259, "grad_norm": 0.2129303365945816, "learning_rate": 0.00019988220355540964, "loss": 11.6781, "step": 2244 }, { "epoch": 0.04699405509503475, "grad_norm": 0.18965940177440643, "learning_rate": 0.00019988209714177913, "loss": 11.6812, "step": 2245 }, { "epoch": 0.0470149878589969, "grad_norm": 0.3012203872203827, "learning_rate": 0.00019988199068013328, "loss": 11.7024, "step": 2246 }, { "epoch": 0.047035920622959054, "grad_norm": 0.25145748257637024, "learning_rate": 0.0001998818841704722, "loss": 11.6855, "step": 2247 }, { "epoch": 0.047056853386921206, "grad_norm": 0.1709877848625183, "learning_rate": 0.00019988177761279586, "loss": 11.6832, "step": 2248 }, { "epoch": 0.047077786150883365, "grad_norm": 0.21679095923900604, "learning_rate": 0.00019988167100710435, "loss": 11.6813, "step": 2249 }, { "epoch": 0.04709871891484552, "grad_norm": 0.19508065283298492, "learning_rate": 0.00019988156435339773, "loss": 11.6917, "step": 2250 }, { "epoch": 0.04711965167880767, "grad_norm": 0.17977692186832428, "learning_rate": 0.00019988145765167603, "loss": 11.6839, "step": 2251 }, { "epoch": 0.04714058444276982, "grad_norm": 0.21888220310211182, "learning_rate": 0.00019988135090193932, "loss": 11.6884, "step": 2252 }, { "epoch": 0.04716151720673198, "grad_norm": 0.2336946576833725, "learning_rate": 0.00019988124410418766, "loss": 11.6873, "step": 2253 }, { "epoch": 0.04718244997069413, "grad_norm": 0.19990062713623047, "learning_rate": 0.00019988113725842105, "loss": 11.6854, "step": 2254 }, { "epoch": 0.04720338273465628, "grad_norm": 0.20138926804065704, "learning_rate": 0.0001998810303646396, "loss": 11.6888, "step": 2255 }, { "epoch": 0.04722431549861844, "grad_norm": 0.18933822214603424, "learning_rate": 0.00019988092342284334, "loss": 11.6936, "step": 2256 }, { "epoch": 0.04724524826258059, "grad_norm": 0.21797621250152588, "learning_rate": 0.00019988081643303228, "loss": 11.6948, "step": 2257 }, { "epoch": 0.047266181026542745, "grad_norm": 0.20402945578098297, "learning_rate": 0.00019988070939520655, "loss": 11.6873, "step": 2258 }, { "epoch": 0.047287113790504896, "grad_norm": 0.27185776829719543, "learning_rate": 0.00019988060230936614, "loss": 11.6878, "step": 2259 }, { "epoch": 0.047308046554467055, "grad_norm": 0.18782466650009155, "learning_rate": 0.00019988049517551113, "loss": 11.6843, "step": 2260 }, { "epoch": 0.04732897931842921, "grad_norm": 0.2956112325191498, "learning_rate": 0.00019988038799364157, "loss": 11.6871, "step": 2261 }, { "epoch": 0.04734991208239136, "grad_norm": 0.19438797235488892, "learning_rate": 0.0001998802807637575, "loss": 11.6723, "step": 2262 }, { "epoch": 0.04737084484635351, "grad_norm": 0.21887058019638062, "learning_rate": 0.000199880173485859, "loss": 11.681, "step": 2263 }, { "epoch": 0.04739177761031567, "grad_norm": 0.22529388964176178, "learning_rate": 0.00019988006615994606, "loss": 11.6949, "step": 2264 }, { "epoch": 0.04741271037427782, "grad_norm": 0.20183075964450836, "learning_rate": 0.00019987995878601876, "loss": 11.7009, "step": 2265 }, { "epoch": 0.04743364313823997, "grad_norm": 0.17404253780841827, "learning_rate": 0.00019987985136407722, "loss": 11.6725, "step": 2266 }, { "epoch": 0.047454575902202124, "grad_norm": 0.20555132627487183, "learning_rate": 0.00019987974389412138, "loss": 11.6954, "step": 2267 }, { "epoch": 0.04747550866616428, "grad_norm": 0.18861563503742218, "learning_rate": 0.0001998796363761514, "loss": 11.691, "step": 2268 }, { "epoch": 0.047496441430126435, "grad_norm": 0.2077503800392151, "learning_rate": 0.00019987952881016723, "loss": 11.6861, "step": 2269 }, { "epoch": 0.04751737419408859, "grad_norm": 0.21747545897960663, "learning_rate": 0.00019987942119616898, "loss": 11.6858, "step": 2270 }, { "epoch": 0.04753830695805074, "grad_norm": 0.2743767201900482, "learning_rate": 0.0001998793135341567, "loss": 11.6878, "step": 2271 }, { "epoch": 0.0475592397220129, "grad_norm": 0.22839757800102234, "learning_rate": 0.00019987920582413042, "loss": 11.6976, "step": 2272 }, { "epoch": 0.04758017248597505, "grad_norm": 0.21716387569904327, "learning_rate": 0.00019987909806609022, "loss": 11.7147, "step": 2273 }, { "epoch": 0.0476011052499372, "grad_norm": 0.1875057816505432, "learning_rate": 0.00019987899026003615, "loss": 11.6768, "step": 2274 }, { "epoch": 0.04762203801389935, "grad_norm": 0.21010206639766693, "learning_rate": 0.0001998788824059682, "loss": 11.6934, "step": 2275 }, { "epoch": 0.04764297077786151, "grad_norm": 0.19732753932476044, "learning_rate": 0.0001998787745038865, "loss": 11.694, "step": 2276 }, { "epoch": 0.04766390354182366, "grad_norm": 0.21557126939296722, "learning_rate": 0.00019987866655379107, "loss": 11.7077, "step": 2277 }, { "epoch": 0.047684836305785815, "grad_norm": 0.23049971461296082, "learning_rate": 0.00019987855855568197, "loss": 11.6921, "step": 2278 }, { "epoch": 0.047705769069747966, "grad_norm": 0.2250385582447052, "learning_rate": 0.00019987845050955922, "loss": 11.6943, "step": 2279 }, { "epoch": 0.047726701833710125, "grad_norm": 0.19287864863872528, "learning_rate": 0.0001998783424154229, "loss": 11.6914, "step": 2280 }, { "epoch": 0.04774763459767228, "grad_norm": 0.21415841579437256, "learning_rate": 0.0001998782342732731, "loss": 11.701, "step": 2281 }, { "epoch": 0.04776856736163443, "grad_norm": 0.22677205502986908, "learning_rate": 0.0001998781260831098, "loss": 11.6916, "step": 2282 }, { "epoch": 0.04778950012559658, "grad_norm": 0.22185255587100983, "learning_rate": 0.0001998780178449331, "loss": 11.6897, "step": 2283 }, { "epoch": 0.04781043288955874, "grad_norm": 0.2123270034790039, "learning_rate": 0.00019987790955874302, "loss": 11.6896, "step": 2284 }, { "epoch": 0.04783136565352089, "grad_norm": 0.16133753955364227, "learning_rate": 0.00019987780122453962, "loss": 11.6835, "step": 2285 }, { "epoch": 0.04785229841748304, "grad_norm": 0.17149639129638672, "learning_rate": 0.00019987769284232298, "loss": 11.6895, "step": 2286 }, { "epoch": 0.0478732311814452, "grad_norm": 0.21503904461860657, "learning_rate": 0.00019987758441209312, "loss": 11.6996, "step": 2287 }, { "epoch": 0.04789416394540735, "grad_norm": 0.2924991548061371, "learning_rate": 0.0001998774759338501, "loss": 11.7023, "step": 2288 }, { "epoch": 0.047915096709369505, "grad_norm": 0.16939659416675568, "learning_rate": 0.00019987736740759398, "loss": 11.6937, "step": 2289 }, { "epoch": 0.04793602947333166, "grad_norm": 0.2003972977399826, "learning_rate": 0.00019987725883332483, "loss": 11.6936, "step": 2290 }, { "epoch": 0.047956962237293815, "grad_norm": 0.27810490131378174, "learning_rate": 0.00019987715021104263, "loss": 11.6984, "step": 2291 }, { "epoch": 0.04797789500125597, "grad_norm": 0.21128341555595398, "learning_rate": 0.0001998770415407475, "loss": 11.6967, "step": 2292 }, { "epoch": 0.04799882776521812, "grad_norm": 0.18159165978431702, "learning_rate": 0.0001998769328224395, "loss": 11.6972, "step": 2293 }, { "epoch": 0.04801976052918027, "grad_norm": 0.17952314019203186, "learning_rate": 0.00019987682405611863, "loss": 11.6832, "step": 2294 }, { "epoch": 0.04804069329314243, "grad_norm": 1.4596315622329712, "learning_rate": 0.000199876715241785, "loss": 11.7031, "step": 2295 }, { "epoch": 0.04806162605710458, "grad_norm": 0.17665976285934448, "learning_rate": 0.0001998766063794386, "loss": 11.7, "step": 2296 }, { "epoch": 0.04808255882106673, "grad_norm": 0.18183684349060059, "learning_rate": 0.00019987649746907953, "loss": 11.6824, "step": 2297 }, { "epoch": 0.048103491585028885, "grad_norm": 0.24777525663375854, "learning_rate": 0.00019987638851070783, "loss": 11.6991, "step": 2298 }, { "epoch": 0.04812442434899104, "grad_norm": 0.22478845715522766, "learning_rate": 0.00019987627950432353, "loss": 11.6823, "step": 2299 }, { "epoch": 0.048145357112953195, "grad_norm": 0.22161626815795898, "learning_rate": 0.0001998761704499267, "loss": 11.7035, "step": 2300 }, { "epoch": 0.04816628987691535, "grad_norm": 0.2220117449760437, "learning_rate": 0.0001998760613475174, "loss": 11.6995, "step": 2301 }, { "epoch": 0.0481872226408775, "grad_norm": 0.2162020206451416, "learning_rate": 0.0001998759521970957, "loss": 11.7013, "step": 2302 }, { "epoch": 0.04820815540483966, "grad_norm": 0.24097146093845367, "learning_rate": 0.00019987584299866162, "loss": 11.6944, "step": 2303 }, { "epoch": 0.04822908816880181, "grad_norm": 0.21196450293064117, "learning_rate": 0.0001998757337522152, "loss": 11.6905, "step": 2304 }, { "epoch": 0.04825002093276396, "grad_norm": 0.25707849860191345, "learning_rate": 0.00019987562445775652, "loss": 11.7036, "step": 2305 }, { "epoch": 0.04827095369672611, "grad_norm": 0.181512251496315, "learning_rate": 0.00019987551511528562, "loss": 11.6735, "step": 2306 }, { "epoch": 0.04829188646068827, "grad_norm": 0.2110210359096527, "learning_rate": 0.0001998754057248026, "loss": 11.6678, "step": 2307 }, { "epoch": 0.04831281922465042, "grad_norm": 0.1769963800907135, "learning_rate": 0.0001998752962863074, "loss": 11.6801, "step": 2308 }, { "epoch": 0.048333751988612575, "grad_norm": 0.25303536653518677, "learning_rate": 0.0001998751867998002, "loss": 11.6989, "step": 2309 }, { "epoch": 0.04835468475257473, "grad_norm": 0.205826535820961, "learning_rate": 0.00019987507726528097, "loss": 11.6852, "step": 2310 }, { "epoch": 0.048375617516536885, "grad_norm": 0.23439079523086548, "learning_rate": 0.0001998749676827498, "loss": 11.7016, "step": 2311 }, { "epoch": 0.04839655028049904, "grad_norm": 0.23615460097789764, "learning_rate": 0.00019987485805220674, "loss": 11.6885, "step": 2312 }, { "epoch": 0.04841748304446119, "grad_norm": 0.27652642130851746, "learning_rate": 0.00019987474837365182, "loss": 11.6844, "step": 2313 }, { "epoch": 0.04843841580842335, "grad_norm": 0.25308969616889954, "learning_rate": 0.00019987463864708512, "loss": 11.6838, "step": 2314 }, { "epoch": 0.0484593485723855, "grad_norm": 0.2000657021999359, "learning_rate": 0.00019987452887250667, "loss": 11.6969, "step": 2315 }, { "epoch": 0.04848028133634765, "grad_norm": 0.19865944981575012, "learning_rate": 0.00019987441904991652, "loss": 11.6997, "step": 2316 }, { "epoch": 0.0485012141003098, "grad_norm": 0.24321028590202332, "learning_rate": 0.00019987430917931472, "loss": 11.6915, "step": 2317 }, { "epoch": 0.04852214686427196, "grad_norm": 0.1776159554719925, "learning_rate": 0.0001998741992607014, "loss": 11.6755, "step": 2318 }, { "epoch": 0.04854307962823411, "grad_norm": 0.21889035403728485, "learning_rate": 0.0001998740892940765, "loss": 11.6876, "step": 2319 }, { "epoch": 0.048564012392196265, "grad_norm": 0.19432294368743896, "learning_rate": 0.00019987397927944013, "loss": 11.6989, "step": 2320 }, { "epoch": 0.04858494515615842, "grad_norm": 0.17761628329753876, "learning_rate": 0.00019987386921679234, "loss": 11.6803, "step": 2321 }, { "epoch": 0.048605877920120576, "grad_norm": 0.23207886517047882, "learning_rate": 0.0001998737591061332, "loss": 11.6832, "step": 2322 }, { "epoch": 0.04862681068408273, "grad_norm": 0.1978832483291626, "learning_rate": 0.00019987364894746273, "loss": 11.6852, "step": 2323 }, { "epoch": 0.04864774344804488, "grad_norm": 0.19966143369674683, "learning_rate": 0.00019987353874078098, "loss": 11.6859, "step": 2324 }, { "epoch": 0.04866867621200703, "grad_norm": 0.2393134981393814, "learning_rate": 0.00019987342848608804, "loss": 11.6919, "step": 2325 }, { "epoch": 0.04868960897596919, "grad_norm": 0.19311997294425964, "learning_rate": 0.00019987331818338393, "loss": 11.6802, "step": 2326 }, { "epoch": 0.04871054173993134, "grad_norm": 0.21211141347885132, "learning_rate": 0.0001998732078326687, "loss": 11.6975, "step": 2327 }, { "epoch": 0.04873147450389349, "grad_norm": 0.18463411927223206, "learning_rate": 0.00019987309743394244, "loss": 11.6928, "step": 2328 }, { "epoch": 0.048752407267855645, "grad_norm": 0.23145900666713715, "learning_rate": 0.00019987298698720518, "loss": 11.6773, "step": 2329 }, { "epoch": 0.048773340031817804, "grad_norm": 0.22050891816616058, "learning_rate": 0.00019987287649245698, "loss": 11.6775, "step": 2330 }, { "epoch": 0.048794272795779955, "grad_norm": 0.24958762526512146, "learning_rate": 0.00019987276594969787, "loss": 11.6881, "step": 2331 }, { "epoch": 0.04881520555974211, "grad_norm": 0.19762007892131805, "learning_rate": 0.00019987265535892793, "loss": 11.678, "step": 2332 }, { "epoch": 0.04883613832370426, "grad_norm": 0.34441933035850525, "learning_rate": 0.00019987254472014722, "loss": 11.702, "step": 2333 }, { "epoch": 0.04885707108766642, "grad_norm": 0.21855659782886505, "learning_rate": 0.00019987243403335572, "loss": 11.6757, "step": 2334 }, { "epoch": 0.04887800385162857, "grad_norm": 0.16753891110420227, "learning_rate": 0.0001998723232985536, "loss": 11.7037, "step": 2335 }, { "epoch": 0.04889893661559072, "grad_norm": 0.2211945354938507, "learning_rate": 0.00019987221251574082, "loss": 11.6889, "step": 2336 }, { "epoch": 0.04891986937955287, "grad_norm": 0.2696496844291687, "learning_rate": 0.00019987210168491748, "loss": 11.6891, "step": 2337 }, { "epoch": 0.04894080214351503, "grad_norm": 0.21445950865745544, "learning_rate": 0.00019987199080608362, "loss": 11.6827, "step": 2338 }, { "epoch": 0.048961734907477183, "grad_norm": 0.1830948293209076, "learning_rate": 0.00019987187987923928, "loss": 11.6921, "step": 2339 }, { "epoch": 0.048982667671439335, "grad_norm": 0.20651009678840637, "learning_rate": 0.00019987176890438454, "loss": 11.6828, "step": 2340 }, { "epoch": 0.04900360043540149, "grad_norm": 0.20157325267791748, "learning_rate": 0.0001998716578815194, "loss": 11.6806, "step": 2341 }, { "epoch": 0.049024533199363646, "grad_norm": 0.2120952606201172, "learning_rate": 0.00019987154681064403, "loss": 11.6909, "step": 2342 }, { "epoch": 0.0490454659633258, "grad_norm": 0.19102813303470612, "learning_rate": 0.00019987143569175832, "loss": 11.6847, "step": 2343 }, { "epoch": 0.04906639872728795, "grad_norm": 0.28502577543258667, "learning_rate": 0.00019987132452486245, "loss": 11.6894, "step": 2344 }, { "epoch": 0.04908733149125011, "grad_norm": 0.16739580035209656, "learning_rate": 0.00019987121330995643, "loss": 11.6934, "step": 2345 }, { "epoch": 0.04910826425521226, "grad_norm": 0.21359749138355255, "learning_rate": 0.00019987110204704034, "loss": 11.6967, "step": 2346 }, { "epoch": 0.04912919701917441, "grad_norm": 0.20695649087429047, "learning_rate": 0.00019987099073611418, "loss": 11.6804, "step": 2347 }, { "epoch": 0.04915012978313656, "grad_norm": 0.18774186074733734, "learning_rate": 0.00019987087937717806, "loss": 11.6877, "step": 2348 }, { "epoch": 0.04917106254709872, "grad_norm": 0.24873755872249603, "learning_rate": 0.00019987076797023197, "loss": 11.6978, "step": 2349 }, { "epoch": 0.049191995311060874, "grad_norm": 0.21778284013271332, "learning_rate": 0.000199870656515276, "loss": 11.6858, "step": 2350 }, { "epoch": 0.049212928075023026, "grad_norm": 0.25346457958221436, "learning_rate": 0.00019987054501231023, "loss": 11.7032, "step": 2351 }, { "epoch": 0.04923386083898518, "grad_norm": 0.2298104166984558, "learning_rate": 0.0001998704334613347, "loss": 11.6823, "step": 2352 }, { "epoch": 0.049254793602947336, "grad_norm": 0.225933238863945, "learning_rate": 0.00019987032186234943, "loss": 11.681, "step": 2353 }, { "epoch": 0.04927572636690949, "grad_norm": 0.2730838656425476, "learning_rate": 0.00019987021021535448, "loss": 11.6868, "step": 2354 }, { "epoch": 0.04929665913087164, "grad_norm": 0.22272375226020813, "learning_rate": 0.00019987009852034991, "loss": 11.7065, "step": 2355 }, { "epoch": 0.04931759189483379, "grad_norm": 0.21444839239120483, "learning_rate": 0.00019986998677733585, "loss": 11.7026, "step": 2356 }, { "epoch": 0.04933852465879595, "grad_norm": 0.2050708681344986, "learning_rate": 0.00019986987498631225, "loss": 11.6773, "step": 2357 }, { "epoch": 0.0493594574227581, "grad_norm": 0.21537452936172485, "learning_rate": 0.00019986976314727918, "loss": 11.6924, "step": 2358 }, { "epoch": 0.049380390186720254, "grad_norm": 0.22452256083488464, "learning_rate": 0.00019986965126023672, "loss": 11.689, "step": 2359 }, { "epoch": 0.049401322950682405, "grad_norm": 0.22391662001609802, "learning_rate": 0.00019986953932518491, "loss": 11.6987, "step": 2360 }, { "epoch": 0.049422255714644564, "grad_norm": 0.20412340760231018, "learning_rate": 0.00019986942734212383, "loss": 11.6827, "step": 2361 }, { "epoch": 0.049443188478606716, "grad_norm": 0.2432674765586853, "learning_rate": 0.00019986931531105353, "loss": 11.6879, "step": 2362 }, { "epoch": 0.04946412124256887, "grad_norm": 0.2581133544445038, "learning_rate": 0.000199869203231974, "loss": 11.6784, "step": 2363 }, { "epoch": 0.04948505400653102, "grad_norm": 0.21293476223945618, "learning_rate": 0.0001998690911048854, "loss": 11.6935, "step": 2364 }, { "epoch": 0.04950598677049318, "grad_norm": 0.16448496282100677, "learning_rate": 0.00019986897892978767, "loss": 11.6861, "step": 2365 }, { "epoch": 0.04952691953445533, "grad_norm": 0.18691594898700714, "learning_rate": 0.00019986886670668094, "loss": 11.6944, "step": 2366 }, { "epoch": 0.04954785229841748, "grad_norm": 0.16399052739143372, "learning_rate": 0.00019986875443556525, "loss": 11.6914, "step": 2367 }, { "epoch": 0.04956878506237963, "grad_norm": 0.20632882416248322, "learning_rate": 0.00019986864211644069, "loss": 11.6873, "step": 2368 }, { "epoch": 0.04958971782634179, "grad_norm": 0.18853501975536346, "learning_rate": 0.0001998685297493072, "loss": 11.6845, "step": 2369 }, { "epoch": 0.049610650590303944, "grad_norm": 0.22316855192184448, "learning_rate": 0.00019986841733416494, "loss": 11.6906, "step": 2370 }, { "epoch": 0.049631583354266096, "grad_norm": 0.16819006204605103, "learning_rate": 0.00019986830487101392, "loss": 11.688, "step": 2371 }, { "epoch": 0.049652516118228254, "grad_norm": 0.2813185751438141, "learning_rate": 0.00019986819235985425, "loss": 11.7009, "step": 2372 }, { "epoch": 0.049673448882190406, "grad_norm": 0.217886820435524, "learning_rate": 0.00019986807980068588, "loss": 11.6892, "step": 2373 }, { "epoch": 0.04969438164615256, "grad_norm": 0.21223565936088562, "learning_rate": 0.00019986796719350896, "loss": 11.684, "step": 2374 }, { "epoch": 0.04971531441011471, "grad_norm": 0.22739005088806152, "learning_rate": 0.00019986785453832346, "loss": 11.6882, "step": 2375 }, { "epoch": 0.04973624717407687, "grad_norm": 0.24019408226013184, "learning_rate": 0.00019986774183512954, "loss": 11.6945, "step": 2376 }, { "epoch": 0.04975717993803902, "grad_norm": 0.19218774139881134, "learning_rate": 0.00019986762908392715, "loss": 11.6868, "step": 2377 }, { "epoch": 0.04977811270200117, "grad_norm": 0.2270030826330185, "learning_rate": 0.00019986751628471642, "loss": 11.7009, "step": 2378 }, { "epoch": 0.049799045465963324, "grad_norm": 0.16625110805034637, "learning_rate": 0.00019986740343749736, "loss": 11.6927, "step": 2379 }, { "epoch": 0.04981997822992548, "grad_norm": 0.18834680318832397, "learning_rate": 0.00019986729054227004, "loss": 11.6854, "step": 2380 }, { "epoch": 0.049840910993887634, "grad_norm": 0.18183614313602448, "learning_rate": 0.0001998671775990345, "loss": 11.6864, "step": 2381 }, { "epoch": 0.049861843757849786, "grad_norm": 0.23681774735450745, "learning_rate": 0.00019986706460779083, "loss": 11.6853, "step": 2382 }, { "epoch": 0.04988277652181194, "grad_norm": 0.2396143674850464, "learning_rate": 0.00019986695156853904, "loss": 11.6838, "step": 2383 }, { "epoch": 0.049903709285774096, "grad_norm": 0.19647398591041565, "learning_rate": 0.00019986683848127921, "loss": 11.6975, "step": 2384 }, { "epoch": 0.04992464204973625, "grad_norm": 0.23633114993572235, "learning_rate": 0.0001998667253460114, "loss": 11.6917, "step": 2385 }, { "epoch": 0.0499455748136984, "grad_norm": 0.22390002012252808, "learning_rate": 0.00019986661216273564, "loss": 11.6821, "step": 2386 }, { "epoch": 0.04996650757766055, "grad_norm": 0.20567116141319275, "learning_rate": 0.000199866498931452, "loss": 11.6791, "step": 2387 }, { "epoch": 0.04998744034162271, "grad_norm": 0.2680513560771942, "learning_rate": 0.00019986638565216053, "loss": 11.6831, "step": 2388 }, { "epoch": 0.05000837310558486, "grad_norm": 0.1999431997537613, "learning_rate": 0.0001998662723248613, "loss": 11.7031, "step": 2389 }, { "epoch": 0.050029305869547014, "grad_norm": 0.23200024664402008, "learning_rate": 0.00019986615894955435, "loss": 11.6754, "step": 2390 }, { "epoch": 0.050050238633509166, "grad_norm": 0.2593096196651459, "learning_rate": 0.00019986604552623972, "loss": 11.7092, "step": 2391 }, { "epoch": 0.050071171397471324, "grad_norm": 0.19123408198356628, "learning_rate": 0.00019986593205491748, "loss": 11.7048, "step": 2392 }, { "epoch": 0.050092104161433476, "grad_norm": 0.19144579768180847, "learning_rate": 0.00019986581853558772, "loss": 11.6978, "step": 2393 }, { "epoch": 0.05011303692539563, "grad_norm": 0.18005508184432983, "learning_rate": 0.00019986570496825044, "loss": 11.6828, "step": 2394 }, { "epoch": 0.05013396968935778, "grad_norm": 0.19424457848072052, "learning_rate": 0.0001998655913529057, "loss": 11.6874, "step": 2395 }, { "epoch": 0.05015490245331994, "grad_norm": 0.19163928925991058, "learning_rate": 0.00019986547768955357, "loss": 11.6894, "step": 2396 }, { "epoch": 0.05017583521728209, "grad_norm": 0.26710087060928345, "learning_rate": 0.00019986536397819413, "loss": 11.7004, "step": 2397 }, { "epoch": 0.05019676798124424, "grad_norm": 0.16794386506080627, "learning_rate": 0.00019986525021882737, "loss": 11.6899, "step": 2398 }, { "epoch": 0.050217700745206394, "grad_norm": 0.21277938783168793, "learning_rate": 0.00019986513641145345, "loss": 11.6858, "step": 2399 }, { "epoch": 0.05023863350916855, "grad_norm": 0.19889792799949646, "learning_rate": 0.00019986502255607227, "loss": 11.695, "step": 2400 }, { "epoch": 0.050259566273130704, "grad_norm": 0.22137054800987244, "learning_rate": 0.00019986490865268403, "loss": 11.6848, "step": 2401 }, { "epoch": 0.050280499037092856, "grad_norm": 0.21739009022712708, "learning_rate": 0.0001998647947012887, "loss": 11.7013, "step": 2402 }, { "epoch": 0.050301431801055015, "grad_norm": 0.22062243521213531, "learning_rate": 0.0001998646807018864, "loss": 11.6851, "step": 2403 }, { "epoch": 0.050322364565017166, "grad_norm": 0.205727219581604, "learning_rate": 0.0001998645666544771, "loss": 11.6873, "step": 2404 }, { "epoch": 0.05034329732897932, "grad_norm": 0.18325753509998322, "learning_rate": 0.00019986445255906093, "loss": 11.6739, "step": 2405 }, { "epoch": 0.05036423009294147, "grad_norm": 0.21920615434646606, "learning_rate": 0.0001998643384156379, "loss": 11.6933, "step": 2406 }, { "epoch": 0.05038516285690363, "grad_norm": 0.24103882908821106, "learning_rate": 0.00019986422422420809, "loss": 11.6842, "step": 2407 }, { "epoch": 0.05040609562086578, "grad_norm": 0.2375984936952591, "learning_rate": 0.00019986410998477154, "loss": 11.7178, "step": 2408 }, { "epoch": 0.05042702838482793, "grad_norm": 0.17927493155002594, "learning_rate": 0.00019986399569732832, "loss": 11.6913, "step": 2409 }, { "epoch": 0.050447961148790084, "grad_norm": 0.18031133711338043, "learning_rate": 0.00019986388136187847, "loss": 11.6842, "step": 2410 }, { "epoch": 0.05046889391275224, "grad_norm": 0.16854912042617798, "learning_rate": 0.00019986376697842206, "loss": 11.6711, "step": 2411 }, { "epoch": 0.050489826676714394, "grad_norm": 0.1947573721408844, "learning_rate": 0.00019986365254695914, "loss": 11.6814, "step": 2412 }, { "epoch": 0.050510759440676546, "grad_norm": 0.24933676421642303, "learning_rate": 0.00019986353806748973, "loss": 11.6778, "step": 2413 }, { "epoch": 0.0505316922046387, "grad_norm": 0.19331087172031403, "learning_rate": 0.00019986342354001395, "loss": 11.6888, "step": 2414 }, { "epoch": 0.05055262496860086, "grad_norm": 0.2133219838142395, "learning_rate": 0.00019986330896453177, "loss": 11.6935, "step": 2415 }, { "epoch": 0.05057355773256301, "grad_norm": 0.1787268966436386, "learning_rate": 0.00019986319434104336, "loss": 11.685, "step": 2416 }, { "epoch": 0.05059449049652516, "grad_norm": 0.16529640555381775, "learning_rate": 0.0001998630796695487, "loss": 11.7073, "step": 2417 }, { "epoch": 0.05061542326048731, "grad_norm": 0.21685831248760223, "learning_rate": 0.00019986296495004782, "loss": 11.701, "step": 2418 }, { "epoch": 0.05063635602444947, "grad_norm": 0.18903616070747375, "learning_rate": 0.00019986285018254083, "loss": 11.6795, "step": 2419 }, { "epoch": 0.05065728878841162, "grad_norm": 0.23420214653015137, "learning_rate": 0.00019986273536702777, "loss": 11.6808, "step": 2420 }, { "epoch": 0.050678221552373774, "grad_norm": 0.30211809277534485, "learning_rate": 0.00019986262050350871, "loss": 11.6966, "step": 2421 }, { "epoch": 0.050699154316335926, "grad_norm": 0.2114190310239792, "learning_rate": 0.00019986250559198365, "loss": 11.6868, "step": 2422 }, { "epoch": 0.050720087080298085, "grad_norm": 0.25571122765541077, "learning_rate": 0.0001998623906324527, "loss": 11.6774, "step": 2423 }, { "epoch": 0.050741019844260236, "grad_norm": 0.2079930305480957, "learning_rate": 0.0001998622756249159, "loss": 11.7094, "step": 2424 }, { "epoch": 0.05076195260822239, "grad_norm": 0.21048837900161743, "learning_rate": 0.00019986216056937331, "loss": 11.6986, "step": 2425 }, { "epoch": 0.05078288537218454, "grad_norm": 0.18062132596969604, "learning_rate": 0.00019986204546582496, "loss": 11.6906, "step": 2426 }, { "epoch": 0.0508038181361467, "grad_norm": 0.21193957328796387, "learning_rate": 0.00019986193031427095, "loss": 11.7006, "step": 2427 }, { "epoch": 0.05082475090010885, "grad_norm": 0.1827167570590973, "learning_rate": 0.00019986181511471126, "loss": 11.6819, "step": 2428 }, { "epoch": 0.050845683664071, "grad_norm": 0.2620861828327179, "learning_rate": 0.00019986169986714605, "loss": 11.7023, "step": 2429 }, { "epoch": 0.05086661642803316, "grad_norm": 0.17707280814647675, "learning_rate": 0.00019986158457157527, "loss": 11.6849, "step": 2430 }, { "epoch": 0.05088754919199531, "grad_norm": 0.21257144212722778, "learning_rate": 0.00019986146922799906, "loss": 11.671, "step": 2431 }, { "epoch": 0.050908481955957464, "grad_norm": 0.21235454082489014, "learning_rate": 0.00019986135383641743, "loss": 11.6896, "step": 2432 }, { "epoch": 0.050929414719919616, "grad_norm": 0.2078995257616043, "learning_rate": 0.00019986123839683045, "loss": 11.6928, "step": 2433 }, { "epoch": 0.050950347483881775, "grad_norm": 0.17438235878944397, "learning_rate": 0.00019986112290923814, "loss": 11.7, "step": 2434 }, { "epoch": 0.05097128024784393, "grad_norm": 0.19272935390472412, "learning_rate": 0.00019986100737364061, "loss": 11.6963, "step": 2435 }, { "epoch": 0.05099221301180608, "grad_norm": 0.200153648853302, "learning_rate": 0.00019986089179003792, "loss": 11.694, "step": 2436 }, { "epoch": 0.05101314577576823, "grad_norm": 0.19104711711406708, "learning_rate": 0.0001998607761584301, "loss": 11.6887, "step": 2437 }, { "epoch": 0.05103407853973039, "grad_norm": 0.17031265795230865, "learning_rate": 0.00019986066047881715, "loss": 11.6834, "step": 2438 }, { "epoch": 0.05105501130369254, "grad_norm": 0.2009916603565216, "learning_rate": 0.0001998605447511992, "loss": 11.6941, "step": 2439 }, { "epoch": 0.05107594406765469, "grad_norm": 0.26956942677497864, "learning_rate": 0.00019986042897557628, "loss": 11.7022, "step": 2440 }, { "epoch": 0.051096876831616844, "grad_norm": 0.21206752955913544, "learning_rate": 0.0001998603131519485, "loss": 11.6868, "step": 2441 }, { "epoch": 0.051117809595579, "grad_norm": 0.22868312895298004, "learning_rate": 0.0001998601972803158, "loss": 11.6847, "step": 2442 }, { "epoch": 0.051138742359541155, "grad_norm": 0.1986686736345291, "learning_rate": 0.00019986008136067834, "loss": 11.689, "step": 2443 }, { "epoch": 0.051159675123503306, "grad_norm": 0.20431870222091675, "learning_rate": 0.00019985996539303614, "loss": 11.6889, "step": 2444 }, { "epoch": 0.05118060788746546, "grad_norm": 0.22819273173809052, "learning_rate": 0.00019985984937738924, "loss": 11.6706, "step": 2445 }, { "epoch": 0.05120154065142762, "grad_norm": 0.1976001262664795, "learning_rate": 0.0001998597333137377, "loss": 11.6908, "step": 2446 }, { "epoch": 0.05122247341538977, "grad_norm": 0.17801663279533386, "learning_rate": 0.00019985961720208158, "loss": 11.6862, "step": 2447 }, { "epoch": 0.05124340617935192, "grad_norm": 0.17211264371871948, "learning_rate": 0.000199859501042421, "loss": 11.702, "step": 2448 }, { "epoch": 0.05126433894331407, "grad_norm": 0.22815296053886414, "learning_rate": 0.0001998593848347559, "loss": 11.6891, "step": 2449 }, { "epoch": 0.05128527170727623, "grad_norm": 0.22736239433288574, "learning_rate": 0.00019985926857908642, "loss": 11.684, "step": 2450 }, { "epoch": 0.05130620447123838, "grad_norm": 0.1813129484653473, "learning_rate": 0.00019985915227541256, "loss": 11.6924, "step": 2451 }, { "epoch": 0.051327137235200535, "grad_norm": 0.17710749804973602, "learning_rate": 0.0001998590359237344, "loss": 11.68, "step": 2452 }, { "epoch": 0.051348069999162686, "grad_norm": 0.1889916956424713, "learning_rate": 0.00019985891952405203, "loss": 11.6871, "step": 2453 }, { "epoch": 0.051369002763124845, "grad_norm": 0.19577856361865997, "learning_rate": 0.00019985880307636547, "loss": 11.6795, "step": 2454 }, { "epoch": 0.051389935527087, "grad_norm": 0.305498868227005, "learning_rate": 0.00019985868658067477, "loss": 11.6936, "step": 2455 }, { "epoch": 0.05141086829104915, "grad_norm": 0.22907839715480804, "learning_rate": 0.00019985857003698002, "loss": 11.6952, "step": 2456 }, { "epoch": 0.0514318010550113, "grad_norm": 0.24353879690170288, "learning_rate": 0.00019985845344528124, "loss": 11.6811, "step": 2457 }, { "epoch": 0.05145273381897346, "grad_norm": 0.20943133533000946, "learning_rate": 0.0001998583368055785, "loss": 11.6894, "step": 2458 }, { "epoch": 0.05147366658293561, "grad_norm": 0.20786601305007935, "learning_rate": 0.00019985822011787184, "loss": 11.6824, "step": 2459 }, { "epoch": 0.05149459934689776, "grad_norm": 0.18343594670295715, "learning_rate": 0.0001998581033821614, "loss": 11.6829, "step": 2460 }, { "epoch": 0.05151553211085992, "grad_norm": 0.202754944562912, "learning_rate": 0.00019985798659844707, "loss": 11.6845, "step": 2461 }, { "epoch": 0.05153646487482207, "grad_norm": 0.20669116079807281, "learning_rate": 0.00019985786976672908, "loss": 11.6825, "step": 2462 }, { "epoch": 0.051557397638784225, "grad_norm": 0.22635002434253693, "learning_rate": 0.00019985775288700736, "loss": 11.6783, "step": 2463 }, { "epoch": 0.05157833040274638, "grad_norm": 0.22905124723911285, "learning_rate": 0.00019985763595928204, "loss": 11.6909, "step": 2464 }, { "epoch": 0.051599263166708535, "grad_norm": 0.2668662667274475, "learning_rate": 0.00019985751898355316, "loss": 11.6941, "step": 2465 }, { "epoch": 0.05162019593067069, "grad_norm": 0.18852394819259644, "learning_rate": 0.00019985740195982077, "loss": 11.6892, "step": 2466 }, { "epoch": 0.05164112869463284, "grad_norm": 0.2252446413040161, "learning_rate": 0.00019985728488808492, "loss": 11.6906, "step": 2467 }, { "epoch": 0.05166206145859499, "grad_norm": 0.20637713372707367, "learning_rate": 0.00019985716776834566, "loss": 11.7048, "step": 2468 }, { "epoch": 0.05168299422255715, "grad_norm": 0.1897292584180832, "learning_rate": 0.00019985705060060306, "loss": 11.6749, "step": 2469 }, { "epoch": 0.0517039269865193, "grad_norm": 0.1795232743024826, "learning_rate": 0.0001998569333848572, "loss": 11.688, "step": 2470 }, { "epoch": 0.05172485975048145, "grad_norm": 0.17927764356136322, "learning_rate": 0.0001998568161211081, "loss": 11.6935, "step": 2471 }, { "epoch": 0.051745792514443605, "grad_norm": 0.21930843591690063, "learning_rate": 0.0001998566988093558, "loss": 11.689, "step": 2472 }, { "epoch": 0.05176672527840576, "grad_norm": 0.198556587100029, "learning_rate": 0.00019985658144960043, "loss": 11.6888, "step": 2473 }, { "epoch": 0.051787658042367915, "grad_norm": 0.2109774500131607, "learning_rate": 0.00019985646404184197, "loss": 11.6704, "step": 2474 }, { "epoch": 0.05180859080633007, "grad_norm": 0.17833098769187927, "learning_rate": 0.0001998563465860805, "loss": 11.6951, "step": 2475 }, { "epoch": 0.05182952357029222, "grad_norm": 0.16912239789962769, "learning_rate": 0.0001998562290823161, "loss": 11.7081, "step": 2476 }, { "epoch": 0.05185045633425438, "grad_norm": 0.20283497869968414, "learning_rate": 0.00019985611153054879, "loss": 11.6915, "step": 2477 }, { "epoch": 0.05187138909821653, "grad_norm": 0.21684226393699646, "learning_rate": 0.00019985599393077867, "loss": 11.6864, "step": 2478 }, { "epoch": 0.05189232186217868, "grad_norm": 0.23096266388893127, "learning_rate": 0.00019985587628300577, "loss": 11.6784, "step": 2479 }, { "epoch": 0.05191325462614083, "grad_norm": 0.1712547093629837, "learning_rate": 0.00019985575858723014, "loss": 11.6801, "step": 2480 }, { "epoch": 0.05193418739010299, "grad_norm": 0.22916482388973236, "learning_rate": 0.00019985564084345185, "loss": 11.6773, "step": 2481 }, { "epoch": 0.05195512015406514, "grad_norm": 0.1680310219526291, "learning_rate": 0.00019985552305167093, "loss": 11.6929, "step": 2482 }, { "epoch": 0.051976052918027295, "grad_norm": 0.19047605991363525, "learning_rate": 0.0001998554052118875, "loss": 11.6784, "step": 2483 }, { "epoch": 0.05199698568198945, "grad_norm": 0.2587713897228241, "learning_rate": 0.00019985528732410158, "loss": 11.688, "step": 2484 }, { "epoch": 0.052017918445951605, "grad_norm": 0.189195916056633, "learning_rate": 0.00019985516938831317, "loss": 11.6931, "step": 2485 }, { "epoch": 0.05203885120991376, "grad_norm": 0.18372249603271484, "learning_rate": 0.0001998550514045224, "loss": 11.6926, "step": 2486 }, { "epoch": 0.05205978397387591, "grad_norm": 0.1901470571756363, "learning_rate": 0.00019985493337272933, "loss": 11.6946, "step": 2487 }, { "epoch": 0.05208071673783807, "grad_norm": 0.2095479816198349, "learning_rate": 0.00019985481529293395, "loss": 11.696, "step": 2488 }, { "epoch": 0.05210164950180022, "grad_norm": 0.20749567449092865, "learning_rate": 0.0001998546971651364, "loss": 11.6714, "step": 2489 }, { "epoch": 0.05212258226576237, "grad_norm": 0.2058810293674469, "learning_rate": 0.0001998545789893367, "loss": 11.6872, "step": 2490 }, { "epoch": 0.05214351502972452, "grad_norm": 0.2000868022441864, "learning_rate": 0.00019985446076553487, "loss": 11.6868, "step": 2491 }, { "epoch": 0.05216444779368668, "grad_norm": 0.17595210671424866, "learning_rate": 0.000199854342493731, "loss": 11.6832, "step": 2492 }, { "epoch": 0.05218538055764883, "grad_norm": 0.19151638448238373, "learning_rate": 0.00019985422417392515, "loss": 11.6886, "step": 2493 }, { "epoch": 0.052206313321610985, "grad_norm": 0.19459977746009827, "learning_rate": 0.00019985410580611738, "loss": 11.6632, "step": 2494 }, { "epoch": 0.05222724608557314, "grad_norm": 0.24271582067012787, "learning_rate": 0.00019985398739030775, "loss": 11.674, "step": 2495 }, { "epoch": 0.052248178849535296, "grad_norm": 0.21045397222042084, "learning_rate": 0.0001998538689264963, "loss": 11.691, "step": 2496 }, { "epoch": 0.05226911161349745, "grad_norm": 0.22083456814289093, "learning_rate": 0.00019985375041468308, "loss": 11.7078, "step": 2497 }, { "epoch": 0.0522900443774596, "grad_norm": 0.21663668751716614, "learning_rate": 0.00019985363185486818, "loss": 11.6813, "step": 2498 }, { "epoch": 0.05231097714142175, "grad_norm": 0.24551427364349365, "learning_rate": 0.00019985351324705163, "loss": 11.7028, "step": 2499 }, { "epoch": 0.05233190990538391, "grad_norm": 0.2257055640220642, "learning_rate": 0.00019985339459123352, "loss": 11.682, "step": 2500 }, { "epoch": 0.05235284266934606, "grad_norm": 0.1933363974094391, "learning_rate": 0.00019985327588741385, "loss": 11.6731, "step": 2501 }, { "epoch": 0.05237377543330821, "grad_norm": 0.19534440338611603, "learning_rate": 0.0001998531571355927, "loss": 11.6923, "step": 2502 }, { "epoch": 0.052394708197270365, "grad_norm": 0.1975090056657791, "learning_rate": 0.00019985303833577019, "loss": 11.6872, "step": 2503 }, { "epoch": 0.052415640961232524, "grad_norm": 0.22052600979804993, "learning_rate": 0.00019985291948794627, "loss": 11.6872, "step": 2504 }, { "epoch": 0.052436573725194675, "grad_norm": 0.2007121443748474, "learning_rate": 0.00019985280059212106, "loss": 11.6898, "step": 2505 }, { "epoch": 0.05245750648915683, "grad_norm": 0.22146281599998474, "learning_rate": 0.00019985268164829462, "loss": 11.7033, "step": 2506 }, { "epoch": 0.05247843925311898, "grad_norm": 0.2325914055109024, "learning_rate": 0.000199852562656467, "loss": 11.671, "step": 2507 }, { "epoch": 0.05249937201708114, "grad_norm": 0.19621075689792633, "learning_rate": 0.00019985244361663825, "loss": 11.6801, "step": 2508 }, { "epoch": 0.05252030478104329, "grad_norm": 0.18580088019371033, "learning_rate": 0.00019985232452880843, "loss": 11.7002, "step": 2509 }, { "epoch": 0.05254123754500544, "grad_norm": 0.20759840309619904, "learning_rate": 0.00019985220539297757, "loss": 11.6967, "step": 2510 }, { "epoch": 0.05256217030896759, "grad_norm": 0.1832922399044037, "learning_rate": 0.00019985208620914576, "loss": 11.6802, "step": 2511 }, { "epoch": 0.05258310307292975, "grad_norm": 0.21077316999435425, "learning_rate": 0.00019985196697731308, "loss": 11.6832, "step": 2512 }, { "epoch": 0.0526040358368919, "grad_norm": 0.24529021978378296, "learning_rate": 0.00019985184769747956, "loss": 11.7011, "step": 2513 }, { "epoch": 0.052624968600854055, "grad_norm": 0.2773192822933197, "learning_rate": 0.00019985172836964522, "loss": 11.6821, "step": 2514 }, { "epoch": 0.05264590136481621, "grad_norm": 0.1864338368177414, "learning_rate": 0.00019985160899381018, "loss": 11.673, "step": 2515 }, { "epoch": 0.052666834128778366, "grad_norm": 0.23625467717647552, "learning_rate": 0.00019985148956997445, "loss": 11.6884, "step": 2516 }, { "epoch": 0.05268776689274052, "grad_norm": 0.252887487411499, "learning_rate": 0.0001998513700981381, "loss": 11.6922, "step": 2517 }, { "epoch": 0.05270869965670267, "grad_norm": 0.19565905630588531, "learning_rate": 0.00019985125057830124, "loss": 11.6895, "step": 2518 }, { "epoch": 0.05272963242066483, "grad_norm": 0.17005802690982819, "learning_rate": 0.00019985113101046387, "loss": 11.6891, "step": 2519 }, { "epoch": 0.05275056518462698, "grad_norm": 0.19398777186870575, "learning_rate": 0.00019985101139462604, "loss": 11.6854, "step": 2520 }, { "epoch": 0.05277149794858913, "grad_norm": 0.19140750169754028, "learning_rate": 0.00019985089173078785, "loss": 11.6763, "step": 2521 }, { "epoch": 0.05279243071255128, "grad_norm": 0.23890335857868195, "learning_rate": 0.00019985077201894932, "loss": 11.7077, "step": 2522 }, { "epoch": 0.05281336347651344, "grad_norm": 0.1859407275915146, "learning_rate": 0.0001998506522591105, "loss": 11.6863, "step": 2523 }, { "epoch": 0.052834296240475594, "grad_norm": 0.22602176666259766, "learning_rate": 0.0001998505324512715, "loss": 11.703, "step": 2524 }, { "epoch": 0.052855229004437745, "grad_norm": 0.21073530614376068, "learning_rate": 0.00019985041259543235, "loss": 11.6866, "step": 2525 }, { "epoch": 0.0528761617683999, "grad_norm": 0.2389652132987976, "learning_rate": 0.0001998502926915931, "loss": 11.6954, "step": 2526 }, { "epoch": 0.052897094532362056, "grad_norm": 0.23829473555088043, "learning_rate": 0.0001998501727397538, "loss": 11.6826, "step": 2527 }, { "epoch": 0.05291802729632421, "grad_norm": 0.24379730224609375, "learning_rate": 0.00019985005273991452, "loss": 11.6852, "step": 2528 }, { "epoch": 0.05293896006028636, "grad_norm": 0.2107950747013092, "learning_rate": 0.00019984993269207534, "loss": 11.6683, "step": 2529 }, { "epoch": 0.05295989282424851, "grad_norm": 0.24127840995788574, "learning_rate": 0.0001998498125962363, "loss": 11.6968, "step": 2530 }, { "epoch": 0.05298082558821067, "grad_norm": 0.23116442561149597, "learning_rate": 0.00019984969245239744, "loss": 11.6876, "step": 2531 }, { "epoch": 0.05300175835217282, "grad_norm": 0.23769548535346985, "learning_rate": 0.00019984957226055884, "loss": 11.6949, "step": 2532 }, { "epoch": 0.05302269111613497, "grad_norm": 0.1834048479795456, "learning_rate": 0.00019984945202072054, "loss": 11.7041, "step": 2533 }, { "epoch": 0.053043623880097125, "grad_norm": 0.21443131566047668, "learning_rate": 0.0001998493317328826, "loss": 11.6987, "step": 2534 }, { "epoch": 0.053064556644059284, "grad_norm": 0.201083704829216, "learning_rate": 0.00019984921139704512, "loss": 11.6829, "step": 2535 }, { "epoch": 0.053085489408021436, "grad_norm": 0.18665067851543427, "learning_rate": 0.00019984909101320808, "loss": 11.6897, "step": 2536 }, { "epoch": 0.05310642217198359, "grad_norm": 0.21524320542812347, "learning_rate": 0.0001998489705813716, "loss": 11.6917, "step": 2537 }, { "epoch": 0.05312735493594574, "grad_norm": 0.2856011688709259, "learning_rate": 0.00019984885010153574, "loss": 11.6876, "step": 2538 }, { "epoch": 0.0531482876999079, "grad_norm": 0.21598827838897705, "learning_rate": 0.0001998487295737005, "loss": 11.6763, "step": 2539 }, { "epoch": 0.05316922046387005, "grad_norm": 1.313502311706543, "learning_rate": 0.000199848608997866, "loss": 11.6893, "step": 2540 }, { "epoch": 0.0531901532278322, "grad_norm": 0.29555651545524597, "learning_rate": 0.00019984848837403224, "loss": 11.6804, "step": 2541 }, { "epoch": 0.05321108599179435, "grad_norm": 0.1847696304321289, "learning_rate": 0.00019984836770219934, "loss": 11.6819, "step": 2542 }, { "epoch": 0.05323201875575651, "grad_norm": 0.21184813976287842, "learning_rate": 0.00019984824698236734, "loss": 11.6952, "step": 2543 }, { "epoch": 0.053252951519718664, "grad_norm": 0.2437179982662201, "learning_rate": 0.00019984812621453625, "loss": 11.7023, "step": 2544 }, { "epoch": 0.053273884283680815, "grad_norm": 0.6394315361976624, "learning_rate": 0.0001998480053987062, "loss": 11.6342, "step": 2545 }, { "epoch": 0.053294817047642974, "grad_norm": 0.21922065317630768, "learning_rate": 0.0001998478845348772, "loss": 11.6855, "step": 2546 }, { "epoch": 0.053315749811605126, "grad_norm": 0.264260470867157, "learning_rate": 0.0001998477636230493, "loss": 11.6946, "step": 2547 }, { "epoch": 0.05333668257556728, "grad_norm": 0.2363930195569992, "learning_rate": 0.0001998476426632226, "loss": 11.6996, "step": 2548 }, { "epoch": 0.05335761533952943, "grad_norm": 0.18585237860679626, "learning_rate": 0.00019984752165539714, "loss": 11.6997, "step": 2549 }, { "epoch": 0.05337854810349159, "grad_norm": 0.22320279479026794, "learning_rate": 0.00019984740059957298, "loss": 11.6893, "step": 2550 }, { "epoch": 0.05339948086745374, "grad_norm": 0.18397001922130585, "learning_rate": 0.00019984727949575012, "loss": 11.6964, "step": 2551 }, { "epoch": 0.05342041363141589, "grad_norm": 0.29149818420410156, "learning_rate": 0.00019984715834392872, "loss": 11.6737, "step": 2552 }, { "epoch": 0.053441346395378043, "grad_norm": 0.19796261191368103, "learning_rate": 0.0001998470371441088, "loss": 11.6813, "step": 2553 }, { "epoch": 0.0534622791593402, "grad_norm": 0.23285993933677673, "learning_rate": 0.0001998469158962904, "loss": 11.6958, "step": 2554 }, { "epoch": 0.053483211923302354, "grad_norm": 0.22040189802646637, "learning_rate": 0.00019984679460047358, "loss": 11.6847, "step": 2555 }, { "epoch": 0.053504144687264506, "grad_norm": 0.19050686061382294, "learning_rate": 0.0001998466732566584, "loss": 11.6907, "step": 2556 }, { "epoch": 0.05352507745122666, "grad_norm": 0.2612169086933136, "learning_rate": 0.00019984655186484492, "loss": 11.6781, "step": 2557 }, { "epoch": 0.053546010215188816, "grad_norm": 0.18106365203857422, "learning_rate": 0.0001998464304250332, "loss": 11.6797, "step": 2558 }, { "epoch": 0.05356694297915097, "grad_norm": 0.20007912814617157, "learning_rate": 0.00019984630893722332, "loss": 11.6981, "step": 2559 }, { "epoch": 0.05358787574311312, "grad_norm": 0.21460385620594025, "learning_rate": 0.0001998461874014153, "loss": 11.6897, "step": 2560 }, { "epoch": 0.05360880850707527, "grad_norm": 0.21667024493217468, "learning_rate": 0.00019984606581760923, "loss": 11.6941, "step": 2561 }, { "epoch": 0.05362974127103743, "grad_norm": 0.20324425399303436, "learning_rate": 0.00019984594418580515, "loss": 11.7003, "step": 2562 }, { "epoch": 0.05365067403499958, "grad_norm": 0.18974632024765015, "learning_rate": 0.00019984582250600312, "loss": 11.6804, "step": 2563 }, { "epoch": 0.053671606798961734, "grad_norm": 0.21001331508159637, "learning_rate": 0.00019984570077820322, "loss": 11.6881, "step": 2564 }, { "epoch": 0.053692539562923886, "grad_norm": 0.22054657340049744, "learning_rate": 0.00019984557900240546, "loss": 11.6975, "step": 2565 }, { "epoch": 0.053713472326886044, "grad_norm": 0.19587461650371552, "learning_rate": 0.00019984545717860997, "loss": 11.6822, "step": 2566 }, { "epoch": 0.053734405090848196, "grad_norm": 0.2362542748451233, "learning_rate": 0.00019984533530681672, "loss": 11.6956, "step": 2567 }, { "epoch": 0.05375533785481035, "grad_norm": 0.24767433106899261, "learning_rate": 0.00019984521338702583, "loss": 11.6875, "step": 2568 }, { "epoch": 0.0537762706187725, "grad_norm": 0.19894760847091675, "learning_rate": 0.00019984509141923734, "loss": 11.6841, "step": 2569 }, { "epoch": 0.05379720338273466, "grad_norm": 0.27543652057647705, "learning_rate": 0.00019984496940345133, "loss": 11.6903, "step": 2570 }, { "epoch": 0.05381813614669681, "grad_norm": 0.22931590676307678, "learning_rate": 0.00019984484733966785, "loss": 11.7022, "step": 2571 }, { "epoch": 0.05383906891065896, "grad_norm": 0.2080826312303543, "learning_rate": 0.00019984472522788696, "loss": 11.695, "step": 2572 }, { "epoch": 0.053860001674621114, "grad_norm": 0.16595666110515594, "learning_rate": 0.00019984460306810868, "loss": 11.6915, "step": 2573 }, { "epoch": 0.05388093443858327, "grad_norm": 0.27658092975616455, "learning_rate": 0.00019984448086033313, "loss": 11.6786, "step": 2574 }, { "epoch": 0.053901867202545424, "grad_norm": 0.2036275714635849, "learning_rate": 0.0001998443586045603, "loss": 11.6944, "step": 2575 }, { "epoch": 0.053922799966507576, "grad_norm": 0.26937490701675415, "learning_rate": 0.0001998442363007903, "loss": 11.7011, "step": 2576 }, { "epoch": 0.053943732730469734, "grad_norm": 0.34218069911003113, "learning_rate": 0.00019984411394902317, "loss": 11.6891, "step": 2577 }, { "epoch": 0.053964665494431886, "grad_norm": 0.18985994160175323, "learning_rate": 0.000199843991549259, "loss": 11.6974, "step": 2578 }, { "epoch": 0.05398559825839404, "grad_norm": 0.22661767899990082, "learning_rate": 0.0001998438691014978, "loss": 11.6939, "step": 2579 }, { "epoch": 0.05400653102235619, "grad_norm": 0.19402526319026947, "learning_rate": 0.00019984374660573964, "loss": 11.7057, "step": 2580 }, { "epoch": 0.05402746378631835, "grad_norm": 0.21116086840629578, "learning_rate": 0.0001998436240619846, "loss": 11.6937, "step": 2581 }, { "epoch": 0.0540483965502805, "grad_norm": 0.1989426612854004, "learning_rate": 0.00019984350147023275, "loss": 11.688, "step": 2582 }, { "epoch": 0.05406932931424265, "grad_norm": 0.21237680315971375, "learning_rate": 0.0001998433788304841, "loss": 11.6916, "step": 2583 }, { "epoch": 0.054090262078204804, "grad_norm": 0.22134865820407867, "learning_rate": 0.00019984325614273875, "loss": 11.6786, "step": 2584 }, { "epoch": 0.05411119484216696, "grad_norm": 0.19112814962863922, "learning_rate": 0.00019984313340699674, "loss": 11.6867, "step": 2585 }, { "epoch": 0.054132127606129114, "grad_norm": 0.19419535994529724, "learning_rate": 0.00019984301062325814, "loss": 11.6793, "step": 2586 }, { "epoch": 0.054153060370091266, "grad_norm": 0.22541072964668274, "learning_rate": 0.000199842887791523, "loss": 11.6934, "step": 2587 }, { "epoch": 0.05417399313405342, "grad_norm": 0.1841416358947754, "learning_rate": 0.0001998427649117914, "loss": 11.7057, "step": 2588 }, { "epoch": 0.054194925898015577, "grad_norm": 0.18533332645893097, "learning_rate": 0.00019984264198406336, "loss": 11.6987, "step": 2589 }, { "epoch": 0.05421585866197773, "grad_norm": 0.47544798254966736, "learning_rate": 0.00019984251900833898, "loss": 11.7077, "step": 2590 }, { "epoch": 0.05423679142593988, "grad_norm": 0.19981959462165833, "learning_rate": 0.00019984239598461827, "loss": 11.6857, "step": 2591 }, { "epoch": 0.05425772418990203, "grad_norm": 0.21301421523094177, "learning_rate": 0.00019984227291290133, "loss": 11.703, "step": 2592 }, { "epoch": 0.05427865695386419, "grad_norm": 0.21568480134010315, "learning_rate": 0.00019984214979318823, "loss": 11.692, "step": 2593 }, { "epoch": 0.05429958971782634, "grad_norm": 0.18406237661838531, "learning_rate": 0.000199842026625479, "loss": 11.6919, "step": 2594 }, { "epoch": 0.054320522481788494, "grad_norm": 0.2074771225452423, "learning_rate": 0.0001998419034097737, "loss": 11.6876, "step": 2595 }, { "epoch": 0.054341455245750646, "grad_norm": 0.1600818783044815, "learning_rate": 0.0001998417801460724, "loss": 11.686, "step": 2596 }, { "epoch": 0.054362388009712805, "grad_norm": 0.20065151154994965, "learning_rate": 0.00019984165683437514, "loss": 11.686, "step": 2597 }, { "epoch": 0.054383320773674956, "grad_norm": 0.20540638267993927, "learning_rate": 0.00019984153347468202, "loss": 11.6904, "step": 2598 }, { "epoch": 0.05440425353763711, "grad_norm": 0.21342355012893677, "learning_rate": 0.00019984141006699306, "loss": 11.6615, "step": 2599 }, { "epoch": 0.05442518630159926, "grad_norm": 0.18093740940093994, "learning_rate": 0.0001998412866113083, "loss": 11.686, "step": 2600 }, { "epoch": 0.05444611906556142, "grad_norm": 0.24119028449058533, "learning_rate": 0.00019984116310762788, "loss": 11.6838, "step": 2601 }, { "epoch": 0.05446705182952357, "grad_norm": 0.19987648725509644, "learning_rate": 0.0001998410395559518, "loss": 11.7108, "step": 2602 }, { "epoch": 0.05448798459348572, "grad_norm": 0.22319744527339935, "learning_rate": 0.00019984091595628013, "loss": 11.6874, "step": 2603 }, { "epoch": 0.05450891735744788, "grad_norm": 0.18052902817726135, "learning_rate": 0.00019984079230861296, "loss": 11.6768, "step": 2604 }, { "epoch": 0.05452985012141003, "grad_norm": 0.25533491373062134, "learning_rate": 0.00019984066861295028, "loss": 11.6811, "step": 2605 }, { "epoch": 0.054550782885372184, "grad_norm": 0.20748203992843628, "learning_rate": 0.0001998405448692922, "loss": 11.6812, "step": 2606 }, { "epoch": 0.054571715649334336, "grad_norm": 0.19983460009098053, "learning_rate": 0.00019984042107763878, "loss": 11.7102, "step": 2607 }, { "epoch": 0.054592648413296495, "grad_norm": 0.20880162715911865, "learning_rate": 0.00019984029723799006, "loss": 11.6951, "step": 2608 }, { "epoch": 0.05461358117725865, "grad_norm": 0.24215075373649597, "learning_rate": 0.0001998401733503461, "loss": 11.6962, "step": 2609 }, { "epoch": 0.0546345139412208, "grad_norm": 0.22109679877758026, "learning_rate": 0.00019984004941470698, "loss": 11.6926, "step": 2610 }, { "epoch": 0.05465544670518295, "grad_norm": 0.2171069085597992, "learning_rate": 0.00019983992543107275, "loss": 11.6817, "step": 2611 }, { "epoch": 0.05467637946914511, "grad_norm": 0.26119109988212585, "learning_rate": 0.00019983980139944344, "loss": 11.6923, "step": 2612 }, { "epoch": 0.05469731223310726, "grad_norm": 0.20044010877609253, "learning_rate": 0.00019983967731981918, "loss": 11.6751, "step": 2613 }, { "epoch": 0.05471824499706941, "grad_norm": 0.1837320476770401, "learning_rate": 0.00019983955319219997, "loss": 11.6997, "step": 2614 }, { "epoch": 0.054739177761031564, "grad_norm": 0.23604239523410797, "learning_rate": 0.00019983942901658587, "loss": 11.7041, "step": 2615 }, { "epoch": 0.05476011052499372, "grad_norm": 0.19235703349113464, "learning_rate": 0.00019983930479297696, "loss": 11.6814, "step": 2616 }, { "epoch": 0.054781043288955875, "grad_norm": 0.27167463302612305, "learning_rate": 0.0001998391805213733, "loss": 11.6959, "step": 2617 }, { "epoch": 0.054801976052918026, "grad_norm": 0.2916293442249298, "learning_rate": 0.00019983905620177495, "loss": 11.6905, "step": 2618 }, { "epoch": 0.05482290881688018, "grad_norm": 0.2135070562362671, "learning_rate": 0.00019983893183418195, "loss": 11.673, "step": 2619 }, { "epoch": 0.05484384158084234, "grad_norm": 0.2900821268558502, "learning_rate": 0.0001998388074185944, "loss": 11.6799, "step": 2620 }, { "epoch": 0.05486477434480449, "grad_norm": 0.2505926191806793, "learning_rate": 0.00019983868295501232, "loss": 11.6823, "step": 2621 }, { "epoch": 0.05488570710876664, "grad_norm": 0.29013532400131226, "learning_rate": 0.0001998385584434358, "loss": 11.7081, "step": 2622 }, { "epoch": 0.05490663987272879, "grad_norm": 0.21748703718185425, "learning_rate": 0.00019983843388386485, "loss": 11.6898, "step": 2623 }, { "epoch": 0.05492757263669095, "grad_norm": 0.20765767991542816, "learning_rate": 0.0001998383092762996, "loss": 11.6937, "step": 2624 }, { "epoch": 0.0549485054006531, "grad_norm": 0.19215625524520874, "learning_rate": 0.00019983818462074003, "loss": 11.6867, "step": 2625 }, { "epoch": 0.054969438164615254, "grad_norm": 0.25645801424980164, "learning_rate": 0.0001998380599171863, "loss": 11.6864, "step": 2626 }, { "epoch": 0.054990370928577406, "grad_norm": 0.18019743263721466, "learning_rate": 0.00019983793516563836, "loss": 11.6987, "step": 2627 }, { "epoch": 0.055011303692539565, "grad_norm": 0.2615429162979126, "learning_rate": 0.00019983781036609635, "loss": 11.6924, "step": 2628 }, { "epoch": 0.05503223645650172, "grad_norm": 0.2690794765949249, "learning_rate": 0.0001998376855185603, "loss": 11.6932, "step": 2629 }, { "epoch": 0.05505316922046387, "grad_norm": 0.22672322392463684, "learning_rate": 0.00019983756062303028, "loss": 11.6944, "step": 2630 }, { "epoch": 0.05507410198442602, "grad_norm": 0.3044014275074005, "learning_rate": 0.00019983743567950635, "loss": 11.6742, "step": 2631 }, { "epoch": 0.05509503474838818, "grad_norm": 0.21007893979549408, "learning_rate": 0.00019983731068798854, "loss": 11.6962, "step": 2632 }, { "epoch": 0.05511596751235033, "grad_norm": 0.19847559928894043, "learning_rate": 0.00019983718564847694, "loss": 11.6828, "step": 2633 }, { "epoch": 0.05513690027631248, "grad_norm": 0.19232802093029022, "learning_rate": 0.00019983706056097162, "loss": 11.6992, "step": 2634 }, { "epoch": 0.05515783304027464, "grad_norm": 0.20513488352298737, "learning_rate": 0.00019983693542547263, "loss": 11.6865, "step": 2635 }, { "epoch": 0.05517876580423679, "grad_norm": 0.18773622810840607, "learning_rate": 0.00019983681024198, "loss": 11.6927, "step": 2636 }, { "epoch": 0.055199698568198945, "grad_norm": 0.2883535325527191, "learning_rate": 0.0001998366850104938, "loss": 11.7193, "step": 2637 }, { "epoch": 0.055220631332161096, "grad_norm": 0.20113129913806915, "learning_rate": 0.00019983655973101417, "loss": 11.6914, "step": 2638 }, { "epoch": 0.055241564096123255, "grad_norm": 0.31243398785591125, "learning_rate": 0.00019983643440354105, "loss": 11.6817, "step": 2639 }, { "epoch": 0.05526249686008541, "grad_norm": 0.16961149871349335, "learning_rate": 0.00019983630902807456, "loss": 11.6928, "step": 2640 }, { "epoch": 0.05528342962404756, "grad_norm": 0.22599992156028748, "learning_rate": 0.00019983618360461475, "loss": 11.6922, "step": 2641 }, { "epoch": 0.05530436238800971, "grad_norm": 0.2127876579761505, "learning_rate": 0.0001998360581331617, "loss": 11.6885, "step": 2642 }, { "epoch": 0.05532529515197187, "grad_norm": 0.22336791455745697, "learning_rate": 0.00019983593261371546, "loss": 11.685, "step": 2643 }, { "epoch": 0.05534622791593402, "grad_norm": 0.18050000071525574, "learning_rate": 0.00019983580704627612, "loss": 11.6796, "step": 2644 }, { "epoch": 0.05536716067989617, "grad_norm": 0.2155941128730774, "learning_rate": 0.00019983568143084365, "loss": 11.6767, "step": 2645 }, { "epoch": 0.055388093443858324, "grad_norm": 0.19182147085666656, "learning_rate": 0.00019983555576741816, "loss": 11.6811, "step": 2646 }, { "epoch": 0.05540902620782048, "grad_norm": 0.21861638128757477, "learning_rate": 0.00019983543005599978, "loss": 11.6871, "step": 2647 }, { "epoch": 0.055429958971782635, "grad_norm": 0.2333867996931076, "learning_rate": 0.00019983530429658846, "loss": 11.7061, "step": 2648 }, { "epoch": 0.05545089173574479, "grad_norm": 0.2145845890045166, "learning_rate": 0.0001998351784891843, "loss": 11.6938, "step": 2649 }, { "epoch": 0.05547182449970694, "grad_norm": 0.25274187326431274, "learning_rate": 0.00019983505263378742, "loss": 11.6922, "step": 2650 }, { "epoch": 0.0554927572636691, "grad_norm": 0.2463081032037735, "learning_rate": 0.0001998349267303978, "loss": 11.6924, "step": 2651 }, { "epoch": 0.05551369002763125, "grad_norm": 0.20709338784217834, "learning_rate": 0.00019983480077901552, "loss": 11.6915, "step": 2652 }, { "epoch": 0.0555346227915934, "grad_norm": 0.23565815389156342, "learning_rate": 0.00019983467477964066, "loss": 11.6935, "step": 2653 }, { "epoch": 0.05555555555555555, "grad_norm": 0.18120744824409485, "learning_rate": 0.00019983454873227327, "loss": 11.6809, "step": 2654 }, { "epoch": 0.05557648831951771, "grad_norm": 0.20059369504451752, "learning_rate": 0.00019983442263691342, "loss": 11.6932, "step": 2655 }, { "epoch": 0.05559742108347986, "grad_norm": 0.2368079423904419, "learning_rate": 0.00019983429649356115, "loss": 11.6923, "step": 2656 }, { "epoch": 0.055618353847442015, "grad_norm": 0.22077305614948273, "learning_rate": 0.00019983417030221653, "loss": 11.6893, "step": 2657 }, { "epoch": 0.055639286611404166, "grad_norm": 0.1912200003862381, "learning_rate": 0.00019983404406287965, "loss": 11.6895, "step": 2658 }, { "epoch": 0.055660219375366325, "grad_norm": 0.20136523246765137, "learning_rate": 0.00019983391777555053, "loss": 11.6801, "step": 2659 }, { "epoch": 0.05568115213932848, "grad_norm": 0.1955609768629074, "learning_rate": 0.00019983379144022925, "loss": 11.6998, "step": 2660 }, { "epoch": 0.05570208490329063, "grad_norm": 0.2036706656217575, "learning_rate": 0.00019983366505691582, "loss": 11.6854, "step": 2661 }, { "epoch": 0.05572301766725279, "grad_norm": 0.20347945392131805, "learning_rate": 0.0001998335386256104, "loss": 11.685, "step": 2662 }, { "epoch": 0.05574395043121494, "grad_norm": 0.1961677074432373, "learning_rate": 0.000199833412146313, "loss": 11.69, "step": 2663 }, { "epoch": 0.05576488319517709, "grad_norm": 0.21770773828029633, "learning_rate": 0.00019983328561902367, "loss": 11.6861, "step": 2664 }, { "epoch": 0.05578581595913924, "grad_norm": 0.21364295482635498, "learning_rate": 0.00019983315904374246, "loss": 11.6949, "step": 2665 }, { "epoch": 0.0558067487231014, "grad_norm": 0.2245214879512787, "learning_rate": 0.00019983303242046945, "loss": 11.68, "step": 2666 }, { "epoch": 0.05582768148706355, "grad_norm": 0.20346075296401978, "learning_rate": 0.0001998329057492047, "loss": 11.6831, "step": 2667 }, { "epoch": 0.055848614251025705, "grad_norm": 0.24305009841918945, "learning_rate": 0.0001998327790299483, "loss": 11.7022, "step": 2668 }, { "epoch": 0.05586954701498786, "grad_norm": 0.16714197397232056, "learning_rate": 0.00019983265226270025, "loss": 11.6763, "step": 2669 }, { "epoch": 0.055890479778950015, "grad_norm": 0.21798622608184814, "learning_rate": 0.00019983252544746065, "loss": 11.6997, "step": 2670 }, { "epoch": 0.05591141254291217, "grad_norm": 0.24939945340156555, "learning_rate": 0.00019983239858422955, "loss": 11.6892, "step": 2671 }, { "epoch": 0.05593234530687432, "grad_norm": 0.18466037511825562, "learning_rate": 0.00019983227167300706, "loss": 11.691, "step": 2672 }, { "epoch": 0.05595327807083647, "grad_norm": 0.18045291304588318, "learning_rate": 0.00019983214471379313, "loss": 11.6933, "step": 2673 }, { "epoch": 0.05597421083479863, "grad_norm": 0.19768813252449036, "learning_rate": 0.00019983201770658793, "loss": 11.6946, "step": 2674 }, { "epoch": 0.05599514359876078, "grad_norm": 0.20162749290466309, "learning_rate": 0.00019983189065139146, "loss": 11.6874, "step": 2675 }, { "epoch": 0.05601607636272293, "grad_norm": 0.23355990648269653, "learning_rate": 0.0001998317635482038, "loss": 11.6892, "step": 2676 }, { "epoch": 0.056037009126685085, "grad_norm": 0.20058554410934448, "learning_rate": 0.00019983163639702504, "loss": 11.6907, "step": 2677 }, { "epoch": 0.056057941890647243, "grad_norm": 0.2526830732822418, "learning_rate": 0.0001998315091978552, "loss": 11.6844, "step": 2678 }, { "epoch": 0.056078874654609395, "grad_norm": 0.21473591029644012, "learning_rate": 0.00019983138195069433, "loss": 11.6868, "step": 2679 }, { "epoch": 0.05609980741857155, "grad_norm": 0.196672722697258, "learning_rate": 0.00019983125465554253, "loss": 11.6814, "step": 2680 }, { "epoch": 0.0561207401825337, "grad_norm": 0.19453167915344238, "learning_rate": 0.00019983112731239983, "loss": 11.6872, "step": 2681 }, { "epoch": 0.05614167294649586, "grad_norm": 0.314908504486084, "learning_rate": 0.00019983099992126633, "loss": 11.6949, "step": 2682 }, { "epoch": 0.05616260571045801, "grad_norm": 0.24331213533878326, "learning_rate": 0.00019983087248214205, "loss": 11.6845, "step": 2683 }, { "epoch": 0.05618353847442016, "grad_norm": 0.2124185711145401, "learning_rate": 0.00019983074499502708, "loss": 11.6997, "step": 2684 }, { "epoch": 0.05620447123838231, "grad_norm": 0.17494958639144897, "learning_rate": 0.00019983061745992145, "loss": 11.6875, "step": 2685 }, { "epoch": 0.05622540400234447, "grad_norm": 0.25991442799568176, "learning_rate": 0.00019983048987682526, "loss": 11.6829, "step": 2686 }, { "epoch": 0.05624633676630662, "grad_norm": 0.2033088356256485, "learning_rate": 0.00019983036224573858, "loss": 11.6806, "step": 2687 }, { "epoch": 0.056267269530268775, "grad_norm": 0.22890757024288177, "learning_rate": 0.00019983023456666139, "loss": 11.7002, "step": 2688 }, { "epoch": 0.05628820229423093, "grad_norm": 0.22536565363407135, "learning_rate": 0.00019983010683959383, "loss": 11.6841, "step": 2689 }, { "epoch": 0.056309135058193086, "grad_norm": 0.20192070305347443, "learning_rate": 0.00019982997906453593, "loss": 11.6961, "step": 2690 }, { "epoch": 0.05633006782215524, "grad_norm": 0.226556658744812, "learning_rate": 0.00019982985124148775, "loss": 11.6923, "step": 2691 }, { "epoch": 0.05635100058611739, "grad_norm": 0.2111375480890274, "learning_rate": 0.0001998297233704494, "loss": 11.6962, "step": 2692 }, { "epoch": 0.05637193335007955, "grad_norm": 0.18085432052612305, "learning_rate": 0.0001998295954514209, "loss": 11.6875, "step": 2693 }, { "epoch": 0.0563928661140417, "grad_norm": 0.23586530983448029, "learning_rate": 0.00019982946748440226, "loss": 11.6697, "step": 2694 }, { "epoch": 0.05641379887800385, "grad_norm": 0.3068142533302307, "learning_rate": 0.00019982933946939362, "loss": 11.6983, "step": 2695 }, { "epoch": 0.056434731641966, "grad_norm": 0.2123165875673294, "learning_rate": 0.00019982921140639502, "loss": 11.6921, "step": 2696 }, { "epoch": 0.05645566440592816, "grad_norm": 0.24512459337711334, "learning_rate": 0.00019982908329540652, "loss": 11.684, "step": 2697 }, { "epoch": 0.056476597169890314, "grad_norm": 0.2226938158273697, "learning_rate": 0.00019982895513642816, "loss": 11.6764, "step": 2698 }, { "epoch": 0.056497529933852465, "grad_norm": 0.21005170047283173, "learning_rate": 0.00019982882692946006, "loss": 11.6893, "step": 2699 }, { "epoch": 0.05651846269781462, "grad_norm": 0.3502645194530487, "learning_rate": 0.0001998286986745022, "loss": 11.6897, "step": 2700 }, { "epoch": 0.056539395461776776, "grad_norm": 0.22961686551570892, "learning_rate": 0.0001998285703715547, "loss": 11.6891, "step": 2701 }, { "epoch": 0.05656032822573893, "grad_norm": 0.19937552511692047, "learning_rate": 0.0001998284420206176, "loss": 11.6998, "step": 2702 }, { "epoch": 0.05658126098970108, "grad_norm": 0.21158809959888458, "learning_rate": 0.00019982831362169102, "loss": 11.6719, "step": 2703 }, { "epoch": 0.05660219375366323, "grad_norm": 0.20439864695072174, "learning_rate": 0.00019982818517477489, "loss": 11.6844, "step": 2704 }, { "epoch": 0.05662312651762539, "grad_norm": 0.1811400204896927, "learning_rate": 0.0001998280566798694, "loss": 11.6905, "step": 2705 }, { "epoch": 0.05664405928158754, "grad_norm": 0.17060944437980652, "learning_rate": 0.00019982792813697455, "loss": 11.6823, "step": 2706 }, { "epoch": 0.05666499204554969, "grad_norm": 0.20497088134288788, "learning_rate": 0.0001998277995460904, "loss": 11.6823, "step": 2707 }, { "epoch": 0.056685924809511845, "grad_norm": 0.19333137571811676, "learning_rate": 0.00019982767090721707, "loss": 11.6846, "step": 2708 }, { "epoch": 0.056706857573474004, "grad_norm": 0.2691827416419983, "learning_rate": 0.00019982754222035452, "loss": 11.6929, "step": 2709 }, { "epoch": 0.056727790337436156, "grad_norm": 0.30538833141326904, "learning_rate": 0.00019982741348550289, "loss": 11.6791, "step": 2710 }, { "epoch": 0.05674872310139831, "grad_norm": 0.2404932826757431, "learning_rate": 0.00019982728470266224, "loss": 11.6943, "step": 2711 }, { "epoch": 0.05676965586536046, "grad_norm": 0.17752975225448608, "learning_rate": 0.00019982715587183258, "loss": 11.6818, "step": 2712 }, { "epoch": 0.05679058862932262, "grad_norm": 0.25968626141548157, "learning_rate": 0.00019982702699301402, "loss": 11.7034, "step": 2713 }, { "epoch": 0.05681152139328477, "grad_norm": 0.21470552682876587, "learning_rate": 0.0001998268980662066, "loss": 11.6962, "step": 2714 }, { "epoch": 0.05683245415724692, "grad_norm": 0.21312719583511353, "learning_rate": 0.00019982676909141044, "loss": 11.7006, "step": 2715 }, { "epoch": 0.05685338692120907, "grad_norm": 0.17065052688121796, "learning_rate": 0.00019982664006862552, "loss": 11.6942, "step": 2716 }, { "epoch": 0.05687431968517123, "grad_norm": 0.1740816980600357, "learning_rate": 0.00019982651099785192, "loss": 11.6833, "step": 2717 }, { "epoch": 0.056895252449133384, "grad_norm": 0.190944105386734, "learning_rate": 0.00019982638187908975, "loss": 11.694, "step": 2718 }, { "epoch": 0.056916185213095535, "grad_norm": 0.19802534580230713, "learning_rate": 0.000199826252712339, "loss": 11.6891, "step": 2719 }, { "epoch": 0.056937117977057694, "grad_norm": 0.21845443546772003, "learning_rate": 0.00019982612349759977, "loss": 11.7012, "step": 2720 }, { "epoch": 0.056958050741019846, "grad_norm": 0.22012916207313538, "learning_rate": 0.00019982599423487213, "loss": 11.6797, "step": 2721 }, { "epoch": 0.056978983504982, "grad_norm": 0.2154974788427353, "learning_rate": 0.00019982586492415616, "loss": 11.6896, "step": 2722 }, { "epoch": 0.05699991626894415, "grad_norm": 0.2320767045021057, "learning_rate": 0.00019982573556545188, "loss": 11.6947, "step": 2723 }, { "epoch": 0.05702084903290631, "grad_norm": 0.22532202303409576, "learning_rate": 0.00019982560615875935, "loss": 11.6815, "step": 2724 }, { "epoch": 0.05704178179686846, "grad_norm": 0.20972086489200592, "learning_rate": 0.00019982547670407868, "loss": 11.677, "step": 2725 }, { "epoch": 0.05706271456083061, "grad_norm": 0.22673554718494415, "learning_rate": 0.00019982534720140987, "loss": 11.7021, "step": 2726 }, { "epoch": 0.05708364732479276, "grad_norm": 0.20399878919124603, "learning_rate": 0.00019982521765075304, "loss": 11.6742, "step": 2727 }, { "epoch": 0.05710458008875492, "grad_norm": 0.23283901810646057, "learning_rate": 0.0001998250880521082, "loss": 11.6815, "step": 2728 }, { "epoch": 0.057125512852717074, "grad_norm": 0.19245803356170654, "learning_rate": 0.00019982495840547548, "loss": 11.7088, "step": 2729 }, { "epoch": 0.057146445616679226, "grad_norm": 0.19647915661334991, "learning_rate": 0.00019982482871085488, "loss": 11.6801, "step": 2730 }, { "epoch": 0.05716737838064138, "grad_norm": 0.2282344102859497, "learning_rate": 0.00019982469896824648, "loss": 11.68, "step": 2731 }, { "epoch": 0.057188311144603536, "grad_norm": 0.2050354927778244, "learning_rate": 0.00019982456917765035, "loss": 11.69, "step": 2732 }, { "epoch": 0.05720924390856569, "grad_norm": 0.1876426339149475, "learning_rate": 0.00019982443933906657, "loss": 11.6822, "step": 2733 }, { "epoch": 0.05723017667252784, "grad_norm": 0.22231604158878326, "learning_rate": 0.00019982430945249515, "loss": 11.6949, "step": 2734 }, { "epoch": 0.05725110943648999, "grad_norm": 0.2447212189435959, "learning_rate": 0.0001998241795179362, "loss": 11.6961, "step": 2735 }, { "epoch": 0.05727204220045215, "grad_norm": 0.19265064597129822, "learning_rate": 0.00019982404953538976, "loss": 11.6865, "step": 2736 }, { "epoch": 0.0572929749644143, "grad_norm": 0.22581149637699127, "learning_rate": 0.0001998239195048559, "loss": 11.7135, "step": 2737 }, { "epoch": 0.057313907728376454, "grad_norm": 0.21611471474170685, "learning_rate": 0.00019982378942633467, "loss": 11.6855, "step": 2738 }, { "epoch": 0.057334840492338605, "grad_norm": 0.23089835047721863, "learning_rate": 0.00019982365929982617, "loss": 11.6894, "step": 2739 }, { "epoch": 0.057355773256300764, "grad_norm": 0.18165910243988037, "learning_rate": 0.00019982352912533043, "loss": 11.6915, "step": 2740 }, { "epoch": 0.057376706020262916, "grad_norm": 0.2188018560409546, "learning_rate": 0.0001998233989028475, "loss": 11.7047, "step": 2741 }, { "epoch": 0.05739763878422507, "grad_norm": 0.22060230374336243, "learning_rate": 0.00019982326863237748, "loss": 11.6778, "step": 2742 }, { "epoch": 0.05741857154818722, "grad_norm": 0.21238799393177032, "learning_rate": 0.00019982313831392042, "loss": 11.6733, "step": 2743 }, { "epoch": 0.05743950431214938, "grad_norm": 0.203526571393013, "learning_rate": 0.00019982300794747638, "loss": 11.6917, "step": 2744 }, { "epoch": 0.05746043707611153, "grad_norm": 0.1882745623588562, "learning_rate": 0.0001998228775330454, "loss": 11.6884, "step": 2745 }, { "epoch": 0.05748136984007368, "grad_norm": 0.6672807931900024, "learning_rate": 0.00019982274707062753, "loss": 11.6671, "step": 2746 }, { "epoch": 0.05750230260403583, "grad_norm": 0.23615533113479614, "learning_rate": 0.0001998226165602229, "loss": 11.6871, "step": 2747 }, { "epoch": 0.05752323536799799, "grad_norm": 0.2080155909061432, "learning_rate": 0.00019982248600183155, "loss": 11.6878, "step": 2748 }, { "epoch": 0.057544168131960144, "grad_norm": 0.18526691198349, "learning_rate": 0.00019982235539545354, "loss": 11.68, "step": 2749 }, { "epoch": 0.057565100895922296, "grad_norm": 0.20839795470237732, "learning_rate": 0.00019982222474108888, "loss": 11.6963, "step": 2750 }, { "epoch": 0.057586033659884454, "grad_norm": 0.21881501376628876, "learning_rate": 0.00019982209403873773, "loss": 11.7052, "step": 2751 }, { "epoch": 0.057606966423846606, "grad_norm": 0.27846360206604004, "learning_rate": 0.00019982196328840007, "loss": 11.6805, "step": 2752 }, { "epoch": 0.05762789918780876, "grad_norm": 0.21335244178771973, "learning_rate": 0.000199821832490076, "loss": 11.6906, "step": 2753 }, { "epoch": 0.05764883195177091, "grad_norm": 0.31618332862854004, "learning_rate": 0.00019982170164376553, "loss": 11.7052, "step": 2754 }, { "epoch": 0.05766976471573307, "grad_norm": 0.18546602129936218, "learning_rate": 0.0001998215707494688, "loss": 11.6807, "step": 2755 }, { "epoch": 0.05769069747969522, "grad_norm": 0.1883249580860138, "learning_rate": 0.00019982143980718585, "loss": 11.6938, "step": 2756 }, { "epoch": 0.05771163024365737, "grad_norm": 0.22566376626491547, "learning_rate": 0.00019982130881691674, "loss": 11.6821, "step": 2757 }, { "epoch": 0.057732563007619524, "grad_norm": 0.19639627635478973, "learning_rate": 0.0001998211777786615, "loss": 11.6761, "step": 2758 }, { "epoch": 0.05775349577158168, "grad_norm": 0.18196439743041992, "learning_rate": 0.00019982104669242023, "loss": 11.6818, "step": 2759 }, { "epoch": 0.057774428535543834, "grad_norm": 0.2146778553724289, "learning_rate": 0.000199820915558193, "loss": 11.6732, "step": 2760 }, { "epoch": 0.057795361299505986, "grad_norm": 0.2665591835975647, "learning_rate": 0.00019982078437597985, "loss": 11.6894, "step": 2761 }, { "epoch": 0.05781629406346814, "grad_norm": 0.23707859218120575, "learning_rate": 0.00019982065314578084, "loss": 11.6932, "step": 2762 }, { "epoch": 0.057837226827430296, "grad_norm": 0.17600063979625702, "learning_rate": 0.000199820521867596, "loss": 11.6771, "step": 2763 }, { "epoch": 0.05785815959139245, "grad_norm": 0.2003391683101654, "learning_rate": 0.0001998203905414255, "loss": 11.67, "step": 2764 }, { "epoch": 0.0578790923553546, "grad_norm": 0.20037148892879486, "learning_rate": 0.00019982025916726932, "loss": 11.6955, "step": 2765 }, { "epoch": 0.05790002511931675, "grad_norm": 0.2081756591796875, "learning_rate": 0.00019982012774512754, "loss": 11.676, "step": 2766 }, { "epoch": 0.05792095788327891, "grad_norm": 0.1968836784362793, "learning_rate": 0.00019981999627500022, "loss": 11.6932, "step": 2767 }, { "epoch": 0.05794189064724106, "grad_norm": 0.2231103777885437, "learning_rate": 0.00019981986475688741, "loss": 11.6937, "step": 2768 }, { "epoch": 0.057962823411203214, "grad_norm": 0.22535473108291626, "learning_rate": 0.00019981973319078923, "loss": 11.7049, "step": 2769 }, { "epoch": 0.057983756175165366, "grad_norm": 0.2480146288871765, "learning_rate": 0.00019981960157670566, "loss": 11.684, "step": 2770 }, { "epoch": 0.058004688939127524, "grad_norm": 0.22597232460975647, "learning_rate": 0.00019981946991463683, "loss": 11.7114, "step": 2771 }, { "epoch": 0.058025621703089676, "grad_norm": 0.26872023940086365, "learning_rate": 0.00019981933820458275, "loss": 11.6949, "step": 2772 }, { "epoch": 0.05804655446705183, "grad_norm": 0.21683558821678162, "learning_rate": 0.00019981920644654357, "loss": 11.7052, "step": 2773 }, { "epoch": 0.05806748723101398, "grad_norm": 0.1968930959701538, "learning_rate": 0.00019981907464051925, "loss": 11.6811, "step": 2774 }, { "epoch": 0.05808841999497614, "grad_norm": 0.24113145470619202, "learning_rate": 0.00019981894278650996, "loss": 11.6866, "step": 2775 }, { "epoch": 0.05810935275893829, "grad_norm": 0.20057053864002228, "learning_rate": 0.00019981881088451564, "loss": 11.6936, "step": 2776 }, { "epoch": 0.05813028552290044, "grad_norm": 0.2339523732662201, "learning_rate": 0.00019981867893453643, "loss": 11.7016, "step": 2777 }, { "epoch": 0.0581512182868626, "grad_norm": 0.3123690187931061, "learning_rate": 0.0001998185469365724, "loss": 11.7126, "step": 2778 }, { "epoch": 0.05817215105082475, "grad_norm": 0.2099543660879135, "learning_rate": 0.00019981841489062358, "loss": 11.6963, "step": 2779 }, { "epoch": 0.058193083814786904, "grad_norm": 0.17349907755851746, "learning_rate": 0.00019981828279669005, "loss": 11.6877, "step": 2780 }, { "epoch": 0.058214016578749056, "grad_norm": 0.22242510318756104, "learning_rate": 0.00019981815065477188, "loss": 11.7003, "step": 2781 }, { "epoch": 0.058234949342711215, "grad_norm": 0.2684323489665985, "learning_rate": 0.0001998180184648691, "loss": 11.6807, "step": 2782 }, { "epoch": 0.058255882106673366, "grad_norm": 0.22463001310825348, "learning_rate": 0.00019981788622698183, "loss": 11.6986, "step": 2783 }, { "epoch": 0.05827681487063552, "grad_norm": 0.19378945231437683, "learning_rate": 0.0001998177539411101, "loss": 11.6812, "step": 2784 }, { "epoch": 0.05829774763459767, "grad_norm": 0.2645070254802704, "learning_rate": 0.00019981762160725395, "loss": 11.7049, "step": 2785 }, { "epoch": 0.05831868039855983, "grad_norm": 0.2122853398323059, "learning_rate": 0.0001998174892254135, "loss": 11.6931, "step": 2786 }, { "epoch": 0.05833961316252198, "grad_norm": 0.1981516033411026, "learning_rate": 0.00019981735679558876, "loss": 11.676, "step": 2787 }, { "epoch": 0.05836054592648413, "grad_norm": 0.27290990948677063, "learning_rate": 0.0001998172243177798, "loss": 11.6884, "step": 2788 }, { "epoch": 0.058381478690446284, "grad_norm": 0.1859254091978073, "learning_rate": 0.00019981709179198672, "loss": 11.6886, "step": 2789 }, { "epoch": 0.05840241145440844, "grad_norm": 0.2293388694524765, "learning_rate": 0.00019981695921820957, "loss": 11.6829, "step": 2790 }, { "epoch": 0.058423344218370594, "grad_norm": 0.21547068655490875, "learning_rate": 0.0001998168265964484, "loss": 11.68, "step": 2791 }, { "epoch": 0.058444276982332746, "grad_norm": 0.20587719976902008, "learning_rate": 0.0001998166939267033, "loss": 11.6891, "step": 2792 }, { "epoch": 0.0584652097462949, "grad_norm": 0.21845340728759766, "learning_rate": 0.00019981656120897427, "loss": 11.6983, "step": 2793 }, { "epoch": 0.05848614251025706, "grad_norm": 0.240823894739151, "learning_rate": 0.00019981642844326146, "loss": 11.6897, "step": 2794 }, { "epoch": 0.05850707527421921, "grad_norm": 0.24943752586841583, "learning_rate": 0.00019981629562956487, "loss": 11.6878, "step": 2795 }, { "epoch": 0.05852800803818136, "grad_norm": 0.21770668029785156, "learning_rate": 0.0001998161627678846, "loss": 11.686, "step": 2796 }, { "epoch": 0.05854894080214351, "grad_norm": 0.18754509091377258, "learning_rate": 0.0001998160298582207, "loss": 11.6868, "step": 2797 }, { "epoch": 0.05856987356610567, "grad_norm": 0.31918010115623474, "learning_rate": 0.00019981589690057323, "loss": 11.6958, "step": 2798 }, { "epoch": 0.05859080633006782, "grad_norm": 0.22833703458309174, "learning_rate": 0.00019981576389494228, "loss": 11.6984, "step": 2799 }, { "epoch": 0.058611739094029974, "grad_norm": 0.2043858766555786, "learning_rate": 0.00019981563084132785, "loss": 11.6745, "step": 2800 }, { "epoch": 0.058632671857992126, "grad_norm": 0.27641651034355164, "learning_rate": 0.00019981549773973004, "loss": 11.7043, "step": 2801 }, { "epoch": 0.058653604621954285, "grad_norm": 0.18273095786571503, "learning_rate": 0.00019981536459014895, "loss": 11.6787, "step": 2802 }, { "epoch": 0.058674537385916437, "grad_norm": 0.20952656865119934, "learning_rate": 0.00019981523139258464, "loss": 11.7128, "step": 2803 }, { "epoch": 0.05869547014987859, "grad_norm": 0.2017338126897812, "learning_rate": 0.00019981509814703713, "loss": 11.7045, "step": 2804 }, { "epoch": 0.05871640291384074, "grad_norm": 0.2679648995399475, "learning_rate": 0.00019981496485350645, "loss": 11.6876, "step": 2805 }, { "epoch": 0.0587373356778029, "grad_norm": 0.22752927243709564, "learning_rate": 0.00019981483151199276, "loss": 11.7033, "step": 2806 }, { "epoch": 0.05875826844176505, "grad_norm": 0.1877589076757431, "learning_rate": 0.0001998146981224961, "loss": 11.6838, "step": 2807 }, { "epoch": 0.0587792012057272, "grad_norm": 0.23144258558750153, "learning_rate": 0.00019981456468501647, "loss": 11.6966, "step": 2808 }, { "epoch": 0.05880013396968936, "grad_norm": 0.23788613080978394, "learning_rate": 0.00019981443119955402, "loss": 11.7025, "step": 2809 }, { "epoch": 0.05882106673365151, "grad_norm": 0.20398713648319244, "learning_rate": 0.00019981429766610873, "loss": 11.6794, "step": 2810 }, { "epoch": 0.058841999497613665, "grad_norm": 0.2137889266014099, "learning_rate": 0.00019981416408468072, "loss": 11.6814, "step": 2811 }, { "epoch": 0.058862932261575816, "grad_norm": 0.21044452488422394, "learning_rate": 0.00019981403045527007, "loss": 11.6781, "step": 2812 }, { "epoch": 0.058883865025537975, "grad_norm": 0.2302992343902588, "learning_rate": 0.00019981389677787678, "loss": 11.6697, "step": 2813 }, { "epoch": 0.05890479778950013, "grad_norm": 0.18844346702098846, "learning_rate": 0.00019981376305250098, "loss": 11.6982, "step": 2814 }, { "epoch": 0.05892573055346228, "grad_norm": 0.2165585458278656, "learning_rate": 0.00019981362927914268, "loss": 11.6799, "step": 2815 }, { "epoch": 0.05894666331742443, "grad_norm": 0.19900165498256683, "learning_rate": 0.00019981349545780198, "loss": 11.681, "step": 2816 }, { "epoch": 0.05896759608138659, "grad_norm": 0.21236638724803925, "learning_rate": 0.00019981336158847893, "loss": 11.689, "step": 2817 }, { "epoch": 0.05898852884534874, "grad_norm": 0.19388948380947113, "learning_rate": 0.00019981322767117357, "loss": 11.6868, "step": 2818 }, { "epoch": 0.05900946160931089, "grad_norm": 0.23067888617515564, "learning_rate": 0.00019981309370588603, "loss": 11.6819, "step": 2819 }, { "epoch": 0.059030394373273044, "grad_norm": 0.24995233118534088, "learning_rate": 0.00019981295969261634, "loss": 11.6831, "step": 2820 }, { "epoch": 0.0590513271372352, "grad_norm": 0.1949579119682312, "learning_rate": 0.00019981282563136452, "loss": 11.6893, "step": 2821 }, { "epoch": 0.059072259901197355, "grad_norm": 0.2816891074180603, "learning_rate": 0.00019981269152213069, "loss": 11.6929, "step": 2822 }, { "epoch": 0.05909319266515951, "grad_norm": 0.19915269315242767, "learning_rate": 0.0001998125573649149, "loss": 11.6879, "step": 2823 }, { "epoch": 0.05911412542912166, "grad_norm": 0.19666974246501923, "learning_rate": 0.00019981242315971723, "loss": 11.6987, "step": 2824 }, { "epoch": 0.05913505819308382, "grad_norm": 0.2099580317735672, "learning_rate": 0.00019981228890653772, "loss": 11.6872, "step": 2825 }, { "epoch": 0.05915599095704597, "grad_norm": 0.2602591812610626, "learning_rate": 0.00019981215460537644, "loss": 11.6842, "step": 2826 }, { "epoch": 0.05917692372100812, "grad_norm": 1.7909421920776367, "learning_rate": 0.00019981202025623346, "loss": 11.6791, "step": 2827 }, { "epoch": 0.05919785648497027, "grad_norm": 0.21142077445983887, "learning_rate": 0.00019981188585910885, "loss": 11.6794, "step": 2828 }, { "epoch": 0.05921878924893243, "grad_norm": 0.7121374607086182, "learning_rate": 0.00019981175141400262, "loss": 11.7075, "step": 2829 }, { "epoch": 0.05923972201289458, "grad_norm": 0.1907658576965332, "learning_rate": 0.00019981161692091492, "loss": 11.6737, "step": 2830 }, { "epoch": 0.059260654776856735, "grad_norm": 0.2294521927833557, "learning_rate": 0.00019981148237984577, "loss": 11.6684, "step": 2831 }, { "epoch": 0.059281587540818886, "grad_norm": 0.2512626349925995, "learning_rate": 0.00019981134779079526, "loss": 11.6989, "step": 2832 }, { "epoch": 0.059302520304781045, "grad_norm": 0.20264139771461487, "learning_rate": 0.0001998112131537634, "loss": 11.6871, "step": 2833 }, { "epoch": 0.0593234530687432, "grad_norm": 0.22388899326324463, "learning_rate": 0.0001998110784687503, "loss": 11.6761, "step": 2834 }, { "epoch": 0.05934438583270535, "grad_norm": 0.21415995061397552, "learning_rate": 0.00019981094373575605, "loss": 11.6931, "step": 2835 }, { "epoch": 0.05936531859666751, "grad_norm": 0.21574176847934723, "learning_rate": 0.00019981080895478063, "loss": 11.7001, "step": 2836 }, { "epoch": 0.05938625136062966, "grad_norm": 0.24043776094913483, "learning_rate": 0.00019981067412582415, "loss": 11.6989, "step": 2837 }, { "epoch": 0.05940718412459181, "grad_norm": 0.20599853992462158, "learning_rate": 0.0001998105392488867, "loss": 11.6743, "step": 2838 }, { "epoch": 0.05942811688855396, "grad_norm": 0.2296242117881775, "learning_rate": 0.00019981040432396835, "loss": 11.7098, "step": 2839 }, { "epoch": 0.05944904965251612, "grad_norm": 0.21903209388256073, "learning_rate": 0.0001998102693510691, "loss": 11.6954, "step": 2840 }, { "epoch": 0.05946998241647827, "grad_norm": 0.21892118453979492, "learning_rate": 0.00019981013433018906, "loss": 11.6918, "step": 2841 }, { "epoch": 0.059490915180440425, "grad_norm": 0.19473539292812347, "learning_rate": 0.00019980999926132832, "loss": 11.6954, "step": 2842 }, { "epoch": 0.05951184794440258, "grad_norm": 0.21841047704219818, "learning_rate": 0.00019980986414448685, "loss": 11.6955, "step": 2843 }, { "epoch": 0.059532780708364735, "grad_norm": 0.20432843267917633, "learning_rate": 0.00019980972897966484, "loss": 11.6834, "step": 2844 }, { "epoch": 0.05955371347232689, "grad_norm": 0.19004733860492706, "learning_rate": 0.00019980959376686227, "loss": 11.6878, "step": 2845 }, { "epoch": 0.05957464623628904, "grad_norm": 0.20156073570251465, "learning_rate": 0.00019980945850607922, "loss": 11.696, "step": 2846 }, { "epoch": 0.05959557900025119, "grad_norm": 0.2354532778263092, "learning_rate": 0.00019980932319731576, "loss": 11.691, "step": 2847 }, { "epoch": 0.05961651176421335, "grad_norm": 0.209737628698349, "learning_rate": 0.000199809187840572, "loss": 11.685, "step": 2848 }, { "epoch": 0.0596374445281755, "grad_norm": 0.21202737092971802, "learning_rate": 0.0001998090524358479, "loss": 11.6914, "step": 2849 }, { "epoch": 0.05965837729213765, "grad_norm": 0.17638923227787018, "learning_rate": 0.00019980891698314363, "loss": 11.7014, "step": 2850 }, { "epoch": 0.059679310056099805, "grad_norm": 0.18967078626155853, "learning_rate": 0.0001998087814824592, "loss": 11.6872, "step": 2851 }, { "epoch": 0.05970024282006196, "grad_norm": 0.2213836908340454, "learning_rate": 0.0001998086459337947, "loss": 11.6845, "step": 2852 }, { "epoch": 0.059721175584024115, "grad_norm": 0.1769258677959442, "learning_rate": 0.00019980851033715017, "loss": 11.6918, "step": 2853 }, { "epoch": 0.05974210834798627, "grad_norm": 0.22930431365966797, "learning_rate": 0.0001998083746925257, "loss": 11.6916, "step": 2854 }, { "epoch": 0.05976304111194842, "grad_norm": 0.20311294496059418, "learning_rate": 0.0001998082389999213, "loss": 11.6931, "step": 2855 }, { "epoch": 0.05978397387591058, "grad_norm": 0.1703055500984192, "learning_rate": 0.00019980810325933714, "loss": 11.6872, "step": 2856 }, { "epoch": 0.05980490663987273, "grad_norm": 0.24084998667240143, "learning_rate": 0.0001998079674707732, "loss": 11.6949, "step": 2857 }, { "epoch": 0.05982583940383488, "grad_norm": 0.21180465817451477, "learning_rate": 0.00019980783163422956, "loss": 11.6828, "step": 2858 }, { "epoch": 0.05984677216779703, "grad_norm": 0.20927296578884125, "learning_rate": 0.00019980769574970633, "loss": 11.6871, "step": 2859 }, { "epoch": 0.05986770493175919, "grad_norm": 0.24703525006771088, "learning_rate": 0.0001998075598172035, "loss": 11.6814, "step": 2860 }, { "epoch": 0.05988863769572134, "grad_norm": 0.24021613597869873, "learning_rate": 0.0001998074238367212, "loss": 11.7042, "step": 2861 }, { "epoch": 0.059909570459683495, "grad_norm": 0.1857343167066574, "learning_rate": 0.0001998072878082595, "loss": 11.6845, "step": 2862 }, { "epoch": 0.059930503223645654, "grad_norm": 0.20526733994483948, "learning_rate": 0.00019980715173181836, "loss": 11.6844, "step": 2863 }, { "epoch": 0.059951435987607805, "grad_norm": 0.26872631907463074, "learning_rate": 0.00019980701560739798, "loss": 11.6956, "step": 2864 }, { "epoch": 0.05997236875156996, "grad_norm": 0.18952171504497528, "learning_rate": 0.00019980687943499834, "loss": 11.6786, "step": 2865 }, { "epoch": 0.05999330151553211, "grad_norm": 0.3101654350757599, "learning_rate": 0.00019980674321461953, "loss": 11.7, "step": 2866 }, { "epoch": 0.06001423427949427, "grad_norm": 0.22949564456939697, "learning_rate": 0.00019980660694626163, "loss": 11.6888, "step": 2867 }, { "epoch": 0.06003516704345642, "grad_norm": 0.17610955238342285, "learning_rate": 0.0001998064706299247, "loss": 11.6845, "step": 2868 }, { "epoch": 0.06005609980741857, "grad_norm": 0.21883641183376312, "learning_rate": 0.0001998063342656088, "loss": 11.6851, "step": 2869 }, { "epoch": 0.06007703257138072, "grad_norm": 0.16784465312957764, "learning_rate": 0.000199806197853314, "loss": 11.6841, "step": 2870 }, { "epoch": 0.06009796533534288, "grad_norm": 0.2208878993988037, "learning_rate": 0.00019980606139304035, "loss": 11.6678, "step": 2871 }, { "epoch": 0.06011889809930503, "grad_norm": 0.24563132226467133, "learning_rate": 0.00019980592488478793, "loss": 11.6875, "step": 2872 }, { "epoch": 0.060139830863267185, "grad_norm": 0.3043941259384155, "learning_rate": 0.0001998057883285568, "loss": 11.6926, "step": 2873 }, { "epoch": 0.06016076362722934, "grad_norm": 0.23298631608486176, "learning_rate": 0.000199805651724347, "loss": 11.6746, "step": 2874 }, { "epoch": 0.060181696391191496, "grad_norm": 0.18698102235794067, "learning_rate": 0.00019980551507215865, "loss": 11.6831, "step": 2875 }, { "epoch": 0.06020262915515365, "grad_norm": 0.24750931560993195, "learning_rate": 0.0001998053783719918, "loss": 11.6902, "step": 2876 }, { "epoch": 0.0602235619191158, "grad_norm": 0.21805886924266815, "learning_rate": 0.00019980524162384648, "loss": 11.6956, "step": 2877 }, { "epoch": 0.06024449468307795, "grad_norm": 0.21429850161075592, "learning_rate": 0.00019980510482772278, "loss": 11.6852, "step": 2878 }, { "epoch": 0.06026542744704011, "grad_norm": 0.20901405811309814, "learning_rate": 0.0001998049679836208, "loss": 11.6648, "step": 2879 }, { "epoch": 0.06028636021100226, "grad_norm": 0.2089764028787613, "learning_rate": 0.00019980483109154054, "loss": 11.6898, "step": 2880 }, { "epoch": 0.06030729297496441, "grad_norm": 0.21110397577285767, "learning_rate": 0.00019980469415148213, "loss": 11.6884, "step": 2881 }, { "epoch": 0.060328225738926565, "grad_norm": 0.21771793067455292, "learning_rate": 0.00019980455716344558, "loss": 11.6802, "step": 2882 }, { "epoch": 0.060349158502888724, "grad_norm": 0.17885369062423706, "learning_rate": 0.00019980442012743096, "loss": 11.679, "step": 2883 }, { "epoch": 0.060370091266850875, "grad_norm": 0.1862775832414627, "learning_rate": 0.0001998042830434384, "loss": 11.684, "step": 2884 }, { "epoch": 0.06039102403081303, "grad_norm": 0.19412928819656372, "learning_rate": 0.00019980414591146787, "loss": 11.6733, "step": 2885 }, { "epoch": 0.06041195679477518, "grad_norm": 0.3366614282131195, "learning_rate": 0.0001998040087315195, "loss": 11.7027, "step": 2886 }, { "epoch": 0.06043288955873734, "grad_norm": 0.263977974653244, "learning_rate": 0.00019980387150359338, "loss": 11.6863, "step": 2887 }, { "epoch": 0.06045382232269949, "grad_norm": 0.23638996481895447, "learning_rate": 0.0001998037342276895, "loss": 11.695, "step": 2888 }, { "epoch": 0.06047475508666164, "grad_norm": 0.27743491530418396, "learning_rate": 0.00019980359690380798, "loss": 11.6914, "step": 2889 }, { "epoch": 0.06049568785062379, "grad_norm": 0.21373184025287628, "learning_rate": 0.0001998034595319489, "loss": 11.6834, "step": 2890 }, { "epoch": 0.06051662061458595, "grad_norm": 0.19484098255634308, "learning_rate": 0.00019980332211211226, "loss": 11.6833, "step": 2891 }, { "epoch": 0.060537553378548103, "grad_norm": 0.1999412477016449, "learning_rate": 0.00019980318464429817, "loss": 11.6746, "step": 2892 }, { "epoch": 0.060558486142510255, "grad_norm": 0.23480919003486633, "learning_rate": 0.0001998030471285067, "loss": 11.6836, "step": 2893 }, { "epoch": 0.060579418906472414, "grad_norm": 0.19441764056682587, "learning_rate": 0.00019980290956473788, "loss": 11.6883, "step": 2894 }, { "epoch": 0.060600351670434566, "grad_norm": 0.19107899069786072, "learning_rate": 0.00019980277195299183, "loss": 11.6909, "step": 2895 }, { "epoch": 0.06062128443439672, "grad_norm": 0.20983469486236572, "learning_rate": 0.00019980263429326855, "loss": 11.6943, "step": 2896 }, { "epoch": 0.06064221719835887, "grad_norm": 0.23459310829639435, "learning_rate": 0.00019980249658556817, "loss": 11.6753, "step": 2897 }, { "epoch": 0.06066314996232103, "grad_norm": 0.28810396790504456, "learning_rate": 0.00019980235882989072, "loss": 11.6736, "step": 2898 }, { "epoch": 0.06068408272628318, "grad_norm": 0.23257973790168762, "learning_rate": 0.0001998022210262363, "loss": 11.6983, "step": 2899 }, { "epoch": 0.06070501549024533, "grad_norm": 0.19930122792720795, "learning_rate": 0.00019980208317460494, "loss": 11.6839, "step": 2900 }, { "epoch": 0.06072594825420748, "grad_norm": 0.1867225170135498, "learning_rate": 0.00019980194527499673, "loss": 11.6802, "step": 2901 }, { "epoch": 0.06074688101816964, "grad_norm": 0.21903295814990997, "learning_rate": 0.00019980180732741168, "loss": 11.6858, "step": 2902 }, { "epoch": 0.060767813782131794, "grad_norm": 0.18905501067638397, "learning_rate": 0.00019980166933184995, "loss": 11.6753, "step": 2903 }, { "epoch": 0.060788746546093946, "grad_norm": 0.21802160143852234, "learning_rate": 0.00019980153128831152, "loss": 11.6675, "step": 2904 }, { "epoch": 0.0608096793100561, "grad_norm": 0.20501454174518585, "learning_rate": 0.0001998013931967965, "loss": 11.6845, "step": 2905 }, { "epoch": 0.060830612074018256, "grad_norm": 0.21319739520549774, "learning_rate": 0.00019980125505730497, "loss": 11.6898, "step": 2906 }, { "epoch": 0.06085154483798041, "grad_norm": 0.2347480058670044, "learning_rate": 0.00019980111686983696, "loss": 11.7082, "step": 2907 }, { "epoch": 0.06087247760194256, "grad_norm": 0.23517613112926483, "learning_rate": 0.00019980097863439258, "loss": 11.7047, "step": 2908 }, { "epoch": 0.06089341036590471, "grad_norm": 0.18869219720363617, "learning_rate": 0.00019980084035097184, "loss": 11.6885, "step": 2909 }, { "epoch": 0.06091434312986687, "grad_norm": 0.215090811252594, "learning_rate": 0.00019980070201957486, "loss": 11.6758, "step": 2910 }, { "epoch": 0.06093527589382902, "grad_norm": 0.17856132984161377, "learning_rate": 0.00019980056364020166, "loss": 11.6785, "step": 2911 }, { "epoch": 0.060956208657791174, "grad_norm": 0.1864752620458603, "learning_rate": 0.00019980042521285235, "loss": 11.6884, "step": 2912 }, { "epoch": 0.060977141421753325, "grad_norm": 0.16673240065574646, "learning_rate": 0.00019980028673752696, "loss": 11.6774, "step": 2913 }, { "epoch": 0.060998074185715484, "grad_norm": 0.1683795005083084, "learning_rate": 0.00019980014821422557, "loss": 11.6842, "step": 2914 }, { "epoch": 0.061019006949677636, "grad_norm": 0.174799382686615, "learning_rate": 0.00019980000964294826, "loss": 11.6932, "step": 2915 }, { "epoch": 0.06103993971363979, "grad_norm": 0.1711519956588745, "learning_rate": 0.00019979987102369505, "loss": 11.6788, "step": 2916 }, { "epoch": 0.06106087247760194, "grad_norm": 0.21572905778884888, "learning_rate": 0.0001997997323564661, "loss": 11.6928, "step": 2917 }, { "epoch": 0.0610818052415641, "grad_norm": 0.22072874009609222, "learning_rate": 0.00019979959364126138, "loss": 11.688, "step": 2918 }, { "epoch": 0.06110273800552625, "grad_norm": 0.20629070699214935, "learning_rate": 0.000199799454878081, "loss": 11.6792, "step": 2919 }, { "epoch": 0.0611236707694884, "grad_norm": 0.22080650925636292, "learning_rate": 0.00019979931606692504, "loss": 11.6824, "step": 2920 }, { "epoch": 0.06114460353345056, "grad_norm": 0.25012096762657166, "learning_rate": 0.00019979917720779354, "loss": 11.6947, "step": 2921 }, { "epoch": 0.06116553629741271, "grad_norm": 0.272830992937088, "learning_rate": 0.00019979903830068656, "loss": 11.6654, "step": 2922 }, { "epoch": 0.061186469061374864, "grad_norm": 0.1893187016248703, "learning_rate": 0.0001997988993456042, "loss": 11.703, "step": 2923 }, { "epoch": 0.061207401825337016, "grad_norm": 0.19319868087768555, "learning_rate": 0.0001997987603425465, "loss": 11.6802, "step": 2924 }, { "epoch": 0.061228334589299174, "grad_norm": 0.19862748682498932, "learning_rate": 0.00019979862129151354, "loss": 11.6808, "step": 2925 }, { "epoch": 0.061249267353261326, "grad_norm": 0.2285420298576355, "learning_rate": 0.00019979848219250538, "loss": 11.6927, "step": 2926 }, { "epoch": 0.06127020011722348, "grad_norm": 0.1965319961309433, "learning_rate": 0.00019979834304552208, "loss": 11.6853, "step": 2927 }, { "epoch": 0.06129113288118563, "grad_norm": 0.19443999230861664, "learning_rate": 0.00019979820385056374, "loss": 11.6718, "step": 2928 }, { "epoch": 0.06131206564514779, "grad_norm": 0.17421162128448486, "learning_rate": 0.00019979806460763042, "loss": 11.684, "step": 2929 }, { "epoch": 0.06133299840910994, "grad_norm": 0.1901569664478302, "learning_rate": 0.00019979792531672213, "loss": 11.702, "step": 2930 }, { "epoch": 0.06135393117307209, "grad_norm": 0.2841908633708954, "learning_rate": 0.00019979778597783898, "loss": 11.6866, "step": 2931 }, { "epoch": 0.061374863937034244, "grad_norm": 0.1761660873889923, "learning_rate": 0.00019979764659098105, "loss": 11.6942, "step": 2932 }, { "epoch": 0.0613957967009964, "grad_norm": 0.23254583775997162, "learning_rate": 0.0001997975071561484, "loss": 11.6821, "step": 2933 }, { "epoch": 0.061416729464958554, "grad_norm": 0.21115967631340027, "learning_rate": 0.00019979736767334108, "loss": 11.7018, "step": 2934 }, { "epoch": 0.061437662228920706, "grad_norm": 0.19763456284999847, "learning_rate": 0.0001997972281425591, "loss": 11.6688, "step": 2935 }, { "epoch": 0.06145859499288286, "grad_norm": 0.19838722050189972, "learning_rate": 0.0001997970885638027, "loss": 11.6826, "step": 2936 }, { "epoch": 0.061479527756845016, "grad_norm": 0.23136459290981293, "learning_rate": 0.00019979694893707177, "loss": 11.682, "step": 2937 }, { "epoch": 0.06150046052080717, "grad_norm": 0.26123225688934326, "learning_rate": 0.00019979680926236648, "loss": 11.6966, "step": 2938 }, { "epoch": 0.06152139328476932, "grad_norm": 0.2518416941165924, "learning_rate": 0.00019979666953968685, "loss": 11.6937, "step": 2939 }, { "epoch": 0.06154232604873147, "grad_norm": 0.2306191772222519, "learning_rate": 0.00019979652976903295, "loss": 11.6888, "step": 2940 }, { "epoch": 0.06156325881269363, "grad_norm": 0.22480876743793488, "learning_rate": 0.0001997963899504049, "loss": 11.7024, "step": 2941 }, { "epoch": 0.06158419157665578, "grad_norm": 0.21772930026054382, "learning_rate": 0.0001997962500838027, "loss": 11.6868, "step": 2942 }, { "epoch": 0.061605124340617934, "grad_norm": 0.2440931349992752, "learning_rate": 0.0001997961101692264, "loss": 11.6882, "step": 2943 }, { "epoch": 0.061626057104580086, "grad_norm": 0.23516477644443512, "learning_rate": 0.00019979597020667615, "loss": 11.6929, "step": 2944 }, { "epoch": 0.061646989868542244, "grad_norm": 0.1718173325061798, "learning_rate": 0.000199795830196152, "loss": 11.6858, "step": 2945 }, { "epoch": 0.061667922632504396, "grad_norm": 0.21232813596725464, "learning_rate": 0.00019979569013765395, "loss": 11.6947, "step": 2946 }, { "epoch": 0.06168885539646655, "grad_norm": 0.19315233826637268, "learning_rate": 0.00019979555003118215, "loss": 11.6996, "step": 2947 }, { "epoch": 0.0617097881604287, "grad_norm": 0.2189750075340271, "learning_rate": 0.0001997954098767366, "loss": 11.6803, "step": 2948 }, { "epoch": 0.06173072092439086, "grad_norm": 0.24638940393924713, "learning_rate": 0.00019979526967431742, "loss": 11.6899, "step": 2949 }, { "epoch": 0.06175165368835301, "grad_norm": 0.18571840226650238, "learning_rate": 0.00019979512942392466, "loss": 11.6877, "step": 2950 }, { "epoch": 0.06177258645231516, "grad_norm": 0.20082442462444305, "learning_rate": 0.00019979498912555833, "loss": 11.6881, "step": 2951 }, { "epoch": 0.06179351921627732, "grad_norm": 0.2652429938316345, "learning_rate": 0.00019979484877921859, "loss": 11.6842, "step": 2952 }, { "epoch": 0.06181445198023947, "grad_norm": 0.24132457375526428, "learning_rate": 0.00019979470838490547, "loss": 11.6823, "step": 2953 }, { "epoch": 0.061835384744201624, "grad_norm": 0.20634645223617554, "learning_rate": 0.000199794567942619, "loss": 11.6899, "step": 2954 }, { "epoch": 0.061856317508163776, "grad_norm": 0.22162137925624847, "learning_rate": 0.00019979442745235933, "loss": 11.6994, "step": 2955 }, { "epoch": 0.061877250272125935, "grad_norm": 0.24432410299777985, "learning_rate": 0.00019979428691412645, "loss": 11.6921, "step": 2956 }, { "epoch": 0.061898183036088086, "grad_norm": 0.22144871950149536, "learning_rate": 0.00019979414632792045, "loss": 11.7042, "step": 2957 }, { "epoch": 0.06191911580005024, "grad_norm": 0.23349620401859283, "learning_rate": 0.00019979400569374142, "loss": 11.6814, "step": 2958 }, { "epoch": 0.06194004856401239, "grad_norm": 0.24162331223487854, "learning_rate": 0.0001997938650115894, "loss": 11.6899, "step": 2959 }, { "epoch": 0.06196098132797455, "grad_norm": 0.20129427313804626, "learning_rate": 0.00019979372428146448, "loss": 11.6813, "step": 2960 }, { "epoch": 0.0619819140919367, "grad_norm": 0.29292356967926025, "learning_rate": 0.0001997935835033667, "loss": 11.706, "step": 2961 }, { "epoch": 0.06200284685589885, "grad_norm": 0.21028541028499603, "learning_rate": 0.00019979344267729614, "loss": 11.6864, "step": 2962 }, { "epoch": 0.062023779619861004, "grad_norm": 0.24321283400058746, "learning_rate": 0.0001997933018032529, "loss": 11.6985, "step": 2963 }, { "epoch": 0.06204471238382316, "grad_norm": 0.2364351749420166, "learning_rate": 0.00019979316088123702, "loss": 11.6666, "step": 2964 }, { "epoch": 0.062065645147785314, "grad_norm": 0.21061500906944275, "learning_rate": 0.00019979301991124854, "loss": 11.6921, "step": 2965 }, { "epoch": 0.062086577911747466, "grad_norm": 0.2234106957912445, "learning_rate": 0.00019979287889328756, "loss": 11.69, "step": 2966 }, { "epoch": 0.06210751067570962, "grad_norm": 0.18902626633644104, "learning_rate": 0.00019979273782735416, "loss": 11.6875, "step": 2967 }, { "epoch": 0.06212844343967178, "grad_norm": 0.2225654274225235, "learning_rate": 0.0001997925967134484, "loss": 11.6883, "step": 2968 }, { "epoch": 0.06214937620363393, "grad_norm": 0.2073356956243515, "learning_rate": 0.0001997924555515703, "loss": 11.6896, "step": 2969 }, { "epoch": 0.06217030896759608, "grad_norm": 0.19847290217876434, "learning_rate": 0.00019979231434172, "loss": 11.687, "step": 2970 }, { "epoch": 0.06219124173155823, "grad_norm": 0.23313863575458527, "learning_rate": 0.00019979217308389752, "loss": 11.7017, "step": 2971 }, { "epoch": 0.06221217449552039, "grad_norm": 0.21527224779129028, "learning_rate": 0.00019979203177810295, "loss": 11.6975, "step": 2972 }, { "epoch": 0.06223310725948254, "grad_norm": 0.21010461449623108, "learning_rate": 0.00019979189042433634, "loss": 11.6806, "step": 2973 }, { "epoch": 0.062254040023444694, "grad_norm": 0.2171105295419693, "learning_rate": 0.0001997917490225978, "loss": 11.6865, "step": 2974 }, { "epoch": 0.062274972787406846, "grad_norm": 0.20802639424800873, "learning_rate": 0.00019979160757288732, "loss": 11.698, "step": 2975 }, { "epoch": 0.062295905551369005, "grad_norm": 0.2005263715982437, "learning_rate": 0.00019979146607520504, "loss": 11.7041, "step": 2976 }, { "epoch": 0.062316838315331156, "grad_norm": 0.2748631536960602, "learning_rate": 0.00019979132452955102, "loss": 11.6828, "step": 2977 }, { "epoch": 0.06233777107929331, "grad_norm": 0.20265693962574005, "learning_rate": 0.00019979118293592528, "loss": 11.6845, "step": 2978 }, { "epoch": 0.06235870384325547, "grad_norm": 0.2611575722694397, "learning_rate": 0.00019979104129432793, "loss": 11.6974, "step": 2979 }, { "epoch": 0.06237963660721762, "grad_norm": 0.16995176672935486, "learning_rate": 0.000199790899604759, "loss": 11.6802, "step": 2980 }, { "epoch": 0.06240056937117977, "grad_norm": 0.27839651703834534, "learning_rate": 0.00019979075786721864, "loss": 11.6989, "step": 2981 }, { "epoch": 0.06242150213514192, "grad_norm": 0.20127254724502563, "learning_rate": 0.00019979061608170682, "loss": 11.6905, "step": 2982 }, { "epoch": 0.06244243489910408, "grad_norm": 0.2447262704372406, "learning_rate": 0.00019979047424822368, "loss": 11.6825, "step": 2983 }, { "epoch": 0.06246336766306623, "grad_norm": 0.19480422139167786, "learning_rate": 0.00019979033236676923, "loss": 11.6874, "step": 2984 }, { "epoch": 0.062484300427028384, "grad_norm": 0.21048925817012787, "learning_rate": 0.00019979019043734358, "loss": 11.6817, "step": 2985 }, { "epoch": 0.06250523319099054, "grad_norm": 0.19275346398353577, "learning_rate": 0.00019979004845994679, "loss": 11.677, "step": 2986 }, { "epoch": 0.0625261659549527, "grad_norm": 0.29428932070732117, "learning_rate": 0.00019978990643457892, "loss": 11.6832, "step": 2987 }, { "epoch": 0.06254709871891484, "grad_norm": 0.2938465476036072, "learning_rate": 0.00019978976436124004, "loss": 11.6738, "step": 2988 }, { "epoch": 0.062568031482877, "grad_norm": 0.2072836011648178, "learning_rate": 0.0001997896222399302, "loss": 11.6922, "step": 2989 }, { "epoch": 0.06258896424683916, "grad_norm": 0.1975475251674652, "learning_rate": 0.00019978948007064951, "loss": 11.6857, "step": 2990 }, { "epoch": 0.0626098970108013, "grad_norm": 0.17653390765190125, "learning_rate": 0.00019978933785339803, "loss": 11.6824, "step": 2991 }, { "epoch": 0.06263082977476346, "grad_norm": 0.20871980488300323, "learning_rate": 0.0001997891955881758, "loss": 11.6763, "step": 2992 }, { "epoch": 0.06265176253872562, "grad_norm": 0.3340071141719818, "learning_rate": 0.00019978905327498292, "loss": 11.6729, "step": 2993 }, { "epoch": 0.06267269530268776, "grad_norm": 0.19002746045589447, "learning_rate": 0.00019978891091381945, "loss": 11.6855, "step": 2994 }, { "epoch": 0.06269362806664992, "grad_norm": 0.20547157526016235, "learning_rate": 0.0001997887685046854, "loss": 11.6773, "step": 2995 }, { "epoch": 0.06271456083061207, "grad_norm": 0.22447077929973602, "learning_rate": 0.00019978862604758094, "loss": 11.6919, "step": 2996 }, { "epoch": 0.06273549359457423, "grad_norm": 0.21193726360797882, "learning_rate": 0.00019978848354250606, "loss": 11.7024, "step": 2997 }, { "epoch": 0.06275642635853639, "grad_norm": 0.2126999944448471, "learning_rate": 0.00019978834098946085, "loss": 11.6806, "step": 2998 }, { "epoch": 0.06277735912249853, "grad_norm": 0.1971103399991989, "learning_rate": 0.0001997881983884454, "loss": 11.6913, "step": 2999 }, { "epoch": 0.06279829188646069, "grad_norm": 0.2518889307975769, "learning_rate": 0.00019978805573945977, "loss": 11.7033, "step": 3000 }, { "epoch": 0.06279829188646069, "eval_loss": 11.687881469726562, "eval_runtime": 34.3411, "eval_samples_per_second": 27.984, "eval_steps_per_second": 7.018, "step": 3000 }, { "epoch": 0.06281922465042285, "grad_norm": 0.2032162994146347, "learning_rate": 0.000199787913042504, "loss": 11.6972, "step": 3001 }, { "epoch": 0.06284015741438499, "grad_norm": 0.18998132646083832, "learning_rate": 0.0001997877702975782, "loss": 11.6724, "step": 3002 }, { "epoch": 0.06286109017834715, "grad_norm": 0.23764245212078094, "learning_rate": 0.00019978762750468243, "loss": 11.6996, "step": 3003 }, { "epoch": 0.0628820229423093, "grad_norm": 0.19004900753498077, "learning_rate": 0.00019978748466381672, "loss": 11.6818, "step": 3004 }, { "epoch": 0.06290295570627145, "grad_norm": 0.2428196668624878, "learning_rate": 0.00019978734177498116, "loss": 11.7095, "step": 3005 }, { "epoch": 0.06292388847023361, "grad_norm": 0.2502398192882538, "learning_rate": 0.00019978719883817587, "loss": 11.6731, "step": 3006 }, { "epoch": 0.06294482123419576, "grad_norm": 0.20550455152988434, "learning_rate": 0.00019978705585340083, "loss": 11.6841, "step": 3007 }, { "epoch": 0.06296575399815792, "grad_norm": 0.24312478303909302, "learning_rate": 0.00019978691282065617, "loss": 11.6897, "step": 3008 }, { "epoch": 0.06298668676212008, "grad_norm": 0.21436098217964172, "learning_rate": 0.00019978676973994195, "loss": 11.687, "step": 3009 }, { "epoch": 0.06300761952608222, "grad_norm": 0.20999203622341156, "learning_rate": 0.0001997866266112582, "loss": 11.6803, "step": 3010 }, { "epoch": 0.06302855229004438, "grad_norm": 0.1881292313337326, "learning_rate": 0.00019978648343460504, "loss": 11.6988, "step": 3011 }, { "epoch": 0.06304948505400654, "grad_norm": 0.1934300661087036, "learning_rate": 0.00019978634020998253, "loss": 11.6852, "step": 3012 }, { "epoch": 0.06307041781796868, "grad_norm": 0.22149242460727692, "learning_rate": 0.00019978619693739069, "loss": 11.6926, "step": 3013 }, { "epoch": 0.06309135058193084, "grad_norm": 0.20417219400405884, "learning_rate": 0.00019978605361682967, "loss": 11.6895, "step": 3014 }, { "epoch": 0.06311228334589299, "grad_norm": 0.2119230180978775, "learning_rate": 0.00019978591024829946, "loss": 11.6769, "step": 3015 }, { "epoch": 0.06313321610985514, "grad_norm": 0.15733198821544647, "learning_rate": 0.00019978576683180017, "loss": 11.6858, "step": 3016 }, { "epoch": 0.0631541488738173, "grad_norm": 0.2474655658006668, "learning_rate": 0.00019978562336733186, "loss": 11.6795, "step": 3017 }, { "epoch": 0.06317508163777945, "grad_norm": 0.18258075416088104, "learning_rate": 0.00019978547985489463, "loss": 11.6834, "step": 3018 }, { "epoch": 0.06319601440174161, "grad_norm": 0.27385279536247253, "learning_rate": 0.0001997853362944885, "loss": 11.6883, "step": 3019 }, { "epoch": 0.06321694716570377, "grad_norm": 0.20396855473518372, "learning_rate": 0.00019978519268611355, "loss": 11.6925, "step": 3020 }, { "epoch": 0.06323787992966591, "grad_norm": 0.21864421665668488, "learning_rate": 0.00019978504902976987, "loss": 11.6812, "step": 3021 }, { "epoch": 0.06325881269362807, "grad_norm": 0.2444555163383484, "learning_rate": 0.0001997849053254575, "loss": 11.6777, "step": 3022 }, { "epoch": 0.06327974545759021, "grad_norm": 0.197214275598526, "learning_rate": 0.00019978476157317653, "loss": 11.6792, "step": 3023 }, { "epoch": 0.06330067822155237, "grad_norm": 0.2016814798116684, "learning_rate": 0.00019978461777292705, "loss": 11.6879, "step": 3024 }, { "epoch": 0.06332161098551453, "grad_norm": 0.1915791630744934, "learning_rate": 0.0001997844739247091, "loss": 11.6867, "step": 3025 }, { "epoch": 0.06334254374947668, "grad_norm": 0.2828287184238434, "learning_rate": 0.00019978433002852274, "loss": 11.7012, "step": 3026 }, { "epoch": 0.06336347651343884, "grad_norm": 0.19436374306678772, "learning_rate": 0.00019978418608436807, "loss": 11.6793, "step": 3027 }, { "epoch": 0.063384409277401, "grad_norm": 0.23244968056678772, "learning_rate": 0.0001997840420922451, "loss": 11.6898, "step": 3028 }, { "epoch": 0.06340534204136314, "grad_norm": 0.2183145135641098, "learning_rate": 0.00019978389805215397, "loss": 11.677, "step": 3029 }, { "epoch": 0.0634262748053253, "grad_norm": 0.20782862603664398, "learning_rate": 0.00019978375396409472, "loss": 11.6886, "step": 3030 }, { "epoch": 0.06344720756928744, "grad_norm": 0.18681448698043823, "learning_rate": 0.00019978360982806742, "loss": 11.6923, "step": 3031 }, { "epoch": 0.0634681403332496, "grad_norm": 0.23974168300628662, "learning_rate": 0.00019978346564407216, "loss": 11.6789, "step": 3032 }, { "epoch": 0.06348907309721176, "grad_norm": 0.17572659254074097, "learning_rate": 0.00019978332141210893, "loss": 11.6774, "step": 3033 }, { "epoch": 0.0635100058611739, "grad_norm": 0.2687656581401825, "learning_rate": 0.0001997831771321779, "loss": 11.6811, "step": 3034 }, { "epoch": 0.06353093862513606, "grad_norm": 1.3304896354675293, "learning_rate": 0.00019978303280427913, "loss": 11.7631, "step": 3035 }, { "epoch": 0.06355187138909822, "grad_norm": 0.23953188955783844, "learning_rate": 0.0001997828884284126, "loss": 11.6808, "step": 3036 }, { "epoch": 0.06357280415306037, "grad_norm": 0.2532895505428314, "learning_rate": 0.00019978274400457842, "loss": 11.6928, "step": 3037 }, { "epoch": 0.06359373691702253, "grad_norm": 0.20033705234527588, "learning_rate": 0.00019978259953277674, "loss": 11.6906, "step": 3038 }, { "epoch": 0.06361466968098468, "grad_norm": 0.15914490818977356, "learning_rate": 0.00019978245501300754, "loss": 11.6778, "step": 3039 }, { "epoch": 0.06363560244494683, "grad_norm": 0.23324604332447052, "learning_rate": 0.0001997823104452709, "loss": 11.6961, "step": 3040 }, { "epoch": 0.06365653520890899, "grad_norm": 0.22780606150627136, "learning_rate": 0.00019978216582956692, "loss": 11.6914, "step": 3041 }, { "epoch": 0.06367746797287113, "grad_norm": 0.21773123741149902, "learning_rate": 0.00019978202116589563, "loss": 11.6912, "step": 3042 }, { "epoch": 0.06369840073683329, "grad_norm": 0.2320575714111328, "learning_rate": 0.00019978187645425716, "loss": 11.6829, "step": 3043 }, { "epoch": 0.06371933350079545, "grad_norm": 0.26170778274536133, "learning_rate": 0.0001997817316946515, "loss": 11.6987, "step": 3044 }, { "epoch": 0.0637402662647576, "grad_norm": 0.2448022961616516, "learning_rate": 0.0001997815868870788, "loss": 11.6984, "step": 3045 }, { "epoch": 0.06376119902871975, "grad_norm": 0.19884319603443146, "learning_rate": 0.00019978144203153906, "loss": 11.6813, "step": 3046 }, { "epoch": 0.06378213179268191, "grad_norm": 0.1674896478652954, "learning_rate": 0.0001997812971280324, "loss": 11.6705, "step": 3047 }, { "epoch": 0.06380306455664406, "grad_norm": 0.2048177719116211, "learning_rate": 0.00019978115217655887, "loss": 11.6849, "step": 3048 }, { "epoch": 0.06382399732060622, "grad_norm": 0.2531914710998535, "learning_rate": 0.00019978100717711855, "loss": 11.682, "step": 3049 }, { "epoch": 0.06384493008456836, "grad_norm": 0.21815969049930573, "learning_rate": 0.00019978086212971147, "loss": 11.6968, "step": 3050 }, { "epoch": 0.06386586284853052, "grad_norm": 0.2189498394727707, "learning_rate": 0.00019978071703433774, "loss": 11.7044, "step": 3051 }, { "epoch": 0.06388679561249268, "grad_norm": 0.21687622368335724, "learning_rate": 0.00019978057189099744, "loss": 11.6926, "step": 3052 }, { "epoch": 0.06390772837645482, "grad_norm": 0.20837561786174774, "learning_rate": 0.0001997804266996906, "loss": 11.6924, "step": 3053 }, { "epoch": 0.06392866114041698, "grad_norm": 0.17201676964759827, "learning_rate": 0.00019978028146041733, "loss": 11.692, "step": 3054 }, { "epoch": 0.06394959390437914, "grad_norm": 0.20099769532680511, "learning_rate": 0.00019978013617317765, "loss": 11.6887, "step": 3055 }, { "epoch": 0.06397052666834128, "grad_norm": 0.2307031899690628, "learning_rate": 0.00019977999083797165, "loss": 11.6863, "step": 3056 }, { "epoch": 0.06399145943230344, "grad_norm": 0.20922251045703888, "learning_rate": 0.00019977984545479943, "loss": 11.6846, "step": 3057 }, { "epoch": 0.06401239219626559, "grad_norm": 0.20597760379314423, "learning_rate": 0.00019977970002366105, "loss": 11.6869, "step": 3058 }, { "epoch": 0.06403332496022775, "grad_norm": 0.2829843759536743, "learning_rate": 0.00019977955454455657, "loss": 11.7008, "step": 3059 }, { "epoch": 0.0640542577241899, "grad_norm": 0.20590022206306458, "learning_rate": 0.00019977940901748603, "loss": 11.6764, "step": 3060 }, { "epoch": 0.06407519048815205, "grad_norm": 0.7250024676322937, "learning_rate": 0.00019977926344244958, "loss": 11.6586, "step": 3061 }, { "epoch": 0.06409612325211421, "grad_norm": 0.2224176973104477, "learning_rate": 0.00019977911781944718, "loss": 11.6911, "step": 3062 }, { "epoch": 0.06411705601607637, "grad_norm": 0.20193594694137573, "learning_rate": 0.00019977897214847897, "loss": 11.6762, "step": 3063 }, { "epoch": 0.06413798878003851, "grad_norm": 0.1873568296432495, "learning_rate": 0.00019977882642954503, "loss": 11.6718, "step": 3064 }, { "epoch": 0.06415892154400067, "grad_norm": 0.210199236869812, "learning_rate": 0.00019977868066264543, "loss": 11.702, "step": 3065 }, { "epoch": 0.06417985430796283, "grad_norm": 0.2106093168258667, "learning_rate": 0.00019977853484778017, "loss": 11.6746, "step": 3066 }, { "epoch": 0.06420078707192498, "grad_norm": 0.17503783106803894, "learning_rate": 0.00019977838898494938, "loss": 11.6871, "step": 3067 }, { "epoch": 0.06422171983588713, "grad_norm": 0.23316265642642975, "learning_rate": 0.00019977824307415313, "loss": 11.6831, "step": 3068 }, { "epoch": 0.06424265259984928, "grad_norm": 0.18241968750953674, "learning_rate": 0.00019977809711539148, "loss": 11.6886, "step": 3069 }, { "epoch": 0.06426358536381144, "grad_norm": 0.22676816582679749, "learning_rate": 0.00019977795110866448, "loss": 11.6769, "step": 3070 }, { "epoch": 0.0642845181277736, "grad_norm": 0.24231667816638947, "learning_rate": 0.00019977780505397227, "loss": 11.7047, "step": 3071 }, { "epoch": 0.06430545089173574, "grad_norm": 0.25787484645843506, "learning_rate": 0.00019977765895131481, "loss": 11.6784, "step": 3072 }, { "epoch": 0.0643263836556979, "grad_norm": 0.17566457390785217, "learning_rate": 0.00019977751280069223, "loss": 11.6882, "step": 3073 }, { "epoch": 0.06434731641966006, "grad_norm": 0.1962793618440628, "learning_rate": 0.00019977736660210465, "loss": 11.6902, "step": 3074 }, { "epoch": 0.0643682491836222, "grad_norm": 0.24729686975479126, "learning_rate": 0.00019977722035555205, "loss": 11.6809, "step": 3075 }, { "epoch": 0.06438918194758436, "grad_norm": 0.23104164004325867, "learning_rate": 0.00019977707406103456, "loss": 11.6881, "step": 3076 }, { "epoch": 0.0644101147115465, "grad_norm": 0.18755224347114563, "learning_rate": 0.00019977692771855221, "loss": 11.6822, "step": 3077 }, { "epoch": 0.06443104747550867, "grad_norm": 0.2193361073732376, "learning_rate": 0.0001997767813281051, "loss": 11.6765, "step": 3078 }, { "epoch": 0.06445198023947082, "grad_norm": 0.22195008397102356, "learning_rate": 0.00019977663488969331, "loss": 11.7079, "step": 3079 }, { "epoch": 0.06447291300343297, "grad_norm": 0.22386465966701508, "learning_rate": 0.00019977648840331687, "loss": 11.687, "step": 3080 }, { "epoch": 0.06449384576739513, "grad_norm": 0.20792008936405182, "learning_rate": 0.0001997763418689759, "loss": 11.7016, "step": 3081 }, { "epoch": 0.06451477853135729, "grad_norm": 0.2240791916847229, "learning_rate": 0.0001997761952866704, "loss": 11.6745, "step": 3082 }, { "epoch": 0.06453571129531943, "grad_norm": 0.2359321266412735, "learning_rate": 0.00019977604865640053, "loss": 11.6994, "step": 3083 }, { "epoch": 0.06455664405928159, "grad_norm": 0.20587831735610962, "learning_rate": 0.0001997759019781663, "loss": 11.6886, "step": 3084 }, { "epoch": 0.06457757682324373, "grad_norm": 0.18761420249938965, "learning_rate": 0.00019977575525196775, "loss": 11.6817, "step": 3085 }, { "epoch": 0.0645985095872059, "grad_norm": 0.23059071600437164, "learning_rate": 0.00019977560847780508, "loss": 11.684, "step": 3086 }, { "epoch": 0.06461944235116805, "grad_norm": 0.1906694769859314, "learning_rate": 0.00019977546165567822, "loss": 11.6854, "step": 3087 }, { "epoch": 0.0646403751151302, "grad_norm": 0.20168548822402954, "learning_rate": 0.0001997753147855873, "loss": 11.6924, "step": 3088 }, { "epoch": 0.06466130787909236, "grad_norm": 0.23635923862457275, "learning_rate": 0.0001997751678675324, "loss": 11.6813, "step": 3089 }, { "epoch": 0.06468224064305451, "grad_norm": 0.23828598856925964, "learning_rate": 0.00019977502090151356, "loss": 11.7029, "step": 3090 }, { "epoch": 0.06470317340701666, "grad_norm": 0.18989573419094086, "learning_rate": 0.00019977487388753086, "loss": 11.6754, "step": 3091 }, { "epoch": 0.06472410617097882, "grad_norm": 0.22373902797698975, "learning_rate": 0.0001997747268255844, "loss": 11.7041, "step": 3092 }, { "epoch": 0.06474503893494096, "grad_norm": 0.33935970067977905, "learning_rate": 0.0001997745797156742, "loss": 11.6897, "step": 3093 }, { "epoch": 0.06476597169890312, "grad_norm": 0.21484866738319397, "learning_rate": 0.00019977443255780043, "loss": 11.6923, "step": 3094 }, { "epoch": 0.06478690446286528, "grad_norm": 0.2607479393482208, "learning_rate": 0.00019977428535196303, "loss": 11.6817, "step": 3095 }, { "epoch": 0.06480783722682742, "grad_norm": 0.1959705948829651, "learning_rate": 0.00019977413809816214, "loss": 11.6793, "step": 3096 }, { "epoch": 0.06482876999078958, "grad_norm": 0.20073185861110687, "learning_rate": 0.0001997739907963978, "loss": 11.689, "step": 3097 }, { "epoch": 0.06484970275475174, "grad_norm": 0.20829951763153076, "learning_rate": 0.00019977384344667013, "loss": 11.6798, "step": 3098 }, { "epoch": 0.06487063551871389, "grad_norm": 0.21265028417110443, "learning_rate": 0.0001997736960489792, "loss": 11.6829, "step": 3099 }, { "epoch": 0.06489156828267605, "grad_norm": 0.1821887344121933, "learning_rate": 0.00019977354860332503, "loss": 11.6871, "step": 3100 }, { "epoch": 0.0649125010466382, "grad_norm": 0.21544435620307922, "learning_rate": 0.0001997734011097077, "loss": 11.6811, "step": 3101 }, { "epoch": 0.06493343381060035, "grad_norm": 0.24210675060749054, "learning_rate": 0.00019977325356812734, "loss": 11.6989, "step": 3102 }, { "epoch": 0.06495436657456251, "grad_norm": 0.2251519113779068, "learning_rate": 0.00019977310597858394, "loss": 11.691, "step": 3103 }, { "epoch": 0.06497529933852465, "grad_norm": 0.22325322031974792, "learning_rate": 0.0001997729583410776, "loss": 11.6769, "step": 3104 }, { "epoch": 0.06499623210248681, "grad_norm": 0.2276744395494461, "learning_rate": 0.00019977281065560842, "loss": 11.684, "step": 3105 }, { "epoch": 0.06501716486644897, "grad_norm": 0.24068975448608398, "learning_rate": 0.00019977266292217646, "loss": 11.7028, "step": 3106 }, { "epoch": 0.06503809763041112, "grad_norm": 0.20044110715389252, "learning_rate": 0.00019977251514078175, "loss": 11.6935, "step": 3107 }, { "epoch": 0.06505903039437327, "grad_norm": 0.24334928393363953, "learning_rate": 0.0001997723673114244, "loss": 11.6656, "step": 3108 }, { "epoch": 0.06507996315833543, "grad_norm": 0.20206989347934723, "learning_rate": 0.0001997722194341045, "loss": 11.6913, "step": 3109 }, { "epoch": 0.06510089592229758, "grad_norm": 0.2566852271556854, "learning_rate": 0.00019977207150882208, "loss": 11.6923, "step": 3110 }, { "epoch": 0.06512182868625974, "grad_norm": 0.21299836039543152, "learning_rate": 0.00019977192353557723, "loss": 11.6818, "step": 3111 }, { "epoch": 0.06514276145022188, "grad_norm": 0.17835864424705505, "learning_rate": 0.00019977177551437004, "loss": 11.6876, "step": 3112 }, { "epoch": 0.06516369421418404, "grad_norm": 0.22879236936569214, "learning_rate": 0.00019977162744520051, "loss": 11.6896, "step": 3113 }, { "epoch": 0.0651846269781462, "grad_norm": 0.17208005487918854, "learning_rate": 0.00019977147932806875, "loss": 11.6823, "step": 3114 }, { "epoch": 0.06520555974210834, "grad_norm": 0.1991894245147705, "learning_rate": 0.00019977133116297487, "loss": 11.6872, "step": 3115 }, { "epoch": 0.0652264925060705, "grad_norm": 0.19871573150157928, "learning_rate": 0.00019977118294991892, "loss": 11.6911, "step": 3116 }, { "epoch": 0.06524742527003266, "grad_norm": 0.22375154495239258, "learning_rate": 0.00019977103468890096, "loss": 11.69, "step": 3117 }, { "epoch": 0.0652683580339948, "grad_norm": 0.18524935841560364, "learning_rate": 0.0001997708863799211, "loss": 11.692, "step": 3118 }, { "epoch": 0.06528929079795696, "grad_norm": 0.23941227793693542, "learning_rate": 0.0001997707380229793, "loss": 11.6751, "step": 3119 }, { "epoch": 0.06531022356191911, "grad_norm": 0.20361310243606567, "learning_rate": 0.00019977058961807577, "loss": 11.6961, "step": 3120 }, { "epoch": 0.06533115632588127, "grad_norm": 0.18585944175720215, "learning_rate": 0.00019977044116521048, "loss": 11.6902, "step": 3121 }, { "epoch": 0.06535208908984343, "grad_norm": 0.19877628982067108, "learning_rate": 0.00019977029266438355, "loss": 11.6793, "step": 3122 }, { "epoch": 0.06537302185380557, "grad_norm": 0.1862904578447342, "learning_rate": 0.00019977014411559502, "loss": 11.687, "step": 3123 }, { "epoch": 0.06539395461776773, "grad_norm": 0.1968560516834259, "learning_rate": 0.00019976999551884502, "loss": 11.6896, "step": 3124 }, { "epoch": 0.06541488738172989, "grad_norm": 0.20550040900707245, "learning_rate": 0.00019976984687413357, "loss": 11.6892, "step": 3125 }, { "epoch": 0.06543582014569203, "grad_norm": 0.20080125331878662, "learning_rate": 0.00019976969818146073, "loss": 11.6953, "step": 3126 }, { "epoch": 0.06545675290965419, "grad_norm": 0.19424456357955933, "learning_rate": 0.0001997695494408266, "loss": 11.6781, "step": 3127 }, { "epoch": 0.06547768567361635, "grad_norm": 0.19367031753063202, "learning_rate": 0.00019976940065223129, "loss": 11.6779, "step": 3128 }, { "epoch": 0.0654986184375785, "grad_norm": 0.25443896651268005, "learning_rate": 0.00019976925181567482, "loss": 11.6772, "step": 3129 }, { "epoch": 0.06551955120154065, "grad_norm": 0.18316291272640228, "learning_rate": 0.00019976910293115725, "loss": 11.6912, "step": 3130 }, { "epoch": 0.0655404839655028, "grad_norm": 0.2526509165763855, "learning_rate": 0.00019976895399867868, "loss": 11.6875, "step": 3131 }, { "epoch": 0.06556141672946496, "grad_norm": 0.2049289345741272, "learning_rate": 0.00019976880501823918, "loss": 11.679, "step": 3132 }, { "epoch": 0.06558234949342712, "grad_norm": 0.20842866599559784, "learning_rate": 0.0001997686559898388, "loss": 11.674, "step": 3133 }, { "epoch": 0.06560328225738926, "grad_norm": 0.21267196536064148, "learning_rate": 0.00019976850691347763, "loss": 11.7012, "step": 3134 }, { "epoch": 0.06562421502135142, "grad_norm": 0.2260337918996811, "learning_rate": 0.00019976835778915574, "loss": 11.6791, "step": 3135 }, { "epoch": 0.06564514778531358, "grad_norm": 0.26534050703048706, "learning_rate": 0.00019976820861687322, "loss": 11.6824, "step": 3136 }, { "epoch": 0.06566608054927572, "grad_norm": 0.2867373526096344, "learning_rate": 0.0001997680593966301, "loss": 11.7033, "step": 3137 }, { "epoch": 0.06568701331323788, "grad_norm": 0.22056205570697784, "learning_rate": 0.00019976791012842648, "loss": 11.6898, "step": 3138 }, { "epoch": 0.06570794607720003, "grad_norm": 0.2526393234729767, "learning_rate": 0.00019976776081226245, "loss": 11.6916, "step": 3139 }, { "epoch": 0.06572887884116219, "grad_norm": 0.22641456127166748, "learning_rate": 0.00019976761144813803, "loss": 11.7016, "step": 3140 }, { "epoch": 0.06574981160512434, "grad_norm": 0.18165040016174316, "learning_rate": 0.00019976746203605332, "loss": 11.69, "step": 3141 }, { "epoch": 0.06577074436908649, "grad_norm": 0.23566600680351257, "learning_rate": 0.0001997673125760084, "loss": 11.7008, "step": 3142 }, { "epoch": 0.06579167713304865, "grad_norm": 0.25042274594306946, "learning_rate": 0.00019976716306800336, "loss": 11.7099, "step": 3143 }, { "epoch": 0.06581260989701081, "grad_norm": 0.18526610732078552, "learning_rate": 0.00019976701351203822, "loss": 11.6848, "step": 3144 }, { "epoch": 0.06583354266097295, "grad_norm": 0.19686353206634521, "learning_rate": 0.00019976686390811308, "loss": 11.6803, "step": 3145 }, { "epoch": 0.06585447542493511, "grad_norm": 0.2367008775472641, "learning_rate": 0.00019976671425622799, "loss": 11.6892, "step": 3146 }, { "epoch": 0.06587540818889726, "grad_norm": 0.19027996063232422, "learning_rate": 0.00019976656455638305, "loss": 11.6918, "step": 3147 }, { "epoch": 0.06589634095285941, "grad_norm": 0.18002358078956604, "learning_rate": 0.00019976641480857834, "loss": 11.6889, "step": 3148 }, { "epoch": 0.06591727371682157, "grad_norm": 0.2630826234817505, "learning_rate": 0.0001997662650128139, "loss": 11.6961, "step": 3149 }, { "epoch": 0.06593820648078372, "grad_norm": 0.18451076745986938, "learning_rate": 0.00019976611516908983, "loss": 11.6784, "step": 3150 }, { "epoch": 0.06595913924474588, "grad_norm": 0.23191629350185394, "learning_rate": 0.00019976596527740618, "loss": 11.6919, "step": 3151 }, { "epoch": 0.06598007200870804, "grad_norm": 0.18758119642734528, "learning_rate": 0.00019976581533776304, "loss": 11.6716, "step": 3152 }, { "epoch": 0.06600100477267018, "grad_norm": 0.23378397524356842, "learning_rate": 0.00019976566535016046, "loss": 11.6896, "step": 3153 }, { "epoch": 0.06602193753663234, "grad_norm": 0.19071874022483826, "learning_rate": 0.00019976551531459852, "loss": 11.6993, "step": 3154 }, { "epoch": 0.0660428703005945, "grad_norm": 0.25740036368370056, "learning_rate": 0.0001997653652310773, "loss": 11.693, "step": 3155 }, { "epoch": 0.06606380306455664, "grad_norm": 0.2894989550113678, "learning_rate": 0.0001997652150995969, "loss": 11.7117, "step": 3156 }, { "epoch": 0.0660847358285188, "grad_norm": 0.1969822645187378, "learning_rate": 0.00019976506492015735, "loss": 11.7036, "step": 3157 }, { "epoch": 0.06610566859248095, "grad_norm": 0.21097125113010406, "learning_rate": 0.00019976491469275872, "loss": 11.6808, "step": 3158 }, { "epoch": 0.0661266013564431, "grad_norm": 0.18776026368141174, "learning_rate": 0.00019976476441740108, "loss": 11.6824, "step": 3159 }, { "epoch": 0.06614753412040526, "grad_norm": 0.2428940236568451, "learning_rate": 0.00019976461409408456, "loss": 11.6751, "step": 3160 }, { "epoch": 0.06616846688436741, "grad_norm": 0.2452944964170456, "learning_rate": 0.00019976446372280916, "loss": 11.6806, "step": 3161 }, { "epoch": 0.06618939964832957, "grad_norm": 0.28466796875, "learning_rate": 0.00019976431330357499, "loss": 11.7102, "step": 3162 }, { "epoch": 0.06621033241229173, "grad_norm": 0.2222803831100464, "learning_rate": 0.00019976416283638213, "loss": 11.6978, "step": 3163 }, { "epoch": 0.06623126517625387, "grad_norm": 0.2114889770746231, "learning_rate": 0.00019976401232123063, "loss": 11.6717, "step": 3164 }, { "epoch": 0.06625219794021603, "grad_norm": 0.2327597737312317, "learning_rate": 0.00019976386175812056, "loss": 11.6825, "step": 3165 }, { "epoch": 0.06627313070417817, "grad_norm": 0.2823273837566376, "learning_rate": 0.00019976371114705202, "loss": 11.6842, "step": 3166 }, { "epoch": 0.06629406346814033, "grad_norm": 0.2707946300506592, "learning_rate": 0.00019976356048802502, "loss": 11.6799, "step": 3167 }, { "epoch": 0.06631499623210249, "grad_norm": 0.22849878668785095, "learning_rate": 0.00019976340978103974, "loss": 11.6887, "step": 3168 }, { "epoch": 0.06633592899606464, "grad_norm": 0.23201480507850647, "learning_rate": 0.00019976325902609614, "loss": 11.693, "step": 3169 }, { "epoch": 0.0663568617600268, "grad_norm": 0.21190401911735535, "learning_rate": 0.00019976310822319437, "loss": 11.6808, "step": 3170 }, { "epoch": 0.06637779452398895, "grad_norm": 0.20965763926506042, "learning_rate": 0.00019976295737233446, "loss": 11.6785, "step": 3171 }, { "epoch": 0.0663987272879511, "grad_norm": 0.2050105631351471, "learning_rate": 0.0001997628064735165, "loss": 11.6826, "step": 3172 }, { "epoch": 0.06641966005191326, "grad_norm": 0.22305184602737427, "learning_rate": 0.00019976265552674059, "loss": 11.6776, "step": 3173 }, { "epoch": 0.0664405928158754, "grad_norm": 0.8338701725006104, "learning_rate": 0.0001997625045320067, "loss": 11.6746, "step": 3174 }, { "epoch": 0.06646152557983756, "grad_norm": 0.25681400299072266, "learning_rate": 0.00019976235348931505, "loss": 11.6792, "step": 3175 }, { "epoch": 0.06648245834379972, "grad_norm": 0.2722398042678833, "learning_rate": 0.00019976220239866562, "loss": 11.6906, "step": 3176 }, { "epoch": 0.06650339110776186, "grad_norm": 0.20481859147548676, "learning_rate": 0.00019976205126005846, "loss": 11.6823, "step": 3177 }, { "epoch": 0.06652432387172402, "grad_norm": 0.2150936871767044, "learning_rate": 0.00019976190007349372, "loss": 11.6945, "step": 3178 }, { "epoch": 0.06654525663568618, "grad_norm": 0.2607493996620178, "learning_rate": 0.0001997617488389714, "loss": 11.69, "step": 3179 }, { "epoch": 0.06656618939964833, "grad_norm": 0.20046745240688324, "learning_rate": 0.00019976159755649166, "loss": 11.6855, "step": 3180 }, { "epoch": 0.06658712216361048, "grad_norm": 0.20864138007164001, "learning_rate": 0.0001997614462260545, "loss": 11.6781, "step": 3181 }, { "epoch": 0.06660805492757264, "grad_norm": 0.21877551078796387, "learning_rate": 0.00019976129484766, "loss": 11.6778, "step": 3182 }, { "epoch": 0.06662898769153479, "grad_norm": 0.2012626975774765, "learning_rate": 0.00019976114342130824, "loss": 11.6884, "step": 3183 }, { "epoch": 0.06664992045549695, "grad_norm": 0.21529759466648102, "learning_rate": 0.00019976099194699934, "loss": 11.7056, "step": 3184 }, { "epoch": 0.06667085321945909, "grad_norm": 0.2073601931333542, "learning_rate": 0.00019976084042473328, "loss": 11.6899, "step": 3185 }, { "epoch": 0.06669178598342125, "grad_norm": 0.2653871178627014, "learning_rate": 0.00019976068885451022, "loss": 11.6841, "step": 3186 }, { "epoch": 0.06671271874738341, "grad_norm": 0.22858890891075134, "learning_rate": 0.00019976053723633018, "loss": 11.6981, "step": 3187 }, { "epoch": 0.06673365151134555, "grad_norm": 0.20421764254570007, "learning_rate": 0.00019976038557019328, "loss": 11.6812, "step": 3188 }, { "epoch": 0.06675458427530771, "grad_norm": 0.3006892800331116, "learning_rate": 0.00019976023385609955, "loss": 11.6989, "step": 3189 }, { "epoch": 0.06677551703926987, "grad_norm": 0.21623940765857697, "learning_rate": 0.00019976008209404906, "loss": 11.689, "step": 3190 }, { "epoch": 0.06679644980323202, "grad_norm": 0.2622602880001068, "learning_rate": 0.0001997599302840419, "loss": 11.6845, "step": 3191 }, { "epoch": 0.06681738256719418, "grad_norm": 0.19452838599681854, "learning_rate": 0.00019975977842607816, "loss": 11.6869, "step": 3192 }, { "epoch": 0.06683831533115632, "grad_norm": 0.20791201293468475, "learning_rate": 0.00019975962652015787, "loss": 11.6784, "step": 3193 }, { "epoch": 0.06685924809511848, "grad_norm": 0.1978064626455307, "learning_rate": 0.00019975947456628114, "loss": 11.6895, "step": 3194 }, { "epoch": 0.06688018085908064, "grad_norm": 0.24535636603832245, "learning_rate": 0.00019975932256444803, "loss": 11.6965, "step": 3195 }, { "epoch": 0.06690111362304278, "grad_norm": 0.25028184056282043, "learning_rate": 0.0001997591705146586, "loss": 11.696, "step": 3196 }, { "epoch": 0.06692204638700494, "grad_norm": 0.23805443942546844, "learning_rate": 0.00019975901841691297, "loss": 11.6912, "step": 3197 }, { "epoch": 0.0669429791509671, "grad_norm": 0.1957937479019165, "learning_rate": 0.0001997588662712112, "loss": 11.6765, "step": 3198 }, { "epoch": 0.06696391191492924, "grad_norm": 0.21180684864521027, "learning_rate": 0.0001997587140775533, "loss": 11.6748, "step": 3199 }, { "epoch": 0.0669848446788914, "grad_norm": 0.21619591116905212, "learning_rate": 0.0001997585618359394, "loss": 11.6753, "step": 3200 }, { "epoch": 0.06700577744285355, "grad_norm": 0.19469794631004333, "learning_rate": 0.00019975840954636955, "loss": 11.6877, "step": 3201 }, { "epoch": 0.0670267102068157, "grad_norm": 0.2410467267036438, "learning_rate": 0.00019975825720884384, "loss": 11.6794, "step": 3202 }, { "epoch": 0.06704764297077787, "grad_norm": 0.20028090476989746, "learning_rate": 0.00019975810482336233, "loss": 11.6667, "step": 3203 }, { "epoch": 0.06706857573474001, "grad_norm": 0.18633921444416046, "learning_rate": 0.00019975795238992512, "loss": 11.6837, "step": 3204 }, { "epoch": 0.06708950849870217, "grad_norm": 0.1784266084432602, "learning_rate": 0.00019975779990853224, "loss": 11.6907, "step": 3205 }, { "epoch": 0.06711044126266433, "grad_norm": 0.24539202451705933, "learning_rate": 0.0001997576473791838, "loss": 11.675, "step": 3206 }, { "epoch": 0.06713137402662647, "grad_norm": 0.20851048827171326, "learning_rate": 0.00019975749480187987, "loss": 11.6881, "step": 3207 }, { "epoch": 0.06715230679058863, "grad_norm": 0.22247019410133362, "learning_rate": 0.00019975734217662049, "loss": 11.6794, "step": 3208 }, { "epoch": 0.06717323955455078, "grad_norm": 0.22431589663028717, "learning_rate": 0.00019975718950340576, "loss": 11.6964, "step": 3209 }, { "epoch": 0.06719417231851293, "grad_norm": 0.18023905158042908, "learning_rate": 0.00019975703678223576, "loss": 11.6871, "step": 3210 }, { "epoch": 0.0672151050824751, "grad_norm": 0.2124049961566925, "learning_rate": 0.00019975688401311053, "loss": 11.6849, "step": 3211 }, { "epoch": 0.06723603784643724, "grad_norm": 0.18655925989151, "learning_rate": 0.00019975673119603023, "loss": 11.6738, "step": 3212 }, { "epoch": 0.0672569706103994, "grad_norm": 0.2200450301170349, "learning_rate": 0.0001997565783309948, "loss": 11.6951, "step": 3213 }, { "epoch": 0.06727790337436156, "grad_norm": 0.21039381623268127, "learning_rate": 0.00019975642541800442, "loss": 11.6795, "step": 3214 }, { "epoch": 0.0672988361383237, "grad_norm": 0.2195279598236084, "learning_rate": 0.0001997562724570591, "loss": 11.6874, "step": 3215 }, { "epoch": 0.06731976890228586, "grad_norm": 0.20529498159885406, "learning_rate": 0.00019975611944815898, "loss": 11.6768, "step": 3216 }, { "epoch": 0.06734070166624802, "grad_norm": 0.19152066111564636, "learning_rate": 0.00019975596639130406, "loss": 11.6786, "step": 3217 }, { "epoch": 0.06736163443021016, "grad_norm": 0.2217441350221634, "learning_rate": 0.00019975581328649447, "loss": 11.6799, "step": 3218 }, { "epoch": 0.06738256719417232, "grad_norm": 0.22667604684829712, "learning_rate": 0.00019975566013373026, "loss": 11.6998, "step": 3219 }, { "epoch": 0.06740349995813447, "grad_norm": 0.2075543999671936, "learning_rate": 0.0001997555069330115, "loss": 11.6715, "step": 3220 }, { "epoch": 0.06742443272209662, "grad_norm": 0.20865193009376526, "learning_rate": 0.00019975535368433825, "loss": 11.6924, "step": 3221 }, { "epoch": 0.06744536548605878, "grad_norm": 0.1701095700263977, "learning_rate": 0.0001997552003877106, "loss": 11.7009, "step": 3222 }, { "epoch": 0.06746629825002093, "grad_norm": 0.26359668374061584, "learning_rate": 0.00019975504704312867, "loss": 11.703, "step": 3223 }, { "epoch": 0.06748723101398309, "grad_norm": 0.1982908397912979, "learning_rate": 0.00019975489365059245, "loss": 11.7073, "step": 3224 }, { "epoch": 0.06750816377794525, "grad_norm": 0.21803414821624756, "learning_rate": 0.00019975474021010205, "loss": 11.6897, "step": 3225 }, { "epoch": 0.06752909654190739, "grad_norm": 0.30500829219818115, "learning_rate": 0.00019975458672165757, "loss": 11.6985, "step": 3226 }, { "epoch": 0.06755002930586955, "grad_norm": 0.21715953946113586, "learning_rate": 0.00019975443318525908, "loss": 11.6761, "step": 3227 }, { "epoch": 0.0675709620698317, "grad_norm": 0.23570282757282257, "learning_rate": 0.0001997542796009066, "loss": 11.682, "step": 3228 }, { "epoch": 0.06759189483379385, "grad_norm": 0.20343513786792755, "learning_rate": 0.00019975412596860025, "loss": 11.6899, "step": 3229 }, { "epoch": 0.06761282759775601, "grad_norm": 0.24953694641590118, "learning_rate": 0.0001997539722883401, "loss": 11.6861, "step": 3230 }, { "epoch": 0.06763376036171816, "grad_norm": 0.20688728988170624, "learning_rate": 0.0001997538185601262, "loss": 11.6819, "step": 3231 }, { "epoch": 0.06765469312568032, "grad_norm": 0.22553884983062744, "learning_rate": 0.00019975366478395865, "loss": 11.6965, "step": 3232 }, { "epoch": 0.06767562588964247, "grad_norm": 0.20793473720550537, "learning_rate": 0.0001997535109598375, "loss": 11.691, "step": 3233 }, { "epoch": 0.06769655865360462, "grad_norm": 0.18636693060398102, "learning_rate": 0.00019975335708776287, "loss": 11.6981, "step": 3234 }, { "epoch": 0.06771749141756678, "grad_norm": 0.24455316364765167, "learning_rate": 0.00019975320316773475, "loss": 11.6831, "step": 3235 }, { "epoch": 0.06773842418152892, "grad_norm": 0.18075302243232727, "learning_rate": 0.00019975304919975332, "loss": 11.6755, "step": 3236 }, { "epoch": 0.06775935694549108, "grad_norm": 0.2045183926820755, "learning_rate": 0.00019975289518381857, "loss": 11.6816, "step": 3237 }, { "epoch": 0.06778028970945324, "grad_norm": 0.22341056168079376, "learning_rate": 0.0001997527411199306, "loss": 11.691, "step": 3238 }, { "epoch": 0.06780122247341538, "grad_norm": 0.22709009051322937, "learning_rate": 0.00019975258700808952, "loss": 11.6796, "step": 3239 }, { "epoch": 0.06782215523737754, "grad_norm": 0.1914537400007248, "learning_rate": 0.00019975243284829535, "loss": 11.6915, "step": 3240 }, { "epoch": 0.0678430880013397, "grad_norm": 0.19322094321250916, "learning_rate": 0.0001997522786405482, "loss": 11.6733, "step": 3241 }, { "epoch": 0.06786402076530185, "grad_norm": 0.19647566974163055, "learning_rate": 0.0001997521243848481, "loss": 11.6886, "step": 3242 }, { "epoch": 0.067884953529264, "grad_norm": 0.19956864416599274, "learning_rate": 0.0001997519700811952, "loss": 11.6837, "step": 3243 }, { "epoch": 0.06790588629322616, "grad_norm": 0.20247739553451538, "learning_rate": 0.0001997518157295895, "loss": 11.6857, "step": 3244 }, { "epoch": 0.06792681905718831, "grad_norm": 0.18708665668964386, "learning_rate": 0.00019975166133003112, "loss": 11.691, "step": 3245 }, { "epoch": 0.06794775182115047, "grad_norm": 0.19930845499038696, "learning_rate": 0.00019975150688252008, "loss": 11.6905, "step": 3246 }, { "epoch": 0.06796868458511261, "grad_norm": 0.2758757472038269, "learning_rate": 0.00019975135238705654, "loss": 11.6887, "step": 3247 }, { "epoch": 0.06798961734907477, "grad_norm": 0.18931390345096588, "learning_rate": 0.0001997511978436405, "loss": 11.6916, "step": 3248 }, { "epoch": 0.06801055011303693, "grad_norm": 0.2318170815706253, "learning_rate": 0.00019975104325227206, "loss": 11.6848, "step": 3249 }, { "epoch": 0.06803148287699907, "grad_norm": 0.20163771510124207, "learning_rate": 0.0001997508886129513, "loss": 11.6804, "step": 3250 }, { "epoch": 0.06805241564096123, "grad_norm": 0.15694501996040344, "learning_rate": 0.0001997507339256783, "loss": 11.6751, "step": 3251 }, { "epoch": 0.06807334840492339, "grad_norm": 0.26393812894821167, "learning_rate": 0.0001997505791904531, "loss": 11.6808, "step": 3252 }, { "epoch": 0.06809428116888554, "grad_norm": 0.20130883157253265, "learning_rate": 0.0001997504244072758, "loss": 11.6796, "step": 3253 }, { "epoch": 0.0681152139328477, "grad_norm": 0.17777018249034882, "learning_rate": 0.00019975026957614648, "loss": 11.6828, "step": 3254 }, { "epoch": 0.06813614669680984, "grad_norm": 0.1736140251159668, "learning_rate": 0.0001997501146970652, "loss": 11.6888, "step": 3255 }, { "epoch": 0.068157079460772, "grad_norm": 0.27097001671791077, "learning_rate": 0.00019974995977003205, "loss": 11.6907, "step": 3256 }, { "epoch": 0.06817801222473416, "grad_norm": 0.16269877552986145, "learning_rate": 0.00019974980479504708, "loss": 11.6753, "step": 3257 }, { "epoch": 0.0681989449886963, "grad_norm": 0.3423207104206085, "learning_rate": 0.0001997496497721104, "loss": 11.677, "step": 3258 }, { "epoch": 0.06821987775265846, "grad_norm": 0.2160489857196808, "learning_rate": 0.00019974949470122203, "loss": 11.691, "step": 3259 }, { "epoch": 0.06824081051662062, "grad_norm": 0.18601049482822418, "learning_rate": 0.00019974933958238214, "loss": 11.6842, "step": 3260 }, { "epoch": 0.06826174328058277, "grad_norm": 0.20024187862873077, "learning_rate": 0.0001997491844155907, "loss": 11.6869, "step": 3261 }, { "epoch": 0.06828267604454492, "grad_norm": 0.22031240165233612, "learning_rate": 0.00019974902920084784, "loss": 11.6967, "step": 3262 }, { "epoch": 0.06830360880850707, "grad_norm": 0.19889290630817413, "learning_rate": 0.00019974887393815361, "loss": 11.6749, "step": 3263 }, { "epoch": 0.06832454157246923, "grad_norm": 0.2440374195575714, "learning_rate": 0.0001997487186275081, "loss": 11.6804, "step": 3264 }, { "epoch": 0.06834547433643139, "grad_norm": 0.18286308646202087, "learning_rate": 0.00019974856326891138, "loss": 11.6753, "step": 3265 }, { "epoch": 0.06836640710039353, "grad_norm": 0.17288310825824738, "learning_rate": 0.00019974840786236354, "loss": 11.6815, "step": 3266 }, { "epoch": 0.06838733986435569, "grad_norm": 0.2425706386566162, "learning_rate": 0.00019974825240786466, "loss": 11.6898, "step": 3267 }, { "epoch": 0.06840827262831785, "grad_norm": 0.20023325085639954, "learning_rate": 0.00019974809690541476, "loss": 11.6954, "step": 3268 }, { "epoch": 0.06842920539228, "grad_norm": 0.18914780020713806, "learning_rate": 0.00019974794135501397, "loss": 11.6792, "step": 3269 }, { "epoch": 0.06845013815624215, "grad_norm": 0.153954416513443, "learning_rate": 0.0001997477857566623, "loss": 11.6912, "step": 3270 }, { "epoch": 0.06847107092020431, "grad_norm": 0.23949259519577026, "learning_rate": 0.00019974763011035992, "loss": 11.6903, "step": 3271 }, { "epoch": 0.06849200368416646, "grad_norm": 0.23471003770828247, "learning_rate": 0.00019974747441610686, "loss": 11.689, "step": 3272 }, { "epoch": 0.06851293644812861, "grad_norm": 0.21726955473423004, "learning_rate": 0.00019974731867390317, "loss": 11.6724, "step": 3273 }, { "epoch": 0.06853386921209076, "grad_norm": 0.1859155297279358, "learning_rate": 0.00019974716288374895, "loss": 11.67, "step": 3274 }, { "epoch": 0.06855480197605292, "grad_norm": 0.189950630068779, "learning_rate": 0.0001997470070456443, "loss": 11.7054, "step": 3275 }, { "epoch": 0.06857573474001508, "grad_norm": 0.22423559427261353, "learning_rate": 0.0001997468511595892, "loss": 11.6885, "step": 3276 }, { "epoch": 0.06859666750397722, "grad_norm": 0.2131328135728836, "learning_rate": 0.00019974669522558383, "loss": 11.6787, "step": 3277 }, { "epoch": 0.06861760026793938, "grad_norm": 0.17617028951644897, "learning_rate": 0.00019974653924362823, "loss": 11.6744, "step": 3278 }, { "epoch": 0.06863853303190154, "grad_norm": 0.22218428552150726, "learning_rate": 0.00019974638321372244, "loss": 11.6838, "step": 3279 }, { "epoch": 0.06865946579586368, "grad_norm": 0.1834724247455597, "learning_rate": 0.0001997462271358666, "loss": 11.6919, "step": 3280 }, { "epoch": 0.06868039855982584, "grad_norm": 0.21838954091072083, "learning_rate": 0.0001997460710100607, "loss": 11.6887, "step": 3281 }, { "epoch": 0.06870133132378799, "grad_norm": 0.20243239402770996, "learning_rate": 0.00019974591483630492, "loss": 11.7031, "step": 3282 }, { "epoch": 0.06872226408775015, "grad_norm": 0.232522651553154, "learning_rate": 0.00019974575861459926, "loss": 11.7, "step": 3283 }, { "epoch": 0.0687431968517123, "grad_norm": 0.1914551854133606, "learning_rate": 0.0001997456023449438, "loss": 11.6829, "step": 3284 }, { "epoch": 0.06876412961567445, "grad_norm": 0.17035973072052002, "learning_rate": 0.00019974544602733866, "loss": 11.6859, "step": 3285 }, { "epoch": 0.06878506237963661, "grad_norm": 0.195402130484581, "learning_rate": 0.00019974528966178385, "loss": 11.6795, "step": 3286 }, { "epoch": 0.06880599514359877, "grad_norm": 0.21849475800991058, "learning_rate": 0.0001997451332482795, "loss": 11.6715, "step": 3287 }, { "epoch": 0.06882692790756091, "grad_norm": 0.1922868937253952, "learning_rate": 0.00019974497678682568, "loss": 11.6879, "step": 3288 }, { "epoch": 0.06884786067152307, "grad_norm": 0.23839370906352997, "learning_rate": 0.00019974482027742242, "loss": 11.6851, "step": 3289 }, { "epoch": 0.06886879343548521, "grad_norm": 0.3444144129753113, "learning_rate": 0.00019974466372006983, "loss": 11.6875, "step": 3290 }, { "epoch": 0.06888972619944737, "grad_norm": 0.2024206966161728, "learning_rate": 0.00019974450711476797, "loss": 11.6857, "step": 3291 }, { "epoch": 0.06891065896340953, "grad_norm": 0.29873064160346985, "learning_rate": 0.00019974435046151698, "loss": 11.6876, "step": 3292 }, { "epoch": 0.06893159172737168, "grad_norm": 0.21664969623088837, "learning_rate": 0.00019974419376031684, "loss": 11.688, "step": 3293 }, { "epoch": 0.06895252449133384, "grad_norm": 0.2051258087158203, "learning_rate": 0.00019974403701116767, "loss": 11.6842, "step": 3294 }, { "epoch": 0.068973457255296, "grad_norm": 0.2346448451280594, "learning_rate": 0.00019974388021406955, "loss": 11.6877, "step": 3295 }, { "epoch": 0.06899439001925814, "grad_norm": 0.20451176166534424, "learning_rate": 0.00019974372336902256, "loss": 11.6899, "step": 3296 }, { "epoch": 0.0690153227832203, "grad_norm": 0.27269497513771057, "learning_rate": 0.00019974356647602673, "loss": 11.6881, "step": 3297 }, { "epoch": 0.06903625554718246, "grad_norm": 0.21696478128433228, "learning_rate": 0.0001997434095350822, "loss": 11.6861, "step": 3298 }, { "epoch": 0.0690571883111446, "grad_norm": 0.2355942577123642, "learning_rate": 0.000199743252546189, "loss": 11.6934, "step": 3299 }, { "epoch": 0.06907812107510676, "grad_norm": 0.19469603896141052, "learning_rate": 0.00019974309550934723, "loss": 11.6676, "step": 3300 }, { "epoch": 0.0690990538390689, "grad_norm": 0.21754741668701172, "learning_rate": 0.00019974293842455693, "loss": 11.6959, "step": 3301 }, { "epoch": 0.06911998660303106, "grad_norm": 0.18198618292808533, "learning_rate": 0.00019974278129181823, "loss": 11.6691, "step": 3302 }, { "epoch": 0.06914091936699322, "grad_norm": 0.2581397294998169, "learning_rate": 0.00019974262411113117, "loss": 11.6896, "step": 3303 }, { "epoch": 0.06916185213095537, "grad_norm": 0.22030402719974518, "learning_rate": 0.00019974246688249584, "loss": 11.7045, "step": 3304 }, { "epoch": 0.06918278489491753, "grad_norm": 0.200722336769104, "learning_rate": 0.00019974230960591226, "loss": 11.6761, "step": 3305 }, { "epoch": 0.06920371765887968, "grad_norm": 0.22051702439785004, "learning_rate": 0.00019974215228138058, "loss": 11.697, "step": 3306 }, { "epoch": 0.06922465042284183, "grad_norm": 0.18887382745742798, "learning_rate": 0.00019974199490890087, "loss": 11.6752, "step": 3307 }, { "epoch": 0.06924558318680399, "grad_norm": 0.18694163858890533, "learning_rate": 0.00019974183748847318, "loss": 11.686, "step": 3308 }, { "epoch": 0.06926651595076613, "grad_norm": 0.2606842815876007, "learning_rate": 0.00019974168002009758, "loss": 11.6838, "step": 3309 }, { "epoch": 0.06928744871472829, "grad_norm": 0.19666114449501038, "learning_rate": 0.00019974152250377413, "loss": 11.697, "step": 3310 }, { "epoch": 0.06930838147869045, "grad_norm": 0.23007944226264954, "learning_rate": 0.00019974136493950298, "loss": 11.6729, "step": 3311 }, { "epoch": 0.0693293142426526, "grad_norm": 0.22703006863594055, "learning_rate": 0.0001997412073272841, "loss": 11.672, "step": 3312 }, { "epoch": 0.06935024700661475, "grad_norm": 0.20845861732959747, "learning_rate": 0.00019974104966711767, "loss": 11.6832, "step": 3313 }, { "epoch": 0.06937117977057691, "grad_norm": 0.26348891854286194, "learning_rate": 0.0001997408919590037, "loss": 11.6857, "step": 3314 }, { "epoch": 0.06939211253453906, "grad_norm": 0.27258533239364624, "learning_rate": 0.0001997407342029423, "loss": 11.6972, "step": 3315 }, { "epoch": 0.06941304529850122, "grad_norm": 0.24648930132389069, "learning_rate": 0.0001997405763989335, "loss": 11.6799, "step": 3316 }, { "epoch": 0.06943397806246336, "grad_norm": 0.18532976508140564, "learning_rate": 0.0001997404185469774, "loss": 11.6787, "step": 3317 }, { "epoch": 0.06945491082642552, "grad_norm": 0.25985613465309143, "learning_rate": 0.00019974026064707414, "loss": 11.6941, "step": 3318 }, { "epoch": 0.06947584359038768, "grad_norm": 0.1873682290315628, "learning_rate": 0.0001997401026992237, "loss": 11.6898, "step": 3319 }, { "epoch": 0.06949677635434982, "grad_norm": 0.17623485624790192, "learning_rate": 0.0001997399447034262, "loss": 11.6846, "step": 3320 }, { "epoch": 0.06951770911831198, "grad_norm": 0.24543067812919617, "learning_rate": 0.00019973978665968172, "loss": 11.6818, "step": 3321 }, { "epoch": 0.06953864188227414, "grad_norm": 0.5221496820449829, "learning_rate": 0.00019973962856799032, "loss": 11.688, "step": 3322 }, { "epoch": 0.06955957464623629, "grad_norm": 0.20896489918231964, "learning_rate": 0.00019973947042835205, "loss": 11.6937, "step": 3323 }, { "epoch": 0.06958050741019844, "grad_norm": 0.20650486648082733, "learning_rate": 0.00019973931224076706, "loss": 11.6953, "step": 3324 }, { "epoch": 0.06960144017416059, "grad_norm": 0.24106557667255402, "learning_rate": 0.00019973915400523534, "loss": 11.6947, "step": 3325 }, { "epoch": 0.06962237293812275, "grad_norm": 0.2307906597852707, "learning_rate": 0.00019973899572175706, "loss": 11.6813, "step": 3326 }, { "epoch": 0.0696433057020849, "grad_norm": 0.23591814935207367, "learning_rate": 0.0001997388373903322, "loss": 11.693, "step": 3327 }, { "epoch": 0.06966423846604705, "grad_norm": 0.3794025182723999, "learning_rate": 0.00019973867901096093, "loss": 11.6988, "step": 3328 }, { "epoch": 0.06968517123000921, "grad_norm": 0.2358579933643341, "learning_rate": 0.00019973852058364323, "loss": 11.6927, "step": 3329 }, { "epoch": 0.06970610399397137, "grad_norm": 0.22689472138881683, "learning_rate": 0.00019973836210837926, "loss": 11.684, "step": 3330 }, { "epoch": 0.06972703675793351, "grad_norm": 0.19263529777526855, "learning_rate": 0.00019973820358516907, "loss": 11.6885, "step": 3331 }, { "epoch": 0.06974796952189567, "grad_norm": 0.19247157871723175, "learning_rate": 0.0001997380450140127, "loss": 11.6917, "step": 3332 }, { "epoch": 0.06976890228585783, "grad_norm": 0.20724689960479736, "learning_rate": 0.00019973788639491023, "loss": 11.689, "step": 3333 }, { "epoch": 0.06978983504981998, "grad_norm": 0.2179127335548401, "learning_rate": 0.00019973772772786182, "loss": 11.6816, "step": 3334 }, { "epoch": 0.06981076781378213, "grad_norm": 0.18525880575180054, "learning_rate": 0.00019973756901286744, "loss": 11.6949, "step": 3335 }, { "epoch": 0.06983170057774428, "grad_norm": 0.2776298522949219, "learning_rate": 0.00019973741024992723, "loss": 11.7013, "step": 3336 }, { "epoch": 0.06985263334170644, "grad_norm": 0.18747995793819427, "learning_rate": 0.00019973725143904126, "loss": 11.6905, "step": 3337 }, { "epoch": 0.0698735661056686, "grad_norm": 0.2285935878753662, "learning_rate": 0.00019973709258020957, "loss": 11.6975, "step": 3338 }, { "epoch": 0.06989449886963074, "grad_norm": 0.2262919396162033, "learning_rate": 0.00019973693367343227, "loss": 11.6827, "step": 3339 }, { "epoch": 0.0699154316335929, "grad_norm": 0.19292284548282623, "learning_rate": 0.00019973677471870943, "loss": 11.6884, "step": 3340 }, { "epoch": 0.06993636439755506, "grad_norm": 0.24903258681297302, "learning_rate": 0.00019973661571604109, "loss": 11.6983, "step": 3341 }, { "epoch": 0.0699572971615172, "grad_norm": 0.26924896240234375, "learning_rate": 0.0001997364566654274, "loss": 11.6857, "step": 3342 }, { "epoch": 0.06997822992547936, "grad_norm": 0.2750115990638733, "learning_rate": 0.0001997362975668684, "loss": 11.6647, "step": 3343 }, { "epoch": 0.06999916268944151, "grad_norm": 0.19818373024463654, "learning_rate": 0.00019973613842036412, "loss": 11.6778, "step": 3344 }, { "epoch": 0.07002009545340367, "grad_norm": 0.21276414394378662, "learning_rate": 0.00019973597922591472, "loss": 11.6797, "step": 3345 }, { "epoch": 0.07004102821736582, "grad_norm": 0.23818765580654144, "learning_rate": 0.00019973581998352022, "loss": 11.7096, "step": 3346 }, { "epoch": 0.07006196098132797, "grad_norm": 0.2202645242214203, "learning_rate": 0.0001997356606931807, "loss": 11.6897, "step": 3347 }, { "epoch": 0.07008289374529013, "grad_norm": 0.20821304619312286, "learning_rate": 0.00019973550135489628, "loss": 11.6797, "step": 3348 }, { "epoch": 0.07010382650925229, "grad_norm": 0.19176767766475677, "learning_rate": 0.000199735341968667, "loss": 11.6922, "step": 3349 }, { "epoch": 0.07012475927321443, "grad_norm": 0.2731477618217468, "learning_rate": 0.00019973518253449293, "loss": 11.6833, "step": 3350 }, { "epoch": 0.07014569203717659, "grad_norm": 0.3198830485343933, "learning_rate": 0.00019973502305237415, "loss": 11.6955, "step": 3351 }, { "epoch": 0.07016662480113874, "grad_norm": 0.21967419981956482, "learning_rate": 0.00019973486352231075, "loss": 11.6888, "step": 3352 }, { "epoch": 0.0701875575651009, "grad_norm": 0.20287945866584778, "learning_rate": 0.00019973470394430286, "loss": 11.6888, "step": 3353 }, { "epoch": 0.07020849032906305, "grad_norm": 0.2201998084783554, "learning_rate": 0.00019973454431835043, "loss": 11.6788, "step": 3354 }, { "epoch": 0.0702294230930252, "grad_norm": 0.2606266438961029, "learning_rate": 0.00019973438464445363, "loss": 11.7013, "step": 3355 }, { "epoch": 0.07025035585698736, "grad_norm": 0.26706260442733765, "learning_rate": 0.00019973422492261249, "loss": 11.6974, "step": 3356 }, { "epoch": 0.07027128862094952, "grad_norm": 0.21757179498672485, "learning_rate": 0.00019973406515282714, "loss": 11.6905, "step": 3357 }, { "epoch": 0.07029222138491166, "grad_norm": 0.23420242965221405, "learning_rate": 0.0001997339053350976, "loss": 11.6667, "step": 3358 }, { "epoch": 0.07031315414887382, "grad_norm": 0.20600594580173492, "learning_rate": 0.00019973374546942402, "loss": 11.698, "step": 3359 }, { "epoch": 0.07033408691283598, "grad_norm": 0.1651882827281952, "learning_rate": 0.00019973358555580638, "loss": 11.6945, "step": 3360 }, { "epoch": 0.07035501967679812, "grad_norm": 0.22107188403606415, "learning_rate": 0.0001997334255942448, "loss": 11.6924, "step": 3361 }, { "epoch": 0.07037595244076028, "grad_norm": 0.21741144359111786, "learning_rate": 0.0001997332655847394, "loss": 11.6882, "step": 3362 }, { "epoch": 0.07039688520472243, "grad_norm": 0.23422269523143768, "learning_rate": 0.0001997331055272902, "loss": 11.6711, "step": 3363 }, { "epoch": 0.07041781796868458, "grad_norm": 0.20503103733062744, "learning_rate": 0.00019973294542189734, "loss": 11.6944, "step": 3364 }, { "epoch": 0.07043875073264674, "grad_norm": 0.1974618285894394, "learning_rate": 0.00019973278526856078, "loss": 11.6902, "step": 3365 }, { "epoch": 0.07045968349660889, "grad_norm": 0.22914746403694153, "learning_rate": 0.00019973262506728072, "loss": 11.6906, "step": 3366 }, { "epoch": 0.07048061626057105, "grad_norm": 0.21745021641254425, "learning_rate": 0.0001997324648180572, "loss": 11.6753, "step": 3367 }, { "epoch": 0.0705015490245332, "grad_norm": 0.19583149254322052, "learning_rate": 0.00019973230452089027, "loss": 11.6779, "step": 3368 }, { "epoch": 0.07052248178849535, "grad_norm": 0.2661895453929901, "learning_rate": 0.00019973214417578, "loss": 11.6843, "step": 3369 }, { "epoch": 0.07054341455245751, "grad_norm": 0.23591944575309753, "learning_rate": 0.0001997319837827265, "loss": 11.6829, "step": 3370 }, { "epoch": 0.07056434731641965, "grad_norm": 0.22219954431056976, "learning_rate": 0.00019973182334172984, "loss": 11.6937, "step": 3371 }, { "epoch": 0.07058528008038181, "grad_norm": 0.18059206008911133, "learning_rate": 0.0001997316628527901, "loss": 11.6816, "step": 3372 }, { "epoch": 0.07060621284434397, "grad_norm": 0.20975062251091003, "learning_rate": 0.00019973150231590735, "loss": 11.6778, "step": 3373 }, { "epoch": 0.07062714560830612, "grad_norm": 0.20448502898216248, "learning_rate": 0.00019973134173108167, "loss": 11.6801, "step": 3374 }, { "epoch": 0.07064807837226827, "grad_norm": 0.2413414567708969, "learning_rate": 0.00019973118109831313, "loss": 11.6751, "step": 3375 }, { "epoch": 0.07066901113623043, "grad_norm": 0.2661558985710144, "learning_rate": 0.0001997310204176018, "loss": 11.6962, "step": 3376 }, { "epoch": 0.07068994390019258, "grad_norm": 0.21786250174045563, "learning_rate": 0.00019973085968894784, "loss": 11.6789, "step": 3377 }, { "epoch": 0.07071087666415474, "grad_norm": 0.2917881906032562, "learning_rate": 0.00019973069891235116, "loss": 11.698, "step": 3378 }, { "epoch": 0.07073180942811688, "grad_norm": 0.25330650806427, "learning_rate": 0.00019973053808781198, "loss": 11.6844, "step": 3379 }, { "epoch": 0.07075274219207904, "grad_norm": 0.2723681330680847, "learning_rate": 0.00019973037721533033, "loss": 11.6707, "step": 3380 }, { "epoch": 0.0707736749560412, "grad_norm": 0.18041549623012543, "learning_rate": 0.0001997302162949063, "loss": 11.6867, "step": 3381 }, { "epoch": 0.07079460772000334, "grad_norm": 0.19976122677326202, "learning_rate": 0.00019973005532653994, "loss": 11.6929, "step": 3382 }, { "epoch": 0.0708155404839655, "grad_norm": 0.1998077780008316, "learning_rate": 0.00019972989431023134, "loss": 11.6914, "step": 3383 }, { "epoch": 0.07083647324792766, "grad_norm": 0.24568624794483185, "learning_rate": 0.00019972973324598061, "loss": 11.6868, "step": 3384 }, { "epoch": 0.0708574060118898, "grad_norm": 0.24128231406211853, "learning_rate": 0.00019972957213378775, "loss": 11.702, "step": 3385 }, { "epoch": 0.07087833877585197, "grad_norm": 0.24112263321876526, "learning_rate": 0.00019972941097365292, "loss": 11.6929, "step": 3386 }, { "epoch": 0.07089927153981412, "grad_norm": 0.3032545745372772, "learning_rate": 0.00019972924976557617, "loss": 11.6978, "step": 3387 }, { "epoch": 0.07092020430377627, "grad_norm": 0.25745531916618347, "learning_rate": 0.00019972908850955756, "loss": 11.6805, "step": 3388 }, { "epoch": 0.07094113706773843, "grad_norm": 0.27675917744636536, "learning_rate": 0.00019972892720559717, "loss": 11.6835, "step": 3389 }, { "epoch": 0.07096206983170057, "grad_norm": 0.3030909299850464, "learning_rate": 0.0001997287658536951, "loss": 11.6882, "step": 3390 }, { "epoch": 0.07098300259566273, "grad_norm": 0.2186930626630783, "learning_rate": 0.0001997286044538514, "loss": 11.6883, "step": 3391 }, { "epoch": 0.07100393535962489, "grad_norm": 0.2222408950328827, "learning_rate": 0.00019972844300606617, "loss": 11.6975, "step": 3392 }, { "epoch": 0.07102486812358703, "grad_norm": 0.2621840238571167, "learning_rate": 0.00019972828151033946, "loss": 11.7001, "step": 3393 }, { "epoch": 0.0710458008875492, "grad_norm": 0.22093217074871063, "learning_rate": 0.0001997281199666714, "loss": 11.6828, "step": 3394 }, { "epoch": 0.07106673365151135, "grad_norm": 0.16572795808315277, "learning_rate": 0.00019972795837506203, "loss": 11.6777, "step": 3395 }, { "epoch": 0.0710876664154735, "grad_norm": 0.3666248619556427, "learning_rate": 0.00019972779673551138, "loss": 11.7092, "step": 3396 }, { "epoch": 0.07110859917943566, "grad_norm": 0.22336402535438538, "learning_rate": 0.00019972763504801964, "loss": 11.6865, "step": 3397 }, { "epoch": 0.0711295319433978, "grad_norm": 0.19655074179172516, "learning_rate": 0.00019972747331258682, "loss": 11.6891, "step": 3398 }, { "epoch": 0.07115046470735996, "grad_norm": 0.21844522655010223, "learning_rate": 0.00019972731152921298, "loss": 11.6827, "step": 3399 }, { "epoch": 0.07117139747132212, "grad_norm": 0.1949605941772461, "learning_rate": 0.00019972714969789822, "loss": 11.6919, "step": 3400 }, { "epoch": 0.07119233023528426, "grad_norm": 0.20632579922676086, "learning_rate": 0.00019972698781864264, "loss": 11.6799, "step": 3401 }, { "epoch": 0.07121326299924642, "grad_norm": 0.2409457117319107, "learning_rate": 0.0001997268258914463, "loss": 11.6954, "step": 3402 }, { "epoch": 0.07123419576320858, "grad_norm": 0.22392700612545013, "learning_rate": 0.00019972666391630925, "loss": 11.6879, "step": 3403 }, { "epoch": 0.07125512852717072, "grad_norm": 0.21376608312129974, "learning_rate": 0.00019972650189323162, "loss": 11.6945, "step": 3404 }, { "epoch": 0.07127606129113288, "grad_norm": 0.2892071306705475, "learning_rate": 0.00019972633982221348, "loss": 11.7029, "step": 3405 }, { "epoch": 0.07129699405509503, "grad_norm": 0.23597171902656555, "learning_rate": 0.00019972617770325487, "loss": 11.6864, "step": 3406 }, { "epoch": 0.07131792681905719, "grad_norm": 0.21064595878124237, "learning_rate": 0.00019972601553635587, "loss": 11.6902, "step": 3407 }, { "epoch": 0.07133885958301935, "grad_norm": 0.19468611478805542, "learning_rate": 0.0001997258533215166, "loss": 11.6936, "step": 3408 }, { "epoch": 0.07135979234698149, "grad_norm": 0.22561359405517578, "learning_rate": 0.00019972569105873705, "loss": 11.6939, "step": 3409 }, { "epoch": 0.07138072511094365, "grad_norm": 0.26964303851127625, "learning_rate": 0.00019972552874801745, "loss": 11.7016, "step": 3410 }, { "epoch": 0.07140165787490581, "grad_norm": 0.19663576781749725, "learning_rate": 0.00019972536638935775, "loss": 11.6929, "step": 3411 }, { "epoch": 0.07142259063886795, "grad_norm": 0.2689538598060608, "learning_rate": 0.00019972520398275806, "loss": 11.6997, "step": 3412 }, { "epoch": 0.07144352340283011, "grad_norm": 0.22469010949134827, "learning_rate": 0.0001997250415282185, "loss": 11.6937, "step": 3413 }, { "epoch": 0.07146445616679227, "grad_norm": 0.1774846762418747, "learning_rate": 0.00019972487902573905, "loss": 11.6832, "step": 3414 }, { "epoch": 0.07148538893075441, "grad_norm": 0.2574011981487274, "learning_rate": 0.0001997247164753199, "loss": 11.6799, "step": 3415 }, { "epoch": 0.07150632169471657, "grad_norm": 0.24214306473731995, "learning_rate": 0.0001997245538769611, "loss": 11.6838, "step": 3416 }, { "epoch": 0.07152725445867872, "grad_norm": 0.20848606526851654, "learning_rate": 0.0001997243912306627, "loss": 11.6885, "step": 3417 }, { "epoch": 0.07154818722264088, "grad_norm": 0.20551423728466034, "learning_rate": 0.00019972422853642473, "loss": 11.6777, "step": 3418 }, { "epoch": 0.07156911998660304, "grad_norm": 0.2943011224269867, "learning_rate": 0.00019972406579424738, "loss": 11.6856, "step": 3419 }, { "epoch": 0.07159005275056518, "grad_norm": 0.22972437739372253, "learning_rate": 0.00019972390300413063, "loss": 11.688, "step": 3420 }, { "epoch": 0.07161098551452734, "grad_norm": 0.23529192805290222, "learning_rate": 0.00019972374016607466, "loss": 11.6849, "step": 3421 }, { "epoch": 0.0716319182784895, "grad_norm": 0.21034225821495056, "learning_rate": 0.00019972357728007944, "loss": 11.6929, "step": 3422 }, { "epoch": 0.07165285104245164, "grad_norm": 0.19308948516845703, "learning_rate": 0.00019972341434614513, "loss": 11.6764, "step": 3423 }, { "epoch": 0.0716737838064138, "grad_norm": 0.21490146219730377, "learning_rate": 0.00019972325136427178, "loss": 11.6939, "step": 3424 }, { "epoch": 0.07169471657037595, "grad_norm": 0.2374197542667389, "learning_rate": 0.00019972308833445944, "loss": 11.6886, "step": 3425 }, { "epoch": 0.0717156493343381, "grad_norm": 0.24873094260692596, "learning_rate": 0.0001997229252567082, "loss": 11.6854, "step": 3426 }, { "epoch": 0.07173658209830026, "grad_norm": 0.30303755402565, "learning_rate": 0.00019972276213101818, "loss": 11.6872, "step": 3427 }, { "epoch": 0.07175751486226241, "grad_norm": 0.22074539959430695, "learning_rate": 0.00019972259895738943, "loss": 11.702, "step": 3428 }, { "epoch": 0.07177844762622457, "grad_norm": 0.1970841884613037, "learning_rate": 0.00019972243573582201, "loss": 11.6694, "step": 3429 }, { "epoch": 0.07179938039018673, "grad_norm": 0.19130097329616547, "learning_rate": 0.00019972227246631603, "loss": 11.6996, "step": 3430 }, { "epoch": 0.07182031315414887, "grad_norm": 0.2646918296813965, "learning_rate": 0.00019972210914887155, "loss": 11.6762, "step": 3431 }, { "epoch": 0.07184124591811103, "grad_norm": 0.2160549759864807, "learning_rate": 0.00019972194578348864, "loss": 11.6888, "step": 3432 }, { "epoch": 0.07186217868207317, "grad_norm": 0.263484925031662, "learning_rate": 0.0001997217823701674, "loss": 11.6918, "step": 3433 }, { "epoch": 0.07188311144603533, "grad_norm": 0.30117955803871155, "learning_rate": 0.00019972161890890794, "loss": 11.6904, "step": 3434 }, { "epoch": 0.07190404420999749, "grad_norm": 0.22412507236003876, "learning_rate": 0.00019972145539971025, "loss": 11.689, "step": 3435 }, { "epoch": 0.07192497697395964, "grad_norm": 0.18352662026882172, "learning_rate": 0.00019972129184257447, "loss": 11.6873, "step": 3436 }, { "epoch": 0.0719459097379218, "grad_norm": 0.18776151537895203, "learning_rate": 0.00019972112823750068, "loss": 11.683, "step": 3437 }, { "epoch": 0.07196684250188395, "grad_norm": 0.2015008181333542, "learning_rate": 0.00019972096458448893, "loss": 11.6801, "step": 3438 }, { "epoch": 0.0719877752658461, "grad_norm": 0.2591293156147003, "learning_rate": 0.00019972080088353933, "loss": 11.6927, "step": 3439 }, { "epoch": 0.07200870802980826, "grad_norm": 0.18638984858989716, "learning_rate": 0.00019972063713465191, "loss": 11.6859, "step": 3440 }, { "epoch": 0.0720296407937704, "grad_norm": 0.2103378176689148, "learning_rate": 0.00019972047333782683, "loss": 11.6939, "step": 3441 }, { "epoch": 0.07205057355773256, "grad_norm": 0.21678593754768372, "learning_rate": 0.0001997203094930641, "loss": 11.6776, "step": 3442 }, { "epoch": 0.07207150632169472, "grad_norm": 0.3061707615852356, "learning_rate": 0.00019972014560036382, "loss": 11.6955, "step": 3443 }, { "epoch": 0.07209243908565686, "grad_norm": 0.2673845887184143, "learning_rate": 0.00019971998165972603, "loss": 11.6991, "step": 3444 }, { "epoch": 0.07211337184961902, "grad_norm": 0.22621513903141022, "learning_rate": 0.0001997198176711509, "loss": 11.6993, "step": 3445 }, { "epoch": 0.07213430461358118, "grad_norm": 0.21316270530223846, "learning_rate": 0.00019971965363463842, "loss": 11.6932, "step": 3446 }, { "epoch": 0.07215523737754333, "grad_norm": 0.1875891089439392, "learning_rate": 0.00019971948955018872, "loss": 11.687, "step": 3447 }, { "epoch": 0.07217617014150549, "grad_norm": 0.2235405147075653, "learning_rate": 0.00019971932541780184, "loss": 11.6929, "step": 3448 }, { "epoch": 0.07219710290546764, "grad_norm": 0.21857967972755432, "learning_rate": 0.0001997191612374779, "loss": 11.6866, "step": 3449 }, { "epoch": 0.07221803566942979, "grad_norm": 0.2595614194869995, "learning_rate": 0.00019971899700921698, "loss": 11.6726, "step": 3450 }, { "epoch": 0.07223896843339195, "grad_norm": 0.22849403321743011, "learning_rate": 0.0001997188327330191, "loss": 11.6901, "step": 3451 }, { "epoch": 0.07225990119735409, "grad_norm": 0.18989357352256775, "learning_rate": 0.00019971866840888444, "loss": 11.7027, "step": 3452 }, { "epoch": 0.07228083396131625, "grad_norm": 0.1874113827943802, "learning_rate": 0.00019971850403681295, "loss": 11.6868, "step": 3453 }, { "epoch": 0.07230176672527841, "grad_norm": 0.19561655819416046, "learning_rate": 0.00019971833961680483, "loss": 11.6838, "step": 3454 }, { "epoch": 0.07232269948924056, "grad_norm": 0.2647586464881897, "learning_rate": 0.00019971817514886006, "loss": 11.6793, "step": 3455 }, { "epoch": 0.07234363225320271, "grad_norm": 0.24825412034988403, "learning_rate": 0.00019971801063297878, "loss": 11.6803, "step": 3456 }, { "epoch": 0.07236456501716487, "grad_norm": 0.19569775462150574, "learning_rate": 0.00019971784606916107, "loss": 11.6923, "step": 3457 }, { "epoch": 0.07238549778112702, "grad_norm": 0.19980603456497192, "learning_rate": 0.00019971768145740697, "loss": 11.6882, "step": 3458 }, { "epoch": 0.07240643054508918, "grad_norm": 0.1961735188961029, "learning_rate": 0.0001997175167977166, "loss": 11.6785, "step": 3459 }, { "epoch": 0.07242736330905132, "grad_norm": 0.20914745330810547, "learning_rate": 0.00019971735209009005, "loss": 11.6817, "step": 3460 }, { "epoch": 0.07244829607301348, "grad_norm": 0.2435348629951477, "learning_rate": 0.0001997171873345273, "loss": 11.6898, "step": 3461 }, { "epoch": 0.07246922883697564, "grad_norm": 0.30153805017471313, "learning_rate": 0.00019971702253102856, "loss": 11.6939, "step": 3462 }, { "epoch": 0.07249016160093778, "grad_norm": 0.18981754779815674, "learning_rate": 0.0001997168576795938, "loss": 11.6814, "step": 3463 }, { "epoch": 0.07251109436489994, "grad_norm": 0.22395357489585876, "learning_rate": 0.00019971669278022317, "loss": 11.6913, "step": 3464 }, { "epoch": 0.0725320271288621, "grad_norm": 0.2820430397987366, "learning_rate": 0.00019971652783291673, "loss": 11.6704, "step": 3465 }, { "epoch": 0.07255295989282425, "grad_norm": 0.2301824688911438, "learning_rate": 0.00019971636283767458, "loss": 11.6983, "step": 3466 }, { "epoch": 0.0725738926567864, "grad_norm": 0.18404819071292877, "learning_rate": 0.00019971619779449675, "loss": 11.6911, "step": 3467 }, { "epoch": 0.07259482542074855, "grad_norm": 0.18296346068382263, "learning_rate": 0.00019971603270338336, "loss": 11.6684, "step": 3468 }, { "epoch": 0.07261575818471071, "grad_norm": 0.2019938975572586, "learning_rate": 0.00019971586756433444, "loss": 11.6973, "step": 3469 }, { "epoch": 0.07263669094867287, "grad_norm": 0.29649782180786133, "learning_rate": 0.00019971570237735012, "loss": 11.6848, "step": 3470 }, { "epoch": 0.07265762371263501, "grad_norm": 0.339201956987381, "learning_rate": 0.0001997155371424305, "loss": 11.6752, "step": 3471 }, { "epoch": 0.07267855647659717, "grad_norm": 0.17651419341564178, "learning_rate": 0.0001997153718595756, "loss": 11.705, "step": 3472 }, { "epoch": 0.07269948924055933, "grad_norm": 0.2522038221359253, "learning_rate": 0.0001997152065287855, "loss": 11.692, "step": 3473 }, { "epoch": 0.07272042200452147, "grad_norm": 0.2701881229877472, "learning_rate": 0.00019971504115006032, "loss": 11.6993, "step": 3474 }, { "epoch": 0.07274135476848363, "grad_norm": 0.3071417808532715, "learning_rate": 0.0001997148757234001, "loss": 11.7077, "step": 3475 }, { "epoch": 0.07276228753244579, "grad_norm": 0.25081032514572144, "learning_rate": 0.00019971471024880498, "loss": 11.6929, "step": 3476 }, { "epoch": 0.07278322029640794, "grad_norm": 0.21283161640167236, "learning_rate": 0.00019971454472627497, "loss": 11.6884, "step": 3477 }, { "epoch": 0.0728041530603701, "grad_norm": 0.17689856886863708, "learning_rate": 0.00019971437915581022, "loss": 11.6908, "step": 3478 }, { "epoch": 0.07282508582433224, "grad_norm": 0.2867826819419861, "learning_rate": 0.00019971421353741074, "loss": 11.6896, "step": 3479 }, { "epoch": 0.0728460185882944, "grad_norm": 0.25669941306114197, "learning_rate": 0.0001997140478710766, "loss": 11.6841, "step": 3480 }, { "epoch": 0.07286695135225656, "grad_norm": 0.21168267726898193, "learning_rate": 0.000199713882156808, "loss": 11.6838, "step": 3481 }, { "epoch": 0.0728878841162187, "grad_norm": 0.2233179658651352, "learning_rate": 0.00019971371639460487, "loss": 11.6911, "step": 3482 }, { "epoch": 0.07290881688018086, "grad_norm": 0.23868092894554138, "learning_rate": 0.00019971355058446739, "loss": 11.676, "step": 3483 }, { "epoch": 0.07292974964414302, "grad_norm": 0.2617088556289673, "learning_rate": 0.00019971338472639562, "loss": 11.6821, "step": 3484 }, { "epoch": 0.07295068240810516, "grad_norm": 0.2244836688041687, "learning_rate": 0.00019971321882038961, "loss": 11.6863, "step": 3485 }, { "epoch": 0.07297161517206732, "grad_norm": 0.22967319190502167, "learning_rate": 0.00019971305286644944, "loss": 11.6885, "step": 3486 }, { "epoch": 0.07299254793602947, "grad_norm": 0.22431057691574097, "learning_rate": 0.00019971288686457524, "loss": 11.6786, "step": 3487 }, { "epoch": 0.07301348069999163, "grad_norm": 0.2256944626569748, "learning_rate": 0.00019971272081476703, "loss": 11.6924, "step": 3488 }, { "epoch": 0.07303441346395378, "grad_norm": 0.24486882984638214, "learning_rate": 0.00019971255471702492, "loss": 11.6926, "step": 3489 }, { "epoch": 0.07305534622791593, "grad_norm": 0.19779883325099945, "learning_rate": 0.000199712388571349, "loss": 11.6875, "step": 3490 }, { "epoch": 0.07307627899187809, "grad_norm": 0.2230442613363266, "learning_rate": 0.00019971222237773938, "loss": 11.6805, "step": 3491 }, { "epoch": 0.07309721175584025, "grad_norm": 0.27819398045539856, "learning_rate": 0.00019971205613619603, "loss": 11.7071, "step": 3492 }, { "epoch": 0.07311814451980239, "grad_norm": 0.21071431040763855, "learning_rate": 0.00019971188984671908, "loss": 11.6899, "step": 3493 }, { "epoch": 0.07313907728376455, "grad_norm": 0.24654603004455566, "learning_rate": 0.00019971172350930868, "loss": 11.6818, "step": 3494 }, { "epoch": 0.0731600100477267, "grad_norm": 0.2027253359556198, "learning_rate": 0.00019971155712396487, "loss": 11.6926, "step": 3495 }, { "epoch": 0.07318094281168885, "grad_norm": 0.24814537167549133, "learning_rate": 0.00019971139069068766, "loss": 11.689, "step": 3496 }, { "epoch": 0.07320187557565101, "grad_norm": 0.23152482509613037, "learning_rate": 0.00019971122420947722, "loss": 11.6794, "step": 3497 }, { "epoch": 0.07322280833961316, "grad_norm": 0.20030349493026733, "learning_rate": 0.00019971105768033359, "loss": 11.6939, "step": 3498 }, { "epoch": 0.07324374110357532, "grad_norm": 0.2686779201030731, "learning_rate": 0.00019971089110325685, "loss": 11.6688, "step": 3499 }, { "epoch": 0.07326467386753747, "grad_norm": 0.19671286642551422, "learning_rate": 0.0001997107244782471, "loss": 11.6785, "step": 3500 }, { "epoch": 0.07328560663149962, "grad_norm": 0.22964325547218323, "learning_rate": 0.00019971055780530438, "loss": 11.6821, "step": 3501 }, { "epoch": 0.07330653939546178, "grad_norm": 0.1735086292028427, "learning_rate": 0.0001997103910844288, "loss": 11.686, "step": 3502 }, { "epoch": 0.07332747215942394, "grad_norm": 0.2394169420003891, "learning_rate": 0.00019971022431562046, "loss": 11.7019, "step": 3503 }, { "epoch": 0.07334840492338608, "grad_norm": 0.19609636068344116, "learning_rate": 0.0001997100574988794, "loss": 11.6806, "step": 3504 }, { "epoch": 0.07336933768734824, "grad_norm": 0.18799306452274323, "learning_rate": 0.00019970989063420574, "loss": 11.6882, "step": 3505 }, { "epoch": 0.07339027045131039, "grad_norm": 0.1888803243637085, "learning_rate": 0.0001997097237215995, "loss": 11.6774, "step": 3506 }, { "epoch": 0.07341120321527254, "grad_norm": 0.20970197021961212, "learning_rate": 0.00019970955676106082, "loss": 11.6893, "step": 3507 }, { "epoch": 0.0734321359792347, "grad_norm": 0.24016901850700378, "learning_rate": 0.00019970938975258978, "loss": 11.6767, "step": 3508 }, { "epoch": 0.07345306874319685, "grad_norm": 0.4354538321495056, "learning_rate": 0.00019970922269618642, "loss": 11.6999, "step": 3509 }, { "epoch": 0.073474001507159, "grad_norm": 0.23087292909622192, "learning_rate": 0.00019970905559185082, "loss": 11.677, "step": 3510 }, { "epoch": 0.07349493427112117, "grad_norm": 0.26701200008392334, "learning_rate": 0.0001997088884395831, "loss": 11.6818, "step": 3511 }, { "epoch": 0.07351586703508331, "grad_norm": 0.20426470041275024, "learning_rate": 0.00019970872123938331, "loss": 11.6837, "step": 3512 }, { "epoch": 0.07353679979904547, "grad_norm": 0.2532886862754822, "learning_rate": 0.00019970855399125155, "loss": 11.6798, "step": 3513 }, { "epoch": 0.07355773256300761, "grad_norm": 0.22745627164840698, "learning_rate": 0.00019970838669518788, "loss": 11.6886, "step": 3514 }, { "epoch": 0.07357866532696977, "grad_norm": 0.20558185875415802, "learning_rate": 0.00019970821935119237, "loss": 11.6889, "step": 3515 }, { "epoch": 0.07359959809093193, "grad_norm": 0.2283947914838791, "learning_rate": 0.00019970805195926517, "loss": 11.6796, "step": 3516 }, { "epoch": 0.07362053085489408, "grad_norm": 0.2031712681055069, "learning_rate": 0.00019970788451940626, "loss": 11.6748, "step": 3517 }, { "epoch": 0.07364146361885623, "grad_norm": 0.28781968355178833, "learning_rate": 0.0001997077170316158, "loss": 11.6846, "step": 3518 }, { "epoch": 0.0736623963828184, "grad_norm": 0.22059254348278046, "learning_rate": 0.00019970754949589386, "loss": 11.6767, "step": 3519 }, { "epoch": 0.07368332914678054, "grad_norm": 0.3207847774028778, "learning_rate": 0.00019970738191224044, "loss": 11.675, "step": 3520 }, { "epoch": 0.0737042619107427, "grad_norm": 0.23642569780349731, "learning_rate": 0.00019970721428065575, "loss": 11.706, "step": 3521 }, { "epoch": 0.07372519467470484, "grad_norm": 0.21537242829799652, "learning_rate": 0.00019970704660113978, "loss": 11.7005, "step": 3522 }, { "epoch": 0.073746127438667, "grad_norm": 0.25016120076179504, "learning_rate": 0.00019970687887369261, "loss": 11.7029, "step": 3523 }, { "epoch": 0.07376706020262916, "grad_norm": 0.21069346368312836, "learning_rate": 0.0001997067110983144, "loss": 11.6805, "step": 3524 }, { "epoch": 0.0737879929665913, "grad_norm": 0.23178362846374512, "learning_rate": 0.0001997065432750051, "loss": 11.6713, "step": 3525 }, { "epoch": 0.07380892573055346, "grad_norm": 0.23365455865859985, "learning_rate": 0.00019970637540376495, "loss": 11.6874, "step": 3526 }, { "epoch": 0.07382985849451562, "grad_norm": 0.32049688696861267, "learning_rate": 0.0001997062074845939, "loss": 11.681, "step": 3527 }, { "epoch": 0.07385079125847777, "grad_norm": 0.20227548480033875, "learning_rate": 0.00019970603951749208, "loss": 11.6872, "step": 3528 }, { "epoch": 0.07387172402243992, "grad_norm": 0.2051212340593338, "learning_rate": 0.00019970587150245955, "loss": 11.685, "step": 3529 }, { "epoch": 0.07389265678640208, "grad_norm": 0.20758646726608276, "learning_rate": 0.00019970570343949646, "loss": 11.6783, "step": 3530 }, { "epoch": 0.07391358955036423, "grad_norm": 0.1884891539812088, "learning_rate": 0.00019970553532860282, "loss": 11.6929, "step": 3531 }, { "epoch": 0.07393452231432639, "grad_norm": 0.26505187153816223, "learning_rate": 0.0001997053671697787, "loss": 11.6974, "step": 3532 }, { "epoch": 0.07395545507828853, "grad_norm": 0.23571594059467316, "learning_rate": 0.00019970519896302425, "loss": 11.6853, "step": 3533 }, { "epoch": 0.07397638784225069, "grad_norm": 0.2361876666545868, "learning_rate": 0.0001997050307083395, "loss": 11.6901, "step": 3534 }, { "epoch": 0.07399732060621285, "grad_norm": 0.22148315608501434, "learning_rate": 0.00019970486240572454, "loss": 11.6727, "step": 3535 }, { "epoch": 0.074018253370175, "grad_norm": 0.21777582168579102, "learning_rate": 0.00019970469405517944, "loss": 11.6908, "step": 3536 }, { "epoch": 0.07403918613413715, "grad_norm": 0.24177239835262299, "learning_rate": 0.00019970452565670432, "loss": 11.6997, "step": 3537 }, { "epoch": 0.07406011889809931, "grad_norm": 0.19486494362354279, "learning_rate": 0.00019970435721029922, "loss": 11.6826, "step": 3538 }, { "epoch": 0.07408105166206146, "grad_norm": 0.20738781988620758, "learning_rate": 0.00019970418871596425, "loss": 11.6813, "step": 3539 }, { "epoch": 0.07410198442602361, "grad_norm": 0.24025888741016388, "learning_rate": 0.0001997040201736995, "loss": 11.6839, "step": 3540 }, { "epoch": 0.07412291718998576, "grad_norm": 0.19232279062271118, "learning_rate": 0.000199703851583505, "loss": 11.6811, "step": 3541 }, { "epoch": 0.07414384995394792, "grad_norm": 0.23386134207248688, "learning_rate": 0.00019970368294538085, "loss": 11.6885, "step": 3542 }, { "epoch": 0.07416478271791008, "grad_norm": 0.201969176530838, "learning_rate": 0.00019970351425932715, "loss": 11.7003, "step": 3543 }, { "epoch": 0.07418571548187222, "grad_norm": 0.27559545636177063, "learning_rate": 0.000199703345525344, "loss": 11.7043, "step": 3544 }, { "epoch": 0.07420664824583438, "grad_norm": 0.21303407847881317, "learning_rate": 0.0001997031767434314, "loss": 11.6831, "step": 3545 }, { "epoch": 0.07422758100979654, "grad_norm": 0.23956342041492462, "learning_rate": 0.00019970300791358954, "loss": 11.6827, "step": 3546 }, { "epoch": 0.07424851377375868, "grad_norm": 0.3222784101963043, "learning_rate": 0.00019970283903581842, "loss": 11.6818, "step": 3547 }, { "epoch": 0.07426944653772084, "grad_norm": 0.19400516152381897, "learning_rate": 0.00019970267011011817, "loss": 11.6695, "step": 3548 }, { "epoch": 0.07429037930168299, "grad_norm": 0.28526970744132996, "learning_rate": 0.0001997025011364888, "loss": 11.6853, "step": 3549 }, { "epoch": 0.07431131206564515, "grad_norm": 0.2261950671672821, "learning_rate": 0.00019970233211493048, "loss": 11.6893, "step": 3550 }, { "epoch": 0.0743322448296073, "grad_norm": 0.24411773681640625, "learning_rate": 0.00019970216304544326, "loss": 11.6882, "step": 3551 }, { "epoch": 0.07435317759356945, "grad_norm": 0.3096255362033844, "learning_rate": 0.00019970199392802717, "loss": 11.6791, "step": 3552 }, { "epoch": 0.07437411035753161, "grad_norm": 0.2632697522640228, "learning_rate": 0.00019970182476268235, "loss": 11.6784, "step": 3553 }, { "epoch": 0.07439504312149377, "grad_norm": 0.19441962242126465, "learning_rate": 0.00019970165554940888, "loss": 11.7028, "step": 3554 }, { "epoch": 0.07441597588545591, "grad_norm": 0.22048884630203247, "learning_rate": 0.00019970148628820682, "loss": 11.7075, "step": 3555 }, { "epoch": 0.07443690864941807, "grad_norm": 0.24447934329509735, "learning_rate": 0.00019970131697907622, "loss": 11.6797, "step": 3556 }, { "epoch": 0.07445784141338023, "grad_norm": 0.20876140892505646, "learning_rate": 0.00019970114762201725, "loss": 11.6702, "step": 3557 }, { "epoch": 0.07447877417734237, "grad_norm": 0.20903310179710388, "learning_rate": 0.00019970097821702992, "loss": 11.6794, "step": 3558 }, { "epoch": 0.07449970694130453, "grad_norm": 0.2671207785606384, "learning_rate": 0.00019970080876411436, "loss": 11.6886, "step": 3559 }, { "epoch": 0.07452063970526668, "grad_norm": 0.19684959948062897, "learning_rate": 0.00019970063926327058, "loss": 11.6827, "step": 3560 }, { "epoch": 0.07454157246922884, "grad_norm": 0.2154642790555954, "learning_rate": 0.00019970046971449872, "loss": 11.6848, "step": 3561 }, { "epoch": 0.074562505233191, "grad_norm": 0.21259766817092896, "learning_rate": 0.00019970030011779883, "loss": 11.6796, "step": 3562 }, { "epoch": 0.07458343799715314, "grad_norm": 0.20580996572971344, "learning_rate": 0.00019970013047317103, "loss": 11.6893, "step": 3563 }, { "epoch": 0.0746043707611153, "grad_norm": 0.21016770601272583, "learning_rate": 0.00019969996078061537, "loss": 11.6899, "step": 3564 }, { "epoch": 0.07462530352507746, "grad_norm": 0.23947471380233765, "learning_rate": 0.00019969979104013196, "loss": 11.6779, "step": 3565 }, { "epoch": 0.0746462362890396, "grad_norm": 0.21078582108020782, "learning_rate": 0.00019969962125172087, "loss": 11.6934, "step": 3566 }, { "epoch": 0.07466716905300176, "grad_norm": 0.21072520315647125, "learning_rate": 0.00019969945141538214, "loss": 11.6876, "step": 3567 }, { "epoch": 0.0746881018169639, "grad_norm": 0.27952802181243896, "learning_rate": 0.00019969928153111587, "loss": 11.6941, "step": 3568 }, { "epoch": 0.07470903458092606, "grad_norm": 0.16871985793113708, "learning_rate": 0.0001996991115989222, "loss": 11.6878, "step": 3569 }, { "epoch": 0.07472996734488822, "grad_norm": 0.22665131092071533, "learning_rate": 0.00019969894161880116, "loss": 11.6875, "step": 3570 }, { "epoch": 0.07475090010885037, "grad_norm": 0.17920301854610443, "learning_rate": 0.00019969877159075282, "loss": 11.6791, "step": 3571 }, { "epoch": 0.07477183287281253, "grad_norm": 0.2130887359380722, "learning_rate": 0.0001996986015147773, "loss": 11.7063, "step": 3572 }, { "epoch": 0.07479276563677469, "grad_norm": 0.24465659260749817, "learning_rate": 0.00019969843139087468, "loss": 11.6817, "step": 3573 }, { "epoch": 0.07481369840073683, "grad_norm": 0.20117831230163574, "learning_rate": 0.000199698261219045, "loss": 11.6767, "step": 3574 }, { "epoch": 0.07483463116469899, "grad_norm": 0.34075555205345154, "learning_rate": 0.00019969809099928837, "loss": 11.6885, "step": 3575 }, { "epoch": 0.07485556392866113, "grad_norm": 0.18191497027873993, "learning_rate": 0.0001996979207316049, "loss": 11.6847, "step": 3576 }, { "epoch": 0.07487649669262329, "grad_norm": 0.25940150022506714, "learning_rate": 0.0001996977504159946, "loss": 11.6948, "step": 3577 }, { "epoch": 0.07489742945658545, "grad_norm": 0.23477879166603088, "learning_rate": 0.0001996975800524576, "loss": 11.6938, "step": 3578 }, { "epoch": 0.0749183622205476, "grad_norm": 0.31466421484947205, "learning_rate": 0.000199697409640994, "loss": 11.6824, "step": 3579 }, { "epoch": 0.07493929498450976, "grad_norm": 0.20251250267028809, "learning_rate": 0.00019969723918160384, "loss": 11.6856, "step": 3580 }, { "epoch": 0.07496022774847191, "grad_norm": 0.21364638209342957, "learning_rate": 0.0001996970686742872, "loss": 11.6728, "step": 3581 }, { "epoch": 0.07498116051243406, "grad_norm": 0.18909424543380737, "learning_rate": 0.00019969689811904422, "loss": 11.6583, "step": 3582 }, { "epoch": 0.07500209327639622, "grad_norm": 0.1895008683204651, "learning_rate": 0.00019969672751587493, "loss": 11.6844, "step": 3583 }, { "epoch": 0.07502302604035836, "grad_norm": 0.22773222625255585, "learning_rate": 0.0001996965568647794, "loss": 11.6883, "step": 3584 }, { "epoch": 0.07504395880432052, "grad_norm": 0.19576764106750488, "learning_rate": 0.00019969638616575778, "loss": 11.6781, "step": 3585 }, { "epoch": 0.07506489156828268, "grad_norm": 0.22434988617897034, "learning_rate": 0.0001996962154188101, "loss": 11.6926, "step": 3586 }, { "epoch": 0.07508582433224482, "grad_norm": 0.23762013018131256, "learning_rate": 0.00019969604462393643, "loss": 11.6858, "step": 3587 }, { "epoch": 0.07510675709620698, "grad_norm": 0.2341863363981247, "learning_rate": 0.0001996958737811369, "loss": 11.6889, "step": 3588 }, { "epoch": 0.07512768986016914, "grad_norm": 0.24461494386196136, "learning_rate": 0.00019969570289041154, "loss": 11.7014, "step": 3589 }, { "epoch": 0.07514862262413129, "grad_norm": 0.21808314323425293, "learning_rate": 0.00019969553195176048, "loss": 11.6946, "step": 3590 }, { "epoch": 0.07516955538809345, "grad_norm": 0.3402266800403595, "learning_rate": 0.00019969536096518375, "loss": 11.6775, "step": 3591 }, { "epoch": 0.0751904881520556, "grad_norm": 0.2101178616285324, "learning_rate": 0.00019969518993068148, "loss": 11.6786, "step": 3592 }, { "epoch": 0.07521142091601775, "grad_norm": 0.212107852101326, "learning_rate": 0.00019969501884825372, "loss": 11.6842, "step": 3593 }, { "epoch": 0.07523235367997991, "grad_norm": 0.20146189630031586, "learning_rate": 0.00019969484771790059, "loss": 11.6804, "step": 3594 }, { "epoch": 0.07525328644394205, "grad_norm": 0.22359085083007812, "learning_rate": 0.00019969467653962214, "loss": 11.7011, "step": 3595 }, { "epoch": 0.07527421920790421, "grad_norm": 0.2141193300485611, "learning_rate": 0.00019969450531341846, "loss": 11.6643, "step": 3596 }, { "epoch": 0.07529515197186637, "grad_norm": 0.22416813671588898, "learning_rate": 0.00019969433403928963, "loss": 11.6935, "step": 3597 }, { "epoch": 0.07531608473582851, "grad_norm": 0.22534339129924774, "learning_rate": 0.00019969416271723572, "loss": 11.6893, "step": 3598 }, { "epoch": 0.07533701749979067, "grad_norm": 0.22097215056419373, "learning_rate": 0.00019969399134725686, "loss": 11.6902, "step": 3599 }, { "epoch": 0.07535795026375283, "grad_norm": 0.229040265083313, "learning_rate": 0.00019969381992935306, "loss": 11.7042, "step": 3600 }, { "epoch": 0.07537888302771498, "grad_norm": 0.2422393560409546, "learning_rate": 0.0001996936484635245, "loss": 11.6842, "step": 3601 }, { "epoch": 0.07539981579167714, "grad_norm": 0.23500920832157135, "learning_rate": 0.00019969347694977118, "loss": 11.6807, "step": 3602 }, { "epoch": 0.07542074855563928, "grad_norm": 0.24315713346004486, "learning_rate": 0.0001996933053880932, "loss": 11.6911, "step": 3603 }, { "epoch": 0.07544168131960144, "grad_norm": 0.21414552628993988, "learning_rate": 0.00019969313377849065, "loss": 11.6799, "step": 3604 }, { "epoch": 0.0754626140835636, "grad_norm": 0.20402765274047852, "learning_rate": 0.0001996929621209636, "loss": 11.6956, "step": 3605 }, { "epoch": 0.07548354684752574, "grad_norm": 0.2254820466041565, "learning_rate": 0.0001996927904155122, "loss": 11.6895, "step": 3606 }, { "epoch": 0.0755044796114879, "grad_norm": 0.20557983219623566, "learning_rate": 0.0001996926186621364, "loss": 11.6906, "step": 3607 }, { "epoch": 0.07552541237545006, "grad_norm": 0.21710070967674255, "learning_rate": 0.0001996924468608364, "loss": 11.6666, "step": 3608 }, { "epoch": 0.0755463451394122, "grad_norm": 0.29121434688568115, "learning_rate": 0.00019969227501161226, "loss": 11.6898, "step": 3609 }, { "epoch": 0.07556727790337436, "grad_norm": 0.2065453827381134, "learning_rate": 0.00019969210311446403, "loss": 11.6901, "step": 3610 }, { "epoch": 0.07558821066733651, "grad_norm": 0.18777534365653992, "learning_rate": 0.0001996919311693918, "loss": 11.6762, "step": 3611 }, { "epoch": 0.07560914343129867, "grad_norm": 0.20266884565353394, "learning_rate": 0.00019969175917639565, "loss": 11.6751, "step": 3612 }, { "epoch": 0.07563007619526083, "grad_norm": 0.19248051941394806, "learning_rate": 0.00019969158713547568, "loss": 11.6909, "step": 3613 }, { "epoch": 0.07565100895922297, "grad_norm": 0.21229170262813568, "learning_rate": 0.000199691415046632, "loss": 11.6866, "step": 3614 }, { "epoch": 0.07567194172318513, "grad_norm": 0.22407166659832, "learning_rate": 0.00019969124290986464, "loss": 11.6664, "step": 3615 }, { "epoch": 0.07569287448714729, "grad_norm": 0.21647049486637115, "learning_rate": 0.0001996910707251737, "loss": 11.6884, "step": 3616 }, { "epoch": 0.07571380725110943, "grad_norm": 0.18874354660511017, "learning_rate": 0.00019969089849255925, "loss": 11.6682, "step": 3617 }, { "epoch": 0.07573474001507159, "grad_norm": 0.18642978370189667, "learning_rate": 0.00019969072621202138, "loss": 11.6857, "step": 3618 }, { "epoch": 0.07575567277903375, "grad_norm": 0.2339247465133667, "learning_rate": 0.00019969055388356023, "loss": 11.6973, "step": 3619 }, { "epoch": 0.0757766055429959, "grad_norm": 0.27921900153160095, "learning_rate": 0.0001996903815071758, "loss": 11.6879, "step": 3620 }, { "epoch": 0.07579753830695805, "grad_norm": 0.22025929391384125, "learning_rate": 0.0001996902090828682, "loss": 11.6842, "step": 3621 }, { "epoch": 0.0758184710709202, "grad_norm": 0.24478165805339813, "learning_rate": 0.00019969003661063752, "loss": 11.682, "step": 3622 }, { "epoch": 0.07583940383488236, "grad_norm": 0.19785469770431519, "learning_rate": 0.00019968986409048384, "loss": 11.6949, "step": 3623 }, { "epoch": 0.07586033659884452, "grad_norm": 0.19819892942905426, "learning_rate": 0.00019968969152240725, "loss": 11.6999, "step": 3624 }, { "epoch": 0.07588126936280666, "grad_norm": 0.18645915389060974, "learning_rate": 0.00019968951890640782, "loss": 11.6924, "step": 3625 }, { "epoch": 0.07590220212676882, "grad_norm": 0.19158945977687836, "learning_rate": 0.00019968934624248568, "loss": 11.6774, "step": 3626 }, { "epoch": 0.07592313489073098, "grad_norm": 0.24755260348320007, "learning_rate": 0.00019968917353064083, "loss": 11.6972, "step": 3627 }, { "epoch": 0.07594406765469312, "grad_norm": 0.17777623236179352, "learning_rate": 0.00019968900077087343, "loss": 11.7011, "step": 3628 }, { "epoch": 0.07596500041865528, "grad_norm": 0.23685140907764435, "learning_rate": 0.00019968882796318348, "loss": 11.6947, "step": 3629 }, { "epoch": 0.07598593318261743, "grad_norm": 0.20931053161621094, "learning_rate": 0.00019968865510757117, "loss": 11.6839, "step": 3630 }, { "epoch": 0.07600686594657959, "grad_norm": 0.19587242603302002, "learning_rate": 0.0001996884822040365, "loss": 11.6842, "step": 3631 }, { "epoch": 0.07602779871054174, "grad_norm": 0.17811670899391174, "learning_rate": 0.00019968830925257955, "loss": 11.6832, "step": 3632 }, { "epoch": 0.07604873147450389, "grad_norm": 0.192644864320755, "learning_rate": 0.00019968813625320046, "loss": 11.6779, "step": 3633 }, { "epoch": 0.07606966423846605, "grad_norm": 0.22482886910438538, "learning_rate": 0.00019968796320589928, "loss": 11.6878, "step": 3634 }, { "epoch": 0.0760905970024282, "grad_norm": 0.19169530272483826, "learning_rate": 0.00019968779011067612, "loss": 11.6714, "step": 3635 }, { "epoch": 0.07611152976639035, "grad_norm": 0.2352064549922943, "learning_rate": 0.00019968761696753103, "loss": 11.6797, "step": 3636 }, { "epoch": 0.07613246253035251, "grad_norm": 0.1946367770433426, "learning_rate": 0.00019968744377646408, "loss": 11.6736, "step": 3637 }, { "epoch": 0.07615339529431465, "grad_norm": 0.2139919400215149, "learning_rate": 0.0001996872705374754, "loss": 11.6837, "step": 3638 }, { "epoch": 0.07617432805827681, "grad_norm": 0.2870541214942932, "learning_rate": 0.00019968709725056504, "loss": 11.6842, "step": 3639 }, { "epoch": 0.07619526082223897, "grad_norm": 0.1989719122648239, "learning_rate": 0.00019968692391573312, "loss": 11.6821, "step": 3640 }, { "epoch": 0.07621619358620112, "grad_norm": 0.1706673502922058, "learning_rate": 0.00019968675053297968, "loss": 11.6865, "step": 3641 }, { "epoch": 0.07623712635016328, "grad_norm": 0.21842454373836517, "learning_rate": 0.0001996865771023048, "loss": 11.6651, "step": 3642 }, { "epoch": 0.07625805911412543, "grad_norm": 0.21066907048225403, "learning_rate": 0.00019968640362370862, "loss": 11.6845, "step": 3643 }, { "epoch": 0.07627899187808758, "grad_norm": 0.19327428936958313, "learning_rate": 0.00019968623009719116, "loss": 11.6932, "step": 3644 }, { "epoch": 0.07629992464204974, "grad_norm": 0.2159273624420166, "learning_rate": 0.00019968605652275257, "loss": 11.6732, "step": 3645 }, { "epoch": 0.0763208574060119, "grad_norm": 0.18519364297389984, "learning_rate": 0.00019968588290039286, "loss": 11.6892, "step": 3646 }, { "epoch": 0.07634179016997404, "grad_norm": 0.20918111503124237, "learning_rate": 0.00019968570923011215, "loss": 11.6769, "step": 3647 }, { "epoch": 0.0763627229339362, "grad_norm": 0.23385314643383026, "learning_rate": 0.00019968553551191054, "loss": 11.6982, "step": 3648 }, { "epoch": 0.07638365569789834, "grad_norm": 0.25353240966796875, "learning_rate": 0.00019968536174578807, "loss": 11.6967, "step": 3649 }, { "epoch": 0.0764045884618605, "grad_norm": 0.34989577531814575, "learning_rate": 0.00019968518793174486, "loss": 11.683, "step": 3650 }, { "epoch": 0.07642552122582266, "grad_norm": 0.32644781470298767, "learning_rate": 0.000199685014069781, "loss": 11.701, "step": 3651 }, { "epoch": 0.07644645398978481, "grad_norm": 0.1767832636833191, "learning_rate": 0.00019968484015989654, "loss": 11.6834, "step": 3652 }, { "epoch": 0.07646738675374697, "grad_norm": 0.22429026663303375, "learning_rate": 0.0001996846662020916, "loss": 11.6864, "step": 3653 }, { "epoch": 0.07648831951770912, "grad_norm": 0.19902470707893372, "learning_rate": 0.00019968449219636623, "loss": 11.6893, "step": 3654 }, { "epoch": 0.07650925228167127, "grad_norm": 0.19800469279289246, "learning_rate": 0.0001996843181427205, "loss": 11.6673, "step": 3655 }, { "epoch": 0.07653018504563343, "grad_norm": 0.21168223023414612, "learning_rate": 0.00019968414404115454, "loss": 11.6887, "step": 3656 }, { "epoch": 0.07655111780959557, "grad_norm": 0.20996101200580597, "learning_rate": 0.00019968396989166842, "loss": 11.6784, "step": 3657 }, { "epoch": 0.07657205057355773, "grad_norm": 0.23619389533996582, "learning_rate": 0.00019968379569426222, "loss": 11.6732, "step": 3658 }, { "epoch": 0.07659298333751989, "grad_norm": 0.2637130320072174, "learning_rate": 0.00019968362144893602, "loss": 11.6896, "step": 3659 }, { "epoch": 0.07661391610148204, "grad_norm": 0.28702712059020996, "learning_rate": 0.0001996834471556899, "loss": 11.708, "step": 3660 }, { "epoch": 0.0766348488654442, "grad_norm": 0.2786884009838104, "learning_rate": 0.00019968327281452393, "loss": 11.6938, "step": 3661 }, { "epoch": 0.07665578162940635, "grad_norm": 0.20730625092983246, "learning_rate": 0.00019968309842543827, "loss": 11.6765, "step": 3662 }, { "epoch": 0.0766767143933685, "grad_norm": 0.1788245141506195, "learning_rate": 0.0001996829239884329, "loss": 11.6876, "step": 3663 }, { "epoch": 0.07669764715733066, "grad_norm": 0.2086619734764099, "learning_rate": 0.00019968274950350797, "loss": 11.6831, "step": 3664 }, { "epoch": 0.0767185799212928, "grad_norm": 0.24592505395412445, "learning_rate": 0.00019968257497066355, "loss": 11.6745, "step": 3665 }, { "epoch": 0.07673951268525496, "grad_norm": 0.19416595995426178, "learning_rate": 0.0001996824003898997, "loss": 11.6747, "step": 3666 }, { "epoch": 0.07676044544921712, "grad_norm": 0.21102838218212128, "learning_rate": 0.00019968222576121651, "loss": 11.6969, "step": 3667 }, { "epoch": 0.07678137821317926, "grad_norm": 0.2422448992729187, "learning_rate": 0.0001996820510846141, "loss": 11.6901, "step": 3668 }, { "epoch": 0.07680231097714142, "grad_norm": 0.22104611992835999, "learning_rate": 0.00019968187636009251, "loss": 11.687, "step": 3669 }, { "epoch": 0.07682324374110358, "grad_norm": 0.19736993312835693, "learning_rate": 0.00019968170158765188, "loss": 11.6928, "step": 3670 }, { "epoch": 0.07684417650506573, "grad_norm": 0.2355939745903015, "learning_rate": 0.00019968152676729225, "loss": 11.6926, "step": 3671 }, { "epoch": 0.07686510926902788, "grad_norm": 0.2903113067150116, "learning_rate": 0.00019968135189901367, "loss": 11.6867, "step": 3672 }, { "epoch": 0.07688604203299004, "grad_norm": 0.19973617792129517, "learning_rate": 0.00019968117698281632, "loss": 11.6915, "step": 3673 }, { "epoch": 0.07690697479695219, "grad_norm": 0.18100494146347046, "learning_rate": 0.0001996810020187002, "loss": 11.6733, "step": 3674 }, { "epoch": 0.07692790756091435, "grad_norm": 0.20892317593097687, "learning_rate": 0.00019968082700666542, "loss": 11.6763, "step": 3675 }, { "epoch": 0.07694884032487649, "grad_norm": 0.18457546830177307, "learning_rate": 0.0001996806519467121, "loss": 11.7016, "step": 3676 }, { "epoch": 0.07696977308883865, "grad_norm": 0.24418596923351288, "learning_rate": 0.0001996804768388403, "loss": 11.7016, "step": 3677 }, { "epoch": 0.07699070585280081, "grad_norm": 0.21117500960826874, "learning_rate": 0.00019968030168305003, "loss": 11.6852, "step": 3678 }, { "epoch": 0.07701163861676295, "grad_norm": 0.19062744081020355, "learning_rate": 0.0001996801264793415, "loss": 11.6901, "step": 3679 }, { "epoch": 0.07703257138072511, "grad_norm": 0.2982674539089203, "learning_rate": 0.00019967995122771473, "loss": 11.6806, "step": 3680 }, { "epoch": 0.07705350414468727, "grad_norm": 0.2362060546875, "learning_rate": 0.00019967977592816978, "loss": 11.6703, "step": 3681 }, { "epoch": 0.07707443690864942, "grad_norm": 0.2414025515317917, "learning_rate": 0.00019967960058070677, "loss": 11.686, "step": 3682 }, { "epoch": 0.07709536967261157, "grad_norm": 0.21353185176849365, "learning_rate": 0.00019967942518532577, "loss": 11.6678, "step": 3683 }, { "epoch": 0.07711630243657372, "grad_norm": 0.22872644662857056, "learning_rate": 0.00019967924974202691, "loss": 11.6721, "step": 3684 }, { "epoch": 0.07713723520053588, "grad_norm": 0.1974843442440033, "learning_rate": 0.00019967907425081022, "loss": 11.6798, "step": 3685 }, { "epoch": 0.07715816796449804, "grad_norm": 0.22091245651245117, "learning_rate": 0.00019967889871167583, "loss": 11.6723, "step": 3686 }, { "epoch": 0.07717910072846018, "grad_norm": 0.3213334083557129, "learning_rate": 0.00019967872312462375, "loss": 11.6804, "step": 3687 }, { "epoch": 0.07720003349242234, "grad_norm": 0.2286922037601471, "learning_rate": 0.00019967854748965413, "loss": 11.6796, "step": 3688 }, { "epoch": 0.0772209662563845, "grad_norm": 0.18248245120048523, "learning_rate": 0.00019967837180676702, "loss": 11.6934, "step": 3689 }, { "epoch": 0.07724189902034664, "grad_norm": 0.21809475123882294, "learning_rate": 0.00019967819607596255, "loss": 11.6824, "step": 3690 }, { "epoch": 0.0772628317843088, "grad_norm": 0.18885786831378937, "learning_rate": 0.00019967802029724076, "loss": 11.6824, "step": 3691 }, { "epoch": 0.07728376454827095, "grad_norm": 0.19088132679462433, "learning_rate": 0.00019967784447060173, "loss": 11.6825, "step": 3692 }, { "epoch": 0.0773046973122331, "grad_norm": 0.2733035981655121, "learning_rate": 0.00019967766859604557, "loss": 11.6876, "step": 3693 }, { "epoch": 0.07732563007619526, "grad_norm": 0.17722053825855255, "learning_rate": 0.00019967749267357237, "loss": 11.6857, "step": 3694 }, { "epoch": 0.07734656284015741, "grad_norm": 0.25338977575302124, "learning_rate": 0.0001996773167031822, "loss": 11.6802, "step": 3695 }, { "epoch": 0.07736749560411957, "grad_norm": 0.167621910572052, "learning_rate": 0.00019967714068487512, "loss": 11.6803, "step": 3696 }, { "epoch": 0.07738842836808173, "grad_norm": 0.22782449424266815, "learning_rate": 0.0001996769646186513, "loss": 11.6722, "step": 3697 }, { "epoch": 0.07740936113204387, "grad_norm": 0.24737468361854553, "learning_rate": 0.0001996767885045107, "loss": 11.6812, "step": 3698 }, { "epoch": 0.07743029389600603, "grad_norm": 0.22692160308361053, "learning_rate": 0.00019967661234245352, "loss": 11.686, "step": 3699 }, { "epoch": 0.07745122665996818, "grad_norm": 0.3585287034511566, "learning_rate": 0.00019967643613247974, "loss": 11.678, "step": 3700 }, { "epoch": 0.07747215942393033, "grad_norm": 0.20679505169391632, "learning_rate": 0.00019967625987458954, "loss": 11.6946, "step": 3701 }, { "epoch": 0.07749309218789249, "grad_norm": 0.19862346351146698, "learning_rate": 0.00019967608356878297, "loss": 11.6726, "step": 3702 }, { "epoch": 0.07751402495185464, "grad_norm": 0.26638442277908325, "learning_rate": 0.0001996759072150601, "loss": 11.6849, "step": 3703 }, { "epoch": 0.0775349577158168, "grad_norm": 0.22838586568832397, "learning_rate": 0.00019967573081342103, "loss": 11.6829, "step": 3704 }, { "epoch": 0.07755589047977896, "grad_norm": 0.2236328423023224, "learning_rate": 0.0001996755543638658, "loss": 11.6803, "step": 3705 }, { "epoch": 0.0775768232437411, "grad_norm": 0.25718632340431213, "learning_rate": 0.00019967537786639458, "loss": 11.6817, "step": 3706 }, { "epoch": 0.07759775600770326, "grad_norm": 0.20424656569957733, "learning_rate": 0.00019967520132100738, "loss": 11.6911, "step": 3707 }, { "epoch": 0.07761868877166542, "grad_norm": 0.22972016036510468, "learning_rate": 0.00019967502472770433, "loss": 11.6872, "step": 3708 }, { "epoch": 0.07763962153562756, "grad_norm": 0.20190592110157013, "learning_rate": 0.00019967484808648548, "loss": 11.6924, "step": 3709 }, { "epoch": 0.07766055429958972, "grad_norm": 0.25534263253211975, "learning_rate": 0.00019967467139735094, "loss": 11.6954, "step": 3710 }, { "epoch": 0.07768148706355187, "grad_norm": 0.21555879712104797, "learning_rate": 0.0001996744946603008, "loss": 11.68, "step": 3711 }, { "epoch": 0.07770241982751402, "grad_norm": 0.24061550199985504, "learning_rate": 0.00019967431787533513, "loss": 11.6949, "step": 3712 }, { "epoch": 0.07772335259147618, "grad_norm": 0.21000991761684418, "learning_rate": 0.000199674141042454, "loss": 11.6729, "step": 3713 }, { "epoch": 0.07774428535543833, "grad_norm": 0.22086331248283386, "learning_rate": 0.00019967396416165753, "loss": 11.6957, "step": 3714 }, { "epoch": 0.07776521811940049, "grad_norm": 0.25522616505622864, "learning_rate": 0.00019967378723294578, "loss": 11.6961, "step": 3715 }, { "epoch": 0.07778615088336265, "grad_norm": 0.1993274837732315, "learning_rate": 0.00019967361025631885, "loss": 11.6879, "step": 3716 }, { "epoch": 0.07780708364732479, "grad_norm": 0.21720896661281586, "learning_rate": 0.0001996734332317768, "loss": 11.6903, "step": 3717 }, { "epoch": 0.07782801641128695, "grad_norm": 0.2822534441947937, "learning_rate": 0.00019967325615931976, "loss": 11.6879, "step": 3718 }, { "epoch": 0.0778489491752491, "grad_norm": 0.20485582947731018, "learning_rate": 0.0001996730790389478, "loss": 11.6931, "step": 3719 }, { "epoch": 0.07786988193921125, "grad_norm": 0.25610703229904175, "learning_rate": 0.00019967290187066094, "loss": 11.6871, "step": 3720 }, { "epoch": 0.07789081470317341, "grad_norm": 0.1896510273218155, "learning_rate": 0.00019967272465445937, "loss": 11.6864, "step": 3721 }, { "epoch": 0.07791174746713556, "grad_norm": 0.22441422939300537, "learning_rate": 0.0001996725473903431, "loss": 11.6642, "step": 3722 }, { "epoch": 0.07793268023109771, "grad_norm": 0.21734198927879333, "learning_rate": 0.00019967237007831223, "loss": 11.7008, "step": 3723 }, { "epoch": 0.07795361299505987, "grad_norm": 0.2294507473707199, "learning_rate": 0.00019967219271836687, "loss": 11.6786, "step": 3724 }, { "epoch": 0.07797454575902202, "grad_norm": 0.18516749143600464, "learning_rate": 0.0001996720153105071, "loss": 11.6869, "step": 3725 }, { "epoch": 0.07799547852298418, "grad_norm": 0.18767283856868744, "learning_rate": 0.00019967183785473295, "loss": 11.6833, "step": 3726 }, { "epoch": 0.07801641128694632, "grad_norm": 0.19611519575119019, "learning_rate": 0.00019967166035104458, "loss": 11.6666, "step": 3727 }, { "epoch": 0.07803734405090848, "grad_norm": 0.216780424118042, "learning_rate": 0.00019967148279944206, "loss": 11.6964, "step": 3728 }, { "epoch": 0.07805827681487064, "grad_norm": 0.2453322559595108, "learning_rate": 0.00019967130519992545, "loss": 11.6847, "step": 3729 }, { "epoch": 0.07807920957883278, "grad_norm": 0.314677894115448, "learning_rate": 0.00019967112755249483, "loss": 11.7015, "step": 3730 }, { "epoch": 0.07810014234279494, "grad_norm": 0.2203620821237564, "learning_rate": 0.00019967094985715034, "loss": 11.6728, "step": 3731 }, { "epoch": 0.0781210751067571, "grad_norm": 0.1935720294713974, "learning_rate": 0.000199670772113892, "loss": 11.6732, "step": 3732 }, { "epoch": 0.07814200787071925, "grad_norm": 0.23547904193401337, "learning_rate": 0.0001996705943227199, "loss": 11.6894, "step": 3733 }, { "epoch": 0.0781629406346814, "grad_norm": 0.16982926428318024, "learning_rate": 0.0001996704164836342, "loss": 11.6772, "step": 3734 }, { "epoch": 0.07818387339864356, "grad_norm": 0.2780240774154663, "learning_rate": 0.00019967023859663487, "loss": 11.6731, "step": 3735 }, { "epoch": 0.07820480616260571, "grad_norm": 0.2121589332818985, "learning_rate": 0.0001996700606617221, "loss": 11.6834, "step": 3736 }, { "epoch": 0.07822573892656787, "grad_norm": 0.24014274775981903, "learning_rate": 0.00019966988267889596, "loss": 11.689, "step": 3737 }, { "epoch": 0.07824667169053001, "grad_norm": 0.19720496237277985, "learning_rate": 0.00019966970464815648, "loss": 11.6902, "step": 3738 }, { "epoch": 0.07826760445449217, "grad_norm": 0.22052830457687378, "learning_rate": 0.00019966952656950374, "loss": 11.7051, "step": 3739 }, { "epoch": 0.07828853721845433, "grad_norm": 0.22953754663467407, "learning_rate": 0.0001996693484429379, "loss": 11.7185, "step": 3740 }, { "epoch": 0.07830946998241647, "grad_norm": 0.22684557735919952, "learning_rate": 0.000199669170268459, "loss": 11.6931, "step": 3741 }, { "epoch": 0.07833040274637863, "grad_norm": 0.36936500668525696, "learning_rate": 0.00019966899204606713, "loss": 11.6947, "step": 3742 }, { "epoch": 0.07835133551034079, "grad_norm": 0.23313795030117035, "learning_rate": 0.0001996688137757624, "loss": 11.6771, "step": 3743 }, { "epoch": 0.07837226827430294, "grad_norm": 0.1660475730895996, "learning_rate": 0.00019966863545754483, "loss": 11.6606, "step": 3744 }, { "epoch": 0.0783932010382651, "grad_norm": 0.2761586606502533, "learning_rate": 0.0001996684570914146, "loss": 11.6815, "step": 3745 }, { "epoch": 0.07841413380222724, "grad_norm": 0.2090524286031723, "learning_rate": 0.0001996682786773717, "loss": 11.6859, "step": 3746 }, { "epoch": 0.0784350665661894, "grad_norm": 0.24984511733055115, "learning_rate": 0.00019966810021541627, "loss": 11.686, "step": 3747 }, { "epoch": 0.07845599933015156, "grad_norm": 0.24963022768497467, "learning_rate": 0.00019966792170554842, "loss": 11.6771, "step": 3748 }, { "epoch": 0.0784769320941137, "grad_norm": 0.46363112330436707, "learning_rate": 0.00019966774314776817, "loss": 11.721, "step": 3749 }, { "epoch": 0.07849786485807586, "grad_norm": 0.24367496371269226, "learning_rate": 0.00019966756454207568, "loss": 11.6937, "step": 3750 }, { "epoch": 0.07851879762203802, "grad_norm": 0.19969409704208374, "learning_rate": 0.00019966738588847094, "loss": 11.6887, "step": 3751 }, { "epoch": 0.07853973038600016, "grad_norm": 0.19279822707176208, "learning_rate": 0.00019966720718695411, "loss": 11.685, "step": 3752 }, { "epoch": 0.07856066314996232, "grad_norm": 0.19059550762176514, "learning_rate": 0.00019966702843752528, "loss": 11.6872, "step": 3753 }, { "epoch": 0.07858159591392447, "grad_norm": 0.25740310549736023, "learning_rate": 0.00019966684964018447, "loss": 11.6929, "step": 3754 }, { "epoch": 0.07860252867788663, "grad_norm": 0.21246019005775452, "learning_rate": 0.00019966667079493182, "loss": 11.6985, "step": 3755 }, { "epoch": 0.07862346144184879, "grad_norm": 0.20077846944332123, "learning_rate": 0.00019966649190176743, "loss": 11.6871, "step": 3756 }, { "epoch": 0.07864439420581093, "grad_norm": 0.2322218418121338, "learning_rate": 0.00019966631296069136, "loss": 11.6855, "step": 3757 }, { "epoch": 0.07866532696977309, "grad_norm": 0.21866856515407562, "learning_rate": 0.00019966613397170367, "loss": 11.6662, "step": 3758 }, { "epoch": 0.07868625973373525, "grad_norm": 0.20979835093021393, "learning_rate": 0.00019966595493480446, "loss": 11.6799, "step": 3759 }, { "epoch": 0.07870719249769739, "grad_norm": 0.19299964606761932, "learning_rate": 0.00019966577584999386, "loss": 11.6777, "step": 3760 }, { "epoch": 0.07872812526165955, "grad_norm": 0.2563614249229431, "learning_rate": 0.00019966559671727191, "loss": 11.7038, "step": 3761 }, { "epoch": 0.07874905802562171, "grad_norm": 0.258829802274704, "learning_rate": 0.00019966541753663872, "loss": 11.7005, "step": 3762 }, { "epoch": 0.07876999078958385, "grad_norm": 0.19658739864826202, "learning_rate": 0.00019966523830809436, "loss": 11.6861, "step": 3763 }, { "epoch": 0.07879092355354601, "grad_norm": 0.19572432339191437, "learning_rate": 0.00019966505903163895, "loss": 11.6916, "step": 3764 }, { "epoch": 0.07881185631750816, "grad_norm": 0.21028514206409454, "learning_rate": 0.00019966487970727253, "loss": 11.6945, "step": 3765 }, { "epoch": 0.07883278908147032, "grad_norm": 0.23561309278011322, "learning_rate": 0.0001996647003349952, "loss": 11.6919, "step": 3766 }, { "epoch": 0.07885372184543248, "grad_norm": 0.20758765935897827, "learning_rate": 0.00019966452091480704, "loss": 11.6798, "step": 3767 }, { "epoch": 0.07887465460939462, "grad_norm": 0.19488102197647095, "learning_rate": 0.00019966434144670812, "loss": 11.6826, "step": 3768 }, { "epoch": 0.07889558737335678, "grad_norm": 0.19896551966667175, "learning_rate": 0.00019966416193069862, "loss": 11.673, "step": 3769 }, { "epoch": 0.07891652013731894, "grad_norm": 0.2303493171930313, "learning_rate": 0.00019966398236677852, "loss": 11.7004, "step": 3770 }, { "epoch": 0.07893745290128108, "grad_norm": 0.20386344194412231, "learning_rate": 0.00019966380275494795, "loss": 11.6956, "step": 3771 }, { "epoch": 0.07895838566524324, "grad_norm": 0.16300229728221893, "learning_rate": 0.000199663623095207, "loss": 11.6709, "step": 3772 }, { "epoch": 0.07897931842920539, "grad_norm": 0.24065758287906647, "learning_rate": 0.00019966344338755577, "loss": 11.6774, "step": 3773 }, { "epoch": 0.07900025119316754, "grad_norm": 0.21141381561756134, "learning_rate": 0.0001996632636319943, "loss": 11.6901, "step": 3774 }, { "epoch": 0.0790211839571297, "grad_norm": 0.16616316139698029, "learning_rate": 0.0001996630838285227, "loss": 11.6936, "step": 3775 }, { "epoch": 0.07904211672109185, "grad_norm": 0.2781158685684204, "learning_rate": 0.00019966290397714108, "loss": 11.6779, "step": 3776 }, { "epoch": 0.07906304948505401, "grad_norm": 0.23224636912345886, "learning_rate": 0.00019966272407784946, "loss": 11.6669, "step": 3777 }, { "epoch": 0.07908398224901617, "grad_norm": 0.19872620701789856, "learning_rate": 0.000199662544130648, "loss": 11.6773, "step": 3778 }, { "epoch": 0.07910491501297831, "grad_norm": 0.246055006980896, "learning_rate": 0.00019966236413553677, "loss": 11.6934, "step": 3779 }, { "epoch": 0.07912584777694047, "grad_norm": 0.21325235068798065, "learning_rate": 0.00019966218409251584, "loss": 11.6921, "step": 3780 }, { "epoch": 0.07914678054090261, "grad_norm": 0.18753302097320557, "learning_rate": 0.0001996620040015853, "loss": 11.689, "step": 3781 }, { "epoch": 0.07916771330486477, "grad_norm": 0.22755591571331024, "learning_rate": 0.00019966182386274524, "loss": 11.6978, "step": 3782 }, { "epoch": 0.07918864606882693, "grad_norm": 0.21917074918746948, "learning_rate": 0.0001996616436759957, "loss": 11.6906, "step": 3783 }, { "epoch": 0.07920957883278908, "grad_norm": 0.23673729598522186, "learning_rate": 0.00019966146344133686, "loss": 11.692, "step": 3784 }, { "epoch": 0.07923051159675124, "grad_norm": 0.20162847638130188, "learning_rate": 0.00019966128315876874, "loss": 11.6876, "step": 3785 }, { "epoch": 0.0792514443607134, "grad_norm": 0.2085001915693283, "learning_rate": 0.00019966110282829144, "loss": 11.6871, "step": 3786 }, { "epoch": 0.07927237712467554, "grad_norm": 0.23247674107551575, "learning_rate": 0.00019966092244990506, "loss": 11.6875, "step": 3787 }, { "epoch": 0.0792933098886377, "grad_norm": 0.26118531823158264, "learning_rate": 0.00019966074202360968, "loss": 11.6976, "step": 3788 }, { "epoch": 0.07931424265259986, "grad_norm": 0.2220815122127533, "learning_rate": 0.0001996605615494054, "loss": 11.6917, "step": 3789 }, { "epoch": 0.079335175416562, "grad_norm": 0.22110509872436523, "learning_rate": 0.00019966038102729226, "loss": 11.6759, "step": 3790 }, { "epoch": 0.07935610818052416, "grad_norm": 0.21295303106307983, "learning_rate": 0.00019966020045727038, "loss": 11.6752, "step": 3791 }, { "epoch": 0.0793770409444863, "grad_norm": 0.21501637995243073, "learning_rate": 0.00019966001983933986, "loss": 11.6836, "step": 3792 }, { "epoch": 0.07939797370844846, "grad_norm": 0.29997968673706055, "learning_rate": 0.00019965983917350074, "loss": 11.6838, "step": 3793 }, { "epoch": 0.07941890647241062, "grad_norm": 0.20464932918548584, "learning_rate": 0.00019965965845975317, "loss": 11.6766, "step": 3794 }, { "epoch": 0.07943983923637277, "grad_norm": 0.18355797231197357, "learning_rate": 0.0001996594776980972, "loss": 11.6751, "step": 3795 }, { "epoch": 0.07946077200033493, "grad_norm": 0.20638194680213928, "learning_rate": 0.00019965929688853292, "loss": 11.6921, "step": 3796 }, { "epoch": 0.07948170476429708, "grad_norm": 0.23574097454547882, "learning_rate": 0.0001996591160310604, "loss": 11.696, "step": 3797 }, { "epoch": 0.07950263752825923, "grad_norm": 0.22022627294063568, "learning_rate": 0.00019965893512567976, "loss": 11.6852, "step": 3798 }, { "epoch": 0.07952357029222139, "grad_norm": 0.22237184643745422, "learning_rate": 0.00019965875417239111, "loss": 11.6887, "step": 3799 }, { "epoch": 0.07954450305618353, "grad_norm": 0.18787917494773865, "learning_rate": 0.00019965857317119444, "loss": 11.6837, "step": 3800 }, { "epoch": 0.07956543582014569, "grad_norm": 0.21905960142612457, "learning_rate": 0.00019965839212208994, "loss": 11.6782, "step": 3801 }, { "epoch": 0.07958636858410785, "grad_norm": 0.1956193447113037, "learning_rate": 0.0001996582110250776, "loss": 11.685, "step": 3802 }, { "epoch": 0.07960730134807, "grad_norm": 0.21716809272766113, "learning_rate": 0.00019965802988015764, "loss": 11.6793, "step": 3803 }, { "epoch": 0.07962823411203215, "grad_norm": 0.18521195650100708, "learning_rate": 0.00019965784868733, "loss": 11.6897, "step": 3804 }, { "epoch": 0.07964916687599431, "grad_norm": 0.20876753330230713, "learning_rate": 0.00019965766744659488, "loss": 11.6813, "step": 3805 }, { "epoch": 0.07967009963995646, "grad_norm": 0.20429132878780365, "learning_rate": 0.00019965748615795228, "loss": 11.7087, "step": 3806 }, { "epoch": 0.07969103240391862, "grad_norm": 0.23806874454021454, "learning_rate": 0.00019965730482140236, "loss": 11.6967, "step": 3807 }, { "epoch": 0.07971196516788076, "grad_norm": 0.21865446865558624, "learning_rate": 0.00019965712343694516, "loss": 11.6799, "step": 3808 }, { "epoch": 0.07973289793184292, "grad_norm": 0.19842495024204254, "learning_rate": 0.0001996569420045808, "loss": 11.6764, "step": 3809 }, { "epoch": 0.07975383069580508, "grad_norm": 0.22407597303390503, "learning_rate": 0.00019965676052430931, "loss": 11.7002, "step": 3810 }, { "epoch": 0.07977476345976722, "grad_norm": 0.23380346596240997, "learning_rate": 0.0001996565789961309, "loss": 11.6859, "step": 3811 }, { "epoch": 0.07979569622372938, "grad_norm": 0.19407452642917633, "learning_rate": 0.00019965639742004548, "loss": 11.6946, "step": 3812 }, { "epoch": 0.07981662898769154, "grad_norm": 0.26472216844558716, "learning_rate": 0.0001996562157960533, "loss": 11.6782, "step": 3813 }, { "epoch": 0.07983756175165369, "grad_norm": 0.26487255096435547, "learning_rate": 0.00019965603412415436, "loss": 11.6953, "step": 3814 }, { "epoch": 0.07985849451561584, "grad_norm": 0.20833638310432434, "learning_rate": 0.00019965585240434876, "loss": 11.6849, "step": 3815 }, { "epoch": 0.07987942727957799, "grad_norm": 0.22656367719173431, "learning_rate": 0.00019965567063663658, "loss": 11.6812, "step": 3816 }, { "epoch": 0.07990036004354015, "grad_norm": 0.19247844815254211, "learning_rate": 0.00019965548882101795, "loss": 11.6782, "step": 3817 }, { "epoch": 0.0799212928075023, "grad_norm": 0.19170795381069183, "learning_rate": 0.00019965530695749292, "loss": 11.6816, "step": 3818 }, { "epoch": 0.07994222557146445, "grad_norm": 0.23454666137695312, "learning_rate": 0.0001996551250460616, "loss": 11.6849, "step": 3819 }, { "epoch": 0.07996315833542661, "grad_norm": 0.2057657539844513, "learning_rate": 0.00019965494308672407, "loss": 11.6786, "step": 3820 }, { "epoch": 0.07998409109938877, "grad_norm": 0.23132725059986115, "learning_rate": 0.00019965476107948037, "loss": 11.6968, "step": 3821 }, { "epoch": 0.08000502386335091, "grad_norm": 0.1609712690114975, "learning_rate": 0.00019965457902433064, "loss": 11.6877, "step": 3822 }, { "epoch": 0.08002595662731307, "grad_norm": 0.19717802107334137, "learning_rate": 0.00019965439692127496, "loss": 11.6893, "step": 3823 }, { "epoch": 0.08004688939127523, "grad_norm": 0.2981474697589874, "learning_rate": 0.00019965421477031344, "loss": 11.6953, "step": 3824 }, { "epoch": 0.08006782215523738, "grad_norm": 0.173371821641922, "learning_rate": 0.00019965403257144612, "loss": 11.6857, "step": 3825 }, { "epoch": 0.08008875491919953, "grad_norm": 0.19451552629470825, "learning_rate": 0.0001996538503246731, "loss": 11.6863, "step": 3826 }, { "epoch": 0.08010968768316168, "grad_norm": 0.27451473474502563, "learning_rate": 0.00019965366802999452, "loss": 11.6831, "step": 3827 }, { "epoch": 0.08013062044712384, "grad_norm": 0.21089810132980347, "learning_rate": 0.00019965348568741037, "loss": 11.6888, "step": 3828 }, { "epoch": 0.080151553211086, "grad_norm": 0.21006733179092407, "learning_rate": 0.00019965330329692086, "loss": 11.6784, "step": 3829 }, { "epoch": 0.08017248597504814, "grad_norm": 0.21288396418094635, "learning_rate": 0.00019965312085852595, "loss": 11.687, "step": 3830 }, { "epoch": 0.0801934187390103, "grad_norm": 0.23065443336963654, "learning_rate": 0.0001996529383722258, "loss": 11.6958, "step": 3831 }, { "epoch": 0.08021435150297246, "grad_norm": 0.19945701956748962, "learning_rate": 0.0001996527558380205, "loss": 11.6967, "step": 3832 }, { "epoch": 0.0802352842669346, "grad_norm": 0.18293581902980804, "learning_rate": 0.0001996525732559101, "loss": 11.6749, "step": 3833 }, { "epoch": 0.08025621703089676, "grad_norm": 0.2290715128183365, "learning_rate": 0.00019965239062589473, "loss": 11.6822, "step": 3834 }, { "epoch": 0.0802771497948589, "grad_norm": 0.20650263130664825, "learning_rate": 0.0001996522079479745, "loss": 11.691, "step": 3835 }, { "epoch": 0.08029808255882107, "grad_norm": 0.24040189385414124, "learning_rate": 0.00019965202522214938, "loss": 11.6906, "step": 3836 }, { "epoch": 0.08031901532278322, "grad_norm": 0.27688804268836975, "learning_rate": 0.00019965184244841958, "loss": 11.6857, "step": 3837 }, { "epoch": 0.08033994808674537, "grad_norm": 0.1915939301252365, "learning_rate": 0.00019965165962678512, "loss": 11.6814, "step": 3838 }, { "epoch": 0.08036088085070753, "grad_norm": 0.2138083279132843, "learning_rate": 0.0001996514767572461, "loss": 11.6828, "step": 3839 }, { "epoch": 0.08038181361466969, "grad_norm": 0.25249189138412476, "learning_rate": 0.00019965129383980266, "loss": 11.6828, "step": 3840 }, { "epoch": 0.08040274637863183, "grad_norm": 0.265613317489624, "learning_rate": 0.00019965111087445482, "loss": 11.701, "step": 3841 }, { "epoch": 0.08042367914259399, "grad_norm": 0.19849815964698792, "learning_rate": 0.0001996509278612027, "loss": 11.6994, "step": 3842 }, { "epoch": 0.08044461190655613, "grad_norm": 0.21138522028923035, "learning_rate": 0.00019965074480004637, "loss": 11.6818, "step": 3843 }, { "epoch": 0.0804655446705183, "grad_norm": 0.20964078605175018, "learning_rate": 0.00019965056169098593, "loss": 11.687, "step": 3844 }, { "epoch": 0.08048647743448045, "grad_norm": 0.24262988567352295, "learning_rate": 0.00019965037853402148, "loss": 11.6908, "step": 3845 }, { "epoch": 0.0805074101984426, "grad_norm": 0.17778325080871582, "learning_rate": 0.0001996501953291531, "loss": 11.6855, "step": 3846 }, { "epoch": 0.08052834296240476, "grad_norm": 0.21332859992980957, "learning_rate": 0.00019965001207638086, "loss": 11.682, "step": 3847 }, { "epoch": 0.08054927572636691, "grad_norm": 0.23827685415744781, "learning_rate": 0.0001996498287757049, "loss": 11.6795, "step": 3848 }, { "epoch": 0.08057020849032906, "grad_norm": 0.1932062804698944, "learning_rate": 0.0001996496454271252, "loss": 11.6948, "step": 3849 }, { "epoch": 0.08059114125429122, "grad_norm": 0.230176642537117, "learning_rate": 0.00019964946203064198, "loss": 11.702, "step": 3850 }, { "epoch": 0.08061207401825338, "grad_norm": 0.22295141220092773, "learning_rate": 0.00019964927858625523, "loss": 11.6835, "step": 3851 }, { "epoch": 0.08063300678221552, "grad_norm": 0.22400104999542236, "learning_rate": 0.00019964909509396508, "loss": 11.6934, "step": 3852 }, { "epoch": 0.08065393954617768, "grad_norm": 0.18918201327323914, "learning_rate": 0.00019964891155377164, "loss": 11.6901, "step": 3853 }, { "epoch": 0.08067487231013983, "grad_norm": 0.23852793872356415, "learning_rate": 0.00019964872796567496, "loss": 11.6911, "step": 3854 }, { "epoch": 0.08069580507410198, "grad_norm": 0.21872903406620026, "learning_rate": 0.00019964854432967513, "loss": 11.6884, "step": 3855 }, { "epoch": 0.08071673783806414, "grad_norm": 0.20170651376247406, "learning_rate": 0.00019964836064577225, "loss": 11.6953, "step": 3856 }, { "epoch": 0.08073767060202629, "grad_norm": 0.1718340367078781, "learning_rate": 0.00019964817691396638, "loss": 11.6844, "step": 3857 }, { "epoch": 0.08075860336598845, "grad_norm": 0.20641960203647614, "learning_rate": 0.00019964799313425767, "loss": 11.6798, "step": 3858 }, { "epoch": 0.0807795361299506, "grad_norm": 0.20226064324378967, "learning_rate": 0.00019964780930664615, "loss": 11.6843, "step": 3859 }, { "epoch": 0.08080046889391275, "grad_norm": 0.19165845215320587, "learning_rate": 0.00019964762543113197, "loss": 11.6795, "step": 3860 }, { "epoch": 0.08082140165787491, "grad_norm": 0.21666592359542847, "learning_rate": 0.00019964744150771516, "loss": 11.6777, "step": 3861 }, { "epoch": 0.08084233442183705, "grad_norm": 0.2176249474287033, "learning_rate": 0.0001996472575363958, "loss": 11.672, "step": 3862 }, { "epoch": 0.08086326718579921, "grad_norm": 0.2230718582868576, "learning_rate": 0.00019964707351717405, "loss": 11.6948, "step": 3863 }, { "epoch": 0.08088419994976137, "grad_norm": 0.17974483966827393, "learning_rate": 0.00019964688945004993, "loss": 11.685, "step": 3864 }, { "epoch": 0.08090513271372352, "grad_norm": 0.18761447072029114, "learning_rate": 0.00019964670533502353, "loss": 11.6744, "step": 3865 }, { "epoch": 0.08092606547768567, "grad_norm": 0.274818480014801, "learning_rate": 0.000199646521172095, "loss": 11.69, "step": 3866 }, { "epoch": 0.08094699824164783, "grad_norm": 0.2497074156999588, "learning_rate": 0.00019964633696126438, "loss": 11.6899, "step": 3867 }, { "epoch": 0.08096793100560998, "grad_norm": 0.2284369319677353, "learning_rate": 0.00019964615270253176, "loss": 11.6932, "step": 3868 }, { "epoch": 0.08098886376957214, "grad_norm": 0.28408002853393555, "learning_rate": 0.00019964596839589725, "loss": 11.7053, "step": 3869 }, { "epoch": 0.08100979653353428, "grad_norm": 0.24326828122138977, "learning_rate": 0.00019964578404136095, "loss": 11.7004, "step": 3870 }, { "epoch": 0.08103072929749644, "grad_norm": 0.21179236471652985, "learning_rate": 0.00019964559963892286, "loss": 11.6783, "step": 3871 }, { "epoch": 0.0810516620614586, "grad_norm": 0.2395537942647934, "learning_rate": 0.00019964541518858317, "loss": 11.6836, "step": 3872 }, { "epoch": 0.08107259482542074, "grad_norm": 0.2439740151166916, "learning_rate": 0.00019964523069034194, "loss": 11.6894, "step": 3873 }, { "epoch": 0.0810935275893829, "grad_norm": 0.2380974292755127, "learning_rate": 0.00019964504614419925, "loss": 11.6687, "step": 3874 }, { "epoch": 0.08111446035334506, "grad_norm": 0.2309272438287735, "learning_rate": 0.00019964486155015515, "loss": 11.6847, "step": 3875 }, { "epoch": 0.0811353931173072, "grad_norm": 0.248855322599411, "learning_rate": 0.00019964467690820984, "loss": 11.6856, "step": 3876 }, { "epoch": 0.08115632588126936, "grad_norm": 0.36072251200675964, "learning_rate": 0.00019964449221836328, "loss": 11.6661, "step": 3877 }, { "epoch": 0.08117725864523152, "grad_norm": 0.25537925958633423, "learning_rate": 0.0001996443074806156, "loss": 11.694, "step": 3878 }, { "epoch": 0.08119819140919367, "grad_norm": 0.21748767793178558, "learning_rate": 0.00019964412269496697, "loss": 11.6886, "step": 3879 }, { "epoch": 0.08121912417315583, "grad_norm": 0.27731946110725403, "learning_rate": 0.00019964393786141738, "loss": 11.686, "step": 3880 }, { "epoch": 0.08124005693711797, "grad_norm": 0.25630882382392883, "learning_rate": 0.00019964375297996692, "loss": 11.6804, "step": 3881 }, { "epoch": 0.08126098970108013, "grad_norm": 0.22255024313926697, "learning_rate": 0.00019964356805061577, "loss": 11.6864, "step": 3882 }, { "epoch": 0.08128192246504229, "grad_norm": 1.652943730354309, "learning_rate": 0.00019964338307336394, "loss": 11.657, "step": 3883 }, { "epoch": 0.08130285522900443, "grad_norm": 0.22207039594650269, "learning_rate": 0.0001996431980482115, "loss": 11.6924, "step": 3884 }, { "epoch": 0.08132378799296659, "grad_norm": 0.29402047395706177, "learning_rate": 0.00019964301297515863, "loss": 11.6821, "step": 3885 }, { "epoch": 0.08134472075692875, "grad_norm": 0.2089485377073288, "learning_rate": 0.00019964282785420534, "loss": 11.6969, "step": 3886 }, { "epoch": 0.0813656535208909, "grad_norm": 0.22799170017242432, "learning_rate": 0.00019964264268535175, "loss": 11.6787, "step": 3887 }, { "epoch": 0.08138658628485305, "grad_norm": 0.22324605286121368, "learning_rate": 0.00019964245746859795, "loss": 11.6732, "step": 3888 }, { "epoch": 0.0814075190488152, "grad_norm": 0.24498949944972992, "learning_rate": 0.00019964227220394402, "loss": 11.6762, "step": 3889 }, { "epoch": 0.08142845181277736, "grad_norm": 0.23825319111347198, "learning_rate": 0.00019964208689139006, "loss": 11.6963, "step": 3890 }, { "epoch": 0.08144938457673952, "grad_norm": 0.2332179993391037, "learning_rate": 0.00019964190153093613, "loss": 11.6981, "step": 3891 }, { "epoch": 0.08147031734070166, "grad_norm": 0.25944653153419495, "learning_rate": 0.00019964171612258237, "loss": 11.6906, "step": 3892 }, { "epoch": 0.08149125010466382, "grad_norm": 0.24350403249263763, "learning_rate": 0.00019964153066632885, "loss": 11.6777, "step": 3893 }, { "epoch": 0.08151218286862598, "grad_norm": 0.2380971908569336, "learning_rate": 0.0001996413451621756, "loss": 11.6954, "step": 3894 }, { "epoch": 0.08153311563258812, "grad_norm": 0.2008945196866989, "learning_rate": 0.0001996411596101228, "loss": 11.6915, "step": 3895 }, { "epoch": 0.08155404839655028, "grad_norm": 0.21452374756336212, "learning_rate": 0.00019964097401017048, "loss": 11.6802, "step": 3896 }, { "epoch": 0.08157498116051243, "grad_norm": 0.20618881285190582, "learning_rate": 0.00019964078836231874, "loss": 11.6727, "step": 3897 }, { "epoch": 0.08159591392447459, "grad_norm": 0.23645669221878052, "learning_rate": 0.00019964060266656767, "loss": 11.6921, "step": 3898 }, { "epoch": 0.08161684668843674, "grad_norm": 0.2872469425201416, "learning_rate": 0.0001996404169229174, "loss": 11.684, "step": 3899 }, { "epoch": 0.08163777945239889, "grad_norm": 0.22484859824180603, "learning_rate": 0.00019964023113136796, "loss": 11.6904, "step": 3900 }, { "epoch": 0.08165871221636105, "grad_norm": 0.29662320017814636, "learning_rate": 0.00019964004529191947, "loss": 11.7028, "step": 3901 }, { "epoch": 0.08167964498032321, "grad_norm": 0.2513805627822876, "learning_rate": 0.00019963985940457205, "loss": 11.6857, "step": 3902 }, { "epoch": 0.08170057774428535, "grad_norm": 0.3273869454860687, "learning_rate": 0.00019963967346932568, "loss": 11.6798, "step": 3903 }, { "epoch": 0.08172151050824751, "grad_norm": 0.2123744934797287, "learning_rate": 0.00019963948748618056, "loss": 11.6814, "step": 3904 }, { "epoch": 0.08174244327220967, "grad_norm": 0.21722856163978577, "learning_rate": 0.00019963930145513675, "loss": 11.6992, "step": 3905 }, { "epoch": 0.08176337603617181, "grad_norm": 0.1818975955247879, "learning_rate": 0.00019963911537619432, "loss": 11.6807, "step": 3906 }, { "epoch": 0.08178430880013397, "grad_norm": 0.2661203444004059, "learning_rate": 0.00019963892924935336, "loss": 11.6851, "step": 3907 }, { "epoch": 0.08180524156409612, "grad_norm": 0.23128563165664673, "learning_rate": 0.000199638743074614, "loss": 11.6873, "step": 3908 }, { "epoch": 0.08182617432805828, "grad_norm": 0.23625847697257996, "learning_rate": 0.0001996385568519763, "loss": 11.6884, "step": 3909 }, { "epoch": 0.08184710709202044, "grad_norm": 0.21385590732097626, "learning_rate": 0.0001996383705814403, "loss": 11.693, "step": 3910 }, { "epoch": 0.08186803985598258, "grad_norm": 0.22929194569587708, "learning_rate": 0.00019963818426300617, "loss": 11.6865, "step": 3911 }, { "epoch": 0.08188897261994474, "grad_norm": 0.19366005063056946, "learning_rate": 0.00019963799789667397, "loss": 11.6928, "step": 3912 }, { "epoch": 0.0819099053839069, "grad_norm": 0.19130995869636536, "learning_rate": 0.00019963781148244378, "loss": 11.6926, "step": 3913 }, { "epoch": 0.08193083814786904, "grad_norm": 0.2132561355829239, "learning_rate": 0.0001996376250203157, "loss": 11.6892, "step": 3914 }, { "epoch": 0.0819517709118312, "grad_norm": 0.23186412453651428, "learning_rate": 0.00019963743851028983, "loss": 11.6903, "step": 3915 }, { "epoch": 0.08197270367579335, "grad_norm": 0.28330427408218384, "learning_rate": 0.0001996372519523662, "loss": 11.6767, "step": 3916 }, { "epoch": 0.0819936364397555, "grad_norm": 0.2502373456954956, "learning_rate": 0.000199637065346545, "loss": 11.6855, "step": 3917 }, { "epoch": 0.08201456920371766, "grad_norm": 0.21801428496837616, "learning_rate": 0.00019963687869282626, "loss": 11.675, "step": 3918 }, { "epoch": 0.08203550196767981, "grad_norm": 0.24664360284805298, "learning_rate": 0.00019963669199121004, "loss": 11.6816, "step": 3919 }, { "epoch": 0.08205643473164197, "grad_norm": 0.2694905996322632, "learning_rate": 0.00019963650524169651, "loss": 11.6921, "step": 3920 }, { "epoch": 0.08207736749560413, "grad_norm": 0.25803253054618835, "learning_rate": 0.00019963631844428567, "loss": 11.6919, "step": 3921 }, { "epoch": 0.08209830025956627, "grad_norm": 0.2438300997018814, "learning_rate": 0.0001996361315989777, "loss": 11.6869, "step": 3922 }, { "epoch": 0.08211923302352843, "grad_norm": 0.24481533467769623, "learning_rate": 0.0001996359447057726, "loss": 11.6792, "step": 3923 }, { "epoch": 0.08214016578749057, "grad_norm": 0.20662103593349457, "learning_rate": 0.00019963575776467055, "loss": 11.6813, "step": 3924 }, { "epoch": 0.08216109855145273, "grad_norm": 0.32019683718681335, "learning_rate": 0.00019963557077567155, "loss": 11.6895, "step": 3925 }, { "epoch": 0.08218203131541489, "grad_norm": 0.21832172572612762, "learning_rate": 0.00019963538373877576, "loss": 11.6751, "step": 3926 }, { "epoch": 0.08220296407937704, "grad_norm": 0.1800602227449417, "learning_rate": 0.00019963519665398325, "loss": 11.6661, "step": 3927 }, { "epoch": 0.0822238968433392, "grad_norm": 0.28827333450317383, "learning_rate": 0.00019963500952129408, "loss": 11.6927, "step": 3928 }, { "epoch": 0.08224482960730135, "grad_norm": 0.2803322374820709, "learning_rate": 0.00019963482234070838, "loss": 11.6788, "step": 3929 }, { "epoch": 0.0822657623712635, "grad_norm": 0.24061596393585205, "learning_rate": 0.00019963463511222623, "loss": 11.6919, "step": 3930 }, { "epoch": 0.08228669513522566, "grad_norm": 0.2519931197166443, "learning_rate": 0.00019963444783584772, "loss": 11.6729, "step": 3931 }, { "epoch": 0.0823076278991878, "grad_norm": 0.23751719295978546, "learning_rate": 0.0001996342605115729, "loss": 11.672, "step": 3932 }, { "epoch": 0.08232856066314996, "grad_norm": 0.2429262399673462, "learning_rate": 0.00019963407313940192, "loss": 11.6903, "step": 3933 }, { "epoch": 0.08234949342711212, "grad_norm": 0.29368236660957336, "learning_rate": 0.00019963388571933485, "loss": 11.6887, "step": 3934 }, { "epoch": 0.08237042619107426, "grad_norm": 0.27016180753707886, "learning_rate": 0.00019963369825137177, "loss": 11.6923, "step": 3935 }, { "epoch": 0.08239135895503642, "grad_norm": 0.23408155143260956, "learning_rate": 0.00019963351073551278, "loss": 11.6838, "step": 3936 }, { "epoch": 0.08241229171899858, "grad_norm": 0.2261156588792801, "learning_rate": 0.00019963332317175794, "loss": 11.6927, "step": 3937 }, { "epoch": 0.08243322448296073, "grad_norm": 0.2191910445690155, "learning_rate": 0.0001996331355601074, "loss": 11.6821, "step": 3938 }, { "epoch": 0.08245415724692289, "grad_norm": 0.21293875575065613, "learning_rate": 0.00019963294790056117, "loss": 11.712, "step": 3939 }, { "epoch": 0.08247509001088504, "grad_norm": 0.2144477814435959, "learning_rate": 0.00019963276019311943, "loss": 11.685, "step": 3940 }, { "epoch": 0.08249602277484719, "grad_norm": 0.2719860076904297, "learning_rate": 0.0001996325724377822, "loss": 11.6966, "step": 3941 }, { "epoch": 0.08251695553880935, "grad_norm": 0.1808824986219406, "learning_rate": 0.0001996323846345496, "loss": 11.6656, "step": 3942 }, { "epoch": 0.08253788830277149, "grad_norm": 0.20600856840610504, "learning_rate": 0.0001996321967834217, "loss": 11.6932, "step": 3943 }, { "epoch": 0.08255882106673365, "grad_norm": 0.21245087683200836, "learning_rate": 0.00019963200888439863, "loss": 11.676, "step": 3944 }, { "epoch": 0.08257975383069581, "grad_norm": 0.19337132573127747, "learning_rate": 0.00019963182093748046, "loss": 11.6777, "step": 3945 }, { "epoch": 0.08260068659465795, "grad_norm": 0.21957257390022278, "learning_rate": 0.00019963163294266725, "loss": 11.6664, "step": 3946 }, { "epoch": 0.08262161935862011, "grad_norm": 0.2097797393798828, "learning_rate": 0.00019963144489995913, "loss": 11.6901, "step": 3947 }, { "epoch": 0.08264255212258227, "grad_norm": 0.24050264060497284, "learning_rate": 0.0001996312568093562, "loss": 11.6824, "step": 3948 }, { "epoch": 0.08266348488654442, "grad_norm": 0.2535663843154907, "learning_rate": 0.0001996310686708585, "loss": 11.6901, "step": 3949 }, { "epoch": 0.08268441765050658, "grad_norm": 0.21300870180130005, "learning_rate": 0.00019963088048446616, "loss": 11.6835, "step": 3950 }, { "epoch": 0.08270535041446872, "grad_norm": 0.2095288783311844, "learning_rate": 0.00019963069225017926, "loss": 11.6951, "step": 3951 }, { "epoch": 0.08272628317843088, "grad_norm": 0.2641160786151886, "learning_rate": 0.0001996305039679979, "loss": 11.6906, "step": 3952 }, { "epoch": 0.08274721594239304, "grad_norm": 0.19041727483272552, "learning_rate": 0.00019963031563792214, "loss": 11.6736, "step": 3953 }, { "epoch": 0.08276814870635518, "grad_norm": 0.17836041748523712, "learning_rate": 0.0001996301272599521, "loss": 11.6852, "step": 3954 }, { "epoch": 0.08278908147031734, "grad_norm": 0.24662788212299347, "learning_rate": 0.00019962993883408786, "loss": 11.6879, "step": 3955 }, { "epoch": 0.0828100142342795, "grad_norm": 0.25455912947654724, "learning_rate": 0.0001996297503603295, "loss": 11.7024, "step": 3956 }, { "epoch": 0.08283094699824164, "grad_norm": 0.21010589599609375, "learning_rate": 0.00019962956183867714, "loss": 11.6809, "step": 3957 }, { "epoch": 0.0828518797622038, "grad_norm": 0.21861299872398376, "learning_rate": 0.00019962937326913085, "loss": 11.6812, "step": 3958 }, { "epoch": 0.08287281252616595, "grad_norm": 0.20745185017585754, "learning_rate": 0.00019962918465169072, "loss": 11.6802, "step": 3959 }, { "epoch": 0.0828937452901281, "grad_norm": 0.2286778688430786, "learning_rate": 0.00019962899598635682, "loss": 11.6871, "step": 3960 }, { "epoch": 0.08291467805409027, "grad_norm": 0.2372502237558365, "learning_rate": 0.0001996288072731293, "loss": 11.6829, "step": 3961 }, { "epoch": 0.08293561081805241, "grad_norm": 0.22296187281608582, "learning_rate": 0.0001996286185120082, "loss": 11.6816, "step": 3962 }, { "epoch": 0.08295654358201457, "grad_norm": 0.23618479073047638, "learning_rate": 0.00019962842970299365, "loss": 11.6953, "step": 3963 }, { "epoch": 0.08297747634597673, "grad_norm": 0.21607623994350433, "learning_rate": 0.0001996282408460857, "loss": 11.6848, "step": 3964 }, { "epoch": 0.08299840910993887, "grad_norm": 0.18379417061805725, "learning_rate": 0.00019962805194128445, "loss": 11.6837, "step": 3965 }, { "epoch": 0.08301934187390103, "grad_norm": 0.21712742745876312, "learning_rate": 0.00019962786298859003, "loss": 11.6763, "step": 3966 }, { "epoch": 0.08304027463786319, "grad_norm": 2.6931185722351074, "learning_rate": 0.00019962767398800244, "loss": 11.6507, "step": 3967 }, { "epoch": 0.08306120740182533, "grad_norm": 0.17322412133216858, "learning_rate": 0.00019962748493952188, "loss": 11.698, "step": 3968 }, { "epoch": 0.0830821401657875, "grad_norm": 0.22320672869682312, "learning_rate": 0.0001996272958431484, "loss": 11.686, "step": 3969 }, { "epoch": 0.08310307292974964, "grad_norm": 2.811279535293579, "learning_rate": 0.00019962710669888206, "loss": 11.6823, "step": 3970 }, { "epoch": 0.0831240056937118, "grad_norm": 0.22652944922447205, "learning_rate": 0.00019962691750672294, "loss": 11.6962, "step": 3971 }, { "epoch": 0.08314493845767396, "grad_norm": 0.23025965690612793, "learning_rate": 0.0001996267282666712, "loss": 11.6769, "step": 3972 }, { "epoch": 0.0831658712216361, "grad_norm": 0.2024911791086197, "learning_rate": 0.0001996265389787269, "loss": 11.6897, "step": 3973 }, { "epoch": 0.08318680398559826, "grad_norm": 0.2151106894016266, "learning_rate": 0.0001996263496428901, "loss": 11.6847, "step": 3974 }, { "epoch": 0.08320773674956042, "grad_norm": 0.29240137338638306, "learning_rate": 0.00019962616025916098, "loss": 11.7008, "step": 3975 }, { "epoch": 0.08322866951352256, "grad_norm": 0.2084052413702011, "learning_rate": 0.00019962597082753952, "loss": 11.6843, "step": 3976 }, { "epoch": 0.08324960227748472, "grad_norm": 0.20406553149223328, "learning_rate": 0.00019962578134802582, "loss": 11.6703, "step": 3977 }, { "epoch": 0.08327053504144687, "grad_norm": 0.2741427421569824, "learning_rate": 0.00019962559182062009, "loss": 11.6956, "step": 3978 }, { "epoch": 0.08329146780540903, "grad_norm": 0.2108343243598938, "learning_rate": 0.0001996254022453223, "loss": 11.6754, "step": 3979 }, { "epoch": 0.08331240056937118, "grad_norm": 0.2144295871257782, "learning_rate": 0.0001996252126221326, "loss": 11.6816, "step": 3980 }, { "epoch": 0.08333333333333333, "grad_norm": 0.2648153305053711, "learning_rate": 0.00019962502295105104, "loss": 11.702, "step": 3981 }, { "epoch": 0.08335426609729549, "grad_norm": 0.197517529129982, "learning_rate": 0.00019962483323207773, "loss": 11.6978, "step": 3982 }, { "epoch": 0.08337519886125765, "grad_norm": 0.21108843386173248, "learning_rate": 0.00019962464346521278, "loss": 11.6843, "step": 3983 }, { "epoch": 0.08339613162521979, "grad_norm": 0.3208363950252533, "learning_rate": 0.0001996244536504563, "loss": 11.6854, "step": 3984 }, { "epoch": 0.08341706438918195, "grad_norm": 0.24551455676555634, "learning_rate": 0.0001996242637878083, "loss": 11.6937, "step": 3985 }, { "epoch": 0.0834379971531441, "grad_norm": 0.1845862865447998, "learning_rate": 0.00019962407387726897, "loss": 11.6897, "step": 3986 }, { "epoch": 0.08345892991710625, "grad_norm": 0.23354578018188477, "learning_rate": 0.00019962388391883828, "loss": 11.692, "step": 3987 }, { "epoch": 0.08347986268106841, "grad_norm": 0.20458106696605682, "learning_rate": 0.00019962369391251646, "loss": 11.685, "step": 3988 }, { "epoch": 0.08350079544503056, "grad_norm": 0.18870210647583008, "learning_rate": 0.0001996235038583035, "loss": 11.6671, "step": 3989 }, { "epoch": 0.08352172820899272, "grad_norm": 0.21001477539539337, "learning_rate": 0.00019962331375619953, "loss": 11.686, "step": 3990 }, { "epoch": 0.08354266097295487, "grad_norm": 0.2660890817642212, "learning_rate": 0.00019962312360620463, "loss": 11.6879, "step": 3991 }, { "epoch": 0.08356359373691702, "grad_norm": 0.23483434319496155, "learning_rate": 0.00019962293340831892, "loss": 11.6808, "step": 3992 }, { "epoch": 0.08358452650087918, "grad_norm": 0.21045179665088654, "learning_rate": 0.00019962274316254246, "loss": 11.6747, "step": 3993 }, { "epoch": 0.08360545926484134, "grad_norm": 0.2181558609008789, "learning_rate": 0.00019962255286887532, "loss": 11.6849, "step": 3994 }, { "epoch": 0.08362639202880348, "grad_norm": 0.23599155247211456, "learning_rate": 0.00019962236252731765, "loss": 11.7024, "step": 3995 }, { "epoch": 0.08364732479276564, "grad_norm": 0.24010998010635376, "learning_rate": 0.00019962217213786954, "loss": 11.6868, "step": 3996 }, { "epoch": 0.08366825755672778, "grad_norm": 0.2667767405509949, "learning_rate": 0.000199621981700531, "loss": 11.6965, "step": 3997 }, { "epoch": 0.08368919032068994, "grad_norm": 0.24150410294532776, "learning_rate": 0.0001996217912153022, "loss": 11.6846, "step": 3998 }, { "epoch": 0.0837101230846521, "grad_norm": 0.35056087374687195, "learning_rate": 0.00019962160068218323, "loss": 11.6644, "step": 3999 }, { "epoch": 0.08373105584861425, "grad_norm": 0.2343379259109497, "learning_rate": 0.00019962141010117414, "loss": 11.6775, "step": 4000 }, { "epoch": 0.08373105584861425, "eval_loss": 11.685898780822754, "eval_runtime": 34.3302, "eval_samples_per_second": 27.993, "eval_steps_per_second": 7.02, "step": 4000 }, { "epoch": 0.0837519886125764, "grad_norm": 0.22373144328594208, "learning_rate": 0.00019962121947227502, "loss": 11.6826, "step": 4001 }, { "epoch": 0.08377292137653856, "grad_norm": 0.26875415444374084, "learning_rate": 0.00019962102879548605, "loss": 11.6828, "step": 4002 }, { "epoch": 0.08379385414050071, "grad_norm": 0.18919962644577026, "learning_rate": 0.0001996208380708072, "loss": 11.678, "step": 4003 }, { "epoch": 0.08381478690446287, "grad_norm": 0.1992950439453125, "learning_rate": 0.00019962064729823863, "loss": 11.6724, "step": 4004 }, { "epoch": 0.08383571966842501, "grad_norm": 0.2308778315782547, "learning_rate": 0.00019962045647778042, "loss": 11.666, "step": 4005 }, { "epoch": 0.08385665243238717, "grad_norm": 0.21039408445358276, "learning_rate": 0.00019962026560943269, "loss": 11.6885, "step": 4006 }, { "epoch": 0.08387758519634933, "grad_norm": 0.2078806757926941, "learning_rate": 0.00019962007469319547, "loss": 11.6968, "step": 4007 }, { "epoch": 0.08389851796031148, "grad_norm": 0.2504064738750458, "learning_rate": 0.00019961988372906885, "loss": 11.7009, "step": 4008 }, { "epoch": 0.08391945072427363, "grad_norm": 0.20471064746379852, "learning_rate": 0.00019961969271705302, "loss": 11.6882, "step": 4009 }, { "epoch": 0.08394038348823579, "grad_norm": 0.22976817190647125, "learning_rate": 0.00019961950165714795, "loss": 11.7027, "step": 4010 }, { "epoch": 0.08396131625219794, "grad_norm": 0.26581868529319763, "learning_rate": 0.00019961931054935384, "loss": 11.6858, "step": 4011 }, { "epoch": 0.0839822490161601, "grad_norm": 0.2168094664812088, "learning_rate": 0.00019961911939367068, "loss": 11.6779, "step": 4012 }, { "epoch": 0.08400318178012224, "grad_norm": 0.2256901115179062, "learning_rate": 0.00019961892819009863, "loss": 11.69, "step": 4013 }, { "epoch": 0.0840241145440844, "grad_norm": 0.1885414719581604, "learning_rate": 0.00019961873693863778, "loss": 11.6899, "step": 4014 }, { "epoch": 0.08404504730804656, "grad_norm": 0.21240843832492828, "learning_rate": 0.00019961854563928824, "loss": 11.6969, "step": 4015 }, { "epoch": 0.0840659800720087, "grad_norm": 0.22694319486618042, "learning_rate": 0.00019961835429205, "loss": 11.6935, "step": 4016 }, { "epoch": 0.08408691283597086, "grad_norm": 0.17049741744995117, "learning_rate": 0.00019961816289692328, "loss": 11.6782, "step": 4017 }, { "epoch": 0.08410784559993302, "grad_norm": 0.2855082154273987, "learning_rate": 0.00019961797145390806, "loss": 11.6871, "step": 4018 }, { "epoch": 0.08412877836389517, "grad_norm": 0.26761969923973083, "learning_rate": 0.00019961777996300453, "loss": 11.6945, "step": 4019 }, { "epoch": 0.08414971112785732, "grad_norm": 0.26475968956947327, "learning_rate": 0.0001996175884242127, "loss": 11.6876, "step": 4020 }, { "epoch": 0.08417064389181948, "grad_norm": 0.20388619601726532, "learning_rate": 0.00019961739683753272, "loss": 11.6744, "step": 4021 }, { "epoch": 0.08419157665578163, "grad_norm": 0.20849458873271942, "learning_rate": 0.00019961720520296464, "loss": 11.6769, "step": 4022 }, { "epoch": 0.08421250941974379, "grad_norm": 0.22752514481544495, "learning_rate": 0.0001996170135205086, "loss": 11.6935, "step": 4023 }, { "epoch": 0.08423344218370593, "grad_norm": 0.2322757989168167, "learning_rate": 0.00019961682179016465, "loss": 11.6723, "step": 4024 }, { "epoch": 0.08425437494766809, "grad_norm": 0.24578504264354706, "learning_rate": 0.0001996166300119329, "loss": 11.6806, "step": 4025 }, { "epoch": 0.08427530771163025, "grad_norm": 0.2573462128639221, "learning_rate": 0.00019961643818581343, "loss": 11.6825, "step": 4026 }, { "epoch": 0.0842962404755924, "grad_norm": 0.1555696278810501, "learning_rate": 0.00019961624631180638, "loss": 11.6747, "step": 4027 }, { "epoch": 0.08431717323955455, "grad_norm": 0.22848662734031677, "learning_rate": 0.00019961605438991174, "loss": 11.6966, "step": 4028 }, { "epoch": 0.08433810600351671, "grad_norm": 0.190124973654747, "learning_rate": 0.00019961586242012974, "loss": 11.6822, "step": 4029 }, { "epoch": 0.08435903876747886, "grad_norm": 0.23074018955230713, "learning_rate": 0.00019961567040246037, "loss": 11.6727, "step": 4030 }, { "epoch": 0.08437997153144101, "grad_norm": 0.23472918570041656, "learning_rate": 0.00019961547833690372, "loss": 11.6723, "step": 4031 }, { "epoch": 0.08440090429540316, "grad_norm": 0.20260298252105713, "learning_rate": 0.00019961528622345994, "loss": 11.6877, "step": 4032 }, { "epoch": 0.08442183705936532, "grad_norm": 0.23343589901924133, "learning_rate": 0.0001996150940621291, "loss": 11.6912, "step": 4033 }, { "epoch": 0.08444276982332748, "grad_norm": 0.25518178939819336, "learning_rate": 0.00019961490185291128, "loss": 11.6926, "step": 4034 }, { "epoch": 0.08446370258728962, "grad_norm": 0.2423936128616333, "learning_rate": 0.00019961470959580657, "loss": 11.6872, "step": 4035 }, { "epoch": 0.08448463535125178, "grad_norm": 0.21364383399486542, "learning_rate": 0.00019961451729081509, "loss": 11.6834, "step": 4036 }, { "epoch": 0.08450556811521394, "grad_norm": 0.20987236499786377, "learning_rate": 0.0001996143249379369, "loss": 11.6907, "step": 4037 }, { "epoch": 0.08452650087917608, "grad_norm": 0.2131832242012024, "learning_rate": 0.00019961413253717213, "loss": 11.6736, "step": 4038 }, { "epoch": 0.08454743364313824, "grad_norm": 0.2304953932762146, "learning_rate": 0.00019961394008852082, "loss": 11.6677, "step": 4039 }, { "epoch": 0.08456836640710039, "grad_norm": 0.2949059307575226, "learning_rate": 0.00019961374759198313, "loss": 11.6701, "step": 4040 }, { "epoch": 0.08458929917106255, "grad_norm": 0.2630934715270996, "learning_rate": 0.00019961355504755906, "loss": 11.6879, "step": 4041 }, { "epoch": 0.0846102319350247, "grad_norm": 0.19307737052440643, "learning_rate": 0.00019961336245524883, "loss": 11.6668, "step": 4042 }, { "epoch": 0.08463116469898685, "grad_norm": 0.20229238271713257, "learning_rate": 0.00019961316981505242, "loss": 11.6771, "step": 4043 }, { "epoch": 0.08465209746294901, "grad_norm": 0.17837011814117432, "learning_rate": 0.00019961297712696997, "loss": 11.6806, "step": 4044 }, { "epoch": 0.08467303022691117, "grad_norm": 0.23374806344509125, "learning_rate": 0.00019961278439100153, "loss": 11.71, "step": 4045 }, { "epoch": 0.08469396299087331, "grad_norm": 0.2936651408672333, "learning_rate": 0.00019961259160714728, "loss": 11.6716, "step": 4046 }, { "epoch": 0.08471489575483547, "grad_norm": 0.23450309038162231, "learning_rate": 0.00019961239877540723, "loss": 11.6963, "step": 4047 }, { "epoch": 0.08473582851879762, "grad_norm": 0.2548101842403412, "learning_rate": 0.00019961220589578152, "loss": 11.6886, "step": 4048 }, { "epoch": 0.08475676128275977, "grad_norm": 0.2852790653705597, "learning_rate": 0.0001996120129682702, "loss": 11.681, "step": 4049 }, { "epoch": 0.08477769404672193, "grad_norm": 0.20469865202903748, "learning_rate": 0.00019961181999287342, "loss": 11.6894, "step": 4050 }, { "epoch": 0.08479862681068408, "grad_norm": 0.2587399482727051, "learning_rate": 0.00019961162696959124, "loss": 11.6804, "step": 4051 }, { "epoch": 0.08481955957464624, "grad_norm": 0.19260479509830475, "learning_rate": 0.00019961143389842374, "loss": 11.6719, "step": 4052 }, { "epoch": 0.0848404923386084, "grad_norm": 0.21668243408203125, "learning_rate": 0.00019961124077937103, "loss": 11.7053, "step": 4053 }, { "epoch": 0.08486142510257054, "grad_norm": 0.17474189400672913, "learning_rate": 0.0001996110476124332, "loss": 11.6822, "step": 4054 }, { "epoch": 0.0848823578665327, "grad_norm": 0.18087312579154968, "learning_rate": 0.00019961085439761035, "loss": 11.6834, "step": 4055 }, { "epoch": 0.08490329063049486, "grad_norm": 0.27005988359451294, "learning_rate": 0.00019961066113490257, "loss": 11.6972, "step": 4056 }, { "epoch": 0.084924223394457, "grad_norm": 0.2997986972332001, "learning_rate": 0.00019961046782430994, "loss": 11.6885, "step": 4057 }, { "epoch": 0.08494515615841916, "grad_norm": 0.218805193901062, "learning_rate": 0.00019961027446583257, "loss": 11.6964, "step": 4058 }, { "epoch": 0.0849660889223813, "grad_norm": 0.2723298966884613, "learning_rate": 0.00019961008105947055, "loss": 11.6951, "step": 4059 }, { "epoch": 0.08498702168634346, "grad_norm": 0.19191573560237885, "learning_rate": 0.00019960988760522395, "loss": 11.6727, "step": 4060 }, { "epoch": 0.08500795445030562, "grad_norm": 0.21520006656646729, "learning_rate": 0.00019960969410309288, "loss": 11.6756, "step": 4061 }, { "epoch": 0.08502888721426777, "grad_norm": 0.27789366245269775, "learning_rate": 0.00019960950055307745, "loss": 11.6917, "step": 4062 }, { "epoch": 0.08504981997822993, "grad_norm": 0.22448426485061646, "learning_rate": 0.00019960930695517773, "loss": 11.675, "step": 4063 }, { "epoch": 0.08507075274219209, "grad_norm": 0.23481205105781555, "learning_rate": 0.0001996091133093938, "loss": 11.684, "step": 4064 }, { "epoch": 0.08509168550615423, "grad_norm": 0.309292197227478, "learning_rate": 0.00019960891961572582, "loss": 11.6976, "step": 4065 }, { "epoch": 0.08511261827011639, "grad_norm": 0.19072142243385315, "learning_rate": 0.00019960872587417377, "loss": 11.6796, "step": 4066 }, { "epoch": 0.08513355103407853, "grad_norm": 0.18744154274463654, "learning_rate": 0.00019960853208473786, "loss": 11.6869, "step": 4067 }, { "epoch": 0.08515448379804069, "grad_norm": 0.2223673015832901, "learning_rate": 0.0001996083382474181, "loss": 11.6848, "step": 4068 }, { "epoch": 0.08517541656200285, "grad_norm": 0.24065563082695007, "learning_rate": 0.00019960814436221464, "loss": 11.6906, "step": 4069 }, { "epoch": 0.085196349325965, "grad_norm": 0.18598075211048126, "learning_rate": 0.00019960795042912755, "loss": 11.664, "step": 4070 }, { "epoch": 0.08521728208992715, "grad_norm": 0.26779821515083313, "learning_rate": 0.00019960775644815694, "loss": 11.7014, "step": 4071 }, { "epoch": 0.08523821485388931, "grad_norm": 0.23112669587135315, "learning_rate": 0.00019960756241930284, "loss": 11.6662, "step": 4072 }, { "epoch": 0.08525914761785146, "grad_norm": 0.23085527122020721, "learning_rate": 0.0001996073683425654, "loss": 11.6869, "step": 4073 }, { "epoch": 0.08528008038181362, "grad_norm": 0.2368977963924408, "learning_rate": 0.00019960717421794473, "loss": 11.6899, "step": 4074 }, { "epoch": 0.08530101314577576, "grad_norm": 0.20844188332557678, "learning_rate": 0.00019960698004544086, "loss": 11.6868, "step": 4075 }, { "epoch": 0.08532194590973792, "grad_norm": 0.204427108168602, "learning_rate": 0.00019960678582505394, "loss": 11.6942, "step": 4076 }, { "epoch": 0.08534287867370008, "grad_norm": 0.2058781236410141, "learning_rate": 0.00019960659155678404, "loss": 11.6797, "step": 4077 }, { "epoch": 0.08536381143766222, "grad_norm": 0.23705066740512848, "learning_rate": 0.00019960639724063124, "loss": 11.6968, "step": 4078 }, { "epoch": 0.08538474420162438, "grad_norm": 0.26061609387397766, "learning_rate": 0.00019960620287659566, "loss": 11.6855, "step": 4079 }, { "epoch": 0.08540567696558654, "grad_norm": 0.19869667291641235, "learning_rate": 0.0001996060084646774, "loss": 11.6862, "step": 4080 }, { "epoch": 0.08542660972954869, "grad_norm": 0.23062238097190857, "learning_rate": 0.00019960581400487652, "loss": 11.6888, "step": 4081 }, { "epoch": 0.08544754249351084, "grad_norm": 0.30234259366989136, "learning_rate": 0.00019960561949719312, "loss": 11.6804, "step": 4082 }, { "epoch": 0.085468475257473, "grad_norm": 0.24406102299690247, "learning_rate": 0.0001996054249416273, "loss": 11.6968, "step": 4083 }, { "epoch": 0.08548940802143515, "grad_norm": 0.23275406658649445, "learning_rate": 0.0001996052303381792, "loss": 11.691, "step": 4084 }, { "epoch": 0.0855103407853973, "grad_norm": 0.1840139478445053, "learning_rate": 0.00019960503568684882, "loss": 11.6858, "step": 4085 }, { "epoch": 0.08553127354935945, "grad_norm": 0.2132430523633957, "learning_rate": 0.0001996048409876363, "loss": 11.6936, "step": 4086 }, { "epoch": 0.08555220631332161, "grad_norm": 0.24695660173892975, "learning_rate": 0.00019960464624054177, "loss": 11.6849, "step": 4087 }, { "epoch": 0.08557313907728377, "grad_norm": 0.31447046995162964, "learning_rate": 0.00019960445144556527, "loss": 11.683, "step": 4088 }, { "epoch": 0.08559407184124591, "grad_norm": 0.19146855175495148, "learning_rate": 0.00019960425660270695, "loss": 11.6936, "step": 4089 }, { "epoch": 0.08561500460520807, "grad_norm": 0.1915295124053955, "learning_rate": 0.00019960406171196682, "loss": 11.6946, "step": 4090 }, { "epoch": 0.08563593736917023, "grad_norm": 0.26506713032722473, "learning_rate": 0.00019960386677334504, "loss": 11.6984, "step": 4091 }, { "epoch": 0.08565687013313238, "grad_norm": 0.20682357251644135, "learning_rate": 0.0001996036717868417, "loss": 11.6782, "step": 4092 }, { "epoch": 0.08567780289709453, "grad_norm": 0.17413677275180817, "learning_rate": 0.00019960347675245684, "loss": 11.687, "step": 4093 }, { "epoch": 0.08569873566105668, "grad_norm": 0.19881679117679596, "learning_rate": 0.0001996032816701906, "loss": 11.6819, "step": 4094 }, { "epoch": 0.08571966842501884, "grad_norm": 0.23766177892684937, "learning_rate": 0.0001996030865400431, "loss": 11.6897, "step": 4095 }, { "epoch": 0.085740601188981, "grad_norm": 0.2194356620311737, "learning_rate": 0.0001996028913620144, "loss": 11.6799, "step": 4096 }, { "epoch": 0.08576153395294314, "grad_norm": 0.2539765238761902, "learning_rate": 0.00019960269613610455, "loss": 11.6984, "step": 4097 }, { "epoch": 0.0857824667169053, "grad_norm": 0.2084967941045761, "learning_rate": 0.00019960250086231373, "loss": 11.6711, "step": 4098 }, { "epoch": 0.08580339948086746, "grad_norm": 0.2418900728225708, "learning_rate": 0.00019960230554064197, "loss": 11.6881, "step": 4099 }, { "epoch": 0.0858243322448296, "grad_norm": 0.20678609609603882, "learning_rate": 0.00019960211017108938, "loss": 11.6997, "step": 4100 }, { "epoch": 0.08584526500879176, "grad_norm": 0.2788871228694916, "learning_rate": 0.00019960191475365608, "loss": 11.7085, "step": 4101 }, { "epoch": 0.08586619777275391, "grad_norm": 0.2545692026615143, "learning_rate": 0.00019960171928834215, "loss": 11.6919, "step": 4102 }, { "epoch": 0.08588713053671607, "grad_norm": 0.19681409001350403, "learning_rate": 0.00019960152377514767, "loss": 11.6813, "step": 4103 }, { "epoch": 0.08590806330067823, "grad_norm": 0.257858008146286, "learning_rate": 0.00019960132821407274, "loss": 11.6891, "step": 4104 }, { "epoch": 0.08592899606464037, "grad_norm": 0.20109903812408447, "learning_rate": 0.00019960113260511744, "loss": 11.6792, "step": 4105 }, { "epoch": 0.08594992882860253, "grad_norm": 0.2620287239551544, "learning_rate": 0.00019960093694828187, "loss": 11.7076, "step": 4106 }, { "epoch": 0.08597086159256469, "grad_norm": 0.21601419150829315, "learning_rate": 0.00019960074124356617, "loss": 11.6837, "step": 4107 }, { "epoch": 0.08599179435652683, "grad_norm": 0.2597157657146454, "learning_rate": 0.00019960054549097037, "loss": 11.6826, "step": 4108 }, { "epoch": 0.08601272712048899, "grad_norm": 0.2521841526031494, "learning_rate": 0.0001996003496904946, "loss": 11.6776, "step": 4109 }, { "epoch": 0.08603365988445115, "grad_norm": 0.24456915259361267, "learning_rate": 0.00019960015384213895, "loss": 11.7123, "step": 4110 }, { "epoch": 0.0860545926484133, "grad_norm": 0.21414117515087128, "learning_rate": 0.00019959995794590352, "loss": 11.6821, "step": 4111 }, { "epoch": 0.08607552541237545, "grad_norm": 0.2572014629840851, "learning_rate": 0.0001995997620017884, "loss": 11.7034, "step": 4112 }, { "epoch": 0.0860964581763376, "grad_norm": 0.20860497653484344, "learning_rate": 0.00019959956600979365, "loss": 11.6968, "step": 4113 }, { "epoch": 0.08611739094029976, "grad_norm": 0.19457155466079712, "learning_rate": 0.0001995993699699194, "loss": 11.6854, "step": 4114 }, { "epoch": 0.08613832370426192, "grad_norm": 0.21321623027324677, "learning_rate": 0.00019959917388216573, "loss": 11.6909, "step": 4115 }, { "epoch": 0.08615925646822406, "grad_norm": 0.2480183094739914, "learning_rate": 0.00019959897774653277, "loss": 11.7042, "step": 4116 }, { "epoch": 0.08618018923218622, "grad_norm": 0.219650536775589, "learning_rate": 0.00019959878156302057, "loss": 11.6898, "step": 4117 }, { "epoch": 0.08620112199614838, "grad_norm": 0.20048843324184418, "learning_rate": 0.00019959858533162922, "loss": 11.6835, "step": 4118 }, { "epoch": 0.08622205476011052, "grad_norm": 0.22168207168579102, "learning_rate": 0.00019959838905235885, "loss": 11.6913, "step": 4119 }, { "epoch": 0.08624298752407268, "grad_norm": 0.9854568839073181, "learning_rate": 0.00019959819272520954, "loss": 11.6434, "step": 4120 }, { "epoch": 0.08626392028803483, "grad_norm": 0.22056645154953003, "learning_rate": 0.00019959799635018138, "loss": 11.6862, "step": 4121 }, { "epoch": 0.08628485305199698, "grad_norm": 0.23939797282218933, "learning_rate": 0.00019959779992727446, "loss": 11.6901, "step": 4122 }, { "epoch": 0.08630578581595914, "grad_norm": 0.2087520807981491, "learning_rate": 0.00019959760345648891, "loss": 11.6787, "step": 4123 }, { "epoch": 0.08632671857992129, "grad_norm": 0.2156488597393036, "learning_rate": 0.00019959740693782475, "loss": 11.6886, "step": 4124 }, { "epoch": 0.08634765134388345, "grad_norm": 0.20855732262134552, "learning_rate": 0.00019959721037128217, "loss": 11.6872, "step": 4125 }, { "epoch": 0.0863685841078456, "grad_norm": 0.21423877775669098, "learning_rate": 0.0001995970137568612, "loss": 11.6864, "step": 4126 }, { "epoch": 0.08638951687180775, "grad_norm": 0.316301554441452, "learning_rate": 0.00019959681709456195, "loss": 11.6864, "step": 4127 }, { "epoch": 0.08641044963576991, "grad_norm": 0.2285989671945572, "learning_rate": 0.00019959662038438448, "loss": 11.688, "step": 4128 }, { "epoch": 0.08643138239973205, "grad_norm": 0.2104010134935379, "learning_rate": 0.00019959642362632897, "loss": 11.6886, "step": 4129 }, { "epoch": 0.08645231516369421, "grad_norm": 0.21599268913269043, "learning_rate": 0.00019959622682039544, "loss": 11.6865, "step": 4130 }, { "epoch": 0.08647324792765637, "grad_norm": 0.18573272228240967, "learning_rate": 0.000199596029966584, "loss": 11.6911, "step": 4131 }, { "epoch": 0.08649418069161852, "grad_norm": 0.21162371337413788, "learning_rate": 0.00019959583306489474, "loss": 11.6874, "step": 4132 }, { "epoch": 0.08651511345558068, "grad_norm": 0.1852189004421234, "learning_rate": 0.0001995956361153278, "loss": 11.6776, "step": 4133 }, { "epoch": 0.08653604621954283, "grad_norm": 0.2348852902650833, "learning_rate": 0.00019959543911788323, "loss": 11.6887, "step": 4134 }, { "epoch": 0.08655697898350498, "grad_norm": 0.28434744477272034, "learning_rate": 0.00019959524207256114, "loss": 11.6786, "step": 4135 }, { "epoch": 0.08657791174746714, "grad_norm": 0.23740310966968536, "learning_rate": 0.0001995950449793616, "loss": 11.6816, "step": 4136 }, { "epoch": 0.0865988445114293, "grad_norm": 0.3061276078224182, "learning_rate": 0.00019959484783828475, "loss": 11.6994, "step": 4137 }, { "epoch": 0.08661977727539144, "grad_norm": 0.23551592230796814, "learning_rate": 0.00019959465064933065, "loss": 11.6982, "step": 4138 }, { "epoch": 0.0866407100393536, "grad_norm": 0.25295647978782654, "learning_rate": 0.0001995944534124994, "loss": 11.6794, "step": 4139 }, { "epoch": 0.08666164280331574, "grad_norm": 0.2662818431854248, "learning_rate": 0.0001995942561277911, "loss": 11.6928, "step": 4140 }, { "epoch": 0.0866825755672779, "grad_norm": 0.2969227135181427, "learning_rate": 0.0001995940587952059, "loss": 11.6929, "step": 4141 }, { "epoch": 0.08670350833124006, "grad_norm": 0.24261896312236786, "learning_rate": 0.00019959386141474376, "loss": 11.678, "step": 4142 }, { "epoch": 0.0867244410952022, "grad_norm": 0.1789897084236145, "learning_rate": 0.0001995936639864049, "loss": 11.6774, "step": 4143 }, { "epoch": 0.08674537385916437, "grad_norm": 0.22283315658569336, "learning_rate": 0.00019959346651018934, "loss": 11.6884, "step": 4144 }, { "epoch": 0.08676630662312652, "grad_norm": 0.24220672249794006, "learning_rate": 0.00019959326898609723, "loss": 11.6861, "step": 4145 }, { "epoch": 0.08678723938708867, "grad_norm": 0.1955297291278839, "learning_rate": 0.00019959307141412862, "loss": 11.6911, "step": 4146 }, { "epoch": 0.08680817215105083, "grad_norm": 0.20322692394256592, "learning_rate": 0.00019959287379428363, "loss": 11.6772, "step": 4147 }, { "epoch": 0.08682910491501297, "grad_norm": 0.2132789045572281, "learning_rate": 0.00019959267612656235, "loss": 11.6781, "step": 4148 }, { "epoch": 0.08685003767897513, "grad_norm": 0.18899226188659668, "learning_rate": 0.00019959247841096488, "loss": 11.6871, "step": 4149 }, { "epoch": 0.08687097044293729, "grad_norm": 0.22043101489543915, "learning_rate": 0.00019959228064749134, "loss": 11.6913, "step": 4150 }, { "epoch": 0.08689190320689943, "grad_norm": 0.20458774268627167, "learning_rate": 0.00019959208283614176, "loss": 11.6878, "step": 4151 }, { "epoch": 0.0869128359708616, "grad_norm": 0.2768487334251404, "learning_rate": 0.0001995918849769163, "loss": 11.6794, "step": 4152 }, { "epoch": 0.08693376873482375, "grad_norm": 0.1833738386631012, "learning_rate": 0.00019959168706981496, "loss": 11.6775, "step": 4153 }, { "epoch": 0.0869547014987859, "grad_norm": 0.3047119379043579, "learning_rate": 0.00019959148911483795, "loss": 11.6744, "step": 4154 }, { "epoch": 0.08697563426274806, "grad_norm": 0.21109911799430847, "learning_rate": 0.00019959129111198531, "loss": 11.6832, "step": 4155 }, { "epoch": 0.0869965670267102, "grad_norm": 0.271443247795105, "learning_rate": 0.00019959109306125713, "loss": 11.7097, "step": 4156 }, { "epoch": 0.08701749979067236, "grad_norm": 0.22943374514579773, "learning_rate": 0.00019959089496265352, "loss": 11.6833, "step": 4157 }, { "epoch": 0.08703843255463452, "grad_norm": 0.20966054499149323, "learning_rate": 0.00019959069681617456, "loss": 11.692, "step": 4158 }, { "epoch": 0.08705936531859666, "grad_norm": 0.2757425308227539, "learning_rate": 0.0001995904986218204, "loss": 11.6957, "step": 4159 }, { "epoch": 0.08708029808255882, "grad_norm": 0.21391817927360535, "learning_rate": 0.00019959030037959105, "loss": 11.6917, "step": 4160 }, { "epoch": 0.08710123084652098, "grad_norm": 0.20710763335227966, "learning_rate": 0.00019959010208948666, "loss": 11.67, "step": 4161 }, { "epoch": 0.08712216361048312, "grad_norm": 0.20893731713294983, "learning_rate": 0.0001995899037515073, "loss": 11.6811, "step": 4162 }, { "epoch": 0.08714309637444528, "grad_norm": 0.2190057933330536, "learning_rate": 0.0001995897053656531, "loss": 11.6962, "step": 4163 }, { "epoch": 0.08716402913840743, "grad_norm": 0.21827420592308044, "learning_rate": 0.0001995895069319241, "loss": 11.6907, "step": 4164 }, { "epoch": 0.08718496190236959, "grad_norm": 0.2157670110464096, "learning_rate": 0.00019958930845032048, "loss": 11.6883, "step": 4165 }, { "epoch": 0.08720589466633175, "grad_norm": 0.19927763938903809, "learning_rate": 0.00019958910992084222, "loss": 11.6965, "step": 4166 }, { "epoch": 0.08722682743029389, "grad_norm": 0.1968553215265274, "learning_rate": 0.00019958891134348953, "loss": 11.6837, "step": 4167 }, { "epoch": 0.08724776019425605, "grad_norm": 0.2106599062681198, "learning_rate": 0.00019958871271826242, "loss": 11.6875, "step": 4168 }, { "epoch": 0.08726869295821821, "grad_norm": 0.2223326414823532, "learning_rate": 0.00019958851404516104, "loss": 11.6896, "step": 4169 }, { "epoch": 0.08728962572218035, "grad_norm": 0.23098427057266235, "learning_rate": 0.00019958831532418546, "loss": 11.6865, "step": 4170 }, { "epoch": 0.08731055848614251, "grad_norm": 0.2306743711233139, "learning_rate": 0.0001995881165553358, "loss": 11.6891, "step": 4171 }, { "epoch": 0.08733149125010467, "grad_norm": 0.2236609309911728, "learning_rate": 0.00019958791773861211, "loss": 11.6896, "step": 4172 }, { "epoch": 0.08735242401406682, "grad_norm": 0.25905585289001465, "learning_rate": 0.00019958771887401453, "loss": 11.6739, "step": 4173 }, { "epoch": 0.08737335677802897, "grad_norm": 0.301694393157959, "learning_rate": 0.00019958751996154313, "loss": 11.6889, "step": 4174 }, { "epoch": 0.08739428954199112, "grad_norm": 0.21441707015037537, "learning_rate": 0.000199587321001198, "loss": 11.7042, "step": 4175 }, { "epoch": 0.08741522230595328, "grad_norm": 0.19119995832443237, "learning_rate": 0.00019958712199297926, "loss": 11.6818, "step": 4176 }, { "epoch": 0.08743615506991544, "grad_norm": 0.21030159294605255, "learning_rate": 0.00019958692293688701, "loss": 11.6758, "step": 4177 }, { "epoch": 0.08745708783387758, "grad_norm": 0.24201750755310059, "learning_rate": 0.0001995867238329213, "loss": 11.6876, "step": 4178 }, { "epoch": 0.08747802059783974, "grad_norm": 0.23698729276657104, "learning_rate": 0.00019958652468108227, "loss": 11.6828, "step": 4179 }, { "epoch": 0.0874989533618019, "grad_norm": 0.34077751636505127, "learning_rate": 0.00019958632548137002, "loss": 11.6763, "step": 4180 }, { "epoch": 0.08751988612576404, "grad_norm": 0.2151852697134018, "learning_rate": 0.0001995861262337846, "loss": 11.6793, "step": 4181 }, { "epoch": 0.0875408188897262, "grad_norm": 0.22545966506004333, "learning_rate": 0.00019958592693832615, "loss": 11.6734, "step": 4182 }, { "epoch": 0.08756175165368835, "grad_norm": 0.18389782309532166, "learning_rate": 0.00019958572759499473, "loss": 11.7, "step": 4183 }, { "epoch": 0.0875826844176505, "grad_norm": 0.17394782602787018, "learning_rate": 0.00019958552820379047, "loss": 11.6831, "step": 4184 }, { "epoch": 0.08760361718161266, "grad_norm": 0.20722459256649017, "learning_rate": 0.00019958532876471346, "loss": 11.6749, "step": 4185 }, { "epoch": 0.08762454994557481, "grad_norm": 0.20548732578754425, "learning_rate": 0.0001995851292777638, "loss": 11.6752, "step": 4186 }, { "epoch": 0.08764548270953697, "grad_norm": 0.24861834943294525, "learning_rate": 0.00019958492974294154, "loss": 11.6921, "step": 4187 }, { "epoch": 0.08766641547349913, "grad_norm": 0.21494974195957184, "learning_rate": 0.00019958473016024682, "loss": 11.675, "step": 4188 }, { "epoch": 0.08768734823746127, "grad_norm": 0.22053351998329163, "learning_rate": 0.00019958453052967972, "loss": 11.7109, "step": 4189 }, { "epoch": 0.08770828100142343, "grad_norm": 0.1850854456424713, "learning_rate": 0.00019958433085124036, "loss": 11.677, "step": 4190 }, { "epoch": 0.08772921376538557, "grad_norm": 0.2504333555698395, "learning_rate": 0.00019958413112492878, "loss": 11.6883, "step": 4191 }, { "epoch": 0.08775014652934773, "grad_norm": 0.22964458167552948, "learning_rate": 0.00019958393135074515, "loss": 11.7011, "step": 4192 }, { "epoch": 0.08777107929330989, "grad_norm": 0.2868655025959015, "learning_rate": 0.0001995837315286895, "loss": 11.6904, "step": 4193 }, { "epoch": 0.08779201205727204, "grad_norm": 0.203236922621727, "learning_rate": 0.00019958353165876197, "loss": 11.677, "step": 4194 }, { "epoch": 0.0878129448212342, "grad_norm": 0.16086354851722717, "learning_rate": 0.0001995833317409626, "loss": 11.6909, "step": 4195 }, { "epoch": 0.08783387758519635, "grad_norm": 0.19215284287929535, "learning_rate": 0.0001995831317752916, "loss": 11.673, "step": 4196 }, { "epoch": 0.0878548103491585, "grad_norm": 0.2930029332637787, "learning_rate": 0.00019958293176174893, "loss": 11.6814, "step": 4197 }, { "epoch": 0.08787574311312066, "grad_norm": 0.23312611877918243, "learning_rate": 0.0001995827317003348, "loss": 11.6824, "step": 4198 }, { "epoch": 0.08789667587708282, "grad_norm": 0.19893351197242737, "learning_rate": 0.00019958253159104922, "loss": 11.6832, "step": 4199 }, { "epoch": 0.08791760864104496, "grad_norm": 0.17703485488891602, "learning_rate": 0.00019958233143389234, "loss": 11.6838, "step": 4200 }, { "epoch": 0.08793854140500712, "grad_norm": 0.21387717127799988, "learning_rate": 0.00019958213122886422, "loss": 11.6839, "step": 4201 }, { "epoch": 0.08795947416896926, "grad_norm": 0.24165278673171997, "learning_rate": 0.000199581930975965, "loss": 11.6865, "step": 4202 }, { "epoch": 0.08798040693293142, "grad_norm": 0.22588993608951569, "learning_rate": 0.00019958173067519472, "loss": 11.693, "step": 4203 }, { "epoch": 0.08800133969689358, "grad_norm": 0.3268377482891083, "learning_rate": 0.0001995815303265535, "loss": 11.688, "step": 4204 }, { "epoch": 0.08802227246085573, "grad_norm": 0.20042875409126282, "learning_rate": 0.00019958132993004148, "loss": 11.687, "step": 4205 }, { "epoch": 0.08804320522481789, "grad_norm": 0.18892738223075867, "learning_rate": 0.0001995811294856587, "loss": 11.6725, "step": 4206 }, { "epoch": 0.08806413798878004, "grad_norm": 0.1652413159608841, "learning_rate": 0.00019958092899340527, "loss": 11.684, "step": 4207 }, { "epoch": 0.08808507075274219, "grad_norm": 0.2548951506614685, "learning_rate": 0.0001995807284532813, "loss": 11.6866, "step": 4208 }, { "epoch": 0.08810600351670435, "grad_norm": 0.25000089406967163, "learning_rate": 0.00019958052786528686, "loss": 11.7023, "step": 4209 }, { "epoch": 0.08812693628066649, "grad_norm": 0.21009542047977448, "learning_rate": 0.0001995803272294221, "loss": 11.6882, "step": 4210 }, { "epoch": 0.08814786904462865, "grad_norm": 0.22101949155330658, "learning_rate": 0.00019958012654568705, "loss": 11.6793, "step": 4211 }, { "epoch": 0.08816880180859081, "grad_norm": 0.23424488306045532, "learning_rate": 0.00019957992581408186, "loss": 11.6997, "step": 4212 }, { "epoch": 0.08818973457255296, "grad_norm": 0.2346920520067215, "learning_rate": 0.0001995797250346066, "loss": 11.6806, "step": 4213 }, { "epoch": 0.08821066733651511, "grad_norm": 0.21364907920360565, "learning_rate": 0.00019957952420726136, "loss": 11.7064, "step": 4214 }, { "epoch": 0.08823160010047727, "grad_norm": 0.22109957039356232, "learning_rate": 0.00019957932333204624, "loss": 11.683, "step": 4215 }, { "epoch": 0.08825253286443942, "grad_norm": 0.27763256430625916, "learning_rate": 0.00019957912240896138, "loss": 11.7124, "step": 4216 }, { "epoch": 0.08827346562840158, "grad_norm": 0.22927434742450714, "learning_rate": 0.0001995789214380068, "loss": 11.6856, "step": 4217 }, { "epoch": 0.08829439839236372, "grad_norm": 0.21863479912281036, "learning_rate": 0.00019957872041918268, "loss": 11.6925, "step": 4218 }, { "epoch": 0.08831533115632588, "grad_norm": 0.28286653757095337, "learning_rate": 0.00019957851935248903, "loss": 11.6828, "step": 4219 }, { "epoch": 0.08833626392028804, "grad_norm": 0.20238567888736725, "learning_rate": 0.000199578318237926, "loss": 11.6739, "step": 4220 }, { "epoch": 0.08835719668425018, "grad_norm": 0.22125370800495148, "learning_rate": 0.0001995781170754937, "loss": 11.6945, "step": 4221 }, { "epoch": 0.08837812944821234, "grad_norm": 0.24201014637947083, "learning_rate": 0.00019957791586519216, "loss": 11.6725, "step": 4222 }, { "epoch": 0.0883990622121745, "grad_norm": 0.20722642540931702, "learning_rate": 0.00019957771460702155, "loss": 11.688, "step": 4223 }, { "epoch": 0.08841999497613665, "grad_norm": 0.2464868724346161, "learning_rate": 0.00019957751330098192, "loss": 11.7096, "step": 4224 }, { "epoch": 0.0884409277400988, "grad_norm": 0.24636372923851013, "learning_rate": 0.00019957731194707341, "loss": 11.6906, "step": 4225 }, { "epoch": 0.08846186050406096, "grad_norm": 0.25903037190437317, "learning_rate": 0.00019957711054529608, "loss": 11.689, "step": 4226 }, { "epoch": 0.08848279326802311, "grad_norm": 0.20456895232200623, "learning_rate": 0.00019957690909565002, "loss": 11.6773, "step": 4227 }, { "epoch": 0.08850372603198527, "grad_norm": 0.20267890393733978, "learning_rate": 0.0001995767075981354, "loss": 11.681, "step": 4228 }, { "epoch": 0.08852465879594741, "grad_norm": 0.2524906396865845, "learning_rate": 0.0001995765060527522, "loss": 11.6759, "step": 4229 }, { "epoch": 0.08854559155990957, "grad_norm": 0.2921079993247986, "learning_rate": 0.0001995763044595006, "loss": 11.6919, "step": 4230 }, { "epoch": 0.08856652432387173, "grad_norm": 0.2304719090461731, "learning_rate": 0.00019957610281838068, "loss": 11.6839, "step": 4231 }, { "epoch": 0.08858745708783387, "grad_norm": 0.18206754326820374, "learning_rate": 0.0001995759011293925, "loss": 11.6903, "step": 4232 }, { "epoch": 0.08860838985179603, "grad_norm": 0.20894521474838257, "learning_rate": 0.00019957569939253625, "loss": 11.6786, "step": 4233 }, { "epoch": 0.08862932261575819, "grad_norm": 0.2674292027950287, "learning_rate": 0.0001995754976078119, "loss": 11.6988, "step": 4234 }, { "epoch": 0.08865025537972034, "grad_norm": 0.2507908046245575, "learning_rate": 0.00019957529577521968, "loss": 11.686, "step": 4235 }, { "epoch": 0.0886711881436825, "grad_norm": 0.22441045939922333, "learning_rate": 0.00019957509389475957, "loss": 11.6834, "step": 4236 }, { "epoch": 0.08869212090764464, "grad_norm": 0.22634974122047424, "learning_rate": 0.0001995748919664317, "loss": 11.6784, "step": 4237 }, { "epoch": 0.0887130536716068, "grad_norm": 0.3070991039276123, "learning_rate": 0.00019957468999023625, "loss": 11.6869, "step": 4238 }, { "epoch": 0.08873398643556896, "grad_norm": 0.20759934186935425, "learning_rate": 0.00019957448796617322, "loss": 11.6823, "step": 4239 }, { "epoch": 0.0887549191995311, "grad_norm": 0.24754664301872253, "learning_rate": 0.0001995742858942427, "loss": 11.6813, "step": 4240 }, { "epoch": 0.08877585196349326, "grad_norm": 0.26023638248443604, "learning_rate": 0.00019957408377444487, "loss": 11.6916, "step": 4241 }, { "epoch": 0.08879678472745542, "grad_norm": 0.20267927646636963, "learning_rate": 0.00019957388160677977, "loss": 11.6631, "step": 4242 }, { "epoch": 0.08881771749141756, "grad_norm": 0.2799358665943146, "learning_rate": 0.0001995736793912475, "loss": 11.6706, "step": 4243 }, { "epoch": 0.08883865025537972, "grad_norm": 0.23108689486980438, "learning_rate": 0.0001995734771278482, "loss": 11.6941, "step": 4244 }, { "epoch": 0.08885958301934187, "grad_norm": 0.26285481452941895, "learning_rate": 0.0001995732748165819, "loss": 11.6695, "step": 4245 }, { "epoch": 0.08888051578330403, "grad_norm": 0.190062016248703, "learning_rate": 0.00019957307245744874, "loss": 11.6983, "step": 4246 }, { "epoch": 0.08890144854726618, "grad_norm": 0.2818266749382019, "learning_rate": 0.0001995728700504488, "loss": 11.6771, "step": 4247 }, { "epoch": 0.08892238131122833, "grad_norm": 0.21661807596683502, "learning_rate": 0.0001995726675955822, "loss": 11.7016, "step": 4248 }, { "epoch": 0.08894331407519049, "grad_norm": 0.25207164883613586, "learning_rate": 0.00019957246509284905, "loss": 11.679, "step": 4249 }, { "epoch": 0.08896424683915265, "grad_norm": 0.20041613280773163, "learning_rate": 0.00019957226254224935, "loss": 11.6784, "step": 4250 }, { "epoch": 0.08898517960311479, "grad_norm": 0.2009960412979126, "learning_rate": 0.00019957205994378333, "loss": 11.6923, "step": 4251 }, { "epoch": 0.08900611236707695, "grad_norm": 0.24910247325897217, "learning_rate": 0.000199571857297451, "loss": 11.6823, "step": 4252 }, { "epoch": 0.08902704513103911, "grad_norm": 0.25115737318992615, "learning_rate": 0.0001995716546032525, "loss": 11.6843, "step": 4253 }, { "epoch": 0.08904797789500125, "grad_norm": 0.20606572926044464, "learning_rate": 0.00019957145186118788, "loss": 11.6973, "step": 4254 }, { "epoch": 0.08906891065896341, "grad_norm": 0.2248544543981552, "learning_rate": 0.0001995712490712573, "loss": 11.6897, "step": 4255 }, { "epoch": 0.08908984342292556, "grad_norm": 0.20216582715511322, "learning_rate": 0.0001995710462334608, "loss": 11.6729, "step": 4256 }, { "epoch": 0.08911077618688772, "grad_norm": 0.2103743851184845, "learning_rate": 0.0001995708433477985, "loss": 11.6823, "step": 4257 }, { "epoch": 0.08913170895084987, "grad_norm": 0.24388736486434937, "learning_rate": 0.00019957064041427054, "loss": 11.6675, "step": 4258 }, { "epoch": 0.08915264171481202, "grad_norm": 0.24141237139701843, "learning_rate": 0.00019957043743287694, "loss": 11.69, "step": 4259 }, { "epoch": 0.08917357447877418, "grad_norm": 0.1965312957763672, "learning_rate": 0.00019957023440361784, "loss": 11.6888, "step": 4260 }, { "epoch": 0.08919450724273634, "grad_norm": 0.22802449762821198, "learning_rate": 0.00019957003132649334, "loss": 11.6809, "step": 4261 }, { "epoch": 0.08921544000669848, "grad_norm": 0.28325480222702026, "learning_rate": 0.00019956982820150355, "loss": 11.6903, "step": 4262 }, { "epoch": 0.08923637277066064, "grad_norm": 0.2348574846982956, "learning_rate": 0.00019956962502864852, "loss": 11.6786, "step": 4263 }, { "epoch": 0.08925730553462279, "grad_norm": 0.20398291945457458, "learning_rate": 0.00019956942180792837, "loss": 11.6862, "step": 4264 }, { "epoch": 0.08927823829858494, "grad_norm": 0.2295929342508316, "learning_rate": 0.00019956921853934322, "loss": 11.6741, "step": 4265 }, { "epoch": 0.0892991710625471, "grad_norm": 0.24461735785007477, "learning_rate": 0.00019956901522289315, "loss": 11.6788, "step": 4266 }, { "epoch": 0.08932010382650925, "grad_norm": 0.22329413890838623, "learning_rate": 0.00019956881185857826, "loss": 11.6793, "step": 4267 }, { "epoch": 0.0893410365904714, "grad_norm": 0.29004380106925964, "learning_rate": 0.00019956860844639864, "loss": 11.6919, "step": 4268 }, { "epoch": 0.08936196935443357, "grad_norm": 0.22996960580348969, "learning_rate": 0.00019956840498635439, "loss": 11.6944, "step": 4269 }, { "epoch": 0.08938290211839571, "grad_norm": 0.1697688102722168, "learning_rate": 0.00019956820147844565, "loss": 11.6745, "step": 4270 }, { "epoch": 0.08940383488235787, "grad_norm": 0.22438198328018188, "learning_rate": 0.00019956799792267242, "loss": 11.6767, "step": 4271 }, { "epoch": 0.08942476764632001, "grad_norm": 0.22618789970874786, "learning_rate": 0.00019956779431903489, "loss": 11.684, "step": 4272 }, { "epoch": 0.08944570041028217, "grad_norm": 0.2601369321346283, "learning_rate": 0.0001995675906675331, "loss": 11.6942, "step": 4273 }, { "epoch": 0.08946663317424433, "grad_norm": 0.22234463691711426, "learning_rate": 0.00019956738696816725, "loss": 11.6974, "step": 4274 }, { "epoch": 0.08948756593820648, "grad_norm": 0.20638567209243774, "learning_rate": 0.00019956718322093728, "loss": 11.6808, "step": 4275 }, { "epoch": 0.08950849870216863, "grad_norm": 0.212487131357193, "learning_rate": 0.0001995669794258434, "loss": 11.6788, "step": 4276 }, { "epoch": 0.0895294314661308, "grad_norm": 0.21881940960884094, "learning_rate": 0.0001995667755828857, "loss": 11.6858, "step": 4277 }, { "epoch": 0.08955036423009294, "grad_norm": 0.19529619812965393, "learning_rate": 0.00019956657169206422, "loss": 11.6761, "step": 4278 }, { "epoch": 0.0895712969940551, "grad_norm": 0.20694179832935333, "learning_rate": 0.0001995663677533791, "loss": 11.6844, "step": 4279 }, { "epoch": 0.08959222975801726, "grad_norm": 0.22506244480609894, "learning_rate": 0.00019956616376683045, "loss": 11.6843, "step": 4280 }, { "epoch": 0.0896131625219794, "grad_norm": 0.28897449374198914, "learning_rate": 0.00019956595973241834, "loss": 11.6837, "step": 4281 }, { "epoch": 0.08963409528594156, "grad_norm": 0.2472546398639679, "learning_rate": 0.00019956575565014287, "loss": 11.6957, "step": 4282 }, { "epoch": 0.0896550280499037, "grad_norm": 0.25635766983032227, "learning_rate": 0.00019956555152000416, "loss": 11.6788, "step": 4283 }, { "epoch": 0.08967596081386586, "grad_norm": 0.2081732451915741, "learning_rate": 0.00019956534734200226, "loss": 11.6728, "step": 4284 }, { "epoch": 0.08969689357782802, "grad_norm": 0.233876034617424, "learning_rate": 0.00019956514311613734, "loss": 11.6892, "step": 4285 }, { "epoch": 0.08971782634179017, "grad_norm": 0.19954164326190948, "learning_rate": 0.00019956493884240944, "loss": 11.679, "step": 4286 }, { "epoch": 0.08973875910575232, "grad_norm": 0.2298687845468521, "learning_rate": 0.0001995647345208187, "loss": 11.6861, "step": 4287 }, { "epoch": 0.08975969186971448, "grad_norm": 0.26491475105285645, "learning_rate": 0.0001995645301513652, "loss": 11.6746, "step": 4288 }, { "epoch": 0.08978062463367663, "grad_norm": 0.21484430134296417, "learning_rate": 0.00019956432573404903, "loss": 11.6778, "step": 4289 }, { "epoch": 0.08980155739763879, "grad_norm": 0.2038814127445221, "learning_rate": 0.00019956412126887027, "loss": 11.6904, "step": 4290 }, { "epoch": 0.08982249016160093, "grad_norm": 0.20989304780960083, "learning_rate": 0.00019956391675582903, "loss": 11.6773, "step": 4291 }, { "epoch": 0.08984342292556309, "grad_norm": 0.2729890048503876, "learning_rate": 0.00019956371219492547, "loss": 11.6694, "step": 4292 }, { "epoch": 0.08986435568952525, "grad_norm": 0.2741398513317108, "learning_rate": 0.00019956350758615961, "loss": 11.6963, "step": 4293 }, { "epoch": 0.0898852884534874, "grad_norm": 0.22821077704429626, "learning_rate": 0.00019956330292953157, "loss": 11.6832, "step": 4294 }, { "epoch": 0.08990622121744955, "grad_norm": 0.20762915909290314, "learning_rate": 0.00019956309822504148, "loss": 11.6784, "step": 4295 }, { "epoch": 0.08992715398141171, "grad_norm": 0.19882477819919586, "learning_rate": 0.00019956289347268936, "loss": 11.6784, "step": 4296 }, { "epoch": 0.08994808674537386, "grad_norm": 0.18540741503238678, "learning_rate": 0.00019956268867247544, "loss": 11.6621, "step": 4297 }, { "epoch": 0.08996901950933602, "grad_norm": 0.3226054906845093, "learning_rate": 0.00019956248382439968, "loss": 11.6841, "step": 4298 }, { "epoch": 0.08998995227329816, "grad_norm": 0.1948930025100708, "learning_rate": 0.00019956227892846228, "loss": 11.6897, "step": 4299 }, { "epoch": 0.09001088503726032, "grad_norm": 0.25971555709838867, "learning_rate": 0.00019956207398466326, "loss": 11.6665, "step": 4300 }, { "epoch": 0.09003181780122248, "grad_norm": 0.2430376559495926, "learning_rate": 0.00019956186899300278, "loss": 11.6936, "step": 4301 }, { "epoch": 0.09005275056518462, "grad_norm": 0.3091050088405609, "learning_rate": 0.00019956166395348088, "loss": 11.6708, "step": 4302 }, { "epoch": 0.09007368332914678, "grad_norm": 0.2154577374458313, "learning_rate": 0.00019956145886609772, "loss": 11.6892, "step": 4303 }, { "epoch": 0.09009461609310894, "grad_norm": 0.2361663579940796, "learning_rate": 0.00019956125373085337, "loss": 11.6696, "step": 4304 }, { "epoch": 0.09011554885707108, "grad_norm": 0.26339712738990784, "learning_rate": 0.00019956104854774794, "loss": 11.6805, "step": 4305 }, { "epoch": 0.09013648162103324, "grad_norm": 0.18492095172405243, "learning_rate": 0.0001995608433167815, "loss": 11.6754, "step": 4306 }, { "epoch": 0.09015741438499539, "grad_norm": 0.21474623680114746, "learning_rate": 0.00019956063803795417, "loss": 11.6824, "step": 4307 }, { "epoch": 0.09017834714895755, "grad_norm": 0.185768261551857, "learning_rate": 0.00019956043271126606, "loss": 11.6673, "step": 4308 }, { "epoch": 0.0901992799129197, "grad_norm": 0.20302343368530273, "learning_rate": 0.00019956022733671723, "loss": 11.678, "step": 4309 }, { "epoch": 0.09022021267688185, "grad_norm": 0.29271209239959717, "learning_rate": 0.00019956002191430785, "loss": 11.6708, "step": 4310 }, { "epoch": 0.09024114544084401, "grad_norm": 0.22457005083560944, "learning_rate": 0.00019955981644403795, "loss": 11.6904, "step": 4311 }, { "epoch": 0.09026207820480617, "grad_norm": 0.20543712377548218, "learning_rate": 0.00019955961092590763, "loss": 11.6697, "step": 4312 }, { "epoch": 0.09028301096876831, "grad_norm": 0.2759888470172882, "learning_rate": 0.00019955940535991702, "loss": 11.6864, "step": 4313 }, { "epoch": 0.09030394373273047, "grad_norm": 0.21010081470012665, "learning_rate": 0.00019955919974606624, "loss": 11.6791, "step": 4314 }, { "epoch": 0.09032487649669263, "grad_norm": 0.176194429397583, "learning_rate": 0.0001995589940843553, "loss": 11.6974, "step": 4315 }, { "epoch": 0.09034580926065477, "grad_norm": 0.2141769677400589, "learning_rate": 0.0001995587883747844, "loss": 11.688, "step": 4316 }, { "epoch": 0.09036674202461693, "grad_norm": 0.24427153170108795, "learning_rate": 0.00019955858261735357, "loss": 11.6799, "step": 4317 }, { "epoch": 0.09038767478857908, "grad_norm": 0.2210165411233902, "learning_rate": 0.00019955837681206296, "loss": 11.6786, "step": 4318 }, { "epoch": 0.09040860755254124, "grad_norm": 0.19411030411720276, "learning_rate": 0.00019955817095891262, "loss": 11.6901, "step": 4319 }, { "epoch": 0.0904295403165034, "grad_norm": 0.22254377603530884, "learning_rate": 0.00019955796505790268, "loss": 11.6627, "step": 4320 }, { "epoch": 0.09045047308046554, "grad_norm": 0.20812775194644928, "learning_rate": 0.00019955775910903323, "loss": 11.6891, "step": 4321 }, { "epoch": 0.0904714058444277, "grad_norm": 0.21940012276172638, "learning_rate": 0.00019955755311230438, "loss": 11.6836, "step": 4322 }, { "epoch": 0.09049233860838986, "grad_norm": 0.20068244636058807, "learning_rate": 0.0001995573470677162, "loss": 11.6784, "step": 4323 }, { "epoch": 0.090513271372352, "grad_norm": 0.17645351588726044, "learning_rate": 0.00019955714097526884, "loss": 11.6877, "step": 4324 }, { "epoch": 0.09053420413631416, "grad_norm": 0.24253182113170624, "learning_rate": 0.00019955693483496233, "loss": 11.7041, "step": 4325 }, { "epoch": 0.0905551369002763, "grad_norm": 0.18042497336864471, "learning_rate": 0.00019955672864679683, "loss": 11.6707, "step": 4326 }, { "epoch": 0.09057606966423846, "grad_norm": 0.2101825475692749, "learning_rate": 0.0001995565224107724, "loss": 11.685, "step": 4327 }, { "epoch": 0.09059700242820062, "grad_norm": 0.22122474014759064, "learning_rate": 0.00019955631612688914, "loss": 11.678, "step": 4328 }, { "epoch": 0.09061793519216277, "grad_norm": 0.20711608231067657, "learning_rate": 0.00019955610979514722, "loss": 11.6885, "step": 4329 }, { "epoch": 0.09063886795612493, "grad_norm": 0.1994129866361618, "learning_rate": 0.00019955590341554663, "loss": 11.6878, "step": 4330 }, { "epoch": 0.09065980072008709, "grad_norm": 0.2346254140138626, "learning_rate": 0.00019955569698808755, "loss": 11.6827, "step": 4331 }, { "epoch": 0.09068073348404923, "grad_norm": 0.29591357707977295, "learning_rate": 0.00019955549051277002, "loss": 11.6907, "step": 4332 }, { "epoch": 0.09070166624801139, "grad_norm": 0.18319126963615417, "learning_rate": 0.00019955528398959423, "loss": 11.6727, "step": 4333 }, { "epoch": 0.09072259901197353, "grad_norm": 0.21487760543823242, "learning_rate": 0.00019955507741856017, "loss": 11.69, "step": 4334 }, { "epoch": 0.09074353177593569, "grad_norm": 0.27721109986305237, "learning_rate": 0.000199554870799668, "loss": 11.709, "step": 4335 }, { "epoch": 0.09076446453989785, "grad_norm": 0.19396069645881653, "learning_rate": 0.0001995546641329178, "loss": 11.6646, "step": 4336 }, { "epoch": 0.09078539730386, "grad_norm": 0.19421017169952393, "learning_rate": 0.0001995544574183097, "loss": 11.6866, "step": 4337 }, { "epoch": 0.09080633006782216, "grad_norm": 0.24275441467761993, "learning_rate": 0.00019955425065584374, "loss": 11.6745, "step": 4338 }, { "epoch": 0.09082726283178431, "grad_norm": 0.2564951479434967, "learning_rate": 0.0001995540438455201, "loss": 11.6723, "step": 4339 }, { "epoch": 0.09084819559574646, "grad_norm": 0.1743279993534088, "learning_rate": 0.00019955383698733883, "loss": 11.6839, "step": 4340 }, { "epoch": 0.09086912835970862, "grad_norm": 0.17952509224414825, "learning_rate": 0.0001995536300813, "loss": 11.6993, "step": 4341 }, { "epoch": 0.09089006112367078, "grad_norm": 0.20434808731079102, "learning_rate": 0.0001995534231274038, "loss": 11.6879, "step": 4342 }, { "epoch": 0.09091099388763292, "grad_norm": 0.21128638088703156, "learning_rate": 0.00019955321612565025, "loss": 11.6771, "step": 4343 }, { "epoch": 0.09093192665159508, "grad_norm": 0.2235097885131836, "learning_rate": 0.00019955300907603945, "loss": 11.6818, "step": 4344 }, { "epoch": 0.09095285941555722, "grad_norm": 0.2395257204771042, "learning_rate": 0.00019955280197857153, "loss": 11.6908, "step": 4345 }, { "epoch": 0.09097379217951938, "grad_norm": 0.18470829725265503, "learning_rate": 0.0001995525948332466, "loss": 11.664, "step": 4346 }, { "epoch": 0.09099472494348154, "grad_norm": 0.18144655227661133, "learning_rate": 0.00019955238764006472, "loss": 11.6881, "step": 4347 }, { "epoch": 0.09101565770744369, "grad_norm": 0.19254465401172638, "learning_rate": 0.00019955218039902604, "loss": 11.686, "step": 4348 }, { "epoch": 0.09103659047140585, "grad_norm": 0.20904798805713654, "learning_rate": 0.0001995519731101306, "loss": 11.6899, "step": 4349 }, { "epoch": 0.091057523235368, "grad_norm": 0.2557923495769501, "learning_rate": 0.00019955176577337856, "loss": 11.6897, "step": 4350 }, { "epoch": 0.09107845599933015, "grad_norm": 0.24258758127689362, "learning_rate": 0.00019955155838877, "loss": 11.6821, "step": 4351 }, { "epoch": 0.09109938876329231, "grad_norm": 0.2392372339963913, "learning_rate": 0.000199551350956305, "loss": 11.6888, "step": 4352 }, { "epoch": 0.09112032152725445, "grad_norm": 0.21391384303569794, "learning_rate": 0.00019955114347598366, "loss": 11.6903, "step": 4353 }, { "epoch": 0.09114125429121661, "grad_norm": 0.3375825881958008, "learning_rate": 0.0001995509359478061, "loss": 11.7023, "step": 4354 }, { "epoch": 0.09116218705517877, "grad_norm": 0.2622283697128296, "learning_rate": 0.0001995507283717724, "loss": 11.6767, "step": 4355 }, { "epoch": 0.09118311981914091, "grad_norm": 0.20774106681346893, "learning_rate": 0.00019955052074788267, "loss": 11.6814, "step": 4356 }, { "epoch": 0.09120405258310307, "grad_norm": 0.22325819730758667, "learning_rate": 0.000199550313076137, "loss": 11.6893, "step": 4357 }, { "epoch": 0.09122498534706523, "grad_norm": 0.2565959393978119, "learning_rate": 0.00019955010535653553, "loss": 11.7051, "step": 4358 }, { "epoch": 0.09124591811102738, "grad_norm": 0.17872360348701477, "learning_rate": 0.0001995498975890783, "loss": 11.6884, "step": 4359 }, { "epoch": 0.09126685087498954, "grad_norm": 0.26675572991371155, "learning_rate": 0.00019954968977376546, "loss": 11.6906, "step": 4360 }, { "epoch": 0.09128778363895168, "grad_norm": 0.20655566453933716, "learning_rate": 0.00019954948191059708, "loss": 11.674, "step": 4361 }, { "epoch": 0.09130871640291384, "grad_norm": 0.2658756673336029, "learning_rate": 0.00019954927399957328, "loss": 11.6698, "step": 4362 }, { "epoch": 0.091329649166876, "grad_norm": 0.24320372939109802, "learning_rate": 0.00019954906604069414, "loss": 11.6852, "step": 4363 }, { "epoch": 0.09135058193083814, "grad_norm": 0.19369374215602875, "learning_rate": 0.00019954885803395977, "loss": 11.6775, "step": 4364 }, { "epoch": 0.0913715146948003, "grad_norm": 0.2046755701303482, "learning_rate": 0.00019954864997937026, "loss": 11.6982, "step": 4365 }, { "epoch": 0.09139244745876246, "grad_norm": 0.22090758383274078, "learning_rate": 0.00019954844187692572, "loss": 11.6909, "step": 4366 }, { "epoch": 0.0914133802227246, "grad_norm": 0.20026826858520508, "learning_rate": 0.00019954823372662626, "loss": 11.6758, "step": 4367 }, { "epoch": 0.09143431298668676, "grad_norm": 0.187777578830719, "learning_rate": 0.00019954802552847196, "loss": 11.683, "step": 4368 }, { "epoch": 0.09145524575064892, "grad_norm": 0.2144186943769455, "learning_rate": 0.00019954781728246293, "loss": 11.683, "step": 4369 }, { "epoch": 0.09147617851461107, "grad_norm": 0.24479784071445465, "learning_rate": 0.00019954760898859927, "loss": 11.6831, "step": 4370 }, { "epoch": 0.09149711127857323, "grad_norm": 0.18353021144866943, "learning_rate": 0.0001995474006468811, "loss": 11.695, "step": 4371 }, { "epoch": 0.09151804404253537, "grad_norm": 0.17407935857772827, "learning_rate": 0.00019954719225730847, "loss": 11.6771, "step": 4372 }, { "epoch": 0.09153897680649753, "grad_norm": 0.20162639021873474, "learning_rate": 0.00019954698381988154, "loss": 11.691, "step": 4373 }, { "epoch": 0.09155990957045969, "grad_norm": 0.22021955251693726, "learning_rate": 0.00019954677533460036, "loss": 11.6866, "step": 4374 }, { "epoch": 0.09158084233442183, "grad_norm": 0.22271335124969482, "learning_rate": 0.00019954656680146502, "loss": 11.6885, "step": 4375 }, { "epoch": 0.09160177509838399, "grad_norm": 0.2606235444545746, "learning_rate": 0.0001995463582204757, "loss": 11.6965, "step": 4376 }, { "epoch": 0.09162270786234615, "grad_norm": 0.19238336384296417, "learning_rate": 0.00019954614959163242, "loss": 11.6921, "step": 4377 }, { "epoch": 0.0916436406263083, "grad_norm": 0.22724927961826324, "learning_rate": 0.00019954594091493535, "loss": 11.6845, "step": 4378 }, { "epoch": 0.09166457339027045, "grad_norm": 0.21970495581626892, "learning_rate": 0.0001995457321903845, "loss": 11.6794, "step": 4379 }, { "epoch": 0.0916855061542326, "grad_norm": 0.18438519537448883, "learning_rate": 0.00019954552341798005, "loss": 11.6809, "step": 4380 }, { "epoch": 0.09170643891819476, "grad_norm": 0.21503882110118866, "learning_rate": 0.00019954531459772206, "loss": 11.684, "step": 4381 }, { "epoch": 0.09172737168215692, "grad_norm": 0.22520118951797485, "learning_rate": 0.00019954510572961067, "loss": 11.6945, "step": 4382 }, { "epoch": 0.09174830444611906, "grad_norm": 0.17679336667060852, "learning_rate": 0.00019954489681364593, "loss": 11.6924, "step": 4383 }, { "epoch": 0.09176923721008122, "grad_norm": 0.2101430743932724, "learning_rate": 0.00019954468784982797, "loss": 11.6633, "step": 4384 }, { "epoch": 0.09179016997404338, "grad_norm": 0.2758921682834625, "learning_rate": 0.00019954447883815686, "loss": 11.6798, "step": 4385 }, { "epoch": 0.09181110273800552, "grad_norm": 0.2501561641693115, "learning_rate": 0.00019954426977863277, "loss": 11.671, "step": 4386 }, { "epoch": 0.09183203550196768, "grad_norm": 0.27605047821998596, "learning_rate": 0.0001995440606712557, "loss": 11.6692, "step": 4387 }, { "epoch": 0.09185296826592983, "grad_norm": 0.20765650272369385, "learning_rate": 0.00019954385151602585, "loss": 11.6768, "step": 4388 }, { "epoch": 0.09187390102989199, "grad_norm": 0.20425264537334442, "learning_rate": 0.00019954364231294328, "loss": 11.6696, "step": 4389 }, { "epoch": 0.09189483379385414, "grad_norm": 0.20310814678668976, "learning_rate": 0.00019954343306200804, "loss": 11.6801, "step": 4390 }, { "epoch": 0.09191576655781629, "grad_norm": 0.23510050773620605, "learning_rate": 0.0001995432237632203, "loss": 11.6866, "step": 4391 }, { "epoch": 0.09193669932177845, "grad_norm": 0.2969638407230377, "learning_rate": 0.00019954301441658015, "loss": 11.6927, "step": 4392 }, { "epoch": 0.0919576320857406, "grad_norm": 0.21866317093372345, "learning_rate": 0.00019954280502208765, "loss": 11.6756, "step": 4393 }, { "epoch": 0.09197856484970275, "grad_norm": 0.22765035927295685, "learning_rate": 0.00019954259557974298, "loss": 11.683, "step": 4394 }, { "epoch": 0.09199949761366491, "grad_norm": 0.2716369330883026, "learning_rate": 0.00019954238608954614, "loss": 11.701, "step": 4395 }, { "epoch": 0.09202043037762707, "grad_norm": 0.18898192048072815, "learning_rate": 0.0001995421765514973, "loss": 11.685, "step": 4396 }, { "epoch": 0.09204136314158921, "grad_norm": 0.1915827989578247, "learning_rate": 0.00019954196696559656, "loss": 11.669, "step": 4397 }, { "epoch": 0.09206229590555137, "grad_norm": 0.24798688292503357, "learning_rate": 0.00019954175733184396, "loss": 11.6907, "step": 4398 }, { "epoch": 0.09208322866951352, "grad_norm": 0.26459068059921265, "learning_rate": 0.00019954154765023966, "loss": 11.696, "step": 4399 }, { "epoch": 0.09210416143347568, "grad_norm": 0.23984698951244354, "learning_rate": 0.00019954133792078374, "loss": 11.6622, "step": 4400 }, { "epoch": 0.09212509419743783, "grad_norm": 0.22329886257648468, "learning_rate": 0.0001995411281434763, "loss": 11.6727, "step": 4401 }, { "epoch": 0.09214602696139998, "grad_norm": 0.20562201738357544, "learning_rate": 0.00019954091831831747, "loss": 11.6743, "step": 4402 }, { "epoch": 0.09216695972536214, "grad_norm": 0.21581605076789856, "learning_rate": 0.0001995407084453073, "loss": 11.6797, "step": 4403 }, { "epoch": 0.0921878924893243, "grad_norm": 0.21256199479103088, "learning_rate": 0.00019954049852444593, "loss": 11.6856, "step": 4404 }, { "epoch": 0.09220882525328644, "grad_norm": 0.19942191243171692, "learning_rate": 0.00019954028855573346, "loss": 11.674, "step": 4405 }, { "epoch": 0.0922297580172486, "grad_norm": 0.23382125794887543, "learning_rate": 0.00019954007853916994, "loss": 11.671, "step": 4406 }, { "epoch": 0.09225069078121075, "grad_norm": 0.19201062619686127, "learning_rate": 0.00019953986847475553, "loss": 11.6942, "step": 4407 }, { "epoch": 0.0922716235451729, "grad_norm": 0.21762946248054504, "learning_rate": 0.0001995396583624903, "loss": 11.6798, "step": 4408 }, { "epoch": 0.09229255630913506, "grad_norm": 0.19129660725593567, "learning_rate": 0.00019953944820237437, "loss": 11.6856, "step": 4409 }, { "epoch": 0.09231348907309721, "grad_norm": 0.2171442210674286, "learning_rate": 0.00019953923799440786, "loss": 11.6794, "step": 4410 }, { "epoch": 0.09233442183705937, "grad_norm": 0.21364499628543854, "learning_rate": 0.00019953902773859082, "loss": 11.6962, "step": 4411 }, { "epoch": 0.09235535460102152, "grad_norm": 0.33415308594703674, "learning_rate": 0.00019953881743492338, "loss": 11.6978, "step": 4412 }, { "epoch": 0.09237628736498367, "grad_norm": 0.24485667049884796, "learning_rate": 0.00019953860708340563, "loss": 11.6908, "step": 4413 }, { "epoch": 0.09239722012894583, "grad_norm": 0.20229874551296234, "learning_rate": 0.00019953839668403767, "loss": 11.6831, "step": 4414 }, { "epoch": 0.09241815289290797, "grad_norm": 0.31930896639823914, "learning_rate": 0.0001995381862368196, "loss": 11.6857, "step": 4415 }, { "epoch": 0.09243908565687013, "grad_norm": 0.22931456565856934, "learning_rate": 0.00019953797574175157, "loss": 11.6827, "step": 4416 }, { "epoch": 0.09246001842083229, "grad_norm": 0.19238504767417908, "learning_rate": 0.00019953776519883362, "loss": 11.676, "step": 4417 }, { "epoch": 0.09248095118479444, "grad_norm": 0.1920665204524994, "learning_rate": 0.00019953755460806584, "loss": 11.6865, "step": 4418 }, { "epoch": 0.0925018839487566, "grad_norm": 0.3281708359718323, "learning_rate": 0.0001995373439694484, "loss": 11.6755, "step": 4419 }, { "epoch": 0.09252281671271875, "grad_norm": 0.21034300327301025, "learning_rate": 0.00019953713328298137, "loss": 11.6895, "step": 4420 }, { "epoch": 0.0925437494766809, "grad_norm": 0.2531827986240387, "learning_rate": 0.00019953692254866482, "loss": 11.6635, "step": 4421 }, { "epoch": 0.09256468224064306, "grad_norm": 0.24643540382385254, "learning_rate": 0.0001995367117664989, "loss": 11.6895, "step": 4422 }, { "epoch": 0.0925856150046052, "grad_norm": 0.20900771021842957, "learning_rate": 0.00019953650093648364, "loss": 11.693, "step": 4423 }, { "epoch": 0.09260654776856736, "grad_norm": 0.23996520042419434, "learning_rate": 0.00019953629005861924, "loss": 11.6982, "step": 4424 }, { "epoch": 0.09262748053252952, "grad_norm": 0.1650024950504303, "learning_rate": 0.00019953607913290574, "loss": 11.6857, "step": 4425 }, { "epoch": 0.09264841329649166, "grad_norm": 0.2505436837673187, "learning_rate": 0.00019953586815934322, "loss": 11.686, "step": 4426 }, { "epoch": 0.09266934606045382, "grad_norm": 0.22085817158222198, "learning_rate": 0.00019953565713793183, "loss": 11.6702, "step": 4427 }, { "epoch": 0.09269027882441598, "grad_norm": 0.20063184201717377, "learning_rate": 0.00019953544606867167, "loss": 11.6694, "step": 4428 }, { "epoch": 0.09271121158837813, "grad_norm": 0.21323184669017792, "learning_rate": 0.00019953523495156282, "loss": 11.6779, "step": 4429 }, { "epoch": 0.09273214435234028, "grad_norm": 0.17690305411815643, "learning_rate": 0.00019953502378660537, "loss": 11.6868, "step": 4430 }, { "epoch": 0.09275307711630244, "grad_norm": 0.18477670848369598, "learning_rate": 0.00019953481257379947, "loss": 11.6796, "step": 4431 }, { "epoch": 0.09277400988026459, "grad_norm": 0.22397831082344055, "learning_rate": 0.00019953460131314515, "loss": 11.6781, "step": 4432 }, { "epoch": 0.09279494264422675, "grad_norm": 0.24585294723510742, "learning_rate": 0.0001995343900046426, "loss": 11.6784, "step": 4433 }, { "epoch": 0.09281587540818889, "grad_norm": 0.20671649277210236, "learning_rate": 0.00019953417864829187, "loss": 11.6881, "step": 4434 }, { "epoch": 0.09283680817215105, "grad_norm": 0.22055241465568542, "learning_rate": 0.00019953396724409304, "loss": 11.6914, "step": 4435 }, { "epoch": 0.09285774093611321, "grad_norm": 0.21207433938980103, "learning_rate": 0.00019953375579204625, "loss": 11.6821, "step": 4436 }, { "epoch": 0.09287867370007535, "grad_norm": 0.8394518494606018, "learning_rate": 0.00019953354429215158, "loss": 11.7054, "step": 4437 }, { "epoch": 0.09289960646403751, "grad_norm": 0.21998171508312225, "learning_rate": 0.00019953333274440915, "loss": 11.6871, "step": 4438 }, { "epoch": 0.09292053922799967, "grad_norm": 0.2188493013381958, "learning_rate": 0.00019953312114881903, "loss": 11.6914, "step": 4439 }, { "epoch": 0.09294147199196182, "grad_norm": 0.20943418145179749, "learning_rate": 0.00019953290950538137, "loss": 11.6809, "step": 4440 }, { "epoch": 0.09296240475592397, "grad_norm": 0.2280757576227188, "learning_rate": 0.00019953269781409626, "loss": 11.666, "step": 4441 }, { "epoch": 0.09298333751988612, "grad_norm": 0.21057488024234772, "learning_rate": 0.00019953248607496377, "loss": 11.6897, "step": 4442 }, { "epoch": 0.09300427028384828, "grad_norm": 0.17987513542175293, "learning_rate": 0.00019953227428798397, "loss": 11.6657, "step": 4443 }, { "epoch": 0.09302520304781044, "grad_norm": 0.2426188439130783, "learning_rate": 0.00019953206245315708, "loss": 11.672, "step": 4444 }, { "epoch": 0.09304613581177258, "grad_norm": 0.3025939166545868, "learning_rate": 0.0001995318505704831, "loss": 11.6738, "step": 4445 }, { "epoch": 0.09306706857573474, "grad_norm": 0.19875067472457886, "learning_rate": 0.00019953163863996217, "loss": 11.6759, "step": 4446 }, { "epoch": 0.0930880013396969, "grad_norm": 0.25586336851119995, "learning_rate": 0.0001995314266615944, "loss": 11.6798, "step": 4447 }, { "epoch": 0.09310893410365904, "grad_norm": 0.17504116892814636, "learning_rate": 0.00019953121463537986, "loss": 11.688, "step": 4448 }, { "epoch": 0.0931298668676212, "grad_norm": 0.22005581855773926, "learning_rate": 0.0001995310025613187, "loss": 11.6862, "step": 4449 }, { "epoch": 0.09315079963158335, "grad_norm": 0.16369187831878662, "learning_rate": 0.00019953079043941093, "loss": 11.677, "step": 4450 }, { "epoch": 0.0931717323955455, "grad_norm": 0.29532551765441895, "learning_rate": 0.00019953057826965678, "loss": 11.6986, "step": 4451 }, { "epoch": 0.09319266515950766, "grad_norm": 0.21146102249622345, "learning_rate": 0.00019953036605205627, "loss": 11.6828, "step": 4452 }, { "epoch": 0.09321359792346981, "grad_norm": 0.2228243201971054, "learning_rate": 0.00019953015378660951, "loss": 11.6839, "step": 4453 }, { "epoch": 0.09323453068743197, "grad_norm": 0.19558167457580566, "learning_rate": 0.00019952994147331662, "loss": 11.6809, "step": 4454 }, { "epoch": 0.09325546345139413, "grad_norm": 0.19476789236068726, "learning_rate": 0.00019952972911217766, "loss": 11.6883, "step": 4455 }, { "epoch": 0.09327639621535627, "grad_norm": 0.22493264079093933, "learning_rate": 0.00019952951670319279, "loss": 11.6928, "step": 4456 }, { "epoch": 0.09329732897931843, "grad_norm": 0.20019854605197906, "learning_rate": 0.0001995293042463621, "loss": 11.706, "step": 4457 }, { "epoch": 0.09331826174328059, "grad_norm": 0.17723192274570465, "learning_rate": 0.00019952909174168567, "loss": 11.6728, "step": 4458 }, { "epoch": 0.09333919450724273, "grad_norm": 0.21317531168460846, "learning_rate": 0.0001995288791891636, "loss": 11.6776, "step": 4459 }, { "epoch": 0.09336012727120489, "grad_norm": 0.24463273584842682, "learning_rate": 0.00019952866658879602, "loss": 11.6865, "step": 4460 }, { "epoch": 0.09338106003516704, "grad_norm": 0.20802366733551025, "learning_rate": 0.00019952845394058296, "loss": 11.6913, "step": 4461 }, { "epoch": 0.0934019927991292, "grad_norm": 0.2395017445087433, "learning_rate": 0.00019952824124452466, "loss": 11.698, "step": 4462 }, { "epoch": 0.09342292556309136, "grad_norm": 0.253399133682251, "learning_rate": 0.00019952802850062108, "loss": 11.6973, "step": 4463 }, { "epoch": 0.0934438583270535, "grad_norm": 0.20090743899345398, "learning_rate": 0.00019952781570887243, "loss": 11.6941, "step": 4464 }, { "epoch": 0.09346479109101566, "grad_norm": 0.19387899339199066, "learning_rate": 0.0001995276028692787, "loss": 11.6799, "step": 4465 }, { "epoch": 0.09348572385497782, "grad_norm": 0.19191846251487732, "learning_rate": 0.0001995273899818401, "loss": 11.6744, "step": 4466 }, { "epoch": 0.09350665661893996, "grad_norm": 0.25989097356796265, "learning_rate": 0.00019952717704655668, "loss": 11.7063, "step": 4467 }, { "epoch": 0.09352758938290212, "grad_norm": 0.22835217416286469, "learning_rate": 0.00019952696406342856, "loss": 11.6933, "step": 4468 }, { "epoch": 0.09354852214686427, "grad_norm": 0.23307698965072632, "learning_rate": 0.00019952675103245584, "loss": 11.6949, "step": 4469 }, { "epoch": 0.09356945491082642, "grad_norm": 0.1999225914478302, "learning_rate": 0.0001995265379536386, "loss": 11.677, "step": 4470 }, { "epoch": 0.09359038767478858, "grad_norm": 0.2737245559692383, "learning_rate": 0.00019952632482697698, "loss": 11.6857, "step": 4471 }, { "epoch": 0.09361132043875073, "grad_norm": 0.20888276398181915, "learning_rate": 0.00019952611165247101, "loss": 11.6988, "step": 4472 }, { "epoch": 0.09363225320271289, "grad_norm": 0.2521858513355255, "learning_rate": 0.0001995258984301209, "loss": 11.6886, "step": 4473 }, { "epoch": 0.09365318596667505, "grad_norm": 0.1944669485092163, "learning_rate": 0.00019952568515992666, "loss": 11.6798, "step": 4474 }, { "epoch": 0.09367411873063719, "grad_norm": 0.27581819891929626, "learning_rate": 0.00019952547184188846, "loss": 11.6853, "step": 4475 }, { "epoch": 0.09369505149459935, "grad_norm": 0.1882285177707672, "learning_rate": 0.00019952525847600634, "loss": 11.6791, "step": 4476 }, { "epoch": 0.0937159842585615, "grad_norm": 0.25585758686065674, "learning_rate": 0.00019952504506228044, "loss": 11.6821, "step": 4477 }, { "epoch": 0.09373691702252365, "grad_norm": 0.25011324882507324, "learning_rate": 0.00019952483160071087, "loss": 11.6739, "step": 4478 }, { "epoch": 0.09375784978648581, "grad_norm": 0.2927612364292145, "learning_rate": 0.00019952461809129773, "loss": 11.7028, "step": 4479 }, { "epoch": 0.09377878255044796, "grad_norm": 0.2778088450431824, "learning_rate": 0.00019952440453404108, "loss": 11.701, "step": 4480 }, { "epoch": 0.09379971531441011, "grad_norm": 0.21054698526859283, "learning_rate": 0.00019952419092894108, "loss": 11.6892, "step": 4481 }, { "epoch": 0.09382064807837227, "grad_norm": 0.23851142823696136, "learning_rate": 0.00019952397727599782, "loss": 11.6726, "step": 4482 }, { "epoch": 0.09384158084233442, "grad_norm": 0.19419951736927032, "learning_rate": 0.00019952376357521134, "loss": 11.6833, "step": 4483 }, { "epoch": 0.09386251360629658, "grad_norm": 0.22311952710151672, "learning_rate": 0.0001995235498265818, "loss": 11.6939, "step": 4484 }, { "epoch": 0.09388344637025874, "grad_norm": 0.23972180485725403, "learning_rate": 0.00019952333603010934, "loss": 11.6806, "step": 4485 }, { "epoch": 0.09390437913422088, "grad_norm": 0.2232753336429596, "learning_rate": 0.00019952312218579396, "loss": 11.6623, "step": 4486 }, { "epoch": 0.09392531189818304, "grad_norm": 0.258847713470459, "learning_rate": 0.00019952290829363588, "loss": 11.6741, "step": 4487 }, { "epoch": 0.09394624466214518, "grad_norm": 0.18953008949756622, "learning_rate": 0.0001995226943536351, "loss": 11.6825, "step": 4488 }, { "epoch": 0.09396717742610734, "grad_norm": 0.21932923793792725, "learning_rate": 0.00019952248036579179, "loss": 11.6897, "step": 4489 }, { "epoch": 0.0939881101900695, "grad_norm": 0.21870018541812897, "learning_rate": 0.00019952226633010602, "loss": 11.6743, "step": 4490 }, { "epoch": 0.09400904295403165, "grad_norm": 0.2527317702770233, "learning_rate": 0.0001995220522465779, "loss": 11.6896, "step": 4491 }, { "epoch": 0.0940299757179938, "grad_norm": 0.2932043969631195, "learning_rate": 0.00019952183811520753, "loss": 11.6913, "step": 4492 }, { "epoch": 0.09405090848195596, "grad_norm": 0.2339760661125183, "learning_rate": 0.00019952162393599502, "loss": 11.6931, "step": 4493 }, { "epoch": 0.09407184124591811, "grad_norm": 0.24946650862693787, "learning_rate": 0.0001995214097089405, "loss": 11.6823, "step": 4494 }, { "epoch": 0.09409277400988027, "grad_norm": 0.24683518707752228, "learning_rate": 0.000199521195434044, "loss": 11.6887, "step": 4495 }, { "epoch": 0.09411370677384241, "grad_norm": 0.22329989075660706, "learning_rate": 0.00019952098111130568, "loss": 11.6812, "step": 4496 }, { "epoch": 0.09413463953780457, "grad_norm": 0.2561209499835968, "learning_rate": 0.00019952076674072564, "loss": 11.6925, "step": 4497 }, { "epoch": 0.09415557230176673, "grad_norm": 0.2914786636829376, "learning_rate": 0.000199520552322304, "loss": 11.7001, "step": 4498 }, { "epoch": 0.09417650506572887, "grad_norm": 0.23190811276435852, "learning_rate": 0.00019952033785604082, "loss": 11.6822, "step": 4499 }, { "epoch": 0.09419743782969103, "grad_norm": 0.19247987866401672, "learning_rate": 0.0001995201233419362, "loss": 11.6905, "step": 4500 }, { "epoch": 0.09421837059365319, "grad_norm": 0.2175147831439972, "learning_rate": 0.00019951990877999025, "loss": 11.6791, "step": 4501 }, { "epoch": 0.09423930335761534, "grad_norm": 0.21551688015460968, "learning_rate": 0.00019951969417020312, "loss": 11.6829, "step": 4502 }, { "epoch": 0.0942602361215775, "grad_norm": 0.23501774668693542, "learning_rate": 0.00019951947951257485, "loss": 11.6834, "step": 4503 }, { "epoch": 0.09428116888553964, "grad_norm": 0.2076350599527359, "learning_rate": 0.00019951926480710558, "loss": 11.6795, "step": 4504 }, { "epoch": 0.0943021016495018, "grad_norm": 0.199014350771904, "learning_rate": 0.00019951905005379542, "loss": 11.663, "step": 4505 }, { "epoch": 0.09432303441346396, "grad_norm": 0.27299508452415466, "learning_rate": 0.00019951883525264443, "loss": 11.6842, "step": 4506 }, { "epoch": 0.0943439671774261, "grad_norm": 0.23127412796020508, "learning_rate": 0.00019951862040365278, "loss": 11.6896, "step": 4507 }, { "epoch": 0.09436489994138826, "grad_norm": 0.22358934581279755, "learning_rate": 0.0001995184055068205, "loss": 11.6837, "step": 4508 }, { "epoch": 0.09438583270535042, "grad_norm": 0.21964821219444275, "learning_rate": 0.00019951819056214772, "loss": 11.6828, "step": 4509 }, { "epoch": 0.09440676546931256, "grad_norm": 0.26788797974586487, "learning_rate": 0.0001995179755696346, "loss": 11.6824, "step": 4510 }, { "epoch": 0.09442769823327472, "grad_norm": 0.21890360116958618, "learning_rate": 0.00019951776052928115, "loss": 11.6897, "step": 4511 }, { "epoch": 0.09444863099723688, "grad_norm": 0.2972078323364258, "learning_rate": 0.00019951754544108753, "loss": 11.6967, "step": 4512 }, { "epoch": 0.09446956376119903, "grad_norm": 0.22230270504951477, "learning_rate": 0.00019951733030505384, "loss": 11.6916, "step": 4513 }, { "epoch": 0.09449049652516119, "grad_norm": 0.20857536792755127, "learning_rate": 0.0001995171151211802, "loss": 11.7015, "step": 4514 }, { "epoch": 0.09451142928912333, "grad_norm": 0.273573100566864, "learning_rate": 0.00019951689988946666, "loss": 11.6896, "step": 4515 }, { "epoch": 0.09453236205308549, "grad_norm": 0.2248217761516571, "learning_rate": 0.00019951668460991333, "loss": 11.6678, "step": 4516 }, { "epoch": 0.09455329481704765, "grad_norm": 0.22430434823036194, "learning_rate": 0.00019951646928252037, "loss": 11.6779, "step": 4517 }, { "epoch": 0.09457422758100979, "grad_norm": 0.2046017050743103, "learning_rate": 0.00019951625390728783, "loss": 11.6778, "step": 4518 }, { "epoch": 0.09459516034497195, "grad_norm": 0.32008862495422363, "learning_rate": 0.00019951603848421585, "loss": 11.685, "step": 4519 }, { "epoch": 0.09461609310893411, "grad_norm": 0.2124159038066864, "learning_rate": 0.0001995158230133045, "loss": 11.6802, "step": 4520 }, { "epoch": 0.09463702587289625, "grad_norm": 0.20815639197826385, "learning_rate": 0.0001995156074945539, "loss": 11.6979, "step": 4521 }, { "epoch": 0.09465795863685841, "grad_norm": 0.22956126928329468, "learning_rate": 0.00019951539192796417, "loss": 11.6666, "step": 4522 }, { "epoch": 0.09467889140082056, "grad_norm": 0.19841602444648743, "learning_rate": 0.00019951517631353538, "loss": 11.6822, "step": 4523 }, { "epoch": 0.09469982416478272, "grad_norm": 0.20444585382938385, "learning_rate": 0.00019951496065126766, "loss": 11.6852, "step": 4524 }, { "epoch": 0.09472075692874488, "grad_norm": 0.28294637799263, "learning_rate": 0.00019951474494116109, "loss": 11.6873, "step": 4525 }, { "epoch": 0.09474168969270702, "grad_norm": 0.22794803977012634, "learning_rate": 0.0001995145291832158, "loss": 11.6771, "step": 4526 }, { "epoch": 0.09476262245666918, "grad_norm": 0.20687216520309448, "learning_rate": 0.00019951431337743186, "loss": 11.6863, "step": 4527 }, { "epoch": 0.09478355522063134, "grad_norm": 0.33973994851112366, "learning_rate": 0.00019951409752380942, "loss": 11.7071, "step": 4528 }, { "epoch": 0.09480448798459348, "grad_norm": 0.2556692063808441, "learning_rate": 0.00019951388162234857, "loss": 11.6732, "step": 4529 }, { "epoch": 0.09482542074855564, "grad_norm": 0.23439720273017883, "learning_rate": 0.00019951366567304936, "loss": 11.6604, "step": 4530 }, { "epoch": 0.09484635351251779, "grad_norm": 0.30509692430496216, "learning_rate": 0.000199513449675912, "loss": 11.6936, "step": 4531 }, { "epoch": 0.09486728627647995, "grad_norm": 0.280767560005188, "learning_rate": 0.00019951323363093647, "loss": 11.6885, "step": 4532 }, { "epoch": 0.0948882190404421, "grad_norm": 0.2492034137248993, "learning_rate": 0.00019951301753812296, "loss": 11.6976, "step": 4533 }, { "epoch": 0.09490915180440425, "grad_norm": 0.2305673062801361, "learning_rate": 0.00019951280139747155, "loss": 11.6901, "step": 4534 }, { "epoch": 0.09493008456836641, "grad_norm": 0.20894280076026917, "learning_rate": 0.00019951258520898234, "loss": 11.671, "step": 4535 }, { "epoch": 0.09495101733232857, "grad_norm": 0.30534759163856506, "learning_rate": 0.0001995123689726554, "loss": 11.6783, "step": 4536 }, { "epoch": 0.09497195009629071, "grad_norm": 0.3040326237678528, "learning_rate": 0.00019951215268849092, "loss": 11.7007, "step": 4537 }, { "epoch": 0.09499288286025287, "grad_norm": 0.33829307556152344, "learning_rate": 0.00019951193635648894, "loss": 11.6877, "step": 4538 }, { "epoch": 0.09501381562421501, "grad_norm": 0.22591328620910645, "learning_rate": 0.0001995117199766496, "loss": 11.6829, "step": 4539 }, { "epoch": 0.09503474838817717, "grad_norm": 0.21366707980632782, "learning_rate": 0.00019951150354897295, "loss": 11.6791, "step": 4540 }, { "epoch": 0.09505568115213933, "grad_norm": 0.21910683810710907, "learning_rate": 0.00019951128707345914, "loss": 11.6827, "step": 4541 }, { "epoch": 0.09507661391610148, "grad_norm": 0.24421027302742004, "learning_rate": 0.00019951107055010824, "loss": 11.6916, "step": 4542 }, { "epoch": 0.09509754668006364, "grad_norm": 0.21872171759605408, "learning_rate": 0.00019951085397892037, "loss": 11.6886, "step": 4543 }, { "epoch": 0.0951184794440258, "grad_norm": 0.21455290913581848, "learning_rate": 0.00019951063735989565, "loss": 11.6739, "step": 4544 }, { "epoch": 0.09513941220798794, "grad_norm": 0.1849103569984436, "learning_rate": 0.0001995104206930342, "loss": 11.6824, "step": 4545 }, { "epoch": 0.0951603449719501, "grad_norm": 0.26595771312713623, "learning_rate": 0.00019951020397833609, "loss": 11.6881, "step": 4546 }, { "epoch": 0.09518127773591226, "grad_norm": 0.18950031697750092, "learning_rate": 0.0001995099872158014, "loss": 11.689, "step": 4547 }, { "epoch": 0.0952022104998744, "grad_norm": 0.23956701159477234, "learning_rate": 0.0001995097704054303, "loss": 11.6869, "step": 4548 }, { "epoch": 0.09522314326383656, "grad_norm": 0.234455868601799, "learning_rate": 0.0001995095535472228, "loss": 11.6803, "step": 4549 }, { "epoch": 0.0952440760277987, "grad_norm": 0.21737134456634521, "learning_rate": 0.00019950933664117913, "loss": 11.6897, "step": 4550 }, { "epoch": 0.09526500879176086, "grad_norm": 0.2487403154373169, "learning_rate": 0.0001995091196872993, "loss": 11.6942, "step": 4551 }, { "epoch": 0.09528594155572302, "grad_norm": 0.1882897913455963, "learning_rate": 0.00019950890268558344, "loss": 11.6973, "step": 4552 }, { "epoch": 0.09530687431968517, "grad_norm": 0.2567608058452606, "learning_rate": 0.00019950868563603166, "loss": 11.6971, "step": 4553 }, { "epoch": 0.09532780708364733, "grad_norm": 0.23583123087882996, "learning_rate": 0.00019950846853864404, "loss": 11.6819, "step": 4554 }, { "epoch": 0.09534873984760948, "grad_norm": 0.24148444831371307, "learning_rate": 0.00019950825139342074, "loss": 11.6814, "step": 4555 }, { "epoch": 0.09536967261157163, "grad_norm": 0.20279237627983093, "learning_rate": 0.00019950803420036182, "loss": 11.6687, "step": 4556 }, { "epoch": 0.09539060537553379, "grad_norm": 0.2273595631122589, "learning_rate": 0.0001995078169594674, "loss": 11.6795, "step": 4557 }, { "epoch": 0.09541153813949593, "grad_norm": 0.23381751775741577, "learning_rate": 0.00019950759967073755, "loss": 11.6975, "step": 4558 }, { "epoch": 0.09543247090345809, "grad_norm": 0.25972431898117065, "learning_rate": 0.00019950738233417243, "loss": 11.679, "step": 4559 }, { "epoch": 0.09545340366742025, "grad_norm": 0.2848234176635742, "learning_rate": 0.00019950716494977208, "loss": 11.695, "step": 4560 }, { "epoch": 0.0954743364313824, "grad_norm": 0.20759902894496918, "learning_rate": 0.00019950694751753666, "loss": 11.6746, "step": 4561 }, { "epoch": 0.09549526919534455, "grad_norm": 0.1999208629131317, "learning_rate": 0.00019950673003746628, "loss": 11.6684, "step": 4562 }, { "epoch": 0.09551620195930671, "grad_norm": 0.21515128016471863, "learning_rate": 0.000199506512509561, "loss": 11.6885, "step": 4563 }, { "epoch": 0.09553713472326886, "grad_norm": 0.20959824323654175, "learning_rate": 0.00019950629493382095, "loss": 11.6828, "step": 4564 }, { "epoch": 0.09555806748723102, "grad_norm": 0.2524110674858093, "learning_rate": 0.00019950607731024624, "loss": 11.694, "step": 4565 }, { "epoch": 0.09557900025119316, "grad_norm": 0.1974067986011505, "learning_rate": 0.00019950585963883697, "loss": 11.6746, "step": 4566 }, { "epoch": 0.09559993301515532, "grad_norm": 0.21646998822689056, "learning_rate": 0.0001995056419195932, "loss": 11.6893, "step": 4567 }, { "epoch": 0.09562086577911748, "grad_norm": 0.2542760372161865, "learning_rate": 0.0001995054241525151, "loss": 11.6875, "step": 4568 }, { "epoch": 0.09564179854307962, "grad_norm": 0.21397487819194794, "learning_rate": 0.00019950520633760273, "loss": 11.6971, "step": 4569 }, { "epoch": 0.09566273130704178, "grad_norm": 0.24963130056858063, "learning_rate": 0.00019950498847485623, "loss": 11.6918, "step": 4570 }, { "epoch": 0.09568366407100394, "grad_norm": 0.2248779833316803, "learning_rate": 0.00019950477056427572, "loss": 11.6867, "step": 4571 }, { "epoch": 0.09570459683496609, "grad_norm": 0.18498969078063965, "learning_rate": 0.00019950455260586122, "loss": 11.6718, "step": 4572 }, { "epoch": 0.09572552959892824, "grad_norm": 0.22122377157211304, "learning_rate": 0.00019950433459961292, "loss": 11.688, "step": 4573 }, { "epoch": 0.0957464623628904, "grad_norm": 0.23459391295909882, "learning_rate": 0.00019950411654553088, "loss": 11.6784, "step": 4574 }, { "epoch": 0.09576739512685255, "grad_norm": 0.20681196451187134, "learning_rate": 0.0001995038984436152, "loss": 11.6702, "step": 4575 }, { "epoch": 0.0957883278908147, "grad_norm": 0.2184467315673828, "learning_rate": 0.00019950368029386604, "loss": 11.6858, "step": 4576 }, { "epoch": 0.09580926065477685, "grad_norm": 0.24598489701747894, "learning_rate": 0.00019950346209628343, "loss": 11.6827, "step": 4577 }, { "epoch": 0.09583019341873901, "grad_norm": 0.20866185426712036, "learning_rate": 0.00019950324385086755, "loss": 11.6786, "step": 4578 }, { "epoch": 0.09585112618270117, "grad_norm": 0.2359655350446701, "learning_rate": 0.0001995030255576184, "loss": 11.702, "step": 4579 }, { "epoch": 0.09587205894666331, "grad_norm": 0.2644476592540741, "learning_rate": 0.00019950280721653624, "loss": 11.6808, "step": 4580 }, { "epoch": 0.09589299171062547, "grad_norm": 0.19856853783130646, "learning_rate": 0.000199502588827621, "loss": 11.6992, "step": 4581 }, { "epoch": 0.09591392447458763, "grad_norm": 0.24641981720924377, "learning_rate": 0.00019950237039087295, "loss": 11.6621, "step": 4582 }, { "epoch": 0.09593485723854978, "grad_norm": 0.7335288524627686, "learning_rate": 0.00019950215190629207, "loss": 11.6777, "step": 4583 }, { "epoch": 0.09595579000251193, "grad_norm": 0.21065901219844818, "learning_rate": 0.00019950193337387853, "loss": 11.6687, "step": 4584 }, { "epoch": 0.09597672276647408, "grad_norm": 0.17840801179409027, "learning_rate": 0.0001995017147936324, "loss": 11.6787, "step": 4585 }, { "epoch": 0.09599765553043624, "grad_norm": 0.22557438910007477, "learning_rate": 0.0001995014961655538, "loss": 11.6849, "step": 4586 }, { "epoch": 0.0960185882943984, "grad_norm": 0.211531400680542, "learning_rate": 0.00019950127748964285, "loss": 11.6762, "step": 4587 }, { "epoch": 0.09603952105836054, "grad_norm": 0.24714019894599915, "learning_rate": 0.00019950105876589963, "loss": 11.6927, "step": 4588 }, { "epoch": 0.0960604538223227, "grad_norm": 0.2559976875782013, "learning_rate": 0.00019950083999432428, "loss": 11.6731, "step": 4589 }, { "epoch": 0.09608138658628486, "grad_norm": 0.24361149966716766, "learning_rate": 0.00019950062117491687, "loss": 11.6814, "step": 4590 }, { "epoch": 0.096102319350247, "grad_norm": 0.22312168776988983, "learning_rate": 0.0001995004023076775, "loss": 11.6805, "step": 4591 }, { "epoch": 0.09612325211420916, "grad_norm": 0.23439960181713104, "learning_rate": 0.00019950018339260633, "loss": 11.697, "step": 4592 }, { "epoch": 0.09614418487817131, "grad_norm": 0.23031477630138397, "learning_rate": 0.00019949996442970337, "loss": 11.6895, "step": 4593 }, { "epoch": 0.09616511764213347, "grad_norm": 0.23754346370697021, "learning_rate": 0.00019949974541896883, "loss": 11.6717, "step": 4594 }, { "epoch": 0.09618605040609562, "grad_norm": 0.23222580552101135, "learning_rate": 0.00019949952636040276, "loss": 11.6785, "step": 4595 }, { "epoch": 0.09620698317005777, "grad_norm": 0.2313169687986374, "learning_rate": 0.0001994993072540053, "loss": 11.703, "step": 4596 }, { "epoch": 0.09622791593401993, "grad_norm": 0.2557763457298279, "learning_rate": 0.00019949908809977645, "loss": 11.6879, "step": 4597 }, { "epoch": 0.09624884869798209, "grad_norm": 0.24224646389484406, "learning_rate": 0.00019949886889771644, "loss": 11.7003, "step": 4598 }, { "epoch": 0.09626978146194423, "grad_norm": 0.24619780480861664, "learning_rate": 0.00019949864964782536, "loss": 11.6829, "step": 4599 }, { "epoch": 0.09629071422590639, "grad_norm": 0.25508975982666016, "learning_rate": 0.00019949843035010326, "loss": 11.6901, "step": 4600 }, { "epoch": 0.09631164698986855, "grad_norm": 0.18268293142318726, "learning_rate": 0.00019949821100455025, "loss": 11.6894, "step": 4601 }, { "epoch": 0.0963325797538307, "grad_norm": 0.21652977168560028, "learning_rate": 0.00019949799161116647, "loss": 11.6778, "step": 4602 }, { "epoch": 0.09635351251779285, "grad_norm": 0.1924894005060196, "learning_rate": 0.00019949777216995203, "loss": 11.6806, "step": 4603 }, { "epoch": 0.096374445281755, "grad_norm": 0.22867508232593536, "learning_rate": 0.00019949755268090699, "loss": 11.6754, "step": 4604 }, { "epoch": 0.09639537804571716, "grad_norm": 0.22981970012187958, "learning_rate": 0.00019949733314403152, "loss": 11.6806, "step": 4605 }, { "epoch": 0.09641631080967931, "grad_norm": 0.19816793501377106, "learning_rate": 0.00019949711355932566, "loss": 11.6773, "step": 4606 }, { "epoch": 0.09643724357364146, "grad_norm": 0.23000511527061462, "learning_rate": 0.00019949689392678954, "loss": 11.6849, "step": 4607 }, { "epoch": 0.09645817633760362, "grad_norm": 0.21532778441905975, "learning_rate": 0.00019949667424642328, "loss": 11.6752, "step": 4608 }, { "epoch": 0.09647910910156578, "grad_norm": 0.18315744400024414, "learning_rate": 0.00019949645451822697, "loss": 11.6892, "step": 4609 }, { "epoch": 0.09650004186552792, "grad_norm": 0.21257470548152924, "learning_rate": 0.0001994962347422007, "loss": 11.6709, "step": 4610 }, { "epoch": 0.09652097462949008, "grad_norm": 0.2699788510799408, "learning_rate": 0.00019949601491834465, "loss": 11.6823, "step": 4611 }, { "epoch": 0.09654190739345223, "grad_norm": 0.30652499198913574, "learning_rate": 0.00019949579504665882, "loss": 11.6979, "step": 4612 }, { "epoch": 0.09656284015741438, "grad_norm": 0.21346096694469452, "learning_rate": 0.00019949557512714342, "loss": 11.6659, "step": 4613 }, { "epoch": 0.09658377292137654, "grad_norm": 0.2365170419216156, "learning_rate": 0.00019949535515979843, "loss": 11.6832, "step": 4614 }, { "epoch": 0.09660470568533869, "grad_norm": 0.25678902864456177, "learning_rate": 0.0001994951351446241, "loss": 11.6542, "step": 4615 }, { "epoch": 0.09662563844930085, "grad_norm": 0.20932932198047638, "learning_rate": 0.0001994949150816204, "loss": 11.6992, "step": 4616 }, { "epoch": 0.096646571213263, "grad_norm": 0.24401608109474182, "learning_rate": 0.00019949469497078755, "loss": 11.6934, "step": 4617 }, { "epoch": 0.09666750397722515, "grad_norm": 0.21527493000030518, "learning_rate": 0.0001994944748121256, "loss": 11.6884, "step": 4618 }, { "epoch": 0.09668843674118731, "grad_norm": 0.23974190652370453, "learning_rate": 0.00019949425460563466, "loss": 11.6854, "step": 4619 }, { "epoch": 0.09670936950514945, "grad_norm": 0.2240605354309082, "learning_rate": 0.0001994940343513148, "loss": 11.6859, "step": 4620 }, { "epoch": 0.09673030226911161, "grad_norm": 0.2097027748823166, "learning_rate": 0.0001994938140491662, "loss": 11.6751, "step": 4621 }, { "epoch": 0.09675123503307377, "grad_norm": 0.2419435977935791, "learning_rate": 0.00019949359369918893, "loss": 11.6873, "step": 4622 }, { "epoch": 0.09677216779703592, "grad_norm": 0.24452602863311768, "learning_rate": 0.0001994933733013831, "loss": 11.6897, "step": 4623 }, { "epoch": 0.09679310056099807, "grad_norm": 0.26683661341667175, "learning_rate": 0.00019949315285574878, "loss": 11.6889, "step": 4624 }, { "epoch": 0.09681403332496023, "grad_norm": 0.2435697466135025, "learning_rate": 0.00019949293236228615, "loss": 11.6841, "step": 4625 }, { "epoch": 0.09683496608892238, "grad_norm": 0.21409308910369873, "learning_rate": 0.00019949271182099522, "loss": 11.6754, "step": 4626 }, { "epoch": 0.09685589885288454, "grad_norm": 0.237098827958107, "learning_rate": 0.0001994924912318762, "loss": 11.68, "step": 4627 }, { "epoch": 0.0968768316168467, "grad_norm": 0.16684861481189728, "learning_rate": 0.00019949227059492915, "loss": 11.6695, "step": 4628 }, { "epoch": 0.09689776438080884, "grad_norm": 0.19034750759601593, "learning_rate": 0.00019949204991015411, "loss": 11.6752, "step": 4629 }, { "epoch": 0.096918697144771, "grad_norm": 0.18692737817764282, "learning_rate": 0.0001994918291775513, "loss": 11.6796, "step": 4630 }, { "epoch": 0.09693962990873314, "grad_norm": 0.24196787178516388, "learning_rate": 0.0001994916083971208, "loss": 11.6869, "step": 4631 }, { "epoch": 0.0969605626726953, "grad_norm": 0.26891863346099854, "learning_rate": 0.0001994913875688626, "loss": 11.6783, "step": 4632 }, { "epoch": 0.09698149543665746, "grad_norm": 0.2286616414785385, "learning_rate": 0.00019949116669277698, "loss": 11.6934, "step": 4633 }, { "epoch": 0.0970024282006196, "grad_norm": 0.2253054529428482, "learning_rate": 0.00019949094576886392, "loss": 11.682, "step": 4634 }, { "epoch": 0.09702336096458176, "grad_norm": 0.20375066995620728, "learning_rate": 0.00019949072479712355, "loss": 11.6887, "step": 4635 }, { "epoch": 0.09704429372854392, "grad_norm": 0.19486580789089203, "learning_rate": 0.00019949050377755602, "loss": 11.6764, "step": 4636 }, { "epoch": 0.09706522649250607, "grad_norm": 0.2608586549758911, "learning_rate": 0.0001994902827101614, "loss": 11.6814, "step": 4637 }, { "epoch": 0.09708615925646823, "grad_norm": 0.23862725496292114, "learning_rate": 0.00019949006159493987, "loss": 11.685, "step": 4638 }, { "epoch": 0.09710709202043037, "grad_norm": 0.2119758278131485, "learning_rate": 0.0001994898404318914, "loss": 11.6666, "step": 4639 }, { "epoch": 0.09712802478439253, "grad_norm": 0.20365500450134277, "learning_rate": 0.0001994896192210162, "loss": 11.6934, "step": 4640 }, { "epoch": 0.09714895754835469, "grad_norm": 0.22935399413108826, "learning_rate": 0.00019948939796231435, "loss": 11.6773, "step": 4641 }, { "epoch": 0.09716989031231683, "grad_norm": 0.24711988866329193, "learning_rate": 0.00019948917665578596, "loss": 11.6921, "step": 4642 }, { "epoch": 0.09719082307627899, "grad_norm": 0.2076830118894577, "learning_rate": 0.00019948895530143108, "loss": 11.6992, "step": 4643 }, { "epoch": 0.09721175584024115, "grad_norm": 0.19700108468532562, "learning_rate": 0.00019948873389924988, "loss": 11.6785, "step": 4644 }, { "epoch": 0.0972326886042033, "grad_norm": 0.24925267696380615, "learning_rate": 0.0001994885124492425, "loss": 11.6815, "step": 4645 }, { "epoch": 0.09725362136816545, "grad_norm": 0.19548405706882477, "learning_rate": 0.00019948829095140896, "loss": 11.6774, "step": 4646 }, { "epoch": 0.0972745541321276, "grad_norm": 0.22391915321350098, "learning_rate": 0.0001994880694057494, "loss": 11.6847, "step": 4647 }, { "epoch": 0.09729548689608976, "grad_norm": 0.23594319820404053, "learning_rate": 0.00019948784781226395, "loss": 11.6894, "step": 4648 }, { "epoch": 0.09731641966005192, "grad_norm": 0.2502136826515198, "learning_rate": 0.0001994876261709527, "loss": 11.694, "step": 4649 }, { "epoch": 0.09733735242401406, "grad_norm": 0.23172670602798462, "learning_rate": 0.00019948740448181575, "loss": 11.6818, "step": 4650 }, { "epoch": 0.09735828518797622, "grad_norm": 0.2789163887500763, "learning_rate": 0.0001994871827448532, "loss": 11.6786, "step": 4651 }, { "epoch": 0.09737921795193838, "grad_norm": 0.23625898361206055, "learning_rate": 0.00019948696096006518, "loss": 11.6711, "step": 4652 }, { "epoch": 0.09740015071590052, "grad_norm": 0.22814378142356873, "learning_rate": 0.00019948673912745176, "loss": 11.6737, "step": 4653 }, { "epoch": 0.09742108347986268, "grad_norm": 0.23671689629554749, "learning_rate": 0.00019948651724701312, "loss": 11.696, "step": 4654 }, { "epoch": 0.09744201624382483, "grad_norm": 0.3186839818954468, "learning_rate": 0.00019948629531874924, "loss": 11.6936, "step": 4655 }, { "epoch": 0.09746294900778699, "grad_norm": 0.22761468589305878, "learning_rate": 0.00019948607334266036, "loss": 11.6869, "step": 4656 }, { "epoch": 0.09748388177174915, "grad_norm": 0.23612622916698456, "learning_rate": 0.00019948585131874654, "loss": 11.6782, "step": 4657 }, { "epoch": 0.09750481453571129, "grad_norm": 0.22752879559993744, "learning_rate": 0.00019948562924700783, "loss": 11.6854, "step": 4658 }, { "epoch": 0.09752574729967345, "grad_norm": 0.20288988947868347, "learning_rate": 0.00019948540712744445, "loss": 11.6814, "step": 4659 }, { "epoch": 0.09754668006363561, "grad_norm": 0.21145957708358765, "learning_rate": 0.0001994851849600564, "loss": 11.6813, "step": 4660 }, { "epoch": 0.09756761282759775, "grad_norm": 0.20894262194633484, "learning_rate": 0.0001994849627448438, "loss": 11.6745, "step": 4661 }, { "epoch": 0.09758854559155991, "grad_norm": 0.21434736251831055, "learning_rate": 0.00019948474048180683, "loss": 11.6886, "step": 4662 }, { "epoch": 0.09760947835552207, "grad_norm": 0.21951283514499664, "learning_rate": 0.00019948451817094554, "loss": 11.6661, "step": 4663 }, { "epoch": 0.09763041111948421, "grad_norm": 0.17468836903572083, "learning_rate": 0.00019948429581226004, "loss": 11.6789, "step": 4664 }, { "epoch": 0.09765134388344637, "grad_norm": 0.22113072872161865, "learning_rate": 0.00019948407340575046, "loss": 11.6814, "step": 4665 }, { "epoch": 0.09767227664740852, "grad_norm": 0.23955956101417542, "learning_rate": 0.0001994838509514169, "loss": 11.6779, "step": 4666 }, { "epoch": 0.09769320941137068, "grad_norm": 0.23134846985340118, "learning_rate": 0.00019948362844925943, "loss": 11.6873, "step": 4667 }, { "epoch": 0.09771414217533284, "grad_norm": 0.29770931601524353, "learning_rate": 0.0001994834058992782, "loss": 11.6896, "step": 4668 }, { "epoch": 0.09773507493929498, "grad_norm": 0.30464693903923035, "learning_rate": 0.00019948318330147328, "loss": 11.7074, "step": 4669 }, { "epoch": 0.09775600770325714, "grad_norm": 0.21815650165081024, "learning_rate": 0.00019948296065584484, "loss": 11.6877, "step": 4670 }, { "epoch": 0.0977769404672193, "grad_norm": 0.20180150866508484, "learning_rate": 0.00019948273796239292, "loss": 11.6792, "step": 4671 }, { "epoch": 0.09779787323118144, "grad_norm": 0.291523814201355, "learning_rate": 0.00019948251522111768, "loss": 11.6911, "step": 4672 }, { "epoch": 0.0978188059951436, "grad_norm": 0.22898587584495544, "learning_rate": 0.00019948229243201918, "loss": 11.6754, "step": 4673 }, { "epoch": 0.09783973875910575, "grad_norm": 0.2842503786087036, "learning_rate": 0.00019948206959509755, "loss": 11.6994, "step": 4674 }, { "epoch": 0.0978606715230679, "grad_norm": 0.23736388981342316, "learning_rate": 0.0001994818467103529, "loss": 11.6935, "step": 4675 }, { "epoch": 0.09788160428703006, "grad_norm": 0.24664458632469177, "learning_rate": 0.00019948162377778533, "loss": 11.6918, "step": 4676 }, { "epoch": 0.09790253705099221, "grad_norm": 0.23058417439460754, "learning_rate": 0.00019948140079739494, "loss": 11.6845, "step": 4677 }, { "epoch": 0.09792346981495437, "grad_norm": 0.9084103107452393, "learning_rate": 0.00019948117776918189, "loss": 11.6426, "step": 4678 }, { "epoch": 0.09794440257891653, "grad_norm": 0.2677760422229767, "learning_rate": 0.00019948095469314621, "loss": 11.6805, "step": 4679 }, { "epoch": 0.09796533534287867, "grad_norm": 0.19264060258865356, "learning_rate": 0.00019948073156928804, "loss": 11.687, "step": 4680 }, { "epoch": 0.09798626810684083, "grad_norm": 0.20900416374206543, "learning_rate": 0.0001994805083976075, "loss": 11.6807, "step": 4681 }, { "epoch": 0.09800720087080297, "grad_norm": 0.1962137073278427, "learning_rate": 0.00019948028517810466, "loss": 11.6896, "step": 4682 }, { "epoch": 0.09802813363476513, "grad_norm": 0.2791329026222229, "learning_rate": 0.00019948006191077968, "loss": 11.6828, "step": 4683 }, { "epoch": 0.09804906639872729, "grad_norm": 0.28257107734680176, "learning_rate": 0.00019947983859563265, "loss": 11.6772, "step": 4684 }, { "epoch": 0.09806999916268944, "grad_norm": 0.22424335777759552, "learning_rate": 0.00019947961523266363, "loss": 11.7, "step": 4685 }, { "epoch": 0.0980909319266516, "grad_norm": 0.25988513231277466, "learning_rate": 0.0001994793918218728, "loss": 11.6881, "step": 4686 }, { "epoch": 0.09811186469061375, "grad_norm": 0.19790594279766083, "learning_rate": 0.0001994791683632602, "loss": 11.6834, "step": 4687 }, { "epoch": 0.0981327974545759, "grad_norm": 0.21447111666202545, "learning_rate": 0.000199478944856826, "loss": 11.696, "step": 4688 }, { "epoch": 0.09815373021853806, "grad_norm": 0.23854708671569824, "learning_rate": 0.00019947872130257025, "loss": 11.7025, "step": 4689 }, { "epoch": 0.09817466298250022, "grad_norm": 0.2307077795267105, "learning_rate": 0.00019947849770049313, "loss": 11.6899, "step": 4690 }, { "epoch": 0.09819559574646236, "grad_norm": 0.18145230412483215, "learning_rate": 0.00019947827405059467, "loss": 11.6839, "step": 4691 }, { "epoch": 0.09821652851042452, "grad_norm": 0.24589157104492188, "learning_rate": 0.000199478050352875, "loss": 11.6842, "step": 4692 }, { "epoch": 0.09823746127438666, "grad_norm": 0.3211456537246704, "learning_rate": 0.00019947782660733427, "loss": 11.6759, "step": 4693 }, { "epoch": 0.09825839403834882, "grad_norm": 0.2507379651069641, "learning_rate": 0.00019947760281397253, "loss": 11.6935, "step": 4694 }, { "epoch": 0.09827932680231098, "grad_norm": 0.1884298026561737, "learning_rate": 0.0001994773789727899, "loss": 11.6831, "step": 4695 }, { "epoch": 0.09830025956627313, "grad_norm": 0.23230589926242828, "learning_rate": 0.00019947715508378654, "loss": 11.6845, "step": 4696 }, { "epoch": 0.09832119233023529, "grad_norm": 0.24057315289974213, "learning_rate": 0.0001994769311469625, "loss": 11.692, "step": 4697 }, { "epoch": 0.09834212509419744, "grad_norm": 0.2617988586425781, "learning_rate": 0.0001994767071623179, "loss": 11.6798, "step": 4698 }, { "epoch": 0.09836305785815959, "grad_norm": 0.22502046823501587, "learning_rate": 0.00019947648312985285, "loss": 11.6863, "step": 4699 }, { "epoch": 0.09838399062212175, "grad_norm": 0.24130979180335999, "learning_rate": 0.0001994762590495675, "loss": 11.6799, "step": 4700 }, { "epoch": 0.09840492338608389, "grad_norm": 0.262909859418869, "learning_rate": 0.00019947603492146185, "loss": 11.693, "step": 4701 }, { "epoch": 0.09842585615004605, "grad_norm": 0.3399937152862549, "learning_rate": 0.00019947581074553612, "loss": 11.6815, "step": 4702 }, { "epoch": 0.09844678891400821, "grad_norm": 0.24042029678821564, "learning_rate": 0.00019947558652179037, "loss": 11.69, "step": 4703 }, { "epoch": 0.09846772167797035, "grad_norm": 0.16720832884311676, "learning_rate": 0.00019947536225022473, "loss": 11.6759, "step": 4704 }, { "epoch": 0.09848865444193251, "grad_norm": 0.2394900619983673, "learning_rate": 0.00019947513793083926, "loss": 11.704, "step": 4705 }, { "epoch": 0.09850958720589467, "grad_norm": 0.24526549875736237, "learning_rate": 0.0001994749135636341, "loss": 11.6768, "step": 4706 }, { "epoch": 0.09853051996985682, "grad_norm": 0.251510351896286, "learning_rate": 0.00019947468914860935, "loss": 11.6841, "step": 4707 }, { "epoch": 0.09855145273381898, "grad_norm": 0.22600211203098297, "learning_rate": 0.00019947446468576515, "loss": 11.6962, "step": 4708 }, { "epoch": 0.09857238549778112, "grad_norm": 0.23643168807029724, "learning_rate": 0.00019947424017510156, "loss": 11.6757, "step": 4709 }, { "epoch": 0.09859331826174328, "grad_norm": 0.23466432094573975, "learning_rate": 0.00019947401561661872, "loss": 11.6644, "step": 4710 }, { "epoch": 0.09861425102570544, "grad_norm": 0.21531327068805695, "learning_rate": 0.00019947379101031674, "loss": 11.6953, "step": 4711 }, { "epoch": 0.09863518378966758, "grad_norm": 0.21628274023532867, "learning_rate": 0.0001994735663561957, "loss": 11.6923, "step": 4712 }, { "epoch": 0.09865611655362974, "grad_norm": 0.23379436135292053, "learning_rate": 0.0001994733416542557, "loss": 11.6841, "step": 4713 }, { "epoch": 0.0986770493175919, "grad_norm": 0.2207472026348114, "learning_rate": 0.0001994731169044969, "loss": 11.671, "step": 4714 }, { "epoch": 0.09869798208155404, "grad_norm": 0.24778202176094055, "learning_rate": 0.00019947289210691936, "loss": 11.6825, "step": 4715 }, { "epoch": 0.0987189148455162, "grad_norm": 0.2454725056886673, "learning_rate": 0.00019947266726152326, "loss": 11.6902, "step": 4716 }, { "epoch": 0.09873984760947836, "grad_norm": 0.2608007788658142, "learning_rate": 0.0001994724423683086, "loss": 11.6839, "step": 4717 }, { "epoch": 0.09876078037344051, "grad_norm": 0.24596577882766724, "learning_rate": 0.00019947221742727556, "loss": 11.6854, "step": 4718 }, { "epoch": 0.09878171313740267, "grad_norm": 0.24882663786411285, "learning_rate": 0.00019947199243842422, "loss": 11.6945, "step": 4719 }, { "epoch": 0.09880264590136481, "grad_norm": 0.2626027762889862, "learning_rate": 0.00019947176740175472, "loss": 11.6816, "step": 4720 }, { "epoch": 0.09882357866532697, "grad_norm": 0.2728167176246643, "learning_rate": 0.00019947154231726716, "loss": 11.6774, "step": 4721 }, { "epoch": 0.09884451142928913, "grad_norm": 0.19889922440052032, "learning_rate": 0.0001994713171849616, "loss": 11.7068, "step": 4722 }, { "epoch": 0.09886544419325127, "grad_norm": 0.23431724309921265, "learning_rate": 0.0001994710920048382, "loss": 11.6933, "step": 4723 }, { "epoch": 0.09888637695721343, "grad_norm": 0.22648866474628448, "learning_rate": 0.00019947086677689707, "loss": 11.6787, "step": 4724 }, { "epoch": 0.09890730972117559, "grad_norm": 0.2007981538772583, "learning_rate": 0.00019947064150113825, "loss": 11.6866, "step": 4725 }, { "epoch": 0.09892824248513774, "grad_norm": 0.2287522703409195, "learning_rate": 0.00019947041617756194, "loss": 11.6789, "step": 4726 }, { "epoch": 0.0989491752490999, "grad_norm": 0.19910229742527008, "learning_rate": 0.0001994701908061682, "loss": 11.673, "step": 4727 }, { "epoch": 0.09897010801306204, "grad_norm": 0.2847934663295746, "learning_rate": 0.00019946996538695714, "loss": 11.6794, "step": 4728 }, { "epoch": 0.0989910407770242, "grad_norm": 0.28134390711784363, "learning_rate": 0.00019946973991992887, "loss": 11.692, "step": 4729 }, { "epoch": 0.09901197354098636, "grad_norm": 0.2502131164073944, "learning_rate": 0.00019946951440508352, "loss": 11.6737, "step": 4730 }, { "epoch": 0.0990329063049485, "grad_norm": 0.2068367749452591, "learning_rate": 0.00019946928884242117, "loss": 11.6904, "step": 4731 }, { "epoch": 0.09905383906891066, "grad_norm": 0.23699253797531128, "learning_rate": 0.00019946906323194195, "loss": 11.6828, "step": 4732 }, { "epoch": 0.09907477183287282, "grad_norm": 0.16372527182102203, "learning_rate": 0.00019946883757364595, "loss": 11.6982, "step": 4733 }, { "epoch": 0.09909570459683496, "grad_norm": 0.24530713260173798, "learning_rate": 0.0001994686118675333, "loss": 11.6692, "step": 4734 }, { "epoch": 0.09911663736079712, "grad_norm": 0.23499011993408203, "learning_rate": 0.00019946838611360404, "loss": 11.683, "step": 4735 }, { "epoch": 0.09913757012475927, "grad_norm": 0.2494925707578659, "learning_rate": 0.0001994681603118584, "loss": 11.6773, "step": 4736 }, { "epoch": 0.09915850288872143, "grad_norm": 0.23063217103481293, "learning_rate": 0.0001994679344622964, "loss": 11.6861, "step": 4737 }, { "epoch": 0.09917943565268358, "grad_norm": 0.1929047703742981, "learning_rate": 0.00019946770856491819, "loss": 11.6634, "step": 4738 }, { "epoch": 0.09920036841664573, "grad_norm": 0.18770088255405426, "learning_rate": 0.0001994674826197238, "loss": 11.688, "step": 4739 }, { "epoch": 0.09922130118060789, "grad_norm": 0.2541235089302063, "learning_rate": 0.00019946725662671347, "loss": 11.6956, "step": 4740 }, { "epoch": 0.09924223394457005, "grad_norm": 0.22850744426250458, "learning_rate": 0.0001994670305858872, "loss": 11.6939, "step": 4741 }, { "epoch": 0.09926316670853219, "grad_norm": 0.1953747421503067, "learning_rate": 0.0001994668044972451, "loss": 11.6792, "step": 4742 }, { "epoch": 0.09928409947249435, "grad_norm": 0.2838749885559082, "learning_rate": 0.00019946657836078738, "loss": 11.6841, "step": 4743 }, { "epoch": 0.09930503223645651, "grad_norm": 0.21442949771881104, "learning_rate": 0.00019946635217651406, "loss": 11.6689, "step": 4744 }, { "epoch": 0.09932596500041865, "grad_norm": 0.30828407406806946, "learning_rate": 0.00019946612594442528, "loss": 11.7011, "step": 4745 }, { "epoch": 0.09934689776438081, "grad_norm": 0.4008057713508606, "learning_rate": 0.0001994658996645211, "loss": 11.695, "step": 4746 }, { "epoch": 0.09936783052834296, "grad_norm": 0.22690246999263763, "learning_rate": 0.0001994656733368017, "loss": 11.6846, "step": 4747 }, { "epoch": 0.09938876329230512, "grad_norm": 0.22103071212768555, "learning_rate": 0.0001994654469612671, "loss": 11.6726, "step": 4748 }, { "epoch": 0.09940969605626727, "grad_norm": 0.24522212147712708, "learning_rate": 0.00019946522053791753, "loss": 11.6981, "step": 4749 }, { "epoch": 0.09943062882022942, "grad_norm": 0.2090194970369339, "learning_rate": 0.00019946499406675304, "loss": 11.6916, "step": 4750 }, { "epoch": 0.09945156158419158, "grad_norm": 0.27771130204200745, "learning_rate": 0.0001994647675477737, "loss": 11.6797, "step": 4751 }, { "epoch": 0.09947249434815374, "grad_norm": 0.20965349674224854, "learning_rate": 0.00019946454098097969, "loss": 11.7054, "step": 4752 }, { "epoch": 0.09949342711211588, "grad_norm": 0.1915791630744934, "learning_rate": 0.00019946431436637104, "loss": 11.6885, "step": 4753 }, { "epoch": 0.09951435987607804, "grad_norm": 0.22618703544139862, "learning_rate": 0.00019946408770394795, "loss": 11.696, "step": 4754 }, { "epoch": 0.09953529264004018, "grad_norm": 0.2650756239891052, "learning_rate": 0.00019946386099371045, "loss": 11.6756, "step": 4755 }, { "epoch": 0.09955622540400234, "grad_norm": 0.22156165540218353, "learning_rate": 0.00019946363423565866, "loss": 11.6843, "step": 4756 }, { "epoch": 0.0995771581679645, "grad_norm": 0.20690318942070007, "learning_rate": 0.00019946340742979274, "loss": 11.6789, "step": 4757 }, { "epoch": 0.09959809093192665, "grad_norm": 0.22929662466049194, "learning_rate": 0.00019946318057611275, "loss": 11.6926, "step": 4758 }, { "epoch": 0.0996190236958888, "grad_norm": 0.21423973143100739, "learning_rate": 0.00019946295367461882, "loss": 11.685, "step": 4759 }, { "epoch": 0.09963995645985096, "grad_norm": 0.31187719106674194, "learning_rate": 0.00019946272672531105, "loss": 11.6903, "step": 4760 }, { "epoch": 0.09966088922381311, "grad_norm": 0.23590593039989471, "learning_rate": 0.00019946249972818957, "loss": 11.6894, "step": 4761 }, { "epoch": 0.09968182198777527, "grad_norm": 0.20354332029819489, "learning_rate": 0.00019946227268325447, "loss": 11.6772, "step": 4762 }, { "epoch": 0.09970275475173741, "grad_norm": 0.2402498573064804, "learning_rate": 0.00019946204559050584, "loss": 11.6793, "step": 4763 }, { "epoch": 0.09972368751569957, "grad_norm": 0.21299071609973907, "learning_rate": 0.00019946181844994384, "loss": 11.6802, "step": 4764 }, { "epoch": 0.09974462027966173, "grad_norm": 0.18986839056015015, "learning_rate": 0.00019946159126156852, "loss": 11.6936, "step": 4765 }, { "epoch": 0.09976555304362388, "grad_norm": 0.18120533227920532, "learning_rate": 0.00019946136402538005, "loss": 11.6887, "step": 4766 }, { "epoch": 0.09978648580758603, "grad_norm": 0.2501213550567627, "learning_rate": 0.00019946113674137848, "loss": 11.6776, "step": 4767 }, { "epoch": 0.09980741857154819, "grad_norm": 0.3137635886669159, "learning_rate": 0.00019946090940956398, "loss": 11.6805, "step": 4768 }, { "epoch": 0.09982835133551034, "grad_norm": 0.29983341693878174, "learning_rate": 0.0001994606820299366, "loss": 11.6995, "step": 4769 }, { "epoch": 0.0998492840994725, "grad_norm": 0.18412771821022034, "learning_rate": 0.00019946045460249648, "loss": 11.6835, "step": 4770 }, { "epoch": 0.09987021686343464, "grad_norm": 0.2584351897239685, "learning_rate": 0.00019946022712724376, "loss": 11.6728, "step": 4771 }, { "epoch": 0.0998911496273968, "grad_norm": 0.24974139034748077, "learning_rate": 0.00019945999960417846, "loss": 11.6904, "step": 4772 }, { "epoch": 0.09991208239135896, "grad_norm": 0.19142207503318787, "learning_rate": 0.00019945977203330076, "loss": 11.6694, "step": 4773 }, { "epoch": 0.0999330151553211, "grad_norm": 0.20548991858959198, "learning_rate": 0.00019945954441461078, "loss": 11.6755, "step": 4774 }, { "epoch": 0.09995394791928326, "grad_norm": 0.2873070538043976, "learning_rate": 0.0001994593167481086, "loss": 11.6967, "step": 4775 }, { "epoch": 0.09997488068324542, "grad_norm": 0.20347929000854492, "learning_rate": 0.00019945908903379434, "loss": 11.6895, "step": 4776 }, { "epoch": 0.09999581344720757, "grad_norm": 0.23179277777671814, "learning_rate": 0.0001994588612716681, "loss": 11.6863, "step": 4777 }, { "epoch": 0.10001674621116972, "grad_norm": 0.20578645169734955, "learning_rate": 0.00019945863346172997, "loss": 11.687, "step": 4778 }, { "epoch": 0.10003767897513188, "grad_norm": 0.2138931155204773, "learning_rate": 0.00019945840560398008, "loss": 11.6652, "step": 4779 }, { "epoch": 0.10005861173909403, "grad_norm": 0.23979796469211578, "learning_rate": 0.00019945817769841857, "loss": 11.6862, "step": 4780 }, { "epoch": 0.10007954450305619, "grad_norm": 0.24855312705039978, "learning_rate": 0.0001994579497450455, "loss": 11.686, "step": 4781 }, { "epoch": 0.10010047726701833, "grad_norm": 0.2787016034126282, "learning_rate": 0.000199457721743861, "loss": 11.6832, "step": 4782 }, { "epoch": 0.10012141003098049, "grad_norm": 0.226378932595253, "learning_rate": 0.00019945749369486518, "loss": 11.6852, "step": 4783 }, { "epoch": 0.10014234279494265, "grad_norm": 0.2721059024333954, "learning_rate": 0.00019945726559805817, "loss": 11.6897, "step": 4784 }, { "epoch": 0.1001632755589048, "grad_norm": 0.18188095092773438, "learning_rate": 0.00019945703745344003, "loss": 11.6854, "step": 4785 }, { "epoch": 0.10018420832286695, "grad_norm": 0.22580501437187195, "learning_rate": 0.00019945680926101091, "loss": 11.6992, "step": 4786 }, { "epoch": 0.10020514108682911, "grad_norm": 0.18150073289871216, "learning_rate": 0.00019945658102077092, "loss": 11.6874, "step": 4787 }, { "epoch": 0.10022607385079126, "grad_norm": 0.21829676628112793, "learning_rate": 0.00019945635273272013, "loss": 11.6865, "step": 4788 }, { "epoch": 0.10024700661475341, "grad_norm": 0.22361651062965393, "learning_rate": 0.00019945612439685874, "loss": 11.6723, "step": 4789 }, { "epoch": 0.10026793937871556, "grad_norm": 0.20542986690998077, "learning_rate": 0.00019945589601318676, "loss": 11.6763, "step": 4790 }, { "epoch": 0.10028887214267772, "grad_norm": 0.23359039425849915, "learning_rate": 0.00019945566758170432, "loss": 11.6821, "step": 4791 }, { "epoch": 0.10030980490663988, "grad_norm": 0.23249106109142303, "learning_rate": 0.00019945543910241154, "loss": 11.6915, "step": 4792 }, { "epoch": 0.10033073767060202, "grad_norm": 0.20880131423473358, "learning_rate": 0.00019945521057530857, "loss": 11.6653, "step": 4793 }, { "epoch": 0.10035167043456418, "grad_norm": 0.21802586317062378, "learning_rate": 0.00019945498200039547, "loss": 11.6739, "step": 4794 }, { "epoch": 0.10037260319852634, "grad_norm": 0.2663123905658722, "learning_rate": 0.00019945475337767237, "loss": 11.662, "step": 4795 }, { "epoch": 0.10039353596248848, "grad_norm": 0.23210841417312622, "learning_rate": 0.0001994545247071394, "loss": 11.6727, "step": 4796 }, { "epoch": 0.10041446872645064, "grad_norm": 0.19116324186325073, "learning_rate": 0.0001994542959887966, "loss": 11.6853, "step": 4797 }, { "epoch": 0.10043540149041279, "grad_norm": 0.22703664004802704, "learning_rate": 0.00019945406722264418, "loss": 11.704, "step": 4798 }, { "epoch": 0.10045633425437495, "grad_norm": 0.17928744852542877, "learning_rate": 0.00019945383840868215, "loss": 11.6769, "step": 4799 }, { "epoch": 0.1004772670183371, "grad_norm": 0.4679087996482849, "learning_rate": 0.0001994536095469107, "loss": 11.6878, "step": 4800 }, { "epoch": 0.10049819978229925, "grad_norm": 0.2641317844390869, "learning_rate": 0.0001994533806373299, "loss": 11.6871, "step": 4801 }, { "epoch": 0.10051913254626141, "grad_norm": 0.2258947789669037, "learning_rate": 0.00019945315167993986, "loss": 11.6869, "step": 4802 }, { "epoch": 0.10054006531022357, "grad_norm": 0.20687536895275116, "learning_rate": 0.00019945292267474068, "loss": 11.6834, "step": 4803 }, { "epoch": 0.10056099807418571, "grad_norm": 0.21994729340076447, "learning_rate": 0.0001994526936217325, "loss": 11.6957, "step": 4804 }, { "epoch": 0.10058193083814787, "grad_norm": 0.25375738739967346, "learning_rate": 0.00019945246452091544, "loss": 11.6862, "step": 4805 }, { "epoch": 0.10060286360211003, "grad_norm": 0.2229490876197815, "learning_rate": 0.00019945223537228956, "loss": 11.6869, "step": 4806 }, { "epoch": 0.10062379636607217, "grad_norm": 0.30117666721343994, "learning_rate": 0.000199452006175855, "loss": 11.686, "step": 4807 }, { "epoch": 0.10064472913003433, "grad_norm": 0.2430322915315628, "learning_rate": 0.0001994517769316119, "loss": 11.6716, "step": 4808 }, { "epoch": 0.10066566189399648, "grad_norm": 0.2311580777168274, "learning_rate": 0.00019945154763956029, "loss": 11.6865, "step": 4809 }, { "epoch": 0.10068659465795864, "grad_norm": 0.21821148693561554, "learning_rate": 0.00019945131829970033, "loss": 11.692, "step": 4810 }, { "epoch": 0.1007075274219208, "grad_norm": 0.24370308220386505, "learning_rate": 0.00019945108891203218, "loss": 11.6926, "step": 4811 }, { "epoch": 0.10072846018588294, "grad_norm": 0.23876261711120605, "learning_rate": 0.00019945085947655587, "loss": 11.6773, "step": 4812 }, { "epoch": 0.1007493929498451, "grad_norm": 0.25911226868629456, "learning_rate": 0.00019945062999327152, "loss": 11.6835, "step": 4813 }, { "epoch": 0.10077032571380726, "grad_norm": 0.31830260157585144, "learning_rate": 0.00019945040046217928, "loss": 11.6788, "step": 4814 }, { "epoch": 0.1007912584777694, "grad_norm": 0.20178134739398956, "learning_rate": 0.00019945017088327923, "loss": 11.6802, "step": 4815 }, { "epoch": 0.10081219124173156, "grad_norm": 0.31355318427085876, "learning_rate": 0.0001994499412565715, "loss": 11.6965, "step": 4816 }, { "epoch": 0.1008331240056937, "grad_norm": 0.195742666721344, "learning_rate": 0.0001994497115820562, "loss": 11.6826, "step": 4817 }, { "epoch": 0.10085405676965586, "grad_norm": 0.3978915512561798, "learning_rate": 0.0001994494818597334, "loss": 11.6712, "step": 4818 }, { "epoch": 0.10087498953361802, "grad_norm": 0.24089190363883972, "learning_rate": 0.00019944925208960327, "loss": 11.6717, "step": 4819 }, { "epoch": 0.10089592229758017, "grad_norm": 0.23090696334838867, "learning_rate": 0.00019944902227166586, "loss": 11.6668, "step": 4820 }, { "epoch": 0.10091685506154233, "grad_norm": 0.21185722947120667, "learning_rate": 0.00019944879240592133, "loss": 11.6859, "step": 4821 }, { "epoch": 0.10093778782550449, "grad_norm": 0.18636716902256012, "learning_rate": 0.00019944856249236979, "loss": 11.6804, "step": 4822 }, { "epoch": 0.10095872058946663, "grad_norm": 0.30800461769104004, "learning_rate": 0.0001994483325310113, "loss": 11.702, "step": 4823 }, { "epoch": 0.10097965335342879, "grad_norm": 0.20812474191188812, "learning_rate": 0.000199448102521846, "loss": 11.6859, "step": 4824 }, { "epoch": 0.10100058611739093, "grad_norm": 0.20137465000152588, "learning_rate": 0.00019944787246487403, "loss": 11.6775, "step": 4825 }, { "epoch": 0.10102151888135309, "grad_norm": 0.203482523560524, "learning_rate": 0.00019944764236009548, "loss": 11.6778, "step": 4826 }, { "epoch": 0.10104245164531525, "grad_norm": 0.21631130576133728, "learning_rate": 0.00019944741220751045, "loss": 11.6783, "step": 4827 }, { "epoch": 0.1010633844092774, "grad_norm": 0.24121324717998505, "learning_rate": 0.00019944718200711904, "loss": 11.6842, "step": 4828 }, { "epoch": 0.10108431717323955, "grad_norm": 0.19251425564289093, "learning_rate": 0.0001994469517589214, "loss": 11.6785, "step": 4829 }, { "epoch": 0.10110524993720171, "grad_norm": 0.1907745599746704, "learning_rate": 0.00019944672146291758, "loss": 11.6766, "step": 4830 }, { "epoch": 0.10112618270116386, "grad_norm": 0.2394951432943344, "learning_rate": 0.00019944649111910777, "loss": 11.6724, "step": 4831 }, { "epoch": 0.10114711546512602, "grad_norm": 0.2633683681488037, "learning_rate": 0.00019944626072749202, "loss": 11.6995, "step": 4832 }, { "epoch": 0.10116804822908818, "grad_norm": 0.20911280810832977, "learning_rate": 0.00019944603028807046, "loss": 11.6835, "step": 4833 }, { "epoch": 0.10118898099305032, "grad_norm": 0.2125711739063263, "learning_rate": 0.0001994457998008432, "loss": 11.6804, "step": 4834 }, { "epoch": 0.10120991375701248, "grad_norm": 0.21051929891109467, "learning_rate": 0.00019944556926581032, "loss": 11.675, "step": 4835 }, { "epoch": 0.10123084652097462, "grad_norm": 0.31446748971939087, "learning_rate": 0.00019944533868297202, "loss": 11.703, "step": 4836 }, { "epoch": 0.10125177928493678, "grad_norm": 0.20475292205810547, "learning_rate": 0.00019944510805232831, "loss": 11.6894, "step": 4837 }, { "epoch": 0.10127271204889894, "grad_norm": 0.2702731192111969, "learning_rate": 0.00019944487737387937, "loss": 11.6733, "step": 4838 }, { "epoch": 0.10129364481286109, "grad_norm": 0.23791873455047607, "learning_rate": 0.00019944464664762527, "loss": 11.6721, "step": 4839 }, { "epoch": 0.10131457757682324, "grad_norm": 0.1947019100189209, "learning_rate": 0.00019944441587356612, "loss": 11.6887, "step": 4840 }, { "epoch": 0.1013355103407854, "grad_norm": 0.23412863910198212, "learning_rate": 0.00019944418505170208, "loss": 11.6946, "step": 4841 }, { "epoch": 0.10135644310474755, "grad_norm": 0.20357809960842133, "learning_rate": 0.0001994439541820332, "loss": 11.6852, "step": 4842 }, { "epoch": 0.10137737586870971, "grad_norm": 0.21302959322929382, "learning_rate": 0.00019944372326455965, "loss": 11.6839, "step": 4843 }, { "epoch": 0.10139830863267185, "grad_norm": 0.21788515150547028, "learning_rate": 0.00019944349229928147, "loss": 11.6768, "step": 4844 }, { "epoch": 0.10141924139663401, "grad_norm": 0.2539425194263458, "learning_rate": 0.00019944326128619886, "loss": 11.6852, "step": 4845 }, { "epoch": 0.10144017416059617, "grad_norm": 0.2981727123260498, "learning_rate": 0.00019944303022531186, "loss": 11.6868, "step": 4846 }, { "epoch": 0.10146110692455831, "grad_norm": 0.23581847548484802, "learning_rate": 0.0001994427991166206, "loss": 11.6918, "step": 4847 }, { "epoch": 0.10148203968852047, "grad_norm": 0.19969041645526886, "learning_rate": 0.00019944256796012517, "loss": 11.6771, "step": 4848 }, { "epoch": 0.10150297245248263, "grad_norm": 0.21982502937316895, "learning_rate": 0.0001994423367558257, "loss": 11.6551, "step": 4849 }, { "epoch": 0.10152390521644478, "grad_norm": 0.2112652063369751, "learning_rate": 0.00019944210550372236, "loss": 11.6697, "step": 4850 }, { "epoch": 0.10154483798040694, "grad_norm": 0.203321635723114, "learning_rate": 0.00019944187420381515, "loss": 11.6748, "step": 4851 }, { "epoch": 0.10156577074436908, "grad_norm": 0.21358740329742432, "learning_rate": 0.0001994416428561043, "loss": 11.6884, "step": 4852 }, { "epoch": 0.10158670350833124, "grad_norm": 0.2875146269798279, "learning_rate": 0.00019944141146058982, "loss": 11.6859, "step": 4853 }, { "epoch": 0.1016076362722934, "grad_norm": 0.2347571849822998, "learning_rate": 0.00019944118001727183, "loss": 11.6793, "step": 4854 }, { "epoch": 0.10162856903625554, "grad_norm": 0.24667353928089142, "learning_rate": 0.00019944094852615053, "loss": 11.6762, "step": 4855 }, { "epoch": 0.1016495018002177, "grad_norm": 0.23761901259422302, "learning_rate": 0.00019944071698722594, "loss": 11.6634, "step": 4856 }, { "epoch": 0.10167043456417986, "grad_norm": 0.22866147756576538, "learning_rate": 0.00019944048540049823, "loss": 11.7005, "step": 4857 }, { "epoch": 0.101691367328142, "grad_norm": 0.21405978500843048, "learning_rate": 0.00019944025376596747, "loss": 11.6735, "step": 4858 }, { "epoch": 0.10171230009210416, "grad_norm": 0.25440865755081177, "learning_rate": 0.00019944002208363378, "loss": 11.6895, "step": 4859 }, { "epoch": 0.10173323285606632, "grad_norm": 0.1925293505191803, "learning_rate": 0.0001994397903534973, "loss": 11.6763, "step": 4860 }, { "epoch": 0.10175416562002847, "grad_norm": 0.2010919600725174, "learning_rate": 0.0001994395585755581, "loss": 11.6921, "step": 4861 }, { "epoch": 0.10177509838399063, "grad_norm": 0.225705087184906, "learning_rate": 0.0001994393267498163, "loss": 11.6835, "step": 4862 }, { "epoch": 0.10179603114795277, "grad_norm": 0.2581445276737213, "learning_rate": 0.00019943909487627205, "loss": 11.6617, "step": 4863 }, { "epoch": 0.10181696391191493, "grad_norm": 0.2993118166923523, "learning_rate": 0.00019943886295492542, "loss": 11.6854, "step": 4864 }, { "epoch": 0.10183789667587709, "grad_norm": 0.19513113796710968, "learning_rate": 0.00019943863098577654, "loss": 11.6759, "step": 4865 }, { "epoch": 0.10185882943983923, "grad_norm": 0.22632543742656708, "learning_rate": 0.0001994383989688255, "loss": 11.6809, "step": 4866 }, { "epoch": 0.10187976220380139, "grad_norm": 0.20076584815979004, "learning_rate": 0.00019943816690407246, "loss": 11.6832, "step": 4867 }, { "epoch": 0.10190069496776355, "grad_norm": 0.1977531462907791, "learning_rate": 0.00019943793479151748, "loss": 11.668, "step": 4868 }, { "epoch": 0.1019216277317257, "grad_norm": 0.2424498051404953, "learning_rate": 0.0001994377026311607, "loss": 11.7017, "step": 4869 }, { "epoch": 0.10194256049568785, "grad_norm": 0.24794013798236847, "learning_rate": 0.00019943747042300224, "loss": 11.6844, "step": 4870 }, { "epoch": 0.10196349325965, "grad_norm": 0.25695517659187317, "learning_rate": 0.0001994372381670422, "loss": 11.6903, "step": 4871 }, { "epoch": 0.10198442602361216, "grad_norm": 0.21920911967754364, "learning_rate": 0.00019943700586328063, "loss": 11.6707, "step": 4872 }, { "epoch": 0.10200535878757432, "grad_norm": 0.2338019460439682, "learning_rate": 0.00019943677351171775, "loss": 11.7023, "step": 4873 }, { "epoch": 0.10202629155153646, "grad_norm": 0.20590271055698395, "learning_rate": 0.0001994365411123536, "loss": 11.6916, "step": 4874 }, { "epoch": 0.10204722431549862, "grad_norm": 0.21276883780956268, "learning_rate": 0.0001994363086651883, "loss": 11.6852, "step": 4875 }, { "epoch": 0.10206815707946078, "grad_norm": 0.2209806591272354, "learning_rate": 0.000199436076170222, "loss": 11.6898, "step": 4876 }, { "epoch": 0.10208908984342292, "grad_norm": 0.23295295238494873, "learning_rate": 0.0001994358436274548, "loss": 11.6904, "step": 4877 }, { "epoch": 0.10211002260738508, "grad_norm": 0.21087588369846344, "learning_rate": 0.00019943561103688673, "loss": 11.6881, "step": 4878 }, { "epoch": 0.10213095537134723, "grad_norm": 0.20576350390911102, "learning_rate": 0.00019943537839851804, "loss": 11.669, "step": 4879 }, { "epoch": 0.10215188813530938, "grad_norm": 0.22437416017055511, "learning_rate": 0.0001994351457123487, "loss": 11.6785, "step": 4880 }, { "epoch": 0.10217282089927154, "grad_norm": 0.21855220198631287, "learning_rate": 0.00019943491297837895, "loss": 11.6852, "step": 4881 }, { "epoch": 0.10219375366323369, "grad_norm": 0.19532082974910736, "learning_rate": 0.00019943468019660885, "loss": 11.6815, "step": 4882 }, { "epoch": 0.10221468642719585, "grad_norm": 0.2650713324546814, "learning_rate": 0.00019943444736703848, "loss": 11.6876, "step": 4883 }, { "epoch": 0.102235619191158, "grad_norm": 0.22597499191761017, "learning_rate": 0.00019943421448966796, "loss": 11.679, "step": 4884 }, { "epoch": 0.10225655195512015, "grad_norm": 0.25546228885650635, "learning_rate": 0.00019943398156449743, "loss": 11.669, "step": 4885 }, { "epoch": 0.10227748471908231, "grad_norm": 0.28658899664878845, "learning_rate": 0.00019943374859152704, "loss": 11.6879, "step": 4886 }, { "epoch": 0.10229841748304445, "grad_norm": 0.23299945890903473, "learning_rate": 0.00019943351557075682, "loss": 11.6721, "step": 4887 }, { "epoch": 0.10231935024700661, "grad_norm": 0.24061702191829681, "learning_rate": 0.00019943328250218693, "loss": 11.694, "step": 4888 }, { "epoch": 0.10234028301096877, "grad_norm": 0.20778988301753998, "learning_rate": 0.00019943304938581743, "loss": 11.67, "step": 4889 }, { "epoch": 0.10236121577493092, "grad_norm": 0.2675463855266571, "learning_rate": 0.00019943281622164852, "loss": 11.6719, "step": 4890 }, { "epoch": 0.10238214853889308, "grad_norm": 0.2554197609424591, "learning_rate": 0.00019943258300968023, "loss": 11.6804, "step": 4891 }, { "epoch": 0.10240308130285523, "grad_norm": 0.266960084438324, "learning_rate": 0.0001994323497499127, "loss": 11.6811, "step": 4892 }, { "epoch": 0.10242401406681738, "grad_norm": 0.24915553629398346, "learning_rate": 0.00019943211644234604, "loss": 11.6747, "step": 4893 }, { "epoch": 0.10244494683077954, "grad_norm": 0.22675973176956177, "learning_rate": 0.0001994318830869804, "loss": 11.6799, "step": 4894 }, { "epoch": 0.1024658795947417, "grad_norm": 0.20705024898052216, "learning_rate": 0.00019943164968381588, "loss": 11.6763, "step": 4895 }, { "epoch": 0.10248681235870384, "grad_norm": 0.20931248366832733, "learning_rate": 0.00019943141623285254, "loss": 11.6912, "step": 4896 }, { "epoch": 0.102507745122666, "grad_norm": 0.22109031677246094, "learning_rate": 0.00019943118273409052, "loss": 11.6893, "step": 4897 }, { "epoch": 0.10252867788662814, "grad_norm": 0.2029445320367813, "learning_rate": 0.00019943094918752996, "loss": 11.6838, "step": 4898 }, { "epoch": 0.1025496106505903, "grad_norm": 0.22617903351783752, "learning_rate": 0.00019943071559317093, "loss": 11.6867, "step": 4899 }, { "epoch": 0.10257054341455246, "grad_norm": 0.238525852560997, "learning_rate": 0.00019943048195101356, "loss": 11.6835, "step": 4900 }, { "epoch": 0.1025914761785146, "grad_norm": 0.20072989165782928, "learning_rate": 0.00019943024826105799, "loss": 11.6834, "step": 4901 }, { "epoch": 0.10261240894247677, "grad_norm": 0.20806217193603516, "learning_rate": 0.00019943001452330428, "loss": 11.6818, "step": 4902 }, { "epoch": 0.10263334170643892, "grad_norm": 0.29049059748649597, "learning_rate": 0.00019942978073775257, "loss": 11.6937, "step": 4903 }, { "epoch": 0.10265427447040107, "grad_norm": 0.2715733051300049, "learning_rate": 0.00019942954690440302, "loss": 11.6989, "step": 4904 }, { "epoch": 0.10267520723436323, "grad_norm": 0.22571533918380737, "learning_rate": 0.00019942931302325564, "loss": 11.6861, "step": 4905 }, { "epoch": 0.10269613999832537, "grad_norm": 0.21043433248996735, "learning_rate": 0.0001994290790943106, "loss": 11.6732, "step": 4906 }, { "epoch": 0.10271707276228753, "grad_norm": 0.2726968228816986, "learning_rate": 0.00019942884511756802, "loss": 11.6928, "step": 4907 }, { "epoch": 0.10273800552624969, "grad_norm": 0.20395193994045258, "learning_rate": 0.000199428611093028, "loss": 11.6788, "step": 4908 }, { "epoch": 0.10275893829021183, "grad_norm": 0.22885552048683167, "learning_rate": 0.00019942837702069066, "loss": 11.6907, "step": 4909 }, { "epoch": 0.102779871054174, "grad_norm": 0.5390881896018982, "learning_rate": 0.0001994281429005561, "loss": 11.6759, "step": 4910 }, { "epoch": 0.10280080381813615, "grad_norm": 0.29220050573349, "learning_rate": 0.00019942790873262448, "loss": 11.6832, "step": 4911 }, { "epoch": 0.1028217365820983, "grad_norm": 0.22866016626358032, "learning_rate": 0.0001994276745168958, "loss": 11.682, "step": 4912 }, { "epoch": 0.10284266934606046, "grad_norm": 0.2355635166168213, "learning_rate": 0.00019942744025337029, "loss": 11.6868, "step": 4913 }, { "epoch": 0.1028636021100226, "grad_norm": 0.24462029337882996, "learning_rate": 0.00019942720594204802, "loss": 11.6861, "step": 4914 }, { "epoch": 0.10288453487398476, "grad_norm": 0.24715237319469452, "learning_rate": 0.00019942697158292907, "loss": 11.6902, "step": 4915 }, { "epoch": 0.10290546763794692, "grad_norm": 0.2618972659111023, "learning_rate": 0.00019942673717601362, "loss": 11.7024, "step": 4916 }, { "epoch": 0.10292640040190906, "grad_norm": 0.19494560360908508, "learning_rate": 0.00019942650272130173, "loss": 11.6787, "step": 4917 }, { "epoch": 0.10294733316587122, "grad_norm": 0.3561938405036926, "learning_rate": 0.0001994262682187935, "loss": 11.6728, "step": 4918 }, { "epoch": 0.10296826592983338, "grad_norm": 0.2332466095685959, "learning_rate": 0.00019942603366848908, "loss": 11.6842, "step": 4919 }, { "epoch": 0.10298919869379553, "grad_norm": 0.24556675553321838, "learning_rate": 0.0001994257990703886, "loss": 11.6867, "step": 4920 }, { "epoch": 0.10301013145775768, "grad_norm": 0.21040788292884827, "learning_rate": 0.00019942556442449215, "loss": 11.6936, "step": 4921 }, { "epoch": 0.10303106422171984, "grad_norm": 0.2101389467716217, "learning_rate": 0.0001994253297307998, "loss": 11.6724, "step": 4922 }, { "epoch": 0.10305199698568199, "grad_norm": 0.17990733683109283, "learning_rate": 0.00019942509498931173, "loss": 11.6717, "step": 4923 }, { "epoch": 0.10307292974964415, "grad_norm": 0.21676470339298248, "learning_rate": 0.000199424860200028, "loss": 11.6771, "step": 4924 }, { "epoch": 0.10309386251360629, "grad_norm": 0.23143211007118225, "learning_rate": 0.00019942462536294876, "loss": 11.6931, "step": 4925 }, { "epoch": 0.10311479527756845, "grad_norm": 0.24355968832969666, "learning_rate": 0.0001994243904780741, "loss": 11.6873, "step": 4926 }, { "epoch": 0.10313572804153061, "grad_norm": 0.19494101405143738, "learning_rate": 0.00019942415554540416, "loss": 11.6802, "step": 4927 }, { "epoch": 0.10315666080549275, "grad_norm": 0.2376421093940735, "learning_rate": 0.00019942392056493903, "loss": 11.6833, "step": 4928 }, { "epoch": 0.10317759356945491, "grad_norm": 0.17849941551685333, "learning_rate": 0.00019942368553667886, "loss": 11.6844, "step": 4929 }, { "epoch": 0.10319852633341707, "grad_norm": 0.20007489621639252, "learning_rate": 0.0001994234504606237, "loss": 11.6899, "step": 4930 }, { "epoch": 0.10321945909737922, "grad_norm": 0.2552034556865692, "learning_rate": 0.00019942321533677367, "loss": 11.6965, "step": 4931 }, { "epoch": 0.10324039186134137, "grad_norm": 0.2292584478855133, "learning_rate": 0.00019942298016512895, "loss": 11.6948, "step": 4932 }, { "epoch": 0.10326132462530352, "grad_norm": 0.2560672163963318, "learning_rate": 0.0001994227449456896, "loss": 11.6773, "step": 4933 }, { "epoch": 0.10328225738926568, "grad_norm": 0.2104548066854477, "learning_rate": 0.00019942250967845576, "loss": 11.6844, "step": 4934 }, { "epoch": 0.10330319015322784, "grad_norm": 0.22623778879642487, "learning_rate": 0.00019942227436342747, "loss": 11.6821, "step": 4935 }, { "epoch": 0.10332412291718998, "grad_norm": 0.22615596652030945, "learning_rate": 0.00019942203900060495, "loss": 11.6802, "step": 4936 }, { "epoch": 0.10334505568115214, "grad_norm": 0.2384594976902008, "learning_rate": 0.00019942180358998825, "loss": 11.7058, "step": 4937 }, { "epoch": 0.1033659884451143, "grad_norm": 0.2055058479309082, "learning_rate": 0.0001994215681315775, "loss": 11.6844, "step": 4938 }, { "epoch": 0.10338692120907644, "grad_norm": 0.21997232735157013, "learning_rate": 0.0001994213326253728, "loss": 11.69, "step": 4939 }, { "epoch": 0.1034078539730386, "grad_norm": 0.22911547124385834, "learning_rate": 0.0001994210970713743, "loss": 11.6849, "step": 4940 }, { "epoch": 0.10342878673700075, "grad_norm": 0.23179621994495392, "learning_rate": 0.00019942086146958204, "loss": 11.6854, "step": 4941 }, { "epoch": 0.1034497195009629, "grad_norm": 0.3502293825149536, "learning_rate": 0.0001994206258199962, "loss": 11.6787, "step": 4942 }, { "epoch": 0.10347065226492506, "grad_norm": 0.22954878211021423, "learning_rate": 0.0001994203901226169, "loss": 11.6945, "step": 4943 }, { "epoch": 0.10349158502888721, "grad_norm": 0.3217577636241913, "learning_rate": 0.0001994201543774442, "loss": 11.6948, "step": 4944 }, { "epoch": 0.10351251779284937, "grad_norm": 0.24297213554382324, "learning_rate": 0.00019941991858447823, "loss": 11.6799, "step": 4945 }, { "epoch": 0.10353345055681153, "grad_norm": 0.2453148365020752, "learning_rate": 0.00019941968274371914, "loss": 11.6896, "step": 4946 }, { "epoch": 0.10355438332077367, "grad_norm": 0.25387439131736755, "learning_rate": 0.00019941944685516701, "loss": 11.6858, "step": 4947 }, { "epoch": 0.10357531608473583, "grad_norm": 0.19816136360168457, "learning_rate": 0.00019941921091882195, "loss": 11.6744, "step": 4948 }, { "epoch": 0.10359624884869799, "grad_norm": 0.18442828953266144, "learning_rate": 0.00019941897493468407, "loss": 11.6879, "step": 4949 }, { "epoch": 0.10361718161266013, "grad_norm": 0.2500729262828827, "learning_rate": 0.00019941873890275353, "loss": 11.6808, "step": 4950 }, { "epoch": 0.10363811437662229, "grad_norm": 0.20096132159233093, "learning_rate": 0.00019941850282303037, "loss": 11.6784, "step": 4951 }, { "epoch": 0.10365904714058444, "grad_norm": 0.21893057227134705, "learning_rate": 0.00019941826669551477, "loss": 11.6888, "step": 4952 }, { "epoch": 0.1036799799045466, "grad_norm": 0.2339344620704651, "learning_rate": 0.0001994180305202068, "loss": 11.6916, "step": 4953 }, { "epoch": 0.10370091266850875, "grad_norm": 0.23283252120018005, "learning_rate": 0.00019941779429710662, "loss": 11.6997, "step": 4954 }, { "epoch": 0.1037218454324709, "grad_norm": 0.22555355727672577, "learning_rate": 0.0001994175580262143, "loss": 11.6691, "step": 4955 }, { "epoch": 0.10374277819643306, "grad_norm": 0.1944504827260971, "learning_rate": 0.00019941732170752996, "loss": 11.6753, "step": 4956 }, { "epoch": 0.10376371096039522, "grad_norm": 0.24576249718666077, "learning_rate": 0.00019941708534105374, "loss": 11.6756, "step": 4957 }, { "epoch": 0.10378464372435736, "grad_norm": 0.30600979924201965, "learning_rate": 0.00019941684892678572, "loss": 11.6839, "step": 4958 }, { "epoch": 0.10380557648831952, "grad_norm": 0.2231762558221817, "learning_rate": 0.000199416612464726, "loss": 11.6691, "step": 4959 }, { "epoch": 0.10382650925228167, "grad_norm": 0.26228782534599304, "learning_rate": 0.00019941637595487477, "loss": 11.6877, "step": 4960 }, { "epoch": 0.10384744201624382, "grad_norm": 0.2793363332748413, "learning_rate": 0.00019941613939723207, "loss": 11.6962, "step": 4961 }, { "epoch": 0.10386837478020598, "grad_norm": 0.21698208153247833, "learning_rate": 0.00019941590279179804, "loss": 11.6913, "step": 4962 }, { "epoch": 0.10388930754416813, "grad_norm": 0.2086041420698166, "learning_rate": 0.00019941566613857278, "loss": 11.6931, "step": 4963 }, { "epoch": 0.10391024030813029, "grad_norm": 0.2016361653804779, "learning_rate": 0.00019941542943755644, "loss": 11.6772, "step": 4964 }, { "epoch": 0.10393117307209244, "grad_norm": 0.2550946772098541, "learning_rate": 0.00019941519268874914, "loss": 11.6769, "step": 4965 }, { "epoch": 0.10395210583605459, "grad_norm": 0.30803626775741577, "learning_rate": 0.00019941495589215091, "loss": 11.7039, "step": 4966 }, { "epoch": 0.10397303860001675, "grad_norm": 0.19489829242229462, "learning_rate": 0.00019941471904776195, "loss": 11.6862, "step": 4967 }, { "epoch": 0.1039939713639789, "grad_norm": 0.27015939354896545, "learning_rate": 0.00019941448215558233, "loss": 11.6846, "step": 4968 }, { "epoch": 0.10401490412794105, "grad_norm": 0.2666819095611572, "learning_rate": 0.00019941424521561216, "loss": 11.6878, "step": 4969 }, { "epoch": 0.10403583689190321, "grad_norm": 0.24224720895290375, "learning_rate": 0.00019941400822785159, "loss": 11.6787, "step": 4970 }, { "epoch": 0.10405676965586536, "grad_norm": 0.23128056526184082, "learning_rate": 0.0001994137711923007, "loss": 11.6774, "step": 4971 }, { "epoch": 0.10407770241982751, "grad_norm": 0.22359953820705414, "learning_rate": 0.00019941353410895965, "loss": 11.6836, "step": 4972 }, { "epoch": 0.10409863518378967, "grad_norm": 0.2774122357368469, "learning_rate": 0.0001994132969778285, "loss": 11.6773, "step": 4973 }, { "epoch": 0.10411956794775182, "grad_norm": 0.26167455315589905, "learning_rate": 0.0001994130597989074, "loss": 11.6933, "step": 4974 }, { "epoch": 0.10414050071171398, "grad_norm": 0.2559363543987274, "learning_rate": 0.00019941282257219641, "loss": 11.6915, "step": 4975 }, { "epoch": 0.10416143347567614, "grad_norm": 0.298788458108902, "learning_rate": 0.00019941258529769572, "loss": 11.6911, "step": 4976 }, { "epoch": 0.10418236623963828, "grad_norm": 0.2127027064561844, "learning_rate": 0.0001994123479754054, "loss": 11.6849, "step": 4977 }, { "epoch": 0.10420329900360044, "grad_norm": 0.2618674337863922, "learning_rate": 0.0001994121106053256, "loss": 11.6941, "step": 4978 }, { "epoch": 0.10422423176756258, "grad_norm": 0.2267521470785141, "learning_rate": 0.00019941187318745635, "loss": 11.6937, "step": 4979 }, { "epoch": 0.10424516453152474, "grad_norm": 1.0947470664978027, "learning_rate": 0.00019941163572179783, "loss": 11.756, "step": 4980 }, { "epoch": 0.1042660972954869, "grad_norm": 0.25633496046066284, "learning_rate": 0.00019941139820835017, "loss": 11.6679, "step": 4981 }, { "epoch": 0.10428703005944905, "grad_norm": 0.23830324411392212, "learning_rate": 0.00019941116064711346, "loss": 11.6907, "step": 4982 }, { "epoch": 0.1043079628234112, "grad_norm": 0.21399664878845215, "learning_rate": 0.0001994109230380878, "loss": 11.6785, "step": 4983 }, { "epoch": 0.10432889558737336, "grad_norm": 0.22548872232437134, "learning_rate": 0.00019941068538127332, "loss": 11.6986, "step": 4984 }, { "epoch": 0.10434982835133551, "grad_norm": 0.2893086373806, "learning_rate": 0.00019941044767667014, "loss": 11.6924, "step": 4985 }, { "epoch": 0.10437076111529767, "grad_norm": 0.22792106866836548, "learning_rate": 0.00019941020992427836, "loss": 11.6945, "step": 4986 }, { "epoch": 0.10439169387925981, "grad_norm": 0.22190280258655548, "learning_rate": 0.0001994099721240981, "loss": 11.6885, "step": 4987 }, { "epoch": 0.10441262664322197, "grad_norm": 0.22797434031963348, "learning_rate": 0.00019940973427612946, "loss": 11.681, "step": 4988 }, { "epoch": 0.10443355940718413, "grad_norm": 0.20329894125461578, "learning_rate": 0.0001994094963803726, "loss": 11.6851, "step": 4989 }, { "epoch": 0.10445449217114627, "grad_norm": 0.24284163117408752, "learning_rate": 0.00019940925843682755, "loss": 11.6864, "step": 4990 }, { "epoch": 0.10447542493510843, "grad_norm": 0.24232149124145508, "learning_rate": 0.00019940902044549453, "loss": 11.6911, "step": 4991 }, { "epoch": 0.10449635769907059, "grad_norm": 0.24217617511749268, "learning_rate": 0.00019940878240637356, "loss": 11.683, "step": 4992 }, { "epoch": 0.10451729046303274, "grad_norm": 0.23670300841331482, "learning_rate": 0.00019940854431946484, "loss": 11.6791, "step": 4993 }, { "epoch": 0.1045382232269949, "grad_norm": 0.5406351089477539, "learning_rate": 0.00019940830618476842, "loss": 11.7088, "step": 4994 }, { "epoch": 0.10455915599095704, "grad_norm": 0.2726912498474121, "learning_rate": 0.00019940806800228443, "loss": 11.6836, "step": 4995 }, { "epoch": 0.1045800887549192, "grad_norm": 0.2038794308900833, "learning_rate": 0.000199407829772013, "loss": 11.6883, "step": 4996 }, { "epoch": 0.10460102151888136, "grad_norm": 0.2516912519931793, "learning_rate": 0.0001994075914939542, "loss": 11.6747, "step": 4997 }, { "epoch": 0.1046219542828435, "grad_norm": 0.24622605741024017, "learning_rate": 0.00019940735316810823, "loss": 11.6791, "step": 4998 }, { "epoch": 0.10464288704680566, "grad_norm": 0.25641900300979614, "learning_rate": 0.0001994071147944751, "loss": 11.6855, "step": 4999 }, { "epoch": 0.10466381981076782, "grad_norm": 0.25918176770210266, "learning_rate": 0.00019940687637305505, "loss": 11.7009, "step": 5000 }, { "epoch": 0.10466381981076782, "eval_loss": 11.684414863586426, "eval_runtime": 34.3134, "eval_samples_per_second": 28.007, "eval_steps_per_second": 7.023, "step": 5000 }, { "epoch": 0.10468475257472996, "grad_norm": 0.28169575333595276, "learning_rate": 0.00019940663790384805, "loss": 11.6929, "step": 5001 }, { "epoch": 0.10470568533869212, "grad_norm": 0.242720365524292, "learning_rate": 0.00019940639938685433, "loss": 11.6998, "step": 5002 }, { "epoch": 0.10472661810265428, "grad_norm": 0.2586379945278168, "learning_rate": 0.00019940616082207395, "loss": 11.6972, "step": 5003 }, { "epoch": 0.10474755086661643, "grad_norm": 0.21351900696754456, "learning_rate": 0.00019940592220950702, "loss": 11.6823, "step": 5004 }, { "epoch": 0.10476848363057858, "grad_norm": 0.210978701710701, "learning_rate": 0.0001994056835491537, "loss": 11.6689, "step": 5005 }, { "epoch": 0.10478941639454073, "grad_norm": 0.26070088148117065, "learning_rate": 0.00019940544484101403, "loss": 11.6829, "step": 5006 }, { "epoch": 0.10481034915850289, "grad_norm": 0.2324928492307663, "learning_rate": 0.00019940520608508822, "loss": 11.6965, "step": 5007 }, { "epoch": 0.10483128192246505, "grad_norm": 0.26156318187713623, "learning_rate": 0.0001994049672813763, "loss": 11.6839, "step": 5008 }, { "epoch": 0.10485221468642719, "grad_norm": 0.21709804236888885, "learning_rate": 0.00019940472842987846, "loss": 11.6789, "step": 5009 }, { "epoch": 0.10487314745038935, "grad_norm": 0.18974705040454865, "learning_rate": 0.00019940448953059475, "loss": 11.6872, "step": 5010 }, { "epoch": 0.10489408021435151, "grad_norm": 0.25612351298332214, "learning_rate": 0.00019940425058352531, "loss": 11.6785, "step": 5011 }, { "epoch": 0.10491501297831365, "grad_norm": 0.2732981741428375, "learning_rate": 0.00019940401158867027, "loss": 11.6841, "step": 5012 }, { "epoch": 0.10493594574227581, "grad_norm": 0.24811628460884094, "learning_rate": 0.00019940377254602969, "loss": 11.6787, "step": 5013 }, { "epoch": 0.10495687850623796, "grad_norm": 0.25064072012901306, "learning_rate": 0.00019940353345560376, "loss": 11.6972, "step": 5014 }, { "epoch": 0.10497781127020012, "grad_norm": 0.1857696771621704, "learning_rate": 0.00019940329431739255, "loss": 11.6668, "step": 5015 }, { "epoch": 0.10499874403416228, "grad_norm": 0.21822533011436462, "learning_rate": 0.00019940305513139618, "loss": 11.6797, "step": 5016 }, { "epoch": 0.10501967679812442, "grad_norm": 0.24380482733249664, "learning_rate": 0.00019940281589761477, "loss": 11.6786, "step": 5017 }, { "epoch": 0.10504060956208658, "grad_norm": 0.22411321103572845, "learning_rate": 0.00019940257661604845, "loss": 11.6911, "step": 5018 }, { "epoch": 0.10506154232604874, "grad_norm": 0.27432921528816223, "learning_rate": 0.0001994023372866973, "loss": 11.683, "step": 5019 }, { "epoch": 0.10508247509001088, "grad_norm": 0.23923668265342712, "learning_rate": 0.00019940209790956148, "loss": 11.7011, "step": 5020 }, { "epoch": 0.10510340785397304, "grad_norm": 0.2139732539653778, "learning_rate": 0.00019940185848464105, "loss": 11.6885, "step": 5021 }, { "epoch": 0.10512434061793519, "grad_norm": 0.21224799752235413, "learning_rate": 0.00019940161901193615, "loss": 11.6687, "step": 5022 }, { "epoch": 0.10514527338189734, "grad_norm": 0.21983122825622559, "learning_rate": 0.00019940137949144693, "loss": 11.6753, "step": 5023 }, { "epoch": 0.1051662061458595, "grad_norm": 0.2066604197025299, "learning_rate": 0.00019940113992317345, "loss": 11.6925, "step": 5024 }, { "epoch": 0.10518713890982165, "grad_norm": 0.22864460945129395, "learning_rate": 0.00019940090030711585, "loss": 11.6931, "step": 5025 }, { "epoch": 0.1052080716737838, "grad_norm": 0.21430671215057373, "learning_rate": 0.00019940066064327427, "loss": 11.689, "step": 5026 }, { "epoch": 0.10522900443774597, "grad_norm": 0.23968669772148132, "learning_rate": 0.0001994004209316488, "loss": 11.7041, "step": 5027 }, { "epoch": 0.10524993720170811, "grad_norm": 0.21092821657657623, "learning_rate": 0.00019940018117223953, "loss": 11.6865, "step": 5028 }, { "epoch": 0.10527086996567027, "grad_norm": 0.23946771025657654, "learning_rate": 0.0001993999413650466, "loss": 11.69, "step": 5029 }, { "epoch": 0.10529180272963241, "grad_norm": 0.28677451610565186, "learning_rate": 0.00019939970151007015, "loss": 11.7091, "step": 5030 }, { "epoch": 0.10531273549359457, "grad_norm": 0.24117766320705414, "learning_rate": 0.00019939946160731027, "loss": 11.6986, "step": 5031 }, { "epoch": 0.10533366825755673, "grad_norm": 0.19134944677352905, "learning_rate": 0.00019939922165676706, "loss": 11.6854, "step": 5032 }, { "epoch": 0.10535460102151888, "grad_norm": 0.2919115424156189, "learning_rate": 0.00019939898165844064, "loss": 11.6869, "step": 5033 }, { "epoch": 0.10537553378548103, "grad_norm": 0.24232475459575653, "learning_rate": 0.00019939874161233116, "loss": 11.6803, "step": 5034 }, { "epoch": 0.1053964665494432, "grad_norm": 0.20665080845355988, "learning_rate": 0.00019939850151843872, "loss": 11.6845, "step": 5035 }, { "epoch": 0.10541739931340534, "grad_norm": 0.21591487526893616, "learning_rate": 0.0001993982613767634, "loss": 11.688, "step": 5036 }, { "epoch": 0.1054383320773675, "grad_norm": 0.2338249236345291, "learning_rate": 0.0001993980211873054, "loss": 11.6688, "step": 5037 }, { "epoch": 0.10545926484132966, "grad_norm": 0.2501826882362366, "learning_rate": 0.0001993977809500647, "loss": 11.6887, "step": 5038 }, { "epoch": 0.1054801976052918, "grad_norm": 0.26532959938049316, "learning_rate": 0.00019939754066504155, "loss": 11.6876, "step": 5039 }, { "epoch": 0.10550113036925396, "grad_norm": 0.2244165688753128, "learning_rate": 0.000199397300332236, "loss": 11.6803, "step": 5040 }, { "epoch": 0.1055220631332161, "grad_norm": 0.2975502908229828, "learning_rate": 0.0001993970599516482, "loss": 11.6897, "step": 5041 }, { "epoch": 0.10554299589717826, "grad_norm": 0.21228115260601044, "learning_rate": 0.00019939681952327816, "loss": 11.6984, "step": 5042 }, { "epoch": 0.10556392866114042, "grad_norm": 0.22543732821941376, "learning_rate": 0.00019939657904712615, "loss": 11.6698, "step": 5043 }, { "epoch": 0.10558486142510257, "grad_norm": 0.2593076229095459, "learning_rate": 0.00019939633852319218, "loss": 11.6963, "step": 5044 }, { "epoch": 0.10560579418906473, "grad_norm": 0.23576286435127258, "learning_rate": 0.00019939609795147645, "loss": 11.6993, "step": 5045 }, { "epoch": 0.10562672695302688, "grad_norm": 0.3307250440120697, "learning_rate": 0.00019939585733197895, "loss": 11.69, "step": 5046 }, { "epoch": 0.10564765971698903, "grad_norm": 0.29183098673820496, "learning_rate": 0.00019939561666469993, "loss": 11.686, "step": 5047 }, { "epoch": 0.10566859248095119, "grad_norm": 0.20910893380641937, "learning_rate": 0.00019939537594963942, "loss": 11.6739, "step": 5048 }, { "epoch": 0.10568952524491333, "grad_norm": 0.1786544770002365, "learning_rate": 0.00019939513518679754, "loss": 11.6783, "step": 5049 }, { "epoch": 0.10571045800887549, "grad_norm": 0.24376280605793, "learning_rate": 0.00019939489437617445, "loss": 11.6732, "step": 5050 }, { "epoch": 0.10573139077283765, "grad_norm": 0.26810482144355774, "learning_rate": 0.00019939465351777024, "loss": 11.6694, "step": 5051 }, { "epoch": 0.1057523235367998, "grad_norm": 0.19931161403656006, "learning_rate": 0.00019939441261158502, "loss": 11.6772, "step": 5052 }, { "epoch": 0.10577325630076195, "grad_norm": 0.21523520350456238, "learning_rate": 0.00019939417165761893, "loss": 11.68, "step": 5053 }, { "epoch": 0.10579418906472411, "grad_norm": 0.22053535282611847, "learning_rate": 0.00019939393065587208, "loss": 11.6872, "step": 5054 }, { "epoch": 0.10581512182868626, "grad_norm": 0.22945702075958252, "learning_rate": 0.00019939368960634454, "loss": 11.6734, "step": 5055 }, { "epoch": 0.10583605459264842, "grad_norm": 0.2873021066188812, "learning_rate": 0.00019939344850903646, "loss": 11.6936, "step": 5056 }, { "epoch": 0.10585698735661056, "grad_norm": 0.18711650371551514, "learning_rate": 0.000199393207363948, "loss": 11.6858, "step": 5057 }, { "epoch": 0.10587792012057272, "grad_norm": 0.2349146157503128, "learning_rate": 0.0001993929661710792, "loss": 11.6838, "step": 5058 }, { "epoch": 0.10589885288453488, "grad_norm": 1.4242184162139893, "learning_rate": 0.00019939272493043022, "loss": 11.6062, "step": 5059 }, { "epoch": 0.10591978564849702, "grad_norm": 0.27502456307411194, "learning_rate": 0.00019939248364200115, "loss": 11.6904, "step": 5060 }, { "epoch": 0.10594071841245918, "grad_norm": 0.19721619784832, "learning_rate": 0.00019939224230579216, "loss": 11.6868, "step": 5061 }, { "epoch": 0.10596165117642134, "grad_norm": 0.24638743698596954, "learning_rate": 0.00019939200092180327, "loss": 11.6848, "step": 5062 }, { "epoch": 0.10598258394038348, "grad_norm": 0.2754746377468109, "learning_rate": 0.0001993917594900347, "loss": 11.6616, "step": 5063 }, { "epoch": 0.10600351670434564, "grad_norm": 0.24459972977638245, "learning_rate": 0.00019939151801048652, "loss": 11.6772, "step": 5064 }, { "epoch": 0.1060244494683078, "grad_norm": 0.24708133935928345, "learning_rate": 0.00019939127648315884, "loss": 11.6847, "step": 5065 }, { "epoch": 0.10604538223226995, "grad_norm": 0.21498329937458038, "learning_rate": 0.00019939103490805175, "loss": 11.6725, "step": 5066 }, { "epoch": 0.1060663149962321, "grad_norm": 0.24330343306064606, "learning_rate": 0.00019939079328516542, "loss": 11.6851, "step": 5067 }, { "epoch": 0.10608724776019425, "grad_norm": 0.2966148257255554, "learning_rate": 0.00019939055161449995, "loss": 11.69, "step": 5068 }, { "epoch": 0.10610818052415641, "grad_norm": 0.2747936546802521, "learning_rate": 0.00019939030989605546, "loss": 11.6843, "step": 5069 }, { "epoch": 0.10612911328811857, "grad_norm": 0.2659027874469757, "learning_rate": 0.00019939006812983202, "loss": 11.6623, "step": 5070 }, { "epoch": 0.10615004605208071, "grad_norm": 0.2600400447845459, "learning_rate": 0.00019938982631582982, "loss": 11.6947, "step": 5071 }, { "epoch": 0.10617097881604287, "grad_norm": 0.17726397514343262, "learning_rate": 0.0001993895844540489, "loss": 11.6872, "step": 5072 }, { "epoch": 0.10619191158000503, "grad_norm": 0.21423138678073883, "learning_rate": 0.00019938934254448945, "loss": 11.6779, "step": 5073 }, { "epoch": 0.10621284434396717, "grad_norm": 0.30551066994667053, "learning_rate": 0.00019938910058715152, "loss": 11.6844, "step": 5074 }, { "epoch": 0.10623377710792933, "grad_norm": 0.21119388937950134, "learning_rate": 0.0001993888585820353, "loss": 11.677, "step": 5075 }, { "epoch": 0.10625470987189148, "grad_norm": 0.21416282653808594, "learning_rate": 0.0001993886165291408, "loss": 11.6794, "step": 5076 }, { "epoch": 0.10627564263585364, "grad_norm": 0.2355877161026001, "learning_rate": 0.00019938837442846826, "loss": 11.6936, "step": 5077 }, { "epoch": 0.1062965753998158, "grad_norm": 0.23632192611694336, "learning_rate": 0.00019938813228001772, "loss": 11.6816, "step": 5078 }, { "epoch": 0.10631750816377794, "grad_norm": 0.3021021783351898, "learning_rate": 0.0001993878900837893, "loss": 11.6954, "step": 5079 }, { "epoch": 0.1063384409277401, "grad_norm": 0.23862381279468536, "learning_rate": 0.00019938764783978316, "loss": 11.6633, "step": 5080 }, { "epoch": 0.10635937369170226, "grad_norm": 0.26414021849632263, "learning_rate": 0.00019938740554799935, "loss": 11.6838, "step": 5081 }, { "epoch": 0.1063803064556644, "grad_norm": 0.23284044861793518, "learning_rate": 0.000199387163208438, "loss": 11.6694, "step": 5082 }, { "epoch": 0.10640123921962656, "grad_norm": 0.18230745196342468, "learning_rate": 0.00019938692082109932, "loss": 11.6868, "step": 5083 }, { "epoch": 0.1064221719835887, "grad_norm": 0.21125920116901398, "learning_rate": 0.0001993866783859833, "loss": 11.6748, "step": 5084 }, { "epoch": 0.10644310474755087, "grad_norm": 0.20117470622062683, "learning_rate": 0.00019938643590309014, "loss": 11.6964, "step": 5085 }, { "epoch": 0.10646403751151302, "grad_norm": 0.27656376361846924, "learning_rate": 0.00019938619337241993, "loss": 11.6776, "step": 5086 }, { "epoch": 0.10648497027547517, "grad_norm": 0.21585851907730103, "learning_rate": 0.00019938595079397277, "loss": 11.6795, "step": 5087 }, { "epoch": 0.10650590303943733, "grad_norm": 0.3010903298854828, "learning_rate": 0.0001993857081677488, "loss": 11.685, "step": 5088 }, { "epoch": 0.10652683580339949, "grad_norm": 0.21176734566688538, "learning_rate": 0.00019938546549374812, "loss": 11.6895, "step": 5089 }, { "epoch": 0.10654776856736163, "grad_norm": 0.27639251947402954, "learning_rate": 0.00019938522277197085, "loss": 11.6841, "step": 5090 }, { "epoch": 0.10656870133132379, "grad_norm": 0.2241632491350174, "learning_rate": 0.00019938498000241715, "loss": 11.6732, "step": 5091 }, { "epoch": 0.10658963409528595, "grad_norm": 0.360817551612854, "learning_rate": 0.00019938473718508705, "loss": 11.6799, "step": 5092 }, { "epoch": 0.1066105668592481, "grad_norm": 0.2341230809688568, "learning_rate": 0.00019938449431998074, "loss": 11.687, "step": 5093 }, { "epoch": 0.10663149962321025, "grad_norm": 0.2740664482116699, "learning_rate": 0.0001993842514070983, "loss": 11.6715, "step": 5094 }, { "epoch": 0.1066524323871724, "grad_norm": 0.27898791432380676, "learning_rate": 0.00019938400844643988, "loss": 11.6999, "step": 5095 }, { "epoch": 0.10667336515113456, "grad_norm": 0.26574140787124634, "learning_rate": 0.00019938376543800557, "loss": 11.6826, "step": 5096 }, { "epoch": 0.10669429791509671, "grad_norm": 0.21168263256549835, "learning_rate": 0.00019938352238179548, "loss": 11.6886, "step": 5097 }, { "epoch": 0.10671523067905886, "grad_norm": 0.20599029958248138, "learning_rate": 0.00019938327927780973, "loss": 11.6836, "step": 5098 }, { "epoch": 0.10673616344302102, "grad_norm": 0.2183440923690796, "learning_rate": 0.00019938303612604847, "loss": 11.6748, "step": 5099 }, { "epoch": 0.10675709620698318, "grad_norm": 0.21236887574195862, "learning_rate": 0.00019938279292651179, "loss": 11.6854, "step": 5100 }, { "epoch": 0.10677802897094532, "grad_norm": 0.1880977600812912, "learning_rate": 0.0001993825496791998, "loss": 11.6902, "step": 5101 }, { "epoch": 0.10679896173490748, "grad_norm": 0.2111562341451645, "learning_rate": 0.00019938230638411262, "loss": 11.6893, "step": 5102 }, { "epoch": 0.10681989449886962, "grad_norm": 0.23851332068443298, "learning_rate": 0.0001993820630412504, "loss": 11.6778, "step": 5103 }, { "epoch": 0.10684082726283178, "grad_norm": 0.22161172330379486, "learning_rate": 0.0001993818196506132, "loss": 11.6713, "step": 5104 }, { "epoch": 0.10686176002679394, "grad_norm": 0.29937484860420227, "learning_rate": 0.00019938157621220118, "loss": 11.6737, "step": 5105 }, { "epoch": 0.10688269279075609, "grad_norm": 0.28406620025634766, "learning_rate": 0.00019938133272601446, "loss": 11.698, "step": 5106 }, { "epoch": 0.10690362555471825, "grad_norm": 0.21044230461120605, "learning_rate": 0.00019938108919205312, "loss": 11.6856, "step": 5107 }, { "epoch": 0.1069245583186804, "grad_norm": 0.21970820426940918, "learning_rate": 0.00019938084561031732, "loss": 11.6761, "step": 5108 }, { "epoch": 0.10694549108264255, "grad_norm": 0.27927926182746887, "learning_rate": 0.0001993806019808071, "loss": 11.6935, "step": 5109 }, { "epoch": 0.10696642384660471, "grad_norm": 0.30278030037879944, "learning_rate": 0.0001993803583035227, "loss": 11.6893, "step": 5110 }, { "epoch": 0.10698735661056685, "grad_norm": 0.2097625881433487, "learning_rate": 0.00019938011457846413, "loss": 11.6961, "step": 5111 }, { "epoch": 0.10700828937452901, "grad_norm": 0.22516389191150665, "learning_rate": 0.00019937987080563159, "loss": 11.6698, "step": 5112 }, { "epoch": 0.10702922213849117, "grad_norm": 0.2768486440181732, "learning_rate": 0.0001993796269850251, "loss": 11.6846, "step": 5113 }, { "epoch": 0.10705015490245331, "grad_norm": 0.28146108984947205, "learning_rate": 0.00019937938311664487, "loss": 11.6806, "step": 5114 }, { "epoch": 0.10707108766641547, "grad_norm": 0.31452569365501404, "learning_rate": 0.000199379139200491, "loss": 11.671, "step": 5115 }, { "epoch": 0.10709202043037763, "grad_norm": 0.27778005599975586, "learning_rate": 0.0001993788952365635, "loss": 11.6838, "step": 5116 }, { "epoch": 0.10711295319433978, "grad_norm": 0.23806697130203247, "learning_rate": 0.00019937865122486264, "loss": 11.6882, "step": 5117 }, { "epoch": 0.10713388595830194, "grad_norm": 0.208242729306221, "learning_rate": 0.00019937840716538848, "loss": 11.6915, "step": 5118 }, { "epoch": 0.1071548187222641, "grad_norm": 0.22649234533309937, "learning_rate": 0.0001993781630581411, "loss": 11.6975, "step": 5119 }, { "epoch": 0.10717575148622624, "grad_norm": 0.2220088541507721, "learning_rate": 0.00019937791890312063, "loss": 11.6681, "step": 5120 }, { "epoch": 0.1071966842501884, "grad_norm": 0.2327054888010025, "learning_rate": 0.00019937767470032725, "loss": 11.6814, "step": 5121 }, { "epoch": 0.10721761701415054, "grad_norm": 0.2865576446056366, "learning_rate": 0.000199377430449761, "loss": 11.686, "step": 5122 }, { "epoch": 0.1072385497781127, "grad_norm": 0.2743217647075653, "learning_rate": 0.00019937718615142203, "loss": 11.6818, "step": 5123 }, { "epoch": 0.10725948254207486, "grad_norm": 0.33387988805770874, "learning_rate": 0.00019937694180531047, "loss": 11.6895, "step": 5124 }, { "epoch": 0.107280415306037, "grad_norm": 0.22453303635120392, "learning_rate": 0.0001993766974114264, "loss": 11.6832, "step": 5125 }, { "epoch": 0.10730134806999916, "grad_norm": 0.564026951789856, "learning_rate": 0.00019937645296976998, "loss": 11.6954, "step": 5126 }, { "epoch": 0.10732228083396132, "grad_norm": 0.22946509718894958, "learning_rate": 0.00019937620848034128, "loss": 11.6756, "step": 5127 }, { "epoch": 0.10734321359792347, "grad_norm": 0.26441630721092224, "learning_rate": 0.00019937596394314046, "loss": 11.6883, "step": 5128 }, { "epoch": 0.10736414636188563, "grad_norm": 0.2019282579421997, "learning_rate": 0.00019937571935816763, "loss": 11.6931, "step": 5129 }, { "epoch": 0.10738507912584777, "grad_norm": 0.2554166913032532, "learning_rate": 0.00019937547472542288, "loss": 11.6831, "step": 5130 }, { "epoch": 0.10740601188980993, "grad_norm": 0.20380660891532898, "learning_rate": 0.00019937523004490636, "loss": 11.6803, "step": 5131 }, { "epoch": 0.10742694465377209, "grad_norm": 0.2505183219909668, "learning_rate": 0.0001993749853166182, "loss": 11.6924, "step": 5132 }, { "epoch": 0.10744787741773423, "grad_norm": 0.22648616135120392, "learning_rate": 0.00019937474054055847, "loss": 11.6836, "step": 5133 }, { "epoch": 0.10746881018169639, "grad_norm": 0.1920483112335205, "learning_rate": 0.00019937449571672732, "loss": 11.6815, "step": 5134 }, { "epoch": 0.10748974294565855, "grad_norm": 0.3498586118221283, "learning_rate": 0.00019937425084512486, "loss": 11.7006, "step": 5135 }, { "epoch": 0.1075106757096207, "grad_norm": 0.24653737246990204, "learning_rate": 0.00019937400592575117, "loss": 11.6808, "step": 5136 }, { "epoch": 0.10753160847358285, "grad_norm": 0.2276907116174698, "learning_rate": 0.00019937376095860646, "loss": 11.6694, "step": 5137 }, { "epoch": 0.107552541237545, "grad_norm": 0.24133223295211792, "learning_rate": 0.00019937351594369074, "loss": 11.6727, "step": 5138 }, { "epoch": 0.10757347400150716, "grad_norm": 0.18064568936824799, "learning_rate": 0.00019937327088100418, "loss": 11.6872, "step": 5139 }, { "epoch": 0.10759440676546932, "grad_norm": 0.2303266078233719, "learning_rate": 0.00019937302577054694, "loss": 11.6756, "step": 5140 }, { "epoch": 0.10761533952943146, "grad_norm": 0.23024693131446838, "learning_rate": 0.00019937278061231908, "loss": 11.689, "step": 5141 }, { "epoch": 0.10763627229339362, "grad_norm": 0.2956181466579437, "learning_rate": 0.00019937253540632072, "loss": 11.6765, "step": 5142 }, { "epoch": 0.10765720505735578, "grad_norm": 0.28123384714126587, "learning_rate": 0.000199372290152552, "loss": 11.6925, "step": 5143 }, { "epoch": 0.10767813782131792, "grad_norm": 0.24043786525726318, "learning_rate": 0.00019937204485101303, "loss": 11.6922, "step": 5144 }, { "epoch": 0.10769907058528008, "grad_norm": 0.21726012229919434, "learning_rate": 0.00019937179950170394, "loss": 11.6877, "step": 5145 }, { "epoch": 0.10772000334924223, "grad_norm": 0.25442662835121155, "learning_rate": 0.0001993715541046248, "loss": 11.6667, "step": 5146 }, { "epoch": 0.10774093611320439, "grad_norm": 0.18508170545101166, "learning_rate": 0.00019937130865977577, "loss": 11.6915, "step": 5147 }, { "epoch": 0.10776186887716654, "grad_norm": 0.23940862715244293, "learning_rate": 0.000199371063167157, "loss": 11.6928, "step": 5148 }, { "epoch": 0.10778280164112869, "grad_norm": 0.17848069965839386, "learning_rate": 0.00019937081762676852, "loss": 11.6801, "step": 5149 }, { "epoch": 0.10780373440509085, "grad_norm": 0.21742495894432068, "learning_rate": 0.00019937057203861053, "loss": 11.6723, "step": 5150 }, { "epoch": 0.107824667169053, "grad_norm": 0.19573639333248138, "learning_rate": 0.0001993703264026831, "loss": 11.6925, "step": 5151 }, { "epoch": 0.10784559993301515, "grad_norm": 0.20327606797218323, "learning_rate": 0.00019937008071898636, "loss": 11.6674, "step": 5152 }, { "epoch": 0.10786653269697731, "grad_norm": 0.24101540446281433, "learning_rate": 0.00019936983498752042, "loss": 11.6853, "step": 5153 }, { "epoch": 0.10788746546093947, "grad_norm": 0.23669053614139557, "learning_rate": 0.00019936958920828543, "loss": 11.6989, "step": 5154 }, { "epoch": 0.10790839822490161, "grad_norm": 0.2327917218208313, "learning_rate": 0.0001993693433812815, "loss": 11.6862, "step": 5155 }, { "epoch": 0.10792933098886377, "grad_norm": 0.2133960872888565, "learning_rate": 0.0001993690975065087, "loss": 11.6753, "step": 5156 }, { "epoch": 0.10795026375282592, "grad_norm": 0.19681137800216675, "learning_rate": 0.00019936885158396722, "loss": 11.6768, "step": 5157 }, { "epoch": 0.10797119651678808, "grad_norm": 0.20495715737342834, "learning_rate": 0.0001993686056136571, "loss": 11.6817, "step": 5158 }, { "epoch": 0.10799212928075023, "grad_norm": 0.22010058164596558, "learning_rate": 0.00019936835959557853, "loss": 11.6873, "step": 5159 }, { "epoch": 0.10801306204471238, "grad_norm": 0.20201179385185242, "learning_rate": 0.00019936811352973157, "loss": 11.6768, "step": 5160 }, { "epoch": 0.10803399480867454, "grad_norm": 0.21365760266780853, "learning_rate": 0.00019936786741611638, "loss": 11.6766, "step": 5161 }, { "epoch": 0.1080549275726367, "grad_norm": 0.3692285418510437, "learning_rate": 0.00019936762125473306, "loss": 11.6847, "step": 5162 }, { "epoch": 0.10807586033659884, "grad_norm": 0.19174137711524963, "learning_rate": 0.00019936737504558177, "loss": 11.6844, "step": 5163 }, { "epoch": 0.108096793100561, "grad_norm": 0.20791836082935333, "learning_rate": 0.00019936712878866256, "loss": 11.6966, "step": 5164 }, { "epoch": 0.10811772586452315, "grad_norm": 0.2141919881105423, "learning_rate": 0.00019936688248397557, "loss": 11.668, "step": 5165 }, { "epoch": 0.1081386586284853, "grad_norm": 0.24468514323234558, "learning_rate": 0.00019936663613152097, "loss": 11.6763, "step": 5166 }, { "epoch": 0.10815959139244746, "grad_norm": 0.20221047103405, "learning_rate": 0.00019936638973129878, "loss": 11.6801, "step": 5167 }, { "epoch": 0.10818052415640961, "grad_norm": 0.2621046304702759, "learning_rate": 0.0001993661432833092, "loss": 11.6803, "step": 5168 }, { "epoch": 0.10820145692037177, "grad_norm": 0.27498331665992737, "learning_rate": 0.00019936589678755233, "loss": 11.6888, "step": 5169 }, { "epoch": 0.10822238968433393, "grad_norm": 0.31834226846694946, "learning_rate": 0.00019936565024402829, "loss": 11.6781, "step": 5170 }, { "epoch": 0.10824332244829607, "grad_norm": 0.23970109224319458, "learning_rate": 0.00019936540365273717, "loss": 11.6734, "step": 5171 }, { "epoch": 0.10826425521225823, "grad_norm": 0.2602989077568054, "learning_rate": 0.00019936515701367912, "loss": 11.6629, "step": 5172 }, { "epoch": 0.10828518797622037, "grad_norm": 0.18271280825138092, "learning_rate": 0.00019936491032685426, "loss": 11.6847, "step": 5173 }, { "epoch": 0.10830612074018253, "grad_norm": 0.2268477827310562, "learning_rate": 0.00019936466359226268, "loss": 11.6813, "step": 5174 }, { "epoch": 0.10832705350414469, "grad_norm": 0.27364930510520935, "learning_rate": 0.00019936441680990451, "loss": 11.6946, "step": 5175 }, { "epoch": 0.10834798626810684, "grad_norm": 0.2108142077922821, "learning_rate": 0.0001993641699797799, "loss": 11.6728, "step": 5176 }, { "epoch": 0.108368919032069, "grad_norm": 0.28045231103897095, "learning_rate": 0.0001993639231018889, "loss": 11.6892, "step": 5177 }, { "epoch": 0.10838985179603115, "grad_norm": 0.23475700616836548, "learning_rate": 0.00019936367617623171, "loss": 11.6904, "step": 5178 }, { "epoch": 0.1084107845599933, "grad_norm": 0.1964065283536911, "learning_rate": 0.0001993634292028084, "loss": 11.6692, "step": 5179 }, { "epoch": 0.10843171732395546, "grad_norm": 0.26310229301452637, "learning_rate": 0.00019936318218161908, "loss": 11.6823, "step": 5180 }, { "epoch": 0.10845265008791762, "grad_norm": 0.21519707143306732, "learning_rate": 0.00019936293511266392, "loss": 11.6734, "step": 5181 }, { "epoch": 0.10847358285187976, "grad_norm": 0.23506972193717957, "learning_rate": 0.00019936268799594298, "loss": 11.6726, "step": 5182 }, { "epoch": 0.10849451561584192, "grad_norm": 0.21989291906356812, "learning_rate": 0.0001993624408314564, "loss": 11.6993, "step": 5183 }, { "epoch": 0.10851544837980406, "grad_norm": 0.20844651758670807, "learning_rate": 0.00019936219361920432, "loss": 11.6748, "step": 5184 }, { "epoch": 0.10853638114376622, "grad_norm": 0.19873060286045074, "learning_rate": 0.00019936194635918685, "loss": 11.6881, "step": 5185 }, { "epoch": 0.10855731390772838, "grad_norm": 0.18479296565055847, "learning_rate": 0.00019936169905140406, "loss": 11.6761, "step": 5186 }, { "epoch": 0.10857824667169053, "grad_norm": 0.25024154782295227, "learning_rate": 0.00019936145169585615, "loss": 11.6799, "step": 5187 }, { "epoch": 0.10859917943565268, "grad_norm": 0.23319071531295776, "learning_rate": 0.0001993612042925432, "loss": 11.6669, "step": 5188 }, { "epoch": 0.10862011219961484, "grad_norm": 0.24438558518886566, "learning_rate": 0.0001993609568414653, "loss": 11.6898, "step": 5189 }, { "epoch": 0.10864104496357699, "grad_norm": 0.24133798480033875, "learning_rate": 0.0001993607093426226, "loss": 11.686, "step": 5190 }, { "epoch": 0.10866197772753915, "grad_norm": 0.21579551696777344, "learning_rate": 0.00019936046179601523, "loss": 11.6836, "step": 5191 }, { "epoch": 0.10868291049150129, "grad_norm": 0.24025081098079681, "learning_rate": 0.0001993602142016433, "loss": 11.6867, "step": 5192 }, { "epoch": 0.10870384325546345, "grad_norm": 0.21360166370868683, "learning_rate": 0.0001993599665595069, "loss": 11.69, "step": 5193 }, { "epoch": 0.10872477601942561, "grad_norm": 0.2228320837020874, "learning_rate": 0.0001993597188696062, "loss": 11.6811, "step": 5194 }, { "epoch": 0.10874570878338775, "grad_norm": 0.2511967718601227, "learning_rate": 0.0001993594711319413, "loss": 11.6762, "step": 5195 }, { "epoch": 0.10876664154734991, "grad_norm": 0.20799167454242706, "learning_rate": 0.0001993592233465123, "loss": 11.695, "step": 5196 }, { "epoch": 0.10878757431131207, "grad_norm": 0.20557700097560883, "learning_rate": 0.0001993589755133193, "loss": 11.6829, "step": 5197 }, { "epoch": 0.10880850707527422, "grad_norm": 0.21442757546901703, "learning_rate": 0.00019935872763236247, "loss": 11.6963, "step": 5198 }, { "epoch": 0.10882943983923637, "grad_norm": 0.18665380775928497, "learning_rate": 0.0001993584797036419, "loss": 11.6672, "step": 5199 }, { "epoch": 0.10885037260319852, "grad_norm": 0.2192908376455307, "learning_rate": 0.00019935823172715776, "loss": 11.6706, "step": 5200 }, { "epoch": 0.10887130536716068, "grad_norm": 0.2210923582315445, "learning_rate": 0.00019935798370291009, "loss": 11.6777, "step": 5201 }, { "epoch": 0.10889223813112284, "grad_norm": 0.27637264132499695, "learning_rate": 0.00019935773563089905, "loss": 11.6791, "step": 5202 }, { "epoch": 0.10891317089508498, "grad_norm": 0.356220543384552, "learning_rate": 0.00019935748751112473, "loss": 11.6789, "step": 5203 }, { "epoch": 0.10893410365904714, "grad_norm": 0.22359217703342438, "learning_rate": 0.0001993572393435873, "loss": 11.6789, "step": 5204 }, { "epoch": 0.1089550364230093, "grad_norm": 0.20246869325637817, "learning_rate": 0.00019935699112828687, "loss": 11.6856, "step": 5205 }, { "epoch": 0.10897596918697144, "grad_norm": 0.22368547320365906, "learning_rate": 0.00019935674286522353, "loss": 11.6909, "step": 5206 }, { "epoch": 0.1089969019509336, "grad_norm": 0.2431783825159073, "learning_rate": 0.0001993564945543974, "loss": 11.6782, "step": 5207 }, { "epoch": 0.10901783471489576, "grad_norm": 0.23659752309322357, "learning_rate": 0.00019935624619580862, "loss": 11.6797, "step": 5208 }, { "epoch": 0.1090387674788579, "grad_norm": 0.3117172122001648, "learning_rate": 0.00019935599778945728, "loss": 11.6776, "step": 5209 }, { "epoch": 0.10905970024282007, "grad_norm": 0.24672789871692657, "learning_rate": 0.00019935574933534355, "loss": 11.676, "step": 5210 }, { "epoch": 0.10908063300678221, "grad_norm": 0.175988107919693, "learning_rate": 0.00019935550083346752, "loss": 11.6949, "step": 5211 }, { "epoch": 0.10910156577074437, "grad_norm": 0.23661281168460846, "learning_rate": 0.0001993552522838293, "loss": 11.6685, "step": 5212 }, { "epoch": 0.10912249853470653, "grad_norm": 0.20167918503284454, "learning_rate": 0.00019935500368642903, "loss": 11.6762, "step": 5213 }, { "epoch": 0.10914343129866867, "grad_norm": 0.2361946403980255, "learning_rate": 0.00019935475504126682, "loss": 11.6825, "step": 5214 }, { "epoch": 0.10916436406263083, "grad_norm": 0.2736468017101288, "learning_rate": 0.00019935450634834276, "loss": 11.6821, "step": 5215 }, { "epoch": 0.10918529682659299, "grad_norm": 0.23377536237239838, "learning_rate": 0.00019935425760765702, "loss": 11.6811, "step": 5216 }, { "epoch": 0.10920622959055513, "grad_norm": 0.27655506134033203, "learning_rate": 0.00019935400881920972, "loss": 11.6991, "step": 5217 }, { "epoch": 0.1092271623545173, "grad_norm": 0.2754620611667633, "learning_rate": 0.00019935375998300092, "loss": 11.6974, "step": 5218 }, { "epoch": 0.10924809511847944, "grad_norm": 0.2502846121788025, "learning_rate": 0.0001993535110990308, "loss": 11.6815, "step": 5219 }, { "epoch": 0.1092690278824416, "grad_norm": 0.25876349210739136, "learning_rate": 0.0001993532621672994, "loss": 11.6816, "step": 5220 }, { "epoch": 0.10928996064640376, "grad_norm": 0.1906203180551529, "learning_rate": 0.00019935301318780698, "loss": 11.6898, "step": 5221 }, { "epoch": 0.1093108934103659, "grad_norm": 0.23004940152168274, "learning_rate": 0.00019935276416055352, "loss": 11.6818, "step": 5222 }, { "epoch": 0.10933182617432806, "grad_norm": 0.2654985785484314, "learning_rate": 0.00019935251508553921, "loss": 11.6849, "step": 5223 }, { "epoch": 0.10935275893829022, "grad_norm": 0.24384884536266327, "learning_rate": 0.00019935226596276415, "loss": 11.6684, "step": 5224 }, { "epoch": 0.10937369170225236, "grad_norm": 0.20082354545593262, "learning_rate": 0.00019935201679222847, "loss": 11.6753, "step": 5225 }, { "epoch": 0.10939462446621452, "grad_norm": 0.29195499420166016, "learning_rate": 0.0001993517675739323, "loss": 11.6839, "step": 5226 }, { "epoch": 0.10941555723017667, "grad_norm": 0.207699716091156, "learning_rate": 0.0001993515183078757, "loss": 11.6763, "step": 5227 }, { "epoch": 0.10943648999413882, "grad_norm": 0.2387475073337555, "learning_rate": 0.00019935126899405886, "loss": 11.6749, "step": 5228 }, { "epoch": 0.10945742275810098, "grad_norm": 0.2691906690597534, "learning_rate": 0.00019935101963248187, "loss": 11.6817, "step": 5229 }, { "epoch": 0.10947835552206313, "grad_norm": 0.23643498122692108, "learning_rate": 0.0001993507702231449, "loss": 11.6743, "step": 5230 }, { "epoch": 0.10949928828602529, "grad_norm": 0.3176893889904022, "learning_rate": 0.00019935052076604798, "loss": 11.6896, "step": 5231 }, { "epoch": 0.10952022104998745, "grad_norm": 0.32035791873931885, "learning_rate": 0.00019935027126119126, "loss": 11.6742, "step": 5232 }, { "epoch": 0.10954115381394959, "grad_norm": 0.26276895403862, "learning_rate": 0.0001993500217085749, "loss": 11.6907, "step": 5233 }, { "epoch": 0.10956208657791175, "grad_norm": 0.23588508367538452, "learning_rate": 0.000199349772108199, "loss": 11.6813, "step": 5234 }, { "epoch": 0.10958301934187391, "grad_norm": 0.21840587258338928, "learning_rate": 0.00019934952246006363, "loss": 11.6825, "step": 5235 }, { "epoch": 0.10960395210583605, "grad_norm": 0.18683037161827087, "learning_rate": 0.000199349272764169, "loss": 11.6826, "step": 5236 }, { "epoch": 0.10962488486979821, "grad_norm": 0.3145297169685364, "learning_rate": 0.00019934902302051516, "loss": 11.704, "step": 5237 }, { "epoch": 0.10964581763376036, "grad_norm": 0.18620461225509644, "learning_rate": 0.00019934877322910226, "loss": 11.6849, "step": 5238 }, { "epoch": 0.10966675039772251, "grad_norm": 0.24904419481754303, "learning_rate": 0.00019934852338993042, "loss": 11.6834, "step": 5239 }, { "epoch": 0.10968768316168467, "grad_norm": 0.2023281455039978, "learning_rate": 0.00019934827350299975, "loss": 11.6855, "step": 5240 }, { "epoch": 0.10970861592564682, "grad_norm": 0.2256787270307541, "learning_rate": 0.00019934802356831038, "loss": 11.6593, "step": 5241 }, { "epoch": 0.10972954868960898, "grad_norm": 0.21379388868808746, "learning_rate": 0.0001993477735858624, "loss": 11.688, "step": 5242 }, { "epoch": 0.10975048145357114, "grad_norm": 0.21351610124111176, "learning_rate": 0.00019934752355565597, "loss": 11.6958, "step": 5243 }, { "epoch": 0.10977141421753328, "grad_norm": 0.2934083342552185, "learning_rate": 0.0001993472734776912, "loss": 11.6858, "step": 5244 }, { "epoch": 0.10979234698149544, "grad_norm": 0.2946750521659851, "learning_rate": 0.00019934702335196817, "loss": 11.6885, "step": 5245 }, { "epoch": 0.10981327974545758, "grad_norm": 0.20615972578525543, "learning_rate": 0.0001993467731784871, "loss": 11.6829, "step": 5246 }, { "epoch": 0.10983421250941974, "grad_norm": 0.1966903656721115, "learning_rate": 0.000199346522957248, "loss": 11.6959, "step": 5247 }, { "epoch": 0.1098551452733819, "grad_norm": 0.2375546395778656, "learning_rate": 0.00019934627268825103, "loss": 11.6929, "step": 5248 }, { "epoch": 0.10987607803734405, "grad_norm": 0.19970634579658508, "learning_rate": 0.00019934602237149632, "loss": 11.6754, "step": 5249 }, { "epoch": 0.1098970108013062, "grad_norm": 0.25405052304267883, "learning_rate": 0.000199345772006984, "loss": 11.693, "step": 5250 }, { "epoch": 0.10991794356526836, "grad_norm": 0.24274910986423492, "learning_rate": 0.0001993455215947142, "loss": 11.6818, "step": 5251 }, { "epoch": 0.10993887632923051, "grad_norm": 0.18152683973312378, "learning_rate": 0.00019934527113468696, "loss": 11.6725, "step": 5252 }, { "epoch": 0.10995980909319267, "grad_norm": 0.18928812444210052, "learning_rate": 0.0001993450206269025, "loss": 11.6713, "step": 5253 }, { "epoch": 0.10998074185715481, "grad_norm": 0.23957012593746185, "learning_rate": 0.00019934477007136088, "loss": 11.6912, "step": 5254 }, { "epoch": 0.11000167462111697, "grad_norm": 0.19383859634399414, "learning_rate": 0.00019934451946806224, "loss": 11.6808, "step": 5255 }, { "epoch": 0.11002260738507913, "grad_norm": 0.26161816716194153, "learning_rate": 0.00019934426881700672, "loss": 11.6861, "step": 5256 }, { "epoch": 0.11004354014904127, "grad_norm": 0.25586065649986267, "learning_rate": 0.0001993440181181944, "loss": 11.6669, "step": 5257 }, { "epoch": 0.11006447291300343, "grad_norm": 0.24110908806324005, "learning_rate": 0.0001993437673716254, "loss": 11.697, "step": 5258 }, { "epoch": 0.11008540567696559, "grad_norm": 0.25237134099006653, "learning_rate": 0.0001993435165772999, "loss": 11.6752, "step": 5259 }, { "epoch": 0.11010633844092774, "grad_norm": 0.25130409002304077, "learning_rate": 0.00019934326573521795, "loss": 11.6864, "step": 5260 }, { "epoch": 0.1101272712048899, "grad_norm": 0.2706129252910614, "learning_rate": 0.00019934301484537972, "loss": 11.6807, "step": 5261 }, { "epoch": 0.11014820396885204, "grad_norm": 0.3206624686717987, "learning_rate": 0.0001993427639077853, "loss": 11.6832, "step": 5262 }, { "epoch": 0.1101691367328142, "grad_norm": 0.17711983621120453, "learning_rate": 0.0001993425129224348, "loss": 11.6884, "step": 5263 }, { "epoch": 0.11019006949677636, "grad_norm": 0.23845359683036804, "learning_rate": 0.0001993422618893284, "loss": 11.6717, "step": 5264 }, { "epoch": 0.1102110022607385, "grad_norm": 0.21010074019432068, "learning_rate": 0.00019934201080846619, "loss": 11.6664, "step": 5265 }, { "epoch": 0.11023193502470066, "grad_norm": 0.2385977953672409, "learning_rate": 0.00019934175967984829, "loss": 11.6864, "step": 5266 }, { "epoch": 0.11025286778866282, "grad_norm": 0.26050063967704773, "learning_rate": 0.00019934150850347478, "loss": 11.7004, "step": 5267 }, { "epoch": 0.11027380055262496, "grad_norm": 0.183120995759964, "learning_rate": 0.00019934125727934582, "loss": 11.6786, "step": 5268 }, { "epoch": 0.11029473331658712, "grad_norm": 0.2101728469133377, "learning_rate": 0.00019934100600746155, "loss": 11.682, "step": 5269 }, { "epoch": 0.11031566608054928, "grad_norm": 0.34649527072906494, "learning_rate": 0.00019934075468782205, "loss": 11.6993, "step": 5270 }, { "epoch": 0.11033659884451143, "grad_norm": 0.23973312973976135, "learning_rate": 0.00019934050332042746, "loss": 11.6886, "step": 5271 }, { "epoch": 0.11035753160847359, "grad_norm": 0.2893867492675781, "learning_rate": 0.00019934025190527789, "loss": 11.6718, "step": 5272 }, { "epoch": 0.11037846437243573, "grad_norm": 0.2161138951778412, "learning_rate": 0.00019934000044237349, "loss": 11.682, "step": 5273 }, { "epoch": 0.11039939713639789, "grad_norm": 0.2276775985956192, "learning_rate": 0.00019933974893171434, "loss": 11.6754, "step": 5274 }, { "epoch": 0.11042032990036005, "grad_norm": 0.2328047752380371, "learning_rate": 0.0001993394973733006, "loss": 11.6744, "step": 5275 }, { "epoch": 0.11044126266432219, "grad_norm": 0.22380462288856506, "learning_rate": 0.00019933924576713238, "loss": 11.6771, "step": 5276 }, { "epoch": 0.11046219542828435, "grad_norm": 0.2180800437927246, "learning_rate": 0.00019933899411320977, "loss": 11.6696, "step": 5277 }, { "epoch": 0.11048312819224651, "grad_norm": 0.22073471546173096, "learning_rate": 0.00019933874241153293, "loss": 11.6832, "step": 5278 }, { "epoch": 0.11050406095620866, "grad_norm": 0.22748062014579773, "learning_rate": 0.00019933849066210195, "loss": 11.691, "step": 5279 }, { "epoch": 0.11052499372017081, "grad_norm": 0.23020081222057343, "learning_rate": 0.000199338238864917, "loss": 11.687, "step": 5280 }, { "epoch": 0.11054592648413296, "grad_norm": 0.2817944586277008, "learning_rate": 0.00019933798701997812, "loss": 11.6909, "step": 5281 }, { "epoch": 0.11056685924809512, "grad_norm": 0.22256818413734436, "learning_rate": 0.0001993377351272855, "loss": 11.6683, "step": 5282 }, { "epoch": 0.11058779201205728, "grad_norm": 0.23454418778419495, "learning_rate": 0.00019933748318683926, "loss": 11.6926, "step": 5283 }, { "epoch": 0.11060872477601942, "grad_norm": 0.20085670053958893, "learning_rate": 0.0001993372311986395, "loss": 11.683, "step": 5284 }, { "epoch": 0.11062965753998158, "grad_norm": 0.29448580741882324, "learning_rate": 0.00019933697916268633, "loss": 11.6809, "step": 5285 }, { "epoch": 0.11065059030394374, "grad_norm": 0.23642095923423767, "learning_rate": 0.00019933672707897989, "loss": 11.672, "step": 5286 }, { "epoch": 0.11067152306790588, "grad_norm": 0.22382088005542755, "learning_rate": 0.00019933647494752027, "loss": 11.6759, "step": 5287 }, { "epoch": 0.11069245583186804, "grad_norm": 0.21155689656734467, "learning_rate": 0.00019933622276830764, "loss": 11.6786, "step": 5288 }, { "epoch": 0.11071338859583019, "grad_norm": 0.20744632184505463, "learning_rate": 0.00019933597054134207, "loss": 11.6788, "step": 5289 }, { "epoch": 0.11073432135979235, "grad_norm": 0.3470866084098816, "learning_rate": 0.00019933571826662375, "loss": 11.6788, "step": 5290 }, { "epoch": 0.1107552541237545, "grad_norm": 0.2439790517091751, "learning_rate": 0.00019933546594415273, "loss": 11.67, "step": 5291 }, { "epoch": 0.11077618688771665, "grad_norm": 0.2704711854457855, "learning_rate": 0.00019933521357392916, "loss": 11.6881, "step": 5292 }, { "epoch": 0.11079711965167881, "grad_norm": 0.2767643928527832, "learning_rate": 0.00019933496115595318, "loss": 11.6876, "step": 5293 }, { "epoch": 0.11081805241564097, "grad_norm": 0.35721921920776367, "learning_rate": 0.00019933470869022487, "loss": 11.7098, "step": 5294 }, { "epoch": 0.11083898517960311, "grad_norm": 0.25520068407058716, "learning_rate": 0.00019933445617674437, "loss": 11.6818, "step": 5295 }, { "epoch": 0.11085991794356527, "grad_norm": 0.22493976354599, "learning_rate": 0.00019933420361551183, "loss": 11.6851, "step": 5296 }, { "epoch": 0.11088085070752743, "grad_norm": 0.20708969235420227, "learning_rate": 0.00019933395100652735, "loss": 11.6818, "step": 5297 }, { "epoch": 0.11090178347148957, "grad_norm": 0.23468390107154846, "learning_rate": 0.00019933369834979105, "loss": 11.6708, "step": 5298 }, { "epoch": 0.11092271623545173, "grad_norm": 0.2564840316772461, "learning_rate": 0.00019933344564530305, "loss": 11.6886, "step": 5299 }, { "epoch": 0.11094364899941388, "grad_norm": 0.2241990566253662, "learning_rate": 0.00019933319289306344, "loss": 11.683, "step": 5300 }, { "epoch": 0.11096458176337604, "grad_norm": 0.2626439034938812, "learning_rate": 0.0001993329400930724, "loss": 11.6964, "step": 5301 }, { "epoch": 0.1109855145273382, "grad_norm": 0.20721693336963654, "learning_rate": 0.00019933268724533004, "loss": 11.6909, "step": 5302 }, { "epoch": 0.11100644729130034, "grad_norm": 0.324592262506485, "learning_rate": 0.00019933243434983643, "loss": 11.6879, "step": 5303 }, { "epoch": 0.1110273800552625, "grad_norm": 0.27106571197509766, "learning_rate": 0.00019933218140659174, "loss": 11.6963, "step": 5304 }, { "epoch": 0.11104831281922466, "grad_norm": 0.2170044332742691, "learning_rate": 0.0001993319284155961, "loss": 11.6856, "step": 5305 }, { "epoch": 0.1110692455831868, "grad_norm": 0.23044046759605408, "learning_rate": 0.0001993316753768496, "loss": 11.6779, "step": 5306 }, { "epoch": 0.11109017834714896, "grad_norm": 0.25636735558509827, "learning_rate": 0.00019933142229035236, "loss": 11.6888, "step": 5307 }, { "epoch": 0.1111111111111111, "grad_norm": 0.21371449530124664, "learning_rate": 0.00019933116915610454, "loss": 11.6709, "step": 5308 }, { "epoch": 0.11113204387507326, "grad_norm": 0.22429338097572327, "learning_rate": 0.00019933091597410622, "loss": 11.6906, "step": 5309 }, { "epoch": 0.11115297663903542, "grad_norm": 0.2378222495317459, "learning_rate": 0.00019933066274435755, "loss": 11.6869, "step": 5310 }, { "epoch": 0.11117390940299757, "grad_norm": 0.21004201471805573, "learning_rate": 0.00019933040946685862, "loss": 11.7005, "step": 5311 }, { "epoch": 0.11119484216695973, "grad_norm": 0.21763233840465546, "learning_rate": 0.00019933015614160957, "loss": 11.6752, "step": 5312 }, { "epoch": 0.11121577493092188, "grad_norm": 0.24822209775447845, "learning_rate": 0.00019932990276861053, "loss": 11.7057, "step": 5313 }, { "epoch": 0.11123670769488403, "grad_norm": 0.1929541677236557, "learning_rate": 0.00019932964934786163, "loss": 11.6876, "step": 5314 }, { "epoch": 0.11125764045884619, "grad_norm": 0.20054645836353302, "learning_rate": 0.00019932939587936297, "loss": 11.6789, "step": 5315 }, { "epoch": 0.11127857322280833, "grad_norm": 0.2196919173002243, "learning_rate": 0.0001993291423631147, "loss": 11.6952, "step": 5316 }, { "epoch": 0.11129950598677049, "grad_norm": 0.21544215083122253, "learning_rate": 0.00019932888879911688, "loss": 11.6866, "step": 5317 }, { "epoch": 0.11132043875073265, "grad_norm": 0.17770908772945404, "learning_rate": 0.0001993286351873697, "loss": 11.6753, "step": 5318 }, { "epoch": 0.1113413715146948, "grad_norm": 0.23254482448101044, "learning_rate": 0.0001993283815278732, "loss": 11.6816, "step": 5319 }, { "epoch": 0.11136230427865695, "grad_norm": 0.2334020584821701, "learning_rate": 0.00019932812782062762, "loss": 11.6849, "step": 5320 }, { "epoch": 0.11138323704261911, "grad_norm": 0.22564010322093964, "learning_rate": 0.000199327874065633, "loss": 11.6912, "step": 5321 }, { "epoch": 0.11140416980658126, "grad_norm": 0.2513313889503479, "learning_rate": 0.00019932762026288948, "loss": 11.7006, "step": 5322 }, { "epoch": 0.11142510257054342, "grad_norm": 0.2339639961719513, "learning_rate": 0.00019932736641239718, "loss": 11.6883, "step": 5323 }, { "epoch": 0.11144603533450557, "grad_norm": 0.2871369421482086, "learning_rate": 0.0001993271125141562, "loss": 11.6753, "step": 5324 }, { "epoch": 0.11146696809846772, "grad_norm": 0.2135179042816162, "learning_rate": 0.00019932685856816673, "loss": 11.6847, "step": 5325 }, { "epoch": 0.11148790086242988, "grad_norm": 0.2506956160068512, "learning_rate": 0.00019932660457442885, "loss": 11.6792, "step": 5326 }, { "epoch": 0.11150883362639202, "grad_norm": 0.21275018155574799, "learning_rate": 0.00019932635053294267, "loss": 11.6875, "step": 5327 }, { "epoch": 0.11152976639035418, "grad_norm": 0.22822047770023346, "learning_rate": 0.0001993260964437083, "loss": 11.6759, "step": 5328 }, { "epoch": 0.11155069915431634, "grad_norm": 0.2480841428041458, "learning_rate": 0.00019932584230672592, "loss": 11.6798, "step": 5329 }, { "epoch": 0.11157163191827849, "grad_norm": 0.2617553472518921, "learning_rate": 0.00019932558812199558, "loss": 11.6862, "step": 5330 }, { "epoch": 0.11159256468224064, "grad_norm": 0.2030051201581955, "learning_rate": 0.00019932533388951744, "loss": 11.6807, "step": 5331 }, { "epoch": 0.1116134974462028, "grad_norm": 0.20248055458068848, "learning_rate": 0.00019932507960929163, "loss": 11.6801, "step": 5332 }, { "epoch": 0.11163443021016495, "grad_norm": 0.22949622571468353, "learning_rate": 0.0001993248252813183, "loss": 11.6782, "step": 5333 }, { "epoch": 0.1116553629741271, "grad_norm": 0.26692110300064087, "learning_rate": 0.00019932457090559747, "loss": 11.6745, "step": 5334 }, { "epoch": 0.11167629573808925, "grad_norm": 0.22295823693275452, "learning_rate": 0.00019932431648212937, "loss": 11.682, "step": 5335 }, { "epoch": 0.11169722850205141, "grad_norm": 0.22246773540973663, "learning_rate": 0.00019932406201091408, "loss": 11.6915, "step": 5336 }, { "epoch": 0.11171816126601357, "grad_norm": 0.2390371859073639, "learning_rate": 0.00019932380749195172, "loss": 11.6834, "step": 5337 }, { "epoch": 0.11173909402997571, "grad_norm": 0.22774401307106018, "learning_rate": 0.0001993235529252424, "loss": 11.6821, "step": 5338 }, { "epoch": 0.11176002679393787, "grad_norm": 0.21887177228927612, "learning_rate": 0.00019932329831078629, "loss": 11.6751, "step": 5339 }, { "epoch": 0.11178095955790003, "grad_norm": 0.24018537998199463, "learning_rate": 0.00019932304364858347, "loss": 11.6824, "step": 5340 }, { "epoch": 0.11180189232186218, "grad_norm": 0.20748388767242432, "learning_rate": 0.00019932278893863405, "loss": 11.6791, "step": 5341 }, { "epoch": 0.11182282508582433, "grad_norm": 0.20112498104572296, "learning_rate": 0.0001993225341809382, "loss": 11.6769, "step": 5342 }, { "epoch": 0.11184375784978648, "grad_norm": 0.22554123401641846, "learning_rate": 0.00019932227937549598, "loss": 11.6737, "step": 5343 }, { "epoch": 0.11186469061374864, "grad_norm": 0.26010724902153015, "learning_rate": 0.0001993220245223076, "loss": 11.6901, "step": 5344 }, { "epoch": 0.1118856233777108, "grad_norm": 0.30005332827568054, "learning_rate": 0.0001993217696213731, "loss": 11.6878, "step": 5345 }, { "epoch": 0.11190655614167294, "grad_norm": 0.19679012894630432, "learning_rate": 0.00019932151467269265, "loss": 11.6788, "step": 5346 }, { "epoch": 0.1119274889056351, "grad_norm": 0.22013051807880402, "learning_rate": 0.00019932125967626635, "loss": 11.6935, "step": 5347 }, { "epoch": 0.11194842166959726, "grad_norm": 0.19986240565776825, "learning_rate": 0.00019932100463209432, "loss": 11.6818, "step": 5348 }, { "epoch": 0.1119693544335594, "grad_norm": 0.22555029392242432, "learning_rate": 0.00019932074954017673, "loss": 11.6731, "step": 5349 }, { "epoch": 0.11199028719752156, "grad_norm": 0.23278063535690308, "learning_rate": 0.00019932049440051362, "loss": 11.6722, "step": 5350 }, { "epoch": 0.11201121996148372, "grad_norm": 0.29505547881126404, "learning_rate": 0.0001993202392131052, "loss": 11.6748, "step": 5351 }, { "epoch": 0.11203215272544587, "grad_norm": 0.2282116562128067, "learning_rate": 0.00019931998397795152, "loss": 11.6801, "step": 5352 }, { "epoch": 0.11205308548940802, "grad_norm": 0.20810768008232117, "learning_rate": 0.00019931972869505272, "loss": 11.6851, "step": 5353 }, { "epoch": 0.11207401825337017, "grad_norm": 0.20940913259983063, "learning_rate": 0.00019931947336440894, "loss": 11.6881, "step": 5354 }, { "epoch": 0.11209495101733233, "grad_norm": 0.20716075599193573, "learning_rate": 0.00019931921798602034, "loss": 11.6776, "step": 5355 }, { "epoch": 0.11211588378129449, "grad_norm": 0.22493061423301697, "learning_rate": 0.00019931896255988697, "loss": 11.6869, "step": 5356 }, { "epoch": 0.11213681654525663, "grad_norm": 0.2331576645374298, "learning_rate": 0.000199318707086009, "loss": 11.6746, "step": 5357 }, { "epoch": 0.11215774930921879, "grad_norm": 0.2132083624601364, "learning_rate": 0.00019931845156438653, "loss": 11.6854, "step": 5358 }, { "epoch": 0.11217868207318095, "grad_norm": 0.24310800433158875, "learning_rate": 0.00019931819599501967, "loss": 11.6797, "step": 5359 }, { "epoch": 0.1121996148371431, "grad_norm": 0.19772174954414368, "learning_rate": 0.0001993179403779086, "loss": 11.6756, "step": 5360 }, { "epoch": 0.11222054760110525, "grad_norm": 0.21776960790157318, "learning_rate": 0.0001993176847130534, "loss": 11.6982, "step": 5361 }, { "epoch": 0.1122414803650674, "grad_norm": 0.22912508249282837, "learning_rate": 0.00019931742900045415, "loss": 11.6831, "step": 5362 }, { "epoch": 0.11226241312902956, "grad_norm": 0.24014794826507568, "learning_rate": 0.00019931717324011107, "loss": 11.6839, "step": 5363 }, { "epoch": 0.11228334589299171, "grad_norm": 0.22544479370117188, "learning_rate": 0.00019931691743202423, "loss": 11.6775, "step": 5364 }, { "epoch": 0.11230427865695386, "grad_norm": 0.2023395448923111, "learning_rate": 0.00019931666157619373, "loss": 11.6852, "step": 5365 }, { "epoch": 0.11232521142091602, "grad_norm": 0.18874680995941162, "learning_rate": 0.00019931640567261974, "loss": 11.6809, "step": 5366 }, { "epoch": 0.11234614418487818, "grad_norm": 0.24994461238384247, "learning_rate": 0.00019931614972130236, "loss": 11.6826, "step": 5367 }, { "epoch": 0.11236707694884032, "grad_norm": 0.22561782598495483, "learning_rate": 0.00019931589372224173, "loss": 11.6781, "step": 5368 }, { "epoch": 0.11238800971280248, "grad_norm": 0.20168188214302063, "learning_rate": 0.00019931563767543794, "loss": 11.6882, "step": 5369 }, { "epoch": 0.11240894247676463, "grad_norm": 0.21706262230873108, "learning_rate": 0.00019931538158089116, "loss": 11.6814, "step": 5370 }, { "epoch": 0.11242987524072678, "grad_norm": 0.17379321157932281, "learning_rate": 0.00019931512543860147, "loss": 11.685, "step": 5371 }, { "epoch": 0.11245080800468894, "grad_norm": 0.2910440266132355, "learning_rate": 0.00019931486924856898, "loss": 11.6704, "step": 5372 }, { "epoch": 0.11247174076865109, "grad_norm": 0.19832846522331238, "learning_rate": 0.00019931461301079387, "loss": 11.6837, "step": 5373 }, { "epoch": 0.11249267353261325, "grad_norm": 0.2076655924320221, "learning_rate": 0.00019931435672527624, "loss": 11.6696, "step": 5374 }, { "epoch": 0.1125136062965754, "grad_norm": 0.1828392595052719, "learning_rate": 0.0001993141003920162, "loss": 11.688, "step": 5375 }, { "epoch": 0.11253453906053755, "grad_norm": 0.25623828172683716, "learning_rate": 0.00019931384401101386, "loss": 11.6725, "step": 5376 }, { "epoch": 0.11255547182449971, "grad_norm": 0.22412927448749542, "learning_rate": 0.00019931358758226941, "loss": 11.6761, "step": 5377 }, { "epoch": 0.11257640458846185, "grad_norm": 0.23549190163612366, "learning_rate": 0.0001993133311057829, "loss": 11.6852, "step": 5378 }, { "epoch": 0.11259733735242401, "grad_norm": 0.24819669127464294, "learning_rate": 0.00019931307458155448, "loss": 11.676, "step": 5379 }, { "epoch": 0.11261827011638617, "grad_norm": 0.24470455944538116, "learning_rate": 0.0001993128180095843, "loss": 11.6656, "step": 5380 }, { "epoch": 0.11263920288034832, "grad_norm": 0.293641597032547, "learning_rate": 0.00019931256138987243, "loss": 11.686, "step": 5381 }, { "epoch": 0.11266013564431047, "grad_norm": 0.18543393909931183, "learning_rate": 0.00019931230472241903, "loss": 11.6922, "step": 5382 }, { "epoch": 0.11268106840827263, "grad_norm": 0.30255505442619324, "learning_rate": 0.00019931204800722422, "loss": 11.6706, "step": 5383 }, { "epoch": 0.11270200117223478, "grad_norm": 0.23472431302070618, "learning_rate": 0.0001993117912442881, "loss": 11.6701, "step": 5384 }, { "epoch": 0.11272293393619694, "grad_norm": 0.23972412943840027, "learning_rate": 0.00019931153443361087, "loss": 11.6978, "step": 5385 }, { "epoch": 0.1127438667001591, "grad_norm": 0.24198342859745026, "learning_rate": 0.00019931127757519254, "loss": 11.6901, "step": 5386 }, { "epoch": 0.11276479946412124, "grad_norm": 0.2351028025150299, "learning_rate": 0.0001993110206690333, "loss": 11.6813, "step": 5387 }, { "epoch": 0.1127857322280834, "grad_norm": 0.23425248265266418, "learning_rate": 0.00019931076371513328, "loss": 11.6913, "step": 5388 }, { "epoch": 0.11280666499204554, "grad_norm": 0.21724024415016174, "learning_rate": 0.00019931050671349255, "loss": 11.6573, "step": 5389 }, { "epoch": 0.1128275977560077, "grad_norm": 0.24076619744300842, "learning_rate": 0.00019931024966411126, "loss": 11.6676, "step": 5390 }, { "epoch": 0.11284853051996986, "grad_norm": 0.2480914145708084, "learning_rate": 0.0001993099925669896, "loss": 11.6909, "step": 5391 }, { "epoch": 0.112869463283932, "grad_norm": 0.241373673081398, "learning_rate": 0.0001993097354221276, "loss": 11.6811, "step": 5392 }, { "epoch": 0.11289039604789416, "grad_norm": 0.1977616250514984, "learning_rate": 0.00019930947822952542, "loss": 11.6782, "step": 5393 }, { "epoch": 0.11291132881185632, "grad_norm": 0.23467391729354858, "learning_rate": 0.0001993092209891832, "loss": 11.6791, "step": 5394 }, { "epoch": 0.11293226157581847, "grad_norm": 0.24718070030212402, "learning_rate": 0.00019930896370110103, "loss": 11.6924, "step": 5395 }, { "epoch": 0.11295319433978063, "grad_norm": 0.20724275708198547, "learning_rate": 0.00019930870636527908, "loss": 11.696, "step": 5396 }, { "epoch": 0.11297412710374277, "grad_norm": 0.2776947021484375, "learning_rate": 0.0001993084489817174, "loss": 11.6796, "step": 5397 }, { "epoch": 0.11299505986770493, "grad_norm": 0.2609831392765045, "learning_rate": 0.0001993081915504162, "loss": 11.6792, "step": 5398 }, { "epoch": 0.11301599263166709, "grad_norm": 0.22743132710456848, "learning_rate": 0.00019930793407137553, "loss": 11.6779, "step": 5399 }, { "epoch": 0.11303692539562923, "grad_norm": 0.21984639763832092, "learning_rate": 0.00019930767654459557, "loss": 11.6839, "step": 5400 }, { "epoch": 0.11305785815959139, "grad_norm": 0.20655032992362976, "learning_rate": 0.0001993074189700764, "loss": 11.6841, "step": 5401 }, { "epoch": 0.11307879092355355, "grad_norm": 0.20643186569213867, "learning_rate": 0.00019930716134781818, "loss": 11.6959, "step": 5402 }, { "epoch": 0.1130997236875157, "grad_norm": 0.2151416838169098, "learning_rate": 0.000199306903677821, "loss": 11.6762, "step": 5403 }, { "epoch": 0.11312065645147786, "grad_norm": 0.23202046751976013, "learning_rate": 0.00019930664596008502, "loss": 11.6648, "step": 5404 }, { "epoch": 0.11314158921544, "grad_norm": 0.2655690014362335, "learning_rate": 0.00019930638819461033, "loss": 11.6719, "step": 5405 }, { "epoch": 0.11316252197940216, "grad_norm": 0.20409804582595825, "learning_rate": 0.0001993061303813971, "loss": 11.6763, "step": 5406 }, { "epoch": 0.11318345474336432, "grad_norm": 0.21123917400836945, "learning_rate": 0.0001993058725204454, "loss": 11.6815, "step": 5407 }, { "epoch": 0.11320438750732646, "grad_norm": 0.24448958039283752, "learning_rate": 0.00019930561461175536, "loss": 11.6839, "step": 5408 }, { "epoch": 0.11322532027128862, "grad_norm": 0.21060003340244293, "learning_rate": 0.00019930535665532714, "loss": 11.7, "step": 5409 }, { "epoch": 0.11324625303525078, "grad_norm": 0.2313947230577469, "learning_rate": 0.00019930509865116085, "loss": 11.6847, "step": 5410 }, { "epoch": 0.11326718579921292, "grad_norm": 0.1983145773410797, "learning_rate": 0.00019930484059925657, "loss": 11.6798, "step": 5411 }, { "epoch": 0.11328811856317508, "grad_norm": 0.2658294141292572, "learning_rate": 0.00019930458249961452, "loss": 11.6791, "step": 5412 }, { "epoch": 0.11330905132713724, "grad_norm": 0.23253020644187927, "learning_rate": 0.00019930432435223474, "loss": 11.6566, "step": 5413 }, { "epoch": 0.11332998409109939, "grad_norm": 0.24011175334453583, "learning_rate": 0.00019930406615711738, "loss": 11.6776, "step": 5414 }, { "epoch": 0.11335091685506155, "grad_norm": 0.22041569650173187, "learning_rate": 0.00019930380791426258, "loss": 11.68, "step": 5415 }, { "epoch": 0.11337184961902369, "grad_norm": 0.24825610220432281, "learning_rate": 0.00019930354962367043, "loss": 11.6889, "step": 5416 }, { "epoch": 0.11339278238298585, "grad_norm": 0.22807952761650085, "learning_rate": 0.00019930329128534108, "loss": 11.69, "step": 5417 }, { "epoch": 0.11341371514694801, "grad_norm": 0.21168263256549835, "learning_rate": 0.00019930303289927465, "loss": 11.684, "step": 5418 }, { "epoch": 0.11343464791091015, "grad_norm": 0.21522215008735657, "learning_rate": 0.00019930277446547124, "loss": 11.677, "step": 5419 }, { "epoch": 0.11345558067487231, "grad_norm": 0.2597435414791107, "learning_rate": 0.00019930251598393103, "loss": 11.6781, "step": 5420 }, { "epoch": 0.11347651343883447, "grad_norm": 0.18927791714668274, "learning_rate": 0.00019930225745465408, "loss": 11.669, "step": 5421 }, { "epoch": 0.11349744620279661, "grad_norm": 0.27110007405281067, "learning_rate": 0.00019930199887764058, "loss": 11.6836, "step": 5422 }, { "epoch": 0.11351837896675877, "grad_norm": 0.20577135682106018, "learning_rate": 0.00019930174025289062, "loss": 11.6854, "step": 5423 }, { "epoch": 0.11353931173072092, "grad_norm": 0.23818710446357727, "learning_rate": 0.00019930148158040428, "loss": 11.6751, "step": 5424 }, { "epoch": 0.11356024449468308, "grad_norm": 0.2403985857963562, "learning_rate": 0.0001993012228601818, "loss": 11.6778, "step": 5425 }, { "epoch": 0.11358117725864524, "grad_norm": 0.24406076967716217, "learning_rate": 0.00019930096409222318, "loss": 11.6855, "step": 5426 }, { "epoch": 0.11360211002260738, "grad_norm": 0.20743343234062195, "learning_rate": 0.00019930070527652855, "loss": 11.6892, "step": 5427 }, { "epoch": 0.11362304278656954, "grad_norm": 0.24112050235271454, "learning_rate": 0.00019930044641309816, "loss": 11.6908, "step": 5428 }, { "epoch": 0.1136439755505317, "grad_norm": 0.2121131420135498, "learning_rate": 0.00019930018750193202, "loss": 11.6632, "step": 5429 }, { "epoch": 0.11366490831449384, "grad_norm": 0.24295392632484436, "learning_rate": 0.0001992999285430303, "loss": 11.6809, "step": 5430 }, { "epoch": 0.113685841078456, "grad_norm": 0.29915326833724976, "learning_rate": 0.00019929966953639313, "loss": 11.6836, "step": 5431 }, { "epoch": 0.11370677384241815, "grad_norm": 0.20126789808273315, "learning_rate": 0.00019929941048202058, "loss": 11.6816, "step": 5432 }, { "epoch": 0.1137277066063803, "grad_norm": 0.23617182672023773, "learning_rate": 0.00019929915137991284, "loss": 11.705, "step": 5433 }, { "epoch": 0.11374863937034246, "grad_norm": 0.183514803647995, "learning_rate": 0.00019929889223006998, "loss": 11.6818, "step": 5434 }, { "epoch": 0.11376957213430461, "grad_norm": 0.23953595757484436, "learning_rate": 0.0001992986330324922, "loss": 11.6936, "step": 5435 }, { "epoch": 0.11379050489826677, "grad_norm": 0.18970553576946259, "learning_rate": 0.00019929837378717955, "loss": 11.6774, "step": 5436 }, { "epoch": 0.11381143766222893, "grad_norm": 0.2173183113336563, "learning_rate": 0.00019929811449413218, "loss": 11.6862, "step": 5437 }, { "epoch": 0.11383237042619107, "grad_norm": 0.20163464546203613, "learning_rate": 0.00019929785515335023, "loss": 11.6707, "step": 5438 }, { "epoch": 0.11385330319015323, "grad_norm": 0.2733427882194519, "learning_rate": 0.0001992975957648338, "loss": 11.698, "step": 5439 }, { "epoch": 0.11387423595411539, "grad_norm": 0.2864752411842346, "learning_rate": 0.000199297336328583, "loss": 11.6835, "step": 5440 }, { "epoch": 0.11389516871807753, "grad_norm": 0.2279464453458786, "learning_rate": 0.000199297076844598, "loss": 11.6764, "step": 5441 }, { "epoch": 0.11391610148203969, "grad_norm": 0.22102315723896027, "learning_rate": 0.00019929681731287895, "loss": 11.6899, "step": 5442 }, { "epoch": 0.11393703424600184, "grad_norm": 0.2258249670267105, "learning_rate": 0.0001992965577334259, "loss": 11.696, "step": 5443 }, { "epoch": 0.113957967009964, "grad_norm": 0.23982711136341095, "learning_rate": 0.00019929629810623898, "loss": 11.6831, "step": 5444 }, { "epoch": 0.11397889977392615, "grad_norm": 0.220441535115242, "learning_rate": 0.00019929603843131836, "loss": 11.6798, "step": 5445 }, { "epoch": 0.1139998325378883, "grad_norm": 0.23868846893310547, "learning_rate": 0.00019929577870866414, "loss": 11.6734, "step": 5446 }, { "epoch": 0.11402076530185046, "grad_norm": 0.21137607097625732, "learning_rate": 0.00019929551893827646, "loss": 11.6941, "step": 5447 }, { "epoch": 0.11404169806581262, "grad_norm": 0.2792252004146576, "learning_rate": 0.00019929525912015542, "loss": 11.6831, "step": 5448 }, { "epoch": 0.11406263082977476, "grad_norm": 0.19722406566143036, "learning_rate": 0.00019929499925430116, "loss": 11.6737, "step": 5449 }, { "epoch": 0.11408356359373692, "grad_norm": 0.17992648482322693, "learning_rate": 0.00019929473934071382, "loss": 11.6777, "step": 5450 }, { "epoch": 0.11410449635769906, "grad_norm": 0.22392268478870392, "learning_rate": 0.00019929447937939347, "loss": 11.6885, "step": 5451 }, { "epoch": 0.11412542912166122, "grad_norm": 0.261545330286026, "learning_rate": 0.00019929421937034029, "loss": 11.6783, "step": 5452 }, { "epoch": 0.11414636188562338, "grad_norm": 0.2224660962820053, "learning_rate": 0.00019929395931355442, "loss": 11.6681, "step": 5453 }, { "epoch": 0.11416729464958553, "grad_norm": 0.21408864855766296, "learning_rate": 0.00019929369920903593, "loss": 11.6701, "step": 5454 }, { "epoch": 0.11418822741354769, "grad_norm": 0.242476224899292, "learning_rate": 0.00019929343905678498, "loss": 11.6767, "step": 5455 }, { "epoch": 0.11420916017750984, "grad_norm": 0.2969398498535156, "learning_rate": 0.00019929317885680167, "loss": 11.6861, "step": 5456 }, { "epoch": 0.11423009294147199, "grad_norm": 0.21800746023654938, "learning_rate": 0.00019929291860908615, "loss": 11.6668, "step": 5457 }, { "epoch": 0.11425102570543415, "grad_norm": 0.22150687873363495, "learning_rate": 0.00019929265831363852, "loss": 11.6808, "step": 5458 }, { "epoch": 0.11427195846939629, "grad_norm": 0.23022721707820892, "learning_rate": 0.00019929239797045894, "loss": 11.6936, "step": 5459 }, { "epoch": 0.11429289123335845, "grad_norm": 0.19837793707847595, "learning_rate": 0.00019929213757954747, "loss": 11.6704, "step": 5460 }, { "epoch": 0.11431382399732061, "grad_norm": 0.21741899847984314, "learning_rate": 0.00019929187714090433, "loss": 11.6898, "step": 5461 }, { "epoch": 0.11433475676128275, "grad_norm": 0.272749125957489, "learning_rate": 0.0001992916166545296, "loss": 11.686, "step": 5462 }, { "epoch": 0.11435568952524491, "grad_norm": 0.2466973513364792, "learning_rate": 0.00019929135612042337, "loss": 11.6939, "step": 5463 }, { "epoch": 0.11437662228920707, "grad_norm": 0.2445671260356903, "learning_rate": 0.0001992910955385858, "loss": 11.6845, "step": 5464 }, { "epoch": 0.11439755505316922, "grad_norm": 0.20795601606369019, "learning_rate": 0.000199290834909017, "loss": 11.6993, "step": 5465 }, { "epoch": 0.11441848781713138, "grad_norm": 0.2215278446674347, "learning_rate": 0.00019929057423171713, "loss": 11.6899, "step": 5466 }, { "epoch": 0.11443942058109353, "grad_norm": 0.22399994730949402, "learning_rate": 0.00019929031350668627, "loss": 11.6704, "step": 5467 }, { "epoch": 0.11446035334505568, "grad_norm": 0.17965437471866608, "learning_rate": 0.0001992900527339246, "loss": 11.6822, "step": 5468 }, { "epoch": 0.11448128610901784, "grad_norm": 0.18234331905841827, "learning_rate": 0.00019928979191343216, "loss": 11.6851, "step": 5469 }, { "epoch": 0.11450221887297998, "grad_norm": 0.20878879725933075, "learning_rate": 0.00019928953104520915, "loss": 11.6851, "step": 5470 }, { "epoch": 0.11452315163694214, "grad_norm": 0.2901149392127991, "learning_rate": 0.0001992892701292557, "loss": 11.69, "step": 5471 }, { "epoch": 0.1145440844009043, "grad_norm": 0.25281277298927307, "learning_rate": 0.00019928900916557188, "loss": 11.6769, "step": 5472 }, { "epoch": 0.11456501716486645, "grad_norm": 0.36206403374671936, "learning_rate": 0.00019928874815415787, "loss": 11.7081, "step": 5473 }, { "epoch": 0.1145859499288286, "grad_norm": 0.24325597286224365, "learning_rate": 0.00019928848709501373, "loss": 11.6641, "step": 5474 }, { "epoch": 0.11460688269279076, "grad_norm": 0.21163210272789001, "learning_rate": 0.00019928822598813963, "loss": 11.6785, "step": 5475 }, { "epoch": 0.11462781545675291, "grad_norm": 0.2446940690279007, "learning_rate": 0.00019928796483353574, "loss": 11.6821, "step": 5476 }, { "epoch": 0.11464874822071507, "grad_norm": 0.29382485151290894, "learning_rate": 0.00019928770363120208, "loss": 11.6895, "step": 5477 }, { "epoch": 0.11466968098467721, "grad_norm": 0.20081287622451782, "learning_rate": 0.00019928744238113883, "loss": 11.6653, "step": 5478 }, { "epoch": 0.11469061374863937, "grad_norm": 0.244780495762825, "learning_rate": 0.00019928718108334613, "loss": 11.6729, "step": 5479 }, { "epoch": 0.11471154651260153, "grad_norm": 0.2135285586118698, "learning_rate": 0.0001992869197378241, "loss": 11.6805, "step": 5480 }, { "epoch": 0.11473247927656367, "grad_norm": 0.1855139285326004, "learning_rate": 0.00019928665834457288, "loss": 11.6686, "step": 5481 }, { "epoch": 0.11475341204052583, "grad_norm": 0.2653339207172394, "learning_rate": 0.00019928639690359255, "loss": 11.6699, "step": 5482 }, { "epoch": 0.11477434480448799, "grad_norm": 0.2051820158958435, "learning_rate": 0.00019928613541488326, "loss": 11.685, "step": 5483 }, { "epoch": 0.11479527756845014, "grad_norm": 0.2803007960319519, "learning_rate": 0.00019928587387844514, "loss": 11.6878, "step": 5484 }, { "epoch": 0.1148162103324123, "grad_norm": 0.21339553594589233, "learning_rate": 0.0001992856122942783, "loss": 11.6792, "step": 5485 }, { "epoch": 0.11483714309637444, "grad_norm": 0.16763387620449066, "learning_rate": 0.0001992853506623829, "loss": 11.6845, "step": 5486 }, { "epoch": 0.1148580758603366, "grad_norm": 0.2541551887989044, "learning_rate": 0.00019928508898275901, "loss": 11.6752, "step": 5487 }, { "epoch": 0.11487900862429876, "grad_norm": 0.3222416043281555, "learning_rate": 0.0001992848272554068, "loss": 11.6744, "step": 5488 }, { "epoch": 0.1148999413882609, "grad_norm": 0.2571907937526703, "learning_rate": 0.0001992845654803264, "loss": 11.6812, "step": 5489 }, { "epoch": 0.11492087415222306, "grad_norm": 0.320909321308136, "learning_rate": 0.00019928430365751792, "loss": 11.6926, "step": 5490 }, { "epoch": 0.11494180691618522, "grad_norm": 0.1964409053325653, "learning_rate": 0.00019928404178698147, "loss": 11.6665, "step": 5491 }, { "epoch": 0.11496273968014736, "grad_norm": 0.2048041671514511, "learning_rate": 0.00019928377986871718, "loss": 11.6697, "step": 5492 }, { "epoch": 0.11498367244410952, "grad_norm": 0.20489703118801117, "learning_rate": 0.0001992835179027252, "loss": 11.6859, "step": 5493 }, { "epoch": 0.11500460520807167, "grad_norm": 0.2805745303630829, "learning_rate": 0.00019928325588900565, "loss": 11.689, "step": 5494 }, { "epoch": 0.11502553797203383, "grad_norm": 0.18670916557312012, "learning_rate": 0.0001992829938275586, "loss": 11.6704, "step": 5495 }, { "epoch": 0.11504647073599598, "grad_norm": 0.2587452232837677, "learning_rate": 0.0001992827317183843, "loss": 11.6742, "step": 5496 }, { "epoch": 0.11506740349995813, "grad_norm": 0.2070678472518921, "learning_rate": 0.00019928246956148278, "loss": 11.6717, "step": 5497 }, { "epoch": 0.11508833626392029, "grad_norm": 0.2414163053035736, "learning_rate": 0.00019928220735685416, "loss": 11.6857, "step": 5498 }, { "epoch": 0.11510926902788245, "grad_norm": 0.21501199901103973, "learning_rate": 0.00019928194510449861, "loss": 11.696, "step": 5499 }, { "epoch": 0.11513020179184459, "grad_norm": 0.2491493672132492, "learning_rate": 0.00019928168280441627, "loss": 11.6845, "step": 5500 }, { "epoch": 0.11515113455580675, "grad_norm": 0.22275954484939575, "learning_rate": 0.00019928142045660719, "loss": 11.6966, "step": 5501 }, { "epoch": 0.11517206731976891, "grad_norm": 0.2846195101737976, "learning_rate": 0.00019928115806107155, "loss": 11.6878, "step": 5502 }, { "epoch": 0.11519300008373105, "grad_norm": 0.2090843915939331, "learning_rate": 0.00019928089561780946, "loss": 11.6815, "step": 5503 }, { "epoch": 0.11521393284769321, "grad_norm": 0.22471971809864044, "learning_rate": 0.00019928063312682107, "loss": 11.6916, "step": 5504 }, { "epoch": 0.11523486561165536, "grad_norm": 0.2357635647058487, "learning_rate": 0.00019928037058810648, "loss": 11.6745, "step": 5505 }, { "epoch": 0.11525579837561752, "grad_norm": 0.20038729906082153, "learning_rate": 0.00019928010800166583, "loss": 11.6831, "step": 5506 }, { "epoch": 0.11527673113957967, "grad_norm": 0.22413849830627441, "learning_rate": 0.00019927984536749927, "loss": 11.696, "step": 5507 }, { "epoch": 0.11529766390354182, "grad_norm": 0.25306904315948486, "learning_rate": 0.00019927958268560686, "loss": 11.6735, "step": 5508 }, { "epoch": 0.11531859666750398, "grad_norm": 0.23560425639152527, "learning_rate": 0.00019927931995598877, "loss": 11.7043, "step": 5509 }, { "epoch": 0.11533952943146614, "grad_norm": 0.18490970134735107, "learning_rate": 0.00019927905717864513, "loss": 11.6774, "step": 5510 }, { "epoch": 0.11536046219542828, "grad_norm": 0.23991209268569946, "learning_rate": 0.00019927879435357608, "loss": 11.6864, "step": 5511 }, { "epoch": 0.11538139495939044, "grad_norm": 0.18402959406375885, "learning_rate": 0.0001992785314807817, "loss": 11.6832, "step": 5512 }, { "epoch": 0.11540232772335259, "grad_norm": 0.20010672509670258, "learning_rate": 0.00019927826856026216, "loss": 11.6731, "step": 5513 }, { "epoch": 0.11542326048731474, "grad_norm": 0.258294939994812, "learning_rate": 0.00019927800559201754, "loss": 11.6758, "step": 5514 }, { "epoch": 0.1154441932512769, "grad_norm": 0.2324696034193039, "learning_rate": 0.000199277742576048, "loss": 11.6873, "step": 5515 }, { "epoch": 0.11546512601523905, "grad_norm": 0.22930693626403809, "learning_rate": 0.00019927747951235366, "loss": 11.6832, "step": 5516 }, { "epoch": 0.1154860587792012, "grad_norm": 0.23371069133281708, "learning_rate": 0.00019927721640093467, "loss": 11.6811, "step": 5517 }, { "epoch": 0.11550699154316336, "grad_norm": 0.20620031654834747, "learning_rate": 0.0001992769532417911, "loss": 11.6714, "step": 5518 }, { "epoch": 0.11552792430712551, "grad_norm": 0.2114725112915039, "learning_rate": 0.00019927669003492313, "loss": 11.6862, "step": 5519 }, { "epoch": 0.11554885707108767, "grad_norm": 0.22908863425254822, "learning_rate": 0.00019927642678033082, "loss": 11.6914, "step": 5520 }, { "epoch": 0.11556978983504981, "grad_norm": 0.2580935060977936, "learning_rate": 0.00019927616347801438, "loss": 11.6942, "step": 5521 }, { "epoch": 0.11559072259901197, "grad_norm": 0.29437369108200073, "learning_rate": 0.0001992759001279739, "loss": 11.6931, "step": 5522 }, { "epoch": 0.11561165536297413, "grad_norm": 0.22505056858062744, "learning_rate": 0.00019927563673020952, "loss": 11.6862, "step": 5523 }, { "epoch": 0.11563258812693628, "grad_norm": 0.18069535493850708, "learning_rate": 0.00019927537328472134, "loss": 11.6856, "step": 5524 }, { "epoch": 0.11565352089089843, "grad_norm": 0.2294621467590332, "learning_rate": 0.0001992751097915095, "loss": 11.6951, "step": 5525 }, { "epoch": 0.11567445365486059, "grad_norm": 0.2578429579734802, "learning_rate": 0.0001992748462505741, "loss": 11.667, "step": 5526 }, { "epoch": 0.11569538641882274, "grad_norm": 0.24244707822799683, "learning_rate": 0.00019927458266191531, "loss": 11.681, "step": 5527 }, { "epoch": 0.1157163191827849, "grad_norm": 0.22267360985279083, "learning_rate": 0.00019927431902553325, "loss": 11.6818, "step": 5528 }, { "epoch": 0.11573725194674706, "grad_norm": 0.27205345034599304, "learning_rate": 0.00019927405534142802, "loss": 11.6912, "step": 5529 }, { "epoch": 0.1157581847107092, "grad_norm": 0.20778943598270416, "learning_rate": 0.00019927379160959978, "loss": 11.6608, "step": 5530 }, { "epoch": 0.11577911747467136, "grad_norm": 0.23261719942092896, "learning_rate": 0.0001992735278300486, "loss": 11.6826, "step": 5531 }, { "epoch": 0.1158000502386335, "grad_norm": 0.2475399672985077, "learning_rate": 0.0001992732640027747, "loss": 11.6877, "step": 5532 }, { "epoch": 0.11582098300259566, "grad_norm": 0.25966116786003113, "learning_rate": 0.0001992730001277781, "loss": 11.6866, "step": 5533 }, { "epoch": 0.11584191576655782, "grad_norm": 0.2174769788980484, "learning_rate": 0.000199272736205059, "loss": 11.6875, "step": 5534 }, { "epoch": 0.11586284853051997, "grad_norm": 0.21178822219371796, "learning_rate": 0.00019927247223461753, "loss": 11.693, "step": 5535 }, { "epoch": 0.11588378129448212, "grad_norm": 0.27651384472846985, "learning_rate": 0.00019927220821645375, "loss": 11.6796, "step": 5536 }, { "epoch": 0.11590471405844428, "grad_norm": 0.2136969119310379, "learning_rate": 0.00019927194415056784, "loss": 11.6771, "step": 5537 }, { "epoch": 0.11592564682240643, "grad_norm": 0.3259769380092621, "learning_rate": 0.00019927168003695995, "loss": 11.6846, "step": 5538 }, { "epoch": 0.11594657958636859, "grad_norm": 0.20894543826580048, "learning_rate": 0.00019927141587563015, "loss": 11.6692, "step": 5539 }, { "epoch": 0.11596751235033073, "grad_norm": 0.24234560132026672, "learning_rate": 0.0001992711516665786, "loss": 11.6828, "step": 5540 }, { "epoch": 0.11598844511429289, "grad_norm": 0.25758057832717896, "learning_rate": 0.0001992708874098054, "loss": 11.6759, "step": 5541 }, { "epoch": 0.11600937787825505, "grad_norm": 0.19109992682933807, "learning_rate": 0.0001992706231053107, "loss": 11.6667, "step": 5542 }, { "epoch": 0.1160303106422172, "grad_norm": 0.25085723400115967, "learning_rate": 0.00019927035875309462, "loss": 11.6826, "step": 5543 }, { "epoch": 0.11605124340617935, "grad_norm": 0.2253580093383789, "learning_rate": 0.00019927009435315732, "loss": 11.6736, "step": 5544 }, { "epoch": 0.11607217617014151, "grad_norm": 0.20228953659534454, "learning_rate": 0.00019926982990549886, "loss": 11.6825, "step": 5545 }, { "epoch": 0.11609310893410366, "grad_norm": 0.2682710587978363, "learning_rate": 0.00019926956541011946, "loss": 11.6791, "step": 5546 }, { "epoch": 0.11611404169806581, "grad_norm": 0.24486812949180603, "learning_rate": 0.00019926930086701914, "loss": 11.6931, "step": 5547 }, { "epoch": 0.11613497446202796, "grad_norm": 0.20725888013839722, "learning_rate": 0.00019926903627619808, "loss": 11.6842, "step": 5548 }, { "epoch": 0.11615590722599012, "grad_norm": 0.2324669510126114, "learning_rate": 0.00019926877163765642, "loss": 11.674, "step": 5549 }, { "epoch": 0.11617683998995228, "grad_norm": 0.23937855660915375, "learning_rate": 0.00019926850695139425, "loss": 11.6874, "step": 5550 }, { "epoch": 0.11619777275391442, "grad_norm": 0.2557623088359833, "learning_rate": 0.00019926824221741174, "loss": 11.6749, "step": 5551 }, { "epoch": 0.11621870551787658, "grad_norm": 0.3685707747936249, "learning_rate": 0.00019926797743570896, "loss": 11.6779, "step": 5552 }, { "epoch": 0.11623963828183874, "grad_norm": 0.2551349997520447, "learning_rate": 0.00019926771260628611, "loss": 11.7054, "step": 5553 }, { "epoch": 0.11626057104580088, "grad_norm": 0.25318166613578796, "learning_rate": 0.00019926744772914327, "loss": 11.6828, "step": 5554 }, { "epoch": 0.11628150380976304, "grad_norm": 0.23003137111663818, "learning_rate": 0.0001992671828042806, "loss": 11.681, "step": 5555 }, { "epoch": 0.1163024365737252, "grad_norm": 0.25186464190483093, "learning_rate": 0.0001992669178316982, "loss": 11.7022, "step": 5556 }, { "epoch": 0.11632336933768735, "grad_norm": 0.212715744972229, "learning_rate": 0.0001992666528113962, "loss": 11.6733, "step": 5557 }, { "epoch": 0.1163443021016495, "grad_norm": 0.2300056517124176, "learning_rate": 0.0001992663877433747, "loss": 11.6809, "step": 5558 }, { "epoch": 0.11636523486561165, "grad_norm": 0.18916192650794983, "learning_rate": 0.00019926612262763388, "loss": 11.6788, "step": 5559 }, { "epoch": 0.11638616762957381, "grad_norm": 0.22454650700092316, "learning_rate": 0.00019926585746417384, "loss": 11.6752, "step": 5560 }, { "epoch": 0.11640710039353597, "grad_norm": 0.2091239094734192, "learning_rate": 0.00019926559225299473, "loss": 11.6772, "step": 5561 }, { "epoch": 0.11642803315749811, "grad_norm": 0.2163504958152771, "learning_rate": 0.00019926532699409663, "loss": 11.6895, "step": 5562 }, { "epoch": 0.11644896592146027, "grad_norm": 0.23711735010147095, "learning_rate": 0.00019926506168747973, "loss": 11.6803, "step": 5563 }, { "epoch": 0.11646989868542243, "grad_norm": 0.19955839216709137, "learning_rate": 0.00019926479633314412, "loss": 11.6768, "step": 5564 }, { "epoch": 0.11649083144938457, "grad_norm": 0.24405308067798615, "learning_rate": 0.0001992645309310899, "loss": 11.6816, "step": 5565 }, { "epoch": 0.11651176421334673, "grad_norm": 0.2418394535779953, "learning_rate": 0.00019926426548131726, "loss": 11.6708, "step": 5566 }, { "epoch": 0.11653269697730888, "grad_norm": 0.2212546169757843, "learning_rate": 0.0001992639999838263, "loss": 11.673, "step": 5567 }, { "epoch": 0.11655362974127104, "grad_norm": 0.22972075641155243, "learning_rate": 0.00019926373443861713, "loss": 11.6804, "step": 5568 }, { "epoch": 0.1165745625052332, "grad_norm": 0.21475514769554138, "learning_rate": 0.00019926346884568993, "loss": 11.689, "step": 5569 }, { "epoch": 0.11659549526919534, "grad_norm": 0.24407127499580383, "learning_rate": 0.00019926320320504476, "loss": 11.6798, "step": 5570 }, { "epoch": 0.1166164280331575, "grad_norm": 0.20923909544944763, "learning_rate": 0.0001992629375166818, "loss": 11.6791, "step": 5571 }, { "epoch": 0.11663736079711966, "grad_norm": 0.19225136935710907, "learning_rate": 0.0001992626717806011, "loss": 11.6821, "step": 5572 }, { "epoch": 0.1166582935610818, "grad_norm": 0.1978362798690796, "learning_rate": 0.0001992624059968029, "loss": 11.6809, "step": 5573 }, { "epoch": 0.11667922632504396, "grad_norm": 0.2159586250782013, "learning_rate": 0.00019926214016528727, "loss": 11.6826, "step": 5574 }, { "epoch": 0.1167001590890061, "grad_norm": 0.2917434573173523, "learning_rate": 0.00019926187428605434, "loss": 11.6964, "step": 5575 }, { "epoch": 0.11672109185296826, "grad_norm": 0.2871035933494568, "learning_rate": 0.00019926160835910422, "loss": 11.6783, "step": 5576 }, { "epoch": 0.11674202461693042, "grad_norm": 0.2956005334854126, "learning_rate": 0.00019926134238443707, "loss": 11.6553, "step": 5577 }, { "epoch": 0.11676295738089257, "grad_norm": 0.23710189759731293, "learning_rate": 0.000199261076362053, "loss": 11.6936, "step": 5578 }, { "epoch": 0.11678389014485473, "grad_norm": 0.20178279280662537, "learning_rate": 0.00019926081029195212, "loss": 11.677, "step": 5579 }, { "epoch": 0.11680482290881689, "grad_norm": 0.26530858874320984, "learning_rate": 0.0001992605441741346, "loss": 11.6938, "step": 5580 }, { "epoch": 0.11682575567277903, "grad_norm": 0.297778844833374, "learning_rate": 0.00019926027800860056, "loss": 11.6748, "step": 5581 }, { "epoch": 0.11684668843674119, "grad_norm": 0.2353106141090393, "learning_rate": 0.00019926001179535008, "loss": 11.6881, "step": 5582 }, { "epoch": 0.11686762120070335, "grad_norm": 0.29796749353408813, "learning_rate": 0.00019925974553438333, "loss": 11.6816, "step": 5583 }, { "epoch": 0.11688855396466549, "grad_norm": 0.2251133769750595, "learning_rate": 0.00019925947922570046, "loss": 11.6886, "step": 5584 }, { "epoch": 0.11690948672862765, "grad_norm": 0.2734159529209137, "learning_rate": 0.00019925921286930156, "loss": 11.6872, "step": 5585 }, { "epoch": 0.1169304194925898, "grad_norm": 0.24932679533958435, "learning_rate": 0.00019925894646518675, "loss": 11.6842, "step": 5586 }, { "epoch": 0.11695135225655195, "grad_norm": 0.24027910828590393, "learning_rate": 0.00019925868001335618, "loss": 11.6789, "step": 5587 }, { "epoch": 0.11697228502051411, "grad_norm": 0.2447337508201599, "learning_rate": 0.00019925841351380998, "loss": 11.6765, "step": 5588 }, { "epoch": 0.11699321778447626, "grad_norm": 0.32684823870658875, "learning_rate": 0.00019925814696654826, "loss": 11.6962, "step": 5589 }, { "epoch": 0.11701415054843842, "grad_norm": 0.22212748229503632, "learning_rate": 0.00019925788037157118, "loss": 11.6866, "step": 5590 }, { "epoch": 0.11703508331240058, "grad_norm": 0.29878515005111694, "learning_rate": 0.00019925761372887883, "loss": 11.6819, "step": 5591 }, { "epoch": 0.11705601607636272, "grad_norm": 0.23310112953186035, "learning_rate": 0.00019925734703847136, "loss": 11.692, "step": 5592 }, { "epoch": 0.11707694884032488, "grad_norm": 0.24352885782718658, "learning_rate": 0.00019925708030034888, "loss": 11.6684, "step": 5593 }, { "epoch": 0.11709788160428702, "grad_norm": 0.23678961396217346, "learning_rate": 0.00019925681351451156, "loss": 11.6814, "step": 5594 }, { "epoch": 0.11711881436824918, "grad_norm": 0.24960888922214508, "learning_rate": 0.0001992565466809595, "loss": 11.6803, "step": 5595 }, { "epoch": 0.11713974713221134, "grad_norm": 0.24627672135829926, "learning_rate": 0.0001992562797996928, "loss": 11.7054, "step": 5596 }, { "epoch": 0.11716067989617349, "grad_norm": 0.24948623776435852, "learning_rate": 0.00019925601287071166, "loss": 11.6575, "step": 5597 }, { "epoch": 0.11718161266013565, "grad_norm": 0.2290610671043396, "learning_rate": 0.0001992557458940161, "loss": 11.6677, "step": 5598 }, { "epoch": 0.1172025454240978, "grad_norm": 0.2172287255525589, "learning_rate": 0.0001992554788696064, "loss": 11.6768, "step": 5599 }, { "epoch": 0.11722347818805995, "grad_norm": 0.25184547901153564, "learning_rate": 0.00019925521179748254, "loss": 11.6742, "step": 5600 }, { "epoch": 0.11724441095202211, "grad_norm": 0.22639578580856323, "learning_rate": 0.00019925494467764473, "loss": 11.6881, "step": 5601 }, { "epoch": 0.11726534371598425, "grad_norm": 0.24116194248199463, "learning_rate": 0.00019925467751009309, "loss": 11.6854, "step": 5602 }, { "epoch": 0.11728627647994641, "grad_norm": 0.2551514208316803, "learning_rate": 0.0001992544102948277, "loss": 11.6702, "step": 5603 }, { "epoch": 0.11730720924390857, "grad_norm": 0.18160748481750488, "learning_rate": 0.00019925414303184876, "loss": 11.6642, "step": 5604 }, { "epoch": 0.11732814200787071, "grad_norm": 0.16667766869068146, "learning_rate": 0.00019925387572115635, "loss": 11.6639, "step": 5605 }, { "epoch": 0.11734907477183287, "grad_norm": 0.21366378664970398, "learning_rate": 0.00019925360836275062, "loss": 11.681, "step": 5606 }, { "epoch": 0.11737000753579503, "grad_norm": 0.2101958692073822, "learning_rate": 0.00019925334095663167, "loss": 11.6884, "step": 5607 }, { "epoch": 0.11739094029975718, "grad_norm": 0.20384104549884796, "learning_rate": 0.0001992530735027997, "loss": 11.6701, "step": 5608 }, { "epoch": 0.11741187306371934, "grad_norm": 0.2139562964439392, "learning_rate": 0.00019925280600125475, "loss": 11.6919, "step": 5609 }, { "epoch": 0.11743280582768148, "grad_norm": 0.26922398805618286, "learning_rate": 0.000199252538451997, "loss": 11.6805, "step": 5610 }, { "epoch": 0.11745373859164364, "grad_norm": 0.24865113198757172, "learning_rate": 0.00019925227085502653, "loss": 11.6756, "step": 5611 }, { "epoch": 0.1174746713556058, "grad_norm": 0.23681612312793732, "learning_rate": 0.00019925200321034355, "loss": 11.6788, "step": 5612 }, { "epoch": 0.11749560411956794, "grad_norm": 0.21233603358268738, "learning_rate": 0.00019925173551794812, "loss": 11.6728, "step": 5613 }, { "epoch": 0.1175165368835301, "grad_norm": 0.28403356671333313, "learning_rate": 0.0001992514677778404, "loss": 11.6711, "step": 5614 }, { "epoch": 0.11753746964749226, "grad_norm": 0.21162760257720947, "learning_rate": 0.00019925119999002054, "loss": 11.6854, "step": 5615 }, { "epoch": 0.1175584024114544, "grad_norm": 0.2358776181936264, "learning_rate": 0.0001992509321544886, "loss": 11.6861, "step": 5616 }, { "epoch": 0.11757933517541656, "grad_norm": 0.21241413056850433, "learning_rate": 0.00019925066427124475, "loss": 11.671, "step": 5617 }, { "epoch": 0.11760026793937872, "grad_norm": 0.26944682002067566, "learning_rate": 0.0001992503963402891, "loss": 11.699, "step": 5618 }, { "epoch": 0.11762120070334087, "grad_norm": 0.3032608926296234, "learning_rate": 0.00019925012836162183, "loss": 11.6892, "step": 5619 }, { "epoch": 0.11764213346730303, "grad_norm": 0.23813648521900177, "learning_rate": 0.00019924986033524302, "loss": 11.6876, "step": 5620 }, { "epoch": 0.11766306623126517, "grad_norm": 0.27951642870903015, "learning_rate": 0.0001992495922611528, "loss": 11.6854, "step": 5621 }, { "epoch": 0.11768399899522733, "grad_norm": 0.25941985845565796, "learning_rate": 0.00019924932413935132, "loss": 11.672, "step": 5622 }, { "epoch": 0.11770493175918949, "grad_norm": 0.21483221650123596, "learning_rate": 0.00019924905596983874, "loss": 11.6872, "step": 5623 }, { "epoch": 0.11772586452315163, "grad_norm": 0.2005043774843216, "learning_rate": 0.0001992487877526151, "loss": 11.6759, "step": 5624 }, { "epoch": 0.11774679728711379, "grad_norm": 0.22153542935848236, "learning_rate": 0.00019924851948768062, "loss": 11.6774, "step": 5625 }, { "epoch": 0.11776773005107595, "grad_norm": 0.25954845547676086, "learning_rate": 0.00019924825117503535, "loss": 11.67, "step": 5626 }, { "epoch": 0.1177886628150381, "grad_norm": 0.22758042812347412, "learning_rate": 0.00019924798281467946, "loss": 11.6851, "step": 5627 }, { "epoch": 0.11780959557900025, "grad_norm": 0.20192919671535492, "learning_rate": 0.00019924771440661313, "loss": 11.6728, "step": 5628 }, { "epoch": 0.1178305283429624, "grad_norm": 0.29887428879737854, "learning_rate": 0.00019924744595083637, "loss": 11.6807, "step": 5629 }, { "epoch": 0.11785146110692456, "grad_norm": 0.2736852467060089, "learning_rate": 0.0001992471774473494, "loss": 11.6903, "step": 5630 }, { "epoch": 0.11787239387088672, "grad_norm": 0.19707798957824707, "learning_rate": 0.0001992469088961523, "loss": 11.6823, "step": 5631 }, { "epoch": 0.11789332663484886, "grad_norm": 0.20348714292049408, "learning_rate": 0.00019924664029724527, "loss": 11.664, "step": 5632 }, { "epoch": 0.11791425939881102, "grad_norm": 0.21712402999401093, "learning_rate": 0.00019924637165062836, "loss": 11.6948, "step": 5633 }, { "epoch": 0.11793519216277318, "grad_norm": 0.25020667910575867, "learning_rate": 0.0001992461029563017, "loss": 11.6793, "step": 5634 }, { "epoch": 0.11795612492673532, "grad_norm": 0.2114822268486023, "learning_rate": 0.0001992458342142655, "loss": 11.6806, "step": 5635 }, { "epoch": 0.11797705769069748, "grad_norm": 0.27050599455833435, "learning_rate": 0.0001992455654245198, "loss": 11.6817, "step": 5636 }, { "epoch": 0.11799799045465963, "grad_norm": 0.20418354868888855, "learning_rate": 0.00019924529658706482, "loss": 11.6827, "step": 5637 }, { "epoch": 0.11801892321862179, "grad_norm": 0.2454359531402588, "learning_rate": 0.00019924502770190058, "loss": 11.6816, "step": 5638 }, { "epoch": 0.11803985598258394, "grad_norm": 0.25666746497154236, "learning_rate": 0.0001992447587690273, "loss": 11.6679, "step": 5639 }, { "epoch": 0.11806078874654609, "grad_norm": 0.25897279381752014, "learning_rate": 0.00019924448978844508, "loss": 11.6738, "step": 5640 }, { "epoch": 0.11808172151050825, "grad_norm": 1.886332631111145, "learning_rate": 0.000199244220760154, "loss": 11.6402, "step": 5641 }, { "epoch": 0.1181026542744704, "grad_norm": 0.18652349710464478, "learning_rate": 0.0001992439516841543, "loss": 11.6818, "step": 5642 }, { "epoch": 0.11812358703843255, "grad_norm": 0.2068706899881363, "learning_rate": 0.000199243682560446, "loss": 11.6846, "step": 5643 }, { "epoch": 0.11814451980239471, "grad_norm": 0.224824458360672, "learning_rate": 0.00019924341338902927, "loss": 11.6785, "step": 5644 }, { "epoch": 0.11816545256635687, "grad_norm": 0.21497628092765808, "learning_rate": 0.00019924314416990425, "loss": 11.6732, "step": 5645 }, { "epoch": 0.11818638533031901, "grad_norm": 0.24353201687335968, "learning_rate": 0.00019924287490307108, "loss": 11.6787, "step": 5646 }, { "epoch": 0.11820731809428117, "grad_norm": 0.24346235394477844, "learning_rate": 0.00019924260558852987, "loss": 11.6834, "step": 5647 }, { "epoch": 0.11822825085824332, "grad_norm": 0.29080259799957275, "learning_rate": 0.00019924233622628073, "loss": 11.6828, "step": 5648 }, { "epoch": 0.11824918362220548, "grad_norm": 0.20239455997943878, "learning_rate": 0.00019924206681632382, "loss": 11.6782, "step": 5649 }, { "epoch": 0.11827011638616763, "grad_norm": 0.2023143321275711, "learning_rate": 0.00019924179735865925, "loss": 11.6814, "step": 5650 }, { "epoch": 0.11829104915012978, "grad_norm": 0.2796165347099304, "learning_rate": 0.0001992415278532872, "loss": 11.6689, "step": 5651 }, { "epoch": 0.11831198191409194, "grad_norm": 0.22790703177452087, "learning_rate": 0.0001992412583002077, "loss": 11.6816, "step": 5652 }, { "epoch": 0.1183329146780541, "grad_norm": 0.28617021441459656, "learning_rate": 0.00019924098869942097, "loss": 11.6881, "step": 5653 }, { "epoch": 0.11835384744201624, "grad_norm": 0.24484282732009888, "learning_rate": 0.0001992407190509271, "loss": 11.6906, "step": 5654 }, { "epoch": 0.1183747802059784, "grad_norm": 0.18055963516235352, "learning_rate": 0.00019924044935472622, "loss": 11.6681, "step": 5655 }, { "epoch": 0.11839571296994054, "grad_norm": 0.23694677650928497, "learning_rate": 0.00019924017961081848, "loss": 11.6995, "step": 5656 }, { "epoch": 0.1184166457339027, "grad_norm": 0.31236642599105835, "learning_rate": 0.000199239909819204, "loss": 11.6651, "step": 5657 }, { "epoch": 0.11843757849786486, "grad_norm": 0.22701787948608398, "learning_rate": 0.0001992396399798829, "loss": 11.6813, "step": 5658 }, { "epoch": 0.11845851126182701, "grad_norm": 0.25000444054603577, "learning_rate": 0.00019923937009285532, "loss": 11.6879, "step": 5659 }, { "epoch": 0.11847944402578917, "grad_norm": 0.2472127079963684, "learning_rate": 0.00019923910015812136, "loss": 11.664, "step": 5660 }, { "epoch": 0.11850037678975132, "grad_norm": 0.2277418076992035, "learning_rate": 0.0001992388301756812, "loss": 11.6953, "step": 5661 }, { "epoch": 0.11852130955371347, "grad_norm": 0.2976294159889221, "learning_rate": 0.00019923856014553498, "loss": 11.6877, "step": 5662 }, { "epoch": 0.11854224231767563, "grad_norm": 0.33976539969444275, "learning_rate": 0.00019923829006768273, "loss": 11.6782, "step": 5663 }, { "epoch": 0.11856317508163777, "grad_norm": 0.3622780442237854, "learning_rate": 0.00019923801994212469, "loss": 11.6816, "step": 5664 }, { "epoch": 0.11858410784559993, "grad_norm": 0.21713301539421082, "learning_rate": 0.0001992377497688609, "loss": 11.6799, "step": 5665 }, { "epoch": 0.11860504060956209, "grad_norm": 0.2915666103363037, "learning_rate": 0.00019923747954789158, "loss": 11.6887, "step": 5666 }, { "epoch": 0.11862597337352423, "grad_norm": 0.23418621718883514, "learning_rate": 0.0001992372092792168, "loss": 11.6766, "step": 5667 }, { "epoch": 0.1186469061374864, "grad_norm": 0.27536073327064514, "learning_rate": 0.0001992369389628367, "loss": 11.6989, "step": 5668 }, { "epoch": 0.11866783890144855, "grad_norm": 0.18179738521575928, "learning_rate": 0.0001992366685987514, "loss": 11.6941, "step": 5669 }, { "epoch": 0.1186887716654107, "grad_norm": 0.21993805468082428, "learning_rate": 0.0001992363981869611, "loss": 11.6695, "step": 5670 }, { "epoch": 0.11870970442937286, "grad_norm": 0.2279166728258133, "learning_rate": 0.00019923612772746583, "loss": 11.6716, "step": 5671 }, { "epoch": 0.11873063719333501, "grad_norm": 0.37841248512268066, "learning_rate": 0.00019923585722026577, "loss": 11.6911, "step": 5672 }, { "epoch": 0.11875156995729716, "grad_norm": 0.317049503326416, "learning_rate": 0.00019923558666536107, "loss": 11.6871, "step": 5673 }, { "epoch": 0.11877250272125932, "grad_norm": 0.22910015285015106, "learning_rate": 0.0001992353160627518, "loss": 11.6795, "step": 5674 }, { "epoch": 0.11879343548522146, "grad_norm": 0.27228760719299316, "learning_rate": 0.00019923504541243813, "loss": 11.6749, "step": 5675 }, { "epoch": 0.11881436824918362, "grad_norm": 0.24391081929206848, "learning_rate": 0.00019923477471442018, "loss": 11.6803, "step": 5676 }, { "epoch": 0.11883530101314578, "grad_norm": 0.36922410130500793, "learning_rate": 0.0001992345039686981, "loss": 11.6843, "step": 5677 }, { "epoch": 0.11885623377710793, "grad_norm": 0.20879077911376953, "learning_rate": 0.00019923423317527202, "loss": 11.6848, "step": 5678 }, { "epoch": 0.11887716654107008, "grad_norm": 0.23332366347312927, "learning_rate": 0.00019923396233414203, "loss": 11.7017, "step": 5679 }, { "epoch": 0.11889809930503224, "grad_norm": 0.21465261280536652, "learning_rate": 0.0001992336914453083, "loss": 11.6975, "step": 5680 }, { "epoch": 0.11891903206899439, "grad_norm": 0.19459252059459686, "learning_rate": 0.00019923342050877092, "loss": 11.6686, "step": 5681 }, { "epoch": 0.11893996483295655, "grad_norm": 0.18537281453609467, "learning_rate": 0.00019923314952453007, "loss": 11.6825, "step": 5682 }, { "epoch": 0.11896089759691869, "grad_norm": 0.22856955230236053, "learning_rate": 0.00019923287849258584, "loss": 11.679, "step": 5683 }, { "epoch": 0.11898183036088085, "grad_norm": 0.37633612751960754, "learning_rate": 0.0001992326074129384, "loss": 11.6736, "step": 5684 }, { "epoch": 0.11900276312484301, "grad_norm": 0.2336883842945099, "learning_rate": 0.00019923233628558787, "loss": 11.6852, "step": 5685 }, { "epoch": 0.11902369588880515, "grad_norm": 0.20346498489379883, "learning_rate": 0.00019923206511053435, "loss": 11.6741, "step": 5686 }, { "epoch": 0.11904462865276731, "grad_norm": 0.24797622859477997, "learning_rate": 0.00019923179388777796, "loss": 11.6832, "step": 5687 }, { "epoch": 0.11906556141672947, "grad_norm": 0.2673623263835907, "learning_rate": 0.00019923152261731888, "loss": 11.7017, "step": 5688 }, { "epoch": 0.11908649418069162, "grad_norm": 0.21520905196666718, "learning_rate": 0.0001992312512991572, "loss": 11.6961, "step": 5689 }, { "epoch": 0.11910742694465377, "grad_norm": 0.20583128929138184, "learning_rate": 0.0001992309799332931, "loss": 11.6805, "step": 5690 }, { "epoch": 0.11912835970861592, "grad_norm": 0.23670120537281036, "learning_rate": 0.00019923070851972664, "loss": 11.6686, "step": 5691 }, { "epoch": 0.11914929247257808, "grad_norm": 0.22798174619674683, "learning_rate": 0.00019923043705845803, "loss": 11.6831, "step": 5692 }, { "epoch": 0.11917022523654024, "grad_norm": 0.24360504746437073, "learning_rate": 0.00019923016554948734, "loss": 11.6902, "step": 5693 }, { "epoch": 0.11919115800050238, "grad_norm": 0.30772531032562256, "learning_rate": 0.00019922989399281474, "loss": 11.6693, "step": 5694 }, { "epoch": 0.11921209076446454, "grad_norm": 0.2534116208553314, "learning_rate": 0.0001992296223884403, "loss": 11.6804, "step": 5695 }, { "epoch": 0.1192330235284267, "grad_norm": 0.2604052722454071, "learning_rate": 0.00019922935073636423, "loss": 11.6759, "step": 5696 }, { "epoch": 0.11925395629238884, "grad_norm": 0.2659297585487366, "learning_rate": 0.00019922907903658664, "loss": 11.7002, "step": 5697 }, { "epoch": 0.119274889056351, "grad_norm": 0.22554175555706024, "learning_rate": 0.0001992288072891076, "loss": 11.6809, "step": 5698 }, { "epoch": 0.11929582182031316, "grad_norm": 0.2159452736377716, "learning_rate": 0.0001992285354939273, "loss": 11.676, "step": 5699 }, { "epoch": 0.1193167545842753, "grad_norm": 0.25949159264564514, "learning_rate": 0.00019922826365104584, "loss": 11.6911, "step": 5700 }, { "epoch": 0.11933768734823746, "grad_norm": 0.2476176768541336, "learning_rate": 0.0001992279917604634, "loss": 11.6757, "step": 5701 }, { "epoch": 0.11935862011219961, "grad_norm": 0.22773773968219757, "learning_rate": 0.00019922771982218006, "loss": 11.6734, "step": 5702 }, { "epoch": 0.11937955287616177, "grad_norm": 0.21036244928836823, "learning_rate": 0.00019922744783619595, "loss": 11.6763, "step": 5703 }, { "epoch": 0.11940048564012393, "grad_norm": 0.20890556275844574, "learning_rate": 0.00019922717580251123, "loss": 11.6765, "step": 5704 }, { "epoch": 0.11942141840408607, "grad_norm": 0.24890224635601044, "learning_rate": 0.00019922690372112603, "loss": 11.6817, "step": 5705 }, { "epoch": 0.11944235116804823, "grad_norm": 0.22476796805858612, "learning_rate": 0.00019922663159204043, "loss": 11.6819, "step": 5706 }, { "epoch": 0.11946328393201039, "grad_norm": 0.27529314160346985, "learning_rate": 0.00019922635941525463, "loss": 11.6916, "step": 5707 }, { "epoch": 0.11948421669597253, "grad_norm": 0.23241928219795227, "learning_rate": 0.00019922608719076873, "loss": 11.6828, "step": 5708 }, { "epoch": 0.11950514945993469, "grad_norm": 0.18491226434707642, "learning_rate": 0.00019922581491858284, "loss": 11.6744, "step": 5709 }, { "epoch": 0.11952608222389684, "grad_norm": 0.22270339727401733, "learning_rate": 0.00019922554259869712, "loss": 11.6841, "step": 5710 }, { "epoch": 0.119547014987859, "grad_norm": 1.4372990131378174, "learning_rate": 0.00019922527023111172, "loss": 11.6904, "step": 5711 }, { "epoch": 0.11956794775182115, "grad_norm": 0.2882181704044342, "learning_rate": 0.00019922499781582672, "loss": 11.6791, "step": 5712 }, { "epoch": 0.1195888805157833, "grad_norm": 0.22094926238059998, "learning_rate": 0.00019922472535284229, "loss": 11.6808, "step": 5713 }, { "epoch": 0.11960981327974546, "grad_norm": 0.2470320463180542, "learning_rate": 0.00019922445284215854, "loss": 11.6975, "step": 5714 }, { "epoch": 0.11963074604370762, "grad_norm": 0.24441172182559967, "learning_rate": 0.00019922418028377558, "loss": 11.6851, "step": 5715 }, { "epoch": 0.11965167880766976, "grad_norm": 0.20104461908340454, "learning_rate": 0.0001992239076776936, "loss": 11.6826, "step": 5716 }, { "epoch": 0.11967261157163192, "grad_norm": 0.21062332391738892, "learning_rate": 0.0001992236350239127, "loss": 11.6938, "step": 5717 }, { "epoch": 0.11969354433559407, "grad_norm": 0.19976945221424103, "learning_rate": 0.00019922336232243302, "loss": 11.68, "step": 5718 }, { "epoch": 0.11971447709955622, "grad_norm": 0.2495672106742859, "learning_rate": 0.00019922308957325463, "loss": 11.6742, "step": 5719 }, { "epoch": 0.11973540986351838, "grad_norm": 0.20884394645690918, "learning_rate": 0.00019922281677637774, "loss": 11.687, "step": 5720 }, { "epoch": 0.11975634262748053, "grad_norm": 0.24784277379512787, "learning_rate": 0.00019922254393180247, "loss": 11.6767, "step": 5721 }, { "epoch": 0.11977727539144269, "grad_norm": 0.21185606718063354, "learning_rate": 0.00019922227103952894, "loss": 11.6826, "step": 5722 }, { "epoch": 0.11979820815540485, "grad_norm": 0.246331587433815, "learning_rate": 0.00019922199809955724, "loss": 11.6984, "step": 5723 }, { "epoch": 0.11981914091936699, "grad_norm": 0.22363433241844177, "learning_rate": 0.00019922172511188758, "loss": 11.6891, "step": 5724 }, { "epoch": 0.11984007368332915, "grad_norm": 0.26389724016189575, "learning_rate": 0.00019922145207652, "loss": 11.6929, "step": 5725 }, { "epoch": 0.11986100644729131, "grad_norm": 0.28379231691360474, "learning_rate": 0.0001992211789934547, "loss": 11.6856, "step": 5726 }, { "epoch": 0.11988193921125345, "grad_norm": 0.2218414694070816, "learning_rate": 0.0001992209058626918, "loss": 11.6771, "step": 5727 }, { "epoch": 0.11990287197521561, "grad_norm": 0.23200483620166779, "learning_rate": 0.0001992206326842314, "loss": 11.6813, "step": 5728 }, { "epoch": 0.11992380473917776, "grad_norm": 0.2342853546142578, "learning_rate": 0.00019922035945807372, "loss": 11.683, "step": 5729 }, { "epoch": 0.11994473750313991, "grad_norm": 0.212441086769104, "learning_rate": 0.00019922008618421876, "loss": 11.6827, "step": 5730 }, { "epoch": 0.11996567026710207, "grad_norm": 0.1979297250509262, "learning_rate": 0.00019921981286266674, "loss": 11.6673, "step": 5731 }, { "epoch": 0.11998660303106422, "grad_norm": 0.20250093936920166, "learning_rate": 0.00019921953949341773, "loss": 11.6577, "step": 5732 }, { "epoch": 0.12000753579502638, "grad_norm": 0.19737191498279572, "learning_rate": 0.00019921926607647194, "loss": 11.6735, "step": 5733 }, { "epoch": 0.12002846855898854, "grad_norm": 0.2256578952074051, "learning_rate": 0.00019921899261182943, "loss": 11.6916, "step": 5734 }, { "epoch": 0.12004940132295068, "grad_norm": 0.25222498178482056, "learning_rate": 0.00019921871909949038, "loss": 11.6936, "step": 5735 }, { "epoch": 0.12007033408691284, "grad_norm": 0.24927085638046265, "learning_rate": 0.00019921844553945491, "loss": 11.6911, "step": 5736 }, { "epoch": 0.12009126685087498, "grad_norm": 0.27656930685043335, "learning_rate": 0.00019921817193172317, "loss": 11.6924, "step": 5737 }, { "epoch": 0.12011219961483714, "grad_norm": 0.18568354845046997, "learning_rate": 0.0001992178982762952, "loss": 11.6817, "step": 5738 }, { "epoch": 0.1201331323787993, "grad_norm": 0.2262815237045288, "learning_rate": 0.00019921762457317124, "loss": 11.6931, "step": 5739 }, { "epoch": 0.12015406514276145, "grad_norm": 0.22720833122730255, "learning_rate": 0.00019921735082235138, "loss": 11.6841, "step": 5740 }, { "epoch": 0.1201749979067236, "grad_norm": 0.21541281044483185, "learning_rate": 0.00019921707702383573, "loss": 11.6785, "step": 5741 }, { "epoch": 0.12019593067068576, "grad_norm": 0.2233670949935913, "learning_rate": 0.00019921680317762448, "loss": 11.678, "step": 5742 }, { "epoch": 0.12021686343464791, "grad_norm": 0.24371027946472168, "learning_rate": 0.00019921652928371768, "loss": 11.6793, "step": 5743 }, { "epoch": 0.12023779619861007, "grad_norm": 0.18117542564868927, "learning_rate": 0.00019921625534211553, "loss": 11.6761, "step": 5744 }, { "epoch": 0.12025872896257221, "grad_norm": 0.2231469601392746, "learning_rate": 0.00019921598135281814, "loss": 11.6797, "step": 5745 }, { "epoch": 0.12027966172653437, "grad_norm": 0.25159740447998047, "learning_rate": 0.00019921570731582563, "loss": 11.6826, "step": 5746 }, { "epoch": 0.12030059449049653, "grad_norm": 0.24655666947364807, "learning_rate": 0.00019921543323113815, "loss": 11.6874, "step": 5747 }, { "epoch": 0.12032152725445867, "grad_norm": 0.2554354667663574, "learning_rate": 0.00019921515909875583, "loss": 11.6701, "step": 5748 }, { "epoch": 0.12034246001842083, "grad_norm": 0.19616639614105225, "learning_rate": 0.00019921488491867877, "loss": 11.6777, "step": 5749 }, { "epoch": 0.12036339278238299, "grad_norm": 0.24223856627941132, "learning_rate": 0.00019921461069090715, "loss": 11.6679, "step": 5750 }, { "epoch": 0.12038432554634514, "grad_norm": 0.2648918926715851, "learning_rate": 0.00019921433641544107, "loss": 11.7008, "step": 5751 }, { "epoch": 0.1204052583103073, "grad_norm": 0.2181500494480133, "learning_rate": 0.00019921406209228067, "loss": 11.683, "step": 5752 }, { "epoch": 0.12042619107426944, "grad_norm": 0.24199125170707703, "learning_rate": 0.00019921378772142607, "loss": 11.6819, "step": 5753 }, { "epoch": 0.1204471238382316, "grad_norm": 0.3013482689857483, "learning_rate": 0.00019921351330287742, "loss": 11.6803, "step": 5754 }, { "epoch": 0.12046805660219376, "grad_norm": 0.2094881236553192, "learning_rate": 0.00019921323883663485, "loss": 11.6821, "step": 5755 }, { "epoch": 0.1204889893661559, "grad_norm": 0.30496591329574585, "learning_rate": 0.00019921296432269848, "loss": 11.6931, "step": 5756 }, { "epoch": 0.12050992213011806, "grad_norm": 0.21272452175617218, "learning_rate": 0.00019921268976106848, "loss": 11.6875, "step": 5757 }, { "epoch": 0.12053085489408022, "grad_norm": 0.2743176221847534, "learning_rate": 0.00019921241515174492, "loss": 11.6802, "step": 5758 }, { "epoch": 0.12055178765804236, "grad_norm": 0.1939336508512497, "learning_rate": 0.00019921214049472798, "loss": 11.6793, "step": 5759 }, { "epoch": 0.12057272042200452, "grad_norm": 0.20880787074565887, "learning_rate": 0.00019921186579001775, "loss": 11.6855, "step": 5760 }, { "epoch": 0.12059365318596668, "grad_norm": 0.20835722982883453, "learning_rate": 0.00019921159103761439, "loss": 11.6848, "step": 5761 }, { "epoch": 0.12061458594992883, "grad_norm": 0.2682372033596039, "learning_rate": 0.00019921131623751808, "loss": 11.6782, "step": 5762 }, { "epoch": 0.12063551871389099, "grad_norm": 0.22246627509593964, "learning_rate": 0.00019921104138972886, "loss": 11.6802, "step": 5763 }, { "epoch": 0.12065645147785313, "grad_norm": 0.2803773880004883, "learning_rate": 0.0001992107664942469, "loss": 11.6898, "step": 5764 }, { "epoch": 0.12067738424181529, "grad_norm": 0.22537311911582947, "learning_rate": 0.00019921049155107237, "loss": 11.6784, "step": 5765 }, { "epoch": 0.12069831700577745, "grad_norm": 0.22537857294082642, "learning_rate": 0.00019921021656020535, "loss": 11.6659, "step": 5766 }, { "epoch": 0.12071924976973959, "grad_norm": 0.32828766107559204, "learning_rate": 0.000199209941521646, "loss": 11.6659, "step": 5767 }, { "epoch": 0.12074018253370175, "grad_norm": 0.22736768424510956, "learning_rate": 0.00019920966643539441, "loss": 11.6892, "step": 5768 }, { "epoch": 0.12076111529766391, "grad_norm": 0.3203043043613434, "learning_rate": 0.00019920939130145076, "loss": 11.6931, "step": 5769 }, { "epoch": 0.12078204806162605, "grad_norm": 0.23515577614307404, "learning_rate": 0.00019920911611981518, "loss": 11.6896, "step": 5770 }, { "epoch": 0.12080298082558821, "grad_norm": 0.288709431886673, "learning_rate": 0.00019920884089048778, "loss": 11.6933, "step": 5771 }, { "epoch": 0.12082391358955036, "grad_norm": 0.274688720703125, "learning_rate": 0.0001992085656134687, "loss": 11.6976, "step": 5772 }, { "epoch": 0.12084484635351252, "grad_norm": 0.2265976518392563, "learning_rate": 0.0001992082902887581, "loss": 11.6825, "step": 5773 }, { "epoch": 0.12086577911747468, "grad_norm": 0.23005987703800201, "learning_rate": 0.00019920801491635605, "loss": 11.6939, "step": 5774 }, { "epoch": 0.12088671188143682, "grad_norm": 0.2223391979932785, "learning_rate": 0.00019920773949626275, "loss": 11.6816, "step": 5775 }, { "epoch": 0.12090764464539898, "grad_norm": 0.278048574924469, "learning_rate": 0.00019920746402847829, "loss": 11.6815, "step": 5776 }, { "epoch": 0.12092857740936114, "grad_norm": 0.2727142870426178, "learning_rate": 0.0001992071885130028, "loss": 11.6919, "step": 5777 }, { "epoch": 0.12094951017332328, "grad_norm": 0.24272705614566803, "learning_rate": 0.00019920691294983645, "loss": 11.6751, "step": 5778 }, { "epoch": 0.12097044293728544, "grad_norm": 0.25804072618484497, "learning_rate": 0.00019920663733897934, "loss": 11.6758, "step": 5779 }, { "epoch": 0.12099137570124759, "grad_norm": 0.2572377324104309, "learning_rate": 0.0001992063616804316, "loss": 11.6812, "step": 5780 }, { "epoch": 0.12101230846520974, "grad_norm": 0.20592303574085236, "learning_rate": 0.00019920608597419337, "loss": 11.6802, "step": 5781 }, { "epoch": 0.1210332412291719, "grad_norm": 0.32258281111717224, "learning_rate": 0.00019920581022026478, "loss": 11.694, "step": 5782 }, { "epoch": 0.12105417399313405, "grad_norm": 0.2222645878791809, "learning_rate": 0.000199205534418646, "loss": 11.6649, "step": 5783 }, { "epoch": 0.12107510675709621, "grad_norm": 0.22481553256511688, "learning_rate": 0.00019920525856933713, "loss": 11.6819, "step": 5784 }, { "epoch": 0.12109603952105837, "grad_norm": 0.2355034351348877, "learning_rate": 0.0001992049826723383, "loss": 11.6739, "step": 5785 }, { "epoch": 0.12111697228502051, "grad_norm": 0.187678262591362, "learning_rate": 0.00019920470672764963, "loss": 11.6981, "step": 5786 }, { "epoch": 0.12113790504898267, "grad_norm": 0.206658273935318, "learning_rate": 0.00019920443073527126, "loss": 11.6724, "step": 5787 }, { "epoch": 0.12115883781294483, "grad_norm": 0.24273328483104706, "learning_rate": 0.00019920415469520334, "loss": 11.6623, "step": 5788 }, { "epoch": 0.12117977057690697, "grad_norm": 0.2786363661289215, "learning_rate": 0.00019920387860744603, "loss": 11.677, "step": 5789 }, { "epoch": 0.12120070334086913, "grad_norm": 0.2317229062318802, "learning_rate": 0.0001992036024719994, "loss": 11.6768, "step": 5790 }, { "epoch": 0.12122163610483128, "grad_norm": 0.2391507625579834, "learning_rate": 0.00019920332628886361, "loss": 11.6902, "step": 5791 }, { "epoch": 0.12124256886879343, "grad_norm": 0.2900345027446747, "learning_rate": 0.0001992030500580388, "loss": 11.682, "step": 5792 }, { "epoch": 0.1212635016327556, "grad_norm": 0.2378455400466919, "learning_rate": 0.0001992027737795251, "loss": 11.6878, "step": 5793 }, { "epoch": 0.12128443439671774, "grad_norm": 0.32497066259384155, "learning_rate": 0.00019920249745332262, "loss": 11.6908, "step": 5794 }, { "epoch": 0.1213053671606799, "grad_norm": 0.26032784581184387, "learning_rate": 0.00019920222107943153, "loss": 11.6811, "step": 5795 }, { "epoch": 0.12132629992464206, "grad_norm": 0.1810353547334671, "learning_rate": 0.00019920194465785195, "loss": 11.6714, "step": 5796 }, { "epoch": 0.1213472326886042, "grad_norm": 0.2207222580909729, "learning_rate": 0.00019920166818858398, "loss": 11.6945, "step": 5797 }, { "epoch": 0.12136816545256636, "grad_norm": 0.18786373734474182, "learning_rate": 0.00019920139167162778, "loss": 11.6902, "step": 5798 }, { "epoch": 0.1213890982165285, "grad_norm": 0.246727854013443, "learning_rate": 0.00019920111510698352, "loss": 11.6839, "step": 5799 }, { "epoch": 0.12141003098049066, "grad_norm": 0.2624228894710541, "learning_rate": 0.00019920083849465125, "loss": 11.6829, "step": 5800 }, { "epoch": 0.12143096374445282, "grad_norm": 0.2518508732318878, "learning_rate": 0.0001992005618346312, "loss": 11.6701, "step": 5801 }, { "epoch": 0.12145189650841497, "grad_norm": 0.20216679573059082, "learning_rate": 0.00019920028512692342, "loss": 11.6922, "step": 5802 }, { "epoch": 0.12147282927237713, "grad_norm": 0.19721373915672302, "learning_rate": 0.00019920000837152807, "loss": 11.6825, "step": 5803 }, { "epoch": 0.12149376203633928, "grad_norm": 0.21387678384780884, "learning_rate": 0.00019919973156844528, "loss": 11.6719, "step": 5804 }, { "epoch": 0.12151469480030143, "grad_norm": 0.24241656064987183, "learning_rate": 0.00019919945471767522, "loss": 11.6838, "step": 5805 }, { "epoch": 0.12153562756426359, "grad_norm": 0.21464991569519043, "learning_rate": 0.00019919917781921798, "loss": 11.6792, "step": 5806 }, { "epoch": 0.12155656032822573, "grad_norm": 0.2415197193622589, "learning_rate": 0.0001991989008730737, "loss": 11.6812, "step": 5807 }, { "epoch": 0.12157749309218789, "grad_norm": 0.22818979620933533, "learning_rate": 0.00019919862387924252, "loss": 11.6487, "step": 5808 }, { "epoch": 0.12159842585615005, "grad_norm": 0.21312978863716125, "learning_rate": 0.00019919834683772457, "loss": 11.6859, "step": 5809 }, { "epoch": 0.1216193586201122, "grad_norm": 0.2336392104625702, "learning_rate": 0.00019919806974852002, "loss": 11.6764, "step": 5810 }, { "epoch": 0.12164029138407435, "grad_norm": 0.20999294519424438, "learning_rate": 0.00019919779261162894, "loss": 11.6722, "step": 5811 }, { "epoch": 0.12166122414803651, "grad_norm": 0.3556399345397949, "learning_rate": 0.00019919751542705151, "loss": 11.6971, "step": 5812 }, { "epoch": 0.12168215691199866, "grad_norm": 0.20670092105865479, "learning_rate": 0.00019919723819478784, "loss": 11.6927, "step": 5813 }, { "epoch": 0.12170308967596082, "grad_norm": 0.3776397109031677, "learning_rate": 0.00019919696091483808, "loss": 11.711, "step": 5814 }, { "epoch": 0.12172402243992297, "grad_norm": 0.26240503787994385, "learning_rate": 0.00019919668358720234, "loss": 11.6872, "step": 5815 }, { "epoch": 0.12174495520388512, "grad_norm": 0.22801199555397034, "learning_rate": 0.00019919640621188076, "loss": 11.688, "step": 5816 }, { "epoch": 0.12176588796784728, "grad_norm": 0.22030265629291534, "learning_rate": 0.00019919612878887348, "loss": 11.674, "step": 5817 }, { "epoch": 0.12178682073180942, "grad_norm": 0.22674915194511414, "learning_rate": 0.00019919585131818063, "loss": 11.6721, "step": 5818 }, { "epoch": 0.12180775349577158, "grad_norm": 0.20757567882537842, "learning_rate": 0.00019919557379980237, "loss": 11.6812, "step": 5819 }, { "epoch": 0.12182868625973374, "grad_norm": 0.2592710554599762, "learning_rate": 0.0001991952962337388, "loss": 11.6932, "step": 5820 }, { "epoch": 0.12184961902369588, "grad_norm": 0.22379931807518005, "learning_rate": 0.00019919501861999006, "loss": 11.6852, "step": 5821 }, { "epoch": 0.12187055178765804, "grad_norm": 0.2537369430065155, "learning_rate": 0.0001991947409585563, "loss": 11.6786, "step": 5822 }, { "epoch": 0.1218914845516202, "grad_norm": 0.28744980692863464, "learning_rate": 0.00019919446324943762, "loss": 11.679, "step": 5823 }, { "epoch": 0.12191241731558235, "grad_norm": 0.24630463123321533, "learning_rate": 0.00019919418549263414, "loss": 11.6832, "step": 5824 }, { "epoch": 0.1219333500795445, "grad_norm": 1.3657146692276, "learning_rate": 0.0001991939076881461, "loss": 11.7186, "step": 5825 }, { "epoch": 0.12195428284350665, "grad_norm": 0.222490593791008, "learning_rate": 0.0001991936298359735, "loss": 11.6799, "step": 5826 }, { "epoch": 0.12197521560746881, "grad_norm": 0.19935904443264008, "learning_rate": 0.00019919335193611658, "loss": 11.6743, "step": 5827 }, { "epoch": 0.12199614837143097, "grad_norm": 0.2482534795999527, "learning_rate": 0.0001991930739885754, "loss": 11.6751, "step": 5828 }, { "epoch": 0.12201708113539311, "grad_norm": 0.27876514196395874, "learning_rate": 0.00019919279599335014, "loss": 11.6683, "step": 5829 }, { "epoch": 0.12203801389935527, "grad_norm": 0.2863757610321045, "learning_rate": 0.00019919251795044092, "loss": 11.6769, "step": 5830 }, { "epoch": 0.12205894666331743, "grad_norm": 0.24315518140792847, "learning_rate": 0.00019919223985984786, "loss": 11.6836, "step": 5831 }, { "epoch": 0.12207987942727958, "grad_norm": 0.26010388135910034, "learning_rate": 0.0001991919617215711, "loss": 11.707, "step": 5832 }, { "epoch": 0.12210081219124173, "grad_norm": 0.2891998291015625, "learning_rate": 0.00019919168353561077, "loss": 11.6769, "step": 5833 }, { "epoch": 0.12212174495520388, "grad_norm": 0.24118344485759735, "learning_rate": 0.000199191405301967, "loss": 11.6752, "step": 5834 }, { "epoch": 0.12214267771916604, "grad_norm": 0.27486270666122437, "learning_rate": 0.00019919112702063995, "loss": 11.6772, "step": 5835 }, { "epoch": 0.1221636104831282, "grad_norm": 0.21849915385246277, "learning_rate": 0.00019919084869162972, "loss": 11.6744, "step": 5836 }, { "epoch": 0.12218454324709034, "grad_norm": 0.280732661485672, "learning_rate": 0.0001991905703149365, "loss": 11.6862, "step": 5837 }, { "epoch": 0.1222054760110525, "grad_norm": 0.23315638303756714, "learning_rate": 0.00019919029189056036, "loss": 11.6635, "step": 5838 }, { "epoch": 0.12222640877501466, "grad_norm": 0.19733311235904694, "learning_rate": 0.00019919001341850144, "loss": 11.6812, "step": 5839 }, { "epoch": 0.1222473415389768, "grad_norm": 0.23286165297031403, "learning_rate": 0.0001991897348987599, "loss": 11.6746, "step": 5840 }, { "epoch": 0.12226827430293896, "grad_norm": 0.20751480758190155, "learning_rate": 0.0001991894563313359, "loss": 11.6715, "step": 5841 }, { "epoch": 0.12228920706690112, "grad_norm": 0.20561616122722626, "learning_rate": 0.00019918917771622954, "loss": 11.6805, "step": 5842 }, { "epoch": 0.12231013983086327, "grad_norm": 0.2315058708190918, "learning_rate": 0.00019918889905344093, "loss": 11.6798, "step": 5843 }, { "epoch": 0.12233107259482542, "grad_norm": 0.18783795833587646, "learning_rate": 0.0001991886203429702, "loss": 11.687, "step": 5844 }, { "epoch": 0.12235200535878757, "grad_norm": 0.2858359217643738, "learning_rate": 0.00019918834158481755, "loss": 11.7018, "step": 5845 }, { "epoch": 0.12237293812274973, "grad_norm": 0.1947401911020279, "learning_rate": 0.0001991880627789831, "loss": 11.674, "step": 5846 }, { "epoch": 0.12239387088671189, "grad_norm": 0.260623037815094, "learning_rate": 0.00019918778392546691, "loss": 11.676, "step": 5847 }, { "epoch": 0.12241480365067403, "grad_norm": 0.2491980940103531, "learning_rate": 0.0001991875050242692, "loss": 11.6814, "step": 5848 }, { "epoch": 0.12243573641463619, "grad_norm": 0.25400853157043457, "learning_rate": 0.00019918722607539007, "loss": 11.6948, "step": 5849 }, { "epoch": 0.12245666917859835, "grad_norm": 0.23465637862682343, "learning_rate": 0.00019918694707882963, "loss": 11.6929, "step": 5850 }, { "epoch": 0.1224776019425605, "grad_norm": 0.2389112412929535, "learning_rate": 0.00019918666803458803, "loss": 11.6851, "step": 5851 }, { "epoch": 0.12249853470652265, "grad_norm": 0.29201552271842957, "learning_rate": 0.00019918638894266541, "loss": 11.6942, "step": 5852 }, { "epoch": 0.1225194674704848, "grad_norm": 0.28024038672447205, "learning_rate": 0.00019918610980306195, "loss": 11.6776, "step": 5853 }, { "epoch": 0.12254040023444696, "grad_norm": 0.23915843665599823, "learning_rate": 0.0001991858306157777, "loss": 11.6862, "step": 5854 }, { "epoch": 0.12256133299840911, "grad_norm": 0.24781860411167145, "learning_rate": 0.00019918555138081283, "loss": 11.6706, "step": 5855 }, { "epoch": 0.12258226576237126, "grad_norm": 0.22887858748435974, "learning_rate": 0.0001991852720981675, "loss": 11.6818, "step": 5856 }, { "epoch": 0.12260319852633342, "grad_norm": 0.2604514956474304, "learning_rate": 0.00019918499276784182, "loss": 11.6908, "step": 5857 }, { "epoch": 0.12262413129029558, "grad_norm": 0.24858663976192474, "learning_rate": 0.0001991847133898359, "loss": 11.689, "step": 5858 }, { "epoch": 0.12264506405425772, "grad_norm": 0.23400786519050598, "learning_rate": 0.00019918443396414994, "loss": 11.694, "step": 5859 }, { "epoch": 0.12266599681821988, "grad_norm": 0.2325136810541153, "learning_rate": 0.00019918415449078403, "loss": 11.6709, "step": 5860 }, { "epoch": 0.12268692958218202, "grad_norm": 0.26410871744155884, "learning_rate": 0.00019918387496973826, "loss": 11.6872, "step": 5861 }, { "epoch": 0.12270786234614418, "grad_norm": 0.2520331144332886, "learning_rate": 0.00019918359540101288, "loss": 11.6624, "step": 5862 }, { "epoch": 0.12272879511010634, "grad_norm": 0.25471293926239014, "learning_rate": 0.00019918331578460793, "loss": 11.6795, "step": 5863 }, { "epoch": 0.12274972787406849, "grad_norm": 0.35595273971557617, "learning_rate": 0.00019918303612052353, "loss": 11.6794, "step": 5864 }, { "epoch": 0.12277066063803065, "grad_norm": 0.19832676649093628, "learning_rate": 0.0001991827564087599, "loss": 11.6779, "step": 5865 }, { "epoch": 0.1227915934019928, "grad_norm": 0.23543961346149445, "learning_rate": 0.00019918247664931716, "loss": 11.6771, "step": 5866 }, { "epoch": 0.12281252616595495, "grad_norm": 0.1878320872783661, "learning_rate": 0.00019918219684219538, "loss": 11.6899, "step": 5867 }, { "epoch": 0.12283345892991711, "grad_norm": 0.266893595457077, "learning_rate": 0.0001991819169873947, "loss": 11.6895, "step": 5868 }, { "epoch": 0.12285439169387925, "grad_norm": 0.28688785433769226, "learning_rate": 0.00019918163708491533, "loss": 11.6726, "step": 5869 }, { "epoch": 0.12287532445784141, "grad_norm": 0.19353196024894714, "learning_rate": 0.00019918135713475733, "loss": 11.6819, "step": 5870 }, { "epoch": 0.12289625722180357, "grad_norm": 0.2058674693107605, "learning_rate": 0.00019918107713692092, "loss": 11.676, "step": 5871 }, { "epoch": 0.12291718998576572, "grad_norm": 0.2158740609884262, "learning_rate": 0.00019918079709140614, "loss": 11.6759, "step": 5872 }, { "epoch": 0.12293812274972787, "grad_norm": 0.20222748816013336, "learning_rate": 0.00019918051699821315, "loss": 11.6777, "step": 5873 }, { "epoch": 0.12295905551369003, "grad_norm": 0.17058975994586945, "learning_rate": 0.00019918023685734212, "loss": 11.6769, "step": 5874 }, { "epoch": 0.12297998827765218, "grad_norm": 0.20138904452323914, "learning_rate": 0.00019917995666879313, "loss": 11.6831, "step": 5875 }, { "epoch": 0.12300092104161434, "grad_norm": 0.2104150503873825, "learning_rate": 0.00019917967643256636, "loss": 11.699, "step": 5876 }, { "epoch": 0.1230218538055765, "grad_norm": 0.25958606600761414, "learning_rate": 0.00019917939614866195, "loss": 11.6872, "step": 5877 }, { "epoch": 0.12304278656953864, "grad_norm": 0.25080549716949463, "learning_rate": 0.00019917911581708003, "loss": 11.691, "step": 5878 }, { "epoch": 0.1230637193335008, "grad_norm": 0.2519742250442505, "learning_rate": 0.0001991788354378207, "loss": 11.695, "step": 5879 }, { "epoch": 0.12308465209746294, "grad_norm": 0.2170211374759674, "learning_rate": 0.0001991785550108841, "loss": 11.6849, "step": 5880 }, { "epoch": 0.1231055848614251, "grad_norm": 0.19325555860996246, "learning_rate": 0.0001991782745362704, "loss": 11.6752, "step": 5881 }, { "epoch": 0.12312651762538726, "grad_norm": 0.21098242700099945, "learning_rate": 0.00019917799401397972, "loss": 11.6821, "step": 5882 }, { "epoch": 0.1231474503893494, "grad_norm": 0.2536690831184387, "learning_rate": 0.00019917771344401218, "loss": 11.6773, "step": 5883 }, { "epoch": 0.12316838315331156, "grad_norm": 0.19485080242156982, "learning_rate": 0.00019917743282636793, "loss": 11.6802, "step": 5884 }, { "epoch": 0.12318931591727372, "grad_norm": 0.21503278613090515, "learning_rate": 0.00019917715216104712, "loss": 11.6528, "step": 5885 }, { "epoch": 0.12321024868123587, "grad_norm": 0.263814777135849, "learning_rate": 0.00019917687144804984, "loss": 11.6871, "step": 5886 }, { "epoch": 0.12323118144519803, "grad_norm": 0.22292715311050415, "learning_rate": 0.00019917659068737623, "loss": 11.6838, "step": 5887 }, { "epoch": 0.12325211420916017, "grad_norm": 0.20856398344039917, "learning_rate": 0.00019917630987902648, "loss": 11.6763, "step": 5888 }, { "epoch": 0.12327304697312233, "grad_norm": 0.3101460337638855, "learning_rate": 0.0001991760290230007, "loss": 11.6859, "step": 5889 }, { "epoch": 0.12329397973708449, "grad_norm": 0.23958374559879303, "learning_rate": 0.000199175748119299, "loss": 11.6828, "step": 5890 }, { "epoch": 0.12331491250104663, "grad_norm": 0.28170448541641235, "learning_rate": 0.00019917546716792154, "loss": 11.6835, "step": 5891 }, { "epoch": 0.12333584526500879, "grad_norm": 0.21833807229995728, "learning_rate": 0.0001991751861688684, "loss": 11.6927, "step": 5892 }, { "epoch": 0.12335677802897095, "grad_norm": 0.25850215554237366, "learning_rate": 0.00019917490512213982, "loss": 11.6748, "step": 5893 }, { "epoch": 0.1233777107929331, "grad_norm": 0.21189463138580322, "learning_rate": 0.00019917462402773583, "loss": 11.6734, "step": 5894 }, { "epoch": 0.12339864355689525, "grad_norm": 0.23385249078273773, "learning_rate": 0.00019917434288565665, "loss": 11.6779, "step": 5895 }, { "epoch": 0.1234195763208574, "grad_norm": 0.23781715333461761, "learning_rate": 0.00019917406169590233, "loss": 11.6776, "step": 5896 }, { "epoch": 0.12344050908481956, "grad_norm": 0.30789023637771606, "learning_rate": 0.0001991737804584731, "loss": 11.6943, "step": 5897 }, { "epoch": 0.12346144184878172, "grad_norm": 0.19743606448173523, "learning_rate": 0.000199173499173369, "loss": 11.6788, "step": 5898 }, { "epoch": 0.12348237461274386, "grad_norm": 0.3045704662799835, "learning_rate": 0.00019917321784059024, "loss": 11.6757, "step": 5899 }, { "epoch": 0.12350330737670602, "grad_norm": 0.2346508651971817, "learning_rate": 0.00019917293646013693, "loss": 11.6771, "step": 5900 }, { "epoch": 0.12352424014066818, "grad_norm": 0.2262217104434967, "learning_rate": 0.0001991726550320092, "loss": 11.6768, "step": 5901 }, { "epoch": 0.12354517290463032, "grad_norm": 0.25069960951805115, "learning_rate": 0.00019917237355620716, "loss": 11.6862, "step": 5902 }, { "epoch": 0.12356610566859248, "grad_norm": 0.2199583202600479, "learning_rate": 0.000199172092032731, "loss": 11.6915, "step": 5903 }, { "epoch": 0.12358703843255464, "grad_norm": 0.22630971670150757, "learning_rate": 0.00019917181046158082, "loss": 11.6751, "step": 5904 }, { "epoch": 0.12360797119651679, "grad_norm": 0.2821674942970276, "learning_rate": 0.00019917152884275677, "loss": 11.6786, "step": 5905 }, { "epoch": 0.12362890396047894, "grad_norm": 0.23972457647323608, "learning_rate": 0.00019917124717625897, "loss": 11.6803, "step": 5906 }, { "epoch": 0.12364983672444109, "grad_norm": 0.3548717498779297, "learning_rate": 0.0001991709654620876, "loss": 11.6977, "step": 5907 }, { "epoch": 0.12367076948840325, "grad_norm": 0.2301376610994339, "learning_rate": 0.0001991706837002427, "loss": 11.6909, "step": 5908 }, { "epoch": 0.12369170225236541, "grad_norm": 0.2030530571937561, "learning_rate": 0.00019917040189072448, "loss": 11.6923, "step": 5909 }, { "epoch": 0.12371263501632755, "grad_norm": 0.22839006781578064, "learning_rate": 0.00019917012003353309, "loss": 11.6787, "step": 5910 }, { "epoch": 0.12373356778028971, "grad_norm": 0.21891666948795319, "learning_rate": 0.00019916983812866862, "loss": 11.6805, "step": 5911 }, { "epoch": 0.12375450054425187, "grad_norm": 0.22711388766765594, "learning_rate": 0.0001991695561761312, "loss": 11.6702, "step": 5912 }, { "epoch": 0.12377543330821401, "grad_norm": 0.25970447063446045, "learning_rate": 0.00019916927417592102, "loss": 11.669, "step": 5913 }, { "epoch": 0.12379636607217617, "grad_norm": 0.21790777146816254, "learning_rate": 0.00019916899212803817, "loss": 11.6783, "step": 5914 }, { "epoch": 0.12381729883613832, "grad_norm": 0.23688004910945892, "learning_rate": 0.0001991687100324828, "loss": 11.6793, "step": 5915 }, { "epoch": 0.12383823160010048, "grad_norm": 0.21622592210769653, "learning_rate": 0.00019916842788925504, "loss": 11.6681, "step": 5916 }, { "epoch": 0.12385916436406263, "grad_norm": 0.23931777477264404, "learning_rate": 0.00019916814569835502, "loss": 11.6711, "step": 5917 }, { "epoch": 0.12388009712802478, "grad_norm": 0.26998719573020935, "learning_rate": 0.00019916786345978288, "loss": 11.6975, "step": 5918 }, { "epoch": 0.12390102989198694, "grad_norm": 0.23890119791030884, "learning_rate": 0.0001991675811735388, "loss": 11.6731, "step": 5919 }, { "epoch": 0.1239219626559491, "grad_norm": 0.198707714676857, "learning_rate": 0.00019916729883962285, "loss": 11.6833, "step": 5920 }, { "epoch": 0.12394289541991124, "grad_norm": 0.2988446354866028, "learning_rate": 0.0001991670164580352, "loss": 11.6851, "step": 5921 }, { "epoch": 0.1239638281838734, "grad_norm": 0.23403292894363403, "learning_rate": 0.00019916673402877596, "loss": 11.6812, "step": 5922 }, { "epoch": 0.12398476094783555, "grad_norm": 0.21689708530902863, "learning_rate": 0.0001991664515518453, "loss": 11.68, "step": 5923 }, { "epoch": 0.1240056937117977, "grad_norm": 0.2468191236257553, "learning_rate": 0.00019916616902724334, "loss": 11.696, "step": 5924 }, { "epoch": 0.12402662647575986, "grad_norm": 0.23676486313343048, "learning_rate": 0.0001991658864549702, "loss": 11.6892, "step": 5925 }, { "epoch": 0.12404755923972201, "grad_norm": 0.2418099045753479, "learning_rate": 0.00019916560383502604, "loss": 11.6682, "step": 5926 }, { "epoch": 0.12406849200368417, "grad_norm": 0.267540842294693, "learning_rate": 0.000199165321167411, "loss": 11.6834, "step": 5927 }, { "epoch": 0.12408942476764633, "grad_norm": 0.28155142068862915, "learning_rate": 0.00019916503845212517, "loss": 11.6903, "step": 5928 }, { "epoch": 0.12411035753160847, "grad_norm": 0.24984519183635712, "learning_rate": 0.00019916475568916876, "loss": 11.6666, "step": 5929 }, { "epoch": 0.12413129029557063, "grad_norm": 0.28198325634002686, "learning_rate": 0.00019916447287854182, "loss": 11.6934, "step": 5930 }, { "epoch": 0.12415222305953279, "grad_norm": 0.28235459327697754, "learning_rate": 0.00019916419002024457, "loss": 11.677, "step": 5931 }, { "epoch": 0.12417315582349493, "grad_norm": 0.29954516887664795, "learning_rate": 0.00019916390711427712, "loss": 11.6848, "step": 5932 }, { "epoch": 0.12419408858745709, "grad_norm": 0.2532212436199188, "learning_rate": 0.00019916362416063957, "loss": 11.6809, "step": 5933 }, { "epoch": 0.12421502135141924, "grad_norm": 0.1962195187807083, "learning_rate": 0.00019916334115933207, "loss": 11.6807, "step": 5934 }, { "epoch": 0.1242359541153814, "grad_norm": 0.21974794566631317, "learning_rate": 0.00019916305811035475, "loss": 11.6878, "step": 5935 }, { "epoch": 0.12425688687934355, "grad_norm": 0.25914496183395386, "learning_rate": 0.00019916277501370777, "loss": 11.7035, "step": 5936 }, { "epoch": 0.1242778196433057, "grad_norm": 0.2140607237815857, "learning_rate": 0.0001991624918693913, "loss": 11.6854, "step": 5937 }, { "epoch": 0.12429875240726786, "grad_norm": 0.2459641396999359, "learning_rate": 0.0001991622086774054, "loss": 11.6894, "step": 5938 }, { "epoch": 0.12431968517123002, "grad_norm": 0.19683408737182617, "learning_rate": 0.0001991619254377502, "loss": 11.6778, "step": 5939 }, { "epoch": 0.12434061793519216, "grad_norm": 0.18858861923217773, "learning_rate": 0.00019916164215042593, "loss": 11.6732, "step": 5940 }, { "epoch": 0.12436155069915432, "grad_norm": 0.21899031102657318, "learning_rate": 0.00019916135881543263, "loss": 11.6724, "step": 5941 }, { "epoch": 0.12438248346311646, "grad_norm": 0.2226872742176056, "learning_rate": 0.00019916107543277052, "loss": 11.6786, "step": 5942 }, { "epoch": 0.12440341622707862, "grad_norm": 0.20508362352848053, "learning_rate": 0.00019916079200243966, "loss": 11.6785, "step": 5943 }, { "epoch": 0.12442434899104078, "grad_norm": 0.2555512487888336, "learning_rate": 0.00019916050852444026, "loss": 11.6733, "step": 5944 }, { "epoch": 0.12444528175500293, "grad_norm": 0.26321178674697876, "learning_rate": 0.0001991602249987724, "loss": 11.6546, "step": 5945 }, { "epoch": 0.12446621451896508, "grad_norm": 0.22415974736213684, "learning_rate": 0.0001991599414254362, "loss": 11.6842, "step": 5946 }, { "epoch": 0.12448714728292724, "grad_norm": 0.2125932276248932, "learning_rate": 0.00019915965780443188, "loss": 11.6817, "step": 5947 }, { "epoch": 0.12450808004688939, "grad_norm": 0.24971792101860046, "learning_rate": 0.00019915937413575948, "loss": 11.6683, "step": 5948 }, { "epoch": 0.12452901281085155, "grad_norm": 0.21989662945270538, "learning_rate": 0.0001991590904194192, "loss": 11.6775, "step": 5949 }, { "epoch": 0.12454994557481369, "grad_norm": 0.28277722001075745, "learning_rate": 0.00019915880665541115, "loss": 11.6798, "step": 5950 }, { "epoch": 0.12457087833877585, "grad_norm": 0.2280733734369278, "learning_rate": 0.0001991585228437355, "loss": 11.671, "step": 5951 }, { "epoch": 0.12459181110273801, "grad_norm": 0.26144590973854065, "learning_rate": 0.00019915823898439231, "loss": 11.6769, "step": 5952 }, { "epoch": 0.12461274386670015, "grad_norm": 0.20692317187786102, "learning_rate": 0.0001991579550773818, "loss": 11.6838, "step": 5953 }, { "epoch": 0.12463367663066231, "grad_norm": 0.21952612698078156, "learning_rate": 0.00019915767112270409, "loss": 11.6766, "step": 5954 }, { "epoch": 0.12465460939462447, "grad_norm": 0.21579913794994354, "learning_rate": 0.0001991573871203593, "loss": 11.6782, "step": 5955 }, { "epoch": 0.12467554215858662, "grad_norm": 0.23925107717514038, "learning_rate": 0.00019915710307034755, "loss": 11.6893, "step": 5956 }, { "epoch": 0.12469647492254878, "grad_norm": 0.3034636080265045, "learning_rate": 0.000199156818972669, "loss": 11.698, "step": 5957 }, { "epoch": 0.12471740768651093, "grad_norm": 0.191847562789917, "learning_rate": 0.00019915653482732376, "loss": 11.6951, "step": 5958 }, { "epoch": 0.12473834045047308, "grad_norm": 0.1842862069606781, "learning_rate": 0.000199156250634312, "loss": 11.6819, "step": 5959 }, { "epoch": 0.12475927321443524, "grad_norm": 0.23544372618198395, "learning_rate": 0.00019915596639363386, "loss": 11.6758, "step": 5960 }, { "epoch": 0.12478020597839738, "grad_norm": 0.197844997048378, "learning_rate": 0.00019915568210528942, "loss": 11.6871, "step": 5961 }, { "epoch": 0.12480113874235954, "grad_norm": 0.23870232701301575, "learning_rate": 0.0001991553977692789, "loss": 11.6793, "step": 5962 }, { "epoch": 0.1248220715063217, "grad_norm": 0.3022700250148773, "learning_rate": 0.00019915511338560238, "loss": 11.6813, "step": 5963 }, { "epoch": 0.12484300427028384, "grad_norm": 0.20901577174663544, "learning_rate": 0.00019915482895426, "loss": 11.6929, "step": 5964 }, { "epoch": 0.124863937034246, "grad_norm": 0.2794334292411804, "learning_rate": 0.00019915454447525194, "loss": 11.6823, "step": 5965 }, { "epoch": 0.12488486979820816, "grad_norm": 0.23057372868061066, "learning_rate": 0.00019915425994857823, "loss": 11.6833, "step": 5966 }, { "epoch": 0.1249058025621703, "grad_norm": 0.2354975938796997, "learning_rate": 0.00019915397537423915, "loss": 11.6771, "step": 5967 }, { "epoch": 0.12492673532613247, "grad_norm": 0.26409390568733215, "learning_rate": 0.00019915369075223474, "loss": 11.691, "step": 5968 }, { "epoch": 0.12494766809009461, "grad_norm": 0.22676265239715576, "learning_rate": 0.00019915340608256515, "loss": 11.671, "step": 5969 }, { "epoch": 0.12496860085405677, "grad_norm": 0.19217844307422638, "learning_rate": 0.00019915312136523054, "loss": 11.6656, "step": 5970 }, { "epoch": 0.12498953361801893, "grad_norm": 0.2415182739496231, "learning_rate": 0.00019915283660023106, "loss": 11.6828, "step": 5971 }, { "epoch": 0.12501046638198107, "grad_norm": 0.2632999122142792, "learning_rate": 0.0001991525517875668, "loss": 11.679, "step": 5972 }, { "epoch": 0.12503139914594322, "grad_norm": 0.3311753571033478, "learning_rate": 0.00019915226692723793, "loss": 11.6832, "step": 5973 }, { "epoch": 0.1250523319099054, "grad_norm": 0.20964953303337097, "learning_rate": 0.0001991519820192446, "loss": 11.686, "step": 5974 }, { "epoch": 0.12507326467386753, "grad_norm": 0.2750978469848633, "learning_rate": 0.0001991516970635869, "loss": 11.6563, "step": 5975 }, { "epoch": 0.12509419743782968, "grad_norm": 0.20300640165805817, "learning_rate": 0.00019915141206026498, "loss": 11.6775, "step": 5976 }, { "epoch": 0.12511513020179185, "grad_norm": 0.3022705018520355, "learning_rate": 0.000199151127009279, "loss": 11.6918, "step": 5977 }, { "epoch": 0.125136062965754, "grad_norm": 0.20056676864624023, "learning_rate": 0.0001991508419106291, "loss": 11.6809, "step": 5978 }, { "epoch": 0.12515699572971614, "grad_norm": 0.24070307612419128, "learning_rate": 0.00019915055676431539, "loss": 11.7024, "step": 5979 }, { "epoch": 0.12517792849367831, "grad_norm": 0.1796240359544754, "learning_rate": 0.00019915027157033804, "loss": 11.6686, "step": 5980 }, { "epoch": 0.12519886125764046, "grad_norm": 0.26428213715553284, "learning_rate": 0.00019914998632869716, "loss": 11.6905, "step": 5981 }, { "epoch": 0.1252197940216026, "grad_norm": 1.5822491645812988, "learning_rate": 0.00019914970103939288, "loss": 11.6792, "step": 5982 }, { "epoch": 0.12524072678556478, "grad_norm": 0.2259613275527954, "learning_rate": 0.00019914941570242536, "loss": 11.695, "step": 5983 }, { "epoch": 0.12526165954952692, "grad_norm": 0.26331618428230286, "learning_rate": 0.00019914913031779472, "loss": 11.6827, "step": 5984 }, { "epoch": 0.12528259231348907, "grad_norm": 0.2025557905435562, "learning_rate": 0.0001991488448855011, "loss": 11.6753, "step": 5985 }, { "epoch": 0.12530352507745124, "grad_norm": 0.2516809105873108, "learning_rate": 0.00019914855940554466, "loss": 11.7056, "step": 5986 }, { "epoch": 0.12532445784141338, "grad_norm": 0.20170274376869202, "learning_rate": 0.0001991482738779255, "loss": 11.6917, "step": 5987 }, { "epoch": 0.12534539060537553, "grad_norm": 0.24819058179855347, "learning_rate": 0.0001991479883026438, "loss": 11.676, "step": 5988 }, { "epoch": 0.1253663233693377, "grad_norm": 0.22622078657150269, "learning_rate": 0.0001991477026796997, "loss": 11.6776, "step": 5989 }, { "epoch": 0.12538725613329985, "grad_norm": 0.22361715137958527, "learning_rate": 0.00019914741700909327, "loss": 11.6835, "step": 5990 }, { "epoch": 0.125408188897262, "grad_norm": 0.25697991251945496, "learning_rate": 0.0001991471312908247, "loss": 11.6846, "step": 5991 }, { "epoch": 0.12542912166122414, "grad_norm": 0.2299281358718872, "learning_rate": 0.0001991468455248941, "loss": 11.6767, "step": 5992 }, { "epoch": 0.1254500544251863, "grad_norm": 0.22337397933006287, "learning_rate": 0.00019914655971130165, "loss": 11.6791, "step": 5993 }, { "epoch": 0.12547098718914845, "grad_norm": 0.2262052744626999, "learning_rate": 0.00019914627385004744, "loss": 11.6814, "step": 5994 }, { "epoch": 0.1254919199531106, "grad_norm": 0.2607121765613556, "learning_rate": 0.00019914598794113167, "loss": 11.6707, "step": 5995 }, { "epoch": 0.12551285271707277, "grad_norm": 0.3109212815761566, "learning_rate": 0.0001991457019845544, "loss": 11.6662, "step": 5996 }, { "epoch": 0.12553378548103492, "grad_norm": 0.23452995717525482, "learning_rate": 0.00019914541598031583, "loss": 11.6835, "step": 5997 }, { "epoch": 0.12555471824499706, "grad_norm": 0.23028504848480225, "learning_rate": 0.00019914512992841605, "loss": 11.6832, "step": 5998 }, { "epoch": 0.12557565100895923, "grad_norm": 0.2388565093278885, "learning_rate": 0.00019914484382885524, "loss": 11.6783, "step": 5999 }, { "epoch": 0.12559658377292138, "grad_norm": 0.249961256980896, "learning_rate": 0.0001991445576816335, "loss": 11.6839, "step": 6000 }, { "epoch": 0.12559658377292138, "eval_loss": 11.682429313659668, "eval_runtime": 34.3395, "eval_samples_per_second": 27.985, "eval_steps_per_second": 7.018, "step": 6000 }, { "epoch": 0.12561751653688352, "grad_norm": 0.2614938020706177, "learning_rate": 0.000199144271486751, "loss": 11.6664, "step": 6001 }, { "epoch": 0.1256384493008457, "grad_norm": 0.24070070683956146, "learning_rate": 0.00019914398524420782, "loss": 11.6923, "step": 6002 }, { "epoch": 0.12565938206480784, "grad_norm": 0.25814828276634216, "learning_rate": 0.00019914369895400417, "loss": 11.6779, "step": 6003 }, { "epoch": 0.12568031482876998, "grad_norm": 0.25718849897384644, "learning_rate": 0.00019914341261614016, "loss": 11.6866, "step": 6004 }, { "epoch": 0.12570124759273216, "grad_norm": 0.3634536862373352, "learning_rate": 0.0001991431262306159, "loss": 11.6952, "step": 6005 }, { "epoch": 0.1257221803566943, "grad_norm": 0.21307744085788727, "learning_rate": 0.00019914283979743157, "loss": 11.6793, "step": 6006 }, { "epoch": 0.12574311312065645, "grad_norm": 0.22061382234096527, "learning_rate": 0.00019914255331658732, "loss": 11.6676, "step": 6007 }, { "epoch": 0.1257640458846186, "grad_norm": 0.4342307448387146, "learning_rate": 0.00019914226678808321, "loss": 11.6998, "step": 6008 }, { "epoch": 0.12578497864858076, "grad_norm": 0.1990649402141571, "learning_rate": 0.00019914198021191945, "loss": 11.6629, "step": 6009 }, { "epoch": 0.1258059114125429, "grad_norm": 0.2813149094581604, "learning_rate": 0.00019914169358809615, "loss": 11.6882, "step": 6010 }, { "epoch": 0.12582684417650505, "grad_norm": 0.2386009246110916, "learning_rate": 0.00019914140691661345, "loss": 11.6758, "step": 6011 }, { "epoch": 0.12584777694046723, "grad_norm": 0.22806192934513092, "learning_rate": 0.0001991411201974715, "loss": 11.6892, "step": 6012 }, { "epoch": 0.12586870970442937, "grad_norm": 0.22556142508983612, "learning_rate": 0.0001991408334306704, "loss": 11.6854, "step": 6013 }, { "epoch": 0.12588964246839152, "grad_norm": 0.2307409942150116, "learning_rate": 0.00019914054661621035, "loss": 11.6813, "step": 6014 }, { "epoch": 0.1259105752323537, "grad_norm": 0.2094028890132904, "learning_rate": 0.0001991402597540914, "loss": 11.6635, "step": 6015 }, { "epoch": 0.12593150799631583, "grad_norm": 0.2589356601238251, "learning_rate": 0.0001991399728443138, "loss": 11.693, "step": 6016 }, { "epoch": 0.12595244076027798, "grad_norm": 0.20969991385936737, "learning_rate": 0.0001991396858868776, "loss": 11.6741, "step": 6017 }, { "epoch": 0.12597337352424015, "grad_norm": 0.24948382377624512, "learning_rate": 0.00019913939888178297, "loss": 11.6658, "step": 6018 }, { "epoch": 0.1259943062882023, "grad_norm": 0.19883665442466736, "learning_rate": 0.00019913911182903004, "loss": 11.6793, "step": 6019 }, { "epoch": 0.12601523905216444, "grad_norm": 0.19356036186218262, "learning_rate": 0.00019913882472861896, "loss": 11.6858, "step": 6020 }, { "epoch": 0.1260361718161266, "grad_norm": 0.19826021790504456, "learning_rate": 0.00019913853758054986, "loss": 11.6746, "step": 6021 }, { "epoch": 0.12605710458008876, "grad_norm": 0.2137065827846527, "learning_rate": 0.00019913825038482286, "loss": 11.6936, "step": 6022 }, { "epoch": 0.1260780373440509, "grad_norm": 0.18757221102714539, "learning_rate": 0.0001991379631414381, "loss": 11.6814, "step": 6023 }, { "epoch": 0.12609897010801308, "grad_norm": 0.20751376450061798, "learning_rate": 0.00019913767585039578, "loss": 11.6831, "step": 6024 }, { "epoch": 0.12611990287197522, "grad_norm": 0.22791846096515656, "learning_rate": 0.00019913738851169596, "loss": 11.6783, "step": 6025 }, { "epoch": 0.12614083563593737, "grad_norm": 0.29249268770217896, "learning_rate": 0.00019913710112533884, "loss": 11.6873, "step": 6026 }, { "epoch": 0.1261617683998995, "grad_norm": 0.21353568136692047, "learning_rate": 0.00019913681369132454, "loss": 11.6802, "step": 6027 }, { "epoch": 0.12618270116386168, "grad_norm": 0.30962905287742615, "learning_rate": 0.00019913652620965312, "loss": 11.684, "step": 6028 }, { "epoch": 0.12620363392782383, "grad_norm": 0.23758967220783234, "learning_rate": 0.00019913623868032484, "loss": 11.6853, "step": 6029 }, { "epoch": 0.12622456669178597, "grad_norm": 0.30995312333106995, "learning_rate": 0.00019913595110333977, "loss": 11.6883, "step": 6030 }, { "epoch": 0.12624549945574814, "grad_norm": 0.21933113038539886, "learning_rate": 0.00019913566347869809, "loss": 11.6737, "step": 6031 }, { "epoch": 0.1262664322197103, "grad_norm": 0.23803724348545074, "learning_rate": 0.00019913537580639986, "loss": 11.681, "step": 6032 }, { "epoch": 0.12628736498367243, "grad_norm": 0.20241865515708923, "learning_rate": 0.0001991350880864453, "loss": 11.675, "step": 6033 }, { "epoch": 0.1263082977476346, "grad_norm": 0.2523190975189209, "learning_rate": 0.00019913480031883451, "loss": 11.6849, "step": 6034 }, { "epoch": 0.12632923051159675, "grad_norm": 0.3020024597644806, "learning_rate": 0.00019913451250356761, "loss": 11.681, "step": 6035 }, { "epoch": 0.1263501632755589, "grad_norm": 0.2475856989622116, "learning_rate": 0.0001991342246406448, "loss": 11.6647, "step": 6036 }, { "epoch": 0.12637109603952107, "grad_norm": 0.20814311504364014, "learning_rate": 0.00019913393673006615, "loss": 11.6688, "step": 6037 }, { "epoch": 0.12639202880348321, "grad_norm": 0.2120485156774521, "learning_rate": 0.00019913364877183188, "loss": 11.6642, "step": 6038 }, { "epoch": 0.12641296156744536, "grad_norm": 0.22347766160964966, "learning_rate": 0.00019913336076594201, "loss": 11.6787, "step": 6039 }, { "epoch": 0.12643389433140753, "grad_norm": 0.2884739339351654, "learning_rate": 0.00019913307271239678, "loss": 11.6996, "step": 6040 }, { "epoch": 0.12645482709536968, "grad_norm": 0.2641514539718628, "learning_rate": 0.0001991327846111963, "loss": 11.6712, "step": 6041 }, { "epoch": 0.12647575985933182, "grad_norm": 0.237997367978096, "learning_rate": 0.00019913249646234072, "loss": 11.6812, "step": 6042 }, { "epoch": 0.12649669262329397, "grad_norm": 0.22703956067562103, "learning_rate": 0.00019913220826583015, "loss": 11.6765, "step": 6043 }, { "epoch": 0.12651762538725614, "grad_norm": 0.20476584136486053, "learning_rate": 0.00019913192002166472, "loss": 11.686, "step": 6044 }, { "epoch": 0.12653855815121828, "grad_norm": 0.2606447637081146, "learning_rate": 0.00019913163172984458, "loss": 11.6723, "step": 6045 }, { "epoch": 0.12655949091518043, "grad_norm": 0.27533602714538574, "learning_rate": 0.00019913134339036992, "loss": 11.6901, "step": 6046 }, { "epoch": 0.1265804236791426, "grad_norm": 0.20942223072052002, "learning_rate": 0.0001991310550032408, "loss": 11.693, "step": 6047 }, { "epoch": 0.12660135644310475, "grad_norm": 0.23118019104003906, "learning_rate": 0.00019913076656845742, "loss": 11.6737, "step": 6048 }, { "epoch": 0.1266222892070669, "grad_norm": 0.21055129170417786, "learning_rate": 0.00019913047808601988, "loss": 11.6994, "step": 6049 }, { "epoch": 0.12664322197102906, "grad_norm": 0.2772804796695709, "learning_rate": 0.00019913018955592835, "loss": 11.7074, "step": 6050 }, { "epoch": 0.1266641547349912, "grad_norm": 0.2311055064201355, "learning_rate": 0.00019912990097818294, "loss": 11.6728, "step": 6051 }, { "epoch": 0.12668508749895335, "grad_norm": 0.20775045454502106, "learning_rate": 0.0001991296123527838, "loss": 11.6837, "step": 6052 }, { "epoch": 0.12670602026291553, "grad_norm": 0.22657103836536407, "learning_rate": 0.00019912932367973106, "loss": 11.6704, "step": 6053 }, { "epoch": 0.12672695302687767, "grad_norm": 0.23100323975086212, "learning_rate": 0.0001991290349590249, "loss": 11.6792, "step": 6054 }, { "epoch": 0.12674788579083981, "grad_norm": 0.23886479437351227, "learning_rate": 0.00019912874619066538, "loss": 11.6968, "step": 6055 }, { "epoch": 0.126768818554802, "grad_norm": 0.23081037402153015, "learning_rate": 0.0001991284573746527, "loss": 11.6823, "step": 6056 }, { "epoch": 0.12678975131876413, "grad_norm": 0.24000944197177887, "learning_rate": 0.000199128168510987, "loss": 11.677, "step": 6057 }, { "epoch": 0.12681068408272628, "grad_norm": 0.2083359807729721, "learning_rate": 0.00019912787959966842, "loss": 11.6631, "step": 6058 }, { "epoch": 0.12683161684668845, "grad_norm": 0.2110956758260727, "learning_rate": 0.00019912759064069703, "loss": 11.6699, "step": 6059 }, { "epoch": 0.1268525496106506, "grad_norm": 0.2570720613002777, "learning_rate": 0.00019912730163407306, "loss": 11.683, "step": 6060 }, { "epoch": 0.12687348237461274, "grad_norm": 0.27629855275154114, "learning_rate": 0.0001991270125797966, "loss": 11.6793, "step": 6061 }, { "epoch": 0.12689441513857488, "grad_norm": 0.23865000903606415, "learning_rate": 0.00019912672347786777, "loss": 11.682, "step": 6062 }, { "epoch": 0.12691534790253706, "grad_norm": 0.24894489347934723, "learning_rate": 0.00019912643432828676, "loss": 11.6726, "step": 6063 }, { "epoch": 0.1269362806664992, "grad_norm": 0.33388593792915344, "learning_rate": 0.00019912614513105373, "loss": 11.6906, "step": 6064 }, { "epoch": 0.12695721343046135, "grad_norm": 0.27776089310646057, "learning_rate": 0.0001991258558861687, "loss": 11.686, "step": 6065 }, { "epoch": 0.12697814619442352, "grad_norm": 0.2128971666097641, "learning_rate": 0.00019912556659363193, "loss": 11.6719, "step": 6066 }, { "epoch": 0.12699907895838566, "grad_norm": 0.24871216714382172, "learning_rate": 0.00019912527725344349, "loss": 11.6663, "step": 6067 }, { "epoch": 0.1270200117223478, "grad_norm": 0.23937039077281952, "learning_rate": 0.0001991249878656036, "loss": 11.6985, "step": 6068 }, { "epoch": 0.12704094448630998, "grad_norm": 0.20873111486434937, "learning_rate": 0.0001991246984301123, "loss": 11.6659, "step": 6069 }, { "epoch": 0.12706187725027213, "grad_norm": 0.28836652636528015, "learning_rate": 0.00019912440894696973, "loss": 11.6827, "step": 6070 }, { "epoch": 0.12708281001423427, "grad_norm": 0.2797240614891052, "learning_rate": 0.0001991241194161761, "loss": 11.6954, "step": 6071 }, { "epoch": 0.12710374277819644, "grad_norm": 0.30931204557418823, "learning_rate": 0.00019912382983773154, "loss": 11.6919, "step": 6072 }, { "epoch": 0.1271246755421586, "grad_norm": 0.22530822455883026, "learning_rate": 0.00019912354021163616, "loss": 11.685, "step": 6073 }, { "epoch": 0.12714560830612073, "grad_norm": 0.26634112000465393, "learning_rate": 0.00019912325053789013, "loss": 11.6956, "step": 6074 }, { "epoch": 0.1271665410700829, "grad_norm": 0.18578214943408966, "learning_rate": 0.00019912296081649351, "loss": 11.6817, "step": 6075 }, { "epoch": 0.12718747383404505, "grad_norm": 0.29774099588394165, "learning_rate": 0.00019912267104744654, "loss": 11.6962, "step": 6076 }, { "epoch": 0.1272084065980072, "grad_norm": 0.23704937100410461, "learning_rate": 0.0001991223812307493, "loss": 11.6904, "step": 6077 }, { "epoch": 0.12722933936196937, "grad_norm": 0.1919310986995697, "learning_rate": 0.00019912209136640195, "loss": 11.6745, "step": 6078 }, { "epoch": 0.1272502721259315, "grad_norm": 0.19981522858142853, "learning_rate": 0.0001991218014544046, "loss": 11.6892, "step": 6079 }, { "epoch": 0.12727120488989366, "grad_norm": 0.23531627655029297, "learning_rate": 0.00019912151149475746, "loss": 11.6963, "step": 6080 }, { "epoch": 0.1272921376538558, "grad_norm": 0.24686655402183533, "learning_rate": 0.00019912122148746058, "loss": 11.6914, "step": 6081 }, { "epoch": 0.12731307041781798, "grad_norm": 0.2695409655570984, "learning_rate": 0.00019912093143251416, "loss": 11.6871, "step": 6082 }, { "epoch": 0.12733400318178012, "grad_norm": 0.32354187965393066, "learning_rate": 0.00019912064132991835, "loss": 11.6621, "step": 6083 }, { "epoch": 0.12735493594574226, "grad_norm": 0.2222321778535843, "learning_rate": 0.00019912035117967324, "loss": 11.666, "step": 6084 }, { "epoch": 0.12737586870970444, "grad_norm": 0.22796979546546936, "learning_rate": 0.00019912006098177898, "loss": 11.6922, "step": 6085 }, { "epoch": 0.12739680147366658, "grad_norm": 0.2235351949930191, "learning_rate": 0.0001991197707362357, "loss": 11.6869, "step": 6086 }, { "epoch": 0.12741773423762873, "grad_norm": 0.2759843170642853, "learning_rate": 0.00019911948044304357, "loss": 11.6829, "step": 6087 }, { "epoch": 0.1274386670015909, "grad_norm": 0.22809699177742004, "learning_rate": 0.00019911919010220274, "loss": 11.6857, "step": 6088 }, { "epoch": 0.12745959976555304, "grad_norm": 0.20252925157546997, "learning_rate": 0.00019911889971371334, "loss": 11.6766, "step": 6089 }, { "epoch": 0.1274805325295152, "grad_norm": 0.22017157077789307, "learning_rate": 0.0001991186092775755, "loss": 11.6938, "step": 6090 }, { "epoch": 0.12750146529347736, "grad_norm": 0.22980479896068573, "learning_rate": 0.00019911831879378933, "loss": 11.6845, "step": 6091 }, { "epoch": 0.1275223980574395, "grad_norm": 0.2060582935810089, "learning_rate": 0.000199118028262355, "loss": 11.6774, "step": 6092 }, { "epoch": 0.12754333082140165, "grad_norm": 0.22804813086986542, "learning_rate": 0.00019911773768327263, "loss": 11.6836, "step": 6093 }, { "epoch": 0.12756426358536382, "grad_norm": 0.26673901081085205, "learning_rate": 0.00019911744705654243, "loss": 11.6858, "step": 6094 }, { "epoch": 0.12758519634932597, "grad_norm": 0.19843339920043945, "learning_rate": 0.00019911715638216447, "loss": 11.6903, "step": 6095 }, { "epoch": 0.1276061291132881, "grad_norm": 0.19412878155708313, "learning_rate": 0.0001991168656601389, "loss": 11.6564, "step": 6096 }, { "epoch": 0.12762706187725026, "grad_norm": 0.20784947276115417, "learning_rate": 0.00019911657489046585, "loss": 11.6842, "step": 6097 }, { "epoch": 0.12764799464121243, "grad_norm": 0.2074889838695526, "learning_rate": 0.00019911628407314548, "loss": 11.6828, "step": 6098 }, { "epoch": 0.12766892740517458, "grad_norm": 0.21146227419376373, "learning_rate": 0.00019911599320817795, "loss": 11.6788, "step": 6099 }, { "epoch": 0.12768986016913672, "grad_norm": 0.23311695456504822, "learning_rate": 0.00019911570229556335, "loss": 11.6877, "step": 6100 }, { "epoch": 0.1277107929330989, "grad_norm": 0.27518323063850403, "learning_rate": 0.00019911541133530185, "loss": 11.674, "step": 6101 }, { "epoch": 0.12773172569706104, "grad_norm": 0.22851160168647766, "learning_rate": 0.0001991151203273936, "loss": 11.6837, "step": 6102 }, { "epoch": 0.12775265846102318, "grad_norm": 0.23177984356880188, "learning_rate": 0.00019911482927183872, "loss": 11.6943, "step": 6103 }, { "epoch": 0.12777359122498536, "grad_norm": 0.2500301003456116, "learning_rate": 0.00019911453816863733, "loss": 11.687, "step": 6104 }, { "epoch": 0.1277945239889475, "grad_norm": 0.19283248484134674, "learning_rate": 0.00019911424701778962, "loss": 11.6856, "step": 6105 }, { "epoch": 0.12781545675290965, "grad_norm": 0.32130342721939087, "learning_rate": 0.00019911395581929573, "loss": 11.6757, "step": 6106 }, { "epoch": 0.12783638951687182, "grad_norm": 0.37278327345848083, "learning_rate": 0.00019911366457315573, "loss": 11.6842, "step": 6107 }, { "epoch": 0.12785732228083396, "grad_norm": 0.21444423496723175, "learning_rate": 0.00019911337327936982, "loss": 11.6894, "step": 6108 }, { "epoch": 0.1278782550447961, "grad_norm": 0.2274257093667984, "learning_rate": 0.00019911308193793813, "loss": 11.675, "step": 6109 }, { "epoch": 0.12789918780875828, "grad_norm": 0.233145609498024, "learning_rate": 0.0001991127905488608, "loss": 11.6882, "step": 6110 }, { "epoch": 0.12792012057272042, "grad_norm": 0.35053718090057373, "learning_rate": 0.00019911249911213795, "loss": 11.6713, "step": 6111 }, { "epoch": 0.12794105333668257, "grad_norm": 0.22805562615394592, "learning_rate": 0.00019911220762776975, "loss": 11.6951, "step": 6112 }, { "epoch": 0.12796198610064474, "grad_norm": 0.2678529918193817, "learning_rate": 0.00019911191609575633, "loss": 11.6713, "step": 6113 }, { "epoch": 0.1279829188646069, "grad_norm": 0.28297922015190125, "learning_rate": 0.00019911162451609782, "loss": 11.6864, "step": 6114 }, { "epoch": 0.12800385162856903, "grad_norm": 0.22973129153251648, "learning_rate": 0.00019911133288879435, "loss": 11.6777, "step": 6115 }, { "epoch": 0.12802478439253118, "grad_norm": 0.21522660553455353, "learning_rate": 0.00019911104121384608, "loss": 11.686, "step": 6116 }, { "epoch": 0.12804571715649335, "grad_norm": 0.262146919965744, "learning_rate": 0.00019911074949125314, "loss": 11.6857, "step": 6117 }, { "epoch": 0.1280666499204555, "grad_norm": 0.21217580139636993, "learning_rate": 0.00019911045772101572, "loss": 11.6794, "step": 6118 }, { "epoch": 0.12808758268441764, "grad_norm": 0.23636466264724731, "learning_rate": 0.00019911016590313388, "loss": 11.6832, "step": 6119 }, { "epoch": 0.1281085154483798, "grad_norm": 0.24597735702991486, "learning_rate": 0.0001991098740376078, "loss": 11.6849, "step": 6120 }, { "epoch": 0.12812944821234196, "grad_norm": 0.25160127878189087, "learning_rate": 0.00019910958212443762, "loss": 11.6764, "step": 6121 }, { "epoch": 0.1281503809763041, "grad_norm": 0.22463172674179077, "learning_rate": 0.00019910929016362348, "loss": 11.6725, "step": 6122 }, { "epoch": 0.12817131374026627, "grad_norm": 0.20391519367694855, "learning_rate": 0.00019910899815516551, "loss": 11.6892, "step": 6123 }, { "epoch": 0.12819224650422842, "grad_norm": 0.26280859112739563, "learning_rate": 0.00019910870609906388, "loss": 11.6748, "step": 6124 }, { "epoch": 0.12821317926819056, "grad_norm": 0.19904688000679016, "learning_rate": 0.00019910841399531866, "loss": 11.6772, "step": 6125 }, { "epoch": 0.12823411203215274, "grad_norm": 0.24485519528388977, "learning_rate": 0.00019910812184393008, "loss": 11.678, "step": 6126 }, { "epoch": 0.12825504479611488, "grad_norm": 0.2771899700164795, "learning_rate": 0.00019910782964489824, "loss": 11.7015, "step": 6127 }, { "epoch": 0.12827597756007703, "grad_norm": 0.23244355618953705, "learning_rate": 0.00019910753739822327, "loss": 11.694, "step": 6128 }, { "epoch": 0.1282969103240392, "grad_norm": 0.17514997720718384, "learning_rate": 0.00019910724510390531, "loss": 11.6638, "step": 6129 }, { "epoch": 0.12831784308800134, "grad_norm": 0.19094526767730713, "learning_rate": 0.00019910695276194456, "loss": 11.6892, "step": 6130 }, { "epoch": 0.1283387758519635, "grad_norm": 0.305643230676651, "learning_rate": 0.00019910666037234107, "loss": 11.6693, "step": 6131 }, { "epoch": 0.12835970861592566, "grad_norm": 0.2347991168498993, "learning_rate": 0.00019910636793509502, "loss": 11.6823, "step": 6132 }, { "epoch": 0.1283806413798878, "grad_norm": 0.24535298347473145, "learning_rate": 0.00019910607545020658, "loss": 11.6708, "step": 6133 }, { "epoch": 0.12840157414384995, "grad_norm": 0.22953243553638458, "learning_rate": 0.00019910578291767582, "loss": 11.6786, "step": 6134 }, { "epoch": 0.1284225069078121, "grad_norm": 0.2361738085746765, "learning_rate": 0.00019910549033750295, "loss": 11.6868, "step": 6135 }, { "epoch": 0.12844343967177427, "grad_norm": 0.2058936506509781, "learning_rate": 0.0001991051977096881, "loss": 11.6814, "step": 6136 }, { "epoch": 0.1284643724357364, "grad_norm": 0.23471929132938385, "learning_rate": 0.0001991049050342314, "loss": 11.6854, "step": 6137 }, { "epoch": 0.12848530519969856, "grad_norm": 0.3133462369441986, "learning_rate": 0.00019910461231113295, "loss": 11.6826, "step": 6138 }, { "epoch": 0.12850623796366073, "grad_norm": 0.254781037569046, "learning_rate": 0.00019910431954039295, "loss": 11.6896, "step": 6139 }, { "epoch": 0.12852717072762287, "grad_norm": 0.20655618607997894, "learning_rate": 0.00019910402672201148, "loss": 11.6839, "step": 6140 }, { "epoch": 0.12854810349158502, "grad_norm": 0.23383623361587524, "learning_rate": 0.00019910373385598876, "loss": 11.6857, "step": 6141 }, { "epoch": 0.1285690362555472, "grad_norm": 0.2231275588274002, "learning_rate": 0.00019910344094232487, "loss": 11.6858, "step": 6142 }, { "epoch": 0.12858996901950934, "grad_norm": 0.23463189601898193, "learning_rate": 0.00019910314798102, "loss": 11.6867, "step": 6143 }, { "epoch": 0.12861090178347148, "grad_norm": 0.2073572874069214, "learning_rate": 0.00019910285497207425, "loss": 11.6809, "step": 6144 }, { "epoch": 0.12863183454743365, "grad_norm": 0.2980562448501587, "learning_rate": 0.00019910256191548774, "loss": 11.6848, "step": 6145 }, { "epoch": 0.1286527673113958, "grad_norm": 0.1986197531223297, "learning_rate": 0.00019910226881126065, "loss": 11.6818, "step": 6146 }, { "epoch": 0.12867370007535794, "grad_norm": 0.2636845111846924, "learning_rate": 0.00019910197565939312, "loss": 11.6773, "step": 6147 }, { "epoch": 0.12869463283932012, "grad_norm": 0.2715630531311035, "learning_rate": 0.0001991016824598853, "loss": 11.7063, "step": 6148 }, { "epoch": 0.12871556560328226, "grad_norm": 0.2528342008590698, "learning_rate": 0.00019910138921273732, "loss": 11.6777, "step": 6149 }, { "epoch": 0.1287364983672444, "grad_norm": 0.2467769980430603, "learning_rate": 0.0001991010959179493, "loss": 11.6927, "step": 6150 }, { "epoch": 0.12875743113120655, "grad_norm": 0.2904030680656433, "learning_rate": 0.00019910080257552138, "loss": 11.6857, "step": 6151 }, { "epoch": 0.12877836389516872, "grad_norm": 0.2232300043106079, "learning_rate": 0.00019910050918545374, "loss": 11.6621, "step": 6152 }, { "epoch": 0.12879929665913087, "grad_norm": 0.20801614224910736, "learning_rate": 0.00019910021574774648, "loss": 11.6856, "step": 6153 }, { "epoch": 0.128820229423093, "grad_norm": 0.19551189243793488, "learning_rate": 0.0001990999222623998, "loss": 11.6712, "step": 6154 }, { "epoch": 0.12884116218705519, "grad_norm": 0.2449140101671219, "learning_rate": 0.00019909962872941377, "loss": 11.6744, "step": 6155 }, { "epoch": 0.12886209495101733, "grad_norm": 0.22237543761730194, "learning_rate": 0.00019909933514878855, "loss": 11.6844, "step": 6156 }, { "epoch": 0.12888302771497948, "grad_norm": 0.24530059099197388, "learning_rate": 0.00019909904152052432, "loss": 11.6643, "step": 6157 }, { "epoch": 0.12890396047894165, "grad_norm": 0.24969297647476196, "learning_rate": 0.0001990987478446212, "loss": 11.6963, "step": 6158 }, { "epoch": 0.1289248932429038, "grad_norm": 0.2478596717119217, "learning_rate": 0.0001990984541210793, "loss": 11.678, "step": 6159 }, { "epoch": 0.12894582600686594, "grad_norm": 0.34009459614753723, "learning_rate": 0.00019909816034989882, "loss": 11.6908, "step": 6160 }, { "epoch": 0.1289667587708281, "grad_norm": 0.28933286666870117, "learning_rate": 0.00019909786653107982, "loss": 11.6854, "step": 6161 }, { "epoch": 0.12898769153479026, "grad_norm": 0.2741210460662842, "learning_rate": 0.00019909757266462253, "loss": 11.6876, "step": 6162 }, { "epoch": 0.1290086242987524, "grad_norm": 0.2272779792547226, "learning_rate": 0.00019909727875052705, "loss": 11.6756, "step": 6163 }, { "epoch": 0.12902955706271457, "grad_norm": 0.22977697849273682, "learning_rate": 0.00019909698478879351, "loss": 11.6667, "step": 6164 }, { "epoch": 0.12905048982667672, "grad_norm": 0.24715346097946167, "learning_rate": 0.00019909669077942206, "loss": 11.6895, "step": 6165 }, { "epoch": 0.12907142259063886, "grad_norm": 0.23722562193870544, "learning_rate": 0.00019909639672241285, "loss": 11.6828, "step": 6166 }, { "epoch": 0.12909235535460103, "grad_norm": 0.2537432312965393, "learning_rate": 0.00019909610261776602, "loss": 11.6728, "step": 6167 }, { "epoch": 0.12911328811856318, "grad_norm": 0.23787276446819305, "learning_rate": 0.0001990958084654817, "loss": 11.6866, "step": 6168 }, { "epoch": 0.12913422088252532, "grad_norm": 0.22696471214294434, "learning_rate": 0.00019909551426556003, "loss": 11.6919, "step": 6169 }, { "epoch": 0.12915515364648747, "grad_norm": 0.23085463047027588, "learning_rate": 0.00019909522001800118, "loss": 11.6813, "step": 6170 }, { "epoch": 0.12917608641044964, "grad_norm": 0.22875221073627472, "learning_rate": 0.00019909492572280527, "loss": 11.6846, "step": 6171 }, { "epoch": 0.1291970191744118, "grad_norm": 0.21732698380947113, "learning_rate": 0.00019909463137997245, "loss": 11.6842, "step": 6172 }, { "epoch": 0.12921795193837393, "grad_norm": 0.26654958724975586, "learning_rate": 0.00019909433698950285, "loss": 11.7035, "step": 6173 }, { "epoch": 0.1292388847023361, "grad_norm": 0.2344515472650528, "learning_rate": 0.0001990940425513966, "loss": 11.6854, "step": 6174 }, { "epoch": 0.12925981746629825, "grad_norm": 0.24608801305294037, "learning_rate": 0.00019909374806565387, "loss": 11.6925, "step": 6175 }, { "epoch": 0.1292807502302604, "grad_norm": 0.2201344519853592, "learning_rate": 0.00019909345353227476, "loss": 11.6725, "step": 6176 }, { "epoch": 0.12930168299422257, "grad_norm": 0.22989030182361603, "learning_rate": 0.00019909315895125948, "loss": 11.6708, "step": 6177 }, { "epoch": 0.1293226157581847, "grad_norm": 0.2296871691942215, "learning_rate": 0.0001990928643226081, "loss": 11.6745, "step": 6178 }, { "epoch": 0.12934354852214686, "grad_norm": 0.2823159098625183, "learning_rate": 0.00019909256964632083, "loss": 11.6887, "step": 6179 }, { "epoch": 0.12936448128610903, "grad_norm": 0.26017165184020996, "learning_rate": 0.00019909227492239778, "loss": 11.6907, "step": 6180 }, { "epoch": 0.12938541405007117, "grad_norm": 0.22922497987747192, "learning_rate": 0.00019909198015083908, "loss": 11.663, "step": 6181 }, { "epoch": 0.12940634681403332, "grad_norm": 0.18738292157649994, "learning_rate": 0.00019909168533164485, "loss": 11.6755, "step": 6182 }, { "epoch": 0.1294272795779955, "grad_norm": 0.26407384872436523, "learning_rate": 0.00019909139046481527, "loss": 11.6827, "step": 6183 }, { "epoch": 0.12944821234195764, "grad_norm": 0.20406553149223328, "learning_rate": 0.0001990910955503505, "loss": 11.6743, "step": 6184 }, { "epoch": 0.12946914510591978, "grad_norm": 0.23400019109249115, "learning_rate": 0.00019909080058825063, "loss": 11.685, "step": 6185 }, { "epoch": 0.12949007786988193, "grad_norm": 0.3174911439418793, "learning_rate": 0.00019909050557851584, "loss": 11.6651, "step": 6186 }, { "epoch": 0.1295110106338441, "grad_norm": 0.24949882924556732, "learning_rate": 0.00019909021052114624, "loss": 11.6691, "step": 6187 }, { "epoch": 0.12953194339780624, "grad_norm": 0.2249019742012024, "learning_rate": 0.00019908991541614198, "loss": 11.6854, "step": 6188 }, { "epoch": 0.1295528761617684, "grad_norm": 0.2649982273578644, "learning_rate": 0.00019908962026350327, "loss": 11.6939, "step": 6189 }, { "epoch": 0.12957380892573056, "grad_norm": 0.2859746217727661, "learning_rate": 0.00019908932506323014, "loss": 11.7083, "step": 6190 }, { "epoch": 0.1295947416896927, "grad_norm": 0.22656114399433136, "learning_rate": 0.0001990890298153228, "loss": 11.6887, "step": 6191 }, { "epoch": 0.12961567445365485, "grad_norm": 0.2813858985900879, "learning_rate": 0.00019908873451978139, "loss": 11.6819, "step": 6192 }, { "epoch": 0.12963660721761702, "grad_norm": 0.21678680181503296, "learning_rate": 0.000199088439176606, "loss": 11.6835, "step": 6193 }, { "epoch": 0.12965753998157917, "grad_norm": 0.2378385365009308, "learning_rate": 0.00019908814378579684, "loss": 11.6982, "step": 6194 }, { "epoch": 0.1296784727455413, "grad_norm": 0.23049795627593994, "learning_rate": 0.00019908784834735404, "loss": 11.6736, "step": 6195 }, { "epoch": 0.12969940550950348, "grad_norm": 0.2968483567237854, "learning_rate": 0.00019908755286127773, "loss": 11.6794, "step": 6196 }, { "epoch": 0.12972033827346563, "grad_norm": 0.26273173093795776, "learning_rate": 0.000199087257327568, "loss": 11.6878, "step": 6197 }, { "epoch": 0.12974127103742777, "grad_norm": 0.24747087061405182, "learning_rate": 0.00019908696174622507, "loss": 11.6798, "step": 6198 }, { "epoch": 0.12976220380138995, "grad_norm": 0.23159068822860718, "learning_rate": 0.00019908666611724907, "loss": 11.6894, "step": 6199 }, { "epoch": 0.1297831365653521, "grad_norm": 0.23818470537662506, "learning_rate": 0.0001990863704406401, "loss": 11.675, "step": 6200 }, { "epoch": 0.12980406932931424, "grad_norm": 0.23294241726398468, "learning_rate": 0.00019908607471639834, "loss": 11.6698, "step": 6201 }, { "epoch": 0.1298250020932764, "grad_norm": 0.25774645805358887, "learning_rate": 0.00019908577894452387, "loss": 11.6702, "step": 6202 }, { "epoch": 0.12984593485723855, "grad_norm": 0.2319793403148651, "learning_rate": 0.00019908548312501693, "loss": 11.6837, "step": 6203 }, { "epoch": 0.1298668676212007, "grad_norm": 0.2194482833147049, "learning_rate": 0.0001990851872578776, "loss": 11.6653, "step": 6204 }, { "epoch": 0.12988780038516284, "grad_norm": 0.2426738142967224, "learning_rate": 0.00019908489134310605, "loss": 11.6868, "step": 6205 }, { "epoch": 0.12990873314912502, "grad_norm": 0.2009686976671219, "learning_rate": 0.00019908459538070237, "loss": 11.6671, "step": 6206 }, { "epoch": 0.12992966591308716, "grad_norm": 0.1859605759382248, "learning_rate": 0.00019908429937066677, "loss": 11.6754, "step": 6207 }, { "epoch": 0.1299505986770493, "grad_norm": 0.3201472759246826, "learning_rate": 0.00019908400331299935, "loss": 11.7005, "step": 6208 }, { "epoch": 0.12997153144101148, "grad_norm": 0.29635563492774963, "learning_rate": 0.00019908370720770027, "loss": 11.683, "step": 6209 }, { "epoch": 0.12999246420497362, "grad_norm": 0.24471907317638397, "learning_rate": 0.00019908341105476964, "loss": 11.6752, "step": 6210 }, { "epoch": 0.13001339696893577, "grad_norm": 0.2247561514377594, "learning_rate": 0.00019908311485420765, "loss": 11.6794, "step": 6211 }, { "epoch": 0.13003432973289794, "grad_norm": 0.2985564172267914, "learning_rate": 0.00019908281860601444, "loss": 11.6937, "step": 6212 }, { "epoch": 0.13005526249686009, "grad_norm": 0.33241933584213257, "learning_rate": 0.0001990825223101901, "loss": 11.6948, "step": 6213 }, { "epoch": 0.13007619526082223, "grad_norm": 0.24835318326950073, "learning_rate": 0.00019908222596673483, "loss": 11.6726, "step": 6214 }, { "epoch": 0.1300971280247844, "grad_norm": 0.20306961238384247, "learning_rate": 0.00019908192957564876, "loss": 11.6772, "step": 6215 }, { "epoch": 0.13011806078874655, "grad_norm": 0.23122335970401764, "learning_rate": 0.00019908163313693197, "loss": 11.6753, "step": 6216 }, { "epoch": 0.1301389935527087, "grad_norm": 0.23997065424919128, "learning_rate": 0.0001990813366505847, "loss": 11.6749, "step": 6217 }, { "epoch": 0.13015992631667087, "grad_norm": 0.2571246027946472, "learning_rate": 0.00019908104011660704, "loss": 11.6782, "step": 6218 }, { "epoch": 0.130180859080633, "grad_norm": 0.20769082009792328, "learning_rate": 0.00019908074353499914, "loss": 11.6624, "step": 6219 }, { "epoch": 0.13020179184459515, "grad_norm": 0.23589050769805908, "learning_rate": 0.00019908044690576112, "loss": 11.6738, "step": 6220 }, { "epoch": 0.13022272460855733, "grad_norm": 0.21840524673461914, "learning_rate": 0.00019908015022889315, "loss": 11.6842, "step": 6221 }, { "epoch": 0.13024365737251947, "grad_norm": 0.2216757982969284, "learning_rate": 0.00019907985350439534, "loss": 11.6722, "step": 6222 }, { "epoch": 0.13026459013648162, "grad_norm": 0.3356364369392395, "learning_rate": 0.0001990795567322679, "loss": 11.6912, "step": 6223 }, { "epoch": 0.13028552290044376, "grad_norm": 0.21588343381881714, "learning_rate": 0.00019907925991251093, "loss": 11.6815, "step": 6224 }, { "epoch": 0.13030645566440593, "grad_norm": 0.2101050466299057, "learning_rate": 0.00019907896304512457, "loss": 11.6891, "step": 6225 }, { "epoch": 0.13032738842836808, "grad_norm": 0.23065336048603058, "learning_rate": 0.00019907866613010894, "loss": 11.6702, "step": 6226 }, { "epoch": 0.13034832119233022, "grad_norm": 0.22400882840156555, "learning_rate": 0.00019907836916746425, "loss": 11.6969, "step": 6227 }, { "epoch": 0.1303692539562924, "grad_norm": 0.18877601623535156, "learning_rate": 0.0001990780721571906, "loss": 11.6678, "step": 6228 }, { "epoch": 0.13039018672025454, "grad_norm": 0.23476508259773254, "learning_rate": 0.0001990777750992881, "loss": 11.6869, "step": 6229 }, { "epoch": 0.1304111194842167, "grad_norm": 0.27899765968322754, "learning_rate": 0.00019907747799375694, "loss": 11.696, "step": 6230 }, { "epoch": 0.13043205224817886, "grad_norm": 0.2361874133348465, "learning_rate": 0.00019907718084059725, "loss": 11.6738, "step": 6231 }, { "epoch": 0.130452985012141, "grad_norm": 0.22866864502429962, "learning_rate": 0.00019907688363980918, "loss": 11.6854, "step": 6232 }, { "epoch": 0.13047391777610315, "grad_norm": 0.22823435068130493, "learning_rate": 0.00019907658639139287, "loss": 11.6835, "step": 6233 }, { "epoch": 0.13049485054006532, "grad_norm": 0.2462228238582611, "learning_rate": 0.00019907628909534845, "loss": 11.6809, "step": 6234 }, { "epoch": 0.13051578330402747, "grad_norm": 0.23809221386909485, "learning_rate": 0.00019907599175167612, "loss": 11.6851, "step": 6235 }, { "epoch": 0.1305367160679896, "grad_norm": 0.3316090404987335, "learning_rate": 0.0001990756943603759, "loss": 11.6914, "step": 6236 }, { "epoch": 0.13055764883195178, "grad_norm": 0.19693376123905182, "learning_rate": 0.00019907539692144805, "loss": 11.6934, "step": 6237 }, { "epoch": 0.13057858159591393, "grad_norm": 0.2171991914510727, "learning_rate": 0.00019907509943489267, "loss": 11.6948, "step": 6238 }, { "epoch": 0.13059951435987607, "grad_norm": 0.2555280923843384, "learning_rate": 0.00019907480190070988, "loss": 11.6845, "step": 6239 }, { "epoch": 0.13062044712383822, "grad_norm": 0.2574038803577423, "learning_rate": 0.00019907450431889986, "loss": 11.6875, "step": 6240 }, { "epoch": 0.1306413798878004, "grad_norm": 0.20522662997245789, "learning_rate": 0.00019907420668946276, "loss": 11.6902, "step": 6241 }, { "epoch": 0.13066231265176254, "grad_norm": 0.2289476990699768, "learning_rate": 0.0001990739090123987, "loss": 11.6876, "step": 6242 }, { "epoch": 0.13068324541572468, "grad_norm": 0.24957159161567688, "learning_rate": 0.0001990736112877078, "loss": 11.6812, "step": 6243 }, { "epoch": 0.13070417817968685, "grad_norm": 0.24429024755954742, "learning_rate": 0.00019907331351539024, "loss": 11.6884, "step": 6244 }, { "epoch": 0.130725110943649, "grad_norm": 0.24743421375751495, "learning_rate": 0.00019907301569544616, "loss": 11.6886, "step": 6245 }, { "epoch": 0.13074604370761114, "grad_norm": 0.23627056181430817, "learning_rate": 0.00019907271782787568, "loss": 11.668, "step": 6246 }, { "epoch": 0.13076697647157332, "grad_norm": 0.24100542068481445, "learning_rate": 0.000199072419912679, "loss": 11.6695, "step": 6247 }, { "epoch": 0.13078790923553546, "grad_norm": 0.24492527544498444, "learning_rate": 0.00019907212194985617, "loss": 11.6764, "step": 6248 }, { "epoch": 0.1308088419994976, "grad_norm": 0.2655044496059418, "learning_rate": 0.00019907182393940742, "loss": 11.67, "step": 6249 }, { "epoch": 0.13082977476345978, "grad_norm": 0.18230955302715302, "learning_rate": 0.00019907152588133283, "loss": 11.6771, "step": 6250 }, { "epoch": 0.13085070752742192, "grad_norm": 0.2248503565788269, "learning_rate": 0.0001990712277756326, "loss": 11.7073, "step": 6251 }, { "epoch": 0.13087164029138407, "grad_norm": 0.25543734431266785, "learning_rate": 0.00019907092962230684, "loss": 11.6589, "step": 6252 }, { "epoch": 0.13089257305534624, "grad_norm": 0.24869567155838013, "learning_rate": 0.0001990706314213557, "loss": 11.6826, "step": 6253 }, { "epoch": 0.13091350581930838, "grad_norm": 0.218894362449646, "learning_rate": 0.00019907033317277933, "loss": 11.6844, "step": 6254 }, { "epoch": 0.13093443858327053, "grad_norm": 0.3091304898262024, "learning_rate": 0.00019907003487657784, "loss": 11.6913, "step": 6255 }, { "epoch": 0.1309553713472327, "grad_norm": 0.23281194269657135, "learning_rate": 0.00019906973653275142, "loss": 11.6772, "step": 6256 }, { "epoch": 0.13097630411119485, "grad_norm": 0.21932612359523773, "learning_rate": 0.00019906943814130017, "loss": 11.6661, "step": 6257 }, { "epoch": 0.130997236875157, "grad_norm": 0.26287105679512024, "learning_rate": 0.00019906913970222428, "loss": 11.6972, "step": 6258 }, { "epoch": 0.13101816963911914, "grad_norm": 0.24170741438865662, "learning_rate": 0.00019906884121552386, "loss": 11.6828, "step": 6259 }, { "epoch": 0.1310391024030813, "grad_norm": 0.27559709548950195, "learning_rate": 0.00019906854268119905, "loss": 11.6844, "step": 6260 }, { "epoch": 0.13106003516704345, "grad_norm": 0.2296387404203415, "learning_rate": 0.00019906824409925, "loss": 11.6932, "step": 6261 }, { "epoch": 0.1310809679310056, "grad_norm": 0.25474265217781067, "learning_rate": 0.0001990679454696769, "loss": 11.6819, "step": 6262 }, { "epoch": 0.13110190069496777, "grad_norm": 0.2718814015388489, "learning_rate": 0.00019906764679247983, "loss": 11.6854, "step": 6263 }, { "epoch": 0.13112283345892992, "grad_norm": 0.2754584848880768, "learning_rate": 0.00019906734806765893, "loss": 11.6666, "step": 6264 }, { "epoch": 0.13114376622289206, "grad_norm": 0.24426326155662537, "learning_rate": 0.00019906704929521443, "loss": 11.689, "step": 6265 }, { "epoch": 0.13116469898685423, "grad_norm": 0.25348255038261414, "learning_rate": 0.00019906675047514635, "loss": 11.6774, "step": 6266 }, { "epoch": 0.13118563175081638, "grad_norm": 0.234542116522789, "learning_rate": 0.00019906645160745496, "loss": 11.6824, "step": 6267 }, { "epoch": 0.13120656451477852, "grad_norm": 0.2010740339756012, "learning_rate": 0.00019906615269214028, "loss": 11.6828, "step": 6268 }, { "epoch": 0.1312274972787407, "grad_norm": 0.2054675966501236, "learning_rate": 0.00019906585372920257, "loss": 11.6809, "step": 6269 }, { "epoch": 0.13124843004270284, "grad_norm": 0.1849612444639206, "learning_rate": 0.00019906555471864188, "loss": 11.6809, "step": 6270 }, { "epoch": 0.13126936280666499, "grad_norm": 0.26022788882255554, "learning_rate": 0.0001990652556604584, "loss": 11.6843, "step": 6271 }, { "epoch": 0.13129029557062716, "grad_norm": 0.3057040870189667, "learning_rate": 0.0001990649565546523, "loss": 11.6822, "step": 6272 }, { "epoch": 0.1313112283345893, "grad_norm": 0.23931260406970978, "learning_rate": 0.00019906465740122364, "loss": 11.6839, "step": 6273 }, { "epoch": 0.13133216109855145, "grad_norm": 0.19643084704875946, "learning_rate": 0.00019906435820017264, "loss": 11.6848, "step": 6274 }, { "epoch": 0.1313530938625136, "grad_norm": 0.22611503303050995, "learning_rate": 0.00019906405895149943, "loss": 11.6796, "step": 6275 }, { "epoch": 0.13137402662647577, "grad_norm": 0.2595960199832916, "learning_rate": 0.0001990637596552041, "loss": 11.667, "step": 6276 }, { "epoch": 0.1313949593904379, "grad_norm": 0.23809510469436646, "learning_rate": 0.0001990634603112869, "loss": 11.6808, "step": 6277 }, { "epoch": 0.13141589215440005, "grad_norm": 0.21462158858776093, "learning_rate": 0.00019906316091974785, "loss": 11.6724, "step": 6278 }, { "epoch": 0.13143682491836223, "grad_norm": 0.26891306042671204, "learning_rate": 0.00019906286148058718, "loss": 11.6868, "step": 6279 }, { "epoch": 0.13145775768232437, "grad_norm": 0.21296723186969757, "learning_rate": 0.000199062561993805, "loss": 11.6741, "step": 6280 }, { "epoch": 0.13147869044628652, "grad_norm": 0.2650008499622345, "learning_rate": 0.00019906226245940147, "loss": 11.6772, "step": 6281 }, { "epoch": 0.1314996232102487, "grad_norm": 0.24425970017910004, "learning_rate": 0.0001990619628773767, "loss": 11.6942, "step": 6282 }, { "epoch": 0.13152055597421083, "grad_norm": 0.19029220938682556, "learning_rate": 0.0001990616632477309, "loss": 11.6753, "step": 6283 }, { "epoch": 0.13154148873817298, "grad_norm": 0.2079734206199646, "learning_rate": 0.00019906136357046415, "loss": 11.6747, "step": 6284 }, { "epoch": 0.13156242150213515, "grad_norm": 0.2754921317100525, "learning_rate": 0.0001990610638455766, "loss": 11.6823, "step": 6285 }, { "epoch": 0.1315833542660973, "grad_norm": 0.26915156841278076, "learning_rate": 0.00019906076407306844, "loss": 11.6718, "step": 6286 }, { "epoch": 0.13160428703005944, "grad_norm": 0.25011637806892395, "learning_rate": 0.00019906046425293978, "loss": 11.6801, "step": 6287 }, { "epoch": 0.13162521979402161, "grad_norm": 0.29416099190711975, "learning_rate": 0.00019906016438519077, "loss": 11.6794, "step": 6288 }, { "epoch": 0.13164615255798376, "grad_norm": 0.2800239622592926, "learning_rate": 0.00019905986446982154, "loss": 11.6863, "step": 6289 }, { "epoch": 0.1316670853219459, "grad_norm": 0.29055994749069214, "learning_rate": 0.00019905956450683228, "loss": 11.6737, "step": 6290 }, { "epoch": 0.13168801808590808, "grad_norm": 0.25144028663635254, "learning_rate": 0.00019905926449622304, "loss": 11.6731, "step": 6291 }, { "epoch": 0.13170895084987022, "grad_norm": 0.24063684046268463, "learning_rate": 0.00019905896443799407, "loss": 11.6626, "step": 6292 }, { "epoch": 0.13172988361383237, "grad_norm": 0.2634398937225342, "learning_rate": 0.00019905866433214548, "loss": 11.6755, "step": 6293 }, { "epoch": 0.1317508163777945, "grad_norm": 0.2338569313287735, "learning_rate": 0.0001990583641786774, "loss": 11.6721, "step": 6294 }, { "epoch": 0.13177174914175668, "grad_norm": 0.22639258205890656, "learning_rate": 0.00019905806397758994, "loss": 11.6816, "step": 6295 }, { "epoch": 0.13179268190571883, "grad_norm": 0.235878586769104, "learning_rate": 0.00019905776372888335, "loss": 11.6979, "step": 6296 }, { "epoch": 0.13181361466968097, "grad_norm": 0.23971237242221832, "learning_rate": 0.00019905746343255767, "loss": 11.6936, "step": 6297 }, { "epoch": 0.13183454743364315, "grad_norm": 0.21827332675457, "learning_rate": 0.00019905716308861305, "loss": 11.6915, "step": 6298 }, { "epoch": 0.1318554801976053, "grad_norm": 0.2220945507287979, "learning_rate": 0.0001990568626970497, "loss": 11.6751, "step": 6299 }, { "epoch": 0.13187641296156744, "grad_norm": 0.26149919629096985, "learning_rate": 0.00019905656225786776, "loss": 11.6891, "step": 6300 }, { "epoch": 0.1318973457255296, "grad_norm": 0.3089776635169983, "learning_rate": 0.0001990562617710673, "loss": 11.6871, "step": 6301 }, { "epoch": 0.13191827848949175, "grad_norm": 0.2659865915775299, "learning_rate": 0.00019905596123664853, "loss": 11.678, "step": 6302 }, { "epoch": 0.1319392112534539, "grad_norm": 0.2134379744529724, "learning_rate": 0.0001990556606546116, "loss": 11.6856, "step": 6303 }, { "epoch": 0.13196014401741607, "grad_norm": 0.20870286226272583, "learning_rate": 0.00019905536002495658, "loss": 11.6704, "step": 6304 }, { "epoch": 0.13198107678137821, "grad_norm": 0.27805185317993164, "learning_rate": 0.00019905505934768368, "loss": 11.6853, "step": 6305 }, { "epoch": 0.13200200954534036, "grad_norm": 0.25831079483032227, "learning_rate": 0.00019905475862279302, "loss": 11.6526, "step": 6306 }, { "epoch": 0.13202294230930253, "grad_norm": 0.20993630588054657, "learning_rate": 0.00019905445785028478, "loss": 11.6983, "step": 6307 }, { "epoch": 0.13204387507326468, "grad_norm": 0.23349207639694214, "learning_rate": 0.00019905415703015903, "loss": 11.6899, "step": 6308 }, { "epoch": 0.13206480783722682, "grad_norm": 0.24591775238513947, "learning_rate": 0.000199053856162416, "loss": 11.6497, "step": 6309 }, { "epoch": 0.132085740601189, "grad_norm": 0.24677492678165436, "learning_rate": 0.0001990535552470558, "loss": 11.6844, "step": 6310 }, { "epoch": 0.13210667336515114, "grad_norm": 0.24845196306705475, "learning_rate": 0.00019905325428407855, "loss": 11.6906, "step": 6311 }, { "epoch": 0.13212760612911328, "grad_norm": 0.2380903959274292, "learning_rate": 0.00019905295327348445, "loss": 11.6774, "step": 6312 }, { "epoch": 0.13214853889307543, "grad_norm": 0.3158545196056366, "learning_rate": 0.00019905265221527356, "loss": 11.6704, "step": 6313 }, { "epoch": 0.1321694716570376, "grad_norm": 0.21436256170272827, "learning_rate": 0.00019905235110944611, "loss": 11.6784, "step": 6314 }, { "epoch": 0.13219040442099975, "grad_norm": 0.2514681816101074, "learning_rate": 0.0001990520499560022, "loss": 11.6831, "step": 6315 }, { "epoch": 0.1322113371849619, "grad_norm": 0.227645143866539, "learning_rate": 0.00019905174875494198, "loss": 11.6654, "step": 6316 }, { "epoch": 0.13223226994892406, "grad_norm": 0.34813907742500305, "learning_rate": 0.0001990514475062656, "loss": 11.6919, "step": 6317 }, { "epoch": 0.1322532027128862, "grad_norm": 0.21892446279525757, "learning_rate": 0.0001990511462099732, "loss": 11.6805, "step": 6318 }, { "epoch": 0.13227413547684835, "grad_norm": 0.22798243165016174, "learning_rate": 0.00019905084486606493, "loss": 11.671, "step": 6319 }, { "epoch": 0.13229506824081053, "grad_norm": 0.2789142429828644, "learning_rate": 0.00019905054347454096, "loss": 11.6854, "step": 6320 }, { "epoch": 0.13231600100477267, "grad_norm": 0.27895089983940125, "learning_rate": 0.00019905024203540136, "loss": 11.6785, "step": 6321 }, { "epoch": 0.13233693376873482, "grad_norm": 0.25275346636772156, "learning_rate": 0.0001990499405486464, "loss": 11.6773, "step": 6322 }, { "epoch": 0.132357866532697, "grad_norm": 0.22065556049346924, "learning_rate": 0.00019904963901427606, "loss": 11.6715, "step": 6323 }, { "epoch": 0.13237879929665913, "grad_norm": 0.24761620163917542, "learning_rate": 0.00019904933743229063, "loss": 11.6816, "step": 6324 }, { "epoch": 0.13239973206062128, "grad_norm": 0.29054877161979675, "learning_rate": 0.00019904903580269018, "loss": 11.6862, "step": 6325 }, { "epoch": 0.13242066482458345, "grad_norm": 0.26232513785362244, "learning_rate": 0.00019904873412547488, "loss": 11.6916, "step": 6326 }, { "epoch": 0.1324415975885456, "grad_norm": 0.8221006989479065, "learning_rate": 0.00019904843240064487, "loss": 11.6427, "step": 6327 }, { "epoch": 0.13246253035250774, "grad_norm": 0.23464879393577576, "learning_rate": 0.0001990481306282003, "loss": 11.6889, "step": 6328 }, { "epoch": 0.13248346311646989, "grad_norm": 0.2658486068248749, "learning_rate": 0.0001990478288081413, "loss": 11.6907, "step": 6329 }, { "epoch": 0.13250439588043206, "grad_norm": 0.22934173047542572, "learning_rate": 0.000199047526940468, "loss": 11.6671, "step": 6330 }, { "epoch": 0.1325253286443942, "grad_norm": 0.21767382323741913, "learning_rate": 0.0001990472250251806, "loss": 11.6805, "step": 6331 }, { "epoch": 0.13254626140835635, "grad_norm": 0.2512045204639435, "learning_rate": 0.0001990469230622792, "loss": 11.6713, "step": 6332 }, { "epoch": 0.13256719417231852, "grad_norm": 0.21975407004356384, "learning_rate": 0.00019904662105176398, "loss": 11.6786, "step": 6333 }, { "epoch": 0.13258812693628066, "grad_norm": 0.22118200361728668, "learning_rate": 0.00019904631899363503, "loss": 11.6931, "step": 6334 }, { "epoch": 0.1326090597002428, "grad_norm": 0.23370389640331268, "learning_rate": 0.00019904601688789254, "loss": 11.6772, "step": 6335 }, { "epoch": 0.13262999246420498, "grad_norm": 0.24549444019794464, "learning_rate": 0.00019904571473453668, "loss": 11.6945, "step": 6336 }, { "epoch": 0.13265092522816713, "grad_norm": 0.2276366800069809, "learning_rate": 0.00019904541253356752, "loss": 11.6747, "step": 6337 }, { "epoch": 0.13267185799212927, "grad_norm": 0.2935899496078491, "learning_rate": 0.00019904511028498523, "loss": 11.6665, "step": 6338 }, { "epoch": 0.13269279075609144, "grad_norm": 0.28032416105270386, "learning_rate": 0.00019904480798879002, "loss": 11.6866, "step": 6339 }, { "epoch": 0.1327137235200536, "grad_norm": 0.23003362119197845, "learning_rate": 0.000199044505644982, "loss": 11.6851, "step": 6340 }, { "epoch": 0.13273465628401573, "grad_norm": 0.3234767019748688, "learning_rate": 0.00019904420325356123, "loss": 11.6853, "step": 6341 }, { "epoch": 0.1327555890479779, "grad_norm": 1.1839417219161987, "learning_rate": 0.00019904390081452796, "loss": 11.7529, "step": 6342 }, { "epoch": 0.13277652181194005, "grad_norm": 0.28482359647750854, "learning_rate": 0.00019904359832788231, "loss": 11.6796, "step": 6343 }, { "epoch": 0.1327974545759022, "grad_norm": 0.21496327221393585, "learning_rate": 0.00019904329579362441, "loss": 11.6757, "step": 6344 }, { "epoch": 0.13281838733986437, "grad_norm": 0.261023610830307, "learning_rate": 0.0001990429932117544, "loss": 11.6716, "step": 6345 }, { "epoch": 0.1328393201038265, "grad_norm": 0.23329304158687592, "learning_rate": 0.00019904269058227246, "loss": 11.6741, "step": 6346 }, { "epoch": 0.13286025286778866, "grad_norm": 0.30821290612220764, "learning_rate": 0.00019904238790517873, "loss": 11.6648, "step": 6347 }, { "epoch": 0.1328811856317508, "grad_norm": 0.21118484437465668, "learning_rate": 0.0001990420851804733, "loss": 11.6857, "step": 6348 }, { "epoch": 0.13290211839571298, "grad_norm": 0.23518787324428558, "learning_rate": 0.0001990417824081564, "loss": 11.6813, "step": 6349 }, { "epoch": 0.13292305115967512, "grad_norm": 0.2197064608335495, "learning_rate": 0.00019904147958822808, "loss": 11.6778, "step": 6350 }, { "epoch": 0.13294398392363727, "grad_norm": 0.27269211411476135, "learning_rate": 0.00019904117672068857, "loss": 11.6936, "step": 6351 }, { "epoch": 0.13296491668759944, "grad_norm": 0.21063187718391418, "learning_rate": 0.00019904087380553796, "loss": 11.6827, "step": 6352 }, { "epoch": 0.13298584945156158, "grad_norm": 0.24645046889781952, "learning_rate": 0.00019904057084277641, "loss": 11.6961, "step": 6353 }, { "epoch": 0.13300678221552373, "grad_norm": 0.35378119349479675, "learning_rate": 0.00019904026783240411, "loss": 11.6919, "step": 6354 }, { "epoch": 0.1330277149794859, "grad_norm": 0.21189361810684204, "learning_rate": 0.00019903996477442114, "loss": 11.6763, "step": 6355 }, { "epoch": 0.13304864774344805, "grad_norm": 0.27757173776626587, "learning_rate": 0.00019903966166882767, "loss": 11.6849, "step": 6356 }, { "epoch": 0.1330695805074102, "grad_norm": 0.2214621603488922, "learning_rate": 0.00019903935851562388, "loss": 11.6839, "step": 6357 }, { "epoch": 0.13309051327137236, "grad_norm": 0.22982025146484375, "learning_rate": 0.00019903905531480986, "loss": 11.6829, "step": 6358 }, { "epoch": 0.1331114460353345, "grad_norm": 0.2628706395626068, "learning_rate": 0.00019903875206638577, "loss": 11.6806, "step": 6359 }, { "epoch": 0.13313237879929665, "grad_norm": 0.192151740193367, "learning_rate": 0.00019903844877035179, "loss": 11.6762, "step": 6360 }, { "epoch": 0.13315331156325882, "grad_norm": 0.22785942256450653, "learning_rate": 0.00019903814542670804, "loss": 11.6721, "step": 6361 }, { "epoch": 0.13317424432722097, "grad_norm": 0.23396269977092743, "learning_rate": 0.00019903784203545465, "loss": 11.6883, "step": 6362 }, { "epoch": 0.13319517709118311, "grad_norm": 0.2672731876373291, "learning_rate": 0.00019903753859659183, "loss": 11.686, "step": 6363 }, { "epoch": 0.1332161098551453, "grad_norm": 0.2431473284959793, "learning_rate": 0.00019903723511011966, "loss": 11.6842, "step": 6364 }, { "epoch": 0.13323704261910743, "grad_norm": 0.23897778987884521, "learning_rate": 0.00019903693157603827, "loss": 11.681, "step": 6365 }, { "epoch": 0.13325797538306958, "grad_norm": 0.2444944679737091, "learning_rate": 0.00019903662799434789, "loss": 11.6825, "step": 6366 }, { "epoch": 0.13327890814703172, "grad_norm": 0.2718065679073334, "learning_rate": 0.0001990363243650486, "loss": 11.6906, "step": 6367 }, { "epoch": 0.1332998409109939, "grad_norm": 0.2892203629016876, "learning_rate": 0.00019903602068814053, "loss": 11.6921, "step": 6368 }, { "epoch": 0.13332077367495604, "grad_norm": 0.3305739462375641, "learning_rate": 0.0001990357169636239, "loss": 11.6834, "step": 6369 }, { "epoch": 0.13334170643891818, "grad_norm": 0.20237666368484497, "learning_rate": 0.00019903541319149883, "loss": 11.6898, "step": 6370 }, { "epoch": 0.13336263920288036, "grad_norm": 0.26729273796081543, "learning_rate": 0.0001990351093717654, "loss": 11.6984, "step": 6371 }, { "epoch": 0.1333835719668425, "grad_norm": 0.2242877036333084, "learning_rate": 0.00019903480550442386, "loss": 11.6695, "step": 6372 }, { "epoch": 0.13340450473080465, "grad_norm": 0.2249222695827484, "learning_rate": 0.0001990345015894743, "loss": 11.6863, "step": 6373 }, { "epoch": 0.13342543749476682, "grad_norm": 0.28093934059143066, "learning_rate": 0.00019903419762691684, "loss": 11.6926, "step": 6374 }, { "epoch": 0.13344637025872896, "grad_norm": 0.28725665807724, "learning_rate": 0.00019903389361675166, "loss": 11.6947, "step": 6375 }, { "epoch": 0.1334673030226911, "grad_norm": 0.30846190452575684, "learning_rate": 0.00019903358955897894, "loss": 11.6845, "step": 6376 }, { "epoch": 0.13348823578665328, "grad_norm": 0.24364475905895233, "learning_rate": 0.00019903328545359874, "loss": 11.6752, "step": 6377 }, { "epoch": 0.13350916855061543, "grad_norm": 0.20165017247200012, "learning_rate": 0.00019903298130061127, "loss": 11.6803, "step": 6378 }, { "epoch": 0.13353010131457757, "grad_norm": 0.28938034176826477, "learning_rate": 0.0001990326771000167, "loss": 11.6916, "step": 6379 }, { "epoch": 0.13355103407853974, "grad_norm": 0.2813539505004883, "learning_rate": 0.0001990323728518151, "loss": 11.6889, "step": 6380 }, { "epoch": 0.1335719668425019, "grad_norm": 0.24498380720615387, "learning_rate": 0.00019903206855600666, "loss": 11.6856, "step": 6381 }, { "epoch": 0.13359289960646403, "grad_norm": 0.23579448461532593, "learning_rate": 0.0001990317642125915, "loss": 11.6814, "step": 6382 }, { "epoch": 0.13361383237042618, "grad_norm": 0.2131887972354889, "learning_rate": 0.0001990314598215698, "loss": 11.6869, "step": 6383 }, { "epoch": 0.13363476513438835, "grad_norm": 0.2698056697845459, "learning_rate": 0.00019903115538294171, "loss": 11.661, "step": 6384 }, { "epoch": 0.1336556978983505, "grad_norm": 0.23763802647590637, "learning_rate": 0.00019903085089670734, "loss": 11.6896, "step": 6385 }, { "epoch": 0.13367663066231264, "grad_norm": 0.21418607234954834, "learning_rate": 0.00019903054636286687, "loss": 11.6721, "step": 6386 }, { "epoch": 0.1336975634262748, "grad_norm": 0.2764618396759033, "learning_rate": 0.00019903024178142043, "loss": 11.6834, "step": 6387 }, { "epoch": 0.13371849619023696, "grad_norm": 0.19496887922286987, "learning_rate": 0.00019902993715236814, "loss": 11.6781, "step": 6388 }, { "epoch": 0.1337394289541991, "grad_norm": 0.18612174689769745, "learning_rate": 0.00019902963247571021, "loss": 11.6768, "step": 6389 }, { "epoch": 0.13376036171816127, "grad_norm": 0.283214271068573, "learning_rate": 0.00019902932775144673, "loss": 11.6779, "step": 6390 }, { "epoch": 0.13378129448212342, "grad_norm": 0.267821341753006, "learning_rate": 0.00019902902297957788, "loss": 11.6689, "step": 6391 }, { "epoch": 0.13380222724608556, "grad_norm": 0.2589128613471985, "learning_rate": 0.0001990287181601038, "loss": 11.6867, "step": 6392 }, { "epoch": 0.13382316001004774, "grad_norm": 0.2653430104255676, "learning_rate": 0.0001990284132930246, "loss": 11.6672, "step": 6393 }, { "epoch": 0.13384409277400988, "grad_norm": 0.23795123398303986, "learning_rate": 0.00019902810837834053, "loss": 11.669, "step": 6394 }, { "epoch": 0.13386502553797203, "grad_norm": 0.18759578466415405, "learning_rate": 0.00019902780341605157, "loss": 11.6682, "step": 6395 }, { "epoch": 0.1338859583019342, "grad_norm": 0.37683138251304626, "learning_rate": 0.000199027498406158, "loss": 11.6876, "step": 6396 }, { "epoch": 0.13390689106589634, "grad_norm": 0.27347639203071594, "learning_rate": 0.00019902719334865995, "loss": 11.6786, "step": 6397 }, { "epoch": 0.1339278238298585, "grad_norm": 0.21322743594646454, "learning_rate": 0.00019902688824355752, "loss": 11.6746, "step": 6398 }, { "epoch": 0.13394875659382066, "grad_norm": 0.22333842515945435, "learning_rate": 0.00019902658309085087, "loss": 11.6746, "step": 6399 }, { "epoch": 0.1339696893577828, "grad_norm": 0.21349753439426422, "learning_rate": 0.00019902627789054018, "loss": 11.6775, "step": 6400 }, { "epoch": 0.13399062212174495, "grad_norm": 0.22817957401275635, "learning_rate": 0.00019902597264262557, "loss": 11.6964, "step": 6401 }, { "epoch": 0.1340115548857071, "grad_norm": 0.27185380458831787, "learning_rate": 0.00019902566734710718, "loss": 11.6898, "step": 6402 }, { "epoch": 0.13403248764966927, "grad_norm": 0.21784840524196625, "learning_rate": 0.00019902536200398516, "loss": 11.6888, "step": 6403 }, { "epoch": 0.1340534204136314, "grad_norm": 0.2038109302520752, "learning_rate": 0.0001990250566132597, "loss": 11.6734, "step": 6404 }, { "epoch": 0.13407435317759356, "grad_norm": 0.2733900547027588, "learning_rate": 0.00019902475117493086, "loss": 11.6976, "step": 6405 }, { "epoch": 0.13409528594155573, "grad_norm": 0.23917169868946075, "learning_rate": 0.00019902444568899887, "loss": 11.7065, "step": 6406 }, { "epoch": 0.13411621870551788, "grad_norm": 0.31335362792015076, "learning_rate": 0.00019902414015546385, "loss": 11.6928, "step": 6407 }, { "epoch": 0.13413715146948002, "grad_norm": 0.2214415967464447, "learning_rate": 0.00019902383457432593, "loss": 11.6797, "step": 6408 }, { "epoch": 0.1341580842334422, "grad_norm": 0.2151128053665161, "learning_rate": 0.00019902352894558524, "loss": 11.673, "step": 6409 }, { "epoch": 0.13417901699740434, "grad_norm": 0.3043910562992096, "learning_rate": 0.00019902322326924198, "loss": 11.6944, "step": 6410 }, { "epoch": 0.13419994976136648, "grad_norm": 0.22134289145469666, "learning_rate": 0.00019902291754529629, "loss": 11.6868, "step": 6411 }, { "epoch": 0.13422088252532866, "grad_norm": 0.2578086853027344, "learning_rate": 0.00019902261177374828, "loss": 11.6799, "step": 6412 }, { "epoch": 0.1342418152892908, "grad_norm": 0.33474811911582947, "learning_rate": 0.00019902230595459814, "loss": 11.6934, "step": 6413 }, { "epoch": 0.13426274805325294, "grad_norm": 0.2409852147102356, "learning_rate": 0.00019902200008784597, "loss": 11.6868, "step": 6414 }, { "epoch": 0.13428368081721512, "grad_norm": 0.22685298323631287, "learning_rate": 0.00019902169417349195, "loss": 11.6911, "step": 6415 }, { "epoch": 0.13430461358117726, "grad_norm": 0.24672695994377136, "learning_rate": 0.0001990213882115362, "loss": 11.6732, "step": 6416 }, { "epoch": 0.1343255463451394, "grad_norm": 0.24585501849651337, "learning_rate": 0.0001990210822019789, "loss": 11.6625, "step": 6417 }, { "epoch": 0.13434647910910155, "grad_norm": 0.22903020679950714, "learning_rate": 0.00019902077614482018, "loss": 11.6826, "step": 6418 }, { "epoch": 0.13436741187306372, "grad_norm": 0.22910083830356598, "learning_rate": 0.00019902047004006018, "loss": 11.6892, "step": 6419 }, { "epoch": 0.13438834463702587, "grad_norm": 0.22889041900634766, "learning_rate": 0.00019902016388769907, "loss": 11.6686, "step": 6420 }, { "epoch": 0.13440927740098801, "grad_norm": 0.23962366580963135, "learning_rate": 0.00019901985768773695, "loss": 11.6896, "step": 6421 }, { "epoch": 0.1344302101649502, "grad_norm": 0.30411267280578613, "learning_rate": 0.00019901955144017403, "loss": 11.6844, "step": 6422 }, { "epoch": 0.13445114292891233, "grad_norm": 0.23558169603347778, "learning_rate": 0.00019901924514501042, "loss": 11.6841, "step": 6423 }, { "epoch": 0.13447207569287448, "grad_norm": 0.2337181270122528, "learning_rate": 0.00019901893880224628, "loss": 11.6876, "step": 6424 }, { "epoch": 0.13449300845683665, "grad_norm": 0.1916140913963318, "learning_rate": 0.00019901863241188176, "loss": 11.6698, "step": 6425 }, { "epoch": 0.1345139412207988, "grad_norm": 0.3195103406906128, "learning_rate": 0.000199018325973917, "loss": 11.6827, "step": 6426 }, { "epoch": 0.13453487398476094, "grad_norm": 0.2246934175491333, "learning_rate": 0.0001990180194883521, "loss": 11.6653, "step": 6427 }, { "epoch": 0.1345558067487231, "grad_norm": 0.25516602396965027, "learning_rate": 0.0001990177129551873, "loss": 11.6716, "step": 6428 }, { "epoch": 0.13457673951268526, "grad_norm": 0.2325463443994522, "learning_rate": 0.00019901740637442267, "loss": 11.7004, "step": 6429 }, { "epoch": 0.1345976722766474, "grad_norm": 0.19961713254451752, "learning_rate": 0.00019901709974605843, "loss": 11.6823, "step": 6430 }, { "epoch": 0.13461860504060957, "grad_norm": 0.23138900101184845, "learning_rate": 0.00019901679307009464, "loss": 11.6908, "step": 6431 }, { "epoch": 0.13463953780457172, "grad_norm": 0.23146548867225647, "learning_rate": 0.00019901648634653154, "loss": 11.6955, "step": 6432 }, { "epoch": 0.13466047056853386, "grad_norm": 0.2047802060842514, "learning_rate": 0.0001990161795753692, "loss": 11.682, "step": 6433 }, { "epoch": 0.13468140333249604, "grad_norm": 0.2234584093093872, "learning_rate": 0.0001990158727566078, "loss": 11.6682, "step": 6434 }, { "epoch": 0.13470233609645818, "grad_norm": 0.22209957242012024, "learning_rate": 0.00019901556589024748, "loss": 11.6713, "step": 6435 }, { "epoch": 0.13472326886042033, "grad_norm": 0.3923901617527008, "learning_rate": 0.00019901525897628842, "loss": 11.6713, "step": 6436 }, { "epoch": 0.13474420162438247, "grad_norm": 0.2383819818496704, "learning_rate": 0.0001990149520147307, "loss": 11.6807, "step": 6437 }, { "epoch": 0.13476513438834464, "grad_norm": 0.22869808971881866, "learning_rate": 0.00019901464500557455, "loss": 11.6775, "step": 6438 }, { "epoch": 0.1347860671523068, "grad_norm": 0.22619470953941345, "learning_rate": 0.00019901433794882004, "loss": 11.6857, "step": 6439 }, { "epoch": 0.13480699991626893, "grad_norm": 0.2627691626548767, "learning_rate": 0.00019901403084446738, "loss": 11.6856, "step": 6440 }, { "epoch": 0.1348279326802311, "grad_norm": 0.18454214930534363, "learning_rate": 0.00019901372369251667, "loss": 11.6831, "step": 6441 }, { "epoch": 0.13484886544419325, "grad_norm": 0.18638506531715393, "learning_rate": 0.0001990134164929681, "loss": 11.6728, "step": 6442 }, { "epoch": 0.1348697982081554, "grad_norm": 0.21723002195358276, "learning_rate": 0.00019901310924582178, "loss": 11.6833, "step": 6443 }, { "epoch": 0.13489073097211757, "grad_norm": 0.22595828771591187, "learning_rate": 0.00019901280195107788, "loss": 11.6703, "step": 6444 }, { "epoch": 0.1349116637360797, "grad_norm": 0.2121538668870926, "learning_rate": 0.00019901249460873655, "loss": 11.6769, "step": 6445 }, { "epoch": 0.13493259650004186, "grad_norm": 0.2019948661327362, "learning_rate": 0.0001990121872187979, "loss": 11.6781, "step": 6446 }, { "epoch": 0.13495352926400403, "grad_norm": 0.25495707988739014, "learning_rate": 0.00019901187978126216, "loss": 11.6802, "step": 6447 }, { "epoch": 0.13497446202796617, "grad_norm": 0.2069961130619049, "learning_rate": 0.0001990115722961294, "loss": 11.6821, "step": 6448 }, { "epoch": 0.13499539479192832, "grad_norm": 0.22898559272289276, "learning_rate": 0.00019901126476339979, "loss": 11.6965, "step": 6449 }, { "epoch": 0.1350163275558905, "grad_norm": 0.2746364176273346, "learning_rate": 0.00019901095718307348, "loss": 11.6882, "step": 6450 }, { "epoch": 0.13503726031985264, "grad_norm": 0.2084919512271881, "learning_rate": 0.00019901064955515061, "loss": 11.6807, "step": 6451 }, { "epoch": 0.13505819308381478, "grad_norm": 0.23760317265987396, "learning_rate": 0.00019901034187963133, "loss": 11.6688, "step": 6452 }, { "epoch": 0.13507912584777695, "grad_norm": 0.2234816998243332, "learning_rate": 0.0001990100341565158, "loss": 11.6835, "step": 6453 }, { "epoch": 0.1351000586117391, "grad_norm": 0.2557904124259949, "learning_rate": 0.00019900972638580417, "loss": 11.6657, "step": 6454 }, { "epoch": 0.13512099137570124, "grad_norm": 0.18222850561141968, "learning_rate": 0.0001990094185674966, "loss": 11.6824, "step": 6455 }, { "epoch": 0.1351419241396634, "grad_norm": 0.2824787199497223, "learning_rate": 0.00019900911070159317, "loss": 11.6734, "step": 6456 }, { "epoch": 0.13516285690362556, "grad_norm": 0.23514975607395172, "learning_rate": 0.00019900880278809414, "loss": 11.661, "step": 6457 }, { "epoch": 0.1351837896675877, "grad_norm": 0.3046308159828186, "learning_rate": 0.00019900849482699955, "loss": 11.6729, "step": 6458 }, { "epoch": 0.13520472243154985, "grad_norm": 0.2091812789440155, "learning_rate": 0.0001990081868183096, "loss": 11.6889, "step": 6459 }, { "epoch": 0.13522565519551202, "grad_norm": 0.20556679368019104, "learning_rate": 0.00019900787876202442, "loss": 11.6806, "step": 6460 }, { "epoch": 0.13524658795947417, "grad_norm": 0.30697381496429443, "learning_rate": 0.0001990075706581442, "loss": 11.6751, "step": 6461 }, { "epoch": 0.1352675207234363, "grad_norm": 0.275071382522583, "learning_rate": 0.00019900726250666903, "loss": 11.7051, "step": 6462 }, { "epoch": 0.13528845348739849, "grad_norm": 0.2119331657886505, "learning_rate": 0.00019900695430759912, "loss": 11.6862, "step": 6463 }, { "epoch": 0.13530938625136063, "grad_norm": 0.21669632196426392, "learning_rate": 0.00019900664606093454, "loss": 11.673, "step": 6464 }, { "epoch": 0.13533031901532278, "grad_norm": 0.2279079258441925, "learning_rate": 0.0001990063377666755, "loss": 11.6938, "step": 6465 }, { "epoch": 0.13535125177928495, "grad_norm": 0.2664385139942169, "learning_rate": 0.00019900602942482212, "loss": 11.6796, "step": 6466 }, { "epoch": 0.1353721845432471, "grad_norm": 0.4431801736354828, "learning_rate": 0.00019900572103537456, "loss": 11.7042, "step": 6467 }, { "epoch": 0.13539311730720924, "grad_norm": 0.18826048076152802, "learning_rate": 0.00019900541259833298, "loss": 11.6654, "step": 6468 }, { "epoch": 0.1354140500711714, "grad_norm": 0.2978411912918091, "learning_rate": 0.00019900510411369752, "loss": 11.681, "step": 6469 }, { "epoch": 0.13543498283513355, "grad_norm": 0.22684834897518158, "learning_rate": 0.00019900479558146827, "loss": 11.6904, "step": 6470 }, { "epoch": 0.1354559155990957, "grad_norm": 0.2339213490486145, "learning_rate": 0.00019900448700164547, "loss": 11.6768, "step": 6471 }, { "epoch": 0.13547684836305784, "grad_norm": 0.24914315342903137, "learning_rate": 0.00019900417837422923, "loss": 11.6872, "step": 6472 }, { "epoch": 0.13549778112702002, "grad_norm": 0.21475614607334137, "learning_rate": 0.0001990038696992197, "loss": 11.6619, "step": 6473 }, { "epoch": 0.13551871389098216, "grad_norm": 0.28342291712760925, "learning_rate": 0.00019900356097661703, "loss": 11.689, "step": 6474 }, { "epoch": 0.1355396466549443, "grad_norm": 0.2354506254196167, "learning_rate": 0.00019900325220642136, "loss": 11.6745, "step": 6475 }, { "epoch": 0.13556057941890648, "grad_norm": 0.26827648282051086, "learning_rate": 0.0001990029433886328, "loss": 11.6843, "step": 6476 }, { "epoch": 0.13558151218286862, "grad_norm": 0.21723328530788422, "learning_rate": 0.00019900263452325159, "loss": 11.6784, "step": 6477 }, { "epoch": 0.13560244494683077, "grad_norm": 0.26722559332847595, "learning_rate": 0.0001990023256102778, "loss": 11.6662, "step": 6478 }, { "epoch": 0.13562337771079294, "grad_norm": 0.24672886729240417, "learning_rate": 0.00019900201664971166, "loss": 11.6838, "step": 6479 }, { "epoch": 0.1356443104747551, "grad_norm": 0.22321562469005585, "learning_rate": 0.0001990017076415532, "loss": 11.6806, "step": 6480 }, { "epoch": 0.13566524323871723, "grad_norm": 0.19440002739429474, "learning_rate": 0.00019900139858580268, "loss": 11.6783, "step": 6481 }, { "epoch": 0.1356861760026794, "grad_norm": 0.20969703793525696, "learning_rate": 0.00019900108948246018, "loss": 11.6803, "step": 6482 }, { "epoch": 0.13570710876664155, "grad_norm": 0.18994756042957306, "learning_rate": 0.0001990007803315259, "loss": 11.6704, "step": 6483 }, { "epoch": 0.1357280415306037, "grad_norm": 0.2764515280723572, "learning_rate": 0.00019900047113299992, "loss": 11.6678, "step": 6484 }, { "epoch": 0.13574897429456587, "grad_norm": 0.21937376260757446, "learning_rate": 0.00019900016188688247, "loss": 11.6696, "step": 6485 }, { "epoch": 0.135769907058528, "grad_norm": 0.19167426228523254, "learning_rate": 0.00019899985259317363, "loss": 11.6698, "step": 6486 }, { "epoch": 0.13579083982249016, "grad_norm": 0.2729387879371643, "learning_rate": 0.0001989995432518736, "loss": 11.6798, "step": 6487 }, { "epoch": 0.13581177258645233, "grad_norm": 0.19994185864925385, "learning_rate": 0.0001989992338629825, "loss": 11.6705, "step": 6488 }, { "epoch": 0.13583270535041447, "grad_norm": 0.23281638324260712, "learning_rate": 0.00019899892442650047, "loss": 11.6853, "step": 6489 }, { "epoch": 0.13585363811437662, "grad_norm": 0.2546936273574829, "learning_rate": 0.0001989986149424277, "loss": 11.6891, "step": 6490 }, { "epoch": 0.13587457087833876, "grad_norm": 0.27302631735801697, "learning_rate": 0.00019899830541076429, "loss": 11.6787, "step": 6491 }, { "epoch": 0.13589550364230094, "grad_norm": 0.20575249195098877, "learning_rate": 0.0001989979958315104, "loss": 11.6817, "step": 6492 }, { "epoch": 0.13591643640626308, "grad_norm": 0.23015473783016205, "learning_rate": 0.00019899768620466622, "loss": 11.6817, "step": 6493 }, { "epoch": 0.13593736917022523, "grad_norm": 0.24070197343826294, "learning_rate": 0.00019899737653023182, "loss": 11.6971, "step": 6494 }, { "epoch": 0.1359583019341874, "grad_norm": 0.18742594122886658, "learning_rate": 0.00019899706680820745, "loss": 11.6775, "step": 6495 }, { "epoch": 0.13597923469814954, "grad_norm": 0.40413179993629456, "learning_rate": 0.00019899675703859317, "loss": 11.6676, "step": 6496 }, { "epoch": 0.1360001674621117, "grad_norm": 0.250366747379303, "learning_rate": 0.00019899644722138918, "loss": 11.6738, "step": 6497 }, { "epoch": 0.13602110022607386, "grad_norm": 0.2585626244544983, "learning_rate": 0.0001989961373565956, "loss": 11.6784, "step": 6498 }, { "epoch": 0.136042032990036, "grad_norm": 0.30811214447021484, "learning_rate": 0.0001989958274442126, "loss": 11.6762, "step": 6499 }, { "epoch": 0.13606296575399815, "grad_norm": 0.3228054642677307, "learning_rate": 0.00019899551748424034, "loss": 11.6876, "step": 6500 }, { "epoch": 0.13608389851796032, "grad_norm": 0.22449886798858643, "learning_rate": 0.00019899520747667895, "loss": 11.681, "step": 6501 }, { "epoch": 0.13610483128192247, "grad_norm": 0.2209651917219162, "learning_rate": 0.00019899489742152858, "loss": 11.6751, "step": 6502 }, { "epoch": 0.1361257640458846, "grad_norm": 0.25026193261146545, "learning_rate": 0.00019899458731878936, "loss": 11.681, "step": 6503 }, { "epoch": 0.13614669680984678, "grad_norm": 0.2599974274635315, "learning_rate": 0.00019899427716846147, "loss": 11.6704, "step": 6504 }, { "epoch": 0.13616762957380893, "grad_norm": 0.213665172457695, "learning_rate": 0.00019899396697054504, "loss": 11.6733, "step": 6505 }, { "epoch": 0.13618856233777107, "grad_norm": 0.3057374954223633, "learning_rate": 0.00019899365672504022, "loss": 11.69, "step": 6506 }, { "epoch": 0.13620949510173325, "grad_norm": 0.22676371037960052, "learning_rate": 0.00019899334643194716, "loss": 11.683, "step": 6507 }, { "epoch": 0.1362304278656954, "grad_norm": 0.24510715901851654, "learning_rate": 0.00019899303609126605, "loss": 11.6715, "step": 6508 }, { "epoch": 0.13625136062965754, "grad_norm": 0.24423252046108246, "learning_rate": 0.00019899272570299696, "loss": 11.6925, "step": 6509 }, { "epoch": 0.13627229339361968, "grad_norm": 0.19466403126716614, "learning_rate": 0.00019899241526714011, "loss": 11.6859, "step": 6510 }, { "epoch": 0.13629322615758185, "grad_norm": 0.22810958325862885, "learning_rate": 0.00019899210478369563, "loss": 11.6773, "step": 6511 }, { "epoch": 0.136314158921544, "grad_norm": 0.21757617592811584, "learning_rate": 0.00019899179425266364, "loss": 11.6773, "step": 6512 }, { "epoch": 0.13633509168550614, "grad_norm": 0.23314325511455536, "learning_rate": 0.00019899148367404432, "loss": 11.6793, "step": 6513 }, { "epoch": 0.13635602444946832, "grad_norm": 0.2853420078754425, "learning_rate": 0.0001989911730478378, "loss": 11.6748, "step": 6514 }, { "epoch": 0.13637695721343046, "grad_norm": 0.1871967762708664, "learning_rate": 0.00019899086237404426, "loss": 11.6742, "step": 6515 }, { "epoch": 0.1363978899773926, "grad_norm": 0.25111544132232666, "learning_rate": 0.00019899055165266383, "loss": 11.6808, "step": 6516 }, { "epoch": 0.13641882274135478, "grad_norm": 0.23024141788482666, "learning_rate": 0.00019899024088369663, "loss": 11.667, "step": 6517 }, { "epoch": 0.13643975550531692, "grad_norm": 0.24908925592899323, "learning_rate": 0.00019898993006714285, "loss": 11.6563, "step": 6518 }, { "epoch": 0.13646068826927907, "grad_norm": 0.21220842003822327, "learning_rate": 0.00019898961920300265, "loss": 11.6784, "step": 6519 }, { "epoch": 0.13648162103324124, "grad_norm": 0.2729625403881073, "learning_rate": 0.0001989893082912761, "loss": 11.679, "step": 6520 }, { "epoch": 0.13650255379720339, "grad_norm": 0.2197371870279312, "learning_rate": 0.00019898899733196345, "loss": 11.6749, "step": 6521 }, { "epoch": 0.13652348656116553, "grad_norm": 0.26789307594299316, "learning_rate": 0.00019898868632506483, "loss": 11.6826, "step": 6522 }, { "epoch": 0.1365444193251277, "grad_norm": 0.23239609599113464, "learning_rate": 0.00019898837527058033, "loss": 11.6884, "step": 6523 }, { "epoch": 0.13656535208908985, "grad_norm": 0.21309104561805725, "learning_rate": 0.00019898806416851012, "loss": 11.6772, "step": 6524 }, { "epoch": 0.136586284853052, "grad_norm": 0.28141501545906067, "learning_rate": 0.00019898775301885438, "loss": 11.6744, "step": 6525 }, { "epoch": 0.13660721761701414, "grad_norm": 0.31782400608062744, "learning_rate": 0.00019898744182161323, "loss": 11.697, "step": 6526 }, { "epoch": 0.1366281503809763, "grad_norm": 0.24356845021247864, "learning_rate": 0.00019898713057678688, "loss": 11.6769, "step": 6527 }, { "epoch": 0.13664908314493845, "grad_norm": 0.27841663360595703, "learning_rate": 0.0001989868192843754, "loss": 11.685, "step": 6528 }, { "epoch": 0.1366700159089006, "grad_norm": 0.22326740622520447, "learning_rate": 0.00019898650794437895, "loss": 11.6827, "step": 6529 }, { "epoch": 0.13669094867286277, "grad_norm": 0.2614239454269409, "learning_rate": 0.0001989861965567977, "loss": 11.6884, "step": 6530 }, { "epoch": 0.13671188143682492, "grad_norm": 0.22164681553840637, "learning_rate": 0.00019898588512163182, "loss": 11.6865, "step": 6531 }, { "epoch": 0.13673281420078706, "grad_norm": 0.22846677899360657, "learning_rate": 0.00019898557363888148, "loss": 11.6841, "step": 6532 }, { "epoch": 0.13675374696474923, "grad_norm": 0.2048823982477188, "learning_rate": 0.00019898526210854675, "loss": 11.6491, "step": 6533 }, { "epoch": 0.13677467972871138, "grad_norm": 0.27483126521110535, "learning_rate": 0.00019898495053062782, "loss": 11.6885, "step": 6534 }, { "epoch": 0.13679561249267352, "grad_norm": 0.26086458563804626, "learning_rate": 0.00019898463890512485, "loss": 11.6753, "step": 6535 }, { "epoch": 0.1368165452566357, "grad_norm": 0.28841647505760193, "learning_rate": 0.00019898432723203797, "loss": 11.6841, "step": 6536 }, { "epoch": 0.13683747802059784, "grad_norm": 0.21688590943813324, "learning_rate": 0.00019898401551136737, "loss": 11.6691, "step": 6537 }, { "epoch": 0.13685841078456, "grad_norm": 0.19609256088733673, "learning_rate": 0.00019898370374311312, "loss": 11.6808, "step": 6538 }, { "epoch": 0.13687934354852216, "grad_norm": 0.21036027371883392, "learning_rate": 0.00019898339192727545, "loss": 11.675, "step": 6539 }, { "epoch": 0.1369002763124843, "grad_norm": 0.21202899515628815, "learning_rate": 0.0001989830800638545, "loss": 11.6938, "step": 6540 }, { "epoch": 0.13692120907644645, "grad_norm": 0.2709442675113678, "learning_rate": 0.00019898276815285038, "loss": 11.6808, "step": 6541 }, { "epoch": 0.13694214184040862, "grad_norm": 0.25992095470428467, "learning_rate": 0.00019898245619426325, "loss": 11.68, "step": 6542 }, { "epoch": 0.13696307460437077, "grad_norm": 0.2260255366563797, "learning_rate": 0.0001989821441880933, "loss": 11.6766, "step": 6543 }, { "epoch": 0.1369840073683329, "grad_norm": 0.226702019572258, "learning_rate": 0.0001989818321343406, "loss": 11.667, "step": 6544 }, { "epoch": 0.13700494013229506, "grad_norm": 0.1878330558538437, "learning_rate": 0.00019898152003300538, "loss": 11.6751, "step": 6545 }, { "epoch": 0.13702587289625723, "grad_norm": 0.2620578706264496, "learning_rate": 0.00019898120788408775, "loss": 11.6741, "step": 6546 }, { "epoch": 0.13704680566021937, "grad_norm": 0.20266607403755188, "learning_rate": 0.0001989808956875879, "loss": 11.6837, "step": 6547 }, { "epoch": 0.13706773842418152, "grad_norm": 0.312420517206192, "learning_rate": 0.0001989805834435059, "loss": 11.6863, "step": 6548 }, { "epoch": 0.1370886711881437, "grad_norm": 0.28139403462409973, "learning_rate": 0.000198980271151842, "loss": 11.6675, "step": 6549 }, { "epoch": 0.13710960395210584, "grad_norm": 0.22320613265037537, "learning_rate": 0.00019897995881259627, "loss": 11.6892, "step": 6550 }, { "epoch": 0.13713053671606798, "grad_norm": 0.23442530632019043, "learning_rate": 0.0001989796464257689, "loss": 11.6839, "step": 6551 }, { "epoch": 0.13715146948003015, "grad_norm": 0.26861336827278137, "learning_rate": 0.00019897933399136002, "loss": 11.6893, "step": 6552 }, { "epoch": 0.1371724022439923, "grad_norm": 0.22563284635543823, "learning_rate": 0.0001989790215093698, "loss": 11.6765, "step": 6553 }, { "epoch": 0.13719333500795444, "grad_norm": 0.24542638659477234, "learning_rate": 0.0001989787089797984, "loss": 11.6977, "step": 6554 }, { "epoch": 0.13721426777191661, "grad_norm": 0.30366653203964233, "learning_rate": 0.00019897839640264592, "loss": 11.6786, "step": 6555 }, { "epoch": 0.13723520053587876, "grad_norm": 0.2418426275253296, "learning_rate": 0.00019897808377791255, "loss": 11.6859, "step": 6556 }, { "epoch": 0.1372561332998409, "grad_norm": 0.23798102140426636, "learning_rate": 0.0001989777711055984, "loss": 11.6763, "step": 6557 }, { "epoch": 0.13727706606380308, "grad_norm": 0.23712144792079926, "learning_rate": 0.00019897745838570372, "loss": 11.685, "step": 6558 }, { "epoch": 0.13729799882776522, "grad_norm": 0.30263689160346985, "learning_rate": 0.00019897714561822855, "loss": 11.6812, "step": 6559 }, { "epoch": 0.13731893159172737, "grad_norm": 0.21921880543231964, "learning_rate": 0.0001989768328031731, "loss": 11.6844, "step": 6560 }, { "epoch": 0.1373398643556895, "grad_norm": 0.250399112701416, "learning_rate": 0.00019897651994053747, "loss": 11.6664, "step": 6561 }, { "epoch": 0.13736079711965168, "grad_norm": 0.27081891894340515, "learning_rate": 0.00019897620703032188, "loss": 11.6822, "step": 6562 }, { "epoch": 0.13738172988361383, "grad_norm": 0.24035322666168213, "learning_rate": 0.0001989758940725264, "loss": 11.6785, "step": 6563 }, { "epoch": 0.13740266264757597, "grad_norm": 0.3056381642818451, "learning_rate": 0.0001989755810671513, "loss": 11.6862, "step": 6564 }, { "epoch": 0.13742359541153815, "grad_norm": 0.18447604775428772, "learning_rate": 0.00019897526801419658, "loss": 11.6989, "step": 6565 }, { "epoch": 0.1374445281755003, "grad_norm": 0.212478905916214, "learning_rate": 0.0001989749549136625, "loss": 11.6738, "step": 6566 }, { "epoch": 0.13746546093946244, "grad_norm": 0.22269727289676666, "learning_rate": 0.00019897464176554916, "loss": 11.6697, "step": 6567 }, { "epoch": 0.1374863937034246, "grad_norm": 0.2215951532125473, "learning_rate": 0.00019897432856985674, "loss": 11.6855, "step": 6568 }, { "epoch": 0.13750732646738675, "grad_norm": 0.2276919186115265, "learning_rate": 0.00019897401532658536, "loss": 11.6781, "step": 6569 }, { "epoch": 0.1375282592313489, "grad_norm": 0.2879074215888977, "learning_rate": 0.00019897370203573523, "loss": 11.6712, "step": 6570 }, { "epoch": 0.13754919199531107, "grad_norm": 0.223106250166893, "learning_rate": 0.0001989733886973064, "loss": 11.6818, "step": 6571 }, { "epoch": 0.13757012475927322, "grad_norm": 0.2146669626235962, "learning_rate": 0.0001989730753112991, "loss": 11.6739, "step": 6572 }, { "epoch": 0.13759105752323536, "grad_norm": 0.21633444726467133, "learning_rate": 0.00019897276187771348, "loss": 11.6849, "step": 6573 }, { "epoch": 0.13761199028719753, "grad_norm": 0.26566460728645325, "learning_rate": 0.00019897244839654966, "loss": 11.6741, "step": 6574 }, { "epoch": 0.13763292305115968, "grad_norm": 0.28368616104125977, "learning_rate": 0.0001989721348678078, "loss": 11.684, "step": 6575 }, { "epoch": 0.13765385581512182, "grad_norm": 0.2477784901857376, "learning_rate": 0.00019897182129148804, "loss": 11.6848, "step": 6576 }, { "epoch": 0.137674788579084, "grad_norm": 0.22646498680114746, "learning_rate": 0.00019897150766759055, "loss": 11.6775, "step": 6577 }, { "epoch": 0.13769572134304614, "grad_norm": 0.24673354625701904, "learning_rate": 0.00019897119399611547, "loss": 11.659, "step": 6578 }, { "epoch": 0.13771665410700829, "grad_norm": 0.22265811264514923, "learning_rate": 0.00019897088027706294, "loss": 11.6877, "step": 6579 }, { "epoch": 0.13773758687097043, "grad_norm": 0.2618047297000885, "learning_rate": 0.00019897056651043316, "loss": 11.6855, "step": 6580 }, { "epoch": 0.1377585196349326, "grad_norm": 0.22539344429969788, "learning_rate": 0.00019897025269622622, "loss": 11.6644, "step": 6581 }, { "epoch": 0.13777945239889475, "grad_norm": 0.3110145032405853, "learning_rate": 0.00019896993883444227, "loss": 11.6811, "step": 6582 }, { "epoch": 0.1378003851628569, "grad_norm": 0.1961732804775238, "learning_rate": 0.00019896962492508156, "loss": 11.6915, "step": 6583 }, { "epoch": 0.13782131792681906, "grad_norm": 0.19057022035121918, "learning_rate": 0.0001989693109681441, "loss": 11.6872, "step": 6584 }, { "epoch": 0.1378422506907812, "grad_norm": 0.24610720574855804, "learning_rate": 0.00019896899696363014, "loss": 11.6812, "step": 6585 }, { "epoch": 0.13786318345474335, "grad_norm": 0.24795259535312653, "learning_rate": 0.00019896868291153981, "loss": 11.6929, "step": 6586 }, { "epoch": 0.13788411621870553, "grad_norm": 0.2179405838251114, "learning_rate": 0.0001989683688118732, "loss": 11.6809, "step": 6587 }, { "epoch": 0.13790504898266767, "grad_norm": 0.21757452189922333, "learning_rate": 0.00019896805466463057, "loss": 11.6847, "step": 6588 }, { "epoch": 0.13792598174662982, "grad_norm": 0.25952473282814026, "learning_rate": 0.000198967740469812, "loss": 11.6838, "step": 6589 }, { "epoch": 0.137946914510592, "grad_norm": 0.17389488220214844, "learning_rate": 0.00019896742622741765, "loss": 11.6765, "step": 6590 }, { "epoch": 0.13796784727455413, "grad_norm": 0.23938821256160736, "learning_rate": 0.00019896711193744765, "loss": 11.6895, "step": 6591 }, { "epoch": 0.13798878003851628, "grad_norm": 0.23465128242969513, "learning_rate": 0.0001989667975999022, "loss": 11.6934, "step": 6592 }, { "epoch": 0.13800971280247845, "grad_norm": 0.24800381064414978, "learning_rate": 0.0001989664832147814, "loss": 11.6918, "step": 6593 }, { "epoch": 0.1380306455664406, "grad_norm": 0.24027501046657562, "learning_rate": 0.00019896616878208547, "loss": 11.6906, "step": 6594 }, { "epoch": 0.13805157833040274, "grad_norm": 0.20109206438064575, "learning_rate": 0.00019896585430181451, "loss": 11.6716, "step": 6595 }, { "epoch": 0.1380725110943649, "grad_norm": 0.2010556161403656, "learning_rate": 0.00019896553977396866, "loss": 11.6826, "step": 6596 }, { "epoch": 0.13809344385832706, "grad_norm": 0.24450218677520752, "learning_rate": 0.0001989652251985481, "loss": 11.6789, "step": 6597 }, { "epoch": 0.1381143766222892, "grad_norm": 0.24183523654937744, "learning_rate": 0.000198964910575553, "loss": 11.6909, "step": 6598 }, { "epoch": 0.13813530938625135, "grad_norm": 0.22157630324363708, "learning_rate": 0.00019896459590498344, "loss": 11.6801, "step": 6599 }, { "epoch": 0.13815624215021352, "grad_norm": 0.3036251366138458, "learning_rate": 0.00019896428118683964, "loss": 11.6764, "step": 6600 }, { "epoch": 0.13817717491417567, "grad_norm": 0.2324168086051941, "learning_rate": 0.00019896396642112173, "loss": 11.6803, "step": 6601 }, { "epoch": 0.1381981076781378, "grad_norm": 0.21975448727607727, "learning_rate": 0.00019896365160782984, "loss": 11.6767, "step": 6602 }, { "epoch": 0.13821904044209998, "grad_norm": 0.22603793442249298, "learning_rate": 0.00019896333674696413, "loss": 11.6825, "step": 6603 }, { "epoch": 0.13823997320606213, "grad_norm": 0.24403858184814453, "learning_rate": 0.0001989630218385248, "loss": 11.6659, "step": 6604 }, { "epoch": 0.13826090597002427, "grad_norm": 0.2741573452949524, "learning_rate": 0.00019896270688251195, "loss": 11.6683, "step": 6605 }, { "epoch": 0.13828183873398645, "grad_norm": 0.2688608467578888, "learning_rate": 0.00019896239187892572, "loss": 11.6777, "step": 6606 }, { "epoch": 0.1383027714979486, "grad_norm": 0.22964072227478027, "learning_rate": 0.00019896207682776627, "loss": 11.6817, "step": 6607 }, { "epoch": 0.13832370426191073, "grad_norm": 0.24241378903388977, "learning_rate": 0.00019896176172903382, "loss": 11.6883, "step": 6608 }, { "epoch": 0.1383446370258729, "grad_norm": 0.24227750301361084, "learning_rate": 0.00019896144658272845, "loss": 11.6885, "step": 6609 }, { "epoch": 0.13836556978983505, "grad_norm": 0.24234235286712646, "learning_rate": 0.00019896113138885034, "loss": 11.6572, "step": 6610 }, { "epoch": 0.1383865025537972, "grad_norm": 0.2624392509460449, "learning_rate": 0.00019896081614739961, "loss": 11.6761, "step": 6611 }, { "epoch": 0.13840743531775937, "grad_norm": 0.23452642560005188, "learning_rate": 0.00019896050085837643, "loss": 11.6781, "step": 6612 }, { "epoch": 0.13842836808172151, "grad_norm": 0.19744421541690826, "learning_rate": 0.00019896018552178094, "loss": 11.6837, "step": 6613 }, { "epoch": 0.13844930084568366, "grad_norm": 0.23135972023010254, "learning_rate": 0.00019895987013761335, "loss": 11.6825, "step": 6614 }, { "epoch": 0.1384702336096458, "grad_norm": 0.2489517778158188, "learning_rate": 0.00019895955470587375, "loss": 11.6767, "step": 6615 }, { "epoch": 0.13849116637360798, "grad_norm": 0.2365986406803131, "learning_rate": 0.0001989592392265623, "loss": 11.6901, "step": 6616 }, { "epoch": 0.13851209913757012, "grad_norm": 0.22342082858085632, "learning_rate": 0.00019895892369967917, "loss": 11.6892, "step": 6617 }, { "epoch": 0.13853303190153227, "grad_norm": 0.18720082938671112, "learning_rate": 0.0001989586081252245, "loss": 11.69, "step": 6618 }, { "epoch": 0.13855396466549444, "grad_norm": 0.27539026737213135, "learning_rate": 0.00019895829250319845, "loss": 11.6877, "step": 6619 }, { "epoch": 0.13857489742945658, "grad_norm": 0.2546946704387665, "learning_rate": 0.00019895797683360114, "loss": 11.6825, "step": 6620 }, { "epoch": 0.13859583019341873, "grad_norm": 0.2222323715686798, "learning_rate": 0.00019895766111643277, "loss": 11.6987, "step": 6621 }, { "epoch": 0.1386167629573809, "grad_norm": 0.2108924835920334, "learning_rate": 0.00019895734535169344, "loss": 11.692, "step": 6622 }, { "epoch": 0.13863769572134305, "grad_norm": 0.25615188479423523, "learning_rate": 0.00019895702953938335, "loss": 11.6851, "step": 6623 }, { "epoch": 0.1386586284853052, "grad_norm": 0.3089390993118286, "learning_rate": 0.00019895671367950267, "loss": 11.6823, "step": 6624 }, { "epoch": 0.13867956124926736, "grad_norm": 0.1959877610206604, "learning_rate": 0.00019895639777205143, "loss": 11.6832, "step": 6625 }, { "epoch": 0.1387004940132295, "grad_norm": 0.21597056090831757, "learning_rate": 0.00019895608181702993, "loss": 11.6802, "step": 6626 }, { "epoch": 0.13872142677719165, "grad_norm": 0.24929021298885345, "learning_rate": 0.00019895576581443827, "loss": 11.6803, "step": 6627 }, { "epoch": 0.13874235954115383, "grad_norm": 0.22843410074710846, "learning_rate": 0.00019895544976427653, "loss": 11.6592, "step": 6628 }, { "epoch": 0.13876329230511597, "grad_norm": 0.2569063901901245, "learning_rate": 0.000198955133666545, "loss": 11.6917, "step": 6629 }, { "epoch": 0.13878422506907812, "grad_norm": 0.24187402427196503, "learning_rate": 0.0001989548175212437, "loss": 11.6714, "step": 6630 }, { "epoch": 0.1388051578330403, "grad_norm": 0.23667627573013306, "learning_rate": 0.00019895450132837285, "loss": 11.674, "step": 6631 }, { "epoch": 0.13882609059700243, "grad_norm": 0.2578214406967163, "learning_rate": 0.00019895418508793258, "loss": 11.6872, "step": 6632 }, { "epoch": 0.13884702336096458, "grad_norm": 0.2259853184223175, "learning_rate": 0.00019895386879992307, "loss": 11.6711, "step": 6633 }, { "epoch": 0.13886795612492672, "grad_norm": 0.24953694641590118, "learning_rate": 0.0001989535524643444, "loss": 11.6926, "step": 6634 }, { "epoch": 0.1388888888888889, "grad_norm": 0.17909520864486694, "learning_rate": 0.00019895323608119686, "loss": 11.6947, "step": 6635 }, { "epoch": 0.13890982165285104, "grad_norm": 0.24594244360923767, "learning_rate": 0.00019895291965048043, "loss": 11.681, "step": 6636 }, { "epoch": 0.13893075441681318, "grad_norm": 0.21168090403079987, "learning_rate": 0.0001989526031721954, "loss": 11.6847, "step": 6637 }, { "epoch": 0.13895168718077536, "grad_norm": 0.22103506326675415, "learning_rate": 0.00019895228664634185, "loss": 11.6604, "step": 6638 }, { "epoch": 0.1389726199447375, "grad_norm": 0.2148781567811966, "learning_rate": 0.00019895197007291997, "loss": 11.6871, "step": 6639 }, { "epoch": 0.13899355270869965, "grad_norm": 0.21049530804157257, "learning_rate": 0.00019895165345192988, "loss": 11.6688, "step": 6640 }, { "epoch": 0.13901448547266182, "grad_norm": 0.2833816409111023, "learning_rate": 0.00019895133678337175, "loss": 11.6773, "step": 6641 }, { "epoch": 0.13903541823662396, "grad_norm": 0.2029673010110855, "learning_rate": 0.00019895102006724574, "loss": 11.6665, "step": 6642 }, { "epoch": 0.1390563510005861, "grad_norm": 0.25672605633735657, "learning_rate": 0.000198950703303552, "loss": 11.6822, "step": 6643 }, { "epoch": 0.13907728376454828, "grad_norm": 0.24411673843860626, "learning_rate": 0.00019895038649229063, "loss": 11.6821, "step": 6644 }, { "epoch": 0.13909821652851043, "grad_norm": 0.2449735701084137, "learning_rate": 0.00019895006963346185, "loss": 11.6978, "step": 6645 }, { "epoch": 0.13911914929247257, "grad_norm": 0.20552211999893188, "learning_rate": 0.00019894975272706579, "loss": 11.67, "step": 6646 }, { "epoch": 0.13914008205643474, "grad_norm": 0.20569109916687012, "learning_rate": 0.00019894943577310263, "loss": 11.6652, "step": 6647 }, { "epoch": 0.1391610148203969, "grad_norm": 0.23265932500362396, "learning_rate": 0.00019894911877157246, "loss": 11.6662, "step": 6648 }, { "epoch": 0.13918194758435903, "grad_norm": 0.21136347949504852, "learning_rate": 0.00019894880172247547, "loss": 11.6813, "step": 6649 }, { "epoch": 0.13920288034832118, "grad_norm": 0.2888972759246826, "learning_rate": 0.00019894848462581182, "loss": 11.6872, "step": 6650 }, { "epoch": 0.13922381311228335, "grad_norm": 0.20580770075321198, "learning_rate": 0.00019894816748158162, "loss": 11.6918, "step": 6651 }, { "epoch": 0.1392447458762455, "grad_norm": 0.28338494896888733, "learning_rate": 0.00019894785028978508, "loss": 11.682, "step": 6652 }, { "epoch": 0.13926567864020764, "grad_norm": 0.25456395745277405, "learning_rate": 0.00019894753305042234, "loss": 11.6918, "step": 6653 }, { "epoch": 0.1392866114041698, "grad_norm": 0.21906988322734833, "learning_rate": 0.00019894721576349352, "loss": 11.6695, "step": 6654 }, { "epoch": 0.13930754416813196, "grad_norm": 0.21319541335105896, "learning_rate": 0.0001989468984289988, "loss": 11.6695, "step": 6655 }, { "epoch": 0.1393284769320941, "grad_norm": 0.27615952491760254, "learning_rate": 0.0001989465810469383, "loss": 11.6717, "step": 6656 }, { "epoch": 0.13934940969605628, "grad_norm": 0.2203102558851242, "learning_rate": 0.0001989462636173122, "loss": 11.6657, "step": 6657 }, { "epoch": 0.13937034246001842, "grad_norm": 0.22180290520191193, "learning_rate": 0.00019894594614012066, "loss": 11.6681, "step": 6658 }, { "epoch": 0.13939127522398057, "grad_norm": 0.24075306951999664, "learning_rate": 0.0001989456286153638, "loss": 11.6819, "step": 6659 }, { "epoch": 0.13941220798794274, "grad_norm": 0.18772628903388977, "learning_rate": 0.00019894531104304181, "loss": 11.6728, "step": 6660 }, { "epoch": 0.13943314075190488, "grad_norm": 0.25128644704818726, "learning_rate": 0.00019894499342315483, "loss": 11.6932, "step": 6661 }, { "epoch": 0.13945407351586703, "grad_norm": 0.21043582260608673, "learning_rate": 0.00019894467575570302, "loss": 11.6731, "step": 6662 }, { "epoch": 0.1394750062798292, "grad_norm": 0.2664068341255188, "learning_rate": 0.0001989443580406865, "loss": 11.6893, "step": 6663 }, { "epoch": 0.13949593904379134, "grad_norm": 0.2302102893590927, "learning_rate": 0.00019894404027810545, "loss": 11.6719, "step": 6664 }, { "epoch": 0.1395168718077535, "grad_norm": 0.2412855476140976, "learning_rate": 0.00019894372246796, "loss": 11.7017, "step": 6665 }, { "epoch": 0.13953780457171566, "grad_norm": 0.2016516923904419, "learning_rate": 0.00019894340461025034, "loss": 11.6722, "step": 6666 }, { "epoch": 0.1395587373356778, "grad_norm": 0.24623648822307587, "learning_rate": 0.0001989430867049766, "loss": 11.6907, "step": 6667 }, { "epoch": 0.13957967009963995, "grad_norm": 0.2922070324420929, "learning_rate": 0.00019894276875213893, "loss": 11.6722, "step": 6668 }, { "epoch": 0.1396006028636021, "grad_norm": 0.27024975419044495, "learning_rate": 0.0001989424507517375, "loss": 11.6712, "step": 6669 }, { "epoch": 0.13962153562756427, "grad_norm": 0.25025564432144165, "learning_rate": 0.00019894213270377244, "loss": 11.6823, "step": 6670 }, { "epoch": 0.13964246839152641, "grad_norm": 0.24306859076023102, "learning_rate": 0.00019894181460824393, "loss": 11.6679, "step": 6671 }, { "epoch": 0.13966340115548856, "grad_norm": 0.1846303790807724, "learning_rate": 0.0001989414964651521, "loss": 11.6584, "step": 6672 }, { "epoch": 0.13968433391945073, "grad_norm": 0.2316766083240509, "learning_rate": 0.00019894117827449708, "loss": 11.6853, "step": 6673 }, { "epoch": 0.13970526668341288, "grad_norm": 0.2690059542655945, "learning_rate": 0.00019894086003627908, "loss": 11.6952, "step": 6674 }, { "epoch": 0.13972619944737502, "grad_norm": 0.28358903527259827, "learning_rate": 0.00019894054175049824, "loss": 11.6935, "step": 6675 }, { "epoch": 0.1397471322113372, "grad_norm": 0.26290255784988403, "learning_rate": 0.0001989402234171547, "loss": 11.675, "step": 6676 }, { "epoch": 0.13976806497529934, "grad_norm": 0.24705180525779724, "learning_rate": 0.00019893990503624855, "loss": 11.688, "step": 6677 }, { "epoch": 0.13978899773926148, "grad_norm": 0.23727372288703918, "learning_rate": 0.00019893958660778007, "loss": 11.6838, "step": 6678 }, { "epoch": 0.13980993050322366, "grad_norm": 0.25641927123069763, "learning_rate": 0.00019893926813174933, "loss": 11.6848, "step": 6679 }, { "epoch": 0.1398308632671858, "grad_norm": 0.18909797072410583, "learning_rate": 0.0001989389496081565, "loss": 11.673, "step": 6680 }, { "epoch": 0.13985179603114795, "grad_norm": 0.25866958498954773, "learning_rate": 0.0001989386310370017, "loss": 11.6976, "step": 6681 }, { "epoch": 0.13987272879511012, "grad_norm": 0.2670300304889679, "learning_rate": 0.00019893831241828518, "loss": 11.678, "step": 6682 }, { "epoch": 0.13989366155907226, "grad_norm": 0.23823614418506622, "learning_rate": 0.00019893799375200698, "loss": 11.6836, "step": 6683 }, { "epoch": 0.1399145943230344, "grad_norm": 0.24151228368282318, "learning_rate": 0.00019893767503816734, "loss": 11.6728, "step": 6684 }, { "epoch": 0.13993552708699658, "grad_norm": 0.22127652168273926, "learning_rate": 0.00019893735627676633, "loss": 11.6805, "step": 6685 }, { "epoch": 0.13995645985095873, "grad_norm": 0.2591462731361389, "learning_rate": 0.0001989370374678042, "loss": 11.6722, "step": 6686 }, { "epoch": 0.13997739261492087, "grad_norm": 0.2062819004058838, "learning_rate": 0.00019893671861128103, "loss": 11.6747, "step": 6687 }, { "epoch": 0.13999832537888302, "grad_norm": 0.21791018545627594, "learning_rate": 0.000198936399707197, "loss": 11.6839, "step": 6688 }, { "epoch": 0.1400192581428452, "grad_norm": 1.4878133535385132, "learning_rate": 0.00019893608075555226, "loss": 11.7165, "step": 6689 }, { "epoch": 0.14004019090680733, "grad_norm": 0.37630587816238403, "learning_rate": 0.00019893576175634696, "loss": 11.681, "step": 6690 }, { "epoch": 0.14006112367076948, "grad_norm": 0.24311740696430206, "learning_rate": 0.00019893544270958125, "loss": 11.6728, "step": 6691 }, { "epoch": 0.14008205643473165, "grad_norm": 0.22039352357387543, "learning_rate": 0.0001989351236152553, "loss": 11.6544, "step": 6692 }, { "epoch": 0.1401029891986938, "grad_norm": 0.24018914997577667, "learning_rate": 0.00019893480447336924, "loss": 11.6818, "step": 6693 }, { "epoch": 0.14012392196265594, "grad_norm": 0.2422512173652649, "learning_rate": 0.00019893448528392327, "loss": 11.6816, "step": 6694 }, { "epoch": 0.1401448547266181, "grad_norm": 0.2273750901222229, "learning_rate": 0.0001989341660469175, "loss": 11.683, "step": 6695 }, { "epoch": 0.14016578749058026, "grad_norm": 0.22287045419216156, "learning_rate": 0.00019893384676235206, "loss": 11.6798, "step": 6696 }, { "epoch": 0.1401867202545424, "grad_norm": 0.2773820757865906, "learning_rate": 0.00019893352743022717, "loss": 11.697, "step": 6697 }, { "epoch": 0.14020765301850457, "grad_norm": 0.2536224126815796, "learning_rate": 0.00019893320805054295, "loss": 11.6838, "step": 6698 }, { "epoch": 0.14022858578246672, "grad_norm": 0.2374860942363739, "learning_rate": 0.00019893288862329956, "loss": 11.6725, "step": 6699 }, { "epoch": 0.14024951854642886, "grad_norm": 0.2706301808357239, "learning_rate": 0.00019893256914849713, "loss": 11.6839, "step": 6700 }, { "epoch": 0.14027045131039104, "grad_norm": 0.278328537940979, "learning_rate": 0.00019893224962613584, "loss": 11.6775, "step": 6701 }, { "epoch": 0.14029138407435318, "grad_norm": 0.22091123461723328, "learning_rate": 0.0001989319300562158, "loss": 11.6824, "step": 6702 }, { "epoch": 0.14031231683831533, "grad_norm": 0.25043290853500366, "learning_rate": 0.00019893161043873725, "loss": 11.6688, "step": 6703 }, { "epoch": 0.14033324960227747, "grad_norm": 0.2742457389831543, "learning_rate": 0.00019893129077370024, "loss": 11.6709, "step": 6704 }, { "epoch": 0.14035418236623964, "grad_norm": 0.2650541067123413, "learning_rate": 0.00019893097106110503, "loss": 11.6995, "step": 6705 }, { "epoch": 0.1403751151302018, "grad_norm": 0.3386496603488922, "learning_rate": 0.0001989306513009517, "loss": 11.6866, "step": 6706 }, { "epoch": 0.14039604789416393, "grad_norm": 0.2237377166748047, "learning_rate": 0.00019893033149324043, "loss": 11.6743, "step": 6707 }, { "epoch": 0.1404169806581261, "grad_norm": 0.27017125487327576, "learning_rate": 0.00019893001163797137, "loss": 11.6725, "step": 6708 }, { "epoch": 0.14043791342208825, "grad_norm": 0.2547454833984375, "learning_rate": 0.00019892969173514465, "loss": 11.6775, "step": 6709 }, { "epoch": 0.1404588461860504, "grad_norm": 0.24610359966754913, "learning_rate": 0.00019892937178476043, "loss": 11.6691, "step": 6710 }, { "epoch": 0.14047977895001257, "grad_norm": 0.2569313943386078, "learning_rate": 0.0001989290517868189, "loss": 11.675, "step": 6711 }, { "epoch": 0.1405007117139747, "grad_norm": 0.2348586469888687, "learning_rate": 0.0001989287317413202, "loss": 11.6726, "step": 6712 }, { "epoch": 0.14052164447793686, "grad_norm": 0.32353392243385315, "learning_rate": 0.00019892841164826446, "loss": 11.6921, "step": 6713 }, { "epoch": 0.14054257724189903, "grad_norm": 0.21492575109004974, "learning_rate": 0.00019892809150765187, "loss": 11.6657, "step": 6714 }, { "epoch": 0.14056351000586118, "grad_norm": 0.19514793157577515, "learning_rate": 0.00019892777131948254, "loss": 11.6792, "step": 6715 }, { "epoch": 0.14058444276982332, "grad_norm": 0.21107031404972076, "learning_rate": 0.00019892745108375667, "loss": 11.6743, "step": 6716 }, { "epoch": 0.1406053755337855, "grad_norm": 0.2420702427625656, "learning_rate": 0.00019892713080047438, "loss": 11.6639, "step": 6717 }, { "epoch": 0.14062630829774764, "grad_norm": 0.2909882664680481, "learning_rate": 0.00019892681046963583, "loss": 11.6785, "step": 6718 }, { "epoch": 0.14064724106170978, "grad_norm": 0.36621853709220886, "learning_rate": 0.0001989264900912412, "loss": 11.6875, "step": 6719 }, { "epoch": 0.14066817382567195, "grad_norm": 0.27215275168418884, "learning_rate": 0.00019892616966529062, "loss": 11.6786, "step": 6720 }, { "epoch": 0.1406891065896341, "grad_norm": 0.25024306774139404, "learning_rate": 0.00019892584919178424, "loss": 11.6994, "step": 6721 }, { "epoch": 0.14071003935359624, "grad_norm": 0.2029506266117096, "learning_rate": 0.00019892552867072221, "loss": 11.6893, "step": 6722 }, { "epoch": 0.1407309721175584, "grad_norm": 0.22110435366630554, "learning_rate": 0.00019892520810210472, "loss": 11.6828, "step": 6723 }, { "epoch": 0.14075190488152056, "grad_norm": 0.23562836647033691, "learning_rate": 0.00019892488748593187, "loss": 11.6639, "step": 6724 }, { "epoch": 0.1407728376454827, "grad_norm": 0.2310592383146286, "learning_rate": 0.00019892456682220387, "loss": 11.6884, "step": 6725 }, { "epoch": 0.14079377040944485, "grad_norm": 0.25258317589759827, "learning_rate": 0.00019892424611092082, "loss": 11.6652, "step": 6726 }, { "epoch": 0.14081470317340702, "grad_norm": 0.2328561693429947, "learning_rate": 0.00019892392535208293, "loss": 11.6758, "step": 6727 }, { "epoch": 0.14083563593736917, "grad_norm": 0.2641560435295105, "learning_rate": 0.00019892360454569033, "loss": 11.6829, "step": 6728 }, { "epoch": 0.14085656870133131, "grad_norm": 0.25975024700164795, "learning_rate": 0.00019892328369174316, "loss": 11.6957, "step": 6729 }, { "epoch": 0.1408775014652935, "grad_norm": 0.22795765101909637, "learning_rate": 0.00019892296279024158, "loss": 11.6787, "step": 6730 }, { "epoch": 0.14089843422925563, "grad_norm": 0.21993504464626312, "learning_rate": 0.00019892264184118575, "loss": 11.6824, "step": 6731 }, { "epoch": 0.14091936699321778, "grad_norm": 0.20141565799713135, "learning_rate": 0.00019892232084457582, "loss": 11.6768, "step": 6732 }, { "epoch": 0.14094029975717995, "grad_norm": 0.24504630267620087, "learning_rate": 0.00019892199980041196, "loss": 11.6807, "step": 6733 }, { "epoch": 0.1409612325211421, "grad_norm": 0.2306309938430786, "learning_rate": 0.00019892167870869432, "loss": 11.6745, "step": 6734 }, { "epoch": 0.14098216528510424, "grad_norm": 0.2558354139328003, "learning_rate": 0.00019892135756942303, "loss": 11.6927, "step": 6735 }, { "epoch": 0.1410030980490664, "grad_norm": 0.2762264609336853, "learning_rate": 0.00019892103638259826, "loss": 11.6935, "step": 6736 }, { "epoch": 0.14102403081302856, "grad_norm": 0.18892355263233185, "learning_rate": 0.00019892071514822018, "loss": 11.6655, "step": 6737 }, { "epoch": 0.1410449635769907, "grad_norm": 0.25152304768562317, "learning_rate": 0.00019892039386628892, "loss": 11.6738, "step": 6738 }, { "epoch": 0.14106589634095287, "grad_norm": 0.3415258228778839, "learning_rate": 0.00019892007253680466, "loss": 11.6947, "step": 6739 }, { "epoch": 0.14108682910491502, "grad_norm": 0.22784188389778137, "learning_rate": 0.00019891975115976748, "loss": 11.6752, "step": 6740 }, { "epoch": 0.14110776186887716, "grad_norm": 0.24517184495925903, "learning_rate": 0.00019891942973517766, "loss": 11.6795, "step": 6741 }, { "epoch": 0.1411286946328393, "grad_norm": 0.2281709611415863, "learning_rate": 0.00019891910826303527, "loss": 11.6756, "step": 6742 }, { "epoch": 0.14114962739680148, "grad_norm": 0.23164205253124237, "learning_rate": 0.00019891878674334045, "loss": 11.6849, "step": 6743 }, { "epoch": 0.14117056016076363, "grad_norm": 0.29371392726898193, "learning_rate": 0.00019891846517609341, "loss": 11.6851, "step": 6744 }, { "epoch": 0.14119149292472577, "grad_norm": 0.2399923950433731, "learning_rate": 0.0001989181435612943, "loss": 11.6756, "step": 6745 }, { "epoch": 0.14121242568868794, "grad_norm": 0.3259544372558594, "learning_rate": 0.00019891782189894322, "loss": 11.6876, "step": 6746 }, { "epoch": 0.1412333584526501, "grad_norm": 0.26475808024406433, "learning_rate": 0.0001989175001890404, "loss": 11.6691, "step": 6747 }, { "epoch": 0.14125429121661223, "grad_norm": 0.24827824532985687, "learning_rate": 0.00019891717843158595, "loss": 11.6869, "step": 6748 }, { "epoch": 0.1412752239805744, "grad_norm": 0.1933814287185669, "learning_rate": 0.00019891685662658, "loss": 11.6792, "step": 6749 }, { "epoch": 0.14129615674453655, "grad_norm": 0.2453632950782776, "learning_rate": 0.00019891653477402277, "loss": 11.6771, "step": 6750 }, { "epoch": 0.1413170895084987, "grad_norm": 0.3180151879787445, "learning_rate": 0.00019891621287391434, "loss": 11.6893, "step": 6751 }, { "epoch": 0.14133802227246087, "grad_norm": 0.2613735496997833, "learning_rate": 0.00019891589092625495, "loss": 11.6695, "step": 6752 }, { "epoch": 0.141358955036423, "grad_norm": 0.2384997457265854, "learning_rate": 0.0001989155689310447, "loss": 11.6752, "step": 6753 }, { "epoch": 0.14137988780038516, "grad_norm": 0.26872092485427856, "learning_rate": 0.00019891524688828372, "loss": 11.6925, "step": 6754 }, { "epoch": 0.14140082056434733, "grad_norm": 0.2791878581047058, "learning_rate": 0.0001989149247979722, "loss": 11.6943, "step": 6755 }, { "epoch": 0.14142175332830947, "grad_norm": 0.22049075365066528, "learning_rate": 0.0001989146026601103, "loss": 11.674, "step": 6756 }, { "epoch": 0.14144268609227162, "grad_norm": 0.28010523319244385, "learning_rate": 0.0001989142804746982, "loss": 11.6788, "step": 6757 }, { "epoch": 0.14146361885623376, "grad_norm": 0.20275233685970306, "learning_rate": 0.00019891395824173598, "loss": 11.6914, "step": 6758 }, { "epoch": 0.14148455162019594, "grad_norm": 0.27075445652008057, "learning_rate": 0.00019891363596122387, "loss": 11.6833, "step": 6759 }, { "epoch": 0.14150548438415808, "grad_norm": 0.2246255874633789, "learning_rate": 0.000198913313633162, "loss": 11.6732, "step": 6760 }, { "epoch": 0.14152641714812023, "grad_norm": 0.2689114212989807, "learning_rate": 0.00019891299125755047, "loss": 11.6741, "step": 6761 }, { "epoch": 0.1415473499120824, "grad_norm": 0.2758270502090454, "learning_rate": 0.00019891266883438952, "loss": 11.7003, "step": 6762 }, { "epoch": 0.14156828267604454, "grad_norm": 0.1947350800037384, "learning_rate": 0.00019891234636367928, "loss": 11.6809, "step": 6763 }, { "epoch": 0.1415892154400067, "grad_norm": 0.29181185364723206, "learning_rate": 0.00019891202384541987, "loss": 11.6859, "step": 6764 }, { "epoch": 0.14161014820396886, "grad_norm": 0.22564172744750977, "learning_rate": 0.00019891170127961147, "loss": 11.6737, "step": 6765 }, { "epoch": 0.141631080967931, "grad_norm": 0.20519322156906128, "learning_rate": 0.00019891137866625424, "loss": 11.6824, "step": 6766 }, { "epoch": 0.14165201373189315, "grad_norm": 0.2637771964073181, "learning_rate": 0.00019891105600534833, "loss": 11.6859, "step": 6767 }, { "epoch": 0.14167294649585532, "grad_norm": 0.24242722988128662, "learning_rate": 0.00019891073329689386, "loss": 11.6745, "step": 6768 }, { "epoch": 0.14169387925981747, "grad_norm": 0.22577394545078278, "learning_rate": 0.00019891041054089106, "loss": 11.674, "step": 6769 }, { "epoch": 0.1417148120237796, "grad_norm": 0.36571553349494934, "learning_rate": 0.00019891008773734002, "loss": 11.6939, "step": 6770 }, { "epoch": 0.14173574478774179, "grad_norm": 0.21398839354515076, "learning_rate": 0.00019890976488624092, "loss": 11.6833, "step": 6771 }, { "epoch": 0.14175667755170393, "grad_norm": 0.2488231658935547, "learning_rate": 0.00019890944198759392, "loss": 11.6747, "step": 6772 }, { "epoch": 0.14177761031566607, "grad_norm": 0.18780717253684998, "learning_rate": 0.00019890911904139918, "loss": 11.6631, "step": 6773 }, { "epoch": 0.14179854307962825, "grad_norm": 0.35089629888534546, "learning_rate": 0.00019890879604765683, "loss": 11.6865, "step": 6774 }, { "epoch": 0.1418194758435904, "grad_norm": 0.20783810317516327, "learning_rate": 0.00019890847300636707, "loss": 11.6752, "step": 6775 }, { "epoch": 0.14184040860755254, "grad_norm": 0.317333459854126, "learning_rate": 0.00019890814991752997, "loss": 11.6726, "step": 6776 }, { "epoch": 0.14186134137151468, "grad_norm": 0.2737613022327423, "learning_rate": 0.00019890782678114576, "loss": 11.6865, "step": 6777 }, { "epoch": 0.14188227413547685, "grad_norm": 0.25381606817245483, "learning_rate": 0.0001989075035972146, "loss": 11.6808, "step": 6778 }, { "epoch": 0.141903206899439, "grad_norm": 0.2744264602661133, "learning_rate": 0.0001989071803657366, "loss": 11.6853, "step": 6779 }, { "epoch": 0.14192413966340114, "grad_norm": 0.28140363097190857, "learning_rate": 0.00019890685708671196, "loss": 11.6827, "step": 6780 }, { "epoch": 0.14194507242736332, "grad_norm": 0.21774138510227203, "learning_rate": 0.00019890653376014076, "loss": 11.6782, "step": 6781 }, { "epoch": 0.14196600519132546, "grad_norm": 0.2871921956539154, "learning_rate": 0.00019890621038602327, "loss": 11.6827, "step": 6782 }, { "epoch": 0.1419869379552876, "grad_norm": 0.24440310895442963, "learning_rate": 0.00019890588696435955, "loss": 11.683, "step": 6783 }, { "epoch": 0.14200787071924978, "grad_norm": 0.1891687512397766, "learning_rate": 0.00019890556349514977, "loss": 11.6726, "step": 6784 }, { "epoch": 0.14202880348321192, "grad_norm": 0.2353188395500183, "learning_rate": 0.00019890523997839412, "loss": 11.6797, "step": 6785 }, { "epoch": 0.14204973624717407, "grad_norm": 0.24124562740325928, "learning_rate": 0.00019890491641409274, "loss": 11.684, "step": 6786 }, { "epoch": 0.14207066901113624, "grad_norm": 0.25604087114334106, "learning_rate": 0.00019890459280224581, "loss": 11.6795, "step": 6787 }, { "epoch": 0.1420916017750984, "grad_norm": 0.2795744836330414, "learning_rate": 0.0001989042691428534, "loss": 11.6966, "step": 6788 }, { "epoch": 0.14211253453906053, "grad_norm": 0.23827527463436127, "learning_rate": 0.00019890394543591578, "loss": 11.6731, "step": 6789 }, { "epoch": 0.1421334673030227, "grad_norm": 0.26233503222465515, "learning_rate": 0.00019890362168143303, "loss": 11.6959, "step": 6790 }, { "epoch": 0.14215440006698485, "grad_norm": 0.2688696086406708, "learning_rate": 0.00019890329787940534, "loss": 11.6772, "step": 6791 }, { "epoch": 0.142175332830947, "grad_norm": 0.23585288226604462, "learning_rate": 0.00019890297402983285, "loss": 11.691, "step": 6792 }, { "epoch": 0.14219626559490914, "grad_norm": 0.3370892405509949, "learning_rate": 0.00019890265013271572, "loss": 11.6714, "step": 6793 }, { "epoch": 0.1422171983588713, "grad_norm": 0.26339441537857056, "learning_rate": 0.0001989023261880541, "loss": 11.6795, "step": 6794 }, { "epoch": 0.14223813112283346, "grad_norm": 0.2215748131275177, "learning_rate": 0.00019890200219584815, "loss": 11.6789, "step": 6795 }, { "epoch": 0.1422590638867956, "grad_norm": 0.31467878818511963, "learning_rate": 0.00019890167815609798, "loss": 11.6826, "step": 6796 }, { "epoch": 0.14227999665075777, "grad_norm": 0.23179756104946136, "learning_rate": 0.00019890135406880387, "loss": 11.6808, "step": 6797 }, { "epoch": 0.14230092941471992, "grad_norm": 0.260032594203949, "learning_rate": 0.00019890102993396584, "loss": 11.6824, "step": 6798 }, { "epoch": 0.14232186217868206, "grad_norm": 0.23980821669101715, "learning_rate": 0.00019890070575158414, "loss": 11.676, "step": 6799 }, { "epoch": 0.14234279494264424, "grad_norm": 0.19742189347743988, "learning_rate": 0.00019890038152165887, "loss": 11.6842, "step": 6800 }, { "epoch": 0.14236372770660638, "grad_norm": 0.244977667927742, "learning_rate": 0.0001989000572441902, "loss": 11.6759, "step": 6801 }, { "epoch": 0.14238466047056852, "grad_norm": 0.3128298819065094, "learning_rate": 0.0001988997329191783, "loss": 11.6901, "step": 6802 }, { "epoch": 0.1424055932345307, "grad_norm": 0.3583921790122986, "learning_rate": 0.0001988994085466233, "loss": 11.6806, "step": 6803 }, { "epoch": 0.14242652599849284, "grad_norm": 0.26216086745262146, "learning_rate": 0.00019889908412652537, "loss": 11.6715, "step": 6804 }, { "epoch": 0.142447458762455, "grad_norm": 0.2465658336877823, "learning_rate": 0.0001988987596588847, "loss": 11.6857, "step": 6805 }, { "epoch": 0.14246839152641716, "grad_norm": 0.2315993309020996, "learning_rate": 0.0001988984351437014, "loss": 11.6821, "step": 6806 }, { "epoch": 0.1424893242903793, "grad_norm": 0.2527305781841278, "learning_rate": 0.00019889811058097564, "loss": 11.6833, "step": 6807 }, { "epoch": 0.14251025705434145, "grad_norm": 0.23287732899188995, "learning_rate": 0.0001988977859707076, "loss": 11.6739, "step": 6808 }, { "epoch": 0.14253118981830362, "grad_norm": 0.24966487288475037, "learning_rate": 0.00019889746131289738, "loss": 11.6929, "step": 6809 }, { "epoch": 0.14255212258226577, "grad_norm": 0.2835473418235779, "learning_rate": 0.00019889713660754517, "loss": 11.6841, "step": 6810 }, { "epoch": 0.1425730553462279, "grad_norm": 0.27038493752479553, "learning_rate": 0.00019889681185465113, "loss": 11.6969, "step": 6811 }, { "epoch": 0.14259398811019006, "grad_norm": 0.21026167273521423, "learning_rate": 0.00019889648705421543, "loss": 11.6766, "step": 6812 }, { "epoch": 0.14261492087415223, "grad_norm": 0.2594211995601654, "learning_rate": 0.00019889616220623813, "loss": 11.6812, "step": 6813 }, { "epoch": 0.14263585363811437, "grad_norm": 0.2597777247428894, "learning_rate": 0.00019889583731071958, "loss": 11.6806, "step": 6814 }, { "epoch": 0.14265678640207652, "grad_norm": 0.25383082032203674, "learning_rate": 0.00019889551236765973, "loss": 11.6903, "step": 6815 }, { "epoch": 0.1426777191660387, "grad_norm": 0.21501490473747253, "learning_rate": 0.00019889518737705887, "loss": 11.6827, "step": 6816 }, { "epoch": 0.14269865193000084, "grad_norm": 0.21111522614955902, "learning_rate": 0.00019889486233891707, "loss": 11.688, "step": 6817 }, { "epoch": 0.14271958469396298, "grad_norm": 0.27020707726478577, "learning_rate": 0.00019889453725323457, "loss": 11.6734, "step": 6818 }, { "epoch": 0.14274051745792515, "grad_norm": 0.27591511607170105, "learning_rate": 0.00019889421212001147, "loss": 11.6602, "step": 6819 }, { "epoch": 0.1427614502218873, "grad_norm": 0.41743963956832886, "learning_rate": 0.00019889388693924794, "loss": 11.6765, "step": 6820 }, { "epoch": 0.14278238298584944, "grad_norm": 0.23329085111618042, "learning_rate": 0.00019889356171094412, "loss": 11.6726, "step": 6821 }, { "epoch": 0.14280331574981162, "grad_norm": 0.27860891819000244, "learning_rate": 0.0001988932364351002, "loss": 11.6834, "step": 6822 }, { "epoch": 0.14282424851377376, "grad_norm": 0.25065499544143677, "learning_rate": 0.00019889291111171634, "loss": 11.6861, "step": 6823 }, { "epoch": 0.1428451812777359, "grad_norm": 0.2568513751029968, "learning_rate": 0.00019889258574079264, "loss": 11.6671, "step": 6824 }, { "epoch": 0.14286611404169808, "grad_norm": 0.24750447273254395, "learning_rate": 0.0001988922603223293, "loss": 11.6754, "step": 6825 }, { "epoch": 0.14288704680566022, "grad_norm": 0.19408515095710754, "learning_rate": 0.00019889193485632647, "loss": 11.6676, "step": 6826 }, { "epoch": 0.14290797956962237, "grad_norm": 0.2260901927947998, "learning_rate": 0.00019889160934278432, "loss": 11.6686, "step": 6827 }, { "epoch": 0.14292891233358454, "grad_norm": 0.240731880068779, "learning_rate": 0.00019889128378170297, "loss": 11.6906, "step": 6828 }, { "epoch": 0.14294984509754669, "grad_norm": 0.2555024325847626, "learning_rate": 0.00019889095817308264, "loss": 11.6646, "step": 6829 }, { "epoch": 0.14297077786150883, "grad_norm": 0.27973103523254395, "learning_rate": 0.0001988906325169234, "loss": 11.6868, "step": 6830 }, { "epoch": 0.14299171062547097, "grad_norm": 0.23508082330226898, "learning_rate": 0.00019889030681322548, "loss": 11.678, "step": 6831 }, { "epoch": 0.14301264338943315, "grad_norm": 0.32488593459129333, "learning_rate": 0.00019888998106198897, "loss": 11.6716, "step": 6832 }, { "epoch": 0.1430335761533953, "grad_norm": 0.31057608127593994, "learning_rate": 0.0001988896552632141, "loss": 11.6824, "step": 6833 }, { "epoch": 0.14305450891735744, "grad_norm": 0.3049955666065216, "learning_rate": 0.00019888932941690098, "loss": 11.6742, "step": 6834 }, { "epoch": 0.1430754416813196, "grad_norm": 0.1996726542711258, "learning_rate": 0.00019888900352304977, "loss": 11.6816, "step": 6835 }, { "epoch": 0.14309637444528175, "grad_norm": 0.31225791573524475, "learning_rate": 0.00019888867758166063, "loss": 11.6755, "step": 6836 }, { "epoch": 0.1431173072092439, "grad_norm": 0.21882867813110352, "learning_rate": 0.00019888835159273372, "loss": 11.6794, "step": 6837 }, { "epoch": 0.14313823997320607, "grad_norm": 0.26793524622917175, "learning_rate": 0.00019888802555626922, "loss": 11.672, "step": 6838 }, { "epoch": 0.14315917273716822, "grad_norm": 0.18707959353923798, "learning_rate": 0.00019888769947226727, "loss": 11.6782, "step": 6839 }, { "epoch": 0.14318010550113036, "grad_norm": 0.2745767831802368, "learning_rate": 0.00019888737334072798, "loss": 11.6825, "step": 6840 }, { "epoch": 0.14320103826509253, "grad_norm": 0.20974859595298767, "learning_rate": 0.00019888704716165156, "loss": 11.675, "step": 6841 }, { "epoch": 0.14322197102905468, "grad_norm": 0.2190553992986679, "learning_rate": 0.00019888672093503818, "loss": 11.6814, "step": 6842 }, { "epoch": 0.14324290379301682, "grad_norm": 0.24497590959072113, "learning_rate": 0.00019888639466088796, "loss": 11.677, "step": 6843 }, { "epoch": 0.143263836556979, "grad_norm": 0.24496544897556305, "learning_rate": 0.00019888606833920106, "loss": 11.6728, "step": 6844 }, { "epoch": 0.14328476932094114, "grad_norm": 0.26453325152397156, "learning_rate": 0.00019888574196997766, "loss": 11.6829, "step": 6845 }, { "epoch": 0.14330570208490329, "grad_norm": 1.1912708282470703, "learning_rate": 0.00019888541555321788, "loss": 11.6512, "step": 6846 }, { "epoch": 0.14332663484886543, "grad_norm": 0.27138134837150574, "learning_rate": 0.0001988850890889219, "loss": 11.688, "step": 6847 }, { "epoch": 0.1433475676128276, "grad_norm": 0.20171618461608887, "learning_rate": 0.0001988847625770899, "loss": 11.6786, "step": 6848 }, { "epoch": 0.14336850037678975, "grad_norm": 0.24543800950050354, "learning_rate": 0.000198884436017722, "loss": 11.689, "step": 6849 }, { "epoch": 0.1433894331407519, "grad_norm": 0.30228400230407715, "learning_rate": 0.00019888410941081838, "loss": 11.686, "step": 6850 }, { "epoch": 0.14341036590471407, "grad_norm": 0.20458097755908966, "learning_rate": 0.0001988837827563792, "loss": 11.6774, "step": 6851 }, { "epoch": 0.1434312986686762, "grad_norm": 0.2140958458185196, "learning_rate": 0.00019888345605440457, "loss": 11.6702, "step": 6852 }, { "epoch": 0.14345223143263836, "grad_norm": 0.2693387567996979, "learning_rate": 0.0001988831293048947, "loss": 11.678, "step": 6853 }, { "epoch": 0.14347316419660053, "grad_norm": 0.4141653776168823, "learning_rate": 0.00019888280250784972, "loss": 11.6954, "step": 6854 }, { "epoch": 0.14349409696056267, "grad_norm": 0.22089512646198273, "learning_rate": 0.0001988824756632698, "loss": 11.676, "step": 6855 }, { "epoch": 0.14351502972452482, "grad_norm": 0.2734401822090149, "learning_rate": 0.0001988821487711551, "loss": 11.6775, "step": 6856 }, { "epoch": 0.143535962488487, "grad_norm": 0.23896148800849915, "learning_rate": 0.00019888182183150574, "loss": 11.6788, "step": 6857 }, { "epoch": 0.14355689525244913, "grad_norm": 0.2552521526813507, "learning_rate": 0.0001988814948443219, "loss": 11.6745, "step": 6858 }, { "epoch": 0.14357782801641128, "grad_norm": 0.2183409482240677, "learning_rate": 0.0001988811678096038, "loss": 11.6745, "step": 6859 }, { "epoch": 0.14359876078037345, "grad_norm": 0.22953787446022034, "learning_rate": 0.00019888084072735153, "loss": 11.6765, "step": 6860 }, { "epoch": 0.1436196935443356, "grad_norm": 0.24190960824489594, "learning_rate": 0.0001988805135975652, "loss": 11.6833, "step": 6861 }, { "epoch": 0.14364062630829774, "grad_norm": 0.23506738245487213, "learning_rate": 0.00019888018642024507, "loss": 11.6851, "step": 6862 }, { "epoch": 0.14366155907225991, "grad_norm": 0.2723444998264313, "learning_rate": 0.00019887985919539127, "loss": 11.6774, "step": 6863 }, { "epoch": 0.14368249183622206, "grad_norm": 0.2166530191898346, "learning_rate": 0.0001988795319230039, "loss": 11.6908, "step": 6864 }, { "epoch": 0.1437034246001842, "grad_norm": 0.2211328148841858, "learning_rate": 0.00019887920460308318, "loss": 11.6841, "step": 6865 }, { "epoch": 0.14372435736414635, "grad_norm": 0.2825120687484741, "learning_rate": 0.00019887887723562923, "loss": 11.6841, "step": 6866 }, { "epoch": 0.14374529012810852, "grad_norm": 0.22277896106243134, "learning_rate": 0.00019887854982064224, "loss": 11.6747, "step": 6867 }, { "epoch": 0.14376622289207067, "grad_norm": 0.29155391454696655, "learning_rate": 0.00019887822235812236, "loss": 11.6704, "step": 6868 }, { "epoch": 0.1437871556560328, "grad_norm": 0.23323827981948853, "learning_rate": 0.00019887789484806972, "loss": 11.6858, "step": 6869 }, { "epoch": 0.14380808841999498, "grad_norm": 0.2194225937128067, "learning_rate": 0.00019887756729048447, "loss": 11.6853, "step": 6870 }, { "epoch": 0.14382902118395713, "grad_norm": 0.2126736044883728, "learning_rate": 0.00019887723968536682, "loss": 11.6836, "step": 6871 }, { "epoch": 0.14384995394791927, "grad_norm": 0.20834754407405853, "learning_rate": 0.00019887691203271688, "loss": 11.6755, "step": 6872 }, { "epoch": 0.14387088671188145, "grad_norm": 0.20304414629936218, "learning_rate": 0.00019887658433253484, "loss": 11.684, "step": 6873 }, { "epoch": 0.1438918194758436, "grad_norm": 0.2788442075252533, "learning_rate": 0.00019887625658482087, "loss": 11.6959, "step": 6874 }, { "epoch": 0.14391275223980574, "grad_norm": 0.37510839104652405, "learning_rate": 0.00019887592878957506, "loss": 11.6896, "step": 6875 }, { "epoch": 0.1439336850037679, "grad_norm": 0.30937066674232483, "learning_rate": 0.00019887560094679764, "loss": 11.676, "step": 6876 }, { "epoch": 0.14395461776773005, "grad_norm": 0.2295713573694229, "learning_rate": 0.0001988752730564887, "loss": 11.7018, "step": 6877 }, { "epoch": 0.1439755505316922, "grad_norm": 0.24134978652000427, "learning_rate": 0.00019887494511864847, "loss": 11.6913, "step": 6878 }, { "epoch": 0.14399648329565437, "grad_norm": 0.25146281719207764, "learning_rate": 0.00019887461713327705, "loss": 11.6713, "step": 6879 }, { "epoch": 0.14401741605961652, "grad_norm": 0.2720869779586792, "learning_rate": 0.00019887428910037461, "loss": 11.6798, "step": 6880 }, { "epoch": 0.14403834882357866, "grad_norm": 0.2541080117225647, "learning_rate": 0.00019887396101994135, "loss": 11.6885, "step": 6881 }, { "epoch": 0.1440592815875408, "grad_norm": 0.3018304109573364, "learning_rate": 0.0001988736328919774, "loss": 11.686, "step": 6882 }, { "epoch": 0.14408021435150298, "grad_norm": 0.2876577377319336, "learning_rate": 0.00019887330471648289, "loss": 11.6876, "step": 6883 }, { "epoch": 0.14410114711546512, "grad_norm": 0.2743697762489319, "learning_rate": 0.000198872976493458, "loss": 11.678, "step": 6884 }, { "epoch": 0.14412207987942727, "grad_norm": 0.24171747267246246, "learning_rate": 0.0001988726482229029, "loss": 11.6811, "step": 6885 }, { "epoch": 0.14414301264338944, "grad_norm": 0.21884790062904358, "learning_rate": 0.00019887231990481773, "loss": 11.6683, "step": 6886 }, { "epoch": 0.14416394540735158, "grad_norm": 0.21345463395118713, "learning_rate": 0.00019887199153920267, "loss": 11.6749, "step": 6887 }, { "epoch": 0.14418487817131373, "grad_norm": 0.2323005348443985, "learning_rate": 0.00019887166312605785, "loss": 11.6841, "step": 6888 }, { "epoch": 0.1442058109352759, "grad_norm": 0.2158021181821823, "learning_rate": 0.00019887133466538345, "loss": 11.6758, "step": 6889 }, { "epoch": 0.14422674369923805, "grad_norm": 0.2976592183113098, "learning_rate": 0.0001988710061571796, "loss": 11.6986, "step": 6890 }, { "epoch": 0.1442476764632002, "grad_norm": 0.24178695678710938, "learning_rate": 0.0001988706776014465, "loss": 11.658, "step": 6891 }, { "epoch": 0.14426860922716236, "grad_norm": 0.24245499074459076, "learning_rate": 0.00019887034899818425, "loss": 11.6664, "step": 6892 }, { "epoch": 0.1442895419911245, "grad_norm": 0.23414744436740875, "learning_rate": 0.00019887002034739307, "loss": 11.6888, "step": 6893 }, { "epoch": 0.14431047475508665, "grad_norm": 0.7609760761260986, "learning_rate": 0.00019886969164907307, "loss": 11.6989, "step": 6894 }, { "epoch": 0.14433140751904883, "grad_norm": 0.24319539964199066, "learning_rate": 0.0001988693629032244, "loss": 11.6858, "step": 6895 }, { "epoch": 0.14435234028301097, "grad_norm": 0.23040421307086945, "learning_rate": 0.0001988690341098473, "loss": 11.6839, "step": 6896 }, { "epoch": 0.14437327304697312, "grad_norm": 0.20641346275806427, "learning_rate": 0.00019886870526894187, "loss": 11.6809, "step": 6897 }, { "epoch": 0.1443942058109353, "grad_norm": 0.24272727966308594, "learning_rate": 0.00019886837638050827, "loss": 11.6942, "step": 6898 }, { "epoch": 0.14441513857489743, "grad_norm": 0.23771452903747559, "learning_rate": 0.00019886804744454666, "loss": 11.6858, "step": 6899 }, { "epoch": 0.14443607133885958, "grad_norm": 0.24642261862754822, "learning_rate": 0.00019886771846105717, "loss": 11.6904, "step": 6900 }, { "epoch": 0.14445700410282172, "grad_norm": 0.2343604862689972, "learning_rate": 0.00019886738943004, "loss": 11.6864, "step": 6901 }, { "epoch": 0.1444779368667839, "grad_norm": 0.22800631821155548, "learning_rate": 0.00019886706035149532, "loss": 11.6839, "step": 6902 }, { "epoch": 0.14449886963074604, "grad_norm": 0.2899189591407776, "learning_rate": 0.00019886673122542322, "loss": 11.6967, "step": 6903 }, { "epoch": 0.14451980239470819, "grad_norm": 0.21913936734199524, "learning_rate": 0.00019886640205182392, "loss": 11.6658, "step": 6904 }, { "epoch": 0.14454073515867036, "grad_norm": 0.2207542359828949, "learning_rate": 0.00019886607283069757, "loss": 11.6784, "step": 6905 }, { "epoch": 0.1445616679226325, "grad_norm": 0.3008996546268463, "learning_rate": 0.00019886574356204432, "loss": 11.6845, "step": 6906 }, { "epoch": 0.14458260068659465, "grad_norm": 0.21467706561088562, "learning_rate": 0.0001988654142458643, "loss": 11.6773, "step": 6907 }, { "epoch": 0.14460353345055682, "grad_norm": 0.22682161629199982, "learning_rate": 0.00019886508488215773, "loss": 11.6784, "step": 6908 }, { "epoch": 0.14462446621451897, "grad_norm": 0.26550424098968506, "learning_rate": 0.00019886475547092468, "loss": 11.6766, "step": 6909 }, { "epoch": 0.1446453989784811, "grad_norm": 0.2611193060874939, "learning_rate": 0.0001988644260121654, "loss": 11.6659, "step": 6910 }, { "epoch": 0.14466633174244328, "grad_norm": 0.32936787605285645, "learning_rate": 0.00019886409650588001, "loss": 11.6869, "step": 6911 }, { "epoch": 0.14468726450640543, "grad_norm": 0.22639118134975433, "learning_rate": 0.00019886376695206866, "loss": 11.6753, "step": 6912 }, { "epoch": 0.14470819727036757, "grad_norm": 0.1939454972743988, "learning_rate": 0.00019886343735073153, "loss": 11.6885, "step": 6913 }, { "epoch": 0.14472913003432974, "grad_norm": 0.24073466658592224, "learning_rate": 0.00019886310770186874, "loss": 11.6724, "step": 6914 }, { "epoch": 0.1447500627982919, "grad_norm": 0.22943738102912903, "learning_rate": 0.00019886277800548047, "loss": 11.687, "step": 6915 }, { "epoch": 0.14477099556225403, "grad_norm": 0.3107089400291443, "learning_rate": 0.0001988624482615669, "loss": 11.6956, "step": 6916 }, { "epoch": 0.1447919283262162, "grad_norm": 0.2404617965221405, "learning_rate": 0.00019886211847012817, "loss": 11.6781, "step": 6917 }, { "epoch": 0.14481286109017835, "grad_norm": 0.2719407081604004, "learning_rate": 0.00019886178863116443, "loss": 11.68, "step": 6918 }, { "epoch": 0.1448337938541405, "grad_norm": 0.22218312323093414, "learning_rate": 0.00019886145874467586, "loss": 11.6893, "step": 6919 }, { "epoch": 0.14485472661810264, "grad_norm": 0.284148246049881, "learning_rate": 0.0001988611288106626, "loss": 11.6664, "step": 6920 }, { "epoch": 0.14487565938206481, "grad_norm": 0.23791497945785522, "learning_rate": 0.0001988607988291248, "loss": 11.6742, "step": 6921 }, { "epoch": 0.14489659214602696, "grad_norm": 0.2341720163822174, "learning_rate": 0.00019886046880006263, "loss": 11.6808, "step": 6922 }, { "epoch": 0.1449175249099891, "grad_norm": 0.20904280245304108, "learning_rate": 0.00019886013872347626, "loss": 11.6745, "step": 6923 }, { "epoch": 0.14493845767395128, "grad_norm": 0.22605837881565094, "learning_rate": 0.00019885980859936584, "loss": 11.679, "step": 6924 }, { "epoch": 0.14495939043791342, "grad_norm": 0.28336432576179504, "learning_rate": 0.00019885947842773153, "loss": 11.683, "step": 6925 }, { "epoch": 0.14498032320187557, "grad_norm": 0.2586427330970764, "learning_rate": 0.0001988591482085735, "loss": 11.6797, "step": 6926 }, { "epoch": 0.14500125596583774, "grad_norm": 0.2628691494464874, "learning_rate": 0.00019885881794189187, "loss": 11.6894, "step": 6927 }, { "epoch": 0.14502218872979988, "grad_norm": 0.22071348130702972, "learning_rate": 0.00019885848762768685, "loss": 11.6694, "step": 6928 }, { "epoch": 0.14504312149376203, "grad_norm": 0.2077094465494156, "learning_rate": 0.00019885815726595856, "loss": 11.666, "step": 6929 }, { "epoch": 0.1450640542577242, "grad_norm": 0.21869462728500366, "learning_rate": 0.00019885782685670716, "loss": 11.6678, "step": 6930 }, { "epoch": 0.14508498702168635, "grad_norm": 0.22087153792381287, "learning_rate": 0.00019885749639993286, "loss": 11.6787, "step": 6931 }, { "epoch": 0.1451059197856485, "grad_norm": 0.20496542751789093, "learning_rate": 0.00019885716589563575, "loss": 11.6682, "step": 6932 }, { "epoch": 0.14512685254961066, "grad_norm": 0.2243027687072754, "learning_rate": 0.00019885683534381606, "loss": 11.6652, "step": 6933 }, { "epoch": 0.1451477853135728, "grad_norm": 0.21739184856414795, "learning_rate": 0.00019885650474447383, "loss": 11.6779, "step": 6934 }, { "epoch": 0.14516871807753495, "grad_norm": 0.7591153979301453, "learning_rate": 0.00019885617409760937, "loss": 11.7104, "step": 6935 }, { "epoch": 0.1451896508414971, "grad_norm": 0.1983477771282196, "learning_rate": 0.00019885584340322273, "loss": 11.6751, "step": 6936 }, { "epoch": 0.14521058360545927, "grad_norm": 0.2024286985397339, "learning_rate": 0.00019885551266131412, "loss": 11.6755, "step": 6937 }, { "epoch": 0.14523151636942142, "grad_norm": 0.2620507776737213, "learning_rate": 0.0001988551818718837, "loss": 11.6871, "step": 6938 }, { "epoch": 0.14525244913338356, "grad_norm": 0.24397817254066467, "learning_rate": 0.00019885485103493156, "loss": 11.6737, "step": 6939 }, { "epoch": 0.14527338189734573, "grad_norm": 0.24081234633922577, "learning_rate": 0.00019885452015045796, "loss": 11.682, "step": 6940 }, { "epoch": 0.14529431466130788, "grad_norm": 0.22689631581306458, "learning_rate": 0.000198854189218463, "loss": 11.6852, "step": 6941 }, { "epoch": 0.14531524742527002, "grad_norm": 0.3086024820804596, "learning_rate": 0.00019885385823894684, "loss": 11.7045, "step": 6942 }, { "epoch": 0.1453361801892322, "grad_norm": 0.19116713106632233, "learning_rate": 0.00019885352721190963, "loss": 11.687, "step": 6943 }, { "epoch": 0.14535711295319434, "grad_norm": 0.31498074531555176, "learning_rate": 0.00019885319613735161, "loss": 11.6903, "step": 6944 }, { "epoch": 0.14537804571715648, "grad_norm": 0.7345612645149231, "learning_rate": 0.00019885286501527283, "loss": 11.736, "step": 6945 }, { "epoch": 0.14539897848111866, "grad_norm": 0.22060415148735046, "learning_rate": 0.0001988525338456735, "loss": 11.6758, "step": 6946 }, { "epoch": 0.1454199112450808, "grad_norm": 0.26174211502075195, "learning_rate": 0.0001988522026285538, "loss": 11.686, "step": 6947 }, { "epoch": 0.14544084400904295, "grad_norm": 0.23160967230796814, "learning_rate": 0.0001988518713639138, "loss": 11.6563, "step": 6948 }, { "epoch": 0.14546177677300512, "grad_norm": 0.3125200569629669, "learning_rate": 0.0001988515400517538, "loss": 11.6838, "step": 6949 }, { "epoch": 0.14548270953696726, "grad_norm": 0.21957190334796906, "learning_rate": 0.00019885120869207385, "loss": 11.6822, "step": 6950 }, { "epoch": 0.1455036423009294, "grad_norm": 0.20946942269802094, "learning_rate": 0.00019885087728487415, "loss": 11.6776, "step": 6951 }, { "epoch": 0.14552457506489158, "grad_norm": 0.26462408900260925, "learning_rate": 0.00019885054583015485, "loss": 11.6963, "step": 6952 }, { "epoch": 0.14554550782885373, "grad_norm": 0.24855124950408936, "learning_rate": 0.0001988502143279161, "loss": 11.6735, "step": 6953 }, { "epoch": 0.14556644059281587, "grad_norm": 0.2722248136997223, "learning_rate": 0.0001988498827781581, "loss": 11.7055, "step": 6954 }, { "epoch": 0.14558737335677802, "grad_norm": 0.25465622544288635, "learning_rate": 0.00019884955118088096, "loss": 11.6753, "step": 6955 }, { "epoch": 0.1456083061207402, "grad_norm": 0.2240130752325058, "learning_rate": 0.0001988492195360849, "loss": 11.6867, "step": 6956 }, { "epoch": 0.14562923888470233, "grad_norm": 0.355319619178772, "learning_rate": 0.00019884888784376997, "loss": 11.6899, "step": 6957 }, { "epoch": 0.14565017164866448, "grad_norm": 0.2814713418483734, "learning_rate": 0.00019884855610393642, "loss": 11.7011, "step": 6958 }, { "epoch": 0.14567110441262665, "grad_norm": 0.23419110476970673, "learning_rate": 0.0001988482243165844, "loss": 11.6903, "step": 6959 }, { "epoch": 0.1456920371765888, "grad_norm": 0.24005641043186188, "learning_rate": 0.00019884789248171406, "loss": 11.6632, "step": 6960 }, { "epoch": 0.14571296994055094, "grad_norm": 0.27135157585144043, "learning_rate": 0.00019884756059932558, "loss": 11.6883, "step": 6961 }, { "epoch": 0.1457339027045131, "grad_norm": 0.22549104690551758, "learning_rate": 0.00019884722866941905, "loss": 11.6695, "step": 6962 }, { "epoch": 0.14575483546847526, "grad_norm": 0.3330388367176056, "learning_rate": 0.0001988468966919947, "loss": 11.6922, "step": 6963 }, { "epoch": 0.1457757682324374, "grad_norm": 0.23663999140262604, "learning_rate": 0.00019884656466705267, "loss": 11.6895, "step": 6964 }, { "epoch": 0.14579670099639958, "grad_norm": 0.24334405362606049, "learning_rate": 0.0001988462325945931, "loss": 11.6679, "step": 6965 }, { "epoch": 0.14581763376036172, "grad_norm": 0.27406933903694153, "learning_rate": 0.0001988459004746162, "loss": 11.6984, "step": 6966 }, { "epoch": 0.14583856652432386, "grad_norm": 0.22059297561645508, "learning_rate": 0.00019884556830712208, "loss": 11.6848, "step": 6967 }, { "epoch": 0.14585949928828604, "grad_norm": 0.3451653718948364, "learning_rate": 0.00019884523609211088, "loss": 11.6724, "step": 6968 }, { "epoch": 0.14588043205224818, "grad_norm": 0.30687910318374634, "learning_rate": 0.00019884490382958281, "loss": 11.6688, "step": 6969 }, { "epoch": 0.14590136481621033, "grad_norm": 0.21632970869541168, "learning_rate": 0.00019884457151953804, "loss": 11.6697, "step": 6970 }, { "epoch": 0.1459222975801725, "grad_norm": 0.2397616058588028, "learning_rate": 0.00019884423916197665, "loss": 11.6963, "step": 6971 }, { "epoch": 0.14594323034413464, "grad_norm": 0.32044145464897156, "learning_rate": 0.0001988439067568989, "loss": 11.6977, "step": 6972 }, { "epoch": 0.1459641631080968, "grad_norm": 0.19498099386692047, "learning_rate": 0.00019884357430430488, "loss": 11.6805, "step": 6973 }, { "epoch": 0.14598509587205893, "grad_norm": 0.26804742217063904, "learning_rate": 0.0001988432418041948, "loss": 11.6861, "step": 6974 }, { "epoch": 0.1460060286360211, "grad_norm": 0.23629224300384521, "learning_rate": 0.00019884290925656876, "loss": 11.675, "step": 6975 }, { "epoch": 0.14602696139998325, "grad_norm": 0.25203049182891846, "learning_rate": 0.000198842576661427, "loss": 11.6882, "step": 6976 }, { "epoch": 0.1460478941639454, "grad_norm": 0.22328446805477142, "learning_rate": 0.0001988422440187696, "loss": 11.6834, "step": 6977 }, { "epoch": 0.14606882692790757, "grad_norm": 0.2644648551940918, "learning_rate": 0.00019884191132859674, "loss": 11.6917, "step": 6978 }, { "epoch": 0.14608975969186971, "grad_norm": 0.27991876006126404, "learning_rate": 0.0001988415785909086, "loss": 11.6788, "step": 6979 }, { "epoch": 0.14611069245583186, "grad_norm": 0.30047476291656494, "learning_rate": 0.00019884124580570536, "loss": 11.6984, "step": 6980 }, { "epoch": 0.14613162521979403, "grad_norm": 0.24419407546520233, "learning_rate": 0.00019884091297298713, "loss": 11.68, "step": 6981 }, { "epoch": 0.14615255798375618, "grad_norm": 0.2733045816421509, "learning_rate": 0.0001988405800927541, "loss": 11.6666, "step": 6982 }, { "epoch": 0.14617349074771832, "grad_norm": 0.3231927454471588, "learning_rate": 0.0001988402471650064, "loss": 11.689, "step": 6983 }, { "epoch": 0.1461944235116805, "grad_norm": 0.22309361398220062, "learning_rate": 0.00019883991418974425, "loss": 11.6637, "step": 6984 }, { "epoch": 0.14621535627564264, "grad_norm": 0.36040252447128296, "learning_rate": 0.00019883958116696774, "loss": 11.6981, "step": 6985 }, { "epoch": 0.14623628903960478, "grad_norm": 0.2689407765865326, "learning_rate": 0.00019883924809667707, "loss": 11.674, "step": 6986 }, { "epoch": 0.14625722180356696, "grad_norm": 0.2433033436536789, "learning_rate": 0.00019883891497887238, "loss": 11.6801, "step": 6987 }, { "epoch": 0.1462781545675291, "grad_norm": 0.3646934926509857, "learning_rate": 0.0001988385818135539, "loss": 11.6839, "step": 6988 }, { "epoch": 0.14629908733149125, "grad_norm": 0.2338895946741104, "learning_rate": 0.00019883824860072167, "loss": 11.688, "step": 6989 }, { "epoch": 0.1463200200954534, "grad_norm": 0.21390584111213684, "learning_rate": 0.00019883791534037592, "loss": 11.6951, "step": 6990 }, { "epoch": 0.14634095285941556, "grad_norm": 0.2353479564189911, "learning_rate": 0.00019883758203251682, "loss": 11.6658, "step": 6991 }, { "epoch": 0.1463618856233777, "grad_norm": 0.38970324397087097, "learning_rate": 0.00019883724867714452, "loss": 11.6761, "step": 6992 }, { "epoch": 0.14638281838733985, "grad_norm": 0.255014032125473, "learning_rate": 0.00019883691527425917, "loss": 11.6763, "step": 6993 }, { "epoch": 0.14640375115130203, "grad_norm": 0.22615525126457214, "learning_rate": 0.00019883658182386093, "loss": 11.6654, "step": 6994 }, { "epoch": 0.14642468391526417, "grad_norm": 0.2608318030834198, "learning_rate": 0.00019883624832594995, "loss": 11.7059, "step": 6995 }, { "epoch": 0.14644561667922631, "grad_norm": 0.22750212252140045, "learning_rate": 0.00019883591478052644, "loss": 11.6608, "step": 6996 }, { "epoch": 0.1464665494431885, "grad_norm": 0.3612581193447113, "learning_rate": 0.0001988355811875905, "loss": 11.69, "step": 6997 }, { "epoch": 0.14648748220715063, "grad_norm": 0.23521679639816284, "learning_rate": 0.0001988352475471423, "loss": 11.6665, "step": 6998 }, { "epoch": 0.14650841497111278, "grad_norm": 0.26345309615135193, "learning_rate": 0.00019883491385918207, "loss": 11.6761, "step": 6999 }, { "epoch": 0.14652934773507495, "grad_norm": 0.2378947138786316, "learning_rate": 0.00019883458012370987, "loss": 11.6783, "step": 7000 }, { "epoch": 0.14652934773507495, "eval_loss": 11.681327819824219, "eval_runtime": 34.2933, "eval_samples_per_second": 28.023, "eval_steps_per_second": 7.028, "step": 7000 }, { "epoch": 0.1465502804990371, "grad_norm": 0.3358388841152191, "learning_rate": 0.00019883424634072592, "loss": 11.6848, "step": 7001 }, { "epoch": 0.14657121326299924, "grad_norm": 0.24007880687713623, "learning_rate": 0.00019883391251023037, "loss": 11.6828, "step": 7002 }, { "epoch": 0.1465921460269614, "grad_norm": 0.21889731287956238, "learning_rate": 0.0001988335786322234, "loss": 11.6805, "step": 7003 }, { "epoch": 0.14661307879092356, "grad_norm": 0.24033209681510925, "learning_rate": 0.00019883324470670508, "loss": 11.6827, "step": 7004 }, { "epoch": 0.1466340115548857, "grad_norm": 0.3085665702819824, "learning_rate": 0.0001988329107336757, "loss": 11.6783, "step": 7005 }, { "epoch": 0.14665494431884787, "grad_norm": 0.22490788996219635, "learning_rate": 0.00019883257671313533, "loss": 11.6666, "step": 7006 }, { "epoch": 0.14667587708281002, "grad_norm": 0.2707768976688385, "learning_rate": 0.00019883224264508417, "loss": 11.6829, "step": 7007 }, { "epoch": 0.14669680984677216, "grad_norm": 0.3177391588687897, "learning_rate": 0.00019883190852952235, "loss": 11.7074, "step": 7008 }, { "epoch": 0.1467177426107343, "grad_norm": 0.21228128671646118, "learning_rate": 0.00019883157436645008, "loss": 11.6816, "step": 7009 }, { "epoch": 0.14673867537469648, "grad_norm": 0.2626881003379822, "learning_rate": 0.0001988312401558675, "loss": 11.6932, "step": 7010 }, { "epoch": 0.14675960813865863, "grad_norm": 0.2313690036535263, "learning_rate": 0.00019883090589777473, "loss": 11.6769, "step": 7011 }, { "epoch": 0.14678054090262077, "grad_norm": 0.236256942152977, "learning_rate": 0.00019883057159217197, "loss": 11.6896, "step": 7012 }, { "epoch": 0.14680147366658294, "grad_norm": 0.2521337568759918, "learning_rate": 0.0001988302372390594, "loss": 11.6939, "step": 7013 }, { "epoch": 0.1468224064305451, "grad_norm": 0.21737517416477203, "learning_rate": 0.00019882990283843715, "loss": 11.6874, "step": 7014 }, { "epoch": 0.14684333919450723, "grad_norm": 0.23216892778873444, "learning_rate": 0.00019882956839030537, "loss": 11.6701, "step": 7015 }, { "epoch": 0.1468642719584694, "grad_norm": 0.19773808121681213, "learning_rate": 0.00019882923389466425, "loss": 11.6767, "step": 7016 }, { "epoch": 0.14688520472243155, "grad_norm": 0.2066858410835266, "learning_rate": 0.0001988288993515139, "loss": 11.6775, "step": 7017 }, { "epoch": 0.1469061374863937, "grad_norm": 0.2674015164375305, "learning_rate": 0.00019882856476085455, "loss": 11.6832, "step": 7018 }, { "epoch": 0.14692707025035587, "grad_norm": 0.20506875216960907, "learning_rate": 0.0001988282301226863, "loss": 11.6981, "step": 7019 }, { "epoch": 0.146948003014318, "grad_norm": 0.21138787269592285, "learning_rate": 0.00019882789543700934, "loss": 11.6855, "step": 7020 }, { "epoch": 0.14696893577828016, "grad_norm": 0.22315745055675507, "learning_rate": 0.00019882756070382386, "loss": 11.6795, "step": 7021 }, { "epoch": 0.14698986854224233, "grad_norm": 0.2997732162475586, "learning_rate": 0.00019882722592313, "loss": 11.6717, "step": 7022 }, { "epoch": 0.14701080130620447, "grad_norm": 0.23739883303642273, "learning_rate": 0.0001988268910949279, "loss": 11.6856, "step": 7023 }, { "epoch": 0.14703173407016662, "grad_norm": 0.26510122418403625, "learning_rate": 0.0001988265562192177, "loss": 11.6827, "step": 7024 }, { "epoch": 0.14705266683412876, "grad_norm": 0.2040841281414032, "learning_rate": 0.0001988262212959996, "loss": 11.669, "step": 7025 }, { "epoch": 0.14707359959809094, "grad_norm": 0.19183705747127533, "learning_rate": 0.0001988258863252738, "loss": 11.6788, "step": 7026 }, { "epoch": 0.14709453236205308, "grad_norm": 0.22773638367652893, "learning_rate": 0.00019882555130704038, "loss": 11.6776, "step": 7027 }, { "epoch": 0.14711546512601523, "grad_norm": 0.23145951330661774, "learning_rate": 0.00019882521624129955, "loss": 11.6814, "step": 7028 }, { "epoch": 0.1471363978899774, "grad_norm": 0.2433793544769287, "learning_rate": 0.00019882488112805144, "loss": 11.6835, "step": 7029 }, { "epoch": 0.14715733065393954, "grad_norm": 0.1939791887998581, "learning_rate": 0.00019882454596729625, "loss": 11.6856, "step": 7030 }, { "epoch": 0.1471782634179017, "grad_norm": 0.2630406320095062, "learning_rate": 0.00019882421075903412, "loss": 11.6953, "step": 7031 }, { "epoch": 0.14719919618186386, "grad_norm": 0.22998270392417908, "learning_rate": 0.0001988238755032652, "loss": 11.6675, "step": 7032 }, { "epoch": 0.147220128945826, "grad_norm": 0.9916276335716248, "learning_rate": 0.00019882354019998968, "loss": 11.7086, "step": 7033 }, { "epoch": 0.14724106170978815, "grad_norm": 0.23221370577812195, "learning_rate": 0.00019882320484920768, "loss": 11.6909, "step": 7034 }, { "epoch": 0.14726199447375032, "grad_norm": 0.20657698810100555, "learning_rate": 0.0001988228694509194, "loss": 11.685, "step": 7035 }, { "epoch": 0.14728292723771247, "grad_norm": 0.2599308490753174, "learning_rate": 0.00019882253400512498, "loss": 11.6924, "step": 7036 }, { "epoch": 0.1473038600016746, "grad_norm": 0.2351277619600296, "learning_rate": 0.0001988221985118246, "loss": 11.6838, "step": 7037 }, { "epoch": 0.1473247927656368, "grad_norm": 0.3000665307044983, "learning_rate": 0.0001988218629710184, "loss": 11.681, "step": 7038 }, { "epoch": 0.14734572552959893, "grad_norm": 0.28961455821990967, "learning_rate": 0.00019882152738270653, "loss": 11.6801, "step": 7039 }, { "epoch": 0.14736665829356108, "grad_norm": 0.21783298254013062, "learning_rate": 0.0001988211917468892, "loss": 11.6749, "step": 7040 }, { "epoch": 0.14738759105752325, "grad_norm": 0.24282225966453552, "learning_rate": 0.00019882085606356652, "loss": 11.6939, "step": 7041 }, { "epoch": 0.1474085238214854, "grad_norm": 0.2575685679912567, "learning_rate": 0.0001988205203327387, "loss": 11.6878, "step": 7042 }, { "epoch": 0.14742945658544754, "grad_norm": 0.22168128192424774, "learning_rate": 0.00019882018455440585, "loss": 11.6702, "step": 7043 }, { "epoch": 0.14745038934940968, "grad_norm": 0.2407136857509613, "learning_rate": 0.00019881984872856817, "loss": 11.6782, "step": 7044 }, { "epoch": 0.14747132211337186, "grad_norm": 0.22656264901161194, "learning_rate": 0.00019881951285522582, "loss": 11.6909, "step": 7045 }, { "epoch": 0.147492254877334, "grad_norm": 0.28720229864120483, "learning_rate": 0.0001988191769343789, "loss": 11.6857, "step": 7046 }, { "epoch": 0.14751318764129615, "grad_norm": 0.22601278126239777, "learning_rate": 0.0001988188409660277, "loss": 11.6864, "step": 7047 }, { "epoch": 0.14753412040525832, "grad_norm": 0.20320747792720795, "learning_rate": 0.00019881850495017224, "loss": 11.6869, "step": 7048 }, { "epoch": 0.14755505316922046, "grad_norm": 0.2358773797750473, "learning_rate": 0.00019881816888681278, "loss": 11.6706, "step": 7049 }, { "epoch": 0.1475759859331826, "grad_norm": 0.2835787534713745, "learning_rate": 0.00019881783277594944, "loss": 11.6819, "step": 7050 }, { "epoch": 0.14759691869714478, "grad_norm": 0.20986121892929077, "learning_rate": 0.00019881749661758238, "loss": 11.6721, "step": 7051 }, { "epoch": 0.14761785146110692, "grad_norm": 0.2237689197063446, "learning_rate": 0.00019881716041171178, "loss": 11.671, "step": 7052 }, { "epoch": 0.14763878422506907, "grad_norm": 0.24433672428131104, "learning_rate": 0.00019881682415833778, "loss": 11.6737, "step": 7053 }, { "epoch": 0.14765971698903124, "grad_norm": 0.2680588662624359, "learning_rate": 0.00019881648785746055, "loss": 11.683, "step": 7054 }, { "epoch": 0.1476806497529934, "grad_norm": 0.2347169667482376, "learning_rate": 0.00019881615150908023, "loss": 11.6854, "step": 7055 }, { "epoch": 0.14770158251695553, "grad_norm": 0.2809951901435852, "learning_rate": 0.00019881581511319705, "loss": 11.6893, "step": 7056 }, { "epoch": 0.1477225152809177, "grad_norm": 0.32665714621543884, "learning_rate": 0.0001988154786698111, "loss": 11.6781, "step": 7057 }, { "epoch": 0.14774344804487985, "grad_norm": 0.22498197853565216, "learning_rate": 0.00019881514217892255, "loss": 11.6785, "step": 7058 }, { "epoch": 0.147764380808842, "grad_norm": 0.247257798910141, "learning_rate": 0.00019881480564053162, "loss": 11.6898, "step": 7059 }, { "epoch": 0.14778531357280417, "grad_norm": 0.22575753927230835, "learning_rate": 0.00019881446905463843, "loss": 11.682, "step": 7060 }, { "epoch": 0.1478062463367663, "grad_norm": 0.23504942655563354, "learning_rate": 0.00019881413242124314, "loss": 11.6868, "step": 7061 }, { "epoch": 0.14782717910072846, "grad_norm": 0.2741982638835907, "learning_rate": 0.00019881379574034593, "loss": 11.6875, "step": 7062 }, { "epoch": 0.1478481118646906, "grad_norm": 0.22631819546222687, "learning_rate": 0.0001988134590119469, "loss": 11.6769, "step": 7063 }, { "epoch": 0.14786904462865277, "grad_norm": 0.29299694299697876, "learning_rate": 0.0001988131222360463, "loss": 11.6881, "step": 7064 }, { "epoch": 0.14788997739261492, "grad_norm": 0.21586021780967712, "learning_rate": 0.00019881278541264424, "loss": 11.6808, "step": 7065 }, { "epoch": 0.14791091015657706, "grad_norm": 0.2499673068523407, "learning_rate": 0.0001988124485417409, "loss": 11.7003, "step": 7066 }, { "epoch": 0.14793184292053924, "grad_norm": 0.24688927829265594, "learning_rate": 0.00019881211162333645, "loss": 11.6859, "step": 7067 }, { "epoch": 0.14795277568450138, "grad_norm": 0.2501439154148102, "learning_rate": 0.000198811774657431, "loss": 11.6809, "step": 7068 }, { "epoch": 0.14797370844846353, "grad_norm": 0.26049432158470154, "learning_rate": 0.0001988114376440248, "loss": 11.6975, "step": 7069 }, { "epoch": 0.1479946412124257, "grad_norm": 0.20993337035179138, "learning_rate": 0.00019881110058311792, "loss": 11.6874, "step": 7070 }, { "epoch": 0.14801557397638784, "grad_norm": 0.201786071062088, "learning_rate": 0.00019881076347471054, "loss": 11.703, "step": 7071 }, { "epoch": 0.14803650674035, "grad_norm": 0.20063985884189606, "learning_rate": 0.0001988104263188029, "loss": 11.6925, "step": 7072 }, { "epoch": 0.14805743950431216, "grad_norm": 0.23539891839027405, "learning_rate": 0.0001988100891153951, "loss": 11.675, "step": 7073 }, { "epoch": 0.1480783722682743, "grad_norm": 0.21095456182956696, "learning_rate": 0.00019880975186448727, "loss": 11.6717, "step": 7074 }, { "epoch": 0.14809930503223645, "grad_norm": 0.20892779529094696, "learning_rate": 0.00019880941456607967, "loss": 11.6711, "step": 7075 }, { "epoch": 0.14812023779619862, "grad_norm": 0.20792141556739807, "learning_rate": 0.00019880907722017238, "loss": 11.6725, "step": 7076 }, { "epoch": 0.14814117056016077, "grad_norm": 0.2790983319282532, "learning_rate": 0.00019880873982676555, "loss": 11.6797, "step": 7077 }, { "epoch": 0.1481621033241229, "grad_norm": 0.26242077350616455, "learning_rate": 0.0001988084023858594, "loss": 11.6887, "step": 7078 }, { "epoch": 0.14818303608808506, "grad_norm": 0.24278490245342255, "learning_rate": 0.00019880806489745408, "loss": 11.6839, "step": 7079 }, { "epoch": 0.14820396885204723, "grad_norm": 0.2298395186662674, "learning_rate": 0.00019880772736154974, "loss": 11.6891, "step": 7080 }, { "epoch": 0.14822490161600937, "grad_norm": 0.24115394055843353, "learning_rate": 0.00019880738977814656, "loss": 11.6739, "step": 7081 }, { "epoch": 0.14824583437997152, "grad_norm": 0.23637473583221436, "learning_rate": 0.00019880705214724468, "loss": 11.688, "step": 7082 }, { "epoch": 0.1482667671439337, "grad_norm": 0.2091386467218399, "learning_rate": 0.00019880671446884427, "loss": 11.6833, "step": 7083 }, { "epoch": 0.14828769990789584, "grad_norm": 0.24708831310272217, "learning_rate": 0.00019880637674294548, "loss": 11.684, "step": 7084 }, { "epoch": 0.14830863267185798, "grad_norm": 0.2319200038909912, "learning_rate": 0.0001988060389695485, "loss": 11.6741, "step": 7085 }, { "epoch": 0.14832956543582015, "grad_norm": 0.2044987678527832, "learning_rate": 0.0001988057011486535, "loss": 11.6874, "step": 7086 }, { "epoch": 0.1483504981997823, "grad_norm": 0.25198933482170105, "learning_rate": 0.0001988053632802606, "loss": 11.694, "step": 7087 }, { "epoch": 0.14837143096374444, "grad_norm": 0.22830162942409515, "learning_rate": 0.00019880502536436992, "loss": 11.6902, "step": 7088 }, { "epoch": 0.14839236372770662, "grad_norm": 0.2238912433385849, "learning_rate": 0.00019880468740098175, "loss": 11.6793, "step": 7089 }, { "epoch": 0.14841329649166876, "grad_norm": 0.25397443771362305, "learning_rate": 0.0001988043493900962, "loss": 11.6769, "step": 7090 }, { "epoch": 0.1484342292556309, "grad_norm": 0.22414551675319672, "learning_rate": 0.0001988040113317134, "loss": 11.698, "step": 7091 }, { "epoch": 0.14845516201959308, "grad_norm": 0.24564354121685028, "learning_rate": 0.00019880367322583348, "loss": 11.6762, "step": 7092 }, { "epoch": 0.14847609478355522, "grad_norm": 0.21933341026306152, "learning_rate": 0.0001988033350724567, "loss": 11.6879, "step": 7093 }, { "epoch": 0.14849702754751737, "grad_norm": 0.2704715430736542, "learning_rate": 0.00019880299687158318, "loss": 11.6778, "step": 7094 }, { "epoch": 0.14851796031147954, "grad_norm": 0.2887064516544342, "learning_rate": 0.00019880265862321305, "loss": 11.6855, "step": 7095 }, { "epoch": 0.14853889307544169, "grad_norm": 0.22455742955207825, "learning_rate": 0.00019880232032734653, "loss": 11.6857, "step": 7096 }, { "epoch": 0.14855982583940383, "grad_norm": 0.19552694261074066, "learning_rate": 0.00019880198198398376, "loss": 11.6837, "step": 7097 }, { "epoch": 0.14858075860336598, "grad_norm": 0.26171454787254333, "learning_rate": 0.0001988016435931249, "loss": 11.6742, "step": 7098 }, { "epoch": 0.14860169136732815, "grad_norm": 0.22841912508010864, "learning_rate": 0.0001988013051547701, "loss": 11.6757, "step": 7099 }, { "epoch": 0.1486226241312903, "grad_norm": 0.19135266542434692, "learning_rate": 0.0001988009666689195, "loss": 11.6888, "step": 7100 }, { "epoch": 0.14864355689525244, "grad_norm": 0.2677173614501953, "learning_rate": 0.00019880062813557335, "loss": 11.6785, "step": 7101 }, { "epoch": 0.1486644896592146, "grad_norm": 0.2601892650127411, "learning_rate": 0.0001988002895547317, "loss": 11.6838, "step": 7102 }, { "epoch": 0.14868542242317676, "grad_norm": 0.23212112486362457, "learning_rate": 0.00019879995092639483, "loss": 11.6831, "step": 7103 }, { "epoch": 0.1487063551871389, "grad_norm": 0.2579100430011749, "learning_rate": 0.00019879961225056282, "loss": 11.6747, "step": 7104 }, { "epoch": 0.14872728795110107, "grad_norm": 0.2507610321044922, "learning_rate": 0.00019879927352723585, "loss": 11.6912, "step": 7105 }, { "epoch": 0.14874822071506322, "grad_norm": 0.263750821352005, "learning_rate": 0.0001987989347564141, "loss": 11.687, "step": 7106 }, { "epoch": 0.14876915347902536, "grad_norm": 0.20158223807811737, "learning_rate": 0.0001987985959380977, "loss": 11.6722, "step": 7107 }, { "epoch": 0.14879008624298753, "grad_norm": 0.23203137516975403, "learning_rate": 0.00019879825707228688, "loss": 11.6712, "step": 7108 }, { "epoch": 0.14881101900694968, "grad_norm": 0.33663707971572876, "learning_rate": 0.00019879791815898172, "loss": 11.6661, "step": 7109 }, { "epoch": 0.14883195177091182, "grad_norm": 0.2685670256614685, "learning_rate": 0.00019879757919818243, "loss": 11.6708, "step": 7110 }, { "epoch": 0.148852884534874, "grad_norm": 0.2693405747413635, "learning_rate": 0.00019879724018988918, "loss": 11.6923, "step": 7111 }, { "epoch": 0.14887381729883614, "grad_norm": 0.22919315099716187, "learning_rate": 0.00019879690113410207, "loss": 11.673, "step": 7112 }, { "epoch": 0.1488947500627983, "grad_norm": 0.21010062098503113, "learning_rate": 0.00019879656203082132, "loss": 11.672, "step": 7113 }, { "epoch": 0.14891568282676046, "grad_norm": 0.27827927470207214, "learning_rate": 0.0001987962228800471, "loss": 11.6764, "step": 7114 }, { "epoch": 0.1489366155907226, "grad_norm": 0.2742604613304138, "learning_rate": 0.0001987958836817796, "loss": 11.6804, "step": 7115 }, { "epoch": 0.14895754835468475, "grad_norm": 0.25291532278060913, "learning_rate": 0.00019879554443601887, "loss": 11.6775, "step": 7116 }, { "epoch": 0.1489784811186469, "grad_norm": 0.23451843857765198, "learning_rate": 0.00019879520514276517, "loss": 11.6758, "step": 7117 }, { "epoch": 0.14899941388260907, "grad_norm": 0.23980936408042908, "learning_rate": 0.00019879486580201862, "loss": 11.6744, "step": 7118 }, { "epoch": 0.1490203466465712, "grad_norm": 0.1916838437318802, "learning_rate": 0.00019879452641377942, "loss": 11.664, "step": 7119 }, { "epoch": 0.14904127941053336, "grad_norm": 0.2759820520877838, "learning_rate": 0.00019879418697804773, "loss": 11.6863, "step": 7120 }, { "epoch": 0.14906221217449553, "grad_norm": 0.23710301518440247, "learning_rate": 0.00019879384749482365, "loss": 11.6761, "step": 7121 }, { "epoch": 0.14908314493845767, "grad_norm": 0.23796257376670837, "learning_rate": 0.00019879350796410741, "loss": 11.68, "step": 7122 }, { "epoch": 0.14910407770241982, "grad_norm": 0.35440176725387573, "learning_rate": 0.00019879316838589914, "loss": 11.6978, "step": 7123 }, { "epoch": 0.149125010466382, "grad_norm": 0.21027928590774536, "learning_rate": 0.000198792828760199, "loss": 11.6744, "step": 7124 }, { "epoch": 0.14914594323034414, "grad_norm": 0.28502050042152405, "learning_rate": 0.0001987924890870072, "loss": 11.6867, "step": 7125 }, { "epoch": 0.14916687599430628, "grad_norm": 0.2652870714664459, "learning_rate": 0.00019879214936632388, "loss": 11.6765, "step": 7126 }, { "epoch": 0.14918780875826845, "grad_norm": 0.28703024983406067, "learning_rate": 0.00019879180959814916, "loss": 11.6885, "step": 7127 }, { "epoch": 0.1492087415222306, "grad_norm": 0.25665345788002014, "learning_rate": 0.00019879146978248323, "loss": 11.6726, "step": 7128 }, { "epoch": 0.14922967428619274, "grad_norm": 0.2327127754688263, "learning_rate": 0.0001987911299193263, "loss": 11.6892, "step": 7129 }, { "epoch": 0.14925060705015492, "grad_norm": 0.24238120019435883, "learning_rate": 0.00019879079000867847, "loss": 11.6865, "step": 7130 }, { "epoch": 0.14927153981411706, "grad_norm": 0.19173376262187958, "learning_rate": 0.00019879045005053993, "loss": 11.6769, "step": 7131 }, { "epoch": 0.1492924725780792, "grad_norm": 0.25689658522605896, "learning_rate": 0.00019879011004491083, "loss": 11.675, "step": 7132 }, { "epoch": 0.14931340534204135, "grad_norm": 0.20188665390014648, "learning_rate": 0.00019878976999179138, "loss": 11.6774, "step": 7133 }, { "epoch": 0.14933433810600352, "grad_norm": 0.2216806411743164, "learning_rate": 0.00019878942989118165, "loss": 11.6901, "step": 7134 }, { "epoch": 0.14935527086996567, "grad_norm": 0.29610908031463623, "learning_rate": 0.0001987890897430819, "loss": 11.6845, "step": 7135 }, { "epoch": 0.1493762036339278, "grad_norm": 0.2745245695114136, "learning_rate": 0.00019878874954749228, "loss": 11.6994, "step": 7136 }, { "epoch": 0.14939713639788998, "grad_norm": 0.25944533944129944, "learning_rate": 0.00019878840930441288, "loss": 11.6774, "step": 7137 }, { "epoch": 0.14941806916185213, "grad_norm": 0.23252812027931213, "learning_rate": 0.00019878806901384394, "loss": 11.6785, "step": 7138 }, { "epoch": 0.14943900192581427, "grad_norm": 0.2602021396160126, "learning_rate": 0.00019878772867578558, "loss": 11.6801, "step": 7139 }, { "epoch": 0.14945993468977645, "grad_norm": 0.2197858691215515, "learning_rate": 0.00019878738829023798, "loss": 11.6826, "step": 7140 }, { "epoch": 0.1494808674537386, "grad_norm": 0.21179381012916565, "learning_rate": 0.00019878704785720132, "loss": 11.6706, "step": 7141 }, { "epoch": 0.14950180021770074, "grad_norm": 0.2304038107395172, "learning_rate": 0.00019878670737667574, "loss": 11.6888, "step": 7142 }, { "epoch": 0.1495227329816629, "grad_norm": 0.181773841381073, "learning_rate": 0.0001987863668486614, "loss": 11.6882, "step": 7143 }, { "epoch": 0.14954366574562505, "grad_norm": 0.2423785924911499, "learning_rate": 0.00019878602627315848, "loss": 11.681, "step": 7144 }, { "epoch": 0.1495645985095872, "grad_norm": 0.2632061243057251, "learning_rate": 0.00019878568565016714, "loss": 11.6864, "step": 7145 }, { "epoch": 0.14958553127354937, "grad_norm": 0.2538205087184906, "learning_rate": 0.00019878534497968752, "loss": 11.6698, "step": 7146 }, { "epoch": 0.14960646403751152, "grad_norm": 0.21161416172981262, "learning_rate": 0.00019878500426171982, "loss": 11.6776, "step": 7147 }, { "epoch": 0.14962739680147366, "grad_norm": 0.21349823474884033, "learning_rate": 0.0001987846634962642, "loss": 11.6703, "step": 7148 }, { "epoch": 0.14964832956543583, "grad_norm": 0.24165965616703033, "learning_rate": 0.00019878432268332082, "loss": 11.6601, "step": 7149 }, { "epoch": 0.14966926232939798, "grad_norm": 0.2049083262681961, "learning_rate": 0.0001987839818228898, "loss": 11.6947, "step": 7150 }, { "epoch": 0.14969019509336012, "grad_norm": 0.2699245512485504, "learning_rate": 0.00019878364091497136, "loss": 11.6719, "step": 7151 }, { "epoch": 0.14971112785732227, "grad_norm": 0.23730283975601196, "learning_rate": 0.00019878329995956563, "loss": 11.6847, "step": 7152 }, { "epoch": 0.14973206062128444, "grad_norm": 0.23084278404712677, "learning_rate": 0.0001987829589566728, "loss": 11.6758, "step": 7153 }, { "epoch": 0.14975299338524659, "grad_norm": 0.24205678701400757, "learning_rate": 0.00019878261790629305, "loss": 11.6609, "step": 7154 }, { "epoch": 0.14977392614920873, "grad_norm": 0.24573533236980438, "learning_rate": 0.00019878227680842646, "loss": 11.6683, "step": 7155 }, { "epoch": 0.1497948589131709, "grad_norm": 0.22520340979099274, "learning_rate": 0.0001987819356630733, "loss": 11.6734, "step": 7156 }, { "epoch": 0.14981579167713305, "grad_norm": 0.26669931411743164, "learning_rate": 0.00019878159447023368, "loss": 11.6982, "step": 7157 }, { "epoch": 0.1498367244410952, "grad_norm": 0.26810622215270996, "learning_rate": 0.00019878125322990773, "loss": 11.6943, "step": 7158 }, { "epoch": 0.14985765720505737, "grad_norm": 0.2073739469051361, "learning_rate": 0.00019878091194209567, "loss": 11.6705, "step": 7159 }, { "epoch": 0.1498785899690195, "grad_norm": 0.2919054627418518, "learning_rate": 0.00019878057060679764, "loss": 11.6885, "step": 7160 }, { "epoch": 0.14989952273298165, "grad_norm": 0.22633349895477295, "learning_rate": 0.00019878022922401384, "loss": 11.6856, "step": 7161 }, { "epoch": 0.14992045549694383, "grad_norm": 0.20529699325561523, "learning_rate": 0.0001987798877937444, "loss": 11.6824, "step": 7162 }, { "epoch": 0.14994138826090597, "grad_norm": 0.2229853719472885, "learning_rate": 0.00019877954631598944, "loss": 11.6703, "step": 7163 }, { "epoch": 0.14996232102486812, "grad_norm": 0.3000503182411194, "learning_rate": 0.0001987792047907492, "loss": 11.6901, "step": 7164 }, { "epoch": 0.1499832537888303, "grad_norm": 0.2539565861225128, "learning_rate": 0.00019877886321802385, "loss": 11.6904, "step": 7165 }, { "epoch": 0.15000418655279243, "grad_norm": 0.2664490044116974, "learning_rate": 0.00019877852159781348, "loss": 11.6894, "step": 7166 }, { "epoch": 0.15002511931675458, "grad_norm": 0.23854558169841766, "learning_rate": 0.00019877817993011832, "loss": 11.6879, "step": 7167 }, { "epoch": 0.15004605208071672, "grad_norm": 0.3123255968093872, "learning_rate": 0.00019877783821493848, "loss": 11.6704, "step": 7168 }, { "epoch": 0.1500669848446789, "grad_norm": 0.27823153138160706, "learning_rate": 0.00019877749645227417, "loss": 11.6814, "step": 7169 }, { "epoch": 0.15008791760864104, "grad_norm": 0.24331629276275635, "learning_rate": 0.00019877715464212555, "loss": 11.6837, "step": 7170 }, { "epoch": 0.1501088503726032, "grad_norm": 0.29373699426651, "learning_rate": 0.00019877681278449277, "loss": 11.6965, "step": 7171 }, { "epoch": 0.15012978313656536, "grad_norm": 0.24974733591079712, "learning_rate": 0.00019877647087937597, "loss": 11.6804, "step": 7172 }, { "epoch": 0.1501507159005275, "grad_norm": 0.27254995703697205, "learning_rate": 0.00019877612892677538, "loss": 11.677, "step": 7173 }, { "epoch": 0.15017164866448965, "grad_norm": 0.21530364453792572, "learning_rate": 0.00019877578692669107, "loss": 11.6681, "step": 7174 }, { "epoch": 0.15019258142845182, "grad_norm": 0.25141507387161255, "learning_rate": 0.0001987754448791233, "loss": 11.6608, "step": 7175 }, { "epoch": 0.15021351419241397, "grad_norm": 0.21986258029937744, "learning_rate": 0.0001987751027840722, "loss": 11.664, "step": 7176 }, { "epoch": 0.1502344469563761, "grad_norm": 0.24082311987876892, "learning_rate": 0.0001987747606415379, "loss": 11.6734, "step": 7177 }, { "epoch": 0.15025537972033828, "grad_norm": 0.21570494771003723, "learning_rate": 0.00019877441845152062, "loss": 11.6692, "step": 7178 }, { "epoch": 0.15027631248430043, "grad_norm": 0.2413598746061325, "learning_rate": 0.00019877407621402048, "loss": 11.6779, "step": 7179 }, { "epoch": 0.15029724524826257, "grad_norm": 0.2511579692363739, "learning_rate": 0.00019877373392903768, "loss": 11.6819, "step": 7180 }, { "epoch": 0.15031817801222475, "grad_norm": 0.20065435767173767, "learning_rate": 0.00019877339159657235, "loss": 11.6939, "step": 7181 }, { "epoch": 0.1503391107761869, "grad_norm": 0.21722790598869324, "learning_rate": 0.0001987730492166247, "loss": 11.6708, "step": 7182 }, { "epoch": 0.15036004354014904, "grad_norm": 0.3103579878807068, "learning_rate": 0.00019877270678919483, "loss": 11.6802, "step": 7183 }, { "epoch": 0.1503809763041112, "grad_norm": 0.24620598554611206, "learning_rate": 0.00019877236431428298, "loss": 11.7139, "step": 7184 }, { "epoch": 0.15040190906807335, "grad_norm": 0.22034518420696259, "learning_rate": 0.00019877202179188923, "loss": 11.6919, "step": 7185 }, { "epoch": 0.1504228418320355, "grad_norm": 0.32893458008766174, "learning_rate": 0.0001987716792220138, "loss": 11.6973, "step": 7186 }, { "epoch": 0.15044377459599764, "grad_norm": 0.23769058287143707, "learning_rate": 0.00019877133660465687, "loss": 11.6823, "step": 7187 }, { "epoch": 0.15046470735995982, "grad_norm": 0.1921222060918808, "learning_rate": 0.00019877099393981852, "loss": 11.6744, "step": 7188 }, { "epoch": 0.15048564012392196, "grad_norm": 0.20968499779701233, "learning_rate": 0.00019877065122749903, "loss": 11.6818, "step": 7189 }, { "epoch": 0.1505065728878841, "grad_norm": 0.2834891378879547, "learning_rate": 0.00019877030846769848, "loss": 11.6822, "step": 7190 }, { "epoch": 0.15052750565184628, "grad_norm": 0.26767656207084656, "learning_rate": 0.0001987699656604171, "loss": 11.6777, "step": 7191 }, { "epoch": 0.15054843841580842, "grad_norm": 0.20304211974143982, "learning_rate": 0.00019876962280565498, "loss": 11.6852, "step": 7192 }, { "epoch": 0.15056937117977057, "grad_norm": 0.24896816909313202, "learning_rate": 0.00019876927990341233, "loss": 11.6753, "step": 7193 }, { "epoch": 0.15059030394373274, "grad_norm": 0.219016432762146, "learning_rate": 0.00019876893695368932, "loss": 11.6744, "step": 7194 }, { "epoch": 0.15061123670769488, "grad_norm": 0.2681659758090973, "learning_rate": 0.0001987685939564861, "loss": 11.6598, "step": 7195 }, { "epoch": 0.15063216947165703, "grad_norm": 0.24946443736553192, "learning_rate": 0.00019876825091180286, "loss": 11.6718, "step": 7196 }, { "epoch": 0.1506531022356192, "grad_norm": 0.22685746848583221, "learning_rate": 0.0001987679078196397, "loss": 11.6919, "step": 7197 }, { "epoch": 0.15067403499958135, "grad_norm": 0.23137229681015015, "learning_rate": 0.00019876756467999686, "loss": 11.6899, "step": 7198 }, { "epoch": 0.1506949677635435, "grad_norm": 0.26671087741851807, "learning_rate": 0.00019876722149287445, "loss": 11.6756, "step": 7199 }, { "epoch": 0.15071590052750566, "grad_norm": 0.23160186409950256, "learning_rate": 0.00019876687825827268, "loss": 11.6911, "step": 7200 }, { "epoch": 0.1507368332914678, "grad_norm": 0.2797106206417084, "learning_rate": 0.00019876653497619167, "loss": 11.6779, "step": 7201 }, { "epoch": 0.15075776605542995, "grad_norm": 0.21446916460990906, "learning_rate": 0.0001987661916466316, "loss": 11.6705, "step": 7202 }, { "epoch": 0.15077869881939213, "grad_norm": 0.22579900920391083, "learning_rate": 0.00019876584826959267, "loss": 11.6849, "step": 7203 }, { "epoch": 0.15079963158335427, "grad_norm": 0.23859229683876038, "learning_rate": 0.00019876550484507498, "loss": 11.6815, "step": 7204 }, { "epoch": 0.15082056434731642, "grad_norm": 0.21801619231700897, "learning_rate": 0.00019876516137307877, "loss": 11.6698, "step": 7205 }, { "epoch": 0.15084149711127856, "grad_norm": 0.30109962821006775, "learning_rate": 0.00019876481785360416, "loss": 11.6964, "step": 7206 }, { "epoch": 0.15086242987524073, "grad_norm": 0.18334242701530457, "learning_rate": 0.00019876447428665133, "loss": 11.6628, "step": 7207 }, { "epoch": 0.15088336263920288, "grad_norm": 0.2604663670063019, "learning_rate": 0.00019876413067222042, "loss": 11.6807, "step": 7208 }, { "epoch": 0.15090429540316502, "grad_norm": 0.2549024224281311, "learning_rate": 0.00019876378701031162, "loss": 11.6792, "step": 7209 }, { "epoch": 0.1509252281671272, "grad_norm": 0.255769282579422, "learning_rate": 0.0001987634433009251, "loss": 11.6699, "step": 7210 }, { "epoch": 0.15094616093108934, "grad_norm": 0.2351953536272049, "learning_rate": 0.00019876309954406098, "loss": 11.6627, "step": 7211 }, { "epoch": 0.15096709369505149, "grad_norm": 0.3028979003429413, "learning_rate": 0.0001987627557397195, "loss": 11.7023, "step": 7212 }, { "epoch": 0.15098802645901366, "grad_norm": 0.2272741049528122, "learning_rate": 0.00019876241188790075, "loss": 11.6607, "step": 7213 }, { "epoch": 0.1510089592229758, "grad_norm": 0.2332000881433487, "learning_rate": 0.00019876206798860496, "loss": 11.6847, "step": 7214 }, { "epoch": 0.15102989198693795, "grad_norm": 0.23172634840011597, "learning_rate": 0.00019876172404183223, "loss": 11.694, "step": 7215 }, { "epoch": 0.15105082475090012, "grad_norm": 0.2253504991531372, "learning_rate": 0.00019876138004758278, "loss": 11.6547, "step": 7216 }, { "epoch": 0.15107175751486226, "grad_norm": 0.2688353955745697, "learning_rate": 0.00019876103600585676, "loss": 11.6848, "step": 7217 }, { "epoch": 0.1510926902788244, "grad_norm": 0.27470797300338745, "learning_rate": 0.00019876069191665432, "loss": 11.6926, "step": 7218 }, { "epoch": 0.15111362304278658, "grad_norm": 0.2777091860771179, "learning_rate": 0.00019876034777997564, "loss": 11.6975, "step": 7219 }, { "epoch": 0.15113455580674873, "grad_norm": 0.24761782586574554, "learning_rate": 0.00019876000359582088, "loss": 11.6774, "step": 7220 }, { "epoch": 0.15115548857071087, "grad_norm": 0.2048196941614151, "learning_rate": 0.0001987596593641902, "loss": 11.6877, "step": 7221 }, { "epoch": 0.15117642133467302, "grad_norm": 0.24325962364673615, "learning_rate": 0.0001987593150850838, "loss": 11.6847, "step": 7222 }, { "epoch": 0.1511973540986352, "grad_norm": 0.20623791217803955, "learning_rate": 0.0001987589707585018, "loss": 11.6712, "step": 7223 }, { "epoch": 0.15121828686259733, "grad_norm": 0.21968354284763336, "learning_rate": 0.00019875862638444436, "loss": 11.6819, "step": 7224 }, { "epoch": 0.15123921962655948, "grad_norm": 0.3305055499076843, "learning_rate": 0.0001987582819629117, "loss": 11.6901, "step": 7225 }, { "epoch": 0.15126015239052165, "grad_norm": 0.30258631706237793, "learning_rate": 0.00019875793749390397, "loss": 11.6823, "step": 7226 }, { "epoch": 0.1512810851544838, "grad_norm": 0.3078140914440155, "learning_rate": 0.0001987575929774213, "loss": 11.6945, "step": 7227 }, { "epoch": 0.15130201791844594, "grad_norm": 0.23157884180545807, "learning_rate": 0.00019875724841346385, "loss": 11.6843, "step": 7228 }, { "epoch": 0.1513229506824081, "grad_norm": 0.2073967307806015, "learning_rate": 0.00019875690380203182, "loss": 11.6687, "step": 7229 }, { "epoch": 0.15134388344637026, "grad_norm": 0.22865115106105804, "learning_rate": 0.00019875655914312537, "loss": 11.6733, "step": 7230 }, { "epoch": 0.1513648162103324, "grad_norm": 0.3147011399269104, "learning_rate": 0.0001987562144367447, "loss": 11.6816, "step": 7231 }, { "epoch": 0.15138574897429458, "grad_norm": 0.29173773527145386, "learning_rate": 0.00019875586968288992, "loss": 11.6831, "step": 7232 }, { "epoch": 0.15140668173825672, "grad_norm": 0.23930470645427704, "learning_rate": 0.0001987555248815612, "loss": 11.6843, "step": 7233 }, { "epoch": 0.15142761450221887, "grad_norm": 0.2776482403278351, "learning_rate": 0.00019875518003275872, "loss": 11.669, "step": 7234 }, { "epoch": 0.15144854726618104, "grad_norm": 0.2488967627286911, "learning_rate": 0.0001987548351364827, "loss": 11.6915, "step": 7235 }, { "epoch": 0.15146948003014318, "grad_norm": 0.2635204493999481, "learning_rate": 0.00019875449019273318, "loss": 11.6764, "step": 7236 }, { "epoch": 0.15149041279410533, "grad_norm": 0.229597806930542, "learning_rate": 0.00019875414520151044, "loss": 11.6876, "step": 7237 }, { "epoch": 0.1515113455580675, "grad_norm": 0.2277536392211914, "learning_rate": 0.00019875380016281456, "loss": 11.6798, "step": 7238 }, { "epoch": 0.15153227832202965, "grad_norm": 0.2793027460575104, "learning_rate": 0.00019875345507664577, "loss": 11.6676, "step": 7239 }, { "epoch": 0.1515532110859918, "grad_norm": 0.22772076725959778, "learning_rate": 0.00019875310994300422, "loss": 11.6691, "step": 7240 }, { "epoch": 0.15157414384995394, "grad_norm": 0.23699861764907837, "learning_rate": 0.00019875276476189008, "loss": 11.6852, "step": 7241 }, { "epoch": 0.1515950766139161, "grad_norm": 0.2201569825410843, "learning_rate": 0.0001987524195333035, "loss": 11.6844, "step": 7242 }, { "epoch": 0.15161600937787825, "grad_norm": 0.278702974319458, "learning_rate": 0.00019875207425724468, "loss": 11.6907, "step": 7243 }, { "epoch": 0.1516369421418404, "grad_norm": 0.25985005497932434, "learning_rate": 0.0001987517289337137, "loss": 11.6869, "step": 7244 }, { "epoch": 0.15165787490580257, "grad_norm": 0.23420952260494232, "learning_rate": 0.00019875138356271083, "loss": 11.6748, "step": 7245 }, { "epoch": 0.15167880766976471, "grad_norm": 0.2615746855735779, "learning_rate": 0.00019875103814423616, "loss": 11.687, "step": 7246 }, { "epoch": 0.15169974043372686, "grad_norm": 0.26309892535209656, "learning_rate": 0.00019875069267828992, "loss": 11.6862, "step": 7247 }, { "epoch": 0.15172067319768903, "grad_norm": 0.24226363003253937, "learning_rate": 0.00019875034716487221, "loss": 11.678, "step": 7248 }, { "epoch": 0.15174160596165118, "grad_norm": 0.27034664154052734, "learning_rate": 0.00019875000160398326, "loss": 11.6879, "step": 7249 }, { "epoch": 0.15176253872561332, "grad_norm": 0.29165878891944885, "learning_rate": 0.00019874965599562318, "loss": 11.6788, "step": 7250 }, { "epoch": 0.1517834714895755, "grad_norm": 0.2140151560306549, "learning_rate": 0.00019874931033979219, "loss": 11.6865, "step": 7251 }, { "epoch": 0.15180440425353764, "grad_norm": 0.28564396500587463, "learning_rate": 0.0001987489646364904, "loss": 11.6844, "step": 7252 }, { "epoch": 0.15182533701749978, "grad_norm": 0.23468846082687378, "learning_rate": 0.000198748618885718, "loss": 11.6677, "step": 7253 }, { "epoch": 0.15184626978146196, "grad_norm": 0.2802468240261078, "learning_rate": 0.0001987482730874752, "loss": 11.6825, "step": 7254 }, { "epoch": 0.1518672025454241, "grad_norm": 0.23313793540000916, "learning_rate": 0.0001987479272417621, "loss": 11.6748, "step": 7255 }, { "epoch": 0.15188813530938625, "grad_norm": 0.2544350028038025, "learning_rate": 0.00019874758134857888, "loss": 11.6924, "step": 7256 }, { "epoch": 0.1519090680733484, "grad_norm": 0.1977785974740982, "learning_rate": 0.00019874723540792572, "loss": 11.6788, "step": 7257 }, { "epoch": 0.15193000083731056, "grad_norm": 0.23478129506111145, "learning_rate": 0.0001987468894198028, "loss": 11.685, "step": 7258 }, { "epoch": 0.1519509336012727, "grad_norm": 0.2045513540506363, "learning_rate": 0.00019874654338421027, "loss": 11.6687, "step": 7259 }, { "epoch": 0.15197186636523485, "grad_norm": 0.24250540137290955, "learning_rate": 0.0001987461973011483, "loss": 11.681, "step": 7260 }, { "epoch": 0.15199279912919703, "grad_norm": 0.2176835834980011, "learning_rate": 0.00019874585117061707, "loss": 11.689, "step": 7261 }, { "epoch": 0.15201373189315917, "grad_norm": 0.193060502409935, "learning_rate": 0.0001987455049926167, "loss": 11.6771, "step": 7262 }, { "epoch": 0.15203466465712132, "grad_norm": 0.3381642699241638, "learning_rate": 0.0001987451587671474, "loss": 11.6913, "step": 7263 }, { "epoch": 0.1520555974210835, "grad_norm": 0.18164817988872528, "learning_rate": 0.00019874481249420928, "loss": 11.6792, "step": 7264 }, { "epoch": 0.15207653018504563, "grad_norm": 0.2559863328933716, "learning_rate": 0.00019874446617380262, "loss": 11.67, "step": 7265 }, { "epoch": 0.15209746294900778, "grad_norm": 0.31939324736595154, "learning_rate": 0.0001987441198059275, "loss": 11.679, "step": 7266 }, { "epoch": 0.15211839571296995, "grad_norm": 0.23529042303562164, "learning_rate": 0.00019874377339058406, "loss": 11.6943, "step": 7267 }, { "epoch": 0.1521393284769321, "grad_norm": 0.21413549780845642, "learning_rate": 0.00019874342692777254, "loss": 11.6811, "step": 7268 }, { "epoch": 0.15216026124089424, "grad_norm": 0.23489350080490112, "learning_rate": 0.0001987430804174931, "loss": 11.666, "step": 7269 }, { "epoch": 0.1521811940048564, "grad_norm": 0.20098210871219635, "learning_rate": 0.00019874273385974586, "loss": 11.6743, "step": 7270 }, { "epoch": 0.15220212676881856, "grad_norm": 0.2720039188861847, "learning_rate": 0.00019874238725453095, "loss": 11.674, "step": 7271 }, { "epoch": 0.1522230595327807, "grad_norm": 0.2396690547466278, "learning_rate": 0.00019874204060184867, "loss": 11.6895, "step": 7272 }, { "epoch": 0.15224399229674287, "grad_norm": 0.26473966240882874, "learning_rate": 0.0001987416939016991, "loss": 11.6715, "step": 7273 }, { "epoch": 0.15226492506070502, "grad_norm": 0.26115113496780396, "learning_rate": 0.00019874134715408242, "loss": 11.662, "step": 7274 }, { "epoch": 0.15228585782466716, "grad_norm": 0.22926202416419983, "learning_rate": 0.00019874100035899877, "loss": 11.6739, "step": 7275 }, { "epoch": 0.1523067905886293, "grad_norm": 0.21757514774799347, "learning_rate": 0.00019874065351644838, "loss": 11.6726, "step": 7276 }, { "epoch": 0.15232772335259148, "grad_norm": 0.20843708515167236, "learning_rate": 0.00019874030662643134, "loss": 11.6755, "step": 7277 }, { "epoch": 0.15234865611655363, "grad_norm": 0.23613379895687103, "learning_rate": 0.00019873995968894788, "loss": 11.6586, "step": 7278 }, { "epoch": 0.15236958888051577, "grad_norm": 0.2554113268852234, "learning_rate": 0.00019873961270399813, "loss": 11.6731, "step": 7279 }, { "epoch": 0.15239052164447794, "grad_norm": 0.26261118054389954, "learning_rate": 0.0001987392656715823, "loss": 11.6701, "step": 7280 }, { "epoch": 0.1524114544084401, "grad_norm": 0.2303919792175293, "learning_rate": 0.0001987389185917005, "loss": 11.6686, "step": 7281 }, { "epoch": 0.15243238717240223, "grad_norm": 0.2124948650598526, "learning_rate": 0.0001987385714643529, "loss": 11.6918, "step": 7282 }, { "epoch": 0.1524533199363644, "grad_norm": 0.2092059850692749, "learning_rate": 0.00019873822428953975, "loss": 11.666, "step": 7283 }, { "epoch": 0.15247425270032655, "grad_norm": 0.25769761204719543, "learning_rate": 0.0001987378770672611, "loss": 11.671, "step": 7284 }, { "epoch": 0.1524951854642887, "grad_norm": 0.33485743403434753, "learning_rate": 0.00019873752979751718, "loss": 11.6898, "step": 7285 }, { "epoch": 0.15251611822825087, "grad_norm": 0.20819799602031708, "learning_rate": 0.0001987371824803082, "loss": 11.6751, "step": 7286 }, { "epoch": 0.152537050992213, "grad_norm": 0.24161671102046967, "learning_rate": 0.00019873683511563423, "loss": 11.6706, "step": 7287 }, { "epoch": 0.15255798375617516, "grad_norm": 0.2414923459291458, "learning_rate": 0.00019873648770349548, "loss": 11.6893, "step": 7288 }, { "epoch": 0.15257891652013733, "grad_norm": 0.22872792184352875, "learning_rate": 0.00019873614024389219, "loss": 11.6681, "step": 7289 }, { "epoch": 0.15259984928409948, "grad_norm": 0.22980113327503204, "learning_rate": 0.00019873579273682439, "loss": 11.6825, "step": 7290 }, { "epoch": 0.15262078204806162, "grad_norm": 0.25772586464881897, "learning_rate": 0.00019873544518229233, "loss": 11.6552, "step": 7291 }, { "epoch": 0.1526417148120238, "grad_norm": 0.26783639192581177, "learning_rate": 0.00019873509758029615, "loss": 11.6825, "step": 7292 }, { "epoch": 0.15266264757598594, "grad_norm": 0.236556738615036, "learning_rate": 0.00019873474993083607, "loss": 11.687, "step": 7293 }, { "epoch": 0.15268358033994808, "grad_norm": 0.2087402194738388, "learning_rate": 0.0001987344022339122, "loss": 11.6789, "step": 7294 }, { "epoch": 0.15270451310391023, "grad_norm": 0.2639673054218292, "learning_rate": 0.0001987340544895247, "loss": 11.6816, "step": 7295 }, { "epoch": 0.1527254458678724, "grad_norm": 0.2709030508995056, "learning_rate": 0.00019873370669767378, "loss": 11.6788, "step": 7296 }, { "epoch": 0.15274637863183455, "grad_norm": 0.21274320781230927, "learning_rate": 0.0001987333588583596, "loss": 11.684, "step": 7297 }, { "epoch": 0.1527673113957967, "grad_norm": 0.2556700110435486, "learning_rate": 0.0001987330109715823, "loss": 11.6629, "step": 7298 }, { "epoch": 0.15278824415975886, "grad_norm": 0.25919485092163086, "learning_rate": 0.00019873266303734207, "loss": 11.6722, "step": 7299 }, { "epoch": 0.152809176923721, "grad_norm": 0.24766948819160461, "learning_rate": 0.00019873231505563907, "loss": 11.6836, "step": 7300 }, { "epoch": 0.15283010968768315, "grad_norm": 0.3049006462097168, "learning_rate": 0.00019873196702647347, "loss": 11.6708, "step": 7301 }, { "epoch": 0.15285104245164532, "grad_norm": 0.21590085327625275, "learning_rate": 0.00019873161894984543, "loss": 11.6638, "step": 7302 }, { "epoch": 0.15287197521560747, "grad_norm": 0.23304681479930878, "learning_rate": 0.0001987312708257551, "loss": 11.6774, "step": 7303 }, { "epoch": 0.15289290797956961, "grad_norm": 0.2888511121273041, "learning_rate": 0.00019873092265420273, "loss": 11.6864, "step": 7304 }, { "epoch": 0.1529138407435318, "grad_norm": 0.28022950887680054, "learning_rate": 0.00019873057443518837, "loss": 11.6711, "step": 7305 }, { "epoch": 0.15293477350749393, "grad_norm": 0.2568627893924713, "learning_rate": 0.00019873022616871227, "loss": 11.6731, "step": 7306 }, { "epoch": 0.15295570627145608, "grad_norm": 0.1905509978532791, "learning_rate": 0.00019872987785477455, "loss": 11.6739, "step": 7307 }, { "epoch": 0.15297663903541825, "grad_norm": 0.29556891322135925, "learning_rate": 0.00019872952949337542, "loss": 11.6705, "step": 7308 }, { "epoch": 0.1529975717993804, "grad_norm": 0.21051999926567078, "learning_rate": 0.000198729181084515, "loss": 11.6839, "step": 7309 }, { "epoch": 0.15301850456334254, "grad_norm": 0.19420357048511505, "learning_rate": 0.00019872883262819354, "loss": 11.6794, "step": 7310 }, { "epoch": 0.15303943732730468, "grad_norm": 0.2320161610841751, "learning_rate": 0.00019872848412441112, "loss": 11.6802, "step": 7311 }, { "epoch": 0.15306037009126686, "grad_norm": 0.24807117879390717, "learning_rate": 0.0001987281355731679, "loss": 11.6728, "step": 7312 }, { "epoch": 0.153081302855229, "grad_norm": 0.26971596479415894, "learning_rate": 0.00019872778697446413, "loss": 11.6766, "step": 7313 }, { "epoch": 0.15310223561919115, "grad_norm": 0.1920522153377533, "learning_rate": 0.00019872743832829995, "loss": 11.676, "step": 7314 }, { "epoch": 0.15312316838315332, "grad_norm": 0.2266595959663391, "learning_rate": 0.00019872708963467547, "loss": 11.6781, "step": 7315 }, { "epoch": 0.15314410114711546, "grad_norm": 0.23230744898319244, "learning_rate": 0.00019872674089359093, "loss": 11.6684, "step": 7316 }, { "epoch": 0.1531650339110776, "grad_norm": 0.26525843143463135, "learning_rate": 0.00019872639210504644, "loss": 11.6963, "step": 7317 }, { "epoch": 0.15318596667503978, "grad_norm": 0.2313872128725052, "learning_rate": 0.00019872604326904222, "loss": 11.6727, "step": 7318 }, { "epoch": 0.15320689943900193, "grad_norm": 0.214797705411911, "learning_rate": 0.0001987256943855784, "loss": 11.6904, "step": 7319 }, { "epoch": 0.15322783220296407, "grad_norm": 0.2505885362625122, "learning_rate": 0.00019872534545465516, "loss": 11.6879, "step": 7320 }, { "epoch": 0.15324876496692624, "grad_norm": 0.2700836658477783, "learning_rate": 0.0001987249964762727, "loss": 11.6647, "step": 7321 }, { "epoch": 0.1532696977308884, "grad_norm": 0.1964952051639557, "learning_rate": 0.0001987246474504311, "loss": 11.6763, "step": 7322 }, { "epoch": 0.15329063049485053, "grad_norm": 0.22188639640808105, "learning_rate": 0.0001987242983771306, "loss": 11.683, "step": 7323 }, { "epoch": 0.1533115632588127, "grad_norm": 0.22196327149868011, "learning_rate": 0.00019872394925637136, "loss": 11.6684, "step": 7324 }, { "epoch": 0.15333249602277485, "grad_norm": 0.22293640673160553, "learning_rate": 0.00019872360008815355, "loss": 11.6807, "step": 7325 }, { "epoch": 0.153353428786737, "grad_norm": 0.24205386638641357, "learning_rate": 0.00019872325087247732, "loss": 11.6691, "step": 7326 }, { "epoch": 0.15337436155069917, "grad_norm": 0.2671475112438202, "learning_rate": 0.00019872290160934285, "loss": 11.6869, "step": 7327 }, { "epoch": 0.1533952943146613, "grad_norm": 0.25213441252708435, "learning_rate": 0.0001987225522987503, "loss": 11.6943, "step": 7328 }, { "epoch": 0.15341622707862346, "grad_norm": 0.2887894809246063, "learning_rate": 0.00019872220294069983, "loss": 11.6762, "step": 7329 }, { "epoch": 0.1534371598425856, "grad_norm": 0.3122015595436096, "learning_rate": 0.00019872185353519162, "loss": 11.6992, "step": 7330 }, { "epoch": 0.15345809260654777, "grad_norm": 0.2561491131782532, "learning_rate": 0.00019872150408222584, "loss": 11.679, "step": 7331 }, { "epoch": 0.15347902537050992, "grad_norm": 0.21144366264343262, "learning_rate": 0.00019872115458180266, "loss": 11.6799, "step": 7332 }, { "epoch": 0.15349995813447206, "grad_norm": 0.22549240291118622, "learning_rate": 0.00019872080503392225, "loss": 11.6897, "step": 7333 }, { "epoch": 0.15352089089843424, "grad_norm": 0.21206459403038025, "learning_rate": 0.00019872045543858476, "loss": 11.6773, "step": 7334 }, { "epoch": 0.15354182366239638, "grad_norm": 0.24533995985984802, "learning_rate": 0.00019872010579579036, "loss": 11.6817, "step": 7335 }, { "epoch": 0.15356275642635853, "grad_norm": 0.27779892086982727, "learning_rate": 0.00019871975610553924, "loss": 11.6791, "step": 7336 }, { "epoch": 0.1535836891903207, "grad_norm": 0.2403840273618698, "learning_rate": 0.00019871940636783153, "loss": 11.6727, "step": 7337 }, { "epoch": 0.15360462195428284, "grad_norm": 0.3258998394012451, "learning_rate": 0.00019871905658266746, "loss": 11.6791, "step": 7338 }, { "epoch": 0.153625554718245, "grad_norm": 0.23284666240215302, "learning_rate": 0.00019871870675004712, "loss": 11.6757, "step": 7339 }, { "epoch": 0.15364648748220716, "grad_norm": 0.22447596490383148, "learning_rate": 0.00019871835686997077, "loss": 11.6834, "step": 7340 }, { "epoch": 0.1536674202461693, "grad_norm": 0.4000603258609772, "learning_rate": 0.0001987180069424385, "loss": 11.693, "step": 7341 }, { "epoch": 0.15368835301013145, "grad_norm": 0.25832781195640564, "learning_rate": 0.0001987176569674505, "loss": 11.6725, "step": 7342 }, { "epoch": 0.15370928577409362, "grad_norm": 0.1993536651134491, "learning_rate": 0.00019871730694500693, "loss": 11.6847, "step": 7343 }, { "epoch": 0.15373021853805577, "grad_norm": 0.30269941687583923, "learning_rate": 0.000198716956875108, "loss": 11.6952, "step": 7344 }, { "epoch": 0.1537511513020179, "grad_norm": 0.3301282525062561, "learning_rate": 0.00019871660675775386, "loss": 11.694, "step": 7345 }, { "epoch": 0.15377208406598009, "grad_norm": 0.22403572499752045, "learning_rate": 0.00019871625659294463, "loss": 11.689, "step": 7346 }, { "epoch": 0.15379301682994223, "grad_norm": 0.23819409310817719, "learning_rate": 0.00019871590638068055, "loss": 11.6626, "step": 7347 }, { "epoch": 0.15381394959390438, "grad_norm": 0.29726114869117737, "learning_rate": 0.00019871555612096175, "loss": 11.6784, "step": 7348 }, { "epoch": 0.15383488235786652, "grad_norm": 0.27779629826545715, "learning_rate": 0.0001987152058137884, "loss": 11.6777, "step": 7349 }, { "epoch": 0.1538558151218287, "grad_norm": 0.23880253732204437, "learning_rate": 0.00019871485545916066, "loss": 11.6773, "step": 7350 }, { "epoch": 0.15387674788579084, "grad_norm": 0.252645879983902, "learning_rate": 0.00019871450505707871, "loss": 11.6888, "step": 7351 }, { "epoch": 0.15389768064975298, "grad_norm": 0.2268541306257248, "learning_rate": 0.00019871415460754273, "loss": 11.6936, "step": 7352 }, { "epoch": 0.15391861341371516, "grad_norm": 0.22165921330451965, "learning_rate": 0.0001987138041105529, "loss": 11.6805, "step": 7353 }, { "epoch": 0.1539395461776773, "grad_norm": 0.23805862665176392, "learning_rate": 0.00019871345356610933, "loss": 11.6927, "step": 7354 }, { "epoch": 0.15396047894163944, "grad_norm": 0.25230681896209717, "learning_rate": 0.00019871310297421224, "loss": 11.6836, "step": 7355 }, { "epoch": 0.15398141170560162, "grad_norm": 0.2142244130373001, "learning_rate": 0.00019871275233486179, "loss": 11.6745, "step": 7356 }, { "epoch": 0.15400234446956376, "grad_norm": 0.22057127952575684, "learning_rate": 0.0001987124016480581, "loss": 11.6845, "step": 7357 }, { "epoch": 0.1540232772335259, "grad_norm": 0.24022334814071655, "learning_rate": 0.00019871205091380145, "loss": 11.6745, "step": 7358 }, { "epoch": 0.15404420999748808, "grad_norm": 0.2646677792072296, "learning_rate": 0.0001987117001320919, "loss": 11.6698, "step": 7359 }, { "epoch": 0.15406514276145022, "grad_norm": 0.23349955677986145, "learning_rate": 0.00019871134930292964, "loss": 11.678, "step": 7360 }, { "epoch": 0.15408607552541237, "grad_norm": 0.22131988406181335, "learning_rate": 0.00019871099842631488, "loss": 11.6883, "step": 7361 }, { "epoch": 0.15410700828937454, "grad_norm": 0.2273695319890976, "learning_rate": 0.00019871064750224778, "loss": 11.6753, "step": 7362 }, { "epoch": 0.1541279410533367, "grad_norm": 0.2810184955596924, "learning_rate": 0.00019871029653072847, "loss": 11.6737, "step": 7363 }, { "epoch": 0.15414887381729883, "grad_norm": 0.2089732587337494, "learning_rate": 0.00019870994551175714, "loss": 11.6774, "step": 7364 }, { "epoch": 0.15416980658126098, "grad_norm": 0.22168800234794617, "learning_rate": 0.00019870959444533398, "loss": 11.6592, "step": 7365 }, { "epoch": 0.15419073934522315, "grad_norm": 0.21250919997692108, "learning_rate": 0.0001987092433314591, "loss": 11.702, "step": 7366 }, { "epoch": 0.1542116721091853, "grad_norm": 0.22775793075561523, "learning_rate": 0.00019870889217013274, "loss": 11.6881, "step": 7367 }, { "epoch": 0.15423260487314744, "grad_norm": 0.20162084698677063, "learning_rate": 0.00019870854096135505, "loss": 11.6782, "step": 7368 }, { "epoch": 0.1542535376371096, "grad_norm": 0.27131450176239014, "learning_rate": 0.00019870818970512617, "loss": 11.6951, "step": 7369 }, { "epoch": 0.15427447040107176, "grad_norm": 0.2145732045173645, "learning_rate": 0.00019870783840144628, "loss": 11.6743, "step": 7370 }, { "epoch": 0.1542954031650339, "grad_norm": 0.22349171340465546, "learning_rate": 0.00019870748705031556, "loss": 11.6715, "step": 7371 }, { "epoch": 0.15431633592899607, "grad_norm": 0.27957797050476074, "learning_rate": 0.00019870713565173418, "loss": 11.689, "step": 7372 }, { "epoch": 0.15433726869295822, "grad_norm": 0.2187732756137848, "learning_rate": 0.0001987067842057023, "loss": 11.6661, "step": 7373 }, { "epoch": 0.15435820145692036, "grad_norm": 0.2971084415912628, "learning_rate": 0.00019870643271222008, "loss": 11.6829, "step": 7374 }, { "epoch": 0.15437913422088254, "grad_norm": 0.25016096234321594, "learning_rate": 0.0001987060811712877, "loss": 11.6717, "step": 7375 }, { "epoch": 0.15440006698484468, "grad_norm": 0.3224141299724579, "learning_rate": 0.00019870572958290535, "loss": 11.6961, "step": 7376 }, { "epoch": 0.15442099974880683, "grad_norm": 0.2743781805038452, "learning_rate": 0.00019870537794707314, "loss": 11.6645, "step": 7377 }, { "epoch": 0.154441932512769, "grad_norm": 0.23565199971199036, "learning_rate": 0.00019870502626379127, "loss": 11.6641, "step": 7378 }, { "epoch": 0.15446286527673114, "grad_norm": 0.2569376528263092, "learning_rate": 0.00019870467453305996, "loss": 11.6868, "step": 7379 }, { "epoch": 0.1544837980406933, "grad_norm": 0.20415833592414856, "learning_rate": 0.00019870432275487931, "loss": 11.6625, "step": 7380 }, { "epoch": 0.15450473080465546, "grad_norm": 0.2065693587064743, "learning_rate": 0.00019870397092924954, "loss": 11.686, "step": 7381 }, { "epoch": 0.1545256635686176, "grad_norm": 0.22672826051712036, "learning_rate": 0.00019870361905617077, "loss": 11.6742, "step": 7382 }, { "epoch": 0.15454659633257975, "grad_norm": 0.30177581310272217, "learning_rate": 0.0001987032671356432, "loss": 11.6695, "step": 7383 }, { "epoch": 0.1545675290965419, "grad_norm": 0.24114522337913513, "learning_rate": 0.000198702915167667, "loss": 11.6767, "step": 7384 }, { "epoch": 0.15458846186050407, "grad_norm": 0.2671455144882202, "learning_rate": 0.0001987025631522423, "loss": 11.6953, "step": 7385 }, { "epoch": 0.1546093946244662, "grad_norm": 0.2556759715080261, "learning_rate": 0.00019870221108936933, "loss": 11.6663, "step": 7386 }, { "epoch": 0.15463032738842836, "grad_norm": 0.20750443637371063, "learning_rate": 0.00019870185897904823, "loss": 11.6737, "step": 7387 }, { "epoch": 0.15465126015239053, "grad_norm": 0.2451443076133728, "learning_rate": 0.00019870150682127914, "loss": 11.673, "step": 7388 }, { "epoch": 0.15467219291635267, "grad_norm": 0.1879292130470276, "learning_rate": 0.0001987011546160623, "loss": 11.6651, "step": 7389 }, { "epoch": 0.15469312568031482, "grad_norm": 0.242267906665802, "learning_rate": 0.0001987008023633978, "loss": 11.6779, "step": 7390 }, { "epoch": 0.154714058444277, "grad_norm": 0.1905507594347, "learning_rate": 0.00019870045006328585, "loss": 11.6774, "step": 7391 }, { "epoch": 0.15473499120823914, "grad_norm": 0.2085397094488144, "learning_rate": 0.00019870009771572663, "loss": 11.669, "step": 7392 }, { "epoch": 0.15475592397220128, "grad_norm": 0.20690365135669708, "learning_rate": 0.00019869974532072028, "loss": 11.6856, "step": 7393 }, { "epoch": 0.15477685673616345, "grad_norm": 0.20950661599636078, "learning_rate": 0.000198699392878267, "loss": 11.6951, "step": 7394 }, { "epoch": 0.1547977895001256, "grad_norm": 0.23143184185028076, "learning_rate": 0.00019869904038836693, "loss": 11.6882, "step": 7395 }, { "epoch": 0.15481872226408774, "grad_norm": 0.26925796270370483, "learning_rate": 0.00019869868785102026, "loss": 11.6708, "step": 7396 }, { "epoch": 0.15483965502804992, "grad_norm": 0.32611969113349915, "learning_rate": 0.00019869833526622713, "loss": 11.6899, "step": 7397 }, { "epoch": 0.15486058779201206, "grad_norm": 0.2652512788772583, "learning_rate": 0.00019869798263398778, "loss": 11.6887, "step": 7398 }, { "epoch": 0.1548815205559742, "grad_norm": 0.34370484948158264, "learning_rate": 0.0001986976299543023, "loss": 11.6879, "step": 7399 }, { "epoch": 0.15490245331993635, "grad_norm": 0.2322913557291031, "learning_rate": 0.00019869727722717092, "loss": 11.6781, "step": 7400 }, { "epoch": 0.15492338608389852, "grad_norm": 0.19194239377975464, "learning_rate": 0.00019869692445259374, "loss": 11.6732, "step": 7401 }, { "epoch": 0.15494431884786067, "grad_norm": 0.21378184854984283, "learning_rate": 0.000198696571630571, "loss": 11.6679, "step": 7402 }, { "epoch": 0.1549652516118228, "grad_norm": 0.19498515129089355, "learning_rate": 0.00019869621876110282, "loss": 11.6845, "step": 7403 }, { "epoch": 0.15498618437578499, "grad_norm": 0.22541335225105286, "learning_rate": 0.0001986958658441894, "loss": 11.6785, "step": 7404 }, { "epoch": 0.15500711713974713, "grad_norm": 0.20317429304122925, "learning_rate": 0.0001986955128798309, "loss": 11.6765, "step": 7405 }, { "epoch": 0.15502804990370928, "grad_norm": 0.23530946671962738, "learning_rate": 0.00019869515986802746, "loss": 11.6633, "step": 7406 }, { "epoch": 0.15504898266767145, "grad_norm": 0.2988080084323883, "learning_rate": 0.0001986948068087793, "loss": 11.6801, "step": 7407 }, { "epoch": 0.1550699154316336, "grad_norm": 0.2077287882566452, "learning_rate": 0.0001986944537020866, "loss": 11.675, "step": 7408 }, { "epoch": 0.15509084819559574, "grad_norm": 0.2130931168794632, "learning_rate": 0.0001986941005479495, "loss": 11.6695, "step": 7409 }, { "epoch": 0.1551117809595579, "grad_norm": 0.2544954717159271, "learning_rate": 0.00019869374734636812, "loss": 11.6846, "step": 7410 }, { "epoch": 0.15513271372352005, "grad_norm": 0.24567911028862, "learning_rate": 0.00019869339409734268, "loss": 11.6872, "step": 7411 }, { "epoch": 0.1551536464874822, "grad_norm": 0.2335902452468872, "learning_rate": 0.0001986930408008734, "loss": 11.6866, "step": 7412 }, { "epoch": 0.15517457925144437, "grad_norm": 0.2761474847793579, "learning_rate": 0.00019869268745696035, "loss": 11.6744, "step": 7413 }, { "epoch": 0.15519551201540652, "grad_norm": 0.2294432669878006, "learning_rate": 0.00019869233406560378, "loss": 11.674, "step": 7414 }, { "epoch": 0.15521644477936866, "grad_norm": 0.29949095845222473, "learning_rate": 0.0001986919806268038, "loss": 11.707, "step": 7415 }, { "epoch": 0.15523737754333083, "grad_norm": 0.2945214509963989, "learning_rate": 0.0001986916271405606, "loss": 11.6707, "step": 7416 }, { "epoch": 0.15525831030729298, "grad_norm": 0.24547550082206726, "learning_rate": 0.00019869127360687438, "loss": 11.69, "step": 7417 }, { "epoch": 0.15527924307125512, "grad_norm": 0.20742250978946686, "learning_rate": 0.00019869092002574527, "loss": 11.6757, "step": 7418 }, { "epoch": 0.15530017583521727, "grad_norm": 0.20169483125209808, "learning_rate": 0.00019869056639717348, "loss": 11.6883, "step": 7419 }, { "epoch": 0.15532110859917944, "grad_norm": 0.23527811467647552, "learning_rate": 0.00019869021272115914, "loss": 11.6794, "step": 7420 }, { "epoch": 0.1553420413631416, "grad_norm": 0.20892398059368134, "learning_rate": 0.00019868985899770244, "loss": 11.6779, "step": 7421 }, { "epoch": 0.15536297412710373, "grad_norm": 0.23224331438541412, "learning_rate": 0.00019868950522680353, "loss": 11.6866, "step": 7422 }, { "epoch": 0.1553839068910659, "grad_norm": 0.22927112877368927, "learning_rate": 0.00019868915140846264, "loss": 11.6925, "step": 7423 }, { "epoch": 0.15540483965502805, "grad_norm": 0.23367182910442352, "learning_rate": 0.00019868879754267988, "loss": 11.6878, "step": 7424 }, { "epoch": 0.1554257724189902, "grad_norm": 0.2117266058921814, "learning_rate": 0.00019868844362945542, "loss": 11.6742, "step": 7425 }, { "epoch": 0.15544670518295237, "grad_norm": 0.2964108884334564, "learning_rate": 0.00019868808966878944, "loss": 11.6854, "step": 7426 }, { "epoch": 0.1554676379469145, "grad_norm": 0.2104344218969345, "learning_rate": 0.00019868773566068216, "loss": 11.6721, "step": 7427 }, { "epoch": 0.15548857071087666, "grad_norm": 0.22337424755096436, "learning_rate": 0.00019868738160513366, "loss": 11.6739, "step": 7428 }, { "epoch": 0.15550950347483883, "grad_norm": 0.2628687620162964, "learning_rate": 0.0001986870275021442, "loss": 11.6782, "step": 7429 }, { "epoch": 0.15553043623880097, "grad_norm": 0.23357433080673218, "learning_rate": 0.00019868667335171388, "loss": 11.6655, "step": 7430 }, { "epoch": 0.15555136900276312, "grad_norm": 0.21888069808483124, "learning_rate": 0.00019868631915384292, "loss": 11.6665, "step": 7431 }, { "epoch": 0.1555723017667253, "grad_norm": 0.21253050863742828, "learning_rate": 0.00019868596490853145, "loss": 11.6635, "step": 7432 }, { "epoch": 0.15559323453068744, "grad_norm": 0.22779414057731628, "learning_rate": 0.00019868561061577966, "loss": 11.6741, "step": 7433 }, { "epoch": 0.15561416729464958, "grad_norm": 0.21775394678115845, "learning_rate": 0.00019868525627558773, "loss": 11.6665, "step": 7434 }, { "epoch": 0.15563510005861175, "grad_norm": 0.20428518950939178, "learning_rate": 0.0001986849018879558, "loss": 11.679, "step": 7435 }, { "epoch": 0.1556560328225739, "grad_norm": 0.26802387833595276, "learning_rate": 0.0001986845474528841, "loss": 11.6758, "step": 7436 }, { "epoch": 0.15567696558653604, "grad_norm": 0.2944480776786804, "learning_rate": 0.0001986841929703727, "loss": 11.6882, "step": 7437 }, { "epoch": 0.1556978983504982, "grad_norm": 0.27470922470092773, "learning_rate": 0.0001986838384404219, "loss": 11.6951, "step": 7438 }, { "epoch": 0.15571883111446036, "grad_norm": 0.27826133370399475, "learning_rate": 0.00019868348386303174, "loss": 11.6779, "step": 7439 }, { "epoch": 0.1557397638784225, "grad_norm": 0.29645681381225586, "learning_rate": 0.0001986831292382025, "loss": 11.684, "step": 7440 }, { "epoch": 0.15576069664238465, "grad_norm": 0.2015761137008667, "learning_rate": 0.00019868277456593428, "loss": 11.6792, "step": 7441 }, { "epoch": 0.15578162940634682, "grad_norm": 0.21453650295734406, "learning_rate": 0.00019868241984622725, "loss": 11.6742, "step": 7442 }, { "epoch": 0.15580256217030897, "grad_norm": 0.2603822946548462, "learning_rate": 0.00019868206507908163, "loss": 11.6907, "step": 7443 }, { "epoch": 0.1558234949342711, "grad_norm": 0.2763088047504425, "learning_rate": 0.00019868171026449756, "loss": 11.6931, "step": 7444 }, { "epoch": 0.15584442769823328, "grad_norm": 0.289803683757782, "learning_rate": 0.00019868135540247522, "loss": 11.6902, "step": 7445 }, { "epoch": 0.15586536046219543, "grad_norm": 0.22798354923725128, "learning_rate": 0.00019868100049301476, "loss": 11.6893, "step": 7446 }, { "epoch": 0.15588629322615757, "grad_norm": 0.22948132455348969, "learning_rate": 0.00019868064553611636, "loss": 11.6785, "step": 7447 }, { "epoch": 0.15590722599011975, "grad_norm": 0.1877768635749817, "learning_rate": 0.0001986802905317802, "loss": 11.6734, "step": 7448 }, { "epoch": 0.1559281587540819, "grad_norm": 0.28204119205474854, "learning_rate": 0.00019867993548000647, "loss": 11.6692, "step": 7449 }, { "epoch": 0.15594909151804404, "grad_norm": 0.20191796123981476, "learning_rate": 0.00019867958038079532, "loss": 11.6867, "step": 7450 }, { "epoch": 0.1559700242820062, "grad_norm": 0.282042920589447, "learning_rate": 0.0001986792252341469, "loss": 11.6875, "step": 7451 }, { "epoch": 0.15599095704596835, "grad_norm": 0.27904245257377625, "learning_rate": 0.00019867887004006138, "loss": 11.6843, "step": 7452 }, { "epoch": 0.1560118898099305, "grad_norm": 0.36596572399139404, "learning_rate": 0.000198678514798539, "loss": 11.6824, "step": 7453 }, { "epoch": 0.15603282257389264, "grad_norm": 0.24911338090896606, "learning_rate": 0.00019867815950957982, "loss": 11.6844, "step": 7454 }, { "epoch": 0.15605375533785482, "grad_norm": 0.24573315680027008, "learning_rate": 0.00019867780417318412, "loss": 11.6605, "step": 7455 }, { "epoch": 0.15607468810181696, "grad_norm": 0.2417161613702774, "learning_rate": 0.000198677448789352, "loss": 11.6811, "step": 7456 }, { "epoch": 0.1560956208657791, "grad_norm": 0.28084394335746765, "learning_rate": 0.00019867709335808366, "loss": 11.6852, "step": 7457 }, { "epoch": 0.15611655362974128, "grad_norm": 0.33943817019462585, "learning_rate": 0.00019867673787937927, "loss": 11.6884, "step": 7458 }, { "epoch": 0.15613748639370342, "grad_norm": 0.2246236354112625, "learning_rate": 0.00019867638235323897, "loss": 11.6806, "step": 7459 }, { "epoch": 0.15615841915766557, "grad_norm": 0.295353502035141, "learning_rate": 0.00019867602677966297, "loss": 11.6934, "step": 7460 }, { "epoch": 0.15617935192162774, "grad_norm": 0.210489884018898, "learning_rate": 0.00019867567115865143, "loss": 11.6791, "step": 7461 }, { "epoch": 0.15620028468558989, "grad_norm": 0.2571679949760437, "learning_rate": 0.00019867531549020454, "loss": 11.6778, "step": 7462 }, { "epoch": 0.15622121744955203, "grad_norm": 0.26837295293807983, "learning_rate": 0.0001986749597743224, "loss": 11.7028, "step": 7463 }, { "epoch": 0.1562421502135142, "grad_norm": 0.23117895424365997, "learning_rate": 0.00019867460401100525, "loss": 11.6688, "step": 7464 }, { "epoch": 0.15626308297747635, "grad_norm": 0.26851513981819153, "learning_rate": 0.00019867424820025324, "loss": 11.6872, "step": 7465 }, { "epoch": 0.1562840157414385, "grad_norm": 0.2323227822780609, "learning_rate": 0.00019867389234206654, "loss": 11.6884, "step": 7466 }, { "epoch": 0.15630494850540066, "grad_norm": 0.22129559516906738, "learning_rate": 0.00019867353643644533, "loss": 11.6698, "step": 7467 }, { "epoch": 0.1563258812693628, "grad_norm": 0.23129554092884064, "learning_rate": 0.00019867318048338978, "loss": 11.6734, "step": 7468 }, { "epoch": 0.15634681403332495, "grad_norm": 0.2985058128833771, "learning_rate": 0.00019867282448290005, "loss": 11.6754, "step": 7469 }, { "epoch": 0.15636774679728713, "grad_norm": 0.22045770287513733, "learning_rate": 0.0001986724684349763, "loss": 11.6897, "step": 7470 }, { "epoch": 0.15638867956124927, "grad_norm": 0.24877341091632843, "learning_rate": 0.00019867211233961873, "loss": 11.6722, "step": 7471 }, { "epoch": 0.15640961232521142, "grad_norm": 0.24904344975948334, "learning_rate": 0.0001986717561968275, "loss": 11.6841, "step": 7472 }, { "epoch": 0.15643054508917356, "grad_norm": 0.2115515172481537, "learning_rate": 0.00019867140000660277, "loss": 11.6699, "step": 7473 }, { "epoch": 0.15645147785313573, "grad_norm": 0.2026280015707016, "learning_rate": 0.00019867104376894472, "loss": 11.674, "step": 7474 }, { "epoch": 0.15647241061709788, "grad_norm": 0.21129973232746124, "learning_rate": 0.00019867068748385353, "loss": 11.679, "step": 7475 }, { "epoch": 0.15649334338106002, "grad_norm": 0.21393926441669464, "learning_rate": 0.00019867033115132936, "loss": 11.684, "step": 7476 }, { "epoch": 0.1565142761450222, "grad_norm": 0.28297001123428345, "learning_rate": 0.0001986699747713724, "loss": 11.6786, "step": 7477 }, { "epoch": 0.15653520890898434, "grad_norm": 0.21441207826137543, "learning_rate": 0.00019866961834398277, "loss": 11.6804, "step": 7478 }, { "epoch": 0.15655614167294649, "grad_norm": 0.3055580258369446, "learning_rate": 0.0001986692618691607, "loss": 11.6761, "step": 7479 }, { "epoch": 0.15657707443690866, "grad_norm": 0.2156405746936798, "learning_rate": 0.00019866890534690635, "loss": 11.6681, "step": 7480 }, { "epoch": 0.1565980072008708, "grad_norm": 0.2284359335899353, "learning_rate": 0.00019866854877721986, "loss": 11.6688, "step": 7481 }, { "epoch": 0.15661893996483295, "grad_norm": 0.25884202122688293, "learning_rate": 0.0001986681921601014, "loss": 11.6865, "step": 7482 }, { "epoch": 0.15663987272879512, "grad_norm": 0.23667217791080475, "learning_rate": 0.00019866783549555122, "loss": 11.6818, "step": 7483 }, { "epoch": 0.15666080549275727, "grad_norm": 0.27643081545829773, "learning_rate": 0.0001986674787835694, "loss": 11.6939, "step": 7484 }, { "epoch": 0.1566817382567194, "grad_norm": 0.25135257840156555, "learning_rate": 0.00019866712202415613, "loss": 11.6596, "step": 7485 }, { "epoch": 0.15670267102068158, "grad_norm": 0.24096179008483887, "learning_rate": 0.00019866676521731165, "loss": 11.6617, "step": 7486 }, { "epoch": 0.15672360378464373, "grad_norm": 0.2322496622800827, "learning_rate": 0.00019866640836303603, "loss": 11.683, "step": 7487 }, { "epoch": 0.15674453654860587, "grad_norm": 0.24421946704387665, "learning_rate": 0.00019866605146132954, "loss": 11.6679, "step": 7488 }, { "epoch": 0.15676546931256802, "grad_norm": 0.24984806776046753, "learning_rate": 0.00019866569451219225, "loss": 11.6883, "step": 7489 }, { "epoch": 0.1567864020765302, "grad_norm": 0.2861325740814209, "learning_rate": 0.00019866533751562439, "loss": 11.6815, "step": 7490 }, { "epoch": 0.15680733484049234, "grad_norm": 0.2670793831348419, "learning_rate": 0.00019866498047162615, "loss": 11.6831, "step": 7491 }, { "epoch": 0.15682826760445448, "grad_norm": 0.265995055437088, "learning_rate": 0.00019866462338019765, "loss": 11.6813, "step": 7492 }, { "epoch": 0.15684920036841665, "grad_norm": 0.25662386417388916, "learning_rate": 0.00019866426624133912, "loss": 11.6738, "step": 7493 }, { "epoch": 0.1568701331323788, "grad_norm": 0.27061575651168823, "learning_rate": 0.00019866390905505066, "loss": 11.6666, "step": 7494 }, { "epoch": 0.15689106589634094, "grad_norm": 0.2831328511238098, "learning_rate": 0.0001986635518213325, "loss": 11.6823, "step": 7495 }, { "epoch": 0.15691199866030311, "grad_norm": 0.22484709322452545, "learning_rate": 0.00019866319454018482, "loss": 11.6861, "step": 7496 }, { "epoch": 0.15693293142426526, "grad_norm": 0.2563515901565552, "learning_rate": 0.00019866283721160771, "loss": 11.6833, "step": 7497 }, { "epoch": 0.1569538641882274, "grad_norm": 0.26598459482192993, "learning_rate": 0.00019866247983560146, "loss": 11.6765, "step": 7498 }, { "epoch": 0.15697479695218958, "grad_norm": 0.2299756407737732, "learning_rate": 0.00019866212241216613, "loss": 11.6519, "step": 7499 }, { "epoch": 0.15699572971615172, "grad_norm": 0.24527254700660706, "learning_rate": 0.00019866176494130197, "loss": 11.6844, "step": 7500 }, { "epoch": 0.15701666248011387, "grad_norm": 0.2431299388408661, "learning_rate": 0.00019866140742300908, "loss": 11.6926, "step": 7501 }, { "epoch": 0.15703759524407604, "grad_norm": 0.21778947114944458, "learning_rate": 0.00019866104985728772, "loss": 11.6843, "step": 7502 }, { "epoch": 0.15705852800803818, "grad_norm": 0.24626289308071136, "learning_rate": 0.000198660692244138, "loss": 11.7029, "step": 7503 }, { "epoch": 0.15707946077200033, "grad_norm": 0.23460906744003296, "learning_rate": 0.00019866033458356013, "loss": 11.697, "step": 7504 }, { "epoch": 0.1571003935359625, "grad_norm": 0.3519129157066345, "learning_rate": 0.00019865997687555423, "loss": 11.6634, "step": 7505 }, { "epoch": 0.15712132629992465, "grad_norm": 0.26221799850463867, "learning_rate": 0.0001986596191201205, "loss": 11.6702, "step": 7506 }, { "epoch": 0.1571422590638868, "grad_norm": 0.23823179304599762, "learning_rate": 0.00019865926131725913, "loss": 11.6928, "step": 7507 }, { "epoch": 0.15716319182784894, "grad_norm": 0.23260965943336487, "learning_rate": 0.00019865890346697026, "loss": 11.705, "step": 7508 }, { "epoch": 0.1571841245918111, "grad_norm": 0.21066556870937347, "learning_rate": 0.0001986585455692541, "loss": 11.6929, "step": 7509 }, { "epoch": 0.15720505735577325, "grad_norm": 0.23955991864204407, "learning_rate": 0.0001986581876241108, "loss": 11.6755, "step": 7510 }, { "epoch": 0.1572259901197354, "grad_norm": 0.2812610864639282, "learning_rate": 0.0001986578296315405, "loss": 11.6738, "step": 7511 }, { "epoch": 0.15724692288369757, "grad_norm": 0.24242247641086578, "learning_rate": 0.0001986574715915434, "loss": 11.6867, "step": 7512 }, { "epoch": 0.15726785564765972, "grad_norm": 0.2608978748321533, "learning_rate": 0.00019865711350411974, "loss": 11.6752, "step": 7513 }, { "epoch": 0.15728878841162186, "grad_norm": 0.2329067587852478, "learning_rate": 0.00019865675536926957, "loss": 11.683, "step": 7514 }, { "epoch": 0.15730972117558403, "grad_norm": 0.19999995827674866, "learning_rate": 0.00019865639718699316, "loss": 11.681, "step": 7515 }, { "epoch": 0.15733065393954618, "grad_norm": 0.18912719190120697, "learning_rate": 0.0001986560389572906, "loss": 11.6802, "step": 7516 }, { "epoch": 0.15735158670350832, "grad_norm": 0.2865043878555298, "learning_rate": 0.00019865568068016214, "loss": 11.682, "step": 7517 }, { "epoch": 0.1573725194674705, "grad_norm": 0.28781411051750183, "learning_rate": 0.0001986553223556079, "loss": 11.7001, "step": 7518 }, { "epoch": 0.15739345223143264, "grad_norm": 0.3113858699798584, "learning_rate": 0.00019865496398362807, "loss": 11.685, "step": 7519 }, { "epoch": 0.15741438499539478, "grad_norm": 0.24664142727851868, "learning_rate": 0.00019865460556422283, "loss": 11.6938, "step": 7520 }, { "epoch": 0.15743531775935696, "grad_norm": 0.246616929769516, "learning_rate": 0.00019865424709739236, "loss": 11.6771, "step": 7521 }, { "epoch": 0.1574562505233191, "grad_norm": 0.23978087306022644, "learning_rate": 0.00019865388858313678, "loss": 11.6789, "step": 7522 }, { "epoch": 0.15747718328728125, "grad_norm": 0.29501238465309143, "learning_rate": 0.0001986535300214563, "loss": 11.6982, "step": 7523 }, { "epoch": 0.15749811605124342, "grad_norm": 0.2501520812511444, "learning_rate": 0.0001986531714123511, "loss": 11.6739, "step": 7524 }, { "epoch": 0.15751904881520556, "grad_norm": 0.23388876020908356, "learning_rate": 0.00019865281275582137, "loss": 11.6658, "step": 7525 }, { "epoch": 0.1575399815791677, "grad_norm": 0.2772158682346344, "learning_rate": 0.00019865245405186724, "loss": 11.6737, "step": 7526 }, { "epoch": 0.15756091434312985, "grad_norm": 0.3398595154285431, "learning_rate": 0.0001986520953004889, "loss": 11.6975, "step": 7527 }, { "epoch": 0.15758184710709203, "grad_norm": 0.23839950561523438, "learning_rate": 0.00019865173650168648, "loss": 11.6904, "step": 7528 }, { "epoch": 0.15760277987105417, "grad_norm": 0.23328901827335358, "learning_rate": 0.00019865137765546025, "loss": 11.6824, "step": 7529 }, { "epoch": 0.15762371263501632, "grad_norm": 0.36356717348098755, "learning_rate": 0.0001986510187618103, "loss": 11.6839, "step": 7530 }, { "epoch": 0.1576446453989785, "grad_norm": 0.25406771898269653, "learning_rate": 0.00019865065982073682, "loss": 11.6848, "step": 7531 }, { "epoch": 0.15766557816294063, "grad_norm": 0.2812620997428894, "learning_rate": 0.00019865030083224, "loss": 11.6751, "step": 7532 }, { "epoch": 0.15768651092690278, "grad_norm": 0.2039509415626526, "learning_rate": 0.00019864994179632, "loss": 11.6763, "step": 7533 }, { "epoch": 0.15770744369086495, "grad_norm": 0.23512651026248932, "learning_rate": 0.00019864958271297703, "loss": 11.6835, "step": 7534 }, { "epoch": 0.1577283764548271, "grad_norm": 0.30964046716690063, "learning_rate": 0.0001986492235822112, "loss": 11.7041, "step": 7535 }, { "epoch": 0.15774930921878924, "grad_norm": 0.2447679489850998, "learning_rate": 0.0001986488644040227, "loss": 11.6681, "step": 7536 }, { "epoch": 0.1577702419827514, "grad_norm": 0.23736725747585297, "learning_rate": 0.00019864850517841172, "loss": 11.6809, "step": 7537 }, { "epoch": 0.15779117474671356, "grad_norm": 0.22955110669136047, "learning_rate": 0.00019864814590537844, "loss": 11.6718, "step": 7538 }, { "epoch": 0.1578121075106757, "grad_norm": 0.2441832721233368, "learning_rate": 0.000198647786584923, "loss": 11.6724, "step": 7539 }, { "epoch": 0.15783304027463788, "grad_norm": 0.23215723037719727, "learning_rate": 0.00019864742721704563, "loss": 11.6818, "step": 7540 }, { "epoch": 0.15785397303860002, "grad_norm": 0.22031860053539276, "learning_rate": 0.00019864706780174643, "loss": 11.6659, "step": 7541 }, { "epoch": 0.15787490580256217, "grad_norm": 0.23018695414066315, "learning_rate": 0.0001986467083390256, "loss": 11.6798, "step": 7542 }, { "epoch": 0.1578958385665243, "grad_norm": 0.2581791281700134, "learning_rate": 0.00019864634882888336, "loss": 11.6699, "step": 7543 }, { "epoch": 0.15791677133048648, "grad_norm": 0.23363006114959717, "learning_rate": 0.00019864598927131982, "loss": 11.6793, "step": 7544 }, { "epoch": 0.15793770409444863, "grad_norm": 0.25972357392311096, "learning_rate": 0.00019864562966633516, "loss": 11.672, "step": 7545 }, { "epoch": 0.15795863685841077, "grad_norm": 0.38112562894821167, "learning_rate": 0.0001986452700139296, "loss": 11.6696, "step": 7546 }, { "epoch": 0.15797956962237295, "grad_norm": 0.26556506752967834, "learning_rate": 0.00019864491031410325, "loss": 11.689, "step": 7547 }, { "epoch": 0.1580005023863351, "grad_norm": 0.25444984436035156, "learning_rate": 0.00019864455056685634, "loss": 11.6637, "step": 7548 }, { "epoch": 0.15802143515029723, "grad_norm": 0.24557843804359436, "learning_rate": 0.000198644190772189, "loss": 11.6909, "step": 7549 }, { "epoch": 0.1580423679142594, "grad_norm": 0.26219791173934937, "learning_rate": 0.00019864383093010145, "loss": 11.6751, "step": 7550 }, { "epoch": 0.15806330067822155, "grad_norm": 0.2453841120004654, "learning_rate": 0.0001986434710405938, "loss": 11.6814, "step": 7551 }, { "epoch": 0.1580842334421837, "grad_norm": 0.2944642901420593, "learning_rate": 0.00019864311110366628, "loss": 11.6934, "step": 7552 }, { "epoch": 0.15810516620614587, "grad_norm": 0.3207341432571411, "learning_rate": 0.00019864275111931902, "loss": 11.6903, "step": 7553 }, { "epoch": 0.15812609897010801, "grad_norm": 0.21421010792255402, "learning_rate": 0.00019864239108755222, "loss": 11.6669, "step": 7554 }, { "epoch": 0.15814703173407016, "grad_norm": 0.28190168738365173, "learning_rate": 0.00019864203100836605, "loss": 11.6886, "step": 7555 }, { "epoch": 0.15816796449803233, "grad_norm": 0.2311365157365799, "learning_rate": 0.00019864167088176068, "loss": 11.6705, "step": 7556 }, { "epoch": 0.15818889726199448, "grad_norm": 0.2640654742717743, "learning_rate": 0.00019864131070773628, "loss": 11.6817, "step": 7557 }, { "epoch": 0.15820983002595662, "grad_norm": 0.22891764342784882, "learning_rate": 0.00019864095048629303, "loss": 11.6953, "step": 7558 }, { "epoch": 0.1582307627899188, "grad_norm": 0.23564615845680237, "learning_rate": 0.0001986405902174311, "loss": 11.6745, "step": 7559 }, { "epoch": 0.15825169555388094, "grad_norm": 0.22285889089107513, "learning_rate": 0.00019864022990115064, "loss": 11.6731, "step": 7560 }, { "epoch": 0.15827262831784308, "grad_norm": 0.22375552356243134, "learning_rate": 0.00019863986953745186, "loss": 11.6763, "step": 7561 }, { "epoch": 0.15829356108180523, "grad_norm": 0.22120115160942078, "learning_rate": 0.00019863950912633492, "loss": 11.6763, "step": 7562 }, { "epoch": 0.1583144938457674, "grad_norm": 0.24759750068187714, "learning_rate": 0.0001986391486678, "loss": 11.6711, "step": 7563 }, { "epoch": 0.15833542660972955, "grad_norm": 0.26949602365493774, "learning_rate": 0.00019863878816184723, "loss": 11.665, "step": 7564 }, { "epoch": 0.1583563593736917, "grad_norm": 0.31348684430122375, "learning_rate": 0.00019863842760847684, "loss": 11.6849, "step": 7565 }, { "epoch": 0.15837729213765386, "grad_norm": 0.31482213735580444, "learning_rate": 0.00019863806700768898, "loss": 11.6949, "step": 7566 }, { "epoch": 0.158398224901616, "grad_norm": 0.25274962186813354, "learning_rate": 0.00019863770635948383, "loss": 11.6735, "step": 7567 }, { "epoch": 0.15841915766557815, "grad_norm": 0.2566503882408142, "learning_rate": 0.00019863734566386154, "loss": 11.683, "step": 7568 }, { "epoch": 0.15844009042954033, "grad_norm": 0.1959918588399887, "learning_rate": 0.0001986369849208223, "loss": 11.6792, "step": 7569 }, { "epoch": 0.15846102319350247, "grad_norm": 0.3079065978527069, "learning_rate": 0.0001986366241303663, "loss": 11.6899, "step": 7570 }, { "epoch": 0.15848195595746462, "grad_norm": 0.2695024311542511, "learning_rate": 0.0001986362632924937, "loss": 11.6784, "step": 7571 }, { "epoch": 0.1585028887214268, "grad_norm": 0.3552788197994232, "learning_rate": 0.0001986359024072047, "loss": 11.6839, "step": 7572 }, { "epoch": 0.15852382148538893, "grad_norm": 0.246361643075943, "learning_rate": 0.00019863554147449942, "loss": 11.6789, "step": 7573 }, { "epoch": 0.15854475424935108, "grad_norm": 0.25717830657958984, "learning_rate": 0.00019863518049437805, "loss": 11.6655, "step": 7574 }, { "epoch": 0.15856568701331325, "grad_norm": 0.26852989196777344, "learning_rate": 0.00019863481946684076, "loss": 11.6887, "step": 7575 }, { "epoch": 0.1585866197772754, "grad_norm": 0.19357560575008392, "learning_rate": 0.00019863445839188776, "loss": 11.6791, "step": 7576 }, { "epoch": 0.15860755254123754, "grad_norm": 0.2168700098991394, "learning_rate": 0.00019863409726951918, "loss": 11.6916, "step": 7577 }, { "epoch": 0.1586284853051997, "grad_norm": 0.23830245435237885, "learning_rate": 0.00019863373609973524, "loss": 11.6849, "step": 7578 }, { "epoch": 0.15864941806916186, "grad_norm": 0.22075501084327698, "learning_rate": 0.00019863337488253607, "loss": 11.6795, "step": 7579 }, { "epoch": 0.158670350833124, "grad_norm": 0.22306892275810242, "learning_rate": 0.00019863301361792185, "loss": 11.6981, "step": 7580 }, { "epoch": 0.15869128359708615, "grad_norm": 0.21664586663246155, "learning_rate": 0.00019863265230589278, "loss": 11.675, "step": 7581 }, { "epoch": 0.15871221636104832, "grad_norm": 0.23840761184692383, "learning_rate": 0.000198632290946449, "loss": 11.6771, "step": 7582 }, { "epoch": 0.15873314912501046, "grad_norm": 0.25428131222724915, "learning_rate": 0.0001986319295395907, "loss": 11.6709, "step": 7583 }, { "epoch": 0.1587540818889726, "grad_norm": 0.2717326879501343, "learning_rate": 0.00019863156808531807, "loss": 11.6976, "step": 7584 }, { "epoch": 0.15877501465293478, "grad_norm": 0.26840144395828247, "learning_rate": 0.00019863120658363128, "loss": 11.6841, "step": 7585 }, { "epoch": 0.15879594741689693, "grad_norm": 0.22827459871768951, "learning_rate": 0.00019863084503453046, "loss": 11.6768, "step": 7586 }, { "epoch": 0.15881688018085907, "grad_norm": 0.23972275853157043, "learning_rate": 0.00019863048343801584, "loss": 11.671, "step": 7587 }, { "epoch": 0.15883781294482124, "grad_norm": 0.24709926545619965, "learning_rate": 0.00019863012179408758, "loss": 11.6905, "step": 7588 }, { "epoch": 0.1588587457087834, "grad_norm": 0.2117086499929428, "learning_rate": 0.0001986297601027458, "loss": 11.6759, "step": 7589 }, { "epoch": 0.15887967847274553, "grad_norm": 0.23432166874408722, "learning_rate": 0.00019862939836399077, "loss": 11.6875, "step": 7590 }, { "epoch": 0.1589006112367077, "grad_norm": 0.2612343430519104, "learning_rate": 0.00019862903657782256, "loss": 11.6737, "step": 7591 }, { "epoch": 0.15892154400066985, "grad_norm": 0.3939734995365143, "learning_rate": 0.00019862867474424145, "loss": 11.6843, "step": 7592 }, { "epoch": 0.158942476764632, "grad_norm": 0.2496805489063263, "learning_rate": 0.00019862831286324753, "loss": 11.7036, "step": 7593 }, { "epoch": 0.15896340952859417, "grad_norm": 0.2653671205043793, "learning_rate": 0.00019862795093484098, "loss": 11.6898, "step": 7594 }, { "epoch": 0.1589843422925563, "grad_norm": 0.21088330447673798, "learning_rate": 0.00019862758895902204, "loss": 11.6855, "step": 7595 }, { "epoch": 0.15900527505651846, "grad_norm": 0.24307455122470856, "learning_rate": 0.00019862722693579082, "loss": 11.6812, "step": 7596 }, { "epoch": 0.1590262078204806, "grad_norm": 0.28042858839035034, "learning_rate": 0.00019862686486514752, "loss": 11.6834, "step": 7597 }, { "epoch": 0.15904714058444278, "grad_norm": 0.223635733127594, "learning_rate": 0.00019862650274709228, "loss": 11.6683, "step": 7598 }, { "epoch": 0.15906807334840492, "grad_norm": 0.25147297978401184, "learning_rate": 0.00019862614058162534, "loss": 11.6907, "step": 7599 }, { "epoch": 0.15908900611236707, "grad_norm": 0.2591446340084076, "learning_rate": 0.00019862577836874686, "loss": 11.6809, "step": 7600 }, { "epoch": 0.15910993887632924, "grad_norm": 0.2390235811471939, "learning_rate": 0.00019862541610845695, "loss": 11.6898, "step": 7601 }, { "epoch": 0.15913087164029138, "grad_norm": 0.25575244426727295, "learning_rate": 0.00019862505380075583, "loss": 11.667, "step": 7602 }, { "epoch": 0.15915180440425353, "grad_norm": 0.3588978946208954, "learning_rate": 0.0001986246914456437, "loss": 11.6792, "step": 7603 }, { "epoch": 0.1591727371682157, "grad_norm": 0.2558763921260834, "learning_rate": 0.00019862432904312066, "loss": 11.6783, "step": 7604 }, { "epoch": 0.15919366993217784, "grad_norm": 0.2520032227039337, "learning_rate": 0.00019862396659318696, "loss": 11.6929, "step": 7605 }, { "epoch": 0.15921460269614, "grad_norm": 0.26933160424232483, "learning_rate": 0.00019862360409584272, "loss": 11.6835, "step": 7606 }, { "epoch": 0.15923553546010216, "grad_norm": 0.23551826179027557, "learning_rate": 0.00019862324155108814, "loss": 11.682, "step": 7607 }, { "epoch": 0.1592564682240643, "grad_norm": 0.26385655999183655, "learning_rate": 0.0001986228789589234, "loss": 11.6647, "step": 7608 }, { "epoch": 0.15927740098802645, "grad_norm": 0.2271515429019928, "learning_rate": 0.00019862251631934868, "loss": 11.6836, "step": 7609 }, { "epoch": 0.15929833375198862, "grad_norm": 0.21593037247657776, "learning_rate": 0.00019862215363236415, "loss": 11.6789, "step": 7610 }, { "epoch": 0.15931926651595077, "grad_norm": 0.23561222851276398, "learning_rate": 0.00019862179089796995, "loss": 11.6899, "step": 7611 }, { "epoch": 0.15934019927991291, "grad_norm": 0.21816043555736542, "learning_rate": 0.00019862142811616628, "loss": 11.6767, "step": 7612 }, { "epoch": 0.1593611320438751, "grad_norm": 0.22293713688850403, "learning_rate": 0.0001986210652869533, "loss": 11.6911, "step": 7613 }, { "epoch": 0.15938206480783723, "grad_norm": 0.2508317232131958, "learning_rate": 0.00019862070241033123, "loss": 11.6693, "step": 7614 }, { "epoch": 0.15940299757179938, "grad_norm": 0.3452923595905304, "learning_rate": 0.0001986203394863002, "loss": 11.6918, "step": 7615 }, { "epoch": 0.15942393033576152, "grad_norm": 0.4288461208343506, "learning_rate": 0.00019861997651486038, "loss": 11.6974, "step": 7616 }, { "epoch": 0.1594448630997237, "grad_norm": 0.2149713933467865, "learning_rate": 0.00019861961349601197, "loss": 11.6853, "step": 7617 }, { "epoch": 0.15946579586368584, "grad_norm": 0.23082879185676575, "learning_rate": 0.00019861925042975514, "loss": 11.6752, "step": 7618 }, { "epoch": 0.15948672862764798, "grad_norm": 0.2497207671403885, "learning_rate": 0.00019861888731609003, "loss": 11.6662, "step": 7619 }, { "epoch": 0.15950766139161016, "grad_norm": 0.2219698131084442, "learning_rate": 0.0001986185241550169, "loss": 11.6768, "step": 7620 }, { "epoch": 0.1595285941555723, "grad_norm": 0.2398543357849121, "learning_rate": 0.00019861816094653582, "loss": 11.669, "step": 7621 }, { "epoch": 0.15954952691953445, "grad_norm": 0.22673320770263672, "learning_rate": 0.00019861779769064704, "loss": 11.681, "step": 7622 }, { "epoch": 0.15957045968349662, "grad_norm": 0.19364239275455475, "learning_rate": 0.00019861743438735071, "loss": 11.6839, "step": 7623 }, { "epoch": 0.15959139244745876, "grad_norm": 0.1959104686975479, "learning_rate": 0.000198617071036647, "loss": 11.6711, "step": 7624 }, { "epoch": 0.1596123252114209, "grad_norm": 0.2830689549446106, "learning_rate": 0.00019861670763853608, "loss": 11.6865, "step": 7625 }, { "epoch": 0.15963325797538308, "grad_norm": 0.25202614068984985, "learning_rate": 0.00019861634419301815, "loss": 11.7064, "step": 7626 }, { "epoch": 0.15965419073934523, "grad_norm": 0.2165270298719406, "learning_rate": 0.00019861598070009334, "loss": 11.6737, "step": 7627 }, { "epoch": 0.15967512350330737, "grad_norm": 0.24468420445919037, "learning_rate": 0.00019861561715976188, "loss": 11.6886, "step": 7628 }, { "epoch": 0.15969605626726954, "grad_norm": 0.24006181955337524, "learning_rate": 0.00019861525357202386, "loss": 11.6615, "step": 7629 }, { "epoch": 0.1597169890312317, "grad_norm": 0.24376673996448517, "learning_rate": 0.00019861488993687957, "loss": 11.6676, "step": 7630 }, { "epoch": 0.15973792179519383, "grad_norm": 0.2402036190032959, "learning_rate": 0.0001986145262543291, "loss": 11.6623, "step": 7631 }, { "epoch": 0.15975885455915598, "grad_norm": 0.2593427896499634, "learning_rate": 0.00019861416252437266, "loss": 11.6837, "step": 7632 }, { "epoch": 0.15977978732311815, "grad_norm": 0.24230319261550903, "learning_rate": 0.00019861379874701042, "loss": 11.6697, "step": 7633 }, { "epoch": 0.1598007200870803, "grad_norm": 0.254798948764801, "learning_rate": 0.00019861343492224255, "loss": 11.6783, "step": 7634 }, { "epoch": 0.15982165285104244, "grad_norm": 0.33222222328186035, "learning_rate": 0.00019861307105006922, "loss": 11.6806, "step": 7635 }, { "epoch": 0.1598425856150046, "grad_norm": 0.22002221643924713, "learning_rate": 0.0001986127071304906, "loss": 11.6724, "step": 7636 }, { "epoch": 0.15986351837896676, "grad_norm": 0.23320472240447998, "learning_rate": 0.0001986123431635069, "loss": 11.6639, "step": 7637 }, { "epoch": 0.1598844511429289, "grad_norm": 0.2276669144630432, "learning_rate": 0.00019861197914911824, "loss": 11.6767, "step": 7638 }, { "epoch": 0.15990538390689107, "grad_norm": 0.24185331165790558, "learning_rate": 0.00019861161508732486, "loss": 11.6849, "step": 7639 }, { "epoch": 0.15992631667085322, "grad_norm": 0.2907452881336212, "learning_rate": 0.00019861125097812688, "loss": 11.6789, "step": 7640 }, { "epoch": 0.15994724943481536, "grad_norm": 0.2333287000656128, "learning_rate": 0.0001986108868215245, "loss": 11.679, "step": 7641 }, { "epoch": 0.15996818219877754, "grad_norm": 0.23403459787368774, "learning_rate": 0.0001986105226175179, "loss": 11.6723, "step": 7642 }, { "epoch": 0.15998911496273968, "grad_norm": 0.2867850959300995, "learning_rate": 0.00019861015836610723, "loss": 11.701, "step": 7643 }, { "epoch": 0.16001004772670183, "grad_norm": 0.22170370817184448, "learning_rate": 0.00019860979406729267, "loss": 11.6662, "step": 7644 }, { "epoch": 0.160030980490664, "grad_norm": 0.2841598689556122, "learning_rate": 0.0001986094297210744, "loss": 11.6764, "step": 7645 }, { "epoch": 0.16005191325462614, "grad_norm": 0.1927749365568161, "learning_rate": 0.00019860906532745266, "loss": 11.6833, "step": 7646 }, { "epoch": 0.1600728460185883, "grad_norm": 0.22024160623550415, "learning_rate": 0.0001986087008864275, "loss": 11.6786, "step": 7647 }, { "epoch": 0.16009377878255046, "grad_norm": 0.26668480038642883, "learning_rate": 0.0001986083363979992, "loss": 11.6605, "step": 7648 }, { "epoch": 0.1601147115465126, "grad_norm": 0.27096202969551086, "learning_rate": 0.0001986079718621679, "loss": 11.6649, "step": 7649 }, { "epoch": 0.16013564431047475, "grad_norm": 0.27001041173934937, "learning_rate": 0.00019860760727893374, "loss": 11.6744, "step": 7650 }, { "epoch": 0.1601565770744369, "grad_norm": 0.2924298346042633, "learning_rate": 0.00019860724264829694, "loss": 11.6736, "step": 7651 }, { "epoch": 0.16017750983839907, "grad_norm": 0.2191736400127411, "learning_rate": 0.00019860687797025768, "loss": 11.677, "step": 7652 }, { "epoch": 0.1601984426023612, "grad_norm": 0.2930023968219757, "learning_rate": 0.0001986065132448161, "loss": 11.6798, "step": 7653 }, { "epoch": 0.16021937536632336, "grad_norm": 0.33548569679260254, "learning_rate": 0.00019860614847197242, "loss": 11.6816, "step": 7654 }, { "epoch": 0.16024030813028553, "grad_norm": 0.29679831862449646, "learning_rate": 0.00019860578365172676, "loss": 11.6694, "step": 7655 }, { "epoch": 0.16026124089424768, "grad_norm": 0.2159586399793625, "learning_rate": 0.00019860541878407933, "loss": 11.6848, "step": 7656 }, { "epoch": 0.16028217365820982, "grad_norm": 0.24695216119289398, "learning_rate": 0.00019860505386903028, "loss": 11.6852, "step": 7657 }, { "epoch": 0.160303106422172, "grad_norm": 0.23238466680049896, "learning_rate": 0.00019860468890657985, "loss": 11.6688, "step": 7658 }, { "epoch": 0.16032403918613414, "grad_norm": 0.22144795954227448, "learning_rate": 0.0001986043238967281, "loss": 11.6626, "step": 7659 }, { "epoch": 0.16034497195009628, "grad_norm": 0.26423516869544983, "learning_rate": 0.00019860395883947534, "loss": 11.6847, "step": 7660 }, { "epoch": 0.16036590471405845, "grad_norm": 0.2496139258146286, "learning_rate": 0.00019860359373482167, "loss": 11.6703, "step": 7661 }, { "epoch": 0.1603868374780206, "grad_norm": 0.3019004166126251, "learning_rate": 0.00019860322858276727, "loss": 11.6857, "step": 7662 }, { "epoch": 0.16040777024198274, "grad_norm": 0.21484659612178802, "learning_rate": 0.0001986028633833123, "loss": 11.6722, "step": 7663 }, { "epoch": 0.16042870300594492, "grad_norm": 0.2484772950410843, "learning_rate": 0.000198602498136457, "loss": 11.6862, "step": 7664 }, { "epoch": 0.16044963576990706, "grad_norm": 0.2816752791404724, "learning_rate": 0.00019860213284220147, "loss": 11.6837, "step": 7665 }, { "epoch": 0.1604705685338692, "grad_norm": 0.2294357866048813, "learning_rate": 0.00019860176750054594, "loss": 11.6897, "step": 7666 }, { "epoch": 0.16049150129783138, "grad_norm": 0.26132121682167053, "learning_rate": 0.00019860140211149055, "loss": 11.6869, "step": 7667 }, { "epoch": 0.16051243406179352, "grad_norm": 0.329388827085495, "learning_rate": 0.00019860103667503548, "loss": 11.688, "step": 7668 }, { "epoch": 0.16053336682575567, "grad_norm": 0.21280860900878906, "learning_rate": 0.00019860067119118092, "loss": 11.6698, "step": 7669 }, { "epoch": 0.1605542995897178, "grad_norm": 0.24560992419719696, "learning_rate": 0.00019860030565992703, "loss": 11.6617, "step": 7670 }, { "epoch": 0.16057523235368, "grad_norm": 0.24867938458919525, "learning_rate": 0.00019859994008127404, "loss": 11.7045, "step": 7671 }, { "epoch": 0.16059616511764213, "grad_norm": 0.2837277948856354, "learning_rate": 0.0001985995744552221, "loss": 11.6851, "step": 7672 }, { "epoch": 0.16061709788160428, "grad_norm": 0.21386556327342987, "learning_rate": 0.0001985992087817713, "loss": 11.673, "step": 7673 }, { "epoch": 0.16063803064556645, "grad_norm": 0.2880765497684479, "learning_rate": 0.00019859884306092188, "loss": 11.6813, "step": 7674 }, { "epoch": 0.1606589634095286, "grad_norm": 0.2291942983865738, "learning_rate": 0.00019859847729267408, "loss": 11.66, "step": 7675 }, { "epoch": 0.16067989617349074, "grad_norm": 0.26259204745292664, "learning_rate": 0.000198598111477028, "loss": 11.6814, "step": 7676 }, { "epoch": 0.1607008289374529, "grad_norm": 0.2493143379688263, "learning_rate": 0.0001985977456139838, "loss": 11.6848, "step": 7677 }, { "epoch": 0.16072176170141506, "grad_norm": 0.24803733825683594, "learning_rate": 0.00019859737970354173, "loss": 11.6723, "step": 7678 }, { "epoch": 0.1607426944653772, "grad_norm": 0.3041382133960724, "learning_rate": 0.00019859701374570188, "loss": 11.6652, "step": 7679 }, { "epoch": 0.16076362722933937, "grad_norm": 0.20852746069431305, "learning_rate": 0.0001985966477404645, "loss": 11.6592, "step": 7680 }, { "epoch": 0.16078455999330152, "grad_norm": 0.21741166710853577, "learning_rate": 0.00019859628168782974, "loss": 11.6768, "step": 7681 }, { "epoch": 0.16080549275726366, "grad_norm": 0.2392929643392563, "learning_rate": 0.00019859591558779775, "loss": 11.6858, "step": 7682 }, { "epoch": 0.16082642552122584, "grad_norm": 0.30654847621917725, "learning_rate": 0.00019859554944036872, "loss": 11.686, "step": 7683 }, { "epoch": 0.16084735828518798, "grad_norm": 0.20858468115329742, "learning_rate": 0.00019859518324554288, "loss": 11.6816, "step": 7684 }, { "epoch": 0.16086829104915013, "grad_norm": 0.23459051549434662, "learning_rate": 0.00019859481700332032, "loss": 11.6847, "step": 7685 }, { "epoch": 0.16088922381311227, "grad_norm": 0.23885361850261688, "learning_rate": 0.00019859445071370124, "loss": 11.6611, "step": 7686 }, { "epoch": 0.16091015657707444, "grad_norm": 0.2028997540473938, "learning_rate": 0.00019859408437668588, "loss": 11.6658, "step": 7687 }, { "epoch": 0.1609310893410366, "grad_norm": 0.27092787623405457, "learning_rate": 0.00019859371799227433, "loss": 11.6825, "step": 7688 }, { "epoch": 0.16095202210499873, "grad_norm": 0.2643190324306488, "learning_rate": 0.0001985933515604668, "loss": 11.6927, "step": 7689 }, { "epoch": 0.1609729548689609, "grad_norm": 0.2143358588218689, "learning_rate": 0.0001985929850812635, "loss": 11.6763, "step": 7690 }, { "epoch": 0.16099388763292305, "grad_norm": 0.21201181411743164, "learning_rate": 0.0001985926185546646, "loss": 11.6944, "step": 7691 }, { "epoch": 0.1610148203968852, "grad_norm": 0.22998173534870148, "learning_rate": 0.0001985922519806702, "loss": 11.6598, "step": 7692 }, { "epoch": 0.16103575316084737, "grad_norm": 0.2778414785861969, "learning_rate": 0.00019859188535928051, "loss": 11.6796, "step": 7693 }, { "epoch": 0.1610566859248095, "grad_norm": 0.2526569962501526, "learning_rate": 0.0001985915186904958, "loss": 11.6796, "step": 7694 }, { "epoch": 0.16107761868877166, "grad_norm": 0.2348015010356903, "learning_rate": 0.00019859115197431613, "loss": 11.6715, "step": 7695 }, { "epoch": 0.16109855145273383, "grad_norm": 0.2970547378063202, "learning_rate": 0.0001985907852107417, "loss": 11.684, "step": 7696 }, { "epoch": 0.16111948421669597, "grad_norm": 0.1768154799938202, "learning_rate": 0.00019859041839977274, "loss": 11.6741, "step": 7697 }, { "epoch": 0.16114041698065812, "grad_norm": 0.22013935446739197, "learning_rate": 0.00019859005154140936, "loss": 11.6769, "step": 7698 }, { "epoch": 0.1611613497446203, "grad_norm": 0.19690582156181335, "learning_rate": 0.00019858968463565177, "loss": 11.6885, "step": 7699 }, { "epoch": 0.16118228250858244, "grad_norm": 0.19764025509357452, "learning_rate": 0.00019858931768250017, "loss": 11.6761, "step": 7700 }, { "epoch": 0.16120321527254458, "grad_norm": 0.2558426260948181, "learning_rate": 0.00019858895068195468, "loss": 11.6776, "step": 7701 }, { "epoch": 0.16122414803650675, "grad_norm": 0.22402620315551758, "learning_rate": 0.00019858858363401552, "loss": 11.6723, "step": 7702 }, { "epoch": 0.1612450808004689, "grad_norm": 0.30460137128829956, "learning_rate": 0.00019858821653868285, "loss": 11.6701, "step": 7703 }, { "epoch": 0.16126601356443104, "grad_norm": 0.2503671944141388, "learning_rate": 0.00019858784939595685, "loss": 11.6756, "step": 7704 }, { "epoch": 0.1612869463283932, "grad_norm": 0.28203365206718445, "learning_rate": 0.0001985874822058377, "loss": 11.6818, "step": 7705 }, { "epoch": 0.16130787909235536, "grad_norm": 0.21579457819461823, "learning_rate": 0.00019858711496832556, "loss": 11.6735, "step": 7706 }, { "epoch": 0.1613288118563175, "grad_norm": 0.1982194483280182, "learning_rate": 0.00019858674768342063, "loss": 11.6729, "step": 7707 }, { "epoch": 0.16134974462027965, "grad_norm": 0.2537178099155426, "learning_rate": 0.00019858638035112307, "loss": 11.6717, "step": 7708 }, { "epoch": 0.16137067738424182, "grad_norm": 0.2720877230167389, "learning_rate": 0.00019858601297143304, "loss": 11.679, "step": 7709 }, { "epoch": 0.16139161014820397, "grad_norm": 0.2800048589706421, "learning_rate": 0.00019858564554435076, "loss": 11.7133, "step": 7710 }, { "epoch": 0.1614125429121661, "grad_norm": 0.24076448380947113, "learning_rate": 0.00019858527806987638, "loss": 11.6673, "step": 7711 }, { "epoch": 0.16143347567612829, "grad_norm": 0.2551311254501343, "learning_rate": 0.00019858491054801006, "loss": 11.693, "step": 7712 }, { "epoch": 0.16145440844009043, "grad_norm": 0.29424458742141724, "learning_rate": 0.00019858454297875204, "loss": 11.6764, "step": 7713 }, { "epoch": 0.16147534120405257, "grad_norm": 0.27252548933029175, "learning_rate": 0.00019858417536210247, "loss": 11.6822, "step": 7714 }, { "epoch": 0.16149627396801475, "grad_norm": 0.18671774864196777, "learning_rate": 0.00019858380769806143, "loss": 11.6766, "step": 7715 }, { "epoch": 0.1615172067319769, "grad_norm": 0.2649557590484619, "learning_rate": 0.00019858343998662922, "loss": 11.6804, "step": 7716 }, { "epoch": 0.16153813949593904, "grad_norm": 0.2831215262413025, "learning_rate": 0.00019858307222780602, "loss": 11.6956, "step": 7717 }, { "epoch": 0.1615590722599012, "grad_norm": 0.24710842967033386, "learning_rate": 0.0001985827044215919, "loss": 11.6938, "step": 7718 }, { "epoch": 0.16158000502386335, "grad_norm": 0.25971361994743347, "learning_rate": 0.00019858233656798712, "loss": 11.6867, "step": 7719 }, { "epoch": 0.1616009377878255, "grad_norm": 0.22492334246635437, "learning_rate": 0.0001985819686669918, "loss": 11.678, "step": 7720 }, { "epoch": 0.16162187055178764, "grad_norm": 0.24205875396728516, "learning_rate": 0.0001985816007186062, "loss": 11.6871, "step": 7721 }, { "epoch": 0.16164280331574982, "grad_norm": 0.25755366683006287, "learning_rate": 0.00019858123272283046, "loss": 11.6706, "step": 7722 }, { "epoch": 0.16166373607971196, "grad_norm": 0.2843126058578491, "learning_rate": 0.00019858086467966472, "loss": 11.6705, "step": 7723 }, { "epoch": 0.1616846688436741, "grad_norm": 0.21394670009613037, "learning_rate": 0.00019858049658910915, "loss": 11.6866, "step": 7724 }, { "epoch": 0.16170560160763628, "grad_norm": 0.2637881934642792, "learning_rate": 0.000198580128451164, "loss": 11.6741, "step": 7725 }, { "epoch": 0.16172653437159842, "grad_norm": 0.24742279946804047, "learning_rate": 0.0001985797602658294, "loss": 11.6824, "step": 7726 }, { "epoch": 0.16174746713556057, "grad_norm": 0.21436403691768646, "learning_rate": 0.00019857939203310555, "loss": 11.6857, "step": 7727 }, { "epoch": 0.16176839989952274, "grad_norm": 0.2422185093164444, "learning_rate": 0.00019857902375299257, "loss": 11.6957, "step": 7728 }, { "epoch": 0.16178933266348489, "grad_norm": 0.33342909812927246, "learning_rate": 0.00019857865542549072, "loss": 11.6845, "step": 7729 }, { "epoch": 0.16181026542744703, "grad_norm": 0.22795383632183075, "learning_rate": 0.00019857828705060012, "loss": 11.6715, "step": 7730 }, { "epoch": 0.1618311981914092, "grad_norm": 0.22719328105449677, "learning_rate": 0.00019857791862832095, "loss": 11.6658, "step": 7731 }, { "epoch": 0.16185213095537135, "grad_norm": 0.23819495737552643, "learning_rate": 0.0001985775501586534, "loss": 11.6778, "step": 7732 }, { "epoch": 0.1618730637193335, "grad_norm": 0.25914791226387024, "learning_rate": 0.00019857718164159767, "loss": 11.6837, "step": 7733 }, { "epoch": 0.16189399648329567, "grad_norm": 0.25471624732017517, "learning_rate": 0.00019857681307715387, "loss": 11.6765, "step": 7734 }, { "epoch": 0.1619149292472578, "grad_norm": 0.30846983194351196, "learning_rate": 0.00019857644446532227, "loss": 11.7049, "step": 7735 }, { "epoch": 0.16193586201121996, "grad_norm": 0.26840469241142273, "learning_rate": 0.00019857607580610296, "loss": 11.6787, "step": 7736 }, { "epoch": 0.16195679477518213, "grad_norm": 0.22228871285915375, "learning_rate": 0.00019857570709949619, "loss": 11.6822, "step": 7737 }, { "epoch": 0.16197772753914427, "grad_norm": 0.2164304107427597, "learning_rate": 0.00019857533834550205, "loss": 11.6766, "step": 7738 }, { "epoch": 0.16199866030310642, "grad_norm": 0.23181389272212982, "learning_rate": 0.00019857496954412082, "loss": 11.6838, "step": 7739 }, { "epoch": 0.16201959306706856, "grad_norm": 0.2694057822227478, "learning_rate": 0.00019857460069535262, "loss": 11.6786, "step": 7740 }, { "epoch": 0.16204052583103074, "grad_norm": 0.20663514733314514, "learning_rate": 0.00019857423179919762, "loss": 11.6825, "step": 7741 }, { "epoch": 0.16206145859499288, "grad_norm": 0.25988534092903137, "learning_rate": 0.00019857386285565602, "loss": 11.6903, "step": 7742 }, { "epoch": 0.16208239135895502, "grad_norm": 0.21286088228225708, "learning_rate": 0.00019857349386472796, "loss": 11.6748, "step": 7743 }, { "epoch": 0.1621033241229172, "grad_norm": 0.23295170068740845, "learning_rate": 0.00019857312482641368, "loss": 11.6734, "step": 7744 }, { "epoch": 0.16212425688687934, "grad_norm": 0.2733794152736664, "learning_rate": 0.00019857275574071333, "loss": 11.6701, "step": 7745 }, { "epoch": 0.1621451896508415, "grad_norm": 0.28542742133140564, "learning_rate": 0.00019857238660762704, "loss": 11.6827, "step": 7746 }, { "epoch": 0.16216612241480366, "grad_norm": 0.24085508286952972, "learning_rate": 0.00019857201742715506, "loss": 11.6701, "step": 7747 }, { "epoch": 0.1621870551787658, "grad_norm": 0.24005962908267975, "learning_rate": 0.0001985716481992975, "loss": 11.7007, "step": 7748 }, { "epoch": 0.16220798794272795, "grad_norm": 0.23979021608829498, "learning_rate": 0.00019857127892405462, "loss": 11.6914, "step": 7749 }, { "epoch": 0.16222892070669012, "grad_norm": 0.22419536113739014, "learning_rate": 0.00019857090960142653, "loss": 11.684, "step": 7750 }, { "epoch": 0.16224985347065227, "grad_norm": 0.2357073873281479, "learning_rate": 0.00019857054023141344, "loss": 11.6822, "step": 7751 }, { "epoch": 0.1622707862346144, "grad_norm": 0.22479383647441864, "learning_rate": 0.00019857017081401548, "loss": 11.6699, "step": 7752 }, { "epoch": 0.16229171899857658, "grad_norm": 0.31996122002601624, "learning_rate": 0.00019856980134923288, "loss": 11.6924, "step": 7753 }, { "epoch": 0.16231265176253873, "grad_norm": 0.3231244385242462, "learning_rate": 0.0001985694318370658, "loss": 11.6895, "step": 7754 }, { "epoch": 0.16233358452650087, "grad_norm": 0.21254649758338928, "learning_rate": 0.00019856906227751445, "loss": 11.6828, "step": 7755 }, { "epoch": 0.16235451729046305, "grad_norm": 0.2400953322649002, "learning_rate": 0.00019856869267057895, "loss": 11.6765, "step": 7756 }, { "epoch": 0.1623754500544252, "grad_norm": 0.24783243238925934, "learning_rate": 0.0001985683230162595, "loss": 11.6923, "step": 7757 }, { "epoch": 0.16239638281838734, "grad_norm": 0.29950085282325745, "learning_rate": 0.0001985679533145563, "loss": 11.6831, "step": 7758 }, { "epoch": 0.16241731558234948, "grad_norm": 0.27398666739463806, "learning_rate": 0.0001985675835654695, "loss": 11.6954, "step": 7759 }, { "epoch": 0.16243824834631165, "grad_norm": 0.26844239234924316, "learning_rate": 0.0001985672137689993, "loss": 11.6805, "step": 7760 }, { "epoch": 0.1624591811102738, "grad_norm": 0.25790372490882874, "learning_rate": 0.00019856684392514584, "loss": 11.6836, "step": 7761 }, { "epoch": 0.16248011387423594, "grad_norm": 0.22976736724376678, "learning_rate": 0.00019856647403390936, "loss": 11.6815, "step": 7762 }, { "epoch": 0.16250104663819812, "grad_norm": 0.22773215174674988, "learning_rate": 0.00019856610409528996, "loss": 11.6848, "step": 7763 }, { "epoch": 0.16252197940216026, "grad_norm": 0.2603535056114197, "learning_rate": 0.00019856573410928787, "loss": 11.6797, "step": 7764 }, { "epoch": 0.1625429121661224, "grad_norm": 0.22884629666805267, "learning_rate": 0.00019856536407590326, "loss": 11.6803, "step": 7765 }, { "epoch": 0.16256384493008458, "grad_norm": 0.28864631056785583, "learning_rate": 0.00019856499399513631, "loss": 11.6846, "step": 7766 }, { "epoch": 0.16258477769404672, "grad_norm": 0.2600381672382355, "learning_rate": 0.0001985646238669872, "loss": 11.6765, "step": 7767 }, { "epoch": 0.16260571045800887, "grad_norm": 0.2076851725578308, "learning_rate": 0.00019856425369145609, "loss": 11.6759, "step": 7768 }, { "epoch": 0.16262664322197104, "grad_norm": 0.27096351981163025, "learning_rate": 0.00019856388346854313, "loss": 11.6878, "step": 7769 }, { "epoch": 0.16264757598593318, "grad_norm": 0.25715571641921997, "learning_rate": 0.00019856351319824856, "loss": 11.6938, "step": 7770 }, { "epoch": 0.16266850874989533, "grad_norm": 0.23689693212509155, "learning_rate": 0.0001985631428805726, "loss": 11.683, "step": 7771 }, { "epoch": 0.1626894415138575, "grad_norm": 0.23718859255313873, "learning_rate": 0.0001985627725155153, "loss": 11.6771, "step": 7772 }, { "epoch": 0.16271037427781965, "grad_norm": 0.26043668389320374, "learning_rate": 0.00019856240210307688, "loss": 11.6703, "step": 7773 }, { "epoch": 0.1627313070417818, "grad_norm": 0.283453106880188, "learning_rate": 0.00019856203164325757, "loss": 11.6835, "step": 7774 }, { "epoch": 0.16275223980574394, "grad_norm": 0.3216468095779419, "learning_rate": 0.0001985616611360575, "loss": 11.6786, "step": 7775 }, { "epoch": 0.1627731725697061, "grad_norm": 0.2286406010389328, "learning_rate": 0.0001985612905814769, "loss": 11.6665, "step": 7776 }, { "epoch": 0.16279410533366825, "grad_norm": 0.21176914870738983, "learning_rate": 0.00019856091997951588, "loss": 11.6879, "step": 7777 }, { "epoch": 0.1628150380976304, "grad_norm": 0.22362376749515533, "learning_rate": 0.00019856054933017466, "loss": 11.6682, "step": 7778 }, { "epoch": 0.16283597086159257, "grad_norm": 0.2158404290676117, "learning_rate": 0.0001985601786334534, "loss": 11.668, "step": 7779 }, { "epoch": 0.16285690362555472, "grad_norm": 0.2890789210796356, "learning_rate": 0.0001985598078893523, "loss": 11.684, "step": 7780 }, { "epoch": 0.16287783638951686, "grad_norm": 0.2683216631412506, "learning_rate": 0.00019855943709787152, "loss": 11.675, "step": 7781 }, { "epoch": 0.16289876915347903, "grad_norm": 0.23183879256248474, "learning_rate": 0.00019855906625901125, "loss": 11.6665, "step": 7782 }, { "epoch": 0.16291970191744118, "grad_norm": 0.2548471987247467, "learning_rate": 0.00019855869537277164, "loss": 11.6953, "step": 7783 }, { "epoch": 0.16294063468140332, "grad_norm": 0.2658562958240509, "learning_rate": 0.0001985583244391529, "loss": 11.6921, "step": 7784 }, { "epoch": 0.1629615674453655, "grad_norm": 0.232463076710701, "learning_rate": 0.0001985579534581552, "loss": 11.6604, "step": 7785 }, { "epoch": 0.16298250020932764, "grad_norm": 0.27813833951950073, "learning_rate": 0.00019855758242977873, "loss": 11.6956, "step": 7786 }, { "epoch": 0.16300343297328979, "grad_norm": 0.30692058801651, "learning_rate": 0.00019855721135402364, "loss": 11.7004, "step": 7787 }, { "epoch": 0.16302436573725196, "grad_norm": 0.21828429400920868, "learning_rate": 0.00019855684023089011, "loss": 11.6738, "step": 7788 }, { "epoch": 0.1630452985012141, "grad_norm": 0.24901963770389557, "learning_rate": 0.00019855646906037836, "loss": 11.6868, "step": 7789 }, { "epoch": 0.16306623126517625, "grad_norm": 0.2504144608974457, "learning_rate": 0.00019855609784248852, "loss": 11.6917, "step": 7790 }, { "epoch": 0.16308716402913842, "grad_norm": 0.27156174182891846, "learning_rate": 0.0001985557265772208, "loss": 11.6721, "step": 7791 }, { "epoch": 0.16310809679310057, "grad_norm": 0.23842819035053253, "learning_rate": 0.00019855535526457538, "loss": 11.6758, "step": 7792 }, { "epoch": 0.1631290295570627, "grad_norm": 0.2584468722343445, "learning_rate": 0.0001985549839045524, "loss": 11.6706, "step": 7793 }, { "epoch": 0.16314996232102486, "grad_norm": 0.37624284625053406, "learning_rate": 0.00019855461249715207, "loss": 11.7056, "step": 7794 }, { "epoch": 0.16317089508498703, "grad_norm": 0.2716015577316284, "learning_rate": 0.0001985542410423746, "loss": 11.6729, "step": 7795 }, { "epoch": 0.16319182784894917, "grad_norm": 0.35650864243507385, "learning_rate": 0.0001985538695402201, "loss": 11.6923, "step": 7796 }, { "epoch": 0.16321276061291132, "grad_norm": 0.2216835767030716, "learning_rate": 0.00019855349799068877, "loss": 11.6863, "step": 7797 }, { "epoch": 0.1632336933768735, "grad_norm": 0.2566080689430237, "learning_rate": 0.0001985531263937808, "loss": 11.6591, "step": 7798 }, { "epoch": 0.16325462614083563, "grad_norm": 0.23043662309646606, "learning_rate": 0.00019855275474949636, "loss": 11.6793, "step": 7799 }, { "epoch": 0.16327555890479778, "grad_norm": 0.263489693403244, "learning_rate": 0.00019855238305783565, "loss": 11.6823, "step": 7800 }, { "epoch": 0.16329649166875995, "grad_norm": 0.19993670284748077, "learning_rate": 0.00019855201131879885, "loss": 11.6728, "step": 7801 }, { "epoch": 0.1633174244327221, "grad_norm": 0.20433349907398224, "learning_rate": 0.0001985516395323861, "loss": 11.6806, "step": 7802 }, { "epoch": 0.16333835719668424, "grad_norm": 0.19635461270809174, "learning_rate": 0.00019855126769859763, "loss": 11.6616, "step": 7803 }, { "epoch": 0.16335928996064641, "grad_norm": 0.25404801964759827, "learning_rate": 0.00019855089581743355, "loss": 11.6775, "step": 7804 }, { "epoch": 0.16338022272460856, "grad_norm": 0.28116559982299805, "learning_rate": 0.0001985505238888941, "loss": 11.6957, "step": 7805 }, { "epoch": 0.1634011554885707, "grad_norm": 0.20436009764671326, "learning_rate": 0.00019855015191297946, "loss": 11.6764, "step": 7806 }, { "epoch": 0.16342208825253288, "grad_norm": 0.22929109632968903, "learning_rate": 0.00019854977988968976, "loss": 11.6868, "step": 7807 }, { "epoch": 0.16344302101649502, "grad_norm": 0.2420804351568222, "learning_rate": 0.0001985494078190252, "loss": 11.6727, "step": 7808 }, { "epoch": 0.16346395378045717, "grad_norm": 0.2349519282579422, "learning_rate": 0.000198549035700986, "loss": 11.6719, "step": 7809 }, { "epoch": 0.16348488654441934, "grad_norm": 0.19500026106834412, "learning_rate": 0.00019854866353557227, "loss": 11.6908, "step": 7810 }, { "epoch": 0.16350581930838148, "grad_norm": 0.23489749431610107, "learning_rate": 0.0001985482913227842, "loss": 11.673, "step": 7811 }, { "epoch": 0.16352675207234363, "grad_norm": 0.18842104077339172, "learning_rate": 0.00019854791906262205, "loss": 11.6583, "step": 7812 }, { "epoch": 0.16354768483630577, "grad_norm": 0.24358955025672913, "learning_rate": 0.0001985475467550859, "loss": 11.6807, "step": 7813 }, { "epoch": 0.16356861760026795, "grad_norm": 0.2811222970485687, "learning_rate": 0.00019854717440017598, "loss": 11.6878, "step": 7814 }, { "epoch": 0.1635895503642301, "grad_norm": 0.25520676374435425, "learning_rate": 0.0001985468019978925, "loss": 11.6662, "step": 7815 }, { "epoch": 0.16361048312819224, "grad_norm": 0.23551051318645477, "learning_rate": 0.00019854642954823554, "loss": 11.6848, "step": 7816 }, { "epoch": 0.1636314158921544, "grad_norm": 0.22506438195705414, "learning_rate": 0.00019854605705120536, "loss": 11.6613, "step": 7817 }, { "epoch": 0.16365234865611655, "grad_norm": 0.2720470130443573, "learning_rate": 0.0001985456845068021, "loss": 11.691, "step": 7818 }, { "epoch": 0.1636732814200787, "grad_norm": 0.32059866189956665, "learning_rate": 0.00019854531191502596, "loss": 11.6861, "step": 7819 }, { "epoch": 0.16369421418404087, "grad_norm": 0.24667954444885254, "learning_rate": 0.00019854493927587712, "loss": 11.6769, "step": 7820 }, { "epoch": 0.16371514694800302, "grad_norm": 0.310274600982666, "learning_rate": 0.00019854456658935577, "loss": 11.6816, "step": 7821 }, { "epoch": 0.16373607971196516, "grad_norm": 0.20794376730918884, "learning_rate": 0.00019854419385546203, "loss": 11.6683, "step": 7822 }, { "epoch": 0.16375701247592733, "grad_norm": 0.2315448820590973, "learning_rate": 0.00019854382107419615, "loss": 11.6882, "step": 7823 }, { "epoch": 0.16377794523988948, "grad_norm": 0.2689085006713867, "learning_rate": 0.0001985434482455583, "loss": 11.6796, "step": 7824 }, { "epoch": 0.16379887800385162, "grad_norm": 0.2427065670490265, "learning_rate": 0.0001985430753695486, "loss": 11.6932, "step": 7825 }, { "epoch": 0.1638198107678138, "grad_norm": 0.25700634717941284, "learning_rate": 0.00019854270244616727, "loss": 11.681, "step": 7826 }, { "epoch": 0.16384074353177594, "grad_norm": 0.22537487745285034, "learning_rate": 0.00019854232947541452, "loss": 11.6802, "step": 7827 }, { "epoch": 0.16386167629573808, "grad_norm": 0.21278592944145203, "learning_rate": 0.00019854195645729048, "loss": 11.6797, "step": 7828 }, { "epoch": 0.16388260905970023, "grad_norm": 0.22066475450992584, "learning_rate": 0.00019854158339179534, "loss": 11.6714, "step": 7829 }, { "epoch": 0.1639035418236624, "grad_norm": 0.2250571995973587, "learning_rate": 0.0001985412102789293, "loss": 11.6859, "step": 7830 }, { "epoch": 0.16392447458762455, "grad_norm": 0.23307284712791443, "learning_rate": 0.0001985408371186925, "loss": 11.6693, "step": 7831 }, { "epoch": 0.1639454073515867, "grad_norm": 0.2725023925304413, "learning_rate": 0.00019854046391108517, "loss": 11.6814, "step": 7832 }, { "epoch": 0.16396634011554886, "grad_norm": 0.22481225430965424, "learning_rate": 0.00019854009065610746, "loss": 11.6756, "step": 7833 }, { "epoch": 0.163987272879511, "grad_norm": 0.21483851969242096, "learning_rate": 0.00019853971735375953, "loss": 11.6818, "step": 7834 }, { "epoch": 0.16400820564347315, "grad_norm": 0.2397603839635849, "learning_rate": 0.0001985393440040416, "loss": 11.6735, "step": 7835 }, { "epoch": 0.16402913840743533, "grad_norm": 0.2154616117477417, "learning_rate": 0.00019853897060695386, "loss": 11.6774, "step": 7836 }, { "epoch": 0.16405007117139747, "grad_norm": 0.22170306742191315, "learning_rate": 0.00019853859716249643, "loss": 11.6722, "step": 7837 }, { "epoch": 0.16407100393535962, "grad_norm": 0.29089370369911194, "learning_rate": 0.00019853822367066953, "loss": 11.6831, "step": 7838 }, { "epoch": 0.1640919366993218, "grad_norm": 0.2934330105781555, "learning_rate": 0.00019853785013147332, "loss": 11.6644, "step": 7839 }, { "epoch": 0.16411286946328393, "grad_norm": 0.22871296107769012, "learning_rate": 0.00019853747654490799, "loss": 11.6744, "step": 7840 }, { "epoch": 0.16413380222724608, "grad_norm": 0.24625998735427856, "learning_rate": 0.00019853710291097376, "loss": 11.6719, "step": 7841 }, { "epoch": 0.16415473499120825, "grad_norm": 0.20898926258087158, "learning_rate": 0.00019853672922967076, "loss": 11.6779, "step": 7842 }, { "epoch": 0.1641756677551704, "grad_norm": 0.21064627170562744, "learning_rate": 0.00019853635550099913, "loss": 11.6775, "step": 7843 }, { "epoch": 0.16419660051913254, "grad_norm": 0.2527167499065399, "learning_rate": 0.00019853598172495912, "loss": 11.6905, "step": 7844 }, { "epoch": 0.1642175332830947, "grad_norm": 0.2632859945297241, "learning_rate": 0.0001985356079015509, "loss": 11.6743, "step": 7845 }, { "epoch": 0.16423846604705686, "grad_norm": 0.25948289036750793, "learning_rate": 0.00019853523403077464, "loss": 11.6841, "step": 7846 }, { "epoch": 0.164259398811019, "grad_norm": 0.22171908617019653, "learning_rate": 0.00019853486011263053, "loss": 11.6945, "step": 7847 }, { "epoch": 0.16428033157498115, "grad_norm": 0.2519201636314392, "learning_rate": 0.00019853448614711873, "loss": 11.6821, "step": 7848 }, { "epoch": 0.16430126433894332, "grad_norm": 0.3243428170681, "learning_rate": 0.00019853411213423941, "loss": 11.6805, "step": 7849 }, { "epoch": 0.16432219710290547, "grad_norm": 0.22160887718200684, "learning_rate": 0.00019853373807399277, "loss": 11.6675, "step": 7850 }, { "epoch": 0.1643431298668676, "grad_norm": 0.2054004818201065, "learning_rate": 0.00019853336396637904, "loss": 11.6854, "step": 7851 }, { "epoch": 0.16436406263082978, "grad_norm": 0.24516527354717255, "learning_rate": 0.00019853298981139828, "loss": 11.6738, "step": 7852 }, { "epoch": 0.16438499539479193, "grad_norm": 0.21262849867343903, "learning_rate": 0.00019853261560905073, "loss": 11.6612, "step": 7853 }, { "epoch": 0.16440592815875407, "grad_norm": 0.3339211344718933, "learning_rate": 0.00019853224135933664, "loss": 11.6678, "step": 7854 }, { "epoch": 0.16442686092271624, "grad_norm": 0.21929825842380524, "learning_rate": 0.00019853186706225611, "loss": 11.6746, "step": 7855 }, { "epoch": 0.1644477936866784, "grad_norm": 0.22496309876441956, "learning_rate": 0.00019853149271780932, "loss": 11.6924, "step": 7856 }, { "epoch": 0.16446872645064053, "grad_norm": 0.3150453269481659, "learning_rate": 0.00019853111832599644, "loss": 11.6992, "step": 7857 }, { "epoch": 0.1644896592146027, "grad_norm": 0.35103458166122437, "learning_rate": 0.00019853074388681772, "loss": 11.6881, "step": 7858 }, { "epoch": 0.16451059197856485, "grad_norm": 0.22753272950649261, "learning_rate": 0.00019853036940027327, "loss": 11.6707, "step": 7859 }, { "epoch": 0.164531524742527, "grad_norm": 0.25433847308158875, "learning_rate": 0.00019852999486636333, "loss": 11.6962, "step": 7860 }, { "epoch": 0.16455245750648917, "grad_norm": 0.3534499406814575, "learning_rate": 0.00019852962028508802, "loss": 11.6932, "step": 7861 }, { "epoch": 0.16457339027045131, "grad_norm": 0.23986847698688507, "learning_rate": 0.00019852924565644755, "loss": 11.6937, "step": 7862 }, { "epoch": 0.16459432303441346, "grad_norm": 0.1893606185913086, "learning_rate": 0.0001985288709804421, "loss": 11.6779, "step": 7863 }, { "epoch": 0.1646152557983756, "grad_norm": 0.23591352999210358, "learning_rate": 0.00019852849625707185, "loss": 11.6787, "step": 7864 }, { "epoch": 0.16463618856233778, "grad_norm": 0.23304003477096558, "learning_rate": 0.000198528121486337, "loss": 11.6736, "step": 7865 }, { "epoch": 0.16465712132629992, "grad_norm": 0.2611520290374756, "learning_rate": 0.00019852774666823767, "loss": 11.6871, "step": 7866 }, { "epoch": 0.16467805409026207, "grad_norm": 0.26024359464645386, "learning_rate": 0.00019852737180277408, "loss": 11.6889, "step": 7867 }, { "epoch": 0.16469898685422424, "grad_norm": 0.3170686662197113, "learning_rate": 0.00019852699688994644, "loss": 11.6903, "step": 7868 }, { "epoch": 0.16471991961818638, "grad_norm": 0.21188269555568695, "learning_rate": 0.00019852662192975485, "loss": 11.6839, "step": 7869 }, { "epoch": 0.16474085238214853, "grad_norm": 0.2802099883556366, "learning_rate": 0.00019852624692219957, "loss": 11.6901, "step": 7870 }, { "epoch": 0.1647617851461107, "grad_norm": 0.23180459439754486, "learning_rate": 0.00019852587186728075, "loss": 11.669, "step": 7871 }, { "epoch": 0.16478271791007285, "grad_norm": 0.24796979129314423, "learning_rate": 0.00019852549676499856, "loss": 11.6796, "step": 7872 }, { "epoch": 0.164803650674035, "grad_norm": 0.24018996953964233, "learning_rate": 0.0001985251216153532, "loss": 11.6714, "step": 7873 }, { "epoch": 0.16482458343799716, "grad_norm": 0.2648817002773285, "learning_rate": 0.00019852474641834483, "loss": 11.6923, "step": 7874 }, { "epoch": 0.1648455162019593, "grad_norm": 0.2791827619075775, "learning_rate": 0.00019852437117397364, "loss": 11.6845, "step": 7875 }, { "epoch": 0.16486644896592145, "grad_norm": 0.314404159784317, "learning_rate": 0.0001985239958822398, "loss": 11.6685, "step": 7876 }, { "epoch": 0.16488738172988363, "grad_norm": 0.2301216870546341, "learning_rate": 0.00019852362054314352, "loss": 11.6733, "step": 7877 }, { "epoch": 0.16490831449384577, "grad_norm": 0.2734161615371704, "learning_rate": 0.00019852324515668497, "loss": 11.6819, "step": 7878 }, { "epoch": 0.16492924725780791, "grad_norm": 0.22121769189834595, "learning_rate": 0.00019852286972286428, "loss": 11.6845, "step": 7879 }, { "epoch": 0.1649501800217701, "grad_norm": 0.23327544331550598, "learning_rate": 0.0001985224942416817, "loss": 11.686, "step": 7880 }, { "epoch": 0.16497111278573223, "grad_norm": 0.2182648777961731, "learning_rate": 0.00019852211871313738, "loss": 11.6835, "step": 7881 }, { "epoch": 0.16499204554969438, "grad_norm": 0.2746214270591736, "learning_rate": 0.0001985217431372315, "loss": 11.6858, "step": 7882 }, { "epoch": 0.16501297831365652, "grad_norm": 0.2965080440044403, "learning_rate": 0.00019852136751396425, "loss": 11.686, "step": 7883 }, { "epoch": 0.1650339110776187, "grad_norm": 0.22049164772033691, "learning_rate": 0.0001985209918433358, "loss": 11.6863, "step": 7884 }, { "epoch": 0.16505484384158084, "grad_norm": 0.2490425705909729, "learning_rate": 0.00019852061612534634, "loss": 11.6969, "step": 7885 }, { "epoch": 0.16507577660554298, "grad_norm": 0.23121541738510132, "learning_rate": 0.00019852024035999603, "loss": 11.6744, "step": 7886 }, { "epoch": 0.16509670936950516, "grad_norm": 0.23745693266391754, "learning_rate": 0.00019851986454728508, "loss": 11.6648, "step": 7887 }, { "epoch": 0.1651176421334673, "grad_norm": 0.2363056242465973, "learning_rate": 0.00019851948868721366, "loss": 11.6707, "step": 7888 }, { "epoch": 0.16513857489742945, "grad_norm": 0.28605136275291443, "learning_rate": 0.00019851911277978193, "loss": 11.6758, "step": 7889 }, { "epoch": 0.16515950766139162, "grad_norm": 1.2601139545440674, "learning_rate": 0.0001985187368249901, "loss": 11.7333, "step": 7890 }, { "epoch": 0.16518044042535376, "grad_norm": 0.2131146788597107, "learning_rate": 0.00019851836082283834, "loss": 11.6784, "step": 7891 }, { "epoch": 0.1652013731893159, "grad_norm": 0.2503240704536438, "learning_rate": 0.00019851798477332685, "loss": 11.6821, "step": 7892 }, { "epoch": 0.16522230595327808, "grad_norm": 0.23476983606815338, "learning_rate": 0.00019851760867645575, "loss": 11.6723, "step": 7893 }, { "epoch": 0.16524323871724023, "grad_norm": 0.30652034282684326, "learning_rate": 0.00019851723253222527, "loss": 11.6963, "step": 7894 }, { "epoch": 0.16526417148120237, "grad_norm": 0.2393568754196167, "learning_rate": 0.00019851685634063562, "loss": 11.6839, "step": 7895 }, { "epoch": 0.16528510424516454, "grad_norm": 0.2073076367378235, "learning_rate": 0.0001985164801016869, "loss": 11.6695, "step": 7896 }, { "epoch": 0.1653060370091267, "grad_norm": 0.2711433172225952, "learning_rate": 0.00019851610381537934, "loss": 11.6642, "step": 7897 }, { "epoch": 0.16532696977308883, "grad_norm": 0.331775039434433, "learning_rate": 0.00019851572748171315, "loss": 11.6747, "step": 7898 }, { "epoch": 0.165347902537051, "grad_norm": 0.25864288210868835, "learning_rate": 0.00019851535110068845, "loss": 11.696, "step": 7899 }, { "epoch": 0.16536883530101315, "grad_norm": 0.24013152718544006, "learning_rate": 0.00019851497467230543, "loss": 11.6769, "step": 7900 }, { "epoch": 0.1653897680649753, "grad_norm": 0.2059394121170044, "learning_rate": 0.00019851459819656428, "loss": 11.671, "step": 7901 }, { "epoch": 0.16541070082893744, "grad_norm": 0.3239263892173767, "learning_rate": 0.00019851422167346523, "loss": 11.6714, "step": 7902 }, { "epoch": 0.1654316335928996, "grad_norm": 0.3420448899269104, "learning_rate": 0.00019851384510300842, "loss": 11.6813, "step": 7903 }, { "epoch": 0.16545256635686176, "grad_norm": 0.3150527775287628, "learning_rate": 0.00019851346848519399, "loss": 11.6818, "step": 7904 }, { "epoch": 0.1654734991208239, "grad_norm": 0.2806271016597748, "learning_rate": 0.00019851309182002217, "loss": 11.6906, "step": 7905 }, { "epoch": 0.16549443188478608, "grad_norm": 0.23205460608005524, "learning_rate": 0.00019851271510749316, "loss": 11.6679, "step": 7906 }, { "epoch": 0.16551536464874822, "grad_norm": 0.21696442365646362, "learning_rate": 0.00019851233834760707, "loss": 11.6835, "step": 7907 }, { "epoch": 0.16553629741271036, "grad_norm": 0.24754329025745392, "learning_rate": 0.0001985119615403642, "loss": 11.6878, "step": 7908 }, { "epoch": 0.16555723017667254, "grad_norm": 0.227929025888443, "learning_rate": 0.00019851158468576458, "loss": 11.6696, "step": 7909 }, { "epoch": 0.16557816294063468, "grad_norm": 0.2675556540489197, "learning_rate": 0.0001985112077838085, "loss": 11.6761, "step": 7910 }, { "epoch": 0.16559909570459683, "grad_norm": 0.2438141405582428, "learning_rate": 0.0001985108308344961, "loss": 11.6732, "step": 7911 }, { "epoch": 0.165620028468559, "grad_norm": 0.23334865272045135, "learning_rate": 0.00019851045383782756, "loss": 11.6794, "step": 7912 }, { "epoch": 0.16564096123252114, "grad_norm": 0.3178694248199463, "learning_rate": 0.0001985100767938031, "loss": 11.6989, "step": 7913 }, { "epoch": 0.1656618939964833, "grad_norm": 0.26243290305137634, "learning_rate": 0.00019850969970242285, "loss": 11.667, "step": 7914 }, { "epoch": 0.16568282676044546, "grad_norm": 0.22988097369670868, "learning_rate": 0.00019850932256368703, "loss": 11.679, "step": 7915 }, { "epoch": 0.1657037595244076, "grad_norm": 0.3101765513420105, "learning_rate": 0.00019850894537759578, "loss": 11.69, "step": 7916 }, { "epoch": 0.16572469228836975, "grad_norm": 0.22547127306461334, "learning_rate": 0.00019850856814414934, "loss": 11.688, "step": 7917 }, { "epoch": 0.1657456250523319, "grad_norm": 0.28087711334228516, "learning_rate": 0.00019850819086334782, "loss": 11.6785, "step": 7918 }, { "epoch": 0.16576655781629407, "grad_norm": 0.23165088891983032, "learning_rate": 0.00019850781353519144, "loss": 11.679, "step": 7919 }, { "epoch": 0.1657874905802562, "grad_norm": 0.24622443318367004, "learning_rate": 0.00019850743615968042, "loss": 11.6666, "step": 7920 }, { "epoch": 0.16580842334421836, "grad_norm": 0.24060870707035065, "learning_rate": 0.00019850705873681487, "loss": 11.679, "step": 7921 }, { "epoch": 0.16582935610818053, "grad_norm": 0.30004650354385376, "learning_rate": 0.000198506681266595, "loss": 11.6652, "step": 7922 }, { "epoch": 0.16585028887214268, "grad_norm": 0.36571457982063293, "learning_rate": 0.00019850630374902098, "loss": 11.6849, "step": 7923 }, { "epoch": 0.16587122163610482, "grad_norm": 0.276553213596344, "learning_rate": 0.00019850592618409304, "loss": 11.6782, "step": 7924 }, { "epoch": 0.165892154400067, "grad_norm": 0.2813962399959564, "learning_rate": 0.00019850554857181135, "loss": 11.6829, "step": 7925 }, { "epoch": 0.16591308716402914, "grad_norm": 0.23515062034130096, "learning_rate": 0.000198505170912176, "loss": 11.675, "step": 7926 }, { "epoch": 0.16593401992799128, "grad_norm": 0.26738932728767395, "learning_rate": 0.00019850479320518728, "loss": 11.6849, "step": 7927 }, { "epoch": 0.16595495269195346, "grad_norm": 0.279731422662735, "learning_rate": 0.00019850441545084534, "loss": 11.6657, "step": 7928 }, { "epoch": 0.1659758854559156, "grad_norm": 0.20129302144050598, "learning_rate": 0.00019850403764915035, "loss": 11.6624, "step": 7929 }, { "epoch": 0.16599681821987775, "grad_norm": 0.30035409331321716, "learning_rate": 0.0001985036598001025, "loss": 11.7009, "step": 7930 }, { "epoch": 0.16601775098383992, "grad_norm": 0.2750433683395386, "learning_rate": 0.00019850328190370193, "loss": 11.6825, "step": 7931 }, { "epoch": 0.16603868374780206, "grad_norm": 0.19869782030582428, "learning_rate": 0.0001985029039599489, "loss": 11.6613, "step": 7932 }, { "epoch": 0.1660596165117642, "grad_norm": 0.23323141038417816, "learning_rate": 0.00019850252596884352, "loss": 11.6698, "step": 7933 }, { "epoch": 0.16608054927572638, "grad_norm": 0.24729324877262115, "learning_rate": 0.000198502147930386, "loss": 11.6678, "step": 7934 }, { "epoch": 0.16610148203968852, "grad_norm": 0.3019672632217407, "learning_rate": 0.00019850176984457653, "loss": 11.6706, "step": 7935 }, { "epoch": 0.16612241480365067, "grad_norm": 0.2773093283176422, "learning_rate": 0.0001985013917114153, "loss": 11.6674, "step": 7936 }, { "epoch": 0.16614334756761281, "grad_norm": 0.23222586512565613, "learning_rate": 0.00019850101353090246, "loss": 11.6731, "step": 7937 }, { "epoch": 0.166164280331575, "grad_norm": 0.2581925094127655, "learning_rate": 0.00019850063530303823, "loss": 11.68, "step": 7938 }, { "epoch": 0.16618521309553713, "grad_norm": 0.2288619428873062, "learning_rate": 0.00019850025702782275, "loss": 11.6813, "step": 7939 }, { "epoch": 0.16620614585949928, "grad_norm": 0.2584340572357178, "learning_rate": 0.00019849987870525625, "loss": 11.6944, "step": 7940 }, { "epoch": 0.16622707862346145, "grad_norm": 0.20932385325431824, "learning_rate": 0.00019849950033533885, "loss": 11.669, "step": 7941 }, { "epoch": 0.1662480113874236, "grad_norm": 0.22937259078025818, "learning_rate": 0.00019849912191807078, "loss": 11.6725, "step": 7942 }, { "epoch": 0.16626894415138574, "grad_norm": 0.2683808505535126, "learning_rate": 0.0001984987434534522, "loss": 11.6867, "step": 7943 }, { "epoch": 0.1662898769153479, "grad_norm": 0.24549219012260437, "learning_rate": 0.00019849836494148333, "loss": 11.6787, "step": 7944 }, { "epoch": 0.16631080967931006, "grad_norm": 0.32365673780441284, "learning_rate": 0.00019849798638216427, "loss": 11.6672, "step": 7945 }, { "epoch": 0.1663317424432722, "grad_norm": 0.23416289687156677, "learning_rate": 0.00019849760777549528, "loss": 11.665, "step": 7946 }, { "epoch": 0.16635267520723437, "grad_norm": 0.2847153842449188, "learning_rate": 0.00019849722912147652, "loss": 11.6801, "step": 7947 }, { "epoch": 0.16637360797119652, "grad_norm": 0.2149413675069809, "learning_rate": 0.00019849685042010816, "loss": 11.6828, "step": 7948 }, { "epoch": 0.16639454073515866, "grad_norm": 0.2303621470928192, "learning_rate": 0.00019849647167139044, "loss": 11.6688, "step": 7949 }, { "epoch": 0.16641547349912084, "grad_norm": 0.2539833188056946, "learning_rate": 0.00019849609287532344, "loss": 11.6769, "step": 7950 }, { "epoch": 0.16643640626308298, "grad_norm": 0.3169330060482025, "learning_rate": 0.0001984957140319074, "loss": 11.6881, "step": 7951 }, { "epoch": 0.16645733902704513, "grad_norm": 0.27741438150405884, "learning_rate": 0.0001984953351411425, "loss": 11.6707, "step": 7952 }, { "epoch": 0.1664782717910073, "grad_norm": 0.29029136896133423, "learning_rate": 0.00019849495620302893, "loss": 11.693, "step": 7953 }, { "epoch": 0.16649920455496944, "grad_norm": 0.23268622159957886, "learning_rate": 0.00019849457721756686, "loss": 11.6604, "step": 7954 }, { "epoch": 0.1665201373189316, "grad_norm": 0.22094093263149261, "learning_rate": 0.00019849419818475646, "loss": 11.6874, "step": 7955 }, { "epoch": 0.16654107008289373, "grad_norm": 0.23684920370578766, "learning_rate": 0.00019849381910459792, "loss": 11.6988, "step": 7956 }, { "epoch": 0.1665620028468559, "grad_norm": 0.1970190852880478, "learning_rate": 0.00019849343997709143, "loss": 11.6795, "step": 7957 }, { "epoch": 0.16658293561081805, "grad_norm": 0.3426138162612915, "learning_rate": 0.00019849306080223719, "loss": 11.6836, "step": 7958 }, { "epoch": 0.1666038683747802, "grad_norm": 0.2213808000087738, "learning_rate": 0.00019849268158003534, "loss": 11.6801, "step": 7959 }, { "epoch": 0.16662480113874237, "grad_norm": 0.2540576756000519, "learning_rate": 0.0001984923023104861, "loss": 11.7012, "step": 7960 }, { "epoch": 0.1666457339027045, "grad_norm": 0.25184595584869385, "learning_rate": 0.00019849192299358962, "loss": 11.6782, "step": 7961 }, { "epoch": 0.16666666666666666, "grad_norm": 0.25357162952423096, "learning_rate": 0.0001984915436293461, "loss": 11.6903, "step": 7962 }, { "epoch": 0.16668759943062883, "grad_norm": 0.2902141213417053, "learning_rate": 0.00019849116421775573, "loss": 11.6656, "step": 7963 }, { "epoch": 0.16670853219459097, "grad_norm": 0.2335306853055954, "learning_rate": 0.00019849078475881867, "loss": 11.6957, "step": 7964 }, { "epoch": 0.16672946495855312, "grad_norm": 0.1889534890651703, "learning_rate": 0.00019849040525253515, "loss": 11.6674, "step": 7965 }, { "epoch": 0.1667503977225153, "grad_norm": 0.21576544642448425, "learning_rate": 0.0001984900256989053, "loss": 11.6754, "step": 7966 }, { "epoch": 0.16677133048647744, "grad_norm": 0.20246030390262604, "learning_rate": 0.0001984896460979293, "loss": 11.6791, "step": 7967 }, { "epoch": 0.16679226325043958, "grad_norm": 0.2581302225589752, "learning_rate": 0.00019848926644960738, "loss": 11.681, "step": 7968 }, { "epoch": 0.16681319601440175, "grad_norm": 0.23064661026000977, "learning_rate": 0.00019848888675393967, "loss": 11.6799, "step": 7969 }, { "epoch": 0.1668341287783639, "grad_norm": 0.2633962631225586, "learning_rate": 0.0001984885070109264, "loss": 11.6846, "step": 7970 }, { "epoch": 0.16685506154232604, "grad_norm": 0.2958529591560364, "learning_rate": 0.00019848812722056774, "loss": 11.6853, "step": 7971 }, { "epoch": 0.1668759943062882, "grad_norm": 0.230996772646904, "learning_rate": 0.00019848774738286383, "loss": 11.6976, "step": 7972 }, { "epoch": 0.16689692707025036, "grad_norm": 0.24075660109519958, "learning_rate": 0.0001984873674978149, "loss": 11.6689, "step": 7973 }, { "epoch": 0.1669178598342125, "grad_norm": 0.24417251348495483, "learning_rate": 0.00019848698756542115, "loss": 11.6848, "step": 7974 }, { "epoch": 0.16693879259817465, "grad_norm": 0.24065576493740082, "learning_rate": 0.0001984866075856827, "loss": 11.6766, "step": 7975 }, { "epoch": 0.16695972536213682, "grad_norm": 0.28957584500312805, "learning_rate": 0.00019848622755859978, "loss": 11.6884, "step": 7976 }, { "epoch": 0.16698065812609897, "grad_norm": 0.25339505076408386, "learning_rate": 0.00019848584748417253, "loss": 11.6844, "step": 7977 }, { "epoch": 0.1670015908900611, "grad_norm": 0.2615736126899719, "learning_rate": 0.00019848546736240117, "loss": 11.669, "step": 7978 }, { "epoch": 0.16702252365402329, "grad_norm": 0.22386059165000916, "learning_rate": 0.00019848508719328588, "loss": 11.681, "step": 7979 }, { "epoch": 0.16704345641798543, "grad_norm": 0.22098642587661743, "learning_rate": 0.00019848470697682685, "loss": 11.6829, "step": 7980 }, { "epoch": 0.16706438918194758, "grad_norm": 0.251324325799942, "learning_rate": 0.00019848432671302422, "loss": 11.6735, "step": 7981 }, { "epoch": 0.16708532194590975, "grad_norm": 0.2291029840707779, "learning_rate": 0.00019848394640187823, "loss": 11.6763, "step": 7982 }, { "epoch": 0.1671062547098719, "grad_norm": 0.19151616096496582, "learning_rate": 0.00019848356604338898, "loss": 11.6723, "step": 7983 }, { "epoch": 0.16712718747383404, "grad_norm": 0.3023681342601776, "learning_rate": 0.00019848318563755676, "loss": 11.692, "step": 7984 }, { "epoch": 0.1671481202377962, "grad_norm": 0.2364322394132614, "learning_rate": 0.00019848280518438167, "loss": 11.6686, "step": 7985 }, { "epoch": 0.16716905300175836, "grad_norm": 0.24049454927444458, "learning_rate": 0.00019848242468386394, "loss": 11.6894, "step": 7986 }, { "epoch": 0.1671899857657205, "grad_norm": 0.2449871152639389, "learning_rate": 0.0001984820441360037, "loss": 11.6751, "step": 7987 }, { "epoch": 0.16721091852968267, "grad_norm": 0.2265491932630539, "learning_rate": 0.0001984816635408012, "loss": 11.6713, "step": 7988 }, { "epoch": 0.16723185129364482, "grad_norm": 0.2508186399936676, "learning_rate": 0.0001984812828982566, "loss": 11.6936, "step": 7989 }, { "epoch": 0.16725278405760696, "grad_norm": 0.17559845745563507, "learning_rate": 0.00019848090220837006, "loss": 11.6884, "step": 7990 }, { "epoch": 0.1672737168215691, "grad_norm": 0.32650497555732727, "learning_rate": 0.00019848052147114178, "loss": 11.6852, "step": 7991 }, { "epoch": 0.16729464958553128, "grad_norm": 0.27228379249572754, "learning_rate": 0.0001984801406865719, "loss": 11.6873, "step": 7992 }, { "epoch": 0.16731558234949342, "grad_norm": 0.34674355387687683, "learning_rate": 0.0001984797598546607, "loss": 11.6772, "step": 7993 }, { "epoch": 0.16733651511345557, "grad_norm": 0.2364831417798996, "learning_rate": 0.0001984793789754083, "loss": 11.6703, "step": 7994 }, { "epoch": 0.16735744787741774, "grad_norm": 0.23956169188022614, "learning_rate": 0.00019847899804881488, "loss": 11.6812, "step": 7995 }, { "epoch": 0.1673783806413799, "grad_norm": 0.25188207626342773, "learning_rate": 0.0001984786170748806, "loss": 11.6801, "step": 7996 }, { "epoch": 0.16739931340534203, "grad_norm": 0.21598193049430847, "learning_rate": 0.0001984782360536057, "loss": 11.6811, "step": 7997 }, { "epoch": 0.1674202461693042, "grad_norm": 0.2168140560388565, "learning_rate": 0.00019847785498499035, "loss": 11.6722, "step": 7998 }, { "epoch": 0.16744117893326635, "grad_norm": 0.22184914350509644, "learning_rate": 0.00019847747386903471, "loss": 11.6764, "step": 7999 }, { "epoch": 0.1674621116972285, "grad_norm": 0.26170504093170166, "learning_rate": 0.00019847709270573896, "loss": 11.6835, "step": 8000 }, { "epoch": 0.1674621116972285, "eval_loss": 11.680062294006348, "eval_runtime": 34.347, "eval_samples_per_second": 27.979, "eval_steps_per_second": 7.017, "step": 8000 }, { "epoch": 0.16748304446119067, "grad_norm": 0.2512272298336029, "learning_rate": 0.00019847671149510331, "loss": 11.6759, "step": 8001 }, { "epoch": 0.1675039772251528, "grad_norm": 0.22668692469596863, "learning_rate": 0.00019847633023712796, "loss": 11.692, "step": 8002 }, { "epoch": 0.16752490998911496, "grad_norm": 0.22941617667675018, "learning_rate": 0.00019847594893181302, "loss": 11.673, "step": 8003 }, { "epoch": 0.16754584275307713, "grad_norm": 0.23663005232810974, "learning_rate": 0.00019847556757915876, "loss": 11.6875, "step": 8004 }, { "epoch": 0.16756677551703927, "grad_norm": 0.25520429015159607, "learning_rate": 0.0001984751861791653, "loss": 11.6717, "step": 8005 }, { "epoch": 0.16758770828100142, "grad_norm": 0.2153823971748352, "learning_rate": 0.00019847480473183282, "loss": 11.6697, "step": 8006 }, { "epoch": 0.16760864104496356, "grad_norm": 0.30818504095077515, "learning_rate": 0.00019847442323716154, "loss": 11.6678, "step": 8007 }, { "epoch": 0.16762957380892574, "grad_norm": 0.20735444128513336, "learning_rate": 0.00019847404169515164, "loss": 11.6769, "step": 8008 }, { "epoch": 0.16765050657288788, "grad_norm": 0.22940434515476227, "learning_rate": 0.0001984736601058033, "loss": 11.6761, "step": 8009 }, { "epoch": 0.16767143933685003, "grad_norm": 0.2288072109222412, "learning_rate": 0.0001984732784691167, "loss": 11.6852, "step": 8010 }, { "epoch": 0.1676923721008122, "grad_norm": 0.4366629719734192, "learning_rate": 0.00019847289678509203, "loss": 11.6689, "step": 8011 }, { "epoch": 0.16771330486477434, "grad_norm": 0.2095743715763092, "learning_rate": 0.00019847251505372942, "loss": 11.6693, "step": 8012 }, { "epoch": 0.1677342376287365, "grad_norm": 0.21455462276935577, "learning_rate": 0.00019847213327502914, "loss": 11.6782, "step": 8013 }, { "epoch": 0.16775517039269866, "grad_norm": 0.23935531079769135, "learning_rate": 0.00019847175144899133, "loss": 11.6662, "step": 8014 }, { "epoch": 0.1677761031566608, "grad_norm": 0.27557429671287537, "learning_rate": 0.00019847136957561618, "loss": 11.6936, "step": 8015 }, { "epoch": 0.16779703592062295, "grad_norm": 0.26295194029808044, "learning_rate": 0.00019847098765490384, "loss": 11.6917, "step": 8016 }, { "epoch": 0.16781796868458512, "grad_norm": 0.23708346486091614, "learning_rate": 0.00019847060568685456, "loss": 11.674, "step": 8017 }, { "epoch": 0.16783890144854727, "grad_norm": 0.200143963098526, "learning_rate": 0.00019847022367146843, "loss": 11.6796, "step": 8018 }, { "epoch": 0.1678598342125094, "grad_norm": 0.21489815413951874, "learning_rate": 0.00019846984160874572, "loss": 11.6691, "step": 8019 }, { "epoch": 0.16788076697647158, "grad_norm": 0.321338951587677, "learning_rate": 0.00019846945949868663, "loss": 11.6725, "step": 8020 }, { "epoch": 0.16790169974043373, "grad_norm": 0.3341748118400574, "learning_rate": 0.00019846907734129125, "loss": 11.6898, "step": 8021 }, { "epoch": 0.16792263250439587, "grad_norm": 0.19911856949329376, "learning_rate": 0.00019846869513655982, "loss": 11.6708, "step": 8022 }, { "epoch": 0.16794356526835805, "grad_norm": 0.19857525825500488, "learning_rate": 0.0001984683128844925, "loss": 11.6804, "step": 8023 }, { "epoch": 0.1679644980323202, "grad_norm": 0.24112239480018616, "learning_rate": 0.00019846793058508953, "loss": 11.6753, "step": 8024 }, { "epoch": 0.16798543079628234, "grad_norm": 0.24160486459732056, "learning_rate": 0.00019846754823835098, "loss": 11.6825, "step": 8025 }, { "epoch": 0.16800636356024448, "grad_norm": 0.22921282052993774, "learning_rate": 0.00019846716584427716, "loss": 11.6895, "step": 8026 }, { "epoch": 0.16802729632420665, "grad_norm": 0.23436333239078522, "learning_rate": 0.00019846678340286822, "loss": 11.6847, "step": 8027 }, { "epoch": 0.1680482290881688, "grad_norm": 0.32083773612976074, "learning_rate": 0.00019846640091412428, "loss": 11.6741, "step": 8028 }, { "epoch": 0.16806916185213094, "grad_norm": 0.24804069101810455, "learning_rate": 0.00019846601837804557, "loss": 11.6825, "step": 8029 }, { "epoch": 0.16809009461609312, "grad_norm": 0.26848334074020386, "learning_rate": 0.00019846563579463232, "loss": 11.6805, "step": 8030 }, { "epoch": 0.16811102738005526, "grad_norm": 0.23988476395606995, "learning_rate": 0.0001984652531638846, "loss": 11.6781, "step": 8031 }, { "epoch": 0.1681319601440174, "grad_norm": 0.25718268752098083, "learning_rate": 0.0001984648704858027, "loss": 11.6572, "step": 8032 }, { "epoch": 0.16815289290797958, "grad_norm": 0.22658632695674896, "learning_rate": 0.00019846448776038674, "loss": 11.6801, "step": 8033 }, { "epoch": 0.16817382567194172, "grad_norm": 0.25780966877937317, "learning_rate": 0.00019846410498763694, "loss": 11.6679, "step": 8034 }, { "epoch": 0.16819475843590387, "grad_norm": 0.2423470914363861, "learning_rate": 0.00019846372216755347, "loss": 11.6855, "step": 8035 }, { "epoch": 0.16821569119986604, "grad_norm": 0.2604655623435974, "learning_rate": 0.00019846333930013653, "loss": 11.6762, "step": 8036 }, { "epoch": 0.16823662396382819, "grad_norm": 0.2203723043203354, "learning_rate": 0.00019846295638538628, "loss": 11.6798, "step": 8037 }, { "epoch": 0.16825755672779033, "grad_norm": 0.21963979303836823, "learning_rate": 0.0001984625734233029, "loss": 11.6714, "step": 8038 }, { "epoch": 0.1682784894917525, "grad_norm": 0.25184211134910583, "learning_rate": 0.0001984621904138866, "loss": 11.6737, "step": 8039 }, { "epoch": 0.16829942225571465, "grad_norm": 0.24893039464950562, "learning_rate": 0.00019846180735713756, "loss": 11.6818, "step": 8040 }, { "epoch": 0.1683203550196768, "grad_norm": 0.27244165539741516, "learning_rate": 0.00019846142425305593, "loss": 11.6824, "step": 8041 }, { "epoch": 0.16834128778363897, "grad_norm": 0.2917150557041168, "learning_rate": 0.00019846104110164193, "loss": 11.6877, "step": 8042 }, { "epoch": 0.1683622205476011, "grad_norm": 0.2807747721672058, "learning_rate": 0.00019846065790289576, "loss": 11.6873, "step": 8043 }, { "epoch": 0.16838315331156326, "grad_norm": 0.2821085453033447, "learning_rate": 0.00019846027465681755, "loss": 11.6852, "step": 8044 }, { "epoch": 0.1684040860755254, "grad_norm": 0.27451038360595703, "learning_rate": 0.00019845989136340751, "loss": 11.6816, "step": 8045 }, { "epoch": 0.16842501883948757, "grad_norm": 0.2578382194042206, "learning_rate": 0.00019845950802266585, "loss": 11.6709, "step": 8046 }, { "epoch": 0.16844595160344972, "grad_norm": 0.22465509176254272, "learning_rate": 0.00019845912463459272, "loss": 11.6608, "step": 8047 }, { "epoch": 0.16846688436741186, "grad_norm": 0.2175571173429489, "learning_rate": 0.00019845874119918833, "loss": 11.6703, "step": 8048 }, { "epoch": 0.16848781713137403, "grad_norm": 0.3051847517490387, "learning_rate": 0.00019845835771645283, "loss": 11.672, "step": 8049 }, { "epoch": 0.16850874989533618, "grad_norm": 0.23546051979064941, "learning_rate": 0.00019845797418638643, "loss": 11.6753, "step": 8050 }, { "epoch": 0.16852968265929832, "grad_norm": 0.25753626227378845, "learning_rate": 0.0001984575906089893, "loss": 11.6782, "step": 8051 }, { "epoch": 0.1685506154232605, "grad_norm": 0.2272939831018448, "learning_rate": 0.00019845720698426165, "loss": 11.6971, "step": 8052 }, { "epoch": 0.16857154818722264, "grad_norm": 0.26154500246047974, "learning_rate": 0.00019845682331220364, "loss": 11.6896, "step": 8053 }, { "epoch": 0.1685924809511848, "grad_norm": 0.22446593642234802, "learning_rate": 0.00019845643959281545, "loss": 11.6613, "step": 8054 }, { "epoch": 0.16861341371514696, "grad_norm": 0.2706899046897888, "learning_rate": 0.0001984560558260973, "loss": 11.6735, "step": 8055 }, { "epoch": 0.1686343464791091, "grad_norm": 0.21997161209583282, "learning_rate": 0.00019845567201204933, "loss": 11.6905, "step": 8056 }, { "epoch": 0.16865527924307125, "grad_norm": 0.23639459908008575, "learning_rate": 0.00019845528815067175, "loss": 11.6758, "step": 8057 }, { "epoch": 0.16867621200703342, "grad_norm": 0.26280856132507324, "learning_rate": 0.00019845490424196476, "loss": 11.6816, "step": 8058 }, { "epoch": 0.16869714477099557, "grad_norm": 0.2277594953775406, "learning_rate": 0.0001984545202859285, "loss": 11.6905, "step": 8059 }, { "epoch": 0.1687180775349577, "grad_norm": 0.2722996473312378, "learning_rate": 0.0001984541362825632, "loss": 11.6835, "step": 8060 }, { "epoch": 0.16873901029891986, "grad_norm": 0.28219980001449585, "learning_rate": 0.000198453752231869, "loss": 11.6879, "step": 8061 }, { "epoch": 0.16875994306288203, "grad_norm": 0.2689882218837738, "learning_rate": 0.00019845336813384613, "loss": 11.6693, "step": 8062 }, { "epoch": 0.16878087582684417, "grad_norm": 0.24578826129436493, "learning_rate": 0.00019845298398849472, "loss": 11.6628, "step": 8063 }, { "epoch": 0.16880180859080632, "grad_norm": 0.30730703473091125, "learning_rate": 0.00019845259979581501, "loss": 11.6819, "step": 8064 }, { "epoch": 0.1688227413547685, "grad_norm": 0.256188303232193, "learning_rate": 0.00019845221555580718, "loss": 11.69, "step": 8065 }, { "epoch": 0.16884367411873064, "grad_norm": 0.22611360251903534, "learning_rate": 0.0001984518312684714, "loss": 11.6729, "step": 8066 }, { "epoch": 0.16886460688269278, "grad_norm": 0.24183014035224915, "learning_rate": 0.00019845144693380782, "loss": 11.6768, "step": 8067 }, { "epoch": 0.16888553964665495, "grad_norm": 0.19344952702522278, "learning_rate": 0.00019845106255181666, "loss": 11.6754, "step": 8068 }, { "epoch": 0.1689064724106171, "grad_norm": 0.33260610699653625, "learning_rate": 0.0001984506781224981, "loss": 11.684, "step": 8069 }, { "epoch": 0.16892740517457924, "grad_norm": 0.2798177897930145, "learning_rate": 0.00019845029364585232, "loss": 11.6985, "step": 8070 }, { "epoch": 0.16894833793854142, "grad_norm": 0.2938096225261688, "learning_rate": 0.00019844990912187954, "loss": 11.6682, "step": 8071 }, { "epoch": 0.16896927070250356, "grad_norm": 0.22124126553535461, "learning_rate": 0.0001984495245505799, "loss": 11.6772, "step": 8072 }, { "epoch": 0.1689902034664657, "grad_norm": 0.21995136141777039, "learning_rate": 0.0001984491399319536, "loss": 11.6671, "step": 8073 }, { "epoch": 0.16901113623042788, "grad_norm": 0.2688554525375366, "learning_rate": 0.00019844875526600083, "loss": 11.6882, "step": 8074 }, { "epoch": 0.16903206899439002, "grad_norm": 0.19456447660923004, "learning_rate": 0.00019844837055272178, "loss": 11.6654, "step": 8075 }, { "epoch": 0.16905300175835217, "grad_norm": 0.3660815358161926, "learning_rate": 0.00019844798579211662, "loss": 11.6747, "step": 8076 }, { "epoch": 0.16907393452231434, "grad_norm": 0.24605806171894073, "learning_rate": 0.00019844760098418552, "loss": 11.6703, "step": 8077 }, { "epoch": 0.16909486728627648, "grad_norm": 0.24174973368644714, "learning_rate": 0.0001984472161289287, "loss": 11.667, "step": 8078 }, { "epoch": 0.16911580005023863, "grad_norm": 0.2430294007062912, "learning_rate": 0.00019844683122634634, "loss": 11.6659, "step": 8079 }, { "epoch": 0.16913673281420077, "grad_norm": 0.24897728860378265, "learning_rate": 0.0001984464462764386, "loss": 11.677, "step": 8080 }, { "epoch": 0.16915766557816295, "grad_norm": 0.26677820086479187, "learning_rate": 0.00019844606127920566, "loss": 11.6878, "step": 8081 }, { "epoch": 0.1691785983421251, "grad_norm": 0.30449339747428894, "learning_rate": 0.00019844567623464776, "loss": 11.691, "step": 8082 }, { "epoch": 0.16919953110608724, "grad_norm": 0.2511020004749298, "learning_rate": 0.00019844529114276504, "loss": 11.6926, "step": 8083 }, { "epoch": 0.1692204638700494, "grad_norm": 0.21746712923049927, "learning_rate": 0.0001984449060035577, "loss": 11.6899, "step": 8084 }, { "epoch": 0.16924139663401155, "grad_norm": 0.24743659794330597, "learning_rate": 0.0001984445208170259, "loss": 11.6807, "step": 8085 }, { "epoch": 0.1692623293979737, "grad_norm": 0.2577055096626282, "learning_rate": 0.00019844413558316987, "loss": 11.6866, "step": 8086 }, { "epoch": 0.16928326216193587, "grad_norm": 0.2762158215045929, "learning_rate": 0.00019844375030198975, "loss": 11.6882, "step": 8087 }, { "epoch": 0.16930419492589802, "grad_norm": 0.2515024244785309, "learning_rate": 0.00019844336497348576, "loss": 11.6814, "step": 8088 }, { "epoch": 0.16932512768986016, "grad_norm": 0.21363167464733124, "learning_rate": 0.0001984429795976581, "loss": 11.6635, "step": 8089 }, { "epoch": 0.16934606045382233, "grad_norm": 0.26157790422439575, "learning_rate": 0.00019844259417450691, "loss": 11.6832, "step": 8090 }, { "epoch": 0.16936699321778448, "grad_norm": 0.2400486171245575, "learning_rate": 0.00019844220870403236, "loss": 11.6902, "step": 8091 }, { "epoch": 0.16938792598174662, "grad_norm": 0.23113030195236206, "learning_rate": 0.0001984418231862347, "loss": 11.6765, "step": 8092 }, { "epoch": 0.1694088587457088, "grad_norm": 0.2689661681652069, "learning_rate": 0.00019844143762111406, "loss": 11.6782, "step": 8093 }, { "epoch": 0.16942979150967094, "grad_norm": 0.2399071902036667, "learning_rate": 0.00019844105200867066, "loss": 11.6806, "step": 8094 }, { "epoch": 0.16945072427363309, "grad_norm": 0.27619192004203796, "learning_rate": 0.0001984406663489047, "loss": 11.686, "step": 8095 }, { "epoch": 0.16947165703759523, "grad_norm": 0.23821701109409332, "learning_rate": 0.00019844028064181627, "loss": 11.6847, "step": 8096 }, { "epoch": 0.1694925898015574, "grad_norm": 0.22182096540927887, "learning_rate": 0.00019843989488740568, "loss": 11.682, "step": 8097 }, { "epoch": 0.16951352256551955, "grad_norm": 0.22375936806201935, "learning_rate": 0.00019843950908567305, "loss": 11.678, "step": 8098 }, { "epoch": 0.1695344553294817, "grad_norm": 0.2701778709888458, "learning_rate": 0.00019843912323661857, "loss": 11.6798, "step": 8099 }, { "epoch": 0.16955538809344387, "grad_norm": 0.23670931160449982, "learning_rate": 0.00019843873734024246, "loss": 11.6886, "step": 8100 }, { "epoch": 0.169576320857406, "grad_norm": 0.2502140700817108, "learning_rate": 0.00019843835139654484, "loss": 11.6616, "step": 8101 }, { "epoch": 0.16959725362136815, "grad_norm": 0.24595290422439575, "learning_rate": 0.00019843796540552593, "loss": 11.6775, "step": 8102 }, { "epoch": 0.16961818638533033, "grad_norm": 0.24980773031711578, "learning_rate": 0.0001984375793671859, "loss": 11.6772, "step": 8103 }, { "epoch": 0.16963911914929247, "grad_norm": 0.2288520187139511, "learning_rate": 0.000198437193281525, "loss": 11.6766, "step": 8104 }, { "epoch": 0.16966005191325462, "grad_norm": 0.2290070354938507, "learning_rate": 0.00019843680714854336, "loss": 11.6758, "step": 8105 }, { "epoch": 0.1696809846772168, "grad_norm": 0.2739899158477783, "learning_rate": 0.00019843642096824116, "loss": 11.6764, "step": 8106 }, { "epoch": 0.16970191744117893, "grad_norm": 0.23447008430957794, "learning_rate": 0.0001984360347406186, "loss": 11.6779, "step": 8107 }, { "epoch": 0.16972285020514108, "grad_norm": 0.2925579249858856, "learning_rate": 0.00019843564846567588, "loss": 11.6834, "step": 8108 }, { "epoch": 0.16974378296910325, "grad_norm": 0.2602928578853607, "learning_rate": 0.00019843526214341316, "loss": 11.6887, "step": 8109 }, { "epoch": 0.1697647157330654, "grad_norm": 0.2199609875679016, "learning_rate": 0.00019843487577383062, "loss": 11.6735, "step": 8110 }, { "epoch": 0.16978564849702754, "grad_norm": 0.3341684639453888, "learning_rate": 0.00019843448935692848, "loss": 11.6542, "step": 8111 }, { "epoch": 0.16980658126098971, "grad_norm": 0.2920941412448883, "learning_rate": 0.0001984341028927069, "loss": 11.7049, "step": 8112 }, { "epoch": 0.16982751402495186, "grad_norm": 0.2823193371295929, "learning_rate": 0.0001984337163811661, "loss": 11.6902, "step": 8113 }, { "epoch": 0.169848446788914, "grad_norm": 0.25505340099334717, "learning_rate": 0.00019843332982230621, "loss": 11.6655, "step": 8114 }, { "epoch": 0.16986937955287615, "grad_norm": 0.26481422781944275, "learning_rate": 0.00019843294321612748, "loss": 11.6729, "step": 8115 }, { "epoch": 0.16989031231683832, "grad_norm": 0.24047337472438812, "learning_rate": 0.00019843255656263004, "loss": 11.6875, "step": 8116 }, { "epoch": 0.16991124508080047, "grad_norm": 0.3643721044063568, "learning_rate": 0.00019843216986181408, "loss": 11.7073, "step": 8117 }, { "epoch": 0.1699321778447626, "grad_norm": 0.23196621239185333, "learning_rate": 0.0001984317831136798, "loss": 11.6647, "step": 8118 }, { "epoch": 0.16995311060872478, "grad_norm": 0.2778969705104828, "learning_rate": 0.00019843139631822742, "loss": 11.6902, "step": 8119 }, { "epoch": 0.16997404337268693, "grad_norm": 0.2603307068347931, "learning_rate": 0.00019843100947545707, "loss": 11.6771, "step": 8120 }, { "epoch": 0.16999497613664907, "grad_norm": 0.25072726607322693, "learning_rate": 0.00019843062258536898, "loss": 11.6882, "step": 8121 }, { "epoch": 0.17001590890061125, "grad_norm": 0.23041504621505737, "learning_rate": 0.00019843023564796334, "loss": 11.701, "step": 8122 }, { "epoch": 0.1700368416645734, "grad_norm": 0.22861556708812714, "learning_rate": 0.0001984298486632403, "loss": 11.6706, "step": 8123 }, { "epoch": 0.17005777442853554, "grad_norm": 0.25424671173095703, "learning_rate": 0.0001984294616312, "loss": 11.6737, "step": 8124 }, { "epoch": 0.1700787071924977, "grad_norm": 0.24688343703746796, "learning_rate": 0.00019842907455184275, "loss": 11.6971, "step": 8125 }, { "epoch": 0.17009963995645985, "grad_norm": 0.27189624309539795, "learning_rate": 0.00019842868742516863, "loss": 11.6803, "step": 8126 }, { "epoch": 0.170120572720422, "grad_norm": 0.28647273778915405, "learning_rate": 0.0001984283002511779, "loss": 11.6626, "step": 8127 }, { "epoch": 0.17014150548438417, "grad_norm": 0.291928231716156, "learning_rate": 0.0001984279130298707, "loss": 11.6944, "step": 8128 }, { "epoch": 0.17016243824834631, "grad_norm": 0.2932150363922119, "learning_rate": 0.00019842752576124724, "loss": 11.6856, "step": 8129 }, { "epoch": 0.17018337101230846, "grad_norm": 0.2597683370113373, "learning_rate": 0.00019842713844530767, "loss": 11.6815, "step": 8130 }, { "epoch": 0.17020430377627063, "grad_norm": 0.22325575351715088, "learning_rate": 0.00019842675108205224, "loss": 11.6653, "step": 8131 }, { "epoch": 0.17022523654023278, "grad_norm": 0.27267682552337646, "learning_rate": 0.00019842636367148106, "loss": 11.6953, "step": 8132 }, { "epoch": 0.17024616930419492, "grad_norm": 0.26101499795913696, "learning_rate": 0.00019842597621359438, "loss": 11.6805, "step": 8133 }, { "epoch": 0.17026710206815707, "grad_norm": 0.24254834651947021, "learning_rate": 0.00019842558870839235, "loss": 11.6791, "step": 8134 }, { "epoch": 0.17028803483211924, "grad_norm": 0.2094840258359909, "learning_rate": 0.00019842520115587517, "loss": 11.672, "step": 8135 }, { "epoch": 0.17030896759608138, "grad_norm": 0.20100398361682892, "learning_rate": 0.000198424813556043, "loss": 11.6785, "step": 8136 }, { "epoch": 0.17032990036004353, "grad_norm": 0.24472424387931824, "learning_rate": 0.0001984244259088961, "loss": 11.6727, "step": 8137 }, { "epoch": 0.1703508331240057, "grad_norm": 0.20551910996437073, "learning_rate": 0.00019842403821443457, "loss": 11.6781, "step": 8138 }, { "epoch": 0.17037176588796785, "grad_norm": 0.21815261244773865, "learning_rate": 0.00019842365047265865, "loss": 11.679, "step": 8139 }, { "epoch": 0.17039269865193, "grad_norm": 0.30451345443725586, "learning_rate": 0.0001984232626835685, "loss": 11.6751, "step": 8140 }, { "epoch": 0.17041363141589216, "grad_norm": 0.21411940455436707, "learning_rate": 0.00019842287484716432, "loss": 11.6764, "step": 8141 }, { "epoch": 0.1704345641798543, "grad_norm": 0.2531193792819977, "learning_rate": 0.00019842248696344628, "loss": 11.6863, "step": 8142 }, { "epoch": 0.17045549694381645, "grad_norm": 0.22202271223068237, "learning_rate": 0.00019842209903241458, "loss": 11.6757, "step": 8143 }, { "epoch": 0.17047642970777863, "grad_norm": 0.29678875207901, "learning_rate": 0.00019842171105406941, "loss": 11.6834, "step": 8144 }, { "epoch": 0.17049736247174077, "grad_norm": 0.27801501750946045, "learning_rate": 0.00019842132302841096, "loss": 11.6879, "step": 8145 }, { "epoch": 0.17051829523570292, "grad_norm": 0.21265360713005066, "learning_rate": 0.0001984209349554394, "loss": 11.6797, "step": 8146 }, { "epoch": 0.1705392279996651, "grad_norm": 0.22111880779266357, "learning_rate": 0.0001984205468351549, "loss": 11.6645, "step": 8147 }, { "epoch": 0.17056016076362723, "grad_norm": 0.22498297691345215, "learning_rate": 0.00019842015866755773, "loss": 11.6631, "step": 8148 }, { "epoch": 0.17058109352758938, "grad_norm": 0.2396891862154007, "learning_rate": 0.00019841977045264794, "loss": 11.6798, "step": 8149 }, { "epoch": 0.17060202629155152, "grad_norm": 0.2772917151451111, "learning_rate": 0.00019841938219042584, "loss": 11.6787, "step": 8150 }, { "epoch": 0.1706229590555137, "grad_norm": 0.29168689250946045, "learning_rate": 0.00019841899388089158, "loss": 11.6883, "step": 8151 }, { "epoch": 0.17064389181947584, "grad_norm": 0.22012025117874146, "learning_rate": 0.00019841860552404533, "loss": 11.674, "step": 8152 }, { "epoch": 0.17066482458343799, "grad_norm": 0.27340880036354065, "learning_rate": 0.00019841821711988726, "loss": 11.6804, "step": 8153 }, { "epoch": 0.17068575734740016, "grad_norm": 0.20527823269367218, "learning_rate": 0.0001984178286684176, "loss": 11.6655, "step": 8154 }, { "epoch": 0.1707066901113623, "grad_norm": 0.38733193278312683, "learning_rate": 0.0001984174401696365, "loss": 11.6709, "step": 8155 }, { "epoch": 0.17072762287532445, "grad_norm": 0.32215890288352966, "learning_rate": 0.0001984170516235442, "loss": 11.6847, "step": 8156 }, { "epoch": 0.17074855563928662, "grad_norm": 0.23501838743686676, "learning_rate": 0.0001984166630301408, "loss": 11.6581, "step": 8157 }, { "epoch": 0.17076948840324876, "grad_norm": 0.24733866751194, "learning_rate": 0.00019841627438942658, "loss": 11.6643, "step": 8158 }, { "epoch": 0.1707904211672109, "grad_norm": 0.26977208256721497, "learning_rate": 0.00019841588570140167, "loss": 11.6762, "step": 8159 }, { "epoch": 0.17081135393117308, "grad_norm": 0.24340641498565674, "learning_rate": 0.0001984154969660663, "loss": 11.6697, "step": 8160 }, { "epoch": 0.17083228669513523, "grad_norm": 0.24219077825546265, "learning_rate": 0.0001984151081834206, "loss": 11.6841, "step": 8161 }, { "epoch": 0.17085321945909737, "grad_norm": 0.27110302448272705, "learning_rate": 0.0001984147193534648, "loss": 11.6708, "step": 8162 }, { "epoch": 0.17087415222305954, "grad_norm": 0.25817519426345825, "learning_rate": 0.00019841433047619903, "loss": 11.6754, "step": 8163 }, { "epoch": 0.1708950849870217, "grad_norm": 0.3866393268108368, "learning_rate": 0.00019841394155162357, "loss": 11.6959, "step": 8164 }, { "epoch": 0.17091601775098383, "grad_norm": 0.3919485807418823, "learning_rate": 0.00019841355257973852, "loss": 11.6871, "step": 8165 }, { "epoch": 0.170936950514946, "grad_norm": 0.21136023104190826, "learning_rate": 0.00019841316356054414, "loss": 11.6747, "step": 8166 }, { "epoch": 0.17095788327890815, "grad_norm": 0.2683795392513275, "learning_rate": 0.00019841277449404054, "loss": 11.6926, "step": 8167 }, { "epoch": 0.1709788160428703, "grad_norm": 0.351481556892395, "learning_rate": 0.00019841238538022794, "loss": 11.6685, "step": 8168 }, { "epoch": 0.17099974880683244, "grad_norm": 0.2854568064212799, "learning_rate": 0.00019841199621910657, "loss": 11.6835, "step": 8169 }, { "epoch": 0.1710206815707946, "grad_norm": 0.23067043721675873, "learning_rate": 0.00019841160701067657, "loss": 11.6722, "step": 8170 }, { "epoch": 0.17104161433475676, "grad_norm": 0.22909624874591827, "learning_rate": 0.00019841121775493817, "loss": 11.6784, "step": 8171 }, { "epoch": 0.1710625470987189, "grad_norm": 0.26566576957702637, "learning_rate": 0.00019841082845189147, "loss": 11.6837, "step": 8172 }, { "epoch": 0.17108347986268108, "grad_norm": 0.2350299209356308, "learning_rate": 0.00019841043910153673, "loss": 11.6734, "step": 8173 }, { "epoch": 0.17110441262664322, "grad_norm": 0.23451858758926392, "learning_rate": 0.00019841004970387415, "loss": 11.6798, "step": 8174 }, { "epoch": 0.17112534539060537, "grad_norm": 0.21888616681098938, "learning_rate": 0.00019840966025890384, "loss": 11.6744, "step": 8175 }, { "epoch": 0.17114627815456754, "grad_norm": 0.2204277366399765, "learning_rate": 0.00019840927076662608, "loss": 11.6799, "step": 8176 }, { "epoch": 0.17116721091852968, "grad_norm": 0.32408788800239563, "learning_rate": 0.00019840888122704098, "loss": 11.6959, "step": 8177 }, { "epoch": 0.17118814368249183, "grad_norm": 0.21650822460651398, "learning_rate": 0.00019840849164014877, "loss": 11.6677, "step": 8178 }, { "epoch": 0.171209076446454, "grad_norm": 0.26673614978790283, "learning_rate": 0.0001984081020059496, "loss": 11.6714, "step": 8179 }, { "epoch": 0.17123000921041615, "grad_norm": 0.25189170241355896, "learning_rate": 0.0001984077123244437, "loss": 11.6723, "step": 8180 }, { "epoch": 0.1712509419743783, "grad_norm": 0.21014229953289032, "learning_rate": 0.00019840732259563128, "loss": 11.6756, "step": 8181 }, { "epoch": 0.17127187473834046, "grad_norm": 0.25465962290763855, "learning_rate": 0.00019840693281951243, "loss": 11.674, "step": 8182 }, { "epoch": 0.1712928075023026, "grad_norm": 0.2256918102502823, "learning_rate": 0.00019840654299608742, "loss": 11.6619, "step": 8183 }, { "epoch": 0.17131374026626475, "grad_norm": 0.1792488694190979, "learning_rate": 0.00019840615312535645, "loss": 11.6676, "step": 8184 }, { "epoch": 0.17133467303022692, "grad_norm": 0.27201685309410095, "learning_rate": 0.0001984057632073196, "loss": 11.6749, "step": 8185 }, { "epoch": 0.17135560579418907, "grad_norm": 0.2384711503982544, "learning_rate": 0.00019840537324197717, "loss": 11.6841, "step": 8186 }, { "epoch": 0.17137653855815121, "grad_norm": 0.24567310512065887, "learning_rate": 0.0001984049832293293, "loss": 11.6738, "step": 8187 }, { "epoch": 0.17139747132211336, "grad_norm": 0.2941831350326538, "learning_rate": 0.00019840459316937617, "loss": 11.676, "step": 8188 }, { "epoch": 0.17141840408607553, "grad_norm": 0.2766879200935364, "learning_rate": 0.000198404203062118, "loss": 11.6826, "step": 8189 }, { "epoch": 0.17143933685003768, "grad_norm": 0.1948884129524231, "learning_rate": 0.00019840381290755495, "loss": 11.6711, "step": 8190 }, { "epoch": 0.17146026961399982, "grad_norm": 0.2589569389820099, "learning_rate": 0.0001984034227056872, "loss": 11.668, "step": 8191 }, { "epoch": 0.171481202377962, "grad_norm": 0.29933324456214905, "learning_rate": 0.00019840303245651498, "loss": 11.6856, "step": 8192 }, { "epoch": 0.17150213514192414, "grad_norm": 0.32990795373916626, "learning_rate": 0.00019840264216003842, "loss": 11.6945, "step": 8193 }, { "epoch": 0.17152306790588628, "grad_norm": 0.3264368176460266, "learning_rate": 0.00019840225181625774, "loss": 11.6571, "step": 8194 }, { "epoch": 0.17154400066984846, "grad_norm": 0.22522689402103424, "learning_rate": 0.00019840186142517315, "loss": 11.6674, "step": 8195 }, { "epoch": 0.1715649334338106, "grad_norm": 0.25974345207214355, "learning_rate": 0.0001984014709867848, "loss": 11.6838, "step": 8196 }, { "epoch": 0.17158586619777275, "grad_norm": 0.2802351415157318, "learning_rate": 0.00019840108050109287, "loss": 11.6735, "step": 8197 }, { "epoch": 0.17160679896173492, "grad_norm": 0.2455441653728485, "learning_rate": 0.0001984006899680976, "loss": 11.6822, "step": 8198 }, { "epoch": 0.17162773172569706, "grad_norm": 0.20715102553367615, "learning_rate": 0.00019840029938779914, "loss": 11.6668, "step": 8199 }, { "epoch": 0.1716486644896592, "grad_norm": 0.29478415846824646, "learning_rate": 0.00019839990876019766, "loss": 11.6824, "step": 8200 }, { "epoch": 0.17166959725362138, "grad_norm": 0.22123463451862335, "learning_rate": 0.00019839951808529343, "loss": 11.6754, "step": 8201 }, { "epoch": 0.17169053001758353, "grad_norm": 0.294152170419693, "learning_rate": 0.00019839912736308653, "loss": 11.674, "step": 8202 }, { "epoch": 0.17171146278154567, "grad_norm": 0.2352718710899353, "learning_rate": 0.0001983987365935772, "loss": 11.687, "step": 8203 }, { "epoch": 0.17173239554550782, "grad_norm": 0.27541086077690125, "learning_rate": 0.00019839834577676564, "loss": 11.6871, "step": 8204 }, { "epoch": 0.17175332830947, "grad_norm": 0.2353905439376831, "learning_rate": 0.000198397954912652, "loss": 11.688, "step": 8205 }, { "epoch": 0.17177426107343213, "grad_norm": 0.260358989238739, "learning_rate": 0.00019839756400123653, "loss": 11.6766, "step": 8206 }, { "epoch": 0.17179519383739428, "grad_norm": 0.24949350953102112, "learning_rate": 0.00019839717304251936, "loss": 11.685, "step": 8207 }, { "epoch": 0.17181612660135645, "grad_norm": 0.2380146086215973, "learning_rate": 0.0001983967820365007, "loss": 11.6823, "step": 8208 }, { "epoch": 0.1718370593653186, "grad_norm": 0.22660495340824127, "learning_rate": 0.00019839639098318072, "loss": 11.6704, "step": 8209 }, { "epoch": 0.17185799212928074, "grad_norm": 0.22358770668506622, "learning_rate": 0.00019839599988255964, "loss": 11.67, "step": 8210 }, { "epoch": 0.1718789248932429, "grad_norm": 0.23877541720867157, "learning_rate": 0.0001983956087346376, "loss": 11.662, "step": 8211 }, { "epoch": 0.17189985765720506, "grad_norm": 0.23920905590057373, "learning_rate": 0.00019839521753941487, "loss": 11.6779, "step": 8212 }, { "epoch": 0.1719207904211672, "grad_norm": 0.24164453148841858, "learning_rate": 0.00019839482629689154, "loss": 11.6834, "step": 8213 }, { "epoch": 0.17194172318512937, "grad_norm": 0.23896066844463348, "learning_rate": 0.0001983944350070679, "loss": 11.6834, "step": 8214 }, { "epoch": 0.17196265594909152, "grad_norm": 0.221146360039711, "learning_rate": 0.00019839404366994403, "loss": 11.657, "step": 8215 }, { "epoch": 0.17198358871305366, "grad_norm": 0.25783011317253113, "learning_rate": 0.0001983936522855202, "loss": 11.6999, "step": 8216 }, { "epoch": 0.17200452147701584, "grad_norm": 0.20925216376781464, "learning_rate": 0.00019839326085379654, "loss": 11.6863, "step": 8217 }, { "epoch": 0.17202545424097798, "grad_norm": 0.27330541610717773, "learning_rate": 0.00019839286937477327, "loss": 11.6821, "step": 8218 }, { "epoch": 0.17204638700494013, "grad_norm": 0.2714552581310272, "learning_rate": 0.00019839247784845062, "loss": 11.6806, "step": 8219 }, { "epoch": 0.1720673197689023, "grad_norm": 0.3468771278858185, "learning_rate": 0.0001983920862748287, "loss": 11.6866, "step": 8220 }, { "epoch": 0.17208825253286444, "grad_norm": 0.19494009017944336, "learning_rate": 0.00019839169465390774, "loss": 11.6735, "step": 8221 }, { "epoch": 0.1721091852968266, "grad_norm": 0.2371620386838913, "learning_rate": 0.0001983913029856879, "loss": 11.6749, "step": 8222 }, { "epoch": 0.17213011806078873, "grad_norm": 0.25672647356987, "learning_rate": 0.00019839091127016943, "loss": 11.6795, "step": 8223 }, { "epoch": 0.1721510508247509, "grad_norm": 0.3085431456565857, "learning_rate": 0.00019839051950735245, "loss": 11.689, "step": 8224 }, { "epoch": 0.17217198358871305, "grad_norm": 0.2853640615940094, "learning_rate": 0.0001983901276972372, "loss": 11.6885, "step": 8225 }, { "epoch": 0.1721929163526752, "grad_norm": 0.2326035499572754, "learning_rate": 0.00019838973583982382, "loss": 11.6859, "step": 8226 }, { "epoch": 0.17221384911663737, "grad_norm": 0.1998317539691925, "learning_rate": 0.00019838934393511253, "loss": 11.6876, "step": 8227 }, { "epoch": 0.1722347818805995, "grad_norm": 0.25991594791412354, "learning_rate": 0.0001983889519831035, "loss": 11.6868, "step": 8228 }, { "epoch": 0.17225571464456166, "grad_norm": 0.26185429096221924, "learning_rate": 0.00019838855998379696, "loss": 11.6792, "step": 8229 }, { "epoch": 0.17227664740852383, "grad_norm": 0.19587959349155426, "learning_rate": 0.000198388167937193, "loss": 11.674, "step": 8230 }, { "epoch": 0.17229758017248598, "grad_norm": 0.23471924662590027, "learning_rate": 0.00019838777584329194, "loss": 11.6719, "step": 8231 }, { "epoch": 0.17231851293644812, "grad_norm": 0.28499117493629456, "learning_rate": 0.0001983873837020939, "loss": 11.6922, "step": 8232 }, { "epoch": 0.1723394457004103, "grad_norm": 0.23678046464920044, "learning_rate": 0.00019838699151359906, "loss": 11.6861, "step": 8233 }, { "epoch": 0.17236037846437244, "grad_norm": 0.3193219304084778, "learning_rate": 0.0001983865992778076, "loss": 11.6803, "step": 8234 }, { "epoch": 0.17238131122833458, "grad_norm": 0.20794223248958588, "learning_rate": 0.00019838620699471978, "loss": 11.6646, "step": 8235 }, { "epoch": 0.17240224399229676, "grad_norm": 0.24461106956005096, "learning_rate": 0.00019838581466433567, "loss": 11.6674, "step": 8236 }, { "epoch": 0.1724231767562589, "grad_norm": 0.26289692521095276, "learning_rate": 0.0001983854222866556, "loss": 11.6867, "step": 8237 }, { "epoch": 0.17244410952022104, "grad_norm": 0.24694743752479553, "learning_rate": 0.00019838502986167965, "loss": 11.6683, "step": 8238 }, { "epoch": 0.1724650422841832, "grad_norm": 0.23231172561645508, "learning_rate": 0.00019838463738940803, "loss": 11.6807, "step": 8239 }, { "epoch": 0.17248597504814536, "grad_norm": 0.3208155333995819, "learning_rate": 0.00019838424486984097, "loss": 11.6772, "step": 8240 }, { "epoch": 0.1725069078121075, "grad_norm": 0.22446449100971222, "learning_rate": 0.00019838385230297863, "loss": 11.6818, "step": 8241 }, { "epoch": 0.17252784057606965, "grad_norm": 0.19772785902023315, "learning_rate": 0.00019838345968882116, "loss": 11.6718, "step": 8242 }, { "epoch": 0.17254877334003182, "grad_norm": 0.25249141454696655, "learning_rate": 0.00019838306702736885, "loss": 11.6572, "step": 8243 }, { "epoch": 0.17256970610399397, "grad_norm": 0.26479509472846985, "learning_rate": 0.0001983826743186218, "loss": 11.6887, "step": 8244 }, { "epoch": 0.17259063886795611, "grad_norm": 0.30173954367637634, "learning_rate": 0.0001983822815625802, "loss": 11.6832, "step": 8245 }, { "epoch": 0.1726115716319183, "grad_norm": 0.23387020826339722, "learning_rate": 0.0001983818887592443, "loss": 11.6889, "step": 8246 }, { "epoch": 0.17263250439588043, "grad_norm": 0.2025737315416336, "learning_rate": 0.00019838149590861425, "loss": 11.6651, "step": 8247 }, { "epoch": 0.17265343715984258, "grad_norm": 0.2595216929912567, "learning_rate": 0.00019838110301069025, "loss": 11.6674, "step": 8248 }, { "epoch": 0.17267436992380475, "grad_norm": 1.0538057088851929, "learning_rate": 0.00019838071006547247, "loss": 11.6004, "step": 8249 }, { "epoch": 0.1726953026877669, "grad_norm": 0.23611696064472198, "learning_rate": 0.0001983803170729611, "loss": 11.685, "step": 8250 }, { "epoch": 0.17271623545172904, "grad_norm": 0.22101683914661407, "learning_rate": 0.00019837992403315635, "loss": 11.6778, "step": 8251 }, { "epoch": 0.1727371682156912, "grad_norm": 0.31849685311317444, "learning_rate": 0.0001983795309460584, "loss": 11.667, "step": 8252 }, { "epoch": 0.17275810097965336, "grad_norm": 0.21634553372859955, "learning_rate": 0.00019837913781166745, "loss": 11.6831, "step": 8253 }, { "epoch": 0.1727790337436155, "grad_norm": 0.2765516936779022, "learning_rate": 0.00019837874462998368, "loss": 11.6837, "step": 8254 }, { "epoch": 0.17279996650757767, "grad_norm": 0.3427102565765381, "learning_rate": 0.00019837835140100725, "loss": 11.6981, "step": 8255 }, { "epoch": 0.17282089927153982, "grad_norm": 0.25156933069229126, "learning_rate": 0.0001983779581247384, "loss": 11.6699, "step": 8256 }, { "epoch": 0.17284183203550196, "grad_norm": 0.36548563838005066, "learning_rate": 0.00019837756480117725, "loss": 11.7055, "step": 8257 }, { "epoch": 0.1728627647994641, "grad_norm": 0.2462979108095169, "learning_rate": 0.00019837717143032405, "loss": 11.6726, "step": 8258 }, { "epoch": 0.17288369756342628, "grad_norm": 0.2879633605480194, "learning_rate": 0.000198376778012179, "loss": 11.6786, "step": 8259 }, { "epoch": 0.17290463032738843, "grad_norm": 0.2525499761104584, "learning_rate": 0.00019837638454674227, "loss": 11.6813, "step": 8260 }, { "epoch": 0.17292556309135057, "grad_norm": 0.2525116801261902, "learning_rate": 0.000198375991034014, "loss": 11.6803, "step": 8261 }, { "epoch": 0.17294649585531274, "grad_norm": 0.27347585558891296, "learning_rate": 0.00019837559747399444, "loss": 11.6845, "step": 8262 }, { "epoch": 0.1729674286192749, "grad_norm": 0.2531152367591858, "learning_rate": 0.00019837520386668374, "loss": 11.6683, "step": 8263 }, { "epoch": 0.17298836138323703, "grad_norm": 0.25226470828056335, "learning_rate": 0.00019837481021208215, "loss": 11.6772, "step": 8264 }, { "epoch": 0.1730092941471992, "grad_norm": 0.24115660786628723, "learning_rate": 0.00019837441651018978, "loss": 11.6619, "step": 8265 }, { "epoch": 0.17303022691116135, "grad_norm": 0.3289414048194885, "learning_rate": 0.00019837402276100686, "loss": 11.7007, "step": 8266 }, { "epoch": 0.1730511596751235, "grad_norm": 0.2179294228553772, "learning_rate": 0.0001983736289645336, "loss": 11.6739, "step": 8267 }, { "epoch": 0.17307209243908567, "grad_norm": 0.2646835148334503, "learning_rate": 0.00019837323512077016, "loss": 11.6764, "step": 8268 }, { "epoch": 0.1730930252030478, "grad_norm": 0.24565346539020538, "learning_rate": 0.0001983728412297167, "loss": 11.684, "step": 8269 }, { "epoch": 0.17311395796700996, "grad_norm": 0.2826888859272003, "learning_rate": 0.0001983724472913735, "loss": 11.6882, "step": 8270 }, { "epoch": 0.17313489073097213, "grad_norm": 0.2887265086174011, "learning_rate": 0.00019837205330574064, "loss": 11.688, "step": 8271 }, { "epoch": 0.17315582349493427, "grad_norm": 0.24294674396514893, "learning_rate": 0.0001983716592728184, "loss": 11.6792, "step": 8272 }, { "epoch": 0.17317675625889642, "grad_norm": 0.24385763704776764, "learning_rate": 0.0001983712651926069, "loss": 11.6832, "step": 8273 }, { "epoch": 0.1731976890228586, "grad_norm": 0.29187947511672974, "learning_rate": 0.0001983708710651064, "loss": 11.6799, "step": 8274 }, { "epoch": 0.17321862178682074, "grad_norm": 0.24861891567707062, "learning_rate": 0.000198370476890317, "loss": 11.703, "step": 8275 }, { "epoch": 0.17323955455078288, "grad_norm": 0.29613491892814636, "learning_rate": 0.00019837008266823897, "loss": 11.6855, "step": 8276 }, { "epoch": 0.17326048731474503, "grad_norm": 0.25311699509620667, "learning_rate": 0.00019836968839887246, "loss": 11.6794, "step": 8277 }, { "epoch": 0.1732814200787072, "grad_norm": 0.2114000916481018, "learning_rate": 0.0001983692940822177, "loss": 11.6795, "step": 8278 }, { "epoch": 0.17330235284266934, "grad_norm": 0.18593120574951172, "learning_rate": 0.00019836889971827483, "loss": 11.6773, "step": 8279 }, { "epoch": 0.1733232856066315, "grad_norm": 0.25158068537712097, "learning_rate": 0.00019836850530704405, "loss": 11.6743, "step": 8280 }, { "epoch": 0.17334421837059366, "grad_norm": 0.23779046535491943, "learning_rate": 0.00019836811084852554, "loss": 11.6874, "step": 8281 }, { "epoch": 0.1733651511345558, "grad_norm": 0.21944423019886017, "learning_rate": 0.00019836771634271956, "loss": 11.675, "step": 8282 }, { "epoch": 0.17338608389851795, "grad_norm": 0.2683657109737396, "learning_rate": 0.00019836732178962622, "loss": 11.6736, "step": 8283 }, { "epoch": 0.17340701666248012, "grad_norm": 0.25330567359924316, "learning_rate": 0.00019836692718924571, "loss": 11.6871, "step": 8284 }, { "epoch": 0.17342794942644227, "grad_norm": 0.23589865863323212, "learning_rate": 0.00019836653254157828, "loss": 11.6818, "step": 8285 }, { "epoch": 0.1734488821904044, "grad_norm": 0.22240640223026276, "learning_rate": 0.00019836613784662407, "loss": 11.6813, "step": 8286 }, { "epoch": 0.17346981495436659, "grad_norm": 0.2121780961751938, "learning_rate": 0.00019836574310438333, "loss": 11.6639, "step": 8287 }, { "epoch": 0.17349074771832873, "grad_norm": 0.26044633984565735, "learning_rate": 0.00019836534831485617, "loss": 11.6807, "step": 8288 }, { "epoch": 0.17351168048229088, "grad_norm": 0.2095434069633484, "learning_rate": 0.00019836495347804278, "loss": 11.6791, "step": 8289 }, { "epoch": 0.17353261324625305, "grad_norm": 0.24599376320838928, "learning_rate": 0.00019836455859394345, "loss": 11.6755, "step": 8290 }, { "epoch": 0.1735535460102152, "grad_norm": 0.3248543441295624, "learning_rate": 0.00019836416366255825, "loss": 11.6888, "step": 8291 }, { "epoch": 0.17357447877417734, "grad_norm": 0.23873041570186615, "learning_rate": 0.00019836376868388747, "loss": 11.673, "step": 8292 }, { "epoch": 0.17359541153813948, "grad_norm": 0.27012163400650024, "learning_rate": 0.00019836337365793124, "loss": 11.6633, "step": 8293 }, { "epoch": 0.17361634430210166, "grad_norm": 0.28175511956214905, "learning_rate": 0.00019836297858468976, "loss": 11.6703, "step": 8294 }, { "epoch": 0.1736372770660638, "grad_norm": 0.25928163528442383, "learning_rate": 0.00019836258346416324, "loss": 11.6807, "step": 8295 }, { "epoch": 0.17365820983002594, "grad_norm": 0.26795780658721924, "learning_rate": 0.00019836218829635181, "loss": 11.6961, "step": 8296 }, { "epoch": 0.17367914259398812, "grad_norm": 0.22574396431446075, "learning_rate": 0.00019836179308125576, "loss": 11.6808, "step": 8297 }, { "epoch": 0.17370007535795026, "grad_norm": 0.2816677987575531, "learning_rate": 0.0001983613978188752, "loss": 11.6734, "step": 8298 }, { "epoch": 0.1737210081219124, "grad_norm": 0.2630864679813385, "learning_rate": 0.00019836100250921035, "loss": 11.6662, "step": 8299 }, { "epoch": 0.17374194088587458, "grad_norm": 0.22603678703308105, "learning_rate": 0.0001983606071522614, "loss": 11.666, "step": 8300 }, { "epoch": 0.17376287364983672, "grad_norm": 0.2777862250804901, "learning_rate": 0.00019836021174802853, "loss": 11.6861, "step": 8301 }, { "epoch": 0.17378380641379887, "grad_norm": 0.2861955165863037, "learning_rate": 0.00019835981629651191, "loss": 11.6871, "step": 8302 }, { "epoch": 0.17380473917776104, "grad_norm": 0.2730121910572052, "learning_rate": 0.00019835942079771178, "loss": 11.6854, "step": 8303 }, { "epoch": 0.1738256719417232, "grad_norm": 0.2140984684228897, "learning_rate": 0.0001983590252516283, "loss": 11.6568, "step": 8304 }, { "epoch": 0.17384660470568533, "grad_norm": 0.22798104584217072, "learning_rate": 0.0001983586296582617, "loss": 11.6867, "step": 8305 }, { "epoch": 0.1738675374696475, "grad_norm": 0.21682047843933105, "learning_rate": 0.00019835823401761208, "loss": 11.6956, "step": 8306 }, { "epoch": 0.17388847023360965, "grad_norm": 0.25663062930107117, "learning_rate": 0.0001983578383296797, "loss": 11.6855, "step": 8307 }, { "epoch": 0.1739094029975718, "grad_norm": 0.30488744378089905, "learning_rate": 0.00019835744259446477, "loss": 11.6945, "step": 8308 }, { "epoch": 0.17393033576153397, "grad_norm": 0.3166521489620209, "learning_rate": 0.00019835704681196741, "loss": 11.6678, "step": 8309 }, { "epoch": 0.1739512685254961, "grad_norm": 0.27563634514808655, "learning_rate": 0.00019835665098218787, "loss": 11.6911, "step": 8310 }, { "epoch": 0.17397220128945826, "grad_norm": 0.2133299708366394, "learning_rate": 0.0001983562551051263, "loss": 11.68, "step": 8311 }, { "epoch": 0.1739931340534204, "grad_norm": 0.19913579523563385, "learning_rate": 0.0001983558591807829, "loss": 11.6756, "step": 8312 }, { "epoch": 0.17401406681738257, "grad_norm": 0.23899492621421814, "learning_rate": 0.0001983554632091579, "loss": 11.6947, "step": 8313 }, { "epoch": 0.17403499958134472, "grad_norm": 0.25724658370018005, "learning_rate": 0.00019835506719025143, "loss": 11.6634, "step": 8314 }, { "epoch": 0.17405593234530686, "grad_norm": 0.23751337826251984, "learning_rate": 0.00019835467112406372, "loss": 11.6748, "step": 8315 }, { "epoch": 0.17407686510926904, "grad_norm": 0.23793448507785797, "learning_rate": 0.00019835427501059495, "loss": 11.6591, "step": 8316 }, { "epoch": 0.17409779787323118, "grad_norm": 0.33545804023742676, "learning_rate": 0.00019835387884984532, "loss": 11.6808, "step": 8317 }, { "epoch": 0.17411873063719333, "grad_norm": 0.2429250180721283, "learning_rate": 0.00019835348264181496, "loss": 11.6843, "step": 8318 }, { "epoch": 0.1741396634011555, "grad_norm": 0.219519704580307, "learning_rate": 0.00019835308638650414, "loss": 11.6875, "step": 8319 }, { "epoch": 0.17416059616511764, "grad_norm": 0.23399952054023743, "learning_rate": 0.00019835269008391303, "loss": 11.6829, "step": 8320 }, { "epoch": 0.1741815289290798, "grad_norm": 0.30398261547088623, "learning_rate": 0.0001983522937340418, "loss": 11.6855, "step": 8321 }, { "epoch": 0.17420246169304196, "grad_norm": 0.18352793157100677, "learning_rate": 0.00019835189733689065, "loss": 11.684, "step": 8322 }, { "epoch": 0.1742233944570041, "grad_norm": 0.26686954498291016, "learning_rate": 0.0001983515008924598, "loss": 11.6721, "step": 8323 }, { "epoch": 0.17424432722096625, "grad_norm": 0.23787538707256317, "learning_rate": 0.00019835110440074937, "loss": 11.6528, "step": 8324 }, { "epoch": 0.17426525998492842, "grad_norm": 0.31833794713020325, "learning_rate": 0.00019835070786175963, "loss": 11.6903, "step": 8325 }, { "epoch": 0.17428619274889057, "grad_norm": 0.25724905729293823, "learning_rate": 0.0001983503112754907, "loss": 11.6717, "step": 8326 }, { "epoch": 0.1743071255128527, "grad_norm": 0.19581638276576996, "learning_rate": 0.00019834991464194284, "loss": 11.6736, "step": 8327 }, { "epoch": 0.17432805827681486, "grad_norm": 0.264663428068161, "learning_rate": 0.00019834951796111617, "loss": 11.693, "step": 8328 }, { "epoch": 0.17434899104077703, "grad_norm": 0.24435368180274963, "learning_rate": 0.00019834912123301094, "loss": 11.6746, "step": 8329 }, { "epoch": 0.17436992380473917, "grad_norm": 0.2085433155298233, "learning_rate": 0.00019834872445762732, "loss": 11.699, "step": 8330 }, { "epoch": 0.17439085656870132, "grad_norm": 0.23551028966903687, "learning_rate": 0.00019834832763496548, "loss": 11.6982, "step": 8331 }, { "epoch": 0.1744117893326635, "grad_norm": 0.2199925035238266, "learning_rate": 0.00019834793076502566, "loss": 11.6722, "step": 8332 }, { "epoch": 0.17443272209662564, "grad_norm": 0.2832457721233368, "learning_rate": 0.00019834753384780798, "loss": 11.6662, "step": 8333 }, { "epoch": 0.17445365486058778, "grad_norm": 0.25541406869888306, "learning_rate": 0.00019834713688331266, "loss": 11.6757, "step": 8334 }, { "epoch": 0.17447458762454995, "grad_norm": 0.25543373823165894, "learning_rate": 0.00019834673987153994, "loss": 11.6709, "step": 8335 }, { "epoch": 0.1744955203885121, "grad_norm": 0.2481972724199295, "learning_rate": 0.00019834634281248994, "loss": 11.6773, "step": 8336 }, { "epoch": 0.17451645315247424, "grad_norm": 0.20948341488838196, "learning_rate": 0.0001983459457061629, "loss": 11.6715, "step": 8337 }, { "epoch": 0.17453738591643642, "grad_norm": 0.23587994277477264, "learning_rate": 0.00019834554855255898, "loss": 11.6934, "step": 8338 }, { "epoch": 0.17455831868039856, "grad_norm": 0.26463204622268677, "learning_rate": 0.0001983451513516784, "loss": 11.6927, "step": 8339 }, { "epoch": 0.1745792514443607, "grad_norm": 0.2414478063583374, "learning_rate": 0.0001983447541035213, "loss": 11.6806, "step": 8340 }, { "epoch": 0.17460018420832288, "grad_norm": 0.2751566469669342, "learning_rate": 0.00019834435680808797, "loss": 11.6859, "step": 8341 }, { "epoch": 0.17462111697228502, "grad_norm": 0.2208302915096283, "learning_rate": 0.00019834395946537848, "loss": 11.6803, "step": 8342 }, { "epoch": 0.17464204973624717, "grad_norm": 0.2527766227722168, "learning_rate": 0.00019834356207539311, "loss": 11.6617, "step": 8343 }, { "epoch": 0.17466298250020934, "grad_norm": 0.25951728224754333, "learning_rate": 0.000198343164638132, "loss": 11.678, "step": 8344 }, { "epoch": 0.17468391526417149, "grad_norm": 0.2585501968860626, "learning_rate": 0.00019834276715359538, "loss": 11.693, "step": 8345 }, { "epoch": 0.17470484802813363, "grad_norm": 0.1898295134305954, "learning_rate": 0.00019834236962178338, "loss": 11.6781, "step": 8346 }, { "epoch": 0.17472578079209578, "grad_norm": 0.2186599224805832, "learning_rate": 0.00019834197204269627, "loss": 11.6715, "step": 8347 }, { "epoch": 0.17474671355605795, "grad_norm": 0.22248606383800507, "learning_rate": 0.0001983415744163342, "loss": 11.6708, "step": 8348 }, { "epoch": 0.1747676463200201, "grad_norm": 0.28857722878456116, "learning_rate": 0.00019834117674269737, "loss": 11.691, "step": 8349 }, { "epoch": 0.17478857908398224, "grad_norm": 0.22748631238937378, "learning_rate": 0.00019834077902178594, "loss": 11.6797, "step": 8350 }, { "epoch": 0.1748095118479444, "grad_norm": 0.2629973888397217, "learning_rate": 0.00019834038125360014, "loss": 11.6856, "step": 8351 }, { "epoch": 0.17483044461190655, "grad_norm": 0.24650703370571136, "learning_rate": 0.00019833998343814015, "loss": 11.68, "step": 8352 }, { "epoch": 0.1748513773758687, "grad_norm": 0.2704728841781616, "learning_rate": 0.00019833958557540618, "loss": 11.7025, "step": 8353 }, { "epoch": 0.17487231013983087, "grad_norm": 0.23040185868740082, "learning_rate": 0.00019833918766539837, "loss": 11.6716, "step": 8354 }, { "epoch": 0.17489324290379302, "grad_norm": 0.27641579508781433, "learning_rate": 0.00019833878970811696, "loss": 11.6604, "step": 8355 }, { "epoch": 0.17491417566775516, "grad_norm": 0.2534990608692169, "learning_rate": 0.00019833839170356208, "loss": 11.6726, "step": 8356 }, { "epoch": 0.17493510843171733, "grad_norm": 0.24569196999073029, "learning_rate": 0.000198337993651734, "loss": 11.6847, "step": 8357 }, { "epoch": 0.17495604119567948, "grad_norm": 0.2354319840669632, "learning_rate": 0.00019833759555263288, "loss": 11.6691, "step": 8358 }, { "epoch": 0.17497697395964162, "grad_norm": 0.21588970720767975, "learning_rate": 0.00019833719740625888, "loss": 11.672, "step": 8359 }, { "epoch": 0.1749979067236038, "grad_norm": 0.24595439434051514, "learning_rate": 0.00019833679921261226, "loss": 11.6937, "step": 8360 }, { "epoch": 0.17501883948756594, "grad_norm": 0.2209121435880661, "learning_rate": 0.00019833640097169315, "loss": 11.6817, "step": 8361 }, { "epoch": 0.1750397722515281, "grad_norm": 0.20703807473182678, "learning_rate": 0.00019833600268350177, "loss": 11.6787, "step": 8362 }, { "epoch": 0.17506070501549026, "grad_norm": 0.27876514196395874, "learning_rate": 0.00019833560434803825, "loss": 11.6847, "step": 8363 }, { "epoch": 0.1750816377794524, "grad_norm": 0.27732977271080017, "learning_rate": 0.0001983352059653029, "loss": 11.6919, "step": 8364 }, { "epoch": 0.17510257054341455, "grad_norm": 0.27009305357933044, "learning_rate": 0.00019833480753529584, "loss": 11.699, "step": 8365 }, { "epoch": 0.1751235033073767, "grad_norm": 0.267108678817749, "learning_rate": 0.00019833440905801724, "loss": 11.667, "step": 8366 }, { "epoch": 0.17514443607133887, "grad_norm": 0.25909411907196045, "learning_rate": 0.00019833401053346732, "loss": 11.6742, "step": 8367 }, { "epoch": 0.175165368835301, "grad_norm": 0.21849635243415833, "learning_rate": 0.0001983336119616463, "loss": 11.6761, "step": 8368 }, { "epoch": 0.17518630159926316, "grad_norm": 0.2714446187019348, "learning_rate": 0.00019833321334255432, "loss": 11.6882, "step": 8369 }, { "epoch": 0.17520723436322533, "grad_norm": 0.224824920296669, "learning_rate": 0.0001983328146761916, "loss": 11.6786, "step": 8370 }, { "epoch": 0.17522816712718747, "grad_norm": 0.20933786034584045, "learning_rate": 0.00019833241596255833, "loss": 11.6798, "step": 8371 }, { "epoch": 0.17524909989114962, "grad_norm": 0.2803569734096527, "learning_rate": 0.00019833201720165466, "loss": 11.6699, "step": 8372 }, { "epoch": 0.1752700326551118, "grad_norm": 0.22757813334465027, "learning_rate": 0.00019833161839348084, "loss": 11.6797, "step": 8373 }, { "epoch": 0.17529096541907394, "grad_norm": 0.2585286796092987, "learning_rate": 0.00019833121953803705, "loss": 11.6845, "step": 8374 }, { "epoch": 0.17531189818303608, "grad_norm": 0.25664451718330383, "learning_rate": 0.00019833082063532346, "loss": 11.679, "step": 8375 }, { "epoch": 0.17533283094699825, "grad_norm": 0.2508975863456726, "learning_rate": 0.00019833042168534027, "loss": 11.6736, "step": 8376 }, { "epoch": 0.1753537637109604, "grad_norm": 0.19649656116962433, "learning_rate": 0.00019833002268808772, "loss": 11.6677, "step": 8377 }, { "epoch": 0.17537469647492254, "grad_norm": 0.28680264949798584, "learning_rate": 0.00019832962364356593, "loss": 11.6948, "step": 8378 }, { "epoch": 0.17539562923888471, "grad_norm": 0.28567034006118774, "learning_rate": 0.00019832922455177511, "loss": 11.6784, "step": 8379 }, { "epoch": 0.17541656200284686, "grad_norm": 0.2592819035053253, "learning_rate": 0.00019832882541271545, "loss": 11.6779, "step": 8380 }, { "epoch": 0.175437494766809, "grad_norm": 0.25163981318473816, "learning_rate": 0.0001983284262263872, "loss": 11.6677, "step": 8381 }, { "epoch": 0.17545842753077115, "grad_norm": 0.23132765293121338, "learning_rate": 0.00019832802699279047, "loss": 11.675, "step": 8382 }, { "epoch": 0.17547936029473332, "grad_norm": 0.2720871567726135, "learning_rate": 0.00019832762771192548, "loss": 11.6779, "step": 8383 }, { "epoch": 0.17550029305869547, "grad_norm": 0.2958238422870636, "learning_rate": 0.00019832722838379244, "loss": 11.6521, "step": 8384 }, { "epoch": 0.1755212258226576, "grad_norm": 0.28722870349884033, "learning_rate": 0.00019832682900839153, "loss": 11.6801, "step": 8385 }, { "epoch": 0.17554215858661978, "grad_norm": 0.21748508512973785, "learning_rate": 0.00019832642958572295, "loss": 11.6655, "step": 8386 }, { "epoch": 0.17556309135058193, "grad_norm": 0.2480606883764267, "learning_rate": 0.00019832603011578687, "loss": 11.6869, "step": 8387 }, { "epoch": 0.17558402411454407, "grad_norm": 0.35335588455200195, "learning_rate": 0.0001983256305985835, "loss": 11.6706, "step": 8388 }, { "epoch": 0.17560495687850625, "grad_norm": 0.35933825373649597, "learning_rate": 0.00019832523103411305, "loss": 11.6916, "step": 8389 }, { "epoch": 0.1756258896424684, "grad_norm": 0.28888460993766785, "learning_rate": 0.00019832483142237566, "loss": 11.6825, "step": 8390 }, { "epoch": 0.17564682240643054, "grad_norm": 0.21915879845619202, "learning_rate": 0.00019832443176337158, "loss": 11.6717, "step": 8391 }, { "epoch": 0.1756677551703927, "grad_norm": 0.28228598833084106, "learning_rate": 0.00019832403205710097, "loss": 11.6768, "step": 8392 }, { "epoch": 0.17568868793435485, "grad_norm": 0.22341178357601166, "learning_rate": 0.000198323632303564, "loss": 11.6805, "step": 8393 }, { "epoch": 0.175709620698317, "grad_norm": 0.2705223262310028, "learning_rate": 0.00019832323250276092, "loss": 11.6592, "step": 8394 }, { "epoch": 0.17573055346227917, "grad_norm": 0.2874871492385864, "learning_rate": 0.0001983228326546919, "loss": 11.6894, "step": 8395 }, { "epoch": 0.17575148622624132, "grad_norm": 0.249452605843544, "learning_rate": 0.0001983224327593571, "loss": 11.6832, "step": 8396 }, { "epoch": 0.17577241899020346, "grad_norm": 0.23030491173267365, "learning_rate": 0.0001983220328167567, "loss": 11.6775, "step": 8397 }, { "epoch": 0.17579335175416563, "grad_norm": 0.23305103182792664, "learning_rate": 0.00019832163282689101, "loss": 11.6866, "step": 8398 }, { "epoch": 0.17581428451812778, "grad_norm": 0.21163497865200043, "learning_rate": 0.00019832123278976013, "loss": 11.6751, "step": 8399 }, { "epoch": 0.17583521728208992, "grad_norm": 0.2628572881221771, "learning_rate": 0.0001983208327053642, "loss": 11.6767, "step": 8400 }, { "epoch": 0.17585615004605207, "grad_norm": 0.24392300844192505, "learning_rate": 0.00019832043257370349, "loss": 11.6866, "step": 8401 }, { "epoch": 0.17587708281001424, "grad_norm": 0.27184146642684937, "learning_rate": 0.0001983200323947782, "loss": 11.6774, "step": 8402 }, { "epoch": 0.17589801557397639, "grad_norm": 0.292420893907547, "learning_rate": 0.0001983196321685885, "loss": 11.6646, "step": 8403 }, { "epoch": 0.17591894833793853, "grad_norm": 0.2532760798931122, "learning_rate": 0.0001983192318951346, "loss": 11.6868, "step": 8404 }, { "epoch": 0.1759398811019007, "grad_norm": 0.19780735671520233, "learning_rate": 0.00019831883157441667, "loss": 11.6804, "step": 8405 }, { "epoch": 0.17596081386586285, "grad_norm": 0.2698502540588379, "learning_rate": 0.00019831843120643488, "loss": 11.681, "step": 8406 }, { "epoch": 0.175981746629825, "grad_norm": 0.2085513025522232, "learning_rate": 0.00019831803079118946, "loss": 11.6798, "step": 8407 }, { "epoch": 0.17600267939378716, "grad_norm": 0.23098911345005035, "learning_rate": 0.00019831763032868057, "loss": 11.6745, "step": 8408 }, { "epoch": 0.1760236121577493, "grad_norm": 0.255403071641922, "learning_rate": 0.00019831722981890847, "loss": 11.6915, "step": 8409 }, { "epoch": 0.17604454492171145, "grad_norm": 0.299940824508667, "learning_rate": 0.00019831682926187328, "loss": 11.6948, "step": 8410 }, { "epoch": 0.17606547768567363, "grad_norm": 0.26362520456314087, "learning_rate": 0.00019831642865757526, "loss": 11.6813, "step": 8411 }, { "epoch": 0.17608641044963577, "grad_norm": 0.3300999402999878, "learning_rate": 0.00019831602800601453, "loss": 11.6885, "step": 8412 }, { "epoch": 0.17610734321359792, "grad_norm": 0.24177135527133942, "learning_rate": 0.0001983156273071913, "loss": 11.674, "step": 8413 }, { "epoch": 0.1761282759775601, "grad_norm": 0.21372705698013306, "learning_rate": 0.00019831522656110577, "loss": 11.6662, "step": 8414 }, { "epoch": 0.17614920874152223, "grad_norm": 0.2734915018081665, "learning_rate": 0.00019831482576775818, "loss": 11.6769, "step": 8415 }, { "epoch": 0.17617014150548438, "grad_norm": 0.24671821296215057, "learning_rate": 0.00019831442492714864, "loss": 11.6951, "step": 8416 }, { "epoch": 0.17619107426944655, "grad_norm": 0.2605449855327606, "learning_rate": 0.00019831402403927743, "loss": 11.6841, "step": 8417 }, { "epoch": 0.1762120070334087, "grad_norm": 0.22961093485355377, "learning_rate": 0.00019831362310414468, "loss": 11.6571, "step": 8418 }, { "epoch": 0.17623293979737084, "grad_norm": 0.25969138741493225, "learning_rate": 0.0001983132221217506, "loss": 11.6727, "step": 8419 }, { "epoch": 0.17625387256133299, "grad_norm": 0.3631923794746399, "learning_rate": 0.00019831282109209538, "loss": 11.6907, "step": 8420 }, { "epoch": 0.17627480532529516, "grad_norm": 0.22551970183849335, "learning_rate": 0.00019831242001517923, "loss": 11.6842, "step": 8421 }, { "epoch": 0.1762957380892573, "grad_norm": 0.190690279006958, "learning_rate": 0.00019831201889100234, "loss": 11.6824, "step": 8422 }, { "epoch": 0.17631667085321945, "grad_norm": 0.2134455293416977, "learning_rate": 0.00019831161771956482, "loss": 11.6734, "step": 8423 }, { "epoch": 0.17633760361718162, "grad_norm": 0.27065473794937134, "learning_rate": 0.00019831121650086702, "loss": 11.6843, "step": 8424 }, { "epoch": 0.17635853638114377, "grad_norm": 0.23527391254901886, "learning_rate": 0.000198310815234909, "loss": 11.6824, "step": 8425 }, { "epoch": 0.1763794691451059, "grad_norm": 0.22959841787815094, "learning_rate": 0.00019831041392169102, "loss": 11.6844, "step": 8426 }, { "epoch": 0.17640040190906808, "grad_norm": 0.2633744180202484, "learning_rate": 0.00019831001256121326, "loss": 11.6766, "step": 8427 }, { "epoch": 0.17642133467303023, "grad_norm": 0.2591093182563782, "learning_rate": 0.0001983096111534759, "loss": 11.6807, "step": 8428 }, { "epoch": 0.17644226743699237, "grad_norm": 0.2544228136539459, "learning_rate": 0.00019830920969847915, "loss": 11.6705, "step": 8429 }, { "epoch": 0.17646320020095455, "grad_norm": 0.33332571387290955, "learning_rate": 0.00019830880819622317, "loss": 11.6879, "step": 8430 }, { "epoch": 0.1764841329649167, "grad_norm": 0.25578778982162476, "learning_rate": 0.00019830840664670818, "loss": 11.671, "step": 8431 }, { "epoch": 0.17650506572887883, "grad_norm": 0.2285173535346985, "learning_rate": 0.00019830800504993436, "loss": 11.6894, "step": 8432 }, { "epoch": 0.176525998492841, "grad_norm": 0.2266206443309784, "learning_rate": 0.00019830760340590193, "loss": 11.68, "step": 8433 }, { "epoch": 0.17654693125680315, "grad_norm": 0.2586141526699066, "learning_rate": 0.00019830720171461108, "loss": 11.6674, "step": 8434 }, { "epoch": 0.1765678640207653, "grad_norm": 0.27231982350349426, "learning_rate": 0.00019830679997606196, "loss": 11.6844, "step": 8435 }, { "epoch": 0.17658879678472744, "grad_norm": 0.2616093158721924, "learning_rate": 0.0001983063981902548, "loss": 11.6907, "step": 8436 }, { "epoch": 0.17660972954868961, "grad_norm": 0.2694806456565857, "learning_rate": 0.00019830599635718978, "loss": 11.6705, "step": 8437 }, { "epoch": 0.17663066231265176, "grad_norm": 0.28369343280792236, "learning_rate": 0.0001983055944768671, "loss": 11.69, "step": 8438 }, { "epoch": 0.1766515950766139, "grad_norm": 0.23399102687835693, "learning_rate": 0.00019830519254928697, "loss": 11.6797, "step": 8439 }, { "epoch": 0.17667252784057608, "grad_norm": 0.24049031734466553, "learning_rate": 0.00019830479057444955, "loss": 11.6756, "step": 8440 }, { "epoch": 0.17669346060453822, "grad_norm": 0.2933846712112427, "learning_rate": 0.00019830438855235504, "loss": 11.6721, "step": 8441 }, { "epoch": 0.17671439336850037, "grad_norm": 0.23318956792354584, "learning_rate": 0.00019830398648300364, "loss": 11.6687, "step": 8442 }, { "epoch": 0.17673532613246254, "grad_norm": 0.2384999692440033, "learning_rate": 0.00019830358436639554, "loss": 11.6792, "step": 8443 }, { "epoch": 0.17675625889642468, "grad_norm": 0.21255259215831757, "learning_rate": 0.00019830318220253094, "loss": 11.6626, "step": 8444 }, { "epoch": 0.17677719166038683, "grad_norm": 0.2339749038219452, "learning_rate": 0.00019830277999141006, "loss": 11.6574, "step": 8445 }, { "epoch": 0.176798124424349, "grad_norm": 0.2238558828830719, "learning_rate": 0.00019830237773303303, "loss": 11.6731, "step": 8446 }, { "epoch": 0.17681905718831115, "grad_norm": 0.2090713083744049, "learning_rate": 0.00019830197542740008, "loss": 11.6914, "step": 8447 }, { "epoch": 0.1768399899522733, "grad_norm": 0.19152525067329407, "learning_rate": 0.0001983015730745114, "loss": 11.683, "step": 8448 }, { "epoch": 0.17686092271623546, "grad_norm": 0.28417283296585083, "learning_rate": 0.0001983011706743672, "loss": 11.6769, "step": 8449 }, { "epoch": 0.1768818554801976, "grad_norm": 0.22247929871082306, "learning_rate": 0.00019830076822696766, "loss": 11.6645, "step": 8450 }, { "epoch": 0.17690278824415975, "grad_norm": 0.22302520275115967, "learning_rate": 0.00019830036573231295, "loss": 11.6796, "step": 8451 }, { "epoch": 0.17692372100812193, "grad_norm": 0.2918960452079773, "learning_rate": 0.00019829996319040328, "loss": 11.6822, "step": 8452 }, { "epoch": 0.17694465377208407, "grad_norm": 0.23750589787960052, "learning_rate": 0.00019829956060123886, "loss": 11.6771, "step": 8453 }, { "epoch": 0.17696558653604622, "grad_norm": 0.21645523607730865, "learning_rate": 0.0001982991579648199, "loss": 11.6721, "step": 8454 }, { "epoch": 0.17698651930000836, "grad_norm": 0.21956981718540192, "learning_rate": 0.0001982987552811465, "loss": 11.6649, "step": 8455 }, { "epoch": 0.17700745206397053, "grad_norm": 0.22490568459033966, "learning_rate": 0.000198298352550219, "loss": 11.6838, "step": 8456 }, { "epoch": 0.17702838482793268, "grad_norm": 0.3003464639186859, "learning_rate": 0.00019829794977203744, "loss": 11.6855, "step": 8457 }, { "epoch": 0.17704931759189482, "grad_norm": 0.262704461812973, "learning_rate": 0.0001982975469466021, "loss": 11.6783, "step": 8458 }, { "epoch": 0.177070250355857, "grad_norm": 0.25813043117523193, "learning_rate": 0.00019829714407391318, "loss": 11.6823, "step": 8459 }, { "epoch": 0.17709118311981914, "grad_norm": 0.2820969820022583, "learning_rate": 0.00019829674115397086, "loss": 11.6807, "step": 8460 }, { "epoch": 0.17711211588378128, "grad_norm": 0.26739823818206787, "learning_rate": 0.00019829633818677528, "loss": 11.6835, "step": 8461 }, { "epoch": 0.17713304864774346, "grad_norm": 0.3242688477039337, "learning_rate": 0.00019829593517232673, "loss": 11.6842, "step": 8462 }, { "epoch": 0.1771539814117056, "grad_norm": 0.2155395895242691, "learning_rate": 0.00019829553211062534, "loss": 11.658, "step": 8463 }, { "epoch": 0.17717491417566775, "grad_norm": 0.22116847336292267, "learning_rate": 0.00019829512900167132, "loss": 11.6827, "step": 8464 }, { "epoch": 0.17719584693962992, "grad_norm": 0.24787984788417816, "learning_rate": 0.00019829472584546485, "loss": 11.6785, "step": 8465 }, { "epoch": 0.17721677970359206, "grad_norm": 0.3707813620567322, "learning_rate": 0.00019829432264200614, "loss": 11.6798, "step": 8466 }, { "epoch": 0.1772377124675542, "grad_norm": 0.30221354961395264, "learning_rate": 0.00019829391939129538, "loss": 11.6754, "step": 8467 }, { "epoch": 0.17725864523151638, "grad_norm": 0.2881658673286438, "learning_rate": 0.00019829351609333275, "loss": 11.6575, "step": 8468 }, { "epoch": 0.17727957799547853, "grad_norm": 0.22116610407829285, "learning_rate": 0.00019829311274811848, "loss": 11.6887, "step": 8469 }, { "epoch": 0.17730051075944067, "grad_norm": 0.2713099420070648, "learning_rate": 0.00019829270935565273, "loss": 11.6808, "step": 8470 }, { "epoch": 0.17732144352340282, "grad_norm": 0.27009060978889465, "learning_rate": 0.0001982923059159357, "loss": 11.6673, "step": 8471 }, { "epoch": 0.177342376287365, "grad_norm": 0.28069213032722473, "learning_rate": 0.00019829190242896762, "loss": 11.6699, "step": 8472 }, { "epoch": 0.17736330905132713, "grad_norm": 0.22608743607997894, "learning_rate": 0.0001982914988947486, "loss": 11.6775, "step": 8473 }, { "epoch": 0.17738424181528928, "grad_norm": 0.19438667595386505, "learning_rate": 0.0001982910953132789, "loss": 11.676, "step": 8474 }, { "epoch": 0.17740517457925145, "grad_norm": 0.22603930532932281, "learning_rate": 0.00019829069168455875, "loss": 11.6806, "step": 8475 }, { "epoch": 0.1774261073432136, "grad_norm": 0.27390944957733154, "learning_rate": 0.00019829028800858825, "loss": 11.6706, "step": 8476 }, { "epoch": 0.17744704010717574, "grad_norm": 0.23703616857528687, "learning_rate": 0.00019828988428536763, "loss": 11.6872, "step": 8477 }, { "epoch": 0.1774679728711379, "grad_norm": 0.26798543334007263, "learning_rate": 0.00019828948051489712, "loss": 11.6696, "step": 8478 }, { "epoch": 0.17748890563510006, "grad_norm": 0.25386330485343933, "learning_rate": 0.00019828907669717686, "loss": 11.6822, "step": 8479 }, { "epoch": 0.1775098383990622, "grad_norm": 0.29538506269454956, "learning_rate": 0.0001982886728322071, "loss": 11.6739, "step": 8480 }, { "epoch": 0.17753077116302438, "grad_norm": 0.262629896402359, "learning_rate": 0.000198288268919988, "loss": 11.6892, "step": 8481 }, { "epoch": 0.17755170392698652, "grad_norm": 0.21310360729694366, "learning_rate": 0.00019828786496051977, "loss": 11.6735, "step": 8482 }, { "epoch": 0.17757263669094867, "grad_norm": 0.25993701815605164, "learning_rate": 0.00019828746095380256, "loss": 11.6907, "step": 8483 }, { "epoch": 0.17759356945491084, "grad_norm": 0.22209432721138, "learning_rate": 0.00019828705689983663, "loss": 11.6895, "step": 8484 }, { "epoch": 0.17761450221887298, "grad_norm": 0.23705808818340302, "learning_rate": 0.00019828665279862212, "loss": 11.6907, "step": 8485 }, { "epoch": 0.17763543498283513, "grad_norm": 0.2719249725341797, "learning_rate": 0.00019828624865015924, "loss": 11.6751, "step": 8486 }, { "epoch": 0.1776563677467973, "grad_norm": 0.225142702460289, "learning_rate": 0.0001982858444544482, "loss": 11.6864, "step": 8487 }, { "epoch": 0.17767730051075944, "grad_norm": 0.253292053937912, "learning_rate": 0.0001982854402114892, "loss": 11.6808, "step": 8488 }, { "epoch": 0.1776982332747216, "grad_norm": 0.20485492050647736, "learning_rate": 0.0001982850359212824, "loss": 11.6667, "step": 8489 }, { "epoch": 0.17771916603868373, "grad_norm": 0.2172737419605255, "learning_rate": 0.00019828463158382803, "loss": 11.6776, "step": 8490 }, { "epoch": 0.1777400988026459, "grad_norm": 0.2578539252281189, "learning_rate": 0.00019828422719912628, "loss": 11.6547, "step": 8491 }, { "epoch": 0.17776103156660805, "grad_norm": 0.2571409344673157, "learning_rate": 0.0001982838227671773, "loss": 11.6799, "step": 8492 }, { "epoch": 0.1777819643305702, "grad_norm": 0.3010266423225403, "learning_rate": 0.00019828341828798132, "loss": 11.6844, "step": 8493 }, { "epoch": 0.17780289709453237, "grad_norm": 0.28707656264305115, "learning_rate": 0.00019828301376153854, "loss": 11.6787, "step": 8494 }, { "epoch": 0.17782382985849451, "grad_norm": 0.20670199394226074, "learning_rate": 0.00019828260918784913, "loss": 11.6745, "step": 8495 }, { "epoch": 0.17784476262245666, "grad_norm": 0.21183368563652039, "learning_rate": 0.00019828220456691333, "loss": 11.6895, "step": 8496 }, { "epoch": 0.17786569538641883, "grad_norm": 0.27548345923423767, "learning_rate": 0.00019828179989873128, "loss": 11.6922, "step": 8497 }, { "epoch": 0.17788662815038098, "grad_norm": 0.25682002305984497, "learning_rate": 0.00019828139518330323, "loss": 11.6683, "step": 8498 }, { "epoch": 0.17790756091434312, "grad_norm": 0.22357642650604248, "learning_rate": 0.0001982809904206293, "loss": 11.6743, "step": 8499 }, { "epoch": 0.1779284936783053, "grad_norm": 0.22089120745658875, "learning_rate": 0.00019828058561070978, "loss": 11.6661, "step": 8500 }, { "epoch": 0.17794942644226744, "grad_norm": 0.21188585460186005, "learning_rate": 0.0001982801807535448, "loss": 11.6899, "step": 8501 }, { "epoch": 0.17797035920622958, "grad_norm": 0.25684112310409546, "learning_rate": 0.00019827977584913452, "loss": 11.6589, "step": 8502 }, { "epoch": 0.17799129197019176, "grad_norm": 0.2409425973892212, "learning_rate": 0.00019827937089747922, "loss": 11.6862, "step": 8503 }, { "epoch": 0.1780122247341539, "grad_norm": 0.31623440980911255, "learning_rate": 0.00019827896589857903, "loss": 11.6833, "step": 8504 }, { "epoch": 0.17803315749811605, "grad_norm": 0.20534059405326843, "learning_rate": 0.0001982785608524342, "loss": 11.6748, "step": 8505 }, { "epoch": 0.17805409026207822, "grad_norm": 0.27910730242729187, "learning_rate": 0.0001982781557590449, "loss": 11.6744, "step": 8506 }, { "epoch": 0.17807502302604036, "grad_norm": 0.26902541518211365, "learning_rate": 0.0001982777506184113, "loss": 11.6859, "step": 8507 }, { "epoch": 0.1780959557900025, "grad_norm": 0.316837877035141, "learning_rate": 0.00019827734543053364, "loss": 11.6765, "step": 8508 }, { "epoch": 0.17811688855396465, "grad_norm": 0.20099693536758423, "learning_rate": 0.00019827694019541208, "loss": 11.6885, "step": 8509 }, { "epoch": 0.17813782131792683, "grad_norm": 0.22556789219379425, "learning_rate": 0.00019827653491304682, "loss": 11.6717, "step": 8510 }, { "epoch": 0.17815875408188897, "grad_norm": 0.2542853355407715, "learning_rate": 0.00019827612958343805, "loss": 11.6673, "step": 8511 }, { "epoch": 0.17817968684585112, "grad_norm": 0.3387241065502167, "learning_rate": 0.000198275724206586, "loss": 11.6792, "step": 8512 }, { "epoch": 0.1782006196098133, "grad_norm": 0.2014634758234024, "learning_rate": 0.00019827531878249084, "loss": 11.6677, "step": 8513 }, { "epoch": 0.17822155237377543, "grad_norm": 0.35730451345443726, "learning_rate": 0.00019827491331115275, "loss": 11.6647, "step": 8514 }, { "epoch": 0.17824248513773758, "grad_norm": 0.23539800941944122, "learning_rate": 0.00019827450779257193, "loss": 11.6811, "step": 8515 }, { "epoch": 0.17826341790169975, "grad_norm": 0.25430548191070557, "learning_rate": 0.0001982741022267486, "loss": 11.691, "step": 8516 }, { "epoch": 0.1782843506656619, "grad_norm": 0.38667061924934387, "learning_rate": 0.00019827369661368295, "loss": 11.6818, "step": 8517 }, { "epoch": 0.17830528342962404, "grad_norm": 0.1983060985803604, "learning_rate": 0.0001982732909533751, "loss": 11.6692, "step": 8518 }, { "epoch": 0.1783262161935862, "grad_norm": 0.23547376692295074, "learning_rate": 0.0001982728852458254, "loss": 11.6909, "step": 8519 }, { "epoch": 0.17834714895754836, "grad_norm": 0.22953487932682037, "learning_rate": 0.00019827247949103392, "loss": 11.6835, "step": 8520 }, { "epoch": 0.1783680817215105, "grad_norm": 0.28492921590805054, "learning_rate": 0.00019827207368900088, "loss": 11.6918, "step": 8521 }, { "epoch": 0.17838901448547267, "grad_norm": 0.2553466558456421, "learning_rate": 0.00019827166783972647, "loss": 11.6825, "step": 8522 }, { "epoch": 0.17840994724943482, "grad_norm": 0.22437989711761475, "learning_rate": 0.0001982712619432109, "loss": 11.6762, "step": 8523 }, { "epoch": 0.17843088001339696, "grad_norm": 0.26118049025535583, "learning_rate": 0.0001982708559994544, "loss": 11.675, "step": 8524 }, { "epoch": 0.1784518127773591, "grad_norm": 0.2663240432739258, "learning_rate": 0.00019827045000845713, "loss": 11.6798, "step": 8525 }, { "epoch": 0.17847274554132128, "grad_norm": 0.3466932475566864, "learning_rate": 0.00019827004397021924, "loss": 11.6848, "step": 8526 }, { "epoch": 0.17849367830528343, "grad_norm": 0.29019656777381897, "learning_rate": 0.000198269637884741, "loss": 11.7009, "step": 8527 }, { "epoch": 0.17851461106924557, "grad_norm": 0.27925628423690796, "learning_rate": 0.0001982692317520226, "loss": 11.6739, "step": 8528 }, { "epoch": 0.17853554383320774, "grad_norm": 0.2245539426803589, "learning_rate": 0.00019826882557206417, "loss": 11.6912, "step": 8529 }, { "epoch": 0.1785564765971699, "grad_norm": 0.24130094051361084, "learning_rate": 0.00019826841934486597, "loss": 11.6742, "step": 8530 }, { "epoch": 0.17857740936113203, "grad_norm": 0.2455570548772812, "learning_rate": 0.00019826801307042818, "loss": 11.6681, "step": 8531 }, { "epoch": 0.1785983421250942, "grad_norm": 0.24598106741905212, "learning_rate": 0.00019826760674875095, "loss": 11.683, "step": 8532 }, { "epoch": 0.17861927488905635, "grad_norm": 0.2145897001028061, "learning_rate": 0.00019826720037983451, "loss": 11.667, "step": 8533 }, { "epoch": 0.1786402076530185, "grad_norm": 0.27341121435165405, "learning_rate": 0.00019826679396367912, "loss": 11.6862, "step": 8534 }, { "epoch": 0.17866114041698067, "grad_norm": 0.24981637299060822, "learning_rate": 0.00019826638750028485, "loss": 11.6853, "step": 8535 }, { "epoch": 0.1786820731809428, "grad_norm": 0.2744219899177551, "learning_rate": 0.00019826598098965202, "loss": 11.6865, "step": 8536 }, { "epoch": 0.17870300594490496, "grad_norm": 0.23199373483657837, "learning_rate": 0.0001982655744317807, "loss": 11.6924, "step": 8537 }, { "epoch": 0.17872393870886713, "grad_norm": 0.2642698287963867, "learning_rate": 0.0001982651678266712, "loss": 11.6674, "step": 8538 }, { "epoch": 0.17874487147282928, "grad_norm": 0.22539900243282318, "learning_rate": 0.00019826476117432363, "loss": 11.6807, "step": 8539 }, { "epoch": 0.17876580423679142, "grad_norm": 0.2147969901561737, "learning_rate": 0.00019826435447473822, "loss": 11.6822, "step": 8540 }, { "epoch": 0.1787867370007536, "grad_norm": 0.25613534450531006, "learning_rate": 0.00019826394772791517, "loss": 11.6697, "step": 8541 }, { "epoch": 0.17880766976471574, "grad_norm": 0.2026541829109192, "learning_rate": 0.00019826354093385468, "loss": 11.69, "step": 8542 }, { "epoch": 0.17882860252867788, "grad_norm": 0.2500722408294678, "learning_rate": 0.00019826313409255692, "loss": 11.6842, "step": 8543 }, { "epoch": 0.17884953529264003, "grad_norm": 0.27359700202941895, "learning_rate": 0.00019826272720402212, "loss": 11.6836, "step": 8544 }, { "epoch": 0.1788704680566022, "grad_norm": 0.237052783370018, "learning_rate": 0.00019826232026825044, "loss": 11.6681, "step": 8545 }, { "epoch": 0.17889140082056434, "grad_norm": 0.2815359830856323, "learning_rate": 0.00019826191328524214, "loss": 11.6844, "step": 8546 }, { "epoch": 0.1789123335845265, "grad_norm": 2.726036310195923, "learning_rate": 0.00019826150625499732, "loss": 11.7471, "step": 8547 }, { "epoch": 0.17893326634848866, "grad_norm": 0.19493314623832703, "learning_rate": 0.00019826109917751622, "loss": 11.6776, "step": 8548 }, { "epoch": 0.1789541991124508, "grad_norm": 0.25921809673309326, "learning_rate": 0.0001982606920527991, "loss": 11.685, "step": 8549 }, { "epoch": 0.17897513187641295, "grad_norm": 0.25552845001220703, "learning_rate": 0.00019826028488084603, "loss": 11.6783, "step": 8550 }, { "epoch": 0.17899606464037512, "grad_norm": 0.2468337118625641, "learning_rate": 0.0001982598776616573, "loss": 11.6918, "step": 8551 }, { "epoch": 0.17901699740433727, "grad_norm": 0.30020400881767273, "learning_rate": 0.0001982594703952331, "loss": 11.6939, "step": 8552 }, { "epoch": 0.17903793016829941, "grad_norm": 0.2606404423713684, "learning_rate": 0.00019825906308157355, "loss": 11.6804, "step": 8553 }, { "epoch": 0.1790588629322616, "grad_norm": 0.2546684741973877, "learning_rate": 0.0001982586557206789, "loss": 11.6765, "step": 8554 }, { "epoch": 0.17907979569622373, "grad_norm": 0.22075961530208588, "learning_rate": 0.00019825824831254943, "loss": 11.6606, "step": 8555 }, { "epoch": 0.17910072846018588, "grad_norm": 0.2933824956417084, "learning_rate": 0.00019825784085718517, "loss": 11.664, "step": 8556 }, { "epoch": 0.17912166122414805, "grad_norm": 0.22026589512825012, "learning_rate": 0.00019825743335458645, "loss": 11.6587, "step": 8557 }, { "epoch": 0.1791425939881102, "grad_norm": 0.25855982303619385, "learning_rate": 0.00019825702580475335, "loss": 11.6763, "step": 8558 }, { "epoch": 0.17916352675207234, "grad_norm": 0.21384654939174652, "learning_rate": 0.00019825661820768617, "loss": 11.679, "step": 8559 }, { "epoch": 0.1791844595160345, "grad_norm": 0.18341392278671265, "learning_rate": 0.00019825621056338507, "loss": 11.6744, "step": 8560 }, { "epoch": 0.17920539227999666, "grad_norm": 0.27473175525665283, "learning_rate": 0.0001982558028718502, "loss": 11.6744, "step": 8561 }, { "epoch": 0.1792263250439588, "grad_norm": 0.21358083188533783, "learning_rate": 0.00019825539513308184, "loss": 11.6751, "step": 8562 }, { "epoch": 0.17924725780792095, "grad_norm": 0.21368692815303802, "learning_rate": 0.00019825498734708013, "loss": 11.6745, "step": 8563 }, { "epoch": 0.17926819057188312, "grad_norm": 0.20669981837272644, "learning_rate": 0.00019825457951384528, "loss": 11.6708, "step": 8564 }, { "epoch": 0.17928912333584526, "grad_norm": 0.22174926102161407, "learning_rate": 0.00019825417163337746, "loss": 11.6655, "step": 8565 }, { "epoch": 0.1793100560998074, "grad_norm": 0.24687263369560242, "learning_rate": 0.00019825376370567692, "loss": 11.6794, "step": 8566 }, { "epoch": 0.17933098886376958, "grad_norm": 0.25922828912734985, "learning_rate": 0.0001982533557307438, "loss": 11.6894, "step": 8567 }, { "epoch": 0.17935192162773173, "grad_norm": 0.24044640362262726, "learning_rate": 0.00019825294770857836, "loss": 11.6906, "step": 8568 }, { "epoch": 0.17937285439169387, "grad_norm": 0.25538450479507446, "learning_rate": 0.00019825253963918072, "loss": 11.6917, "step": 8569 }, { "epoch": 0.17939378715565604, "grad_norm": 0.2992608845233917, "learning_rate": 0.00019825213152255115, "loss": 11.6785, "step": 8570 }, { "epoch": 0.1794147199196182, "grad_norm": 0.24521708488464355, "learning_rate": 0.0001982517233586898, "loss": 11.6763, "step": 8571 }, { "epoch": 0.17943565268358033, "grad_norm": 0.2266933172941208, "learning_rate": 0.00019825131514759689, "loss": 11.6753, "step": 8572 }, { "epoch": 0.1794565854475425, "grad_norm": 0.21965011954307556, "learning_rate": 0.00019825090688927258, "loss": 11.6633, "step": 8573 }, { "epoch": 0.17947751821150465, "grad_norm": 0.3526890277862549, "learning_rate": 0.00019825049858371706, "loss": 11.6738, "step": 8574 }, { "epoch": 0.1794984509754668, "grad_norm": 0.22668768465518951, "learning_rate": 0.00019825009023093063, "loss": 11.6803, "step": 8575 }, { "epoch": 0.17951938373942897, "grad_norm": 0.2231176197528839, "learning_rate": 0.00019824968183091338, "loss": 11.6875, "step": 8576 }, { "epoch": 0.1795403165033911, "grad_norm": 0.1983148753643036, "learning_rate": 0.0001982492733836655, "loss": 11.6686, "step": 8577 }, { "epoch": 0.17956124926735326, "grad_norm": 0.2359818071126938, "learning_rate": 0.00019824886488918728, "loss": 11.6884, "step": 8578 }, { "epoch": 0.1795821820313154, "grad_norm": 0.2525149881839752, "learning_rate": 0.00019824845634747884, "loss": 11.6834, "step": 8579 }, { "epoch": 0.17960311479527757, "grad_norm": 0.2133374959230423, "learning_rate": 0.0001982480477585404, "loss": 11.6669, "step": 8580 }, { "epoch": 0.17962404755923972, "grad_norm": 0.2633585035800934, "learning_rate": 0.00019824763912237216, "loss": 11.6744, "step": 8581 }, { "epoch": 0.17964498032320186, "grad_norm": 0.2958463430404663, "learning_rate": 0.0001982472304389743, "loss": 11.6721, "step": 8582 }, { "epoch": 0.17966591308716404, "grad_norm": 0.275531530380249, "learning_rate": 0.00019824682170834702, "loss": 11.6857, "step": 8583 }, { "epoch": 0.17968684585112618, "grad_norm": 0.307755708694458, "learning_rate": 0.00019824641293049057, "loss": 11.684, "step": 8584 }, { "epoch": 0.17970777861508833, "grad_norm": 0.24539700150489807, "learning_rate": 0.00019824600410540505, "loss": 11.6771, "step": 8585 }, { "epoch": 0.1797287113790505, "grad_norm": 0.2330632209777832, "learning_rate": 0.00019824559523309073, "loss": 11.6719, "step": 8586 }, { "epoch": 0.17974964414301264, "grad_norm": 0.22122065722942352, "learning_rate": 0.00019824518631354776, "loss": 11.6644, "step": 8587 }, { "epoch": 0.1797705769069748, "grad_norm": 0.3167705535888672, "learning_rate": 0.00019824477734677637, "loss": 11.6893, "step": 8588 }, { "epoch": 0.17979150967093696, "grad_norm": 0.3183799684047699, "learning_rate": 0.00019824436833277678, "loss": 11.6699, "step": 8589 }, { "epoch": 0.1798124424348991, "grad_norm": 0.4018946886062622, "learning_rate": 0.00019824395927154912, "loss": 11.6967, "step": 8590 }, { "epoch": 0.17983337519886125, "grad_norm": 0.25712308287620544, "learning_rate": 0.00019824355016309365, "loss": 11.6757, "step": 8591 }, { "epoch": 0.17985430796282342, "grad_norm": 0.2886626422405243, "learning_rate": 0.0001982431410074105, "loss": 11.6748, "step": 8592 }, { "epoch": 0.17987524072678557, "grad_norm": 0.3021979033946991, "learning_rate": 0.00019824273180449992, "loss": 11.6737, "step": 8593 }, { "epoch": 0.1798961734907477, "grad_norm": 0.2144390344619751, "learning_rate": 0.00019824232255436208, "loss": 11.668, "step": 8594 }, { "epoch": 0.17991710625470989, "grad_norm": 0.24104417860507965, "learning_rate": 0.00019824191325699722, "loss": 11.6701, "step": 8595 }, { "epoch": 0.17993803901867203, "grad_norm": 0.24979281425476074, "learning_rate": 0.00019824150391240547, "loss": 11.6785, "step": 8596 }, { "epoch": 0.17995897178263418, "grad_norm": 0.2686556875705719, "learning_rate": 0.00019824109452058706, "loss": 11.6517, "step": 8597 }, { "epoch": 0.17997990454659632, "grad_norm": 0.248971626162529, "learning_rate": 0.0001982406850815422, "loss": 11.676, "step": 8598 }, { "epoch": 0.1800008373105585, "grad_norm": 0.2863205075263977, "learning_rate": 0.00019824027559527108, "loss": 11.6847, "step": 8599 }, { "epoch": 0.18002177007452064, "grad_norm": 0.20141787827014923, "learning_rate": 0.00019823986606177388, "loss": 11.6673, "step": 8600 }, { "epoch": 0.18004270283848278, "grad_norm": 0.23207055032253265, "learning_rate": 0.0001982394564810508, "loss": 11.6965, "step": 8601 }, { "epoch": 0.18006363560244495, "grad_norm": 0.30753999948501587, "learning_rate": 0.00019823904685310203, "loss": 11.6946, "step": 8602 }, { "epoch": 0.1800845683664071, "grad_norm": 0.2588915526866913, "learning_rate": 0.00019823863717792782, "loss": 11.6857, "step": 8603 }, { "epoch": 0.18010550113036924, "grad_norm": 0.2338716834783554, "learning_rate": 0.0001982382274555283, "loss": 11.7004, "step": 8604 }, { "epoch": 0.18012643389433142, "grad_norm": 0.31311675906181335, "learning_rate": 0.0001982378176859037, "loss": 11.669, "step": 8605 }, { "epoch": 0.18014736665829356, "grad_norm": 0.34740397334098816, "learning_rate": 0.00019823740786905424, "loss": 11.6964, "step": 8606 }, { "epoch": 0.1801682994222557, "grad_norm": 0.23713898658752441, "learning_rate": 0.00019823699800498008, "loss": 11.6706, "step": 8607 }, { "epoch": 0.18018923218621788, "grad_norm": 0.2300623208284378, "learning_rate": 0.0001982365880936814, "loss": 11.673, "step": 8608 }, { "epoch": 0.18021016495018002, "grad_norm": 0.19616788625717163, "learning_rate": 0.00019823617813515844, "loss": 11.688, "step": 8609 }, { "epoch": 0.18023109771414217, "grad_norm": 0.2430504858493805, "learning_rate": 0.00019823576812941137, "loss": 11.6876, "step": 8610 }, { "epoch": 0.18025203047810434, "grad_norm": 0.2690168619155884, "learning_rate": 0.0001982353580764404, "loss": 11.6779, "step": 8611 }, { "epoch": 0.1802729632420665, "grad_norm": 0.2198544293642044, "learning_rate": 0.00019823494797624575, "loss": 11.6829, "step": 8612 }, { "epoch": 0.18029389600602863, "grad_norm": 0.23498287796974182, "learning_rate": 0.00019823453782882758, "loss": 11.6767, "step": 8613 }, { "epoch": 0.18031482876999078, "grad_norm": 0.24534468352794647, "learning_rate": 0.00019823412763418608, "loss": 11.6908, "step": 8614 }, { "epoch": 0.18033576153395295, "grad_norm": 0.23111779987812042, "learning_rate": 0.0001982337173923215, "loss": 11.6734, "step": 8615 }, { "epoch": 0.1803566942979151, "grad_norm": 0.23490943014621735, "learning_rate": 0.00019823330710323398, "loss": 11.6759, "step": 8616 }, { "epoch": 0.18037762706187724, "grad_norm": 0.2333010584115982, "learning_rate": 0.00019823289676692372, "loss": 11.6808, "step": 8617 }, { "epoch": 0.1803985598258394, "grad_norm": 0.2093799114227295, "learning_rate": 0.00019823248638339096, "loss": 11.6674, "step": 8618 }, { "epoch": 0.18041949258980156, "grad_norm": 0.24387741088867188, "learning_rate": 0.0001982320759526359, "loss": 11.6969, "step": 8619 }, { "epoch": 0.1804404253537637, "grad_norm": 0.26153454184532166, "learning_rate": 0.0001982316654746587, "loss": 11.6721, "step": 8620 }, { "epoch": 0.18046135811772587, "grad_norm": 0.23062865436077118, "learning_rate": 0.00019823125494945954, "loss": 11.662, "step": 8621 }, { "epoch": 0.18048229088168802, "grad_norm": 0.2102607786655426, "learning_rate": 0.00019823084437703864, "loss": 11.6675, "step": 8622 }, { "epoch": 0.18050322364565016, "grad_norm": 0.26561737060546875, "learning_rate": 0.00019823043375739624, "loss": 11.6858, "step": 8623 }, { "epoch": 0.18052415640961234, "grad_norm": 0.36928921937942505, "learning_rate": 0.0001982300230905325, "loss": 11.6711, "step": 8624 }, { "epoch": 0.18054508917357448, "grad_norm": 0.22885379195213318, "learning_rate": 0.0001982296123764476, "loss": 11.691, "step": 8625 }, { "epoch": 0.18056602193753662, "grad_norm": 0.22054487466812134, "learning_rate": 0.00019822920161514178, "loss": 11.6714, "step": 8626 }, { "epoch": 0.1805869547014988, "grad_norm": 0.24479690194129944, "learning_rate": 0.0001982287908066152, "loss": 11.6715, "step": 8627 }, { "epoch": 0.18060788746546094, "grad_norm": 0.22278426587581635, "learning_rate": 0.0001982283799508681, "loss": 11.6641, "step": 8628 }, { "epoch": 0.1806288202294231, "grad_norm": 0.25495219230651855, "learning_rate": 0.0001982279690479006, "loss": 11.673, "step": 8629 }, { "epoch": 0.18064975299338526, "grad_norm": 0.2579081058502197, "learning_rate": 0.00019822755809771297, "loss": 11.6688, "step": 8630 }, { "epoch": 0.1806706857573474, "grad_norm": 0.30947208404541016, "learning_rate": 0.0001982271471003054, "loss": 11.6652, "step": 8631 }, { "epoch": 0.18069161852130955, "grad_norm": 0.3004082441329956, "learning_rate": 0.00019822673605567802, "loss": 11.6728, "step": 8632 }, { "epoch": 0.1807125512852717, "grad_norm": 0.27176693081855774, "learning_rate": 0.00019822632496383112, "loss": 11.6984, "step": 8633 }, { "epoch": 0.18073348404923387, "grad_norm": 0.2878904640674591, "learning_rate": 0.00019822591382476486, "loss": 11.679, "step": 8634 }, { "epoch": 0.180754416813196, "grad_norm": 0.278265118598938, "learning_rate": 0.00019822550263847943, "loss": 11.6862, "step": 8635 }, { "epoch": 0.18077534957715816, "grad_norm": 0.2558882236480713, "learning_rate": 0.00019822509140497504, "loss": 11.6611, "step": 8636 }, { "epoch": 0.18079628234112033, "grad_norm": 0.24824315309524536, "learning_rate": 0.00019822468012425186, "loss": 11.6794, "step": 8637 }, { "epoch": 0.18081721510508247, "grad_norm": 0.2762868106365204, "learning_rate": 0.00019822426879631013, "loss": 11.6851, "step": 8638 }, { "epoch": 0.18083814786904462, "grad_norm": 0.32939302921295166, "learning_rate": 0.00019822385742115, "loss": 11.6818, "step": 8639 }, { "epoch": 0.1808590806330068, "grad_norm": 0.30020445585250854, "learning_rate": 0.00019822344599877172, "loss": 11.6988, "step": 8640 }, { "epoch": 0.18088001339696894, "grad_norm": 0.22787874937057495, "learning_rate": 0.00019822303452917542, "loss": 11.6705, "step": 8641 }, { "epoch": 0.18090094616093108, "grad_norm": 0.2437194585800171, "learning_rate": 0.0001982226230123614, "loss": 11.677, "step": 8642 }, { "epoch": 0.18092187892489325, "grad_norm": 0.2241489440202713, "learning_rate": 0.00019822221144832975, "loss": 11.6764, "step": 8643 }, { "epoch": 0.1809428116888554, "grad_norm": 0.21385887265205383, "learning_rate": 0.00019822179983708073, "loss": 11.6757, "step": 8644 }, { "epoch": 0.18096374445281754, "grad_norm": 0.23617452383041382, "learning_rate": 0.00019822138817861452, "loss": 11.6753, "step": 8645 }, { "epoch": 0.18098467721677972, "grad_norm": 0.29005399346351624, "learning_rate": 0.00019822097647293133, "loss": 11.684, "step": 8646 }, { "epoch": 0.18100560998074186, "grad_norm": 0.22524884343147278, "learning_rate": 0.00019822056472003135, "loss": 11.6821, "step": 8647 }, { "epoch": 0.181026542744704, "grad_norm": 0.22310541570186615, "learning_rate": 0.00019822015291991475, "loss": 11.6853, "step": 8648 }, { "epoch": 0.18104747550866618, "grad_norm": 0.3014034032821655, "learning_rate": 0.00019821974107258177, "loss": 11.7012, "step": 8649 }, { "epoch": 0.18106840827262832, "grad_norm": 0.3535830080509186, "learning_rate": 0.0001982193291780326, "loss": 11.703, "step": 8650 }, { "epoch": 0.18108934103659047, "grad_norm": 0.19258879125118256, "learning_rate": 0.00019821891723626739, "loss": 11.6776, "step": 8651 }, { "epoch": 0.1811102738005526, "grad_norm": 0.19753462076187134, "learning_rate": 0.00019821850524728642, "loss": 11.6806, "step": 8652 }, { "epoch": 0.18113120656451479, "grad_norm": 0.2681965231895447, "learning_rate": 0.00019821809321108984, "loss": 11.6688, "step": 8653 }, { "epoch": 0.18115213932847693, "grad_norm": 0.24022449553012848, "learning_rate": 0.00019821768112767788, "loss": 11.6829, "step": 8654 }, { "epoch": 0.18117307209243907, "grad_norm": 0.2643812894821167, "learning_rate": 0.00019821726899705064, "loss": 11.6787, "step": 8655 }, { "epoch": 0.18119400485640125, "grad_norm": 0.2564407289028168, "learning_rate": 0.00019821685681920844, "loss": 11.6766, "step": 8656 }, { "epoch": 0.1812149376203634, "grad_norm": 0.20256450772285461, "learning_rate": 0.00019821644459415143, "loss": 11.6857, "step": 8657 }, { "epoch": 0.18123587038432554, "grad_norm": 0.26276546716690063, "learning_rate": 0.0001982160323218798, "loss": 11.6843, "step": 8658 }, { "epoch": 0.1812568031482877, "grad_norm": 0.2178860455751419, "learning_rate": 0.00019821562000239376, "loss": 11.6765, "step": 8659 }, { "epoch": 0.18127773591224985, "grad_norm": 0.2586802840232849, "learning_rate": 0.00019821520763569348, "loss": 11.6668, "step": 8660 }, { "epoch": 0.181298668676212, "grad_norm": 0.2547401785850525, "learning_rate": 0.0001982147952217792, "loss": 11.6708, "step": 8661 }, { "epoch": 0.18131960144017417, "grad_norm": 0.2618475556373596, "learning_rate": 0.0001982143827606511, "loss": 11.6903, "step": 8662 }, { "epoch": 0.18134053420413632, "grad_norm": 0.28815600275993347, "learning_rate": 0.00019821397025230938, "loss": 11.6873, "step": 8663 }, { "epoch": 0.18136146696809846, "grad_norm": 0.20939746499061584, "learning_rate": 0.00019821355769675424, "loss": 11.6798, "step": 8664 }, { "epoch": 0.18138239973206063, "grad_norm": 0.3142988979816437, "learning_rate": 0.00019821314509398588, "loss": 11.6851, "step": 8665 }, { "epoch": 0.18140333249602278, "grad_norm": 0.23062677681446075, "learning_rate": 0.00019821273244400447, "loss": 11.6607, "step": 8666 }, { "epoch": 0.18142426525998492, "grad_norm": 0.24571006000041962, "learning_rate": 0.00019821231974681024, "loss": 11.6772, "step": 8667 }, { "epoch": 0.18144519802394707, "grad_norm": 0.28370821475982666, "learning_rate": 0.00019821190700240336, "loss": 11.6879, "step": 8668 }, { "epoch": 0.18146613078790924, "grad_norm": 0.24708513915538788, "learning_rate": 0.0001982114942107841, "loss": 11.6792, "step": 8669 }, { "epoch": 0.18148706355187139, "grad_norm": 0.20196561515331268, "learning_rate": 0.00019821108137195256, "loss": 11.6674, "step": 8670 }, { "epoch": 0.18150799631583353, "grad_norm": 0.23784852027893066, "learning_rate": 0.00019821066848590898, "loss": 11.6857, "step": 8671 }, { "epoch": 0.1815289290797957, "grad_norm": 0.2344694584608078, "learning_rate": 0.0001982102555526536, "loss": 11.6894, "step": 8672 }, { "epoch": 0.18154986184375785, "grad_norm": 0.21743202209472656, "learning_rate": 0.00019820984257218655, "loss": 11.6788, "step": 8673 }, { "epoch": 0.18157079460772, "grad_norm": 0.2896273732185364, "learning_rate": 0.0001982094295445081, "loss": 11.6777, "step": 8674 }, { "epoch": 0.18159172737168217, "grad_norm": 0.2641315162181854, "learning_rate": 0.0001982090164696184, "loss": 11.6935, "step": 8675 }, { "epoch": 0.1816126601356443, "grad_norm": 0.27254411578178406, "learning_rate": 0.00019820860334751763, "loss": 11.6702, "step": 8676 }, { "epoch": 0.18163359289960646, "grad_norm": 0.2376912534236908, "learning_rate": 0.000198208190178206, "loss": 11.6844, "step": 8677 }, { "epoch": 0.18165452566356863, "grad_norm": 0.22483102977275848, "learning_rate": 0.0001982077769616838, "loss": 11.6938, "step": 8678 }, { "epoch": 0.18167545842753077, "grad_norm": 0.20746998488903046, "learning_rate": 0.00019820736369795106, "loss": 11.6615, "step": 8679 }, { "epoch": 0.18169639119149292, "grad_norm": 0.2517799735069275, "learning_rate": 0.00019820695038700814, "loss": 11.6852, "step": 8680 }, { "epoch": 0.1817173239554551, "grad_norm": 0.22088399529457092, "learning_rate": 0.00019820653702885515, "loss": 11.6704, "step": 8681 }, { "epoch": 0.18173825671941723, "grad_norm": 0.19773176312446594, "learning_rate": 0.0001982061236234923, "loss": 11.6694, "step": 8682 }, { "epoch": 0.18175918948337938, "grad_norm": 0.23124033212661743, "learning_rate": 0.0001982057101709198, "loss": 11.6838, "step": 8683 }, { "epoch": 0.18178012224734155, "grad_norm": 0.2613028287887573, "learning_rate": 0.00019820529667113787, "loss": 11.6708, "step": 8684 }, { "epoch": 0.1818010550113037, "grad_norm": 0.29742735624313354, "learning_rate": 0.00019820488312414668, "loss": 11.6695, "step": 8685 }, { "epoch": 0.18182198777526584, "grad_norm": 0.20804554224014282, "learning_rate": 0.00019820446952994642, "loss": 11.6782, "step": 8686 }, { "epoch": 0.181842920539228, "grad_norm": 0.25971120595932007, "learning_rate": 0.00019820405588853732, "loss": 11.6777, "step": 8687 }, { "epoch": 0.18186385330319016, "grad_norm": 0.2058601677417755, "learning_rate": 0.00019820364219991956, "loss": 11.674, "step": 8688 }, { "epoch": 0.1818847860671523, "grad_norm": 0.2559477984905243, "learning_rate": 0.0001982032284640933, "loss": 11.6877, "step": 8689 }, { "epoch": 0.18190571883111445, "grad_norm": 0.2916939854621887, "learning_rate": 0.00019820281468105883, "loss": 11.6898, "step": 8690 }, { "epoch": 0.18192665159507662, "grad_norm": 0.256681889295578, "learning_rate": 0.00019820240085081627, "loss": 11.6795, "step": 8691 }, { "epoch": 0.18194758435903877, "grad_norm": 0.2912600338459015, "learning_rate": 0.00019820198697336586, "loss": 11.6891, "step": 8692 }, { "epoch": 0.1819685171230009, "grad_norm": 0.24691960215568542, "learning_rate": 0.0001982015730487078, "loss": 11.6752, "step": 8693 }, { "epoch": 0.18198944988696308, "grad_norm": 0.23216815292835236, "learning_rate": 0.00019820115907684226, "loss": 11.6767, "step": 8694 }, { "epoch": 0.18201038265092523, "grad_norm": 0.237783282995224, "learning_rate": 0.00019820074505776946, "loss": 11.6854, "step": 8695 }, { "epoch": 0.18203131541488737, "grad_norm": 0.3123631179332733, "learning_rate": 0.00019820033099148958, "loss": 11.6954, "step": 8696 }, { "epoch": 0.18205224817884955, "grad_norm": 0.2354286015033722, "learning_rate": 0.00019819991687800286, "loss": 11.6859, "step": 8697 }, { "epoch": 0.1820731809428117, "grad_norm": 0.3104867935180664, "learning_rate": 0.00019819950271730944, "loss": 11.689, "step": 8698 }, { "epoch": 0.18209411370677384, "grad_norm": 0.23816469311714172, "learning_rate": 0.00019819908850940958, "loss": 11.6816, "step": 8699 }, { "epoch": 0.182115046470736, "grad_norm": 0.22705546021461487, "learning_rate": 0.00019819867425430342, "loss": 11.687, "step": 8700 }, { "epoch": 0.18213597923469815, "grad_norm": 0.2433178871870041, "learning_rate": 0.00019819825995199124, "loss": 11.6757, "step": 8701 }, { "epoch": 0.1821569119986603, "grad_norm": 0.2817809581756592, "learning_rate": 0.00019819784560247313, "loss": 11.6717, "step": 8702 }, { "epoch": 0.18217784476262244, "grad_norm": 0.30062055587768555, "learning_rate": 0.00019819743120574938, "loss": 11.6889, "step": 8703 }, { "epoch": 0.18219877752658462, "grad_norm": 0.24723507463932037, "learning_rate": 0.00019819701676182018, "loss": 11.686, "step": 8704 }, { "epoch": 0.18221971029054676, "grad_norm": 0.2651127278804779, "learning_rate": 0.00019819660227068565, "loss": 11.6817, "step": 8705 }, { "epoch": 0.1822406430545089, "grad_norm": 0.2861610949039459, "learning_rate": 0.00019819618773234606, "loss": 11.6663, "step": 8706 }, { "epoch": 0.18226157581847108, "grad_norm": 0.34230712056159973, "learning_rate": 0.00019819577314680162, "loss": 11.6817, "step": 8707 }, { "epoch": 0.18228250858243322, "grad_norm": 0.23655131459236145, "learning_rate": 0.00019819535851405248, "loss": 11.6791, "step": 8708 }, { "epoch": 0.18230344134639537, "grad_norm": 0.23353828489780426, "learning_rate": 0.00019819494383409886, "loss": 11.6855, "step": 8709 }, { "epoch": 0.18232437411035754, "grad_norm": 0.20982913672924042, "learning_rate": 0.00019819452910694102, "loss": 11.6747, "step": 8710 }, { "epoch": 0.18234530687431968, "grad_norm": 0.32553401589393616, "learning_rate": 0.00019819411433257903, "loss": 11.6873, "step": 8711 }, { "epoch": 0.18236623963828183, "grad_norm": 0.20357699692249298, "learning_rate": 0.00019819369951101318, "loss": 11.6748, "step": 8712 }, { "epoch": 0.182387172402244, "grad_norm": 0.19954413175582886, "learning_rate": 0.00019819328464224366, "loss": 11.6609, "step": 8713 }, { "epoch": 0.18240810516620615, "grad_norm": 0.25470155477523804, "learning_rate": 0.00019819286972627066, "loss": 11.6762, "step": 8714 }, { "epoch": 0.1824290379301683, "grad_norm": 0.24277031421661377, "learning_rate": 0.0001981924547630944, "loss": 11.6895, "step": 8715 }, { "epoch": 0.18244997069413046, "grad_norm": 0.22404122352600098, "learning_rate": 0.00019819203975271507, "loss": 11.69, "step": 8716 }, { "epoch": 0.1824709034580926, "grad_norm": 0.22168482840061188, "learning_rate": 0.0001981916246951328, "loss": 11.6886, "step": 8717 }, { "epoch": 0.18249183622205475, "grad_norm": 0.27819347381591797, "learning_rate": 0.00019819120959034786, "loss": 11.6785, "step": 8718 }, { "epoch": 0.18251276898601693, "grad_norm": 0.19426800310611725, "learning_rate": 0.00019819079443836047, "loss": 11.6725, "step": 8719 }, { "epoch": 0.18253370174997907, "grad_norm": 0.30038997530937195, "learning_rate": 0.0001981903792391708, "loss": 11.6861, "step": 8720 }, { "epoch": 0.18255463451394122, "grad_norm": 0.2754061222076416, "learning_rate": 0.000198189963992779, "loss": 11.6772, "step": 8721 }, { "epoch": 0.18257556727790336, "grad_norm": 0.29718807339668274, "learning_rate": 0.00019818954869918535, "loss": 11.6819, "step": 8722 }, { "epoch": 0.18259650004186553, "grad_norm": 0.2761630415916443, "learning_rate": 0.00019818913335839001, "loss": 11.6825, "step": 8723 }, { "epoch": 0.18261743280582768, "grad_norm": 0.30297988653182983, "learning_rate": 0.00019818871797039317, "loss": 11.6765, "step": 8724 }, { "epoch": 0.18263836556978982, "grad_norm": 0.28862494230270386, "learning_rate": 0.0001981883025351951, "loss": 11.6697, "step": 8725 }, { "epoch": 0.182659298333752, "grad_norm": 0.32005417346954346, "learning_rate": 0.0001981878870527959, "loss": 11.6883, "step": 8726 }, { "epoch": 0.18268023109771414, "grad_norm": 0.2617470622062683, "learning_rate": 0.00019818747152319582, "loss": 11.6898, "step": 8727 }, { "epoch": 0.18270116386167629, "grad_norm": 0.2444438338279724, "learning_rate": 0.00019818705594639506, "loss": 11.673, "step": 8728 }, { "epoch": 0.18272209662563846, "grad_norm": 0.24549485743045807, "learning_rate": 0.0001981866403223938, "loss": 11.685, "step": 8729 }, { "epoch": 0.1827430293896006, "grad_norm": 0.27985188364982605, "learning_rate": 0.00019818622465119228, "loss": 11.6812, "step": 8730 }, { "epoch": 0.18276396215356275, "grad_norm": 0.23495672643184662, "learning_rate": 0.00019818580893279065, "loss": 11.6759, "step": 8731 }, { "epoch": 0.18278489491752492, "grad_norm": 0.19342350959777832, "learning_rate": 0.00019818539316718913, "loss": 11.6843, "step": 8732 }, { "epoch": 0.18280582768148707, "grad_norm": 0.2501344680786133, "learning_rate": 0.00019818497735438797, "loss": 11.683, "step": 8733 }, { "epoch": 0.1828267604454492, "grad_norm": 0.2698166072368622, "learning_rate": 0.00019818456149438727, "loss": 11.6881, "step": 8734 }, { "epoch": 0.18284769320941138, "grad_norm": 0.25724634528160095, "learning_rate": 0.0001981841455871873, "loss": 11.6828, "step": 8735 }, { "epoch": 0.18286862597337353, "grad_norm": 0.2104189097881317, "learning_rate": 0.00019818372963278827, "loss": 11.6723, "step": 8736 }, { "epoch": 0.18288955873733567, "grad_norm": 0.26239559054374695, "learning_rate": 0.00019818331363119032, "loss": 11.6788, "step": 8737 }, { "epoch": 0.18291049150129784, "grad_norm": 0.23263907432556152, "learning_rate": 0.0001981828975823937, "loss": 11.6818, "step": 8738 }, { "epoch": 0.18293142426526, "grad_norm": 0.23874184489250183, "learning_rate": 0.00019818248148639858, "loss": 11.6782, "step": 8739 }, { "epoch": 0.18295235702922213, "grad_norm": 0.22152075171470642, "learning_rate": 0.0001981820653432052, "loss": 11.6754, "step": 8740 }, { "epoch": 0.18297328979318428, "grad_norm": 0.29203876852989197, "learning_rate": 0.00019818164915281372, "loss": 11.6628, "step": 8741 }, { "epoch": 0.18299422255714645, "grad_norm": 0.22164389491081238, "learning_rate": 0.00019818123291522434, "loss": 11.6948, "step": 8742 }, { "epoch": 0.1830151553211086, "grad_norm": 0.30558836460113525, "learning_rate": 0.0001981808166304373, "loss": 11.6808, "step": 8743 }, { "epoch": 0.18303608808507074, "grad_norm": 0.24117936193943024, "learning_rate": 0.00019818040029845275, "loss": 11.6909, "step": 8744 }, { "epoch": 0.18305702084903291, "grad_norm": 0.2519080936908722, "learning_rate": 0.00019817998391927092, "loss": 11.6816, "step": 8745 }, { "epoch": 0.18307795361299506, "grad_norm": 0.32219502329826355, "learning_rate": 0.00019817956749289202, "loss": 11.6811, "step": 8746 }, { "epoch": 0.1830988863769572, "grad_norm": 0.22229167819023132, "learning_rate": 0.0001981791510193162, "loss": 11.6799, "step": 8747 }, { "epoch": 0.18311981914091938, "grad_norm": 0.263269305229187, "learning_rate": 0.00019817873449854375, "loss": 11.7001, "step": 8748 }, { "epoch": 0.18314075190488152, "grad_norm": 0.23723891377449036, "learning_rate": 0.00019817831793057474, "loss": 11.6783, "step": 8749 }, { "epoch": 0.18316168466884367, "grad_norm": 0.2964494824409485, "learning_rate": 0.0001981779013154095, "loss": 11.6819, "step": 8750 }, { "epoch": 0.18318261743280584, "grad_norm": 0.2399803102016449, "learning_rate": 0.00019817748465304815, "loss": 11.6773, "step": 8751 }, { "epoch": 0.18320355019676798, "grad_norm": 0.21805325150489807, "learning_rate": 0.00019817706794349093, "loss": 11.6701, "step": 8752 }, { "epoch": 0.18322448296073013, "grad_norm": 0.2846616506576538, "learning_rate": 0.00019817665118673804, "loss": 11.6896, "step": 8753 }, { "epoch": 0.1832454157246923, "grad_norm": 0.2472764253616333, "learning_rate": 0.00019817623438278962, "loss": 11.683, "step": 8754 }, { "epoch": 0.18326634848865445, "grad_norm": 0.24885641038417816, "learning_rate": 0.00019817581753164598, "loss": 11.6859, "step": 8755 }, { "epoch": 0.1832872812526166, "grad_norm": 0.23726654052734375, "learning_rate": 0.0001981754006333072, "loss": 11.667, "step": 8756 }, { "epoch": 0.18330821401657874, "grad_norm": 0.32482877373695374, "learning_rate": 0.0001981749836877736, "loss": 11.6864, "step": 8757 }, { "epoch": 0.1833291467805409, "grad_norm": 0.27138203382492065, "learning_rate": 0.00019817456669504524, "loss": 11.6786, "step": 8758 }, { "epoch": 0.18335007954450305, "grad_norm": 0.2981128990650177, "learning_rate": 0.00019817414965512243, "loss": 11.6888, "step": 8759 }, { "epoch": 0.1833710123084652, "grad_norm": 0.26975077390670776, "learning_rate": 0.00019817373256800536, "loss": 11.6874, "step": 8760 }, { "epoch": 0.18339194507242737, "grad_norm": 0.263668030500412, "learning_rate": 0.0001981733154336942, "loss": 11.6758, "step": 8761 }, { "epoch": 0.18341287783638952, "grad_norm": 0.2098800390958786, "learning_rate": 0.00019817289825218914, "loss": 11.6835, "step": 8762 }, { "epoch": 0.18343381060035166, "grad_norm": 0.21526606380939484, "learning_rate": 0.00019817248102349042, "loss": 11.6653, "step": 8763 }, { "epoch": 0.18345474336431383, "grad_norm": 0.25574788451194763, "learning_rate": 0.0001981720637475982, "loss": 11.6781, "step": 8764 }, { "epoch": 0.18347567612827598, "grad_norm": 0.22158952057361603, "learning_rate": 0.00019817164642451274, "loss": 11.6842, "step": 8765 }, { "epoch": 0.18349660889223812, "grad_norm": 0.1956341415643692, "learning_rate": 0.00019817122905423418, "loss": 11.6765, "step": 8766 }, { "epoch": 0.1835175416562003, "grad_norm": 0.21363452076911926, "learning_rate": 0.00019817081163676274, "loss": 11.6799, "step": 8767 }, { "epoch": 0.18353847442016244, "grad_norm": 0.21498367190361023, "learning_rate": 0.00019817039417209862, "loss": 11.6831, "step": 8768 }, { "epoch": 0.18355940718412458, "grad_norm": 0.2713744044303894, "learning_rate": 0.000198169976660242, "loss": 11.6753, "step": 8769 }, { "epoch": 0.18358033994808676, "grad_norm": 0.22487907111644745, "learning_rate": 0.00019816955910119318, "loss": 11.672, "step": 8770 }, { "epoch": 0.1836012727120489, "grad_norm": 0.29124704003334045, "learning_rate": 0.00019816914149495222, "loss": 11.6906, "step": 8771 }, { "epoch": 0.18362220547601105, "grad_norm": 0.2881288528442383, "learning_rate": 0.00019816872384151942, "loss": 11.6641, "step": 8772 }, { "epoch": 0.18364313823997322, "grad_norm": 0.2771703004837036, "learning_rate": 0.00019816830614089492, "loss": 11.6736, "step": 8773 }, { "epoch": 0.18366407100393536, "grad_norm": 0.241933673620224, "learning_rate": 0.00019816788839307895, "loss": 11.6805, "step": 8774 }, { "epoch": 0.1836850037678975, "grad_norm": 0.26382821798324585, "learning_rate": 0.00019816747059807176, "loss": 11.6818, "step": 8775 }, { "epoch": 0.18370593653185965, "grad_norm": 0.24035502970218658, "learning_rate": 0.00019816705275587344, "loss": 11.6657, "step": 8776 }, { "epoch": 0.18372686929582183, "grad_norm": 0.29647791385650635, "learning_rate": 0.00019816663486648425, "loss": 11.6745, "step": 8777 }, { "epoch": 0.18374780205978397, "grad_norm": 0.2325890213251114, "learning_rate": 0.00019816621692990443, "loss": 11.6816, "step": 8778 }, { "epoch": 0.18376873482374612, "grad_norm": 0.3524409234523773, "learning_rate": 0.00019816579894613412, "loss": 11.6801, "step": 8779 }, { "epoch": 0.1837896675877083, "grad_norm": 0.19024187326431274, "learning_rate": 0.00019816538091517355, "loss": 11.6721, "step": 8780 }, { "epoch": 0.18381060035167043, "grad_norm": 0.33582809567451477, "learning_rate": 0.00019816496283702292, "loss": 11.6785, "step": 8781 }, { "epoch": 0.18383153311563258, "grad_norm": 0.231706902384758, "learning_rate": 0.00019816454471168238, "loss": 11.6646, "step": 8782 }, { "epoch": 0.18385246587959475, "grad_norm": 0.3308338224887848, "learning_rate": 0.00019816412653915225, "loss": 11.6743, "step": 8783 }, { "epoch": 0.1838733986435569, "grad_norm": 0.35309144854545593, "learning_rate": 0.0001981637083194326, "loss": 11.6783, "step": 8784 }, { "epoch": 0.18389433140751904, "grad_norm": 0.20950256288051605, "learning_rate": 0.00019816329005252373, "loss": 11.6658, "step": 8785 }, { "epoch": 0.1839152641714812, "grad_norm": 0.25346916913986206, "learning_rate": 0.00019816287173842577, "loss": 11.6674, "step": 8786 }, { "epoch": 0.18393619693544336, "grad_norm": 0.2581900954246521, "learning_rate": 0.00019816245337713894, "loss": 11.6863, "step": 8787 }, { "epoch": 0.1839571296994055, "grad_norm": 0.20745877921581268, "learning_rate": 0.00019816203496866348, "loss": 11.6819, "step": 8788 }, { "epoch": 0.18397806246336768, "grad_norm": 0.2833133339881897, "learning_rate": 0.00019816161651299954, "loss": 11.6771, "step": 8789 }, { "epoch": 0.18399899522732982, "grad_norm": 0.22473488748073578, "learning_rate": 0.00019816119801014737, "loss": 11.6814, "step": 8790 }, { "epoch": 0.18401992799129196, "grad_norm": 0.22674132883548737, "learning_rate": 0.00019816077946010712, "loss": 11.6735, "step": 8791 }, { "epoch": 0.18404086075525414, "grad_norm": 0.25160518288612366, "learning_rate": 0.00019816036086287903, "loss": 11.6736, "step": 8792 }, { "epoch": 0.18406179351921628, "grad_norm": 0.30193352699279785, "learning_rate": 0.00019815994221846327, "loss": 11.6993, "step": 8793 }, { "epoch": 0.18408272628317843, "grad_norm": 0.2600216567516327, "learning_rate": 0.0001981595235268601, "loss": 11.6859, "step": 8794 }, { "epoch": 0.18410365904714057, "grad_norm": 0.23453952372074127, "learning_rate": 0.00019815910478806963, "loss": 11.6787, "step": 8795 }, { "epoch": 0.18412459181110274, "grad_norm": 0.23669768869876862, "learning_rate": 0.00019815868600209214, "loss": 11.6786, "step": 8796 }, { "epoch": 0.1841455245750649, "grad_norm": 0.2732905447483063, "learning_rate": 0.0001981582671689278, "loss": 11.67, "step": 8797 }, { "epoch": 0.18416645733902703, "grad_norm": 0.20538055896759033, "learning_rate": 0.00019815784828857683, "loss": 11.6914, "step": 8798 }, { "epoch": 0.1841873901029892, "grad_norm": 0.3075682520866394, "learning_rate": 0.00019815742936103936, "loss": 11.6729, "step": 8799 }, { "epoch": 0.18420832286695135, "grad_norm": 0.2564481794834137, "learning_rate": 0.0001981570103863157, "loss": 11.6865, "step": 8800 }, { "epoch": 0.1842292556309135, "grad_norm": 0.2536742389202118, "learning_rate": 0.000198156591364406, "loss": 11.6882, "step": 8801 }, { "epoch": 0.18425018839487567, "grad_norm": 0.23806490004062653, "learning_rate": 0.00019815617229531043, "loss": 11.6833, "step": 8802 }, { "epoch": 0.18427112115883781, "grad_norm": 0.301148921251297, "learning_rate": 0.00019815575317902923, "loss": 11.6953, "step": 8803 }, { "epoch": 0.18429205392279996, "grad_norm": 0.26641935110092163, "learning_rate": 0.0001981553340155626, "loss": 11.6664, "step": 8804 }, { "epoch": 0.18431298668676213, "grad_norm": 0.2393956184387207, "learning_rate": 0.0001981549148049107, "loss": 11.6731, "step": 8805 }, { "epoch": 0.18433391945072428, "grad_norm": 0.22560298442840576, "learning_rate": 0.00019815449554707378, "loss": 11.6886, "step": 8806 }, { "epoch": 0.18435485221468642, "grad_norm": 0.3034481406211853, "learning_rate": 0.00019815407624205203, "loss": 11.67, "step": 8807 }, { "epoch": 0.1843757849786486, "grad_norm": 0.2660495936870575, "learning_rate": 0.00019815365688984567, "loss": 11.6906, "step": 8808 }, { "epoch": 0.18439671774261074, "grad_norm": 0.2656223177909851, "learning_rate": 0.00019815323749045489, "loss": 11.6793, "step": 8809 }, { "epoch": 0.18441765050657288, "grad_norm": 0.3035281300544739, "learning_rate": 0.00019815281804387984, "loss": 11.6777, "step": 8810 }, { "epoch": 0.18443858327053503, "grad_norm": 0.3265521824359894, "learning_rate": 0.0001981523985501208, "loss": 11.686, "step": 8811 }, { "epoch": 0.1844595160344972, "grad_norm": 0.2689542770385742, "learning_rate": 0.0001981519790091779, "loss": 11.6635, "step": 8812 }, { "epoch": 0.18448044879845935, "grad_norm": 0.22577476501464844, "learning_rate": 0.0001981515594210514, "loss": 11.6664, "step": 8813 }, { "epoch": 0.1845013815624215, "grad_norm": 0.23689749836921692, "learning_rate": 0.00019815113978574147, "loss": 11.6863, "step": 8814 }, { "epoch": 0.18452231432638366, "grad_norm": 0.26500001549720764, "learning_rate": 0.00019815072010324834, "loss": 11.6685, "step": 8815 }, { "epoch": 0.1845432470903458, "grad_norm": 0.22922426462173462, "learning_rate": 0.00019815030037357217, "loss": 11.6941, "step": 8816 }, { "epoch": 0.18456417985430795, "grad_norm": 0.23556029796600342, "learning_rate": 0.00019814988059671317, "loss": 11.6797, "step": 8817 }, { "epoch": 0.18458511261827013, "grad_norm": 0.24253158271312714, "learning_rate": 0.00019814946077267157, "loss": 11.6748, "step": 8818 }, { "epoch": 0.18460604538223227, "grad_norm": 0.3029274642467499, "learning_rate": 0.00019814904090144757, "loss": 11.6905, "step": 8819 }, { "epoch": 0.18462697814619441, "grad_norm": 0.2825325131416321, "learning_rate": 0.00019814862098304134, "loss": 11.6749, "step": 8820 }, { "epoch": 0.1846479109101566, "grad_norm": 0.2656589150428772, "learning_rate": 0.00019814820101745313, "loss": 11.6806, "step": 8821 }, { "epoch": 0.18466884367411873, "grad_norm": 0.2598646879196167, "learning_rate": 0.0001981477810046831, "loss": 11.6879, "step": 8822 }, { "epoch": 0.18468977643808088, "grad_norm": 0.23168089985847473, "learning_rate": 0.00019814736094473143, "loss": 11.6813, "step": 8823 }, { "epoch": 0.18471070920204305, "grad_norm": 0.2738237679004669, "learning_rate": 0.0001981469408375984, "loss": 11.6805, "step": 8824 }, { "epoch": 0.1847316419660052, "grad_norm": 0.2174171358346939, "learning_rate": 0.00019814652068328418, "loss": 11.6831, "step": 8825 }, { "epoch": 0.18475257472996734, "grad_norm": 0.2782839834690094, "learning_rate": 0.00019814610048178892, "loss": 11.68, "step": 8826 }, { "epoch": 0.1847735074939295, "grad_norm": 1.3301918506622314, "learning_rate": 0.00019814568023311286, "loss": 11.7149, "step": 8827 }, { "epoch": 0.18479444025789166, "grad_norm": 0.1957748979330063, "learning_rate": 0.00019814525993725623, "loss": 11.6647, "step": 8828 }, { "epoch": 0.1848153730218538, "grad_norm": 0.23657472431659698, "learning_rate": 0.0001981448395942192, "loss": 11.6677, "step": 8829 }, { "epoch": 0.18483630578581595, "grad_norm": 0.248448446393013, "learning_rate": 0.00019814441920400197, "loss": 11.6844, "step": 8830 }, { "epoch": 0.18485723854977812, "grad_norm": 0.25873124599456787, "learning_rate": 0.00019814399876660476, "loss": 11.6841, "step": 8831 }, { "epoch": 0.18487817131374026, "grad_norm": 0.22933723032474518, "learning_rate": 0.00019814357828202775, "loss": 11.6648, "step": 8832 }, { "epoch": 0.1848991040777024, "grad_norm": 0.21058861911296844, "learning_rate": 0.00019814315775027117, "loss": 11.665, "step": 8833 }, { "epoch": 0.18492003684166458, "grad_norm": 0.2484463006258011, "learning_rate": 0.0001981427371713352, "loss": 11.6803, "step": 8834 }, { "epoch": 0.18494096960562673, "grad_norm": 0.403863787651062, "learning_rate": 0.00019814231654522005, "loss": 11.6872, "step": 8835 }, { "epoch": 0.18496190236958887, "grad_norm": 0.2424795925617218, "learning_rate": 0.00019814189587192592, "loss": 11.6847, "step": 8836 }, { "epoch": 0.18498283513355104, "grad_norm": 0.2276209145784378, "learning_rate": 0.00019814147515145303, "loss": 11.6733, "step": 8837 }, { "epoch": 0.1850037678975132, "grad_norm": 0.31738510727882385, "learning_rate": 0.00019814105438380154, "loss": 11.6729, "step": 8838 }, { "epoch": 0.18502470066147533, "grad_norm": 0.25445297360420227, "learning_rate": 0.00019814063356897172, "loss": 11.6805, "step": 8839 }, { "epoch": 0.1850456334254375, "grad_norm": 0.20467546582221985, "learning_rate": 0.00019814021270696367, "loss": 11.6683, "step": 8840 }, { "epoch": 0.18506656618939965, "grad_norm": 0.24110747873783112, "learning_rate": 0.00019813979179777772, "loss": 11.6645, "step": 8841 }, { "epoch": 0.1850874989533618, "grad_norm": 0.2382158786058426, "learning_rate": 0.00019813937084141395, "loss": 11.6728, "step": 8842 }, { "epoch": 0.18510843171732397, "grad_norm": 0.2683476209640503, "learning_rate": 0.00019813894983787265, "loss": 11.6865, "step": 8843 }, { "epoch": 0.1851293644812861, "grad_norm": 0.19020631909370422, "learning_rate": 0.00019813852878715398, "loss": 11.6656, "step": 8844 }, { "epoch": 0.18515029724524826, "grad_norm": 0.24929052591323853, "learning_rate": 0.00019813810768925817, "loss": 11.6761, "step": 8845 }, { "epoch": 0.1851712300092104, "grad_norm": 0.3109915256500244, "learning_rate": 0.00019813768654418536, "loss": 11.6882, "step": 8846 }, { "epoch": 0.18519216277317258, "grad_norm": 0.2605709433555603, "learning_rate": 0.00019813726535193582, "loss": 11.6879, "step": 8847 }, { "epoch": 0.18521309553713472, "grad_norm": 0.2196771204471588, "learning_rate": 0.00019813684411250975, "loss": 11.6836, "step": 8848 }, { "epoch": 0.18523402830109686, "grad_norm": 0.21660184860229492, "learning_rate": 0.00019813642282590733, "loss": 11.6657, "step": 8849 }, { "epoch": 0.18525496106505904, "grad_norm": 0.23280476033687592, "learning_rate": 0.00019813600149212876, "loss": 11.6769, "step": 8850 }, { "epoch": 0.18527589382902118, "grad_norm": 0.2912581264972687, "learning_rate": 0.0001981355801111742, "loss": 11.6968, "step": 8851 }, { "epoch": 0.18529682659298333, "grad_norm": 0.29882004857063293, "learning_rate": 0.00019813515868304394, "loss": 11.6833, "step": 8852 }, { "epoch": 0.1853177593569455, "grad_norm": 0.22588196396827698, "learning_rate": 0.00019813473720773814, "loss": 11.6775, "step": 8853 }, { "epoch": 0.18533869212090764, "grad_norm": 0.36766478419303894, "learning_rate": 0.00019813431568525703, "loss": 11.6758, "step": 8854 }, { "epoch": 0.1853596248848698, "grad_norm": 0.278492271900177, "learning_rate": 0.00019813389411560075, "loss": 11.6792, "step": 8855 }, { "epoch": 0.18538055764883196, "grad_norm": 0.2083720862865448, "learning_rate": 0.00019813347249876957, "loss": 11.6698, "step": 8856 }, { "epoch": 0.1854014904127941, "grad_norm": 0.30608317255973816, "learning_rate": 0.00019813305083476364, "loss": 11.681, "step": 8857 }, { "epoch": 0.18542242317675625, "grad_norm": 0.2776576578617096, "learning_rate": 0.00019813262912358318, "loss": 11.6904, "step": 8858 }, { "epoch": 0.18544335594071842, "grad_norm": 0.2249169647693634, "learning_rate": 0.00019813220736522843, "loss": 11.6859, "step": 8859 }, { "epoch": 0.18546428870468057, "grad_norm": 0.2749161422252655, "learning_rate": 0.00019813178555969953, "loss": 11.6876, "step": 8860 }, { "epoch": 0.1854852214686427, "grad_norm": 0.2590489089488983, "learning_rate": 0.00019813136370699672, "loss": 11.6867, "step": 8861 }, { "epoch": 0.1855061542326049, "grad_norm": 0.2533911168575287, "learning_rate": 0.00019813094180712022, "loss": 11.6742, "step": 8862 }, { "epoch": 0.18552708699656703, "grad_norm": 0.2668818533420563, "learning_rate": 0.0001981305198600702, "loss": 11.6881, "step": 8863 }, { "epoch": 0.18554801976052918, "grad_norm": 0.2572023570537567, "learning_rate": 0.00019813009786584686, "loss": 11.675, "step": 8864 }, { "epoch": 0.18556895252449132, "grad_norm": 0.2343195676803589, "learning_rate": 0.0001981296758244504, "loss": 11.6945, "step": 8865 }, { "epoch": 0.1855898852884535, "grad_norm": 0.2580719292163849, "learning_rate": 0.00019812925373588106, "loss": 11.6781, "step": 8866 }, { "epoch": 0.18561081805241564, "grad_norm": 0.2881343960762024, "learning_rate": 0.00019812883160013904, "loss": 11.6817, "step": 8867 }, { "epoch": 0.18563175081637778, "grad_norm": 0.2853696346282959, "learning_rate": 0.0001981284094172245, "loss": 11.6921, "step": 8868 }, { "epoch": 0.18565268358033996, "grad_norm": 0.254892498254776, "learning_rate": 0.00019812798718713768, "loss": 11.6781, "step": 8869 }, { "epoch": 0.1856736163443021, "grad_norm": 0.26201313734054565, "learning_rate": 0.00019812756490987879, "loss": 11.6712, "step": 8870 }, { "epoch": 0.18569454910826425, "grad_norm": 0.25257351994514465, "learning_rate": 0.000198127142585448, "loss": 11.6918, "step": 8871 }, { "epoch": 0.18571548187222642, "grad_norm": 0.18218712508678436, "learning_rate": 0.00019812672021384548, "loss": 11.6727, "step": 8872 }, { "epoch": 0.18573641463618856, "grad_norm": 0.22680886089801788, "learning_rate": 0.0001981262977950715, "loss": 11.6763, "step": 8873 }, { "epoch": 0.1857573474001507, "grad_norm": 0.2540397644042969, "learning_rate": 0.00019812587532912624, "loss": 11.6815, "step": 8874 }, { "epoch": 0.18577828016411288, "grad_norm": 0.264570027589798, "learning_rate": 0.00019812545281600993, "loss": 11.6669, "step": 8875 }, { "epoch": 0.18579921292807502, "grad_norm": 0.2227993905544281, "learning_rate": 0.00019812503025572276, "loss": 11.6813, "step": 8876 }, { "epoch": 0.18582014569203717, "grad_norm": 0.22391097247600555, "learning_rate": 0.0001981246076482649, "loss": 11.6783, "step": 8877 }, { "epoch": 0.18584107845599934, "grad_norm": 0.241995170712471, "learning_rate": 0.00019812418499363656, "loss": 11.6795, "step": 8878 }, { "epoch": 0.1858620112199615, "grad_norm": 0.3176066279411316, "learning_rate": 0.00019812376229183797, "loss": 11.6865, "step": 8879 }, { "epoch": 0.18588294398392363, "grad_norm": 0.2435736358165741, "learning_rate": 0.00019812333954286933, "loss": 11.6687, "step": 8880 }, { "epoch": 0.1859038767478858, "grad_norm": 0.2813495993614197, "learning_rate": 0.0001981229167467308, "loss": 11.6653, "step": 8881 }, { "epoch": 0.18592480951184795, "grad_norm": 0.2335929572582245, "learning_rate": 0.00019812249390342265, "loss": 11.689, "step": 8882 }, { "epoch": 0.1859457422758101, "grad_norm": 0.23038947582244873, "learning_rate": 0.00019812207101294505, "loss": 11.673, "step": 8883 }, { "epoch": 0.18596667503977224, "grad_norm": 0.24794858694076538, "learning_rate": 0.0001981216480752982, "loss": 11.6902, "step": 8884 }, { "epoch": 0.1859876078037344, "grad_norm": 0.2555329501628876, "learning_rate": 0.0001981212250904823, "loss": 11.6779, "step": 8885 }, { "epoch": 0.18600854056769656, "grad_norm": 0.20895099639892578, "learning_rate": 0.00019812080205849756, "loss": 11.6756, "step": 8886 }, { "epoch": 0.1860294733316587, "grad_norm": 0.3002948760986328, "learning_rate": 0.00019812037897934422, "loss": 11.6901, "step": 8887 }, { "epoch": 0.18605040609562087, "grad_norm": 0.22626560926437378, "learning_rate": 0.0001981199558530224, "loss": 11.6833, "step": 8888 }, { "epoch": 0.18607133885958302, "grad_norm": 0.22450435161590576, "learning_rate": 0.0001981195326795324, "loss": 11.6775, "step": 8889 }, { "epoch": 0.18609227162354516, "grad_norm": 0.26198574900627136, "learning_rate": 0.0001981191094588743, "loss": 11.6694, "step": 8890 }, { "epoch": 0.18611320438750734, "grad_norm": 0.29722264409065247, "learning_rate": 0.00019811868619104846, "loss": 11.6752, "step": 8891 }, { "epoch": 0.18613413715146948, "grad_norm": 0.2574159801006317, "learning_rate": 0.00019811826287605494, "loss": 11.6791, "step": 8892 }, { "epoch": 0.18615506991543163, "grad_norm": 0.24326853454113007, "learning_rate": 0.00019811783951389402, "loss": 11.6828, "step": 8893 }, { "epoch": 0.1861760026793938, "grad_norm": 0.3326054811477661, "learning_rate": 0.00019811741610456588, "loss": 11.6688, "step": 8894 }, { "epoch": 0.18619693544335594, "grad_norm": 0.22565606236457825, "learning_rate": 0.00019811699264807078, "loss": 11.6667, "step": 8895 }, { "epoch": 0.1862178682073181, "grad_norm": 0.24442365765571594, "learning_rate": 0.00019811656914440885, "loss": 11.6784, "step": 8896 }, { "epoch": 0.18623880097128026, "grad_norm": 0.22847974300384521, "learning_rate": 0.0001981161455935803, "loss": 11.6915, "step": 8897 }, { "epoch": 0.1862597337352424, "grad_norm": 0.255008727312088, "learning_rate": 0.00019811572199558537, "loss": 11.6724, "step": 8898 }, { "epoch": 0.18628066649920455, "grad_norm": 0.2811926603317261, "learning_rate": 0.00019811529835042424, "loss": 11.6937, "step": 8899 }, { "epoch": 0.1863015992631667, "grad_norm": 0.2799991965293884, "learning_rate": 0.0001981148746580971, "loss": 11.6986, "step": 8900 }, { "epoch": 0.18632253202712887, "grad_norm": 0.27015769481658936, "learning_rate": 0.00019811445091860418, "loss": 11.68, "step": 8901 }, { "epoch": 0.186343464791091, "grad_norm": 0.24294167757034302, "learning_rate": 0.00019811402713194572, "loss": 11.6885, "step": 8902 }, { "epoch": 0.18636439755505316, "grad_norm": 0.20895957946777344, "learning_rate": 0.00019811360329812186, "loss": 11.6676, "step": 8903 }, { "epoch": 0.18638533031901533, "grad_norm": 0.20673276484012604, "learning_rate": 0.0001981131794171328, "loss": 11.6827, "step": 8904 }, { "epoch": 0.18640626308297747, "grad_norm": 0.2395399808883667, "learning_rate": 0.0001981127554889788, "loss": 11.7014, "step": 8905 }, { "epoch": 0.18642719584693962, "grad_norm": 0.26211977005004883, "learning_rate": 0.00019811233151366, "loss": 11.6817, "step": 8906 }, { "epoch": 0.1864481286109018, "grad_norm": 0.24987094104290009, "learning_rate": 0.00019811190749117666, "loss": 11.6817, "step": 8907 }, { "epoch": 0.18646906137486394, "grad_norm": 0.24092642962932587, "learning_rate": 0.00019811148342152896, "loss": 11.6725, "step": 8908 }, { "epoch": 0.18648999413882608, "grad_norm": 0.19649818539619446, "learning_rate": 0.00019811105930471706, "loss": 11.6893, "step": 8909 }, { "epoch": 0.18651092690278825, "grad_norm": 0.2828288674354553, "learning_rate": 0.00019811063514074126, "loss": 11.682, "step": 8910 }, { "epoch": 0.1865318596667504, "grad_norm": 0.2508549094200134, "learning_rate": 0.0001981102109296017, "loss": 11.6763, "step": 8911 }, { "epoch": 0.18655279243071254, "grad_norm": 0.22629524767398834, "learning_rate": 0.0001981097866712986, "loss": 11.6693, "step": 8912 }, { "epoch": 0.18657372519467472, "grad_norm": 0.2129192352294922, "learning_rate": 0.00019810936236583216, "loss": 11.6643, "step": 8913 }, { "epoch": 0.18659465795863686, "grad_norm": 0.24509282410144806, "learning_rate": 0.00019810893801320257, "loss": 11.6867, "step": 8914 }, { "epoch": 0.186615590722599, "grad_norm": 0.23259380459785461, "learning_rate": 0.00019810851361341003, "loss": 11.6781, "step": 8915 }, { "epoch": 0.18663652348656118, "grad_norm": 0.3088061809539795, "learning_rate": 0.0001981080891664548, "loss": 11.6802, "step": 8916 }, { "epoch": 0.18665745625052332, "grad_norm": 0.22417879104614258, "learning_rate": 0.00019810766467233705, "loss": 11.6644, "step": 8917 }, { "epoch": 0.18667838901448547, "grad_norm": 0.20308616757392883, "learning_rate": 0.00019810724013105696, "loss": 11.6857, "step": 8918 }, { "epoch": 0.1866993217784476, "grad_norm": 1.65939462184906, "learning_rate": 0.00019810681554261473, "loss": 11.5721, "step": 8919 }, { "epoch": 0.18672025454240979, "grad_norm": 0.22443771362304688, "learning_rate": 0.00019810639090701062, "loss": 11.6765, "step": 8920 }, { "epoch": 0.18674118730637193, "grad_norm": 0.28044989705085754, "learning_rate": 0.00019810596622424478, "loss": 11.6881, "step": 8921 }, { "epoch": 0.18676212007033408, "grad_norm": 0.2605118751525879, "learning_rate": 0.00019810554149431745, "loss": 11.6934, "step": 8922 }, { "epoch": 0.18678305283429625, "grad_norm": 0.23685258626937866, "learning_rate": 0.0001981051167172288, "loss": 11.6973, "step": 8923 }, { "epoch": 0.1868039855982584, "grad_norm": 0.2161848098039627, "learning_rate": 0.00019810469189297905, "loss": 11.6806, "step": 8924 }, { "epoch": 0.18682491836222054, "grad_norm": 0.3032032251358032, "learning_rate": 0.00019810426702156846, "loss": 11.6713, "step": 8925 }, { "epoch": 0.1868458511261827, "grad_norm": 0.24275654554367065, "learning_rate": 0.00019810384210299715, "loss": 11.6767, "step": 8926 }, { "epoch": 0.18686678389014486, "grad_norm": 0.243992879986763, "learning_rate": 0.00019810341713726536, "loss": 11.6938, "step": 8927 }, { "epoch": 0.186887716654107, "grad_norm": 0.2589995563030243, "learning_rate": 0.0001981029921243733, "loss": 11.6849, "step": 8928 }, { "epoch": 0.18690864941806917, "grad_norm": 0.24693000316619873, "learning_rate": 0.00019810256706432117, "loss": 11.6867, "step": 8929 }, { "epoch": 0.18692958218203132, "grad_norm": 0.2329302579164505, "learning_rate": 0.00019810214195710915, "loss": 11.6716, "step": 8930 }, { "epoch": 0.18695051494599346, "grad_norm": 0.34043964743614197, "learning_rate": 0.00019810171680273745, "loss": 11.6878, "step": 8931 }, { "epoch": 0.18697144770995563, "grad_norm": 0.1892787516117096, "learning_rate": 0.00019810129160120632, "loss": 11.6727, "step": 8932 }, { "epoch": 0.18699238047391778, "grad_norm": 0.27052685618400574, "learning_rate": 0.00019810086635251592, "loss": 11.6808, "step": 8933 }, { "epoch": 0.18701331323787992, "grad_norm": 0.22320438921451569, "learning_rate": 0.00019810044105666647, "loss": 11.6902, "step": 8934 }, { "epoch": 0.18703424600184207, "grad_norm": 0.35749995708465576, "learning_rate": 0.00019810001571365818, "loss": 11.6888, "step": 8935 }, { "epoch": 0.18705517876580424, "grad_norm": 0.2190866619348526, "learning_rate": 0.00019809959032349124, "loss": 11.6736, "step": 8936 }, { "epoch": 0.1870761115297664, "grad_norm": 0.22156552970409393, "learning_rate": 0.00019809916488616587, "loss": 11.6951, "step": 8937 }, { "epoch": 0.18709704429372853, "grad_norm": 0.2366587519645691, "learning_rate": 0.00019809873940168226, "loss": 11.7003, "step": 8938 }, { "epoch": 0.1871179770576907, "grad_norm": 0.26232850551605225, "learning_rate": 0.00019809831387004062, "loss": 11.6871, "step": 8939 }, { "epoch": 0.18713890982165285, "grad_norm": 0.2528976798057556, "learning_rate": 0.00019809788829124113, "loss": 11.6862, "step": 8940 }, { "epoch": 0.187159842585615, "grad_norm": 0.2813825011253357, "learning_rate": 0.000198097462665284, "loss": 11.6806, "step": 8941 }, { "epoch": 0.18718077534957717, "grad_norm": 0.24774673581123352, "learning_rate": 0.00019809703699216952, "loss": 11.6895, "step": 8942 }, { "epoch": 0.1872017081135393, "grad_norm": 0.2771279513835907, "learning_rate": 0.00019809661127189783, "loss": 11.6781, "step": 8943 }, { "epoch": 0.18722264087750146, "grad_norm": 0.2296333909034729, "learning_rate": 0.00019809618550446908, "loss": 11.6749, "step": 8944 }, { "epoch": 0.18724357364146363, "grad_norm": 0.22439584136009216, "learning_rate": 0.00019809575968988353, "loss": 11.6691, "step": 8945 }, { "epoch": 0.18726450640542577, "grad_norm": 0.26578307151794434, "learning_rate": 0.00019809533382814142, "loss": 11.6899, "step": 8946 }, { "epoch": 0.18728543916938792, "grad_norm": 0.28505223989486694, "learning_rate": 0.0001980949079192429, "loss": 11.671, "step": 8947 }, { "epoch": 0.1873063719333501, "grad_norm": 0.28628087043762207, "learning_rate": 0.0001980944819631882, "loss": 11.6792, "step": 8948 }, { "epoch": 0.18732730469731224, "grad_norm": 0.20615537464618683, "learning_rate": 0.00019809405595997751, "loss": 11.688, "step": 8949 }, { "epoch": 0.18734823746127438, "grad_norm": 0.2815224826335907, "learning_rate": 0.00019809362990961103, "loss": 11.6748, "step": 8950 }, { "epoch": 0.18736917022523655, "grad_norm": 0.24435074627399445, "learning_rate": 0.00019809320381208902, "loss": 11.6757, "step": 8951 }, { "epoch": 0.1873901029891987, "grad_norm": 0.2312314808368683, "learning_rate": 0.00019809277766741161, "loss": 11.6909, "step": 8952 }, { "epoch": 0.18741103575316084, "grad_norm": 0.33121219277381897, "learning_rate": 0.00019809235147557903, "loss": 11.6875, "step": 8953 }, { "epoch": 0.187431968517123, "grad_norm": 0.3038991391658783, "learning_rate": 0.0001980919252365915, "loss": 11.6954, "step": 8954 }, { "epoch": 0.18745290128108516, "grad_norm": 0.20712091028690338, "learning_rate": 0.00019809149895044922, "loss": 11.6874, "step": 8955 }, { "epoch": 0.1874738340450473, "grad_norm": 0.22887471318244934, "learning_rate": 0.00019809107261715237, "loss": 11.6703, "step": 8956 }, { "epoch": 0.18749476680900945, "grad_norm": 0.2547525465488434, "learning_rate": 0.0001980906462367012, "loss": 11.6607, "step": 8957 }, { "epoch": 0.18751569957297162, "grad_norm": 0.24336165189743042, "learning_rate": 0.0001980902198090959, "loss": 11.6671, "step": 8958 }, { "epoch": 0.18753663233693377, "grad_norm": 0.22840018570423126, "learning_rate": 0.0001980897933343366, "loss": 11.6653, "step": 8959 }, { "epoch": 0.1875575651008959, "grad_norm": 0.3641381561756134, "learning_rate": 0.00019808936681242362, "loss": 11.6648, "step": 8960 }, { "epoch": 0.18757849786485808, "grad_norm": 0.2441326379776001, "learning_rate": 0.00019808894024335712, "loss": 11.6661, "step": 8961 }, { "epoch": 0.18759943062882023, "grad_norm": 0.2750356197357178, "learning_rate": 0.0001980885136271373, "loss": 11.6877, "step": 8962 }, { "epoch": 0.18762036339278237, "grad_norm": 0.2697383463382721, "learning_rate": 0.00019808808696376433, "loss": 11.6795, "step": 8963 }, { "epoch": 0.18764129615674455, "grad_norm": 0.2726691961288452, "learning_rate": 0.00019808766025323848, "loss": 11.6838, "step": 8964 }, { "epoch": 0.1876622289207067, "grad_norm": 0.21822567284107208, "learning_rate": 0.00019808723349555994, "loss": 11.6796, "step": 8965 }, { "epoch": 0.18768316168466884, "grad_norm": 0.23272313177585602, "learning_rate": 0.00019808680669072888, "loss": 11.6646, "step": 8966 }, { "epoch": 0.187704094448631, "grad_norm": 0.26184144616127014, "learning_rate": 0.0001980863798387455, "loss": 11.6988, "step": 8967 }, { "epoch": 0.18772502721259315, "grad_norm": 0.21806426346302032, "learning_rate": 0.0001980859529396101, "loss": 11.6517, "step": 8968 }, { "epoch": 0.1877459599765553, "grad_norm": 0.30652377009391785, "learning_rate": 0.00019808552599332276, "loss": 11.6696, "step": 8969 }, { "epoch": 0.18776689274051747, "grad_norm": 0.23101697862148285, "learning_rate": 0.00019808509899988378, "loss": 11.6856, "step": 8970 }, { "epoch": 0.18778782550447962, "grad_norm": 0.21096530556678772, "learning_rate": 0.0001980846719592933, "loss": 11.6868, "step": 8971 }, { "epoch": 0.18780875826844176, "grad_norm": 0.213398739695549, "learning_rate": 0.00019808424487155157, "loss": 11.6734, "step": 8972 }, { "epoch": 0.1878296910324039, "grad_norm": 0.22318479418754578, "learning_rate": 0.00019808381773665876, "loss": 11.6762, "step": 8973 }, { "epoch": 0.18785062379636608, "grad_norm": 0.26659348607063293, "learning_rate": 0.00019808339055461512, "loss": 11.6771, "step": 8974 }, { "epoch": 0.18787155656032822, "grad_norm": 0.34918951988220215, "learning_rate": 0.0001980829633254208, "loss": 11.6815, "step": 8975 }, { "epoch": 0.18789248932429037, "grad_norm": 0.2092593014240265, "learning_rate": 0.00019808253604907606, "loss": 11.6795, "step": 8976 }, { "epoch": 0.18791342208825254, "grad_norm": 0.23755481839179993, "learning_rate": 0.00019808210872558103, "loss": 11.6739, "step": 8977 }, { "epoch": 0.18793435485221469, "grad_norm": 0.24806198477745056, "learning_rate": 0.00019808168135493603, "loss": 11.6671, "step": 8978 }, { "epoch": 0.18795528761617683, "grad_norm": 0.2379072606563568, "learning_rate": 0.00019808125393714116, "loss": 11.681, "step": 8979 }, { "epoch": 0.187976220380139, "grad_norm": 0.2513352334499359, "learning_rate": 0.00019808082647219667, "loss": 11.6748, "step": 8980 }, { "epoch": 0.18799715314410115, "grad_norm": 0.25447267293930054, "learning_rate": 0.00019808039896010278, "loss": 11.6849, "step": 8981 }, { "epoch": 0.1880180859080633, "grad_norm": 0.3007049858570099, "learning_rate": 0.00019807997140085967, "loss": 11.6671, "step": 8982 }, { "epoch": 0.18803901867202547, "grad_norm": 0.24624104797840118, "learning_rate": 0.00019807954379446753, "loss": 11.6835, "step": 8983 }, { "epoch": 0.1880599514359876, "grad_norm": 0.22933900356292725, "learning_rate": 0.00019807911614092661, "loss": 11.6707, "step": 8984 }, { "epoch": 0.18808088419994975, "grad_norm": 0.2358676642179489, "learning_rate": 0.00019807868844023708, "loss": 11.6861, "step": 8985 }, { "epoch": 0.18810181696391193, "grad_norm": 0.22824352979660034, "learning_rate": 0.00019807826069239917, "loss": 11.6574, "step": 8986 }, { "epoch": 0.18812274972787407, "grad_norm": 0.20591309666633606, "learning_rate": 0.00019807783289741307, "loss": 11.6723, "step": 8987 }, { "epoch": 0.18814368249183622, "grad_norm": 0.342589795589447, "learning_rate": 0.00019807740505527899, "loss": 11.6702, "step": 8988 }, { "epoch": 0.18816461525579836, "grad_norm": 0.30741962790489197, "learning_rate": 0.00019807697716599714, "loss": 11.693, "step": 8989 }, { "epoch": 0.18818554801976053, "grad_norm": 0.3075580894947052, "learning_rate": 0.00019807654922956772, "loss": 11.6969, "step": 8990 }, { "epoch": 0.18820648078372268, "grad_norm": 0.18427273631095886, "learning_rate": 0.00019807612124599091, "loss": 11.6725, "step": 8991 }, { "epoch": 0.18822741354768482, "grad_norm": 0.22887185215950012, "learning_rate": 0.00019807569321526699, "loss": 11.687, "step": 8992 }, { "epoch": 0.188248346311647, "grad_norm": 0.2341277003288269, "learning_rate": 0.0001980752651373961, "loss": 11.6743, "step": 8993 }, { "epoch": 0.18826927907560914, "grad_norm": 0.2609529495239258, "learning_rate": 0.00019807483701237845, "loss": 11.6755, "step": 8994 }, { "epoch": 0.1882902118395713, "grad_norm": 0.21953554451465607, "learning_rate": 0.00019807440884021428, "loss": 11.6967, "step": 8995 }, { "epoch": 0.18831114460353346, "grad_norm": 0.2680833339691162, "learning_rate": 0.00019807398062090376, "loss": 11.6822, "step": 8996 }, { "epoch": 0.1883320773674956, "grad_norm": 0.36472129821777344, "learning_rate": 0.00019807355235444712, "loss": 11.693, "step": 8997 }, { "epoch": 0.18835301013145775, "grad_norm": 0.2679446339607239, "learning_rate": 0.00019807312404084457, "loss": 11.6736, "step": 8998 }, { "epoch": 0.18837394289541992, "grad_norm": 0.22891946136951447, "learning_rate": 0.00019807269568009628, "loss": 11.6764, "step": 8999 }, { "epoch": 0.18839487565938207, "grad_norm": 0.21172264218330383, "learning_rate": 0.00019807226727220248, "loss": 11.6773, "step": 9000 }, { "epoch": 0.18839487565938207, "eval_loss": 11.67927074432373, "eval_runtime": 34.3573, "eval_samples_per_second": 27.971, "eval_steps_per_second": 7.015, "step": 9000 }, { "epoch": 0.1884158084233442, "grad_norm": 0.2964959442615509, "learning_rate": 0.0001980718388171634, "loss": 11.6871, "step": 9001 }, { "epoch": 0.18843674118730638, "grad_norm": 0.2944232225418091, "learning_rate": 0.00019807141031497923, "loss": 11.6951, "step": 9002 }, { "epoch": 0.18845767395126853, "grad_norm": 0.25827351212501526, "learning_rate": 0.00019807098176565012, "loss": 11.6971, "step": 9003 }, { "epoch": 0.18847860671523067, "grad_norm": 0.28378185629844666, "learning_rate": 0.00019807055316917635, "loss": 11.6868, "step": 9004 }, { "epoch": 0.18849953947919285, "grad_norm": 0.22284600138664246, "learning_rate": 0.0001980701245255581, "loss": 11.6785, "step": 9005 }, { "epoch": 0.188520472243155, "grad_norm": 0.3572536110877991, "learning_rate": 0.0001980696958347956, "loss": 11.6962, "step": 9006 }, { "epoch": 0.18854140500711714, "grad_norm": 0.2658984959125519, "learning_rate": 0.00019806926709688898, "loss": 11.6775, "step": 9007 }, { "epoch": 0.18856233777107928, "grad_norm": 0.18217220902442932, "learning_rate": 0.00019806883831183854, "loss": 11.6731, "step": 9008 }, { "epoch": 0.18858327053504145, "grad_norm": 0.25458475947380066, "learning_rate": 0.00019806840947964442, "loss": 11.6806, "step": 9009 }, { "epoch": 0.1886042032990036, "grad_norm": 0.3112211525440216, "learning_rate": 0.00019806798060030686, "loss": 11.6906, "step": 9010 }, { "epoch": 0.18862513606296574, "grad_norm": 0.24578140676021576, "learning_rate": 0.00019806755167382607, "loss": 11.6877, "step": 9011 }, { "epoch": 0.18864606882692792, "grad_norm": 0.24445582926273346, "learning_rate": 0.00019806712270020223, "loss": 11.6689, "step": 9012 }, { "epoch": 0.18866700159089006, "grad_norm": 0.24817952513694763, "learning_rate": 0.00019806669367943556, "loss": 11.6768, "step": 9013 }, { "epoch": 0.1886879343548522, "grad_norm": 0.26799875497817993, "learning_rate": 0.00019806626461152624, "loss": 11.6846, "step": 9014 }, { "epoch": 0.18870886711881438, "grad_norm": 0.2442193627357483, "learning_rate": 0.00019806583549647453, "loss": 11.6799, "step": 9015 }, { "epoch": 0.18872979988277652, "grad_norm": 0.25396522879600525, "learning_rate": 0.0001980654063342806, "loss": 11.6695, "step": 9016 }, { "epoch": 0.18875073264673867, "grad_norm": 0.2481248825788498, "learning_rate": 0.00019806497712494467, "loss": 11.696, "step": 9017 }, { "epoch": 0.18877166541070084, "grad_norm": 0.201185941696167, "learning_rate": 0.00019806454786846692, "loss": 11.675, "step": 9018 }, { "epoch": 0.18879259817466298, "grad_norm": 0.37848928570747375, "learning_rate": 0.0001980641185648476, "loss": 11.6799, "step": 9019 }, { "epoch": 0.18881353093862513, "grad_norm": 0.3493940830230713, "learning_rate": 0.0001980636892140869, "loss": 11.675, "step": 9020 }, { "epoch": 0.1888344637025873, "grad_norm": 0.24925127625465393, "learning_rate": 0.000198063259816185, "loss": 11.67, "step": 9021 }, { "epoch": 0.18885539646654945, "grad_norm": 0.20828135311603546, "learning_rate": 0.00019806283037114211, "loss": 11.6632, "step": 9022 }, { "epoch": 0.1888763292305116, "grad_norm": 0.2289385050535202, "learning_rate": 0.0001980624008789585, "loss": 11.6922, "step": 9023 }, { "epoch": 0.18889726199447376, "grad_norm": 0.23738788068294525, "learning_rate": 0.0001980619713396343, "loss": 11.6819, "step": 9024 }, { "epoch": 0.1889181947584359, "grad_norm": 0.2643432319164276, "learning_rate": 0.00019806154175316975, "loss": 11.6859, "step": 9025 }, { "epoch": 0.18893912752239805, "grad_norm": 0.3329589366912842, "learning_rate": 0.00019806111211956507, "loss": 11.695, "step": 9026 }, { "epoch": 0.1889600602863602, "grad_norm": 0.2531280815601349, "learning_rate": 0.00019806068243882042, "loss": 11.6946, "step": 9027 }, { "epoch": 0.18898099305032237, "grad_norm": 0.24251782894134521, "learning_rate": 0.000198060252710936, "loss": 11.6698, "step": 9028 }, { "epoch": 0.18900192581428452, "grad_norm": 0.34355291724205017, "learning_rate": 0.00019805982293591212, "loss": 11.6805, "step": 9029 }, { "epoch": 0.18902285857824666, "grad_norm": 0.3181098401546478, "learning_rate": 0.00019805939311374888, "loss": 11.6835, "step": 9030 }, { "epoch": 0.18904379134220883, "grad_norm": 0.24978579580783844, "learning_rate": 0.00019805896324444656, "loss": 11.6782, "step": 9031 }, { "epoch": 0.18906472410617098, "grad_norm": 0.2809198796749115, "learning_rate": 0.0001980585333280053, "loss": 11.6889, "step": 9032 }, { "epoch": 0.18908565687013312, "grad_norm": 0.28804996609687805, "learning_rate": 0.00019805810336442534, "loss": 11.6708, "step": 9033 }, { "epoch": 0.1891065896340953, "grad_norm": 0.30858853459358215, "learning_rate": 0.00019805767335370688, "loss": 11.6822, "step": 9034 }, { "epoch": 0.18912752239805744, "grad_norm": 0.22669601440429688, "learning_rate": 0.00019805724329585015, "loss": 11.6743, "step": 9035 }, { "epoch": 0.18914845516201959, "grad_norm": 0.2530064880847931, "learning_rate": 0.00019805681319085534, "loss": 11.6721, "step": 9036 }, { "epoch": 0.18916938792598176, "grad_norm": 0.2229863554239273, "learning_rate": 0.00019805638303872262, "loss": 11.6935, "step": 9037 }, { "epoch": 0.1891903206899439, "grad_norm": 0.24865098297595978, "learning_rate": 0.00019805595283945226, "loss": 11.6712, "step": 9038 }, { "epoch": 0.18921125345390605, "grad_norm": 0.23813213407993317, "learning_rate": 0.00019805552259304445, "loss": 11.6827, "step": 9039 }, { "epoch": 0.18923218621786822, "grad_norm": 0.35156455636024475, "learning_rate": 0.00019805509229949936, "loss": 11.6946, "step": 9040 }, { "epoch": 0.18925311898183036, "grad_norm": 0.22044125199317932, "learning_rate": 0.00019805466195881724, "loss": 11.6916, "step": 9041 }, { "epoch": 0.1892740517457925, "grad_norm": 0.2355402410030365, "learning_rate": 0.00019805423157099824, "loss": 11.6786, "step": 9042 }, { "epoch": 0.18929498450975465, "grad_norm": 0.2645147144794464, "learning_rate": 0.00019805380113604266, "loss": 11.6794, "step": 9043 }, { "epoch": 0.18931591727371683, "grad_norm": 0.2804671823978424, "learning_rate": 0.00019805337065395063, "loss": 11.6685, "step": 9044 }, { "epoch": 0.18933685003767897, "grad_norm": 0.2644904553890228, "learning_rate": 0.00019805294012472237, "loss": 11.695, "step": 9045 }, { "epoch": 0.18935778280164112, "grad_norm": 0.2492242157459259, "learning_rate": 0.00019805250954835812, "loss": 11.6802, "step": 9046 }, { "epoch": 0.1893787155656033, "grad_norm": 0.22418497502803802, "learning_rate": 0.00019805207892485804, "loss": 11.6731, "step": 9047 }, { "epoch": 0.18939964832956543, "grad_norm": 0.27596622705459595, "learning_rate": 0.0001980516482542224, "loss": 11.6785, "step": 9048 }, { "epoch": 0.18942058109352758, "grad_norm": 0.2442515790462494, "learning_rate": 0.00019805121753645131, "loss": 11.6814, "step": 9049 }, { "epoch": 0.18944151385748975, "grad_norm": 0.2144228219985962, "learning_rate": 0.00019805078677154504, "loss": 11.674, "step": 9050 }, { "epoch": 0.1894624466214519, "grad_norm": 0.26065903902053833, "learning_rate": 0.00019805035595950383, "loss": 11.6691, "step": 9051 }, { "epoch": 0.18948337938541404, "grad_norm": 0.2504398822784424, "learning_rate": 0.00019804992510032783, "loss": 11.6703, "step": 9052 }, { "epoch": 0.18950431214937621, "grad_norm": 0.31033292412757874, "learning_rate": 0.00019804949419401728, "loss": 11.6721, "step": 9053 }, { "epoch": 0.18952524491333836, "grad_norm": 0.20927076041698456, "learning_rate": 0.00019804906324057234, "loss": 11.6717, "step": 9054 }, { "epoch": 0.1895461776773005, "grad_norm": 0.3275319039821625, "learning_rate": 0.00019804863223999328, "loss": 11.6777, "step": 9055 }, { "epoch": 0.18956711044126268, "grad_norm": 0.23787084221839905, "learning_rate": 0.00019804820119228027, "loss": 11.6696, "step": 9056 }, { "epoch": 0.18958804320522482, "grad_norm": 0.23281487822532654, "learning_rate": 0.00019804777009743352, "loss": 11.6839, "step": 9057 }, { "epoch": 0.18960897596918697, "grad_norm": 0.27660638093948364, "learning_rate": 0.00019804733895545327, "loss": 11.6785, "step": 9058 }, { "epoch": 0.18962990873314914, "grad_norm": 0.30416640639305115, "learning_rate": 0.00019804690776633964, "loss": 11.6924, "step": 9059 }, { "epoch": 0.18965084149711128, "grad_norm": 0.23835521936416626, "learning_rate": 0.00019804647653009293, "loss": 11.6929, "step": 9060 }, { "epoch": 0.18967177426107343, "grad_norm": 1.1875604391098022, "learning_rate": 0.00019804604524671334, "loss": 11.6761, "step": 9061 }, { "epoch": 0.18969270702503557, "grad_norm": 0.3092977702617645, "learning_rate": 0.00019804561391620102, "loss": 11.674, "step": 9062 }, { "epoch": 0.18971363978899775, "grad_norm": 0.25374308228492737, "learning_rate": 0.00019804518253855623, "loss": 11.6701, "step": 9063 }, { "epoch": 0.1897345725529599, "grad_norm": 0.2129417359828949, "learning_rate": 0.00019804475111377914, "loss": 11.6582, "step": 9064 }, { "epoch": 0.18975550531692204, "grad_norm": 0.21415475010871887, "learning_rate": 0.00019804431964186997, "loss": 11.6622, "step": 9065 }, { "epoch": 0.1897764380808842, "grad_norm": 0.18936936557292938, "learning_rate": 0.00019804388812282896, "loss": 11.6689, "step": 9066 }, { "epoch": 0.18979737084484635, "grad_norm": 0.27287742495536804, "learning_rate": 0.00019804345655665624, "loss": 11.6552, "step": 9067 }, { "epoch": 0.1898183036088085, "grad_norm": 0.22606691718101501, "learning_rate": 0.00019804302494335209, "loss": 11.677, "step": 9068 }, { "epoch": 0.18983923637277067, "grad_norm": 0.26725804805755615, "learning_rate": 0.0001980425932829167, "loss": 11.6758, "step": 9069 }, { "epoch": 0.18986016913673281, "grad_norm": 0.3969671428203583, "learning_rate": 0.00019804216157535027, "loss": 11.6728, "step": 9070 }, { "epoch": 0.18988110190069496, "grad_norm": 0.30184274911880493, "learning_rate": 0.00019804172982065298, "loss": 11.7018, "step": 9071 }, { "epoch": 0.18990203466465713, "grad_norm": 0.22726386785507202, "learning_rate": 0.0001980412980188251, "loss": 11.6735, "step": 9072 }, { "epoch": 0.18992296742861928, "grad_norm": 0.2374182492494583, "learning_rate": 0.00019804086616986678, "loss": 11.6903, "step": 9073 }, { "epoch": 0.18994390019258142, "grad_norm": 0.30597570538520813, "learning_rate": 0.00019804043427377828, "loss": 11.6855, "step": 9074 }, { "epoch": 0.1899648329565436, "grad_norm": 0.21911503374576569, "learning_rate": 0.00019804000233055976, "loss": 11.6755, "step": 9075 }, { "epoch": 0.18998576572050574, "grad_norm": 0.20524291694164276, "learning_rate": 0.00019803957034021143, "loss": 11.6832, "step": 9076 }, { "epoch": 0.19000669848446788, "grad_norm": 0.2524695098400116, "learning_rate": 0.00019803913830273353, "loss": 11.6787, "step": 9077 }, { "epoch": 0.19002763124843003, "grad_norm": 0.24576117098331451, "learning_rate": 0.00019803870621812626, "loss": 11.6892, "step": 9078 }, { "epoch": 0.1900485640123922, "grad_norm": 0.257875919342041, "learning_rate": 0.0001980382740863898, "loss": 11.6924, "step": 9079 }, { "epoch": 0.19006949677635435, "grad_norm": 0.25884103775024414, "learning_rate": 0.00019803784190752438, "loss": 11.6772, "step": 9080 }, { "epoch": 0.1900904295403165, "grad_norm": 0.2617088854312897, "learning_rate": 0.00019803740968153024, "loss": 11.6833, "step": 9081 }, { "epoch": 0.19011136230427866, "grad_norm": 0.279838889837265, "learning_rate": 0.00019803697740840753, "loss": 11.6905, "step": 9082 }, { "epoch": 0.1901322950682408, "grad_norm": 0.3326278626918793, "learning_rate": 0.00019803654508815646, "loss": 11.6905, "step": 9083 }, { "epoch": 0.19015322783220295, "grad_norm": 0.23382030427455902, "learning_rate": 0.00019803611272077728, "loss": 11.6786, "step": 9084 }, { "epoch": 0.19017416059616513, "grad_norm": 0.22868026793003082, "learning_rate": 0.00019803568030627016, "loss": 11.6783, "step": 9085 }, { "epoch": 0.19019509336012727, "grad_norm": 0.21076317131519318, "learning_rate": 0.00019803524784463533, "loss": 11.6636, "step": 9086 }, { "epoch": 0.19021602612408942, "grad_norm": 0.28914061188697815, "learning_rate": 0.000198034815335873, "loss": 11.6723, "step": 9087 }, { "epoch": 0.1902369588880516, "grad_norm": 0.23398707807064056, "learning_rate": 0.00019803438277998336, "loss": 11.6702, "step": 9088 }, { "epoch": 0.19025789165201373, "grad_norm": 0.23318621516227722, "learning_rate": 0.00019803395017696661, "loss": 11.6923, "step": 9089 }, { "epoch": 0.19027882441597588, "grad_norm": 0.23192986845970154, "learning_rate": 0.000198033517526823, "loss": 11.6786, "step": 9090 }, { "epoch": 0.19029975717993805, "grad_norm": 0.25415652990341187, "learning_rate": 0.00019803308482955272, "loss": 11.6699, "step": 9091 }, { "epoch": 0.1903206899439002, "grad_norm": 0.27405133843421936, "learning_rate": 0.00019803265208515595, "loss": 11.6807, "step": 9092 }, { "epoch": 0.19034162270786234, "grad_norm": 0.26318472623825073, "learning_rate": 0.00019803221929363293, "loss": 11.6693, "step": 9093 }, { "epoch": 0.1903625554718245, "grad_norm": 0.24782748520374298, "learning_rate": 0.0001980317864549839, "loss": 11.6854, "step": 9094 }, { "epoch": 0.19038348823578666, "grad_norm": 0.21820564568042755, "learning_rate": 0.00019803135356920897, "loss": 11.6754, "step": 9095 }, { "epoch": 0.1904044209997488, "grad_norm": 0.2767714858055115, "learning_rate": 0.00019803092063630843, "loss": 11.6709, "step": 9096 }, { "epoch": 0.19042535376371095, "grad_norm": 0.28229406476020813, "learning_rate": 0.00019803048765628243, "loss": 11.6851, "step": 9097 }, { "epoch": 0.19044628652767312, "grad_norm": 0.2630496621131897, "learning_rate": 0.00019803005462913124, "loss": 11.6603, "step": 9098 }, { "epoch": 0.19046721929163526, "grad_norm": 0.2407674491405487, "learning_rate": 0.00019802962155485504, "loss": 11.6705, "step": 9099 }, { "epoch": 0.1904881520555974, "grad_norm": 0.2489699274301529, "learning_rate": 0.00019802918843345402, "loss": 11.6954, "step": 9100 }, { "epoch": 0.19050908481955958, "grad_norm": 0.1867770403623581, "learning_rate": 0.0001980287552649284, "loss": 11.6903, "step": 9101 }, { "epoch": 0.19053001758352173, "grad_norm": 0.2518065273761749, "learning_rate": 0.0001980283220492784, "loss": 11.6746, "step": 9102 }, { "epoch": 0.19055095034748387, "grad_norm": 0.293582022190094, "learning_rate": 0.00019802788878650423, "loss": 11.6767, "step": 9103 }, { "epoch": 0.19057188311144604, "grad_norm": 0.23523356020450592, "learning_rate": 0.0001980274554766061, "loss": 11.6886, "step": 9104 }, { "epoch": 0.1905928158754082, "grad_norm": 0.2232407033443451, "learning_rate": 0.00019802702211958417, "loss": 11.683, "step": 9105 }, { "epoch": 0.19061374863937033, "grad_norm": 4.131625652313232, "learning_rate": 0.00019802658871543872, "loss": 11.7, "step": 9106 }, { "epoch": 0.1906346814033325, "grad_norm": 0.24560442566871643, "learning_rate": 0.00019802615526416987, "loss": 11.6748, "step": 9107 }, { "epoch": 0.19065561416729465, "grad_norm": 0.273957759141922, "learning_rate": 0.00019802572176577795, "loss": 11.652, "step": 9108 }, { "epoch": 0.1906765469312568, "grad_norm": 0.2555335462093353, "learning_rate": 0.00019802528822026306, "loss": 11.6856, "step": 9109 }, { "epoch": 0.19069747969521897, "grad_norm": 0.30203869938850403, "learning_rate": 0.0001980248546276255, "loss": 11.7045, "step": 9110 }, { "epoch": 0.1907184124591811, "grad_norm": 0.26509320735931396, "learning_rate": 0.00019802442098786535, "loss": 11.6825, "step": 9111 }, { "epoch": 0.19073934522314326, "grad_norm": 0.297499418258667, "learning_rate": 0.00019802398730098294, "loss": 11.68, "step": 9112 }, { "epoch": 0.19076027798710543, "grad_norm": 0.2640356123447418, "learning_rate": 0.00019802355356697847, "loss": 11.684, "step": 9113 }, { "epoch": 0.19078121075106758, "grad_norm": 0.280648410320282, "learning_rate": 0.00019802311978585205, "loss": 11.6743, "step": 9114 }, { "epoch": 0.19080214351502972, "grad_norm": 0.1876436173915863, "learning_rate": 0.000198022685957604, "loss": 11.6735, "step": 9115 }, { "epoch": 0.19082307627899187, "grad_norm": 0.24858041107654572, "learning_rate": 0.00019802225208223444, "loss": 11.6881, "step": 9116 }, { "epoch": 0.19084400904295404, "grad_norm": 0.24511760473251343, "learning_rate": 0.00019802181815974364, "loss": 11.6781, "step": 9117 }, { "epoch": 0.19086494180691618, "grad_norm": 0.2669599950313568, "learning_rate": 0.0001980213841901318, "loss": 11.6834, "step": 9118 }, { "epoch": 0.19088587457087833, "grad_norm": 0.321200966835022, "learning_rate": 0.00019802095017339909, "loss": 11.68, "step": 9119 }, { "epoch": 0.1909068073348405, "grad_norm": 0.328644722700119, "learning_rate": 0.00019802051610954578, "loss": 11.6792, "step": 9120 }, { "epoch": 0.19092774009880265, "grad_norm": 0.23708045482635498, "learning_rate": 0.00019802008199857197, "loss": 11.6784, "step": 9121 }, { "epoch": 0.1909486728627648, "grad_norm": 0.2393164187669754, "learning_rate": 0.00019801964784047803, "loss": 11.6898, "step": 9122 }, { "epoch": 0.19096960562672696, "grad_norm": 0.23540042340755463, "learning_rate": 0.00019801921363526404, "loss": 11.6881, "step": 9123 }, { "epoch": 0.1909905383906891, "grad_norm": 0.24493302404880524, "learning_rate": 0.00019801877938293026, "loss": 11.6787, "step": 9124 }, { "epoch": 0.19101147115465125, "grad_norm": 0.32709047198295593, "learning_rate": 0.00019801834508347685, "loss": 11.6829, "step": 9125 }, { "epoch": 0.19103240391861342, "grad_norm": 1.3494867086410522, "learning_rate": 0.0001980179107369041, "loss": 11.6336, "step": 9126 }, { "epoch": 0.19105333668257557, "grad_norm": 0.3315231502056122, "learning_rate": 0.00019801747634321217, "loss": 11.6975, "step": 9127 }, { "epoch": 0.19107426944653771, "grad_norm": 0.2828322947025299, "learning_rate": 0.00019801704190240127, "loss": 11.6825, "step": 9128 }, { "epoch": 0.1910952022104999, "grad_norm": 0.2655010521411896, "learning_rate": 0.00019801660741447164, "loss": 11.676, "step": 9129 }, { "epoch": 0.19111613497446203, "grad_norm": 0.2874034643173218, "learning_rate": 0.00019801617287942342, "loss": 11.6834, "step": 9130 }, { "epoch": 0.19113706773842418, "grad_norm": 0.26347413659095764, "learning_rate": 0.00019801573829725686, "loss": 11.686, "step": 9131 }, { "epoch": 0.19115800050238632, "grad_norm": 0.26625552773475647, "learning_rate": 0.0001980153036679722, "loss": 11.6749, "step": 9132 }, { "epoch": 0.1911789332663485, "grad_norm": 0.2472955882549286, "learning_rate": 0.0001980148689915696, "loss": 11.6832, "step": 9133 }, { "epoch": 0.19119986603031064, "grad_norm": 0.32038265466690063, "learning_rate": 0.0001980144342680493, "loss": 11.6938, "step": 9134 }, { "epoch": 0.19122079879427278, "grad_norm": 0.23688767850399017, "learning_rate": 0.00019801399949741152, "loss": 11.6863, "step": 9135 }, { "epoch": 0.19124173155823496, "grad_norm": 0.24395786225795746, "learning_rate": 0.0001980135646796564, "loss": 11.684, "step": 9136 }, { "epoch": 0.1912626643221971, "grad_norm": 0.23631539940834045, "learning_rate": 0.00019801312981478422, "loss": 11.6724, "step": 9137 }, { "epoch": 0.19128359708615925, "grad_norm": 0.18881861865520477, "learning_rate": 0.00019801269490279515, "loss": 11.684, "step": 9138 }, { "epoch": 0.19130452985012142, "grad_norm": 0.25436368584632874, "learning_rate": 0.00019801225994368942, "loss": 11.671, "step": 9139 }, { "epoch": 0.19132546261408356, "grad_norm": 0.27136072516441345, "learning_rate": 0.0001980118249374672, "loss": 11.6893, "step": 9140 }, { "epoch": 0.1913463953780457, "grad_norm": 0.2490755170583725, "learning_rate": 0.0001980113898841288, "loss": 11.66, "step": 9141 }, { "epoch": 0.19136732814200788, "grad_norm": 0.2667648196220398, "learning_rate": 0.00019801095478367433, "loss": 11.6735, "step": 9142 }, { "epoch": 0.19138826090597003, "grad_norm": 0.2338913232088089, "learning_rate": 0.000198010519636104, "loss": 11.6614, "step": 9143 }, { "epoch": 0.19140919366993217, "grad_norm": 0.22726328670978546, "learning_rate": 0.00019801008444141811, "loss": 11.6731, "step": 9144 }, { "epoch": 0.19143012643389434, "grad_norm": 0.24437114596366882, "learning_rate": 0.00019800964919961675, "loss": 11.6724, "step": 9145 }, { "epoch": 0.1914510591978565, "grad_norm": 0.22523969411849976, "learning_rate": 0.0001980092139107002, "loss": 11.6725, "step": 9146 }, { "epoch": 0.19147199196181863, "grad_norm": 0.21289432048797607, "learning_rate": 0.0001980087785746687, "loss": 11.6688, "step": 9147 }, { "epoch": 0.1914929247257808, "grad_norm": 0.23578393459320068, "learning_rate": 0.00019800834319152238, "loss": 11.6818, "step": 9148 }, { "epoch": 0.19151385748974295, "grad_norm": 0.2357616126537323, "learning_rate": 0.00019800790776126147, "loss": 11.6772, "step": 9149 }, { "epoch": 0.1915347902537051, "grad_norm": 0.23164594173431396, "learning_rate": 0.00019800747228388622, "loss": 11.6774, "step": 9150 }, { "epoch": 0.19155572301766724, "grad_norm": 0.2515439987182617, "learning_rate": 0.0001980070367593968, "loss": 11.6657, "step": 9151 }, { "epoch": 0.1915766557816294, "grad_norm": 0.35297638177871704, "learning_rate": 0.00019800660118779345, "loss": 11.6711, "step": 9152 }, { "epoch": 0.19159758854559156, "grad_norm": 0.33985447883605957, "learning_rate": 0.00019800616556907635, "loss": 11.6946, "step": 9153 }, { "epoch": 0.1916185213095537, "grad_norm": 0.22362327575683594, "learning_rate": 0.00019800572990324574, "loss": 11.6814, "step": 9154 }, { "epoch": 0.19163945407351587, "grad_norm": 0.2671365439891815, "learning_rate": 0.0001980052941903018, "loss": 11.6733, "step": 9155 }, { "epoch": 0.19166038683747802, "grad_norm": 0.2021106630563736, "learning_rate": 0.00019800485843024474, "loss": 11.6747, "step": 9156 }, { "epoch": 0.19168131960144016, "grad_norm": 0.2431430071592331, "learning_rate": 0.0001980044226230748, "loss": 11.6603, "step": 9157 }, { "epoch": 0.19170225236540234, "grad_norm": 0.25698012113571167, "learning_rate": 0.00019800398676879215, "loss": 11.686, "step": 9158 }, { "epoch": 0.19172318512936448, "grad_norm": 0.2480231076478958, "learning_rate": 0.000198003550867397, "loss": 11.6739, "step": 9159 }, { "epoch": 0.19174411789332663, "grad_norm": 0.23918397724628448, "learning_rate": 0.00019800311491888963, "loss": 11.6688, "step": 9160 }, { "epoch": 0.1917650506572888, "grad_norm": 0.23832811415195465, "learning_rate": 0.0001980026789232702, "loss": 11.6704, "step": 9161 }, { "epoch": 0.19178598342125094, "grad_norm": 0.2554791271686554, "learning_rate": 0.00019800224288053886, "loss": 11.6959, "step": 9162 }, { "epoch": 0.1918069161852131, "grad_norm": 0.22438430786132812, "learning_rate": 0.00019800180679069591, "loss": 11.6949, "step": 9163 }, { "epoch": 0.19182784894917526, "grad_norm": 0.2155892699956894, "learning_rate": 0.00019800137065374154, "loss": 11.6827, "step": 9164 }, { "epoch": 0.1918487817131374, "grad_norm": 0.1923249065876007, "learning_rate": 0.00019800093446967597, "loss": 11.6817, "step": 9165 }, { "epoch": 0.19186971447709955, "grad_norm": 0.24534215033054352, "learning_rate": 0.0001980004982384993, "loss": 11.67, "step": 9166 }, { "epoch": 0.19189064724106172, "grad_norm": 0.2733977138996124, "learning_rate": 0.0001980000619602119, "loss": 11.6788, "step": 9167 }, { "epoch": 0.19191158000502387, "grad_norm": 0.2310258001089096, "learning_rate": 0.0001979996256348139, "loss": 11.6692, "step": 9168 }, { "epoch": 0.191932512768986, "grad_norm": 0.26005735993385315, "learning_rate": 0.0001979991892623055, "loss": 11.6634, "step": 9169 }, { "epoch": 0.19195344553294816, "grad_norm": 0.2286462038755417, "learning_rate": 0.0001979987528426869, "loss": 11.6892, "step": 9170 }, { "epoch": 0.19197437829691033, "grad_norm": 0.3001565635204315, "learning_rate": 0.0001979983163759584, "loss": 11.6742, "step": 9171 }, { "epoch": 0.19199531106087248, "grad_norm": 0.22588111460208893, "learning_rate": 0.0001979978798621201, "loss": 11.6648, "step": 9172 }, { "epoch": 0.19201624382483462, "grad_norm": 0.24980191886425018, "learning_rate": 0.00019799744330117224, "loss": 11.6762, "step": 9173 }, { "epoch": 0.1920371765887968, "grad_norm": 0.2471807450056076, "learning_rate": 0.0001979970066931151, "loss": 11.6732, "step": 9174 }, { "epoch": 0.19205810935275894, "grad_norm": 0.34166041016578674, "learning_rate": 0.00019799657003794882, "loss": 11.694, "step": 9175 }, { "epoch": 0.19207904211672108, "grad_norm": 0.20015236735343933, "learning_rate": 0.0001979961333356736, "loss": 11.6785, "step": 9176 }, { "epoch": 0.19209997488068326, "grad_norm": 0.3298340141773224, "learning_rate": 0.00019799569658628967, "loss": 11.6916, "step": 9177 }, { "epoch": 0.1921209076446454, "grad_norm": 0.37682393193244934, "learning_rate": 0.00019799525978979725, "loss": 11.6815, "step": 9178 }, { "epoch": 0.19214184040860754, "grad_norm": 0.24246808886528015, "learning_rate": 0.00019799482294619657, "loss": 11.6927, "step": 9179 }, { "epoch": 0.19216277317256972, "grad_norm": 0.26287755370140076, "learning_rate": 0.0001979943860554878, "loss": 11.6787, "step": 9180 }, { "epoch": 0.19218370593653186, "grad_norm": 0.28546738624572754, "learning_rate": 0.00019799394911767116, "loss": 11.6739, "step": 9181 }, { "epoch": 0.192204638700494, "grad_norm": 0.2454858273267746, "learning_rate": 0.0001979935121327469, "loss": 11.6825, "step": 9182 }, { "epoch": 0.19222557146445618, "grad_norm": 0.2493865042924881, "learning_rate": 0.00019799307510071513, "loss": 11.6849, "step": 9183 }, { "epoch": 0.19224650422841832, "grad_norm": 0.268513023853302, "learning_rate": 0.00019799263802157616, "loss": 11.6762, "step": 9184 }, { "epoch": 0.19226743699238047, "grad_norm": 0.21983368694782257, "learning_rate": 0.00019799220089533015, "loss": 11.6768, "step": 9185 }, { "epoch": 0.19228836975634261, "grad_norm": 0.2694892883300781, "learning_rate": 0.00019799176372197735, "loss": 11.6819, "step": 9186 }, { "epoch": 0.1923093025203048, "grad_norm": 0.18507975339889526, "learning_rate": 0.00019799132650151794, "loss": 11.6821, "step": 9187 }, { "epoch": 0.19233023528426693, "grad_norm": 0.2723826467990875, "learning_rate": 0.00019799088923395212, "loss": 11.6777, "step": 9188 }, { "epoch": 0.19235116804822908, "grad_norm": 0.31436485052108765, "learning_rate": 0.00019799045191928012, "loss": 11.6903, "step": 9189 }, { "epoch": 0.19237210081219125, "grad_norm": 0.3152309060096741, "learning_rate": 0.00019799001455750217, "loss": 11.6781, "step": 9190 }, { "epoch": 0.1923930335761534, "grad_norm": 0.22873267531394958, "learning_rate": 0.00019798957714861843, "loss": 11.6774, "step": 9191 }, { "epoch": 0.19241396634011554, "grad_norm": 0.27874433994293213, "learning_rate": 0.00019798913969262913, "loss": 11.656, "step": 9192 }, { "epoch": 0.1924348991040777, "grad_norm": 0.2222604751586914, "learning_rate": 0.0001979887021895345, "loss": 11.6906, "step": 9193 }, { "epoch": 0.19245583186803986, "grad_norm": 0.29242128133773804, "learning_rate": 0.00019798826463933474, "loss": 11.6748, "step": 9194 }, { "epoch": 0.192476764632002, "grad_norm": 0.30268001556396484, "learning_rate": 0.00019798782704203008, "loss": 11.6824, "step": 9195 }, { "epoch": 0.19249769739596417, "grad_norm": 0.2451685667037964, "learning_rate": 0.00019798738939762068, "loss": 11.6731, "step": 9196 }, { "epoch": 0.19251863015992632, "grad_norm": 0.260593444108963, "learning_rate": 0.00019798695170610678, "loss": 11.6801, "step": 9197 }, { "epoch": 0.19253956292388846, "grad_norm": 0.26835957169532776, "learning_rate": 0.00019798651396748856, "loss": 11.6764, "step": 9198 }, { "epoch": 0.19256049568785064, "grad_norm": 0.28174668550491333, "learning_rate": 0.0001979860761817663, "loss": 11.6911, "step": 9199 }, { "epoch": 0.19258142845181278, "grad_norm": 0.25259795784950256, "learning_rate": 0.00019798563834894016, "loss": 11.6896, "step": 9200 }, { "epoch": 0.19260236121577493, "grad_norm": 0.24183757603168488, "learning_rate": 0.00019798520046901037, "loss": 11.6683, "step": 9201 }, { "epoch": 0.1926232939797371, "grad_norm": 0.2062378078699112, "learning_rate": 0.0001979847625419771, "loss": 11.6891, "step": 9202 }, { "epoch": 0.19264422674369924, "grad_norm": 0.2616851031780243, "learning_rate": 0.00019798432456784063, "loss": 11.6687, "step": 9203 }, { "epoch": 0.1926651595076614, "grad_norm": 0.2618180513381958, "learning_rate": 0.00019798388654660112, "loss": 11.6988, "step": 9204 }, { "epoch": 0.19268609227162353, "grad_norm": 0.30687427520751953, "learning_rate": 0.00019798344847825878, "loss": 11.6855, "step": 9205 }, { "epoch": 0.1927070250355857, "grad_norm": 0.31829968094825745, "learning_rate": 0.00019798301036281384, "loss": 11.687, "step": 9206 }, { "epoch": 0.19272795779954785, "grad_norm": 0.22598639130592346, "learning_rate": 0.0001979825722002665, "loss": 11.6731, "step": 9207 }, { "epoch": 0.19274889056351, "grad_norm": 0.25648069381713867, "learning_rate": 0.00019798213399061697, "loss": 11.6737, "step": 9208 }, { "epoch": 0.19276982332747217, "grad_norm": 0.22707003355026245, "learning_rate": 0.00019798169573386547, "loss": 11.6836, "step": 9209 }, { "epoch": 0.1927907560914343, "grad_norm": 0.21826773881912231, "learning_rate": 0.00019798125743001222, "loss": 11.6685, "step": 9210 }, { "epoch": 0.19281168885539646, "grad_norm": 0.20546627044677734, "learning_rate": 0.0001979808190790574, "loss": 11.6833, "step": 9211 }, { "epoch": 0.19283262161935863, "grad_norm": 0.23920150101184845, "learning_rate": 0.00019798038068100123, "loss": 11.6773, "step": 9212 }, { "epoch": 0.19285355438332077, "grad_norm": 0.23742520809173584, "learning_rate": 0.00019797994223584394, "loss": 11.6644, "step": 9213 }, { "epoch": 0.19287448714728292, "grad_norm": 0.2744372487068176, "learning_rate": 0.0001979795037435857, "loss": 11.6683, "step": 9214 }, { "epoch": 0.1928954199112451, "grad_norm": 0.23906932771205902, "learning_rate": 0.00019797906520422677, "loss": 11.6687, "step": 9215 }, { "epoch": 0.19291635267520724, "grad_norm": 0.31778454780578613, "learning_rate": 0.00019797862661776735, "loss": 11.6762, "step": 9216 }, { "epoch": 0.19293728543916938, "grad_norm": 0.29638978838920593, "learning_rate": 0.00019797818798420762, "loss": 11.6858, "step": 9217 }, { "epoch": 0.19295821820313155, "grad_norm": 0.22660942375659943, "learning_rate": 0.00019797774930354782, "loss": 11.6616, "step": 9218 }, { "epoch": 0.1929791509670937, "grad_norm": 0.21956638991832733, "learning_rate": 0.00019797731057578818, "loss": 11.674, "step": 9219 }, { "epoch": 0.19300008373105584, "grad_norm": 0.24179479479789734, "learning_rate": 0.00019797687180092885, "loss": 11.6868, "step": 9220 }, { "epoch": 0.193021016495018, "grad_norm": 0.20017777383327484, "learning_rate": 0.00019797643297897008, "loss": 11.6928, "step": 9221 }, { "epoch": 0.19304194925898016, "grad_norm": 0.2581314742565155, "learning_rate": 0.0001979759941099121, "loss": 11.6734, "step": 9222 }, { "epoch": 0.1930628820229423, "grad_norm": 0.23538634181022644, "learning_rate": 0.00019797555519375507, "loss": 11.6805, "step": 9223 }, { "epoch": 0.19308381478690445, "grad_norm": 0.2917812168598175, "learning_rate": 0.0001979751162304992, "loss": 11.6896, "step": 9224 }, { "epoch": 0.19310474755086662, "grad_norm": 0.2605206072330475, "learning_rate": 0.00019797467722014478, "loss": 11.683, "step": 9225 }, { "epoch": 0.19312568031482877, "grad_norm": 0.2325843870639801, "learning_rate": 0.00019797423816269195, "loss": 11.6942, "step": 9226 }, { "epoch": 0.1931466130787909, "grad_norm": 0.2911246418952942, "learning_rate": 0.00019797379905814094, "loss": 11.6769, "step": 9227 }, { "epoch": 0.19316754584275309, "grad_norm": 0.2777511477470398, "learning_rate": 0.00019797335990649195, "loss": 11.6973, "step": 9228 }, { "epoch": 0.19318847860671523, "grad_norm": 0.22905123233795166, "learning_rate": 0.00019797292070774521, "loss": 11.6725, "step": 9229 }, { "epoch": 0.19320941137067738, "grad_norm": 0.32048648595809937, "learning_rate": 0.00019797248146190093, "loss": 11.6782, "step": 9230 }, { "epoch": 0.19323034413463955, "grad_norm": 0.2543858289718628, "learning_rate": 0.00019797204216895933, "loss": 11.6597, "step": 9231 }, { "epoch": 0.1932512768986017, "grad_norm": 0.21150116622447968, "learning_rate": 0.0001979716028289206, "loss": 11.6734, "step": 9232 }, { "epoch": 0.19327220966256384, "grad_norm": 0.3134775161743164, "learning_rate": 0.00019797116344178492, "loss": 11.6983, "step": 9233 }, { "epoch": 0.193293142426526, "grad_norm": 0.2172023355960846, "learning_rate": 0.00019797072400755256, "loss": 11.6882, "step": 9234 }, { "epoch": 0.19331407519048815, "grad_norm": 0.3052367866039276, "learning_rate": 0.00019797028452622374, "loss": 11.679, "step": 9235 }, { "epoch": 0.1933350079544503, "grad_norm": 0.22503171861171722, "learning_rate": 0.0001979698449977986, "loss": 11.6745, "step": 9236 }, { "epoch": 0.19335594071841247, "grad_norm": 0.3079988956451416, "learning_rate": 0.0001979694054222774, "loss": 11.69, "step": 9237 }, { "epoch": 0.19337687348237462, "grad_norm": 0.24188263714313507, "learning_rate": 0.00019796896579966035, "loss": 11.653, "step": 9238 }, { "epoch": 0.19339780624633676, "grad_norm": 0.2250327169895172, "learning_rate": 0.00019796852612994766, "loss": 11.6857, "step": 9239 }, { "epoch": 0.1934187390102989, "grad_norm": 0.24570141732692719, "learning_rate": 0.00019796808641313953, "loss": 11.678, "step": 9240 }, { "epoch": 0.19343967177426108, "grad_norm": 0.19914795458316803, "learning_rate": 0.00019796764664923618, "loss": 11.6658, "step": 9241 }, { "epoch": 0.19346060453822322, "grad_norm": 0.34767478704452515, "learning_rate": 0.00019796720683823782, "loss": 11.6836, "step": 9242 }, { "epoch": 0.19348153730218537, "grad_norm": 0.2367141842842102, "learning_rate": 0.00019796676698014465, "loss": 11.6703, "step": 9243 }, { "epoch": 0.19350247006614754, "grad_norm": 0.25185203552246094, "learning_rate": 0.0001979663270749569, "loss": 11.673, "step": 9244 }, { "epoch": 0.1935234028301097, "grad_norm": 0.25187593698501587, "learning_rate": 0.00019796588712267478, "loss": 11.6734, "step": 9245 }, { "epoch": 0.19354433559407183, "grad_norm": 0.2374175488948822, "learning_rate": 0.0001979654471232985, "loss": 11.6829, "step": 9246 }, { "epoch": 0.193565268358034, "grad_norm": 0.33340758085250854, "learning_rate": 0.00019796500707682822, "loss": 11.679, "step": 9247 }, { "epoch": 0.19358620112199615, "grad_norm": 0.2844986319541931, "learning_rate": 0.00019796456698326426, "loss": 11.676, "step": 9248 }, { "epoch": 0.1936071338859583, "grad_norm": 0.2613949477672577, "learning_rate": 0.00019796412684260672, "loss": 11.6858, "step": 9249 }, { "epoch": 0.19362806664992047, "grad_norm": 0.23090875148773193, "learning_rate": 0.0001979636866548559, "loss": 11.6733, "step": 9250 }, { "epoch": 0.1936489994138826, "grad_norm": 0.2558780014514923, "learning_rate": 0.00019796324642001195, "loss": 11.6732, "step": 9251 }, { "epoch": 0.19366993217784476, "grad_norm": 0.23225827515125275, "learning_rate": 0.0001979628061380751, "loss": 11.6821, "step": 9252 }, { "epoch": 0.19369086494180693, "grad_norm": 0.2829996347427368, "learning_rate": 0.00019796236580904556, "loss": 11.6904, "step": 9253 }, { "epoch": 0.19371179770576907, "grad_norm": 0.26340627670288086, "learning_rate": 0.00019796192543292357, "loss": 11.6774, "step": 9254 }, { "epoch": 0.19373273046973122, "grad_norm": 0.24284066259860992, "learning_rate": 0.0001979614850097093, "loss": 11.6812, "step": 9255 }, { "epoch": 0.1937536632336934, "grad_norm": 0.320046603679657, "learning_rate": 0.000197961044539403, "loss": 11.6913, "step": 9256 }, { "epoch": 0.19377459599765554, "grad_norm": 0.26419422030448914, "learning_rate": 0.00019796060402200487, "loss": 11.6821, "step": 9257 }, { "epoch": 0.19379552876161768, "grad_norm": 0.22494366765022278, "learning_rate": 0.00019796016345751508, "loss": 11.6705, "step": 9258 }, { "epoch": 0.19381646152557983, "grad_norm": 0.21626445651054382, "learning_rate": 0.00019795972284593393, "loss": 11.6872, "step": 9259 }, { "epoch": 0.193837394289542, "grad_norm": 0.1972460001707077, "learning_rate": 0.0001979592821872615, "loss": 11.6859, "step": 9260 }, { "epoch": 0.19385832705350414, "grad_norm": 0.24155989289283752, "learning_rate": 0.00019795884148149815, "loss": 11.6655, "step": 9261 }, { "epoch": 0.1938792598174663, "grad_norm": 0.24624764919281006, "learning_rate": 0.000197958400728644, "loss": 11.6717, "step": 9262 }, { "epoch": 0.19390019258142846, "grad_norm": 0.26604756712913513, "learning_rate": 0.0001979579599286993, "loss": 11.6797, "step": 9263 }, { "epoch": 0.1939211253453906, "grad_norm": 0.24569249153137207, "learning_rate": 0.0001979575190816642, "loss": 11.6842, "step": 9264 }, { "epoch": 0.19394205810935275, "grad_norm": 0.2198237180709839, "learning_rate": 0.00019795707818753898, "loss": 11.6742, "step": 9265 }, { "epoch": 0.19396299087331492, "grad_norm": 0.23996232450008392, "learning_rate": 0.00019795663724632382, "loss": 11.6879, "step": 9266 }, { "epoch": 0.19398392363727707, "grad_norm": 0.2404583841562271, "learning_rate": 0.00019795619625801898, "loss": 11.6884, "step": 9267 }, { "epoch": 0.1940048564012392, "grad_norm": 0.22135329246520996, "learning_rate": 0.0001979557552226246, "loss": 11.6965, "step": 9268 }, { "epoch": 0.19402578916520138, "grad_norm": 0.2249256819486618, "learning_rate": 0.0001979553141401409, "loss": 11.6838, "step": 9269 }, { "epoch": 0.19404672192916353, "grad_norm": 0.23202534019947052, "learning_rate": 0.00019795487301056816, "loss": 11.6793, "step": 9270 }, { "epoch": 0.19406765469312567, "grad_norm": 0.23276564478874207, "learning_rate": 0.00019795443183390657, "loss": 11.6713, "step": 9271 }, { "epoch": 0.19408858745708785, "grad_norm": 0.20639866590499878, "learning_rate": 0.00019795399061015628, "loss": 11.6882, "step": 9272 }, { "epoch": 0.19410952022105, "grad_norm": 0.23735515773296356, "learning_rate": 0.00019795354933931756, "loss": 11.6641, "step": 9273 }, { "epoch": 0.19413045298501214, "grad_norm": 0.33385545015335083, "learning_rate": 0.00019795310802139059, "loss": 11.6645, "step": 9274 }, { "epoch": 0.19415138574897428, "grad_norm": 0.2867959141731262, "learning_rate": 0.00019795266665637563, "loss": 11.6902, "step": 9275 }, { "epoch": 0.19417231851293645, "grad_norm": 0.21645012497901917, "learning_rate": 0.00019795222524427282, "loss": 11.6949, "step": 9276 }, { "epoch": 0.1941932512768986, "grad_norm": 0.21303178369998932, "learning_rate": 0.00019795178378508244, "loss": 11.6618, "step": 9277 }, { "epoch": 0.19421418404086074, "grad_norm": 0.2658376693725586, "learning_rate": 0.00019795134227880464, "loss": 11.6833, "step": 9278 }, { "epoch": 0.19423511680482292, "grad_norm": 0.4384657144546509, "learning_rate": 0.0001979509007254397, "loss": 11.6969, "step": 9279 }, { "epoch": 0.19425604956878506, "grad_norm": 0.2943846881389618, "learning_rate": 0.0001979504591249878, "loss": 11.6704, "step": 9280 }, { "epoch": 0.1942769823327472, "grad_norm": 0.24208033084869385, "learning_rate": 0.00019795001747744914, "loss": 11.6858, "step": 9281 }, { "epoch": 0.19429791509670938, "grad_norm": 0.2242109626531601, "learning_rate": 0.00019794957578282392, "loss": 11.6556, "step": 9282 }, { "epoch": 0.19431884786067152, "grad_norm": 0.21774324774742126, "learning_rate": 0.0001979491340411124, "loss": 11.669, "step": 9283 }, { "epoch": 0.19433978062463367, "grad_norm": 0.22312535345554352, "learning_rate": 0.00019794869225231478, "loss": 11.6863, "step": 9284 }, { "epoch": 0.19436071338859584, "grad_norm": 0.2420155107975006, "learning_rate": 0.00019794825041643125, "loss": 11.6832, "step": 9285 }, { "epoch": 0.19438164615255799, "grad_norm": 0.23382198810577393, "learning_rate": 0.00019794780853346207, "loss": 11.6832, "step": 9286 }, { "epoch": 0.19440257891652013, "grad_norm": 0.2644939124584198, "learning_rate": 0.00019794736660340735, "loss": 11.6864, "step": 9287 }, { "epoch": 0.1944235116804823, "grad_norm": 0.2031252384185791, "learning_rate": 0.0001979469246262674, "loss": 11.6833, "step": 9288 }, { "epoch": 0.19444444444444445, "grad_norm": 0.22154472768306732, "learning_rate": 0.0001979464826020424, "loss": 11.6713, "step": 9289 }, { "epoch": 0.1944653772084066, "grad_norm": 0.23930205404758453, "learning_rate": 0.00019794604053073256, "loss": 11.6797, "step": 9290 }, { "epoch": 0.19448630997236876, "grad_norm": 0.24766817688941956, "learning_rate": 0.00019794559841233812, "loss": 11.6849, "step": 9291 }, { "epoch": 0.1945072427363309, "grad_norm": 0.22713296115398407, "learning_rate": 0.00019794515624685924, "loss": 11.6766, "step": 9292 }, { "epoch": 0.19452817550029305, "grad_norm": 0.22728998959064484, "learning_rate": 0.00019794471403429615, "loss": 11.6863, "step": 9293 }, { "epoch": 0.1945491082642552, "grad_norm": 0.22100681066513062, "learning_rate": 0.0001979442717746491, "loss": 11.6739, "step": 9294 }, { "epoch": 0.19457004102821737, "grad_norm": 0.2636812925338745, "learning_rate": 0.00019794382946791825, "loss": 11.6811, "step": 9295 }, { "epoch": 0.19459097379217952, "grad_norm": 0.20235030353069305, "learning_rate": 0.00019794338711410386, "loss": 11.6744, "step": 9296 }, { "epoch": 0.19461190655614166, "grad_norm": 0.33677953481674194, "learning_rate": 0.00019794294471320612, "loss": 11.695, "step": 9297 }, { "epoch": 0.19463283932010383, "grad_norm": 0.21118012070655823, "learning_rate": 0.0001979425022652252, "loss": 11.6865, "step": 9298 }, { "epoch": 0.19465377208406598, "grad_norm": 0.20359395444393158, "learning_rate": 0.00019794205977016142, "loss": 11.6713, "step": 9299 }, { "epoch": 0.19467470484802812, "grad_norm": 0.21400584280490875, "learning_rate": 0.0001979416172280149, "loss": 11.6867, "step": 9300 }, { "epoch": 0.1946956376119903, "grad_norm": 0.23054158687591553, "learning_rate": 0.0001979411746387859, "loss": 11.6815, "step": 9301 }, { "epoch": 0.19471657037595244, "grad_norm": 0.23685702681541443, "learning_rate": 0.00019794073200247458, "loss": 11.684, "step": 9302 }, { "epoch": 0.1947375031399146, "grad_norm": 0.19531558454036713, "learning_rate": 0.0001979402893190812, "loss": 11.6867, "step": 9303 }, { "epoch": 0.19475843590387676, "grad_norm": 0.1992420107126236, "learning_rate": 0.00019793984658860597, "loss": 11.6835, "step": 9304 }, { "epoch": 0.1947793686678389, "grad_norm": 0.24036701023578644, "learning_rate": 0.00019793940381104908, "loss": 11.6709, "step": 9305 }, { "epoch": 0.19480030143180105, "grad_norm": 0.2216116040945053, "learning_rate": 0.00019793896098641077, "loss": 11.6795, "step": 9306 }, { "epoch": 0.19482123419576322, "grad_norm": 0.3016562759876251, "learning_rate": 0.00019793851811469125, "loss": 11.6747, "step": 9307 }, { "epoch": 0.19484216695972537, "grad_norm": 0.25826671719551086, "learning_rate": 0.0001979380751958907, "loss": 11.6788, "step": 9308 }, { "epoch": 0.1948630997236875, "grad_norm": 0.19644491374492645, "learning_rate": 0.00019793763223000937, "loss": 11.6938, "step": 9309 }, { "epoch": 0.19488403248764966, "grad_norm": 0.2599189877510071, "learning_rate": 0.00019793718921704744, "loss": 11.6631, "step": 9310 }, { "epoch": 0.19490496525161183, "grad_norm": 0.3084249794483185, "learning_rate": 0.00019793674615700514, "loss": 11.6802, "step": 9311 }, { "epoch": 0.19492589801557397, "grad_norm": 0.21060648560523987, "learning_rate": 0.0001979363030498827, "loss": 11.6758, "step": 9312 }, { "epoch": 0.19494683077953612, "grad_norm": 0.25088027119636536, "learning_rate": 0.00019793585989568032, "loss": 11.6623, "step": 9313 }, { "epoch": 0.1949677635434983, "grad_norm": 0.2721308469772339, "learning_rate": 0.00019793541669439817, "loss": 11.6763, "step": 9314 }, { "epoch": 0.19498869630746044, "grad_norm": 0.2445250004529953, "learning_rate": 0.00019793497344603657, "loss": 11.6834, "step": 9315 }, { "epoch": 0.19500962907142258, "grad_norm": 0.24652943015098572, "learning_rate": 0.0001979345301505956, "loss": 11.6881, "step": 9316 }, { "epoch": 0.19503056183538475, "grad_norm": 0.26228612661361694, "learning_rate": 0.00019793408680807555, "loss": 11.6716, "step": 9317 }, { "epoch": 0.1950514945993469, "grad_norm": 0.2861483693122864, "learning_rate": 0.00019793364341847664, "loss": 11.6922, "step": 9318 }, { "epoch": 0.19507242736330904, "grad_norm": 0.26981088519096375, "learning_rate": 0.00019793319998179907, "loss": 11.6774, "step": 9319 }, { "epoch": 0.19509336012727121, "grad_norm": 0.27552708983421326, "learning_rate": 0.00019793275649804303, "loss": 11.6847, "step": 9320 }, { "epoch": 0.19511429289123336, "grad_norm": 0.3009319007396698, "learning_rate": 0.00019793231296720876, "loss": 11.6783, "step": 9321 }, { "epoch": 0.1951352256551955, "grad_norm": 0.28206855058670044, "learning_rate": 0.00019793186938929646, "loss": 11.6854, "step": 9322 }, { "epoch": 0.19515615841915768, "grad_norm": 0.3625849485397339, "learning_rate": 0.00019793142576430637, "loss": 11.6894, "step": 9323 }, { "epoch": 0.19517709118311982, "grad_norm": 0.22243818640708923, "learning_rate": 0.00019793098209223867, "loss": 11.6758, "step": 9324 }, { "epoch": 0.19519802394708197, "grad_norm": 0.22765040397644043, "learning_rate": 0.00019793053837309355, "loss": 11.6707, "step": 9325 }, { "epoch": 0.19521895671104414, "grad_norm": 0.25638535618782043, "learning_rate": 0.0001979300946068713, "loss": 11.6708, "step": 9326 }, { "epoch": 0.19523988947500628, "grad_norm": 0.23341643810272217, "learning_rate": 0.0001979296507935721, "loss": 11.6718, "step": 9327 }, { "epoch": 0.19526082223896843, "grad_norm": 0.24252890050411224, "learning_rate": 0.00019792920693319612, "loss": 11.6696, "step": 9328 }, { "epoch": 0.19528175500293057, "grad_norm": 0.2506882846355438, "learning_rate": 0.0001979287630257436, "loss": 11.6919, "step": 9329 }, { "epoch": 0.19530268776689275, "grad_norm": 0.23042243719100952, "learning_rate": 0.0001979283190712148, "loss": 11.6937, "step": 9330 }, { "epoch": 0.1953236205308549, "grad_norm": 0.2533189654350281, "learning_rate": 0.00019792787506960985, "loss": 11.6655, "step": 9331 }, { "epoch": 0.19534455329481704, "grad_norm": 0.24258632957935333, "learning_rate": 0.00019792743102092905, "loss": 11.671, "step": 9332 }, { "epoch": 0.1953654860587792, "grad_norm": 0.2584870755672455, "learning_rate": 0.00019792698692517254, "loss": 11.6925, "step": 9333 }, { "epoch": 0.19538641882274135, "grad_norm": 0.23976610600948334, "learning_rate": 0.0001979265427823406, "loss": 11.6706, "step": 9334 }, { "epoch": 0.1954073515867035, "grad_norm": 0.24329330027103424, "learning_rate": 0.0001979260985924334, "loss": 11.6792, "step": 9335 }, { "epoch": 0.19542828435066567, "grad_norm": 0.23519107699394226, "learning_rate": 0.00019792565435545112, "loss": 11.6681, "step": 9336 }, { "epoch": 0.19544921711462782, "grad_norm": 0.2370767742395401, "learning_rate": 0.00019792521007139406, "loss": 11.6875, "step": 9337 }, { "epoch": 0.19547014987858996, "grad_norm": 0.28170061111450195, "learning_rate": 0.0001979247657402624, "loss": 11.67, "step": 9338 }, { "epoch": 0.19549108264255213, "grad_norm": 0.23581764101982117, "learning_rate": 0.0001979243213620563, "loss": 11.6747, "step": 9339 }, { "epoch": 0.19551201540651428, "grad_norm": 0.2297850251197815, "learning_rate": 0.00019792387693677604, "loss": 11.6668, "step": 9340 }, { "epoch": 0.19553294817047642, "grad_norm": 0.27397575974464417, "learning_rate": 0.00019792343246442177, "loss": 11.698, "step": 9341 }, { "epoch": 0.1955538809344386, "grad_norm": 0.2611105740070343, "learning_rate": 0.0001979229879449938, "loss": 11.6743, "step": 9342 }, { "epoch": 0.19557481369840074, "grad_norm": 0.2941006124019623, "learning_rate": 0.00019792254337849225, "loss": 11.6796, "step": 9343 }, { "epoch": 0.19559574646236288, "grad_norm": 0.25584739446640015, "learning_rate": 0.0001979220987649174, "loss": 11.6708, "step": 9344 }, { "epoch": 0.19561667922632506, "grad_norm": 0.2374214082956314, "learning_rate": 0.00019792165410426938, "loss": 11.6802, "step": 9345 }, { "epoch": 0.1956376119902872, "grad_norm": 0.2777079939842224, "learning_rate": 0.0001979212093965485, "loss": 11.6885, "step": 9346 }, { "epoch": 0.19565854475424935, "grad_norm": 0.24452078342437744, "learning_rate": 0.00019792076464175495, "loss": 11.6609, "step": 9347 }, { "epoch": 0.1956794775182115, "grad_norm": 0.2515469789505005, "learning_rate": 0.0001979203198398889, "loss": 11.6746, "step": 9348 }, { "epoch": 0.19570041028217366, "grad_norm": 0.22779342532157898, "learning_rate": 0.00019791987499095059, "loss": 11.6752, "step": 9349 }, { "epoch": 0.1957213430461358, "grad_norm": 0.2375556379556656, "learning_rate": 0.00019791943009494025, "loss": 11.6726, "step": 9350 }, { "epoch": 0.19574227581009795, "grad_norm": 0.22260166704654694, "learning_rate": 0.00019791898515185806, "loss": 11.6734, "step": 9351 }, { "epoch": 0.19576320857406013, "grad_norm": 0.2689514458179474, "learning_rate": 0.0001979185401617043, "loss": 11.682, "step": 9352 }, { "epoch": 0.19578414133802227, "grad_norm": 0.3384024500846863, "learning_rate": 0.00019791809512447907, "loss": 11.6999, "step": 9353 }, { "epoch": 0.19580507410198442, "grad_norm": 0.20597267150878906, "learning_rate": 0.00019791765004018266, "loss": 11.6673, "step": 9354 }, { "epoch": 0.1958260068659466, "grad_norm": 0.1950264424085617, "learning_rate": 0.0001979172049088153, "loss": 11.6707, "step": 9355 }, { "epoch": 0.19584693962990873, "grad_norm": 0.2470674365758896, "learning_rate": 0.00019791675973037718, "loss": 11.6832, "step": 9356 }, { "epoch": 0.19586787239387088, "grad_norm": 0.2376534640789032, "learning_rate": 0.00019791631450486849, "loss": 11.6832, "step": 9357 }, { "epoch": 0.19588880515783305, "grad_norm": 0.34114983677864075, "learning_rate": 0.00019791586923228945, "loss": 11.6968, "step": 9358 }, { "epoch": 0.1959097379217952, "grad_norm": 0.247531458735466, "learning_rate": 0.00019791542391264032, "loss": 11.6715, "step": 9359 }, { "epoch": 0.19593067068575734, "grad_norm": 0.21507073938846588, "learning_rate": 0.00019791497854592128, "loss": 11.6915, "step": 9360 }, { "epoch": 0.1959516034497195, "grad_norm": 0.3059726357460022, "learning_rate": 0.00019791453313213256, "loss": 11.6919, "step": 9361 }, { "epoch": 0.19597253621368166, "grad_norm": 0.22378544509410858, "learning_rate": 0.0001979140876712743, "loss": 11.6861, "step": 9362 }, { "epoch": 0.1959934689776438, "grad_norm": 0.2088208943605423, "learning_rate": 0.00019791364216334683, "loss": 11.6702, "step": 9363 }, { "epoch": 0.19601440174160595, "grad_norm": 0.27938327193260193, "learning_rate": 0.00019791319660835031, "loss": 11.6853, "step": 9364 }, { "epoch": 0.19603533450556812, "grad_norm": 0.2277214676141739, "learning_rate": 0.00019791275100628495, "loss": 11.6658, "step": 9365 }, { "epoch": 0.19605626726953027, "grad_norm": 0.30083975195884705, "learning_rate": 0.00019791230535715095, "loss": 11.6941, "step": 9366 }, { "epoch": 0.1960772000334924, "grad_norm": 0.22152361273765564, "learning_rate": 0.00019791185966094857, "loss": 11.6671, "step": 9367 }, { "epoch": 0.19609813279745458, "grad_norm": 0.21747837960720062, "learning_rate": 0.00019791141391767796, "loss": 11.6619, "step": 9368 }, { "epoch": 0.19611906556141673, "grad_norm": 0.21981000900268555, "learning_rate": 0.0001979109681273394, "loss": 11.6888, "step": 9369 }, { "epoch": 0.19613999832537887, "grad_norm": 0.3588848412036896, "learning_rate": 0.00019791052228993307, "loss": 11.6986, "step": 9370 }, { "epoch": 0.19616093108934105, "grad_norm": 0.3361019492149353, "learning_rate": 0.0001979100764054592, "loss": 11.6784, "step": 9371 }, { "epoch": 0.1961818638533032, "grad_norm": 0.28881072998046875, "learning_rate": 0.00019790963047391796, "loss": 11.6783, "step": 9372 }, { "epoch": 0.19620279661726533, "grad_norm": 0.22119207680225372, "learning_rate": 0.00019790918449530965, "loss": 11.6708, "step": 9373 }, { "epoch": 0.1962237293812275, "grad_norm": 0.24445344507694244, "learning_rate": 0.00019790873846963438, "loss": 11.6856, "step": 9374 }, { "epoch": 0.19624466214518965, "grad_norm": 0.2397838532924652, "learning_rate": 0.00019790829239689245, "loss": 11.6809, "step": 9375 }, { "epoch": 0.1962655949091518, "grad_norm": 0.2260706126689911, "learning_rate": 0.00019790784627708404, "loss": 11.6895, "step": 9376 }, { "epoch": 0.19628652767311397, "grad_norm": 0.2424813061952591, "learning_rate": 0.00019790740011020933, "loss": 11.6768, "step": 9377 }, { "epoch": 0.19630746043707611, "grad_norm": 0.2543542683124542, "learning_rate": 0.0001979069538962686, "loss": 11.6804, "step": 9378 }, { "epoch": 0.19632839320103826, "grad_norm": 0.2691535949707031, "learning_rate": 0.00019790650763526206, "loss": 11.6666, "step": 9379 }, { "epoch": 0.19634932596500043, "grad_norm": 0.27504584193229675, "learning_rate": 0.00019790606132718988, "loss": 11.6837, "step": 9380 }, { "epoch": 0.19637025872896258, "grad_norm": 0.2537047564983368, "learning_rate": 0.00019790561497205227, "loss": 11.6627, "step": 9381 }, { "epoch": 0.19639119149292472, "grad_norm": 1.146257996559143, "learning_rate": 0.0001979051685698495, "loss": 11.7104, "step": 9382 }, { "epoch": 0.19641212425688687, "grad_norm": 0.2415916919708252, "learning_rate": 0.00019790472212058172, "loss": 11.6742, "step": 9383 }, { "epoch": 0.19643305702084904, "grad_norm": 0.2997620403766632, "learning_rate": 0.0001979042756242492, "loss": 11.6952, "step": 9384 }, { "epoch": 0.19645398978481118, "grad_norm": 0.22060543298721313, "learning_rate": 0.00019790382908085215, "loss": 11.6648, "step": 9385 }, { "epoch": 0.19647492254877333, "grad_norm": 0.21552027761936188, "learning_rate": 0.00019790338249039074, "loss": 11.6766, "step": 9386 }, { "epoch": 0.1964958553127355, "grad_norm": 0.29369673132896423, "learning_rate": 0.00019790293585286522, "loss": 11.682, "step": 9387 }, { "epoch": 0.19651678807669765, "grad_norm": 0.3029271960258484, "learning_rate": 0.00019790248916827578, "loss": 11.6855, "step": 9388 }, { "epoch": 0.1965377208406598, "grad_norm": 0.2384745478630066, "learning_rate": 0.0001979020424366227, "loss": 11.6758, "step": 9389 }, { "epoch": 0.19655865360462196, "grad_norm": 0.2538190186023712, "learning_rate": 0.00019790159565790608, "loss": 11.6823, "step": 9390 }, { "epoch": 0.1965795863685841, "grad_norm": 0.19771236181259155, "learning_rate": 0.00019790114883212626, "loss": 11.6742, "step": 9391 }, { "epoch": 0.19660051913254625, "grad_norm": 0.2630857527256012, "learning_rate": 0.00019790070195928336, "loss": 11.6874, "step": 9392 }, { "epoch": 0.19662145189650843, "grad_norm": 0.2656620740890503, "learning_rate": 0.00019790025503937762, "loss": 11.6887, "step": 9393 }, { "epoch": 0.19664238466047057, "grad_norm": 0.22331824898719788, "learning_rate": 0.0001978998080724093, "loss": 11.6834, "step": 9394 }, { "epoch": 0.19666331742443272, "grad_norm": 0.2426372468471527, "learning_rate": 0.00019789936105837859, "loss": 11.6751, "step": 9395 }, { "epoch": 0.1966842501883949, "grad_norm": 0.24403822422027588, "learning_rate": 0.00019789891399728566, "loss": 11.675, "step": 9396 }, { "epoch": 0.19670518295235703, "grad_norm": 0.22292070090770721, "learning_rate": 0.00019789846688913077, "loss": 11.6852, "step": 9397 }, { "epoch": 0.19672611571631918, "grad_norm": 0.23380769789218903, "learning_rate": 0.00019789801973391416, "loss": 11.6702, "step": 9398 }, { "epoch": 0.19674704848028135, "grad_norm": 0.24948227405548096, "learning_rate": 0.00019789757253163597, "loss": 11.6754, "step": 9399 }, { "epoch": 0.1967679812442435, "grad_norm": 0.22876667976379395, "learning_rate": 0.00019789712528229644, "loss": 11.6792, "step": 9400 }, { "epoch": 0.19678891400820564, "grad_norm": 0.24773916602134705, "learning_rate": 0.0001978966779858958, "loss": 11.6798, "step": 9401 }, { "epoch": 0.19680984677216778, "grad_norm": 0.2493886649608612, "learning_rate": 0.00019789623064243433, "loss": 11.6786, "step": 9402 }, { "epoch": 0.19683077953612996, "grad_norm": 0.2463032603263855, "learning_rate": 0.0001978957832519121, "loss": 11.6897, "step": 9403 }, { "epoch": 0.1968517123000921, "grad_norm": 0.26989296078681946, "learning_rate": 0.00019789533581432948, "loss": 11.6764, "step": 9404 }, { "epoch": 0.19687264506405425, "grad_norm": 0.2670849859714508, "learning_rate": 0.00019789488832968656, "loss": 11.673, "step": 9405 }, { "epoch": 0.19689357782801642, "grad_norm": 0.2804618179798126, "learning_rate": 0.00019789444079798362, "loss": 11.6749, "step": 9406 }, { "epoch": 0.19691451059197856, "grad_norm": 0.25698181986808777, "learning_rate": 0.00019789399321922086, "loss": 11.6897, "step": 9407 }, { "epoch": 0.1969354433559407, "grad_norm": 0.2590201795101166, "learning_rate": 0.0001978935455933985, "loss": 11.7046, "step": 9408 }, { "epoch": 0.19695637611990288, "grad_norm": 0.24722152948379517, "learning_rate": 0.00019789309792051673, "loss": 11.6847, "step": 9409 }, { "epoch": 0.19697730888386503, "grad_norm": 0.23305866122245789, "learning_rate": 0.00019789265020057583, "loss": 11.6725, "step": 9410 }, { "epoch": 0.19699824164782717, "grad_norm": 0.23336218297481537, "learning_rate": 0.00019789220243357595, "loss": 11.6778, "step": 9411 }, { "epoch": 0.19701917441178934, "grad_norm": 0.22941435873508453, "learning_rate": 0.00019789175461951733, "loss": 11.697, "step": 9412 }, { "epoch": 0.1970401071757515, "grad_norm": 0.265440434217453, "learning_rate": 0.00019789130675840016, "loss": 11.6474, "step": 9413 }, { "epoch": 0.19706103993971363, "grad_norm": 0.26202842593193054, "learning_rate": 0.00019789085885022468, "loss": 11.6747, "step": 9414 }, { "epoch": 0.1970819727036758, "grad_norm": 0.21981407701969147, "learning_rate": 0.0001978904108949911, "loss": 11.69, "step": 9415 }, { "epoch": 0.19710290546763795, "grad_norm": 0.31253746151924133, "learning_rate": 0.00019788996289269966, "loss": 11.6745, "step": 9416 }, { "epoch": 0.1971238382316001, "grad_norm": 0.23551878333091736, "learning_rate": 0.00019788951484335054, "loss": 11.6728, "step": 9417 }, { "epoch": 0.19714477099556224, "grad_norm": 0.17857331037521362, "learning_rate": 0.00019788906674694397, "loss": 11.686, "step": 9418 }, { "epoch": 0.1971657037595244, "grad_norm": 0.22809557616710663, "learning_rate": 0.00019788861860348017, "loss": 11.6715, "step": 9419 }, { "epoch": 0.19718663652348656, "grad_norm": 0.21909832954406738, "learning_rate": 0.00019788817041295934, "loss": 11.6632, "step": 9420 }, { "epoch": 0.1972075692874487, "grad_norm": 0.27548161149024963, "learning_rate": 0.0001978877221753817, "loss": 11.6771, "step": 9421 }, { "epoch": 0.19722850205141088, "grad_norm": 0.27263039350509644, "learning_rate": 0.00019788727389074748, "loss": 11.687, "step": 9422 }, { "epoch": 0.19724943481537302, "grad_norm": 0.3111990690231323, "learning_rate": 0.0001978868255590569, "loss": 11.676, "step": 9423 }, { "epoch": 0.19727036757933517, "grad_norm": 0.32190847396850586, "learning_rate": 0.00019788637718031013, "loss": 11.6875, "step": 9424 }, { "epoch": 0.19729130034329734, "grad_norm": 0.290117472410202, "learning_rate": 0.00019788592875450743, "loss": 11.6913, "step": 9425 }, { "epoch": 0.19731223310725948, "grad_norm": 0.2485295683145523, "learning_rate": 0.00019788548028164903, "loss": 11.6733, "step": 9426 }, { "epoch": 0.19733316587122163, "grad_norm": 0.19490505754947662, "learning_rate": 0.00019788503176173508, "loss": 11.6893, "step": 9427 }, { "epoch": 0.1973540986351838, "grad_norm": 0.22723045945167542, "learning_rate": 0.00019788458319476584, "loss": 11.6768, "step": 9428 }, { "epoch": 0.19737503139914594, "grad_norm": 0.2163771241903305, "learning_rate": 0.00019788413458074152, "loss": 11.674, "step": 9429 }, { "epoch": 0.1973959641631081, "grad_norm": 0.2477235645055771, "learning_rate": 0.00019788368591966237, "loss": 11.6963, "step": 9430 }, { "epoch": 0.19741689692707026, "grad_norm": 0.23165664076805115, "learning_rate": 0.0001978832372115285, "loss": 11.6858, "step": 9431 }, { "epoch": 0.1974378296910324, "grad_norm": 0.25230708718299866, "learning_rate": 0.00019788278845634026, "loss": 11.6833, "step": 9432 }, { "epoch": 0.19745876245499455, "grad_norm": 0.22073166072368622, "learning_rate": 0.00019788233965409776, "loss": 11.6872, "step": 9433 }, { "epoch": 0.19747969521895672, "grad_norm": 0.23216953873634338, "learning_rate": 0.00019788189080480127, "loss": 11.6778, "step": 9434 }, { "epoch": 0.19750062798291887, "grad_norm": 0.3025539815425873, "learning_rate": 0.000197881441908451, "loss": 11.6889, "step": 9435 }, { "epoch": 0.19752156074688101, "grad_norm": 0.28306519985198975, "learning_rate": 0.00019788099296504718, "loss": 11.6786, "step": 9436 }, { "epoch": 0.19754249351084316, "grad_norm": 0.2733401358127594, "learning_rate": 0.00019788054397459, "loss": 11.6885, "step": 9437 }, { "epoch": 0.19756342627480533, "grad_norm": 0.2260993868112564, "learning_rate": 0.00019788009493707965, "loss": 11.6644, "step": 9438 }, { "epoch": 0.19758435903876748, "grad_norm": 0.26688477396965027, "learning_rate": 0.0001978796458525164, "loss": 11.684, "step": 9439 }, { "epoch": 0.19760529180272962, "grad_norm": 0.27313777804374695, "learning_rate": 0.00019787919672090043, "loss": 11.697, "step": 9440 }, { "epoch": 0.1976262245666918, "grad_norm": 0.27318912744522095, "learning_rate": 0.000197878747542232, "loss": 11.6665, "step": 9441 }, { "epoch": 0.19764715733065394, "grad_norm": 0.2089090794324875, "learning_rate": 0.00019787829831651124, "loss": 11.6698, "step": 9442 }, { "epoch": 0.19766809009461608, "grad_norm": 0.23357699811458588, "learning_rate": 0.00019787784904373848, "loss": 11.672, "step": 9443 }, { "epoch": 0.19768902285857826, "grad_norm": 0.2964670956134796, "learning_rate": 0.00019787739972391383, "loss": 11.6765, "step": 9444 }, { "epoch": 0.1977099556225404, "grad_norm": 0.2745245397090912, "learning_rate": 0.00019787695035703755, "loss": 11.6604, "step": 9445 }, { "epoch": 0.19773088838650255, "grad_norm": 0.29378679394721985, "learning_rate": 0.00019787650094310987, "loss": 11.6872, "step": 9446 }, { "epoch": 0.19775182115046472, "grad_norm": 0.22549960017204285, "learning_rate": 0.00019787605148213102, "loss": 11.6661, "step": 9447 }, { "epoch": 0.19777275391442686, "grad_norm": 0.21994785964488983, "learning_rate": 0.00019787560197410118, "loss": 11.6588, "step": 9448 }, { "epoch": 0.197793686678389, "grad_norm": 0.24572019279003143, "learning_rate": 0.00019787515241902055, "loss": 11.653, "step": 9449 }, { "epoch": 0.19781461944235118, "grad_norm": 0.22053281962871552, "learning_rate": 0.0001978747028168894, "loss": 11.673, "step": 9450 }, { "epoch": 0.19783555220631333, "grad_norm": 0.2691057622432709, "learning_rate": 0.00019787425316770792, "loss": 11.6703, "step": 9451 }, { "epoch": 0.19785648497027547, "grad_norm": 0.23824116587638855, "learning_rate": 0.0001978738034714763, "loss": 11.6797, "step": 9452 }, { "epoch": 0.19787741773423762, "grad_norm": 0.2911677658557892, "learning_rate": 0.0001978733537281948, "loss": 11.6737, "step": 9453 }, { "epoch": 0.1978983504981998, "grad_norm": 0.33449944853782654, "learning_rate": 0.0001978729039378636, "loss": 11.6808, "step": 9454 }, { "epoch": 0.19791928326216193, "grad_norm": 0.22643792629241943, "learning_rate": 0.00019787245410048297, "loss": 11.6692, "step": 9455 }, { "epoch": 0.19794021602612408, "grad_norm": 0.2626595199108124, "learning_rate": 0.00019787200421605308, "loss": 11.6579, "step": 9456 }, { "epoch": 0.19796114879008625, "grad_norm": 0.27211257815361023, "learning_rate": 0.00019787155428457412, "loss": 11.6749, "step": 9457 }, { "epoch": 0.1979820815540484, "grad_norm": 0.20037220418453217, "learning_rate": 0.00019787110430604637, "loss": 11.6792, "step": 9458 }, { "epoch": 0.19800301431801054, "grad_norm": 0.23151864111423492, "learning_rate": 0.00019787065428047003, "loss": 11.6687, "step": 9459 }, { "epoch": 0.1980239470819727, "grad_norm": 0.26845046877861023, "learning_rate": 0.00019787020420784527, "loss": 11.6609, "step": 9460 }, { "epoch": 0.19804487984593486, "grad_norm": 0.29711300134658813, "learning_rate": 0.0001978697540881724, "loss": 11.6842, "step": 9461 }, { "epoch": 0.198065812609897, "grad_norm": 0.2737075686454773, "learning_rate": 0.00019786930392145152, "loss": 11.6687, "step": 9462 }, { "epoch": 0.19808674537385917, "grad_norm": 0.23565170168876648, "learning_rate": 0.00019786885370768294, "loss": 11.6868, "step": 9463 }, { "epoch": 0.19810767813782132, "grad_norm": 0.218616783618927, "learning_rate": 0.00019786840344686679, "loss": 11.6781, "step": 9464 }, { "epoch": 0.19812861090178346, "grad_norm": 0.2789084315299988, "learning_rate": 0.0001978679531390034, "loss": 11.6971, "step": 9465 }, { "epoch": 0.19814954366574564, "grad_norm": 0.28353986144065857, "learning_rate": 0.00019786750278409288, "loss": 11.6876, "step": 9466 }, { "epoch": 0.19817047642970778, "grad_norm": 0.31334248185157776, "learning_rate": 0.0001978670523821355, "loss": 11.6883, "step": 9467 }, { "epoch": 0.19819140919366993, "grad_norm": 0.272340327501297, "learning_rate": 0.00019786660193313148, "loss": 11.6886, "step": 9468 }, { "epoch": 0.1982123419576321, "grad_norm": 0.28227922320365906, "learning_rate": 0.00019786615143708102, "loss": 11.67, "step": 9469 }, { "epoch": 0.19823327472159424, "grad_norm": 0.29310399293899536, "learning_rate": 0.00019786570089398432, "loss": 11.6846, "step": 9470 }, { "epoch": 0.1982542074855564, "grad_norm": 0.258522093296051, "learning_rate": 0.00019786525030384165, "loss": 11.6983, "step": 9471 }, { "epoch": 0.19827514024951853, "grad_norm": 0.2670951187610626, "learning_rate": 0.00019786479966665317, "loss": 11.6868, "step": 9472 }, { "epoch": 0.1982960730134807, "grad_norm": 0.23975573480129242, "learning_rate": 0.0001978643489824191, "loss": 11.6816, "step": 9473 }, { "epoch": 0.19831700577744285, "grad_norm": 0.21214741468429565, "learning_rate": 0.0001978638982511397, "loss": 11.6848, "step": 9474 }, { "epoch": 0.198337938541405, "grad_norm": 0.23846089839935303, "learning_rate": 0.00019786344747281518, "loss": 11.6788, "step": 9475 }, { "epoch": 0.19835887130536717, "grad_norm": 0.23468950390815735, "learning_rate": 0.0001978629966474457, "loss": 11.6805, "step": 9476 }, { "epoch": 0.1983798040693293, "grad_norm": 0.24951224029064178, "learning_rate": 0.00019786254577503157, "loss": 11.6713, "step": 9477 }, { "epoch": 0.19840073683329146, "grad_norm": 0.3300996422767639, "learning_rate": 0.0001978620948555729, "loss": 11.693, "step": 9478 }, { "epoch": 0.19842166959725363, "grad_norm": 0.22373490035533905, "learning_rate": 0.00019786164388907001, "loss": 11.6714, "step": 9479 }, { "epoch": 0.19844260236121578, "grad_norm": 0.2826727628707886, "learning_rate": 0.00019786119287552303, "loss": 11.6702, "step": 9480 }, { "epoch": 0.19846353512517792, "grad_norm": 0.25323957204818726, "learning_rate": 0.00019786074181493221, "loss": 11.6746, "step": 9481 }, { "epoch": 0.1984844678891401, "grad_norm": 0.29130733013153076, "learning_rate": 0.00019786029070729779, "loss": 11.6817, "step": 9482 }, { "epoch": 0.19850540065310224, "grad_norm": 0.24129709601402283, "learning_rate": 0.00019785983955261996, "loss": 11.6808, "step": 9483 }, { "epoch": 0.19852633341706438, "grad_norm": 0.2514372766017914, "learning_rate": 0.00019785938835089896, "loss": 11.6699, "step": 9484 }, { "epoch": 0.19854726618102655, "grad_norm": 0.2511741518974304, "learning_rate": 0.00019785893710213496, "loss": 11.6745, "step": 9485 }, { "epoch": 0.1985681989449887, "grad_norm": 0.24121223390102386, "learning_rate": 0.00019785848580632822, "loss": 11.672, "step": 9486 }, { "epoch": 0.19858913170895084, "grad_norm": 0.24229252338409424, "learning_rate": 0.00019785803446347895, "loss": 11.6806, "step": 9487 }, { "epoch": 0.19861006447291302, "grad_norm": 0.2657470703125, "learning_rate": 0.00019785758307358734, "loss": 11.6787, "step": 9488 }, { "epoch": 0.19863099723687516, "grad_norm": 0.2372060865163803, "learning_rate": 0.00019785713163665366, "loss": 11.6686, "step": 9489 }, { "epoch": 0.1986519300008373, "grad_norm": 0.2115606814622879, "learning_rate": 0.0001978566801526781, "loss": 11.6898, "step": 9490 }, { "epoch": 0.19867286276479945, "grad_norm": 0.29121389985084534, "learning_rate": 0.00019785622862166085, "loss": 11.6917, "step": 9491 }, { "epoch": 0.19869379552876162, "grad_norm": 0.28240418434143066, "learning_rate": 0.00019785577704360213, "loss": 11.6842, "step": 9492 }, { "epoch": 0.19871472829272377, "grad_norm": 0.26547402143478394, "learning_rate": 0.0001978553254185022, "loss": 11.673, "step": 9493 }, { "epoch": 0.1987356610566859, "grad_norm": 0.28100401163101196, "learning_rate": 0.00019785487374636128, "loss": 11.676, "step": 9494 }, { "epoch": 0.1987565938206481, "grad_norm": 0.2255326211452484, "learning_rate": 0.00019785442202717955, "loss": 11.6733, "step": 9495 }, { "epoch": 0.19877752658461023, "grad_norm": 0.22130820155143738, "learning_rate": 0.0001978539702609572, "loss": 11.6792, "step": 9496 }, { "epoch": 0.19879845934857238, "grad_norm": 0.222776398062706, "learning_rate": 0.00019785351844769453, "loss": 11.6713, "step": 9497 }, { "epoch": 0.19881939211253455, "grad_norm": 0.27071666717529297, "learning_rate": 0.00019785306658739167, "loss": 11.6927, "step": 9498 }, { "epoch": 0.1988403248764967, "grad_norm": 0.2494169920682907, "learning_rate": 0.00019785261468004893, "loss": 11.6752, "step": 9499 }, { "epoch": 0.19886125764045884, "grad_norm": 0.22827424108982086, "learning_rate": 0.00019785216272566643, "loss": 11.6732, "step": 9500 }, { "epoch": 0.198882190404421, "grad_norm": 0.31936362385749817, "learning_rate": 0.00019785171072424446, "loss": 11.6704, "step": 9501 }, { "epoch": 0.19890312316838316, "grad_norm": 0.25669851899147034, "learning_rate": 0.0001978512586757832, "loss": 11.6722, "step": 9502 }, { "epoch": 0.1989240559323453, "grad_norm": 0.28624773025512695, "learning_rate": 0.0001978508065802829, "loss": 11.6817, "step": 9503 }, { "epoch": 0.19894498869630747, "grad_norm": 0.20063002407550812, "learning_rate": 0.00019785035443774373, "loss": 11.6769, "step": 9504 }, { "epoch": 0.19896592146026962, "grad_norm": 0.22825020551681519, "learning_rate": 0.00019784990224816596, "loss": 11.6777, "step": 9505 }, { "epoch": 0.19898685422423176, "grad_norm": 0.30303123593330383, "learning_rate": 0.00019784945001154975, "loss": 11.6905, "step": 9506 }, { "epoch": 0.1990077869881939, "grad_norm": 0.264808714389801, "learning_rate": 0.00019784899772789537, "loss": 11.6784, "step": 9507 }, { "epoch": 0.19902871975215608, "grad_norm": 0.24323031306266785, "learning_rate": 0.00019784854539720302, "loss": 11.6833, "step": 9508 }, { "epoch": 0.19904965251611823, "grad_norm": 0.31050530076026917, "learning_rate": 0.0001978480930194729, "loss": 11.698, "step": 9509 }, { "epoch": 0.19907058528008037, "grad_norm": 0.2763565182685852, "learning_rate": 0.00019784764059470526, "loss": 11.6739, "step": 9510 }, { "epoch": 0.19909151804404254, "grad_norm": 0.23641207814216614, "learning_rate": 0.00019784718812290025, "loss": 11.6724, "step": 9511 }, { "epoch": 0.1991124508080047, "grad_norm": 0.2495397925376892, "learning_rate": 0.00019784673560405816, "loss": 11.6802, "step": 9512 }, { "epoch": 0.19913338357196683, "grad_norm": 0.27251261472702026, "learning_rate": 0.00019784628303817921, "loss": 11.6788, "step": 9513 }, { "epoch": 0.199154316335929, "grad_norm": 0.2668011784553528, "learning_rate": 0.0001978458304252636, "loss": 11.6753, "step": 9514 }, { "epoch": 0.19917524909989115, "grad_norm": 0.27810442447662354, "learning_rate": 0.00019784537776531148, "loss": 11.6839, "step": 9515 }, { "epoch": 0.1991961818638533, "grad_norm": 0.2476710081100464, "learning_rate": 0.00019784492505832318, "loss": 11.6731, "step": 9516 }, { "epoch": 0.19921711462781547, "grad_norm": 0.24072273075580597, "learning_rate": 0.00019784447230429883, "loss": 11.6878, "step": 9517 }, { "epoch": 0.1992380473917776, "grad_norm": 0.3990265130996704, "learning_rate": 0.00019784401950323873, "loss": 11.6963, "step": 9518 }, { "epoch": 0.19925898015573976, "grad_norm": 0.3142269551753998, "learning_rate": 0.00019784356665514298, "loss": 11.6868, "step": 9519 }, { "epoch": 0.19927991291970193, "grad_norm": 0.22939473390579224, "learning_rate": 0.0001978431137600119, "loss": 11.6821, "step": 9520 }, { "epoch": 0.19930084568366407, "grad_norm": 0.2844809293746948, "learning_rate": 0.00019784266081784567, "loss": 11.6868, "step": 9521 }, { "epoch": 0.19932177844762622, "grad_norm": 0.21383865177631378, "learning_rate": 0.0001978422078286445, "loss": 11.6793, "step": 9522 }, { "epoch": 0.1993427112115884, "grad_norm": 0.23712842166423798, "learning_rate": 0.00019784175479240865, "loss": 11.6781, "step": 9523 }, { "epoch": 0.19936364397555054, "grad_norm": 0.21210302412509918, "learning_rate": 0.0001978413017091383, "loss": 11.6767, "step": 9524 }, { "epoch": 0.19938457673951268, "grad_norm": 0.20618034899234772, "learning_rate": 0.00019784084857883368, "loss": 11.6791, "step": 9525 }, { "epoch": 0.19940550950347483, "grad_norm": 0.20497533679008484, "learning_rate": 0.00019784039540149498, "loss": 11.6839, "step": 9526 }, { "epoch": 0.199426442267437, "grad_norm": 0.281579852104187, "learning_rate": 0.00019783994217712248, "loss": 11.6816, "step": 9527 }, { "epoch": 0.19944737503139914, "grad_norm": 0.29094359278678894, "learning_rate": 0.00019783948890571631, "loss": 11.6669, "step": 9528 }, { "epoch": 0.1994683077953613, "grad_norm": 0.21298684179782867, "learning_rate": 0.00019783903558727675, "loss": 11.6759, "step": 9529 }, { "epoch": 0.19948924055932346, "grad_norm": 0.2004716843366623, "learning_rate": 0.00019783858222180404, "loss": 11.6805, "step": 9530 }, { "epoch": 0.1995101733232856, "grad_norm": 0.24099235236644745, "learning_rate": 0.0001978381288092983, "loss": 11.6834, "step": 9531 }, { "epoch": 0.19953110608724775, "grad_norm": 0.32153594493865967, "learning_rate": 0.00019783767534975986, "loss": 11.671, "step": 9532 }, { "epoch": 0.19955203885120992, "grad_norm": 0.2850547730922699, "learning_rate": 0.0001978372218431889, "loss": 11.6973, "step": 9533 }, { "epoch": 0.19957297161517207, "grad_norm": 0.27536994218826294, "learning_rate": 0.0001978367682895856, "loss": 11.6848, "step": 9534 }, { "epoch": 0.1995939043791342, "grad_norm": 0.2476358860731125, "learning_rate": 0.0001978363146889502, "loss": 11.6641, "step": 9535 }, { "epoch": 0.19961483714309639, "grad_norm": 0.24546504020690918, "learning_rate": 0.00019783586104128294, "loss": 11.6842, "step": 9536 }, { "epoch": 0.19963576990705853, "grad_norm": 0.20736174285411835, "learning_rate": 0.000197835407346584, "loss": 11.6725, "step": 9537 }, { "epoch": 0.19965670267102067, "grad_norm": 0.2275128811597824, "learning_rate": 0.00019783495360485364, "loss": 11.677, "step": 9538 }, { "epoch": 0.19967763543498285, "grad_norm": 0.24250885844230652, "learning_rate": 0.00019783449981609206, "loss": 11.6816, "step": 9539 }, { "epoch": 0.199698568198945, "grad_norm": 0.2134741246700287, "learning_rate": 0.00019783404598029943, "loss": 11.6807, "step": 9540 }, { "epoch": 0.19971950096290714, "grad_norm": 0.25967326760292053, "learning_rate": 0.00019783359209747604, "loss": 11.6915, "step": 9541 }, { "epoch": 0.19974043372686928, "grad_norm": 0.21944750845432281, "learning_rate": 0.00019783313816762213, "loss": 11.6745, "step": 9542 }, { "epoch": 0.19976136649083145, "grad_norm": 0.20164838433265686, "learning_rate": 0.0001978326841907378, "loss": 11.6769, "step": 9543 }, { "epoch": 0.1997822992547936, "grad_norm": 0.2521476745605469, "learning_rate": 0.00019783223016682337, "loss": 11.6762, "step": 9544 }, { "epoch": 0.19980323201875574, "grad_norm": 0.24263721704483032, "learning_rate": 0.00019783177609587903, "loss": 11.6755, "step": 9545 }, { "epoch": 0.19982416478271792, "grad_norm": 0.20431984961032867, "learning_rate": 0.00019783132197790496, "loss": 11.6604, "step": 9546 }, { "epoch": 0.19984509754668006, "grad_norm": 0.30460435152053833, "learning_rate": 0.00019783086781290145, "loss": 11.686, "step": 9547 }, { "epoch": 0.1998660303106422, "grad_norm": 0.25635480880737305, "learning_rate": 0.0001978304136008687, "loss": 11.6661, "step": 9548 }, { "epoch": 0.19988696307460438, "grad_norm": 0.20450422167778015, "learning_rate": 0.00019782995934180687, "loss": 11.6878, "step": 9549 }, { "epoch": 0.19990789583856652, "grad_norm": 0.40046167373657227, "learning_rate": 0.00019782950503571622, "loss": 11.6731, "step": 9550 }, { "epoch": 0.19992882860252867, "grad_norm": 0.2628501355648041, "learning_rate": 0.000197829050682597, "loss": 11.6833, "step": 9551 }, { "epoch": 0.19994976136649084, "grad_norm": 0.2254222333431244, "learning_rate": 0.00019782859628244935, "loss": 11.6882, "step": 9552 }, { "epoch": 0.199970694130453, "grad_norm": 0.22139370441436768, "learning_rate": 0.00019782814183527355, "loss": 11.6801, "step": 9553 }, { "epoch": 0.19999162689441513, "grad_norm": 0.26854369044303894, "learning_rate": 0.00019782768734106983, "loss": 11.6589, "step": 9554 }, { "epoch": 0.2000125596583773, "grad_norm": 0.24357864260673523, "learning_rate": 0.00019782723279983833, "loss": 11.6839, "step": 9555 }, { "epoch": 0.20003349242233945, "grad_norm": 0.37562817335128784, "learning_rate": 0.00019782677821157936, "loss": 11.6801, "step": 9556 }, { "epoch": 0.2000544251863016, "grad_norm": 0.2457461655139923, "learning_rate": 0.00019782632357629308, "loss": 11.6737, "step": 9557 }, { "epoch": 0.20007535795026377, "grad_norm": 0.23816798627376556, "learning_rate": 0.0001978258688939797, "loss": 11.6806, "step": 9558 }, { "epoch": 0.2000962907142259, "grad_norm": 0.24029693007469177, "learning_rate": 0.0001978254141646395, "loss": 11.6785, "step": 9559 }, { "epoch": 0.20011722347818806, "grad_norm": 0.25540974736213684, "learning_rate": 0.00019782495938827266, "loss": 11.6553, "step": 9560 }, { "epoch": 0.2001381562421502, "grad_norm": 0.3040260374546051, "learning_rate": 0.0001978245045648794, "loss": 11.6747, "step": 9561 }, { "epoch": 0.20015908900611237, "grad_norm": 0.27418941259384155, "learning_rate": 0.00019782404969445993, "loss": 11.6766, "step": 9562 }, { "epoch": 0.20018002177007452, "grad_norm": 0.2795228064060211, "learning_rate": 0.0001978235947770145, "loss": 11.672, "step": 9563 }, { "epoch": 0.20020095453403666, "grad_norm": 0.3101537823677063, "learning_rate": 0.0001978231398125433, "loss": 11.6723, "step": 9564 }, { "epoch": 0.20022188729799884, "grad_norm": 0.2716124653816223, "learning_rate": 0.00019782268480104652, "loss": 11.6677, "step": 9565 }, { "epoch": 0.20024282006196098, "grad_norm": 0.26910439133644104, "learning_rate": 0.00019782222974252446, "loss": 11.6859, "step": 9566 }, { "epoch": 0.20026375282592312, "grad_norm": 0.24651353061199188, "learning_rate": 0.00019782177463697727, "loss": 11.6743, "step": 9567 }, { "epoch": 0.2002846855898853, "grad_norm": 0.23152752220630646, "learning_rate": 0.00019782131948440523, "loss": 11.6762, "step": 9568 }, { "epoch": 0.20030561835384744, "grad_norm": 0.27968668937683105, "learning_rate": 0.0001978208642848085, "loss": 11.6688, "step": 9569 }, { "epoch": 0.2003265511178096, "grad_norm": 0.2978087365627289, "learning_rate": 0.0001978204090381873, "loss": 11.6826, "step": 9570 }, { "epoch": 0.20034748388177176, "grad_norm": 0.2746131420135498, "learning_rate": 0.00019781995374454188, "loss": 11.6736, "step": 9571 }, { "epoch": 0.2003684166457339, "grad_norm": 0.2238207757472992, "learning_rate": 0.00019781949840387244, "loss": 11.6743, "step": 9572 }, { "epoch": 0.20038934940969605, "grad_norm": 0.24775627255439758, "learning_rate": 0.00019781904301617923, "loss": 11.6839, "step": 9573 }, { "epoch": 0.20041028217365822, "grad_norm": 0.223234623670578, "learning_rate": 0.00019781858758146243, "loss": 11.6713, "step": 9574 }, { "epoch": 0.20043121493762037, "grad_norm": 0.33964526653289795, "learning_rate": 0.00019781813209972227, "loss": 11.6944, "step": 9575 }, { "epoch": 0.2004521477015825, "grad_norm": 0.23412847518920898, "learning_rate": 0.000197817676570959, "loss": 11.6749, "step": 9576 }, { "epoch": 0.20047308046554468, "grad_norm": 0.29632046818733215, "learning_rate": 0.00019781722099517277, "loss": 11.6848, "step": 9577 }, { "epoch": 0.20049401322950683, "grad_norm": 0.2663611173629761, "learning_rate": 0.00019781676537236387, "loss": 11.6787, "step": 9578 }, { "epoch": 0.20051494599346897, "grad_norm": 0.24221079051494598, "learning_rate": 0.00019781630970253246, "loss": 11.6855, "step": 9579 }, { "epoch": 0.20053587875743112, "grad_norm": 0.2942267060279846, "learning_rate": 0.00019781585398567883, "loss": 11.6946, "step": 9580 }, { "epoch": 0.2005568115213933, "grad_norm": 0.273363322019577, "learning_rate": 0.00019781539822180314, "loss": 11.6875, "step": 9581 }, { "epoch": 0.20057774428535544, "grad_norm": 0.2865696847438812, "learning_rate": 0.00019781494241090565, "loss": 11.6756, "step": 9582 }, { "epoch": 0.20059867704931758, "grad_norm": 0.19525349140167236, "learning_rate": 0.0001978144865529865, "loss": 11.6718, "step": 9583 }, { "epoch": 0.20061960981327975, "grad_norm": 0.23443153500556946, "learning_rate": 0.00019781403064804605, "loss": 11.6583, "step": 9584 }, { "epoch": 0.2006405425772419, "grad_norm": 0.280615895986557, "learning_rate": 0.00019781357469608437, "loss": 11.6786, "step": 9585 }, { "epoch": 0.20066147534120404, "grad_norm": 0.2807461619377136, "learning_rate": 0.00019781311869710176, "loss": 11.6836, "step": 9586 }, { "epoch": 0.20068240810516622, "grad_norm": 0.20373542606830597, "learning_rate": 0.00019781266265109844, "loss": 11.6623, "step": 9587 }, { "epoch": 0.20070334086912836, "grad_norm": 0.25138339400291443, "learning_rate": 0.00019781220655807458, "loss": 11.6645, "step": 9588 }, { "epoch": 0.2007242736330905, "grad_norm": 0.22673815488815308, "learning_rate": 0.00019781175041803047, "loss": 11.6522, "step": 9589 }, { "epoch": 0.20074520639705268, "grad_norm": 0.2533838152885437, "learning_rate": 0.00019781129423096627, "loss": 11.6704, "step": 9590 }, { "epoch": 0.20076613916101482, "grad_norm": 0.23936067521572113, "learning_rate": 0.00019781083799688223, "loss": 11.6946, "step": 9591 }, { "epoch": 0.20078707192497697, "grad_norm": 0.22428537905216217, "learning_rate": 0.00019781038171577854, "loss": 11.6733, "step": 9592 }, { "epoch": 0.20080800468893914, "grad_norm": 0.23663461208343506, "learning_rate": 0.00019780992538765546, "loss": 11.6875, "step": 9593 }, { "epoch": 0.20082893745290128, "grad_norm": 0.24669824540615082, "learning_rate": 0.00019780946901251317, "loss": 11.662, "step": 9594 }, { "epoch": 0.20084987021686343, "grad_norm": 0.25121408700942993, "learning_rate": 0.00019780901259035193, "loss": 11.6677, "step": 9595 }, { "epoch": 0.20087080298082557, "grad_norm": 0.23731179535388947, "learning_rate": 0.00019780855612117193, "loss": 11.6769, "step": 9596 }, { "epoch": 0.20089173574478775, "grad_norm": 0.20804381370544434, "learning_rate": 0.00019780809960497339, "loss": 11.6914, "step": 9597 }, { "epoch": 0.2009126685087499, "grad_norm": 0.22656330466270447, "learning_rate": 0.00019780764304175655, "loss": 11.6787, "step": 9598 }, { "epoch": 0.20093360127271204, "grad_norm": 0.22847701609134674, "learning_rate": 0.0001978071864315216, "loss": 11.6832, "step": 9599 }, { "epoch": 0.2009545340366742, "grad_norm": 0.2431049644947052, "learning_rate": 0.0001978067297742688, "loss": 11.6948, "step": 9600 }, { "epoch": 0.20097546680063635, "grad_norm": 0.2083570510149002, "learning_rate": 0.00019780627306999833, "loss": 11.6875, "step": 9601 }, { "epoch": 0.2009963995645985, "grad_norm": 0.27046552300453186, "learning_rate": 0.00019780581631871042, "loss": 11.6562, "step": 9602 }, { "epoch": 0.20101733232856067, "grad_norm": 0.2298838198184967, "learning_rate": 0.0001978053595204053, "loss": 11.6853, "step": 9603 }, { "epoch": 0.20103826509252282, "grad_norm": 0.23001472651958466, "learning_rate": 0.0001978049026750832, "loss": 11.6614, "step": 9604 }, { "epoch": 0.20105919785648496, "grad_norm": 0.262529194355011, "learning_rate": 0.0001978044457827443, "loss": 11.6826, "step": 9605 }, { "epoch": 0.20108013062044713, "grad_norm": 0.26683634519577026, "learning_rate": 0.00019780398884338882, "loss": 11.6725, "step": 9606 }, { "epoch": 0.20110106338440928, "grad_norm": 0.29273316264152527, "learning_rate": 0.000197803531857017, "loss": 11.6809, "step": 9607 }, { "epoch": 0.20112199614837142, "grad_norm": 0.2070227414369583, "learning_rate": 0.00019780307482362912, "loss": 11.6738, "step": 9608 }, { "epoch": 0.2011429289123336, "grad_norm": 0.2543587386608124, "learning_rate": 0.0001978026177432253, "loss": 11.6874, "step": 9609 }, { "epoch": 0.20116386167629574, "grad_norm": 0.24850821495056152, "learning_rate": 0.0001978021606158058, "loss": 11.6936, "step": 9610 }, { "epoch": 0.20118479444025789, "grad_norm": 0.2668728828430176, "learning_rate": 0.00019780170344137088, "loss": 11.6587, "step": 9611 }, { "epoch": 0.20120572720422006, "grad_norm": 0.20491105318069458, "learning_rate": 0.0001978012462199207, "loss": 11.693, "step": 9612 }, { "epoch": 0.2012266599681822, "grad_norm": 0.3262689709663391, "learning_rate": 0.00019780078895145547, "loss": 11.6969, "step": 9613 }, { "epoch": 0.20124759273214435, "grad_norm": 0.22509750723838806, "learning_rate": 0.0001978003316359755, "loss": 11.6719, "step": 9614 }, { "epoch": 0.2012685254961065, "grad_norm": 0.2355249673128128, "learning_rate": 0.00019779987427348092, "loss": 11.6815, "step": 9615 }, { "epoch": 0.20128945826006867, "grad_norm": 0.27638116478919983, "learning_rate": 0.000197799416863972, "loss": 11.6655, "step": 9616 }, { "epoch": 0.2013103910240308, "grad_norm": 0.20699664950370789, "learning_rate": 0.0001977989594074489, "loss": 11.6814, "step": 9617 }, { "epoch": 0.20133132378799296, "grad_norm": 0.2588582932949066, "learning_rate": 0.00019779850190391188, "loss": 11.6902, "step": 9618 }, { "epoch": 0.20135225655195513, "grad_norm": 0.2518020272254944, "learning_rate": 0.0001977980443533612, "loss": 11.6786, "step": 9619 }, { "epoch": 0.20137318931591727, "grad_norm": 0.2959787845611572, "learning_rate": 0.00019779758675579702, "loss": 11.6773, "step": 9620 }, { "epoch": 0.20139412207987942, "grad_norm": 0.2541501820087433, "learning_rate": 0.0001977971291112196, "loss": 11.6975, "step": 9621 }, { "epoch": 0.2014150548438416, "grad_norm": 0.21319863200187683, "learning_rate": 0.0001977966714196291, "loss": 11.6861, "step": 9622 }, { "epoch": 0.20143598760780373, "grad_norm": 0.2958364188671112, "learning_rate": 0.0001977962136810258, "loss": 11.6826, "step": 9623 }, { "epoch": 0.20145692037176588, "grad_norm": 0.21849879622459412, "learning_rate": 0.00019779575589540992, "loss": 11.6839, "step": 9624 }, { "epoch": 0.20147785313572805, "grad_norm": 0.27106714248657227, "learning_rate": 0.00019779529806278164, "loss": 11.6935, "step": 9625 }, { "epoch": 0.2014987858996902, "grad_norm": 0.227918341755867, "learning_rate": 0.00019779484018314118, "loss": 11.6798, "step": 9626 }, { "epoch": 0.20151971866365234, "grad_norm": 0.21879039704799652, "learning_rate": 0.0001977943822564888, "loss": 11.6867, "step": 9627 }, { "epoch": 0.20154065142761451, "grad_norm": 0.2807095944881439, "learning_rate": 0.0001977939242828247, "loss": 11.6697, "step": 9628 }, { "epoch": 0.20156158419157666, "grad_norm": 0.22657373547554016, "learning_rate": 0.00019779346626214912, "loss": 11.6743, "step": 9629 }, { "epoch": 0.2015825169555388, "grad_norm": 0.23327507078647614, "learning_rate": 0.00019779300819446226, "loss": 11.6792, "step": 9630 }, { "epoch": 0.20160344971950098, "grad_norm": 0.22314836084842682, "learning_rate": 0.0001977925500797643, "loss": 11.6734, "step": 9631 }, { "epoch": 0.20162438248346312, "grad_norm": 0.2247142195701599, "learning_rate": 0.00019779209191805554, "loss": 11.6923, "step": 9632 }, { "epoch": 0.20164531524742527, "grad_norm": 0.30047768354415894, "learning_rate": 0.00019779163370933615, "loss": 11.6738, "step": 9633 }, { "epoch": 0.2016662480113874, "grad_norm": 0.19717112183570862, "learning_rate": 0.00019779117545360637, "loss": 11.6774, "step": 9634 }, { "epoch": 0.20168718077534958, "grad_norm": 0.20369397103786469, "learning_rate": 0.00019779071715086638, "loss": 11.6902, "step": 9635 }, { "epoch": 0.20170811353931173, "grad_norm": 0.28465721011161804, "learning_rate": 0.00019779025880111646, "loss": 11.672, "step": 9636 }, { "epoch": 0.20172904630327387, "grad_norm": 0.23260724544525146, "learning_rate": 0.0001977898004043568, "loss": 11.6795, "step": 9637 }, { "epoch": 0.20174997906723605, "grad_norm": 0.26690948009490967, "learning_rate": 0.0001977893419605876, "loss": 11.6691, "step": 9638 }, { "epoch": 0.2017709118311982, "grad_norm": 0.21761083602905273, "learning_rate": 0.00019778888346980918, "loss": 11.6783, "step": 9639 }, { "epoch": 0.20179184459516034, "grad_norm": 0.21547046303749084, "learning_rate": 0.0001977884249320216, "loss": 11.6801, "step": 9640 }, { "epoch": 0.2018127773591225, "grad_norm": 0.24326656758785248, "learning_rate": 0.00019778796634722522, "loss": 11.6765, "step": 9641 }, { "epoch": 0.20183371012308465, "grad_norm": 0.2363099455833435, "learning_rate": 0.00019778750771542016, "loss": 11.6767, "step": 9642 }, { "epoch": 0.2018546428870468, "grad_norm": 0.278789758682251, "learning_rate": 0.00019778704903660672, "loss": 11.6695, "step": 9643 }, { "epoch": 0.20187557565100897, "grad_norm": 0.25859349966049194, "learning_rate": 0.00019778659031078507, "loss": 11.6721, "step": 9644 }, { "epoch": 0.20189650841497112, "grad_norm": 0.2112036943435669, "learning_rate": 0.00019778613153795545, "loss": 11.6708, "step": 9645 }, { "epoch": 0.20191744117893326, "grad_norm": 0.23168130218982697, "learning_rate": 0.00019778567271811805, "loss": 11.6869, "step": 9646 }, { "epoch": 0.20193837394289543, "grad_norm": 0.21546445786952972, "learning_rate": 0.00019778521385127316, "loss": 11.6876, "step": 9647 }, { "epoch": 0.20195930670685758, "grad_norm": 0.2503458559513092, "learning_rate": 0.00019778475493742093, "loss": 11.6662, "step": 9648 }, { "epoch": 0.20198023947081972, "grad_norm": 0.2714870870113373, "learning_rate": 0.00019778429597656162, "loss": 11.686, "step": 9649 }, { "epoch": 0.20200117223478187, "grad_norm": 0.3330462574958801, "learning_rate": 0.00019778383696869543, "loss": 11.6806, "step": 9650 }, { "epoch": 0.20202210499874404, "grad_norm": 0.273048996925354, "learning_rate": 0.0001977833779138226, "loss": 11.6892, "step": 9651 }, { "epoch": 0.20204303776270618, "grad_norm": 0.21437205374240875, "learning_rate": 0.00019778291881194333, "loss": 11.6788, "step": 9652 }, { "epoch": 0.20206397052666833, "grad_norm": 0.2522144019603729, "learning_rate": 0.00019778245966305789, "loss": 11.6639, "step": 9653 }, { "epoch": 0.2020849032906305, "grad_norm": 0.2694856524467468, "learning_rate": 0.0001977820004671664, "loss": 11.6818, "step": 9654 }, { "epoch": 0.20210583605459265, "grad_norm": 0.28500765562057495, "learning_rate": 0.0001977815412242692, "loss": 11.6617, "step": 9655 }, { "epoch": 0.2021267688185548, "grad_norm": 0.2955965995788574, "learning_rate": 0.0001977810819343664, "loss": 11.6853, "step": 9656 }, { "epoch": 0.20214770158251696, "grad_norm": 1.3292590379714966, "learning_rate": 0.0001977806225974583, "loss": 11.6579, "step": 9657 }, { "epoch": 0.2021686343464791, "grad_norm": 0.2524156868457794, "learning_rate": 0.0001977801632135451, "loss": 11.671, "step": 9658 }, { "epoch": 0.20218956711044125, "grad_norm": 0.26720380783081055, "learning_rate": 0.00019777970378262702, "loss": 11.6616, "step": 9659 }, { "epoch": 0.20221049987440343, "grad_norm": 0.2971334159374237, "learning_rate": 0.00019777924430470427, "loss": 11.6822, "step": 9660 }, { "epoch": 0.20223143263836557, "grad_norm": 0.2541237771511078, "learning_rate": 0.0001977787847797771, "loss": 11.6604, "step": 9661 }, { "epoch": 0.20225236540232772, "grad_norm": 0.26894611120224, "learning_rate": 0.00019777832520784567, "loss": 11.6775, "step": 9662 }, { "epoch": 0.2022732981662899, "grad_norm": 0.24297478795051575, "learning_rate": 0.00019777786558891025, "loss": 11.6893, "step": 9663 }, { "epoch": 0.20229423093025203, "grad_norm": 0.24793322384357452, "learning_rate": 0.00019777740592297107, "loss": 11.6708, "step": 9664 }, { "epoch": 0.20231516369421418, "grad_norm": 0.3279930353164673, "learning_rate": 0.00019777694621002828, "loss": 11.6819, "step": 9665 }, { "epoch": 0.20233609645817635, "grad_norm": 0.24182924628257751, "learning_rate": 0.0001977764864500822, "loss": 11.6879, "step": 9666 }, { "epoch": 0.2023570292221385, "grad_norm": 0.2786652743816376, "learning_rate": 0.00019777602664313302, "loss": 11.6671, "step": 9667 }, { "epoch": 0.20237796198610064, "grad_norm": 0.31263071298599243, "learning_rate": 0.00019777556678918093, "loss": 11.6679, "step": 9668 }, { "epoch": 0.20239889475006279, "grad_norm": 0.32106417417526245, "learning_rate": 0.00019777510688822613, "loss": 11.6864, "step": 9669 }, { "epoch": 0.20241982751402496, "grad_norm": 0.2115429937839508, "learning_rate": 0.00019777464694026891, "loss": 11.6555, "step": 9670 }, { "epoch": 0.2024407602779871, "grad_norm": 0.25865286588668823, "learning_rate": 0.00019777418694530945, "loss": 11.689, "step": 9671 }, { "epoch": 0.20246169304194925, "grad_norm": 0.24115118384361267, "learning_rate": 0.00019777372690334797, "loss": 11.6778, "step": 9672 }, { "epoch": 0.20248262580591142, "grad_norm": 0.2064029425382614, "learning_rate": 0.0001977732668143847, "loss": 11.6805, "step": 9673 }, { "epoch": 0.20250355856987357, "grad_norm": 0.26075613498687744, "learning_rate": 0.00019777280667841987, "loss": 11.671, "step": 9674 }, { "epoch": 0.2025244913338357, "grad_norm": 0.2408301830291748, "learning_rate": 0.0001977723464954537, "loss": 11.6808, "step": 9675 }, { "epoch": 0.20254542409779788, "grad_norm": 0.29154878854751587, "learning_rate": 0.0001977718862654864, "loss": 11.6879, "step": 9676 }, { "epoch": 0.20256635686176003, "grad_norm": 0.2535857856273651, "learning_rate": 0.00019777142598851816, "loss": 11.6688, "step": 9677 }, { "epoch": 0.20258728962572217, "grad_norm": 0.2956916391849518, "learning_rate": 0.0001977709656645493, "loss": 11.6869, "step": 9678 }, { "epoch": 0.20260822238968434, "grad_norm": 0.24249231815338135, "learning_rate": 0.00019777050529357993, "loss": 11.7027, "step": 9679 }, { "epoch": 0.2026291551536465, "grad_norm": 0.21981865167617798, "learning_rate": 0.00019777004487561033, "loss": 11.6789, "step": 9680 }, { "epoch": 0.20265008791760863, "grad_norm": 0.2631014287471771, "learning_rate": 0.00019776958441064074, "loss": 11.6823, "step": 9681 }, { "epoch": 0.2026710206815708, "grad_norm": 0.21851187944412231, "learning_rate": 0.00019776912389867128, "loss": 11.6641, "step": 9682 }, { "epoch": 0.20269195344553295, "grad_norm": 0.210159033536911, "learning_rate": 0.0001977686633397023, "loss": 11.6632, "step": 9683 }, { "epoch": 0.2027128862094951, "grad_norm": 0.21663111448287964, "learning_rate": 0.00019776820273373396, "loss": 11.6648, "step": 9684 }, { "epoch": 0.20273381897345724, "grad_norm": 0.2230907529592514, "learning_rate": 0.00019776774208076647, "loss": 11.6809, "step": 9685 }, { "epoch": 0.20275475173741941, "grad_norm": 0.2534843981266022, "learning_rate": 0.00019776728138080007, "loss": 11.6803, "step": 9686 }, { "epoch": 0.20277568450138156, "grad_norm": 0.23690149188041687, "learning_rate": 0.000197766820633835, "loss": 11.6788, "step": 9687 }, { "epoch": 0.2027966172653437, "grad_norm": 0.33249881863594055, "learning_rate": 0.00019776635983987143, "loss": 11.6697, "step": 9688 }, { "epoch": 0.20281755002930588, "grad_norm": 0.2518758475780487, "learning_rate": 0.00019776589899890963, "loss": 11.6841, "step": 9689 }, { "epoch": 0.20283848279326802, "grad_norm": 0.2797379493713379, "learning_rate": 0.00019776543811094978, "loss": 11.6937, "step": 9690 }, { "epoch": 0.20285941555723017, "grad_norm": 0.2459290623664856, "learning_rate": 0.00019776497717599213, "loss": 11.6676, "step": 9691 }, { "epoch": 0.20288034832119234, "grad_norm": 0.26185762882232666, "learning_rate": 0.00019776451619403694, "loss": 11.6817, "step": 9692 }, { "epoch": 0.20290128108515448, "grad_norm": 0.2405027151107788, "learning_rate": 0.00019776405516508435, "loss": 11.6792, "step": 9693 }, { "epoch": 0.20292221384911663, "grad_norm": 0.22102835774421692, "learning_rate": 0.0001977635940891346, "loss": 11.6743, "step": 9694 }, { "epoch": 0.2029431466130788, "grad_norm": 0.400920033454895, "learning_rate": 0.00019776313296618797, "loss": 11.6814, "step": 9695 }, { "epoch": 0.20296407937704095, "grad_norm": 0.23861467838287354, "learning_rate": 0.00019776267179624465, "loss": 11.6781, "step": 9696 }, { "epoch": 0.2029850121410031, "grad_norm": 0.27407392859458923, "learning_rate": 0.00019776221057930482, "loss": 11.6753, "step": 9697 }, { "epoch": 0.20300594490496526, "grad_norm": 0.22858931124210358, "learning_rate": 0.00019776174931536875, "loss": 11.6899, "step": 9698 }, { "epoch": 0.2030268776689274, "grad_norm": 0.28738003969192505, "learning_rate": 0.00019776128800443664, "loss": 11.6921, "step": 9699 }, { "epoch": 0.20304781043288955, "grad_norm": 0.2369072586297989, "learning_rate": 0.00019776082664650874, "loss": 11.6679, "step": 9700 }, { "epoch": 0.20306874319685173, "grad_norm": 0.24638476967811584, "learning_rate": 0.00019776036524158524, "loss": 11.6867, "step": 9701 }, { "epoch": 0.20308967596081387, "grad_norm": 0.2815134525299072, "learning_rate": 0.00019775990378966636, "loss": 11.6845, "step": 9702 }, { "epoch": 0.20311060872477602, "grad_norm": 0.2526208758354187, "learning_rate": 0.00019775944229075235, "loss": 11.677, "step": 9703 }, { "epoch": 0.20313154148873816, "grad_norm": 0.23199261724948883, "learning_rate": 0.0001977589807448434, "loss": 11.6881, "step": 9704 }, { "epoch": 0.20315247425270033, "grad_norm": 0.2516510784626007, "learning_rate": 0.00019775851915193978, "loss": 11.6733, "step": 9705 }, { "epoch": 0.20317340701666248, "grad_norm": 0.23399832844734192, "learning_rate": 0.00019775805751204165, "loss": 11.6665, "step": 9706 }, { "epoch": 0.20319433978062462, "grad_norm": 0.2366075962781906, "learning_rate": 0.00019775759582514927, "loss": 11.683, "step": 9707 }, { "epoch": 0.2032152725445868, "grad_norm": 0.1954851597547531, "learning_rate": 0.00019775713409126287, "loss": 11.6724, "step": 9708 }, { "epoch": 0.20323620530854894, "grad_norm": 0.2093992680311203, "learning_rate": 0.00019775667231038265, "loss": 11.659, "step": 9709 }, { "epoch": 0.20325713807251108, "grad_norm": 0.2539312243461609, "learning_rate": 0.00019775621048250886, "loss": 11.6776, "step": 9710 }, { "epoch": 0.20327807083647326, "grad_norm": 0.22217008471488953, "learning_rate": 0.00019775574860764165, "loss": 11.68, "step": 9711 }, { "epoch": 0.2032990036004354, "grad_norm": 0.23580895364284515, "learning_rate": 0.00019775528668578135, "loss": 11.6654, "step": 9712 }, { "epoch": 0.20331993636439755, "grad_norm": 0.2381724864244461, "learning_rate": 0.00019775482471692807, "loss": 11.6734, "step": 9713 }, { "epoch": 0.20334086912835972, "grad_norm": 0.214573934674263, "learning_rate": 0.0001977543627010821, "loss": 11.6871, "step": 9714 }, { "epoch": 0.20336180189232186, "grad_norm": 0.1924637109041214, "learning_rate": 0.00019775390063824367, "loss": 11.6646, "step": 9715 }, { "epoch": 0.203382734656284, "grad_norm": 0.26360300183296204, "learning_rate": 0.00019775343852841297, "loss": 11.6839, "step": 9716 }, { "epoch": 0.20340366742024618, "grad_norm": 0.2512708604335785, "learning_rate": 0.00019775297637159023, "loss": 11.6753, "step": 9717 }, { "epoch": 0.20342460018420833, "grad_norm": 0.22163452208042145, "learning_rate": 0.00019775251416777567, "loss": 11.6807, "step": 9718 }, { "epoch": 0.20344553294817047, "grad_norm": 0.32913708686828613, "learning_rate": 0.00019775205191696954, "loss": 11.6812, "step": 9719 }, { "epoch": 0.20346646571213264, "grad_norm": 0.2352316826581955, "learning_rate": 0.000197751589619172, "loss": 11.6731, "step": 9720 }, { "epoch": 0.2034873984760948, "grad_norm": 0.2957700788974762, "learning_rate": 0.00019775112727438336, "loss": 11.6808, "step": 9721 }, { "epoch": 0.20350833124005693, "grad_norm": 0.32588720321655273, "learning_rate": 0.00019775066488260376, "loss": 11.6903, "step": 9722 }, { "epoch": 0.20352926400401908, "grad_norm": 0.2688198387622833, "learning_rate": 0.0001977502024438335, "loss": 11.6728, "step": 9723 }, { "epoch": 0.20355019676798125, "grad_norm": 0.2593197226524353, "learning_rate": 0.0001977497399580727, "loss": 11.6834, "step": 9724 }, { "epoch": 0.2035711295319434, "grad_norm": 0.3036085069179535, "learning_rate": 0.00019774927742532167, "loss": 11.6767, "step": 9725 }, { "epoch": 0.20359206229590554, "grad_norm": 0.23516707122325897, "learning_rate": 0.0001977488148455806, "loss": 11.6671, "step": 9726 }, { "epoch": 0.2036129950598677, "grad_norm": 0.26178982853889465, "learning_rate": 0.00019774835221884974, "loss": 11.6815, "step": 9727 }, { "epoch": 0.20363392782382986, "grad_norm": 0.21696172654628754, "learning_rate": 0.00019774788954512925, "loss": 11.6789, "step": 9728 }, { "epoch": 0.203654860587792, "grad_norm": 0.22817878425121307, "learning_rate": 0.0001977474268244194, "loss": 11.6746, "step": 9729 }, { "epoch": 0.20367579335175418, "grad_norm": 0.2872121036052704, "learning_rate": 0.00019774696405672039, "loss": 11.6779, "step": 9730 }, { "epoch": 0.20369672611571632, "grad_norm": 0.24336116015911102, "learning_rate": 0.0001977465012420325, "loss": 11.6725, "step": 9731 }, { "epoch": 0.20371765887967846, "grad_norm": 0.2410254180431366, "learning_rate": 0.0001977460383803559, "loss": 11.6638, "step": 9732 }, { "epoch": 0.20373859164364064, "grad_norm": 0.2531071603298187, "learning_rate": 0.00019774557547169077, "loss": 11.6769, "step": 9733 }, { "epoch": 0.20375952440760278, "grad_norm": 0.23943203687667847, "learning_rate": 0.00019774511251603743, "loss": 11.6866, "step": 9734 }, { "epoch": 0.20378045717156493, "grad_norm": 0.2634023129940033, "learning_rate": 0.00019774464951339605, "loss": 11.6727, "step": 9735 }, { "epoch": 0.2038013899355271, "grad_norm": 0.29516106843948364, "learning_rate": 0.00019774418646376684, "loss": 11.6751, "step": 9736 }, { "epoch": 0.20382232269948924, "grad_norm": 0.23820270597934723, "learning_rate": 0.00019774372336715007, "loss": 11.6857, "step": 9737 }, { "epoch": 0.2038432554634514, "grad_norm": 0.2747935950756073, "learning_rate": 0.00019774326022354587, "loss": 11.6933, "step": 9738 }, { "epoch": 0.20386418822741353, "grad_norm": 0.31088292598724365, "learning_rate": 0.0001977427970329546, "loss": 11.6839, "step": 9739 }, { "epoch": 0.2038851209913757, "grad_norm": 0.2636888027191162, "learning_rate": 0.00019774233379537637, "loss": 11.6594, "step": 9740 }, { "epoch": 0.20390605375533785, "grad_norm": 0.27610206604003906, "learning_rate": 0.00019774187051081147, "loss": 11.6849, "step": 9741 }, { "epoch": 0.2039269865193, "grad_norm": 0.26876720786094666, "learning_rate": 0.00019774140717926004, "loss": 11.6873, "step": 9742 }, { "epoch": 0.20394791928326217, "grad_norm": 0.30841416120529175, "learning_rate": 0.0001977409438007224, "loss": 11.672, "step": 9743 }, { "epoch": 0.2039688520472243, "grad_norm": 0.3064405918121338, "learning_rate": 0.00019774048037519873, "loss": 11.6863, "step": 9744 }, { "epoch": 0.20398978481118646, "grad_norm": 0.23937521874904633, "learning_rate": 0.00019774001690268925, "loss": 11.6776, "step": 9745 }, { "epoch": 0.20401071757514863, "grad_norm": 0.34185048937797546, "learning_rate": 0.00019773955338319417, "loss": 11.6772, "step": 9746 }, { "epoch": 0.20403165033911078, "grad_norm": 0.22537881135940552, "learning_rate": 0.00019773908981671374, "loss": 11.6913, "step": 9747 }, { "epoch": 0.20405258310307292, "grad_norm": 0.2560552656650543, "learning_rate": 0.00019773862620324817, "loss": 11.6693, "step": 9748 }, { "epoch": 0.2040735158670351, "grad_norm": 0.30877190828323364, "learning_rate": 0.00019773816254279767, "loss": 11.6812, "step": 9749 }, { "epoch": 0.20409444863099724, "grad_norm": 0.2288786917924881, "learning_rate": 0.00019773769883536248, "loss": 11.6932, "step": 9750 }, { "epoch": 0.20411538139495938, "grad_norm": 0.32434171438217163, "learning_rate": 0.00019773723508094282, "loss": 11.673, "step": 9751 }, { "epoch": 0.20413631415892156, "grad_norm": 0.24478928744792938, "learning_rate": 0.00019773677127953894, "loss": 11.6764, "step": 9752 }, { "epoch": 0.2041572469228837, "grad_norm": 0.26331377029418945, "learning_rate": 0.000197736307431151, "loss": 11.6794, "step": 9753 }, { "epoch": 0.20417817968684585, "grad_norm": 0.25094088912010193, "learning_rate": 0.00019773584353577928, "loss": 11.6955, "step": 9754 }, { "epoch": 0.20419911245080802, "grad_norm": 0.29193761944770813, "learning_rate": 0.00019773537959342398, "loss": 11.6831, "step": 9755 }, { "epoch": 0.20422004521477016, "grad_norm": 0.2050519585609436, "learning_rate": 0.0001977349156040853, "loss": 11.6713, "step": 9756 }, { "epoch": 0.2042409779787323, "grad_norm": 0.2808402180671692, "learning_rate": 0.00019773445156776352, "loss": 11.6892, "step": 9757 }, { "epoch": 0.20426191074269445, "grad_norm": 0.25486424565315247, "learning_rate": 0.00019773398748445883, "loss": 11.6818, "step": 9758 }, { "epoch": 0.20428284350665663, "grad_norm": 0.22051338851451874, "learning_rate": 0.0001977335233541714, "loss": 11.6821, "step": 9759 }, { "epoch": 0.20430377627061877, "grad_norm": 0.26658275723457336, "learning_rate": 0.00019773305917690156, "loss": 11.6926, "step": 9760 }, { "epoch": 0.20432470903458091, "grad_norm": 0.1955420821905136, "learning_rate": 0.00019773259495264948, "loss": 11.6608, "step": 9761 }, { "epoch": 0.2043456417985431, "grad_norm": 0.2285138964653015, "learning_rate": 0.00019773213068141537, "loss": 11.6804, "step": 9762 }, { "epoch": 0.20436657456250523, "grad_norm": 0.2797202169895172, "learning_rate": 0.00019773166636319944, "loss": 11.7174, "step": 9763 }, { "epoch": 0.20438750732646738, "grad_norm": 0.2348782867193222, "learning_rate": 0.000197731201998002, "loss": 11.6755, "step": 9764 }, { "epoch": 0.20440844009042955, "grad_norm": 0.2637876570224762, "learning_rate": 0.00019773073758582314, "loss": 11.6748, "step": 9765 }, { "epoch": 0.2044293728543917, "grad_norm": 0.26073694229125977, "learning_rate": 0.0001977302731266632, "loss": 11.681, "step": 9766 }, { "epoch": 0.20445030561835384, "grad_norm": 0.23156864941120148, "learning_rate": 0.00019772980862052238, "loss": 11.6916, "step": 9767 }, { "epoch": 0.204471238382316, "grad_norm": 0.2662575840950012, "learning_rate": 0.00019772934406740085, "loss": 11.6777, "step": 9768 }, { "epoch": 0.20449217114627816, "grad_norm": 0.32210826873779297, "learning_rate": 0.00019772887946729886, "loss": 11.6944, "step": 9769 }, { "epoch": 0.2045131039102403, "grad_norm": 0.24525021016597748, "learning_rate": 0.00019772841482021666, "loss": 11.6631, "step": 9770 }, { "epoch": 0.20453403667420247, "grad_norm": 0.31294378638267517, "learning_rate": 0.00019772795012615443, "loss": 11.6703, "step": 9771 }, { "epoch": 0.20455496943816462, "grad_norm": 0.2391285002231598, "learning_rate": 0.00019772748538511242, "loss": 11.6858, "step": 9772 }, { "epoch": 0.20457590220212676, "grad_norm": 0.2838376760482788, "learning_rate": 0.00019772702059709087, "loss": 11.671, "step": 9773 }, { "epoch": 0.2045968349660889, "grad_norm": 0.23264473676681519, "learning_rate": 0.00019772655576208998, "loss": 11.6862, "step": 9774 }, { "epoch": 0.20461776773005108, "grad_norm": 0.2038044035434723, "learning_rate": 0.00019772609088010995, "loss": 11.67, "step": 9775 }, { "epoch": 0.20463870049401323, "grad_norm": 0.27539756894111633, "learning_rate": 0.00019772562595115104, "loss": 11.6948, "step": 9776 }, { "epoch": 0.20465963325797537, "grad_norm": 0.22519128024578094, "learning_rate": 0.0001977251609752135, "loss": 11.6893, "step": 9777 }, { "epoch": 0.20468056602193754, "grad_norm": 0.236287459731102, "learning_rate": 0.00019772469595229745, "loss": 11.6981, "step": 9778 }, { "epoch": 0.2047014987858997, "grad_norm": 0.26832523941993713, "learning_rate": 0.00019772423088240324, "loss": 11.6647, "step": 9779 }, { "epoch": 0.20472243154986183, "grad_norm": 0.2030322551727295, "learning_rate": 0.000197723765765531, "loss": 11.6672, "step": 9780 }, { "epoch": 0.204743364313824, "grad_norm": 0.28118032217025757, "learning_rate": 0.00019772330060168102, "loss": 11.6888, "step": 9781 }, { "epoch": 0.20476429707778615, "grad_norm": 0.2402586042881012, "learning_rate": 0.00019772283539085345, "loss": 11.648, "step": 9782 }, { "epoch": 0.2047852298417483, "grad_norm": 0.3501753509044647, "learning_rate": 0.00019772237013304856, "loss": 11.6837, "step": 9783 }, { "epoch": 0.20480616260571047, "grad_norm": 0.21683155000209808, "learning_rate": 0.00019772190482826658, "loss": 11.6803, "step": 9784 }, { "epoch": 0.2048270953696726, "grad_norm": 0.21987715363502502, "learning_rate": 0.00019772143947650774, "loss": 11.6688, "step": 9785 }, { "epoch": 0.20484802813363476, "grad_norm": 0.2409181445837021, "learning_rate": 0.00019772097407777222, "loss": 11.6843, "step": 9786 }, { "epoch": 0.20486896089759693, "grad_norm": 0.20045116543769836, "learning_rate": 0.0001977205086320603, "loss": 11.6821, "step": 9787 }, { "epoch": 0.20488989366155907, "grad_norm": 0.3651318848133087, "learning_rate": 0.00019772004313937214, "loss": 11.6858, "step": 9788 }, { "epoch": 0.20491082642552122, "grad_norm": 0.24677105247974396, "learning_rate": 0.000197719577599708, "loss": 11.6876, "step": 9789 }, { "epoch": 0.2049317591894834, "grad_norm": 0.2525889575481415, "learning_rate": 0.00019771911201306813, "loss": 11.6702, "step": 9790 }, { "epoch": 0.20495269195344554, "grad_norm": 0.24607986211776733, "learning_rate": 0.0001977186463794527, "loss": 11.6691, "step": 9791 }, { "epoch": 0.20497362471740768, "grad_norm": 0.2567032277584076, "learning_rate": 0.00019771818069886195, "loss": 11.6794, "step": 9792 }, { "epoch": 0.20499455748136983, "grad_norm": 0.3326030969619751, "learning_rate": 0.00019771771497129615, "loss": 11.6811, "step": 9793 }, { "epoch": 0.205015490245332, "grad_norm": 0.2598542273044586, "learning_rate": 0.00019771724919675544, "loss": 11.6726, "step": 9794 }, { "epoch": 0.20503642300929414, "grad_norm": 0.24570035934448242, "learning_rate": 0.0001977167833752401, "loss": 11.6661, "step": 9795 }, { "epoch": 0.2050573557732563, "grad_norm": 0.26119592785835266, "learning_rate": 0.00019771631750675037, "loss": 11.6753, "step": 9796 }, { "epoch": 0.20507828853721846, "grad_norm": 0.21028421819210052, "learning_rate": 0.00019771585159128642, "loss": 11.6814, "step": 9797 }, { "epoch": 0.2050992213011806, "grad_norm": 0.2010875940322876, "learning_rate": 0.00019771538562884852, "loss": 11.6828, "step": 9798 }, { "epoch": 0.20512015406514275, "grad_norm": 0.2768191993236542, "learning_rate": 0.00019771491961943687, "loss": 11.6757, "step": 9799 }, { "epoch": 0.20514108682910492, "grad_norm": 0.3120696246623993, "learning_rate": 0.00019771445356305168, "loss": 11.6909, "step": 9800 }, { "epoch": 0.20516201959306707, "grad_norm": 0.29928308725357056, "learning_rate": 0.00019771398745969325, "loss": 11.6728, "step": 9801 }, { "epoch": 0.2051829523570292, "grad_norm": 0.3915594816207886, "learning_rate": 0.0001977135213093617, "loss": 11.6869, "step": 9802 }, { "epoch": 0.20520388512099139, "grad_norm": 0.248755544424057, "learning_rate": 0.0001977130551120573, "loss": 11.6758, "step": 9803 }, { "epoch": 0.20522481788495353, "grad_norm": 0.203324094414711, "learning_rate": 0.0001977125888677803, "loss": 11.6739, "step": 9804 }, { "epoch": 0.20524575064891568, "grad_norm": 0.22685924172401428, "learning_rate": 0.00019771212257653088, "loss": 11.6742, "step": 9805 }, { "epoch": 0.20526668341287785, "grad_norm": 0.25695204734802246, "learning_rate": 0.00019771165623830932, "loss": 11.6868, "step": 9806 }, { "epoch": 0.20528761617684, "grad_norm": 0.23528213798999786, "learning_rate": 0.00019771118985311578, "loss": 11.6878, "step": 9807 }, { "epoch": 0.20530854894080214, "grad_norm": 0.22052177786827087, "learning_rate": 0.0001977107234209505, "loss": 11.6642, "step": 9808 }, { "epoch": 0.2053294817047643, "grad_norm": 0.19387853145599365, "learning_rate": 0.00019771025694181375, "loss": 11.6892, "step": 9809 }, { "epoch": 0.20535041446872646, "grad_norm": 0.2210465669631958, "learning_rate": 0.0001977097904157057, "loss": 11.6889, "step": 9810 }, { "epoch": 0.2053713472326886, "grad_norm": 0.2512439787387848, "learning_rate": 0.00019770932384262658, "loss": 11.7011, "step": 9811 }, { "epoch": 0.20539227999665075, "grad_norm": 0.23811836540699005, "learning_rate": 0.00019770885722257667, "loss": 11.6773, "step": 9812 }, { "epoch": 0.20541321276061292, "grad_norm": 0.27045589685440063, "learning_rate": 0.00019770839055555613, "loss": 11.6871, "step": 9813 }, { "epoch": 0.20543414552457506, "grad_norm": 0.26100030541419983, "learning_rate": 0.0001977079238415652, "loss": 11.6753, "step": 9814 }, { "epoch": 0.2054550782885372, "grad_norm": 0.24507562816143036, "learning_rate": 0.00019770745708060414, "loss": 11.6832, "step": 9815 }, { "epoch": 0.20547601105249938, "grad_norm": 0.3011797368526459, "learning_rate": 0.00019770699027267316, "loss": 11.6839, "step": 9816 }, { "epoch": 0.20549694381646152, "grad_norm": 0.27319636940956116, "learning_rate": 0.00019770652341777242, "loss": 11.6739, "step": 9817 }, { "epoch": 0.20551787658042367, "grad_norm": 0.2662891149520874, "learning_rate": 0.00019770605651590222, "loss": 11.6599, "step": 9818 }, { "epoch": 0.20553880934438584, "grad_norm": 0.2838863432407379, "learning_rate": 0.00019770558956706276, "loss": 11.6914, "step": 9819 }, { "epoch": 0.205559742108348, "grad_norm": 0.22885428369045258, "learning_rate": 0.00019770512257125429, "loss": 11.664, "step": 9820 }, { "epoch": 0.20558067487231013, "grad_norm": 0.25111544132232666, "learning_rate": 0.00019770465552847698, "loss": 11.6756, "step": 9821 }, { "epoch": 0.2056016076362723, "grad_norm": 0.24629339575767517, "learning_rate": 0.00019770418843873108, "loss": 11.6783, "step": 9822 }, { "epoch": 0.20562254040023445, "grad_norm": 0.23792429268360138, "learning_rate": 0.00019770372130201684, "loss": 11.6793, "step": 9823 }, { "epoch": 0.2056434731641966, "grad_norm": 0.3045458495616913, "learning_rate": 0.00019770325411833445, "loss": 11.6904, "step": 9824 }, { "epoch": 0.20566440592815877, "grad_norm": 0.22765713930130005, "learning_rate": 0.00019770278688768412, "loss": 11.6758, "step": 9825 }, { "epoch": 0.2056853386921209, "grad_norm": 0.24841345846652985, "learning_rate": 0.00019770231961006615, "loss": 11.6842, "step": 9826 }, { "epoch": 0.20570627145608306, "grad_norm": 0.21971994638442993, "learning_rate": 0.0001977018522854807, "loss": 11.6707, "step": 9827 }, { "epoch": 0.2057272042200452, "grad_norm": 0.2671278417110443, "learning_rate": 0.000197701384913928, "loss": 11.6806, "step": 9828 }, { "epoch": 0.20574813698400737, "grad_norm": 0.2592371106147766, "learning_rate": 0.00019770091749540832, "loss": 11.6779, "step": 9829 }, { "epoch": 0.20576906974796952, "grad_norm": 0.19109998643398285, "learning_rate": 0.0001977004500299218, "loss": 11.6561, "step": 9830 }, { "epoch": 0.20579000251193166, "grad_norm": 0.25599318742752075, "learning_rate": 0.00019769998251746877, "loss": 11.6935, "step": 9831 }, { "epoch": 0.20581093527589384, "grad_norm": 0.3405972719192505, "learning_rate": 0.00019769951495804937, "loss": 11.6716, "step": 9832 }, { "epoch": 0.20583186803985598, "grad_norm": 0.226841539144516, "learning_rate": 0.00019769904735166387, "loss": 11.6734, "step": 9833 }, { "epoch": 0.20585280080381813, "grad_norm": 0.21350860595703125, "learning_rate": 0.00019769857969831247, "loss": 11.6597, "step": 9834 }, { "epoch": 0.2058737335677803, "grad_norm": 0.24769318103790283, "learning_rate": 0.00019769811199799538, "loss": 11.6695, "step": 9835 }, { "epoch": 0.20589466633174244, "grad_norm": 0.2610708475112915, "learning_rate": 0.00019769764425071288, "loss": 11.6826, "step": 9836 }, { "epoch": 0.2059155990957046, "grad_norm": 0.2820904552936554, "learning_rate": 0.00019769717645646518, "loss": 11.6674, "step": 9837 }, { "epoch": 0.20593653185966676, "grad_norm": 0.2865296006202698, "learning_rate": 0.00019769670861525247, "loss": 11.6834, "step": 9838 }, { "epoch": 0.2059574646236289, "grad_norm": 0.2520230710506439, "learning_rate": 0.000197696240727075, "loss": 11.6771, "step": 9839 }, { "epoch": 0.20597839738759105, "grad_norm": 0.26282092928886414, "learning_rate": 0.00019769577279193297, "loss": 11.6973, "step": 9840 }, { "epoch": 0.20599933015155322, "grad_norm": 0.31994256377220154, "learning_rate": 0.00019769530480982664, "loss": 11.6815, "step": 9841 }, { "epoch": 0.20602026291551537, "grad_norm": 0.23834726214408875, "learning_rate": 0.0001976948367807562, "loss": 11.683, "step": 9842 }, { "epoch": 0.2060411956794775, "grad_norm": 0.25591790676116943, "learning_rate": 0.0001976943687047219, "loss": 11.6697, "step": 9843 }, { "epoch": 0.20606212844343968, "grad_norm": 0.23556648194789886, "learning_rate": 0.000197693900581724, "loss": 11.6568, "step": 9844 }, { "epoch": 0.20608306120740183, "grad_norm": 0.2355634570121765, "learning_rate": 0.00019769343241176263, "loss": 11.6721, "step": 9845 }, { "epoch": 0.20610399397136397, "grad_norm": 0.18447892367839813, "learning_rate": 0.00019769296419483808, "loss": 11.6683, "step": 9846 }, { "epoch": 0.20612492673532612, "grad_norm": 0.2955437898635864, "learning_rate": 0.0001976924959309506, "loss": 11.6915, "step": 9847 }, { "epoch": 0.2061458594992883, "grad_norm": 0.24064701795578003, "learning_rate": 0.00019769202762010034, "loss": 11.6762, "step": 9848 }, { "epoch": 0.20616679226325044, "grad_norm": 0.20096652209758759, "learning_rate": 0.0001976915592622876, "loss": 11.6873, "step": 9849 }, { "epoch": 0.20618772502721258, "grad_norm": 0.21458996832370758, "learning_rate": 0.00019769109085751254, "loss": 11.6739, "step": 9850 }, { "epoch": 0.20620865779117475, "grad_norm": 0.25431200861930847, "learning_rate": 0.00019769062240577542, "loss": 11.6795, "step": 9851 }, { "epoch": 0.2062295905551369, "grad_norm": 0.22861608862876892, "learning_rate": 0.00019769015390707647, "loss": 11.6867, "step": 9852 }, { "epoch": 0.20625052331909904, "grad_norm": 0.20681369304656982, "learning_rate": 0.0001976896853614159, "loss": 11.6766, "step": 9853 }, { "epoch": 0.20627145608306122, "grad_norm": 0.25298967957496643, "learning_rate": 0.00019768921676879394, "loss": 11.6587, "step": 9854 }, { "epoch": 0.20629238884702336, "grad_norm": 0.2540844678878784, "learning_rate": 0.0001976887481292108, "loss": 11.6893, "step": 9855 }, { "epoch": 0.2063133216109855, "grad_norm": 0.23471495509147644, "learning_rate": 0.00019768827944266674, "loss": 11.6844, "step": 9856 }, { "epoch": 0.20633425437494768, "grad_norm": 0.24075065553188324, "learning_rate": 0.00019768781070916199, "loss": 11.6959, "step": 9857 }, { "epoch": 0.20635518713890982, "grad_norm": 0.39449456334114075, "learning_rate": 0.00019768734192869672, "loss": 11.6767, "step": 9858 }, { "epoch": 0.20637611990287197, "grad_norm": 0.2237602174282074, "learning_rate": 0.0001976868731012712, "loss": 11.687, "step": 9859 }, { "epoch": 0.20639705266683414, "grad_norm": 0.25631120800971985, "learning_rate": 0.0001976864042268856, "loss": 11.6703, "step": 9860 }, { "epoch": 0.20641798543079629, "grad_norm": 0.29825088381767273, "learning_rate": 0.00019768593530554025, "loss": 11.6669, "step": 9861 }, { "epoch": 0.20643891819475843, "grad_norm": 0.21293656527996063, "learning_rate": 0.0001976854663372353, "loss": 11.6677, "step": 9862 }, { "epoch": 0.2064598509587206, "grad_norm": 0.2955878973007202, "learning_rate": 0.00019768499732197097, "loss": 11.6804, "step": 9863 }, { "epoch": 0.20648078372268275, "grad_norm": 0.24086225032806396, "learning_rate": 0.0001976845282597475, "loss": 11.6666, "step": 9864 }, { "epoch": 0.2065017164866449, "grad_norm": 0.1964775025844574, "learning_rate": 0.00019768405915056514, "loss": 11.675, "step": 9865 }, { "epoch": 0.20652264925060704, "grad_norm": 0.2538498640060425, "learning_rate": 0.0001976835899944241, "loss": 11.6856, "step": 9866 }, { "epoch": 0.2065435820145692, "grad_norm": 0.23608796298503876, "learning_rate": 0.00019768312079132457, "loss": 11.6813, "step": 9867 }, { "epoch": 0.20656451477853136, "grad_norm": 0.2774505317211151, "learning_rate": 0.00019768265154126682, "loss": 11.6879, "step": 9868 }, { "epoch": 0.2065854475424935, "grad_norm": 0.2483973652124405, "learning_rate": 0.00019768218224425104, "loss": 11.6712, "step": 9869 }, { "epoch": 0.20660638030645567, "grad_norm": 0.22628186643123627, "learning_rate": 0.00019768171290027753, "loss": 11.6821, "step": 9870 }, { "epoch": 0.20662731307041782, "grad_norm": 0.27792325615882874, "learning_rate": 0.00019768124350934643, "loss": 11.6921, "step": 9871 }, { "epoch": 0.20664824583437996, "grad_norm": 0.22401095926761627, "learning_rate": 0.000197680774071458, "loss": 11.6778, "step": 9872 }, { "epoch": 0.20666917859834213, "grad_norm": 0.22113899886608124, "learning_rate": 0.00019768030458661246, "loss": 11.673, "step": 9873 }, { "epoch": 0.20669011136230428, "grad_norm": 0.2736431658267975, "learning_rate": 0.00019767983505481005, "loss": 11.6755, "step": 9874 }, { "epoch": 0.20671104412626642, "grad_norm": 0.2690366506576538, "learning_rate": 0.00019767936547605098, "loss": 11.6797, "step": 9875 }, { "epoch": 0.2067319768902286, "grad_norm": 0.24572598934173584, "learning_rate": 0.00019767889585033547, "loss": 11.6812, "step": 9876 }, { "epoch": 0.20675290965419074, "grad_norm": 0.24010787904262543, "learning_rate": 0.00019767842617766376, "loss": 11.6656, "step": 9877 }, { "epoch": 0.2067738424181529, "grad_norm": 0.22441068291664124, "learning_rate": 0.0001976779564580361, "loss": 11.6628, "step": 9878 }, { "epoch": 0.20679477518211506, "grad_norm": 0.30946260690689087, "learning_rate": 0.00019767748669145268, "loss": 11.688, "step": 9879 }, { "epoch": 0.2068157079460772, "grad_norm": 0.3290144205093384, "learning_rate": 0.00019767701687791372, "loss": 11.6744, "step": 9880 }, { "epoch": 0.20683664071003935, "grad_norm": 0.24147093296051025, "learning_rate": 0.00019767654701741946, "loss": 11.6911, "step": 9881 }, { "epoch": 0.2068575734740015, "grad_norm": 0.2896716594696045, "learning_rate": 0.00019767607710997017, "loss": 11.6805, "step": 9882 }, { "epoch": 0.20687850623796367, "grad_norm": 0.2663096785545349, "learning_rate": 0.00019767560715556597, "loss": 11.673, "step": 9883 }, { "epoch": 0.2068994390019258, "grad_norm": 0.23147092759609222, "learning_rate": 0.00019767513715420715, "loss": 11.6891, "step": 9884 }, { "epoch": 0.20692037176588796, "grad_norm": 0.24953824281692505, "learning_rate": 0.000197674667105894, "loss": 11.6814, "step": 9885 }, { "epoch": 0.20694130452985013, "grad_norm": 0.2921728789806366, "learning_rate": 0.00019767419701062662, "loss": 11.6871, "step": 9886 }, { "epoch": 0.20696223729381227, "grad_norm": 0.2551897168159485, "learning_rate": 0.00019767372686840533, "loss": 11.6839, "step": 9887 }, { "epoch": 0.20698317005777442, "grad_norm": 0.2587791383266449, "learning_rate": 0.0001976732566792303, "loss": 11.6922, "step": 9888 }, { "epoch": 0.2070041028217366, "grad_norm": 0.2683072090148926, "learning_rate": 0.00019767278644310178, "loss": 11.6806, "step": 9889 }, { "epoch": 0.20702503558569874, "grad_norm": 0.22929856181144714, "learning_rate": 0.00019767231616001999, "loss": 11.6849, "step": 9890 }, { "epoch": 0.20704596834966088, "grad_norm": 0.34307432174682617, "learning_rate": 0.0001976718458299852, "loss": 11.6903, "step": 9891 }, { "epoch": 0.20706690111362305, "grad_norm": 0.22509193420410156, "learning_rate": 0.00019767137545299753, "loss": 11.6837, "step": 9892 }, { "epoch": 0.2070878338775852, "grad_norm": 0.3092462122440338, "learning_rate": 0.0001976709050290573, "loss": 11.6777, "step": 9893 }, { "epoch": 0.20710876664154734, "grad_norm": 0.22233109176158905, "learning_rate": 0.00019767043455816473, "loss": 11.6828, "step": 9894 }, { "epoch": 0.20712969940550952, "grad_norm": 0.24143481254577637, "learning_rate": 0.00019766996404032002, "loss": 11.6637, "step": 9895 }, { "epoch": 0.20715063216947166, "grad_norm": 0.2994156777858734, "learning_rate": 0.0001976694934755234, "loss": 11.6678, "step": 9896 }, { "epoch": 0.2071715649334338, "grad_norm": 0.27739229798316956, "learning_rate": 0.00019766902286377506, "loss": 11.6774, "step": 9897 }, { "epoch": 0.20719249769739598, "grad_norm": 0.24091939628124237, "learning_rate": 0.00019766855220507533, "loss": 11.6812, "step": 9898 }, { "epoch": 0.20721343046135812, "grad_norm": 0.9502580761909485, "learning_rate": 0.00019766808149942433, "loss": 11.5563, "step": 9899 }, { "epoch": 0.20723436322532027, "grad_norm": 0.2171572744846344, "learning_rate": 0.00019766761074682231, "loss": 11.678, "step": 9900 }, { "epoch": 0.2072552959892824, "grad_norm": 0.2774423062801361, "learning_rate": 0.00019766713994726954, "loss": 11.6679, "step": 9901 }, { "epoch": 0.20727622875324458, "grad_norm": 0.24081742763519287, "learning_rate": 0.0001976666691007662, "loss": 11.6796, "step": 9902 }, { "epoch": 0.20729716151720673, "grad_norm": 0.265756756067276, "learning_rate": 0.00019766619820731256, "loss": 11.6823, "step": 9903 }, { "epoch": 0.20731809428116887, "grad_norm": 0.21172279119491577, "learning_rate": 0.0001976657272669088, "loss": 11.658, "step": 9904 }, { "epoch": 0.20733902704513105, "grad_norm": 0.23547102510929108, "learning_rate": 0.00019766525627955517, "loss": 11.6838, "step": 9905 }, { "epoch": 0.2073599598090932, "grad_norm": 0.3240290880203247, "learning_rate": 0.00019766478524525192, "loss": 11.6776, "step": 9906 }, { "epoch": 0.20738089257305534, "grad_norm": 0.2401398867368698, "learning_rate": 0.0001976643141639992, "loss": 11.6643, "step": 9907 }, { "epoch": 0.2074018253370175, "grad_norm": 0.24890147149562836, "learning_rate": 0.00019766384303579733, "loss": 11.6839, "step": 9908 }, { "epoch": 0.20742275810097965, "grad_norm": 0.20800259709358215, "learning_rate": 0.00019766337186064646, "loss": 11.6703, "step": 9909 }, { "epoch": 0.2074436908649418, "grad_norm": 0.26359301805496216, "learning_rate": 0.00019766290063854687, "loss": 11.6921, "step": 9910 }, { "epoch": 0.20746462362890397, "grad_norm": 0.2784370481967926, "learning_rate": 0.00019766242936949876, "loss": 11.6854, "step": 9911 }, { "epoch": 0.20748555639286612, "grad_norm": 0.2434450089931488, "learning_rate": 0.00019766195805350238, "loss": 11.6797, "step": 9912 }, { "epoch": 0.20750648915682826, "grad_norm": 0.2199140191078186, "learning_rate": 0.00019766148669055794, "loss": 11.6768, "step": 9913 }, { "epoch": 0.20752742192079043, "grad_norm": 0.23447784781455994, "learning_rate": 0.00019766101528066564, "loss": 11.6722, "step": 9914 }, { "epoch": 0.20754835468475258, "grad_norm": 0.24884533882141113, "learning_rate": 0.00019766054382382576, "loss": 11.6753, "step": 9915 }, { "epoch": 0.20756928744871472, "grad_norm": 0.24965310096740723, "learning_rate": 0.0001976600723200385, "loss": 11.6748, "step": 9916 }, { "epoch": 0.20759022021267687, "grad_norm": 0.2780047655105591, "learning_rate": 0.00019765960076930403, "loss": 11.6906, "step": 9917 }, { "epoch": 0.20761115297663904, "grad_norm": 0.2647860646247864, "learning_rate": 0.00019765912917162268, "loss": 11.6718, "step": 9918 }, { "epoch": 0.20763208574060119, "grad_norm": 1.172430396080017, "learning_rate": 0.00019765865752699463, "loss": 11.679, "step": 9919 }, { "epoch": 0.20765301850456333, "grad_norm": 0.27078908681869507, "learning_rate": 0.0001976581858354201, "loss": 11.6783, "step": 9920 }, { "epoch": 0.2076739512685255, "grad_norm": 0.3029256761074066, "learning_rate": 0.00019765771409689932, "loss": 11.6786, "step": 9921 }, { "epoch": 0.20769488403248765, "grad_norm": 0.25324949622154236, "learning_rate": 0.00019765724231143254, "loss": 11.6653, "step": 9922 }, { "epoch": 0.2077158167964498, "grad_norm": 0.2745071351528168, "learning_rate": 0.00019765677047901993, "loss": 11.682, "step": 9923 }, { "epoch": 0.20773674956041197, "grad_norm": 0.23684599995613098, "learning_rate": 0.00019765629859966178, "loss": 11.681, "step": 9924 }, { "epoch": 0.2077576823243741, "grad_norm": 0.29491668939590454, "learning_rate": 0.0001976558266733583, "loss": 11.6762, "step": 9925 }, { "epoch": 0.20777861508833625, "grad_norm": 0.25049325823783875, "learning_rate": 0.0001976553547001097, "loss": 11.6693, "step": 9926 }, { "epoch": 0.20779954785229843, "grad_norm": 0.27761346101760864, "learning_rate": 0.00019765488267991618, "loss": 11.674, "step": 9927 }, { "epoch": 0.20782048061626057, "grad_norm": 0.23702292144298553, "learning_rate": 0.00019765441061277803, "loss": 11.683, "step": 9928 }, { "epoch": 0.20784141338022272, "grad_norm": 0.24990525841712952, "learning_rate": 0.00019765393849869545, "loss": 11.6637, "step": 9929 }, { "epoch": 0.2078623461441849, "grad_norm": 0.24198289215564728, "learning_rate": 0.00019765346633766865, "loss": 11.6729, "step": 9930 }, { "epoch": 0.20788327890814703, "grad_norm": 0.1981707215309143, "learning_rate": 0.0001976529941296979, "loss": 11.6754, "step": 9931 }, { "epoch": 0.20790421167210918, "grad_norm": 0.26254841685295105, "learning_rate": 0.00019765252187478336, "loss": 11.6636, "step": 9932 }, { "epoch": 0.20792514443607135, "grad_norm": 0.3143080472946167, "learning_rate": 0.0001976520495729253, "loss": 11.6699, "step": 9933 }, { "epoch": 0.2079460772000335, "grad_norm": 0.2314678281545639, "learning_rate": 0.00019765157722412396, "loss": 11.6791, "step": 9934 }, { "epoch": 0.20796700996399564, "grad_norm": 0.26766541600227356, "learning_rate": 0.00019765110482837956, "loss": 11.698, "step": 9935 }, { "epoch": 0.2079879427279578, "grad_norm": 0.22918081283569336, "learning_rate": 0.0001976506323856923, "loss": 11.678, "step": 9936 }, { "epoch": 0.20800887549191996, "grad_norm": 0.2545369863510132, "learning_rate": 0.0001976501598960624, "loss": 11.6778, "step": 9937 }, { "epoch": 0.2080298082558821, "grad_norm": 0.23242944478988647, "learning_rate": 0.00019764968735949017, "loss": 11.6775, "step": 9938 }, { "epoch": 0.20805074101984425, "grad_norm": 0.2671608328819275, "learning_rate": 0.00019764921477597572, "loss": 11.6871, "step": 9939 }, { "epoch": 0.20807167378380642, "grad_norm": 0.2620985209941864, "learning_rate": 0.00019764874214551938, "loss": 11.667, "step": 9940 }, { "epoch": 0.20809260654776857, "grad_norm": 0.2533607482910156, "learning_rate": 0.00019764826946812132, "loss": 11.6661, "step": 9941 }, { "epoch": 0.2081135393117307, "grad_norm": 0.2368418276309967, "learning_rate": 0.00019764779674378178, "loss": 11.6783, "step": 9942 }, { "epoch": 0.20813447207569288, "grad_norm": 0.22833143174648285, "learning_rate": 0.00019764732397250097, "loss": 11.6789, "step": 9943 }, { "epoch": 0.20815540483965503, "grad_norm": 0.2585769593715668, "learning_rate": 0.00019764685115427914, "loss": 11.6851, "step": 9944 }, { "epoch": 0.20817633760361717, "grad_norm": 0.2383987158536911, "learning_rate": 0.00019764637828911653, "loss": 11.6777, "step": 9945 }, { "epoch": 0.20819727036757935, "grad_norm": 0.26658374071121216, "learning_rate": 0.00019764590537701336, "loss": 11.667, "step": 9946 }, { "epoch": 0.2082182031315415, "grad_norm": 0.23765034973621368, "learning_rate": 0.00019764543241796982, "loss": 11.6635, "step": 9947 }, { "epoch": 0.20823913589550364, "grad_norm": 0.23963987827301025, "learning_rate": 0.00019764495941198616, "loss": 11.6761, "step": 9948 }, { "epoch": 0.2082600686594658, "grad_norm": 0.24763377010822296, "learning_rate": 0.0001976444863590626, "loss": 11.6805, "step": 9949 }, { "epoch": 0.20828100142342795, "grad_norm": 0.24913522601127625, "learning_rate": 0.00019764401325919942, "loss": 11.6723, "step": 9950 }, { "epoch": 0.2083019341873901, "grad_norm": 0.25507673621177673, "learning_rate": 0.0001976435401123968, "loss": 11.6784, "step": 9951 }, { "epoch": 0.20832286695135227, "grad_norm": 0.2785545885562897, "learning_rate": 0.00019764306691865495, "loss": 11.6785, "step": 9952 }, { "epoch": 0.20834379971531441, "grad_norm": 0.2539829611778259, "learning_rate": 0.00019764259367797412, "loss": 11.6823, "step": 9953 }, { "epoch": 0.20836473247927656, "grad_norm": 0.2234482616186142, "learning_rate": 0.00019764212039035455, "loss": 11.6695, "step": 9954 }, { "epoch": 0.2083856652432387, "grad_norm": 0.24489156901836395, "learning_rate": 0.00019764164705579645, "loss": 11.687, "step": 9955 }, { "epoch": 0.20840659800720088, "grad_norm": 0.2721993625164032, "learning_rate": 0.00019764117367430007, "loss": 11.6703, "step": 9956 }, { "epoch": 0.20842753077116302, "grad_norm": 0.20857445895671844, "learning_rate": 0.0001976407002458656, "loss": 11.6621, "step": 9957 }, { "epoch": 0.20844846353512517, "grad_norm": 0.23988358676433563, "learning_rate": 0.00019764022677049326, "loss": 11.6676, "step": 9958 }, { "epoch": 0.20846939629908734, "grad_norm": 0.23796887695789337, "learning_rate": 0.0001976397532481834, "loss": 11.677, "step": 9959 }, { "epoch": 0.20849032906304948, "grad_norm": 1.2224889993667603, "learning_rate": 0.00019763927967893604, "loss": 11.6968, "step": 9960 }, { "epoch": 0.20851126182701163, "grad_norm": 0.30832764506340027, "learning_rate": 0.0001976388060627516, "loss": 11.6866, "step": 9961 }, { "epoch": 0.2085321945909738, "grad_norm": 0.22973638772964478, "learning_rate": 0.0001976383323996302, "loss": 11.66, "step": 9962 }, { "epoch": 0.20855312735493595, "grad_norm": 0.21053765714168549, "learning_rate": 0.0001976378586895721, "loss": 11.7007, "step": 9963 }, { "epoch": 0.2085740601188981, "grad_norm": 0.3273547887802124, "learning_rate": 0.00019763738493257753, "loss": 11.6754, "step": 9964 }, { "epoch": 0.20859499288286026, "grad_norm": 0.23200927674770355, "learning_rate": 0.00019763691112864674, "loss": 11.6815, "step": 9965 }, { "epoch": 0.2086159256468224, "grad_norm": 0.2823418974876404, "learning_rate": 0.00019763643727777988, "loss": 11.6887, "step": 9966 }, { "epoch": 0.20863685841078455, "grad_norm": 0.24533672630786896, "learning_rate": 0.00019763596337997726, "loss": 11.672, "step": 9967 }, { "epoch": 0.20865779117474673, "grad_norm": 0.21980604529380798, "learning_rate": 0.00019763548943523907, "loss": 11.6896, "step": 9968 }, { "epoch": 0.20867872393870887, "grad_norm": 0.22268211841583252, "learning_rate": 0.00019763501544356552, "loss": 11.6634, "step": 9969 }, { "epoch": 0.20869965670267102, "grad_norm": 0.24929626286029816, "learning_rate": 0.00019763454140495686, "loss": 11.679, "step": 9970 }, { "epoch": 0.20872058946663316, "grad_norm": 0.26184967160224915, "learning_rate": 0.00019763406731941336, "loss": 11.677, "step": 9971 }, { "epoch": 0.20874152223059533, "grad_norm": 0.21990029513835907, "learning_rate": 0.00019763359318693518, "loss": 11.6782, "step": 9972 }, { "epoch": 0.20876245499455748, "grad_norm": 0.25014036893844604, "learning_rate": 0.0001976331190075226, "loss": 11.6753, "step": 9973 }, { "epoch": 0.20878338775851962, "grad_norm": 0.2813563942909241, "learning_rate": 0.0001976326447811758, "loss": 11.6661, "step": 9974 }, { "epoch": 0.2088043205224818, "grad_norm": 0.3632946312427521, "learning_rate": 0.00019763217050789504, "loss": 11.7006, "step": 9975 }, { "epoch": 0.20882525328644394, "grad_norm": 0.22233499586582184, "learning_rate": 0.0001976316961876805, "loss": 11.678, "step": 9976 }, { "epoch": 0.20884618605040609, "grad_norm": 0.24105112254619598, "learning_rate": 0.00019763122182053252, "loss": 11.6674, "step": 9977 }, { "epoch": 0.20886711881436826, "grad_norm": 1.132871150970459, "learning_rate": 0.0001976307474064512, "loss": 11.6535, "step": 9978 }, { "epoch": 0.2088880515783304, "grad_norm": 0.26853519678115845, "learning_rate": 0.00019763027294543686, "loss": 11.6727, "step": 9979 }, { "epoch": 0.20890898434229255, "grad_norm": 0.21279554069042206, "learning_rate": 0.00019762979843748966, "loss": 11.6789, "step": 9980 }, { "epoch": 0.20892991710625472, "grad_norm": 0.23912692070007324, "learning_rate": 0.00019762932388260986, "loss": 11.6636, "step": 9981 }, { "epoch": 0.20895084987021686, "grad_norm": 0.2584584355354309, "learning_rate": 0.0001976288492807977, "loss": 11.6822, "step": 9982 }, { "epoch": 0.208971782634179, "grad_norm": 0.2749204933643341, "learning_rate": 0.0001976283746320534, "loss": 11.675, "step": 9983 }, { "epoch": 0.20899271539814118, "grad_norm": 0.26157572865486145, "learning_rate": 0.00019762789993637718, "loss": 11.6875, "step": 9984 }, { "epoch": 0.20901364816210333, "grad_norm": 0.2826675474643707, "learning_rate": 0.00019762742519376927, "loss": 11.6752, "step": 9985 }, { "epoch": 0.20903458092606547, "grad_norm": 0.2415541708469391, "learning_rate": 0.0001976269504042299, "loss": 11.6803, "step": 9986 }, { "epoch": 0.20905551369002764, "grad_norm": 0.3068184554576874, "learning_rate": 0.0001976264755677593, "loss": 11.6926, "step": 9987 }, { "epoch": 0.2090764464539898, "grad_norm": 0.33286288380622864, "learning_rate": 0.0001976260006843577, "loss": 11.6698, "step": 9988 }, { "epoch": 0.20909737921795193, "grad_norm": 0.3281160891056061, "learning_rate": 0.0001976255257540253, "loss": 11.6718, "step": 9989 }, { "epoch": 0.20911831198191408, "grad_norm": 0.23173481225967407, "learning_rate": 0.0001976250507767624, "loss": 11.6706, "step": 9990 }, { "epoch": 0.20913924474587625, "grad_norm": 0.24854880571365356, "learning_rate": 0.00019762457575256913, "loss": 11.6842, "step": 9991 }, { "epoch": 0.2091601775098384, "grad_norm": 0.2973760962486267, "learning_rate": 0.0001976241006814458, "loss": 11.669, "step": 9992 }, { "epoch": 0.20918111027380054, "grad_norm": 0.28363409638404846, "learning_rate": 0.00019762362556339263, "loss": 11.685, "step": 9993 }, { "epoch": 0.2092020430377627, "grad_norm": 0.2838197648525238, "learning_rate": 0.00019762315039840978, "loss": 11.6847, "step": 9994 }, { "epoch": 0.20922297580172486, "grad_norm": 0.29630032181739807, "learning_rate": 0.00019762267518649753, "loss": 11.6874, "step": 9995 }, { "epoch": 0.209243908565687, "grad_norm": 0.20099392533302307, "learning_rate": 0.00019762219992765614, "loss": 11.6673, "step": 9996 }, { "epoch": 0.20926484132964918, "grad_norm": 0.24002960324287415, "learning_rate": 0.00019762172462188578, "loss": 11.6748, "step": 9997 }, { "epoch": 0.20928577409361132, "grad_norm": 0.2402936816215515, "learning_rate": 0.00019762124926918668, "loss": 11.6796, "step": 9998 }, { "epoch": 0.20930670685757347, "grad_norm": 0.2416052222251892, "learning_rate": 0.00019762077386955914, "loss": 11.6643, "step": 9999 }, { "epoch": 0.20932763962153564, "grad_norm": 0.32624953985214233, "learning_rate": 0.0001976202984230033, "loss": 11.691, "step": 10000 }, { "epoch": 0.20932763962153564, "eval_loss": 11.678925514221191, "eval_runtime": 34.33, "eval_samples_per_second": 27.993, "eval_steps_per_second": 7.02, "step": 10000 }, { "epoch": 0.20934857238549778, "grad_norm": 0.21980831027030945, "learning_rate": 0.0001976198229295194, "loss": 11.6891, "step": 10001 }, { "epoch": 0.20936950514945993, "grad_norm": 0.28229543566703796, "learning_rate": 0.00019761934738910775, "loss": 11.6859, "step": 10002 }, { "epoch": 0.2093904379134221, "grad_norm": 0.26909273862838745, "learning_rate": 0.0001976188718017685, "loss": 11.6684, "step": 10003 }, { "epoch": 0.20941137067738425, "grad_norm": 0.2244427502155304, "learning_rate": 0.0001976183961675019, "loss": 11.6793, "step": 10004 }, { "epoch": 0.2094323034413464, "grad_norm": 0.24329814314842224, "learning_rate": 0.00019761792048630822, "loss": 11.6702, "step": 10005 }, { "epoch": 0.20945323620530856, "grad_norm": 0.22806358337402344, "learning_rate": 0.0001976174447581876, "loss": 11.6618, "step": 10006 }, { "epoch": 0.2094741689692707, "grad_norm": 0.2869786322116852, "learning_rate": 0.00019761696898314032, "loss": 11.6755, "step": 10007 }, { "epoch": 0.20949510173323285, "grad_norm": 0.2626846432685852, "learning_rate": 0.00019761649316116665, "loss": 11.6679, "step": 10008 }, { "epoch": 0.209516034497195, "grad_norm": 0.25680872797966003, "learning_rate": 0.00019761601729226673, "loss": 11.6773, "step": 10009 }, { "epoch": 0.20953696726115717, "grad_norm": 0.2692457437515259, "learning_rate": 0.00019761554137644085, "loss": 11.6999, "step": 10010 }, { "epoch": 0.20955790002511931, "grad_norm": 0.32072198390960693, "learning_rate": 0.00019761506541368925, "loss": 11.682, "step": 10011 }, { "epoch": 0.20957883278908146, "grad_norm": 0.3099750578403473, "learning_rate": 0.00019761458940401207, "loss": 11.6716, "step": 10012 }, { "epoch": 0.20959976555304363, "grad_norm": 0.2023184448480606, "learning_rate": 0.00019761411334740965, "loss": 11.6722, "step": 10013 }, { "epoch": 0.20962069831700578, "grad_norm": 0.715281069278717, "learning_rate": 0.00019761363724388217, "loss": 11.6316, "step": 10014 }, { "epoch": 0.20964163108096792, "grad_norm": 0.24164921045303345, "learning_rate": 0.00019761316109342982, "loss": 11.6826, "step": 10015 }, { "epoch": 0.2096625638449301, "grad_norm": 0.22058062255382538, "learning_rate": 0.00019761268489605288, "loss": 11.6855, "step": 10016 }, { "epoch": 0.20968349660889224, "grad_norm": 0.27369675040245056, "learning_rate": 0.0001976122086517516, "loss": 11.6654, "step": 10017 }, { "epoch": 0.20970442937285438, "grad_norm": 0.22208259999752045, "learning_rate": 0.00019761173236052613, "loss": 11.6871, "step": 10018 }, { "epoch": 0.20972536213681656, "grad_norm": 0.30076444149017334, "learning_rate": 0.00019761125602237676, "loss": 11.6986, "step": 10019 }, { "epoch": 0.2097462949007787, "grad_norm": 0.19152486324310303, "learning_rate": 0.00019761077963730372, "loss": 11.6727, "step": 10020 }, { "epoch": 0.20976722766474085, "grad_norm": 0.21849216520786285, "learning_rate": 0.0001976103032053072, "loss": 11.6829, "step": 10021 }, { "epoch": 0.20978816042870302, "grad_norm": 0.30683013796806335, "learning_rate": 0.00019760982672638745, "loss": 11.6715, "step": 10022 }, { "epoch": 0.20980909319266516, "grad_norm": 0.27099788188934326, "learning_rate": 0.00019760935020054474, "loss": 11.6738, "step": 10023 }, { "epoch": 0.2098300259566273, "grad_norm": 0.22886769473552704, "learning_rate": 0.00019760887362777922, "loss": 11.6756, "step": 10024 }, { "epoch": 0.20985095872058945, "grad_norm": 0.2742978036403656, "learning_rate": 0.00019760839700809117, "loss": 11.6634, "step": 10025 }, { "epoch": 0.20987189148455163, "grad_norm": 0.23105867207050323, "learning_rate": 0.00019760792034148082, "loss": 11.6945, "step": 10026 }, { "epoch": 0.20989282424851377, "grad_norm": 0.1891949325799942, "learning_rate": 0.00019760744362794838, "loss": 11.672, "step": 10027 }, { "epoch": 0.20991375701247592, "grad_norm": 0.27837467193603516, "learning_rate": 0.00019760696686749407, "loss": 11.6638, "step": 10028 }, { "epoch": 0.2099346897764381, "grad_norm": 0.27565062046051025, "learning_rate": 0.00019760649006011813, "loss": 11.6942, "step": 10029 }, { "epoch": 0.20995562254040023, "grad_norm": 0.26114267110824585, "learning_rate": 0.00019760601320582081, "loss": 11.6787, "step": 10030 }, { "epoch": 0.20997655530436238, "grad_norm": 0.21686261892318726, "learning_rate": 0.00019760553630460232, "loss": 11.6796, "step": 10031 }, { "epoch": 0.20999748806832455, "grad_norm": 0.24168646335601807, "learning_rate": 0.0001976050593564629, "loss": 11.6931, "step": 10032 }, { "epoch": 0.2100184208322867, "grad_norm": 0.2831345200538635, "learning_rate": 0.00019760458236140277, "loss": 11.6823, "step": 10033 }, { "epoch": 0.21003935359624884, "grad_norm": 0.2354011833667755, "learning_rate": 0.00019760410531942214, "loss": 11.6806, "step": 10034 }, { "epoch": 0.210060286360211, "grad_norm": 0.2514393925666809, "learning_rate": 0.0001976036282305213, "loss": 11.6865, "step": 10035 }, { "epoch": 0.21008121912417316, "grad_norm": 0.2349153459072113, "learning_rate": 0.00019760315109470042, "loss": 11.6833, "step": 10036 }, { "epoch": 0.2101021518881353, "grad_norm": 0.31549862027168274, "learning_rate": 0.00019760267391195975, "loss": 11.6893, "step": 10037 }, { "epoch": 0.21012308465209747, "grad_norm": 0.2436886578798294, "learning_rate": 0.0001976021966822995, "loss": 11.6881, "step": 10038 }, { "epoch": 0.21014401741605962, "grad_norm": 0.2572450041770935, "learning_rate": 0.00019760171940571994, "loss": 11.6856, "step": 10039 }, { "epoch": 0.21016495018002176, "grad_norm": 0.2806824743747711, "learning_rate": 0.0001976012420822213, "loss": 11.686, "step": 10040 }, { "epoch": 0.21018588294398394, "grad_norm": 0.23743116855621338, "learning_rate": 0.00019760076471180373, "loss": 11.6664, "step": 10041 }, { "epoch": 0.21020681570794608, "grad_norm": 0.25166767835617065, "learning_rate": 0.00019760028729446755, "loss": 11.6794, "step": 10042 }, { "epoch": 0.21022774847190823, "grad_norm": 0.24661239981651306, "learning_rate": 0.00019759980983021297, "loss": 11.6762, "step": 10043 }, { "epoch": 0.21024868123587037, "grad_norm": 0.2586453855037689, "learning_rate": 0.00019759933231904017, "loss": 11.6846, "step": 10044 }, { "epoch": 0.21026961399983254, "grad_norm": 0.2393386960029602, "learning_rate": 0.00019759885476094944, "loss": 11.672, "step": 10045 }, { "epoch": 0.2102905467637947, "grad_norm": 0.22333677113056183, "learning_rate": 0.00019759837715594097, "loss": 11.6703, "step": 10046 }, { "epoch": 0.21031147952775683, "grad_norm": 0.23518621921539307, "learning_rate": 0.000197597899504015, "loss": 11.6773, "step": 10047 }, { "epoch": 0.210332412291719, "grad_norm": 0.25675809383392334, "learning_rate": 0.00019759742180517178, "loss": 11.6899, "step": 10048 }, { "epoch": 0.21035334505568115, "grad_norm": 0.23976409435272217, "learning_rate": 0.0001975969440594115, "loss": 11.6908, "step": 10049 }, { "epoch": 0.2103742778196433, "grad_norm": 0.3661139905452728, "learning_rate": 0.00019759646626673445, "loss": 11.6808, "step": 10050 }, { "epoch": 0.21039521058360547, "grad_norm": 0.2119922637939453, "learning_rate": 0.00019759598842714082, "loss": 11.6873, "step": 10051 }, { "epoch": 0.2104161433475676, "grad_norm": 0.2973380386829376, "learning_rate": 0.0001975955105406308, "loss": 11.6619, "step": 10052 }, { "epoch": 0.21043707611152976, "grad_norm": 0.272372841835022, "learning_rate": 0.00019759503260720468, "loss": 11.6737, "step": 10053 }, { "epoch": 0.21045800887549193, "grad_norm": 0.3022526800632477, "learning_rate": 0.0001975945546268627, "loss": 11.6789, "step": 10054 }, { "epoch": 0.21047894163945408, "grad_norm": 0.28947916626930237, "learning_rate": 0.00019759407659960504, "loss": 11.6947, "step": 10055 }, { "epoch": 0.21049987440341622, "grad_norm": 0.2751534581184387, "learning_rate": 0.00019759359852543197, "loss": 11.6775, "step": 10056 }, { "epoch": 0.2105208071673784, "grad_norm": 0.21666888892650604, "learning_rate": 0.00019759312040434364, "loss": 11.674, "step": 10057 }, { "epoch": 0.21054173993134054, "grad_norm": 0.2172805666923523, "learning_rate": 0.00019759264223634043, "loss": 11.6721, "step": 10058 }, { "epoch": 0.21056267269530268, "grad_norm": 0.19792351126670837, "learning_rate": 0.00019759216402142244, "loss": 11.6554, "step": 10059 }, { "epoch": 0.21058360545926483, "grad_norm": 0.32006898522377014, "learning_rate": 0.00019759168575958993, "loss": 11.6736, "step": 10060 }, { "epoch": 0.210604538223227, "grad_norm": 0.27516600489616394, "learning_rate": 0.00019759120745084315, "loss": 11.6903, "step": 10061 }, { "epoch": 0.21062547098718915, "grad_norm": 0.23667095601558685, "learning_rate": 0.0001975907290951823, "loss": 11.6963, "step": 10062 }, { "epoch": 0.2106464037511513, "grad_norm": 0.20922496914863586, "learning_rate": 0.0001975902506926077, "loss": 11.6763, "step": 10063 }, { "epoch": 0.21066733651511346, "grad_norm": 0.22021348774433136, "learning_rate": 0.00019758977224311944, "loss": 11.6644, "step": 10064 }, { "epoch": 0.2106882692790756, "grad_norm": 0.2939381003379822, "learning_rate": 0.0001975892937467178, "loss": 11.6942, "step": 10065 }, { "epoch": 0.21070920204303775, "grad_norm": 0.27860549092292786, "learning_rate": 0.0001975888152034031, "loss": 11.6774, "step": 10066 }, { "epoch": 0.21073013480699992, "grad_norm": 0.2414252758026123, "learning_rate": 0.0001975883366131755, "loss": 11.6795, "step": 10067 }, { "epoch": 0.21075106757096207, "grad_norm": 0.26068106293678284, "learning_rate": 0.00019758785797603518, "loss": 11.6703, "step": 10068 }, { "epoch": 0.21077200033492421, "grad_norm": 0.29055413603782654, "learning_rate": 0.00019758737929198244, "loss": 11.6784, "step": 10069 }, { "epoch": 0.2107929330988864, "grad_norm": 0.2809041738510132, "learning_rate": 0.0001975869005610175, "loss": 11.643, "step": 10070 }, { "epoch": 0.21081386586284853, "grad_norm": 0.32701441645622253, "learning_rate": 0.0001975864217831406, "loss": 11.6991, "step": 10071 }, { "epoch": 0.21083479862681068, "grad_norm": 0.2589292824268341, "learning_rate": 0.0001975859429583519, "loss": 11.6738, "step": 10072 }, { "epoch": 0.21085573139077285, "grad_norm": 0.1902862787246704, "learning_rate": 0.00019758546408665172, "loss": 11.663, "step": 10073 }, { "epoch": 0.210876664154735, "grad_norm": 0.2906130850315094, "learning_rate": 0.00019758498516804024, "loss": 11.6957, "step": 10074 }, { "epoch": 0.21089759691869714, "grad_norm": 0.2617749273777008, "learning_rate": 0.00019758450620251773, "loss": 11.6691, "step": 10075 }, { "epoch": 0.2109185296826593, "grad_norm": 0.23829901218414307, "learning_rate": 0.00019758402719008436, "loss": 11.6586, "step": 10076 }, { "epoch": 0.21093946244662146, "grad_norm": 0.2490040510892868, "learning_rate": 0.00019758354813074038, "loss": 11.6754, "step": 10077 }, { "epoch": 0.2109603952105836, "grad_norm": 0.2078428566455841, "learning_rate": 0.00019758306902448606, "loss": 11.67, "step": 10078 }, { "epoch": 0.21098132797454575, "grad_norm": 0.22930972278118134, "learning_rate": 0.0001975825898713216, "loss": 11.6899, "step": 10079 }, { "epoch": 0.21100226073850792, "grad_norm": 0.24187102913856506, "learning_rate": 0.00019758211067124722, "loss": 11.6766, "step": 10080 }, { "epoch": 0.21102319350247006, "grad_norm": 0.24082577228546143, "learning_rate": 0.00019758163142426317, "loss": 11.688, "step": 10081 }, { "epoch": 0.2110441262664322, "grad_norm": 0.309873104095459, "learning_rate": 0.0001975811521303697, "loss": 11.6906, "step": 10082 }, { "epoch": 0.21106505903039438, "grad_norm": 0.27373239398002625, "learning_rate": 0.00019758067278956697, "loss": 11.6744, "step": 10083 }, { "epoch": 0.21108599179435653, "grad_norm": 0.25123077630996704, "learning_rate": 0.00019758019340185528, "loss": 11.6655, "step": 10084 }, { "epoch": 0.21110692455831867, "grad_norm": 0.2588856518268585, "learning_rate": 0.00019757971396723483, "loss": 11.669, "step": 10085 }, { "epoch": 0.21112785732228084, "grad_norm": 0.2344980090856552, "learning_rate": 0.0001975792344857059, "loss": 11.6835, "step": 10086 }, { "epoch": 0.211148790086243, "grad_norm": 0.23626267910003662, "learning_rate": 0.00019757875495726861, "loss": 11.655, "step": 10087 }, { "epoch": 0.21116972285020513, "grad_norm": 0.2566002309322357, "learning_rate": 0.00019757827538192328, "loss": 11.6582, "step": 10088 }, { "epoch": 0.2111906556141673, "grad_norm": 0.22637717425823212, "learning_rate": 0.0001975777957596701, "loss": 11.6849, "step": 10089 }, { "epoch": 0.21121158837812945, "grad_norm": 0.21818888187408447, "learning_rate": 0.00019757731609050937, "loss": 11.7054, "step": 10090 }, { "epoch": 0.2112325211420916, "grad_norm": 0.23840518295764923, "learning_rate": 0.00019757683637444123, "loss": 11.6833, "step": 10091 }, { "epoch": 0.21125345390605377, "grad_norm": 0.2878458797931671, "learning_rate": 0.00019757635661146595, "loss": 11.6761, "step": 10092 }, { "epoch": 0.2112743866700159, "grad_norm": 0.27897605299949646, "learning_rate": 0.00019757587680158375, "loss": 11.6874, "step": 10093 }, { "epoch": 0.21129531943397806, "grad_norm": 0.25089097023010254, "learning_rate": 0.0001975753969447949, "loss": 11.6765, "step": 10094 }, { "epoch": 0.21131625219794023, "grad_norm": 0.2603825032711029, "learning_rate": 0.00019757491704109958, "loss": 11.6739, "step": 10095 }, { "epoch": 0.21133718496190237, "grad_norm": 0.24984212219715118, "learning_rate": 0.00019757443709049805, "loss": 11.6866, "step": 10096 }, { "epoch": 0.21135811772586452, "grad_norm": 0.24754342436790466, "learning_rate": 0.00019757395709299054, "loss": 11.6689, "step": 10097 }, { "epoch": 0.21137905048982666, "grad_norm": 0.22887468338012695, "learning_rate": 0.00019757347704857728, "loss": 11.6701, "step": 10098 }, { "epoch": 0.21139998325378884, "grad_norm": 0.20755574107170105, "learning_rate": 0.00019757299695725847, "loss": 11.6578, "step": 10099 }, { "epoch": 0.21142091601775098, "grad_norm": 0.22006350755691528, "learning_rate": 0.00019757251681903437, "loss": 11.6829, "step": 10100 }, { "epoch": 0.21144184878171313, "grad_norm": 0.2743687927722931, "learning_rate": 0.0001975720366339052, "loss": 11.6739, "step": 10101 }, { "epoch": 0.2114627815456753, "grad_norm": 0.2582893669605255, "learning_rate": 0.00019757155640187123, "loss": 11.6757, "step": 10102 }, { "epoch": 0.21148371430963744, "grad_norm": 0.21725520491600037, "learning_rate": 0.00019757107612293262, "loss": 11.6768, "step": 10103 }, { "epoch": 0.2115046470735996, "grad_norm": 0.2918147146701813, "learning_rate": 0.00019757059579708966, "loss": 11.6908, "step": 10104 }, { "epoch": 0.21152557983756176, "grad_norm": 0.28212517499923706, "learning_rate": 0.00019757011542434255, "loss": 11.6786, "step": 10105 }, { "epoch": 0.2115465126015239, "grad_norm": 0.24551233649253845, "learning_rate": 0.00019756963500469152, "loss": 11.6818, "step": 10106 }, { "epoch": 0.21156744536548605, "grad_norm": 0.30064651370048523, "learning_rate": 0.00019756915453813682, "loss": 11.6906, "step": 10107 }, { "epoch": 0.21158837812944822, "grad_norm": 0.20772504806518555, "learning_rate": 0.00019756867402467867, "loss": 11.6814, "step": 10108 }, { "epoch": 0.21160931089341037, "grad_norm": 0.2597355842590332, "learning_rate": 0.0001975681934643173, "loss": 11.6816, "step": 10109 }, { "epoch": 0.2116302436573725, "grad_norm": 0.2902146577835083, "learning_rate": 0.00019756771285705298, "loss": 11.6835, "step": 10110 }, { "epoch": 0.21165117642133469, "grad_norm": 0.21766746044158936, "learning_rate": 0.00019756723220288585, "loss": 11.6692, "step": 10111 }, { "epoch": 0.21167210918529683, "grad_norm": 0.1986369788646698, "learning_rate": 0.00019756675150181625, "loss": 11.6586, "step": 10112 }, { "epoch": 0.21169304194925898, "grad_norm": 0.288238525390625, "learning_rate": 0.00019756627075384432, "loss": 11.687, "step": 10113 }, { "epoch": 0.21171397471322112, "grad_norm": 0.24189788103103638, "learning_rate": 0.00019756578995897036, "loss": 11.6818, "step": 10114 }, { "epoch": 0.2117349074771833, "grad_norm": 0.2795363962650299, "learning_rate": 0.00019756530911719457, "loss": 11.6641, "step": 10115 }, { "epoch": 0.21175584024114544, "grad_norm": 0.25010138750076294, "learning_rate": 0.00019756482822851714, "loss": 11.6688, "step": 10116 }, { "epoch": 0.21177677300510758, "grad_norm": 0.24265903234481812, "learning_rate": 0.00019756434729293837, "loss": 11.6872, "step": 10117 }, { "epoch": 0.21179770576906976, "grad_norm": 0.24290932714939117, "learning_rate": 0.00019756386631045842, "loss": 11.6881, "step": 10118 }, { "epoch": 0.2118186385330319, "grad_norm": 0.2926328778266907, "learning_rate": 0.0001975633852810776, "loss": 11.6887, "step": 10119 }, { "epoch": 0.21183957129699404, "grad_norm": 0.3028404116630554, "learning_rate": 0.00019756290420479614, "loss": 11.6727, "step": 10120 }, { "epoch": 0.21186050406095622, "grad_norm": 0.21379932761192322, "learning_rate": 0.00019756242308161418, "loss": 11.6739, "step": 10121 }, { "epoch": 0.21188143682491836, "grad_norm": 0.28748688101768494, "learning_rate": 0.00019756194191153202, "loss": 11.6695, "step": 10122 }, { "epoch": 0.2119023695888805, "grad_norm": 0.32225731015205383, "learning_rate": 0.0001975614606945499, "loss": 11.6715, "step": 10123 }, { "epoch": 0.21192330235284268, "grad_norm": 0.22795847058296204, "learning_rate": 0.000197560979430668, "loss": 11.6733, "step": 10124 }, { "epoch": 0.21194423511680482, "grad_norm": 0.23280620574951172, "learning_rate": 0.00019756049811988662, "loss": 11.6788, "step": 10125 }, { "epoch": 0.21196516788076697, "grad_norm": 0.28219687938690186, "learning_rate": 0.00019756001676220593, "loss": 11.6911, "step": 10126 }, { "epoch": 0.21198610064472914, "grad_norm": 0.2095879167318344, "learning_rate": 0.0001975595353576262, "loss": 11.6844, "step": 10127 }, { "epoch": 0.2120070334086913, "grad_norm": 0.24321569502353668, "learning_rate": 0.00019755905390614765, "loss": 11.6683, "step": 10128 }, { "epoch": 0.21202796617265343, "grad_norm": 0.2551618814468384, "learning_rate": 0.00019755857240777048, "loss": 11.6865, "step": 10129 }, { "epoch": 0.2120488989366156, "grad_norm": 0.3145560920238495, "learning_rate": 0.00019755809086249496, "loss": 11.6878, "step": 10130 }, { "epoch": 0.21206983170057775, "grad_norm": 0.25093647837638855, "learning_rate": 0.0001975576092703213, "loss": 11.6819, "step": 10131 }, { "epoch": 0.2120907644645399, "grad_norm": 0.265017569065094, "learning_rate": 0.00019755712763124976, "loss": 11.6741, "step": 10132 }, { "epoch": 0.21211169722850204, "grad_norm": 0.24634091556072235, "learning_rate": 0.00019755664594528054, "loss": 11.6825, "step": 10133 }, { "epoch": 0.2121326299924642, "grad_norm": 0.3354627192020416, "learning_rate": 0.00019755616421241392, "loss": 11.6934, "step": 10134 }, { "epoch": 0.21215356275642636, "grad_norm": 0.2249365746974945, "learning_rate": 0.00019755568243265005, "loss": 11.6854, "step": 10135 }, { "epoch": 0.2121744955203885, "grad_norm": 0.28569430112838745, "learning_rate": 0.00019755520060598923, "loss": 11.6909, "step": 10136 }, { "epoch": 0.21219542828435067, "grad_norm": 0.2446616142988205, "learning_rate": 0.00019755471873243168, "loss": 11.668, "step": 10137 }, { "epoch": 0.21221636104831282, "grad_norm": 0.25935131311416626, "learning_rate": 0.0001975542368119776, "loss": 11.6587, "step": 10138 }, { "epoch": 0.21223729381227496, "grad_norm": 0.2140955775976181, "learning_rate": 0.00019755375484462726, "loss": 11.6943, "step": 10139 }, { "epoch": 0.21225822657623714, "grad_norm": 0.34727197885513306, "learning_rate": 0.00019755327283038087, "loss": 11.682, "step": 10140 }, { "epoch": 0.21227915934019928, "grad_norm": 0.2153579294681549, "learning_rate": 0.00019755279076923867, "loss": 11.6669, "step": 10141 }, { "epoch": 0.21230009210416143, "grad_norm": 0.2519916296005249, "learning_rate": 0.00019755230866120088, "loss": 11.6502, "step": 10142 }, { "epoch": 0.2123210248681236, "grad_norm": 0.25294050574302673, "learning_rate": 0.00019755182650626777, "loss": 11.6639, "step": 10143 }, { "epoch": 0.21234195763208574, "grad_norm": 0.2721186578273773, "learning_rate": 0.0001975513443044395, "loss": 11.6878, "step": 10144 }, { "epoch": 0.2123628903960479, "grad_norm": 0.26735299825668335, "learning_rate": 0.00019755086205571637, "loss": 11.6637, "step": 10145 }, { "epoch": 0.21238382316001006, "grad_norm": 0.22938093543052673, "learning_rate": 0.00019755037976009856, "loss": 11.6547, "step": 10146 }, { "epoch": 0.2124047559239722, "grad_norm": 0.23629529774188995, "learning_rate": 0.00019754989741758636, "loss": 11.6663, "step": 10147 }, { "epoch": 0.21242568868793435, "grad_norm": 0.2749675512313843, "learning_rate": 0.00019754941502817994, "loss": 11.6796, "step": 10148 }, { "epoch": 0.2124466214518965, "grad_norm": 0.29192501306533813, "learning_rate": 0.00019754893259187956, "loss": 11.6794, "step": 10149 }, { "epoch": 0.21246755421585867, "grad_norm": 0.20172615349292755, "learning_rate": 0.0001975484501086855, "loss": 11.6732, "step": 10150 }, { "epoch": 0.2124884869798208, "grad_norm": 0.2593059241771698, "learning_rate": 0.0001975479675785979, "loss": 11.6913, "step": 10151 }, { "epoch": 0.21250941974378296, "grad_norm": 0.22252151370048523, "learning_rate": 0.00019754748500161706, "loss": 11.6678, "step": 10152 }, { "epoch": 0.21253035250774513, "grad_norm": 0.2695418894290924, "learning_rate": 0.00019754700237774317, "loss": 11.6717, "step": 10153 }, { "epoch": 0.21255128527170727, "grad_norm": 0.25736162066459656, "learning_rate": 0.00019754651970697648, "loss": 11.6685, "step": 10154 }, { "epoch": 0.21257221803566942, "grad_norm": 0.31803634762763977, "learning_rate": 0.00019754603698931723, "loss": 11.6769, "step": 10155 }, { "epoch": 0.2125931507996316, "grad_norm": 0.24037878215312958, "learning_rate": 0.00019754555422476567, "loss": 11.6767, "step": 10156 }, { "epoch": 0.21261408356359374, "grad_norm": 0.28128716349601746, "learning_rate": 0.00019754507141332196, "loss": 11.684, "step": 10157 }, { "epoch": 0.21263501632755588, "grad_norm": 0.24684983491897583, "learning_rate": 0.00019754458855498642, "loss": 11.6754, "step": 10158 }, { "epoch": 0.21265594909151805, "grad_norm": 0.2129364013671875, "learning_rate": 0.00019754410564975925, "loss": 11.664, "step": 10159 }, { "epoch": 0.2126768818554802, "grad_norm": 0.22416608035564423, "learning_rate": 0.00019754362269764063, "loss": 11.6853, "step": 10160 }, { "epoch": 0.21269781461944234, "grad_norm": 0.31632906198501587, "learning_rate": 0.00019754313969863087, "loss": 11.6788, "step": 10161 }, { "epoch": 0.21271874738340452, "grad_norm": 0.28640782833099365, "learning_rate": 0.00019754265665273012, "loss": 11.6865, "step": 10162 }, { "epoch": 0.21273968014736666, "grad_norm": 0.25109368562698364, "learning_rate": 0.0001975421735599387, "loss": 11.6707, "step": 10163 }, { "epoch": 0.2127606129113288, "grad_norm": 0.26823121309280396, "learning_rate": 0.0001975416904202568, "loss": 11.681, "step": 10164 }, { "epoch": 0.21278154567529098, "grad_norm": 0.2831895351409912, "learning_rate": 0.00019754120723368466, "loss": 11.6728, "step": 10165 }, { "epoch": 0.21280247843925312, "grad_norm": 0.22522293031215668, "learning_rate": 0.0001975407240002225, "loss": 11.6701, "step": 10166 }, { "epoch": 0.21282341120321527, "grad_norm": 0.26262137293815613, "learning_rate": 0.00019754024071987056, "loss": 11.6805, "step": 10167 }, { "epoch": 0.2128443439671774, "grad_norm": 0.22352714836597443, "learning_rate": 0.00019753975739262908, "loss": 11.6756, "step": 10168 }, { "epoch": 0.21286527673113959, "grad_norm": 0.2639595866203308, "learning_rate": 0.00019753927401849826, "loss": 11.6758, "step": 10169 }, { "epoch": 0.21288620949510173, "grad_norm": 0.32994362711906433, "learning_rate": 0.00019753879059747839, "loss": 11.6704, "step": 10170 }, { "epoch": 0.21290714225906388, "grad_norm": 0.24978524446487427, "learning_rate": 0.00019753830712956964, "loss": 11.6828, "step": 10171 }, { "epoch": 0.21292807502302605, "grad_norm": 0.2503492832183838, "learning_rate": 0.00019753782361477227, "loss": 11.6734, "step": 10172 }, { "epoch": 0.2129490077869882, "grad_norm": 0.2805951237678528, "learning_rate": 0.00019753734005308654, "loss": 11.6689, "step": 10173 }, { "epoch": 0.21296994055095034, "grad_norm": 0.26668038964271545, "learning_rate": 0.00019753685644451264, "loss": 11.6716, "step": 10174 }, { "epoch": 0.2129908733149125, "grad_norm": 0.25511160492897034, "learning_rate": 0.0001975363727890508, "loss": 11.6855, "step": 10175 }, { "epoch": 0.21301180607887465, "grad_norm": 0.23628033697605133, "learning_rate": 0.0001975358890867013, "loss": 11.6777, "step": 10176 }, { "epoch": 0.2130327388428368, "grad_norm": 0.20858456194400787, "learning_rate": 0.00019753540533746434, "loss": 11.6867, "step": 10177 }, { "epoch": 0.21305367160679897, "grad_norm": 0.3879895806312561, "learning_rate": 0.00019753492154134011, "loss": 11.6867, "step": 10178 }, { "epoch": 0.21307460437076112, "grad_norm": 0.26025140285491943, "learning_rate": 0.00019753443769832894, "loss": 11.6907, "step": 10179 }, { "epoch": 0.21309553713472326, "grad_norm": 0.22697941958904266, "learning_rate": 0.000197533953808431, "loss": 11.6681, "step": 10180 }, { "epoch": 0.21311646989868543, "grad_norm": 0.29014724493026733, "learning_rate": 0.00019753346987164654, "loss": 11.713, "step": 10181 }, { "epoch": 0.21313740266264758, "grad_norm": 0.25172361731529236, "learning_rate": 0.00019753298588797574, "loss": 11.6726, "step": 10182 }, { "epoch": 0.21315833542660972, "grad_norm": 0.2992294728755951, "learning_rate": 0.00019753250185741895, "loss": 11.6948, "step": 10183 }, { "epoch": 0.2131792681905719, "grad_norm": 0.2925310730934143, "learning_rate": 0.0001975320177799763, "loss": 11.6682, "step": 10184 }, { "epoch": 0.21320020095453404, "grad_norm": 0.3287511169910431, "learning_rate": 0.00019753153365564805, "loss": 11.6467, "step": 10185 }, { "epoch": 0.2132211337184962, "grad_norm": 0.2561683654785156, "learning_rate": 0.00019753104948443443, "loss": 11.6723, "step": 10186 }, { "epoch": 0.21324206648245833, "grad_norm": 0.2956770360469818, "learning_rate": 0.00019753056526633568, "loss": 11.6902, "step": 10187 }, { "epoch": 0.2132629992464205, "grad_norm": 0.2347823977470398, "learning_rate": 0.00019753008100135207, "loss": 11.6752, "step": 10188 }, { "epoch": 0.21328393201038265, "grad_norm": 0.2677226960659027, "learning_rate": 0.00019752959668948376, "loss": 11.6686, "step": 10189 }, { "epoch": 0.2133048647743448, "grad_norm": 1.3358242511749268, "learning_rate": 0.000197529112330731, "loss": 11.5873, "step": 10190 }, { "epoch": 0.21332579753830697, "grad_norm": 0.35674840211868286, "learning_rate": 0.00019752862792509407, "loss": 11.6863, "step": 10191 }, { "epoch": 0.2133467303022691, "grad_norm": 0.21675308048725128, "learning_rate": 0.0001975281434725732, "loss": 11.6841, "step": 10192 }, { "epoch": 0.21336766306623126, "grad_norm": 0.3026883602142334, "learning_rate": 0.00019752765897316856, "loss": 11.6718, "step": 10193 }, { "epoch": 0.21338859583019343, "grad_norm": 0.9175008535385132, "learning_rate": 0.00019752717442688043, "loss": 11.675, "step": 10194 }, { "epoch": 0.21340952859415557, "grad_norm": 0.26934394240379333, "learning_rate": 0.00019752668983370904, "loss": 11.677, "step": 10195 }, { "epoch": 0.21343046135811772, "grad_norm": 0.23231089115142822, "learning_rate": 0.0001975262051936546, "loss": 11.679, "step": 10196 }, { "epoch": 0.2134513941220799, "grad_norm": 0.207871213555336, "learning_rate": 0.00019752572050671735, "loss": 11.6721, "step": 10197 }, { "epoch": 0.21347232688604204, "grad_norm": 0.26524487137794495, "learning_rate": 0.00019752523577289756, "loss": 11.6772, "step": 10198 }, { "epoch": 0.21349325965000418, "grad_norm": 0.28712812066078186, "learning_rate": 0.00019752475099219543, "loss": 11.6678, "step": 10199 }, { "epoch": 0.21351419241396635, "grad_norm": 0.22451582551002502, "learning_rate": 0.00019752426616461119, "loss": 11.6832, "step": 10200 }, { "epoch": 0.2135351251779285, "grad_norm": 0.26682764291763306, "learning_rate": 0.00019752378129014506, "loss": 11.6725, "step": 10201 }, { "epoch": 0.21355605794189064, "grad_norm": 0.23994337022304535, "learning_rate": 0.00019752329636879733, "loss": 11.6834, "step": 10202 }, { "epoch": 0.2135769907058528, "grad_norm": 0.32162341475486755, "learning_rate": 0.00019752281140056818, "loss": 11.6843, "step": 10203 }, { "epoch": 0.21359792346981496, "grad_norm": 0.2221251130104065, "learning_rate": 0.00019752232638545786, "loss": 11.669, "step": 10204 }, { "epoch": 0.2136188562337771, "grad_norm": 0.27443668246269226, "learning_rate": 0.0001975218413234666, "loss": 11.685, "step": 10205 }, { "epoch": 0.21363978899773925, "grad_norm": 0.2825404405593872, "learning_rate": 0.00019752135621459464, "loss": 11.673, "step": 10206 }, { "epoch": 0.21366072176170142, "grad_norm": 0.22224535048007965, "learning_rate": 0.00019752087105884224, "loss": 11.647, "step": 10207 }, { "epoch": 0.21368165452566357, "grad_norm": 0.34416523575782776, "learning_rate": 0.00019752038585620955, "loss": 11.6992, "step": 10208 }, { "epoch": 0.2137025872896257, "grad_norm": 0.2813953757286072, "learning_rate": 0.00019751990060669686, "loss": 11.6612, "step": 10209 }, { "epoch": 0.21372352005358788, "grad_norm": 0.257954865694046, "learning_rate": 0.00019751941531030443, "loss": 11.6706, "step": 10210 }, { "epoch": 0.21374445281755003, "grad_norm": 0.21063123643398285, "learning_rate": 0.00019751892996703246, "loss": 11.6802, "step": 10211 }, { "epoch": 0.21376538558151217, "grad_norm": 0.24446240067481995, "learning_rate": 0.00019751844457688118, "loss": 11.6648, "step": 10212 }, { "epoch": 0.21378631834547435, "grad_norm": 0.1953161358833313, "learning_rate": 0.00019751795913985082, "loss": 11.6642, "step": 10213 }, { "epoch": 0.2138072511094365, "grad_norm": 0.21718354523181915, "learning_rate": 0.00019751747365594167, "loss": 11.6842, "step": 10214 }, { "epoch": 0.21382818387339864, "grad_norm": 0.2911403775215149, "learning_rate": 0.00019751698812515387, "loss": 11.6688, "step": 10215 }, { "epoch": 0.2138491166373608, "grad_norm": 0.2267615646123886, "learning_rate": 0.0001975165025474877, "loss": 11.6767, "step": 10216 }, { "epoch": 0.21387004940132295, "grad_norm": 0.2507610619068146, "learning_rate": 0.0001975160169229434, "loss": 11.6766, "step": 10217 }, { "epoch": 0.2138909821652851, "grad_norm": 0.22234243154525757, "learning_rate": 0.0001975155312515212, "loss": 11.6752, "step": 10218 }, { "epoch": 0.21391191492924727, "grad_norm": 0.22173993289470673, "learning_rate": 0.0001975150455332213, "loss": 11.6704, "step": 10219 }, { "epoch": 0.21393284769320942, "grad_norm": 0.30766376852989197, "learning_rate": 0.00019751455976804402, "loss": 11.6745, "step": 10220 }, { "epoch": 0.21395378045717156, "grad_norm": 0.2726268470287323, "learning_rate": 0.00019751407395598948, "loss": 11.6723, "step": 10221 }, { "epoch": 0.2139747132211337, "grad_norm": 0.2558664381504059, "learning_rate": 0.000197513588097058, "loss": 11.6827, "step": 10222 }, { "epoch": 0.21399564598509588, "grad_norm": 0.21211163699626923, "learning_rate": 0.00019751310219124978, "loss": 11.6754, "step": 10223 }, { "epoch": 0.21401657874905802, "grad_norm": 0.2906314432621002, "learning_rate": 0.00019751261623856506, "loss": 11.6715, "step": 10224 }, { "epoch": 0.21403751151302017, "grad_norm": 0.235830619931221, "learning_rate": 0.00019751213023900407, "loss": 11.678, "step": 10225 }, { "epoch": 0.21405844427698234, "grad_norm": 0.2678644359111786, "learning_rate": 0.00019751164419256706, "loss": 11.6847, "step": 10226 }, { "epoch": 0.21407937704094449, "grad_norm": 0.2067124843597412, "learning_rate": 0.00019751115809925422, "loss": 11.7012, "step": 10227 }, { "epoch": 0.21410030980490663, "grad_norm": 0.2614019215106964, "learning_rate": 0.00019751067195906583, "loss": 11.69, "step": 10228 }, { "epoch": 0.2141212425688688, "grad_norm": 0.22068682312965393, "learning_rate": 0.00019751018577200208, "loss": 11.6686, "step": 10229 }, { "epoch": 0.21414217533283095, "grad_norm": 0.39507097005844116, "learning_rate": 0.00019750969953806328, "loss": 11.6812, "step": 10230 }, { "epoch": 0.2141631080967931, "grad_norm": 0.30573582649230957, "learning_rate": 0.00019750921325724957, "loss": 11.6733, "step": 10231 }, { "epoch": 0.21418404086075526, "grad_norm": 0.24344302713871002, "learning_rate": 0.00019750872692956124, "loss": 11.6704, "step": 10232 }, { "epoch": 0.2142049736247174, "grad_norm": 0.24356116354465485, "learning_rate": 0.00019750824055499852, "loss": 11.6872, "step": 10233 }, { "epoch": 0.21422590638867955, "grad_norm": 0.24197378754615784, "learning_rate": 0.00019750775413356162, "loss": 11.6725, "step": 10234 }, { "epoch": 0.21424683915264173, "grad_norm": 0.2595764994621277, "learning_rate": 0.0001975072676652508, "loss": 11.6956, "step": 10235 }, { "epoch": 0.21426777191660387, "grad_norm": 0.2650708556175232, "learning_rate": 0.00019750678115006628, "loss": 11.6607, "step": 10236 }, { "epoch": 0.21428870468056602, "grad_norm": 0.24576161801815033, "learning_rate": 0.0001975062945880083, "loss": 11.6695, "step": 10237 }, { "epoch": 0.2143096374445282, "grad_norm": 0.23577222228050232, "learning_rate": 0.00019750580797907706, "loss": 11.6768, "step": 10238 }, { "epoch": 0.21433057020849033, "grad_norm": 0.2211160808801651, "learning_rate": 0.00019750532132327287, "loss": 11.6795, "step": 10239 }, { "epoch": 0.21435150297245248, "grad_norm": 0.32623404264450073, "learning_rate": 0.00019750483462059587, "loss": 11.672, "step": 10240 }, { "epoch": 0.21437243573641462, "grad_norm": 0.20957784354686737, "learning_rate": 0.00019750434787104638, "loss": 11.666, "step": 10241 }, { "epoch": 0.2143933685003768, "grad_norm": 0.23131728172302246, "learning_rate": 0.0001975038610746246, "loss": 11.676, "step": 10242 }, { "epoch": 0.21441430126433894, "grad_norm": 0.20557771623134613, "learning_rate": 0.00019750337423133073, "loss": 11.6673, "step": 10243 }, { "epoch": 0.21443523402830109, "grad_norm": 0.4645277261734009, "learning_rate": 0.00019750288734116507, "loss": 11.6807, "step": 10244 }, { "epoch": 0.21445616679226326, "grad_norm": 0.2579987049102783, "learning_rate": 0.00019750240040412778, "loss": 11.6862, "step": 10245 }, { "epoch": 0.2144770995562254, "grad_norm": 0.2509106695652008, "learning_rate": 0.00019750191342021912, "loss": 11.6768, "step": 10246 }, { "epoch": 0.21449803232018755, "grad_norm": 0.2421819567680359, "learning_rate": 0.00019750142638943937, "loss": 11.6749, "step": 10247 }, { "epoch": 0.21451896508414972, "grad_norm": 0.4043102264404297, "learning_rate": 0.00019750093931178875, "loss": 11.6786, "step": 10248 }, { "epoch": 0.21453989784811187, "grad_norm": 0.3350563049316406, "learning_rate": 0.00019750045218726745, "loss": 11.6741, "step": 10249 }, { "epoch": 0.214560830612074, "grad_norm": 0.2797852158546448, "learning_rate": 0.0001974999650158757, "loss": 11.6763, "step": 10250 }, { "epoch": 0.21458176337603618, "grad_norm": 0.268197625875473, "learning_rate": 0.00019749947779761383, "loss": 11.6648, "step": 10251 }, { "epoch": 0.21460269613999833, "grad_norm": 0.2493339329957962, "learning_rate": 0.00019749899053248196, "loss": 11.6767, "step": 10252 }, { "epoch": 0.21462362890396047, "grad_norm": 0.3023637533187866, "learning_rate": 0.00019749850322048037, "loss": 11.6959, "step": 10253 }, { "epoch": 0.21464456166792265, "grad_norm": 0.2608030438423157, "learning_rate": 0.00019749801586160933, "loss": 11.6779, "step": 10254 }, { "epoch": 0.2146654944318848, "grad_norm": 0.24987034499645233, "learning_rate": 0.00019749752845586897, "loss": 11.6797, "step": 10255 }, { "epoch": 0.21468642719584694, "grad_norm": 0.23468588292598724, "learning_rate": 0.00019749704100325966, "loss": 11.6868, "step": 10256 }, { "epoch": 0.21470735995980908, "grad_norm": 0.231887087225914, "learning_rate": 0.00019749655350378157, "loss": 11.6757, "step": 10257 }, { "epoch": 0.21472829272377125, "grad_norm": 0.24170485138893127, "learning_rate": 0.00019749606595743487, "loss": 11.68, "step": 10258 }, { "epoch": 0.2147492254877334, "grad_norm": 0.21353796124458313, "learning_rate": 0.0001974955783642199, "loss": 11.677, "step": 10259 }, { "epoch": 0.21477015825169554, "grad_norm": 0.24391809105873108, "learning_rate": 0.00019749509072413687, "loss": 11.6759, "step": 10260 }, { "epoch": 0.21479109101565771, "grad_norm": 0.23831306397914886, "learning_rate": 0.00019749460303718596, "loss": 11.6782, "step": 10261 }, { "epoch": 0.21481202377961986, "grad_norm": 0.2793962061405182, "learning_rate": 0.00019749411530336746, "loss": 11.6706, "step": 10262 }, { "epoch": 0.214832956543582, "grad_norm": 0.22869504988193512, "learning_rate": 0.00019749362752268158, "loss": 11.6761, "step": 10263 }, { "epoch": 0.21485388930754418, "grad_norm": 0.282797247171402, "learning_rate": 0.00019749313969512854, "loss": 11.677, "step": 10264 }, { "epoch": 0.21487482207150632, "grad_norm": 0.2649148404598236, "learning_rate": 0.00019749265182070864, "loss": 11.6866, "step": 10265 }, { "epoch": 0.21489575483546847, "grad_norm": 0.29829052090644836, "learning_rate": 0.00019749216389942204, "loss": 11.6754, "step": 10266 }, { "epoch": 0.21491668759943064, "grad_norm": 0.23904669284820557, "learning_rate": 0.00019749167593126902, "loss": 11.6621, "step": 10267 }, { "epoch": 0.21493762036339278, "grad_norm": 0.22407448291778564, "learning_rate": 0.00019749118791624978, "loss": 11.6859, "step": 10268 }, { "epoch": 0.21495855312735493, "grad_norm": 0.38091662526130676, "learning_rate": 0.00019749069985436457, "loss": 11.6573, "step": 10269 }, { "epoch": 0.2149794858913171, "grad_norm": 0.27427569031715393, "learning_rate": 0.00019749021174561362, "loss": 11.6756, "step": 10270 }, { "epoch": 0.21500041865527925, "grad_norm": 0.25648418068885803, "learning_rate": 0.0001974897235899972, "loss": 11.6611, "step": 10271 }, { "epoch": 0.2150213514192414, "grad_norm": 0.19408735632896423, "learning_rate": 0.0001974892353875155, "loss": 11.6771, "step": 10272 }, { "epoch": 0.21504228418320356, "grad_norm": 0.2287694811820984, "learning_rate": 0.00019748874713816877, "loss": 11.6722, "step": 10273 }, { "epoch": 0.2150632169471657, "grad_norm": 0.2664645314216614, "learning_rate": 0.00019748825884195723, "loss": 11.6895, "step": 10274 }, { "epoch": 0.21508414971112785, "grad_norm": 0.2788810431957245, "learning_rate": 0.00019748777049888116, "loss": 11.6564, "step": 10275 }, { "epoch": 0.21510508247509, "grad_norm": 0.26944029331207275, "learning_rate": 0.00019748728210894077, "loss": 11.6695, "step": 10276 }, { "epoch": 0.21512601523905217, "grad_norm": 0.22485989332199097, "learning_rate": 0.00019748679367213626, "loss": 11.6867, "step": 10277 }, { "epoch": 0.21514694800301432, "grad_norm": 0.27954307198524475, "learning_rate": 0.0001974863051884679, "loss": 11.6669, "step": 10278 }, { "epoch": 0.21516788076697646, "grad_norm": 0.2655680775642395, "learning_rate": 0.0001974858166579359, "loss": 11.6682, "step": 10279 }, { "epoch": 0.21518881353093863, "grad_norm": 0.20446406304836273, "learning_rate": 0.00019748532808054056, "loss": 11.6735, "step": 10280 }, { "epoch": 0.21520974629490078, "grad_norm": 0.25086915493011475, "learning_rate": 0.00019748483945628204, "loss": 11.6749, "step": 10281 }, { "epoch": 0.21523067905886292, "grad_norm": 0.29135802388191223, "learning_rate": 0.0001974843507851606, "loss": 11.6728, "step": 10282 }, { "epoch": 0.2152516118228251, "grad_norm": 0.2822078764438629, "learning_rate": 0.00019748386206717652, "loss": 11.6829, "step": 10283 }, { "epoch": 0.21527254458678724, "grad_norm": 0.28771188855171204, "learning_rate": 0.00019748337330232997, "loss": 11.6821, "step": 10284 }, { "epoch": 0.21529347735074938, "grad_norm": 0.22913074493408203, "learning_rate": 0.0001974828844906212, "loss": 11.6901, "step": 10285 }, { "epoch": 0.21531441011471156, "grad_norm": 0.22862519323825836, "learning_rate": 0.00019748239563205044, "loss": 11.6707, "step": 10286 }, { "epoch": 0.2153353428786737, "grad_norm": 0.2441640943288803, "learning_rate": 0.00019748190672661797, "loss": 11.6746, "step": 10287 }, { "epoch": 0.21535627564263585, "grad_norm": 0.23484137654304504, "learning_rate": 0.00019748141777432398, "loss": 11.688, "step": 10288 }, { "epoch": 0.21537720840659802, "grad_norm": 0.2239401787519455, "learning_rate": 0.00019748092877516874, "loss": 11.7037, "step": 10289 }, { "epoch": 0.21539814117056016, "grad_norm": 0.2369835376739502, "learning_rate": 0.0001974804397291524, "loss": 11.6797, "step": 10290 }, { "epoch": 0.2154190739345223, "grad_norm": 0.34400880336761475, "learning_rate": 0.00019747995063627534, "loss": 11.6996, "step": 10291 }, { "epoch": 0.21544000669848445, "grad_norm": 0.26314881443977356, "learning_rate": 0.00019747946149653768, "loss": 11.6842, "step": 10292 }, { "epoch": 0.21546093946244663, "grad_norm": 0.21910513937473297, "learning_rate": 0.00019747897230993968, "loss": 11.6569, "step": 10293 }, { "epoch": 0.21548187222640877, "grad_norm": 0.32291731238365173, "learning_rate": 0.00019747848307648158, "loss": 11.6751, "step": 10294 }, { "epoch": 0.21550280499037092, "grad_norm": 0.24449823796749115, "learning_rate": 0.00019747799379616363, "loss": 11.6793, "step": 10295 }, { "epoch": 0.2155237377543331, "grad_norm": 0.25046873092651367, "learning_rate": 0.00019747750446898605, "loss": 11.6669, "step": 10296 }, { "epoch": 0.21554467051829523, "grad_norm": 0.26915842294692993, "learning_rate": 0.0001974770150949491, "loss": 11.685, "step": 10297 }, { "epoch": 0.21556560328225738, "grad_norm": 0.27066653966903687, "learning_rate": 0.00019747652567405298, "loss": 11.7045, "step": 10298 }, { "epoch": 0.21558653604621955, "grad_norm": 0.21856100857257843, "learning_rate": 0.00019747603620629795, "loss": 11.6823, "step": 10299 }, { "epoch": 0.2156074688101817, "grad_norm": 0.2195090800523758, "learning_rate": 0.00019747554669168423, "loss": 11.6793, "step": 10300 }, { "epoch": 0.21562840157414384, "grad_norm": 0.2729223370552063, "learning_rate": 0.00019747505713021206, "loss": 11.6804, "step": 10301 }, { "epoch": 0.215649334338106, "grad_norm": 0.2475399225950241, "learning_rate": 0.00019747456752188168, "loss": 11.6862, "step": 10302 }, { "epoch": 0.21567026710206816, "grad_norm": 0.26348555088043213, "learning_rate": 0.0001974740778666933, "loss": 11.6709, "step": 10303 }, { "epoch": 0.2156911998660303, "grad_norm": 0.2504883110523224, "learning_rate": 0.0001974735881646472, "loss": 11.6793, "step": 10304 }, { "epoch": 0.21571213262999248, "grad_norm": 0.20305235683918, "learning_rate": 0.00019747309841574358, "loss": 11.6793, "step": 10305 }, { "epoch": 0.21573306539395462, "grad_norm": 0.28116607666015625, "learning_rate": 0.0001974726086199827, "loss": 11.6769, "step": 10306 }, { "epoch": 0.21575399815791677, "grad_norm": 0.31049075722694397, "learning_rate": 0.0001974721187773648, "loss": 11.6758, "step": 10307 }, { "epoch": 0.21577493092187894, "grad_norm": 0.24439522624015808, "learning_rate": 0.00019747162888789007, "loss": 11.6802, "step": 10308 }, { "epoch": 0.21579586368584108, "grad_norm": 0.25291240215301514, "learning_rate": 0.0001974711389515588, "loss": 11.6701, "step": 10309 }, { "epoch": 0.21581679644980323, "grad_norm": 0.32503533363342285, "learning_rate": 0.00019747064896837117, "loss": 11.6922, "step": 10310 }, { "epoch": 0.21583772921376537, "grad_norm": 0.26493746042251587, "learning_rate": 0.00019747015893832744, "loss": 11.6591, "step": 10311 }, { "epoch": 0.21585866197772755, "grad_norm": 0.31251564621925354, "learning_rate": 0.0001974696688614279, "loss": 11.6852, "step": 10312 }, { "epoch": 0.2158795947416897, "grad_norm": 0.2763710021972656, "learning_rate": 0.0001974691787376727, "loss": 11.6645, "step": 10313 }, { "epoch": 0.21590052750565183, "grad_norm": 0.266414612531662, "learning_rate": 0.0001974686885670621, "loss": 11.6761, "step": 10314 }, { "epoch": 0.215921460269614, "grad_norm": 0.33404335379600525, "learning_rate": 0.0001974681983495964, "loss": 11.6728, "step": 10315 }, { "epoch": 0.21594239303357615, "grad_norm": 0.21472322940826416, "learning_rate": 0.00019746770808527571, "loss": 11.6679, "step": 10316 }, { "epoch": 0.2159633257975383, "grad_norm": 0.27869370579719543, "learning_rate": 0.0001974672177741004, "loss": 11.6914, "step": 10317 }, { "epoch": 0.21598425856150047, "grad_norm": 0.2256821244955063, "learning_rate": 0.00019746672741607064, "loss": 11.6827, "step": 10318 }, { "epoch": 0.21600519132546261, "grad_norm": 0.22753365337848663, "learning_rate": 0.00019746623701118665, "loss": 11.6868, "step": 10319 }, { "epoch": 0.21602612408942476, "grad_norm": 0.24399615824222565, "learning_rate": 0.0001974657465594487, "loss": 11.6719, "step": 10320 }, { "epoch": 0.21604705685338693, "grad_norm": 0.253302663564682, "learning_rate": 0.000197465256060857, "loss": 11.6746, "step": 10321 }, { "epoch": 0.21606798961734908, "grad_norm": 0.24395647644996643, "learning_rate": 0.0001974647655154118, "loss": 11.6729, "step": 10322 }, { "epoch": 0.21608892238131122, "grad_norm": 0.22802984714508057, "learning_rate": 0.00019746427492311333, "loss": 11.68, "step": 10323 }, { "epoch": 0.2161098551452734, "grad_norm": 0.24481767416000366, "learning_rate": 0.00019746378428396187, "loss": 11.6961, "step": 10324 }, { "epoch": 0.21613078790923554, "grad_norm": 0.2113310992717743, "learning_rate": 0.00019746329359795757, "loss": 11.6667, "step": 10325 }, { "epoch": 0.21615172067319768, "grad_norm": 0.2726260721683502, "learning_rate": 0.0001974628028651007, "loss": 11.6675, "step": 10326 }, { "epoch": 0.21617265343715986, "grad_norm": 0.2712045907974243, "learning_rate": 0.00019746231208539157, "loss": 11.668, "step": 10327 }, { "epoch": 0.216193586201122, "grad_norm": 0.27601078152656555, "learning_rate": 0.0001974618212588303, "loss": 11.6662, "step": 10328 }, { "epoch": 0.21621451896508415, "grad_norm": 0.26230403780937195, "learning_rate": 0.0001974613303854172, "loss": 11.6964, "step": 10329 }, { "epoch": 0.2162354517290463, "grad_norm": 0.2816905975341797, "learning_rate": 0.00019746083946515247, "loss": 11.6612, "step": 10330 }, { "epoch": 0.21625638449300846, "grad_norm": 0.2275983840227127, "learning_rate": 0.00019746034849803636, "loss": 11.6781, "step": 10331 }, { "epoch": 0.2162773172569706, "grad_norm": 2.0757312774658203, "learning_rate": 0.00019745985748406914, "loss": 11.6317, "step": 10332 }, { "epoch": 0.21629825002093275, "grad_norm": 0.26373523473739624, "learning_rate": 0.000197459366423251, "loss": 11.676, "step": 10333 }, { "epoch": 0.21631918278489493, "grad_norm": 0.2771134674549103, "learning_rate": 0.0001974588753155822, "loss": 11.6681, "step": 10334 }, { "epoch": 0.21634011554885707, "grad_norm": 0.23986244201660156, "learning_rate": 0.00019745838416106292, "loss": 11.6684, "step": 10335 }, { "epoch": 0.21636104831281922, "grad_norm": 0.26980456709861755, "learning_rate": 0.00019745789295969347, "loss": 11.6728, "step": 10336 }, { "epoch": 0.2163819810767814, "grad_norm": 0.2653292119503021, "learning_rate": 0.00019745740171147407, "loss": 11.6665, "step": 10337 }, { "epoch": 0.21640291384074353, "grad_norm": 0.2054324895143509, "learning_rate": 0.00019745691041640493, "loss": 11.6723, "step": 10338 }, { "epoch": 0.21642384660470568, "grad_norm": 0.24228085577487946, "learning_rate": 0.0001974564190744863, "loss": 11.6868, "step": 10339 }, { "epoch": 0.21644477936866785, "grad_norm": 0.2483314722776413, "learning_rate": 0.00019745592768571842, "loss": 11.681, "step": 10340 }, { "epoch": 0.21646571213263, "grad_norm": 0.29634401202201843, "learning_rate": 0.0001974554362501015, "loss": 11.6602, "step": 10341 }, { "epoch": 0.21648664489659214, "grad_norm": 0.2683432400226593, "learning_rate": 0.00019745494476763585, "loss": 11.6767, "step": 10342 }, { "epoch": 0.2165075776605543, "grad_norm": 0.25339052081108093, "learning_rate": 0.0001974544532383216, "loss": 11.675, "step": 10343 }, { "epoch": 0.21652851042451646, "grad_norm": 0.2832910716533661, "learning_rate": 0.00019745396166215908, "loss": 11.6933, "step": 10344 }, { "epoch": 0.2165494431884786, "grad_norm": 0.28627514839172363, "learning_rate": 0.00019745347003914847, "loss": 11.6791, "step": 10345 }, { "epoch": 0.21657037595244075, "grad_norm": 0.2533588707447052, "learning_rate": 0.00019745297836929005, "loss": 11.6828, "step": 10346 }, { "epoch": 0.21659130871640292, "grad_norm": 0.2388332635164261, "learning_rate": 0.000197452486652584, "loss": 11.6745, "step": 10347 }, { "epoch": 0.21661224148036506, "grad_norm": 0.20717842876911163, "learning_rate": 0.00019745199488903058, "loss": 11.6755, "step": 10348 }, { "epoch": 0.2166331742443272, "grad_norm": 0.2658360004425049, "learning_rate": 0.00019745150307863004, "loss": 11.6765, "step": 10349 }, { "epoch": 0.21665410700828938, "grad_norm": 0.2870126962661743, "learning_rate": 0.00019745101122138262, "loss": 11.6819, "step": 10350 }, { "epoch": 0.21667503977225153, "grad_norm": 0.25044623017311096, "learning_rate": 0.00019745051931728856, "loss": 11.6785, "step": 10351 }, { "epoch": 0.21669597253621367, "grad_norm": 0.26025155186653137, "learning_rate": 0.00019745002736634807, "loss": 11.6872, "step": 10352 }, { "epoch": 0.21671690530017584, "grad_norm": 0.20050473511219025, "learning_rate": 0.0001974495353685614, "loss": 11.6739, "step": 10353 }, { "epoch": 0.216737838064138, "grad_norm": 0.3867972195148468, "learning_rate": 0.00019744904332392874, "loss": 11.6838, "step": 10354 }, { "epoch": 0.21675877082810013, "grad_norm": 0.2368917316198349, "learning_rate": 0.00019744855123245042, "loss": 11.6838, "step": 10355 }, { "epoch": 0.2167797035920623, "grad_norm": 0.22146786749362946, "learning_rate": 0.00019744805909412662, "loss": 11.6794, "step": 10356 }, { "epoch": 0.21680063635602445, "grad_norm": 0.32740092277526855, "learning_rate": 0.00019744756690895758, "loss": 11.6862, "step": 10357 }, { "epoch": 0.2168215691199866, "grad_norm": 0.2275303602218628, "learning_rate": 0.00019744707467694354, "loss": 11.6729, "step": 10358 }, { "epoch": 0.21684250188394877, "grad_norm": 0.19828620553016663, "learning_rate": 0.00019744658239808475, "loss": 11.6755, "step": 10359 }, { "epoch": 0.2168634346479109, "grad_norm": 0.24472880363464355, "learning_rate": 0.00019744609007238143, "loss": 11.6783, "step": 10360 }, { "epoch": 0.21688436741187306, "grad_norm": 0.33270263671875, "learning_rate": 0.00019744559769983383, "loss": 11.6663, "step": 10361 }, { "epoch": 0.21690530017583523, "grad_norm": 0.2587944269180298, "learning_rate": 0.00019744510528044216, "loss": 11.6739, "step": 10362 }, { "epoch": 0.21692623293979738, "grad_norm": 0.28992852568626404, "learning_rate": 0.0001974446128142067, "loss": 11.6959, "step": 10363 }, { "epoch": 0.21694716570375952, "grad_norm": 0.21940505504608154, "learning_rate": 0.00019744412030112764, "loss": 11.6661, "step": 10364 }, { "epoch": 0.21696809846772167, "grad_norm": 0.2553580105304718, "learning_rate": 0.00019744362774120526, "loss": 11.6857, "step": 10365 }, { "epoch": 0.21698903123168384, "grad_norm": 0.4055202305316925, "learning_rate": 0.00019744313513443977, "loss": 11.664, "step": 10366 }, { "epoch": 0.21700996399564598, "grad_norm": 0.28418922424316406, "learning_rate": 0.0001974426424808314, "loss": 11.6817, "step": 10367 }, { "epoch": 0.21703089675960813, "grad_norm": 0.21339188516139984, "learning_rate": 0.0001974421497803804, "loss": 11.6718, "step": 10368 }, { "epoch": 0.2170518295235703, "grad_norm": 0.28345969319343567, "learning_rate": 0.000197441657033087, "loss": 11.6737, "step": 10369 }, { "epoch": 0.21707276228753244, "grad_norm": 0.272656112909317, "learning_rate": 0.00019744116423895145, "loss": 11.6717, "step": 10370 }, { "epoch": 0.2170936950514946, "grad_norm": 0.24920161068439484, "learning_rate": 0.000197440671397974, "loss": 11.6487, "step": 10371 }, { "epoch": 0.21711462781545676, "grad_norm": 0.2112736999988556, "learning_rate": 0.00019744017851015483, "loss": 11.6843, "step": 10372 }, { "epoch": 0.2171355605794189, "grad_norm": 0.24299217760562897, "learning_rate": 0.0001974396855754942, "loss": 11.6777, "step": 10373 }, { "epoch": 0.21715649334338105, "grad_norm": 0.2829500436782837, "learning_rate": 0.00019743919259399242, "loss": 11.6642, "step": 10374 }, { "epoch": 0.21717742610734322, "grad_norm": 2.8017587661743164, "learning_rate": 0.00019743869956564963, "loss": 11.679, "step": 10375 }, { "epoch": 0.21719835887130537, "grad_norm": 0.2793741524219513, "learning_rate": 0.0001974382064904661, "loss": 11.6813, "step": 10376 }, { "epoch": 0.21721929163526751, "grad_norm": 0.2663913071155548, "learning_rate": 0.0001974377133684421, "loss": 11.6806, "step": 10377 }, { "epoch": 0.2172402243992297, "grad_norm": 0.21882623434066772, "learning_rate": 0.00019743722019957782, "loss": 11.6569, "step": 10378 }, { "epoch": 0.21726115716319183, "grad_norm": 0.28177279233932495, "learning_rate": 0.00019743672698387354, "loss": 11.6807, "step": 10379 }, { "epoch": 0.21728208992715398, "grad_norm": 0.2068682461977005, "learning_rate": 0.00019743623372132945, "loss": 11.6891, "step": 10380 }, { "epoch": 0.21730302269111612, "grad_norm": 0.27539584040641785, "learning_rate": 0.0001974357404119458, "loss": 11.6756, "step": 10381 }, { "epoch": 0.2173239554550783, "grad_norm": 0.21289695799350739, "learning_rate": 0.00019743524705572284, "loss": 11.679, "step": 10382 }, { "epoch": 0.21734488821904044, "grad_norm": 0.27779561281204224, "learning_rate": 0.0001974347536526608, "loss": 11.6993, "step": 10383 }, { "epoch": 0.21736582098300258, "grad_norm": 0.22699864208698273, "learning_rate": 0.00019743426020275994, "loss": 11.689, "step": 10384 }, { "epoch": 0.21738675374696476, "grad_norm": 0.2106088399887085, "learning_rate": 0.00019743376670602048, "loss": 11.667, "step": 10385 }, { "epoch": 0.2174076865109269, "grad_norm": 0.23311468958854675, "learning_rate": 0.00019743327316244265, "loss": 11.6747, "step": 10386 }, { "epoch": 0.21742861927488905, "grad_norm": 0.22803300619125366, "learning_rate": 0.00019743277957202669, "loss": 11.6754, "step": 10387 }, { "epoch": 0.21744955203885122, "grad_norm": 0.26697447896003723, "learning_rate": 0.00019743228593477286, "loss": 11.6842, "step": 10388 }, { "epoch": 0.21747048480281336, "grad_norm": 0.24419215321540833, "learning_rate": 0.00019743179225068135, "loss": 11.693, "step": 10389 }, { "epoch": 0.2174914175667755, "grad_norm": 0.23749259114265442, "learning_rate": 0.00019743129851975243, "loss": 11.6756, "step": 10390 }, { "epoch": 0.21751235033073768, "grad_norm": 0.27646106481552124, "learning_rate": 0.00019743080474198633, "loss": 11.6863, "step": 10391 }, { "epoch": 0.21753328309469983, "grad_norm": 0.22105881571769714, "learning_rate": 0.00019743031091738332, "loss": 11.6888, "step": 10392 }, { "epoch": 0.21755421585866197, "grad_norm": 0.30337536334991455, "learning_rate": 0.00019742981704594357, "loss": 11.6995, "step": 10393 }, { "epoch": 0.21757514862262414, "grad_norm": 0.22877685725688934, "learning_rate": 0.00019742932312766736, "loss": 11.6691, "step": 10394 }, { "epoch": 0.2175960813865863, "grad_norm": 0.4227904975414276, "learning_rate": 0.00019742882916255492, "loss": 11.6788, "step": 10395 }, { "epoch": 0.21761701415054843, "grad_norm": 0.3210810422897339, "learning_rate": 0.0001974283351506065, "loss": 11.6826, "step": 10396 }, { "epoch": 0.2176379469145106, "grad_norm": 0.3002779483795166, "learning_rate": 0.0001974278410918223, "loss": 11.6777, "step": 10397 }, { "epoch": 0.21765887967847275, "grad_norm": 0.21157725155353546, "learning_rate": 0.00019742734698620265, "loss": 11.6742, "step": 10398 }, { "epoch": 0.2176798124424349, "grad_norm": 0.2761234641075134, "learning_rate": 0.00019742685283374766, "loss": 11.6714, "step": 10399 }, { "epoch": 0.21770074520639704, "grad_norm": 0.22130391001701355, "learning_rate": 0.00019742635863445767, "loss": 11.6687, "step": 10400 }, { "epoch": 0.2177216779703592, "grad_norm": 0.2527410387992859, "learning_rate": 0.00019742586438833284, "loss": 11.688, "step": 10401 }, { "epoch": 0.21774261073432136, "grad_norm": 0.27960264682769775, "learning_rate": 0.00019742537009537347, "loss": 11.6865, "step": 10402 }, { "epoch": 0.2177635434982835, "grad_norm": 0.2346370667219162, "learning_rate": 0.00019742487575557976, "loss": 11.6904, "step": 10403 }, { "epoch": 0.21778447626224567, "grad_norm": 0.3255414366722107, "learning_rate": 0.00019742438136895196, "loss": 11.6979, "step": 10404 }, { "epoch": 0.21780540902620782, "grad_norm": 0.2200959026813507, "learning_rate": 0.00019742388693549029, "loss": 11.6551, "step": 10405 }, { "epoch": 0.21782634179016996, "grad_norm": 0.23050463199615479, "learning_rate": 0.00019742339245519506, "loss": 11.6768, "step": 10406 }, { "epoch": 0.21784727455413214, "grad_norm": 0.35660290718078613, "learning_rate": 0.0001974228979280664, "loss": 11.701, "step": 10407 }, { "epoch": 0.21786820731809428, "grad_norm": 0.27770641446113586, "learning_rate": 0.00019742240335410461, "loss": 11.6765, "step": 10408 }, { "epoch": 0.21788914008205643, "grad_norm": 0.24343334138393402, "learning_rate": 0.00019742190873330993, "loss": 11.6666, "step": 10409 }, { "epoch": 0.2179100728460186, "grad_norm": 0.33188101649284363, "learning_rate": 0.0001974214140656826, "loss": 11.6745, "step": 10410 }, { "epoch": 0.21793100560998074, "grad_norm": 0.25051113963127136, "learning_rate": 0.00019742091935122283, "loss": 11.67, "step": 10411 }, { "epoch": 0.2179519383739429, "grad_norm": 0.2495565563440323, "learning_rate": 0.00019742042458993086, "loss": 11.6655, "step": 10412 }, { "epoch": 0.21797287113790506, "grad_norm": 0.249752938747406, "learning_rate": 0.00019741992978180694, "loss": 11.6819, "step": 10413 }, { "epoch": 0.2179938039018672, "grad_norm": 0.22459925711154938, "learning_rate": 0.00019741943492685134, "loss": 11.6699, "step": 10414 }, { "epoch": 0.21801473666582935, "grad_norm": 0.2790786325931549, "learning_rate": 0.00019741894002506426, "loss": 11.6877, "step": 10415 }, { "epoch": 0.21803566942979152, "grad_norm": 0.2924932837486267, "learning_rate": 0.0001974184450764459, "loss": 11.6894, "step": 10416 }, { "epoch": 0.21805660219375367, "grad_norm": 0.24102479219436646, "learning_rate": 0.00019741795008099654, "loss": 11.6853, "step": 10417 }, { "epoch": 0.2180775349577158, "grad_norm": 0.27003178000450134, "learning_rate": 0.00019741745503871648, "loss": 11.6799, "step": 10418 }, { "epoch": 0.21809846772167796, "grad_norm": 0.21214252710342407, "learning_rate": 0.00019741695994960584, "loss": 11.6846, "step": 10419 }, { "epoch": 0.21811940048564013, "grad_norm": 0.29765012860298157, "learning_rate": 0.00019741646481366494, "loss": 11.6667, "step": 10420 }, { "epoch": 0.21814033324960228, "grad_norm": 0.2509808838367462, "learning_rate": 0.00019741596963089402, "loss": 11.6633, "step": 10421 }, { "epoch": 0.21816126601356442, "grad_norm": 0.25245997309684753, "learning_rate": 0.00019741547440129325, "loss": 11.6884, "step": 10422 }, { "epoch": 0.2181821987775266, "grad_norm": 0.23588727414608002, "learning_rate": 0.0001974149791248629, "loss": 11.6816, "step": 10423 }, { "epoch": 0.21820313154148874, "grad_norm": 0.21022894978523254, "learning_rate": 0.00019741448380160325, "loss": 11.6704, "step": 10424 }, { "epoch": 0.21822406430545088, "grad_norm": 0.24282363057136536, "learning_rate": 0.0001974139884315145, "loss": 11.6798, "step": 10425 }, { "epoch": 0.21824499706941305, "grad_norm": 0.2554599344730377, "learning_rate": 0.00019741349301459688, "loss": 11.6685, "step": 10426 }, { "epoch": 0.2182659298333752, "grad_norm": 0.21550339460372925, "learning_rate": 0.00019741299755085065, "loss": 11.6657, "step": 10427 }, { "epoch": 0.21828686259733734, "grad_norm": 0.277071088552475, "learning_rate": 0.00019741250204027605, "loss": 11.6804, "step": 10428 }, { "epoch": 0.21830779536129952, "grad_norm": 0.23385363817214966, "learning_rate": 0.0001974120064828733, "loss": 11.689, "step": 10429 }, { "epoch": 0.21832872812526166, "grad_norm": 0.21682776510715485, "learning_rate": 0.00019741151087864265, "loss": 11.6727, "step": 10430 }, { "epoch": 0.2183496608892238, "grad_norm": 0.20989017188549042, "learning_rate": 0.00019741101522758431, "loss": 11.6709, "step": 10431 }, { "epoch": 0.21837059365318598, "grad_norm": 0.29772982001304626, "learning_rate": 0.00019741051952969855, "loss": 11.6873, "step": 10432 }, { "epoch": 0.21839152641714812, "grad_norm": 0.21864120662212372, "learning_rate": 0.00019741002378498564, "loss": 11.6931, "step": 10433 }, { "epoch": 0.21841245918111027, "grad_norm": 0.2821516990661621, "learning_rate": 0.0001974095279934457, "loss": 11.6776, "step": 10434 }, { "epoch": 0.2184333919450724, "grad_norm": 0.27040764689445496, "learning_rate": 0.00019740903215507914, "loss": 11.6838, "step": 10435 }, { "epoch": 0.2184543247090346, "grad_norm": 0.41873517632484436, "learning_rate": 0.00019740853626988605, "loss": 11.6685, "step": 10436 }, { "epoch": 0.21847525747299673, "grad_norm": 0.2708163857460022, "learning_rate": 0.00019740804033786675, "loss": 11.6873, "step": 10437 }, { "epoch": 0.21849619023695888, "grad_norm": 0.2579672038555145, "learning_rate": 0.00019740754435902146, "loss": 11.6892, "step": 10438 }, { "epoch": 0.21851712300092105, "grad_norm": 0.25044775009155273, "learning_rate": 0.00019740704833335037, "loss": 11.6796, "step": 10439 }, { "epoch": 0.2185380557648832, "grad_norm": 0.2773364186286926, "learning_rate": 0.0001974065522608538, "loss": 11.672, "step": 10440 }, { "epoch": 0.21855898852884534, "grad_norm": 0.21544933319091797, "learning_rate": 0.00019740605614153195, "loss": 11.6747, "step": 10441 }, { "epoch": 0.2185799212928075, "grad_norm": 0.25374338030815125, "learning_rate": 0.00019740555997538502, "loss": 11.6761, "step": 10442 }, { "epoch": 0.21860085405676966, "grad_norm": 0.2388283908367157, "learning_rate": 0.0001974050637624133, "loss": 11.6638, "step": 10443 }, { "epoch": 0.2186217868207318, "grad_norm": 0.2983497381210327, "learning_rate": 0.000197404567502617, "loss": 11.6731, "step": 10444 }, { "epoch": 0.21864271958469397, "grad_norm": 0.2879904508590698, "learning_rate": 0.0001974040711959964, "loss": 11.6678, "step": 10445 }, { "epoch": 0.21866365234865612, "grad_norm": 0.21255120635032654, "learning_rate": 0.0001974035748425517, "loss": 11.6863, "step": 10446 }, { "epoch": 0.21868458511261826, "grad_norm": 0.260032057762146, "learning_rate": 0.00019740307844228315, "loss": 11.6737, "step": 10447 }, { "epoch": 0.21870551787658044, "grad_norm": 0.3102332353591919, "learning_rate": 0.000197402581995191, "loss": 11.7062, "step": 10448 }, { "epoch": 0.21872645064054258, "grad_norm": 0.32887348532676697, "learning_rate": 0.00019740208550127547, "loss": 11.6882, "step": 10449 }, { "epoch": 0.21874738340450472, "grad_norm": 0.26113152503967285, "learning_rate": 0.00019740158896053677, "loss": 11.6636, "step": 10450 }, { "epoch": 0.2187683161684669, "grad_norm": 0.2288360297679901, "learning_rate": 0.00019740109237297522, "loss": 11.6647, "step": 10451 }, { "epoch": 0.21878924893242904, "grad_norm": 0.23149792850017548, "learning_rate": 0.00019740059573859103, "loss": 11.6869, "step": 10452 }, { "epoch": 0.2188101816963912, "grad_norm": 0.24354633688926697, "learning_rate": 0.00019740009905738437, "loss": 11.6775, "step": 10453 }, { "epoch": 0.21883111446035333, "grad_norm": 0.20859472453594208, "learning_rate": 0.00019739960232935554, "loss": 11.6651, "step": 10454 }, { "epoch": 0.2188520472243155, "grad_norm": 0.31290051341056824, "learning_rate": 0.0001973991055545048, "loss": 11.6678, "step": 10455 }, { "epoch": 0.21887297998827765, "grad_norm": 0.24917341768741608, "learning_rate": 0.00019739860873283234, "loss": 11.6767, "step": 10456 }, { "epoch": 0.2188939127522398, "grad_norm": 0.30397775769233704, "learning_rate": 0.0001973981118643384, "loss": 11.6813, "step": 10457 }, { "epoch": 0.21891484551620197, "grad_norm": 0.21997936069965363, "learning_rate": 0.00019739761494902327, "loss": 11.6761, "step": 10458 }, { "epoch": 0.2189357782801641, "grad_norm": 0.2528150975704193, "learning_rate": 0.00019739711798688711, "loss": 11.6753, "step": 10459 }, { "epoch": 0.21895671104412626, "grad_norm": 0.34361135959625244, "learning_rate": 0.00019739662097793023, "loss": 11.6881, "step": 10460 }, { "epoch": 0.21897764380808843, "grad_norm": 0.29088297486305237, "learning_rate": 0.00019739612392215286, "loss": 11.6805, "step": 10461 }, { "epoch": 0.21899857657205057, "grad_norm": 0.2741030156612396, "learning_rate": 0.0001973956268195552, "loss": 11.6683, "step": 10462 }, { "epoch": 0.21901950933601272, "grad_norm": 0.23206289112567902, "learning_rate": 0.0001973951296701375, "loss": 11.6722, "step": 10463 }, { "epoch": 0.2190404420999749, "grad_norm": 0.22709710896015167, "learning_rate": 0.00019739463247390004, "loss": 11.6807, "step": 10464 }, { "epoch": 0.21906137486393704, "grad_norm": 0.26890891790390015, "learning_rate": 0.000197394135230843, "loss": 11.6507, "step": 10465 }, { "epoch": 0.21908230762789918, "grad_norm": 0.25239092111587524, "learning_rate": 0.00019739363794096665, "loss": 11.6821, "step": 10466 }, { "epoch": 0.21910324039186135, "grad_norm": 0.22292611002922058, "learning_rate": 0.00019739314060427123, "loss": 11.6899, "step": 10467 }, { "epoch": 0.2191241731558235, "grad_norm": 0.3367576003074646, "learning_rate": 0.000197392643220757, "loss": 11.6939, "step": 10468 }, { "epoch": 0.21914510591978564, "grad_norm": 0.2784558832645416, "learning_rate": 0.00019739214579042415, "loss": 11.6761, "step": 10469 }, { "epoch": 0.21916603868374782, "grad_norm": 0.23979198932647705, "learning_rate": 0.00019739164831327296, "loss": 11.6779, "step": 10470 }, { "epoch": 0.21918697144770996, "grad_norm": 0.2455206662416458, "learning_rate": 0.00019739115078930365, "loss": 11.6635, "step": 10471 }, { "epoch": 0.2192079042116721, "grad_norm": 0.211730495095253, "learning_rate": 0.00019739065321851645, "loss": 11.6672, "step": 10472 }, { "epoch": 0.21922883697563425, "grad_norm": 0.3049231767654419, "learning_rate": 0.00019739015560091158, "loss": 11.6647, "step": 10473 }, { "epoch": 0.21924976973959642, "grad_norm": 0.2623559832572937, "learning_rate": 0.00019738965793648935, "loss": 11.6772, "step": 10474 }, { "epoch": 0.21927070250355857, "grad_norm": 0.23655982315540314, "learning_rate": 0.00019738916022524997, "loss": 11.6854, "step": 10475 }, { "epoch": 0.2192916352675207, "grad_norm": 0.2945692837238312, "learning_rate": 0.00019738866246719365, "loss": 11.673, "step": 10476 }, { "epoch": 0.21931256803148289, "grad_norm": 0.2326129674911499, "learning_rate": 0.00019738816466232067, "loss": 11.6938, "step": 10477 }, { "epoch": 0.21933350079544503, "grad_norm": 0.2667989730834961, "learning_rate": 0.00019738766681063122, "loss": 11.6726, "step": 10478 }, { "epoch": 0.21935443355940717, "grad_norm": 0.34553197026252747, "learning_rate": 0.00019738716891212556, "loss": 11.6764, "step": 10479 }, { "epoch": 0.21937536632336935, "grad_norm": 0.24101494252681732, "learning_rate": 0.00019738667096680398, "loss": 11.6893, "step": 10480 }, { "epoch": 0.2193962990873315, "grad_norm": 0.22953207790851593, "learning_rate": 0.00019738617297466662, "loss": 11.676, "step": 10481 }, { "epoch": 0.21941723185129364, "grad_norm": 0.2433115690946579, "learning_rate": 0.00019738567493571383, "loss": 11.6702, "step": 10482 }, { "epoch": 0.2194381646152558, "grad_norm": 0.2516716718673706, "learning_rate": 0.00019738517684994575, "loss": 11.6722, "step": 10483 }, { "epoch": 0.21945909737921795, "grad_norm": 0.2232380509376526, "learning_rate": 0.0001973846787173627, "loss": 11.6671, "step": 10484 }, { "epoch": 0.2194800301431801, "grad_norm": 0.26335662603378296, "learning_rate": 0.00019738418053796486, "loss": 11.671, "step": 10485 }, { "epoch": 0.21950096290714227, "grad_norm": 0.2144724726676941, "learning_rate": 0.00019738368231175248, "loss": 11.6662, "step": 10486 }, { "epoch": 0.21952189567110442, "grad_norm": 0.35445499420166016, "learning_rate": 0.00019738318403872583, "loss": 11.685, "step": 10487 }, { "epoch": 0.21954282843506656, "grad_norm": 0.2387901395559311, "learning_rate": 0.00019738268571888515, "loss": 11.6717, "step": 10488 }, { "epoch": 0.2195637611990287, "grad_norm": 0.2686651051044464, "learning_rate": 0.00019738218735223064, "loss": 11.6773, "step": 10489 }, { "epoch": 0.21958469396299088, "grad_norm": 0.22008642554283142, "learning_rate": 0.00019738168893876256, "loss": 11.6779, "step": 10490 }, { "epoch": 0.21960562672695302, "grad_norm": 0.21146458387374878, "learning_rate": 0.00019738119047848116, "loss": 11.6706, "step": 10491 }, { "epoch": 0.21962655949091517, "grad_norm": 0.27774444222450256, "learning_rate": 0.00019738069197138667, "loss": 11.6791, "step": 10492 }, { "epoch": 0.21964749225487734, "grad_norm": 0.2730133533477783, "learning_rate": 0.0001973801934174793, "loss": 11.6692, "step": 10493 }, { "epoch": 0.21966842501883949, "grad_norm": 0.21741969883441925, "learning_rate": 0.00019737969481675935, "loss": 11.6945, "step": 10494 }, { "epoch": 0.21968935778280163, "grad_norm": 0.27186039090156555, "learning_rate": 0.00019737919616922702, "loss": 11.6791, "step": 10495 }, { "epoch": 0.2197102905467638, "grad_norm": 0.22388261556625366, "learning_rate": 0.00019737869747488257, "loss": 11.6784, "step": 10496 }, { "epoch": 0.21973122331072595, "grad_norm": 0.2311507612466812, "learning_rate": 0.00019737819873372623, "loss": 11.6674, "step": 10497 }, { "epoch": 0.2197521560746881, "grad_norm": 0.25774940848350525, "learning_rate": 0.00019737769994575825, "loss": 11.6843, "step": 10498 }, { "epoch": 0.21977308883865027, "grad_norm": 0.27173373103141785, "learning_rate": 0.00019737720111097883, "loss": 11.6898, "step": 10499 }, { "epoch": 0.2197940216026124, "grad_norm": 0.33931565284729004, "learning_rate": 0.00019737670222938825, "loss": 11.6996, "step": 10500 }, { "epoch": 0.21981495436657456, "grad_norm": 0.24007141590118408, "learning_rate": 0.00019737620330098672, "loss": 11.6693, "step": 10501 }, { "epoch": 0.21983588713053673, "grad_norm": 0.25559842586517334, "learning_rate": 0.0001973757043257745, "loss": 11.6901, "step": 10502 }, { "epoch": 0.21985681989449887, "grad_norm": 0.2829703390598297, "learning_rate": 0.00019737520530375185, "loss": 11.6933, "step": 10503 }, { "epoch": 0.21987775265846102, "grad_norm": 0.26582640409469604, "learning_rate": 0.00019737470623491899, "loss": 11.6858, "step": 10504 }, { "epoch": 0.2198986854224232, "grad_norm": 0.24852733314037323, "learning_rate": 0.00019737420711927613, "loss": 11.6672, "step": 10505 }, { "epoch": 0.21991961818638533, "grad_norm": 0.27719253301620483, "learning_rate": 0.00019737370795682354, "loss": 11.6644, "step": 10506 }, { "epoch": 0.21994055095034748, "grad_norm": 0.2259276658296585, "learning_rate": 0.00019737320874756148, "loss": 11.6811, "step": 10507 }, { "epoch": 0.21996148371430962, "grad_norm": 0.2500360608100891, "learning_rate": 0.00019737270949149016, "loss": 11.659, "step": 10508 }, { "epoch": 0.2199824164782718, "grad_norm": 0.2701370120048523, "learning_rate": 0.00019737221018860984, "loss": 11.6646, "step": 10509 }, { "epoch": 0.22000334924223394, "grad_norm": 0.24120654165744781, "learning_rate": 0.0001973717108389207, "loss": 11.6763, "step": 10510 }, { "epoch": 0.2200242820061961, "grad_norm": 0.21597054600715637, "learning_rate": 0.00019737121144242306, "loss": 11.6688, "step": 10511 }, { "epoch": 0.22004521477015826, "grad_norm": 0.28511491417884827, "learning_rate": 0.00019737071199911712, "loss": 11.6833, "step": 10512 }, { "epoch": 0.2200661475341204, "grad_norm": 0.29421547055244446, "learning_rate": 0.00019737021250900314, "loss": 11.6784, "step": 10513 }, { "epoch": 0.22008708029808255, "grad_norm": 0.2670076787471771, "learning_rate": 0.00019736971297208133, "loss": 11.6796, "step": 10514 }, { "epoch": 0.22010801306204472, "grad_norm": 0.29862117767333984, "learning_rate": 0.00019736921338835196, "loss": 11.6661, "step": 10515 }, { "epoch": 0.22012894582600687, "grad_norm": 0.24203234910964966, "learning_rate": 0.00019736871375781526, "loss": 11.6887, "step": 10516 }, { "epoch": 0.220149878589969, "grad_norm": 0.20041288435459137, "learning_rate": 0.00019736821408047146, "loss": 11.6663, "step": 10517 }, { "epoch": 0.22017081135393118, "grad_norm": 0.2285861372947693, "learning_rate": 0.0001973677143563208, "loss": 11.685, "step": 10518 }, { "epoch": 0.22019174411789333, "grad_norm": 0.25732749700546265, "learning_rate": 0.00019736721458536355, "loss": 11.6769, "step": 10519 }, { "epoch": 0.22021267688185547, "grad_norm": 0.32385560870170593, "learning_rate": 0.0001973667147675999, "loss": 11.6704, "step": 10520 }, { "epoch": 0.22023360964581765, "grad_norm": 0.22424934804439545, "learning_rate": 0.00019736621490303017, "loss": 11.6806, "step": 10521 }, { "epoch": 0.2202545424097798, "grad_norm": 0.19742661714553833, "learning_rate": 0.00019736571499165447, "loss": 11.6754, "step": 10522 }, { "epoch": 0.22027547517374194, "grad_norm": 0.23646871745586395, "learning_rate": 0.00019736521503347317, "loss": 11.6814, "step": 10523 }, { "epoch": 0.22029640793770408, "grad_norm": 0.28932470083236694, "learning_rate": 0.00019736471502848648, "loss": 11.6745, "step": 10524 }, { "epoch": 0.22031734070166625, "grad_norm": 0.2549000084400177, "learning_rate": 0.0001973642149766946, "loss": 11.6751, "step": 10525 }, { "epoch": 0.2203382734656284, "grad_norm": 0.2908916473388672, "learning_rate": 0.00019736371487809779, "loss": 11.6722, "step": 10526 }, { "epoch": 0.22035920622959054, "grad_norm": 0.2472081482410431, "learning_rate": 0.00019736321473269627, "loss": 11.6663, "step": 10527 }, { "epoch": 0.22038013899355272, "grad_norm": 0.25696995854377747, "learning_rate": 0.00019736271454049033, "loss": 11.6793, "step": 10528 }, { "epoch": 0.22040107175751486, "grad_norm": 0.26185888051986694, "learning_rate": 0.00019736221430148015, "loss": 11.6717, "step": 10529 }, { "epoch": 0.220422004521477, "grad_norm": 0.250735878944397, "learning_rate": 0.00019736171401566605, "loss": 11.7019, "step": 10530 }, { "epoch": 0.22044293728543918, "grad_norm": 0.27866828441619873, "learning_rate": 0.00019736121368304818, "loss": 11.6799, "step": 10531 }, { "epoch": 0.22046387004940132, "grad_norm": 0.20369161665439606, "learning_rate": 0.00019736071330362685, "loss": 11.6797, "step": 10532 }, { "epoch": 0.22048480281336347, "grad_norm": 0.19163092970848083, "learning_rate": 0.00019736021287740228, "loss": 11.6825, "step": 10533 }, { "epoch": 0.22050573557732564, "grad_norm": 0.3102211356163025, "learning_rate": 0.00019735971240437467, "loss": 11.6702, "step": 10534 }, { "epoch": 0.22052666834128778, "grad_norm": 0.24241310358047485, "learning_rate": 0.0001973592118845443, "loss": 11.6973, "step": 10535 }, { "epoch": 0.22054760110524993, "grad_norm": 0.21515707671642303, "learning_rate": 0.00019735871131791144, "loss": 11.6712, "step": 10536 }, { "epoch": 0.2205685338692121, "grad_norm": 0.2259564995765686, "learning_rate": 0.00019735821070447626, "loss": 11.6848, "step": 10537 }, { "epoch": 0.22058946663317425, "grad_norm": 0.2548002600669861, "learning_rate": 0.00019735771004423906, "loss": 11.6824, "step": 10538 }, { "epoch": 0.2206103993971364, "grad_norm": 0.21783536672592163, "learning_rate": 0.00019735720933720006, "loss": 11.6692, "step": 10539 }, { "epoch": 0.22063133216109856, "grad_norm": 0.1956213116645813, "learning_rate": 0.00019735670858335948, "loss": 11.6768, "step": 10540 }, { "epoch": 0.2206522649250607, "grad_norm": 0.3844911754131317, "learning_rate": 0.00019735620778271759, "loss": 11.6897, "step": 10541 }, { "epoch": 0.22067319768902285, "grad_norm": 0.24177254736423492, "learning_rate": 0.00019735570693527464, "loss": 11.6808, "step": 10542 }, { "epoch": 0.220694130452985, "grad_norm": 0.23294898867607117, "learning_rate": 0.0001973552060410308, "loss": 11.6815, "step": 10543 }, { "epoch": 0.22071506321694717, "grad_norm": 0.3638826310634613, "learning_rate": 0.00019735470509998638, "loss": 11.667, "step": 10544 }, { "epoch": 0.22073599598090932, "grad_norm": 0.3515777587890625, "learning_rate": 0.00019735420411214164, "loss": 11.6696, "step": 10545 }, { "epoch": 0.22075692874487146, "grad_norm": 0.20895978808403015, "learning_rate": 0.00019735370307749674, "loss": 11.6677, "step": 10546 }, { "epoch": 0.22077786150883363, "grad_norm": 0.23529422283172607, "learning_rate": 0.00019735320199605198, "loss": 11.667, "step": 10547 }, { "epoch": 0.22079879427279578, "grad_norm": 0.27862465381622314, "learning_rate": 0.0001973527008678076, "loss": 11.6616, "step": 10548 }, { "epoch": 0.22081972703675792, "grad_norm": 0.21025443077087402, "learning_rate": 0.0001973521996927638, "loss": 11.6694, "step": 10549 }, { "epoch": 0.2208406598007201, "grad_norm": 0.32696497440338135, "learning_rate": 0.00019735169847092085, "loss": 11.6784, "step": 10550 }, { "epoch": 0.22086159256468224, "grad_norm": 0.19841700792312622, "learning_rate": 0.000197351197202279, "loss": 11.6767, "step": 10551 }, { "epoch": 0.22088252532864439, "grad_norm": 0.22978802025318146, "learning_rate": 0.00019735069588683847, "loss": 11.6691, "step": 10552 }, { "epoch": 0.22090345809260656, "grad_norm": 0.2993501126766205, "learning_rate": 0.0001973501945245995, "loss": 11.6869, "step": 10553 }, { "epoch": 0.2209243908565687, "grad_norm": 0.2153993397951126, "learning_rate": 0.00019734969311556238, "loss": 11.6796, "step": 10554 }, { "epoch": 0.22094532362053085, "grad_norm": 0.23603476583957672, "learning_rate": 0.00019734919165972724, "loss": 11.6835, "step": 10555 }, { "epoch": 0.22096625638449302, "grad_norm": 0.29181089997291565, "learning_rate": 0.00019734869015709445, "loss": 11.6869, "step": 10556 }, { "epoch": 0.22098718914845517, "grad_norm": 0.27149268984794617, "learning_rate": 0.0001973481886076642, "loss": 11.6794, "step": 10557 }, { "epoch": 0.2210081219124173, "grad_norm": 0.2709835171699524, "learning_rate": 0.0001973476870114367, "loss": 11.6792, "step": 10558 }, { "epoch": 0.22102905467637948, "grad_norm": 0.21180376410484314, "learning_rate": 0.00019734718536841222, "loss": 11.6578, "step": 10559 }, { "epoch": 0.22104998744034163, "grad_norm": 0.26146987080574036, "learning_rate": 0.000197346683678591, "loss": 11.6658, "step": 10560 }, { "epoch": 0.22107092020430377, "grad_norm": 0.21776999533176422, "learning_rate": 0.00019734618194197328, "loss": 11.679, "step": 10561 }, { "epoch": 0.22109185296826592, "grad_norm": 0.28764793276786804, "learning_rate": 0.00019734568015855928, "loss": 11.6899, "step": 10562 }, { "epoch": 0.2211127857322281, "grad_norm": 0.2528000771999359, "learning_rate": 0.0001973451783283493, "loss": 11.6563, "step": 10563 }, { "epoch": 0.22113371849619023, "grad_norm": 0.2162962406873703, "learning_rate": 0.0001973446764513435, "loss": 11.6596, "step": 10564 }, { "epoch": 0.22115465126015238, "grad_norm": 0.2688436806201935, "learning_rate": 0.0001973441745275422, "loss": 11.6764, "step": 10565 }, { "epoch": 0.22117558402411455, "grad_norm": 0.30664610862731934, "learning_rate": 0.00019734367255694557, "loss": 11.6871, "step": 10566 }, { "epoch": 0.2211965167880767, "grad_norm": 0.25408151745796204, "learning_rate": 0.0001973431705395539, "loss": 11.6789, "step": 10567 }, { "epoch": 0.22121744955203884, "grad_norm": 0.29067423939704895, "learning_rate": 0.0001973426684753674, "loss": 11.69, "step": 10568 }, { "epoch": 0.22123838231600101, "grad_norm": 0.2616800367832184, "learning_rate": 0.00019734216636438636, "loss": 11.6943, "step": 10569 }, { "epoch": 0.22125931507996316, "grad_norm": 0.2571745216846466, "learning_rate": 0.000197341664206611, "loss": 11.6759, "step": 10570 }, { "epoch": 0.2212802478439253, "grad_norm": 0.22218911349773407, "learning_rate": 0.00019734116200204152, "loss": 11.6787, "step": 10571 }, { "epoch": 0.22130118060788748, "grad_norm": 0.21449466049671173, "learning_rate": 0.00019734065975067822, "loss": 11.6685, "step": 10572 }, { "epoch": 0.22132211337184962, "grad_norm": 0.2528100907802582, "learning_rate": 0.00019734015745252127, "loss": 11.66, "step": 10573 }, { "epoch": 0.22134304613581177, "grad_norm": 0.22056810557842255, "learning_rate": 0.00019733965510757096, "loss": 11.6755, "step": 10574 }, { "epoch": 0.22136397889977394, "grad_norm": 0.2132079154253006, "learning_rate": 0.00019733915271582758, "loss": 11.6883, "step": 10575 }, { "epoch": 0.22138491166373608, "grad_norm": 0.23172201216220856, "learning_rate": 0.00019733865027729127, "loss": 11.6863, "step": 10576 }, { "epoch": 0.22140584442769823, "grad_norm": 0.25238099694252014, "learning_rate": 0.00019733814779196237, "loss": 11.6696, "step": 10577 }, { "epoch": 0.22142677719166037, "grad_norm": 0.3380681276321411, "learning_rate": 0.000197337645259841, "loss": 11.6931, "step": 10578 }, { "epoch": 0.22144770995562255, "grad_norm": 0.27261975407600403, "learning_rate": 0.00019733714268092752, "loss": 11.6886, "step": 10579 }, { "epoch": 0.2214686427195847, "grad_norm": 0.22544565796852112, "learning_rate": 0.00019733664005522212, "loss": 11.6733, "step": 10580 }, { "epoch": 0.22148957548354684, "grad_norm": 0.359311580657959, "learning_rate": 0.00019733613738272506, "loss": 11.6897, "step": 10581 }, { "epoch": 0.221510508247509, "grad_norm": 0.22374022006988525, "learning_rate": 0.00019733563466343656, "loss": 11.6792, "step": 10582 }, { "epoch": 0.22153144101147115, "grad_norm": 0.2575557827949524, "learning_rate": 0.00019733513189735687, "loss": 11.6649, "step": 10583 }, { "epoch": 0.2215523737754333, "grad_norm": 0.2405235767364502, "learning_rate": 0.00019733462908448618, "loss": 11.6667, "step": 10584 }, { "epoch": 0.22157330653939547, "grad_norm": 0.2654299736022949, "learning_rate": 0.00019733412622482485, "loss": 11.6824, "step": 10585 }, { "epoch": 0.22159423930335762, "grad_norm": 0.24477343261241913, "learning_rate": 0.00019733362331837305, "loss": 11.6712, "step": 10586 }, { "epoch": 0.22161517206731976, "grad_norm": 0.2576274871826172, "learning_rate": 0.00019733312036513098, "loss": 11.6761, "step": 10587 }, { "epoch": 0.22163610483128193, "grad_norm": 0.2679769992828369, "learning_rate": 0.00019733261736509897, "loss": 11.6777, "step": 10588 }, { "epoch": 0.22165703759524408, "grad_norm": 0.30529558658599854, "learning_rate": 0.0001973321143182772, "loss": 11.6893, "step": 10589 }, { "epoch": 0.22167797035920622, "grad_norm": 0.20767928659915924, "learning_rate": 0.00019733161122466593, "loss": 11.6662, "step": 10590 }, { "epoch": 0.2216989031231684, "grad_norm": 0.2779685854911804, "learning_rate": 0.0001973311080842654, "loss": 11.664, "step": 10591 }, { "epoch": 0.22171983588713054, "grad_norm": 0.2761765718460083, "learning_rate": 0.00019733060489707588, "loss": 11.6748, "step": 10592 }, { "epoch": 0.22174076865109268, "grad_norm": 0.2616657614707947, "learning_rate": 0.0001973301016630976, "loss": 11.698, "step": 10593 }, { "epoch": 0.22176170141505486, "grad_norm": 0.2597530484199524, "learning_rate": 0.00019732959838233077, "loss": 11.6838, "step": 10594 }, { "epoch": 0.221782634179017, "grad_norm": 0.27253007888793945, "learning_rate": 0.00019732909505477562, "loss": 11.6809, "step": 10595 }, { "epoch": 0.22180356694297915, "grad_norm": 0.27060815691947937, "learning_rate": 0.00019732859168043246, "loss": 11.7009, "step": 10596 }, { "epoch": 0.2218244997069413, "grad_norm": 0.2471400648355484, "learning_rate": 0.00019732808825930146, "loss": 11.6744, "step": 10597 }, { "epoch": 0.22184543247090346, "grad_norm": 0.21531768143177032, "learning_rate": 0.00019732758479138295, "loss": 11.6656, "step": 10598 }, { "epoch": 0.2218663652348656, "grad_norm": 0.2855546772480011, "learning_rate": 0.00019732708127667708, "loss": 11.6701, "step": 10599 }, { "epoch": 0.22188729799882775, "grad_norm": 0.2507423460483551, "learning_rate": 0.00019732657771518418, "loss": 11.6972, "step": 10600 }, { "epoch": 0.22190823076278993, "grad_norm": 0.2903985381126404, "learning_rate": 0.00019732607410690437, "loss": 11.6658, "step": 10601 }, { "epoch": 0.22192916352675207, "grad_norm": 0.25804999470710754, "learning_rate": 0.00019732557045183801, "loss": 11.6675, "step": 10602 }, { "epoch": 0.22195009629071422, "grad_norm": 0.2722436487674713, "learning_rate": 0.0001973250667499853, "loss": 11.6756, "step": 10603 }, { "epoch": 0.2219710290546764, "grad_norm": 0.20880387723445892, "learning_rate": 0.00019732456300134647, "loss": 11.6739, "step": 10604 }, { "epoch": 0.22199196181863853, "grad_norm": 0.2375534474849701, "learning_rate": 0.0001973240592059218, "loss": 11.6658, "step": 10605 }, { "epoch": 0.22201289458260068, "grad_norm": 0.23900143802165985, "learning_rate": 0.00019732355536371146, "loss": 11.6833, "step": 10606 }, { "epoch": 0.22203382734656285, "grad_norm": 0.25047481060028076, "learning_rate": 0.00019732305147471575, "loss": 11.6867, "step": 10607 }, { "epoch": 0.222054760110525, "grad_norm": 0.2367485612630844, "learning_rate": 0.00019732254753893492, "loss": 11.6954, "step": 10608 }, { "epoch": 0.22207569287448714, "grad_norm": 0.3032638430595398, "learning_rate": 0.00019732204355636917, "loss": 11.6787, "step": 10609 }, { "epoch": 0.2220966256384493, "grad_norm": 0.2645500898361206, "learning_rate": 0.00019732153952701874, "loss": 11.6764, "step": 10610 }, { "epoch": 0.22211755840241146, "grad_norm": 0.24518482387065887, "learning_rate": 0.00019732103545088393, "loss": 11.658, "step": 10611 }, { "epoch": 0.2221384911663736, "grad_norm": 0.23537959158420563, "learning_rate": 0.00019732053132796496, "loss": 11.6736, "step": 10612 }, { "epoch": 0.22215942393033578, "grad_norm": 0.2796265184879303, "learning_rate": 0.00019732002715826204, "loss": 11.6674, "step": 10613 }, { "epoch": 0.22218035669429792, "grad_norm": 0.20096807181835175, "learning_rate": 0.0001973195229417754, "loss": 11.6732, "step": 10614 }, { "epoch": 0.22220128945826007, "grad_norm": 0.21955522894859314, "learning_rate": 0.00019731901867850538, "loss": 11.6811, "step": 10615 }, { "epoch": 0.2222222222222222, "grad_norm": 0.23777365684509277, "learning_rate": 0.0001973185143684521, "loss": 11.6766, "step": 10616 }, { "epoch": 0.22224315498618438, "grad_norm": 0.25796934962272644, "learning_rate": 0.00019731801001161592, "loss": 11.6662, "step": 10617 }, { "epoch": 0.22226408775014653, "grad_norm": 0.21499000489711761, "learning_rate": 0.00019731750560799698, "loss": 11.6619, "step": 10618 }, { "epoch": 0.22228502051410867, "grad_norm": 0.24000272154808044, "learning_rate": 0.00019731700115759558, "loss": 11.6864, "step": 10619 }, { "epoch": 0.22230595327807084, "grad_norm": 0.2644888758659363, "learning_rate": 0.00019731649666041194, "loss": 11.6788, "step": 10620 }, { "epoch": 0.222326886042033, "grad_norm": 0.282870352268219, "learning_rate": 0.0001973159921164463, "loss": 11.6814, "step": 10621 }, { "epoch": 0.22234781880599513, "grad_norm": 0.5333594083786011, "learning_rate": 0.00019731548752569892, "loss": 11.6845, "step": 10622 }, { "epoch": 0.2223687515699573, "grad_norm": 0.2636285424232483, "learning_rate": 0.00019731498288817005, "loss": 11.6684, "step": 10623 }, { "epoch": 0.22238968433391945, "grad_norm": 0.3106842041015625, "learning_rate": 0.00019731447820385992, "loss": 11.6755, "step": 10624 }, { "epoch": 0.2224106170978816, "grad_norm": 0.25908493995666504, "learning_rate": 0.00019731397347276874, "loss": 11.6847, "step": 10625 }, { "epoch": 0.22243154986184377, "grad_norm": 0.27687057852745056, "learning_rate": 0.0001973134686948968, "loss": 11.654, "step": 10626 }, { "epoch": 0.22245248262580591, "grad_norm": 0.24876247346401215, "learning_rate": 0.0001973129638702443, "loss": 11.6667, "step": 10627 }, { "epoch": 0.22247341538976806, "grad_norm": 0.24313169717788696, "learning_rate": 0.00019731245899881156, "loss": 11.6711, "step": 10628 }, { "epoch": 0.22249434815373023, "grad_norm": 0.23828125, "learning_rate": 0.00019731195408059871, "loss": 11.6548, "step": 10629 }, { "epoch": 0.22251528091769238, "grad_norm": 0.2590550184249878, "learning_rate": 0.0001973114491156061, "loss": 11.6934, "step": 10630 }, { "epoch": 0.22253621368165452, "grad_norm": 0.24349644780158997, "learning_rate": 0.00019731094410383392, "loss": 11.665, "step": 10631 }, { "epoch": 0.22255714644561667, "grad_norm": 0.21663549542427063, "learning_rate": 0.0001973104390452824, "loss": 11.6713, "step": 10632 }, { "epoch": 0.22257807920957884, "grad_norm": 0.30751579999923706, "learning_rate": 0.00019730993393995183, "loss": 11.6749, "step": 10633 }, { "epoch": 0.22259901197354098, "grad_norm": 0.24768157303333282, "learning_rate": 0.0001973094287878424, "loss": 11.6664, "step": 10634 }, { "epoch": 0.22261994473750313, "grad_norm": 0.2320336550474167, "learning_rate": 0.00019730892358895438, "loss": 11.6724, "step": 10635 }, { "epoch": 0.2226408775014653, "grad_norm": 0.24321900308132172, "learning_rate": 0.00019730841834328803, "loss": 11.6727, "step": 10636 }, { "epoch": 0.22266181026542745, "grad_norm": 0.2548539340496063, "learning_rate": 0.00019730791305084356, "loss": 11.6883, "step": 10637 }, { "epoch": 0.2226827430293896, "grad_norm": 0.24583037197589874, "learning_rate": 0.00019730740771162122, "loss": 11.6833, "step": 10638 }, { "epoch": 0.22270367579335176, "grad_norm": 0.2739277482032776, "learning_rate": 0.00019730690232562127, "loss": 11.6884, "step": 10639 }, { "epoch": 0.2227246085573139, "grad_norm": 0.3221769630908966, "learning_rate": 0.00019730639689284393, "loss": 11.684, "step": 10640 }, { "epoch": 0.22274554132127605, "grad_norm": 0.2707364559173584, "learning_rate": 0.00019730589141328948, "loss": 11.6902, "step": 10641 }, { "epoch": 0.22276647408523823, "grad_norm": 0.289988249540329, "learning_rate": 0.0001973053858869581, "loss": 11.6683, "step": 10642 }, { "epoch": 0.22278740684920037, "grad_norm": 0.23131632804870605, "learning_rate": 0.00019730488031385012, "loss": 11.6678, "step": 10643 }, { "epoch": 0.22280833961316251, "grad_norm": 0.29238662123680115, "learning_rate": 0.0001973043746939657, "loss": 11.6816, "step": 10644 }, { "epoch": 0.2228292723771247, "grad_norm": 0.22991064190864563, "learning_rate": 0.0001973038690273051, "loss": 11.6863, "step": 10645 }, { "epoch": 0.22285020514108683, "grad_norm": 0.2687980532646179, "learning_rate": 0.00019730336331386862, "loss": 11.6821, "step": 10646 }, { "epoch": 0.22287113790504898, "grad_norm": 0.22479405999183655, "learning_rate": 0.00019730285755365645, "loss": 11.6803, "step": 10647 }, { "epoch": 0.22289207066901115, "grad_norm": 0.23739899694919586, "learning_rate": 0.00019730235174666882, "loss": 11.6664, "step": 10648 }, { "epoch": 0.2229130034329733, "grad_norm": 0.28131550550460815, "learning_rate": 0.00019730184589290603, "loss": 11.661, "step": 10649 }, { "epoch": 0.22293393619693544, "grad_norm": 0.243291974067688, "learning_rate": 0.00019730133999236828, "loss": 11.6915, "step": 10650 }, { "epoch": 0.22295486896089758, "grad_norm": 0.22814971208572388, "learning_rate": 0.0001973008340450558, "loss": 11.6971, "step": 10651 }, { "epoch": 0.22297580172485976, "grad_norm": 0.20944470167160034, "learning_rate": 0.00019730032805096893, "loss": 11.6662, "step": 10652 }, { "epoch": 0.2229967344888219, "grad_norm": 0.25714248418807983, "learning_rate": 0.0001972998220101078, "loss": 11.6828, "step": 10653 }, { "epoch": 0.22301766725278405, "grad_norm": 0.24848610162734985, "learning_rate": 0.00019729931592247268, "loss": 11.6722, "step": 10654 }, { "epoch": 0.22303860001674622, "grad_norm": 0.21205587685108185, "learning_rate": 0.00019729880978806386, "loss": 11.6783, "step": 10655 }, { "epoch": 0.22305953278070836, "grad_norm": 0.2400725781917572, "learning_rate": 0.00019729830360688152, "loss": 11.6699, "step": 10656 }, { "epoch": 0.2230804655446705, "grad_norm": 0.23987145721912384, "learning_rate": 0.00019729779737892597, "loss": 11.6885, "step": 10657 }, { "epoch": 0.22310139830863268, "grad_norm": 0.3107942044734955, "learning_rate": 0.0001972972911041974, "loss": 11.6888, "step": 10658 }, { "epoch": 0.22312233107259483, "grad_norm": 0.37337109446525574, "learning_rate": 0.00019729678478269607, "loss": 11.6832, "step": 10659 }, { "epoch": 0.22314326383655697, "grad_norm": 0.2449035346508026, "learning_rate": 0.00019729627841442223, "loss": 11.6714, "step": 10660 }, { "epoch": 0.22316419660051914, "grad_norm": 0.24374185502529144, "learning_rate": 0.00019729577199937613, "loss": 11.6856, "step": 10661 }, { "epoch": 0.2231851293644813, "grad_norm": 0.24920636415481567, "learning_rate": 0.000197295265537558, "loss": 11.6791, "step": 10662 }, { "epoch": 0.22320606212844343, "grad_norm": 0.2596074640750885, "learning_rate": 0.00019729475902896805, "loss": 11.6853, "step": 10663 }, { "epoch": 0.2232269948924056, "grad_norm": 0.26430198550224304, "learning_rate": 0.0001972942524736066, "loss": 11.678, "step": 10664 }, { "epoch": 0.22324792765636775, "grad_norm": 0.3549817204475403, "learning_rate": 0.00019729374587147382, "loss": 11.6873, "step": 10665 }, { "epoch": 0.2232688604203299, "grad_norm": 0.29515212774276733, "learning_rate": 0.00019729323922257003, "loss": 11.683, "step": 10666 }, { "epoch": 0.22328979318429204, "grad_norm": 0.2600916624069214, "learning_rate": 0.0001972927325268954, "loss": 11.6639, "step": 10667 }, { "epoch": 0.2233107259482542, "grad_norm": 0.2262328416109085, "learning_rate": 0.00019729222578445023, "loss": 11.6684, "step": 10668 }, { "epoch": 0.22333165871221636, "grad_norm": 0.30734676122665405, "learning_rate": 0.0001972917189952347, "loss": 11.6897, "step": 10669 }, { "epoch": 0.2233525914761785, "grad_norm": 0.21325452625751495, "learning_rate": 0.00019729121215924912, "loss": 11.6838, "step": 10670 }, { "epoch": 0.22337352424014068, "grad_norm": 0.2775264382362366, "learning_rate": 0.00019729070527649368, "loss": 11.688, "step": 10671 }, { "epoch": 0.22339445700410282, "grad_norm": 0.29253777861595154, "learning_rate": 0.00019729019834696868, "loss": 11.6491, "step": 10672 }, { "epoch": 0.22341538976806496, "grad_norm": 0.31326866149902344, "learning_rate": 0.0001972896913706743, "loss": 11.686, "step": 10673 }, { "epoch": 0.22343632253202714, "grad_norm": 0.24758733808994293, "learning_rate": 0.00019728918434761084, "loss": 11.6808, "step": 10674 }, { "epoch": 0.22345725529598928, "grad_norm": 0.25974389910697937, "learning_rate": 0.00019728867727777848, "loss": 11.6837, "step": 10675 }, { "epoch": 0.22347818805995143, "grad_norm": 0.27849701046943665, "learning_rate": 0.00019728817016117755, "loss": 11.6805, "step": 10676 }, { "epoch": 0.2234991208239136, "grad_norm": 0.23330417275428772, "learning_rate": 0.00019728766299780822, "loss": 11.6547, "step": 10677 }, { "epoch": 0.22352005358787574, "grad_norm": 0.25034424662590027, "learning_rate": 0.00019728715578767078, "loss": 11.6762, "step": 10678 }, { "epoch": 0.2235409863518379, "grad_norm": 0.2259029746055603, "learning_rate": 0.00019728664853076543, "loss": 11.6726, "step": 10679 }, { "epoch": 0.22356191911580006, "grad_norm": 0.22347931563854218, "learning_rate": 0.00019728614122709246, "loss": 11.6746, "step": 10680 }, { "epoch": 0.2235828518797622, "grad_norm": 0.2740715444087982, "learning_rate": 0.00019728563387665207, "loss": 11.6726, "step": 10681 }, { "epoch": 0.22360378464372435, "grad_norm": 0.20929253101348877, "learning_rate": 0.00019728512647944455, "loss": 11.668, "step": 10682 }, { "epoch": 0.22362471740768652, "grad_norm": 0.21120330691337585, "learning_rate": 0.00019728461903547012, "loss": 11.6752, "step": 10683 }, { "epoch": 0.22364565017164867, "grad_norm": 0.2224671095609665, "learning_rate": 0.00019728411154472904, "loss": 11.6694, "step": 10684 }, { "epoch": 0.2236665829356108, "grad_norm": 0.23101969063282013, "learning_rate": 0.0001972836040072215, "loss": 11.6528, "step": 10685 }, { "epoch": 0.22368751569957296, "grad_norm": 0.2850889265537262, "learning_rate": 0.00019728309642294778, "loss": 11.6725, "step": 10686 }, { "epoch": 0.22370844846353513, "grad_norm": 0.31198304891586304, "learning_rate": 0.00019728258879190815, "loss": 11.6823, "step": 10687 }, { "epoch": 0.22372938122749728, "grad_norm": 0.40938273072242737, "learning_rate": 0.00019728208111410283, "loss": 11.6785, "step": 10688 }, { "epoch": 0.22375031399145942, "grad_norm": 0.2591279447078705, "learning_rate": 0.00019728157338953203, "loss": 11.6831, "step": 10689 }, { "epoch": 0.2237712467554216, "grad_norm": 0.2379436045885086, "learning_rate": 0.00019728106561819607, "loss": 11.6774, "step": 10690 }, { "epoch": 0.22379217951938374, "grad_norm": 0.26215997338294983, "learning_rate": 0.00019728055780009513, "loss": 11.6831, "step": 10691 }, { "epoch": 0.22381311228334588, "grad_norm": 0.3158070147037506, "learning_rate": 0.0001972800499352295, "loss": 11.6681, "step": 10692 }, { "epoch": 0.22383404504730806, "grad_norm": 0.2338217794895172, "learning_rate": 0.00019727954202359936, "loss": 11.6834, "step": 10693 }, { "epoch": 0.2238549778112702, "grad_norm": 0.22403879463672638, "learning_rate": 0.00019727903406520502, "loss": 11.675, "step": 10694 }, { "epoch": 0.22387591057523235, "grad_norm": 0.23192469775676727, "learning_rate": 0.0001972785260600467, "loss": 11.6534, "step": 10695 }, { "epoch": 0.22389684333919452, "grad_norm": 0.2784285545349121, "learning_rate": 0.00019727801800812465, "loss": 11.6618, "step": 10696 }, { "epoch": 0.22391777610315666, "grad_norm": 0.3066727817058563, "learning_rate": 0.0001972775099094391, "loss": 11.6846, "step": 10697 }, { "epoch": 0.2239387088671188, "grad_norm": 0.31583699584007263, "learning_rate": 0.00019727700176399028, "loss": 11.6735, "step": 10698 }, { "epoch": 0.22395964163108098, "grad_norm": 0.21119828522205353, "learning_rate": 0.0001972764935717785, "loss": 11.6722, "step": 10699 }, { "epoch": 0.22398057439504312, "grad_norm": 0.2426316887140274, "learning_rate": 0.0001972759853328039, "loss": 11.6851, "step": 10700 }, { "epoch": 0.22400150715900527, "grad_norm": 0.2329549342393875, "learning_rate": 0.00019727547704706685, "loss": 11.6805, "step": 10701 }, { "epoch": 0.22402243992296744, "grad_norm": 0.2984265983104706, "learning_rate": 0.0001972749687145675, "loss": 11.6873, "step": 10702 }, { "epoch": 0.2240433726869296, "grad_norm": 0.25230997800827026, "learning_rate": 0.0001972744603353061, "loss": 11.6769, "step": 10703 }, { "epoch": 0.22406430545089173, "grad_norm": 0.23960356414318085, "learning_rate": 0.00019727395190928296, "loss": 11.6849, "step": 10704 }, { "epoch": 0.22408523821485388, "grad_norm": 0.2898677885532379, "learning_rate": 0.00019727344343649822, "loss": 11.6789, "step": 10705 }, { "epoch": 0.22410617097881605, "grad_norm": 0.3474435806274414, "learning_rate": 0.00019727293491695225, "loss": 11.6574, "step": 10706 }, { "epoch": 0.2241271037427782, "grad_norm": 0.2106546312570572, "learning_rate": 0.0001972724263506452, "loss": 11.6573, "step": 10707 }, { "epoch": 0.22414803650674034, "grad_norm": 0.23609678447246552, "learning_rate": 0.00019727191773757734, "loss": 11.6727, "step": 10708 }, { "epoch": 0.2241689692707025, "grad_norm": 0.25257939100265503, "learning_rate": 0.00019727140907774895, "loss": 11.6824, "step": 10709 }, { "epoch": 0.22418990203466466, "grad_norm": 0.23518487811088562, "learning_rate": 0.00019727090037116024, "loss": 11.6583, "step": 10710 }, { "epoch": 0.2242108347986268, "grad_norm": 0.2463126927614212, "learning_rate": 0.00019727039161781144, "loss": 11.6735, "step": 10711 }, { "epoch": 0.22423176756258897, "grad_norm": 0.23623797297477722, "learning_rate": 0.0001972698828177028, "loss": 11.6763, "step": 10712 }, { "epoch": 0.22425270032655112, "grad_norm": 0.26273682713508606, "learning_rate": 0.00019726937397083458, "loss": 11.6815, "step": 10713 }, { "epoch": 0.22427363309051326, "grad_norm": 0.275717169046402, "learning_rate": 0.00019726886507720706, "loss": 11.6865, "step": 10714 }, { "epoch": 0.22429456585447544, "grad_norm": 0.29841193556785583, "learning_rate": 0.00019726835613682043, "loss": 11.6761, "step": 10715 }, { "epoch": 0.22431549861843758, "grad_norm": 0.2464240938425064, "learning_rate": 0.00019726784714967495, "loss": 11.6556, "step": 10716 }, { "epoch": 0.22433643138239973, "grad_norm": 0.2845725119113922, "learning_rate": 0.00019726733811577086, "loss": 11.6912, "step": 10717 }, { "epoch": 0.2243573641463619, "grad_norm": 0.2865603268146515, "learning_rate": 0.0001972668290351084, "loss": 11.6706, "step": 10718 }, { "epoch": 0.22437829691032404, "grad_norm": 0.30260732769966125, "learning_rate": 0.00019726631990768786, "loss": 11.6824, "step": 10719 }, { "epoch": 0.2243992296742862, "grad_norm": 0.2954130172729492, "learning_rate": 0.00019726581073350942, "loss": 11.6802, "step": 10720 }, { "epoch": 0.22442016243824833, "grad_norm": 0.28479835391044617, "learning_rate": 0.0001972653015125734, "loss": 11.689, "step": 10721 }, { "epoch": 0.2244410952022105, "grad_norm": 0.2700885236263275, "learning_rate": 0.00019726479224487994, "loss": 11.6865, "step": 10722 }, { "epoch": 0.22446202796617265, "grad_norm": 0.28129851818084717, "learning_rate": 0.00019726428293042938, "loss": 11.6813, "step": 10723 }, { "epoch": 0.2244829607301348, "grad_norm": 0.23482342064380646, "learning_rate": 0.00019726377356922193, "loss": 11.6793, "step": 10724 }, { "epoch": 0.22450389349409697, "grad_norm": 0.3547705411911011, "learning_rate": 0.00019726326416125782, "loss": 11.6937, "step": 10725 }, { "epoch": 0.2245248262580591, "grad_norm": 0.39412492513656616, "learning_rate": 0.00019726275470653733, "loss": 11.6798, "step": 10726 }, { "epoch": 0.22454575902202126, "grad_norm": 0.2659642696380615, "learning_rate": 0.00019726224520506068, "loss": 11.6787, "step": 10727 }, { "epoch": 0.22456669178598343, "grad_norm": 0.2421731948852539, "learning_rate": 0.00019726173565682808, "loss": 11.6923, "step": 10728 }, { "epoch": 0.22458762454994557, "grad_norm": 0.23735791444778442, "learning_rate": 0.00019726122606183986, "loss": 11.6747, "step": 10729 }, { "epoch": 0.22460855731390772, "grad_norm": 0.2848970293998718, "learning_rate": 0.0001972607164200962, "loss": 11.6588, "step": 10730 }, { "epoch": 0.2246294900778699, "grad_norm": 0.24624104797840118, "learning_rate": 0.00019726020673159738, "loss": 11.6716, "step": 10731 }, { "epoch": 0.22465042284183204, "grad_norm": 0.2192416489124298, "learning_rate": 0.00019725969699634362, "loss": 11.6767, "step": 10732 }, { "epoch": 0.22467135560579418, "grad_norm": 0.2750304341316223, "learning_rate": 0.00019725918721433518, "loss": 11.6776, "step": 10733 }, { "epoch": 0.22469228836975635, "grad_norm": 0.22111155092716217, "learning_rate": 0.0001972586773855723, "loss": 11.6712, "step": 10734 }, { "epoch": 0.2247132211337185, "grad_norm": 0.2665061950683594, "learning_rate": 0.00019725816751005518, "loss": 11.6788, "step": 10735 }, { "epoch": 0.22473415389768064, "grad_norm": 0.27576419711112976, "learning_rate": 0.00019725765758778416, "loss": 11.6714, "step": 10736 }, { "epoch": 0.22475508666164282, "grad_norm": 0.290582537651062, "learning_rate": 0.00019725714761875945, "loss": 11.6914, "step": 10737 }, { "epoch": 0.22477601942560496, "grad_norm": 0.255660742521286, "learning_rate": 0.00019725663760298122, "loss": 11.6589, "step": 10738 }, { "epoch": 0.2247969521895671, "grad_norm": 0.25633513927459717, "learning_rate": 0.0001972561275404498, "loss": 11.667, "step": 10739 }, { "epoch": 0.22481788495352925, "grad_norm": 0.22226491570472717, "learning_rate": 0.00019725561743116546, "loss": 11.6746, "step": 10740 }, { "epoch": 0.22483881771749142, "grad_norm": 0.2939460873603821, "learning_rate": 0.00019725510727512834, "loss": 11.6844, "step": 10741 }, { "epoch": 0.22485975048145357, "grad_norm": 0.32785484194755554, "learning_rate": 0.0001972545970723388, "loss": 11.6699, "step": 10742 }, { "epoch": 0.2248806832454157, "grad_norm": 0.22649742662906647, "learning_rate": 0.00019725408682279695, "loss": 11.6664, "step": 10743 }, { "epoch": 0.22490161600937789, "grad_norm": 0.2890861928462982, "learning_rate": 0.00019725357652650315, "loss": 11.6797, "step": 10744 }, { "epoch": 0.22492254877334003, "grad_norm": 0.22324495017528534, "learning_rate": 0.0001972530661834576, "loss": 11.676, "step": 10745 }, { "epoch": 0.22494348153730218, "grad_norm": 0.3143276870250702, "learning_rate": 0.00019725255579366055, "loss": 11.6886, "step": 10746 }, { "epoch": 0.22496441430126435, "grad_norm": 0.2047290951013565, "learning_rate": 0.00019725204535711225, "loss": 11.676, "step": 10747 }, { "epoch": 0.2249853470652265, "grad_norm": 0.3203319013118744, "learning_rate": 0.00019725153487381295, "loss": 11.6805, "step": 10748 }, { "epoch": 0.22500627982918864, "grad_norm": 0.22361193597316742, "learning_rate": 0.00019725102434376286, "loss": 11.6659, "step": 10749 }, { "epoch": 0.2250272125931508, "grad_norm": 0.24492332339286804, "learning_rate": 0.0001972505137669623, "loss": 11.685, "step": 10750 }, { "epoch": 0.22504814535711296, "grad_norm": 0.3326622247695923, "learning_rate": 0.00019725000314341142, "loss": 11.6721, "step": 10751 }, { "epoch": 0.2250690781210751, "grad_norm": 0.3242306411266327, "learning_rate": 0.00019724949247311056, "loss": 11.6916, "step": 10752 }, { "epoch": 0.22509001088503727, "grad_norm": 0.2814691364765167, "learning_rate": 0.00019724898175605988, "loss": 11.6909, "step": 10753 }, { "epoch": 0.22511094364899942, "grad_norm": 0.26208049058914185, "learning_rate": 0.0001972484709922597, "loss": 11.6637, "step": 10754 }, { "epoch": 0.22513187641296156, "grad_norm": 0.21459300816059113, "learning_rate": 0.00019724796018171021, "loss": 11.6732, "step": 10755 }, { "epoch": 0.2251528091769237, "grad_norm": 0.23458346724510193, "learning_rate": 0.0001972474493244117, "loss": 11.6818, "step": 10756 }, { "epoch": 0.22517374194088588, "grad_norm": 0.23676030337810516, "learning_rate": 0.00019724693842036435, "loss": 11.6639, "step": 10757 }, { "epoch": 0.22519467470484802, "grad_norm": 0.29369425773620605, "learning_rate": 0.00019724642746956847, "loss": 11.6776, "step": 10758 }, { "epoch": 0.22521560746881017, "grad_norm": 0.41014882922172546, "learning_rate": 0.0001972459164720243, "loss": 11.6745, "step": 10759 }, { "epoch": 0.22523654023277234, "grad_norm": 0.30205389857292175, "learning_rate": 0.00019724540542773205, "loss": 11.6882, "step": 10760 }, { "epoch": 0.2252574729967345, "grad_norm": 0.34949126839637756, "learning_rate": 0.00019724489433669198, "loss": 11.6906, "step": 10761 }, { "epoch": 0.22527840576069663, "grad_norm": 0.2912789285182953, "learning_rate": 0.00019724438319890437, "loss": 11.6639, "step": 10762 }, { "epoch": 0.2252993385246588, "grad_norm": 0.3041010797023773, "learning_rate": 0.0001972438720143694, "loss": 11.6966, "step": 10763 }, { "epoch": 0.22532027128862095, "grad_norm": 0.2149394452571869, "learning_rate": 0.00019724336078308737, "loss": 11.6766, "step": 10764 }, { "epoch": 0.2253412040525831, "grad_norm": 0.2535722851753235, "learning_rate": 0.0001972428495050585, "loss": 11.691, "step": 10765 }, { "epoch": 0.22536213681654527, "grad_norm": 0.304904580116272, "learning_rate": 0.00019724233818028307, "loss": 11.6708, "step": 10766 }, { "epoch": 0.2253830695805074, "grad_norm": 0.3454069495201111, "learning_rate": 0.00019724182680876126, "loss": 11.6639, "step": 10767 }, { "epoch": 0.22540400234446956, "grad_norm": 0.24993528425693512, "learning_rate": 0.00019724131539049336, "loss": 11.6675, "step": 10768 }, { "epoch": 0.22542493510843173, "grad_norm": 0.26149436831474304, "learning_rate": 0.00019724080392547966, "loss": 11.6803, "step": 10769 }, { "epoch": 0.22544586787239387, "grad_norm": 0.2276497483253479, "learning_rate": 0.0001972402924137203, "loss": 11.6874, "step": 10770 }, { "epoch": 0.22546680063635602, "grad_norm": 0.21971119940280914, "learning_rate": 0.00019723978085521562, "loss": 11.672, "step": 10771 }, { "epoch": 0.2254877334003182, "grad_norm": 0.27466800808906555, "learning_rate": 0.00019723926924996582, "loss": 11.6801, "step": 10772 }, { "epoch": 0.22550866616428034, "grad_norm": 0.30205705761909485, "learning_rate": 0.00019723875759797114, "loss": 11.6703, "step": 10773 }, { "epoch": 0.22552959892824248, "grad_norm": 0.22020497918128967, "learning_rate": 0.00019723824589923186, "loss": 11.674, "step": 10774 }, { "epoch": 0.22555053169220463, "grad_norm": 0.23320665955543518, "learning_rate": 0.00019723773415374817, "loss": 11.6638, "step": 10775 }, { "epoch": 0.2255714644561668, "grad_norm": 0.2469979226589203, "learning_rate": 0.00019723722236152037, "loss": 11.6797, "step": 10776 }, { "epoch": 0.22559239722012894, "grad_norm": 0.2822282612323761, "learning_rate": 0.0001972367105225487, "loss": 11.6713, "step": 10777 }, { "epoch": 0.2256133299840911, "grad_norm": 0.24846100807189941, "learning_rate": 0.0001972361986368334, "loss": 11.6786, "step": 10778 }, { "epoch": 0.22563426274805326, "grad_norm": 0.28491348028182983, "learning_rate": 0.0001972356867043747, "loss": 11.6766, "step": 10779 }, { "epoch": 0.2256551955120154, "grad_norm": 0.23459818959236145, "learning_rate": 0.00019723517472517287, "loss": 11.6911, "step": 10780 }, { "epoch": 0.22567612827597755, "grad_norm": 0.3073161542415619, "learning_rate": 0.0001972346626992281, "loss": 11.6554, "step": 10781 }, { "epoch": 0.22569706103993972, "grad_norm": 0.28799381852149963, "learning_rate": 0.00019723415062654072, "loss": 11.6837, "step": 10782 }, { "epoch": 0.22571799380390187, "grad_norm": 0.1925695687532425, "learning_rate": 0.00019723363850711093, "loss": 11.6823, "step": 10783 }, { "epoch": 0.225738926567864, "grad_norm": 0.24514317512512207, "learning_rate": 0.00019723312634093899, "loss": 11.6702, "step": 10784 }, { "epoch": 0.22575985933182618, "grad_norm": 0.22922788560390472, "learning_rate": 0.0001972326141280251, "loss": 11.6735, "step": 10785 }, { "epoch": 0.22578079209578833, "grad_norm": 0.24387475848197937, "learning_rate": 0.00019723210186836957, "loss": 11.6631, "step": 10786 }, { "epoch": 0.22580172485975047, "grad_norm": 0.3020693063735962, "learning_rate": 0.00019723158956197264, "loss": 11.6745, "step": 10787 }, { "epoch": 0.22582265762371265, "grad_norm": 0.2764652967453003, "learning_rate": 0.0001972310772088345, "loss": 11.6808, "step": 10788 }, { "epoch": 0.2258435903876748, "grad_norm": 0.3168897032737732, "learning_rate": 0.00019723056480895545, "loss": 11.6837, "step": 10789 }, { "epoch": 0.22586452315163694, "grad_norm": 0.30294960737228394, "learning_rate": 0.00019723005236233572, "loss": 11.6827, "step": 10790 }, { "epoch": 0.2258854559155991, "grad_norm": 0.23804238438606262, "learning_rate": 0.00019722953986897555, "loss": 11.6725, "step": 10791 }, { "epoch": 0.22590638867956125, "grad_norm": 0.2561712861061096, "learning_rate": 0.00019722902732887518, "loss": 11.6728, "step": 10792 }, { "epoch": 0.2259273214435234, "grad_norm": 0.22450467944145203, "learning_rate": 0.0001972285147420349, "loss": 11.6677, "step": 10793 }, { "epoch": 0.22594825420748554, "grad_norm": 0.26330575346946716, "learning_rate": 0.00019722800210845488, "loss": 11.6769, "step": 10794 }, { "epoch": 0.22596918697144772, "grad_norm": 0.2966079115867615, "learning_rate": 0.00019722748942813546, "loss": 11.6788, "step": 10795 }, { "epoch": 0.22599011973540986, "grad_norm": 0.2564469873905182, "learning_rate": 0.0001972269767010768, "loss": 11.6688, "step": 10796 }, { "epoch": 0.226011052499372, "grad_norm": 0.35815247893333435, "learning_rate": 0.00019722646392727921, "loss": 11.6959, "step": 10797 }, { "epoch": 0.22603198526333418, "grad_norm": 0.23403267562389374, "learning_rate": 0.00019722595110674288, "loss": 11.6641, "step": 10798 }, { "epoch": 0.22605291802729632, "grad_norm": 0.2488154023885727, "learning_rate": 0.00019722543823946813, "loss": 11.6603, "step": 10799 }, { "epoch": 0.22607385079125847, "grad_norm": 0.22484517097473145, "learning_rate": 0.00019722492532545514, "loss": 11.6678, "step": 10800 }, { "epoch": 0.22609478355522064, "grad_norm": 0.27020642161369324, "learning_rate": 0.00019722441236470414, "loss": 11.6876, "step": 10801 }, { "epoch": 0.22611571631918279, "grad_norm": 0.23184655606746674, "learning_rate": 0.00019722389935721547, "loss": 11.6692, "step": 10802 }, { "epoch": 0.22613664908314493, "grad_norm": 0.23129858076572418, "learning_rate": 0.0001972233863029893, "loss": 11.6808, "step": 10803 }, { "epoch": 0.2261575818471071, "grad_norm": 0.26836085319519043, "learning_rate": 0.00019722287320202592, "loss": 11.67, "step": 10804 }, { "epoch": 0.22617851461106925, "grad_norm": 0.2772062122821808, "learning_rate": 0.00019722236005432554, "loss": 11.6797, "step": 10805 }, { "epoch": 0.2261994473750314, "grad_norm": 0.2421705275774002, "learning_rate": 0.00019722184685988845, "loss": 11.6999, "step": 10806 }, { "epoch": 0.22622038013899357, "grad_norm": 0.2532307207584381, "learning_rate": 0.00019722133361871485, "loss": 11.6746, "step": 10807 }, { "epoch": 0.2262413129029557, "grad_norm": 0.2790790796279907, "learning_rate": 0.00019722082033080498, "loss": 11.6847, "step": 10808 }, { "epoch": 0.22626224566691786, "grad_norm": 0.2971727252006531, "learning_rate": 0.00019722030699615914, "loss": 11.6613, "step": 10809 }, { "epoch": 0.22628317843088, "grad_norm": 0.29201269149780273, "learning_rate": 0.00019721979361477754, "loss": 11.6846, "step": 10810 }, { "epoch": 0.22630411119484217, "grad_norm": 2.2533681392669678, "learning_rate": 0.00019721928018666044, "loss": 11.6607, "step": 10811 }, { "epoch": 0.22632504395880432, "grad_norm": 0.2522198259830475, "learning_rate": 0.0001972187667118081, "loss": 11.655, "step": 10812 }, { "epoch": 0.22634597672276646, "grad_norm": 0.24118225276470184, "learning_rate": 0.00019721825319022072, "loss": 11.685, "step": 10813 }, { "epoch": 0.22636690948672863, "grad_norm": 0.294479638338089, "learning_rate": 0.0001972177396218986, "loss": 11.678, "step": 10814 }, { "epoch": 0.22638784225069078, "grad_norm": 0.21607711911201477, "learning_rate": 0.00019721722600684198, "loss": 11.6973, "step": 10815 }, { "epoch": 0.22640877501465292, "grad_norm": 0.27746912837028503, "learning_rate": 0.00019721671234505106, "loss": 11.688, "step": 10816 }, { "epoch": 0.2264297077786151, "grad_norm": 0.2138531357049942, "learning_rate": 0.00019721619863652615, "loss": 11.6828, "step": 10817 }, { "epoch": 0.22645064054257724, "grad_norm": 0.203679159283638, "learning_rate": 0.00019721568488126744, "loss": 11.6845, "step": 10818 }, { "epoch": 0.2264715733065394, "grad_norm": 0.22919787466526031, "learning_rate": 0.00019721517107927522, "loss": 11.6694, "step": 10819 }, { "epoch": 0.22649250607050156, "grad_norm": 0.2776730954647064, "learning_rate": 0.00019721465723054969, "loss": 11.6792, "step": 10820 }, { "epoch": 0.2265134388344637, "grad_norm": 0.2226666361093521, "learning_rate": 0.00019721414333509114, "loss": 11.6785, "step": 10821 }, { "epoch": 0.22653437159842585, "grad_norm": 0.2595999240875244, "learning_rate": 0.0001972136293928998, "loss": 11.6756, "step": 10822 }, { "epoch": 0.22655530436238802, "grad_norm": 0.27122628688812256, "learning_rate": 0.00019721311540397594, "loss": 11.6995, "step": 10823 }, { "epoch": 0.22657623712635017, "grad_norm": 0.27113136649131775, "learning_rate": 0.00019721260136831977, "loss": 11.6809, "step": 10824 }, { "epoch": 0.2265971698903123, "grad_norm": 0.21500691771507263, "learning_rate": 0.00019721208728593154, "loss": 11.6827, "step": 10825 }, { "epoch": 0.22661810265427448, "grad_norm": 0.25831112265586853, "learning_rate": 0.00019721157315681151, "loss": 11.6686, "step": 10826 }, { "epoch": 0.22663903541823663, "grad_norm": 0.24110649526119232, "learning_rate": 0.00019721105898095996, "loss": 11.6705, "step": 10827 }, { "epoch": 0.22665996818219877, "grad_norm": 0.23133793473243713, "learning_rate": 0.0001972105447583771, "loss": 11.6773, "step": 10828 }, { "epoch": 0.22668090094616092, "grad_norm": 0.34551510214805603, "learning_rate": 0.00019721003048906317, "loss": 11.6914, "step": 10829 }, { "epoch": 0.2267018337101231, "grad_norm": 0.24035614728927612, "learning_rate": 0.00019720951617301845, "loss": 11.6619, "step": 10830 }, { "epoch": 0.22672276647408524, "grad_norm": 0.2282082438468933, "learning_rate": 0.00019720900181024312, "loss": 11.6701, "step": 10831 }, { "epoch": 0.22674369923804738, "grad_norm": 0.3468250036239624, "learning_rate": 0.0001972084874007375, "loss": 11.6817, "step": 10832 }, { "epoch": 0.22676463200200955, "grad_norm": 0.23064082860946655, "learning_rate": 0.00019720797294450187, "loss": 11.6771, "step": 10833 }, { "epoch": 0.2267855647659717, "grad_norm": 0.2329358607530594, "learning_rate": 0.00019720745844153633, "loss": 11.701, "step": 10834 }, { "epoch": 0.22680649752993384, "grad_norm": 0.29913291335105896, "learning_rate": 0.00019720694389184127, "loss": 11.6751, "step": 10835 }, { "epoch": 0.22682743029389602, "grad_norm": 0.2052660584449768, "learning_rate": 0.00019720642929541685, "loss": 11.6857, "step": 10836 }, { "epoch": 0.22684836305785816, "grad_norm": 0.25426581501960754, "learning_rate": 0.0001972059146522634, "loss": 11.6711, "step": 10837 }, { "epoch": 0.2268692958218203, "grad_norm": 0.2239026129245758, "learning_rate": 0.00019720539996238108, "loss": 11.6728, "step": 10838 }, { "epoch": 0.22689022858578248, "grad_norm": 0.26166531443595886, "learning_rate": 0.0001972048852257702, "loss": 11.6751, "step": 10839 }, { "epoch": 0.22691116134974462, "grad_norm": 0.2803172171115875, "learning_rate": 0.00019720437044243097, "loss": 11.6569, "step": 10840 }, { "epoch": 0.22693209411370677, "grad_norm": 0.2347813844680786, "learning_rate": 0.00019720385561236366, "loss": 11.6772, "step": 10841 }, { "epoch": 0.22695302687766894, "grad_norm": 0.21147944033145905, "learning_rate": 0.0001972033407355685, "loss": 11.6718, "step": 10842 }, { "epoch": 0.22697395964163108, "grad_norm": 0.3001781105995178, "learning_rate": 0.00019720282581204575, "loss": 11.6795, "step": 10843 }, { "epoch": 0.22699489240559323, "grad_norm": 0.2809975743293762, "learning_rate": 0.0001972023108417956, "loss": 11.6906, "step": 10844 }, { "epoch": 0.2270158251695554, "grad_norm": 0.493694931268692, "learning_rate": 0.0001972017958248184, "loss": 11.6855, "step": 10845 }, { "epoch": 0.22703675793351755, "grad_norm": 0.243812695145607, "learning_rate": 0.00019720128076111437, "loss": 11.6782, "step": 10846 }, { "epoch": 0.2270576906974797, "grad_norm": 0.2591700851917267, "learning_rate": 0.00019720076565068372, "loss": 11.691, "step": 10847 }, { "epoch": 0.22707862346144184, "grad_norm": 0.2307008057832718, "learning_rate": 0.00019720025049352674, "loss": 11.673, "step": 10848 }, { "epoch": 0.227099556225404, "grad_norm": 0.2713991701602936, "learning_rate": 0.0001971997352896436, "loss": 11.6921, "step": 10849 }, { "epoch": 0.22712048898936615, "grad_norm": 0.2313070148229599, "learning_rate": 0.0001971992200390346, "loss": 11.679, "step": 10850 }, { "epoch": 0.2271414217533283, "grad_norm": 0.2608635425567627, "learning_rate": 0.00019719870474170002, "loss": 11.6588, "step": 10851 }, { "epoch": 0.22716235451729047, "grad_norm": 0.26126569509506226, "learning_rate": 0.00019719818939764006, "loss": 11.6753, "step": 10852 }, { "epoch": 0.22718328728125262, "grad_norm": 0.2304963916540146, "learning_rate": 0.00019719767400685497, "loss": 11.6675, "step": 10853 }, { "epoch": 0.22720422004521476, "grad_norm": 0.30732813477516174, "learning_rate": 0.00019719715856934508, "loss": 11.6903, "step": 10854 }, { "epoch": 0.22722515280917693, "grad_norm": 0.30852222442626953, "learning_rate": 0.0001971966430851105, "loss": 11.6778, "step": 10855 }, { "epoch": 0.22724608557313908, "grad_norm": 0.3144867420196533, "learning_rate": 0.00019719612755415153, "loss": 11.6819, "step": 10856 }, { "epoch": 0.22726701833710122, "grad_norm": 0.203343003988266, "learning_rate": 0.00019719561197646847, "loss": 11.6752, "step": 10857 }, { "epoch": 0.2272879511010634, "grad_norm": 0.24645860493183136, "learning_rate": 0.00019719509635206153, "loss": 11.6858, "step": 10858 }, { "epoch": 0.22730888386502554, "grad_norm": 0.2665141820907593, "learning_rate": 0.00019719458068093095, "loss": 11.6713, "step": 10859 }, { "epoch": 0.22732981662898769, "grad_norm": 0.273175448179245, "learning_rate": 0.000197194064963077, "loss": 11.6824, "step": 10860 }, { "epoch": 0.22735074939294986, "grad_norm": 0.293215274810791, "learning_rate": 0.0001971935491984999, "loss": 11.6693, "step": 10861 }, { "epoch": 0.227371682156912, "grad_norm": 0.2840285003185272, "learning_rate": 0.00019719303338719993, "loss": 11.6751, "step": 10862 }, { "epoch": 0.22739261492087415, "grad_norm": 0.27285876870155334, "learning_rate": 0.0001971925175291773, "loss": 11.6785, "step": 10863 }, { "epoch": 0.2274135476848363, "grad_norm": 0.3526171147823334, "learning_rate": 0.00019719200162443228, "loss": 11.6814, "step": 10864 }, { "epoch": 0.22743448044879847, "grad_norm": 0.26122018694877625, "learning_rate": 0.00019719148567296513, "loss": 11.6733, "step": 10865 }, { "epoch": 0.2274554132127606, "grad_norm": 0.2845854163169861, "learning_rate": 0.00019719096967477608, "loss": 11.6787, "step": 10866 }, { "epoch": 0.22747634597672275, "grad_norm": 0.20974549651145935, "learning_rate": 0.0001971904536298654, "loss": 11.6776, "step": 10867 }, { "epoch": 0.22749727874068493, "grad_norm": 0.29559004306793213, "learning_rate": 0.00019718993753823329, "loss": 11.6909, "step": 10868 }, { "epoch": 0.22751821150464707, "grad_norm": 0.2562377452850342, "learning_rate": 0.00019718942139988006, "loss": 11.6732, "step": 10869 }, { "epoch": 0.22753914426860922, "grad_norm": 0.2610447108745575, "learning_rate": 0.00019718890521480591, "loss": 11.68, "step": 10870 }, { "epoch": 0.2275600770325714, "grad_norm": 0.23623614013195038, "learning_rate": 0.0001971883889830111, "loss": 11.6801, "step": 10871 }, { "epoch": 0.22758100979653353, "grad_norm": 0.27600473165512085, "learning_rate": 0.0001971878727044959, "loss": 11.6802, "step": 10872 }, { "epoch": 0.22760194256049568, "grad_norm": 0.2933751940727234, "learning_rate": 0.00019718735637926053, "loss": 11.697, "step": 10873 }, { "epoch": 0.22762287532445785, "grad_norm": 0.23519361019134521, "learning_rate": 0.00019718684000730524, "loss": 11.6707, "step": 10874 }, { "epoch": 0.22764380808842, "grad_norm": 0.28095874190330505, "learning_rate": 0.00019718632358863033, "loss": 11.6976, "step": 10875 }, { "epoch": 0.22766474085238214, "grad_norm": 0.2939605712890625, "learning_rate": 0.00019718580712323598, "loss": 11.6781, "step": 10876 }, { "epoch": 0.22768567361634431, "grad_norm": 0.2500995993614197, "learning_rate": 0.00019718529061112246, "loss": 11.6726, "step": 10877 }, { "epoch": 0.22770660638030646, "grad_norm": 0.23000468313694, "learning_rate": 0.00019718477405229003, "loss": 11.6761, "step": 10878 }, { "epoch": 0.2277275391442686, "grad_norm": 0.2899494767189026, "learning_rate": 0.0001971842574467389, "loss": 11.6875, "step": 10879 }, { "epoch": 0.22774847190823078, "grad_norm": 0.3551441431045532, "learning_rate": 0.00019718374079446939, "loss": 11.711, "step": 10880 }, { "epoch": 0.22776940467219292, "grad_norm": 0.2794858515262604, "learning_rate": 0.0001971832240954817, "loss": 11.6645, "step": 10881 }, { "epoch": 0.22779033743615507, "grad_norm": 0.22676746547222137, "learning_rate": 0.0001971827073497761, "loss": 11.6958, "step": 10882 }, { "epoch": 0.2278112702001172, "grad_norm": 0.2114848494529724, "learning_rate": 0.0001971821905573528, "loss": 11.6616, "step": 10883 }, { "epoch": 0.22783220296407938, "grad_norm": 0.20831842720508575, "learning_rate": 0.00019718167371821207, "loss": 11.6767, "step": 10884 }, { "epoch": 0.22785313572804153, "grad_norm": 0.26306039094924927, "learning_rate": 0.00019718115683235417, "loss": 11.6831, "step": 10885 }, { "epoch": 0.22787406849200367, "grad_norm": 0.28444457054138184, "learning_rate": 0.00019718063989977936, "loss": 11.6939, "step": 10886 }, { "epoch": 0.22789500125596585, "grad_norm": 0.24279342591762543, "learning_rate": 0.00019718012292048784, "loss": 11.6744, "step": 10887 }, { "epoch": 0.227915934019928, "grad_norm": 0.20517078042030334, "learning_rate": 0.0001971796058944799, "loss": 11.682, "step": 10888 }, { "epoch": 0.22793686678389014, "grad_norm": 0.21023766696453094, "learning_rate": 0.00019717908882175577, "loss": 11.6743, "step": 10889 }, { "epoch": 0.2279577995478523, "grad_norm": 0.25901126861572266, "learning_rate": 0.0001971785717023157, "loss": 11.6859, "step": 10890 }, { "epoch": 0.22797873231181445, "grad_norm": 0.24165652692317963, "learning_rate": 0.00019717805453615999, "loss": 11.6781, "step": 10891 }, { "epoch": 0.2279996650757766, "grad_norm": 0.2527129352092743, "learning_rate": 0.0001971775373232888, "loss": 11.6996, "step": 10892 }, { "epoch": 0.22802059783973877, "grad_norm": 0.30815577507019043, "learning_rate": 0.00019717702006370242, "loss": 11.6829, "step": 10893 }, { "epoch": 0.22804153060370091, "grad_norm": 0.28036829829216003, "learning_rate": 0.0001971765027574011, "loss": 11.6644, "step": 10894 }, { "epoch": 0.22806246336766306, "grad_norm": 0.2299276441335678, "learning_rate": 0.00019717598540438508, "loss": 11.6751, "step": 10895 }, { "epoch": 0.22808339613162523, "grad_norm": 0.23328129947185516, "learning_rate": 0.00019717546800465465, "loss": 11.6832, "step": 10896 }, { "epoch": 0.22810432889558738, "grad_norm": 1.068908929824829, "learning_rate": 0.00019717495055821, "loss": 11.5847, "step": 10897 }, { "epoch": 0.22812526165954952, "grad_norm": 0.1986604481935501, "learning_rate": 0.00019717443306505142, "loss": 11.672, "step": 10898 }, { "epoch": 0.22814619442351167, "grad_norm": 0.376362681388855, "learning_rate": 0.0001971739155251791, "loss": 11.6784, "step": 10899 }, { "epoch": 0.22816712718747384, "grad_norm": 0.2483205497264862, "learning_rate": 0.00019717339793859338, "loss": 11.6616, "step": 10900 }, { "epoch": 0.22818805995143598, "grad_norm": 0.28246214985847473, "learning_rate": 0.00019717288030529444, "loss": 11.6598, "step": 10901 }, { "epoch": 0.22820899271539813, "grad_norm": 0.24285708367824554, "learning_rate": 0.00019717236262528255, "loss": 11.6752, "step": 10902 }, { "epoch": 0.2282299254793603, "grad_norm": 0.22364845871925354, "learning_rate": 0.00019717184489855796, "loss": 11.6802, "step": 10903 }, { "epoch": 0.22825085824332245, "grad_norm": 0.27937573194503784, "learning_rate": 0.0001971713271251209, "loss": 11.6705, "step": 10904 }, { "epoch": 0.2282717910072846, "grad_norm": 0.2687658965587616, "learning_rate": 0.00019717080930497168, "loss": 11.6703, "step": 10905 }, { "epoch": 0.22829272377124676, "grad_norm": 0.4294947683811188, "learning_rate": 0.00019717029143811047, "loss": 11.6949, "step": 10906 }, { "epoch": 0.2283136565352089, "grad_norm": 0.2829876244068146, "learning_rate": 0.00019716977352453755, "loss": 11.6613, "step": 10907 }, { "epoch": 0.22833458929917105, "grad_norm": 0.2692597210407257, "learning_rate": 0.00019716925556425319, "loss": 11.6819, "step": 10908 }, { "epoch": 0.22835552206313323, "grad_norm": 0.30383238196372986, "learning_rate": 0.0001971687375572576, "loss": 11.6947, "step": 10909 }, { "epoch": 0.22837645482709537, "grad_norm": 0.24648047983646393, "learning_rate": 0.00019716821950355108, "loss": 11.6682, "step": 10910 }, { "epoch": 0.22839738759105752, "grad_norm": 0.24580132961273193, "learning_rate": 0.00019716770140313383, "loss": 11.6609, "step": 10911 }, { "epoch": 0.2284183203550197, "grad_norm": 0.23887357115745544, "learning_rate": 0.00019716718325600612, "loss": 11.6736, "step": 10912 }, { "epoch": 0.22843925311898183, "grad_norm": 0.22703011333942413, "learning_rate": 0.0001971666650621682, "loss": 11.6769, "step": 10913 }, { "epoch": 0.22846018588294398, "grad_norm": 0.2945117652416229, "learning_rate": 0.00019716614682162035, "loss": 11.6738, "step": 10914 }, { "epoch": 0.22848111864690615, "grad_norm": 0.30931374430656433, "learning_rate": 0.00019716562853436273, "loss": 11.6926, "step": 10915 }, { "epoch": 0.2285020514108683, "grad_norm": 0.291896790266037, "learning_rate": 0.00019716511020039564, "loss": 11.6859, "step": 10916 }, { "epoch": 0.22852298417483044, "grad_norm": 0.4220649003982544, "learning_rate": 0.0001971645918197194, "loss": 11.6888, "step": 10917 }, { "epoch": 0.22854391693879259, "grad_norm": 0.27384525537490845, "learning_rate": 0.00019716407339233415, "loss": 11.6819, "step": 10918 }, { "epoch": 0.22856484970275476, "grad_norm": 0.221856027841568, "learning_rate": 0.0001971635549182402, "loss": 11.6746, "step": 10919 }, { "epoch": 0.2285857824667169, "grad_norm": 0.21530267596244812, "learning_rate": 0.00019716303639743775, "loss": 11.6828, "step": 10920 }, { "epoch": 0.22860671523067905, "grad_norm": 0.25139299035072327, "learning_rate": 0.0001971625178299271, "loss": 11.6801, "step": 10921 }, { "epoch": 0.22862764799464122, "grad_norm": 0.22153101861476898, "learning_rate": 0.0001971619992157085, "loss": 11.6755, "step": 10922 }, { "epoch": 0.22864858075860336, "grad_norm": 0.3009665012359619, "learning_rate": 0.00019716148055478217, "loss": 11.6616, "step": 10923 }, { "epoch": 0.2286695135225655, "grad_norm": 0.2111574113368988, "learning_rate": 0.00019716096184714833, "loss": 11.6778, "step": 10924 }, { "epoch": 0.22869044628652768, "grad_norm": 0.2716302275657654, "learning_rate": 0.0001971604430928073, "loss": 11.6891, "step": 10925 }, { "epoch": 0.22871137905048983, "grad_norm": 0.3760446310043335, "learning_rate": 0.00019715992429175931, "loss": 11.6707, "step": 10926 }, { "epoch": 0.22873231181445197, "grad_norm": 0.25011494755744934, "learning_rate": 0.0001971594054440046, "loss": 11.6472, "step": 10927 }, { "epoch": 0.22875324457841414, "grad_norm": 0.2515035569667816, "learning_rate": 0.00019715888654954337, "loss": 11.6533, "step": 10928 }, { "epoch": 0.2287741773423763, "grad_norm": 0.38000455498695374, "learning_rate": 0.00019715836760837596, "loss": 11.6869, "step": 10929 }, { "epoch": 0.22879511010633843, "grad_norm": 0.24576444923877716, "learning_rate": 0.00019715784862050256, "loss": 11.6828, "step": 10930 }, { "epoch": 0.2288160428703006, "grad_norm": 0.234608992934227, "learning_rate": 0.00019715732958592346, "loss": 11.685, "step": 10931 }, { "epoch": 0.22883697563426275, "grad_norm": 0.22834144532680511, "learning_rate": 0.00019715681050463886, "loss": 11.665, "step": 10932 }, { "epoch": 0.2288579083982249, "grad_norm": 0.27882203459739685, "learning_rate": 0.00019715629137664903, "loss": 11.6747, "step": 10933 }, { "epoch": 0.22887884116218707, "grad_norm": 0.2637006640434265, "learning_rate": 0.00019715577220195423, "loss": 11.6941, "step": 10934 }, { "epoch": 0.2288997739261492, "grad_norm": 0.2472536265850067, "learning_rate": 0.0001971552529805547, "loss": 11.6806, "step": 10935 }, { "epoch": 0.22892070669011136, "grad_norm": 0.47250670194625854, "learning_rate": 0.00019715473371245072, "loss": 11.6689, "step": 10936 }, { "epoch": 0.2289416394540735, "grad_norm": 0.27282652258872986, "learning_rate": 0.00019715421439764247, "loss": 11.6876, "step": 10937 }, { "epoch": 0.22896257221803568, "grad_norm": 0.2748161852359772, "learning_rate": 0.00019715369503613025, "loss": 11.6791, "step": 10938 }, { "epoch": 0.22898350498199782, "grad_norm": 0.2356778383255005, "learning_rate": 0.00019715317562791433, "loss": 11.6846, "step": 10939 }, { "epoch": 0.22900443774595997, "grad_norm": 0.32395240664482117, "learning_rate": 0.00019715265617299493, "loss": 11.679, "step": 10940 }, { "epoch": 0.22902537050992214, "grad_norm": 0.2395753562450409, "learning_rate": 0.00019715213667137227, "loss": 11.6833, "step": 10941 }, { "epoch": 0.22904630327388428, "grad_norm": 0.20749953389167786, "learning_rate": 0.00019715161712304666, "loss": 11.6733, "step": 10942 }, { "epoch": 0.22906723603784643, "grad_norm": 0.2528466284275055, "learning_rate": 0.0001971510975280183, "loss": 11.6674, "step": 10943 }, { "epoch": 0.2290881688018086, "grad_norm": 0.25687193870544434, "learning_rate": 0.00019715057788628748, "loss": 11.6718, "step": 10944 }, { "epoch": 0.22910910156577075, "grad_norm": 0.24535635113716125, "learning_rate": 0.0001971500581978544, "loss": 11.6807, "step": 10945 }, { "epoch": 0.2291300343297329, "grad_norm": 0.24470333755016327, "learning_rate": 0.00019714953846271938, "loss": 11.6739, "step": 10946 }, { "epoch": 0.22915096709369506, "grad_norm": 0.24686738848686218, "learning_rate": 0.0001971490186808826, "loss": 11.6828, "step": 10947 }, { "epoch": 0.2291718998576572, "grad_norm": 0.26446717977523804, "learning_rate": 0.00019714849885234435, "loss": 11.674, "step": 10948 }, { "epoch": 0.22919283262161935, "grad_norm": 0.21239909529685974, "learning_rate": 0.00019714797897710487, "loss": 11.6837, "step": 10949 }, { "epoch": 0.22921376538558152, "grad_norm": 0.24324053525924683, "learning_rate": 0.0001971474590551644, "loss": 11.6942, "step": 10950 }, { "epoch": 0.22923469814954367, "grad_norm": 0.276284784078598, "learning_rate": 0.0001971469390865232, "loss": 11.6873, "step": 10951 }, { "epoch": 0.22925563091350581, "grad_norm": 0.24318063259124756, "learning_rate": 0.00019714641907118155, "loss": 11.6719, "step": 10952 }, { "epoch": 0.22927656367746796, "grad_norm": 0.2524380385875702, "learning_rate": 0.00019714589900913964, "loss": 11.6835, "step": 10953 }, { "epoch": 0.22929749644143013, "grad_norm": 0.21678100526332855, "learning_rate": 0.00019714537890039776, "loss": 11.6816, "step": 10954 }, { "epoch": 0.22931842920539228, "grad_norm": 0.2750302255153656, "learning_rate": 0.00019714485874495615, "loss": 11.6711, "step": 10955 }, { "epoch": 0.22933936196935442, "grad_norm": 0.229862242937088, "learning_rate": 0.00019714433854281507, "loss": 11.6738, "step": 10956 }, { "epoch": 0.2293602947333166, "grad_norm": 0.24526599049568176, "learning_rate": 0.00019714381829397473, "loss": 11.6607, "step": 10957 }, { "epoch": 0.22938122749727874, "grad_norm": 0.3005819618701935, "learning_rate": 0.00019714329799843544, "loss": 11.6832, "step": 10958 }, { "epoch": 0.22940216026124088, "grad_norm": 0.22541160881519318, "learning_rate": 0.00019714277765619736, "loss": 11.6584, "step": 10959 }, { "epoch": 0.22942309302520306, "grad_norm": 0.31416213512420654, "learning_rate": 0.00019714225726726088, "loss": 11.6609, "step": 10960 }, { "epoch": 0.2294440257891652, "grad_norm": 0.2501601576805115, "learning_rate": 0.00019714173683162614, "loss": 11.6711, "step": 10961 }, { "epoch": 0.22946495855312735, "grad_norm": 0.2248605340719223, "learning_rate": 0.0001971412163492934, "loss": 11.6726, "step": 10962 }, { "epoch": 0.22948589131708952, "grad_norm": 0.2939607501029968, "learning_rate": 0.00019714069582026295, "loss": 11.6882, "step": 10963 }, { "epoch": 0.22950682408105166, "grad_norm": 0.29097965359687805, "learning_rate": 0.00019714017524453503, "loss": 11.6759, "step": 10964 }, { "epoch": 0.2295277568450138, "grad_norm": 0.2677697539329529, "learning_rate": 0.00019713965462210987, "loss": 11.6758, "step": 10965 }, { "epoch": 0.22954868960897598, "grad_norm": 0.25424373149871826, "learning_rate": 0.0001971391339529877, "loss": 11.6866, "step": 10966 }, { "epoch": 0.22956962237293813, "grad_norm": 0.23931202292442322, "learning_rate": 0.00019713861323716886, "loss": 11.6702, "step": 10967 }, { "epoch": 0.22959055513690027, "grad_norm": 0.23532600700855255, "learning_rate": 0.0001971380924746535, "loss": 11.6703, "step": 10968 }, { "epoch": 0.22961148790086244, "grad_norm": 0.2397320419549942, "learning_rate": 0.0001971375716654419, "loss": 11.69, "step": 10969 }, { "epoch": 0.2296324206648246, "grad_norm": 0.22625099122524261, "learning_rate": 0.00019713705080953438, "loss": 11.6659, "step": 10970 }, { "epoch": 0.22965335342878673, "grad_norm": 0.23071834444999695, "learning_rate": 0.00019713652990693107, "loss": 11.6704, "step": 10971 }, { "epoch": 0.22967428619274888, "grad_norm": 0.2545914053916931, "learning_rate": 0.00019713600895763234, "loss": 11.672, "step": 10972 }, { "epoch": 0.22969521895671105, "grad_norm": 0.29595229029655457, "learning_rate": 0.00019713548796163836, "loss": 11.6851, "step": 10973 }, { "epoch": 0.2297161517206732, "grad_norm": 0.3238823115825653, "learning_rate": 0.00019713496691894938, "loss": 11.6931, "step": 10974 }, { "epoch": 0.22973708448463534, "grad_norm": 0.3412425220012665, "learning_rate": 0.00019713444582956572, "loss": 11.6791, "step": 10975 }, { "epoch": 0.2297580172485975, "grad_norm": 0.2148292511701584, "learning_rate": 0.00019713392469348758, "loss": 11.6799, "step": 10976 }, { "epoch": 0.22977895001255966, "grad_norm": 0.23511826992034912, "learning_rate": 0.00019713340351071515, "loss": 11.6723, "step": 10977 }, { "epoch": 0.2297998827765218, "grad_norm": 0.24123573303222656, "learning_rate": 0.00019713288228124883, "loss": 11.6793, "step": 10978 }, { "epoch": 0.22982081554048397, "grad_norm": 0.24258972704410553, "learning_rate": 0.00019713236100508873, "loss": 11.6808, "step": 10979 }, { "epoch": 0.22984174830444612, "grad_norm": 0.2536763846874237, "learning_rate": 0.0001971318396822352, "loss": 11.6789, "step": 10980 }, { "epoch": 0.22986268106840826, "grad_norm": 0.26618248224258423, "learning_rate": 0.00019713131831268843, "loss": 11.6902, "step": 10981 }, { "epoch": 0.22988361383237044, "grad_norm": 0.2898224890232086, "learning_rate": 0.00019713079689644868, "loss": 11.6818, "step": 10982 }, { "epoch": 0.22990454659633258, "grad_norm": 0.28443801403045654, "learning_rate": 0.0001971302754335162, "loss": 11.6865, "step": 10983 }, { "epoch": 0.22992547936029473, "grad_norm": 0.24154570698738098, "learning_rate": 0.00019712975392389127, "loss": 11.6727, "step": 10984 }, { "epoch": 0.2299464121242569, "grad_norm": 0.26749566197395325, "learning_rate": 0.00019712923236757412, "loss": 11.6794, "step": 10985 }, { "epoch": 0.22996734488821904, "grad_norm": 0.24776583909988403, "learning_rate": 0.00019712871076456502, "loss": 11.6641, "step": 10986 }, { "epoch": 0.2299882776521812, "grad_norm": 0.2243122160434723, "learning_rate": 0.00019712818911486415, "loss": 11.6602, "step": 10987 }, { "epoch": 0.23000921041614333, "grad_norm": 0.2237510085105896, "learning_rate": 0.00019712766741847185, "loss": 11.6845, "step": 10988 }, { "epoch": 0.2300301431801055, "grad_norm": 0.27804994583129883, "learning_rate": 0.00019712714567538832, "loss": 11.6767, "step": 10989 }, { "epoch": 0.23005107594406765, "grad_norm": 0.246033176779747, "learning_rate": 0.00019712662388561385, "loss": 11.7003, "step": 10990 }, { "epoch": 0.2300720087080298, "grad_norm": 0.24508549273014069, "learning_rate": 0.00019712610204914864, "loss": 11.6777, "step": 10991 }, { "epoch": 0.23009294147199197, "grad_norm": 0.3594638407230377, "learning_rate": 0.00019712558016599297, "loss": 11.6764, "step": 10992 }, { "epoch": 0.2301138742359541, "grad_norm": 0.22537560760974884, "learning_rate": 0.0001971250582361471, "loss": 11.686, "step": 10993 }, { "epoch": 0.23013480699991626, "grad_norm": 0.31670138239860535, "learning_rate": 0.00019712453625961125, "loss": 11.6947, "step": 10994 }, { "epoch": 0.23015573976387843, "grad_norm": 0.2302142232656479, "learning_rate": 0.00019712401423638572, "loss": 11.6523, "step": 10995 }, { "epoch": 0.23017667252784058, "grad_norm": 0.27924609184265137, "learning_rate": 0.00019712349216647067, "loss": 11.6793, "step": 10996 }, { "epoch": 0.23019760529180272, "grad_norm": 0.24109971523284912, "learning_rate": 0.00019712297004986646, "loss": 11.6811, "step": 10997 }, { "epoch": 0.2302185380557649, "grad_norm": 0.25617045164108276, "learning_rate": 0.00019712244788657328, "loss": 11.6755, "step": 10998 }, { "epoch": 0.23023947081972704, "grad_norm": 0.25401490926742554, "learning_rate": 0.0001971219256765914, "loss": 11.6517, "step": 10999 }, { "epoch": 0.23026040358368918, "grad_norm": 0.27186065912246704, "learning_rate": 0.00019712140341992106, "loss": 11.6713, "step": 11000 }, { "epoch": 0.23026040358368918, "eval_loss": 11.677996635437012, "eval_runtime": 34.293, "eval_samples_per_second": 28.023, "eval_steps_per_second": 7.028, "step": 11000 }, { "epoch": 0.23028133634765136, "grad_norm": 0.23795965313911438, "learning_rate": 0.0001971208811165625, "loss": 11.677, "step": 11001 }, { "epoch": 0.2303022691116135, "grad_norm": 0.3122803866863251, "learning_rate": 0.00019712035876651602, "loss": 11.6843, "step": 11002 }, { "epoch": 0.23032320187557564, "grad_norm": 0.23237201571464539, "learning_rate": 0.0001971198363697818, "loss": 11.6763, "step": 11003 }, { "epoch": 0.23034413463953782, "grad_norm": 0.27443915605545044, "learning_rate": 0.00019711931392636016, "loss": 11.6581, "step": 11004 }, { "epoch": 0.23036506740349996, "grad_norm": 0.25412485003471375, "learning_rate": 0.00019711879143625128, "loss": 11.6696, "step": 11005 }, { "epoch": 0.2303860001674621, "grad_norm": 0.21880590915679932, "learning_rate": 0.00019711826889945547, "loss": 11.6623, "step": 11006 }, { "epoch": 0.23040693293142425, "grad_norm": 0.3134768605232239, "learning_rate": 0.00019711774631597296, "loss": 11.6847, "step": 11007 }, { "epoch": 0.23042786569538642, "grad_norm": 0.37025389075279236, "learning_rate": 0.00019711722368580403, "loss": 11.6893, "step": 11008 }, { "epoch": 0.23044879845934857, "grad_norm": 0.40073907375335693, "learning_rate": 0.0001971167010089489, "loss": 11.6747, "step": 11009 }, { "epoch": 0.23046973122331071, "grad_norm": 0.2263125479221344, "learning_rate": 0.00019711617828540781, "loss": 11.6789, "step": 11010 }, { "epoch": 0.2304906639872729, "grad_norm": 0.2666516602039337, "learning_rate": 0.00019711565551518102, "loss": 11.6787, "step": 11011 }, { "epoch": 0.23051159675123503, "grad_norm": 0.2974736988544464, "learning_rate": 0.00019711513269826878, "loss": 11.6827, "step": 11012 }, { "epoch": 0.23053252951519718, "grad_norm": 0.3703163266181946, "learning_rate": 0.00019711460983467136, "loss": 11.6618, "step": 11013 }, { "epoch": 0.23055346227915935, "grad_norm": 0.25795620679855347, "learning_rate": 0.000197114086924389, "loss": 11.6806, "step": 11014 }, { "epoch": 0.2305743950431215, "grad_norm": 0.22512730956077576, "learning_rate": 0.00019711356396742198, "loss": 11.6609, "step": 11015 }, { "epoch": 0.23059532780708364, "grad_norm": 0.2349899709224701, "learning_rate": 0.00019711304096377047, "loss": 11.6739, "step": 11016 }, { "epoch": 0.2306162605710458, "grad_norm": 0.28314709663391113, "learning_rate": 0.00019711251791343481, "loss": 11.6739, "step": 11017 }, { "epoch": 0.23063719333500796, "grad_norm": 0.267884761095047, "learning_rate": 0.00019711199481641523, "loss": 11.6707, "step": 11018 }, { "epoch": 0.2306581260989701, "grad_norm": 0.2524879276752472, "learning_rate": 0.00019711147167271196, "loss": 11.6585, "step": 11019 }, { "epoch": 0.23067905886293227, "grad_norm": 0.2538243234157562, "learning_rate": 0.00019711094848232522, "loss": 11.668, "step": 11020 }, { "epoch": 0.23069999162689442, "grad_norm": 0.2527152895927429, "learning_rate": 0.00019711042524525537, "loss": 11.6876, "step": 11021 }, { "epoch": 0.23072092439085656, "grad_norm": 0.30761489272117615, "learning_rate": 0.00019710990196150253, "loss": 11.6837, "step": 11022 }, { "epoch": 0.23074185715481874, "grad_norm": 0.3080105781555176, "learning_rate": 0.000197109378631067, "loss": 11.688, "step": 11023 }, { "epoch": 0.23076278991878088, "grad_norm": 0.3279312252998352, "learning_rate": 0.0001971088552539491, "loss": 11.6867, "step": 11024 }, { "epoch": 0.23078372268274303, "grad_norm": 0.26131120324134827, "learning_rate": 0.00019710833183014904, "loss": 11.6736, "step": 11025 }, { "epoch": 0.23080465544670517, "grad_norm": 0.2355901300907135, "learning_rate": 0.00019710780835966701, "loss": 11.6739, "step": 11026 }, { "epoch": 0.23082558821066734, "grad_norm": 0.2420038878917694, "learning_rate": 0.00019710728484250334, "loss": 11.6696, "step": 11027 }, { "epoch": 0.2308465209746295, "grad_norm": 0.2273702472448349, "learning_rate": 0.00019710676127865823, "loss": 11.6696, "step": 11028 }, { "epoch": 0.23086745373859163, "grad_norm": 0.26466137170791626, "learning_rate": 0.00019710623766813195, "loss": 11.6795, "step": 11029 }, { "epoch": 0.2308883865025538, "grad_norm": 0.7639848589897156, "learning_rate": 0.00019710571401092478, "loss": 11.6991, "step": 11030 }, { "epoch": 0.23090931926651595, "grad_norm": 0.22988194227218628, "learning_rate": 0.00019710519030703694, "loss": 11.6714, "step": 11031 }, { "epoch": 0.2309302520304781, "grad_norm": 0.2668343782424927, "learning_rate": 0.0001971046665564687, "loss": 11.6759, "step": 11032 }, { "epoch": 0.23095118479444027, "grad_norm": 0.28256553411483765, "learning_rate": 0.00019710414275922027, "loss": 11.6785, "step": 11033 }, { "epoch": 0.2309721175584024, "grad_norm": 0.2258596569299698, "learning_rate": 0.00019710361891529194, "loss": 11.6852, "step": 11034 }, { "epoch": 0.23099305032236456, "grad_norm": 0.35154491662979126, "learning_rate": 0.000197103095024684, "loss": 11.688, "step": 11035 }, { "epoch": 0.23101398308632673, "grad_norm": 0.2408028244972229, "learning_rate": 0.0001971025710873966, "loss": 11.6834, "step": 11036 }, { "epoch": 0.23103491585028887, "grad_norm": 0.24814456701278687, "learning_rate": 0.00019710204710343007, "loss": 11.6713, "step": 11037 }, { "epoch": 0.23105584861425102, "grad_norm": 0.20047861337661743, "learning_rate": 0.00019710152307278464, "loss": 11.6661, "step": 11038 }, { "epoch": 0.2310767813782132, "grad_norm": 0.31353023648262024, "learning_rate": 0.00019710099899546054, "loss": 11.6791, "step": 11039 }, { "epoch": 0.23109771414217534, "grad_norm": 0.2398127019405365, "learning_rate": 0.0001971004748714581, "loss": 11.68, "step": 11040 }, { "epoch": 0.23111864690613748, "grad_norm": 0.3061933219432831, "learning_rate": 0.00019709995070077747, "loss": 11.6557, "step": 11041 }, { "epoch": 0.23113957967009963, "grad_norm": 0.25416284799575806, "learning_rate": 0.00019709942648341895, "loss": 11.673, "step": 11042 }, { "epoch": 0.2311605124340618, "grad_norm": 0.26947784423828125, "learning_rate": 0.0001970989022193828, "loss": 11.68, "step": 11043 }, { "epoch": 0.23118144519802394, "grad_norm": 0.3027745187282562, "learning_rate": 0.00019709837790866924, "loss": 11.6826, "step": 11044 }, { "epoch": 0.2312023779619861, "grad_norm": 0.2692044675350189, "learning_rate": 0.0001970978535512786, "loss": 11.6844, "step": 11045 }, { "epoch": 0.23122331072594826, "grad_norm": 0.2565951347351074, "learning_rate": 0.000197097329147211, "loss": 11.6717, "step": 11046 }, { "epoch": 0.2312442434899104, "grad_norm": 0.23993253707885742, "learning_rate": 0.00019709680469646683, "loss": 11.673, "step": 11047 }, { "epoch": 0.23126517625387255, "grad_norm": 0.27727606892585754, "learning_rate": 0.00019709628019904627, "loss": 11.6647, "step": 11048 }, { "epoch": 0.23128610901783472, "grad_norm": 0.25538840889930725, "learning_rate": 0.00019709575565494955, "loss": 11.6748, "step": 11049 }, { "epoch": 0.23130704178179687, "grad_norm": 0.27057626843452454, "learning_rate": 0.00019709523106417698, "loss": 11.6786, "step": 11050 }, { "epoch": 0.231327974545759, "grad_norm": 0.3076338469982147, "learning_rate": 0.00019709470642672878, "loss": 11.6959, "step": 11051 }, { "epoch": 0.23134890730972119, "grad_norm": 0.22210641205310822, "learning_rate": 0.0001970941817426052, "loss": 11.6726, "step": 11052 }, { "epoch": 0.23136984007368333, "grad_norm": 0.2586429715156555, "learning_rate": 0.00019709365701180652, "loss": 11.681, "step": 11053 }, { "epoch": 0.23139077283764548, "grad_norm": 0.22560206055641174, "learning_rate": 0.00019709313223433296, "loss": 11.673, "step": 11054 }, { "epoch": 0.23141170560160765, "grad_norm": 0.25833365321159363, "learning_rate": 0.0001970926074101848, "loss": 11.6927, "step": 11055 }, { "epoch": 0.2314326383655698, "grad_norm": 0.2550235688686371, "learning_rate": 0.00019709208253936226, "loss": 11.676, "step": 11056 }, { "epoch": 0.23145357112953194, "grad_norm": 0.26144149899482727, "learning_rate": 0.0001970915576218656, "loss": 11.6824, "step": 11057 }, { "epoch": 0.2314745038934941, "grad_norm": 0.2760274112224579, "learning_rate": 0.00019709103265769514, "loss": 11.6751, "step": 11058 }, { "epoch": 0.23149543665745625, "grad_norm": 0.24891532957553864, "learning_rate": 0.00019709050764685101, "loss": 11.6665, "step": 11059 }, { "epoch": 0.2315163694214184, "grad_norm": 0.19034646451473236, "learning_rate": 0.00019708998258933356, "loss": 11.6591, "step": 11060 }, { "epoch": 0.23153730218538054, "grad_norm": 0.2556886076927185, "learning_rate": 0.000197089457485143, "loss": 11.6702, "step": 11061 }, { "epoch": 0.23155823494934272, "grad_norm": 0.285817950963974, "learning_rate": 0.0001970889323342796, "loss": 11.6752, "step": 11062 }, { "epoch": 0.23157916771330486, "grad_norm": 0.28330856561660767, "learning_rate": 0.00019708840713674358, "loss": 11.7003, "step": 11063 }, { "epoch": 0.231600100477267, "grad_norm": 0.3012486696243286, "learning_rate": 0.00019708788189253524, "loss": 11.6787, "step": 11064 }, { "epoch": 0.23162103324122918, "grad_norm": 0.2204301506280899, "learning_rate": 0.0001970873566016548, "loss": 11.656, "step": 11065 }, { "epoch": 0.23164196600519132, "grad_norm": 0.24164845049381256, "learning_rate": 0.00019708683126410255, "loss": 11.6786, "step": 11066 }, { "epoch": 0.23166289876915347, "grad_norm": 0.24394993484020233, "learning_rate": 0.00019708630587987868, "loss": 11.6656, "step": 11067 }, { "epoch": 0.23168383153311564, "grad_norm": 0.2590658366680145, "learning_rate": 0.0001970857804489835, "loss": 11.6887, "step": 11068 }, { "epoch": 0.2317047642970778, "grad_norm": 0.2628190517425537, "learning_rate": 0.00019708525497141724, "loss": 11.6741, "step": 11069 }, { "epoch": 0.23172569706103993, "grad_norm": 0.23821628093719482, "learning_rate": 0.00019708472944718013, "loss": 11.6826, "step": 11070 }, { "epoch": 0.2317466298250021, "grad_norm": 0.2528453767299652, "learning_rate": 0.00019708420387627243, "loss": 11.6652, "step": 11071 }, { "epoch": 0.23176756258896425, "grad_norm": 0.25162920355796814, "learning_rate": 0.00019708367825869446, "loss": 11.6771, "step": 11072 }, { "epoch": 0.2317884953529264, "grad_norm": 0.3199084401130676, "learning_rate": 0.00019708315259444638, "loss": 11.6735, "step": 11073 }, { "epoch": 0.23180942811688857, "grad_norm": 0.2599825859069824, "learning_rate": 0.0001970826268835285, "loss": 11.6712, "step": 11074 }, { "epoch": 0.2318303608808507, "grad_norm": 0.25822219252586365, "learning_rate": 0.00019708210112594104, "loss": 11.6754, "step": 11075 }, { "epoch": 0.23185129364481286, "grad_norm": 0.19769765436649323, "learning_rate": 0.0001970815753216843, "loss": 11.6688, "step": 11076 }, { "epoch": 0.23187222640877503, "grad_norm": 0.22478200495243073, "learning_rate": 0.00019708104947075848, "loss": 11.6815, "step": 11077 }, { "epoch": 0.23189315917273717, "grad_norm": 0.2521090805530548, "learning_rate": 0.00019708052357316386, "loss": 11.6718, "step": 11078 }, { "epoch": 0.23191409193669932, "grad_norm": 0.2003510296344757, "learning_rate": 0.00019707999762890065, "loss": 11.6666, "step": 11079 }, { "epoch": 0.23193502470066146, "grad_norm": 0.3247688412666321, "learning_rate": 0.00019707947163796917, "loss": 11.6762, "step": 11080 }, { "epoch": 0.23195595746462364, "grad_norm": 0.2677929699420929, "learning_rate": 0.00019707894560036965, "loss": 11.6675, "step": 11081 }, { "epoch": 0.23197689022858578, "grad_norm": 0.2572006285190582, "learning_rate": 0.00019707841951610236, "loss": 11.6783, "step": 11082 }, { "epoch": 0.23199782299254793, "grad_norm": 0.25098955631256104, "learning_rate": 0.00019707789338516748, "loss": 11.6939, "step": 11083 }, { "epoch": 0.2320187557565101, "grad_norm": 0.32924512028694153, "learning_rate": 0.00019707736720756534, "loss": 11.6699, "step": 11084 }, { "epoch": 0.23203968852047224, "grad_norm": 0.28623342514038086, "learning_rate": 0.00019707684098329615, "loss": 11.668, "step": 11085 }, { "epoch": 0.2320606212844344, "grad_norm": 0.28895968198776245, "learning_rate": 0.00019707631471236017, "loss": 11.6823, "step": 11086 }, { "epoch": 0.23208155404839656, "grad_norm": 0.21087834239006042, "learning_rate": 0.0001970757883947577, "loss": 11.6825, "step": 11087 }, { "epoch": 0.2321024868123587, "grad_norm": 0.31932345032691956, "learning_rate": 0.0001970752620304889, "loss": 11.6619, "step": 11088 }, { "epoch": 0.23212341957632085, "grad_norm": 0.27325230836868286, "learning_rate": 0.00019707473561955409, "loss": 11.6793, "step": 11089 }, { "epoch": 0.23214435234028302, "grad_norm": 0.28029170632362366, "learning_rate": 0.00019707420916195354, "loss": 11.6848, "step": 11090 }, { "epoch": 0.23216528510424517, "grad_norm": 0.2546239495277405, "learning_rate": 0.00019707368265768746, "loss": 11.6641, "step": 11091 }, { "epoch": 0.2321862178682073, "grad_norm": 0.2681523561477661, "learning_rate": 0.0001970731561067561, "loss": 11.6791, "step": 11092 }, { "epoch": 0.23220715063216948, "grad_norm": 0.2387666255235672, "learning_rate": 0.00019707262950915974, "loss": 11.6791, "step": 11093 }, { "epoch": 0.23222808339613163, "grad_norm": 0.25653505325317383, "learning_rate": 0.0001970721028648986, "loss": 11.675, "step": 11094 }, { "epoch": 0.23224901616009377, "grad_norm": 2.195314407348633, "learning_rate": 0.00019707157617397298, "loss": 11.6517, "step": 11095 }, { "epoch": 0.23226994892405592, "grad_norm": 0.24716754257678986, "learning_rate": 0.0001970710494363831, "loss": 11.6777, "step": 11096 }, { "epoch": 0.2322908816880181, "grad_norm": 0.2517777383327484, "learning_rate": 0.0001970705226521292, "loss": 11.6852, "step": 11097 }, { "epoch": 0.23231181445198024, "grad_norm": 0.24126864969730377, "learning_rate": 0.0001970699958212116, "loss": 11.665, "step": 11098 }, { "epoch": 0.23233274721594238, "grad_norm": 0.23340652883052826, "learning_rate": 0.00019706946894363048, "loss": 11.666, "step": 11099 }, { "epoch": 0.23235367997990455, "grad_norm": 0.21868734061717987, "learning_rate": 0.00019706894201938614, "loss": 11.6858, "step": 11100 }, { "epoch": 0.2323746127438667, "grad_norm": 0.32117462158203125, "learning_rate": 0.00019706841504847877, "loss": 11.6828, "step": 11101 }, { "epoch": 0.23239554550782884, "grad_norm": 0.2214663326740265, "learning_rate": 0.0001970678880309087, "loss": 11.6596, "step": 11102 }, { "epoch": 0.23241647827179102, "grad_norm": 0.2509472072124481, "learning_rate": 0.00019706736096667616, "loss": 11.6688, "step": 11103 }, { "epoch": 0.23243741103575316, "grad_norm": 0.25625351071357727, "learning_rate": 0.00019706683385578138, "loss": 11.6917, "step": 11104 }, { "epoch": 0.2324583437997153, "grad_norm": 0.2525162994861603, "learning_rate": 0.00019706630669822462, "loss": 11.678, "step": 11105 }, { "epoch": 0.23247927656367748, "grad_norm": 0.23143315315246582, "learning_rate": 0.00019706577949400616, "loss": 11.6707, "step": 11106 }, { "epoch": 0.23250020932763962, "grad_norm": 4.480614185333252, "learning_rate": 0.00019706525224312622, "loss": 11.6658, "step": 11107 }, { "epoch": 0.23252114209160177, "grad_norm": 0.23007571697235107, "learning_rate": 0.00019706472494558508, "loss": 11.6788, "step": 11108 }, { "epoch": 0.23254207485556394, "grad_norm": 0.3012748062610626, "learning_rate": 0.00019706419760138298, "loss": 11.68, "step": 11109 }, { "epoch": 0.23256300761952609, "grad_norm": 0.22728465497493744, "learning_rate": 0.00019706367021052015, "loss": 11.6868, "step": 11110 }, { "epoch": 0.23258394038348823, "grad_norm": 0.34121859073638916, "learning_rate": 0.00019706314277299688, "loss": 11.6855, "step": 11111 }, { "epoch": 0.2326048731474504, "grad_norm": 0.24435319006443024, "learning_rate": 0.00019706261528881343, "loss": 11.6716, "step": 11112 }, { "epoch": 0.23262580591141255, "grad_norm": 0.2753197252750397, "learning_rate": 0.00019706208775797004, "loss": 11.682, "step": 11113 }, { "epoch": 0.2326467386753747, "grad_norm": 0.29352137446403503, "learning_rate": 0.00019706156018046693, "loss": 11.6825, "step": 11114 }, { "epoch": 0.23266767143933684, "grad_norm": 0.24130666255950928, "learning_rate": 0.0001970610325563044, "loss": 11.6789, "step": 11115 }, { "epoch": 0.232688604203299, "grad_norm": 0.23746664822101593, "learning_rate": 0.0001970605048854827, "loss": 11.6701, "step": 11116 }, { "epoch": 0.23270953696726115, "grad_norm": 0.24681760370731354, "learning_rate": 0.00019705997716800203, "loss": 11.6828, "step": 11117 }, { "epoch": 0.2327304697312233, "grad_norm": 0.30396509170532227, "learning_rate": 0.00019705944940386273, "loss": 11.6797, "step": 11118 }, { "epoch": 0.23275140249518547, "grad_norm": 0.23344486951828003, "learning_rate": 0.00019705892159306497, "loss": 11.6829, "step": 11119 }, { "epoch": 0.23277233525914762, "grad_norm": 0.25031477212905884, "learning_rate": 0.00019705839373560907, "loss": 11.6668, "step": 11120 }, { "epoch": 0.23279326802310976, "grad_norm": 0.24051980674266815, "learning_rate": 0.00019705786583149523, "loss": 11.6633, "step": 11121 }, { "epoch": 0.23281420078707193, "grad_norm": 0.2389899045228958, "learning_rate": 0.00019705733788072375, "loss": 11.6924, "step": 11122 }, { "epoch": 0.23283513355103408, "grad_norm": 0.25189757347106934, "learning_rate": 0.00019705680988329487, "loss": 11.6802, "step": 11123 }, { "epoch": 0.23285606631499622, "grad_norm": 0.3323768079280853, "learning_rate": 0.00019705628183920882, "loss": 11.6734, "step": 11124 }, { "epoch": 0.2328769990789584, "grad_norm": 0.2812141478061676, "learning_rate": 0.00019705575374846587, "loss": 11.6826, "step": 11125 }, { "epoch": 0.23289793184292054, "grad_norm": 0.2402249574661255, "learning_rate": 0.00019705522561106625, "loss": 11.6729, "step": 11126 }, { "epoch": 0.2329188646068827, "grad_norm": 0.24530963599681854, "learning_rate": 0.00019705469742701027, "loss": 11.688, "step": 11127 }, { "epoch": 0.23293979737084486, "grad_norm": 0.2566646337509155, "learning_rate": 0.00019705416919629817, "loss": 11.6708, "step": 11128 }, { "epoch": 0.232960730134807, "grad_norm": 0.21974165737628937, "learning_rate": 0.00019705364091893014, "loss": 11.68, "step": 11129 }, { "epoch": 0.23298166289876915, "grad_norm": 0.2531284689903259, "learning_rate": 0.00019705311259490647, "loss": 11.6787, "step": 11130 }, { "epoch": 0.2330025956627313, "grad_norm": 0.2702864408493042, "learning_rate": 0.00019705258422422748, "loss": 11.6889, "step": 11131 }, { "epoch": 0.23302352842669347, "grad_norm": 0.2448788583278656, "learning_rate": 0.00019705205580689334, "loss": 11.6657, "step": 11132 }, { "epoch": 0.2330444611906556, "grad_norm": 0.22675445675849915, "learning_rate": 0.00019705152734290432, "loss": 11.6831, "step": 11133 }, { "epoch": 0.23306539395461776, "grad_norm": 0.3158706724643707, "learning_rate": 0.00019705099883226074, "loss": 11.6747, "step": 11134 }, { "epoch": 0.23308632671857993, "grad_norm": 0.24747152626514435, "learning_rate": 0.00019705047027496276, "loss": 11.6949, "step": 11135 }, { "epoch": 0.23310725948254207, "grad_norm": 0.2428964525461197, "learning_rate": 0.00019704994167101065, "loss": 11.6561, "step": 11136 }, { "epoch": 0.23312819224650422, "grad_norm": 0.2376512736082077, "learning_rate": 0.00019704941302040472, "loss": 11.6648, "step": 11137 }, { "epoch": 0.2331491250104664, "grad_norm": 0.2717027962207794, "learning_rate": 0.0001970488843231452, "loss": 11.6645, "step": 11138 }, { "epoch": 0.23317005777442854, "grad_norm": 0.30698516964912415, "learning_rate": 0.00019704835557923228, "loss": 11.6841, "step": 11139 }, { "epoch": 0.23319099053839068, "grad_norm": 0.26193946599960327, "learning_rate": 0.0001970478267886663, "loss": 11.684, "step": 11140 }, { "epoch": 0.23321192330235285, "grad_norm": 0.24151386320590973, "learning_rate": 0.00019704729795144753, "loss": 11.6657, "step": 11141 }, { "epoch": 0.233232856066315, "grad_norm": 0.29323136806488037, "learning_rate": 0.00019704676906757613, "loss": 11.6902, "step": 11142 }, { "epoch": 0.23325378883027714, "grad_norm": 0.24713453650474548, "learning_rate": 0.00019704624013705245, "loss": 11.6787, "step": 11143 }, { "epoch": 0.23327472159423931, "grad_norm": 0.2572205364704132, "learning_rate": 0.00019704571115987664, "loss": 11.6772, "step": 11144 }, { "epoch": 0.23329565435820146, "grad_norm": 0.21886371076107025, "learning_rate": 0.00019704518213604902, "loss": 11.6792, "step": 11145 }, { "epoch": 0.2333165871221636, "grad_norm": 0.3343595266342163, "learning_rate": 0.00019704465306556986, "loss": 11.69, "step": 11146 }, { "epoch": 0.23333751988612578, "grad_norm": 0.2263149917125702, "learning_rate": 0.00019704412394843938, "loss": 11.6599, "step": 11147 }, { "epoch": 0.23335845265008792, "grad_norm": 0.24698394536972046, "learning_rate": 0.00019704359478465788, "loss": 11.6739, "step": 11148 }, { "epoch": 0.23337938541405007, "grad_norm": 0.2636972665786743, "learning_rate": 0.00019704306557422554, "loss": 11.6846, "step": 11149 }, { "epoch": 0.2334003181780122, "grad_norm": 0.2134024202823639, "learning_rate": 0.00019704253631714263, "loss": 11.6873, "step": 11150 }, { "epoch": 0.23342125094197438, "grad_norm": 0.2771439850330353, "learning_rate": 0.0001970420070134095, "loss": 11.6787, "step": 11151 }, { "epoch": 0.23344218370593653, "grad_norm": 0.25307896733283997, "learning_rate": 0.00019704147766302628, "loss": 11.6931, "step": 11152 }, { "epoch": 0.23346311646989867, "grad_norm": 0.19074249267578125, "learning_rate": 0.00019704094826599329, "loss": 11.6833, "step": 11153 }, { "epoch": 0.23348404923386085, "grad_norm": 0.23551462590694427, "learning_rate": 0.00019704041882231076, "loss": 11.6746, "step": 11154 }, { "epoch": 0.233504981997823, "grad_norm": 0.24390031397342682, "learning_rate": 0.00019703988933197897, "loss": 11.6743, "step": 11155 }, { "epoch": 0.23352591476178514, "grad_norm": 0.23711033165454865, "learning_rate": 0.00019703935979499817, "loss": 11.6827, "step": 11156 }, { "epoch": 0.2335468475257473, "grad_norm": 0.22427257895469666, "learning_rate": 0.00019703883021136858, "loss": 11.6697, "step": 11157 }, { "epoch": 0.23356778028970945, "grad_norm": 0.2061341106891632, "learning_rate": 0.0001970383005810905, "loss": 11.6904, "step": 11158 }, { "epoch": 0.2335887130536716, "grad_norm": 0.3130512535572052, "learning_rate": 0.00019703777090416416, "loss": 11.6698, "step": 11159 }, { "epoch": 0.23360964581763377, "grad_norm": 0.2526877820491791, "learning_rate": 0.00019703724118058983, "loss": 11.6583, "step": 11160 }, { "epoch": 0.23363057858159592, "grad_norm": 0.23244504630565643, "learning_rate": 0.00019703671141036772, "loss": 11.6679, "step": 11161 }, { "epoch": 0.23365151134555806, "grad_norm": 0.24317598342895508, "learning_rate": 0.00019703618159349812, "loss": 11.6645, "step": 11162 }, { "epoch": 0.23367244410952023, "grad_norm": 0.2517319917678833, "learning_rate": 0.00019703565172998132, "loss": 11.669, "step": 11163 }, { "epoch": 0.23369337687348238, "grad_norm": 0.2469409704208374, "learning_rate": 0.00019703512181981753, "loss": 11.6647, "step": 11164 }, { "epoch": 0.23371430963744452, "grad_norm": 0.22875431180000305, "learning_rate": 0.000197034591863007, "loss": 11.6833, "step": 11165 }, { "epoch": 0.2337352424014067, "grad_norm": 0.2749752402305603, "learning_rate": 0.00019703406185955, "loss": 11.6609, "step": 11166 }, { "epoch": 0.23375617516536884, "grad_norm": 0.27557173371315, "learning_rate": 0.00019703353180944674, "loss": 11.6782, "step": 11167 }, { "epoch": 0.23377710792933099, "grad_norm": 0.2129225730895996, "learning_rate": 0.00019703300171269755, "loss": 11.6672, "step": 11168 }, { "epoch": 0.23379804069329313, "grad_norm": 0.2562906742095947, "learning_rate": 0.00019703247156930268, "loss": 11.6795, "step": 11169 }, { "epoch": 0.2338189734572553, "grad_norm": 0.23510535061359406, "learning_rate": 0.00019703194137926232, "loss": 11.6785, "step": 11170 }, { "epoch": 0.23383990622121745, "grad_norm": 0.21926794946193695, "learning_rate": 0.0001970314111425768, "loss": 11.6878, "step": 11171 }, { "epoch": 0.2338608389851796, "grad_norm": 0.1936304271221161, "learning_rate": 0.0001970308808592463, "loss": 11.6713, "step": 11172 }, { "epoch": 0.23388177174914176, "grad_norm": 0.2594984173774719, "learning_rate": 0.0001970303505292711, "loss": 11.6674, "step": 11173 }, { "epoch": 0.2339027045131039, "grad_norm": 0.2609492242336273, "learning_rate": 0.0001970298201526515, "loss": 11.6878, "step": 11174 }, { "epoch": 0.23392363727706605, "grad_norm": 0.2441910356283188, "learning_rate": 0.0001970292897293877, "loss": 11.6682, "step": 11175 }, { "epoch": 0.23394457004102823, "grad_norm": 0.30060097575187683, "learning_rate": 0.00019702875925948, "loss": 11.6708, "step": 11176 }, { "epoch": 0.23396550280499037, "grad_norm": 0.2206157147884369, "learning_rate": 0.00019702822874292862, "loss": 11.6726, "step": 11177 }, { "epoch": 0.23398643556895252, "grad_norm": 0.25788870453834534, "learning_rate": 0.00019702769817973382, "loss": 11.6728, "step": 11178 }, { "epoch": 0.2340073683329147, "grad_norm": 0.3000008761882782, "learning_rate": 0.00019702716756989585, "loss": 11.6832, "step": 11179 }, { "epoch": 0.23402830109687683, "grad_norm": 0.3405551612377167, "learning_rate": 0.000197026636913415, "loss": 11.6958, "step": 11180 }, { "epoch": 0.23404923386083898, "grad_norm": 0.2479729950428009, "learning_rate": 0.00019702610621029148, "loss": 11.678, "step": 11181 }, { "epoch": 0.23407016662480115, "grad_norm": 0.26917514204978943, "learning_rate": 0.0001970255754605256, "loss": 11.6708, "step": 11182 }, { "epoch": 0.2340910993887633, "grad_norm": 0.24124540388584137, "learning_rate": 0.00019702504466411754, "loss": 11.6816, "step": 11183 }, { "epoch": 0.23411203215272544, "grad_norm": 0.28243303298950195, "learning_rate": 0.0001970245138210676, "loss": 11.6867, "step": 11184 }, { "epoch": 0.23413296491668759, "grad_norm": 0.257778525352478, "learning_rate": 0.00019702398293137605, "loss": 11.6738, "step": 11185 }, { "epoch": 0.23415389768064976, "grad_norm": 0.253825306892395, "learning_rate": 0.00019702345199504312, "loss": 11.6833, "step": 11186 }, { "epoch": 0.2341748304446119, "grad_norm": 0.20740976929664612, "learning_rate": 0.00019702292101206909, "loss": 11.6762, "step": 11187 }, { "epoch": 0.23419576320857405, "grad_norm": 0.27704545855522156, "learning_rate": 0.00019702238998245417, "loss": 11.6748, "step": 11188 }, { "epoch": 0.23421669597253622, "grad_norm": 0.28313514590263367, "learning_rate": 0.00019702185890619863, "loss": 11.6893, "step": 11189 }, { "epoch": 0.23423762873649837, "grad_norm": 0.20284722745418549, "learning_rate": 0.0001970213277833028, "loss": 11.6637, "step": 11190 }, { "epoch": 0.2342585615004605, "grad_norm": 0.23380278050899506, "learning_rate": 0.00019702079661376682, "loss": 11.6785, "step": 11191 }, { "epoch": 0.23427949426442268, "grad_norm": 0.3099060356616974, "learning_rate": 0.000197020265397591, "loss": 11.683, "step": 11192 }, { "epoch": 0.23430042702838483, "grad_norm": 0.34935906529426575, "learning_rate": 0.00019701973413477564, "loss": 11.678, "step": 11193 }, { "epoch": 0.23432135979234697, "grad_norm": 0.2302612066268921, "learning_rate": 0.0001970192028253209, "loss": 11.6838, "step": 11194 }, { "epoch": 0.23434229255630915, "grad_norm": 0.2935968041419983, "learning_rate": 0.00019701867146922712, "loss": 11.6764, "step": 11195 }, { "epoch": 0.2343632253202713, "grad_norm": 0.2874067723751068, "learning_rate": 0.0001970181400664945, "loss": 11.6643, "step": 11196 }, { "epoch": 0.23438415808423343, "grad_norm": 0.34363868832588196, "learning_rate": 0.00019701760861712332, "loss": 11.6714, "step": 11197 }, { "epoch": 0.2344050908481956, "grad_norm": 0.20662780106067657, "learning_rate": 0.00019701707712111382, "loss": 11.6693, "step": 11198 }, { "epoch": 0.23442602361215775, "grad_norm": 0.2637676000595093, "learning_rate": 0.00019701654557846627, "loss": 11.6775, "step": 11199 }, { "epoch": 0.2344469563761199, "grad_norm": 0.22815701365470886, "learning_rate": 0.00019701601398918096, "loss": 11.678, "step": 11200 }, { "epoch": 0.23446788914008207, "grad_norm": 0.280678391456604, "learning_rate": 0.00019701548235325807, "loss": 11.6789, "step": 11201 }, { "epoch": 0.23448882190404421, "grad_norm": 0.22749413549900055, "learning_rate": 0.0001970149506706979, "loss": 11.6814, "step": 11202 }, { "epoch": 0.23450975466800636, "grad_norm": 0.2582281529903412, "learning_rate": 0.0001970144189415007, "loss": 11.6824, "step": 11203 }, { "epoch": 0.2345306874319685, "grad_norm": 0.3035053610801697, "learning_rate": 0.00019701388716566673, "loss": 11.6649, "step": 11204 }, { "epoch": 0.23455162019593068, "grad_norm": 0.29247891902923584, "learning_rate": 0.00019701335534319623, "loss": 11.6592, "step": 11205 }, { "epoch": 0.23457255295989282, "grad_norm": 0.32004162669181824, "learning_rate": 0.00019701282347408947, "loss": 11.6701, "step": 11206 }, { "epoch": 0.23459348572385497, "grad_norm": 0.22570857405662537, "learning_rate": 0.0001970122915583467, "loss": 11.6838, "step": 11207 }, { "epoch": 0.23461441848781714, "grad_norm": 0.24942371249198914, "learning_rate": 0.00019701175959596818, "loss": 11.6801, "step": 11208 }, { "epoch": 0.23463535125177928, "grad_norm": 0.22722500562667847, "learning_rate": 0.0001970112275869542, "loss": 11.6752, "step": 11209 }, { "epoch": 0.23465628401574143, "grad_norm": 0.5124601125717163, "learning_rate": 0.0001970106955313049, "loss": 11.6928, "step": 11210 }, { "epoch": 0.2346772167797036, "grad_norm": 0.4231942296028137, "learning_rate": 0.00019701016342902068, "loss": 11.7066, "step": 11211 }, { "epoch": 0.23469814954366575, "grad_norm": 0.3175663650035858, "learning_rate": 0.0001970096312801017, "loss": 11.6943, "step": 11212 }, { "epoch": 0.2347190823076279, "grad_norm": 0.2440042644739151, "learning_rate": 0.00019700909908454826, "loss": 11.686, "step": 11213 }, { "epoch": 0.23474001507159006, "grad_norm": 0.26051756739616394, "learning_rate": 0.0001970085668423606, "loss": 11.6715, "step": 11214 }, { "epoch": 0.2347609478355522, "grad_norm": 0.27963951230049133, "learning_rate": 0.00019700803455353897, "loss": 11.6704, "step": 11215 }, { "epoch": 0.23478188059951435, "grad_norm": 0.2548452615737915, "learning_rate": 0.00019700750221808363, "loss": 11.6646, "step": 11216 }, { "epoch": 0.23480281336347653, "grad_norm": 0.2358669638633728, "learning_rate": 0.00019700696983599483, "loss": 11.6796, "step": 11217 }, { "epoch": 0.23482374612743867, "grad_norm": 0.28316566348075867, "learning_rate": 0.00019700643740727287, "loss": 11.6785, "step": 11218 }, { "epoch": 0.23484467889140082, "grad_norm": 0.24695594608783722, "learning_rate": 0.00019700590493191796, "loss": 11.6809, "step": 11219 }, { "epoch": 0.23486561165536296, "grad_norm": 0.22464898228645325, "learning_rate": 0.00019700537240993033, "loss": 11.6716, "step": 11220 }, { "epoch": 0.23488654441932513, "grad_norm": 0.31831222772598267, "learning_rate": 0.00019700483984131032, "loss": 11.6818, "step": 11221 }, { "epoch": 0.23490747718328728, "grad_norm": 0.24842609465122223, "learning_rate": 0.00019700430722605812, "loss": 11.6819, "step": 11222 }, { "epoch": 0.23492840994724942, "grad_norm": 0.21396265923976898, "learning_rate": 0.00019700377456417401, "loss": 11.6898, "step": 11223 }, { "epoch": 0.2349493427112116, "grad_norm": 0.21298356354236603, "learning_rate": 0.00019700324185565826, "loss": 11.669, "step": 11224 }, { "epoch": 0.23497027547517374, "grad_norm": 0.23252035677433014, "learning_rate": 0.00019700270910051105, "loss": 11.6804, "step": 11225 }, { "epoch": 0.23499120823913588, "grad_norm": 0.3353831470012665, "learning_rate": 0.00019700217629873277, "loss": 11.6798, "step": 11226 }, { "epoch": 0.23501214100309806, "grad_norm": 0.3132203221321106, "learning_rate": 0.00019700164345032355, "loss": 11.6865, "step": 11227 }, { "epoch": 0.2350330737670602, "grad_norm": 0.23381108045578003, "learning_rate": 0.0001970011105552837, "loss": 11.6729, "step": 11228 }, { "epoch": 0.23505400653102235, "grad_norm": 0.21820232272148132, "learning_rate": 0.0001970005776136135, "loss": 11.6789, "step": 11229 }, { "epoch": 0.23507493929498452, "grad_norm": 0.2013746052980423, "learning_rate": 0.00019700004462531315, "loss": 11.6825, "step": 11230 }, { "epoch": 0.23509587205894666, "grad_norm": 0.3152814507484436, "learning_rate": 0.00019699951159038293, "loss": 11.6794, "step": 11231 }, { "epoch": 0.2351168048229088, "grad_norm": 0.22495156526565552, "learning_rate": 0.00019699897850882314, "loss": 11.6718, "step": 11232 }, { "epoch": 0.23513773758687098, "grad_norm": 0.28409531712532043, "learning_rate": 0.00019699844538063394, "loss": 11.6777, "step": 11233 }, { "epoch": 0.23515867035083313, "grad_norm": 0.22361715137958527, "learning_rate": 0.00019699791220581568, "loss": 11.676, "step": 11234 }, { "epoch": 0.23517960311479527, "grad_norm": 0.27240198850631714, "learning_rate": 0.0001969973789843686, "loss": 11.6723, "step": 11235 }, { "epoch": 0.23520053587875744, "grad_norm": 0.2543618083000183, "learning_rate": 0.0001969968457162929, "loss": 11.6873, "step": 11236 }, { "epoch": 0.2352214686427196, "grad_norm": 0.2534591555595398, "learning_rate": 0.00019699631240158887, "loss": 11.6612, "step": 11237 }, { "epoch": 0.23524240140668173, "grad_norm": 0.2337152659893036, "learning_rate": 0.00019699577904025678, "loss": 11.6717, "step": 11238 }, { "epoch": 0.23526333417064388, "grad_norm": 0.2143351286649704, "learning_rate": 0.00019699524563229689, "loss": 11.6599, "step": 11239 }, { "epoch": 0.23528426693460605, "grad_norm": 0.2200196087360382, "learning_rate": 0.00019699471217770942, "loss": 11.6617, "step": 11240 }, { "epoch": 0.2353051996985682, "grad_norm": 0.3606988787651062, "learning_rate": 0.00019699417867649467, "loss": 11.6982, "step": 11241 }, { "epoch": 0.23532613246253034, "grad_norm": 0.3269689679145813, "learning_rate": 0.00019699364512865285, "loss": 11.6668, "step": 11242 }, { "epoch": 0.2353470652264925, "grad_norm": 0.2728709280490875, "learning_rate": 0.00019699311153418428, "loss": 11.6792, "step": 11243 }, { "epoch": 0.23536799799045466, "grad_norm": 0.23258472979068756, "learning_rate": 0.00019699257789308915, "loss": 11.6537, "step": 11244 }, { "epoch": 0.2353889307544168, "grad_norm": 0.2665190100669861, "learning_rate": 0.00019699204420536772, "loss": 11.6728, "step": 11245 }, { "epoch": 0.23540986351837898, "grad_norm": 0.3351247310638428, "learning_rate": 0.0001969915104710203, "loss": 11.6798, "step": 11246 }, { "epoch": 0.23543079628234112, "grad_norm": 0.28132861852645874, "learning_rate": 0.0001969909766900471, "loss": 11.6804, "step": 11247 }, { "epoch": 0.23545172904630327, "grad_norm": 0.24205632507801056, "learning_rate": 0.0001969904428624484, "loss": 11.6718, "step": 11248 }, { "epoch": 0.23547266181026544, "grad_norm": 0.33969569206237793, "learning_rate": 0.00019698990898822446, "loss": 11.6752, "step": 11249 }, { "epoch": 0.23549359457422758, "grad_norm": 0.2922504246234894, "learning_rate": 0.00019698937506737555, "loss": 11.6853, "step": 11250 }, { "epoch": 0.23551452733818973, "grad_norm": 0.31805360317230225, "learning_rate": 0.00019698884109990185, "loss": 11.6867, "step": 11251 }, { "epoch": 0.2355354601021519, "grad_norm": 0.25620290637016296, "learning_rate": 0.00019698830708580372, "loss": 11.6697, "step": 11252 }, { "epoch": 0.23555639286611404, "grad_norm": 0.2953740060329437, "learning_rate": 0.00019698777302508134, "loss": 11.6859, "step": 11253 }, { "epoch": 0.2355773256300762, "grad_norm": 0.23774704337120056, "learning_rate": 0.00019698723891773502, "loss": 11.6642, "step": 11254 }, { "epoch": 0.23559825839403836, "grad_norm": 0.252172589302063, "learning_rate": 0.00019698670476376494, "loss": 11.6761, "step": 11255 }, { "epoch": 0.2356191911580005, "grad_norm": 0.22900554537773132, "learning_rate": 0.00019698617056317143, "loss": 11.6772, "step": 11256 }, { "epoch": 0.23564012392196265, "grad_norm": 0.2303926646709442, "learning_rate": 0.00019698563631595476, "loss": 11.6598, "step": 11257 }, { "epoch": 0.2356610566859248, "grad_norm": 0.25422072410583496, "learning_rate": 0.0001969851020221151, "loss": 11.6694, "step": 11258 }, { "epoch": 0.23568198944988697, "grad_norm": 0.2822806239128113, "learning_rate": 0.00019698456768165275, "loss": 11.6951, "step": 11259 }, { "epoch": 0.23570292221384911, "grad_norm": 0.24763508141040802, "learning_rate": 0.00019698403329456804, "loss": 11.6697, "step": 11260 }, { "epoch": 0.23572385497781126, "grad_norm": 0.33466899394989014, "learning_rate": 0.00019698349886086112, "loss": 11.6881, "step": 11261 }, { "epoch": 0.23574478774177343, "grad_norm": 0.2872779071331024, "learning_rate": 0.0001969829643805323, "loss": 11.6822, "step": 11262 }, { "epoch": 0.23576572050573558, "grad_norm": 0.2642299234867096, "learning_rate": 0.00019698242985358183, "loss": 11.67, "step": 11263 }, { "epoch": 0.23578665326969772, "grad_norm": 0.3048262596130371, "learning_rate": 0.00019698189528000995, "loss": 11.7015, "step": 11264 }, { "epoch": 0.2358075860336599, "grad_norm": 0.25879135727882385, "learning_rate": 0.00019698136065981692, "loss": 11.6665, "step": 11265 }, { "epoch": 0.23582851879762204, "grad_norm": 0.2963217794895172, "learning_rate": 0.000196980825993003, "loss": 11.6982, "step": 11266 }, { "epoch": 0.23584945156158418, "grad_norm": 0.42995113134384155, "learning_rate": 0.0001969802912795685, "loss": 11.6751, "step": 11267 }, { "epoch": 0.23587038432554636, "grad_norm": 0.20969319343566895, "learning_rate": 0.0001969797565195136, "loss": 11.6745, "step": 11268 }, { "epoch": 0.2358913170895085, "grad_norm": 0.2696808874607086, "learning_rate": 0.00019697922171283858, "loss": 11.6752, "step": 11269 }, { "epoch": 0.23591224985347065, "grad_norm": 0.3603758215904236, "learning_rate": 0.0001969786868595437, "loss": 11.6648, "step": 11270 }, { "epoch": 0.23593318261743282, "grad_norm": 0.3072526454925537, "learning_rate": 0.00019697815195962926, "loss": 11.6663, "step": 11271 }, { "epoch": 0.23595411538139496, "grad_norm": 0.2757617235183716, "learning_rate": 0.00019697761701309544, "loss": 11.686, "step": 11272 }, { "epoch": 0.2359750481453571, "grad_norm": 0.25690123438835144, "learning_rate": 0.00019697708201994255, "loss": 11.6665, "step": 11273 }, { "epoch": 0.23599598090931925, "grad_norm": 0.23015443980693817, "learning_rate": 0.00019697654698017084, "loss": 11.6633, "step": 11274 }, { "epoch": 0.23601691367328143, "grad_norm": 0.23617738485336304, "learning_rate": 0.00019697601189378055, "loss": 11.6626, "step": 11275 }, { "epoch": 0.23603784643724357, "grad_norm": 0.2980000078678131, "learning_rate": 0.00019697547676077195, "loss": 11.6805, "step": 11276 }, { "epoch": 0.23605877920120572, "grad_norm": 0.27642685174942017, "learning_rate": 0.00019697494158114532, "loss": 11.6717, "step": 11277 }, { "epoch": 0.2360797119651679, "grad_norm": 0.23997485637664795, "learning_rate": 0.00019697440635490083, "loss": 11.6921, "step": 11278 }, { "epoch": 0.23610064472913003, "grad_norm": 0.24458757042884827, "learning_rate": 0.00019697387108203883, "loss": 11.6817, "step": 11279 }, { "epoch": 0.23612157749309218, "grad_norm": 0.3180442452430725, "learning_rate": 0.00019697333576255957, "loss": 11.6928, "step": 11280 }, { "epoch": 0.23614251025705435, "grad_norm": 0.25730350613594055, "learning_rate": 0.00019697280039646326, "loss": 11.6677, "step": 11281 }, { "epoch": 0.2361634430210165, "grad_norm": 0.3264119029045105, "learning_rate": 0.0001969722649837502, "loss": 11.6824, "step": 11282 }, { "epoch": 0.23618437578497864, "grad_norm": 0.2827482223510742, "learning_rate": 0.00019697172952442063, "loss": 11.6792, "step": 11283 }, { "epoch": 0.2362053085489408, "grad_norm": 0.25616076588630676, "learning_rate": 0.00019697119401847478, "loss": 11.6689, "step": 11284 }, { "epoch": 0.23622624131290296, "grad_norm": 0.21659521758556366, "learning_rate": 0.00019697065846591296, "loss": 11.681, "step": 11285 }, { "epoch": 0.2362471740768651, "grad_norm": 0.22358185052871704, "learning_rate": 0.0001969701228667354, "loss": 11.6831, "step": 11286 }, { "epoch": 0.23626810684082727, "grad_norm": 0.2685059905052185, "learning_rate": 0.00019696958722094235, "loss": 11.6912, "step": 11287 }, { "epoch": 0.23628903960478942, "grad_norm": 0.2614029049873352, "learning_rate": 0.00019696905152853406, "loss": 11.678, "step": 11288 }, { "epoch": 0.23630997236875156, "grad_norm": 0.23336392641067505, "learning_rate": 0.00019696851578951082, "loss": 11.6873, "step": 11289 }, { "epoch": 0.23633090513271374, "grad_norm": 0.2731491029262543, "learning_rate": 0.00019696798000387288, "loss": 11.6674, "step": 11290 }, { "epoch": 0.23635183789667588, "grad_norm": 0.2555449604988098, "learning_rate": 0.00019696744417162048, "loss": 11.6766, "step": 11291 }, { "epoch": 0.23637277066063803, "grad_norm": 0.2814973294734955, "learning_rate": 0.0001969669082927539, "loss": 11.6828, "step": 11292 }, { "epoch": 0.23639370342460017, "grad_norm": 0.259443998336792, "learning_rate": 0.00019696637236727335, "loss": 11.6869, "step": 11293 }, { "epoch": 0.23641463618856234, "grad_norm": 0.31016626954078674, "learning_rate": 0.00019696583639517914, "loss": 11.6713, "step": 11294 }, { "epoch": 0.2364355689525245, "grad_norm": 0.29685381054878235, "learning_rate": 0.0001969653003764715, "loss": 11.6895, "step": 11295 }, { "epoch": 0.23645650171648663, "grad_norm": 0.28251025080680847, "learning_rate": 0.0001969647643111507, "loss": 11.6835, "step": 11296 }, { "epoch": 0.2364774344804488, "grad_norm": 0.2700667381286621, "learning_rate": 0.00019696422819921703, "loss": 11.6744, "step": 11297 }, { "epoch": 0.23649836724441095, "grad_norm": 0.2734930217266083, "learning_rate": 0.00019696369204067068, "loss": 11.6753, "step": 11298 }, { "epoch": 0.2365193000083731, "grad_norm": 0.25347235798835754, "learning_rate": 0.00019696315583551195, "loss": 11.6689, "step": 11299 }, { "epoch": 0.23654023277233527, "grad_norm": 0.21256597340106964, "learning_rate": 0.00019696261958374107, "loss": 11.6619, "step": 11300 }, { "epoch": 0.2365611655362974, "grad_norm": 0.22262850403785706, "learning_rate": 0.00019696208328535834, "loss": 11.6716, "step": 11301 }, { "epoch": 0.23658209830025956, "grad_norm": 0.2414076030254364, "learning_rate": 0.00019696154694036398, "loss": 11.67, "step": 11302 }, { "epoch": 0.23660303106422173, "grad_norm": 0.24875760078430176, "learning_rate": 0.00019696101054875826, "loss": 11.6837, "step": 11303 }, { "epoch": 0.23662396382818388, "grad_norm": 0.3133353590965271, "learning_rate": 0.00019696047411054145, "loss": 11.6929, "step": 11304 }, { "epoch": 0.23664489659214602, "grad_norm": 0.22746941447257996, "learning_rate": 0.00019695993762571377, "loss": 11.671, "step": 11305 }, { "epoch": 0.2366658293561082, "grad_norm": 0.2732757329940796, "learning_rate": 0.00019695940109427552, "loss": 11.6782, "step": 11306 }, { "epoch": 0.23668676212007034, "grad_norm": 0.2590303421020508, "learning_rate": 0.00019695886451622695, "loss": 11.6768, "step": 11307 }, { "epoch": 0.23670769488403248, "grad_norm": 0.30762577056884766, "learning_rate": 0.00019695832789156831, "loss": 11.7061, "step": 11308 }, { "epoch": 0.23672862764799465, "grad_norm": 0.2890058755874634, "learning_rate": 0.00019695779122029988, "loss": 11.6839, "step": 11309 }, { "epoch": 0.2367495604119568, "grad_norm": 0.27859631180763245, "learning_rate": 0.00019695725450242187, "loss": 11.68, "step": 11310 }, { "epoch": 0.23677049317591894, "grad_norm": 0.22459879517555237, "learning_rate": 0.00019695671773793457, "loss": 11.6855, "step": 11311 }, { "epoch": 0.2367914259398811, "grad_norm": 0.31466567516326904, "learning_rate": 0.0001969561809268382, "loss": 11.6817, "step": 11312 }, { "epoch": 0.23681235870384326, "grad_norm": 0.25744929909706116, "learning_rate": 0.00019695564406913308, "loss": 11.6728, "step": 11313 }, { "epoch": 0.2368332914678054, "grad_norm": 0.2575905919075012, "learning_rate": 0.00019695510716481943, "loss": 11.6712, "step": 11314 }, { "epoch": 0.23685422423176755, "grad_norm": 0.25124403834342957, "learning_rate": 0.00019695457021389753, "loss": 11.7004, "step": 11315 }, { "epoch": 0.23687515699572972, "grad_norm": 0.21201904118061066, "learning_rate": 0.00019695403321636762, "loss": 11.6701, "step": 11316 }, { "epoch": 0.23689608975969187, "grad_norm": 0.322321355342865, "learning_rate": 0.00019695349617222997, "loss": 11.6545, "step": 11317 }, { "epoch": 0.23691702252365401, "grad_norm": 0.22775088250637054, "learning_rate": 0.00019695295908148482, "loss": 11.667, "step": 11318 }, { "epoch": 0.2369379552876162, "grad_norm": 0.23376449942588806, "learning_rate": 0.00019695242194413246, "loss": 11.6879, "step": 11319 }, { "epoch": 0.23695888805157833, "grad_norm": 0.2349337339401245, "learning_rate": 0.0001969518847601731, "loss": 11.6822, "step": 11320 }, { "epoch": 0.23697982081554048, "grad_norm": 0.4458090662956238, "learning_rate": 0.00019695134752960707, "loss": 11.6948, "step": 11321 }, { "epoch": 0.23700075357950265, "grad_norm": 0.3092789351940155, "learning_rate": 0.00019695081025243453, "loss": 11.6886, "step": 11322 }, { "epoch": 0.2370216863434648, "grad_norm": 0.19552266597747803, "learning_rate": 0.00019695027292865582, "loss": 11.6811, "step": 11323 }, { "epoch": 0.23704261910742694, "grad_norm": 0.3151932656764984, "learning_rate": 0.0001969497355582712, "loss": 11.6679, "step": 11324 }, { "epoch": 0.2370635518713891, "grad_norm": 0.2832810580730438, "learning_rate": 0.00019694919814128084, "loss": 11.6894, "step": 11325 }, { "epoch": 0.23708448463535126, "grad_norm": 0.2624472379684448, "learning_rate": 0.0001969486606776851, "loss": 11.68, "step": 11326 }, { "epoch": 0.2371054173993134, "grad_norm": 0.22776566445827484, "learning_rate": 0.00019694812316748417, "loss": 11.6611, "step": 11327 }, { "epoch": 0.23712635016327555, "grad_norm": 0.2663056552410126, "learning_rate": 0.00019694758561067835, "loss": 11.6916, "step": 11328 }, { "epoch": 0.23714728292723772, "grad_norm": 0.3158182203769684, "learning_rate": 0.00019694704800726786, "loss": 11.6782, "step": 11329 }, { "epoch": 0.23716821569119986, "grad_norm": 0.28098824620246887, "learning_rate": 0.000196946510357253, "loss": 11.6789, "step": 11330 }, { "epoch": 0.237189148455162, "grad_norm": 0.26159682869911194, "learning_rate": 0.00019694597266063402, "loss": 11.6842, "step": 11331 }, { "epoch": 0.23721008121912418, "grad_norm": 0.2645762264728546, "learning_rate": 0.00019694543491741113, "loss": 11.6854, "step": 11332 }, { "epoch": 0.23723101398308633, "grad_norm": 0.24647395312786102, "learning_rate": 0.00019694489712758468, "loss": 11.6618, "step": 11333 }, { "epoch": 0.23725194674704847, "grad_norm": 0.23256535828113556, "learning_rate": 0.00019694435929115486, "loss": 11.6784, "step": 11334 }, { "epoch": 0.23727287951101064, "grad_norm": 0.24689878523349762, "learning_rate": 0.0001969438214081219, "loss": 11.6635, "step": 11335 }, { "epoch": 0.2372938122749728, "grad_norm": 0.23260509967803955, "learning_rate": 0.00019694328347848614, "loss": 11.6645, "step": 11336 }, { "epoch": 0.23731474503893493, "grad_norm": 0.2579907476902008, "learning_rate": 0.0001969427455022478, "loss": 11.6725, "step": 11337 }, { "epoch": 0.2373356778028971, "grad_norm": 0.27428391575813293, "learning_rate": 0.00019694220747940714, "loss": 11.6824, "step": 11338 }, { "epoch": 0.23735661056685925, "grad_norm": 0.2532646059989929, "learning_rate": 0.0001969416694099644, "loss": 11.6724, "step": 11339 }, { "epoch": 0.2373775433308214, "grad_norm": 0.24124622344970703, "learning_rate": 0.00019694113129391987, "loss": 11.6793, "step": 11340 }, { "epoch": 0.23739847609478357, "grad_norm": 0.25866958498954773, "learning_rate": 0.0001969405931312738, "loss": 11.671, "step": 11341 }, { "epoch": 0.2374194088587457, "grad_norm": 0.26170432567596436, "learning_rate": 0.00019694005492202642, "loss": 11.6724, "step": 11342 }, { "epoch": 0.23744034162270786, "grad_norm": 0.2169855833053589, "learning_rate": 0.00019693951666617805, "loss": 11.6741, "step": 11343 }, { "epoch": 0.23746127438667003, "grad_norm": 0.28398314118385315, "learning_rate": 0.0001969389783637289, "loss": 11.6712, "step": 11344 }, { "epoch": 0.23748220715063217, "grad_norm": 0.31246045231819153, "learning_rate": 0.00019693844001467922, "loss": 11.6626, "step": 11345 }, { "epoch": 0.23750313991459432, "grad_norm": 0.2673998773097992, "learning_rate": 0.00019693790161902933, "loss": 11.6684, "step": 11346 }, { "epoch": 0.23752407267855646, "grad_norm": 0.2191624939441681, "learning_rate": 0.0001969373631767794, "loss": 11.6725, "step": 11347 }, { "epoch": 0.23754500544251864, "grad_norm": 0.25531086325645447, "learning_rate": 0.00019693682468792975, "loss": 11.6722, "step": 11348 }, { "epoch": 0.23756593820648078, "grad_norm": 0.3362026512622833, "learning_rate": 0.00019693628615248063, "loss": 11.6831, "step": 11349 }, { "epoch": 0.23758687097044293, "grad_norm": 0.27171263098716736, "learning_rate": 0.0001969357475704323, "loss": 11.6744, "step": 11350 }, { "epoch": 0.2376078037344051, "grad_norm": 0.23156027495861053, "learning_rate": 0.00019693520894178504, "loss": 11.6805, "step": 11351 }, { "epoch": 0.23762873649836724, "grad_norm": 0.23238274455070496, "learning_rate": 0.00019693467026653903, "loss": 11.6739, "step": 11352 }, { "epoch": 0.2376496692623294, "grad_norm": 0.21930387616157532, "learning_rate": 0.0001969341315446946, "loss": 11.6712, "step": 11353 }, { "epoch": 0.23767060202629156, "grad_norm": 0.27000680565834045, "learning_rate": 0.00019693359277625201, "loss": 11.6822, "step": 11354 }, { "epoch": 0.2376915347902537, "grad_norm": 0.2370986044406891, "learning_rate": 0.0001969330539612115, "loss": 11.6639, "step": 11355 }, { "epoch": 0.23771246755421585, "grad_norm": 0.29068702459335327, "learning_rate": 0.0001969325150995733, "loss": 11.6937, "step": 11356 }, { "epoch": 0.23773340031817802, "grad_norm": 0.2821868658065796, "learning_rate": 0.0001969319761913377, "loss": 11.6805, "step": 11357 }, { "epoch": 0.23775433308214017, "grad_norm": 0.2329416424036026, "learning_rate": 0.00019693143723650497, "loss": 11.6706, "step": 11358 }, { "epoch": 0.2377752658461023, "grad_norm": 0.23748257756233215, "learning_rate": 0.00019693089823507535, "loss": 11.6628, "step": 11359 }, { "epoch": 0.23779619861006449, "grad_norm": 0.23261615633964539, "learning_rate": 0.0001969303591870491, "loss": 11.6428, "step": 11360 }, { "epoch": 0.23781713137402663, "grad_norm": 0.3006993234157562, "learning_rate": 0.00019692982009242652, "loss": 11.6875, "step": 11361 }, { "epoch": 0.23783806413798877, "grad_norm": 0.250284343957901, "learning_rate": 0.00019692928095120779, "loss": 11.6777, "step": 11362 }, { "epoch": 0.23785899690195092, "grad_norm": 0.2620970904827118, "learning_rate": 0.00019692874176339322, "loss": 11.6731, "step": 11363 }, { "epoch": 0.2378799296659131, "grad_norm": 0.24343517422676086, "learning_rate": 0.00019692820252898308, "loss": 11.687, "step": 11364 }, { "epoch": 0.23790086242987524, "grad_norm": 0.27411216497421265, "learning_rate": 0.0001969276632479776, "loss": 11.6637, "step": 11365 }, { "epoch": 0.23792179519383738, "grad_norm": 0.29179492592811584, "learning_rate": 0.00019692712392037703, "loss": 11.6676, "step": 11366 }, { "epoch": 0.23794272795779955, "grad_norm": 0.2436501383781433, "learning_rate": 0.00019692658454618166, "loss": 11.6878, "step": 11367 }, { "epoch": 0.2379636607217617, "grad_norm": 0.23705214262008667, "learning_rate": 0.00019692604512539178, "loss": 11.6655, "step": 11368 }, { "epoch": 0.23798459348572384, "grad_norm": 0.37130481004714966, "learning_rate": 0.00019692550565800755, "loss": 11.6838, "step": 11369 }, { "epoch": 0.23800552624968602, "grad_norm": 0.2517484724521637, "learning_rate": 0.00019692496614402932, "loss": 11.6577, "step": 11370 }, { "epoch": 0.23802645901364816, "grad_norm": 0.28431785106658936, "learning_rate": 0.0001969244265834573, "loss": 11.669, "step": 11371 }, { "epoch": 0.2380473917776103, "grad_norm": 0.38834458589553833, "learning_rate": 0.00019692388697629178, "loss": 11.6656, "step": 11372 }, { "epoch": 0.23806832454157248, "grad_norm": 0.33346831798553467, "learning_rate": 0.00019692334732253298, "loss": 11.6838, "step": 11373 }, { "epoch": 0.23808925730553462, "grad_norm": 0.320517361164093, "learning_rate": 0.00019692280762218122, "loss": 11.693, "step": 11374 }, { "epoch": 0.23811019006949677, "grad_norm": 0.3353988528251648, "learning_rate": 0.0001969222678752367, "loss": 11.6766, "step": 11375 }, { "epoch": 0.23813112283345894, "grad_norm": 0.2530510425567627, "learning_rate": 0.00019692172808169974, "loss": 11.684, "step": 11376 }, { "epoch": 0.2381520555974211, "grad_norm": 0.2465306967496872, "learning_rate": 0.00019692118824157052, "loss": 11.6596, "step": 11377 }, { "epoch": 0.23817298836138323, "grad_norm": 0.288247287273407, "learning_rate": 0.00019692064835484936, "loss": 11.6723, "step": 11378 }, { "epoch": 0.2381939211253454, "grad_norm": 0.24532708525657654, "learning_rate": 0.00019692010842153648, "loss": 11.6621, "step": 11379 }, { "epoch": 0.23821485388930755, "grad_norm": 0.2527288496494293, "learning_rate": 0.0001969195684416322, "loss": 11.6669, "step": 11380 }, { "epoch": 0.2382357866532697, "grad_norm": 0.3659381866455078, "learning_rate": 0.0001969190284151367, "loss": 11.6786, "step": 11381 }, { "epoch": 0.23825671941723184, "grad_norm": 0.25565409660339355, "learning_rate": 0.00019691848834205028, "loss": 11.6882, "step": 11382 }, { "epoch": 0.238277652181194, "grad_norm": 0.2628132700920105, "learning_rate": 0.00019691794822237325, "loss": 11.6774, "step": 11383 }, { "epoch": 0.23829858494515616, "grad_norm": 0.24462053179740906, "learning_rate": 0.00019691740805610576, "loss": 11.6609, "step": 11384 }, { "epoch": 0.2383195177091183, "grad_norm": 0.24485807120800018, "learning_rate": 0.0001969168678432482, "loss": 11.6654, "step": 11385 }, { "epoch": 0.23834045047308047, "grad_norm": 0.26758310198783875, "learning_rate": 0.0001969163275838007, "loss": 11.6719, "step": 11386 }, { "epoch": 0.23836138323704262, "grad_norm": 0.2557593882083893, "learning_rate": 0.0001969157872777636, "loss": 11.6621, "step": 11387 }, { "epoch": 0.23838231600100476, "grad_norm": 0.2540559768676758, "learning_rate": 0.00019691524692513714, "loss": 11.6803, "step": 11388 }, { "epoch": 0.23840324876496694, "grad_norm": 0.3153545558452606, "learning_rate": 0.00019691470652592158, "loss": 11.6858, "step": 11389 }, { "epoch": 0.23842418152892908, "grad_norm": 0.23301036655902863, "learning_rate": 0.00019691416608011718, "loss": 11.6767, "step": 11390 }, { "epoch": 0.23844511429289122, "grad_norm": 0.2851318418979645, "learning_rate": 0.0001969136255877242, "loss": 11.6644, "step": 11391 }, { "epoch": 0.2384660470568534, "grad_norm": 0.2672741711139679, "learning_rate": 0.00019691308504874287, "loss": 11.6715, "step": 11392 }, { "epoch": 0.23848697982081554, "grad_norm": 0.22236333787441254, "learning_rate": 0.0001969125444631735, "loss": 11.6868, "step": 11393 }, { "epoch": 0.2385079125847777, "grad_norm": 0.23592780530452728, "learning_rate": 0.00019691200383101633, "loss": 11.6804, "step": 11394 }, { "epoch": 0.23852884534873986, "grad_norm": 0.26740509271621704, "learning_rate": 0.00019691146315227163, "loss": 11.6788, "step": 11395 }, { "epoch": 0.238549778112702, "grad_norm": 0.23561733961105347, "learning_rate": 0.00019691092242693964, "loss": 11.6764, "step": 11396 }, { "epoch": 0.23857071087666415, "grad_norm": 0.28102371096611023, "learning_rate": 0.0001969103816550206, "loss": 11.6864, "step": 11397 }, { "epoch": 0.23859164364062632, "grad_norm": 0.21490544080734253, "learning_rate": 0.00019690984083651483, "loss": 11.6632, "step": 11398 }, { "epoch": 0.23861257640458847, "grad_norm": 0.292349249124527, "learning_rate": 0.00019690929997142253, "loss": 11.6643, "step": 11399 }, { "epoch": 0.2386335091685506, "grad_norm": 0.2509537637233734, "learning_rate": 0.000196908759059744, "loss": 11.6905, "step": 11400 }, { "epoch": 0.23865444193251276, "grad_norm": 0.2296757698059082, "learning_rate": 0.0001969082181014795, "loss": 11.6642, "step": 11401 }, { "epoch": 0.23867537469647493, "grad_norm": 0.2809019982814789, "learning_rate": 0.00019690767709662924, "loss": 11.6808, "step": 11402 }, { "epoch": 0.23869630746043707, "grad_norm": 0.22949732840061188, "learning_rate": 0.00019690713604519357, "loss": 11.6652, "step": 11403 }, { "epoch": 0.23871724022439922, "grad_norm": 0.2628082036972046, "learning_rate": 0.00019690659494717267, "loss": 11.6829, "step": 11404 }, { "epoch": 0.2387381729883614, "grad_norm": 0.2728184759616852, "learning_rate": 0.00019690605380256684, "loss": 11.6784, "step": 11405 }, { "epoch": 0.23875910575232354, "grad_norm": 0.284123033285141, "learning_rate": 0.00019690551261137632, "loss": 11.6781, "step": 11406 }, { "epoch": 0.23878003851628568, "grad_norm": 0.293734610080719, "learning_rate": 0.00019690497137360134, "loss": 11.6862, "step": 11407 }, { "epoch": 0.23880097128024785, "grad_norm": 0.21316543221473694, "learning_rate": 0.00019690443008924227, "loss": 11.6741, "step": 11408 }, { "epoch": 0.23882190404421, "grad_norm": 0.24538524448871613, "learning_rate": 0.00019690388875829925, "loss": 11.6621, "step": 11409 }, { "epoch": 0.23884283680817214, "grad_norm": 0.2311360090970993, "learning_rate": 0.00019690334738077262, "loss": 11.6696, "step": 11410 }, { "epoch": 0.23886376957213432, "grad_norm": 0.24130740761756897, "learning_rate": 0.00019690280595666257, "loss": 11.6872, "step": 11411 }, { "epoch": 0.23888470233609646, "grad_norm": 0.21426229178905487, "learning_rate": 0.00019690226448596941, "loss": 11.6728, "step": 11412 }, { "epoch": 0.2389056351000586, "grad_norm": 0.27300891280174255, "learning_rate": 0.0001969017229686934, "loss": 11.6919, "step": 11413 }, { "epoch": 0.23892656786402078, "grad_norm": 0.22092518210411072, "learning_rate": 0.0001969011814048348, "loss": 11.6832, "step": 11414 }, { "epoch": 0.23894750062798292, "grad_norm": 0.2499883472919464, "learning_rate": 0.00019690063979439386, "loss": 11.6871, "step": 11415 }, { "epoch": 0.23896843339194507, "grad_norm": 0.25955620408058167, "learning_rate": 0.00019690009813737083, "loss": 11.6689, "step": 11416 }, { "epoch": 0.2389893661559072, "grad_norm": 0.28283965587615967, "learning_rate": 0.00019689955643376598, "loss": 11.6744, "step": 11417 }, { "epoch": 0.23901029891986939, "grad_norm": 0.28610071539878845, "learning_rate": 0.00019689901468357957, "loss": 11.6802, "step": 11418 }, { "epoch": 0.23903123168383153, "grad_norm": 0.27587416768074036, "learning_rate": 0.00019689847288681186, "loss": 11.6793, "step": 11419 }, { "epoch": 0.23905216444779367, "grad_norm": 0.2061351090669632, "learning_rate": 0.0001968979310434631, "loss": 11.6666, "step": 11420 }, { "epoch": 0.23907309721175585, "grad_norm": 0.3027082085609436, "learning_rate": 0.00019689738915353358, "loss": 11.6768, "step": 11421 }, { "epoch": 0.239094029975718, "grad_norm": 0.25785306096076965, "learning_rate": 0.00019689684721702357, "loss": 11.667, "step": 11422 }, { "epoch": 0.23911496273968014, "grad_norm": 0.2379174381494522, "learning_rate": 0.00019689630523393324, "loss": 11.6771, "step": 11423 }, { "epoch": 0.2391358955036423, "grad_norm": 0.22964335978031158, "learning_rate": 0.00019689576320426296, "loss": 11.6747, "step": 11424 }, { "epoch": 0.23915682826760445, "grad_norm": 0.22015388309955597, "learning_rate": 0.00019689522112801292, "loss": 11.684, "step": 11425 }, { "epoch": 0.2391777610315666, "grad_norm": 0.27340731024742126, "learning_rate": 0.00019689467900518346, "loss": 11.699, "step": 11426 }, { "epoch": 0.23919869379552877, "grad_norm": 0.43002980947494507, "learning_rate": 0.00019689413683577473, "loss": 11.6802, "step": 11427 }, { "epoch": 0.23921962655949092, "grad_norm": 0.24932458996772766, "learning_rate": 0.00019689359461978704, "loss": 11.6786, "step": 11428 }, { "epoch": 0.23924055932345306, "grad_norm": 0.29111748933792114, "learning_rate": 0.00019689305235722068, "loss": 11.6973, "step": 11429 }, { "epoch": 0.23926149208741523, "grad_norm": 0.23961669206619263, "learning_rate": 0.0001968925100480759, "loss": 11.6601, "step": 11430 }, { "epoch": 0.23928242485137738, "grad_norm": 0.22550886869430542, "learning_rate": 0.00019689196769235293, "loss": 11.6667, "step": 11431 }, { "epoch": 0.23930335761533952, "grad_norm": 0.3107045888900757, "learning_rate": 0.0001968914252900521, "loss": 11.6815, "step": 11432 }, { "epoch": 0.2393242903793017, "grad_norm": 0.2895697057247162, "learning_rate": 0.00019689088284117356, "loss": 11.7028, "step": 11433 }, { "epoch": 0.23934522314326384, "grad_norm": 1.002822995185852, "learning_rate": 0.00019689034034571764, "loss": 11.6828, "step": 11434 }, { "epoch": 0.23936615590722599, "grad_norm": 0.2345753312110901, "learning_rate": 0.00019688979780368458, "loss": 11.6735, "step": 11435 }, { "epoch": 0.23938708867118813, "grad_norm": 0.233476921916008, "learning_rate": 0.0001968892552150747, "loss": 11.6734, "step": 11436 }, { "epoch": 0.2394080214351503, "grad_norm": 0.30652347207069397, "learning_rate": 0.00019688871257988818, "loss": 11.6798, "step": 11437 }, { "epoch": 0.23942895419911245, "grad_norm": 0.2543955147266388, "learning_rate": 0.00019688816989812532, "loss": 11.6704, "step": 11438 }, { "epoch": 0.2394498869630746, "grad_norm": 0.2566307783126831, "learning_rate": 0.00019688762716978637, "loss": 11.6718, "step": 11439 }, { "epoch": 0.23947081972703677, "grad_norm": 0.2891349494457245, "learning_rate": 0.00019688708439487157, "loss": 11.6798, "step": 11440 }, { "epoch": 0.2394917524909989, "grad_norm": 0.23762595653533936, "learning_rate": 0.00019688654157338123, "loss": 11.6861, "step": 11441 }, { "epoch": 0.23951268525496106, "grad_norm": 0.36461398005485535, "learning_rate": 0.00019688599870531563, "loss": 11.6819, "step": 11442 }, { "epoch": 0.23953361801892323, "grad_norm": 0.2848193943500519, "learning_rate": 0.00019688545579067495, "loss": 11.671, "step": 11443 }, { "epoch": 0.23955455078288537, "grad_norm": 0.3189832270145416, "learning_rate": 0.0001968849128294595, "loss": 11.6948, "step": 11444 }, { "epoch": 0.23957548354684752, "grad_norm": 0.2404317706823349, "learning_rate": 0.00019688436982166952, "loss": 11.6686, "step": 11445 }, { "epoch": 0.2395964163108097, "grad_norm": 0.22173576056957245, "learning_rate": 0.00019688382676730527, "loss": 11.6643, "step": 11446 }, { "epoch": 0.23961734907477183, "grad_norm": 0.30529969930648804, "learning_rate": 0.00019688328366636703, "loss": 11.6859, "step": 11447 }, { "epoch": 0.23963828183873398, "grad_norm": 0.27665597200393677, "learning_rate": 0.0001968827405188551, "loss": 11.6775, "step": 11448 }, { "epoch": 0.23965921460269615, "grad_norm": 0.2413823902606964, "learning_rate": 0.00019688219732476965, "loss": 11.6867, "step": 11449 }, { "epoch": 0.2396801473666583, "grad_norm": 0.23722036182880402, "learning_rate": 0.00019688165408411097, "loss": 11.6919, "step": 11450 }, { "epoch": 0.23970108013062044, "grad_norm": 0.24864080548286438, "learning_rate": 0.00019688111079687936, "loss": 11.6776, "step": 11451 }, { "epoch": 0.23972201289458261, "grad_norm": 0.252845823764801, "learning_rate": 0.00019688056746307506, "loss": 11.681, "step": 11452 }, { "epoch": 0.23974294565854476, "grad_norm": 0.32267919182777405, "learning_rate": 0.00019688002408269835, "loss": 11.6864, "step": 11453 }, { "epoch": 0.2397638784225069, "grad_norm": 0.28333157300949097, "learning_rate": 0.00019687948065574942, "loss": 11.6829, "step": 11454 }, { "epoch": 0.23978481118646905, "grad_norm": 0.21357659995555878, "learning_rate": 0.00019687893718222863, "loss": 11.6759, "step": 11455 }, { "epoch": 0.23980574395043122, "grad_norm": 0.30201011896133423, "learning_rate": 0.0001968783936621362, "loss": 11.6883, "step": 11456 }, { "epoch": 0.23982667671439337, "grad_norm": 0.3004101812839508, "learning_rate": 0.00019687785009547235, "loss": 11.6637, "step": 11457 }, { "epoch": 0.2398476094783555, "grad_norm": 0.2757614850997925, "learning_rate": 0.00019687730648223738, "loss": 11.6589, "step": 11458 }, { "epoch": 0.23986854224231768, "grad_norm": 0.21938946843147278, "learning_rate": 0.00019687676282243154, "loss": 11.6935, "step": 11459 }, { "epoch": 0.23988947500627983, "grad_norm": 0.25719335675239563, "learning_rate": 0.00019687621911605515, "loss": 11.6791, "step": 11460 }, { "epoch": 0.23991040777024197, "grad_norm": 0.2691565454006195, "learning_rate": 0.00019687567536310835, "loss": 11.675, "step": 11461 }, { "epoch": 0.23993134053420415, "grad_norm": 0.2706129848957062, "learning_rate": 0.00019687513156359148, "loss": 11.6694, "step": 11462 }, { "epoch": 0.2399522732981663, "grad_norm": 0.32651400566101074, "learning_rate": 0.00019687458771750486, "loss": 11.6744, "step": 11463 }, { "epoch": 0.23997320606212844, "grad_norm": 0.2045867145061493, "learning_rate": 0.00019687404382484863, "loss": 11.674, "step": 11464 }, { "epoch": 0.2399941388260906, "grad_norm": 0.20091204345226288, "learning_rate": 0.00019687349988562313, "loss": 11.6704, "step": 11465 }, { "epoch": 0.24001507159005275, "grad_norm": 0.28583022952079773, "learning_rate": 0.00019687295589982857, "loss": 11.6742, "step": 11466 }, { "epoch": 0.2400360043540149, "grad_norm": 0.25718632340431213, "learning_rate": 0.00019687241186746524, "loss": 11.6692, "step": 11467 }, { "epoch": 0.24005693711797707, "grad_norm": 0.2117307335138321, "learning_rate": 0.0001968718677885334, "loss": 11.6702, "step": 11468 }, { "epoch": 0.24007786988193922, "grad_norm": 0.2545081377029419, "learning_rate": 0.00019687132366303332, "loss": 11.6704, "step": 11469 }, { "epoch": 0.24009880264590136, "grad_norm": 0.22465506196022034, "learning_rate": 0.0001968707794909653, "loss": 11.6807, "step": 11470 }, { "epoch": 0.2401197354098635, "grad_norm": 0.27150341868400574, "learning_rate": 0.0001968702352723295, "loss": 11.6734, "step": 11471 }, { "epoch": 0.24014066817382568, "grad_norm": 0.2918114960193634, "learning_rate": 0.00019686969100712624, "loss": 11.6779, "step": 11472 }, { "epoch": 0.24016160093778782, "grad_norm": 0.25202542543411255, "learning_rate": 0.00019686914669535577, "loss": 11.6888, "step": 11473 }, { "epoch": 0.24018253370174997, "grad_norm": 0.2154456228017807, "learning_rate": 0.0001968686023370184, "loss": 11.6774, "step": 11474 }, { "epoch": 0.24020346646571214, "grad_norm": 0.31112930178642273, "learning_rate": 0.00019686805793211435, "loss": 11.6768, "step": 11475 }, { "epoch": 0.24022439922967428, "grad_norm": 0.2582041919231415, "learning_rate": 0.00019686751348064384, "loss": 11.6865, "step": 11476 }, { "epoch": 0.24024533199363643, "grad_norm": 0.2195984423160553, "learning_rate": 0.0001968669689826072, "loss": 11.666, "step": 11477 }, { "epoch": 0.2402662647575986, "grad_norm": 0.21777571737766266, "learning_rate": 0.00019686642443800468, "loss": 11.6748, "step": 11478 }, { "epoch": 0.24028719752156075, "grad_norm": 0.2783684730529785, "learning_rate": 0.0001968658798468365, "loss": 11.702, "step": 11479 }, { "epoch": 0.2403081302855229, "grad_norm": 0.2343061864376068, "learning_rate": 0.00019686533520910297, "loss": 11.6746, "step": 11480 }, { "epoch": 0.24032906304948506, "grad_norm": 0.31663060188293457, "learning_rate": 0.0001968647905248043, "loss": 11.6838, "step": 11481 }, { "epoch": 0.2403499958134472, "grad_norm": 0.27625471353530884, "learning_rate": 0.00019686424579394083, "loss": 11.6833, "step": 11482 }, { "epoch": 0.24037092857740935, "grad_norm": 0.2933885157108307, "learning_rate": 0.00019686370101651277, "loss": 11.6948, "step": 11483 }, { "epoch": 0.24039186134137153, "grad_norm": 0.30854901671409607, "learning_rate": 0.00019686315619252037, "loss": 11.7001, "step": 11484 }, { "epoch": 0.24041279410533367, "grad_norm": 0.21218261122703552, "learning_rate": 0.0001968626113219639, "loss": 11.672, "step": 11485 }, { "epoch": 0.24043372686929582, "grad_norm": 0.28536322712898254, "learning_rate": 0.00019686206640484366, "loss": 11.6711, "step": 11486 }, { "epoch": 0.240454659633258, "grad_norm": 0.23229631781578064, "learning_rate": 0.00019686152144115986, "loss": 11.6797, "step": 11487 }, { "epoch": 0.24047559239722013, "grad_norm": 0.24075153470039368, "learning_rate": 0.0001968609764309128, "loss": 11.6811, "step": 11488 }, { "epoch": 0.24049652516118228, "grad_norm": 0.2569393217563629, "learning_rate": 0.0001968604313741027, "loss": 11.6852, "step": 11489 }, { "epoch": 0.24051745792514442, "grad_norm": 0.24274539947509766, "learning_rate": 0.0001968598862707299, "loss": 11.6845, "step": 11490 }, { "epoch": 0.2405383906891066, "grad_norm": 0.2636181116104126, "learning_rate": 0.00019685934112079458, "loss": 11.6635, "step": 11491 }, { "epoch": 0.24055932345306874, "grad_norm": 0.23744119703769684, "learning_rate": 0.00019685879592429704, "loss": 11.678, "step": 11492 }, { "epoch": 0.24058025621703089, "grad_norm": 0.21319183707237244, "learning_rate": 0.00019685825068123755, "loss": 11.6751, "step": 11493 }, { "epoch": 0.24060118898099306, "grad_norm": 0.2054394632577896, "learning_rate": 0.0001968577053916163, "loss": 11.6861, "step": 11494 }, { "epoch": 0.2406221217449552, "grad_norm": 0.2960839569568634, "learning_rate": 0.00019685716005543363, "loss": 11.6969, "step": 11495 }, { "epoch": 0.24064305450891735, "grad_norm": 0.2766166925430298, "learning_rate": 0.00019685661467268984, "loss": 11.68, "step": 11496 }, { "epoch": 0.24066398727287952, "grad_norm": 0.2730282247066498, "learning_rate": 0.0001968560692433851, "loss": 11.6529, "step": 11497 }, { "epoch": 0.24068492003684167, "grad_norm": 0.2260381579399109, "learning_rate": 0.00019685552376751968, "loss": 11.6688, "step": 11498 }, { "epoch": 0.2407058528008038, "grad_norm": 0.2056184858083725, "learning_rate": 0.00019685497824509392, "loss": 11.6705, "step": 11499 }, { "epoch": 0.24072678556476598, "grad_norm": 0.21015530824661255, "learning_rate": 0.00019685443267610796, "loss": 11.6771, "step": 11500 }, { "epoch": 0.24074771832872813, "grad_norm": 0.23021994531154633, "learning_rate": 0.0001968538870605622, "loss": 11.6715, "step": 11501 }, { "epoch": 0.24076865109269027, "grad_norm": 0.2398614138364792, "learning_rate": 0.0001968533413984568, "loss": 11.6645, "step": 11502 }, { "epoch": 0.24078958385665244, "grad_norm": 0.29990553855895996, "learning_rate": 0.0001968527956897921, "loss": 11.677, "step": 11503 }, { "epoch": 0.2408105166206146, "grad_norm": 0.282405823469162, "learning_rate": 0.00019685224993456829, "loss": 11.6629, "step": 11504 }, { "epoch": 0.24083144938457673, "grad_norm": 0.29321834444999695, "learning_rate": 0.0001968517041327856, "loss": 11.6518, "step": 11505 }, { "epoch": 0.24085238214853888, "grad_norm": 0.24525971710681915, "learning_rate": 0.00019685115828444445, "loss": 11.6818, "step": 11506 }, { "epoch": 0.24087331491250105, "grad_norm": 0.28307631611824036, "learning_rate": 0.00019685061238954496, "loss": 11.6854, "step": 11507 }, { "epoch": 0.2408942476764632, "grad_norm": 0.2750409245491028, "learning_rate": 0.00019685006644808744, "loss": 11.6702, "step": 11508 }, { "epoch": 0.24091518044042534, "grad_norm": 0.26927974820137024, "learning_rate": 0.0001968495204600722, "loss": 11.6684, "step": 11509 }, { "epoch": 0.24093611320438751, "grad_norm": 0.271193265914917, "learning_rate": 0.00019684897442549942, "loss": 11.6823, "step": 11510 }, { "epoch": 0.24095704596834966, "grad_norm": 0.2807227373123169, "learning_rate": 0.0001968484283443694, "loss": 11.6663, "step": 11511 }, { "epoch": 0.2409779787323118, "grad_norm": 0.32475972175598145, "learning_rate": 0.00019684788221668238, "loss": 11.6759, "step": 11512 }, { "epoch": 0.24099891149627398, "grad_norm": 0.22757470607757568, "learning_rate": 0.0001968473360424387, "loss": 11.6643, "step": 11513 }, { "epoch": 0.24101984426023612, "grad_norm": 0.3404393494129181, "learning_rate": 0.0001968467898216385, "loss": 11.687, "step": 11514 }, { "epoch": 0.24104077702419827, "grad_norm": 0.3084554672241211, "learning_rate": 0.0001968462435542821, "loss": 11.6652, "step": 11515 }, { "epoch": 0.24106170978816044, "grad_norm": 0.21010935306549072, "learning_rate": 0.00019684569724036984, "loss": 11.6877, "step": 11516 }, { "epoch": 0.24108264255212258, "grad_norm": 0.242953822016716, "learning_rate": 0.00019684515087990185, "loss": 11.6717, "step": 11517 }, { "epoch": 0.24110357531608473, "grad_norm": 0.2358892261981964, "learning_rate": 0.0001968446044728785, "loss": 11.6741, "step": 11518 }, { "epoch": 0.2411245080800469, "grad_norm": 0.24247770011425018, "learning_rate": 0.00019684405801929998, "loss": 11.6821, "step": 11519 }, { "epoch": 0.24114544084400905, "grad_norm": 0.2955739200115204, "learning_rate": 0.00019684351151916658, "loss": 11.6748, "step": 11520 }, { "epoch": 0.2411663736079712, "grad_norm": 0.33174026012420654, "learning_rate": 0.00019684296497247855, "loss": 11.6739, "step": 11521 }, { "epoch": 0.24118730637193336, "grad_norm": 0.26150864362716675, "learning_rate": 0.0001968424183792362, "loss": 11.6702, "step": 11522 }, { "epoch": 0.2412082391358955, "grad_norm": 0.2084018737077713, "learning_rate": 0.00019684187173943976, "loss": 11.6853, "step": 11523 }, { "epoch": 0.24122917189985765, "grad_norm": 0.30600419640541077, "learning_rate": 0.0001968413250530895, "loss": 11.6792, "step": 11524 }, { "epoch": 0.2412501046638198, "grad_norm": 0.3469846844673157, "learning_rate": 0.0001968407783201856, "loss": 11.675, "step": 11525 }, { "epoch": 0.24127103742778197, "grad_norm": 0.27237921953201294, "learning_rate": 0.00019684023154072845, "loss": 11.6896, "step": 11526 }, { "epoch": 0.24129197019174412, "grad_norm": 0.24790778756141663, "learning_rate": 0.00019683968471471825, "loss": 11.6861, "step": 11527 }, { "epoch": 0.24131290295570626, "grad_norm": 0.29393163323402405, "learning_rate": 0.00019683913784215528, "loss": 11.6707, "step": 11528 }, { "epoch": 0.24133383571966843, "grad_norm": 0.22742341458797455, "learning_rate": 0.00019683859092303982, "loss": 11.6771, "step": 11529 }, { "epoch": 0.24135476848363058, "grad_norm": 0.2541314661502838, "learning_rate": 0.0001968380439573721, "loss": 11.6709, "step": 11530 }, { "epoch": 0.24137570124759272, "grad_norm": 0.2834857702255249, "learning_rate": 0.00019683749694515235, "loss": 11.6912, "step": 11531 }, { "epoch": 0.2413966340115549, "grad_norm": 0.25993260741233826, "learning_rate": 0.0001968369498863809, "loss": 11.6658, "step": 11532 }, { "epoch": 0.24141756677551704, "grad_norm": 0.3720017075538635, "learning_rate": 0.000196836402781058, "loss": 11.6718, "step": 11533 }, { "epoch": 0.24143849953947918, "grad_norm": 0.2474985122680664, "learning_rate": 0.00019683585562918387, "loss": 11.6858, "step": 11534 }, { "epoch": 0.24145943230344136, "grad_norm": 0.24115365743637085, "learning_rate": 0.00019683530843075885, "loss": 11.6804, "step": 11535 }, { "epoch": 0.2414803650674035, "grad_norm": 0.27492910623550415, "learning_rate": 0.0001968347611857831, "loss": 11.6802, "step": 11536 }, { "epoch": 0.24150129783136565, "grad_norm": 0.22384780645370483, "learning_rate": 0.00019683421389425697, "loss": 11.6916, "step": 11537 }, { "epoch": 0.24152223059532782, "grad_norm": 0.21233713626861572, "learning_rate": 0.00019683366655618068, "loss": 11.6775, "step": 11538 }, { "epoch": 0.24154316335928996, "grad_norm": 0.20937108993530273, "learning_rate": 0.00019683311917155453, "loss": 11.6621, "step": 11539 }, { "epoch": 0.2415640961232521, "grad_norm": 0.2932741343975067, "learning_rate": 0.00019683257174037876, "loss": 11.6783, "step": 11540 }, { "epoch": 0.24158502888721428, "grad_norm": 0.2554040551185608, "learning_rate": 0.0001968320242626536, "loss": 11.6897, "step": 11541 }, { "epoch": 0.24160596165117643, "grad_norm": 0.24132879078388214, "learning_rate": 0.0001968314767383794, "loss": 11.6766, "step": 11542 }, { "epoch": 0.24162689441513857, "grad_norm": 0.3476201891899109, "learning_rate": 0.00019683092916755632, "loss": 11.6907, "step": 11543 }, { "epoch": 0.24164782717910072, "grad_norm": 0.3314189314842224, "learning_rate": 0.0001968303815501847, "loss": 11.6713, "step": 11544 }, { "epoch": 0.2416687599430629, "grad_norm": 0.2562156319618225, "learning_rate": 0.00019682983388626474, "loss": 11.6711, "step": 11545 }, { "epoch": 0.24168969270702503, "grad_norm": 0.2510015666484833, "learning_rate": 0.00019682928617579677, "loss": 11.6912, "step": 11546 }, { "epoch": 0.24171062547098718, "grad_norm": 0.23370137810707092, "learning_rate": 0.00019682873841878103, "loss": 11.6798, "step": 11547 }, { "epoch": 0.24173155823494935, "grad_norm": 0.26422759890556335, "learning_rate": 0.00019682819061521772, "loss": 11.6888, "step": 11548 }, { "epoch": 0.2417524909989115, "grad_norm": 0.23825377225875854, "learning_rate": 0.0001968276427651072, "loss": 11.6686, "step": 11549 }, { "epoch": 0.24177342376287364, "grad_norm": 0.2418021261692047, "learning_rate": 0.0001968270948684497, "loss": 11.6624, "step": 11550 }, { "epoch": 0.2417943565268358, "grad_norm": 0.23082207143306732, "learning_rate": 0.00019682654692524545, "loss": 11.6889, "step": 11551 }, { "epoch": 0.24181528929079796, "grad_norm": 0.29373878240585327, "learning_rate": 0.00019682599893549476, "loss": 11.6764, "step": 11552 }, { "epoch": 0.2418362220547601, "grad_norm": 0.33736008405685425, "learning_rate": 0.00019682545089919784, "loss": 11.6813, "step": 11553 }, { "epoch": 0.24185715481872228, "grad_norm": 0.2532752752304077, "learning_rate": 0.00019682490281635503, "loss": 11.6727, "step": 11554 }, { "epoch": 0.24187808758268442, "grad_norm": 0.2236488312482834, "learning_rate": 0.00019682435468696652, "loss": 11.6798, "step": 11555 }, { "epoch": 0.24189902034664656, "grad_norm": 0.261331170797348, "learning_rate": 0.0001968238065110326, "loss": 11.6786, "step": 11556 }, { "epoch": 0.24191995311060874, "grad_norm": 0.3020852506160736, "learning_rate": 0.00019682325828855355, "loss": 11.6785, "step": 11557 }, { "epoch": 0.24194088587457088, "grad_norm": 0.22106052935123444, "learning_rate": 0.0001968227100195296, "loss": 11.6817, "step": 11558 }, { "epoch": 0.24196181863853303, "grad_norm": 0.20993341505527496, "learning_rate": 0.00019682216170396105, "loss": 11.6594, "step": 11559 }, { "epoch": 0.24198275140249517, "grad_norm": 0.274486243724823, "learning_rate": 0.00019682161334184814, "loss": 11.6831, "step": 11560 }, { "epoch": 0.24200368416645734, "grad_norm": 0.30742982029914856, "learning_rate": 0.00019682106493319114, "loss": 11.6771, "step": 11561 }, { "epoch": 0.2420246169304195, "grad_norm": 0.24652139842510223, "learning_rate": 0.00019682051647799032, "loss": 11.6785, "step": 11562 }, { "epoch": 0.24204554969438163, "grad_norm": 0.21910591423511505, "learning_rate": 0.00019681996797624594, "loss": 11.6721, "step": 11563 }, { "epoch": 0.2420664824583438, "grad_norm": 0.22478501498699188, "learning_rate": 0.00019681941942795825, "loss": 11.6781, "step": 11564 }, { "epoch": 0.24208741522230595, "grad_norm": 0.2650853097438812, "learning_rate": 0.0001968188708331275, "loss": 11.6683, "step": 11565 }, { "epoch": 0.2421083479862681, "grad_norm": 0.2448521852493286, "learning_rate": 0.00019681832219175401, "loss": 11.665, "step": 11566 }, { "epoch": 0.24212928075023027, "grad_norm": 0.22108100354671478, "learning_rate": 0.00019681777350383803, "loss": 11.6668, "step": 11567 }, { "epoch": 0.24215021351419241, "grad_norm": 0.2390725165605545, "learning_rate": 0.0001968172247693798, "loss": 11.6757, "step": 11568 }, { "epoch": 0.24217114627815456, "grad_norm": 0.28343477845191956, "learning_rate": 0.00019681667598837955, "loss": 11.681, "step": 11569 }, { "epoch": 0.24219207904211673, "grad_norm": 0.2357146292924881, "learning_rate": 0.00019681612716083763, "loss": 11.6866, "step": 11570 }, { "epoch": 0.24221301180607888, "grad_norm": 0.2978142499923706, "learning_rate": 0.00019681557828675424, "loss": 11.6787, "step": 11571 }, { "epoch": 0.24223394457004102, "grad_norm": 0.2287467122077942, "learning_rate": 0.00019681502936612962, "loss": 11.6736, "step": 11572 }, { "epoch": 0.2422548773340032, "grad_norm": 0.3294108211994171, "learning_rate": 0.00019681448039896414, "loss": 11.6615, "step": 11573 }, { "epoch": 0.24227581009796534, "grad_norm": 0.23090362548828125, "learning_rate": 0.00019681393138525798, "loss": 11.6716, "step": 11574 }, { "epoch": 0.24229674286192748, "grad_norm": 0.23037807643413544, "learning_rate": 0.0001968133823250114, "loss": 11.6908, "step": 11575 }, { "epoch": 0.24231767562588966, "grad_norm": 0.29443952441215515, "learning_rate": 0.00019681283321822473, "loss": 11.6776, "step": 11576 }, { "epoch": 0.2423386083898518, "grad_norm": 0.2988207936286926, "learning_rate": 0.00019681228406489814, "loss": 11.6908, "step": 11577 }, { "epoch": 0.24235954115381395, "grad_norm": 0.2907037138938904, "learning_rate": 0.00019681173486503197, "loss": 11.6772, "step": 11578 }, { "epoch": 0.2423804739177761, "grad_norm": 0.2584044635295868, "learning_rate": 0.00019681118561862645, "loss": 11.6914, "step": 11579 }, { "epoch": 0.24240140668173826, "grad_norm": 0.23470143973827362, "learning_rate": 0.00019681063632568185, "loss": 11.6805, "step": 11580 }, { "epoch": 0.2424223394457004, "grad_norm": 0.26985079050064087, "learning_rate": 0.00019681008698619844, "loss": 11.6705, "step": 11581 }, { "epoch": 0.24244327220966255, "grad_norm": 0.2548772990703583, "learning_rate": 0.0001968095376001765, "loss": 11.662, "step": 11582 }, { "epoch": 0.24246420497362473, "grad_norm": 0.22793073952198029, "learning_rate": 0.0001968089881676163, "loss": 11.6805, "step": 11583 }, { "epoch": 0.24248513773758687, "grad_norm": 0.3320971429347992, "learning_rate": 0.00019680843868851802, "loss": 11.699, "step": 11584 }, { "epoch": 0.24250607050154901, "grad_norm": 0.27250075340270996, "learning_rate": 0.000196807889162882, "loss": 11.6761, "step": 11585 }, { "epoch": 0.2425270032655112, "grad_norm": 0.2363463193178177, "learning_rate": 0.0001968073395907085, "loss": 11.6756, "step": 11586 }, { "epoch": 0.24254793602947333, "grad_norm": 0.2744397521018982, "learning_rate": 0.00019680678997199778, "loss": 11.6764, "step": 11587 }, { "epoch": 0.24256886879343548, "grad_norm": 0.22738446295261383, "learning_rate": 0.00019680624030675006, "loss": 11.6677, "step": 11588 }, { "epoch": 0.24258980155739765, "grad_norm": 0.19711506366729736, "learning_rate": 0.00019680569059496566, "loss": 11.6865, "step": 11589 }, { "epoch": 0.2426107343213598, "grad_norm": 0.3215731084346771, "learning_rate": 0.00019680514083664482, "loss": 11.7096, "step": 11590 }, { "epoch": 0.24263166708532194, "grad_norm": 0.22650814056396484, "learning_rate": 0.00019680459103178782, "loss": 11.6633, "step": 11591 }, { "epoch": 0.2426525998492841, "grad_norm": 0.5546137690544128, "learning_rate": 0.00019680404118039492, "loss": 11.6874, "step": 11592 }, { "epoch": 0.24267353261324626, "grad_norm": 0.22427698969841003, "learning_rate": 0.00019680349128246635, "loss": 11.6806, "step": 11593 }, { "epoch": 0.2426944653772084, "grad_norm": 0.26281461119651794, "learning_rate": 0.0001968029413380024, "loss": 11.6661, "step": 11594 }, { "epoch": 0.24271539814117055, "grad_norm": 0.2613069713115692, "learning_rate": 0.00019680239134700338, "loss": 11.6798, "step": 11595 }, { "epoch": 0.24273633090513272, "grad_norm": 0.3251935839653015, "learning_rate": 0.0001968018413094695, "loss": 11.6804, "step": 11596 }, { "epoch": 0.24275726366909486, "grad_norm": 0.26091378927230835, "learning_rate": 0.000196801291225401, "loss": 11.6799, "step": 11597 }, { "epoch": 0.242778196433057, "grad_norm": 0.2420531064271927, "learning_rate": 0.0001968007410947982, "loss": 11.6742, "step": 11598 }, { "epoch": 0.24279912919701918, "grad_norm": 0.3136884868144989, "learning_rate": 0.00019680019091766133, "loss": 11.6841, "step": 11599 }, { "epoch": 0.24282006196098133, "grad_norm": 0.31701141595840454, "learning_rate": 0.0001967996406939907, "loss": 11.6799, "step": 11600 }, { "epoch": 0.24284099472494347, "grad_norm": 0.23972822725772858, "learning_rate": 0.0001967990904237865, "loss": 11.6791, "step": 11601 }, { "epoch": 0.24286192748890564, "grad_norm": 0.25684571266174316, "learning_rate": 0.00019679854010704907, "loss": 11.6674, "step": 11602 }, { "epoch": 0.2428828602528678, "grad_norm": 0.2780736982822418, "learning_rate": 0.00019679798974377865, "loss": 11.6653, "step": 11603 }, { "epoch": 0.24290379301682993, "grad_norm": 0.29090434312820435, "learning_rate": 0.00019679743933397547, "loss": 11.6841, "step": 11604 }, { "epoch": 0.2429247257807921, "grad_norm": 0.2721824049949646, "learning_rate": 0.00019679688887763982, "loss": 11.6756, "step": 11605 }, { "epoch": 0.24294565854475425, "grad_norm": 0.2696794867515564, "learning_rate": 0.00019679633837477197, "loss": 11.679, "step": 11606 }, { "epoch": 0.2429665913087164, "grad_norm": 0.27645906805992126, "learning_rate": 0.0001967957878253722, "loss": 11.6888, "step": 11607 }, { "epoch": 0.24298752407267857, "grad_norm": 0.26726067066192627, "learning_rate": 0.00019679523722944072, "loss": 11.6843, "step": 11608 }, { "epoch": 0.2430084568366407, "grad_norm": 0.23755408823490143, "learning_rate": 0.00019679468658697786, "loss": 11.6789, "step": 11609 }, { "epoch": 0.24302938960060286, "grad_norm": 0.2950786054134369, "learning_rate": 0.00019679413589798387, "loss": 11.6663, "step": 11610 }, { "epoch": 0.24305032236456503, "grad_norm": 0.3860076069831848, "learning_rate": 0.00019679358516245897, "loss": 11.6703, "step": 11611 }, { "epoch": 0.24307125512852717, "grad_norm": 0.28067734837532043, "learning_rate": 0.00019679303438040346, "loss": 11.6933, "step": 11612 }, { "epoch": 0.24309218789248932, "grad_norm": 0.26207226514816284, "learning_rate": 0.0001967924835518176, "loss": 11.6689, "step": 11613 }, { "epoch": 0.24311312065645146, "grad_norm": 0.231644868850708, "learning_rate": 0.00019679193267670164, "loss": 11.6785, "step": 11614 }, { "epoch": 0.24313405342041364, "grad_norm": 0.34686458110809326, "learning_rate": 0.00019679138175505588, "loss": 11.6787, "step": 11615 }, { "epoch": 0.24315498618437578, "grad_norm": 0.24994362890720367, "learning_rate": 0.00019679083078688055, "loss": 11.6693, "step": 11616 }, { "epoch": 0.24317591894833793, "grad_norm": 0.24580071866512299, "learning_rate": 0.0001967902797721759, "loss": 11.661, "step": 11617 }, { "epoch": 0.2431968517123001, "grad_norm": 0.22976933419704437, "learning_rate": 0.00019678972871094229, "loss": 11.67, "step": 11618 }, { "epoch": 0.24321778447626224, "grad_norm": 0.22454726696014404, "learning_rate": 0.0001967891776031799, "loss": 11.6747, "step": 11619 }, { "epoch": 0.2432387172402244, "grad_norm": 0.25707972049713135, "learning_rate": 0.00019678862644888895, "loss": 11.686, "step": 11620 }, { "epoch": 0.24325965000418656, "grad_norm": 0.3531193435192108, "learning_rate": 0.0001967880752480698, "loss": 11.666, "step": 11621 }, { "epoch": 0.2432805827681487, "grad_norm": 0.2373461127281189, "learning_rate": 0.0001967875240007227, "loss": 11.6861, "step": 11622 }, { "epoch": 0.24330151553211085, "grad_norm": 0.3099033832550049, "learning_rate": 0.00019678697270684788, "loss": 11.6867, "step": 11623 }, { "epoch": 0.24332244829607302, "grad_norm": 0.23440326750278473, "learning_rate": 0.00019678642136644565, "loss": 11.6838, "step": 11624 }, { "epoch": 0.24334338106003517, "grad_norm": 0.30421197414398193, "learning_rate": 0.00019678586997951624, "loss": 11.6723, "step": 11625 }, { "epoch": 0.2433643138239973, "grad_norm": 0.30927515029907227, "learning_rate": 0.00019678531854605988, "loss": 11.6647, "step": 11626 }, { "epoch": 0.2433852465879595, "grad_norm": 0.23839595913887024, "learning_rate": 0.00019678476706607692, "loss": 11.6797, "step": 11627 }, { "epoch": 0.24340617935192163, "grad_norm": 0.30262255668640137, "learning_rate": 0.0001967842155395676, "loss": 11.6795, "step": 11628 }, { "epoch": 0.24342711211588378, "grad_norm": 0.25001248717308044, "learning_rate": 0.00019678366396653214, "loss": 11.672, "step": 11629 }, { "epoch": 0.24344804487984595, "grad_norm": 0.23620881140232086, "learning_rate": 0.00019678311234697083, "loss": 11.676, "step": 11630 }, { "epoch": 0.2434689776438081, "grad_norm": 0.21491171419620514, "learning_rate": 0.0001967825606808839, "loss": 11.6601, "step": 11631 }, { "epoch": 0.24348991040777024, "grad_norm": 0.24567316472530365, "learning_rate": 0.00019678200896827174, "loss": 11.6856, "step": 11632 }, { "epoch": 0.24351084317173238, "grad_norm": 0.33988311886787415, "learning_rate": 0.00019678145720913447, "loss": 11.6922, "step": 11633 }, { "epoch": 0.24353177593569456, "grad_norm": 0.28461408615112305, "learning_rate": 0.00019678090540347243, "loss": 11.6739, "step": 11634 }, { "epoch": 0.2435527086996567, "grad_norm": 0.24953597784042358, "learning_rate": 0.00019678035355128587, "loss": 11.6879, "step": 11635 }, { "epoch": 0.24357364146361885, "grad_norm": 0.2537444531917572, "learning_rate": 0.00019677980165257506, "loss": 11.6649, "step": 11636 }, { "epoch": 0.24359457422758102, "grad_norm": 0.2817177474498749, "learning_rate": 0.00019677924970734026, "loss": 11.6747, "step": 11637 }, { "epoch": 0.24361550699154316, "grad_norm": 0.19696582853794098, "learning_rate": 0.0001967786977155817, "loss": 11.6787, "step": 11638 }, { "epoch": 0.2436364397555053, "grad_norm": 0.2822515070438385, "learning_rate": 0.00019677814567729972, "loss": 11.6563, "step": 11639 }, { "epoch": 0.24365737251946748, "grad_norm": 0.24458713829517365, "learning_rate": 0.00019677759359249455, "loss": 11.6627, "step": 11640 }, { "epoch": 0.24367830528342962, "grad_norm": 0.20597246289253235, "learning_rate": 0.00019677704146116645, "loss": 11.6738, "step": 11641 }, { "epoch": 0.24369923804739177, "grad_norm": 0.27185335755348206, "learning_rate": 0.00019677648928331565, "loss": 11.6745, "step": 11642 }, { "epoch": 0.24372017081135394, "grad_norm": 0.24021825194358826, "learning_rate": 0.00019677593705894246, "loss": 11.6845, "step": 11643 }, { "epoch": 0.2437411035753161, "grad_norm": 0.25655320286750793, "learning_rate": 0.00019677538478804718, "loss": 11.6733, "step": 11644 }, { "epoch": 0.24376203633927823, "grad_norm": 0.22566795349121094, "learning_rate": 0.00019677483247063, "loss": 11.6719, "step": 11645 }, { "epoch": 0.2437829691032404, "grad_norm": 0.23512639105319977, "learning_rate": 0.00019677428010669124, "loss": 11.6724, "step": 11646 }, { "epoch": 0.24380390186720255, "grad_norm": 0.2754265367984772, "learning_rate": 0.0001967737276962311, "loss": 11.691, "step": 11647 }, { "epoch": 0.2438248346311647, "grad_norm": 0.2934909760951996, "learning_rate": 0.00019677317523924992, "loss": 11.6802, "step": 11648 }, { "epoch": 0.24384576739512684, "grad_norm": 0.27556777000427246, "learning_rate": 0.00019677262273574796, "loss": 11.6883, "step": 11649 }, { "epoch": 0.243866700159089, "grad_norm": 0.21115455031394958, "learning_rate": 0.0001967720701857254, "loss": 11.6956, "step": 11650 }, { "epoch": 0.24388763292305116, "grad_norm": 0.36240389943122864, "learning_rate": 0.00019677151758918263, "loss": 11.6963, "step": 11651 }, { "epoch": 0.2439085656870133, "grad_norm": 0.269554078578949, "learning_rate": 0.0001967709649461198, "loss": 11.666, "step": 11652 }, { "epoch": 0.24392949845097547, "grad_norm": 0.24827122688293457, "learning_rate": 0.00019677041225653726, "loss": 11.6813, "step": 11653 }, { "epoch": 0.24395043121493762, "grad_norm": 0.21583476662635803, "learning_rate": 0.00019676985952043523, "loss": 11.6833, "step": 11654 }, { "epoch": 0.24397136397889976, "grad_norm": 0.3387455344200134, "learning_rate": 0.000196769306737814, "loss": 11.6614, "step": 11655 }, { "epoch": 0.24399229674286194, "grad_norm": 0.2489948570728302, "learning_rate": 0.00019676875390867382, "loss": 11.6721, "step": 11656 }, { "epoch": 0.24401322950682408, "grad_norm": 0.4154469668865204, "learning_rate": 0.00019676820103301497, "loss": 11.6747, "step": 11657 }, { "epoch": 0.24403416227078623, "grad_norm": 0.23675289750099182, "learning_rate": 0.00019676764811083768, "loss": 11.6859, "step": 11658 }, { "epoch": 0.2440550950347484, "grad_norm": 0.25074970722198486, "learning_rate": 0.00019676709514214227, "loss": 11.6718, "step": 11659 }, { "epoch": 0.24407602779871054, "grad_norm": 0.3713929355144501, "learning_rate": 0.00019676654212692895, "loss": 11.6873, "step": 11660 }, { "epoch": 0.2440969605626727, "grad_norm": 0.24362456798553467, "learning_rate": 0.00019676598906519805, "loss": 11.6528, "step": 11661 }, { "epoch": 0.24411789332663486, "grad_norm": 0.22921617329120636, "learning_rate": 0.00019676543595694977, "loss": 11.6849, "step": 11662 }, { "epoch": 0.244138826090597, "grad_norm": 0.2733314037322998, "learning_rate": 0.00019676488280218442, "loss": 11.68, "step": 11663 }, { "epoch": 0.24415975885455915, "grad_norm": 0.3946520686149597, "learning_rate": 0.00019676432960090225, "loss": 11.7078, "step": 11664 }, { "epoch": 0.24418069161852132, "grad_norm": 0.26229795813560486, "learning_rate": 0.00019676377635310352, "loss": 11.6579, "step": 11665 }, { "epoch": 0.24420162438248347, "grad_norm": 0.22087711095809937, "learning_rate": 0.00019676322305878848, "loss": 11.6707, "step": 11666 }, { "epoch": 0.2442225571464456, "grad_norm": 1.944225788116455, "learning_rate": 0.00019676266971795748, "loss": 11.6658, "step": 11667 }, { "epoch": 0.24424348991040776, "grad_norm": 0.2612309157848358, "learning_rate": 0.00019676211633061068, "loss": 11.68, "step": 11668 }, { "epoch": 0.24426442267436993, "grad_norm": 0.2316291779279709, "learning_rate": 0.00019676156289674841, "loss": 11.682, "step": 11669 }, { "epoch": 0.24428535543833207, "grad_norm": 0.25325214862823486, "learning_rate": 0.0001967610094163709, "loss": 11.675, "step": 11670 }, { "epoch": 0.24430628820229422, "grad_norm": 0.24912860989570618, "learning_rate": 0.00019676045588947844, "loss": 11.668, "step": 11671 }, { "epoch": 0.2443272209662564, "grad_norm": 0.3665127456188202, "learning_rate": 0.00019675990231607133, "loss": 11.6679, "step": 11672 }, { "epoch": 0.24434815373021854, "grad_norm": 0.25869020819664, "learning_rate": 0.00019675934869614974, "loss": 11.6961, "step": 11673 }, { "epoch": 0.24436908649418068, "grad_norm": 0.2850160002708435, "learning_rate": 0.00019675879502971402, "loss": 11.6905, "step": 11674 }, { "epoch": 0.24439001925814285, "grad_norm": 0.40451186895370483, "learning_rate": 0.0001967582413167644, "loss": 11.6832, "step": 11675 }, { "epoch": 0.244410952022105, "grad_norm": 0.2625719904899597, "learning_rate": 0.00019675768755730117, "loss": 11.6649, "step": 11676 }, { "epoch": 0.24443188478606714, "grad_norm": 0.26038071513175964, "learning_rate": 0.00019675713375132456, "loss": 11.692, "step": 11677 }, { "epoch": 0.24445281755002932, "grad_norm": 0.23397959768772125, "learning_rate": 0.00019675657989883484, "loss": 11.6874, "step": 11678 }, { "epoch": 0.24447375031399146, "grad_norm": 0.3174906373023987, "learning_rate": 0.00019675602599983233, "loss": 11.6649, "step": 11679 }, { "epoch": 0.2444946830779536, "grad_norm": 0.23165616393089294, "learning_rate": 0.00019675547205431725, "loss": 11.6666, "step": 11680 }, { "epoch": 0.24451561584191578, "grad_norm": 0.25592148303985596, "learning_rate": 0.0001967549180622899, "loss": 11.6892, "step": 11681 }, { "epoch": 0.24453654860587792, "grad_norm": 0.22088563442230225, "learning_rate": 0.00019675436402375048, "loss": 11.678, "step": 11682 }, { "epoch": 0.24455748136984007, "grad_norm": 0.24781571328639984, "learning_rate": 0.0001967538099386993, "loss": 11.6795, "step": 11683 }, { "epoch": 0.24457841413380224, "grad_norm": 0.22626399993896484, "learning_rate": 0.00019675325580713663, "loss": 11.6755, "step": 11684 }, { "epoch": 0.24459934689776439, "grad_norm": 0.2890778183937073, "learning_rate": 0.00019675270162906275, "loss": 11.6738, "step": 11685 }, { "epoch": 0.24462027966172653, "grad_norm": 0.2730399966239929, "learning_rate": 0.00019675214740447788, "loss": 11.6877, "step": 11686 }, { "epoch": 0.24464121242568868, "grad_norm": 0.2455950826406479, "learning_rate": 0.00019675159313338234, "loss": 11.6752, "step": 11687 }, { "epoch": 0.24466214518965085, "grad_norm": 0.30167409777641296, "learning_rate": 0.00019675103881577636, "loss": 11.6633, "step": 11688 }, { "epoch": 0.244683077953613, "grad_norm": 0.2318216860294342, "learning_rate": 0.00019675048445166022, "loss": 11.6782, "step": 11689 }, { "epoch": 0.24470401071757514, "grad_norm": 0.22566843032836914, "learning_rate": 0.00019674993004103417, "loss": 11.6892, "step": 11690 }, { "epoch": 0.2447249434815373, "grad_norm": 0.1976095288991928, "learning_rate": 0.0001967493755838985, "loss": 11.6608, "step": 11691 }, { "epoch": 0.24474587624549946, "grad_norm": 0.22127391397953033, "learning_rate": 0.00019674882108025346, "loss": 11.6721, "step": 11692 }, { "epoch": 0.2447668090094616, "grad_norm": 0.3034574091434479, "learning_rate": 0.00019674826653009935, "loss": 11.6569, "step": 11693 }, { "epoch": 0.24478774177342377, "grad_norm": 0.37493520975112915, "learning_rate": 0.00019674771193343638, "loss": 11.681, "step": 11694 }, { "epoch": 0.24480867453738592, "grad_norm": 0.22188809514045715, "learning_rate": 0.00019674715729026484, "loss": 11.6853, "step": 11695 }, { "epoch": 0.24482960730134806, "grad_norm": 0.24093452095985413, "learning_rate": 0.00019674660260058505, "loss": 11.6751, "step": 11696 }, { "epoch": 0.24485054006531023, "grad_norm": 0.271833598613739, "learning_rate": 0.00019674604786439715, "loss": 11.6897, "step": 11697 }, { "epoch": 0.24487147282927238, "grad_norm": 0.25748494267463684, "learning_rate": 0.00019674549308170157, "loss": 11.6647, "step": 11698 }, { "epoch": 0.24489240559323452, "grad_norm": 0.26686644554138184, "learning_rate": 0.00019674493825249845, "loss": 11.6915, "step": 11699 }, { "epoch": 0.2449133383571967, "grad_norm": 0.33799612522125244, "learning_rate": 0.00019674438337678808, "loss": 11.6745, "step": 11700 }, { "epoch": 0.24493427112115884, "grad_norm": 0.3319482207298279, "learning_rate": 0.00019674382845457077, "loss": 11.7006, "step": 11701 }, { "epoch": 0.244955203885121, "grad_norm": 0.27592650055885315, "learning_rate": 0.00019674327348584674, "loss": 11.6817, "step": 11702 }, { "epoch": 0.24497613664908313, "grad_norm": 0.25268813967704773, "learning_rate": 0.00019674271847061633, "loss": 11.686, "step": 11703 }, { "epoch": 0.2449970694130453, "grad_norm": 0.22832275927066803, "learning_rate": 0.00019674216340887974, "loss": 11.6726, "step": 11704 }, { "epoch": 0.24501800217700745, "grad_norm": 0.2662521004676819, "learning_rate": 0.00019674160830063722, "loss": 11.6773, "step": 11705 }, { "epoch": 0.2450389349409696, "grad_norm": 0.2673904299736023, "learning_rate": 0.0001967410531458891, "loss": 11.6785, "step": 11706 }, { "epoch": 0.24505986770493177, "grad_norm": 0.3061104714870453, "learning_rate": 0.00019674049794463563, "loss": 11.6715, "step": 11707 }, { "epoch": 0.2450808004688939, "grad_norm": 0.25911664962768555, "learning_rate": 0.00019673994269687702, "loss": 11.6796, "step": 11708 }, { "epoch": 0.24510173323285606, "grad_norm": 0.24092082679271698, "learning_rate": 0.0001967393874026136, "loss": 11.6877, "step": 11709 }, { "epoch": 0.24512266599681823, "grad_norm": 0.23744364082813263, "learning_rate": 0.00019673883206184562, "loss": 11.669, "step": 11710 }, { "epoch": 0.24514359876078037, "grad_norm": 0.3746742010116577, "learning_rate": 0.00019673827667457335, "loss": 11.6857, "step": 11711 }, { "epoch": 0.24516453152474252, "grad_norm": 0.24759621918201447, "learning_rate": 0.00019673772124079706, "loss": 11.689, "step": 11712 }, { "epoch": 0.2451854642887047, "grad_norm": 0.25488296151161194, "learning_rate": 0.000196737165760517, "loss": 11.6768, "step": 11713 }, { "epoch": 0.24520639705266684, "grad_norm": 0.30533286929130554, "learning_rate": 0.00019673661023373345, "loss": 11.6639, "step": 11714 }, { "epoch": 0.24522732981662898, "grad_norm": 0.2651672959327698, "learning_rate": 0.00019673605466044664, "loss": 11.6792, "step": 11715 }, { "epoch": 0.24524826258059115, "grad_norm": 0.21346724033355713, "learning_rate": 0.0001967354990406569, "loss": 11.6942, "step": 11716 }, { "epoch": 0.2452691953445533, "grad_norm": 0.22982949018478394, "learning_rate": 0.00019673494337436447, "loss": 11.6698, "step": 11717 }, { "epoch": 0.24529012810851544, "grad_norm": 0.3095170855522156, "learning_rate": 0.0001967343876615696, "loss": 11.6714, "step": 11718 }, { "epoch": 0.24531106087247762, "grad_norm": 0.2805757522583008, "learning_rate": 0.0001967338319022726, "loss": 11.6848, "step": 11719 }, { "epoch": 0.24533199363643976, "grad_norm": 0.29806333780288696, "learning_rate": 0.00019673327609647366, "loss": 11.6693, "step": 11720 }, { "epoch": 0.2453529264004019, "grad_norm": 0.25013113021850586, "learning_rate": 0.00019673272024417314, "loss": 11.6947, "step": 11721 }, { "epoch": 0.24537385916436405, "grad_norm": 0.271160751581192, "learning_rate": 0.00019673216434537124, "loss": 11.6804, "step": 11722 }, { "epoch": 0.24539479192832622, "grad_norm": 0.22669290006160736, "learning_rate": 0.00019673160840006824, "loss": 11.6689, "step": 11723 }, { "epoch": 0.24541572469228837, "grad_norm": 0.3923019766807556, "learning_rate": 0.00019673105240826443, "loss": 11.6807, "step": 11724 }, { "epoch": 0.2454366574562505, "grad_norm": 0.27719053626060486, "learning_rate": 0.00019673049636996005, "loss": 11.6607, "step": 11725 }, { "epoch": 0.24545759022021268, "grad_norm": 0.2314600646495819, "learning_rate": 0.0001967299402851554, "loss": 11.6639, "step": 11726 }, { "epoch": 0.24547852298417483, "grad_norm": 0.21550524234771729, "learning_rate": 0.00019672938415385077, "loss": 11.6764, "step": 11727 }, { "epoch": 0.24549945574813697, "grad_norm": 0.2530181407928467, "learning_rate": 0.00019672882797604633, "loss": 11.686, "step": 11728 }, { "epoch": 0.24552038851209915, "grad_norm": 0.25032299757003784, "learning_rate": 0.0001967282717517424, "loss": 11.6769, "step": 11729 }, { "epoch": 0.2455413212760613, "grad_norm": 0.276024729013443, "learning_rate": 0.00019672771548093925, "loss": 11.6884, "step": 11730 }, { "epoch": 0.24556225404002344, "grad_norm": 0.27423471212387085, "learning_rate": 0.0001967271591636372, "loss": 11.6796, "step": 11731 }, { "epoch": 0.2455831868039856, "grad_norm": 0.24255706369876862, "learning_rate": 0.00019672660279983643, "loss": 11.6549, "step": 11732 }, { "epoch": 0.24560411956794775, "grad_norm": 0.27491387724876404, "learning_rate": 0.00019672604638953724, "loss": 11.6715, "step": 11733 }, { "epoch": 0.2456250523319099, "grad_norm": 0.25209733843803406, "learning_rate": 0.00019672548993273994, "loss": 11.6854, "step": 11734 }, { "epoch": 0.24564598509587207, "grad_norm": 0.2947408854961395, "learning_rate": 0.00019672493342944472, "loss": 11.6582, "step": 11735 }, { "epoch": 0.24566691785983422, "grad_norm": 0.2753361463546753, "learning_rate": 0.00019672437687965187, "loss": 11.6809, "step": 11736 }, { "epoch": 0.24568785062379636, "grad_norm": 0.30812695622444153, "learning_rate": 0.0001967238202833617, "loss": 11.6966, "step": 11737 }, { "epoch": 0.2457087833877585, "grad_norm": 0.2864203453063965, "learning_rate": 0.00019672326364057448, "loss": 11.6575, "step": 11738 }, { "epoch": 0.24572971615172068, "grad_norm": 0.25317251682281494, "learning_rate": 0.00019672270695129037, "loss": 11.6703, "step": 11739 }, { "epoch": 0.24575064891568282, "grad_norm": 0.22236758470535278, "learning_rate": 0.0001967221502155098, "loss": 11.6683, "step": 11740 }, { "epoch": 0.24577158167964497, "grad_norm": 0.26152366399765015, "learning_rate": 0.0001967215934332329, "loss": 11.6518, "step": 11741 }, { "epoch": 0.24579251444360714, "grad_norm": 0.26051685214042664, "learning_rate": 0.00019672103660446, "loss": 11.6725, "step": 11742 }, { "epoch": 0.24581344720756929, "grad_norm": 0.24745692312717438, "learning_rate": 0.00019672047972919138, "loss": 11.6701, "step": 11743 }, { "epoch": 0.24583437997153143, "grad_norm": 0.2605096101760864, "learning_rate": 0.00019671992280742727, "loss": 11.6774, "step": 11744 }, { "epoch": 0.2458553127354936, "grad_norm": 0.2607211172580719, "learning_rate": 0.00019671936583916797, "loss": 11.6859, "step": 11745 }, { "epoch": 0.24587624549945575, "grad_norm": 0.34800249338150024, "learning_rate": 0.00019671880882441372, "loss": 11.6983, "step": 11746 }, { "epoch": 0.2458971782634179, "grad_norm": 0.2662924528121948, "learning_rate": 0.0001967182517631648, "loss": 11.6714, "step": 11747 }, { "epoch": 0.24591811102738007, "grad_norm": 0.23164193332195282, "learning_rate": 0.0001967176946554215, "loss": 11.6672, "step": 11748 }, { "epoch": 0.2459390437913422, "grad_norm": 0.24093899130821228, "learning_rate": 0.00019671713750118403, "loss": 11.6856, "step": 11749 }, { "epoch": 0.24595997655530435, "grad_norm": 0.31208479404449463, "learning_rate": 0.00019671658030045273, "loss": 11.6919, "step": 11750 }, { "epoch": 0.24598090931926653, "grad_norm": 0.2376825213432312, "learning_rate": 0.0001967160230532278, "loss": 11.6721, "step": 11751 }, { "epoch": 0.24600184208322867, "grad_norm": 0.22956518828868866, "learning_rate": 0.00019671546575950958, "loss": 11.6669, "step": 11752 }, { "epoch": 0.24602277484719082, "grad_norm": 0.2960282564163208, "learning_rate": 0.00019671490841929824, "loss": 11.693, "step": 11753 }, { "epoch": 0.246043707611153, "grad_norm": 0.24539917707443237, "learning_rate": 0.00019671435103259412, "loss": 11.6781, "step": 11754 }, { "epoch": 0.24606464037511513, "grad_norm": 0.23112694919109344, "learning_rate": 0.0001967137935993975, "loss": 11.6747, "step": 11755 }, { "epoch": 0.24608557313907728, "grad_norm": 0.38188886642456055, "learning_rate": 0.0001967132361197086, "loss": 11.6953, "step": 11756 }, { "epoch": 0.24610650590303942, "grad_norm": 0.25400757789611816, "learning_rate": 0.00019671267859352773, "loss": 11.677, "step": 11757 }, { "epoch": 0.2461274386670016, "grad_norm": 0.21992094814777374, "learning_rate": 0.00019671212102085513, "loss": 11.666, "step": 11758 }, { "epoch": 0.24614837143096374, "grad_norm": 0.22399777173995972, "learning_rate": 0.00019671156340169107, "loss": 11.6786, "step": 11759 }, { "epoch": 0.2461693041949259, "grad_norm": 0.28021493554115295, "learning_rate": 0.00019671100573603583, "loss": 11.6796, "step": 11760 }, { "epoch": 0.24619023695888806, "grad_norm": 0.3058393895626068, "learning_rate": 0.00019671044802388967, "loss": 11.6877, "step": 11761 }, { "epoch": 0.2462111697228502, "grad_norm": 0.26270008087158203, "learning_rate": 0.00019670989026525284, "loss": 11.6728, "step": 11762 }, { "epoch": 0.24623210248681235, "grad_norm": 0.224594846367836, "learning_rate": 0.00019670933246012566, "loss": 11.6696, "step": 11763 }, { "epoch": 0.24625303525077452, "grad_norm": 0.23394562304019928, "learning_rate": 0.00019670877460850833, "loss": 11.6677, "step": 11764 }, { "epoch": 0.24627396801473667, "grad_norm": 4.788288593292236, "learning_rate": 0.0001967082167104012, "loss": 11.6502, "step": 11765 }, { "epoch": 0.2462949007786988, "grad_norm": 0.2197735756635666, "learning_rate": 0.00019670765876580445, "loss": 11.6832, "step": 11766 }, { "epoch": 0.24631583354266098, "grad_norm": 0.26816052198410034, "learning_rate": 0.00019670710077471842, "loss": 11.6745, "step": 11767 }, { "epoch": 0.24633676630662313, "grad_norm": 0.26986926794052124, "learning_rate": 0.00019670654273714334, "loss": 11.6893, "step": 11768 }, { "epoch": 0.24635769907058527, "grad_norm": 0.24368378520011902, "learning_rate": 0.00019670598465307948, "loss": 11.678, "step": 11769 }, { "epoch": 0.24637863183454745, "grad_norm": 0.2756859064102173, "learning_rate": 0.0001967054265225271, "loss": 11.7021, "step": 11770 }, { "epoch": 0.2463995645985096, "grad_norm": 0.25098469853401184, "learning_rate": 0.0001967048683454865, "loss": 11.6771, "step": 11771 }, { "epoch": 0.24642049736247174, "grad_norm": 1.0917153358459473, "learning_rate": 0.00019670431012195795, "loss": 11.6658, "step": 11772 }, { "epoch": 0.2464414301264339, "grad_norm": 0.28001847863197327, "learning_rate": 0.00019670375185194168, "loss": 11.6819, "step": 11773 }, { "epoch": 0.24646236289039605, "grad_norm": 0.23327048122882843, "learning_rate": 0.00019670319353543798, "loss": 11.6612, "step": 11774 }, { "epoch": 0.2464832956543582, "grad_norm": 0.26976266503334045, "learning_rate": 0.00019670263517244712, "loss": 11.6763, "step": 11775 }, { "epoch": 0.24650422841832034, "grad_norm": 0.20024359226226807, "learning_rate": 0.00019670207676296934, "loss": 11.6787, "step": 11776 }, { "epoch": 0.24652516118228252, "grad_norm": 0.25339779257774353, "learning_rate": 0.00019670151830700495, "loss": 11.676, "step": 11777 }, { "epoch": 0.24654609394624466, "grad_norm": 0.23602832853794098, "learning_rate": 0.00019670095980455423, "loss": 11.6706, "step": 11778 }, { "epoch": 0.2465670267102068, "grad_norm": 0.28345227241516113, "learning_rate": 0.0001967004012556174, "loss": 11.6852, "step": 11779 }, { "epoch": 0.24658795947416898, "grad_norm": 0.1922868937253952, "learning_rate": 0.00019669984266019474, "loss": 11.6741, "step": 11780 }, { "epoch": 0.24660889223813112, "grad_norm": 0.24031104147434235, "learning_rate": 0.00019669928401828653, "loss": 11.6777, "step": 11781 }, { "epoch": 0.24662982500209327, "grad_norm": 0.2399197518825531, "learning_rate": 0.00019669872532989305, "loss": 11.6806, "step": 11782 }, { "epoch": 0.24665075776605544, "grad_norm": 0.24809233844280243, "learning_rate": 0.00019669816659501454, "loss": 11.6489, "step": 11783 }, { "epoch": 0.24667169053001758, "grad_norm": 0.28534460067749023, "learning_rate": 0.0001966976078136513, "loss": 11.687, "step": 11784 }, { "epoch": 0.24669262329397973, "grad_norm": 0.3109540641307831, "learning_rate": 0.00019669704898580358, "loss": 11.6894, "step": 11785 }, { "epoch": 0.2467135560579419, "grad_norm": 0.25619664788246155, "learning_rate": 0.00019669649011147165, "loss": 11.6712, "step": 11786 }, { "epoch": 0.24673448882190405, "grad_norm": 0.2739728093147278, "learning_rate": 0.00019669593119065577, "loss": 11.6612, "step": 11787 }, { "epoch": 0.2467554215858662, "grad_norm": 0.3043878674507141, "learning_rate": 0.0001966953722233562, "loss": 11.693, "step": 11788 }, { "epoch": 0.24677635434982836, "grad_norm": 0.3037276268005371, "learning_rate": 0.00019669481320957325, "loss": 11.6721, "step": 11789 }, { "epoch": 0.2467972871137905, "grad_norm": 0.25549665093421936, "learning_rate": 0.00019669425414930717, "loss": 11.6774, "step": 11790 }, { "epoch": 0.24681821987775265, "grad_norm": 0.2649567425251007, "learning_rate": 0.00019669369504255824, "loss": 11.6805, "step": 11791 }, { "epoch": 0.2468391526417148, "grad_norm": 0.26836150884628296, "learning_rate": 0.00019669313588932668, "loss": 11.6778, "step": 11792 }, { "epoch": 0.24686008540567697, "grad_norm": 0.2861992120742798, "learning_rate": 0.00019669257668961278, "loss": 11.6821, "step": 11793 }, { "epoch": 0.24688101816963912, "grad_norm": 0.31753605604171753, "learning_rate": 0.00019669201744341685, "loss": 11.6736, "step": 11794 }, { "epoch": 0.24690195093360126, "grad_norm": 0.2242257297039032, "learning_rate": 0.00019669145815073914, "loss": 11.6708, "step": 11795 }, { "epoch": 0.24692288369756343, "grad_norm": 0.2989294230937958, "learning_rate": 0.00019669089881157987, "loss": 11.668, "step": 11796 }, { "epoch": 0.24694381646152558, "grad_norm": 0.23819881677627563, "learning_rate": 0.00019669033942593938, "loss": 11.6815, "step": 11797 }, { "epoch": 0.24696474922548772, "grad_norm": 0.30128270387649536, "learning_rate": 0.00019668977999381788, "loss": 11.6956, "step": 11798 }, { "epoch": 0.2469856819894499, "grad_norm": 0.24658171832561493, "learning_rate": 0.0001966892205152157, "loss": 11.6713, "step": 11799 }, { "epoch": 0.24700661475341204, "grad_norm": 0.27731627225875854, "learning_rate": 0.00019668866099013305, "loss": 11.6959, "step": 11800 }, { "epoch": 0.24702754751737419, "grad_norm": 0.2452850639820099, "learning_rate": 0.00019668810141857025, "loss": 11.6808, "step": 11801 }, { "epoch": 0.24704848028133636, "grad_norm": 0.23910470306873322, "learning_rate": 0.00019668754180052748, "loss": 11.6889, "step": 11802 }, { "epoch": 0.2470694130452985, "grad_norm": 0.2371164858341217, "learning_rate": 0.00019668698213600513, "loss": 11.664, "step": 11803 }, { "epoch": 0.24709034580926065, "grad_norm": 0.3162979781627655, "learning_rate": 0.0001966864224250034, "loss": 11.6995, "step": 11804 }, { "epoch": 0.24711127857322282, "grad_norm": 0.2434314489364624, "learning_rate": 0.00019668586266752253, "loss": 11.6709, "step": 11805 }, { "epoch": 0.24713221133718496, "grad_norm": 0.2592330873012543, "learning_rate": 0.00019668530286356286, "loss": 11.6821, "step": 11806 }, { "epoch": 0.2471531441011471, "grad_norm": 0.30276381969451904, "learning_rate": 0.00019668474301312464, "loss": 11.6818, "step": 11807 }, { "epoch": 0.24717407686510928, "grad_norm": 0.22545357048511505, "learning_rate": 0.0001966841831162081, "loss": 11.6811, "step": 11808 }, { "epoch": 0.24719500962907143, "grad_norm": 0.3608298599720001, "learning_rate": 0.00019668362317281355, "loss": 11.679, "step": 11809 }, { "epoch": 0.24721594239303357, "grad_norm": 0.26592305302619934, "learning_rate": 0.00019668306318294123, "loss": 11.6564, "step": 11810 }, { "epoch": 0.24723687515699572, "grad_norm": 0.29383790493011475, "learning_rate": 0.00019668250314659145, "loss": 11.6854, "step": 11811 }, { "epoch": 0.2472578079209579, "grad_norm": 0.26899629831314087, "learning_rate": 0.00019668194306376443, "loss": 11.6706, "step": 11812 }, { "epoch": 0.24727874068492003, "grad_norm": 0.3104487359523773, "learning_rate": 0.0001966813829344605, "loss": 11.6895, "step": 11813 }, { "epoch": 0.24729967344888218, "grad_norm": 0.42416998744010925, "learning_rate": 0.00019668082275867984, "loss": 11.6937, "step": 11814 }, { "epoch": 0.24732060621284435, "grad_norm": 0.2609209418296814, "learning_rate": 0.0001966802625364228, "loss": 11.6663, "step": 11815 }, { "epoch": 0.2473415389768065, "grad_norm": 0.24796701967716217, "learning_rate": 0.00019667970226768962, "loss": 11.6861, "step": 11816 }, { "epoch": 0.24736247174076864, "grad_norm": 0.23377466201782227, "learning_rate": 0.00019667914195248054, "loss": 11.6833, "step": 11817 }, { "epoch": 0.24738340450473081, "grad_norm": 0.2808600068092346, "learning_rate": 0.00019667858159079588, "loss": 11.661, "step": 11818 }, { "epoch": 0.24740433726869296, "grad_norm": 0.21114014089107513, "learning_rate": 0.0001966780211826359, "loss": 11.6838, "step": 11819 }, { "epoch": 0.2474252700326551, "grad_norm": 0.2850750982761383, "learning_rate": 0.00019667746072800086, "loss": 11.6753, "step": 11820 }, { "epoch": 0.24744620279661728, "grad_norm": 0.2466963529586792, "learning_rate": 0.000196676900226891, "loss": 11.6644, "step": 11821 }, { "epoch": 0.24746713556057942, "grad_norm": 0.2678223252296448, "learning_rate": 0.00019667633967930664, "loss": 11.6797, "step": 11822 }, { "epoch": 0.24748806832454157, "grad_norm": 0.21309848129749298, "learning_rate": 0.00019667577908524803, "loss": 11.6859, "step": 11823 }, { "epoch": 0.24750900108850374, "grad_norm": 0.2496584802865982, "learning_rate": 0.0001966752184447154, "loss": 11.6813, "step": 11824 }, { "epoch": 0.24752993385246588, "grad_norm": 0.2312029004096985, "learning_rate": 0.0001966746577577091, "loss": 11.6756, "step": 11825 }, { "epoch": 0.24755086661642803, "grad_norm": 0.24984736740589142, "learning_rate": 0.00019667409702422932, "loss": 11.6679, "step": 11826 }, { "epoch": 0.24757179938039017, "grad_norm": 0.23028017580509186, "learning_rate": 0.00019667353624427638, "loss": 11.6753, "step": 11827 }, { "epoch": 0.24759273214435235, "grad_norm": 0.25759604573249817, "learning_rate": 0.00019667297541785055, "loss": 11.6901, "step": 11828 }, { "epoch": 0.2476136649083145, "grad_norm": 0.2183043658733368, "learning_rate": 0.00019667241454495205, "loss": 11.6768, "step": 11829 }, { "epoch": 0.24763459767227664, "grad_norm": 0.23781093955039978, "learning_rate": 0.0001966718536255812, "loss": 11.6749, "step": 11830 }, { "epoch": 0.2476555304362388, "grad_norm": 0.24781706929206848, "learning_rate": 0.00019667129265973826, "loss": 11.6757, "step": 11831 }, { "epoch": 0.24767646320020095, "grad_norm": 0.2615754008293152, "learning_rate": 0.00019667073164742347, "loss": 11.6778, "step": 11832 }, { "epoch": 0.2476973959641631, "grad_norm": 0.318695992231369, "learning_rate": 0.00019667017058863713, "loss": 11.6703, "step": 11833 }, { "epoch": 0.24771832872812527, "grad_norm": 0.2332259565591812, "learning_rate": 0.00019666960948337952, "loss": 11.663, "step": 11834 }, { "epoch": 0.24773926149208741, "grad_norm": 0.2629571259021759, "learning_rate": 0.00019666904833165087, "loss": 11.6859, "step": 11835 }, { "epoch": 0.24776019425604956, "grad_norm": 0.36069202423095703, "learning_rate": 0.00019666848713345146, "loss": 11.6788, "step": 11836 }, { "epoch": 0.24778112702001173, "grad_norm": 0.32411617040634155, "learning_rate": 0.0001966679258887816, "loss": 11.6759, "step": 11837 }, { "epoch": 0.24780205978397388, "grad_norm": 0.33172550797462463, "learning_rate": 0.00019666736459764153, "loss": 11.6759, "step": 11838 }, { "epoch": 0.24782299254793602, "grad_norm": 0.2824263572692871, "learning_rate": 0.0001966668032600315, "loss": 11.6889, "step": 11839 }, { "epoch": 0.2478439253118982, "grad_norm": 0.2683614492416382, "learning_rate": 0.00019666624187595182, "loss": 11.6727, "step": 11840 }, { "epoch": 0.24786485807586034, "grad_norm": 0.2870771288871765, "learning_rate": 0.00019666568044540274, "loss": 11.6621, "step": 11841 }, { "epoch": 0.24788579083982248, "grad_norm": 0.30839574337005615, "learning_rate": 0.00019666511896838454, "loss": 11.6922, "step": 11842 }, { "epoch": 0.24790672360378466, "grad_norm": 0.2715612053871155, "learning_rate": 0.00019666455744489744, "loss": 11.6943, "step": 11843 }, { "epoch": 0.2479276563677468, "grad_norm": 0.2059328705072403, "learning_rate": 0.00019666399587494178, "loss": 11.6651, "step": 11844 }, { "epoch": 0.24794858913170895, "grad_norm": 0.2940376102924347, "learning_rate": 0.0001966634342585178, "loss": 11.6833, "step": 11845 }, { "epoch": 0.2479695218956711, "grad_norm": 0.30005067586898804, "learning_rate": 0.00019666287259562576, "loss": 11.6816, "step": 11846 }, { "epoch": 0.24799045465963326, "grad_norm": 0.23307713866233826, "learning_rate": 0.00019666231088626593, "loss": 11.6861, "step": 11847 }, { "epoch": 0.2480113874235954, "grad_norm": 0.23118208348751068, "learning_rate": 0.0001966617491304386, "loss": 11.6682, "step": 11848 }, { "epoch": 0.24803232018755755, "grad_norm": 0.26128706336021423, "learning_rate": 0.00019666118732814404, "loss": 11.6701, "step": 11849 }, { "epoch": 0.24805325295151973, "grad_norm": 0.2708449065685272, "learning_rate": 0.00019666062547938248, "loss": 11.6784, "step": 11850 }, { "epoch": 0.24807418571548187, "grad_norm": 0.25020310282707214, "learning_rate": 0.00019666006358415428, "loss": 11.6756, "step": 11851 }, { "epoch": 0.24809511847944402, "grad_norm": 0.2769833207130432, "learning_rate": 0.00019665950164245958, "loss": 11.678, "step": 11852 }, { "epoch": 0.2481160512434062, "grad_norm": 0.25859981775283813, "learning_rate": 0.0001966589396542988, "loss": 11.6725, "step": 11853 }, { "epoch": 0.24813698400736833, "grad_norm": 0.2855927646160126, "learning_rate": 0.00019665837761967205, "loss": 11.6851, "step": 11854 }, { "epoch": 0.24815791677133048, "grad_norm": 0.28821510076522827, "learning_rate": 0.00019665781553857974, "loss": 11.693, "step": 11855 }, { "epoch": 0.24817884953529265, "grad_norm": 0.23135945200920105, "learning_rate": 0.00019665725341102202, "loss": 11.6686, "step": 11856 }, { "epoch": 0.2481997822992548, "grad_norm": 0.284353643655777, "learning_rate": 0.00019665669123699928, "loss": 11.6865, "step": 11857 }, { "epoch": 0.24822071506321694, "grad_norm": 0.2591618001461029, "learning_rate": 0.0001966561290165117, "loss": 11.6532, "step": 11858 }, { "epoch": 0.2482416478271791, "grad_norm": 0.2904079854488373, "learning_rate": 0.0001966555667495596, "loss": 11.6604, "step": 11859 }, { "epoch": 0.24826258059114126, "grad_norm": 0.23157955706119537, "learning_rate": 0.00019665500443614322, "loss": 11.6693, "step": 11860 }, { "epoch": 0.2482835133551034, "grad_norm": 0.2725996673107147, "learning_rate": 0.00019665444207626285, "loss": 11.6785, "step": 11861 }, { "epoch": 0.24830444611906557, "grad_norm": 0.2447664886713028, "learning_rate": 0.00019665387966991875, "loss": 11.6661, "step": 11862 }, { "epoch": 0.24832537888302772, "grad_norm": 0.27450108528137207, "learning_rate": 0.00019665331721711117, "loss": 11.6596, "step": 11863 }, { "epoch": 0.24834631164698986, "grad_norm": 0.2430989295244217, "learning_rate": 0.00019665275471784043, "loss": 11.6769, "step": 11864 }, { "epoch": 0.248367244410952, "grad_norm": 0.23288594186306, "learning_rate": 0.00019665219217210677, "loss": 11.6729, "step": 11865 }, { "epoch": 0.24838817717491418, "grad_norm": 0.2731831669807434, "learning_rate": 0.00019665162957991047, "loss": 11.6898, "step": 11866 }, { "epoch": 0.24840910993887633, "grad_norm": 0.23495209217071533, "learning_rate": 0.0001966510669412518, "loss": 11.6668, "step": 11867 }, { "epoch": 0.24843004270283847, "grad_norm": 0.20646880567073822, "learning_rate": 0.00019665050425613097, "loss": 11.6868, "step": 11868 }, { "epoch": 0.24845097546680064, "grad_norm": 0.25649192929267883, "learning_rate": 0.00019664994152454837, "loss": 11.695, "step": 11869 }, { "epoch": 0.2484719082307628, "grad_norm": 0.28709468245506287, "learning_rate": 0.0001966493787465042, "loss": 11.6666, "step": 11870 }, { "epoch": 0.24849284099472493, "grad_norm": 0.2678542137145996, "learning_rate": 0.0001966488159219987, "loss": 11.6881, "step": 11871 }, { "epoch": 0.2485137737586871, "grad_norm": 0.24079254269599915, "learning_rate": 0.0001966482530510322, "loss": 11.6729, "step": 11872 }, { "epoch": 0.24853470652264925, "grad_norm": 0.21543806791305542, "learning_rate": 0.00019664769013360496, "loss": 11.6744, "step": 11873 }, { "epoch": 0.2485556392866114, "grad_norm": 0.3567412197589874, "learning_rate": 0.00019664712716971724, "loss": 11.6722, "step": 11874 }, { "epoch": 0.24857657205057357, "grad_norm": 0.27642539143562317, "learning_rate": 0.0001966465641593693, "loss": 11.6738, "step": 11875 }, { "epoch": 0.2485975048145357, "grad_norm": 0.23284128308296204, "learning_rate": 0.0001966460011025614, "loss": 11.66, "step": 11876 }, { "epoch": 0.24861843757849786, "grad_norm": 0.4109097123146057, "learning_rate": 0.00019664543799929383, "loss": 11.6849, "step": 11877 }, { "epoch": 0.24863937034246003, "grad_norm": 0.28071272373199463, "learning_rate": 0.0001966448748495669, "loss": 11.6634, "step": 11878 }, { "epoch": 0.24866030310642218, "grad_norm": 0.23674754798412323, "learning_rate": 0.0001966443116533808, "loss": 11.687, "step": 11879 }, { "epoch": 0.24868123587038432, "grad_norm": 0.24766583740711212, "learning_rate": 0.00019664374841073587, "loss": 11.6689, "step": 11880 }, { "epoch": 0.24870216863434647, "grad_norm": 0.2297312170267105, "learning_rate": 0.00019664318512163233, "loss": 11.692, "step": 11881 }, { "epoch": 0.24872310139830864, "grad_norm": 0.3061063289642334, "learning_rate": 0.00019664262178607049, "loss": 11.6737, "step": 11882 }, { "epoch": 0.24874403416227078, "grad_norm": 0.30582183599472046, "learning_rate": 0.0001966420584040506, "loss": 11.6808, "step": 11883 }, { "epoch": 0.24876496692623293, "grad_norm": 0.2469683140516281, "learning_rate": 0.00019664149497557296, "loss": 11.6731, "step": 11884 }, { "epoch": 0.2487858996901951, "grad_norm": 0.2735876441001892, "learning_rate": 0.0001966409315006378, "loss": 11.6668, "step": 11885 }, { "epoch": 0.24880683245415725, "grad_norm": 0.2502857446670532, "learning_rate": 0.00019664036797924537, "loss": 11.6718, "step": 11886 }, { "epoch": 0.2488277652181194, "grad_norm": 0.34726884961128235, "learning_rate": 0.00019663980441139603, "loss": 11.6932, "step": 11887 }, { "epoch": 0.24884869798208156, "grad_norm": 0.23516424000263214, "learning_rate": 0.00019663924079708998, "loss": 11.6696, "step": 11888 }, { "epoch": 0.2488696307460437, "grad_norm": 0.2865477204322815, "learning_rate": 0.00019663867713632752, "loss": 11.677, "step": 11889 }, { "epoch": 0.24889056351000585, "grad_norm": 0.246050626039505, "learning_rate": 0.00019663811342910886, "loss": 11.6767, "step": 11890 }, { "epoch": 0.24891149627396802, "grad_norm": 0.3352477252483368, "learning_rate": 0.0001966375496754344, "loss": 11.6704, "step": 11891 }, { "epoch": 0.24893242903793017, "grad_norm": 0.360144704580307, "learning_rate": 0.00019663698587530428, "loss": 11.6681, "step": 11892 }, { "epoch": 0.24895336180189231, "grad_norm": 0.3132997751235962, "learning_rate": 0.00019663642202871886, "loss": 11.6713, "step": 11893 }, { "epoch": 0.2489742945658545, "grad_norm": 0.2513323426246643, "learning_rate": 0.00019663585813567834, "loss": 11.6646, "step": 11894 }, { "epoch": 0.24899522732981663, "grad_norm": 0.224949449300766, "learning_rate": 0.00019663529419618306, "loss": 11.6842, "step": 11895 }, { "epoch": 0.24901616009377878, "grad_norm": 0.24788399040699005, "learning_rate": 0.00019663473021023324, "loss": 11.6637, "step": 11896 }, { "epoch": 0.24903709285774095, "grad_norm": 0.26778650283813477, "learning_rate": 0.00019663416617782918, "loss": 11.696, "step": 11897 }, { "epoch": 0.2490580256217031, "grad_norm": 0.24674098193645477, "learning_rate": 0.00019663360209897112, "loss": 11.6716, "step": 11898 }, { "epoch": 0.24907895838566524, "grad_norm": 0.2150164544582367, "learning_rate": 0.00019663303797365936, "loss": 11.677, "step": 11899 }, { "epoch": 0.24909989114962738, "grad_norm": 0.2059495896100998, "learning_rate": 0.00019663247380189415, "loss": 11.6673, "step": 11900 }, { "epoch": 0.24912082391358956, "grad_norm": 0.2482273131608963, "learning_rate": 0.00019663190958367578, "loss": 11.6804, "step": 11901 }, { "epoch": 0.2491417566775517, "grad_norm": 0.23661772906780243, "learning_rate": 0.00019663134531900452, "loss": 11.6766, "step": 11902 }, { "epoch": 0.24916268944151385, "grad_norm": 0.2537512183189392, "learning_rate": 0.00019663078100788065, "loss": 11.6869, "step": 11903 }, { "epoch": 0.24918362220547602, "grad_norm": 0.19385318458080292, "learning_rate": 0.0001966302166503044, "loss": 11.6672, "step": 11904 }, { "epoch": 0.24920455496943816, "grad_norm": 0.2845446765422821, "learning_rate": 0.0001966296522462761, "loss": 11.6728, "step": 11905 }, { "epoch": 0.2492254877334003, "grad_norm": 0.23652338981628418, "learning_rate": 0.00019662908779579597, "loss": 11.6696, "step": 11906 }, { "epoch": 0.24924642049736248, "grad_norm": 0.2466815561056137, "learning_rate": 0.00019662852329886427, "loss": 11.6863, "step": 11907 }, { "epoch": 0.24926735326132463, "grad_norm": 0.2910439074039459, "learning_rate": 0.00019662795875548134, "loss": 11.6915, "step": 11908 }, { "epoch": 0.24928828602528677, "grad_norm": 0.2728576362133026, "learning_rate": 0.00019662739416564741, "loss": 11.6871, "step": 11909 }, { "epoch": 0.24930921878924894, "grad_norm": 0.24180187284946442, "learning_rate": 0.00019662682952936277, "loss": 11.6764, "step": 11910 }, { "epoch": 0.2493301515532111, "grad_norm": 0.3483361303806305, "learning_rate": 0.00019662626484662766, "loss": 11.6915, "step": 11911 }, { "epoch": 0.24935108431717323, "grad_norm": 0.3616599440574646, "learning_rate": 0.00019662570011744234, "loss": 11.6956, "step": 11912 }, { "epoch": 0.2493720170811354, "grad_norm": 0.26405641436576843, "learning_rate": 0.00019662513534180715, "loss": 11.6782, "step": 11913 }, { "epoch": 0.24939294984509755, "grad_norm": 0.28749507665634155, "learning_rate": 0.00019662457051972232, "loss": 11.6754, "step": 11914 }, { "epoch": 0.2494138826090597, "grad_norm": 0.26105955243110657, "learning_rate": 0.0001966240056511881, "loss": 11.6712, "step": 11915 }, { "epoch": 0.24943481537302187, "grad_norm": 0.3751121461391449, "learning_rate": 0.0001966234407362048, "loss": 11.6998, "step": 11916 }, { "epoch": 0.249455748136984, "grad_norm": 0.2497335970401764, "learning_rate": 0.00019662287577477266, "loss": 11.6837, "step": 11917 }, { "epoch": 0.24947668090094616, "grad_norm": 0.2447662353515625, "learning_rate": 0.00019662231076689199, "loss": 11.6902, "step": 11918 }, { "epoch": 0.2494976136649083, "grad_norm": 0.250700980424881, "learning_rate": 0.00019662174571256303, "loss": 11.6843, "step": 11919 }, { "epoch": 0.24951854642887047, "grad_norm": 0.23504787683486938, "learning_rate": 0.00019662118061178604, "loss": 11.6713, "step": 11920 }, { "epoch": 0.24953947919283262, "grad_norm": 0.27315008640289307, "learning_rate": 0.00019662061546456134, "loss": 11.6799, "step": 11921 }, { "epoch": 0.24956041195679476, "grad_norm": 0.3307650685310364, "learning_rate": 0.00019662005027088917, "loss": 11.687, "step": 11922 }, { "epoch": 0.24958134472075694, "grad_norm": 0.2879869043827057, "learning_rate": 0.0001966194850307698, "loss": 11.6928, "step": 11923 }, { "epoch": 0.24960227748471908, "grad_norm": 0.22695280611515045, "learning_rate": 0.00019661891974420353, "loss": 11.6658, "step": 11924 }, { "epoch": 0.24962321024868123, "grad_norm": 0.29927903413772583, "learning_rate": 0.00019661835441119057, "loss": 11.6758, "step": 11925 }, { "epoch": 0.2496441430126434, "grad_norm": 1.8214473724365234, "learning_rate": 0.00019661778903173126, "loss": 11.6525, "step": 11926 }, { "epoch": 0.24966507577660554, "grad_norm": 0.2973375618457794, "learning_rate": 0.0001966172236058258, "loss": 11.6721, "step": 11927 }, { "epoch": 0.2496860085405677, "grad_norm": 0.22865255177021027, "learning_rate": 0.00019661665813347455, "loss": 11.6641, "step": 11928 }, { "epoch": 0.24970694130452986, "grad_norm": 0.24373213946819305, "learning_rate": 0.00019661609261467772, "loss": 11.6833, "step": 11929 }, { "epoch": 0.249727874068492, "grad_norm": 0.24886257946491241, "learning_rate": 0.0001966155270494356, "loss": 11.695, "step": 11930 }, { "epoch": 0.24974880683245415, "grad_norm": 0.2859753370285034, "learning_rate": 0.00019661496143774844, "loss": 11.6824, "step": 11931 }, { "epoch": 0.24976973959641632, "grad_norm": 0.2675885856151581, "learning_rate": 0.00019661439577961656, "loss": 11.6781, "step": 11932 }, { "epoch": 0.24979067236037847, "grad_norm": 0.25712910294532776, "learning_rate": 0.0001966138300750402, "loss": 11.6748, "step": 11933 }, { "epoch": 0.2498116051243406, "grad_norm": 0.2467389702796936, "learning_rate": 0.00019661326432401962, "loss": 11.69, "step": 11934 }, { "epoch": 0.24983253788830276, "grad_norm": 0.26075059175491333, "learning_rate": 0.0001966126985265551, "loss": 11.6987, "step": 11935 }, { "epoch": 0.24985347065226493, "grad_norm": 0.2693171203136444, "learning_rate": 0.00019661213268264694, "loss": 11.6656, "step": 11936 }, { "epoch": 0.24987440341622708, "grad_norm": 0.23881745338439941, "learning_rate": 0.0001966115667922954, "loss": 11.676, "step": 11937 }, { "epoch": 0.24989533618018922, "grad_norm": 0.24120689928531647, "learning_rate": 0.00019661100085550072, "loss": 11.6758, "step": 11938 }, { "epoch": 0.2499162689441514, "grad_norm": 0.2556830048561096, "learning_rate": 0.00019661043487226323, "loss": 11.6762, "step": 11939 }, { "epoch": 0.24993720170811354, "grad_norm": 0.26973026990890503, "learning_rate": 0.00019660986884258313, "loss": 11.6807, "step": 11940 }, { "epoch": 0.24995813447207568, "grad_norm": 0.25508809089660645, "learning_rate": 0.00019660930276646073, "loss": 11.6868, "step": 11941 }, { "epoch": 0.24997906723603786, "grad_norm": 0.24143126606941223, "learning_rate": 0.00019660873664389633, "loss": 11.6932, "step": 11942 }, { "epoch": 0.25, "grad_norm": 0.2854894697666168, "learning_rate": 0.00019660817047489014, "loss": 11.6868, "step": 11943 }, { "epoch": 0.25002093276396214, "grad_norm": 0.21545343101024628, "learning_rate": 0.00019660760425944251, "loss": 11.6724, "step": 11944 }, { "epoch": 0.2500418655279243, "grad_norm": 0.3442021310329437, "learning_rate": 0.00019660703799755365, "loss": 11.6833, "step": 11945 }, { "epoch": 0.25006279829188643, "grad_norm": 0.2886117696762085, "learning_rate": 0.00019660647168922383, "loss": 11.6779, "step": 11946 }, { "epoch": 0.25008373105584863, "grad_norm": 0.3026919364929199, "learning_rate": 0.00019660590533445338, "loss": 11.6779, "step": 11947 }, { "epoch": 0.2501046638198108, "grad_norm": 0.4184858798980713, "learning_rate": 0.0001966053389332425, "loss": 11.6946, "step": 11948 }, { "epoch": 0.2501255965837729, "grad_norm": 0.23047201335430145, "learning_rate": 0.00019660477248559152, "loss": 11.6658, "step": 11949 }, { "epoch": 0.25014652934773507, "grad_norm": 0.32056954503059387, "learning_rate": 0.0001966042059915007, "loss": 11.6838, "step": 11950 }, { "epoch": 0.2501674621116972, "grad_norm": 0.26506999135017395, "learning_rate": 0.00019660363945097026, "loss": 11.6843, "step": 11951 }, { "epoch": 0.25018839487565936, "grad_norm": 0.2619359791278839, "learning_rate": 0.00019660307286400056, "loss": 11.6941, "step": 11952 }, { "epoch": 0.25020932763962156, "grad_norm": 0.2967762053012848, "learning_rate": 0.0001966025062305918, "loss": 11.6894, "step": 11953 }, { "epoch": 0.2502302604035837, "grad_norm": 0.2944711148738861, "learning_rate": 0.00019660193955074428, "loss": 11.6904, "step": 11954 }, { "epoch": 0.25025119316754585, "grad_norm": 0.261514812707901, "learning_rate": 0.0001966013728244583, "loss": 11.6702, "step": 11955 }, { "epoch": 0.250272125931508, "grad_norm": 0.2623773217201233, "learning_rate": 0.00019660080605173405, "loss": 11.6567, "step": 11956 }, { "epoch": 0.25029305869547014, "grad_norm": 0.3147634267807007, "learning_rate": 0.0001966002392325719, "loss": 11.6525, "step": 11957 }, { "epoch": 0.2503139914594323, "grad_norm": 0.336752712726593, "learning_rate": 0.00019659967236697207, "loss": 11.6736, "step": 11958 }, { "epoch": 0.2503349242233945, "grad_norm": 0.2872534990310669, "learning_rate": 0.00019659910545493484, "loss": 11.6724, "step": 11959 }, { "epoch": 0.25035585698735663, "grad_norm": 0.22876682877540588, "learning_rate": 0.00019659853849646048, "loss": 11.6628, "step": 11960 }, { "epoch": 0.2503767897513188, "grad_norm": 0.30457866191864014, "learning_rate": 0.00019659797149154925, "loss": 11.684, "step": 11961 }, { "epoch": 0.2503977225152809, "grad_norm": 0.28462445735931396, "learning_rate": 0.00019659740444020148, "loss": 11.6787, "step": 11962 }, { "epoch": 0.25041865527924306, "grad_norm": 0.24939046800136566, "learning_rate": 0.00019659683734241738, "loss": 11.6757, "step": 11963 }, { "epoch": 0.2504395880432052, "grad_norm": 0.27380654215812683, "learning_rate": 0.00019659627019819723, "loss": 11.6813, "step": 11964 }, { "epoch": 0.25046052080716735, "grad_norm": 0.2512611448764801, "learning_rate": 0.00019659570300754135, "loss": 11.6709, "step": 11965 }, { "epoch": 0.25048145357112955, "grad_norm": 0.3238244354724884, "learning_rate": 0.00019659513577044996, "loss": 11.6821, "step": 11966 }, { "epoch": 0.2505023863350917, "grad_norm": 0.29398369789123535, "learning_rate": 0.00019659456848692336, "loss": 11.6763, "step": 11967 }, { "epoch": 0.25052331909905384, "grad_norm": 0.30844563245773315, "learning_rate": 0.0001965940011569618, "loss": 11.6698, "step": 11968 }, { "epoch": 0.250544251863016, "grad_norm": 0.2107807993888855, "learning_rate": 0.0001965934337805656, "loss": 11.6575, "step": 11969 }, { "epoch": 0.25056518462697813, "grad_norm": 0.24427436292171478, "learning_rate": 0.00019659286635773495, "loss": 11.6716, "step": 11970 }, { "epoch": 0.2505861173909403, "grad_norm": 0.2706422209739685, "learning_rate": 0.00019659229888847023, "loss": 11.6778, "step": 11971 }, { "epoch": 0.2506070501549025, "grad_norm": 0.28295668959617615, "learning_rate": 0.0001965917313727716, "loss": 11.6864, "step": 11972 }, { "epoch": 0.2506279829188646, "grad_norm": 0.22580082714557648, "learning_rate": 0.00019659116381063943, "loss": 11.6852, "step": 11973 }, { "epoch": 0.25064891568282677, "grad_norm": 0.29348739981651306, "learning_rate": 0.00019659059620207394, "loss": 11.6512, "step": 11974 }, { "epoch": 0.2506698484467889, "grad_norm": 0.2550048232078552, "learning_rate": 0.00019659002854707542, "loss": 11.6979, "step": 11975 }, { "epoch": 0.25069078121075106, "grad_norm": 0.2672947645187378, "learning_rate": 0.00019658946084564414, "loss": 11.6877, "step": 11976 }, { "epoch": 0.2507117139747132, "grad_norm": 0.31000015139579773, "learning_rate": 0.00019658889309778034, "loss": 11.6921, "step": 11977 }, { "epoch": 0.2507326467386754, "grad_norm": 0.25692248344421387, "learning_rate": 0.00019658832530348435, "loss": 11.6813, "step": 11978 }, { "epoch": 0.25075357950263755, "grad_norm": 0.23392991721630096, "learning_rate": 0.0001965877574627564, "loss": 11.6814, "step": 11979 }, { "epoch": 0.2507745122665997, "grad_norm": 0.23133349418640137, "learning_rate": 0.0001965871895755968, "loss": 11.6662, "step": 11980 }, { "epoch": 0.25079544503056184, "grad_norm": 0.27797046303749084, "learning_rate": 0.0001965866216420058, "loss": 11.6775, "step": 11981 }, { "epoch": 0.250816377794524, "grad_norm": 0.222503662109375, "learning_rate": 0.00019658605366198366, "loss": 11.6636, "step": 11982 }, { "epoch": 0.2508373105584861, "grad_norm": 0.3286615014076233, "learning_rate": 0.0001965854856355307, "loss": 11.6879, "step": 11983 }, { "epoch": 0.25085824332244827, "grad_norm": 0.25007402896881104, "learning_rate": 0.00019658491756264712, "loss": 11.69, "step": 11984 }, { "epoch": 0.25087917608641047, "grad_norm": 0.24677610397338867, "learning_rate": 0.00019658434944333327, "loss": 11.6781, "step": 11985 }, { "epoch": 0.2509001088503726, "grad_norm": 0.23942257463932037, "learning_rate": 0.00019658378127758935, "loss": 11.678, "step": 11986 }, { "epoch": 0.25092104161433476, "grad_norm": 0.27354103326797485, "learning_rate": 0.0001965832130654157, "loss": 11.6603, "step": 11987 }, { "epoch": 0.2509419743782969, "grad_norm": 0.25563663244247437, "learning_rate": 0.00019658264480681257, "loss": 11.6835, "step": 11988 }, { "epoch": 0.25096290714225905, "grad_norm": 0.3111850619316101, "learning_rate": 0.0001965820765017802, "loss": 11.6714, "step": 11989 }, { "epoch": 0.2509838399062212, "grad_norm": 0.27756908535957336, "learning_rate": 0.0001965815081503189, "loss": 11.6824, "step": 11990 }, { "epoch": 0.2510047726701834, "grad_norm": 0.27214333415031433, "learning_rate": 0.00019658093975242892, "loss": 11.6663, "step": 11991 }, { "epoch": 0.25102570543414554, "grad_norm": 0.2632726728916168, "learning_rate": 0.00019658037130811058, "loss": 11.6852, "step": 11992 }, { "epoch": 0.2510466381981077, "grad_norm": 0.28555458784103394, "learning_rate": 0.0001965798028173641, "loss": 11.68, "step": 11993 }, { "epoch": 0.25106757096206983, "grad_norm": 0.2426946461200714, "learning_rate": 0.00019657923428018976, "loss": 11.6912, "step": 11994 }, { "epoch": 0.251088503726032, "grad_norm": 0.7140348553657532, "learning_rate": 0.00019657866569658787, "loss": 11.6396, "step": 11995 }, { "epoch": 0.2511094364899941, "grad_norm": 0.23569759726524353, "learning_rate": 0.00019657809706655866, "loss": 11.6796, "step": 11996 }, { "epoch": 0.25113036925395626, "grad_norm": 0.29882675409317017, "learning_rate": 0.00019657752839010245, "loss": 11.6799, "step": 11997 }, { "epoch": 0.25115130201791847, "grad_norm": 0.2276460826396942, "learning_rate": 0.00019657695966721948, "loss": 11.6698, "step": 11998 }, { "epoch": 0.2511722347818806, "grad_norm": 0.27277663350105286, "learning_rate": 0.00019657639089791003, "loss": 11.672, "step": 11999 }, { "epoch": 0.25119316754584275, "grad_norm": 0.27379587292671204, "learning_rate": 0.00019657582208217436, "loss": 11.6662, "step": 12000 }, { "epoch": 0.25119316754584275, "eval_loss": 11.677458763122559, "eval_runtime": 34.2666, "eval_samples_per_second": 28.045, "eval_steps_per_second": 7.033, "step": 12000 }, { "epoch": 0.2512141003098049, "grad_norm": 0.29923614859580994, "learning_rate": 0.00019657525322001277, "loss": 11.6941, "step": 12001 }, { "epoch": 0.25123503307376704, "grad_norm": 0.21347977221012115, "learning_rate": 0.0001965746843114255, "loss": 11.6614, "step": 12002 }, { "epoch": 0.2512559658377292, "grad_norm": 0.25064828991889954, "learning_rate": 0.00019657411535641285, "loss": 11.6768, "step": 12003 }, { "epoch": 0.2512768986016914, "grad_norm": 0.23598706722259521, "learning_rate": 0.0001965735463549751, "loss": 11.6916, "step": 12004 }, { "epoch": 0.25129783136565353, "grad_norm": 0.29582273960113525, "learning_rate": 0.00019657297730711252, "loss": 11.6751, "step": 12005 }, { "epoch": 0.2513187641296157, "grad_norm": 0.30073654651641846, "learning_rate": 0.00019657240821282533, "loss": 11.676, "step": 12006 }, { "epoch": 0.2513396968935778, "grad_norm": 0.38365861773490906, "learning_rate": 0.00019657183907211389, "loss": 11.6795, "step": 12007 }, { "epoch": 0.25136062965753997, "grad_norm": 0.26007649302482605, "learning_rate": 0.00019657126988497842, "loss": 11.662, "step": 12008 }, { "epoch": 0.2513815624215021, "grad_norm": 0.31536269187927246, "learning_rate": 0.0001965707006514192, "loss": 11.6604, "step": 12009 }, { "epoch": 0.2514024951854643, "grad_norm": 0.25386306643486023, "learning_rate": 0.00019657013137143654, "loss": 11.6724, "step": 12010 }, { "epoch": 0.25142342794942646, "grad_norm": 0.24007108807563782, "learning_rate": 0.00019656956204503064, "loss": 11.6849, "step": 12011 }, { "epoch": 0.2514443607133886, "grad_norm": 0.3737369179725647, "learning_rate": 0.00019656899267220186, "loss": 11.6836, "step": 12012 }, { "epoch": 0.25146529347735075, "grad_norm": 0.250805139541626, "learning_rate": 0.00019656842325295037, "loss": 11.6739, "step": 12013 }, { "epoch": 0.2514862262413129, "grad_norm": 0.2817780673503876, "learning_rate": 0.00019656785378727656, "loss": 11.6856, "step": 12014 }, { "epoch": 0.25150715900527504, "grad_norm": 0.26386216282844543, "learning_rate": 0.00019656728427518063, "loss": 11.6807, "step": 12015 }, { "epoch": 0.2515280917692372, "grad_norm": 0.2634202241897583, "learning_rate": 0.00019656671471666285, "loss": 11.6898, "step": 12016 }, { "epoch": 0.2515490245331994, "grad_norm": 0.2127254456281662, "learning_rate": 0.00019656614511172354, "loss": 11.6825, "step": 12017 }, { "epoch": 0.25156995729716153, "grad_norm": 0.23605747520923615, "learning_rate": 0.00019656557546036293, "loss": 11.6606, "step": 12018 }, { "epoch": 0.2515908900611237, "grad_norm": 0.24622870981693268, "learning_rate": 0.00019656500576258133, "loss": 11.6731, "step": 12019 }, { "epoch": 0.2516118228250858, "grad_norm": 0.2796400785446167, "learning_rate": 0.00019656443601837896, "loss": 11.6684, "step": 12020 }, { "epoch": 0.25163275558904796, "grad_norm": 0.26963964104652405, "learning_rate": 0.00019656386622775619, "loss": 11.671, "step": 12021 }, { "epoch": 0.2516536883530101, "grad_norm": 0.2512536644935608, "learning_rate": 0.00019656329639071318, "loss": 11.6832, "step": 12022 }, { "epoch": 0.2516746211169723, "grad_norm": 0.2879258692264557, "learning_rate": 0.0001965627265072503, "loss": 11.6756, "step": 12023 }, { "epoch": 0.25169555388093445, "grad_norm": 0.26550719141960144, "learning_rate": 0.00019656215657736775, "loss": 11.6811, "step": 12024 }, { "epoch": 0.2517164866448966, "grad_norm": 0.26815065741539, "learning_rate": 0.00019656158660106586, "loss": 11.6884, "step": 12025 }, { "epoch": 0.25173741940885874, "grad_norm": 0.34055766463279724, "learning_rate": 0.00019656101657834486, "loss": 11.6652, "step": 12026 }, { "epoch": 0.2517583521728209, "grad_norm": 0.24174034595489502, "learning_rate": 0.00019656044650920507, "loss": 11.672, "step": 12027 }, { "epoch": 0.25177928493678303, "grad_norm": 0.26138484477996826, "learning_rate": 0.0001965598763936467, "loss": 11.6761, "step": 12028 }, { "epoch": 0.25180021770074523, "grad_norm": 0.21932761371135712, "learning_rate": 0.00019655930623167008, "loss": 11.664, "step": 12029 }, { "epoch": 0.2518211504647074, "grad_norm": 0.30639684200286865, "learning_rate": 0.0001965587360232755, "loss": 11.6858, "step": 12030 }, { "epoch": 0.2518420832286695, "grad_norm": 0.31469985842704773, "learning_rate": 0.00019655816576846316, "loss": 11.6759, "step": 12031 }, { "epoch": 0.25186301599263167, "grad_norm": 0.22477993369102478, "learning_rate": 0.0001965575954672334, "loss": 11.67, "step": 12032 }, { "epoch": 0.2518839487565938, "grad_norm": 0.23885400593280792, "learning_rate": 0.00019655702511958646, "loss": 11.6885, "step": 12033 }, { "epoch": 0.25190488152055596, "grad_norm": 0.3214360475540161, "learning_rate": 0.0001965564547255226, "loss": 11.6793, "step": 12034 }, { "epoch": 0.2519258142845181, "grad_norm": 0.24009378254413605, "learning_rate": 0.00019655588428504211, "loss": 11.6693, "step": 12035 }, { "epoch": 0.2519467470484803, "grad_norm": 0.2244756668806076, "learning_rate": 0.00019655531379814532, "loss": 11.6769, "step": 12036 }, { "epoch": 0.25196767981244245, "grad_norm": 0.2633388638496399, "learning_rate": 0.00019655474326483243, "loss": 11.6799, "step": 12037 }, { "epoch": 0.2519886125764046, "grad_norm": 0.26673001050949097, "learning_rate": 0.00019655417268510375, "loss": 11.6885, "step": 12038 }, { "epoch": 0.25200954534036674, "grad_norm": 0.25771263241767883, "learning_rate": 0.00019655360205895952, "loss": 11.6713, "step": 12039 }, { "epoch": 0.2520304781043289, "grad_norm": 0.3588361144065857, "learning_rate": 0.00019655303138640007, "loss": 11.6773, "step": 12040 }, { "epoch": 0.252051410868291, "grad_norm": 0.22699368000030518, "learning_rate": 0.00019655246066742563, "loss": 11.6632, "step": 12041 }, { "epoch": 0.2520723436322532, "grad_norm": 0.22650837898254395, "learning_rate": 0.00019655188990203647, "loss": 11.6753, "step": 12042 }, { "epoch": 0.25209327639621537, "grad_norm": 0.29995211958885193, "learning_rate": 0.0001965513190902329, "loss": 11.6821, "step": 12043 }, { "epoch": 0.2521142091601775, "grad_norm": 0.2651486098766327, "learning_rate": 0.00019655074823201517, "loss": 11.6845, "step": 12044 }, { "epoch": 0.25213514192413966, "grad_norm": 0.2634795606136322, "learning_rate": 0.00019655017732738358, "loss": 11.6571, "step": 12045 }, { "epoch": 0.2521560746881018, "grad_norm": 0.2897833287715912, "learning_rate": 0.00019654960637633835, "loss": 11.691, "step": 12046 }, { "epoch": 0.25217700745206395, "grad_norm": 0.30205950140953064, "learning_rate": 0.00019654903537887983, "loss": 11.6744, "step": 12047 }, { "epoch": 0.25219794021602615, "grad_norm": 0.2801465392112732, "learning_rate": 0.00019654846433500822, "loss": 11.6779, "step": 12048 }, { "epoch": 0.2522188729799883, "grad_norm": 0.2843897044658661, "learning_rate": 0.00019654789324472383, "loss": 11.6769, "step": 12049 }, { "epoch": 0.25223980574395044, "grad_norm": 4.74504280090332, "learning_rate": 0.00019654732210802695, "loss": 11.5806, "step": 12050 }, { "epoch": 0.2522607385079126, "grad_norm": 0.22202050685882568, "learning_rate": 0.00019654675092491784, "loss": 11.6695, "step": 12051 }, { "epoch": 0.25228167127187473, "grad_norm": 0.23425862193107605, "learning_rate": 0.00019654617969539677, "loss": 11.6801, "step": 12052 }, { "epoch": 0.2523026040358369, "grad_norm": 0.30977708101272583, "learning_rate": 0.000196545608419464, "loss": 11.6864, "step": 12053 }, { "epoch": 0.252323536799799, "grad_norm": 0.2609964609146118, "learning_rate": 0.00019654503709711982, "loss": 11.6865, "step": 12054 }, { "epoch": 0.2523444695637612, "grad_norm": 0.18954463303089142, "learning_rate": 0.0001965444657283645, "loss": 11.6822, "step": 12055 }, { "epoch": 0.25236540232772336, "grad_norm": 0.23445038497447968, "learning_rate": 0.00019654389431319834, "loss": 11.6966, "step": 12056 }, { "epoch": 0.2523863350916855, "grad_norm": 0.24851103127002716, "learning_rate": 0.0001965433228516216, "loss": 11.6633, "step": 12057 }, { "epoch": 0.25240726785564765, "grad_norm": 0.24143555760383606, "learning_rate": 0.00019654275134363457, "loss": 11.6769, "step": 12058 }, { "epoch": 0.2524282006196098, "grad_norm": 0.22207093238830566, "learning_rate": 0.00019654217978923747, "loss": 11.67, "step": 12059 }, { "epoch": 0.25244913338357194, "grad_norm": 0.22969593107700348, "learning_rate": 0.00019654160818843058, "loss": 11.6787, "step": 12060 }, { "epoch": 0.25247006614753414, "grad_norm": 0.3829209804534912, "learning_rate": 0.00019654103654121428, "loss": 11.6843, "step": 12061 }, { "epoch": 0.2524909989114963, "grad_norm": 0.2247435748577118, "learning_rate": 0.0001965404648475887, "loss": 11.6796, "step": 12062 }, { "epoch": 0.25251193167545843, "grad_norm": 0.22245445847511292, "learning_rate": 0.00019653989310755422, "loss": 11.6673, "step": 12063 }, { "epoch": 0.2525328644394206, "grad_norm": 0.2295873463153839, "learning_rate": 0.00019653932132111107, "loss": 11.6657, "step": 12064 }, { "epoch": 0.2525537972033827, "grad_norm": 0.25235429406166077, "learning_rate": 0.00019653874948825952, "loss": 11.6746, "step": 12065 }, { "epoch": 0.25257472996734487, "grad_norm": 0.2641257345676422, "learning_rate": 0.0001965381776089999, "loss": 11.6653, "step": 12066 }, { "epoch": 0.25259566273130707, "grad_norm": 0.33377787470817566, "learning_rate": 0.0001965376056833324, "loss": 11.6785, "step": 12067 }, { "epoch": 0.2526165954952692, "grad_norm": 0.22384971380233765, "learning_rate": 0.00019653703371125736, "loss": 11.654, "step": 12068 }, { "epoch": 0.25263752825923136, "grad_norm": 0.26513999700546265, "learning_rate": 0.00019653646169277505, "loss": 11.6711, "step": 12069 }, { "epoch": 0.2526584610231935, "grad_norm": 0.2216508686542511, "learning_rate": 0.00019653588962788567, "loss": 11.6706, "step": 12070 }, { "epoch": 0.25267939378715565, "grad_norm": 0.2893613576889038, "learning_rate": 0.00019653531751658961, "loss": 11.6766, "step": 12071 }, { "epoch": 0.2527003265511178, "grad_norm": 0.2417161762714386, "learning_rate": 0.00019653474535888705, "loss": 11.6672, "step": 12072 }, { "epoch": 0.25272125931507994, "grad_norm": 0.303437739610672, "learning_rate": 0.00019653417315477833, "loss": 11.6835, "step": 12073 }, { "epoch": 0.25274219207904214, "grad_norm": 0.2508098781108856, "learning_rate": 0.0001965336009042637, "loss": 11.6899, "step": 12074 }, { "epoch": 0.2527631248430043, "grad_norm": 0.22651426494121552, "learning_rate": 0.00019653302860734342, "loss": 11.6814, "step": 12075 }, { "epoch": 0.25278405760696643, "grad_norm": 0.26861777901649475, "learning_rate": 0.0001965324562640178, "loss": 11.6773, "step": 12076 }, { "epoch": 0.2528049903709286, "grad_norm": 0.2419339269399643, "learning_rate": 0.00019653188387428708, "loss": 11.6718, "step": 12077 }, { "epoch": 0.2528259231348907, "grad_norm": 0.23719428479671478, "learning_rate": 0.0001965313114381515, "loss": 11.6752, "step": 12078 }, { "epoch": 0.25284685589885286, "grad_norm": 0.29526785016059875, "learning_rate": 0.00019653073895561145, "loss": 11.672, "step": 12079 }, { "epoch": 0.25286778866281506, "grad_norm": 0.26029691100120544, "learning_rate": 0.00019653016642666712, "loss": 11.665, "step": 12080 }, { "epoch": 0.2528887214267772, "grad_norm": 0.318708598613739, "learning_rate": 0.0001965295938513188, "loss": 11.6927, "step": 12081 }, { "epoch": 0.25290965419073935, "grad_norm": 0.2874426543712616, "learning_rate": 0.00019652902122956677, "loss": 11.6916, "step": 12082 }, { "epoch": 0.2529305869547015, "grad_norm": 0.4144330322742462, "learning_rate": 0.0001965284485614113, "loss": 11.6944, "step": 12083 }, { "epoch": 0.25295151971866364, "grad_norm": 0.2689844071865082, "learning_rate": 0.0001965278758468527, "loss": 11.6959, "step": 12084 }, { "epoch": 0.2529724524826258, "grad_norm": 0.2793649435043335, "learning_rate": 0.00019652730308589118, "loss": 11.6897, "step": 12085 }, { "epoch": 0.25299338524658793, "grad_norm": 0.2215295135974884, "learning_rate": 0.00019652673027852706, "loss": 11.6765, "step": 12086 }, { "epoch": 0.25301431801055013, "grad_norm": 0.23835480213165283, "learning_rate": 0.00019652615742476063, "loss": 11.6706, "step": 12087 }, { "epoch": 0.2530352507745123, "grad_norm": 0.25451338291168213, "learning_rate": 0.0001965255845245921, "loss": 11.678, "step": 12088 }, { "epoch": 0.2530561835384744, "grad_norm": 0.3124629855155945, "learning_rate": 0.00019652501157802182, "loss": 11.689, "step": 12089 }, { "epoch": 0.25307711630243657, "grad_norm": 0.26246610283851624, "learning_rate": 0.00019652443858505004, "loss": 11.6668, "step": 12090 }, { "epoch": 0.2530980490663987, "grad_norm": 0.2307569533586502, "learning_rate": 0.00019652386554567698, "loss": 11.6781, "step": 12091 }, { "epoch": 0.25311898183036086, "grad_norm": 0.2279043346643448, "learning_rate": 0.000196523292459903, "loss": 11.6629, "step": 12092 }, { "epoch": 0.25313991459432306, "grad_norm": 0.2523033320903778, "learning_rate": 0.00019652271932772832, "loss": 11.6749, "step": 12093 }, { "epoch": 0.2531608473582852, "grad_norm": 0.23165324330329895, "learning_rate": 0.00019652214614915327, "loss": 11.6716, "step": 12094 }, { "epoch": 0.25318178012224735, "grad_norm": 0.24081730842590332, "learning_rate": 0.00019652157292417805, "loss": 11.6667, "step": 12095 }, { "epoch": 0.2532027128862095, "grad_norm": 0.25012776255607605, "learning_rate": 0.000196520999652803, "loss": 11.6775, "step": 12096 }, { "epoch": 0.25322364565017164, "grad_norm": 0.31013959646224976, "learning_rate": 0.00019652042633502837, "loss": 11.665, "step": 12097 }, { "epoch": 0.2532445784141338, "grad_norm": 0.23800714313983917, "learning_rate": 0.00019651985297085444, "loss": 11.6743, "step": 12098 }, { "epoch": 0.253265511178096, "grad_norm": 0.2873944044113159, "learning_rate": 0.00019651927956028145, "loss": 11.6655, "step": 12099 }, { "epoch": 0.2532864439420581, "grad_norm": 0.2942279577255249, "learning_rate": 0.00019651870610330974, "loss": 11.6682, "step": 12100 }, { "epoch": 0.25330737670602027, "grad_norm": 0.2202378511428833, "learning_rate": 0.00019651813259993954, "loss": 11.6859, "step": 12101 }, { "epoch": 0.2533283094699824, "grad_norm": 0.26201295852661133, "learning_rate": 0.00019651755905017115, "loss": 11.6626, "step": 12102 }, { "epoch": 0.25334924223394456, "grad_norm": 0.2599363923072815, "learning_rate": 0.00019651698545400482, "loss": 11.6849, "step": 12103 }, { "epoch": 0.2533701749979067, "grad_norm": 0.23080633580684662, "learning_rate": 0.00019651641181144084, "loss": 11.6944, "step": 12104 }, { "epoch": 0.25339110776186885, "grad_norm": 0.2534674406051636, "learning_rate": 0.0001965158381224795, "loss": 11.6959, "step": 12105 }, { "epoch": 0.25341204052583105, "grad_norm": 0.3147134780883789, "learning_rate": 0.00019651526438712108, "loss": 11.6753, "step": 12106 }, { "epoch": 0.2534329732897932, "grad_norm": 0.2400529682636261, "learning_rate": 0.0001965146906053658, "loss": 11.676, "step": 12107 }, { "epoch": 0.25345390605375534, "grad_norm": 0.2620142102241516, "learning_rate": 0.000196514116777214, "loss": 11.6913, "step": 12108 }, { "epoch": 0.2534748388177175, "grad_norm": 0.3007015585899353, "learning_rate": 0.00019651354290266593, "loss": 11.6757, "step": 12109 }, { "epoch": 0.25349577158167963, "grad_norm": 0.24164824187755585, "learning_rate": 0.00019651296898172185, "loss": 11.6931, "step": 12110 }, { "epoch": 0.2535167043456418, "grad_norm": 0.2966882288455963, "learning_rate": 0.00019651239501438205, "loss": 11.6762, "step": 12111 }, { "epoch": 0.253537637109604, "grad_norm": 0.25251492857933044, "learning_rate": 0.00019651182100064682, "loss": 11.6862, "step": 12112 }, { "epoch": 0.2535585698735661, "grad_norm": 0.2654712200164795, "learning_rate": 0.00019651124694051642, "loss": 11.6694, "step": 12113 }, { "epoch": 0.25357950263752826, "grad_norm": 0.32955241203308105, "learning_rate": 0.0001965106728339911, "loss": 11.6698, "step": 12114 }, { "epoch": 0.2536004354014904, "grad_norm": 0.25761929154396057, "learning_rate": 0.0001965100986810712, "loss": 11.6803, "step": 12115 }, { "epoch": 0.25362136816545255, "grad_norm": 0.24981547892093658, "learning_rate": 0.00019650952448175697, "loss": 11.6762, "step": 12116 }, { "epoch": 0.2536423009294147, "grad_norm": 0.3414006233215332, "learning_rate": 0.00019650895023604867, "loss": 11.6702, "step": 12117 }, { "epoch": 0.2536632336933769, "grad_norm": 0.24614053964614868, "learning_rate": 0.00019650837594394657, "loss": 11.6649, "step": 12118 }, { "epoch": 0.25368416645733904, "grad_norm": 0.26306432485580444, "learning_rate": 0.00019650780160545097, "loss": 11.6789, "step": 12119 }, { "epoch": 0.2537050992213012, "grad_norm": 0.25372055172920227, "learning_rate": 0.00019650722722056214, "loss": 11.672, "step": 12120 }, { "epoch": 0.25372603198526333, "grad_norm": 0.2164861559867859, "learning_rate": 0.00019650665278928036, "loss": 11.6752, "step": 12121 }, { "epoch": 0.2537469647492255, "grad_norm": 0.32795414328575134, "learning_rate": 0.00019650607831160586, "loss": 11.6838, "step": 12122 }, { "epoch": 0.2537678975131876, "grad_norm": 0.3142109513282776, "learning_rate": 0.00019650550378753898, "loss": 11.6832, "step": 12123 }, { "epoch": 0.25378883027714977, "grad_norm": 0.23916181921958923, "learning_rate": 0.00019650492921707998, "loss": 11.6771, "step": 12124 }, { "epoch": 0.25380976304111197, "grad_norm": 0.25056713819503784, "learning_rate": 0.0001965043546002291, "loss": 11.6679, "step": 12125 }, { "epoch": 0.2538306958050741, "grad_norm": 0.22289153933525085, "learning_rate": 0.00019650377993698666, "loss": 11.6861, "step": 12126 }, { "epoch": 0.25385162856903626, "grad_norm": 0.284368634223938, "learning_rate": 0.0001965032052273529, "loss": 11.6748, "step": 12127 }, { "epoch": 0.2538725613329984, "grad_norm": 0.27473825216293335, "learning_rate": 0.00019650263047132812, "loss": 11.679, "step": 12128 }, { "epoch": 0.25389349409696055, "grad_norm": 0.26809537410736084, "learning_rate": 0.00019650205566891263, "loss": 11.6725, "step": 12129 }, { "epoch": 0.2539144268609227, "grad_norm": 0.2606107294559479, "learning_rate": 0.00019650148082010662, "loss": 11.6624, "step": 12130 }, { "epoch": 0.2539353596248849, "grad_norm": 0.24354632198810577, "learning_rate": 0.00019650090592491045, "loss": 11.6584, "step": 12131 }, { "epoch": 0.25395629238884704, "grad_norm": 0.24493703246116638, "learning_rate": 0.00019650033098332433, "loss": 11.6935, "step": 12132 }, { "epoch": 0.2539772251528092, "grad_norm": 0.24600282311439514, "learning_rate": 0.0001964997559953486, "loss": 11.668, "step": 12133 }, { "epoch": 0.2539981579167713, "grad_norm": 0.2291339784860611, "learning_rate": 0.00019649918096098347, "loss": 11.6839, "step": 12134 }, { "epoch": 0.2540190906807335, "grad_norm": 0.28297337889671326, "learning_rate": 0.00019649860588022926, "loss": 11.6853, "step": 12135 }, { "epoch": 0.2540400234446956, "grad_norm": 0.23570989072322845, "learning_rate": 0.00019649803075308625, "loss": 11.674, "step": 12136 }, { "epoch": 0.2540609562086578, "grad_norm": 0.26886117458343506, "learning_rate": 0.0001964974555795547, "loss": 11.6745, "step": 12137 }, { "epoch": 0.25408188897261996, "grad_norm": 0.2868748903274536, "learning_rate": 0.0001964968803596349, "loss": 11.6662, "step": 12138 }, { "epoch": 0.2541028217365821, "grad_norm": 0.3070056438446045, "learning_rate": 0.0001964963050933271, "loss": 11.6706, "step": 12139 }, { "epoch": 0.25412375450054425, "grad_norm": 0.23561052978038788, "learning_rate": 0.00019649572978063158, "loss": 11.6617, "step": 12140 }, { "epoch": 0.2541446872645064, "grad_norm": 0.25152620673179626, "learning_rate": 0.0001964951544215486, "loss": 11.676, "step": 12141 }, { "epoch": 0.25416562002846854, "grad_norm": 0.26985156536102295, "learning_rate": 0.00019649457901607854, "loss": 11.6823, "step": 12142 }, { "epoch": 0.2541865527924307, "grad_norm": 0.2765962481498718, "learning_rate": 0.00019649400356422156, "loss": 11.6914, "step": 12143 }, { "epoch": 0.2542074855563929, "grad_norm": 0.790597140789032, "learning_rate": 0.00019649342806597799, "loss": 11.6267, "step": 12144 }, { "epoch": 0.25422841832035503, "grad_norm": 0.26498618721961975, "learning_rate": 0.00019649285252134807, "loss": 11.676, "step": 12145 }, { "epoch": 0.2542493510843172, "grad_norm": 0.26605817675590515, "learning_rate": 0.00019649227693033217, "loss": 11.6857, "step": 12146 }, { "epoch": 0.2542702838482793, "grad_norm": 0.30188336968421936, "learning_rate": 0.0001964917012929304, "loss": 11.6827, "step": 12147 }, { "epoch": 0.25429121661224147, "grad_norm": 0.30573728680610657, "learning_rate": 0.00019649112560914323, "loss": 11.6881, "step": 12148 }, { "epoch": 0.2543121493762036, "grad_norm": 0.29642999172210693, "learning_rate": 0.00019649054987897076, "loss": 11.6653, "step": 12149 }, { "epoch": 0.2543330821401658, "grad_norm": 0.25200706720352173, "learning_rate": 0.00019648997410241342, "loss": 11.6852, "step": 12150 }, { "epoch": 0.25435401490412796, "grad_norm": 0.21818679571151733, "learning_rate": 0.00019648939827947138, "loss": 11.6728, "step": 12151 }, { "epoch": 0.2543749476680901, "grad_norm": 0.32254141569137573, "learning_rate": 0.00019648882241014496, "loss": 11.6872, "step": 12152 }, { "epoch": 0.25439588043205225, "grad_norm": 0.26704171299934387, "learning_rate": 0.00019648824649443444, "loss": 11.6736, "step": 12153 }, { "epoch": 0.2544168131960144, "grad_norm": 0.3258422613143921, "learning_rate": 0.00019648767053234004, "loss": 11.6678, "step": 12154 }, { "epoch": 0.25443774595997654, "grad_norm": 0.33955150842666626, "learning_rate": 0.00019648709452386214, "loss": 11.6903, "step": 12155 }, { "epoch": 0.25445867872393874, "grad_norm": 0.22506669163703918, "learning_rate": 0.00019648651846900092, "loss": 11.6759, "step": 12156 }, { "epoch": 0.2544796114879009, "grad_norm": 0.3069774806499481, "learning_rate": 0.00019648594236775676, "loss": 11.6832, "step": 12157 }, { "epoch": 0.254500544251863, "grad_norm": 0.2647036910057068, "learning_rate": 0.0001964853662201298, "loss": 11.6908, "step": 12158 }, { "epoch": 0.25452147701582517, "grad_norm": 0.23433229327201843, "learning_rate": 0.00019648479002612042, "loss": 11.6743, "step": 12159 }, { "epoch": 0.2545424097797873, "grad_norm": 0.22169546782970428, "learning_rate": 0.00019648421378572886, "loss": 11.6815, "step": 12160 }, { "epoch": 0.25456334254374946, "grad_norm": 0.33984896540641785, "learning_rate": 0.0001964836374989554, "loss": 11.6801, "step": 12161 }, { "epoch": 0.2545842753077116, "grad_norm": 0.26865601539611816, "learning_rate": 0.00019648306116580035, "loss": 11.6588, "step": 12162 }, { "epoch": 0.2546052080716738, "grad_norm": 0.2881278991699219, "learning_rate": 0.00019648248478626394, "loss": 11.6944, "step": 12163 }, { "epoch": 0.25462614083563595, "grad_norm": 0.2769958972930908, "learning_rate": 0.00019648190836034645, "loss": 11.6856, "step": 12164 }, { "epoch": 0.2546470735995981, "grad_norm": 0.2519270181655884, "learning_rate": 0.0001964813318880482, "loss": 11.667, "step": 12165 }, { "epoch": 0.25466800636356024, "grad_norm": 0.23758065700531006, "learning_rate": 0.00019648075536936945, "loss": 11.6749, "step": 12166 }, { "epoch": 0.2546889391275224, "grad_norm": 0.25008857250213623, "learning_rate": 0.00019648017880431043, "loss": 11.6563, "step": 12167 }, { "epoch": 0.25470987189148453, "grad_norm": 0.20284004509449005, "learning_rate": 0.00019647960219287147, "loss": 11.6678, "step": 12168 }, { "epoch": 0.25473080465544673, "grad_norm": 0.20503538846969604, "learning_rate": 0.00019647902553505286, "loss": 11.6674, "step": 12169 }, { "epoch": 0.2547517374194089, "grad_norm": 0.2218487560749054, "learning_rate": 0.00019647844883085482, "loss": 11.6815, "step": 12170 }, { "epoch": 0.254772670183371, "grad_norm": 0.23786431550979614, "learning_rate": 0.00019647787208027767, "loss": 11.6867, "step": 12171 }, { "epoch": 0.25479360294733316, "grad_norm": 0.2566094994544983, "learning_rate": 0.00019647729528332163, "loss": 11.698, "step": 12172 }, { "epoch": 0.2548145357112953, "grad_norm": 0.2221796065568924, "learning_rate": 0.00019647671843998706, "loss": 11.6708, "step": 12173 }, { "epoch": 0.25483546847525745, "grad_norm": 0.28836777806282043, "learning_rate": 0.0001964761415502742, "loss": 11.6586, "step": 12174 }, { "epoch": 0.25485640123921965, "grad_norm": 0.2556149959564209, "learning_rate": 0.00019647556461418333, "loss": 11.6856, "step": 12175 }, { "epoch": 0.2548773340031818, "grad_norm": 0.23459288477897644, "learning_rate": 0.0001964749876317147, "loss": 11.6676, "step": 12176 }, { "epoch": 0.25489826676714394, "grad_norm": 0.26558077335357666, "learning_rate": 0.00019647441060286863, "loss": 11.6801, "step": 12177 }, { "epoch": 0.2549191995311061, "grad_norm": 0.2618390619754791, "learning_rate": 0.00019647383352764536, "loss": 11.6782, "step": 12178 }, { "epoch": 0.25494013229506823, "grad_norm": 0.23797549307346344, "learning_rate": 0.0001964732564060452, "loss": 11.6781, "step": 12179 }, { "epoch": 0.2549610650590304, "grad_norm": 0.21743011474609375, "learning_rate": 0.0001964726792380684, "loss": 11.6595, "step": 12180 }, { "epoch": 0.2549819978229925, "grad_norm": 0.3753588795661926, "learning_rate": 0.00019647210202371524, "loss": 11.6848, "step": 12181 }, { "epoch": 0.2550029305869547, "grad_norm": 0.255778431892395, "learning_rate": 0.00019647152476298603, "loss": 11.6798, "step": 12182 }, { "epoch": 0.25502386335091687, "grad_norm": 0.2580547034740448, "learning_rate": 0.00019647094745588102, "loss": 11.6689, "step": 12183 }, { "epoch": 0.255044796114879, "grad_norm": 0.23838751018047333, "learning_rate": 0.0001964703701024005, "loss": 11.667, "step": 12184 }, { "epoch": 0.25506572887884116, "grad_norm": 0.3220728933811188, "learning_rate": 0.00019646979270254472, "loss": 11.6877, "step": 12185 }, { "epoch": 0.2550866616428033, "grad_norm": 0.2472648173570633, "learning_rate": 0.000196469215256314, "loss": 11.6743, "step": 12186 }, { "epoch": 0.25510759440676545, "grad_norm": 0.2180992066860199, "learning_rate": 0.00019646863776370856, "loss": 11.6801, "step": 12187 }, { "epoch": 0.25512852717072765, "grad_norm": 0.30093857645988464, "learning_rate": 0.00019646806022472873, "loss": 11.6751, "step": 12188 }, { "epoch": 0.2551494599346898, "grad_norm": 0.2488500475883484, "learning_rate": 0.00019646748263937478, "loss": 11.6766, "step": 12189 }, { "epoch": 0.25517039269865194, "grad_norm": 0.29480379819869995, "learning_rate": 0.00019646690500764696, "loss": 11.673, "step": 12190 }, { "epoch": 0.2551913254626141, "grad_norm": 0.20892371237277985, "learning_rate": 0.0001964663273295456, "loss": 11.6784, "step": 12191 }, { "epoch": 0.2552122582265762, "grad_norm": 0.2352825552225113, "learning_rate": 0.0001964657496050709, "loss": 11.6772, "step": 12192 }, { "epoch": 0.25523319099053837, "grad_norm": 0.24646106362342834, "learning_rate": 0.00019646517183422323, "loss": 11.6685, "step": 12193 }, { "epoch": 0.2552541237545005, "grad_norm": 0.2468329519033432, "learning_rate": 0.00019646459401700278, "loss": 11.6853, "step": 12194 }, { "epoch": 0.2552750565184627, "grad_norm": 0.3051709532737732, "learning_rate": 0.0001964640161534099, "loss": 11.6676, "step": 12195 }, { "epoch": 0.25529598928242486, "grad_norm": 0.353223592042923, "learning_rate": 0.0001964634382434448, "loss": 11.695, "step": 12196 }, { "epoch": 0.255316922046387, "grad_norm": 0.21983402967453003, "learning_rate": 0.00019646286028710778, "loss": 11.6721, "step": 12197 }, { "epoch": 0.25533785481034915, "grad_norm": 0.2818264365196228, "learning_rate": 0.00019646228228439915, "loss": 11.6747, "step": 12198 }, { "epoch": 0.2553587875743113, "grad_norm": 0.24788692593574524, "learning_rate": 0.00019646170423531916, "loss": 11.6747, "step": 12199 }, { "epoch": 0.25537972033827344, "grad_norm": 0.29798272252082825, "learning_rate": 0.00019646112613986812, "loss": 11.6686, "step": 12200 }, { "epoch": 0.25540065310223564, "grad_norm": 0.23069153726100922, "learning_rate": 0.00019646054799804627, "loss": 11.691, "step": 12201 }, { "epoch": 0.2554215858661978, "grad_norm": 0.354523241519928, "learning_rate": 0.0001964599698098539, "loss": 11.6747, "step": 12202 }, { "epoch": 0.25544251863015993, "grad_norm": 0.2419274002313614, "learning_rate": 0.0001964593915752913, "loss": 11.6642, "step": 12203 }, { "epoch": 0.2554634513941221, "grad_norm": 0.26256582140922546, "learning_rate": 0.00019645881329435875, "loss": 11.6839, "step": 12204 }, { "epoch": 0.2554843841580842, "grad_norm": 0.28673115372657776, "learning_rate": 0.00019645823496705647, "loss": 11.6724, "step": 12205 }, { "epoch": 0.25550531692204637, "grad_norm": 0.29872605204582214, "learning_rate": 0.00019645765659338478, "loss": 11.6831, "step": 12206 }, { "epoch": 0.25552624968600857, "grad_norm": 0.34245604276657104, "learning_rate": 0.00019645707817334403, "loss": 11.688, "step": 12207 }, { "epoch": 0.2555471824499707, "grad_norm": 0.2536598742008209, "learning_rate": 0.00019645649970693438, "loss": 11.6856, "step": 12208 }, { "epoch": 0.25556811521393286, "grad_norm": 0.22230613231658936, "learning_rate": 0.00019645592119415617, "loss": 11.6618, "step": 12209 }, { "epoch": 0.255589047977895, "grad_norm": 0.2636464238166809, "learning_rate": 0.00019645534263500967, "loss": 11.6762, "step": 12210 }, { "epoch": 0.25560998074185715, "grad_norm": 0.28028059005737305, "learning_rate": 0.00019645476402949515, "loss": 11.6712, "step": 12211 }, { "epoch": 0.2556309135058193, "grad_norm": 0.3217051029205322, "learning_rate": 0.00019645418537761288, "loss": 11.6762, "step": 12212 }, { "epoch": 0.25565184626978144, "grad_norm": 0.2946891486644745, "learning_rate": 0.00019645360667936316, "loss": 11.6933, "step": 12213 }, { "epoch": 0.25567277903374364, "grad_norm": 0.4790501296520233, "learning_rate": 0.00019645302793474626, "loss": 11.6921, "step": 12214 }, { "epoch": 0.2556937117977058, "grad_norm": 0.27371713519096375, "learning_rate": 0.00019645244914376245, "loss": 11.6682, "step": 12215 }, { "epoch": 0.2557146445616679, "grad_norm": 0.3018566071987152, "learning_rate": 0.000196451870306412, "loss": 11.6919, "step": 12216 }, { "epoch": 0.25573557732563007, "grad_norm": 0.27005735039711, "learning_rate": 0.00019645129142269521, "loss": 11.6861, "step": 12217 }, { "epoch": 0.2557565100895922, "grad_norm": 0.3468988537788391, "learning_rate": 0.00019645071249261236, "loss": 11.6891, "step": 12218 }, { "epoch": 0.25577744285355436, "grad_norm": 0.20427604019641876, "learning_rate": 0.00019645013351616373, "loss": 11.6695, "step": 12219 }, { "epoch": 0.25579837561751656, "grad_norm": 0.23080891370773315, "learning_rate": 0.00019644955449334955, "loss": 11.6793, "step": 12220 }, { "epoch": 0.2558193083814787, "grad_norm": 0.27849188446998596, "learning_rate": 0.0001964489754241702, "loss": 11.6885, "step": 12221 }, { "epoch": 0.25584024114544085, "grad_norm": 0.24049215018749237, "learning_rate": 0.00019644839630862584, "loss": 11.6828, "step": 12222 }, { "epoch": 0.255861173909403, "grad_norm": 0.22591660916805267, "learning_rate": 0.00019644781714671684, "loss": 11.6825, "step": 12223 }, { "epoch": 0.25588210667336514, "grad_norm": 0.19959762692451477, "learning_rate": 0.00019644723793844343, "loss": 11.6755, "step": 12224 }, { "epoch": 0.2559030394373273, "grad_norm": 0.26088035106658936, "learning_rate": 0.0001964466586838059, "loss": 11.6835, "step": 12225 }, { "epoch": 0.2559239722012895, "grad_norm": 0.2569616436958313, "learning_rate": 0.0001964460793828045, "loss": 11.6794, "step": 12226 }, { "epoch": 0.25594490496525163, "grad_norm": 0.29061615467071533, "learning_rate": 0.00019644550003543958, "loss": 11.6686, "step": 12227 }, { "epoch": 0.2559658377292138, "grad_norm": 0.2991061210632324, "learning_rate": 0.00019644492064171135, "loss": 11.6603, "step": 12228 }, { "epoch": 0.2559867704931759, "grad_norm": 0.21485714614391327, "learning_rate": 0.0001964443412016201, "loss": 11.6815, "step": 12229 }, { "epoch": 0.25600770325713806, "grad_norm": 0.2217516303062439, "learning_rate": 0.00019644376171516615, "loss": 11.6738, "step": 12230 }, { "epoch": 0.2560286360211002, "grad_norm": 0.27894365787506104, "learning_rate": 0.00019644318218234975, "loss": 11.692, "step": 12231 }, { "epoch": 0.25604956878506235, "grad_norm": 0.27405422925949097, "learning_rate": 0.00019644260260317115, "loss": 11.6763, "step": 12232 }, { "epoch": 0.25607050154902455, "grad_norm": 0.24257168173789978, "learning_rate": 0.0001964420229776307, "loss": 11.6785, "step": 12233 }, { "epoch": 0.2560914343129867, "grad_norm": 0.27059438824653625, "learning_rate": 0.00019644144330572863, "loss": 11.6749, "step": 12234 }, { "epoch": 0.25611236707694884, "grad_norm": 0.2617841064929962, "learning_rate": 0.0001964408635874652, "loss": 11.68, "step": 12235 }, { "epoch": 0.256133299840911, "grad_norm": 0.32628822326660156, "learning_rate": 0.00019644028382284073, "loss": 11.6593, "step": 12236 }, { "epoch": 0.25615423260487313, "grad_norm": 0.2434910386800766, "learning_rate": 0.00019643970401185548, "loss": 11.6876, "step": 12237 }, { "epoch": 0.2561751653688353, "grad_norm": 0.2567468285560608, "learning_rate": 0.00019643912415450972, "loss": 11.6764, "step": 12238 }, { "epoch": 0.2561960981327975, "grad_norm": 0.28460070490837097, "learning_rate": 0.00019643854425080375, "loss": 11.6613, "step": 12239 }, { "epoch": 0.2562170308967596, "grad_norm": 0.24782580137252808, "learning_rate": 0.00019643796430073783, "loss": 11.6902, "step": 12240 }, { "epoch": 0.25623796366072177, "grad_norm": 0.23820708692073822, "learning_rate": 0.00019643738430431227, "loss": 11.6745, "step": 12241 }, { "epoch": 0.2562588964246839, "grad_norm": 0.3354837894439697, "learning_rate": 0.0001964368042615273, "loss": 11.6752, "step": 12242 }, { "epoch": 0.25627982918864606, "grad_norm": 0.40027615427970886, "learning_rate": 0.00019643622417238327, "loss": 11.6922, "step": 12243 }, { "epoch": 0.2563007619526082, "grad_norm": 0.43961450457572937, "learning_rate": 0.00019643564403688037, "loss": 11.6782, "step": 12244 }, { "epoch": 0.2563216947165704, "grad_norm": 0.24858826398849487, "learning_rate": 0.00019643506385501894, "loss": 11.6806, "step": 12245 }, { "epoch": 0.25634262748053255, "grad_norm": 0.3320283889770508, "learning_rate": 0.00019643448362679925, "loss": 11.6892, "step": 12246 }, { "epoch": 0.2563635602444947, "grad_norm": 0.31879228353500366, "learning_rate": 0.00019643390335222154, "loss": 11.6931, "step": 12247 }, { "epoch": 0.25638449300845684, "grad_norm": 0.34712252020835876, "learning_rate": 0.00019643332303128614, "loss": 11.6731, "step": 12248 }, { "epoch": 0.256405425772419, "grad_norm": 0.281777024269104, "learning_rate": 0.0001964327426639933, "loss": 11.6884, "step": 12249 }, { "epoch": 0.2564263585363811, "grad_norm": 0.2692665457725525, "learning_rate": 0.00019643216225034333, "loss": 11.6719, "step": 12250 }, { "epoch": 0.25644729130034327, "grad_norm": 0.21783672273159027, "learning_rate": 0.00019643158179033648, "loss": 11.6776, "step": 12251 }, { "epoch": 0.2564682240643055, "grad_norm": 0.21315187215805054, "learning_rate": 0.000196431001283973, "loss": 11.6833, "step": 12252 }, { "epoch": 0.2564891568282676, "grad_norm": 0.2645193934440613, "learning_rate": 0.0001964304207312533, "loss": 11.6772, "step": 12253 }, { "epoch": 0.25651008959222976, "grad_norm": 0.24662786722183228, "learning_rate": 0.00019642984013217748, "loss": 11.6518, "step": 12254 }, { "epoch": 0.2565310223561919, "grad_norm": 0.29369914531707764, "learning_rate": 0.00019642925948674592, "loss": 11.6639, "step": 12255 }, { "epoch": 0.25655195512015405, "grad_norm": 0.28942808508872986, "learning_rate": 0.00019642867879495886, "loss": 11.6689, "step": 12256 }, { "epoch": 0.2565728878841162, "grad_norm": 0.3539624512195587, "learning_rate": 0.00019642809805681664, "loss": 11.6745, "step": 12257 }, { "epoch": 0.2565938206480784, "grad_norm": 0.25704818964004517, "learning_rate": 0.00019642751727231948, "loss": 11.6756, "step": 12258 }, { "epoch": 0.25661475341204054, "grad_norm": 0.2630975842475891, "learning_rate": 0.0001964269364414677, "loss": 11.6843, "step": 12259 }, { "epoch": 0.2566356861760027, "grad_norm": 0.20723500847816467, "learning_rate": 0.00019642635556426155, "loss": 11.6714, "step": 12260 }, { "epoch": 0.25665661893996483, "grad_norm": 0.25146380066871643, "learning_rate": 0.00019642577464070132, "loss": 11.654, "step": 12261 }, { "epoch": 0.256677551703927, "grad_norm": 0.2254563868045807, "learning_rate": 0.0001964251936707873, "loss": 11.6673, "step": 12262 }, { "epoch": 0.2566984844678891, "grad_norm": 0.25508663058280945, "learning_rate": 0.00019642461265451973, "loss": 11.6718, "step": 12263 }, { "epoch": 0.2567194172318513, "grad_norm": 0.22936581075191498, "learning_rate": 0.00019642403159189894, "loss": 11.6666, "step": 12264 }, { "epoch": 0.25674034999581347, "grad_norm": 0.24625171720981598, "learning_rate": 0.00019642345048292517, "loss": 11.6786, "step": 12265 }, { "epoch": 0.2567612827597756, "grad_norm": 0.2180560976266861, "learning_rate": 0.00019642286932759873, "loss": 11.6838, "step": 12266 }, { "epoch": 0.25678221552373776, "grad_norm": 0.30056262016296387, "learning_rate": 0.00019642228812591985, "loss": 11.6757, "step": 12267 }, { "epoch": 0.2568031482876999, "grad_norm": 0.22665202617645264, "learning_rate": 0.0001964217068778889, "loss": 11.6743, "step": 12268 }, { "epoch": 0.25682408105166205, "grad_norm": 0.2625408470630646, "learning_rate": 0.00019642112558350606, "loss": 11.6611, "step": 12269 }, { "epoch": 0.2568450138156242, "grad_norm": 0.262424111366272, "learning_rate": 0.00019642054424277167, "loss": 11.6884, "step": 12270 }, { "epoch": 0.2568659465795864, "grad_norm": 0.22035272419452667, "learning_rate": 0.00019641996285568595, "loss": 11.666, "step": 12271 }, { "epoch": 0.25688687934354854, "grad_norm": 0.2529217600822449, "learning_rate": 0.00019641938142224926, "loss": 11.6578, "step": 12272 }, { "epoch": 0.2569078121075107, "grad_norm": 0.21720576286315918, "learning_rate": 0.00019641879994246187, "loss": 11.6731, "step": 12273 }, { "epoch": 0.2569287448714728, "grad_norm": 0.25475114583969116, "learning_rate": 0.00019641821841632397, "loss": 11.6629, "step": 12274 }, { "epoch": 0.25694967763543497, "grad_norm": 0.2435263842344284, "learning_rate": 0.00019641763684383594, "loss": 11.6813, "step": 12275 }, { "epoch": 0.2569706103993971, "grad_norm": 0.2622295916080475, "learning_rate": 0.000196417055224998, "loss": 11.6896, "step": 12276 }, { "epoch": 0.2569915431633593, "grad_norm": 0.23549886047840118, "learning_rate": 0.00019641647355981046, "loss": 11.6587, "step": 12277 }, { "epoch": 0.25701247592732146, "grad_norm": 0.3086775541305542, "learning_rate": 0.0001964158918482736, "loss": 11.6739, "step": 12278 }, { "epoch": 0.2570334086912836, "grad_norm": 0.2112557739019394, "learning_rate": 0.00019641531009038766, "loss": 11.663, "step": 12279 }, { "epoch": 0.25705434145524575, "grad_norm": 0.2723347246646881, "learning_rate": 0.00019641472828615296, "loss": 11.6634, "step": 12280 }, { "epoch": 0.2570752742192079, "grad_norm": 0.36583003401756287, "learning_rate": 0.00019641414643556976, "loss": 11.6867, "step": 12281 }, { "epoch": 0.25709620698317004, "grad_norm": 0.29554957151412964, "learning_rate": 0.00019641356453863838, "loss": 11.6698, "step": 12282 }, { "epoch": 0.2571171397471322, "grad_norm": 0.23463445901870728, "learning_rate": 0.00019641298259535903, "loss": 11.6757, "step": 12283 }, { "epoch": 0.2571380725110944, "grad_norm": 0.24537093937397003, "learning_rate": 0.00019641240060573205, "loss": 11.6886, "step": 12284 }, { "epoch": 0.25715900527505653, "grad_norm": 0.24401818215847015, "learning_rate": 0.0001964118185697577, "loss": 11.6772, "step": 12285 }, { "epoch": 0.2571799380390187, "grad_norm": 0.27350321412086487, "learning_rate": 0.00019641123648743626, "loss": 11.6721, "step": 12286 }, { "epoch": 0.2572008708029808, "grad_norm": 0.24487276375293732, "learning_rate": 0.000196410654358768, "loss": 11.6636, "step": 12287 }, { "epoch": 0.25722180356694296, "grad_norm": 0.19801455736160278, "learning_rate": 0.00019641007218375317, "loss": 11.6906, "step": 12288 }, { "epoch": 0.2572427363309051, "grad_norm": 0.24732372164726257, "learning_rate": 0.00019640948996239212, "loss": 11.653, "step": 12289 }, { "epoch": 0.2572636690948673, "grad_norm": 0.2226586490869522, "learning_rate": 0.0001964089076946851, "loss": 11.6743, "step": 12290 }, { "epoch": 0.25728460185882945, "grad_norm": 0.24282674491405487, "learning_rate": 0.00019640832538063237, "loss": 11.6628, "step": 12291 }, { "epoch": 0.2573055346227916, "grad_norm": 0.26894840598106384, "learning_rate": 0.00019640774302023424, "loss": 11.6843, "step": 12292 }, { "epoch": 0.25732646738675374, "grad_norm": 0.2614423930644989, "learning_rate": 0.00019640716061349096, "loss": 11.6698, "step": 12293 }, { "epoch": 0.2573474001507159, "grad_norm": 0.24286770820617676, "learning_rate": 0.00019640657816040284, "loss": 11.6743, "step": 12294 }, { "epoch": 0.25736833291467803, "grad_norm": 0.24825789034366608, "learning_rate": 0.00019640599566097015, "loss": 11.675, "step": 12295 }, { "epoch": 0.25738926567864023, "grad_norm": 0.27160120010375977, "learning_rate": 0.00019640541311519315, "loss": 11.6801, "step": 12296 }, { "epoch": 0.2574101984426024, "grad_norm": 0.38108018040657043, "learning_rate": 0.00019640483052307215, "loss": 11.6796, "step": 12297 }, { "epoch": 0.2574311312065645, "grad_norm": 0.23130951821804047, "learning_rate": 0.0001964042478846074, "loss": 11.6794, "step": 12298 }, { "epoch": 0.25745206397052667, "grad_norm": 0.24279740452766418, "learning_rate": 0.00019640366519979924, "loss": 11.6638, "step": 12299 }, { "epoch": 0.2574729967344888, "grad_norm": 0.2387654036283493, "learning_rate": 0.00019640308246864785, "loss": 11.7024, "step": 12300 }, { "epoch": 0.25749392949845096, "grad_norm": 0.27928757667541504, "learning_rate": 0.0001964024996911536, "loss": 11.6774, "step": 12301 }, { "epoch": 0.2575148622624131, "grad_norm": 0.2341747134923935, "learning_rate": 0.0001964019168673167, "loss": 11.6719, "step": 12302 }, { "epoch": 0.2575357950263753, "grad_norm": 0.2259160578250885, "learning_rate": 0.0001964013339971375, "loss": 11.6657, "step": 12303 }, { "epoch": 0.25755672779033745, "grad_norm": 0.30119097232818604, "learning_rate": 0.00019640075108061624, "loss": 11.6677, "step": 12304 }, { "epoch": 0.2575776605542996, "grad_norm": 0.23230615258216858, "learning_rate": 0.0001964001681177532, "loss": 11.6584, "step": 12305 }, { "epoch": 0.25759859331826174, "grad_norm": 0.24959707260131836, "learning_rate": 0.00019639958510854868, "loss": 11.6645, "step": 12306 }, { "epoch": 0.2576195260822239, "grad_norm": 0.9894187450408936, "learning_rate": 0.0001963990020530029, "loss": 11.752, "step": 12307 }, { "epoch": 0.257640458846186, "grad_norm": 0.2544357478618622, "learning_rate": 0.00019639841895111626, "loss": 11.675, "step": 12308 }, { "epoch": 0.2576613916101482, "grad_norm": 0.3331409692764282, "learning_rate": 0.00019639783580288893, "loss": 11.6301, "step": 12309 }, { "epoch": 0.25768232437411037, "grad_norm": 0.2605505883693695, "learning_rate": 0.00019639725260832123, "loss": 11.6843, "step": 12310 }, { "epoch": 0.2577032571380725, "grad_norm": 0.25098511576652527, "learning_rate": 0.00019639666936741346, "loss": 11.672, "step": 12311 }, { "epoch": 0.25772418990203466, "grad_norm": 0.3384568691253662, "learning_rate": 0.00019639608608016586, "loss": 11.6643, "step": 12312 }, { "epoch": 0.2577451226659968, "grad_norm": 0.2411595582962036, "learning_rate": 0.00019639550274657875, "loss": 11.6802, "step": 12313 }, { "epoch": 0.25776605542995895, "grad_norm": 0.3201711177825928, "learning_rate": 0.00019639491936665235, "loss": 11.7003, "step": 12314 }, { "epoch": 0.25778698819392115, "grad_norm": 0.26806166768074036, "learning_rate": 0.00019639433594038702, "loss": 11.6753, "step": 12315 }, { "epoch": 0.2578079209578833, "grad_norm": 0.30889585614204407, "learning_rate": 0.000196393752467783, "loss": 11.6788, "step": 12316 }, { "epoch": 0.25782885372184544, "grad_norm": 0.2890869081020355, "learning_rate": 0.00019639316894884051, "loss": 11.6676, "step": 12317 }, { "epoch": 0.2578497864858076, "grad_norm": 0.27441760897636414, "learning_rate": 0.00019639258538355994, "loss": 11.6738, "step": 12318 }, { "epoch": 0.25787071924976973, "grad_norm": 0.2518320679664612, "learning_rate": 0.00019639200177194152, "loss": 11.6958, "step": 12319 }, { "epoch": 0.2578916520137319, "grad_norm": 0.2580522298812866, "learning_rate": 0.00019639141811398557, "loss": 11.6792, "step": 12320 }, { "epoch": 0.257912584777694, "grad_norm": 0.2296769618988037, "learning_rate": 0.00019639083440969226, "loss": 11.6627, "step": 12321 }, { "epoch": 0.2579335175416562, "grad_norm": 0.2963205873966217, "learning_rate": 0.00019639025065906198, "loss": 11.6682, "step": 12322 }, { "epoch": 0.25795445030561837, "grad_norm": 0.35134223103523254, "learning_rate": 0.000196389666862095, "loss": 11.6909, "step": 12323 }, { "epoch": 0.2579753830695805, "grad_norm": 0.26965978741645813, "learning_rate": 0.00019638908301879154, "loss": 11.6618, "step": 12324 }, { "epoch": 0.25799631583354266, "grad_norm": 0.27995890378952026, "learning_rate": 0.00019638849912915194, "loss": 11.6796, "step": 12325 }, { "epoch": 0.2580172485975048, "grad_norm": 0.31822872161865234, "learning_rate": 0.00019638791519317644, "loss": 11.6616, "step": 12326 }, { "epoch": 0.25803818136146695, "grad_norm": 0.25480273365974426, "learning_rate": 0.0001963873312108653, "loss": 11.671, "step": 12327 }, { "epoch": 0.25805911412542915, "grad_norm": 0.32044392824172974, "learning_rate": 0.0001963867471822189, "loss": 11.6935, "step": 12328 }, { "epoch": 0.2580800468893913, "grad_norm": 0.22204658389091492, "learning_rate": 0.00019638616310723746, "loss": 11.6542, "step": 12329 }, { "epoch": 0.25810097965335344, "grad_norm": 0.25194424390792847, "learning_rate": 0.00019638557898592123, "loss": 11.6773, "step": 12330 }, { "epoch": 0.2581219124173156, "grad_norm": 0.38961124420166016, "learning_rate": 0.0001963849948182705, "loss": 11.6655, "step": 12331 }, { "epoch": 0.2581428451812777, "grad_norm": 0.23424750566482544, "learning_rate": 0.00019638441060428562, "loss": 11.6797, "step": 12332 }, { "epoch": 0.25816377794523987, "grad_norm": 0.2552388608455658, "learning_rate": 0.00019638382634396678, "loss": 11.6894, "step": 12333 }, { "epoch": 0.25818471070920207, "grad_norm": 0.2605802118778229, "learning_rate": 0.00019638324203731435, "loss": 11.6776, "step": 12334 }, { "epoch": 0.2582056434731642, "grad_norm": 0.29867643117904663, "learning_rate": 0.00019638265768432854, "loss": 11.6888, "step": 12335 }, { "epoch": 0.25822657623712636, "grad_norm": 0.3063011169433594, "learning_rate": 0.00019638207328500967, "loss": 11.6829, "step": 12336 }, { "epoch": 0.2582475090010885, "grad_norm": 0.2695731818675995, "learning_rate": 0.000196381488839358, "loss": 11.6772, "step": 12337 }, { "epoch": 0.25826844176505065, "grad_norm": 0.2568800151348114, "learning_rate": 0.0001963809043473738, "loss": 11.6906, "step": 12338 }, { "epoch": 0.2582893745290128, "grad_norm": 0.250070720911026, "learning_rate": 0.00019638031980905734, "loss": 11.6606, "step": 12339 }, { "epoch": 0.25831030729297494, "grad_norm": 0.3154318332672119, "learning_rate": 0.00019637973522440897, "loss": 11.6625, "step": 12340 }, { "epoch": 0.25833124005693714, "grad_norm": 0.28957080841064453, "learning_rate": 0.00019637915059342892, "loss": 11.682, "step": 12341 }, { "epoch": 0.2583521728208993, "grad_norm": 0.24300403892993927, "learning_rate": 0.0001963785659161175, "loss": 11.6754, "step": 12342 }, { "epoch": 0.25837310558486143, "grad_norm": 0.2690151035785675, "learning_rate": 0.00019637798119247495, "loss": 11.6785, "step": 12343 }, { "epoch": 0.2583940383488236, "grad_norm": 0.275054931640625, "learning_rate": 0.00019637739642250156, "loss": 11.6796, "step": 12344 }, { "epoch": 0.2584149711127857, "grad_norm": 0.27026844024658203, "learning_rate": 0.00019637681160619767, "loss": 11.683, "step": 12345 }, { "epoch": 0.25843590387674786, "grad_norm": 0.2883935272693634, "learning_rate": 0.0001963762267435635, "loss": 11.6837, "step": 12346 }, { "epoch": 0.25845683664071006, "grad_norm": 0.30143219232559204, "learning_rate": 0.00019637564183459932, "loss": 11.6649, "step": 12347 }, { "epoch": 0.2584777694046722, "grad_norm": 0.303893119096756, "learning_rate": 0.00019637505687930545, "loss": 11.6956, "step": 12348 }, { "epoch": 0.25849870216863435, "grad_norm": 0.22942620515823364, "learning_rate": 0.00019637447187768217, "loss": 11.6739, "step": 12349 }, { "epoch": 0.2585196349325965, "grad_norm": 0.29064664244651794, "learning_rate": 0.00019637388682972975, "loss": 11.678, "step": 12350 }, { "epoch": 0.25854056769655864, "grad_norm": 0.3634110689163208, "learning_rate": 0.00019637330173544846, "loss": 11.6603, "step": 12351 }, { "epoch": 0.2585615004605208, "grad_norm": 0.28516578674316406, "learning_rate": 0.00019637271659483859, "loss": 11.673, "step": 12352 }, { "epoch": 0.258582433224483, "grad_norm": 0.23538292944431305, "learning_rate": 0.00019637213140790043, "loss": 11.6644, "step": 12353 }, { "epoch": 0.25860336598844513, "grad_norm": 0.24003416299819946, "learning_rate": 0.00019637154617463425, "loss": 11.6689, "step": 12354 }, { "epoch": 0.2586242987524073, "grad_norm": 0.2372545599937439, "learning_rate": 0.00019637096089504034, "loss": 11.6874, "step": 12355 }, { "epoch": 0.2586452315163694, "grad_norm": 0.23734360933303833, "learning_rate": 0.00019637037556911898, "loss": 11.6642, "step": 12356 }, { "epoch": 0.25866616428033157, "grad_norm": 0.2641700804233551, "learning_rate": 0.00019636979019687045, "loss": 11.6746, "step": 12357 }, { "epoch": 0.2586870970442937, "grad_norm": 0.2939087748527527, "learning_rate": 0.00019636920477829502, "loss": 11.6729, "step": 12358 }, { "epoch": 0.25870802980825586, "grad_norm": 0.2819800078868866, "learning_rate": 0.00019636861931339298, "loss": 11.6642, "step": 12359 }, { "epoch": 0.25872896257221806, "grad_norm": 0.2564164996147156, "learning_rate": 0.00019636803380216464, "loss": 11.6743, "step": 12360 }, { "epoch": 0.2587498953361802, "grad_norm": 0.3328121304512024, "learning_rate": 0.00019636744824461024, "loss": 11.6773, "step": 12361 }, { "epoch": 0.25877082810014235, "grad_norm": 0.27584293484687805, "learning_rate": 0.00019636686264073007, "loss": 11.6838, "step": 12362 }, { "epoch": 0.2587917608641045, "grad_norm": 0.2513900399208069, "learning_rate": 0.00019636627699052444, "loss": 11.6626, "step": 12363 }, { "epoch": 0.25881269362806664, "grad_norm": 0.33700481057167053, "learning_rate": 0.00019636569129399356, "loss": 11.673, "step": 12364 }, { "epoch": 0.2588336263920288, "grad_norm": 0.21774864196777344, "learning_rate": 0.0001963651055511378, "loss": 11.686, "step": 12365 }, { "epoch": 0.258854559155991, "grad_norm": 0.3176232576370239, "learning_rate": 0.0001963645197619574, "loss": 11.6484, "step": 12366 }, { "epoch": 0.2588754919199531, "grad_norm": 0.21350014209747314, "learning_rate": 0.00019636393392645266, "loss": 11.6659, "step": 12367 }, { "epoch": 0.25889642468391527, "grad_norm": 0.2888190746307373, "learning_rate": 0.00019636334804462382, "loss": 11.6606, "step": 12368 }, { "epoch": 0.2589173574478774, "grad_norm": 0.31827834248542786, "learning_rate": 0.00019636276211647117, "loss": 11.6881, "step": 12369 }, { "epoch": 0.25893829021183956, "grad_norm": 0.23330464959144592, "learning_rate": 0.00019636217614199506, "loss": 11.68, "step": 12370 }, { "epoch": 0.2589592229758017, "grad_norm": 0.22773310542106628, "learning_rate": 0.00019636159012119568, "loss": 11.6803, "step": 12371 }, { "epoch": 0.25898015573976385, "grad_norm": 0.22401992976665497, "learning_rate": 0.00019636100405407336, "loss": 11.6697, "step": 12372 }, { "epoch": 0.25900108850372605, "grad_norm": 0.2515951693058014, "learning_rate": 0.0001963604179406284, "loss": 11.6715, "step": 12373 }, { "epoch": 0.2590220212676882, "grad_norm": 0.4334583878517151, "learning_rate": 0.00019635983178086105, "loss": 11.6818, "step": 12374 }, { "epoch": 0.25904295403165034, "grad_norm": 0.2394721508026123, "learning_rate": 0.00019635924557477155, "loss": 11.6884, "step": 12375 }, { "epoch": 0.2590638867956125, "grad_norm": 0.23557806015014648, "learning_rate": 0.00019635865932236026, "loss": 11.6836, "step": 12376 }, { "epoch": 0.25908481955957463, "grad_norm": 0.26226022839546204, "learning_rate": 0.00019635807302362745, "loss": 11.6746, "step": 12377 }, { "epoch": 0.2591057523235368, "grad_norm": 0.26914796233177185, "learning_rate": 0.00019635748667857334, "loss": 11.6791, "step": 12378 }, { "epoch": 0.259126685087499, "grad_norm": 0.24441106617450714, "learning_rate": 0.0001963569002871983, "loss": 11.6827, "step": 12379 }, { "epoch": 0.2591476178514611, "grad_norm": 0.34853342175483704, "learning_rate": 0.00019635631384950255, "loss": 11.6729, "step": 12380 }, { "epoch": 0.25916855061542327, "grad_norm": 0.2937486171722412, "learning_rate": 0.0001963557273654864, "loss": 11.6772, "step": 12381 }, { "epoch": 0.2591894833793854, "grad_norm": 0.5643922090530396, "learning_rate": 0.0001963551408351501, "loss": 11.6849, "step": 12382 }, { "epoch": 0.25921041614334756, "grad_norm": 0.28727027773857117, "learning_rate": 0.00019635455425849396, "loss": 11.6751, "step": 12383 }, { "epoch": 0.2592313489073097, "grad_norm": 0.25652647018432617, "learning_rate": 0.00019635396763551826, "loss": 11.6632, "step": 12384 }, { "epoch": 0.2592522816712719, "grad_norm": 0.251812607049942, "learning_rate": 0.0001963533809662233, "loss": 11.6724, "step": 12385 }, { "epoch": 0.25927321443523405, "grad_norm": 0.2552877366542816, "learning_rate": 0.00019635279425060927, "loss": 11.6842, "step": 12386 }, { "epoch": 0.2592941471991962, "grad_norm": 0.23830640316009521, "learning_rate": 0.0001963522074886766, "loss": 11.6562, "step": 12387 }, { "epoch": 0.25931507996315833, "grad_norm": 0.23650754988193512, "learning_rate": 0.00019635162068042545, "loss": 11.6674, "step": 12388 }, { "epoch": 0.2593360127271205, "grad_norm": 0.3743787109851837, "learning_rate": 0.00019635103382585617, "loss": 11.6762, "step": 12389 }, { "epoch": 0.2593569454910826, "grad_norm": 0.33686956763267517, "learning_rate": 0.00019635044692496899, "loss": 11.679, "step": 12390 }, { "epoch": 0.25937787825504477, "grad_norm": 0.3156907260417938, "learning_rate": 0.0001963498599777642, "loss": 11.6954, "step": 12391 }, { "epoch": 0.25939881101900697, "grad_norm": 0.23888421058654785, "learning_rate": 0.00019634927298424213, "loss": 11.687, "step": 12392 }, { "epoch": 0.2594197437829691, "grad_norm": 0.27973058819770813, "learning_rate": 0.00019634868594440303, "loss": 11.6659, "step": 12393 }, { "epoch": 0.25944067654693126, "grad_norm": 0.23686687648296356, "learning_rate": 0.00019634809885824722, "loss": 11.6758, "step": 12394 }, { "epoch": 0.2594616093108934, "grad_norm": 0.2426145374774933, "learning_rate": 0.0001963475117257749, "loss": 11.6833, "step": 12395 }, { "epoch": 0.25948254207485555, "grad_norm": 0.28168773651123047, "learning_rate": 0.00019634692454698643, "loss": 11.6711, "step": 12396 }, { "epoch": 0.2595034748388177, "grad_norm": 0.24196778237819672, "learning_rate": 0.00019634633732188204, "loss": 11.6668, "step": 12397 }, { "epoch": 0.2595244076027799, "grad_norm": 0.32744088768959045, "learning_rate": 0.00019634575005046204, "loss": 11.6829, "step": 12398 }, { "epoch": 0.25954534036674204, "grad_norm": 0.2696175277233124, "learning_rate": 0.0001963451627327267, "loss": 11.6885, "step": 12399 }, { "epoch": 0.2595662731307042, "grad_norm": 0.31727513670921326, "learning_rate": 0.00019634457536867633, "loss": 11.6778, "step": 12400 }, { "epoch": 0.25958720589466633, "grad_norm": 0.2501017153263092, "learning_rate": 0.0001963439879583112, "loss": 11.6857, "step": 12401 }, { "epoch": 0.2596081386586285, "grad_norm": 0.2511175870895386, "learning_rate": 0.00019634340050163156, "loss": 11.6607, "step": 12402 }, { "epoch": 0.2596290714225906, "grad_norm": 0.2887336015701294, "learning_rate": 0.00019634281299863772, "loss": 11.6817, "step": 12403 }, { "epoch": 0.2596500041865528, "grad_norm": 0.27302441000938416, "learning_rate": 0.00019634222544932995, "loss": 11.6712, "step": 12404 }, { "epoch": 0.25967093695051496, "grad_norm": 0.2872581481933594, "learning_rate": 0.0001963416378537086, "loss": 11.6846, "step": 12405 }, { "epoch": 0.2596918697144771, "grad_norm": 0.30342862010002136, "learning_rate": 0.00019634105021177384, "loss": 11.6835, "step": 12406 }, { "epoch": 0.25971280247843925, "grad_norm": 0.2956918179988861, "learning_rate": 0.00019634046252352597, "loss": 11.6848, "step": 12407 }, { "epoch": 0.2597337352424014, "grad_norm": 0.2947099506855011, "learning_rate": 0.00019633987478896535, "loss": 11.6936, "step": 12408 }, { "epoch": 0.25975466800636354, "grad_norm": 0.23981384932994843, "learning_rate": 0.00019633928700809225, "loss": 11.6737, "step": 12409 }, { "epoch": 0.2597756007703257, "grad_norm": 0.29447105526924133, "learning_rate": 0.0001963386991809069, "loss": 11.6775, "step": 12410 }, { "epoch": 0.2597965335342879, "grad_norm": 0.2676754891872406, "learning_rate": 0.0001963381113074096, "loss": 11.6879, "step": 12411 }, { "epoch": 0.25981746629825003, "grad_norm": 0.26788946986198425, "learning_rate": 0.00019633752338760066, "loss": 11.6629, "step": 12412 }, { "epoch": 0.2598383990622122, "grad_norm": 0.26589205861091614, "learning_rate": 0.00019633693542148032, "loss": 11.6752, "step": 12413 }, { "epoch": 0.2598593318261743, "grad_norm": 0.31887394189834595, "learning_rate": 0.0001963363474090489, "loss": 11.6623, "step": 12414 }, { "epoch": 0.25988026459013647, "grad_norm": 0.4837748110294342, "learning_rate": 0.00019633575935030666, "loss": 11.6807, "step": 12415 }, { "epoch": 0.2599011973540986, "grad_norm": 0.3607413172721863, "learning_rate": 0.00019633517124525388, "loss": 11.672, "step": 12416 }, { "epoch": 0.2599221301180608, "grad_norm": 0.19660349190235138, "learning_rate": 0.00019633458309389087, "loss": 11.6703, "step": 12417 }, { "epoch": 0.25994306288202296, "grad_norm": 0.23763710260391235, "learning_rate": 0.00019633399489621788, "loss": 11.6614, "step": 12418 }, { "epoch": 0.2599639956459851, "grad_norm": 0.29465407133102417, "learning_rate": 0.00019633340665223522, "loss": 11.6647, "step": 12419 }, { "epoch": 0.25998492840994725, "grad_norm": 0.2433137744665146, "learning_rate": 0.00019633281836194314, "loss": 11.6782, "step": 12420 }, { "epoch": 0.2600058611739094, "grad_norm": 0.3229522705078125, "learning_rate": 0.00019633223002534197, "loss": 11.6793, "step": 12421 }, { "epoch": 0.26002679393787154, "grad_norm": 0.3164135217666626, "learning_rate": 0.00019633164164243195, "loss": 11.6758, "step": 12422 }, { "epoch": 0.26004772670183374, "grad_norm": 0.2809085547924042, "learning_rate": 0.00019633105321321338, "loss": 11.6756, "step": 12423 }, { "epoch": 0.2600686594657959, "grad_norm": 0.27395710349082947, "learning_rate": 0.00019633046473768655, "loss": 11.6745, "step": 12424 }, { "epoch": 0.260089592229758, "grad_norm": 0.24223855137825012, "learning_rate": 0.00019632987621585174, "loss": 11.6955, "step": 12425 }, { "epoch": 0.26011052499372017, "grad_norm": 0.22965918481349945, "learning_rate": 0.00019632928764770922, "loss": 11.6699, "step": 12426 }, { "epoch": 0.2601314577576823, "grad_norm": 0.2497628778219223, "learning_rate": 0.0001963286990332593, "loss": 11.6666, "step": 12427 }, { "epoch": 0.26015239052164446, "grad_norm": 0.3872498869895935, "learning_rate": 0.00019632811037250222, "loss": 11.6779, "step": 12428 }, { "epoch": 0.2601733232856066, "grad_norm": 0.22926104068756104, "learning_rate": 0.0001963275216654383, "loss": 11.6632, "step": 12429 }, { "epoch": 0.2601942560495688, "grad_norm": 0.2437467873096466, "learning_rate": 0.00019632693291206778, "loss": 11.6753, "step": 12430 }, { "epoch": 0.26021518881353095, "grad_norm": 0.26938703656196594, "learning_rate": 0.00019632634411239098, "loss": 11.6764, "step": 12431 }, { "epoch": 0.2602361215774931, "grad_norm": 0.24427799880504608, "learning_rate": 0.0001963257552664082, "loss": 11.7008, "step": 12432 }, { "epoch": 0.26025705434145524, "grad_norm": 0.31107252836227417, "learning_rate": 0.0001963251663741197, "loss": 11.6942, "step": 12433 }, { "epoch": 0.2602779871054174, "grad_norm": 0.30620983242988586, "learning_rate": 0.00019632457743552575, "loss": 11.696, "step": 12434 }, { "epoch": 0.26029891986937953, "grad_norm": 0.24139994382858276, "learning_rate": 0.00019632398845062667, "loss": 11.6615, "step": 12435 }, { "epoch": 0.26031985263334173, "grad_norm": 0.2767139971256256, "learning_rate": 0.00019632339941942268, "loss": 11.6772, "step": 12436 }, { "epoch": 0.2603407853973039, "grad_norm": 0.2853412926197052, "learning_rate": 0.00019632281034191414, "loss": 11.6824, "step": 12437 }, { "epoch": 0.260361718161266, "grad_norm": 0.2513192892074585, "learning_rate": 0.00019632222121810123, "loss": 11.6853, "step": 12438 }, { "epoch": 0.26038265092522817, "grad_norm": 0.23608386516571045, "learning_rate": 0.00019632163204798438, "loss": 11.666, "step": 12439 }, { "epoch": 0.2604035836891903, "grad_norm": 0.2580997049808502, "learning_rate": 0.00019632104283156374, "loss": 11.6501, "step": 12440 }, { "epoch": 0.26042451645315245, "grad_norm": 0.28073811531066895, "learning_rate": 0.00019632045356883965, "loss": 11.677, "step": 12441 }, { "epoch": 0.26044544921711466, "grad_norm": 0.2314738631248474, "learning_rate": 0.0001963198642598124, "loss": 11.6603, "step": 12442 }, { "epoch": 0.2604663819810768, "grad_norm": 0.3184232711791992, "learning_rate": 0.00019631927490448226, "loss": 11.6705, "step": 12443 }, { "epoch": 0.26048731474503894, "grad_norm": 0.2554934024810791, "learning_rate": 0.0001963186855028495, "loss": 11.6901, "step": 12444 }, { "epoch": 0.2605082475090011, "grad_norm": 0.33715730905532837, "learning_rate": 0.00019631809605491442, "loss": 11.6748, "step": 12445 }, { "epoch": 0.26052918027296323, "grad_norm": 0.253301739692688, "learning_rate": 0.0001963175065606773, "loss": 11.6893, "step": 12446 }, { "epoch": 0.2605501130369254, "grad_norm": 0.2742748558521271, "learning_rate": 0.00019631691702013844, "loss": 11.6812, "step": 12447 }, { "epoch": 0.2605710458008875, "grad_norm": 0.25232401490211487, "learning_rate": 0.00019631632743329812, "loss": 11.6808, "step": 12448 }, { "epoch": 0.2605919785648497, "grad_norm": 0.24683639407157898, "learning_rate": 0.0001963157378001566, "loss": 11.6824, "step": 12449 }, { "epoch": 0.26061291132881187, "grad_norm": 0.24729155004024506, "learning_rate": 0.00019631514812071415, "loss": 11.6655, "step": 12450 }, { "epoch": 0.260633844092774, "grad_norm": 0.2631022036075592, "learning_rate": 0.00019631455839497108, "loss": 11.6797, "step": 12451 }, { "epoch": 0.26065477685673616, "grad_norm": 0.2993323802947998, "learning_rate": 0.0001963139686229277, "loss": 11.6914, "step": 12452 }, { "epoch": 0.2606757096206983, "grad_norm": 0.278576523065567, "learning_rate": 0.00019631337880458426, "loss": 11.6748, "step": 12453 }, { "epoch": 0.26069664238466045, "grad_norm": 0.3329371511936188, "learning_rate": 0.00019631278893994102, "loss": 11.6686, "step": 12454 }, { "epoch": 0.26071757514862265, "grad_norm": 0.23689284920692444, "learning_rate": 0.0001963121990289983, "loss": 11.6739, "step": 12455 }, { "epoch": 0.2607385079125848, "grad_norm": 0.2760605812072754, "learning_rate": 0.00019631160907175642, "loss": 11.6918, "step": 12456 }, { "epoch": 0.26075944067654694, "grad_norm": 0.24307522177696228, "learning_rate": 0.00019631101906821558, "loss": 11.6874, "step": 12457 }, { "epoch": 0.2607803734405091, "grad_norm": 0.23033955693244934, "learning_rate": 0.0001963104290183761, "loss": 11.68, "step": 12458 }, { "epoch": 0.26080130620447123, "grad_norm": 0.23290322721004486, "learning_rate": 0.0001963098389222383, "loss": 11.657, "step": 12459 }, { "epoch": 0.2608222389684334, "grad_norm": 0.3432123363018036, "learning_rate": 0.0001963092487798024, "loss": 11.6814, "step": 12460 }, { "epoch": 0.2608431717323955, "grad_norm": 0.24519911408424377, "learning_rate": 0.00019630865859106872, "loss": 11.6783, "step": 12461 }, { "epoch": 0.2608641044963577, "grad_norm": 0.25418785214424133, "learning_rate": 0.0001963080683560375, "loss": 11.6739, "step": 12462 }, { "epoch": 0.26088503726031986, "grad_norm": 0.26744458079338074, "learning_rate": 0.00019630747807470914, "loss": 11.6597, "step": 12463 }, { "epoch": 0.260905970024282, "grad_norm": 0.2516375184059143, "learning_rate": 0.00019630688774708378, "loss": 11.6688, "step": 12464 }, { "epoch": 0.26092690278824415, "grad_norm": 0.27338463068008423, "learning_rate": 0.0001963062973731618, "loss": 11.6812, "step": 12465 }, { "epoch": 0.2609478355522063, "grad_norm": 0.29511335492134094, "learning_rate": 0.00019630570695294345, "loss": 11.6923, "step": 12466 }, { "epoch": 0.26096876831616844, "grad_norm": 0.31851130723953247, "learning_rate": 0.000196305116486429, "loss": 11.6704, "step": 12467 }, { "epoch": 0.26098970108013064, "grad_norm": 0.32141393423080444, "learning_rate": 0.00019630452597361878, "loss": 11.6801, "step": 12468 }, { "epoch": 0.2610106338440928, "grad_norm": 0.2963830232620239, "learning_rate": 0.00019630393541451304, "loss": 11.6855, "step": 12469 }, { "epoch": 0.26103156660805493, "grad_norm": 0.2667272686958313, "learning_rate": 0.00019630334480911206, "loss": 11.6821, "step": 12470 }, { "epoch": 0.2610524993720171, "grad_norm": 0.23474155366420746, "learning_rate": 0.0001963027541574161, "loss": 11.6826, "step": 12471 }, { "epoch": 0.2610734321359792, "grad_norm": 0.30062252283096313, "learning_rate": 0.00019630216345942553, "loss": 11.6651, "step": 12472 }, { "epoch": 0.26109436489994137, "grad_norm": 0.2771848738193512, "learning_rate": 0.00019630157271514055, "loss": 11.6834, "step": 12473 }, { "epoch": 0.26111529766390357, "grad_norm": 0.2738707661628723, "learning_rate": 0.0001963009819245615, "loss": 11.6646, "step": 12474 }, { "epoch": 0.2611362304278657, "grad_norm": 0.1987657994031906, "learning_rate": 0.0001963003910876886, "loss": 11.6655, "step": 12475 }, { "epoch": 0.26115716319182786, "grad_norm": 0.22682052850723267, "learning_rate": 0.00019629980020452218, "loss": 11.6588, "step": 12476 }, { "epoch": 0.26117809595579, "grad_norm": 0.2568027377128601, "learning_rate": 0.00019629920927506254, "loss": 11.6797, "step": 12477 }, { "epoch": 0.26119902871975215, "grad_norm": 0.2892286479473114, "learning_rate": 0.00019629861829930994, "loss": 11.6658, "step": 12478 }, { "epoch": 0.2612199614837143, "grad_norm": 0.22972223162651062, "learning_rate": 0.00019629802727726462, "loss": 11.6685, "step": 12479 }, { "epoch": 0.26124089424767644, "grad_norm": 0.26075056195259094, "learning_rate": 0.00019629743620892695, "loss": 11.6831, "step": 12480 }, { "epoch": 0.26126182701163864, "grad_norm": 0.22855499386787415, "learning_rate": 0.00019629684509429716, "loss": 11.701, "step": 12481 }, { "epoch": 0.2612827597756008, "grad_norm": 0.2804809808731079, "learning_rate": 0.00019629625393337557, "loss": 11.6632, "step": 12482 }, { "epoch": 0.2613036925395629, "grad_norm": 0.24761171638965607, "learning_rate": 0.0001962956627261624, "loss": 11.6775, "step": 12483 }, { "epoch": 0.26132462530352507, "grad_norm": 0.26280128955841064, "learning_rate": 0.000196295071472658, "loss": 11.6817, "step": 12484 }, { "epoch": 0.2613455580674872, "grad_norm": 0.28011247515678406, "learning_rate": 0.00019629448017286261, "loss": 11.667, "step": 12485 }, { "epoch": 0.26136649083144936, "grad_norm": 0.2700614333152771, "learning_rate": 0.00019629388882677656, "loss": 11.6711, "step": 12486 }, { "epoch": 0.26138742359541156, "grad_norm": 0.2591288685798645, "learning_rate": 0.0001962932974344001, "loss": 11.6755, "step": 12487 }, { "epoch": 0.2614083563593737, "grad_norm": 0.22442986071109772, "learning_rate": 0.0001962927059957335, "loss": 11.6809, "step": 12488 }, { "epoch": 0.26142928912333585, "grad_norm": 0.2765854001045227, "learning_rate": 0.0001962921145107771, "loss": 11.671, "step": 12489 }, { "epoch": 0.261450221887298, "grad_norm": 0.24199332296848297, "learning_rate": 0.00019629152297953112, "loss": 11.6697, "step": 12490 }, { "epoch": 0.26147115465126014, "grad_norm": 0.36005157232284546, "learning_rate": 0.0001962909314019959, "loss": 11.7007, "step": 12491 }, { "epoch": 0.2614920874152223, "grad_norm": 0.3704638183116913, "learning_rate": 0.0001962903397781717, "loss": 11.6772, "step": 12492 }, { "epoch": 0.2615130201791845, "grad_norm": 0.27034032344818115, "learning_rate": 0.00019628974810805878, "loss": 11.6769, "step": 12493 }, { "epoch": 0.26153395294314663, "grad_norm": 0.36799415946006775, "learning_rate": 0.00019628915639165745, "loss": 11.6691, "step": 12494 }, { "epoch": 0.2615548857071088, "grad_norm": 0.2663467526435852, "learning_rate": 0.000196288564628968, "loss": 11.6891, "step": 12495 }, { "epoch": 0.2615758184710709, "grad_norm": 0.23210923373699188, "learning_rate": 0.00019628797281999073, "loss": 11.6866, "step": 12496 }, { "epoch": 0.26159675123503306, "grad_norm": 0.27153512835502625, "learning_rate": 0.00019628738096472587, "loss": 11.6784, "step": 12497 }, { "epoch": 0.2616176839989952, "grad_norm": 0.26401790976524353, "learning_rate": 0.00019628678906317374, "loss": 11.6761, "step": 12498 }, { "epoch": 0.26163861676295735, "grad_norm": 0.24349690973758698, "learning_rate": 0.00019628619711533466, "loss": 11.6628, "step": 12499 }, { "epoch": 0.26165954952691955, "grad_norm": 0.26564544439315796, "learning_rate": 0.00019628560512120885, "loss": 11.6857, "step": 12500 }, { "epoch": 0.2616804822908817, "grad_norm": 0.23626483976840973, "learning_rate": 0.0001962850130807966, "loss": 11.6576, "step": 12501 }, { "epoch": 0.26170141505484384, "grad_norm": 0.23138007521629333, "learning_rate": 0.00019628442099409825, "loss": 11.6666, "step": 12502 }, { "epoch": 0.261722347818806, "grad_norm": 0.30998459458351135, "learning_rate": 0.00019628382886111403, "loss": 11.7043, "step": 12503 }, { "epoch": 0.26174328058276813, "grad_norm": 0.2726896405220032, "learning_rate": 0.00019628323668184424, "loss": 11.6895, "step": 12504 }, { "epoch": 0.2617642133467303, "grad_norm": 0.20768268406391144, "learning_rate": 0.00019628264445628916, "loss": 11.6648, "step": 12505 }, { "epoch": 0.2617851461106925, "grad_norm": 0.23289908468723297, "learning_rate": 0.0001962820521844491, "loss": 11.678, "step": 12506 }, { "epoch": 0.2618060788746546, "grad_norm": 0.3127429783344269, "learning_rate": 0.00019628145986632433, "loss": 11.6654, "step": 12507 }, { "epoch": 0.26182701163861677, "grad_norm": 0.5953047871589661, "learning_rate": 0.00019628086750191512, "loss": 11.704, "step": 12508 }, { "epoch": 0.2618479444025789, "grad_norm": 0.22985151410102844, "learning_rate": 0.00019628027509122179, "loss": 11.6782, "step": 12509 }, { "epoch": 0.26186887716654106, "grad_norm": 0.24088765680789948, "learning_rate": 0.00019627968263424456, "loss": 11.6691, "step": 12510 }, { "epoch": 0.2618898099305032, "grad_norm": 0.23700369894504547, "learning_rate": 0.0001962790901309838, "loss": 11.671, "step": 12511 }, { "epoch": 0.2619107426944654, "grad_norm": 0.34343570470809937, "learning_rate": 0.00019627849758143974, "loss": 11.6863, "step": 12512 }, { "epoch": 0.26193167545842755, "grad_norm": 0.2909896969795227, "learning_rate": 0.00019627790498561267, "loss": 11.6793, "step": 12513 }, { "epoch": 0.2619526082223897, "grad_norm": 0.2358667254447937, "learning_rate": 0.0001962773123435029, "loss": 11.6753, "step": 12514 }, { "epoch": 0.26197354098635184, "grad_norm": 0.276515930891037, "learning_rate": 0.00019627671965511068, "loss": 11.6924, "step": 12515 }, { "epoch": 0.261994473750314, "grad_norm": 0.24846182763576508, "learning_rate": 0.0001962761269204363, "loss": 11.6813, "step": 12516 }, { "epoch": 0.26201540651427613, "grad_norm": 0.2826305329799652, "learning_rate": 0.00019627553413948006, "loss": 11.6789, "step": 12517 }, { "epoch": 0.2620363392782383, "grad_norm": 0.2613489329814911, "learning_rate": 0.00019627494131224226, "loss": 11.6876, "step": 12518 }, { "epoch": 0.2620572720422005, "grad_norm": 0.29834893345832825, "learning_rate": 0.00019627434843872313, "loss": 11.6696, "step": 12519 }, { "epoch": 0.2620782048061626, "grad_norm": 0.23584754765033722, "learning_rate": 0.00019627375551892304, "loss": 11.6853, "step": 12520 }, { "epoch": 0.26209913757012476, "grad_norm": 0.24071741104125977, "learning_rate": 0.00019627316255284224, "loss": 11.6764, "step": 12521 }, { "epoch": 0.2621200703340869, "grad_norm": 0.24105378985404968, "learning_rate": 0.00019627256954048093, "loss": 11.6837, "step": 12522 }, { "epoch": 0.26214100309804905, "grad_norm": 0.2573118209838867, "learning_rate": 0.0001962719764818395, "loss": 11.6638, "step": 12523 }, { "epoch": 0.2621619358620112, "grad_norm": 0.24639801681041718, "learning_rate": 0.00019627138337691822, "loss": 11.6851, "step": 12524 }, { "epoch": 0.2621828686259734, "grad_norm": 0.3446851074695587, "learning_rate": 0.00019627079022571734, "loss": 11.6698, "step": 12525 }, { "epoch": 0.26220380138993554, "grad_norm": 0.33145415782928467, "learning_rate": 0.00019627019702823714, "loss": 11.6734, "step": 12526 }, { "epoch": 0.2622247341538977, "grad_norm": 0.249058336019516, "learning_rate": 0.00019626960378447795, "loss": 11.6639, "step": 12527 }, { "epoch": 0.26224566691785983, "grad_norm": 0.28300055861473083, "learning_rate": 0.00019626901049444003, "loss": 11.6878, "step": 12528 }, { "epoch": 0.262266599681822, "grad_norm": 0.2805570960044861, "learning_rate": 0.00019626841715812368, "loss": 11.6865, "step": 12529 }, { "epoch": 0.2622875324457841, "grad_norm": 0.21832118928432465, "learning_rate": 0.00019626782377552918, "loss": 11.6655, "step": 12530 }, { "epoch": 0.2623084652097463, "grad_norm": 0.21689866483211517, "learning_rate": 0.00019626723034665678, "loss": 11.6673, "step": 12531 }, { "epoch": 0.26232939797370847, "grad_norm": 0.2645220160484314, "learning_rate": 0.0001962666368715068, "loss": 11.6737, "step": 12532 }, { "epoch": 0.2623503307376706, "grad_norm": 0.22452329099178314, "learning_rate": 0.00019626604335007953, "loss": 11.6721, "step": 12533 }, { "epoch": 0.26237126350163276, "grad_norm": 0.2760728895664215, "learning_rate": 0.0001962654497823752, "loss": 11.6663, "step": 12534 }, { "epoch": 0.2623921962655949, "grad_norm": 0.33643609285354614, "learning_rate": 0.0001962648561683942, "loss": 11.6736, "step": 12535 }, { "epoch": 0.26241312902955705, "grad_norm": 0.24585038423538208, "learning_rate": 0.00019626426250813673, "loss": 11.6709, "step": 12536 }, { "epoch": 0.2624340617935192, "grad_norm": 0.29824718832969666, "learning_rate": 0.0001962636688016031, "loss": 11.6724, "step": 12537 }, { "epoch": 0.2624549945574814, "grad_norm": 0.23791426420211792, "learning_rate": 0.00019626307504879358, "loss": 11.6714, "step": 12538 }, { "epoch": 0.26247592732144354, "grad_norm": 0.25764647126197815, "learning_rate": 0.00019626248124970849, "loss": 11.6722, "step": 12539 }, { "epoch": 0.2624968600854057, "grad_norm": 0.33886390924453735, "learning_rate": 0.00019626188740434807, "loss": 11.6729, "step": 12540 }, { "epoch": 0.2625177928493678, "grad_norm": 0.23315998911857605, "learning_rate": 0.00019626129351271268, "loss": 11.6775, "step": 12541 }, { "epoch": 0.26253872561332997, "grad_norm": 0.2491483986377716, "learning_rate": 0.00019626069957480251, "loss": 11.6642, "step": 12542 }, { "epoch": 0.2625596583772921, "grad_norm": 0.28250837326049805, "learning_rate": 0.00019626010559061792, "loss": 11.6819, "step": 12543 }, { "epoch": 0.2625805911412543, "grad_norm": 0.2760452926158905, "learning_rate": 0.00019625951156015915, "loss": 11.6895, "step": 12544 }, { "epoch": 0.26260152390521646, "grad_norm": 0.2386472076177597, "learning_rate": 0.00019625891748342652, "loss": 11.6582, "step": 12545 }, { "epoch": 0.2626224566691786, "grad_norm": 0.2426125854253769, "learning_rate": 0.00019625832336042033, "loss": 11.6804, "step": 12546 }, { "epoch": 0.26264338943314075, "grad_norm": 0.32186853885650635, "learning_rate": 0.00019625772919114077, "loss": 11.6582, "step": 12547 }, { "epoch": 0.2626643221971029, "grad_norm": 0.28595495223999023, "learning_rate": 0.00019625713497558822, "loss": 11.6835, "step": 12548 }, { "epoch": 0.26268525496106504, "grad_norm": 0.3382599353790283, "learning_rate": 0.00019625654071376292, "loss": 11.7014, "step": 12549 }, { "epoch": 0.2627061877250272, "grad_norm": 0.3451959788799286, "learning_rate": 0.0001962559464056652, "loss": 11.6901, "step": 12550 }, { "epoch": 0.2627271204889894, "grad_norm": 0.2298276424407959, "learning_rate": 0.0001962553520512953, "loss": 11.6775, "step": 12551 }, { "epoch": 0.26274805325295153, "grad_norm": 0.21803459525108337, "learning_rate": 0.00019625475765065354, "loss": 11.6713, "step": 12552 }, { "epoch": 0.2627689860169137, "grad_norm": 0.2580867409706116, "learning_rate": 0.00019625416320374014, "loss": 11.671, "step": 12553 }, { "epoch": 0.2627899187808758, "grad_norm": 0.2117619514465332, "learning_rate": 0.0001962535687105555, "loss": 11.6604, "step": 12554 }, { "epoch": 0.26281085154483796, "grad_norm": 0.2572426199913025, "learning_rate": 0.0001962529741710998, "loss": 11.6772, "step": 12555 }, { "epoch": 0.2628317843088001, "grad_norm": 0.24097469449043274, "learning_rate": 0.00019625237958537336, "loss": 11.6701, "step": 12556 }, { "epoch": 0.2628527170727623, "grad_norm": 0.20892119407653809, "learning_rate": 0.00019625178495337654, "loss": 11.6548, "step": 12557 }, { "epoch": 0.26287364983672445, "grad_norm": 0.2436133474111557, "learning_rate": 0.00019625119027510949, "loss": 11.6864, "step": 12558 }, { "epoch": 0.2628945826006866, "grad_norm": 0.23127107322216034, "learning_rate": 0.0001962505955505726, "loss": 11.6689, "step": 12559 }, { "epoch": 0.26291551536464874, "grad_norm": 0.22901716828346252, "learning_rate": 0.00019625000077976606, "loss": 11.683, "step": 12560 }, { "epoch": 0.2629364481286109, "grad_norm": 0.3005974590778351, "learning_rate": 0.0001962494059626903, "loss": 11.6794, "step": 12561 }, { "epoch": 0.26295738089257303, "grad_norm": 0.2853096127510071, "learning_rate": 0.00019624881109934545, "loss": 11.6929, "step": 12562 }, { "epoch": 0.26297831365653523, "grad_norm": 0.3270168602466583, "learning_rate": 0.0001962482161897319, "loss": 11.699, "step": 12563 }, { "epoch": 0.2629992464204974, "grad_norm": 0.268652081489563, "learning_rate": 0.0001962476212338499, "loss": 11.6808, "step": 12564 }, { "epoch": 0.2630201791844595, "grad_norm": 0.26139870285987854, "learning_rate": 0.00019624702623169973, "loss": 11.6613, "step": 12565 }, { "epoch": 0.26304111194842167, "grad_norm": 0.2808021605014801, "learning_rate": 0.00019624643118328172, "loss": 11.6829, "step": 12566 }, { "epoch": 0.2630620447123838, "grad_norm": 0.33091917634010315, "learning_rate": 0.0001962458360885961, "loss": 11.6803, "step": 12567 }, { "epoch": 0.26308297747634596, "grad_norm": 0.3151460587978363, "learning_rate": 0.00019624524094764316, "loss": 11.6831, "step": 12568 }, { "epoch": 0.2631039102403081, "grad_norm": 0.2143566608428955, "learning_rate": 0.00019624464576042323, "loss": 11.6744, "step": 12569 }, { "epoch": 0.2631248430042703, "grad_norm": 0.21310558915138245, "learning_rate": 0.00019624405052693654, "loss": 11.6641, "step": 12570 }, { "epoch": 0.26314577576823245, "grad_norm": 0.3091428577899933, "learning_rate": 0.00019624345524718343, "loss": 11.6722, "step": 12571 }, { "epoch": 0.2631667085321946, "grad_norm": 0.2520226240158081, "learning_rate": 0.00019624285992116418, "loss": 11.6759, "step": 12572 }, { "epoch": 0.26318764129615674, "grad_norm": 0.23743754625320435, "learning_rate": 0.00019624226454887901, "loss": 11.6776, "step": 12573 }, { "epoch": 0.2632085740601189, "grad_norm": 0.29270410537719727, "learning_rate": 0.0001962416691303283, "loss": 11.6743, "step": 12574 }, { "epoch": 0.263229506824081, "grad_norm": 0.277680367231369, "learning_rate": 0.00019624107366551224, "loss": 11.6762, "step": 12575 }, { "epoch": 0.26325043958804323, "grad_norm": 0.29947251081466675, "learning_rate": 0.00019624047815443123, "loss": 11.6661, "step": 12576 }, { "epoch": 0.2632713723520054, "grad_norm": 0.22404521703720093, "learning_rate": 0.00019623988259708547, "loss": 11.6587, "step": 12577 }, { "epoch": 0.2632923051159675, "grad_norm": 0.22333115339279175, "learning_rate": 0.00019623928699347525, "loss": 11.6746, "step": 12578 }, { "epoch": 0.26331323787992966, "grad_norm": 0.21615463495254517, "learning_rate": 0.00019623869134360088, "loss": 11.6917, "step": 12579 }, { "epoch": 0.2633341706438918, "grad_norm": 0.2837461531162262, "learning_rate": 0.00019623809564746266, "loss": 11.678, "step": 12580 }, { "epoch": 0.26335510340785395, "grad_norm": 0.26598384976387024, "learning_rate": 0.00019623749990506085, "loss": 11.6937, "step": 12581 }, { "epoch": 0.26337603617181615, "grad_norm": 0.3004339039325714, "learning_rate": 0.00019623690411639576, "loss": 11.6792, "step": 12582 }, { "epoch": 0.2633969689357783, "grad_norm": 0.2953823506832123, "learning_rate": 0.00019623630828146765, "loss": 11.6735, "step": 12583 }, { "epoch": 0.26341790169974044, "grad_norm": 0.283947616815567, "learning_rate": 0.0001962357124002768, "loss": 11.6892, "step": 12584 }, { "epoch": 0.2634388344637026, "grad_norm": 0.2813152074813843, "learning_rate": 0.0001962351164728235, "loss": 11.676, "step": 12585 }, { "epoch": 0.26345976722766473, "grad_norm": 0.25283271074295044, "learning_rate": 0.0001962345204991081, "loss": 11.6773, "step": 12586 }, { "epoch": 0.2634806999916269, "grad_norm": 0.2806285321712494, "learning_rate": 0.00019623392447913084, "loss": 11.6754, "step": 12587 }, { "epoch": 0.263501632755589, "grad_norm": 0.22224989533424377, "learning_rate": 0.00019623332841289196, "loss": 11.6663, "step": 12588 }, { "epoch": 0.2635225655195512, "grad_norm": 0.22756211459636688, "learning_rate": 0.0001962327323003918, "loss": 11.6622, "step": 12589 }, { "epoch": 0.26354349828351337, "grad_norm": 0.27455511689186096, "learning_rate": 0.00019623213614163064, "loss": 11.6917, "step": 12590 }, { "epoch": 0.2635644310474755, "grad_norm": 0.2502821981906891, "learning_rate": 0.00019623153993660882, "loss": 11.6781, "step": 12591 }, { "epoch": 0.26358536381143766, "grad_norm": 0.24620814621448517, "learning_rate": 0.0001962309436853265, "loss": 11.6748, "step": 12592 }, { "epoch": 0.2636062965753998, "grad_norm": 0.23293542861938477, "learning_rate": 0.00019623034738778405, "loss": 11.6746, "step": 12593 }, { "epoch": 0.26362722933936195, "grad_norm": 0.303907573223114, "learning_rate": 0.00019622975104398175, "loss": 11.6652, "step": 12594 }, { "epoch": 0.26364816210332415, "grad_norm": 0.2342216968536377, "learning_rate": 0.0001962291546539199, "loss": 11.6648, "step": 12595 }, { "epoch": 0.2636690948672863, "grad_norm": 0.28591781854629517, "learning_rate": 0.00019622855821759875, "loss": 11.6628, "step": 12596 }, { "epoch": 0.26369002763124844, "grad_norm": 0.27616676688194275, "learning_rate": 0.0001962279617350186, "loss": 11.6859, "step": 12597 }, { "epoch": 0.2637109603952106, "grad_norm": 0.29279789328575134, "learning_rate": 0.00019622736520617974, "loss": 11.6869, "step": 12598 }, { "epoch": 0.2637318931591727, "grad_norm": 0.26844143867492676, "learning_rate": 0.00019622676863108248, "loss": 11.6813, "step": 12599 }, { "epoch": 0.26375282592313487, "grad_norm": 0.24676252901554108, "learning_rate": 0.00019622617200972707, "loss": 11.6758, "step": 12600 }, { "epoch": 0.26377375868709707, "grad_norm": 0.2714390754699707, "learning_rate": 0.00019622557534211378, "loss": 11.6729, "step": 12601 }, { "epoch": 0.2637946914510592, "grad_norm": 0.2921549081802368, "learning_rate": 0.00019622497862824297, "loss": 11.6844, "step": 12602 }, { "epoch": 0.26381562421502136, "grad_norm": 0.3143053352832794, "learning_rate": 0.00019622438186811487, "loss": 11.6816, "step": 12603 }, { "epoch": 0.2638365569789835, "grad_norm": 0.24801798164844513, "learning_rate": 0.00019622378506172977, "loss": 11.6661, "step": 12604 }, { "epoch": 0.26385748974294565, "grad_norm": 0.2782832682132721, "learning_rate": 0.00019622318820908797, "loss": 11.6771, "step": 12605 }, { "epoch": 0.2638784225069078, "grad_norm": 0.2720004916191101, "learning_rate": 0.0001962225913101898, "loss": 11.6731, "step": 12606 }, { "epoch": 0.26389935527086994, "grad_norm": 0.2587648928165436, "learning_rate": 0.00019622199436503544, "loss": 11.6663, "step": 12607 }, { "epoch": 0.26392028803483214, "grad_norm": 0.3329407870769501, "learning_rate": 0.00019622139737362525, "loss": 11.6943, "step": 12608 }, { "epoch": 0.2639412207987943, "grad_norm": 0.23206545412540436, "learning_rate": 0.00019622080033595952, "loss": 11.6726, "step": 12609 }, { "epoch": 0.26396215356275643, "grad_norm": 0.2958146333694458, "learning_rate": 0.0001962202032520385, "loss": 11.6715, "step": 12610 }, { "epoch": 0.2639830863267186, "grad_norm": 0.30196210741996765, "learning_rate": 0.00019621960612186255, "loss": 11.6902, "step": 12611 }, { "epoch": 0.2640040190906807, "grad_norm": 0.2642744481563568, "learning_rate": 0.00019621900894543188, "loss": 11.6784, "step": 12612 }, { "epoch": 0.26402495185464286, "grad_norm": 0.40514859557151794, "learning_rate": 0.0001962184117227468, "loss": 11.6937, "step": 12613 }, { "epoch": 0.26404588461860506, "grad_norm": 0.2860216796398163, "learning_rate": 0.00019621781445380761, "loss": 11.6717, "step": 12614 }, { "epoch": 0.2640668173825672, "grad_norm": 0.2831406891345978, "learning_rate": 0.0001962172171386146, "loss": 11.6921, "step": 12615 }, { "epoch": 0.26408775014652935, "grad_norm": 0.23952016234397888, "learning_rate": 0.00019621661977716802, "loss": 11.6653, "step": 12616 }, { "epoch": 0.2641086829104915, "grad_norm": 0.3686268925666809, "learning_rate": 0.00019621602236946818, "loss": 11.6895, "step": 12617 }, { "epoch": 0.26412961567445364, "grad_norm": 0.26518911123275757, "learning_rate": 0.0001962154249155154, "loss": 11.6779, "step": 12618 }, { "epoch": 0.2641505484384158, "grad_norm": 0.3256871700286865, "learning_rate": 0.0001962148274153099, "loss": 11.6972, "step": 12619 }, { "epoch": 0.264171481202378, "grad_norm": 0.32819655537605286, "learning_rate": 0.00019621422986885204, "loss": 11.6646, "step": 12620 }, { "epoch": 0.26419241396634013, "grad_norm": 0.3391203284263611, "learning_rate": 0.00019621363227614205, "loss": 11.6734, "step": 12621 }, { "epoch": 0.2642133467303023, "grad_norm": 0.29731157422065735, "learning_rate": 0.00019621303463718024, "loss": 11.6938, "step": 12622 }, { "epoch": 0.2642342794942644, "grad_norm": 0.2786785662174225, "learning_rate": 0.0001962124369519669, "loss": 11.685, "step": 12623 }, { "epoch": 0.26425521225822657, "grad_norm": 0.34124574065208435, "learning_rate": 0.0001962118392205023, "loss": 11.6646, "step": 12624 }, { "epoch": 0.2642761450221887, "grad_norm": 0.2719133198261261, "learning_rate": 0.00019621124144278675, "loss": 11.6793, "step": 12625 }, { "epoch": 0.26429707778615086, "grad_norm": 0.2802458107471466, "learning_rate": 0.00019621064361882056, "loss": 11.6719, "step": 12626 }, { "epoch": 0.26431801055011306, "grad_norm": 0.3647565543651581, "learning_rate": 0.00019621004574860393, "loss": 11.6874, "step": 12627 }, { "epoch": 0.2643389433140752, "grad_norm": 0.3540859520435333, "learning_rate": 0.00019620944783213724, "loss": 11.6826, "step": 12628 }, { "epoch": 0.26435987607803735, "grad_norm": 0.24859099090099335, "learning_rate": 0.00019620884986942073, "loss": 11.6884, "step": 12629 }, { "epoch": 0.2643808088419995, "grad_norm": 0.28282269835472107, "learning_rate": 0.0001962082518604547, "loss": 11.6778, "step": 12630 }, { "epoch": 0.26440174160596164, "grad_norm": 0.3111349940299988, "learning_rate": 0.00019620765380523944, "loss": 11.6719, "step": 12631 }, { "epoch": 0.2644226743699238, "grad_norm": 0.24921774864196777, "learning_rate": 0.00019620705570377522, "loss": 11.6728, "step": 12632 }, { "epoch": 0.264443607133886, "grad_norm": 0.28409039974212646, "learning_rate": 0.00019620645755606236, "loss": 11.6857, "step": 12633 }, { "epoch": 0.2644645398978481, "grad_norm": 0.23157300055027008, "learning_rate": 0.0001962058593621011, "loss": 11.6806, "step": 12634 }, { "epoch": 0.2644854726618103, "grad_norm": 0.2473846673965454, "learning_rate": 0.00019620526112189176, "loss": 11.6733, "step": 12635 }, { "epoch": 0.2645064054257724, "grad_norm": 0.258075088262558, "learning_rate": 0.00019620466283543463, "loss": 11.6811, "step": 12636 }, { "epoch": 0.26452733818973456, "grad_norm": 0.25747931003570557, "learning_rate": 0.00019620406450273001, "loss": 11.6881, "step": 12637 }, { "epoch": 0.2645482709536967, "grad_norm": 0.2989851236343384, "learning_rate": 0.00019620346612377816, "loss": 11.6813, "step": 12638 }, { "epoch": 0.2645692037176589, "grad_norm": 0.2570757567882538, "learning_rate": 0.00019620286769857933, "loss": 11.6895, "step": 12639 }, { "epoch": 0.26459013648162105, "grad_norm": 0.25102266669273376, "learning_rate": 0.00019620226922713392, "loss": 11.6772, "step": 12640 }, { "epoch": 0.2646110692455832, "grad_norm": 0.2947944700717926, "learning_rate": 0.0001962016707094421, "loss": 11.672, "step": 12641 }, { "epoch": 0.26463200200954534, "grad_norm": 0.23982807993888855, "learning_rate": 0.00019620107214550424, "loss": 11.6801, "step": 12642 }, { "epoch": 0.2646529347735075, "grad_norm": 0.2258777916431427, "learning_rate": 0.00019620047353532057, "loss": 11.6603, "step": 12643 }, { "epoch": 0.26467386753746963, "grad_norm": 0.29476308822631836, "learning_rate": 0.00019619987487889146, "loss": 11.6662, "step": 12644 }, { "epoch": 0.2646948003014318, "grad_norm": 0.31339892745018005, "learning_rate": 0.0001961992761762171, "loss": 11.6824, "step": 12645 }, { "epoch": 0.264715733065394, "grad_norm": 0.21565121412277222, "learning_rate": 0.00019619867742729782, "loss": 11.6645, "step": 12646 }, { "epoch": 0.2647366658293561, "grad_norm": 0.2666517198085785, "learning_rate": 0.00019619807863213392, "loss": 11.6683, "step": 12647 }, { "epoch": 0.26475759859331827, "grad_norm": 0.2569209635257721, "learning_rate": 0.00019619747979072566, "loss": 11.6801, "step": 12648 }, { "epoch": 0.2647785313572804, "grad_norm": 0.2613501250743866, "learning_rate": 0.00019619688090307335, "loss": 11.6659, "step": 12649 }, { "epoch": 0.26479946412124256, "grad_norm": 0.2845173180103302, "learning_rate": 0.00019619628196917727, "loss": 11.6958, "step": 12650 }, { "epoch": 0.2648203968852047, "grad_norm": 0.26255205273628235, "learning_rate": 0.00019619568298903772, "loss": 11.675, "step": 12651 }, { "epoch": 0.2648413296491669, "grad_norm": 0.25277671217918396, "learning_rate": 0.00019619508396265495, "loss": 11.6896, "step": 12652 }, { "epoch": 0.26486226241312905, "grad_norm": 0.2399752289056778, "learning_rate": 0.00019619448489002932, "loss": 11.6837, "step": 12653 }, { "epoch": 0.2648831951770912, "grad_norm": 0.3149886131286621, "learning_rate": 0.00019619388577116103, "loss": 11.6798, "step": 12654 }, { "epoch": 0.26490412794105334, "grad_norm": 0.2589791417121887, "learning_rate": 0.00019619328660605044, "loss": 11.6705, "step": 12655 }, { "epoch": 0.2649250607050155, "grad_norm": 0.36907076835632324, "learning_rate": 0.0001961926873946978, "loss": 11.6665, "step": 12656 }, { "epoch": 0.2649459934689776, "grad_norm": 0.22012153267860413, "learning_rate": 0.00019619208813710342, "loss": 11.6617, "step": 12657 }, { "epoch": 0.26496692623293977, "grad_norm": 0.3272063434123993, "learning_rate": 0.00019619148883326756, "loss": 11.6633, "step": 12658 }, { "epoch": 0.26498785899690197, "grad_norm": 1.269162893295288, "learning_rate": 0.00019619088948319053, "loss": 11.6463, "step": 12659 }, { "epoch": 0.2650087917608641, "grad_norm": 0.23810166120529175, "learning_rate": 0.00019619029008687263, "loss": 11.6783, "step": 12660 }, { "epoch": 0.26502972452482626, "grad_norm": 0.32883766293525696, "learning_rate": 0.0001961896906443141, "loss": 11.6758, "step": 12661 }, { "epoch": 0.2650506572887884, "grad_norm": 0.24708060920238495, "learning_rate": 0.00019618909115551525, "loss": 11.6911, "step": 12662 }, { "epoch": 0.26507159005275055, "grad_norm": 0.2566755712032318, "learning_rate": 0.0001961884916204764, "loss": 11.6615, "step": 12663 }, { "epoch": 0.2650925228167127, "grad_norm": 0.23211626708507538, "learning_rate": 0.00019618789203919778, "loss": 11.6751, "step": 12664 }, { "epoch": 0.2651134555806749, "grad_norm": 0.4045673608779907, "learning_rate": 0.00019618729241167973, "loss": 11.6659, "step": 12665 }, { "epoch": 0.26513438834463704, "grad_norm": 0.29290568828582764, "learning_rate": 0.00019618669273792255, "loss": 11.6753, "step": 12666 }, { "epoch": 0.2651553211085992, "grad_norm": 0.23636502027511597, "learning_rate": 0.0001961860930179265, "loss": 11.6919, "step": 12667 }, { "epoch": 0.26517625387256133, "grad_norm": 0.21781714260578156, "learning_rate": 0.00019618549325169183, "loss": 11.6643, "step": 12668 }, { "epoch": 0.2651971866365235, "grad_norm": 0.22122330963611603, "learning_rate": 0.0001961848934392189, "loss": 11.6619, "step": 12669 }, { "epoch": 0.2652181194004856, "grad_norm": 0.3233320713043213, "learning_rate": 0.00019618429358050793, "loss": 11.6786, "step": 12670 }, { "epoch": 0.2652390521644478, "grad_norm": 0.2708868086338043, "learning_rate": 0.00019618369367555927, "loss": 11.6778, "step": 12671 }, { "epoch": 0.26525998492840996, "grad_norm": 0.27253830432891846, "learning_rate": 0.00019618309372437315, "loss": 11.6752, "step": 12672 }, { "epoch": 0.2652809176923721, "grad_norm": 0.24792006611824036, "learning_rate": 0.00019618249372694994, "loss": 11.6863, "step": 12673 }, { "epoch": 0.26530185045633425, "grad_norm": 0.23246119916439056, "learning_rate": 0.00019618189368328983, "loss": 11.6715, "step": 12674 }, { "epoch": 0.2653227832202964, "grad_norm": 0.28744664788246155, "learning_rate": 0.0001961812935933932, "loss": 11.6714, "step": 12675 }, { "epoch": 0.26534371598425854, "grad_norm": 0.2584584653377533, "learning_rate": 0.00019618069345726024, "loss": 11.6801, "step": 12676 }, { "epoch": 0.2653646487482207, "grad_norm": 0.30350765585899353, "learning_rate": 0.00019618009327489132, "loss": 11.6874, "step": 12677 }, { "epoch": 0.2653855815121829, "grad_norm": 0.2946245074272156, "learning_rate": 0.0001961794930462867, "loss": 11.6697, "step": 12678 }, { "epoch": 0.26540651427614503, "grad_norm": 0.27889856696128845, "learning_rate": 0.0001961788927714467, "loss": 11.6691, "step": 12679 }, { "epoch": 0.2654274470401072, "grad_norm": 0.3063274919986725, "learning_rate": 0.00019617829245037154, "loss": 11.6865, "step": 12680 }, { "epoch": 0.2654483798040693, "grad_norm": 0.2823702096939087, "learning_rate": 0.00019617769208306156, "loss": 11.6585, "step": 12681 }, { "epoch": 0.26546931256803147, "grad_norm": 0.2774914503097534, "learning_rate": 0.00019617709166951705, "loss": 11.6842, "step": 12682 }, { "epoch": 0.2654902453319936, "grad_norm": 0.23115095496177673, "learning_rate": 0.00019617649120973828, "loss": 11.6626, "step": 12683 }, { "epoch": 0.2655111780959558, "grad_norm": 0.25785812735557556, "learning_rate": 0.0001961758907037255, "loss": 11.6771, "step": 12684 }, { "epoch": 0.26553211085991796, "grad_norm": 0.2987461984157562, "learning_rate": 0.00019617529015147908, "loss": 11.6749, "step": 12685 }, { "epoch": 0.2655530436238801, "grad_norm": 0.33682143688201904, "learning_rate": 0.00019617468955299928, "loss": 11.6673, "step": 12686 }, { "epoch": 0.26557397638784225, "grad_norm": 0.25117915868759155, "learning_rate": 0.00019617408890828638, "loss": 11.6909, "step": 12687 }, { "epoch": 0.2655949091518044, "grad_norm": 0.2117519974708557, "learning_rate": 0.00019617348821734064, "loss": 11.6718, "step": 12688 }, { "epoch": 0.26561584191576654, "grad_norm": 0.27432310581207275, "learning_rate": 0.00019617288748016238, "loss": 11.6719, "step": 12689 }, { "epoch": 0.26563677467972874, "grad_norm": 0.2377772033214569, "learning_rate": 0.0001961722866967519, "loss": 11.6635, "step": 12690 }, { "epoch": 0.2656577074436909, "grad_norm": 0.2961660325527191, "learning_rate": 0.00019617168586710947, "loss": 11.6863, "step": 12691 }, { "epoch": 0.265678640207653, "grad_norm": 0.2339957058429718, "learning_rate": 0.0001961710849912354, "loss": 11.6841, "step": 12692 }, { "epoch": 0.26569957297161517, "grad_norm": 0.24527110159397125, "learning_rate": 0.00019617048406912995, "loss": 11.6802, "step": 12693 }, { "epoch": 0.2657205057355773, "grad_norm": 0.30416375398635864, "learning_rate": 0.0001961698831007934, "loss": 11.6805, "step": 12694 }, { "epoch": 0.26574143849953946, "grad_norm": 0.25349655747413635, "learning_rate": 0.00019616928208622607, "loss": 11.6611, "step": 12695 }, { "epoch": 0.2657623712635016, "grad_norm": 0.25274497270584106, "learning_rate": 0.00019616868102542825, "loss": 11.6744, "step": 12696 }, { "epoch": 0.2657833040274638, "grad_norm": 0.2637423574924469, "learning_rate": 0.0001961680799184002, "loss": 11.6688, "step": 12697 }, { "epoch": 0.26580423679142595, "grad_norm": 0.24620427191257477, "learning_rate": 0.00019616747876514224, "loss": 11.6825, "step": 12698 }, { "epoch": 0.2658251695553881, "grad_norm": 0.3196682333946228, "learning_rate": 0.00019616687756565467, "loss": 11.6691, "step": 12699 }, { "epoch": 0.26584610231935024, "grad_norm": 0.24531607329845428, "learning_rate": 0.00019616627631993772, "loss": 11.6831, "step": 12700 }, { "epoch": 0.2658670350833124, "grad_norm": 0.32599446177482605, "learning_rate": 0.00019616567502799174, "loss": 11.6751, "step": 12701 }, { "epoch": 0.26588796784727453, "grad_norm": 0.2616899311542511, "learning_rate": 0.000196165073689817, "loss": 11.6762, "step": 12702 }, { "epoch": 0.26590890061123673, "grad_norm": 0.22589707374572754, "learning_rate": 0.00019616447230541372, "loss": 11.6677, "step": 12703 }, { "epoch": 0.2659298333751989, "grad_norm": 0.23322944343090057, "learning_rate": 0.0001961638708747823, "loss": 11.6837, "step": 12704 }, { "epoch": 0.265950766139161, "grad_norm": 0.28745555877685547, "learning_rate": 0.00019616326939792295, "loss": 11.6971, "step": 12705 }, { "epoch": 0.26597169890312317, "grad_norm": 0.28673025965690613, "learning_rate": 0.00019616266787483603, "loss": 11.679, "step": 12706 }, { "epoch": 0.2659926316670853, "grad_norm": 0.26566553115844727, "learning_rate": 0.00019616206630552175, "loss": 11.6685, "step": 12707 }, { "epoch": 0.26601356443104746, "grad_norm": 0.4054200351238251, "learning_rate": 0.00019616146468998049, "loss": 11.6859, "step": 12708 }, { "epoch": 0.26603449719500966, "grad_norm": 0.30545344948768616, "learning_rate": 0.00019616086302821244, "loss": 11.6905, "step": 12709 }, { "epoch": 0.2660554299589718, "grad_norm": 0.21898089349269867, "learning_rate": 0.00019616026132021795, "loss": 11.6811, "step": 12710 }, { "epoch": 0.26607636272293395, "grad_norm": 0.20095907151699066, "learning_rate": 0.00019615965956599728, "loss": 11.6751, "step": 12711 }, { "epoch": 0.2660972954868961, "grad_norm": 0.2503364384174347, "learning_rate": 0.00019615905776555076, "loss": 11.6882, "step": 12712 }, { "epoch": 0.26611822825085824, "grad_norm": 0.28871768712997437, "learning_rate": 0.00019615845591887862, "loss": 11.6659, "step": 12713 }, { "epoch": 0.2661391610148204, "grad_norm": 0.2738136053085327, "learning_rate": 0.00019615785402598123, "loss": 11.6785, "step": 12714 }, { "epoch": 0.2661600937787825, "grad_norm": 0.23929697275161743, "learning_rate": 0.0001961572520868588, "loss": 11.6794, "step": 12715 }, { "epoch": 0.2661810265427447, "grad_norm": 0.24567969143390656, "learning_rate": 0.00019615665010151164, "loss": 11.6771, "step": 12716 }, { "epoch": 0.26620195930670687, "grad_norm": 0.2094140648841858, "learning_rate": 0.0001961560480699401, "loss": 11.6883, "step": 12717 }, { "epoch": 0.266222892070669, "grad_norm": 0.2361280620098114, "learning_rate": 0.0001961554459921444, "loss": 11.6764, "step": 12718 }, { "epoch": 0.26624382483463116, "grad_norm": 0.28361383080482483, "learning_rate": 0.00019615484386812483, "loss": 11.6801, "step": 12719 }, { "epoch": 0.2662647575985933, "grad_norm": 0.215672105550766, "learning_rate": 0.0001961542416978817, "loss": 11.6742, "step": 12720 }, { "epoch": 0.26628569036255545, "grad_norm": 0.21860232949256897, "learning_rate": 0.00019615363948141532, "loss": 11.6684, "step": 12721 }, { "epoch": 0.26630662312651765, "grad_norm": 0.23762744665145874, "learning_rate": 0.00019615303721872595, "loss": 11.6661, "step": 12722 }, { "epoch": 0.2663275558904798, "grad_norm": 0.27622488141059875, "learning_rate": 0.0001961524349098139, "loss": 11.6767, "step": 12723 }, { "epoch": 0.26634848865444194, "grad_norm": 0.20293764770030975, "learning_rate": 0.00019615183255467943, "loss": 11.6815, "step": 12724 }, { "epoch": 0.2663694214184041, "grad_norm": 0.2787885367870331, "learning_rate": 0.00019615123015332287, "loss": 11.6715, "step": 12725 }, { "epoch": 0.26639035418236623, "grad_norm": 0.24163204431533813, "learning_rate": 0.00019615062770574445, "loss": 11.6848, "step": 12726 }, { "epoch": 0.2664112869463284, "grad_norm": 0.3439342975616455, "learning_rate": 0.0001961500252119445, "loss": 11.6745, "step": 12727 }, { "epoch": 0.2664322197102906, "grad_norm": 0.2826414406299591, "learning_rate": 0.00019614942267192332, "loss": 11.6938, "step": 12728 }, { "epoch": 0.2664531524742527, "grad_norm": 0.34413275122642517, "learning_rate": 0.00019614882008568121, "loss": 11.6883, "step": 12729 }, { "epoch": 0.26647408523821486, "grad_norm": 0.25428879261016846, "learning_rate": 0.0001961482174532184, "loss": 11.6642, "step": 12730 }, { "epoch": 0.266495018002177, "grad_norm": 0.2982998490333557, "learning_rate": 0.00019614761477453524, "loss": 11.6873, "step": 12731 }, { "epoch": 0.26651595076613915, "grad_norm": 0.2256050705909729, "learning_rate": 0.00019614701204963197, "loss": 11.6661, "step": 12732 }, { "epoch": 0.2665368835301013, "grad_norm": 0.2511313855648041, "learning_rate": 0.00019614640927850894, "loss": 11.6767, "step": 12733 }, { "epoch": 0.26655781629406344, "grad_norm": 0.22635629773139954, "learning_rate": 0.00019614580646116635, "loss": 11.6659, "step": 12734 }, { "epoch": 0.26657874905802564, "grad_norm": 0.22426408529281616, "learning_rate": 0.00019614520359760457, "loss": 11.6603, "step": 12735 }, { "epoch": 0.2665996818219878, "grad_norm": 0.30226758122444153, "learning_rate": 0.0001961446006878239, "loss": 11.6885, "step": 12736 }, { "epoch": 0.26662061458594993, "grad_norm": 0.2140752673149109, "learning_rate": 0.00019614399773182454, "loss": 11.6743, "step": 12737 }, { "epoch": 0.2666415473499121, "grad_norm": 0.3065999448299408, "learning_rate": 0.00019614339472960688, "loss": 11.6964, "step": 12738 }, { "epoch": 0.2666624801138742, "grad_norm": 0.34336167573928833, "learning_rate": 0.00019614279168117115, "loss": 11.6713, "step": 12739 }, { "epoch": 0.26668341287783637, "grad_norm": 0.30642813444137573, "learning_rate": 0.0001961421885865176, "loss": 11.6692, "step": 12740 }, { "epoch": 0.26670434564179857, "grad_norm": 0.2591429352760315, "learning_rate": 0.00019614158544564662, "loss": 11.6629, "step": 12741 }, { "epoch": 0.2667252784057607, "grad_norm": 0.2672561705112457, "learning_rate": 0.00019614098225855848, "loss": 11.6782, "step": 12742 }, { "epoch": 0.26674621116972286, "grad_norm": 0.2597532570362091, "learning_rate": 0.0001961403790252534, "loss": 11.6844, "step": 12743 }, { "epoch": 0.266767143933685, "grad_norm": 0.29425087571144104, "learning_rate": 0.00019613977574573172, "loss": 11.7056, "step": 12744 }, { "epoch": 0.26678807669764715, "grad_norm": 0.3018816411495209, "learning_rate": 0.00019613917241999374, "loss": 11.681, "step": 12745 }, { "epoch": 0.2668090094616093, "grad_norm": 0.26109278202056885, "learning_rate": 0.00019613856904803973, "loss": 11.6717, "step": 12746 }, { "epoch": 0.26682994222557144, "grad_norm": 0.2728938162326813, "learning_rate": 0.00019613796562986997, "loss": 11.6718, "step": 12747 }, { "epoch": 0.26685087498953364, "grad_norm": 0.3036970794200897, "learning_rate": 0.00019613736216548479, "loss": 11.6694, "step": 12748 }, { "epoch": 0.2668718077534958, "grad_norm": 0.24506430327892303, "learning_rate": 0.00019613675865488438, "loss": 11.6769, "step": 12749 }, { "epoch": 0.2668927405174579, "grad_norm": 0.27233046293258667, "learning_rate": 0.0001961361550980692, "loss": 11.6776, "step": 12750 }, { "epoch": 0.26691367328142007, "grad_norm": 0.2780713140964508, "learning_rate": 0.00019613555149503937, "loss": 11.6723, "step": 12751 }, { "epoch": 0.2669346060453822, "grad_norm": 0.2740823030471802, "learning_rate": 0.0001961349478457953, "loss": 11.6843, "step": 12752 }, { "epoch": 0.26695553880934436, "grad_norm": 0.35542652010917664, "learning_rate": 0.0001961343441503372, "loss": 11.6889, "step": 12753 }, { "epoch": 0.26697647157330656, "grad_norm": 0.26866406202316284, "learning_rate": 0.00019613374040866542, "loss": 11.6553, "step": 12754 }, { "epoch": 0.2669974043372687, "grad_norm": 0.26179179549217224, "learning_rate": 0.0001961331366207802, "loss": 11.6911, "step": 12755 }, { "epoch": 0.26701833710123085, "grad_norm": 0.20860211551189423, "learning_rate": 0.0001961325327866819, "loss": 11.6649, "step": 12756 }, { "epoch": 0.267039269865193, "grad_norm": 0.2528109848499298, "learning_rate": 0.00019613192890637074, "loss": 11.6622, "step": 12757 }, { "epoch": 0.26706020262915514, "grad_norm": 0.2590804100036621, "learning_rate": 0.000196131324979847, "loss": 11.662, "step": 12758 }, { "epoch": 0.2670811353931173, "grad_norm": 0.28211522102355957, "learning_rate": 0.00019613072100711104, "loss": 11.6785, "step": 12759 }, { "epoch": 0.2671020681570795, "grad_norm": 0.27276936173439026, "learning_rate": 0.00019613011698816312, "loss": 11.6709, "step": 12760 }, { "epoch": 0.26712300092104163, "grad_norm": 0.23231162130832672, "learning_rate": 0.00019612951292300351, "loss": 11.6706, "step": 12761 }, { "epoch": 0.2671439336850038, "grad_norm": 0.2847810387611389, "learning_rate": 0.00019612890881163253, "loss": 11.6712, "step": 12762 }, { "epoch": 0.2671648664489659, "grad_norm": 0.21495631337165833, "learning_rate": 0.00019612830465405044, "loss": 11.673, "step": 12763 }, { "epoch": 0.26718579921292807, "grad_norm": 0.25416281819343567, "learning_rate": 0.00019612770045025757, "loss": 11.6748, "step": 12764 }, { "epoch": 0.2672067319768902, "grad_norm": 0.24270206689834595, "learning_rate": 0.00019612709620025418, "loss": 11.6683, "step": 12765 }, { "epoch": 0.26722766474085236, "grad_norm": 0.27909746766090393, "learning_rate": 0.00019612649190404056, "loss": 11.669, "step": 12766 }, { "epoch": 0.26724859750481456, "grad_norm": 0.3223024606704712, "learning_rate": 0.00019612588756161704, "loss": 11.6871, "step": 12767 }, { "epoch": 0.2672695302687767, "grad_norm": 0.25299862027168274, "learning_rate": 0.00019612528317298385, "loss": 11.6778, "step": 12768 }, { "epoch": 0.26729046303273885, "grad_norm": 0.2679192125797272, "learning_rate": 0.00019612467873814131, "loss": 11.6724, "step": 12769 }, { "epoch": 0.267311395796701, "grad_norm": 0.29223906993865967, "learning_rate": 0.0001961240742570897, "loss": 11.6796, "step": 12770 }, { "epoch": 0.26733232856066313, "grad_norm": 0.4104803204536438, "learning_rate": 0.00019612346972982937, "loss": 11.6536, "step": 12771 }, { "epoch": 0.2673532613246253, "grad_norm": 0.2732057571411133, "learning_rate": 0.00019612286515636052, "loss": 11.6937, "step": 12772 }, { "epoch": 0.2673741940885875, "grad_norm": 0.26619526743888855, "learning_rate": 0.00019612226053668347, "loss": 11.6733, "step": 12773 }, { "epoch": 0.2673951268525496, "grad_norm": 0.2742380201816559, "learning_rate": 0.00019612165587079856, "loss": 11.684, "step": 12774 }, { "epoch": 0.26741605961651177, "grad_norm": 0.8620203733444214, "learning_rate": 0.00019612105115870602, "loss": 11.6076, "step": 12775 }, { "epoch": 0.2674369923804739, "grad_norm": 0.29503515362739563, "learning_rate": 0.00019612044640040615, "loss": 11.6814, "step": 12776 }, { "epoch": 0.26745792514443606, "grad_norm": 0.30595120787620544, "learning_rate": 0.00019611984159589928, "loss": 11.6708, "step": 12777 }, { "epoch": 0.2674788579083982, "grad_norm": 0.2775551676750183, "learning_rate": 0.00019611923674518568, "loss": 11.673, "step": 12778 }, { "epoch": 0.2674997906723604, "grad_norm": 0.32200729846954346, "learning_rate": 0.00019611863184826564, "loss": 11.687, "step": 12779 }, { "epoch": 0.26752072343632255, "grad_norm": 0.3498629033565521, "learning_rate": 0.00019611802690513942, "loss": 11.6805, "step": 12780 }, { "epoch": 0.2675416562002847, "grad_norm": 0.2689557373523712, "learning_rate": 0.00019611742191580738, "loss": 11.6756, "step": 12781 }, { "epoch": 0.26756258896424684, "grad_norm": 0.2856675088405609, "learning_rate": 0.00019611681688026973, "loss": 11.6975, "step": 12782 }, { "epoch": 0.267583521728209, "grad_norm": 0.2670077383518219, "learning_rate": 0.00019611621179852683, "loss": 11.6536, "step": 12783 }, { "epoch": 0.26760445449217113, "grad_norm": 0.25769880414009094, "learning_rate": 0.00019611560667057891, "loss": 11.6758, "step": 12784 }, { "epoch": 0.2676253872561333, "grad_norm": 0.30725786089897156, "learning_rate": 0.0001961150014964263, "loss": 11.6702, "step": 12785 }, { "epoch": 0.2676463200200955, "grad_norm": 0.2785726487636566, "learning_rate": 0.00019611439627606928, "loss": 11.6801, "step": 12786 }, { "epoch": 0.2676672527840576, "grad_norm": 0.20205329358577728, "learning_rate": 0.00019611379100950815, "loss": 11.677, "step": 12787 }, { "epoch": 0.26768818554801976, "grad_norm": 0.2619381248950958, "learning_rate": 0.00019611318569674323, "loss": 11.7078, "step": 12788 }, { "epoch": 0.2677091183119819, "grad_norm": 0.2547261416912079, "learning_rate": 0.0001961125803377747, "loss": 11.6867, "step": 12789 }, { "epoch": 0.26773005107594405, "grad_norm": 0.3747612535953522, "learning_rate": 0.00019611197493260297, "loss": 11.6915, "step": 12790 }, { "epoch": 0.2677509838399062, "grad_norm": 0.22972966730594635, "learning_rate": 0.0001961113694812283, "loss": 11.6664, "step": 12791 }, { "epoch": 0.2677719166038684, "grad_norm": 0.27492499351501465, "learning_rate": 0.00019611076398365096, "loss": 11.6655, "step": 12792 }, { "epoch": 0.26779284936783054, "grad_norm": 0.2215888351202011, "learning_rate": 0.00019611015843987124, "loss": 11.6804, "step": 12793 }, { "epoch": 0.2678137821317927, "grad_norm": 0.21765932440757751, "learning_rate": 0.00019610955284988946, "loss": 11.6754, "step": 12794 }, { "epoch": 0.26783471489575483, "grad_norm": 0.2616734504699707, "learning_rate": 0.00019610894721370587, "loss": 11.6813, "step": 12795 }, { "epoch": 0.267855647659717, "grad_norm": 0.33309459686279297, "learning_rate": 0.0001961083415313208, "loss": 11.6819, "step": 12796 }, { "epoch": 0.2678765804236791, "grad_norm": 0.28864172101020813, "learning_rate": 0.00019610773580273452, "loss": 11.6877, "step": 12797 }, { "epoch": 0.2678975131876413, "grad_norm": 0.32650506496429443, "learning_rate": 0.00019610713002794728, "loss": 11.6774, "step": 12798 }, { "epoch": 0.26791844595160347, "grad_norm": 0.2514379620552063, "learning_rate": 0.00019610652420695948, "loss": 11.6787, "step": 12799 }, { "epoch": 0.2679393787155656, "grad_norm": 0.21076509356498718, "learning_rate": 0.00019610591833977133, "loss": 11.6964, "step": 12800 }, { "epoch": 0.26796031147952776, "grad_norm": 0.2268127053976059, "learning_rate": 0.00019610531242638315, "loss": 11.6853, "step": 12801 }, { "epoch": 0.2679812442434899, "grad_norm": 0.27392101287841797, "learning_rate": 0.00019610470646679517, "loss": 11.6736, "step": 12802 }, { "epoch": 0.26800217700745205, "grad_norm": 0.2769998013973236, "learning_rate": 0.0001961041004610078, "loss": 11.6658, "step": 12803 }, { "epoch": 0.2680231097714142, "grad_norm": 0.2583613991737366, "learning_rate": 0.0001961034944090212, "loss": 11.6776, "step": 12804 }, { "epoch": 0.2680440425353764, "grad_norm": 0.24324171245098114, "learning_rate": 0.00019610288831083575, "loss": 11.6695, "step": 12805 }, { "epoch": 0.26806497529933854, "grad_norm": 0.3395000100135803, "learning_rate": 0.00019610228216645172, "loss": 11.6901, "step": 12806 }, { "epoch": 0.2680859080633007, "grad_norm": 0.27506354451179504, "learning_rate": 0.00019610167597586942, "loss": 11.6833, "step": 12807 }, { "epoch": 0.2681068408272628, "grad_norm": 0.2468017339706421, "learning_rate": 0.00019610106973908908, "loss": 11.6921, "step": 12808 }, { "epoch": 0.26812777359122497, "grad_norm": 0.2623906135559082, "learning_rate": 0.00019610046345611105, "loss": 11.6646, "step": 12809 }, { "epoch": 0.2681487063551871, "grad_norm": 0.23364321887493134, "learning_rate": 0.0001960998571269356, "loss": 11.6674, "step": 12810 }, { "epoch": 0.2681696391191493, "grad_norm": 0.2136724442243576, "learning_rate": 0.00019609925075156302, "loss": 11.6655, "step": 12811 }, { "epoch": 0.26819057188311146, "grad_norm": 0.2118529975414276, "learning_rate": 0.0001960986443299936, "loss": 11.6565, "step": 12812 }, { "epoch": 0.2682115046470736, "grad_norm": 0.2809067964553833, "learning_rate": 0.0001960980378622276, "loss": 11.6631, "step": 12813 }, { "epoch": 0.26823243741103575, "grad_norm": 0.25548750162124634, "learning_rate": 0.0001960974313482654, "loss": 11.6679, "step": 12814 }, { "epoch": 0.2682533701749979, "grad_norm": 0.29534438252449036, "learning_rate": 0.00019609682478810724, "loss": 11.6811, "step": 12815 }, { "epoch": 0.26827430293896004, "grad_norm": 0.30567291378974915, "learning_rate": 0.00019609621818175339, "loss": 11.6951, "step": 12816 }, { "epoch": 0.26829523570292224, "grad_norm": 0.324258416891098, "learning_rate": 0.00019609561152920417, "loss": 11.6617, "step": 12817 }, { "epoch": 0.2683161684668844, "grad_norm": 0.37316077947616577, "learning_rate": 0.00019609500483045987, "loss": 11.6864, "step": 12818 }, { "epoch": 0.26833710123084653, "grad_norm": 0.26550155878067017, "learning_rate": 0.00019609439808552077, "loss": 11.6747, "step": 12819 }, { "epoch": 0.2683580339948087, "grad_norm": 0.24034956097602844, "learning_rate": 0.00019609379129438716, "loss": 11.6807, "step": 12820 }, { "epoch": 0.2683789667587708, "grad_norm": 0.2196161150932312, "learning_rate": 0.00019609318445705932, "loss": 11.673, "step": 12821 }, { "epoch": 0.26839989952273297, "grad_norm": 0.3045961260795593, "learning_rate": 0.00019609257757353761, "loss": 11.68, "step": 12822 }, { "epoch": 0.2684208322866951, "grad_norm": 0.2814810574054718, "learning_rate": 0.00019609197064382223, "loss": 11.6768, "step": 12823 }, { "epoch": 0.2684417650506573, "grad_norm": 0.2810179591178894, "learning_rate": 0.00019609136366791354, "loss": 11.6664, "step": 12824 }, { "epoch": 0.26846269781461946, "grad_norm": 0.3515355587005615, "learning_rate": 0.0001960907566458118, "loss": 11.6708, "step": 12825 }, { "epoch": 0.2684836305785816, "grad_norm": 0.27338114380836487, "learning_rate": 0.00019609014957751728, "loss": 11.6659, "step": 12826 }, { "epoch": 0.26850456334254375, "grad_norm": 0.33102044463157654, "learning_rate": 0.00019608954246303032, "loss": 11.679, "step": 12827 }, { "epoch": 0.2685254961065059, "grad_norm": 0.2960251271724701, "learning_rate": 0.0001960889353023512, "loss": 11.6753, "step": 12828 }, { "epoch": 0.26854642887046803, "grad_norm": 0.32167166471481323, "learning_rate": 0.00019608832809548022, "loss": 11.6759, "step": 12829 }, { "epoch": 0.26856736163443024, "grad_norm": 0.38134950399398804, "learning_rate": 0.0001960877208424176, "loss": 11.6674, "step": 12830 }, { "epoch": 0.2685882943983924, "grad_norm": 0.26417285203933716, "learning_rate": 0.0001960871135431637, "loss": 11.6934, "step": 12831 }, { "epoch": 0.2686092271623545, "grad_norm": 0.266543984413147, "learning_rate": 0.00019608650619771885, "loss": 11.667, "step": 12832 }, { "epoch": 0.26863015992631667, "grad_norm": 0.2528383135795593, "learning_rate": 0.00019608589880608325, "loss": 11.6661, "step": 12833 }, { "epoch": 0.2686510926902788, "grad_norm": 0.2893964350223541, "learning_rate": 0.00019608529136825724, "loss": 11.6808, "step": 12834 }, { "epoch": 0.26867202545424096, "grad_norm": 0.30387842655181885, "learning_rate": 0.0001960846838842411, "loss": 11.6866, "step": 12835 }, { "epoch": 0.2686929582182031, "grad_norm": 0.24648016691207886, "learning_rate": 0.00019608407635403514, "loss": 11.6726, "step": 12836 }, { "epoch": 0.2687138909821653, "grad_norm": 0.28312766551971436, "learning_rate": 0.00019608346877763964, "loss": 11.6846, "step": 12837 }, { "epoch": 0.26873482374612745, "grad_norm": 0.2756843864917755, "learning_rate": 0.00019608286115505488, "loss": 11.6506, "step": 12838 }, { "epoch": 0.2687557565100896, "grad_norm": 0.22714684903621674, "learning_rate": 0.00019608225348628116, "loss": 11.668, "step": 12839 }, { "epoch": 0.26877668927405174, "grad_norm": 0.3055116534233093, "learning_rate": 0.00019608164577131877, "loss": 11.6861, "step": 12840 }, { "epoch": 0.2687976220380139, "grad_norm": 0.2572658360004425, "learning_rate": 0.00019608103801016803, "loss": 11.6756, "step": 12841 }, { "epoch": 0.26881855480197603, "grad_norm": 0.2896478474140167, "learning_rate": 0.00019608043020282918, "loss": 11.6659, "step": 12842 }, { "epoch": 0.26883948756593823, "grad_norm": 0.2621963918209076, "learning_rate": 0.00019607982234930258, "loss": 11.6829, "step": 12843 }, { "epoch": 0.2688604203299004, "grad_norm": 0.2721782922744751, "learning_rate": 0.00019607921444958845, "loss": 11.6798, "step": 12844 }, { "epoch": 0.2688813530938625, "grad_norm": 0.46453067660331726, "learning_rate": 0.00019607860650368712, "loss": 11.7035, "step": 12845 }, { "epoch": 0.26890228585782466, "grad_norm": 0.23007485270500183, "learning_rate": 0.00019607799851159888, "loss": 11.6734, "step": 12846 }, { "epoch": 0.2689232186217868, "grad_norm": 0.2708283066749573, "learning_rate": 0.00019607739047332404, "loss": 11.681, "step": 12847 }, { "epoch": 0.26894415138574895, "grad_norm": 0.2904626429080963, "learning_rate": 0.00019607678238886287, "loss": 11.6823, "step": 12848 }, { "epoch": 0.26896508414971115, "grad_norm": 0.23844419419765472, "learning_rate": 0.00019607617425821566, "loss": 11.6829, "step": 12849 }, { "epoch": 0.2689860169136733, "grad_norm": 0.2916209101676941, "learning_rate": 0.0001960755660813827, "loss": 11.6877, "step": 12850 }, { "epoch": 0.26900694967763544, "grad_norm": 0.2639281749725342, "learning_rate": 0.00019607495785836431, "loss": 11.676, "step": 12851 }, { "epoch": 0.2690278824415976, "grad_norm": 0.2739018201828003, "learning_rate": 0.00019607434958916073, "loss": 11.679, "step": 12852 }, { "epoch": 0.26904881520555973, "grad_norm": 0.2837178111076355, "learning_rate": 0.00019607374127377232, "loss": 11.6734, "step": 12853 }, { "epoch": 0.2690697479695219, "grad_norm": 0.2886309027671814, "learning_rate": 0.00019607313291219932, "loss": 11.6759, "step": 12854 }, { "epoch": 0.269090680733484, "grad_norm": 0.22893503308296204, "learning_rate": 0.000196072524504442, "loss": 11.6741, "step": 12855 }, { "epoch": 0.2691116134974462, "grad_norm": 0.2890338599681854, "learning_rate": 0.00019607191605050078, "loss": 11.6873, "step": 12856 }, { "epoch": 0.26913254626140837, "grad_norm": 0.23850366473197937, "learning_rate": 0.0001960713075503758, "loss": 11.6664, "step": 12857 }, { "epoch": 0.2691534790253705, "grad_norm": 0.22321642935276031, "learning_rate": 0.00019607069900406742, "loss": 11.6854, "step": 12858 }, { "epoch": 0.26917441178933266, "grad_norm": 0.2574407458305359, "learning_rate": 0.00019607009041157594, "loss": 11.6955, "step": 12859 }, { "epoch": 0.2691953445532948, "grad_norm": 0.22632275521755219, "learning_rate": 0.00019606948177290165, "loss": 11.6845, "step": 12860 }, { "epoch": 0.26921627731725695, "grad_norm": 0.3137761056423187, "learning_rate": 0.00019606887308804485, "loss": 11.6579, "step": 12861 }, { "epoch": 0.26923721008121915, "grad_norm": 0.2514231204986572, "learning_rate": 0.00019606826435700578, "loss": 11.6716, "step": 12862 }, { "epoch": 0.2692581428451813, "grad_norm": 0.24557079374790192, "learning_rate": 0.00019606765557978476, "loss": 11.6679, "step": 12863 }, { "epoch": 0.26927907560914344, "grad_norm": 0.2446974366903305, "learning_rate": 0.00019606704675638215, "loss": 11.6602, "step": 12864 }, { "epoch": 0.2693000083731056, "grad_norm": 0.22665348649024963, "learning_rate": 0.00019606643788679814, "loss": 11.6514, "step": 12865 }, { "epoch": 0.2693209411370677, "grad_norm": 0.24153146147727966, "learning_rate": 0.00019606582897103308, "loss": 11.6709, "step": 12866 }, { "epoch": 0.26934187390102987, "grad_norm": 0.4081500768661499, "learning_rate": 0.00019606522000908727, "loss": 11.6972, "step": 12867 }, { "epoch": 0.26936280666499207, "grad_norm": 0.32752710580825806, "learning_rate": 0.00019606461100096095, "loss": 11.6884, "step": 12868 }, { "epoch": 0.2693837394289542, "grad_norm": 0.28232720494270325, "learning_rate": 0.00019606400194665445, "loss": 11.6657, "step": 12869 }, { "epoch": 0.26940467219291636, "grad_norm": 0.27223923802375793, "learning_rate": 0.0001960633928461681, "loss": 11.6743, "step": 12870 }, { "epoch": 0.2694256049568785, "grad_norm": 0.32649752497673035, "learning_rate": 0.00019606278369950212, "loss": 11.6779, "step": 12871 }, { "epoch": 0.26944653772084065, "grad_norm": 0.24390307068824768, "learning_rate": 0.00019606217450665684, "loss": 11.6757, "step": 12872 }, { "epoch": 0.2694674704848028, "grad_norm": 0.3255890905857086, "learning_rate": 0.00019606156526763254, "loss": 11.6805, "step": 12873 }, { "epoch": 0.26948840324876494, "grad_norm": 0.30660301446914673, "learning_rate": 0.00019606095598242952, "loss": 11.6722, "step": 12874 }, { "epoch": 0.26950933601272714, "grad_norm": 0.2907039523124695, "learning_rate": 0.00019606034665104808, "loss": 11.6786, "step": 12875 }, { "epoch": 0.2695302687766893, "grad_norm": 0.21193785965442657, "learning_rate": 0.00019605973727348852, "loss": 11.6756, "step": 12876 }, { "epoch": 0.26955120154065143, "grad_norm": 0.2445330023765564, "learning_rate": 0.00019605912784975113, "loss": 11.6789, "step": 12877 }, { "epoch": 0.2695721343046136, "grad_norm": 0.24463728070259094, "learning_rate": 0.00019605851837983617, "loss": 11.6788, "step": 12878 }, { "epoch": 0.2695930670685757, "grad_norm": 0.21858757734298706, "learning_rate": 0.00019605790886374395, "loss": 11.6789, "step": 12879 }, { "epoch": 0.26961399983253787, "grad_norm": 0.29041051864624023, "learning_rate": 0.00019605729930147477, "loss": 11.6642, "step": 12880 }, { "epoch": 0.26963493259650007, "grad_norm": 0.3005702495574951, "learning_rate": 0.00019605668969302893, "loss": 11.6839, "step": 12881 }, { "epoch": 0.2696558653604622, "grad_norm": 0.24815607070922852, "learning_rate": 0.0001960560800384067, "loss": 11.6785, "step": 12882 }, { "epoch": 0.26967679812442436, "grad_norm": 0.3121027946472168, "learning_rate": 0.00019605547033760837, "loss": 11.6805, "step": 12883 }, { "epoch": 0.2696977308883865, "grad_norm": 0.3390631079673767, "learning_rate": 0.0001960548605906343, "loss": 11.6824, "step": 12884 }, { "epoch": 0.26971866365234864, "grad_norm": 0.2597219944000244, "learning_rate": 0.0001960542507974847, "loss": 11.6903, "step": 12885 }, { "epoch": 0.2697395964163108, "grad_norm": 0.3313451111316681, "learning_rate": 0.00019605364095815994, "loss": 11.6736, "step": 12886 }, { "epoch": 0.269760529180273, "grad_norm": 0.27891650795936584, "learning_rate": 0.00019605303107266022, "loss": 11.662, "step": 12887 }, { "epoch": 0.26978146194423513, "grad_norm": 0.26034098863601685, "learning_rate": 0.0001960524211409859, "loss": 11.6752, "step": 12888 }, { "epoch": 0.2698023947081973, "grad_norm": 0.22545866668224335, "learning_rate": 0.00019605181116313724, "loss": 11.6703, "step": 12889 }, { "epoch": 0.2698233274721594, "grad_norm": 0.2815296947956085, "learning_rate": 0.00019605120113911454, "loss": 11.6819, "step": 12890 }, { "epoch": 0.26984426023612157, "grad_norm": 0.2648443579673767, "learning_rate": 0.00019605059106891814, "loss": 11.6632, "step": 12891 }, { "epoch": 0.2698651930000837, "grad_norm": 0.2521919906139374, "learning_rate": 0.00019604998095254828, "loss": 11.6707, "step": 12892 }, { "epoch": 0.26988612576404586, "grad_norm": 0.2291676104068756, "learning_rate": 0.0001960493707900053, "loss": 11.6849, "step": 12893 }, { "epoch": 0.26990705852800806, "grad_norm": 0.25294142961502075, "learning_rate": 0.00019604876058128943, "loss": 11.6832, "step": 12894 }, { "epoch": 0.2699279912919702, "grad_norm": 0.2622135281562805, "learning_rate": 0.00019604815032640098, "loss": 11.6787, "step": 12895 }, { "epoch": 0.26994892405593235, "grad_norm": 0.23842282593250275, "learning_rate": 0.00019604754002534028, "loss": 11.6804, "step": 12896 }, { "epoch": 0.2699698568198945, "grad_norm": 0.2789101302623749, "learning_rate": 0.0001960469296781076, "loss": 11.6992, "step": 12897 }, { "epoch": 0.26999078958385664, "grad_norm": 0.22896337509155273, "learning_rate": 0.00019604631928470326, "loss": 11.6844, "step": 12898 }, { "epoch": 0.2700117223478188, "grad_norm": 0.23855362832546234, "learning_rate": 0.00019604570884512753, "loss": 11.6762, "step": 12899 }, { "epoch": 0.270032655111781, "grad_norm": 0.24610865116119385, "learning_rate": 0.00019604509835938064, "loss": 11.6852, "step": 12900 }, { "epoch": 0.27005358787574313, "grad_norm": 0.2738502025604248, "learning_rate": 0.000196044487827463, "loss": 11.6823, "step": 12901 }, { "epoch": 0.2700745206397053, "grad_norm": 0.22505010664463043, "learning_rate": 0.00019604387724937486, "loss": 11.675, "step": 12902 }, { "epoch": 0.2700954534036674, "grad_norm": 1.7852290868759155, "learning_rate": 0.00019604326662511646, "loss": 11.5829, "step": 12903 }, { "epoch": 0.27011638616762956, "grad_norm": 0.315165638923645, "learning_rate": 0.00019604265595468817, "loss": 11.6599, "step": 12904 }, { "epoch": 0.2701373189315917, "grad_norm": 0.20692145824432373, "learning_rate": 0.00019604204523809022, "loss": 11.6549, "step": 12905 }, { "epoch": 0.2701582516955539, "grad_norm": 0.2670610547065735, "learning_rate": 0.00019604143447532298, "loss": 11.6658, "step": 12906 }, { "epoch": 0.27017918445951605, "grad_norm": 0.2589159309864044, "learning_rate": 0.00019604082366638664, "loss": 11.6592, "step": 12907 }, { "epoch": 0.2702001172234782, "grad_norm": 0.3048743009567261, "learning_rate": 0.0001960402128112816, "loss": 11.6843, "step": 12908 }, { "epoch": 0.27022104998744034, "grad_norm": 0.2669053077697754, "learning_rate": 0.0001960396019100081, "loss": 11.6831, "step": 12909 }, { "epoch": 0.2702419827514025, "grad_norm": 0.2412487119436264, "learning_rate": 0.0001960389909625664, "loss": 11.6762, "step": 12910 }, { "epoch": 0.27026291551536463, "grad_norm": 0.23333284258842468, "learning_rate": 0.00019603837996895687, "loss": 11.6726, "step": 12911 }, { "epoch": 0.2702838482793268, "grad_norm": 0.2479962408542633, "learning_rate": 0.00019603776892917975, "loss": 11.6582, "step": 12912 }, { "epoch": 0.270304781043289, "grad_norm": 0.2913455665111542, "learning_rate": 0.00019603715784323534, "loss": 11.6735, "step": 12913 }, { "epoch": 0.2703257138072511, "grad_norm": 0.25206759572029114, "learning_rate": 0.00019603654671112398, "loss": 11.6846, "step": 12914 }, { "epoch": 0.27034664657121327, "grad_norm": 0.32593443989753723, "learning_rate": 0.0001960359355328459, "loss": 11.6833, "step": 12915 }, { "epoch": 0.2703675793351754, "grad_norm": 0.2585417628288269, "learning_rate": 0.00019603532430840143, "loss": 11.662, "step": 12916 }, { "epoch": 0.27038851209913756, "grad_norm": 0.2993650436401367, "learning_rate": 0.00019603471303779085, "loss": 11.7007, "step": 12917 }, { "epoch": 0.2704094448630997, "grad_norm": 0.24888157844543457, "learning_rate": 0.00019603410172101447, "loss": 11.6919, "step": 12918 }, { "epoch": 0.2704303776270619, "grad_norm": 0.24765539169311523, "learning_rate": 0.00019603349035807256, "loss": 11.6599, "step": 12919 }, { "epoch": 0.27045131039102405, "grad_norm": 0.2524113357067108, "learning_rate": 0.00019603287894896543, "loss": 11.6804, "step": 12920 }, { "epoch": 0.2704722431549862, "grad_norm": 0.2666206359863281, "learning_rate": 0.00019603226749369336, "loss": 11.6776, "step": 12921 }, { "epoch": 0.27049317591894834, "grad_norm": 0.39627590775489807, "learning_rate": 0.00019603165599225668, "loss": 11.7052, "step": 12922 }, { "epoch": 0.2705141086829105, "grad_norm": 0.21165727078914642, "learning_rate": 0.00019603104444465563, "loss": 11.6725, "step": 12923 }, { "epoch": 0.2705350414468726, "grad_norm": 0.26971155405044556, "learning_rate": 0.00019603043285089053, "loss": 11.6594, "step": 12924 }, { "epoch": 0.27055597421083477, "grad_norm": 0.263935387134552, "learning_rate": 0.0001960298212109617, "loss": 11.6638, "step": 12925 }, { "epoch": 0.27057690697479697, "grad_norm": 0.27184292674064636, "learning_rate": 0.00019602920952486942, "loss": 11.6681, "step": 12926 }, { "epoch": 0.2705978397387591, "grad_norm": 0.24071010947227478, "learning_rate": 0.00019602859779261397, "loss": 11.6523, "step": 12927 }, { "epoch": 0.27061877250272126, "grad_norm": 0.26640865206718445, "learning_rate": 0.00019602798601419562, "loss": 11.702, "step": 12928 }, { "epoch": 0.2706397052666834, "grad_norm": 0.2680353820323944, "learning_rate": 0.0001960273741896147, "loss": 11.682, "step": 12929 }, { "epoch": 0.27066063803064555, "grad_norm": 0.22343295812606812, "learning_rate": 0.0001960267623188715, "loss": 11.664, "step": 12930 }, { "epoch": 0.2706815707946077, "grad_norm": 0.24951204657554626, "learning_rate": 0.00019602615040196632, "loss": 11.6776, "step": 12931 }, { "epoch": 0.2707025035585699, "grad_norm": 0.2612898349761963, "learning_rate": 0.00019602553843889946, "loss": 11.6749, "step": 12932 }, { "epoch": 0.27072343632253204, "grad_norm": 0.22739997506141663, "learning_rate": 0.00019602492642967117, "loss": 11.6674, "step": 12933 }, { "epoch": 0.2707443690864942, "grad_norm": 0.23029877245426178, "learning_rate": 0.00019602431437428178, "loss": 11.659, "step": 12934 }, { "epoch": 0.27076530185045633, "grad_norm": 0.2751598060131073, "learning_rate": 0.0001960237022727316, "loss": 11.66, "step": 12935 }, { "epoch": 0.2707862346144185, "grad_norm": 0.27672427892684937, "learning_rate": 0.00019602309012502087, "loss": 11.6824, "step": 12936 }, { "epoch": 0.2708071673783806, "grad_norm": 0.2752169966697693, "learning_rate": 0.0001960224779311499, "loss": 11.6938, "step": 12937 }, { "epoch": 0.2708281001423428, "grad_norm": 0.22979052364826202, "learning_rate": 0.00019602186569111908, "loss": 11.6874, "step": 12938 }, { "epoch": 0.27084903290630497, "grad_norm": 0.23601242899894714, "learning_rate": 0.00019602125340492855, "loss": 11.6714, "step": 12939 }, { "epoch": 0.2708699656702671, "grad_norm": 0.24140246212482452, "learning_rate": 0.0001960206410725787, "loss": 11.6856, "step": 12940 }, { "epoch": 0.27089089843422925, "grad_norm": 0.27081021666526794, "learning_rate": 0.0001960200286940698, "loss": 11.6683, "step": 12941 }, { "epoch": 0.2709118311981914, "grad_norm": 0.292507141828537, "learning_rate": 0.0001960194162694022, "loss": 11.7034, "step": 12942 }, { "epoch": 0.27093276396215354, "grad_norm": 0.2977398931980133, "learning_rate": 0.00019601880379857607, "loss": 11.6706, "step": 12943 }, { "epoch": 0.2709536967261157, "grad_norm": 0.25248652696609497, "learning_rate": 0.0001960181912815918, "loss": 11.6808, "step": 12944 }, { "epoch": 0.2709746294900779, "grad_norm": 0.3199937045574188, "learning_rate": 0.00019601757871844967, "loss": 11.6682, "step": 12945 }, { "epoch": 0.27099556225404003, "grad_norm": 0.3687133193016052, "learning_rate": 0.00019601696610914996, "loss": 11.6958, "step": 12946 }, { "epoch": 0.2710164950180022, "grad_norm": 0.214016854763031, "learning_rate": 0.000196016353453693, "loss": 11.684, "step": 12947 }, { "epoch": 0.2710374277819643, "grad_norm": 0.20376449823379517, "learning_rate": 0.000196015740752079, "loss": 11.6702, "step": 12948 }, { "epoch": 0.27105836054592647, "grad_norm": 0.35299599170684814, "learning_rate": 0.00019601512800430834, "loss": 11.6999, "step": 12949 }, { "epoch": 0.2710792933098886, "grad_norm": 0.2161847949028015, "learning_rate": 0.0001960145152103813, "loss": 11.6622, "step": 12950 }, { "epoch": 0.2711002260738508, "grad_norm": 0.22986149787902832, "learning_rate": 0.00019601390237029812, "loss": 11.6726, "step": 12951 }, { "epoch": 0.27112115883781296, "grad_norm": 0.24041113257408142, "learning_rate": 0.00019601328948405918, "loss": 11.6634, "step": 12952 }, { "epoch": 0.2711420916017751, "grad_norm": 0.24460582435131073, "learning_rate": 0.0001960126765516647, "loss": 11.6762, "step": 12953 }, { "epoch": 0.27116302436573725, "grad_norm": 0.2648158669471741, "learning_rate": 0.000196012063573115, "loss": 11.6799, "step": 12954 }, { "epoch": 0.2711839571296994, "grad_norm": 0.25807881355285645, "learning_rate": 0.00019601145054841035, "loss": 11.6738, "step": 12955 }, { "epoch": 0.27120488989366154, "grad_norm": 0.5225321650505066, "learning_rate": 0.0001960108374775511, "loss": 11.6966, "step": 12956 }, { "epoch": 0.27122582265762374, "grad_norm": 0.36969152092933655, "learning_rate": 0.00019601022436053752, "loss": 11.6617, "step": 12957 }, { "epoch": 0.2712467554215859, "grad_norm": 0.24166899919509888, "learning_rate": 0.0001960096111973699, "loss": 11.674, "step": 12958 }, { "epoch": 0.27126768818554803, "grad_norm": 0.29678207635879517, "learning_rate": 0.00019600899798804852, "loss": 11.6818, "step": 12959 }, { "epoch": 0.2712886209495102, "grad_norm": 0.30723366141319275, "learning_rate": 0.00019600838473257372, "loss": 11.6703, "step": 12960 }, { "epoch": 0.2713095537134723, "grad_norm": 0.28230875730514526, "learning_rate": 0.00019600777143094576, "loss": 11.6763, "step": 12961 }, { "epoch": 0.27133048647743446, "grad_norm": 0.2658598721027374, "learning_rate": 0.0001960071580831649, "loss": 11.68, "step": 12962 }, { "epoch": 0.2713514192413966, "grad_norm": 0.27219492197036743, "learning_rate": 0.00019600654468923152, "loss": 11.687, "step": 12963 }, { "epoch": 0.2713723520053588, "grad_norm": 0.34055089950561523, "learning_rate": 0.00019600593124914585, "loss": 11.6985, "step": 12964 }, { "epoch": 0.27139328476932095, "grad_norm": 0.26461753249168396, "learning_rate": 0.0001960053177629082, "loss": 11.6653, "step": 12965 }, { "epoch": 0.2714142175332831, "grad_norm": 0.2646329700946808, "learning_rate": 0.0001960047042305189, "loss": 11.6815, "step": 12966 }, { "epoch": 0.27143515029724524, "grad_norm": 0.24781924486160278, "learning_rate": 0.00019600409065197817, "loss": 11.6546, "step": 12967 }, { "epoch": 0.2714560830612074, "grad_norm": 0.2858639359474182, "learning_rate": 0.0001960034770272864, "loss": 11.6757, "step": 12968 }, { "epoch": 0.27147701582516953, "grad_norm": 0.2609879970550537, "learning_rate": 0.0001960028633564438, "loss": 11.6813, "step": 12969 }, { "epoch": 0.27149794858913173, "grad_norm": 0.3017232120037079, "learning_rate": 0.0001960022496394507, "loss": 11.6801, "step": 12970 }, { "epoch": 0.2715188813530939, "grad_norm": 0.2738431394100189, "learning_rate": 0.0001960016358763074, "loss": 11.6841, "step": 12971 }, { "epoch": 0.271539814117056, "grad_norm": 0.3086194694042206, "learning_rate": 0.00019600102206701421, "loss": 11.6597, "step": 12972 }, { "epoch": 0.27156074688101817, "grad_norm": 0.2669341564178467, "learning_rate": 0.0001960004082115714, "loss": 11.6642, "step": 12973 }, { "epoch": 0.2715816796449803, "grad_norm": 0.23244702816009521, "learning_rate": 0.00019599979430997923, "loss": 11.6864, "step": 12974 }, { "epoch": 0.27160261240894246, "grad_norm": 0.2370760440826416, "learning_rate": 0.0001959991803622381, "loss": 11.6573, "step": 12975 }, { "epoch": 0.27162354517290466, "grad_norm": 0.2634977400302887, "learning_rate": 0.00019599856636834818, "loss": 11.6663, "step": 12976 }, { "epoch": 0.2716444779368668, "grad_norm": 0.36607399582862854, "learning_rate": 0.00019599795232830985, "loss": 11.6775, "step": 12977 }, { "epoch": 0.27166541070082895, "grad_norm": 0.2551988959312439, "learning_rate": 0.00019599733824212336, "loss": 11.6638, "step": 12978 }, { "epoch": 0.2716863434647911, "grad_norm": 0.2808060646057129, "learning_rate": 0.00019599672410978904, "loss": 11.6745, "step": 12979 }, { "epoch": 0.27170727622875324, "grad_norm": 0.2692837417125702, "learning_rate": 0.00019599610993130718, "loss": 11.6781, "step": 12980 }, { "epoch": 0.2717282089927154, "grad_norm": 0.25246983766555786, "learning_rate": 0.00019599549570667807, "loss": 11.6917, "step": 12981 }, { "epoch": 0.2717491417566775, "grad_norm": 0.355834424495697, "learning_rate": 0.00019599488143590198, "loss": 11.6738, "step": 12982 }, { "epoch": 0.2717700745206397, "grad_norm": 0.33757561445236206, "learning_rate": 0.00019599426711897926, "loss": 11.6844, "step": 12983 }, { "epoch": 0.27179100728460187, "grad_norm": 0.32204580307006836, "learning_rate": 0.00019599365275591013, "loss": 11.6719, "step": 12984 }, { "epoch": 0.271811940048564, "grad_norm": 0.2956126034259796, "learning_rate": 0.00019599303834669495, "loss": 11.6761, "step": 12985 }, { "epoch": 0.27183287281252616, "grad_norm": 0.25977611541748047, "learning_rate": 0.00019599242389133398, "loss": 11.6702, "step": 12986 }, { "epoch": 0.2718538055764883, "grad_norm": 0.2440444380044937, "learning_rate": 0.00019599180938982755, "loss": 11.6617, "step": 12987 }, { "epoch": 0.27187473834045045, "grad_norm": 0.2648613750934601, "learning_rate": 0.00019599119484217594, "loss": 11.6704, "step": 12988 }, { "epoch": 0.27189567110441265, "grad_norm": 0.2560688257217407, "learning_rate": 0.00019599058024837943, "loss": 11.6793, "step": 12989 }, { "epoch": 0.2719166038683748, "grad_norm": 0.23932583630084991, "learning_rate": 0.0001959899656084383, "loss": 11.689, "step": 12990 }, { "epoch": 0.27193753663233694, "grad_norm": 0.23008480668067932, "learning_rate": 0.0001959893509223529, "loss": 11.6785, "step": 12991 }, { "epoch": 0.2719584693962991, "grad_norm": 0.2811216413974762, "learning_rate": 0.00019598873619012345, "loss": 11.6831, "step": 12992 }, { "epoch": 0.27197940216026123, "grad_norm": 0.28421398997306824, "learning_rate": 0.00019598812141175033, "loss": 11.6817, "step": 12993 }, { "epoch": 0.2720003349242234, "grad_norm": 0.2483278214931488, "learning_rate": 0.0001959875065872338, "loss": 11.6835, "step": 12994 }, { "epoch": 0.2720212676881856, "grad_norm": 0.2597915530204773, "learning_rate": 0.00019598689171657413, "loss": 11.6709, "step": 12995 }, { "epoch": 0.2720422004521477, "grad_norm": 0.2722347378730774, "learning_rate": 0.00019598627679977165, "loss": 11.6746, "step": 12996 }, { "epoch": 0.27206313321610986, "grad_norm": 0.30841290950775146, "learning_rate": 0.00019598566183682666, "loss": 11.6841, "step": 12997 }, { "epoch": 0.272084065980072, "grad_norm": 0.2472875565290451, "learning_rate": 0.00019598504682773941, "loss": 11.6862, "step": 12998 }, { "epoch": 0.27210499874403415, "grad_norm": 0.2736356854438782, "learning_rate": 0.00019598443177251023, "loss": 11.6623, "step": 12999 }, { "epoch": 0.2721259315079963, "grad_norm": 0.22588315606117249, "learning_rate": 0.0001959838166711394, "loss": 11.6774, "step": 13000 }, { "epoch": 0.2721259315079963, "eval_loss": 11.676705360412598, "eval_runtime": 34.3372, "eval_samples_per_second": 27.987, "eval_steps_per_second": 7.019, "step": 13000 }, { "epoch": 0.27214686427195844, "grad_norm": 0.24281758069992065, "learning_rate": 0.00019598320152362726, "loss": 11.6735, "step": 13001 }, { "epoch": 0.27216779703592064, "grad_norm": 0.2710285484790802, "learning_rate": 0.00019598258632997406, "loss": 11.674, "step": 13002 }, { "epoch": 0.2721887297998828, "grad_norm": 0.28898417949676514, "learning_rate": 0.0001959819710901801, "loss": 11.6519, "step": 13003 }, { "epoch": 0.27220966256384493, "grad_norm": 0.2812093198299408, "learning_rate": 0.00019598135580424568, "loss": 11.6858, "step": 13004 }, { "epoch": 0.2722305953278071, "grad_norm": 0.3012925982475281, "learning_rate": 0.0001959807404721711, "loss": 11.7005, "step": 13005 }, { "epoch": 0.2722515280917692, "grad_norm": 0.24020396173000336, "learning_rate": 0.00019598012509395665, "loss": 11.6812, "step": 13006 }, { "epoch": 0.27227246085573137, "grad_norm": 0.2558495104312897, "learning_rate": 0.00019597950966960263, "loss": 11.693, "step": 13007 }, { "epoch": 0.27229339361969357, "grad_norm": 0.2699644863605499, "learning_rate": 0.00019597889419910934, "loss": 11.6657, "step": 13008 }, { "epoch": 0.2723143263836557, "grad_norm": 0.29846295714378357, "learning_rate": 0.00019597827868247708, "loss": 11.6816, "step": 13009 }, { "epoch": 0.27233525914761786, "grad_norm": 0.29491451382637024, "learning_rate": 0.00019597766311970613, "loss": 11.6829, "step": 13010 }, { "epoch": 0.27235619191158, "grad_norm": 0.23131157457828522, "learning_rate": 0.0001959770475107968, "loss": 11.6689, "step": 13011 }, { "epoch": 0.27237712467554215, "grad_norm": 0.28746700286865234, "learning_rate": 0.00019597643185574937, "loss": 11.6823, "step": 13012 }, { "epoch": 0.2723980574395043, "grad_norm": 0.2572939395904541, "learning_rate": 0.00019597581615456419, "loss": 11.6871, "step": 13013 }, { "epoch": 0.2724189902034665, "grad_norm": 0.30276772379875183, "learning_rate": 0.00019597520040724145, "loss": 11.6832, "step": 13014 }, { "epoch": 0.27243992296742864, "grad_norm": 0.2833397686481476, "learning_rate": 0.00019597458461378153, "loss": 11.6734, "step": 13015 }, { "epoch": 0.2724608557313908, "grad_norm": 0.3206089437007904, "learning_rate": 0.0001959739687741847, "loss": 11.6887, "step": 13016 }, { "epoch": 0.27248178849535293, "grad_norm": 0.2504902482032776, "learning_rate": 0.00019597335288845126, "loss": 11.6679, "step": 13017 }, { "epoch": 0.2725027212593151, "grad_norm": 0.28508567810058594, "learning_rate": 0.00019597273695658152, "loss": 11.6765, "step": 13018 }, { "epoch": 0.2725236540232772, "grad_norm": 0.26939234137535095, "learning_rate": 0.00019597212097857575, "loss": 11.6749, "step": 13019 }, { "epoch": 0.27254458678723936, "grad_norm": 0.2905929982662201, "learning_rate": 0.00019597150495443425, "loss": 11.6836, "step": 13020 }, { "epoch": 0.27256551955120156, "grad_norm": 0.3226439356803894, "learning_rate": 0.00019597088888415734, "loss": 11.6595, "step": 13021 }, { "epoch": 0.2725864523151637, "grad_norm": 0.251530259847641, "learning_rate": 0.0001959702727677453, "loss": 11.682, "step": 13022 }, { "epoch": 0.27260738507912585, "grad_norm": 0.3034529685974121, "learning_rate": 0.00019596965660519843, "loss": 11.679, "step": 13023 }, { "epoch": 0.272628317843088, "grad_norm": 0.3072458505630493, "learning_rate": 0.000195969040396517, "loss": 11.6756, "step": 13024 }, { "epoch": 0.27264925060705014, "grad_norm": 0.24614721536636353, "learning_rate": 0.00019596842414170137, "loss": 11.6836, "step": 13025 }, { "epoch": 0.2726701833710123, "grad_norm": 0.2421949952840805, "learning_rate": 0.00019596780784075177, "loss": 11.6714, "step": 13026 }, { "epoch": 0.2726911161349745, "grad_norm": 0.2907482087612152, "learning_rate": 0.0001959671914936685, "loss": 11.6737, "step": 13027 }, { "epoch": 0.27271204889893663, "grad_norm": 0.2977355122566223, "learning_rate": 0.0001959665751004519, "loss": 11.6763, "step": 13028 }, { "epoch": 0.2727329816628988, "grad_norm": 0.22903697192668915, "learning_rate": 0.00019596595866110225, "loss": 11.6868, "step": 13029 }, { "epoch": 0.2727539144268609, "grad_norm": 0.2582266628742218, "learning_rate": 0.00019596534217561983, "loss": 11.6943, "step": 13030 }, { "epoch": 0.27277484719082307, "grad_norm": 0.3308282792568207, "learning_rate": 0.00019596472564400498, "loss": 11.6846, "step": 13031 }, { "epoch": 0.2727957799547852, "grad_norm": 0.26208826899528503, "learning_rate": 0.0001959641090662579, "loss": 11.6852, "step": 13032 }, { "epoch": 0.27281671271874736, "grad_norm": 0.3178541958332062, "learning_rate": 0.00019596349244237898, "loss": 11.6845, "step": 13033 }, { "epoch": 0.27283764548270956, "grad_norm": 0.28292813897132874, "learning_rate": 0.00019596287577236852, "loss": 11.682, "step": 13034 }, { "epoch": 0.2728585782466717, "grad_norm": 0.24510136246681213, "learning_rate": 0.00019596225905622674, "loss": 11.6932, "step": 13035 }, { "epoch": 0.27287951101063385, "grad_norm": 0.2796984612941742, "learning_rate": 0.000195961642293954, "loss": 11.6774, "step": 13036 }, { "epoch": 0.272900443774596, "grad_norm": 0.28375691175460815, "learning_rate": 0.00019596102548555055, "loss": 11.7025, "step": 13037 }, { "epoch": 0.27292137653855814, "grad_norm": 0.29333609342575073, "learning_rate": 0.00019596040863101673, "loss": 11.6787, "step": 13038 }, { "epoch": 0.2729423093025203, "grad_norm": 0.263769268989563, "learning_rate": 0.00019595979173035283, "loss": 11.6821, "step": 13039 }, { "epoch": 0.2729632420664825, "grad_norm": 0.3079507350921631, "learning_rate": 0.0001959591747835591, "loss": 11.6872, "step": 13040 }, { "epoch": 0.2729841748304446, "grad_norm": 0.24477894604206085, "learning_rate": 0.0001959585577906359, "loss": 11.6674, "step": 13041 }, { "epoch": 0.27300510759440677, "grad_norm": 0.25474825501441956, "learning_rate": 0.0001959579407515835, "loss": 11.6628, "step": 13042 }, { "epoch": 0.2730260403583689, "grad_norm": 0.33632636070251465, "learning_rate": 0.0001959573236664022, "loss": 11.681, "step": 13043 }, { "epoch": 0.27304697312233106, "grad_norm": 0.2986171543598175, "learning_rate": 0.00019595670653509226, "loss": 11.671, "step": 13044 }, { "epoch": 0.2730679058862932, "grad_norm": 0.2817954123020172, "learning_rate": 0.000195956089357654, "loss": 11.6824, "step": 13045 }, { "epoch": 0.2730888386502554, "grad_norm": 0.32924240827560425, "learning_rate": 0.00019595547213408778, "loss": 11.6737, "step": 13046 }, { "epoch": 0.27310977141421755, "grad_norm": 0.306230753660202, "learning_rate": 0.00019595485486439382, "loss": 11.6786, "step": 13047 }, { "epoch": 0.2731307041781797, "grad_norm": 0.2858582139015198, "learning_rate": 0.00019595423754857242, "loss": 11.6888, "step": 13048 }, { "epoch": 0.27315163694214184, "grad_norm": 0.2693402171134949, "learning_rate": 0.0001959536201866239, "loss": 11.6804, "step": 13049 }, { "epoch": 0.273172569706104, "grad_norm": 0.28303277492523193, "learning_rate": 0.00019595300277854856, "loss": 11.6784, "step": 13050 }, { "epoch": 0.27319350247006613, "grad_norm": 0.2743145227432251, "learning_rate": 0.0001959523853243467, "loss": 11.6756, "step": 13051 }, { "epoch": 0.2732144352340283, "grad_norm": 0.2853420376777649, "learning_rate": 0.0001959517678240186, "loss": 11.6751, "step": 13052 }, { "epoch": 0.2732353679979905, "grad_norm": 0.31387317180633545, "learning_rate": 0.00019595115027756454, "loss": 11.6838, "step": 13053 }, { "epoch": 0.2732563007619526, "grad_norm": 0.23117130994796753, "learning_rate": 0.00019595053268498487, "loss": 11.6776, "step": 13054 }, { "epoch": 0.27327723352591476, "grad_norm": 0.32458898425102234, "learning_rate": 0.00019594991504627985, "loss": 11.6806, "step": 13055 }, { "epoch": 0.2732981662898769, "grad_norm": 0.2538113594055176, "learning_rate": 0.00019594929736144976, "loss": 11.6758, "step": 13056 }, { "epoch": 0.27331909905383905, "grad_norm": 0.2492760717868805, "learning_rate": 0.00019594867963049494, "loss": 11.6728, "step": 13057 }, { "epoch": 0.2733400318178012, "grad_norm": 0.25141647458076477, "learning_rate": 0.00019594806185341566, "loss": 11.678, "step": 13058 }, { "epoch": 0.2733609645817634, "grad_norm": 0.24437445402145386, "learning_rate": 0.0001959474440302122, "loss": 11.6898, "step": 13059 }, { "epoch": 0.27338189734572554, "grad_norm": 0.24886126816272736, "learning_rate": 0.0001959468261608849, "loss": 11.6716, "step": 13060 }, { "epoch": 0.2734028301096877, "grad_norm": 0.2541118562221527, "learning_rate": 0.00019594620824543406, "loss": 11.662, "step": 13061 }, { "epoch": 0.27342376287364983, "grad_norm": 0.22756610810756683, "learning_rate": 0.00019594559028385993, "loss": 11.668, "step": 13062 }, { "epoch": 0.273444695637612, "grad_norm": 0.27610793709754944, "learning_rate": 0.00019594497227616285, "loss": 11.6869, "step": 13063 }, { "epoch": 0.2734656284015741, "grad_norm": 0.31351912021636963, "learning_rate": 0.0001959443542223431, "loss": 11.6867, "step": 13064 }, { "epoch": 0.2734865611655363, "grad_norm": 0.2901233434677124, "learning_rate": 0.00019594373612240093, "loss": 11.6818, "step": 13065 }, { "epoch": 0.27350749392949847, "grad_norm": 0.22781947255134583, "learning_rate": 0.00019594311797633673, "loss": 11.6645, "step": 13066 }, { "epoch": 0.2735284266934606, "grad_norm": 0.2396935224533081, "learning_rate": 0.00019594249978415073, "loss": 11.6762, "step": 13067 }, { "epoch": 0.27354935945742276, "grad_norm": 0.29880377650260925, "learning_rate": 0.00019594188154584328, "loss": 11.6842, "step": 13068 }, { "epoch": 0.2735702922213849, "grad_norm": 0.3263603150844574, "learning_rate": 0.0001959412632614146, "loss": 11.685, "step": 13069 }, { "epoch": 0.27359122498534705, "grad_norm": 0.3021877408027649, "learning_rate": 0.00019594064493086503, "loss": 11.679, "step": 13070 }, { "epoch": 0.2736121577493092, "grad_norm": 0.2641358971595764, "learning_rate": 0.0001959400265541949, "loss": 11.6785, "step": 13071 }, { "epoch": 0.2736330905132714, "grad_norm": 0.23715506494045258, "learning_rate": 0.00019593940813140447, "loss": 11.6769, "step": 13072 }, { "epoch": 0.27365402327723354, "grad_norm": 0.2559080719947815, "learning_rate": 0.00019593878966249405, "loss": 11.6708, "step": 13073 }, { "epoch": 0.2736749560411957, "grad_norm": 0.2961064577102661, "learning_rate": 0.00019593817114746392, "loss": 11.6671, "step": 13074 }, { "epoch": 0.2736958888051578, "grad_norm": 0.23243103921413422, "learning_rate": 0.00019593755258631438, "loss": 11.6609, "step": 13075 }, { "epoch": 0.27371682156912, "grad_norm": 0.3223085105419159, "learning_rate": 0.00019593693397904578, "loss": 11.6832, "step": 13076 }, { "epoch": 0.2737377543330821, "grad_norm": 0.2454010546207428, "learning_rate": 0.00019593631532565831, "loss": 11.6788, "step": 13077 }, { "epoch": 0.2737586870970443, "grad_norm": 0.2565191686153412, "learning_rate": 0.00019593569662615238, "loss": 11.683, "step": 13078 }, { "epoch": 0.27377961986100646, "grad_norm": 0.23532184958457947, "learning_rate": 0.00019593507788052823, "loss": 11.6729, "step": 13079 }, { "epoch": 0.2738005526249686, "grad_norm": 0.24663090705871582, "learning_rate": 0.00019593445908878615, "loss": 11.6707, "step": 13080 }, { "epoch": 0.27382148538893075, "grad_norm": 0.3164748251438141, "learning_rate": 0.00019593384025092645, "loss": 11.6819, "step": 13081 }, { "epoch": 0.2738424181528929, "grad_norm": 0.29117006063461304, "learning_rate": 0.00019593322136694945, "loss": 11.6899, "step": 13082 }, { "epoch": 0.27386335091685504, "grad_norm": 0.24580766260623932, "learning_rate": 0.0001959326024368554, "loss": 11.6747, "step": 13083 }, { "epoch": 0.27388428368081724, "grad_norm": 0.2899816036224365, "learning_rate": 0.00019593198346064465, "loss": 11.6744, "step": 13084 }, { "epoch": 0.2739052164447794, "grad_norm": 0.29514428973197937, "learning_rate": 0.0001959313644383175, "loss": 11.6867, "step": 13085 }, { "epoch": 0.27392614920874153, "grad_norm": 0.24132247269153595, "learning_rate": 0.00019593074536987414, "loss": 11.6547, "step": 13086 }, { "epoch": 0.2739470819727037, "grad_norm": 0.2199143022298813, "learning_rate": 0.000195930126255315, "loss": 11.6809, "step": 13087 }, { "epoch": 0.2739680147366658, "grad_norm": 0.24721798300743103, "learning_rate": 0.00019592950709464033, "loss": 11.69, "step": 13088 }, { "epoch": 0.27398894750062797, "grad_norm": 0.32494980096817017, "learning_rate": 0.00019592888788785043, "loss": 11.6767, "step": 13089 }, { "epoch": 0.2740098802645901, "grad_norm": 0.2848994731903076, "learning_rate": 0.00019592826863494556, "loss": 11.6732, "step": 13090 }, { "epoch": 0.2740308130285523, "grad_norm": 0.2488088607788086, "learning_rate": 0.00019592764933592608, "loss": 11.6787, "step": 13091 }, { "epoch": 0.27405174579251446, "grad_norm": 0.2965836822986603, "learning_rate": 0.00019592702999079222, "loss": 11.6862, "step": 13092 }, { "epoch": 0.2740726785564766, "grad_norm": 0.26717010140419006, "learning_rate": 0.00019592641059954432, "loss": 11.6873, "step": 13093 }, { "epoch": 0.27409361132043875, "grad_norm": 0.25507068634033203, "learning_rate": 0.0001959257911621827, "loss": 11.6785, "step": 13094 }, { "epoch": 0.2741145440844009, "grad_norm": 0.2646118998527527, "learning_rate": 0.0001959251716787076, "loss": 11.6605, "step": 13095 }, { "epoch": 0.27413547684836304, "grad_norm": 0.280528724193573, "learning_rate": 0.00019592455214911938, "loss": 11.6713, "step": 13096 }, { "epoch": 0.27415640961232524, "grad_norm": 0.24000319838523865, "learning_rate": 0.0001959239325734183, "loss": 11.6795, "step": 13097 }, { "epoch": 0.2741773423762874, "grad_norm": 0.25783461332321167, "learning_rate": 0.00019592331295160465, "loss": 11.6699, "step": 13098 }, { "epoch": 0.2741982751402495, "grad_norm": 0.2742374539375305, "learning_rate": 0.00019592269328367874, "loss": 11.6818, "step": 13099 }, { "epoch": 0.27421920790421167, "grad_norm": 0.2517896890640259, "learning_rate": 0.00019592207356964084, "loss": 11.6656, "step": 13100 }, { "epoch": 0.2742401406681738, "grad_norm": 0.300917387008667, "learning_rate": 0.00019592145380949134, "loss": 11.664, "step": 13101 }, { "epoch": 0.27426107343213596, "grad_norm": 0.2686832845211029, "learning_rate": 0.00019592083400323044, "loss": 11.6825, "step": 13102 }, { "epoch": 0.27428200619609816, "grad_norm": 0.2270287573337555, "learning_rate": 0.00019592021415085849, "loss": 11.6874, "step": 13103 }, { "epoch": 0.2743029389600603, "grad_norm": 0.24381539225578308, "learning_rate": 0.00019591959425237576, "loss": 11.6654, "step": 13104 }, { "epoch": 0.27432387172402245, "grad_norm": 0.3844051659107208, "learning_rate": 0.00019591897430778254, "loss": 11.6787, "step": 13105 }, { "epoch": 0.2743448044879846, "grad_norm": 0.6844510436058044, "learning_rate": 0.00019591835431707918, "loss": 11.6172, "step": 13106 }, { "epoch": 0.27436573725194674, "grad_norm": 0.2214098870754242, "learning_rate": 0.00019591773428026592, "loss": 11.6557, "step": 13107 }, { "epoch": 0.2743866700159089, "grad_norm": 0.35249555110931396, "learning_rate": 0.00019591711419734308, "loss": 11.6721, "step": 13108 }, { "epoch": 0.27440760277987103, "grad_norm": 0.2505090832710266, "learning_rate": 0.00019591649406831097, "loss": 11.6641, "step": 13109 }, { "epoch": 0.27442853554383323, "grad_norm": 0.2372232973575592, "learning_rate": 0.0001959158738931699, "loss": 11.6708, "step": 13110 }, { "epoch": 0.2744494683077954, "grad_norm": 0.3330483138561249, "learning_rate": 0.00019591525367192012, "loss": 11.6809, "step": 13111 }, { "epoch": 0.2744704010717575, "grad_norm": 0.30286121368408203, "learning_rate": 0.00019591463340456195, "loss": 11.6717, "step": 13112 }, { "epoch": 0.27449133383571966, "grad_norm": 0.2795209586620331, "learning_rate": 0.00019591401309109572, "loss": 11.6731, "step": 13113 }, { "epoch": 0.2745122665996818, "grad_norm": 0.21119366586208344, "learning_rate": 0.00019591339273152168, "loss": 11.652, "step": 13114 }, { "epoch": 0.27453319936364395, "grad_norm": 0.22006578743457794, "learning_rate": 0.00019591277232584018, "loss": 11.6882, "step": 13115 }, { "epoch": 0.27455413212760615, "grad_norm": 0.27729228138923645, "learning_rate": 0.00019591215187405147, "loss": 11.6842, "step": 13116 }, { "epoch": 0.2745750648915683, "grad_norm": 0.31867051124572754, "learning_rate": 0.00019591153137615583, "loss": 11.681, "step": 13117 }, { "epoch": 0.27459599765553044, "grad_norm": 0.29431599378585815, "learning_rate": 0.00019591091083215364, "loss": 11.6671, "step": 13118 }, { "epoch": 0.2746169304194926, "grad_norm": 0.26020804047584534, "learning_rate": 0.00019591029024204515, "loss": 11.652, "step": 13119 }, { "epoch": 0.27463786318345473, "grad_norm": 0.2418055534362793, "learning_rate": 0.00019590966960583063, "loss": 11.647, "step": 13120 }, { "epoch": 0.2746587959474169, "grad_norm": 0.2982284426689148, "learning_rate": 0.00019590904892351045, "loss": 11.6768, "step": 13121 }, { "epoch": 0.274679728711379, "grad_norm": 0.24550513923168182, "learning_rate": 0.00019590842819508487, "loss": 11.6745, "step": 13122 }, { "epoch": 0.2747006614753412, "grad_norm": 0.30924561619758606, "learning_rate": 0.00019590780742055416, "loss": 11.6735, "step": 13123 }, { "epoch": 0.27472159423930337, "grad_norm": 0.28868991136550903, "learning_rate": 0.00019590718659991864, "loss": 11.6772, "step": 13124 }, { "epoch": 0.2747425270032655, "grad_norm": 0.26356303691864014, "learning_rate": 0.00019590656573317862, "loss": 11.6539, "step": 13125 }, { "epoch": 0.27476345976722766, "grad_norm": 0.30535218119621277, "learning_rate": 0.00019590594482033443, "loss": 11.6773, "step": 13126 }, { "epoch": 0.2747843925311898, "grad_norm": 1.8738700151443481, "learning_rate": 0.0001959053238613863, "loss": 11.6011, "step": 13127 }, { "epoch": 0.27480532529515195, "grad_norm": 0.2458217591047287, "learning_rate": 0.00019590470285633452, "loss": 11.6602, "step": 13128 }, { "epoch": 0.27482625805911415, "grad_norm": 0.2635565400123596, "learning_rate": 0.00019590408180517947, "loss": 11.6766, "step": 13129 }, { "epoch": 0.2748471908230763, "grad_norm": 0.3403686583042145, "learning_rate": 0.0001959034607079214, "loss": 11.7027, "step": 13130 }, { "epoch": 0.27486812358703844, "grad_norm": 0.28274109959602356, "learning_rate": 0.0001959028395645606, "loss": 11.675, "step": 13131 }, { "epoch": 0.2748890563510006, "grad_norm": 0.28084391355514526, "learning_rate": 0.0001959022183750974, "loss": 11.6649, "step": 13132 }, { "epoch": 0.2749099891149627, "grad_norm": 0.2819921374320984, "learning_rate": 0.0001959015971395321, "loss": 11.6788, "step": 13133 }, { "epoch": 0.27493092187892487, "grad_norm": 0.27680477499961853, "learning_rate": 0.00019590097585786497, "loss": 11.6662, "step": 13134 }, { "epoch": 0.2749518546428871, "grad_norm": 0.2971177101135254, "learning_rate": 0.0001959003545300963, "loss": 11.6851, "step": 13135 }, { "epoch": 0.2749727874068492, "grad_norm": 0.2928469777107239, "learning_rate": 0.0001958997331562264, "loss": 11.6796, "step": 13136 }, { "epoch": 0.27499372017081136, "grad_norm": 0.22297726571559906, "learning_rate": 0.0001958991117362556, "loss": 11.6785, "step": 13137 }, { "epoch": 0.2750146529347735, "grad_norm": 0.2835407257080078, "learning_rate": 0.0001958984902701842, "loss": 11.6794, "step": 13138 }, { "epoch": 0.27503558569873565, "grad_norm": 0.2785780131816864, "learning_rate": 0.0001958978687580124, "loss": 11.6622, "step": 13139 }, { "epoch": 0.2750565184626978, "grad_norm": 0.3733621835708618, "learning_rate": 0.00019589724719974063, "loss": 11.6744, "step": 13140 }, { "epoch": 0.27507745122665994, "grad_norm": 0.27713289856910706, "learning_rate": 0.0001958966255953691, "loss": 11.6697, "step": 13141 }, { "epoch": 0.27509838399062214, "grad_norm": 0.26652413606643677, "learning_rate": 0.00019589600394489818, "loss": 11.6889, "step": 13142 }, { "epoch": 0.2751193167545843, "grad_norm": 0.261109858751297, "learning_rate": 0.00019589538224832812, "loss": 11.6787, "step": 13143 }, { "epoch": 0.27514024951854643, "grad_norm": 0.2667236626148224, "learning_rate": 0.0001958947605056592, "loss": 11.6951, "step": 13144 }, { "epoch": 0.2751611822825086, "grad_norm": 0.33067819476127625, "learning_rate": 0.00019589413871689177, "loss": 11.6835, "step": 13145 }, { "epoch": 0.2751821150464707, "grad_norm": 0.2544555962085724, "learning_rate": 0.00019589351688202607, "loss": 11.6804, "step": 13146 }, { "epoch": 0.27520304781043287, "grad_norm": 0.2067122459411621, "learning_rate": 0.00019589289500106247, "loss": 11.6703, "step": 13147 }, { "epoch": 0.27522398057439507, "grad_norm": 0.29600387811660767, "learning_rate": 0.00019589227307400124, "loss": 11.6585, "step": 13148 }, { "epoch": 0.2752449133383572, "grad_norm": 0.4375346302986145, "learning_rate": 0.00019589165110084266, "loss": 11.6705, "step": 13149 }, { "epoch": 0.27526584610231936, "grad_norm": 0.22476741671562195, "learning_rate": 0.00019589102908158703, "loss": 11.6783, "step": 13150 }, { "epoch": 0.2752867788662815, "grad_norm": 0.22983650863170624, "learning_rate": 0.00019589040701623468, "loss": 11.659, "step": 13151 }, { "epoch": 0.27530771163024365, "grad_norm": 0.26631981134414673, "learning_rate": 0.00019588978490478587, "loss": 11.6844, "step": 13152 }, { "epoch": 0.2753286443942058, "grad_norm": 0.312190979719162, "learning_rate": 0.00019588916274724093, "loss": 11.6777, "step": 13153 }, { "epoch": 0.275349577158168, "grad_norm": 0.27597877383232117, "learning_rate": 0.00019588854054360013, "loss": 11.6726, "step": 13154 }, { "epoch": 0.27537050992213014, "grad_norm": 0.19975320994853973, "learning_rate": 0.0001958879182938638, "loss": 11.6677, "step": 13155 }, { "epoch": 0.2753914426860923, "grad_norm": 0.2677415609359741, "learning_rate": 0.00019588729599803224, "loss": 11.6795, "step": 13156 }, { "epoch": 0.2754123754500544, "grad_norm": 0.2973707914352417, "learning_rate": 0.0001958866736561057, "loss": 11.6783, "step": 13157 }, { "epoch": 0.27543330821401657, "grad_norm": 0.26152095198631287, "learning_rate": 0.00019588605126808457, "loss": 11.6673, "step": 13158 }, { "epoch": 0.2754542409779787, "grad_norm": 0.24690976738929749, "learning_rate": 0.00019588542883396907, "loss": 11.6705, "step": 13159 }, { "epoch": 0.27547517374194086, "grad_norm": 0.258674681186676, "learning_rate": 0.0001958848063537595, "loss": 11.6785, "step": 13160 }, { "epoch": 0.27549610650590306, "grad_norm": 0.284952312707901, "learning_rate": 0.0001958841838274562, "loss": 11.6934, "step": 13161 }, { "epoch": 0.2755170392698652, "grad_norm": 1.5778058767318726, "learning_rate": 0.00019588356125505943, "loss": 11.6851, "step": 13162 }, { "epoch": 0.27553797203382735, "grad_norm": 0.2805441915988922, "learning_rate": 0.00019588293863656954, "loss": 11.6797, "step": 13163 }, { "epoch": 0.2755589047977895, "grad_norm": 0.26837772130966187, "learning_rate": 0.00019588231597198678, "loss": 11.6729, "step": 13164 }, { "epoch": 0.27557983756175164, "grad_norm": 0.28031015396118164, "learning_rate": 0.0001958816932613115, "loss": 11.6757, "step": 13165 }, { "epoch": 0.2756007703257138, "grad_norm": 0.2820230722427368, "learning_rate": 0.00019588107050454395, "loss": 11.6793, "step": 13166 }, { "epoch": 0.275621703089676, "grad_norm": 0.22208940982818604, "learning_rate": 0.00019588044770168443, "loss": 11.6676, "step": 13167 }, { "epoch": 0.27564263585363813, "grad_norm": 0.2763191759586334, "learning_rate": 0.00019587982485273328, "loss": 11.6761, "step": 13168 }, { "epoch": 0.2756635686176003, "grad_norm": 0.2562524080276489, "learning_rate": 0.00019587920195769076, "loss": 11.6776, "step": 13169 }, { "epoch": 0.2756845013815624, "grad_norm": 0.26718437671661377, "learning_rate": 0.0001958785790165572, "loss": 11.6835, "step": 13170 }, { "epoch": 0.27570543414552456, "grad_norm": 0.2809213399887085, "learning_rate": 0.00019587795602933288, "loss": 11.6803, "step": 13171 }, { "epoch": 0.2757263669094867, "grad_norm": 0.25827839970588684, "learning_rate": 0.00019587733299601808, "loss": 11.6849, "step": 13172 }, { "epoch": 0.2757472996734489, "grad_norm": 0.26423153281211853, "learning_rate": 0.00019587670991661317, "loss": 11.6765, "step": 13173 }, { "epoch": 0.27576823243741105, "grad_norm": 0.2410145103931427, "learning_rate": 0.00019587608679111837, "loss": 11.6912, "step": 13174 }, { "epoch": 0.2757891652013732, "grad_norm": 0.25337597727775574, "learning_rate": 0.00019587546361953404, "loss": 11.6684, "step": 13175 }, { "epoch": 0.27581009796533534, "grad_norm": 0.23000216484069824, "learning_rate": 0.00019587484040186045, "loss": 11.6748, "step": 13176 }, { "epoch": 0.2758310307292975, "grad_norm": 0.30005940794944763, "learning_rate": 0.00019587421713809788, "loss": 11.6887, "step": 13177 }, { "epoch": 0.27585196349325963, "grad_norm": 0.26409968733787537, "learning_rate": 0.0001958735938282467, "loss": 11.6799, "step": 13178 }, { "epoch": 0.2758728962572218, "grad_norm": 0.2778889834880829, "learning_rate": 0.0001958729704723071, "loss": 11.6647, "step": 13179 }, { "epoch": 0.275893829021184, "grad_norm": 0.26933395862579346, "learning_rate": 0.00019587234707027948, "loss": 11.6916, "step": 13180 }, { "epoch": 0.2759147617851461, "grad_norm": 0.29295894503593445, "learning_rate": 0.0001958717236221641, "loss": 11.6841, "step": 13181 }, { "epoch": 0.27593569454910827, "grad_norm": 0.3193468749523163, "learning_rate": 0.00019587110012796123, "loss": 11.6783, "step": 13182 }, { "epoch": 0.2759566273130704, "grad_norm": 0.2583998441696167, "learning_rate": 0.00019587047658767124, "loss": 11.6832, "step": 13183 }, { "epoch": 0.27597756007703256, "grad_norm": 0.27419108152389526, "learning_rate": 0.00019586985300129437, "loss": 11.6775, "step": 13184 }, { "epoch": 0.2759984928409947, "grad_norm": 0.3579277992248535, "learning_rate": 0.00019586922936883094, "loss": 11.7051, "step": 13185 }, { "epoch": 0.2760194256049569, "grad_norm": 0.26050618290901184, "learning_rate": 0.00019586860569028124, "loss": 11.6771, "step": 13186 }, { "epoch": 0.27604035836891905, "grad_norm": 0.2976624071598053, "learning_rate": 0.0001958679819656456, "loss": 11.6847, "step": 13187 }, { "epoch": 0.2760612911328812, "grad_norm": 0.2194450944662094, "learning_rate": 0.0001958673581949243, "loss": 11.6612, "step": 13188 }, { "epoch": 0.27608222389684334, "grad_norm": 0.24050858616828918, "learning_rate": 0.00019586673437811764, "loss": 11.676, "step": 13189 }, { "epoch": 0.2761031566608055, "grad_norm": 0.30611535906791687, "learning_rate": 0.00019586611051522593, "loss": 11.6925, "step": 13190 }, { "epoch": 0.2761240894247676, "grad_norm": 0.2294025421142578, "learning_rate": 0.00019586548660624944, "loss": 11.6922, "step": 13191 }, { "epoch": 0.2761450221887298, "grad_norm": 1.3633480072021484, "learning_rate": 0.00019586486265118849, "loss": 11.5992, "step": 13192 }, { "epoch": 0.27616595495269197, "grad_norm": 0.23517504334449768, "learning_rate": 0.00019586423865004338, "loss": 11.6777, "step": 13193 }, { "epoch": 0.2761868877166541, "grad_norm": 0.25620436668395996, "learning_rate": 0.00019586361460281442, "loss": 11.6985, "step": 13194 }, { "epoch": 0.27620782048061626, "grad_norm": 0.2932768166065216, "learning_rate": 0.00019586299050950188, "loss": 11.6886, "step": 13195 }, { "epoch": 0.2762287532445784, "grad_norm": 0.2838175594806671, "learning_rate": 0.0001958623663701061, "loss": 11.6763, "step": 13196 }, { "epoch": 0.27624968600854055, "grad_norm": 0.2574804723262787, "learning_rate": 0.00019586174218462734, "loss": 11.6879, "step": 13197 }, { "epoch": 0.2762706187725027, "grad_norm": 0.2577804923057556, "learning_rate": 0.0001958611179530659, "loss": 11.6584, "step": 13198 }, { "epoch": 0.2762915515364649, "grad_norm": 0.3615660071372986, "learning_rate": 0.00019586049367542217, "loss": 11.6755, "step": 13199 }, { "epoch": 0.27631248430042704, "grad_norm": 0.310565322637558, "learning_rate": 0.00019585986935169634, "loss": 11.6649, "step": 13200 }, { "epoch": 0.2763334170643892, "grad_norm": 0.25671911239624023, "learning_rate": 0.00019585924498188875, "loss": 11.6675, "step": 13201 }, { "epoch": 0.27635434982835133, "grad_norm": 0.2544929087162018, "learning_rate": 0.0001958586205659997, "loss": 11.6896, "step": 13202 }, { "epoch": 0.2763752825923135, "grad_norm": 0.2418873906135559, "learning_rate": 0.0001958579961040295, "loss": 11.6819, "step": 13203 }, { "epoch": 0.2763962153562756, "grad_norm": 0.3185131847858429, "learning_rate": 0.0001958573715959784, "loss": 11.654, "step": 13204 }, { "epoch": 0.2764171481202378, "grad_norm": 0.22634540498256683, "learning_rate": 0.0001958567470418468, "loss": 11.6801, "step": 13205 }, { "epoch": 0.27643808088419997, "grad_norm": 0.2606185972690582, "learning_rate": 0.00019585612244163488, "loss": 11.6635, "step": 13206 }, { "epoch": 0.2764590136481621, "grad_norm": 0.3251055181026459, "learning_rate": 0.00019585549779534302, "loss": 11.6622, "step": 13207 }, { "epoch": 0.27647994641212426, "grad_norm": 0.25944784283638, "learning_rate": 0.0001958548731029715, "loss": 11.6785, "step": 13208 }, { "epoch": 0.2765008791760864, "grad_norm": 0.26280346512794495, "learning_rate": 0.00019585424836452065, "loss": 11.6844, "step": 13209 }, { "epoch": 0.27652181194004855, "grad_norm": 0.27022695541381836, "learning_rate": 0.00019585362357999074, "loss": 11.6724, "step": 13210 }, { "epoch": 0.2765427447040107, "grad_norm": 0.2561187148094177, "learning_rate": 0.00019585299874938205, "loss": 11.6765, "step": 13211 }, { "epoch": 0.2765636774679729, "grad_norm": 0.2348695546388626, "learning_rate": 0.0001958523738726949, "loss": 11.6738, "step": 13212 }, { "epoch": 0.27658461023193504, "grad_norm": 0.25785619020462036, "learning_rate": 0.00019585174894992961, "loss": 11.682, "step": 13213 }, { "epoch": 0.2766055429958972, "grad_norm": 0.2883511483669281, "learning_rate": 0.00019585112398108647, "loss": 11.6792, "step": 13214 }, { "epoch": 0.2766264757598593, "grad_norm": 0.29230019450187683, "learning_rate": 0.00019585049896616577, "loss": 11.6529, "step": 13215 }, { "epoch": 0.27664740852382147, "grad_norm": 0.24857951700687408, "learning_rate": 0.00019584987390516777, "loss": 11.6676, "step": 13216 }, { "epoch": 0.2766683412877836, "grad_norm": 0.2844168543815613, "learning_rate": 0.0001958492487980929, "loss": 11.6831, "step": 13217 }, { "epoch": 0.2766892740517458, "grad_norm": 0.2683213949203491, "learning_rate": 0.00019584862364494128, "loss": 11.6831, "step": 13218 }, { "epoch": 0.27671020681570796, "grad_norm": 0.2554028034210205, "learning_rate": 0.00019584799844571337, "loss": 11.6787, "step": 13219 }, { "epoch": 0.2767311395796701, "grad_norm": 0.2658996284008026, "learning_rate": 0.00019584737320040935, "loss": 11.6715, "step": 13220 }, { "epoch": 0.27675207234363225, "grad_norm": 0.2677006423473358, "learning_rate": 0.00019584674790902964, "loss": 11.6716, "step": 13221 }, { "epoch": 0.2767730051075944, "grad_norm": 0.3006184697151184, "learning_rate": 0.00019584612257157445, "loss": 11.6752, "step": 13222 }, { "epoch": 0.27679393787155654, "grad_norm": 0.28261369466781616, "learning_rate": 0.0001958454971880441, "loss": 11.6987, "step": 13223 }, { "epoch": 0.27681487063551874, "grad_norm": 0.21902509033679962, "learning_rate": 0.0001958448717584389, "loss": 11.6869, "step": 13224 }, { "epoch": 0.2768358033994809, "grad_norm": 0.2882871925830841, "learning_rate": 0.00019584424628275916, "loss": 11.6831, "step": 13225 }, { "epoch": 0.27685673616344303, "grad_norm": 0.25987306237220764, "learning_rate": 0.00019584362076100517, "loss": 11.679, "step": 13226 }, { "epoch": 0.2768776689274052, "grad_norm": 0.3267821669578552, "learning_rate": 0.00019584299519317724, "loss": 11.6736, "step": 13227 }, { "epoch": 0.2768986016913673, "grad_norm": 0.3985065817832947, "learning_rate": 0.00019584236957927566, "loss": 11.679, "step": 13228 }, { "epoch": 0.27691953445532946, "grad_norm": 0.23133279383182526, "learning_rate": 0.0001958417439193007, "loss": 11.6773, "step": 13229 }, { "epoch": 0.2769404672192916, "grad_norm": 0.298342227935791, "learning_rate": 0.00019584111821325273, "loss": 11.6943, "step": 13230 }, { "epoch": 0.2769613999832538, "grad_norm": 0.2968482971191406, "learning_rate": 0.00019584049246113198, "loss": 11.6789, "step": 13231 }, { "epoch": 0.27698233274721595, "grad_norm": 0.27105262875556946, "learning_rate": 0.00019583986666293882, "loss": 11.6826, "step": 13232 }, { "epoch": 0.2770032655111781, "grad_norm": 0.27873051166534424, "learning_rate": 0.00019583924081867352, "loss": 11.6769, "step": 13233 }, { "epoch": 0.27702419827514024, "grad_norm": 0.3274485766887665, "learning_rate": 0.00019583861492833632, "loss": 11.6837, "step": 13234 }, { "epoch": 0.2770451310391024, "grad_norm": 0.28414368629455566, "learning_rate": 0.00019583798899192763, "loss": 11.6725, "step": 13235 }, { "epoch": 0.27706606380306453, "grad_norm": 0.2557401955127716, "learning_rate": 0.00019583736300944767, "loss": 11.6838, "step": 13236 }, { "epoch": 0.27708699656702673, "grad_norm": 0.27752918004989624, "learning_rate": 0.0001958367369808968, "loss": 11.6838, "step": 13237 }, { "epoch": 0.2771079293309889, "grad_norm": 0.30074596405029297, "learning_rate": 0.00019583611090627525, "loss": 11.6744, "step": 13238 }, { "epoch": 0.277128862094951, "grad_norm": 0.357485830783844, "learning_rate": 0.00019583548478558337, "loss": 11.6938, "step": 13239 }, { "epoch": 0.27714979485891317, "grad_norm": 0.2830711603164673, "learning_rate": 0.00019583485861882148, "loss": 11.6697, "step": 13240 }, { "epoch": 0.2771707276228753, "grad_norm": 0.286098837852478, "learning_rate": 0.0001958342324059898, "loss": 11.6802, "step": 13241 }, { "epoch": 0.27719166038683746, "grad_norm": 0.2203827053308487, "learning_rate": 0.00019583360614708873, "loss": 11.6848, "step": 13242 }, { "epoch": 0.27721259315079966, "grad_norm": 0.2640278935432434, "learning_rate": 0.00019583297984211853, "loss": 11.6759, "step": 13243 }, { "epoch": 0.2772335259147618, "grad_norm": 0.312331885099411, "learning_rate": 0.00019583235349107948, "loss": 11.6747, "step": 13244 }, { "epoch": 0.27725445867872395, "grad_norm": 0.27866101264953613, "learning_rate": 0.00019583172709397188, "loss": 11.6575, "step": 13245 }, { "epoch": 0.2772753914426861, "grad_norm": 0.2644047141075134, "learning_rate": 0.0001958311006507961, "loss": 11.6726, "step": 13246 }, { "epoch": 0.27729632420664824, "grad_norm": 0.2549133598804474, "learning_rate": 0.00019583047416155233, "loss": 11.6862, "step": 13247 }, { "epoch": 0.2773172569706104, "grad_norm": 1.0950849056243896, "learning_rate": 0.00019582984762624096, "loss": 11.6097, "step": 13248 }, { "epoch": 0.2773381897345725, "grad_norm": 0.28290602564811707, "learning_rate": 0.00019582922104486225, "loss": 11.6866, "step": 13249 }, { "epoch": 0.2773591224985347, "grad_norm": 0.25788816809654236, "learning_rate": 0.00019582859441741652, "loss": 11.6814, "step": 13250 }, { "epoch": 0.27738005526249687, "grad_norm": 0.291405588388443, "learning_rate": 0.00019582796774390406, "loss": 11.6707, "step": 13251 }, { "epoch": 0.277400988026459, "grad_norm": 0.3099876046180725, "learning_rate": 0.00019582734102432521, "loss": 11.675, "step": 13252 }, { "epoch": 0.27742192079042116, "grad_norm": 0.20106208324432373, "learning_rate": 0.0001958267142586802, "loss": 11.6734, "step": 13253 }, { "epoch": 0.2774428535543833, "grad_norm": 0.294494092464447, "learning_rate": 0.00019582608744696936, "loss": 11.6904, "step": 13254 }, { "epoch": 0.27746378631834545, "grad_norm": 0.25890055298805237, "learning_rate": 0.000195825460589193, "loss": 11.687, "step": 13255 }, { "epoch": 0.27748471908230765, "grad_norm": 0.32366469502449036, "learning_rate": 0.00019582483368535145, "loss": 11.6645, "step": 13256 }, { "epoch": 0.2775056518462698, "grad_norm": 0.26489725708961487, "learning_rate": 0.00019582420673544497, "loss": 11.6878, "step": 13257 }, { "epoch": 0.27752658461023194, "grad_norm": 0.2430899739265442, "learning_rate": 0.00019582357973947387, "loss": 11.6782, "step": 13258 }, { "epoch": 0.2775475173741941, "grad_norm": 0.36155790090560913, "learning_rate": 0.00019582295269743845, "loss": 11.6851, "step": 13259 }, { "epoch": 0.27756845013815623, "grad_norm": 0.2940126061439514, "learning_rate": 0.000195822325609339, "loss": 11.6741, "step": 13260 }, { "epoch": 0.2775893829021184, "grad_norm": 0.25856003165245056, "learning_rate": 0.00019582169847517589, "loss": 11.6909, "step": 13261 }, { "epoch": 0.2776103156660806, "grad_norm": 0.26622870564460754, "learning_rate": 0.00019582107129494932, "loss": 11.695, "step": 13262 }, { "epoch": 0.2776312484300427, "grad_norm": 0.30117350816726685, "learning_rate": 0.00019582044406865967, "loss": 11.6839, "step": 13263 }, { "epoch": 0.27765218119400487, "grad_norm": 0.22462517023086548, "learning_rate": 0.00019581981679630718, "loss": 11.6663, "step": 13264 }, { "epoch": 0.277673113957967, "grad_norm": 0.25019070506095886, "learning_rate": 0.00019581918947789224, "loss": 11.6612, "step": 13265 }, { "epoch": 0.27769404672192916, "grad_norm": 0.3386751413345337, "learning_rate": 0.00019581856211341506, "loss": 11.6758, "step": 13266 }, { "epoch": 0.2777149794858913, "grad_norm": 0.25292855501174927, "learning_rate": 0.00019581793470287598, "loss": 11.6879, "step": 13267 }, { "epoch": 0.27773591224985344, "grad_norm": 0.35808131098747253, "learning_rate": 0.00019581730724627528, "loss": 11.6574, "step": 13268 }, { "epoch": 0.27775684501381565, "grad_norm": 0.3255235552787781, "learning_rate": 0.0001958166797436133, "loss": 11.68, "step": 13269 }, { "epoch": 0.2777777777777778, "grad_norm": 0.22991159558296204, "learning_rate": 0.00019581605219489033, "loss": 11.671, "step": 13270 }, { "epoch": 0.27779871054173993, "grad_norm": 0.2719188630580902, "learning_rate": 0.00019581542460010666, "loss": 11.6756, "step": 13271 }, { "epoch": 0.2778196433057021, "grad_norm": 0.27867695689201355, "learning_rate": 0.00019581479695926257, "loss": 11.6764, "step": 13272 }, { "epoch": 0.2778405760696642, "grad_norm": 0.2381899356842041, "learning_rate": 0.0001958141692723584, "loss": 11.6687, "step": 13273 }, { "epoch": 0.27786150883362637, "grad_norm": 0.253567099571228, "learning_rate": 0.00019581354153939444, "loss": 11.6827, "step": 13274 }, { "epoch": 0.27788244159758857, "grad_norm": 0.3873275816440582, "learning_rate": 0.00019581291376037099, "loss": 11.6833, "step": 13275 }, { "epoch": 0.2779033743615507, "grad_norm": 0.4006282687187195, "learning_rate": 0.00019581228593528835, "loss": 11.6852, "step": 13276 }, { "epoch": 0.27792430712551286, "grad_norm": 0.3687395751476288, "learning_rate": 0.00019581165806414682, "loss": 11.6721, "step": 13277 }, { "epoch": 0.277945239889475, "grad_norm": 0.30283358693122864, "learning_rate": 0.00019581103014694673, "loss": 11.6796, "step": 13278 }, { "epoch": 0.27796617265343715, "grad_norm": 0.3628978133201599, "learning_rate": 0.00019581040218368835, "loss": 11.6623, "step": 13279 }, { "epoch": 0.2779871054173993, "grad_norm": 0.2654741704463959, "learning_rate": 0.00019580977417437198, "loss": 11.6598, "step": 13280 }, { "epoch": 0.2780080381813615, "grad_norm": 0.3109518587589264, "learning_rate": 0.00019580914611899792, "loss": 11.6795, "step": 13281 }, { "epoch": 0.27802897094532364, "grad_norm": 0.2643962502479553, "learning_rate": 0.0001958085180175665, "loss": 11.6633, "step": 13282 }, { "epoch": 0.2780499037092858, "grad_norm": 0.2391868680715561, "learning_rate": 0.000195807889870078, "loss": 11.6882, "step": 13283 }, { "epoch": 0.27807083647324793, "grad_norm": 0.3313085436820984, "learning_rate": 0.00019580726167653273, "loss": 11.6761, "step": 13284 }, { "epoch": 0.2780917692372101, "grad_norm": 0.25473102927207947, "learning_rate": 0.00019580663343693103, "loss": 11.6832, "step": 13285 }, { "epoch": 0.2781127020011722, "grad_norm": 0.24795706570148468, "learning_rate": 0.0001958060051512731, "loss": 11.676, "step": 13286 }, { "epoch": 0.27813363476513436, "grad_norm": 0.2848627269268036, "learning_rate": 0.00019580537681955932, "loss": 11.6901, "step": 13287 }, { "epoch": 0.27815456752909656, "grad_norm": 0.21878984570503235, "learning_rate": 0.00019580474844179, "loss": 11.6664, "step": 13288 }, { "epoch": 0.2781755002930587, "grad_norm": 0.33308571577072144, "learning_rate": 0.00019580412001796537, "loss": 11.6706, "step": 13289 }, { "epoch": 0.27819643305702085, "grad_norm": 0.2797553837299347, "learning_rate": 0.00019580349154808583, "loss": 11.6639, "step": 13290 }, { "epoch": 0.278217365820983, "grad_norm": 0.28619590401649475, "learning_rate": 0.0001958028630321516, "loss": 11.6771, "step": 13291 }, { "epoch": 0.27823829858494514, "grad_norm": 0.31411394476890564, "learning_rate": 0.000195802234470163, "loss": 11.6668, "step": 13292 }, { "epoch": 0.2782592313489073, "grad_norm": 0.338935911655426, "learning_rate": 0.00019580160586212038, "loss": 11.6917, "step": 13293 }, { "epoch": 0.2782801641128695, "grad_norm": 0.22415943443775177, "learning_rate": 0.000195800977208024, "loss": 11.671, "step": 13294 }, { "epoch": 0.27830109687683163, "grad_norm": 0.23536738753318787, "learning_rate": 0.00019580034850787416, "loss": 11.6925, "step": 13295 }, { "epoch": 0.2783220296407938, "grad_norm": 0.2827579081058502, "learning_rate": 0.00019579971976167116, "loss": 11.684, "step": 13296 }, { "epoch": 0.2783429624047559, "grad_norm": 0.24723784625530243, "learning_rate": 0.00019579909096941535, "loss": 11.6679, "step": 13297 }, { "epoch": 0.27836389516871807, "grad_norm": 0.25113505125045776, "learning_rate": 0.000195798462131107, "loss": 11.679, "step": 13298 }, { "epoch": 0.2783848279326802, "grad_norm": 0.22442954778671265, "learning_rate": 0.0001957978332467464, "loss": 11.665, "step": 13299 }, { "epoch": 0.27840576069664236, "grad_norm": 0.2979099154472351, "learning_rate": 0.00019579720431633384, "loss": 11.6811, "step": 13300 }, { "epoch": 0.27842669346060456, "grad_norm": 0.27003213763237, "learning_rate": 0.00019579657533986967, "loss": 11.695, "step": 13301 }, { "epoch": 0.2784476262245667, "grad_norm": 0.2556716799736023, "learning_rate": 0.00019579594631735414, "loss": 11.6785, "step": 13302 }, { "epoch": 0.27846855898852885, "grad_norm": 0.35670801997184753, "learning_rate": 0.0001957953172487876, "loss": 11.6883, "step": 13303 }, { "epoch": 0.278489491752491, "grad_norm": 0.27641382813453674, "learning_rate": 0.0001957946881341703, "loss": 11.6801, "step": 13304 }, { "epoch": 0.27851042451645314, "grad_norm": 0.23698338866233826, "learning_rate": 0.0001957940589735026, "loss": 11.6699, "step": 13305 }, { "epoch": 0.2785313572804153, "grad_norm": 0.2700293958187103, "learning_rate": 0.00019579342976678478, "loss": 11.6515, "step": 13306 }, { "epoch": 0.2785522900443775, "grad_norm": 0.3400268256664276, "learning_rate": 0.00019579280051401713, "loss": 11.6495, "step": 13307 }, { "epoch": 0.2785732228083396, "grad_norm": 0.33868545293807983, "learning_rate": 0.00019579217121519995, "loss": 11.6881, "step": 13308 }, { "epoch": 0.27859415557230177, "grad_norm": 0.30030742287635803, "learning_rate": 0.0001957915418703336, "loss": 11.679, "step": 13309 }, { "epoch": 0.2786150883362639, "grad_norm": 0.31258657574653625, "learning_rate": 0.0001957909124794183, "loss": 11.6839, "step": 13310 }, { "epoch": 0.27863602110022606, "grad_norm": 0.26804906129837036, "learning_rate": 0.00019579028304245438, "loss": 11.669, "step": 13311 }, { "epoch": 0.2786569538641882, "grad_norm": 0.8791642785072327, "learning_rate": 0.00019578965355944217, "loss": 11.6608, "step": 13312 }, { "epoch": 0.2786778866281504, "grad_norm": 0.2388913929462433, "learning_rate": 0.00019578902403038194, "loss": 11.6858, "step": 13313 }, { "epoch": 0.27869881939211255, "grad_norm": 0.2727178931236267, "learning_rate": 0.000195788394455274, "loss": 11.6826, "step": 13314 }, { "epoch": 0.2787197521560747, "grad_norm": 0.2873784899711609, "learning_rate": 0.0001957877648341187, "loss": 11.6823, "step": 13315 }, { "epoch": 0.27874068492003684, "grad_norm": 0.2814461290836334, "learning_rate": 0.00019578713516691623, "loss": 11.6677, "step": 13316 }, { "epoch": 0.278761617683999, "grad_norm": 0.28378820419311523, "learning_rate": 0.00019578650545366702, "loss": 11.6638, "step": 13317 }, { "epoch": 0.27878255044796113, "grad_norm": 0.28930750489234924, "learning_rate": 0.0001957858756943713, "loss": 11.698, "step": 13318 }, { "epoch": 0.2788034832119233, "grad_norm": 0.2503683865070343, "learning_rate": 0.00019578524588902943, "loss": 11.6843, "step": 13319 }, { "epoch": 0.2788244159758855, "grad_norm": 0.2678022086620331, "learning_rate": 0.00019578461603764162, "loss": 11.6651, "step": 13320 }, { "epoch": 0.2788453487398476, "grad_norm": 0.2683449685573578, "learning_rate": 0.00019578398614020825, "loss": 11.6733, "step": 13321 }, { "epoch": 0.27886628150380977, "grad_norm": 0.23861974477767944, "learning_rate": 0.00019578335619672957, "loss": 11.68, "step": 13322 }, { "epoch": 0.2788872142677719, "grad_norm": 0.2907162308692932, "learning_rate": 0.00019578272620720595, "loss": 11.6855, "step": 13323 }, { "epoch": 0.27890814703173405, "grad_norm": 0.23946839570999146, "learning_rate": 0.00019578209617163763, "loss": 11.6776, "step": 13324 }, { "epoch": 0.2789290797956962, "grad_norm": 0.316497802734375, "learning_rate": 0.00019578146609002493, "loss": 11.6811, "step": 13325 }, { "epoch": 0.2789500125596584, "grad_norm": 0.23759737610816956, "learning_rate": 0.0001957808359623682, "loss": 11.6749, "step": 13326 }, { "epoch": 0.27897094532362054, "grad_norm": 0.2363598495721817, "learning_rate": 0.00019578020578866768, "loss": 11.6831, "step": 13327 }, { "epoch": 0.2789918780875827, "grad_norm": 0.29764285683631897, "learning_rate": 0.00019577957556892367, "loss": 11.6852, "step": 13328 }, { "epoch": 0.27901281085154483, "grad_norm": 0.259998619556427, "learning_rate": 0.00019577894530313654, "loss": 11.6761, "step": 13329 }, { "epoch": 0.279033743615507, "grad_norm": 0.2823834717273712, "learning_rate": 0.00019577831499130652, "loss": 11.6847, "step": 13330 }, { "epoch": 0.2790546763794691, "grad_norm": 0.2918465733528137, "learning_rate": 0.00019577768463343396, "loss": 11.6861, "step": 13331 }, { "epoch": 0.2790756091434313, "grad_norm": 0.2667373716831207, "learning_rate": 0.00019577705422951916, "loss": 11.6671, "step": 13332 }, { "epoch": 0.27909654190739347, "grad_norm": 0.317680686712265, "learning_rate": 0.00019577642377956238, "loss": 11.6878, "step": 13333 }, { "epoch": 0.2791174746713556, "grad_norm": 0.2606889009475708, "learning_rate": 0.00019577579328356398, "loss": 11.6708, "step": 13334 }, { "epoch": 0.27913840743531776, "grad_norm": 0.24494336545467377, "learning_rate": 0.0001957751627415242, "loss": 11.6589, "step": 13335 }, { "epoch": 0.2791593401992799, "grad_norm": 0.277118057012558, "learning_rate": 0.00019577453215344342, "loss": 11.6586, "step": 13336 }, { "epoch": 0.27918027296324205, "grad_norm": 0.299270898103714, "learning_rate": 0.0001957739015193219, "loss": 11.6802, "step": 13337 }, { "epoch": 0.2792012057272042, "grad_norm": 0.30717921257019043, "learning_rate": 0.00019577327083915993, "loss": 11.6757, "step": 13338 }, { "epoch": 0.2792221384911664, "grad_norm": 0.24593999981880188, "learning_rate": 0.00019577264011295782, "loss": 11.6925, "step": 13339 }, { "epoch": 0.27924307125512854, "grad_norm": 0.35315558314323425, "learning_rate": 0.0001957720093407159, "loss": 11.6708, "step": 13340 }, { "epoch": 0.2792640040190907, "grad_norm": 0.25684335827827454, "learning_rate": 0.00019577137852243445, "loss": 11.6696, "step": 13341 }, { "epoch": 0.27928493678305283, "grad_norm": 0.29594364762306213, "learning_rate": 0.00019577074765811378, "loss": 11.6804, "step": 13342 }, { "epoch": 0.279305869547015, "grad_norm": 0.27282655239105225, "learning_rate": 0.0001957701167477542, "loss": 11.6961, "step": 13343 }, { "epoch": 0.2793268023109771, "grad_norm": 0.3559470474720001, "learning_rate": 0.000195769485791356, "loss": 11.6699, "step": 13344 }, { "epoch": 0.2793477350749393, "grad_norm": 0.21090620756149292, "learning_rate": 0.0001957688547889195, "loss": 11.6874, "step": 13345 }, { "epoch": 0.27936866783890146, "grad_norm": 0.2698119580745697, "learning_rate": 0.00019576822374044495, "loss": 11.6805, "step": 13346 }, { "epoch": 0.2793896006028636, "grad_norm": 0.27044495940208435, "learning_rate": 0.00019576759264593277, "loss": 11.6664, "step": 13347 }, { "epoch": 0.27941053336682575, "grad_norm": 0.2130880206823349, "learning_rate": 0.00019576696150538312, "loss": 11.663, "step": 13348 }, { "epoch": 0.2794314661307879, "grad_norm": 0.2513481080532074, "learning_rate": 0.0001957663303187964, "loss": 11.6687, "step": 13349 }, { "epoch": 0.27945239889475004, "grad_norm": 0.2682645916938782, "learning_rate": 0.00019576569908617288, "loss": 11.6705, "step": 13350 }, { "epoch": 0.27947333165871224, "grad_norm": 0.26082223653793335, "learning_rate": 0.00019576506780751287, "loss": 11.6911, "step": 13351 }, { "epoch": 0.2794942644226744, "grad_norm": 0.28384286165237427, "learning_rate": 0.00019576443648281668, "loss": 11.6766, "step": 13352 }, { "epoch": 0.27951519718663653, "grad_norm": 0.3460752069950104, "learning_rate": 0.00019576380511208462, "loss": 11.6739, "step": 13353 }, { "epoch": 0.2795361299505987, "grad_norm": 0.3217736482620239, "learning_rate": 0.00019576317369531695, "loss": 11.6925, "step": 13354 }, { "epoch": 0.2795570627145608, "grad_norm": 0.26180917024612427, "learning_rate": 0.000195762542232514, "loss": 11.6774, "step": 13355 }, { "epoch": 0.27957799547852297, "grad_norm": 0.21674692630767822, "learning_rate": 0.0001957619107236761, "loss": 11.6675, "step": 13356 }, { "epoch": 0.2795989282424851, "grad_norm": 0.24347880482673645, "learning_rate": 0.00019576127916880352, "loss": 11.6677, "step": 13357 }, { "epoch": 0.2796198610064473, "grad_norm": 0.2464347630739212, "learning_rate": 0.00019576064756789659, "loss": 11.6737, "step": 13358 }, { "epoch": 0.27964079377040946, "grad_norm": 0.31337425112724304, "learning_rate": 0.00019576001592095556, "loss": 11.6771, "step": 13359 }, { "epoch": 0.2796617265343716, "grad_norm": 0.27551254630088806, "learning_rate": 0.0001957593842279808, "loss": 11.6815, "step": 13360 }, { "epoch": 0.27968265929833375, "grad_norm": 0.3363554775714874, "learning_rate": 0.0001957587524889726, "loss": 11.6678, "step": 13361 }, { "epoch": 0.2797035920622959, "grad_norm": 0.2113012820482254, "learning_rate": 0.00019575812070393123, "loss": 11.6759, "step": 13362 }, { "epoch": 0.27972452482625804, "grad_norm": 0.2732125222682953, "learning_rate": 0.000195757488872857, "loss": 11.6798, "step": 13363 }, { "epoch": 0.27974545759022024, "grad_norm": 0.23788495361804962, "learning_rate": 0.00019575685699575024, "loss": 11.6685, "step": 13364 }, { "epoch": 0.2797663903541824, "grad_norm": 0.29640790820121765, "learning_rate": 0.0001957562250726112, "loss": 11.6869, "step": 13365 }, { "epoch": 0.2797873231181445, "grad_norm": 0.29222744703292847, "learning_rate": 0.0001957555931034403, "loss": 11.6777, "step": 13366 }, { "epoch": 0.27980825588210667, "grad_norm": 0.37353047728538513, "learning_rate": 0.00019575496108823772, "loss": 11.6845, "step": 13367 }, { "epoch": 0.2798291886460688, "grad_norm": 0.27860307693481445, "learning_rate": 0.0001957543290270038, "loss": 11.6793, "step": 13368 }, { "epoch": 0.27985012141003096, "grad_norm": 0.34715551137924194, "learning_rate": 0.00019575369691973889, "loss": 11.6695, "step": 13369 }, { "epoch": 0.27987105417399316, "grad_norm": 0.24433980882167816, "learning_rate": 0.00019575306476644327, "loss": 11.6617, "step": 13370 }, { "epoch": 0.2798919869379553, "grad_norm": 0.32106056809425354, "learning_rate": 0.00019575243256711718, "loss": 11.6733, "step": 13371 }, { "epoch": 0.27991291970191745, "grad_norm": 0.29588744044303894, "learning_rate": 0.00019575180032176101, "loss": 11.6882, "step": 13372 }, { "epoch": 0.2799338524658796, "grad_norm": 0.26958930492401123, "learning_rate": 0.00019575116803037502, "loss": 11.6595, "step": 13373 }, { "epoch": 0.27995478522984174, "grad_norm": 0.24657732248306274, "learning_rate": 0.00019575053569295954, "loss": 11.6608, "step": 13374 }, { "epoch": 0.2799757179938039, "grad_norm": 0.24072645604610443, "learning_rate": 0.00019574990330951483, "loss": 11.6734, "step": 13375 }, { "epoch": 0.27999665075776603, "grad_norm": 0.30007681250572205, "learning_rate": 0.00019574927088004124, "loss": 11.6827, "step": 13376 }, { "epoch": 0.28001758352172823, "grad_norm": 0.34361761808395386, "learning_rate": 0.00019574863840453904, "loss": 11.6784, "step": 13377 }, { "epoch": 0.2800385162856904, "grad_norm": 0.2855750322341919, "learning_rate": 0.00019574800588300858, "loss": 11.6709, "step": 13378 }, { "epoch": 0.2800594490496525, "grad_norm": 0.2874051332473755, "learning_rate": 0.00019574737331545013, "loss": 11.6815, "step": 13379 }, { "epoch": 0.28008038181361467, "grad_norm": 0.2698240280151367, "learning_rate": 0.000195746740701864, "loss": 11.6799, "step": 13380 }, { "epoch": 0.2801013145775768, "grad_norm": 0.31095680594444275, "learning_rate": 0.00019574610804225047, "loss": 11.679, "step": 13381 }, { "epoch": 0.28012224734153895, "grad_norm": 0.24439264833927155, "learning_rate": 0.00019574547533660988, "loss": 11.6696, "step": 13382 }, { "epoch": 0.28014318010550116, "grad_norm": 0.2132313847541809, "learning_rate": 0.00019574484258494255, "loss": 11.6667, "step": 13383 }, { "epoch": 0.2801641128694633, "grad_norm": 0.2510887086391449, "learning_rate": 0.0001957442097872487, "loss": 11.6699, "step": 13384 }, { "epoch": 0.28018504563342544, "grad_norm": 0.3092038631439209, "learning_rate": 0.00019574357694352872, "loss": 11.6811, "step": 13385 }, { "epoch": 0.2802059783973876, "grad_norm": 0.22460675239562988, "learning_rate": 0.00019574294405378287, "loss": 11.6744, "step": 13386 }, { "epoch": 0.28022691116134973, "grad_norm": 0.22919438779354095, "learning_rate": 0.00019574231111801148, "loss": 11.6805, "step": 13387 }, { "epoch": 0.2802478439253119, "grad_norm": 0.33878880739212036, "learning_rate": 0.00019574167813621486, "loss": 11.6709, "step": 13388 }, { "epoch": 0.2802687766892741, "grad_norm": 0.25163134932518005, "learning_rate": 0.0001957410451083933, "loss": 11.6908, "step": 13389 }, { "epoch": 0.2802897094532362, "grad_norm": 0.2381904274225235, "learning_rate": 0.00019574041203454705, "loss": 11.6758, "step": 13390 }, { "epoch": 0.28031064221719837, "grad_norm": 0.28754156827926636, "learning_rate": 0.0001957397789146765, "loss": 11.6875, "step": 13391 }, { "epoch": 0.2803315749811605, "grad_norm": 0.27772918343544006, "learning_rate": 0.00019573914574878192, "loss": 11.6629, "step": 13392 }, { "epoch": 0.28035250774512266, "grad_norm": 0.2910713851451874, "learning_rate": 0.0001957385125368636, "loss": 11.6831, "step": 13393 }, { "epoch": 0.2803734405090848, "grad_norm": 0.276248574256897, "learning_rate": 0.00019573787927892186, "loss": 11.651, "step": 13394 }, { "epoch": 0.28039437327304695, "grad_norm": 0.2602790296077728, "learning_rate": 0.00019573724597495698, "loss": 11.6821, "step": 13395 }, { "epoch": 0.28041530603700915, "grad_norm": 0.23529016971588135, "learning_rate": 0.00019573661262496935, "loss": 11.6603, "step": 13396 }, { "epoch": 0.2804362388009713, "grad_norm": 0.25881099700927734, "learning_rate": 0.00019573597922895917, "loss": 11.6609, "step": 13397 }, { "epoch": 0.28045717156493344, "grad_norm": 0.2495485246181488, "learning_rate": 0.00019573534578692678, "loss": 11.6514, "step": 13398 }, { "epoch": 0.2804781043288956, "grad_norm": 0.26076754927635193, "learning_rate": 0.00019573471229887253, "loss": 11.6635, "step": 13399 }, { "epoch": 0.28049903709285773, "grad_norm": 0.29174768924713135, "learning_rate": 0.00019573407876479664, "loss": 11.6763, "step": 13400 }, { "epoch": 0.2805199698568199, "grad_norm": 0.24714113771915436, "learning_rate": 0.00019573344518469948, "loss": 11.6683, "step": 13401 }, { "epoch": 0.2805409026207821, "grad_norm": 0.21582964062690735, "learning_rate": 0.00019573281155858135, "loss": 11.6744, "step": 13402 }, { "epoch": 0.2805618353847442, "grad_norm": 0.2526676058769226, "learning_rate": 0.0001957321778864425, "loss": 11.6855, "step": 13403 }, { "epoch": 0.28058276814870636, "grad_norm": 0.2634945809841156, "learning_rate": 0.0001957315441682833, "loss": 11.6719, "step": 13404 }, { "epoch": 0.2806037009126685, "grad_norm": 0.2525668144226074, "learning_rate": 0.00019573091040410402, "loss": 11.6747, "step": 13405 }, { "epoch": 0.28062463367663065, "grad_norm": 0.25996777415275574, "learning_rate": 0.000195730276593905, "loss": 11.6775, "step": 13406 }, { "epoch": 0.2806455664405928, "grad_norm": 0.31500354409217834, "learning_rate": 0.00019572964273768652, "loss": 11.6809, "step": 13407 }, { "epoch": 0.28066649920455494, "grad_norm": 0.2847280502319336, "learning_rate": 0.00019572900883544882, "loss": 11.684, "step": 13408 }, { "epoch": 0.28068743196851714, "grad_norm": 0.28416314721107483, "learning_rate": 0.0001957283748871923, "loss": 11.6682, "step": 13409 }, { "epoch": 0.2807083647324793, "grad_norm": 0.30152255296707153, "learning_rate": 0.00019572774089291725, "loss": 11.6726, "step": 13410 }, { "epoch": 0.28072929749644143, "grad_norm": 0.2515126168727875, "learning_rate": 0.00019572710685262395, "loss": 11.6773, "step": 13411 }, { "epoch": 0.2807502302604036, "grad_norm": 0.25047940015792847, "learning_rate": 0.00019572647276631271, "loss": 11.6804, "step": 13412 }, { "epoch": 0.2807711630243657, "grad_norm": 0.26512420177459717, "learning_rate": 0.00019572583863398382, "loss": 11.6774, "step": 13413 }, { "epoch": 0.28079209578832787, "grad_norm": 0.305794358253479, "learning_rate": 0.0001957252044556376, "loss": 11.6751, "step": 13414 }, { "epoch": 0.28081302855229007, "grad_norm": 0.30939897894859314, "learning_rate": 0.00019572457023127437, "loss": 11.663, "step": 13415 }, { "epoch": 0.2808339613162522, "grad_norm": 0.26244720816612244, "learning_rate": 0.00019572393596089442, "loss": 11.6674, "step": 13416 }, { "epoch": 0.28085489408021436, "grad_norm": 0.27932992577552795, "learning_rate": 0.00019572330164449806, "loss": 11.6582, "step": 13417 }, { "epoch": 0.2808758268441765, "grad_norm": 0.3429452180862427, "learning_rate": 0.0001957226672820856, "loss": 11.7013, "step": 13418 }, { "epoch": 0.28089675960813865, "grad_norm": 0.2390311360359192, "learning_rate": 0.0001957220328736573, "loss": 11.6787, "step": 13419 }, { "epoch": 0.2809176923721008, "grad_norm": 0.23235204815864563, "learning_rate": 0.00019572139841921352, "loss": 11.6882, "step": 13420 }, { "epoch": 0.280938625136063, "grad_norm": 0.3390941619873047, "learning_rate": 0.00019572076391875456, "loss": 11.6824, "step": 13421 }, { "epoch": 0.28095955790002514, "grad_norm": 0.29280364513397217, "learning_rate": 0.00019572012937228071, "loss": 11.6812, "step": 13422 }, { "epoch": 0.2809804906639873, "grad_norm": 0.2696821391582489, "learning_rate": 0.00019571949477979224, "loss": 11.678, "step": 13423 }, { "epoch": 0.2810014234279494, "grad_norm": 0.28423577547073364, "learning_rate": 0.00019571886014128952, "loss": 11.6784, "step": 13424 }, { "epoch": 0.28102235619191157, "grad_norm": 0.27506422996520996, "learning_rate": 0.0001957182254567728, "loss": 11.6679, "step": 13425 }, { "epoch": 0.2810432889558737, "grad_norm": 0.23632311820983887, "learning_rate": 0.00019571759072624243, "loss": 11.6873, "step": 13426 }, { "epoch": 0.28106422171983586, "grad_norm": 0.30758094787597656, "learning_rate": 0.0001957169559496987, "loss": 11.6697, "step": 13427 }, { "epoch": 0.28108515448379806, "grad_norm": 0.22878515720367432, "learning_rate": 0.0001957163211271419, "loss": 11.6622, "step": 13428 }, { "epoch": 0.2811060872477602, "grad_norm": 0.3374781012535095, "learning_rate": 0.00019571568625857238, "loss": 11.6804, "step": 13429 }, { "epoch": 0.28112702001172235, "grad_norm": 0.2597868740558624, "learning_rate": 0.00019571505134399034, "loss": 11.6727, "step": 13430 }, { "epoch": 0.2811479527756845, "grad_norm": 0.25876131653785706, "learning_rate": 0.00019571441638339623, "loss": 11.6947, "step": 13431 }, { "epoch": 0.28116888553964664, "grad_norm": 0.25718095898628235, "learning_rate": 0.00019571378137679023, "loss": 11.6883, "step": 13432 }, { "epoch": 0.2811898183036088, "grad_norm": 0.2544748783111572, "learning_rate": 0.00019571314632417272, "loss": 11.6805, "step": 13433 }, { "epoch": 0.281210751067571, "grad_norm": 0.23205050826072693, "learning_rate": 0.000195712511225544, "loss": 11.6748, "step": 13434 }, { "epoch": 0.28123168383153313, "grad_norm": 0.24406738579273224, "learning_rate": 0.00019571187608090432, "loss": 11.6495, "step": 13435 }, { "epoch": 0.2812526165954953, "grad_norm": 0.39301788806915283, "learning_rate": 0.00019571124089025408, "loss": 11.6795, "step": 13436 }, { "epoch": 0.2812735493594574, "grad_norm": 0.2686977982521057, "learning_rate": 0.00019571060565359347, "loss": 11.675, "step": 13437 }, { "epoch": 0.28129448212341956, "grad_norm": 0.24493010342121124, "learning_rate": 0.00019570997037092288, "loss": 11.6785, "step": 13438 }, { "epoch": 0.2813154148873817, "grad_norm": 0.3133366107940674, "learning_rate": 0.00019570933504224259, "loss": 11.6644, "step": 13439 }, { "epoch": 0.2813363476513439, "grad_norm": 0.29335105419158936, "learning_rate": 0.0001957086996675529, "loss": 11.6933, "step": 13440 }, { "epoch": 0.28135728041530605, "grad_norm": 0.25384190678596497, "learning_rate": 0.0001957080642468541, "loss": 11.6794, "step": 13441 }, { "epoch": 0.2813782131792682, "grad_norm": 0.27603328227996826, "learning_rate": 0.00019570742878014655, "loss": 11.6742, "step": 13442 }, { "epoch": 0.28139914594323034, "grad_norm": 0.2554360628128052, "learning_rate": 0.00019570679326743048, "loss": 11.6656, "step": 13443 }, { "epoch": 0.2814200787071925, "grad_norm": 0.33261269330978394, "learning_rate": 0.00019570615770870628, "loss": 11.6694, "step": 13444 }, { "epoch": 0.28144101147115463, "grad_norm": 0.29421499371528625, "learning_rate": 0.0001957055221039742, "loss": 11.6825, "step": 13445 }, { "epoch": 0.2814619442351168, "grad_norm": 0.2463374137878418, "learning_rate": 0.00019570488645323454, "loss": 11.6886, "step": 13446 }, { "epoch": 0.281482876999079, "grad_norm": 0.2681390345096588, "learning_rate": 0.00019570425075648765, "loss": 11.6701, "step": 13447 }, { "epoch": 0.2815038097630411, "grad_norm": 0.31789517402648926, "learning_rate": 0.00019570361501373377, "loss": 11.6662, "step": 13448 }, { "epoch": 0.28152474252700327, "grad_norm": 0.32405969500541687, "learning_rate": 0.0001957029792249733, "loss": 11.681, "step": 13449 }, { "epoch": 0.2815456752909654, "grad_norm": 0.25763726234436035, "learning_rate": 0.00019570234339020645, "loss": 11.6867, "step": 13450 }, { "epoch": 0.28156660805492756, "grad_norm": 0.27221059799194336, "learning_rate": 0.00019570170750943358, "loss": 11.6761, "step": 13451 }, { "epoch": 0.2815875408188897, "grad_norm": 0.27096930146217346, "learning_rate": 0.00019570107158265495, "loss": 11.671, "step": 13452 }, { "epoch": 0.2816084735828519, "grad_norm": 0.3292958736419678, "learning_rate": 0.00019570043560987091, "loss": 11.6632, "step": 13453 }, { "epoch": 0.28162940634681405, "grad_norm": 0.3098187744617462, "learning_rate": 0.00019569979959108177, "loss": 11.6879, "step": 13454 }, { "epoch": 0.2816503391107762, "grad_norm": 0.22595858573913574, "learning_rate": 0.00019569916352628783, "loss": 11.6698, "step": 13455 }, { "epoch": 0.28167127187473834, "grad_norm": 0.2513502240180969, "learning_rate": 0.00019569852741548936, "loss": 11.6691, "step": 13456 }, { "epoch": 0.2816922046387005, "grad_norm": 0.8678156733512878, "learning_rate": 0.0001956978912586867, "loss": 11.6504, "step": 13457 }, { "epoch": 0.28171313740266263, "grad_norm": 0.29959145188331604, "learning_rate": 0.00019569725505588014, "loss": 11.6746, "step": 13458 }, { "epoch": 0.28173407016662483, "grad_norm": 0.26543763279914856, "learning_rate": 0.00019569661880707, "loss": 11.6713, "step": 13459 }, { "epoch": 0.281755002930587, "grad_norm": 0.2682836651802063, "learning_rate": 0.00019569598251225657, "loss": 11.6692, "step": 13460 }, { "epoch": 0.2817759356945491, "grad_norm": 0.2607152760028839, "learning_rate": 0.00019569534617144016, "loss": 11.6612, "step": 13461 }, { "epoch": 0.28179686845851126, "grad_norm": 0.29925742745399475, "learning_rate": 0.00019569470978462108, "loss": 11.6697, "step": 13462 }, { "epoch": 0.2818178012224734, "grad_norm": 0.3365982174873352, "learning_rate": 0.00019569407335179965, "loss": 11.7029, "step": 13463 }, { "epoch": 0.28183873398643555, "grad_norm": 0.23388591408729553, "learning_rate": 0.00019569343687297615, "loss": 11.6684, "step": 13464 }, { "epoch": 0.2818596667503977, "grad_norm": 0.2767772972583771, "learning_rate": 0.0001956928003481509, "loss": 11.6779, "step": 13465 }, { "epoch": 0.2818805995143599, "grad_norm": 0.31530874967575073, "learning_rate": 0.0001956921637773242, "loss": 11.6888, "step": 13466 }, { "epoch": 0.28190153227832204, "grad_norm": 0.2374824732542038, "learning_rate": 0.00019569152716049636, "loss": 11.6831, "step": 13467 }, { "epoch": 0.2819224650422842, "grad_norm": 0.2528652548789978, "learning_rate": 0.0001956908904976677, "loss": 11.6817, "step": 13468 }, { "epoch": 0.28194339780624633, "grad_norm": 0.22555305063724518, "learning_rate": 0.0001956902537888385, "loss": 11.6664, "step": 13469 }, { "epoch": 0.2819643305702085, "grad_norm": 0.2535194158554077, "learning_rate": 0.00019568961703400904, "loss": 11.6757, "step": 13470 }, { "epoch": 0.2819852633341706, "grad_norm": 0.43627405166625977, "learning_rate": 0.00019568898023317972, "loss": 11.699, "step": 13471 }, { "epoch": 0.2820061960981328, "grad_norm": 0.27631980180740356, "learning_rate": 0.00019568834338635078, "loss": 11.6777, "step": 13472 }, { "epoch": 0.28202712886209497, "grad_norm": 0.26149997115135193, "learning_rate": 0.00019568770649352254, "loss": 11.6712, "step": 13473 }, { "epoch": 0.2820480616260571, "grad_norm": 0.3146067261695862, "learning_rate": 0.00019568706955469525, "loss": 11.6686, "step": 13474 }, { "epoch": 0.28206899439001926, "grad_norm": 0.25288715958595276, "learning_rate": 0.0001956864325698693, "loss": 11.678, "step": 13475 }, { "epoch": 0.2820899271539814, "grad_norm": 0.2173919528722763, "learning_rate": 0.00019568579553904498, "loss": 11.6668, "step": 13476 }, { "epoch": 0.28211085991794355, "grad_norm": 0.2368086725473404, "learning_rate": 0.00019568515846222259, "loss": 11.6818, "step": 13477 }, { "epoch": 0.28213179268190575, "grad_norm": 0.29150497913360596, "learning_rate": 0.00019568452133940237, "loss": 11.6788, "step": 13478 }, { "epoch": 0.2821527254458679, "grad_norm": 0.3233468532562256, "learning_rate": 0.00019568388417058472, "loss": 11.6811, "step": 13479 }, { "epoch": 0.28217365820983004, "grad_norm": 0.22786928713321686, "learning_rate": 0.00019568324695576992, "loss": 11.6742, "step": 13480 }, { "epoch": 0.2821945909737922, "grad_norm": 0.35464969277381897, "learning_rate": 0.00019568260969495826, "loss": 11.677, "step": 13481 }, { "epoch": 0.2822155237377543, "grad_norm": 0.2674502432346344, "learning_rate": 0.00019568197238815005, "loss": 11.6519, "step": 13482 }, { "epoch": 0.28223645650171647, "grad_norm": 0.2714318335056305, "learning_rate": 0.0001956813350353456, "loss": 11.6869, "step": 13483 }, { "epoch": 0.2822573892656786, "grad_norm": 0.2618294954299927, "learning_rate": 0.0001956806976365452, "loss": 11.6799, "step": 13484 }, { "epoch": 0.2822783220296408, "grad_norm": 0.2372775673866272, "learning_rate": 0.00019568006019174918, "loss": 11.6895, "step": 13485 }, { "epoch": 0.28229925479360296, "grad_norm": 0.24794414639472961, "learning_rate": 0.00019567942270095782, "loss": 11.6949, "step": 13486 }, { "epoch": 0.2823201875575651, "grad_norm": 0.29588624835014343, "learning_rate": 0.00019567878516417146, "loss": 11.6736, "step": 13487 }, { "epoch": 0.28234112032152725, "grad_norm": 0.268004834651947, "learning_rate": 0.00019567814758139038, "loss": 11.6717, "step": 13488 }, { "epoch": 0.2823620530854894, "grad_norm": 0.31193140149116516, "learning_rate": 0.00019567750995261492, "loss": 11.69, "step": 13489 }, { "epoch": 0.28238298584945154, "grad_norm": 0.43027034401893616, "learning_rate": 0.00019567687227784535, "loss": 11.6791, "step": 13490 }, { "epoch": 0.28240391861341374, "grad_norm": 0.38888075947761536, "learning_rate": 0.000195676234557082, "loss": 11.6808, "step": 13491 }, { "epoch": 0.2824248513773759, "grad_norm": 0.2291058897972107, "learning_rate": 0.00019567559679032517, "loss": 11.6559, "step": 13492 }, { "epoch": 0.28244578414133803, "grad_norm": 0.3035990297794342, "learning_rate": 0.00019567495897757512, "loss": 11.6823, "step": 13493 }, { "epoch": 0.2824667169053002, "grad_norm": 0.2938354015350342, "learning_rate": 0.00019567432111883225, "loss": 11.6674, "step": 13494 }, { "epoch": 0.2824876496692623, "grad_norm": 0.27090615034103394, "learning_rate": 0.0001956736832140968, "loss": 11.6608, "step": 13495 }, { "epoch": 0.28250858243322446, "grad_norm": 0.2730434536933899, "learning_rate": 0.00019567304526336908, "loss": 11.6657, "step": 13496 }, { "epoch": 0.2825295151971866, "grad_norm": 0.30780547857284546, "learning_rate": 0.00019567240726664945, "loss": 11.6582, "step": 13497 }, { "epoch": 0.2825504479611488, "grad_norm": 0.29556936025619507, "learning_rate": 0.00019567176922393812, "loss": 11.6879, "step": 13498 }, { "epoch": 0.28257138072511095, "grad_norm": 0.30613911151885986, "learning_rate": 0.0001956711311352355, "loss": 11.6758, "step": 13499 }, { "epoch": 0.2825923134890731, "grad_norm": 0.24686434864997864, "learning_rate": 0.00019567049300054184, "loss": 11.6749, "step": 13500 }, { "epoch": 0.28261324625303524, "grad_norm": 0.24973313510417938, "learning_rate": 0.00019566985481985744, "loss": 11.6768, "step": 13501 }, { "epoch": 0.2826341790169974, "grad_norm": 0.22387804090976715, "learning_rate": 0.0001956692165931826, "loss": 11.6834, "step": 13502 }, { "epoch": 0.28265511178095953, "grad_norm": 0.26160213351249695, "learning_rate": 0.0001956685783205177, "loss": 11.684, "step": 13503 }, { "epoch": 0.28267604454492173, "grad_norm": 0.24896404147148132, "learning_rate": 0.000195667940001863, "loss": 11.6733, "step": 13504 }, { "epoch": 0.2826969773088839, "grad_norm": 0.24022457003593445, "learning_rate": 0.00019566730163721877, "loss": 11.6877, "step": 13505 }, { "epoch": 0.282717910072846, "grad_norm": 0.2854010760784149, "learning_rate": 0.00019566666322658538, "loss": 11.673, "step": 13506 }, { "epoch": 0.28273884283680817, "grad_norm": 0.37680864334106445, "learning_rate": 0.00019566602476996306, "loss": 11.6729, "step": 13507 }, { "epoch": 0.2827597756007703, "grad_norm": 0.2713305354118347, "learning_rate": 0.00019566538626735223, "loss": 11.6859, "step": 13508 }, { "epoch": 0.28278070836473246, "grad_norm": 0.34300482273101807, "learning_rate": 0.00019566474771875306, "loss": 11.69, "step": 13509 }, { "epoch": 0.28280164112869466, "grad_norm": 0.2262076884508133, "learning_rate": 0.000195664109124166, "loss": 11.6566, "step": 13510 }, { "epoch": 0.2828225738926568, "grad_norm": 0.2842716574668884, "learning_rate": 0.00019566347048359124, "loss": 11.6868, "step": 13511 }, { "epoch": 0.28284350665661895, "grad_norm": 0.2585478127002716, "learning_rate": 0.00019566283179702913, "loss": 11.6692, "step": 13512 }, { "epoch": 0.2828644394205811, "grad_norm": 0.23514260351657867, "learning_rate": 0.00019566219306448002, "loss": 11.6815, "step": 13513 }, { "epoch": 0.28288537218454324, "grad_norm": 0.3047807812690735, "learning_rate": 0.00019566155428594412, "loss": 11.672, "step": 13514 }, { "epoch": 0.2829063049485054, "grad_norm": 0.3132757842540741, "learning_rate": 0.00019566091546142185, "loss": 11.6735, "step": 13515 }, { "epoch": 0.2829272377124675, "grad_norm": 0.2735165059566498, "learning_rate": 0.00019566027659091343, "loss": 11.6756, "step": 13516 }, { "epoch": 0.28294817047642973, "grad_norm": 0.31834444403648376, "learning_rate": 0.00019565963767441923, "loss": 11.6652, "step": 13517 }, { "epoch": 0.2829691032403919, "grad_norm": 0.2842603325843811, "learning_rate": 0.00019565899871193949, "loss": 11.6864, "step": 13518 }, { "epoch": 0.282990036004354, "grad_norm": 0.25307971239089966, "learning_rate": 0.00019565835970347456, "loss": 11.6662, "step": 13519 }, { "epoch": 0.28301096876831616, "grad_norm": 0.26039448380470276, "learning_rate": 0.00019565772064902474, "loss": 11.6856, "step": 13520 }, { "epoch": 0.2830319015322783, "grad_norm": 0.23788873851299286, "learning_rate": 0.00019565708154859032, "loss": 11.6763, "step": 13521 }, { "epoch": 0.28305283429624045, "grad_norm": 0.2560143768787384, "learning_rate": 0.00019565644240217164, "loss": 11.653, "step": 13522 }, { "epoch": 0.28307376706020265, "grad_norm": 0.27086010575294495, "learning_rate": 0.00019565580320976902, "loss": 11.6847, "step": 13523 }, { "epoch": 0.2830946998241648, "grad_norm": 0.32578402757644653, "learning_rate": 0.0001956551639713827, "loss": 11.691, "step": 13524 }, { "epoch": 0.28311563258812694, "grad_norm": 0.2961321771144867, "learning_rate": 0.00019565452468701304, "loss": 11.6892, "step": 13525 }, { "epoch": 0.2831365653520891, "grad_norm": 0.2775235176086426, "learning_rate": 0.0001956538853566603, "loss": 11.6756, "step": 13526 }, { "epoch": 0.28315749811605123, "grad_norm": 0.23627781867980957, "learning_rate": 0.00019565324598032487, "loss": 11.6847, "step": 13527 }, { "epoch": 0.2831784308800134, "grad_norm": 0.336010217666626, "learning_rate": 0.000195652606558007, "loss": 11.6722, "step": 13528 }, { "epoch": 0.2831993636439756, "grad_norm": 0.21940718591213226, "learning_rate": 0.00019565196708970695, "loss": 11.6709, "step": 13529 }, { "epoch": 0.2832202964079377, "grad_norm": 0.2413448989391327, "learning_rate": 0.00019565132757542517, "loss": 11.6812, "step": 13530 }, { "epoch": 0.28324122917189987, "grad_norm": 0.34036001563072205, "learning_rate": 0.0001956506880151618, "loss": 11.6539, "step": 13531 }, { "epoch": 0.283262161935862, "grad_norm": 0.2909550964832306, "learning_rate": 0.0001956500484089173, "loss": 11.6866, "step": 13532 }, { "epoch": 0.28328309469982416, "grad_norm": 0.33058419823646545, "learning_rate": 0.00019564940875669185, "loss": 11.6823, "step": 13533 }, { "epoch": 0.2833040274637863, "grad_norm": 0.30856937170028687, "learning_rate": 0.00019564876905848585, "loss": 11.6911, "step": 13534 }, { "epoch": 0.28332496022774845, "grad_norm": 0.29527977108955383, "learning_rate": 0.00019564812931429954, "loss": 11.6783, "step": 13535 }, { "epoch": 0.28334589299171065, "grad_norm": 0.3001857101917267, "learning_rate": 0.00019564748952413327, "loss": 11.6747, "step": 13536 }, { "epoch": 0.2833668257556728, "grad_norm": 0.30688878893852234, "learning_rate": 0.00019564684968798732, "loss": 11.6746, "step": 13537 }, { "epoch": 0.28338775851963494, "grad_norm": 0.2454376369714737, "learning_rate": 0.00019564620980586203, "loss": 11.6683, "step": 13538 }, { "epoch": 0.2834086912835971, "grad_norm": 0.2593797445297241, "learning_rate": 0.00019564556987775768, "loss": 11.6718, "step": 13539 }, { "epoch": 0.2834296240475592, "grad_norm": 0.27667301893234253, "learning_rate": 0.0001956449299036746, "loss": 11.6697, "step": 13540 }, { "epoch": 0.28345055681152137, "grad_norm": 0.22404077649116516, "learning_rate": 0.00019564428988361307, "loss": 11.6726, "step": 13541 }, { "epoch": 0.28347148957548357, "grad_norm": 0.27583184838294983, "learning_rate": 0.0001956436498175734, "loss": 11.6681, "step": 13542 }, { "epoch": 0.2834924223394457, "grad_norm": 0.253697007894516, "learning_rate": 0.00019564300970555595, "loss": 11.6893, "step": 13543 }, { "epoch": 0.28351335510340786, "grad_norm": 0.2934669554233551, "learning_rate": 0.00019564236954756098, "loss": 11.6729, "step": 13544 }, { "epoch": 0.28353428786737, "grad_norm": 0.2489372342824936, "learning_rate": 0.00019564172934358878, "loss": 11.6754, "step": 13545 }, { "epoch": 0.28355522063133215, "grad_norm": 0.2528148293495178, "learning_rate": 0.00019564108909363973, "loss": 11.6875, "step": 13546 }, { "epoch": 0.2835761533952943, "grad_norm": 0.31103524565696716, "learning_rate": 0.00019564044879771407, "loss": 11.6816, "step": 13547 }, { "epoch": 0.2835970861592565, "grad_norm": 0.2571241557598114, "learning_rate": 0.00019563980845581212, "loss": 11.6746, "step": 13548 }, { "epoch": 0.28361801892321864, "grad_norm": 0.28181323409080505, "learning_rate": 0.0001956391680679342, "loss": 11.6709, "step": 13549 }, { "epoch": 0.2836389516871808, "grad_norm": 0.3011006712913513, "learning_rate": 0.0001956385276340806, "loss": 11.678, "step": 13550 }, { "epoch": 0.28365988445114293, "grad_norm": 0.24060465395450592, "learning_rate": 0.00019563788715425167, "loss": 11.683, "step": 13551 }, { "epoch": 0.2836808172151051, "grad_norm": 0.3012237548828125, "learning_rate": 0.00019563724662844767, "loss": 11.6625, "step": 13552 }, { "epoch": 0.2837017499790672, "grad_norm": 0.3100990056991577, "learning_rate": 0.00019563660605666894, "loss": 11.6822, "step": 13553 }, { "epoch": 0.28372268274302936, "grad_norm": 0.2696651518344879, "learning_rate": 0.0001956359654389158, "loss": 11.6795, "step": 13554 }, { "epoch": 0.28374361550699156, "grad_norm": 0.27161407470703125, "learning_rate": 0.00019563532477518848, "loss": 11.6766, "step": 13555 }, { "epoch": 0.2837645482709537, "grad_norm": 0.27964743971824646, "learning_rate": 0.00019563468406548736, "loss": 11.6812, "step": 13556 }, { "epoch": 0.28378548103491585, "grad_norm": 0.2658638656139374, "learning_rate": 0.00019563404330981277, "loss": 11.6861, "step": 13557 }, { "epoch": 0.283806413798878, "grad_norm": 0.24270384013652802, "learning_rate": 0.00019563340250816494, "loss": 11.6689, "step": 13558 }, { "epoch": 0.28382734656284014, "grad_norm": 0.3219495415687561, "learning_rate": 0.00019563276166054423, "loss": 11.6662, "step": 13559 }, { "epoch": 0.2838482793268023, "grad_norm": 0.27812573313713074, "learning_rate": 0.00019563212076695094, "loss": 11.6855, "step": 13560 }, { "epoch": 0.2838692120907645, "grad_norm": 0.23998883366584778, "learning_rate": 0.00019563147982738536, "loss": 11.679, "step": 13561 }, { "epoch": 0.28389014485472663, "grad_norm": 0.262134313583374, "learning_rate": 0.00019563083884184784, "loss": 11.6628, "step": 13562 }, { "epoch": 0.2839110776186888, "grad_norm": 0.27666735649108887, "learning_rate": 0.00019563019781033863, "loss": 11.668, "step": 13563 }, { "epoch": 0.2839320103826509, "grad_norm": 0.279249370098114, "learning_rate": 0.00019562955673285806, "loss": 11.6841, "step": 13564 }, { "epoch": 0.28395294314661307, "grad_norm": 0.29601722955703735, "learning_rate": 0.00019562891560940648, "loss": 11.6651, "step": 13565 }, { "epoch": 0.2839738759105752, "grad_norm": 0.27736470103263855, "learning_rate": 0.00019562827443998413, "loss": 11.6792, "step": 13566 }, { "epoch": 0.2839948086745374, "grad_norm": 0.27159976959228516, "learning_rate": 0.0001956276332245914, "loss": 11.6665, "step": 13567 }, { "epoch": 0.28401574143849956, "grad_norm": 0.2981821298599243, "learning_rate": 0.00019562699196322848, "loss": 11.6613, "step": 13568 }, { "epoch": 0.2840366742024617, "grad_norm": 0.321270614862442, "learning_rate": 0.00019562635065589577, "loss": 11.687, "step": 13569 }, { "epoch": 0.28405760696642385, "grad_norm": 0.27761614322662354, "learning_rate": 0.00019562570930259357, "loss": 11.6677, "step": 13570 }, { "epoch": 0.284078539730386, "grad_norm": 0.2949153780937195, "learning_rate": 0.0001956250679033222, "loss": 11.6772, "step": 13571 }, { "epoch": 0.28409947249434814, "grad_norm": 0.29407843947410583, "learning_rate": 0.00019562442645808194, "loss": 11.6712, "step": 13572 }, { "epoch": 0.2841204052583103, "grad_norm": 0.26578372716903687, "learning_rate": 0.00019562378496687307, "loss": 11.6655, "step": 13573 }, { "epoch": 0.2841413380222725, "grad_norm": 0.3370874524116516, "learning_rate": 0.00019562314342969593, "loss": 11.6946, "step": 13574 }, { "epoch": 0.2841622707862346, "grad_norm": 0.21802084147930145, "learning_rate": 0.00019562250184655087, "loss": 11.6835, "step": 13575 }, { "epoch": 0.2841832035501968, "grad_norm": 0.2724030017852783, "learning_rate": 0.00019562186021743812, "loss": 11.6754, "step": 13576 }, { "epoch": 0.2842041363141589, "grad_norm": 0.24751079082489014, "learning_rate": 0.00019562121854235805, "loss": 11.6591, "step": 13577 }, { "epoch": 0.28422506907812106, "grad_norm": 0.3635934889316559, "learning_rate": 0.00019562057682131093, "loss": 11.6726, "step": 13578 }, { "epoch": 0.2842460018420832, "grad_norm": 0.28544381260871887, "learning_rate": 0.00019561993505429705, "loss": 11.6714, "step": 13579 }, { "epoch": 0.2842669346060454, "grad_norm": 0.2418881207704544, "learning_rate": 0.0001956192932413168, "loss": 11.679, "step": 13580 }, { "epoch": 0.28428786737000755, "grad_norm": 0.28984636068344116, "learning_rate": 0.00019561865138237043, "loss": 11.6809, "step": 13581 }, { "epoch": 0.2843088001339697, "grad_norm": 0.32296329736709595, "learning_rate": 0.00019561800947745823, "loss": 11.6858, "step": 13582 }, { "epoch": 0.28432973289793184, "grad_norm": 0.2711890935897827, "learning_rate": 0.00019561736752658057, "loss": 11.658, "step": 13583 }, { "epoch": 0.284350665661894, "grad_norm": 0.28728288412094116, "learning_rate": 0.0001956167255297377, "loss": 11.663, "step": 13584 }, { "epoch": 0.28437159842585613, "grad_norm": 0.28049907088279724, "learning_rate": 0.00019561608348692998, "loss": 11.6761, "step": 13585 }, { "epoch": 0.2843925311898183, "grad_norm": 0.29723429679870605, "learning_rate": 0.0001956154413981577, "loss": 11.6765, "step": 13586 }, { "epoch": 0.2844134639537805, "grad_norm": 0.2293023020029068, "learning_rate": 0.0001956147992634211, "loss": 11.6579, "step": 13587 }, { "epoch": 0.2844343967177426, "grad_norm": 0.26619279384613037, "learning_rate": 0.0001956141570827206, "loss": 11.6858, "step": 13588 }, { "epoch": 0.28445532948170477, "grad_norm": 0.28715822100639343, "learning_rate": 0.00019561351485605647, "loss": 11.6903, "step": 13589 }, { "epoch": 0.2844762622456669, "grad_norm": 0.23908169567584991, "learning_rate": 0.00019561287258342897, "loss": 11.6686, "step": 13590 }, { "epoch": 0.28449719500962906, "grad_norm": 0.26198241114616394, "learning_rate": 0.00019561223026483843, "loss": 11.6673, "step": 13591 }, { "epoch": 0.2845181277735912, "grad_norm": 0.2837235629558563, "learning_rate": 0.00019561158790028525, "loss": 11.6885, "step": 13592 }, { "epoch": 0.2845390605375534, "grad_norm": 0.30534860491752625, "learning_rate": 0.00019561094548976963, "loss": 11.6943, "step": 13593 }, { "epoch": 0.28455999330151555, "grad_norm": 0.24811844527721405, "learning_rate": 0.00019561030303329185, "loss": 11.6602, "step": 13594 }, { "epoch": 0.2845809260654777, "grad_norm": 0.24331209063529968, "learning_rate": 0.00019560966053085234, "loss": 11.6932, "step": 13595 }, { "epoch": 0.28460185882943984, "grad_norm": 0.2903982102870941, "learning_rate": 0.00019560901798245137, "loss": 11.6632, "step": 13596 }, { "epoch": 0.284622791593402, "grad_norm": 0.2596234977245331, "learning_rate": 0.0001956083753880892, "loss": 11.6795, "step": 13597 }, { "epoch": 0.2846437243573641, "grad_norm": 0.2598724663257599, "learning_rate": 0.00019560773274776615, "loss": 11.6846, "step": 13598 }, { "epoch": 0.2846646571213263, "grad_norm": 0.2288816273212433, "learning_rate": 0.0001956070900614826, "loss": 11.6856, "step": 13599 }, { "epoch": 0.28468558988528847, "grad_norm": 0.256971150636673, "learning_rate": 0.00019560644732923875, "loss": 11.6805, "step": 13600 }, { "epoch": 0.2847065226492506, "grad_norm": 0.2825683653354645, "learning_rate": 0.000195605804551035, "loss": 11.6857, "step": 13601 }, { "epoch": 0.28472745541321276, "grad_norm": 0.2582632899284363, "learning_rate": 0.0001956051617268716, "loss": 11.6869, "step": 13602 }, { "epoch": 0.2847483881771749, "grad_norm": 0.2322959154844284, "learning_rate": 0.0001956045188567489, "loss": 11.677, "step": 13603 }, { "epoch": 0.28476932094113705, "grad_norm": 0.34655019640922546, "learning_rate": 0.00019560387594066717, "loss": 11.7038, "step": 13604 }, { "epoch": 0.2847902537050992, "grad_norm": 0.29500728845596313, "learning_rate": 0.00019560323297862676, "loss": 11.6892, "step": 13605 }, { "epoch": 0.2848111864690614, "grad_norm": 0.25864750146865845, "learning_rate": 0.00019560258997062798, "loss": 11.6758, "step": 13606 }, { "epoch": 0.28483211923302354, "grad_norm": 0.2382339984178543, "learning_rate": 0.00019560194691667108, "loss": 11.6791, "step": 13607 }, { "epoch": 0.2848530519969857, "grad_norm": 0.34721484780311584, "learning_rate": 0.00019560130381675642, "loss": 11.6815, "step": 13608 }, { "epoch": 0.28487398476094783, "grad_norm": 0.26563358306884766, "learning_rate": 0.0001956006606708843, "loss": 11.6801, "step": 13609 }, { "epoch": 0.28489491752491, "grad_norm": 0.26568686962127686, "learning_rate": 0.00019560001747905503, "loss": 11.6766, "step": 13610 }, { "epoch": 0.2849158502888721, "grad_norm": 0.23430368304252625, "learning_rate": 0.0001955993742412689, "loss": 11.6527, "step": 13611 }, { "epoch": 0.2849367830528343, "grad_norm": 0.34983840584754944, "learning_rate": 0.00019559873095752628, "loss": 11.6957, "step": 13612 }, { "epoch": 0.28495771581679646, "grad_norm": 0.2574385702610016, "learning_rate": 0.0001955980876278274, "loss": 11.6697, "step": 13613 }, { "epoch": 0.2849786485807586, "grad_norm": 0.2705009877681732, "learning_rate": 0.00019559744425217258, "loss": 11.6708, "step": 13614 }, { "epoch": 0.28499958134472075, "grad_norm": 0.2801392376422882, "learning_rate": 0.00019559680083056219, "loss": 11.6778, "step": 13615 }, { "epoch": 0.2850205141086829, "grad_norm": 0.30493196845054626, "learning_rate": 0.0001955961573629965, "loss": 11.6859, "step": 13616 }, { "epoch": 0.28504144687264504, "grad_norm": 0.3085530996322632, "learning_rate": 0.00019559551384947581, "loss": 11.6852, "step": 13617 }, { "epoch": 0.28506237963660724, "grad_norm": 0.2833620309829712, "learning_rate": 0.00019559487029000044, "loss": 11.6825, "step": 13618 }, { "epoch": 0.2850833124005694, "grad_norm": 0.22718477249145508, "learning_rate": 0.00019559422668457072, "loss": 11.6609, "step": 13619 }, { "epoch": 0.28510424516453153, "grad_norm": 0.24631890654563904, "learning_rate": 0.00019559358303318692, "loss": 11.676, "step": 13620 }, { "epoch": 0.2851251779284937, "grad_norm": 0.31342101097106934, "learning_rate": 0.00019559293933584936, "loss": 11.6563, "step": 13621 }, { "epoch": 0.2851461106924558, "grad_norm": 0.26737117767333984, "learning_rate": 0.00019559229559255837, "loss": 11.6686, "step": 13622 }, { "epoch": 0.28516704345641797, "grad_norm": 0.24187225103378296, "learning_rate": 0.00019559165180331424, "loss": 11.6725, "step": 13623 }, { "epoch": 0.2851879762203801, "grad_norm": 0.27135801315307617, "learning_rate": 0.0001955910079681173, "loss": 11.6658, "step": 13624 }, { "epoch": 0.2852089089843423, "grad_norm": 0.346095472574234, "learning_rate": 0.00019559036408696784, "loss": 11.6959, "step": 13625 }, { "epoch": 0.28522984174830446, "grad_norm": 0.2796814441680908, "learning_rate": 0.0001955897201598662, "loss": 11.6818, "step": 13626 }, { "epoch": 0.2852507745122666, "grad_norm": 0.2708483040332794, "learning_rate": 0.00019558907618681264, "loss": 11.6725, "step": 13627 }, { "epoch": 0.28527170727622875, "grad_norm": 0.2615165710449219, "learning_rate": 0.00019558843216780752, "loss": 11.6643, "step": 13628 }, { "epoch": 0.2852926400401909, "grad_norm": 0.29198554158210754, "learning_rate": 0.00019558778810285107, "loss": 11.6756, "step": 13629 }, { "epoch": 0.28531357280415304, "grad_norm": 0.29435598850250244, "learning_rate": 0.00019558714399194372, "loss": 11.6552, "step": 13630 }, { "epoch": 0.28533450556811524, "grad_norm": 0.2705862820148468, "learning_rate": 0.00019558649983508568, "loss": 11.685, "step": 13631 }, { "epoch": 0.2853554383320774, "grad_norm": 0.2838214337825775, "learning_rate": 0.0001955858556322773, "loss": 11.6631, "step": 13632 }, { "epoch": 0.2853763710960395, "grad_norm": 0.32441020011901855, "learning_rate": 0.00019558521138351886, "loss": 11.6876, "step": 13633 }, { "epoch": 0.28539730386000167, "grad_norm": 0.22230377793312073, "learning_rate": 0.00019558456708881073, "loss": 11.6619, "step": 13634 }, { "epoch": 0.2854182366239638, "grad_norm": 0.29889997839927673, "learning_rate": 0.00019558392274815316, "loss": 11.7008, "step": 13635 }, { "epoch": 0.28543916938792596, "grad_norm": 0.2319507747888565, "learning_rate": 0.00019558327836154647, "loss": 11.6828, "step": 13636 }, { "epoch": 0.28546010215188816, "grad_norm": 0.21633437275886536, "learning_rate": 0.000195582633928991, "loss": 11.6773, "step": 13637 }, { "epoch": 0.2854810349158503, "grad_norm": 0.19193671643733978, "learning_rate": 0.00019558198945048708, "loss": 11.6646, "step": 13638 }, { "epoch": 0.28550196767981245, "grad_norm": 0.24906735122203827, "learning_rate": 0.00019558134492603496, "loss": 11.6819, "step": 13639 }, { "epoch": 0.2855229004437746, "grad_norm": 0.2063652127981186, "learning_rate": 0.00019558070035563494, "loss": 11.6788, "step": 13640 }, { "epoch": 0.28554383320773674, "grad_norm": 0.2366613894701004, "learning_rate": 0.00019558005573928737, "loss": 11.6888, "step": 13641 }, { "epoch": 0.2855647659716989, "grad_norm": 0.2415686398744583, "learning_rate": 0.00019557941107699257, "loss": 11.6757, "step": 13642 }, { "epoch": 0.28558569873566103, "grad_norm": 0.29343387484550476, "learning_rate": 0.0001955787663687508, "loss": 11.6628, "step": 13643 }, { "epoch": 0.28560663149962323, "grad_norm": 0.3047623336315155, "learning_rate": 0.00019557812161456246, "loss": 11.6804, "step": 13644 }, { "epoch": 0.2856275642635854, "grad_norm": 0.2511507570743561, "learning_rate": 0.00019557747681442776, "loss": 11.6627, "step": 13645 }, { "epoch": 0.2856484970275475, "grad_norm": 0.26274678111076355, "learning_rate": 0.00019557683196834706, "loss": 11.6778, "step": 13646 }, { "epoch": 0.28566942979150967, "grad_norm": 0.2545621991157532, "learning_rate": 0.00019557618707632065, "loss": 11.6702, "step": 13647 }, { "epoch": 0.2856903625554718, "grad_norm": 0.2484942078590393, "learning_rate": 0.00019557554213834889, "loss": 11.6805, "step": 13648 }, { "epoch": 0.28571129531943396, "grad_norm": 0.35522565245628357, "learning_rate": 0.000195574897154432, "loss": 11.6885, "step": 13649 }, { "epoch": 0.28573222808339616, "grad_norm": 0.239463210105896, "learning_rate": 0.0001955742521245704, "loss": 11.6701, "step": 13650 }, { "epoch": 0.2857531608473583, "grad_norm": 0.2922644019126892, "learning_rate": 0.00019557360704876428, "loss": 11.6862, "step": 13651 }, { "epoch": 0.28577409361132045, "grad_norm": 0.2260754257440567, "learning_rate": 0.00019557296192701404, "loss": 11.6729, "step": 13652 }, { "epoch": 0.2857950263752826, "grad_norm": 0.22306977212429047, "learning_rate": 0.00019557231675931998, "loss": 11.688, "step": 13653 }, { "epoch": 0.28581595913924474, "grad_norm": 0.2559083104133606, "learning_rate": 0.00019557167154568233, "loss": 11.6457, "step": 13654 }, { "epoch": 0.2858368919032069, "grad_norm": 0.28279927372932434, "learning_rate": 0.00019557102628610154, "loss": 11.6787, "step": 13655 }, { "epoch": 0.2858578246671691, "grad_norm": 0.2803499102592468, "learning_rate": 0.00019557038098057781, "loss": 11.6766, "step": 13656 }, { "epoch": 0.2858787574311312, "grad_norm": 0.220816969871521, "learning_rate": 0.00019556973562911148, "loss": 11.6893, "step": 13657 }, { "epoch": 0.28589969019509337, "grad_norm": 0.2686333656311035, "learning_rate": 0.00019556909023170284, "loss": 11.6805, "step": 13658 }, { "epoch": 0.2859206229590555, "grad_norm": 0.2552483081817627, "learning_rate": 0.00019556844478835225, "loss": 11.6818, "step": 13659 }, { "epoch": 0.28594155572301766, "grad_norm": 0.28132009506225586, "learning_rate": 0.00019556779929906002, "loss": 11.6712, "step": 13660 }, { "epoch": 0.2859624884869798, "grad_norm": 0.2890552282333374, "learning_rate": 0.00019556715376382639, "loss": 11.6768, "step": 13661 }, { "epoch": 0.28598342125094195, "grad_norm": 0.2600996196269989, "learning_rate": 0.0001955665081826517, "loss": 11.663, "step": 13662 }, { "epoch": 0.28600435401490415, "grad_norm": 0.22958461940288544, "learning_rate": 0.00019556586255553633, "loss": 11.6767, "step": 13663 }, { "epoch": 0.2860252867788663, "grad_norm": 0.28836992383003235, "learning_rate": 0.0001955652168824805, "loss": 11.6566, "step": 13664 }, { "epoch": 0.28604621954282844, "grad_norm": 0.24444107711315155, "learning_rate": 0.00019556457116348458, "loss": 11.6686, "step": 13665 }, { "epoch": 0.2860671523067906, "grad_norm": 0.30195340514183044, "learning_rate": 0.00019556392539854883, "loss": 11.6737, "step": 13666 }, { "epoch": 0.28608808507075273, "grad_norm": 0.29335638880729675, "learning_rate": 0.00019556327958767357, "loss": 11.6874, "step": 13667 }, { "epoch": 0.2861090178347149, "grad_norm": 0.25594890117645264, "learning_rate": 0.00019556263373085917, "loss": 11.6726, "step": 13668 }, { "epoch": 0.2861299505986771, "grad_norm": 0.349129319190979, "learning_rate": 0.00019556198782810587, "loss": 11.6658, "step": 13669 }, { "epoch": 0.2861508833626392, "grad_norm": 0.27302876114845276, "learning_rate": 0.000195561341879414, "loss": 11.6601, "step": 13670 }, { "epoch": 0.28617181612660136, "grad_norm": 0.3076950013637543, "learning_rate": 0.00019556069588478388, "loss": 11.6771, "step": 13671 }, { "epoch": 0.2861927488905635, "grad_norm": 0.26949968934059143, "learning_rate": 0.00019556004984421586, "loss": 11.6757, "step": 13672 }, { "epoch": 0.28621368165452565, "grad_norm": 0.23523768782615662, "learning_rate": 0.00019555940375771017, "loss": 11.6584, "step": 13673 }, { "epoch": 0.2862346144184878, "grad_norm": 0.2236817628145218, "learning_rate": 0.00019555875762526718, "loss": 11.69, "step": 13674 }, { "epoch": 0.28625554718244994, "grad_norm": 0.2965924143791199, "learning_rate": 0.00019555811144688715, "loss": 11.686, "step": 13675 }, { "epoch": 0.28627647994641214, "grad_norm": 0.2801129221916199, "learning_rate": 0.00019555746522257047, "loss": 11.6683, "step": 13676 }, { "epoch": 0.2862974127103743, "grad_norm": 0.3525804877281189, "learning_rate": 0.00019555681895231736, "loss": 11.6756, "step": 13677 }, { "epoch": 0.28631834547433643, "grad_norm": 0.27738508582115173, "learning_rate": 0.00019555617263612818, "loss": 11.6772, "step": 13678 }, { "epoch": 0.2863392782382986, "grad_norm": 0.27569928765296936, "learning_rate": 0.00019555552627400325, "loss": 11.6635, "step": 13679 }, { "epoch": 0.2863602110022607, "grad_norm": 0.4016737937927246, "learning_rate": 0.00019555487986594282, "loss": 11.6796, "step": 13680 }, { "epoch": 0.28638114376622287, "grad_norm": 0.2639322578907013, "learning_rate": 0.0001955542334119473, "loss": 11.6708, "step": 13681 }, { "epoch": 0.28640207653018507, "grad_norm": 0.2661548852920532, "learning_rate": 0.0001955535869120169, "loss": 11.6816, "step": 13682 }, { "epoch": 0.2864230092941472, "grad_norm": 0.22303389012813568, "learning_rate": 0.000195552940366152, "loss": 11.6687, "step": 13683 }, { "epoch": 0.28644394205810936, "grad_norm": 0.2232573926448822, "learning_rate": 0.0001955522937743529, "loss": 11.676, "step": 13684 }, { "epoch": 0.2864648748220715, "grad_norm": 0.2655259668827057, "learning_rate": 0.00019555164713661987, "loss": 11.6778, "step": 13685 }, { "epoch": 0.28648580758603365, "grad_norm": 0.41009804606437683, "learning_rate": 0.00019555100045295324, "loss": 11.694, "step": 13686 }, { "epoch": 0.2865067403499958, "grad_norm": 0.28181856870651245, "learning_rate": 0.00019555035372335335, "loss": 11.6701, "step": 13687 }, { "epoch": 0.286527673113958, "grad_norm": 0.277682900428772, "learning_rate": 0.00019554970694782048, "loss": 11.6694, "step": 13688 }, { "epoch": 0.28654860587792014, "grad_norm": 0.24798423051834106, "learning_rate": 0.00019554906012635498, "loss": 11.6861, "step": 13689 }, { "epoch": 0.2865695386418823, "grad_norm": 0.24876470863819122, "learning_rate": 0.0001955484132589571, "loss": 11.6684, "step": 13690 }, { "epoch": 0.2865904714058444, "grad_norm": 0.25817763805389404, "learning_rate": 0.0001955477663456272, "loss": 11.6665, "step": 13691 }, { "epoch": 0.28661140416980657, "grad_norm": 0.26267513632774353, "learning_rate": 0.00019554711938636556, "loss": 11.6672, "step": 13692 }, { "epoch": 0.2866323369337687, "grad_norm": 0.3245038390159607, "learning_rate": 0.0001955464723811725, "loss": 11.6741, "step": 13693 }, { "epoch": 0.28665326969773086, "grad_norm": 0.2774076759815216, "learning_rate": 0.00019554582533004835, "loss": 11.6804, "step": 13694 }, { "epoch": 0.28667420246169306, "grad_norm": 0.24283504486083984, "learning_rate": 0.0001955451782329934, "loss": 11.6881, "step": 13695 }, { "epoch": 0.2866951352256552, "grad_norm": 0.2561240792274475, "learning_rate": 0.000195544531090008, "loss": 11.6613, "step": 13696 }, { "epoch": 0.28671606798961735, "grad_norm": 0.3098616898059845, "learning_rate": 0.00019554388390109238, "loss": 11.6651, "step": 13697 }, { "epoch": 0.2867370007535795, "grad_norm": 1.0609487295150757, "learning_rate": 0.00019554323666624694, "loss": 11.7305, "step": 13698 }, { "epoch": 0.28675793351754164, "grad_norm": 0.3027469515800476, "learning_rate": 0.00019554258938547192, "loss": 11.6779, "step": 13699 }, { "epoch": 0.2867788662815038, "grad_norm": 0.2166350781917572, "learning_rate": 0.00019554194205876767, "loss": 11.6627, "step": 13700 }, { "epoch": 0.286799799045466, "grad_norm": 0.36236196756362915, "learning_rate": 0.00019554129468613452, "loss": 11.6869, "step": 13701 }, { "epoch": 0.28682073180942813, "grad_norm": 0.31105566024780273, "learning_rate": 0.0001955406472675727, "loss": 11.6827, "step": 13702 }, { "epoch": 0.2868416645733903, "grad_norm": 0.3322991728782654, "learning_rate": 0.00019553999980308264, "loss": 11.6657, "step": 13703 }, { "epoch": 0.2868625973373524, "grad_norm": 0.2556525766849518, "learning_rate": 0.00019553935229266455, "loss": 11.65, "step": 13704 }, { "epoch": 0.28688353010131457, "grad_norm": 0.26662883162498474, "learning_rate": 0.0001955387047363188, "loss": 11.6733, "step": 13705 }, { "epoch": 0.2869044628652767, "grad_norm": 0.26436173915863037, "learning_rate": 0.00019553805713404566, "loss": 11.6855, "step": 13706 }, { "epoch": 0.2869253956292389, "grad_norm": 0.22982436418533325, "learning_rate": 0.00019553740948584548, "loss": 11.6784, "step": 13707 }, { "epoch": 0.28694632839320106, "grad_norm": 0.2750775218009949, "learning_rate": 0.00019553676179171853, "loss": 11.6782, "step": 13708 }, { "epoch": 0.2869672611571632, "grad_norm": 0.24095770716667175, "learning_rate": 0.00019553611405166517, "loss": 11.6737, "step": 13709 }, { "epoch": 0.28698819392112535, "grad_norm": 0.21594740450382233, "learning_rate": 0.00019553546626568564, "loss": 11.6642, "step": 13710 }, { "epoch": 0.2870091266850875, "grad_norm": 0.24603794515132904, "learning_rate": 0.00019553481843378033, "loss": 11.6781, "step": 13711 }, { "epoch": 0.28703005944904963, "grad_norm": 0.2762073278427124, "learning_rate": 0.00019553417055594952, "loss": 11.6599, "step": 13712 }, { "epoch": 0.2870509922130118, "grad_norm": 0.2814251184463501, "learning_rate": 0.00019553352263219353, "loss": 11.6634, "step": 13713 }, { "epoch": 0.287071924976974, "grad_norm": 0.22114625573158264, "learning_rate": 0.00019553287466251264, "loss": 11.6588, "step": 13714 }, { "epoch": 0.2870928577409361, "grad_norm": 0.2577425539493561, "learning_rate": 0.00019553222664690719, "loss": 11.679, "step": 13715 }, { "epoch": 0.28711379050489827, "grad_norm": 0.27468568086624146, "learning_rate": 0.00019553157858537748, "loss": 11.6789, "step": 13716 }, { "epoch": 0.2871347232688604, "grad_norm": 0.24535386264324188, "learning_rate": 0.00019553093047792382, "loss": 11.6741, "step": 13717 }, { "epoch": 0.28715565603282256, "grad_norm": 0.2499149888753891, "learning_rate": 0.00019553028232454652, "loss": 11.6622, "step": 13718 }, { "epoch": 0.2871765887967847, "grad_norm": 0.3527592718601227, "learning_rate": 0.00019552963412524594, "loss": 11.6844, "step": 13719 }, { "epoch": 0.2871975215607469, "grad_norm": 0.2375953495502472, "learning_rate": 0.0001955289858800223, "loss": 11.665, "step": 13720 }, { "epoch": 0.28721845432470905, "grad_norm": 0.24664124846458435, "learning_rate": 0.000195528337588876, "loss": 11.6666, "step": 13721 }, { "epoch": 0.2872393870886712, "grad_norm": 0.28866344690322876, "learning_rate": 0.00019552768925180732, "loss": 11.692, "step": 13722 }, { "epoch": 0.28726031985263334, "grad_norm": 0.24311786890029907, "learning_rate": 0.00019552704086881652, "loss": 11.6877, "step": 13723 }, { "epoch": 0.2872812526165955, "grad_norm": 0.2197762131690979, "learning_rate": 0.00019552639243990402, "loss": 11.67, "step": 13724 }, { "epoch": 0.28730218538055763, "grad_norm": 0.2980961501598358, "learning_rate": 0.00019552574396507002, "loss": 11.6886, "step": 13725 }, { "epoch": 0.28732311814451983, "grad_norm": 0.27932676672935486, "learning_rate": 0.0001955250954443149, "loss": 11.6743, "step": 13726 }, { "epoch": 0.287344050908482, "grad_norm": 0.21244387328624725, "learning_rate": 0.00019552444687763893, "loss": 11.6688, "step": 13727 }, { "epoch": 0.2873649836724441, "grad_norm": 0.2587020993232727, "learning_rate": 0.00019552379826504246, "loss": 11.6585, "step": 13728 }, { "epoch": 0.28738591643640626, "grad_norm": 0.22968973219394684, "learning_rate": 0.00019552314960652578, "loss": 11.674, "step": 13729 }, { "epoch": 0.2874068492003684, "grad_norm": 0.2628629505634308, "learning_rate": 0.0001955225009020892, "loss": 11.69, "step": 13730 }, { "epoch": 0.28742778196433055, "grad_norm": 0.2991289496421814, "learning_rate": 0.00019552185215173306, "loss": 11.6704, "step": 13731 }, { "epoch": 0.2874487147282927, "grad_norm": 0.3184035122394562, "learning_rate": 0.00019552120335545765, "loss": 11.6647, "step": 13732 }, { "epoch": 0.2874696474922549, "grad_norm": 0.29434841871261597, "learning_rate": 0.00019552055451326327, "loss": 11.681, "step": 13733 }, { "epoch": 0.28749058025621704, "grad_norm": 0.2892504334449768, "learning_rate": 0.00019551990562515027, "loss": 11.6796, "step": 13734 }, { "epoch": 0.2875115130201792, "grad_norm": 0.2676839828491211, "learning_rate": 0.0001955192566911189, "loss": 11.6752, "step": 13735 }, { "epoch": 0.28753244578414133, "grad_norm": 0.28913697600364685, "learning_rate": 0.00019551860771116955, "loss": 11.6738, "step": 13736 }, { "epoch": 0.2875533785481035, "grad_norm": 0.25738972425460815, "learning_rate": 0.00019551795868530248, "loss": 11.6934, "step": 13737 }, { "epoch": 0.2875743113120656, "grad_norm": 0.2532671093940735, "learning_rate": 0.00019551730961351796, "loss": 11.6698, "step": 13738 }, { "epoch": 0.2875952440760278, "grad_norm": 0.2699524462223053, "learning_rate": 0.00019551666049581642, "loss": 11.6788, "step": 13739 }, { "epoch": 0.28761617683998997, "grad_norm": 0.32697930932044983, "learning_rate": 0.0001955160113321981, "loss": 11.6794, "step": 13740 }, { "epoch": 0.2876371096039521, "grad_norm": 0.29169246554374695, "learning_rate": 0.0001955153621226633, "loss": 11.677, "step": 13741 }, { "epoch": 0.28765804236791426, "grad_norm": 0.25807833671569824, "learning_rate": 0.00019551471286721235, "loss": 11.6839, "step": 13742 }, { "epoch": 0.2876789751318764, "grad_norm": 0.3362000584602356, "learning_rate": 0.00019551406356584555, "loss": 11.6773, "step": 13743 }, { "epoch": 0.28769990789583855, "grad_norm": 0.23836861550807953, "learning_rate": 0.00019551341421856325, "loss": 11.6753, "step": 13744 }, { "epoch": 0.28772084065980075, "grad_norm": 0.26116281747817993, "learning_rate": 0.00019551276482536575, "loss": 11.682, "step": 13745 }, { "epoch": 0.2877417734237629, "grad_norm": 0.3887813091278076, "learning_rate": 0.00019551211538625332, "loss": 11.686, "step": 13746 }, { "epoch": 0.28776270618772504, "grad_norm": 0.28095147013664246, "learning_rate": 0.0001955114659012263, "loss": 11.6709, "step": 13747 }, { "epoch": 0.2877836389516872, "grad_norm": 0.2776939272880554, "learning_rate": 0.00019551081637028504, "loss": 11.6861, "step": 13748 }, { "epoch": 0.2878045717156493, "grad_norm": 0.25650128722190857, "learning_rate": 0.0001955101667934298, "loss": 11.6669, "step": 13749 }, { "epoch": 0.28782550447961147, "grad_norm": 0.3309888541698456, "learning_rate": 0.0001955095171706609, "loss": 11.6808, "step": 13750 }, { "epoch": 0.2878464372435736, "grad_norm": 0.2783063054084778, "learning_rate": 0.00019550886750197864, "loss": 11.6856, "step": 13751 }, { "epoch": 0.2878673700075358, "grad_norm": 0.28007209300994873, "learning_rate": 0.0001955082177873834, "loss": 11.6768, "step": 13752 }, { "epoch": 0.28788830277149796, "grad_norm": 0.3010198473930359, "learning_rate": 0.0001955075680268754, "loss": 11.6818, "step": 13753 }, { "epoch": 0.2879092355354601, "grad_norm": 0.2942894697189331, "learning_rate": 0.00019550691822045504, "loss": 11.6546, "step": 13754 }, { "epoch": 0.28793016829942225, "grad_norm": 0.2735561728477478, "learning_rate": 0.0001955062683681226, "loss": 11.6896, "step": 13755 }, { "epoch": 0.2879511010633844, "grad_norm": 0.24941983819007874, "learning_rate": 0.00019550561846987834, "loss": 11.6855, "step": 13756 }, { "epoch": 0.28797203382734654, "grad_norm": 0.2573341727256775, "learning_rate": 0.0001955049685257226, "loss": 11.6701, "step": 13757 }, { "epoch": 0.28799296659130874, "grad_norm": 0.25079286098480225, "learning_rate": 0.00019550431853565577, "loss": 11.6811, "step": 13758 }, { "epoch": 0.2880138993552709, "grad_norm": 0.21170882880687714, "learning_rate": 0.00019550366849967806, "loss": 11.6699, "step": 13759 }, { "epoch": 0.28803483211923303, "grad_norm": 0.2576701045036316, "learning_rate": 0.0001955030184177898, "loss": 11.6726, "step": 13760 }, { "epoch": 0.2880557648831952, "grad_norm": 0.9194487929344177, "learning_rate": 0.00019550236828999137, "loss": 11.6538, "step": 13761 }, { "epoch": 0.2880766976471573, "grad_norm": 0.33256733417510986, "learning_rate": 0.00019550171811628304, "loss": 11.6696, "step": 13762 }, { "epoch": 0.28809763041111947, "grad_norm": 0.3062199056148529, "learning_rate": 0.0001955010678966651, "loss": 11.6768, "step": 13763 }, { "epoch": 0.2881185631750816, "grad_norm": 0.28223004937171936, "learning_rate": 0.00019550041763113788, "loss": 11.6948, "step": 13764 }, { "epoch": 0.2881394959390438, "grad_norm": 0.2882833182811737, "learning_rate": 0.0001954997673197017, "loss": 11.67, "step": 13765 }, { "epoch": 0.28816042870300596, "grad_norm": 0.33628398180007935, "learning_rate": 0.00019549911696235687, "loss": 11.6665, "step": 13766 }, { "epoch": 0.2881813614669681, "grad_norm": 0.22930344939231873, "learning_rate": 0.00019549846655910372, "loss": 11.6658, "step": 13767 }, { "epoch": 0.28820229423093024, "grad_norm": 0.31209689378738403, "learning_rate": 0.0001954978161099425, "loss": 11.6892, "step": 13768 }, { "epoch": 0.2882232269948924, "grad_norm": 0.2626054883003235, "learning_rate": 0.00019549716561487358, "loss": 11.6792, "step": 13769 }, { "epoch": 0.28824415975885453, "grad_norm": 0.23711028695106506, "learning_rate": 0.00019549651507389727, "loss": 11.6702, "step": 13770 }, { "epoch": 0.28826509252281673, "grad_norm": 0.26359641551971436, "learning_rate": 0.00019549586448701386, "loss": 11.6763, "step": 13771 }, { "epoch": 0.2882860252867789, "grad_norm": 0.2612735331058502, "learning_rate": 0.0001954952138542237, "loss": 11.6663, "step": 13772 }, { "epoch": 0.288306958050741, "grad_norm": 0.3226492404937744, "learning_rate": 0.00019549456317552707, "loss": 11.6883, "step": 13773 }, { "epoch": 0.28832789081470317, "grad_norm": 0.3115417957305908, "learning_rate": 0.00019549391245092429, "loss": 11.6844, "step": 13774 }, { "epoch": 0.2883488235786653, "grad_norm": 0.2423575073480606, "learning_rate": 0.00019549326168041565, "loss": 11.6791, "step": 13775 }, { "epoch": 0.28836975634262746, "grad_norm": 0.2311246246099472, "learning_rate": 0.0001954926108640015, "loss": 11.6742, "step": 13776 }, { "epoch": 0.28839068910658966, "grad_norm": 0.2928861379623413, "learning_rate": 0.00019549196000168213, "loss": 11.6776, "step": 13777 }, { "epoch": 0.2884116218705518, "grad_norm": 0.35877686738967896, "learning_rate": 0.00019549130909345786, "loss": 11.6763, "step": 13778 }, { "epoch": 0.28843255463451395, "grad_norm": 0.2619587182998657, "learning_rate": 0.00019549065813932902, "loss": 11.6762, "step": 13779 }, { "epoch": 0.2884534873984761, "grad_norm": 0.2766570448875427, "learning_rate": 0.0001954900071392959, "loss": 11.6764, "step": 13780 }, { "epoch": 0.28847442016243824, "grad_norm": 0.2878161072731018, "learning_rate": 0.00019548935609335885, "loss": 11.6851, "step": 13781 }, { "epoch": 0.2884953529264004, "grad_norm": 0.24976563453674316, "learning_rate": 0.00019548870500151808, "loss": 11.6736, "step": 13782 }, { "epoch": 0.28851628569036253, "grad_norm": 0.24453523755073547, "learning_rate": 0.00019548805386377405, "loss": 11.6706, "step": 13783 }, { "epoch": 0.28853721845432473, "grad_norm": 0.3134225010871887, "learning_rate": 0.00019548740268012695, "loss": 11.6775, "step": 13784 }, { "epoch": 0.2885581512182869, "grad_norm": 0.3625974655151367, "learning_rate": 0.00019548675145057714, "loss": 11.6791, "step": 13785 }, { "epoch": 0.288579083982249, "grad_norm": 0.4482199549674988, "learning_rate": 0.00019548610017512495, "loss": 11.6998, "step": 13786 }, { "epoch": 0.28860001674621116, "grad_norm": 0.3102288544178009, "learning_rate": 0.00019548544885377068, "loss": 11.6651, "step": 13787 }, { "epoch": 0.2886209495101733, "grad_norm": 0.28667813539505005, "learning_rate": 0.00019548479748651465, "loss": 11.6584, "step": 13788 }, { "epoch": 0.28864188227413545, "grad_norm": 0.23382189869880676, "learning_rate": 0.00019548414607335716, "loss": 11.6769, "step": 13789 }, { "epoch": 0.28866281503809765, "grad_norm": 0.3064206838607788, "learning_rate": 0.00019548349461429853, "loss": 11.6656, "step": 13790 }, { "epoch": 0.2886837478020598, "grad_norm": 0.25606048107147217, "learning_rate": 0.00019548284310933907, "loss": 11.6607, "step": 13791 }, { "epoch": 0.28870468056602194, "grad_norm": 0.23423567414283752, "learning_rate": 0.0001954821915584791, "loss": 11.6796, "step": 13792 }, { "epoch": 0.2887256133299841, "grad_norm": 0.2812059819698334, "learning_rate": 0.00019548153996171893, "loss": 11.6664, "step": 13793 }, { "epoch": 0.28874654609394623, "grad_norm": 0.2786235213279724, "learning_rate": 0.00019548088831905882, "loss": 11.6748, "step": 13794 }, { "epoch": 0.2887674788579084, "grad_norm": 0.35971924662590027, "learning_rate": 0.00019548023663049918, "loss": 11.6632, "step": 13795 }, { "epoch": 0.2887884116218706, "grad_norm": 0.34827035665512085, "learning_rate": 0.00019547958489604027, "loss": 11.6977, "step": 13796 }, { "epoch": 0.2888093443858327, "grad_norm": 0.2189885973930359, "learning_rate": 0.00019547893311568242, "loss": 11.6806, "step": 13797 }, { "epoch": 0.28883027714979487, "grad_norm": 0.37150755524635315, "learning_rate": 0.0001954782812894259, "loss": 11.6771, "step": 13798 }, { "epoch": 0.288851209913757, "grad_norm": 0.26393309235572815, "learning_rate": 0.00019547762941727107, "loss": 11.6799, "step": 13799 }, { "epoch": 0.28887214267771916, "grad_norm": 0.29963114857673645, "learning_rate": 0.00019547697749921824, "loss": 11.6596, "step": 13800 }, { "epoch": 0.2888930754416813, "grad_norm": 0.23720213770866394, "learning_rate": 0.00019547632553526772, "loss": 11.6572, "step": 13801 }, { "epoch": 0.28891400820564345, "grad_norm": 0.34074142575263977, "learning_rate": 0.0001954756735254198, "loss": 11.6715, "step": 13802 }, { "epoch": 0.28893494096960565, "grad_norm": 0.28286606073379517, "learning_rate": 0.00019547502146967483, "loss": 11.6914, "step": 13803 }, { "epoch": 0.2889558737335678, "grad_norm": 0.24134495854377747, "learning_rate": 0.00019547436936803307, "loss": 11.6723, "step": 13804 }, { "epoch": 0.28897680649752994, "grad_norm": 0.2818893790245056, "learning_rate": 0.0001954737172204949, "loss": 11.6796, "step": 13805 }, { "epoch": 0.2889977392614921, "grad_norm": 0.36050719022750854, "learning_rate": 0.0001954730650270606, "loss": 11.6623, "step": 13806 }, { "epoch": 0.2890186720254542, "grad_norm": 0.2641330361366272, "learning_rate": 0.00019547241278773046, "loss": 11.6661, "step": 13807 }, { "epoch": 0.28903960478941637, "grad_norm": 0.3413085639476776, "learning_rate": 0.00019547176050250484, "loss": 11.6812, "step": 13808 }, { "epoch": 0.28906053755337857, "grad_norm": 0.3472078740596771, "learning_rate": 0.00019547110817138403, "loss": 11.691, "step": 13809 }, { "epoch": 0.2890814703173407, "grad_norm": 0.3112226724624634, "learning_rate": 0.00019547045579436834, "loss": 11.6587, "step": 13810 }, { "epoch": 0.28910240308130286, "grad_norm": 0.23254074156284332, "learning_rate": 0.00019546980337145807, "loss": 11.6672, "step": 13811 }, { "epoch": 0.289123335845265, "grad_norm": 0.26093050837516785, "learning_rate": 0.00019546915090265355, "loss": 11.6799, "step": 13812 }, { "epoch": 0.28914426860922715, "grad_norm": 0.2396949827671051, "learning_rate": 0.00019546849838795517, "loss": 11.6752, "step": 13813 }, { "epoch": 0.2891652013731893, "grad_norm": 0.2567175328731537, "learning_rate": 0.0001954678458273631, "loss": 11.6682, "step": 13814 }, { "epoch": 0.2891861341371515, "grad_norm": 0.297841876745224, "learning_rate": 0.00019546719322087772, "loss": 11.6675, "step": 13815 }, { "epoch": 0.28920706690111364, "grad_norm": 0.3214896321296692, "learning_rate": 0.00019546654056849935, "loss": 11.6821, "step": 13816 }, { "epoch": 0.2892279996650758, "grad_norm": 0.2678421437740326, "learning_rate": 0.00019546588787022832, "loss": 11.6587, "step": 13817 }, { "epoch": 0.28924893242903793, "grad_norm": 0.3423495292663574, "learning_rate": 0.00019546523512606494, "loss": 11.6708, "step": 13818 }, { "epoch": 0.2892698651930001, "grad_norm": 0.2747611701488495, "learning_rate": 0.00019546458233600948, "loss": 11.6761, "step": 13819 }, { "epoch": 0.2892907979569622, "grad_norm": 0.23939475417137146, "learning_rate": 0.00019546392950006228, "loss": 11.6738, "step": 13820 }, { "epoch": 0.28931173072092436, "grad_norm": 0.29562270641326904, "learning_rate": 0.0001954632766182237, "loss": 11.6782, "step": 13821 }, { "epoch": 0.28933266348488657, "grad_norm": 0.2545584440231323, "learning_rate": 0.00019546262369049395, "loss": 11.659, "step": 13822 }, { "epoch": 0.2893535962488487, "grad_norm": 0.29791978001594543, "learning_rate": 0.00019546197071687344, "loss": 11.6772, "step": 13823 }, { "epoch": 0.28937452901281085, "grad_norm": 0.3545673191547394, "learning_rate": 0.00019546131769736244, "loss": 11.6885, "step": 13824 }, { "epoch": 0.289395461776773, "grad_norm": 0.2949819266796112, "learning_rate": 0.00019546066463196126, "loss": 11.6808, "step": 13825 }, { "epoch": 0.28941639454073514, "grad_norm": 0.23913806676864624, "learning_rate": 0.00019546001152067027, "loss": 11.6714, "step": 13826 }, { "epoch": 0.2894373273046973, "grad_norm": 0.2680470049381256, "learning_rate": 0.00019545935836348967, "loss": 11.6733, "step": 13827 }, { "epoch": 0.2894582600686595, "grad_norm": 0.2962612807750702, "learning_rate": 0.00019545870516041988, "loss": 11.6815, "step": 13828 }, { "epoch": 0.28947919283262163, "grad_norm": 0.3091297149658203, "learning_rate": 0.00019545805191146118, "loss": 11.6707, "step": 13829 }, { "epoch": 0.2895001255965838, "grad_norm": 0.34077057242393494, "learning_rate": 0.0001954573986166139, "loss": 11.6907, "step": 13830 }, { "epoch": 0.2895210583605459, "grad_norm": 0.4395359754562378, "learning_rate": 0.0001954567452758783, "loss": 11.6303, "step": 13831 }, { "epoch": 0.28954199112450807, "grad_norm": 0.2363014668226242, "learning_rate": 0.00019545609188925474, "loss": 11.663, "step": 13832 }, { "epoch": 0.2895629238884702, "grad_norm": 0.2785797119140625, "learning_rate": 0.00019545543845674352, "loss": 11.7026, "step": 13833 }, { "epoch": 0.2895838566524324, "grad_norm": 0.26675945520401, "learning_rate": 0.00019545478497834498, "loss": 11.6771, "step": 13834 }, { "epoch": 0.28960478941639456, "grad_norm": 0.2208714783191681, "learning_rate": 0.00019545413145405938, "loss": 11.6712, "step": 13835 }, { "epoch": 0.2896257221803567, "grad_norm": 0.27247869968414307, "learning_rate": 0.00019545347788388709, "loss": 11.6693, "step": 13836 }, { "epoch": 0.28964665494431885, "grad_norm": 0.2831166386604309, "learning_rate": 0.00019545282426782837, "loss": 11.675, "step": 13837 }, { "epoch": 0.289667587708281, "grad_norm": 0.24388627707958221, "learning_rate": 0.0001954521706058836, "loss": 11.677, "step": 13838 }, { "epoch": 0.28968852047224314, "grad_norm": 0.3770415782928467, "learning_rate": 0.00019545151689805303, "loss": 11.6903, "step": 13839 }, { "epoch": 0.2897094532362053, "grad_norm": 0.24353553354740143, "learning_rate": 0.00019545086314433702, "loss": 11.672, "step": 13840 }, { "epoch": 0.2897303860001675, "grad_norm": 1.0679439306259155, "learning_rate": 0.0001954502093447359, "loss": 11.7118, "step": 13841 }, { "epoch": 0.28975131876412963, "grad_norm": 0.2539066970348358, "learning_rate": 0.0001954495554992499, "loss": 11.6709, "step": 13842 }, { "epoch": 0.2897722515280918, "grad_norm": 0.3140256702899933, "learning_rate": 0.0001954489016078794, "loss": 11.7099, "step": 13843 }, { "epoch": 0.2897931842920539, "grad_norm": 0.2622900605201721, "learning_rate": 0.0001954482476706247, "loss": 11.6808, "step": 13844 }, { "epoch": 0.28981411705601606, "grad_norm": 0.25781112909317017, "learning_rate": 0.00019544759368748612, "loss": 11.6646, "step": 13845 }, { "epoch": 0.2898350498199782, "grad_norm": 0.26200976967811584, "learning_rate": 0.00019544693965846394, "loss": 11.6707, "step": 13846 }, { "epoch": 0.2898559825839404, "grad_norm": 0.27723753452301025, "learning_rate": 0.00019544628558355856, "loss": 11.6777, "step": 13847 }, { "epoch": 0.28987691534790255, "grad_norm": 0.2764506936073303, "learning_rate": 0.00019544563146277019, "loss": 11.675, "step": 13848 }, { "epoch": 0.2898978481118647, "grad_norm": 0.34040015935897827, "learning_rate": 0.0001954449772960992, "loss": 11.6867, "step": 13849 }, { "epoch": 0.28991878087582684, "grad_norm": 0.26467499136924744, "learning_rate": 0.00019544432308354591, "loss": 11.6614, "step": 13850 }, { "epoch": 0.289939713639789, "grad_norm": 0.3235395848751068, "learning_rate": 0.00019544366882511062, "loss": 11.6779, "step": 13851 }, { "epoch": 0.28996064640375113, "grad_norm": 0.2620716691017151, "learning_rate": 0.00019544301452079364, "loss": 11.6892, "step": 13852 }, { "epoch": 0.28998157916771333, "grad_norm": 0.2810591459274292, "learning_rate": 0.0001954423601705953, "loss": 11.6815, "step": 13853 }, { "epoch": 0.2900025119316755, "grad_norm": 0.3037061393260956, "learning_rate": 0.0001954417057745159, "loss": 11.6798, "step": 13854 }, { "epoch": 0.2900234446956376, "grad_norm": 0.296454519033432, "learning_rate": 0.00019544105133255575, "loss": 11.6899, "step": 13855 }, { "epoch": 0.29004437745959977, "grad_norm": 0.3161892890930176, "learning_rate": 0.0001954403968447152, "loss": 11.6636, "step": 13856 }, { "epoch": 0.2900653102235619, "grad_norm": 0.3343977928161621, "learning_rate": 0.00019543974231099452, "loss": 11.6938, "step": 13857 }, { "epoch": 0.29008624298752406, "grad_norm": 0.2364022135734558, "learning_rate": 0.00019543908773139403, "loss": 11.6713, "step": 13858 }, { "epoch": 0.2901071757514862, "grad_norm": 0.3641209006309509, "learning_rate": 0.00019543843310591407, "loss": 11.6668, "step": 13859 }, { "epoch": 0.2901281085154484, "grad_norm": 0.2699582874774933, "learning_rate": 0.00019543777843455495, "loss": 11.676, "step": 13860 }, { "epoch": 0.29014904127941055, "grad_norm": 0.3232365548610687, "learning_rate": 0.00019543712371731698, "loss": 11.6731, "step": 13861 }, { "epoch": 0.2901699740433727, "grad_norm": 0.225128173828125, "learning_rate": 0.00019543646895420045, "loss": 11.6779, "step": 13862 }, { "epoch": 0.29019090680733484, "grad_norm": 0.2875944972038269, "learning_rate": 0.00019543581414520572, "loss": 11.6992, "step": 13863 }, { "epoch": 0.290211839571297, "grad_norm": 0.3727690875530243, "learning_rate": 0.00019543515929033305, "loss": 11.7084, "step": 13864 }, { "epoch": 0.2902327723352591, "grad_norm": 0.28479668498039246, "learning_rate": 0.0001954345043895828, "loss": 11.684, "step": 13865 }, { "epoch": 0.2902537050992213, "grad_norm": 0.24884025752544403, "learning_rate": 0.00019543384944295528, "loss": 11.685, "step": 13866 }, { "epoch": 0.29027463786318347, "grad_norm": 0.23422938585281372, "learning_rate": 0.00019543319445045081, "loss": 11.6614, "step": 13867 }, { "epoch": 0.2902955706271456, "grad_norm": 0.26558953523635864, "learning_rate": 0.0001954325394120697, "loss": 11.6811, "step": 13868 }, { "epoch": 0.29031650339110776, "grad_norm": 0.2958044707775116, "learning_rate": 0.0001954318843278122, "loss": 11.6626, "step": 13869 }, { "epoch": 0.2903374361550699, "grad_norm": 0.2900848984718323, "learning_rate": 0.00019543122919767872, "loss": 11.6696, "step": 13870 }, { "epoch": 0.29035836891903205, "grad_norm": 0.27217093110084534, "learning_rate": 0.00019543057402166954, "loss": 11.669, "step": 13871 }, { "epoch": 0.2903793016829942, "grad_norm": 0.29727375507354736, "learning_rate": 0.00019542991879978495, "loss": 11.6721, "step": 13872 }, { "epoch": 0.2904002344469564, "grad_norm": 0.37461939454078674, "learning_rate": 0.0001954292635320253, "loss": 11.6774, "step": 13873 }, { "epoch": 0.29042116721091854, "grad_norm": 0.22105199098587036, "learning_rate": 0.0001954286082183909, "loss": 11.6711, "step": 13874 }, { "epoch": 0.2904420999748807, "grad_norm": 0.29917556047439575, "learning_rate": 0.000195427952858882, "loss": 11.6965, "step": 13875 }, { "epoch": 0.29046303273884283, "grad_norm": 0.29507747292518616, "learning_rate": 0.000195427297453499, "loss": 11.67, "step": 13876 }, { "epoch": 0.290483965502805, "grad_norm": 0.28831976652145386, "learning_rate": 0.00019542664200224223, "loss": 11.6689, "step": 13877 }, { "epoch": 0.2905048982667671, "grad_norm": 0.3006915748119354, "learning_rate": 0.0001954259865051119, "loss": 11.6769, "step": 13878 }, { "epoch": 0.2905258310307293, "grad_norm": 0.2661145031452179, "learning_rate": 0.00019542533096210843, "loss": 11.6795, "step": 13879 }, { "epoch": 0.29054676379469146, "grad_norm": 0.29188406467437744, "learning_rate": 0.0001954246753732321, "loss": 11.6676, "step": 13880 }, { "epoch": 0.2905676965586536, "grad_norm": 0.2455851435661316, "learning_rate": 0.00019542401973848318, "loss": 11.6632, "step": 13881 }, { "epoch": 0.29058862932261575, "grad_norm": 0.2909663915634155, "learning_rate": 0.00019542336405786202, "loss": 11.6761, "step": 13882 }, { "epoch": 0.2906095620865779, "grad_norm": 0.22629885375499725, "learning_rate": 0.00019542270833136896, "loss": 11.6628, "step": 13883 }, { "epoch": 0.29063049485054004, "grad_norm": 0.22472478449344635, "learning_rate": 0.00019542205255900427, "loss": 11.6617, "step": 13884 }, { "epoch": 0.29065142761450224, "grad_norm": 0.2267293632030487, "learning_rate": 0.00019542139674076831, "loss": 11.6606, "step": 13885 }, { "epoch": 0.2906723603784644, "grad_norm": 0.2869862914085388, "learning_rate": 0.00019542074087666134, "loss": 11.6715, "step": 13886 }, { "epoch": 0.29069329314242653, "grad_norm": 0.2643267512321472, "learning_rate": 0.00019542008496668375, "loss": 11.6636, "step": 13887 }, { "epoch": 0.2907142259063887, "grad_norm": 0.2364838868379593, "learning_rate": 0.00019541942901083576, "loss": 11.6698, "step": 13888 }, { "epoch": 0.2907351586703508, "grad_norm": 0.267050176858902, "learning_rate": 0.0001954187730091178, "loss": 11.6675, "step": 13889 }, { "epoch": 0.29075609143431297, "grad_norm": 0.2811281085014343, "learning_rate": 0.00019541811696153008, "loss": 11.6643, "step": 13890 }, { "epoch": 0.2907770241982751, "grad_norm": 0.22662310302257538, "learning_rate": 0.00019541746086807296, "loss": 11.6694, "step": 13891 }, { "epoch": 0.2907979569622373, "grad_norm": 0.24896466732025146, "learning_rate": 0.00019541680472874676, "loss": 11.6644, "step": 13892 }, { "epoch": 0.29081888972619946, "grad_norm": 0.3551877439022064, "learning_rate": 0.00019541614854355182, "loss": 11.6895, "step": 13893 }, { "epoch": 0.2908398224901616, "grad_norm": 0.2673513889312744, "learning_rate": 0.00019541549231248838, "loss": 11.6629, "step": 13894 }, { "epoch": 0.29086075525412375, "grad_norm": 0.32442787289619446, "learning_rate": 0.00019541483603555685, "loss": 11.6758, "step": 13895 }, { "epoch": 0.2908816880180859, "grad_norm": 0.25179287791252136, "learning_rate": 0.00019541417971275746, "loss": 11.6824, "step": 13896 }, { "epoch": 0.29090262078204804, "grad_norm": 0.2994403839111328, "learning_rate": 0.00019541352334409055, "loss": 11.6677, "step": 13897 }, { "epoch": 0.29092355354601024, "grad_norm": 0.2578676640987396, "learning_rate": 0.00019541286692955647, "loss": 11.661, "step": 13898 }, { "epoch": 0.2909444863099724, "grad_norm": 0.30538272857666016, "learning_rate": 0.0001954122104691555, "loss": 11.671, "step": 13899 }, { "epoch": 0.29096541907393453, "grad_norm": 0.30401939153671265, "learning_rate": 0.000195411553962888, "loss": 11.6877, "step": 13900 }, { "epoch": 0.2909863518378967, "grad_norm": 0.301547110080719, "learning_rate": 0.00019541089741075422, "loss": 11.6698, "step": 13901 }, { "epoch": 0.2910072846018588, "grad_norm": 0.26557499170303345, "learning_rate": 0.00019541024081275453, "loss": 11.6769, "step": 13902 }, { "epoch": 0.29102821736582096, "grad_norm": 0.2662595808506012, "learning_rate": 0.00019540958416888922, "loss": 11.6798, "step": 13903 }, { "epoch": 0.29104915012978316, "grad_norm": 0.3180294334888458, "learning_rate": 0.0001954089274791586, "loss": 11.6762, "step": 13904 }, { "epoch": 0.2910700828937453, "grad_norm": 0.28777000308036804, "learning_rate": 0.000195408270743563, "loss": 11.6817, "step": 13905 }, { "epoch": 0.29109101565770745, "grad_norm": 0.24909783899784088, "learning_rate": 0.00019540761396210274, "loss": 11.669, "step": 13906 }, { "epoch": 0.2911119484216696, "grad_norm": 0.3036285936832428, "learning_rate": 0.0001954069571347781, "loss": 11.6899, "step": 13907 }, { "epoch": 0.29113288118563174, "grad_norm": 0.22515800595283508, "learning_rate": 0.00019540630026158948, "loss": 11.6754, "step": 13908 }, { "epoch": 0.2911538139495939, "grad_norm": 0.2384704202413559, "learning_rate": 0.00019540564334253707, "loss": 11.6559, "step": 13909 }, { "epoch": 0.29117474671355603, "grad_norm": 0.2605369985103607, "learning_rate": 0.00019540498637762133, "loss": 11.6636, "step": 13910 }, { "epoch": 0.29119567947751823, "grad_norm": 0.24431228637695312, "learning_rate": 0.00019540432936684246, "loss": 11.6563, "step": 13911 }, { "epoch": 0.2912166122414804, "grad_norm": 0.2251088172197342, "learning_rate": 0.0001954036723102008, "loss": 11.6769, "step": 13912 }, { "epoch": 0.2912375450054425, "grad_norm": 0.3421632647514343, "learning_rate": 0.0001954030152076967, "loss": 11.6778, "step": 13913 }, { "epoch": 0.29125847776940467, "grad_norm": 0.29902228713035583, "learning_rate": 0.00019540235805933046, "loss": 11.6749, "step": 13914 }, { "epoch": 0.2912794105333668, "grad_norm": 0.2611033022403717, "learning_rate": 0.0001954017008651024, "loss": 11.664, "step": 13915 }, { "epoch": 0.29130034329732896, "grad_norm": 0.301065057516098, "learning_rate": 0.00019540104362501283, "loss": 11.6879, "step": 13916 }, { "epoch": 0.29132127606129116, "grad_norm": 0.37645721435546875, "learning_rate": 0.00019540038633906207, "loss": 11.6926, "step": 13917 }, { "epoch": 0.2913422088252533, "grad_norm": 0.2630465626716614, "learning_rate": 0.00019539972900725044, "loss": 11.6817, "step": 13918 }, { "epoch": 0.29136314158921545, "grad_norm": 0.3067576289176941, "learning_rate": 0.00019539907162957824, "loss": 11.6784, "step": 13919 }, { "epoch": 0.2913840743531776, "grad_norm": 0.3051320016384125, "learning_rate": 0.00019539841420604577, "loss": 11.6716, "step": 13920 }, { "epoch": 0.29140500711713974, "grad_norm": 0.36634451150894165, "learning_rate": 0.0001953977567366534, "loss": 11.6705, "step": 13921 }, { "epoch": 0.2914259398811019, "grad_norm": 0.2977362871170044, "learning_rate": 0.0001953970992214014, "loss": 11.6752, "step": 13922 }, { "epoch": 0.2914468726450641, "grad_norm": 0.25283437967300415, "learning_rate": 0.0001953964416602901, "loss": 11.6618, "step": 13923 }, { "epoch": 0.2914678054090262, "grad_norm": 0.2247081995010376, "learning_rate": 0.0001953957840533198, "loss": 11.6638, "step": 13924 }, { "epoch": 0.29148873817298837, "grad_norm": 0.610517680644989, "learning_rate": 0.00019539512640049089, "loss": 11.5846, "step": 13925 }, { "epoch": 0.2915096709369505, "grad_norm": 0.2677708566188812, "learning_rate": 0.0001953944687018036, "loss": 11.6684, "step": 13926 }, { "epoch": 0.29153060370091266, "grad_norm": 0.2555280923843384, "learning_rate": 0.00019539381095725827, "loss": 11.6749, "step": 13927 }, { "epoch": 0.2915515364648748, "grad_norm": 0.27440640330314636, "learning_rate": 0.00019539315316685523, "loss": 11.6606, "step": 13928 }, { "epoch": 0.29157246922883695, "grad_norm": 0.2648269832134247, "learning_rate": 0.00019539249533059483, "loss": 11.6821, "step": 13929 }, { "epoch": 0.29159340199279915, "grad_norm": 0.2367931306362152, "learning_rate": 0.0001953918374484773, "loss": 11.6884, "step": 13930 }, { "epoch": 0.2916143347567613, "grad_norm": 0.24316391348838806, "learning_rate": 0.000195391179520503, "loss": 11.6723, "step": 13931 }, { "epoch": 0.29163526752072344, "grad_norm": 0.6953631043434143, "learning_rate": 0.00019539052154667226, "loss": 11.6654, "step": 13932 }, { "epoch": 0.2916562002846856, "grad_norm": 0.29209890961647034, "learning_rate": 0.00019538986352698537, "loss": 11.6705, "step": 13933 }, { "epoch": 0.29167713304864773, "grad_norm": 0.33108222484588623, "learning_rate": 0.0001953892054614427, "loss": 11.6725, "step": 13934 }, { "epoch": 0.2916980658126099, "grad_norm": 0.28733396530151367, "learning_rate": 0.00019538854735004447, "loss": 11.659, "step": 13935 }, { "epoch": 0.2917189985765721, "grad_norm": 0.30641067028045654, "learning_rate": 0.0001953878891927911, "loss": 11.6672, "step": 13936 }, { "epoch": 0.2917399313405342, "grad_norm": 0.24527180194854736, "learning_rate": 0.00019538723098968286, "loss": 11.6799, "step": 13937 }, { "epoch": 0.29176086410449636, "grad_norm": 0.2568723261356354, "learning_rate": 0.00019538657274072003, "loss": 11.6601, "step": 13938 }, { "epoch": 0.2917817968684585, "grad_norm": 0.31365910172462463, "learning_rate": 0.000195385914445903, "loss": 11.6652, "step": 13939 }, { "epoch": 0.29180272963242065, "grad_norm": 0.24637115001678467, "learning_rate": 0.00019538525610523203, "loss": 11.6683, "step": 13940 }, { "epoch": 0.2918236623963828, "grad_norm": 0.31231722235679626, "learning_rate": 0.00019538459771870748, "loss": 11.686, "step": 13941 }, { "epoch": 0.291844595160345, "grad_norm": 0.23621128499507904, "learning_rate": 0.0001953839392863296, "loss": 11.6725, "step": 13942 }, { "epoch": 0.29186552792430714, "grad_norm": 0.2664620578289032, "learning_rate": 0.0001953832808080988, "loss": 11.6751, "step": 13943 }, { "epoch": 0.2918864606882693, "grad_norm": 0.28711771965026855, "learning_rate": 0.0001953826222840153, "loss": 11.6806, "step": 13944 }, { "epoch": 0.29190739345223143, "grad_norm": 0.27022188901901245, "learning_rate": 0.00019538196371407946, "loss": 11.6667, "step": 13945 }, { "epoch": 0.2919283262161936, "grad_norm": 0.2444772720336914, "learning_rate": 0.00019538130509829163, "loss": 11.6748, "step": 13946 }, { "epoch": 0.2919492589801557, "grad_norm": 0.3199854791164398, "learning_rate": 0.00019538064643665208, "loss": 11.6818, "step": 13947 }, { "epoch": 0.29197019174411787, "grad_norm": 0.2431480884552002, "learning_rate": 0.00019537998772916116, "loss": 11.6614, "step": 13948 }, { "epoch": 0.29199112450808007, "grad_norm": 0.3162676990032196, "learning_rate": 0.00019537932897581913, "loss": 11.686, "step": 13949 }, { "epoch": 0.2920120572720422, "grad_norm": 0.27439865469932556, "learning_rate": 0.0001953786701766264, "loss": 11.6681, "step": 13950 }, { "epoch": 0.29203299003600436, "grad_norm": 0.2346552461385727, "learning_rate": 0.0001953780113315832, "loss": 11.6655, "step": 13951 }, { "epoch": 0.2920539227999665, "grad_norm": 0.23481136560440063, "learning_rate": 0.00019537735244068986, "loss": 11.6691, "step": 13952 }, { "epoch": 0.29207485556392865, "grad_norm": 0.2791694104671478, "learning_rate": 0.00019537669350394674, "loss": 11.6754, "step": 13953 }, { "epoch": 0.2920957883278908, "grad_norm": 0.2866550385951996, "learning_rate": 0.00019537603452135413, "loss": 11.665, "step": 13954 }, { "epoch": 0.292116721091853, "grad_norm": 0.2957107126712799, "learning_rate": 0.00019537537549291234, "loss": 11.6885, "step": 13955 }, { "epoch": 0.29213765385581514, "grad_norm": 0.27623608708381653, "learning_rate": 0.0001953747164186217, "loss": 11.6858, "step": 13956 }, { "epoch": 0.2921585866197773, "grad_norm": 0.3124552071094513, "learning_rate": 0.00019537405729848253, "loss": 11.6698, "step": 13957 }, { "epoch": 0.29217951938373943, "grad_norm": 0.258554607629776, "learning_rate": 0.00019537339813249514, "loss": 11.6615, "step": 13958 }, { "epoch": 0.2922004521477016, "grad_norm": 0.2877977788448334, "learning_rate": 0.00019537273892065982, "loss": 11.6828, "step": 13959 }, { "epoch": 0.2922213849116637, "grad_norm": 0.2569469213485718, "learning_rate": 0.00019537207966297695, "loss": 11.6815, "step": 13960 }, { "epoch": 0.29224231767562586, "grad_norm": 0.24367877840995789, "learning_rate": 0.00019537142035944678, "loss": 11.6627, "step": 13961 }, { "epoch": 0.29226325043958806, "grad_norm": 0.3033006191253662, "learning_rate": 0.00019537076101006965, "loss": 11.681, "step": 13962 }, { "epoch": 0.2922841832035502, "grad_norm": 0.2812539041042328, "learning_rate": 0.00019537010161484593, "loss": 11.6657, "step": 13963 }, { "epoch": 0.29230511596751235, "grad_norm": 0.27175045013427734, "learning_rate": 0.00019536944217377587, "loss": 11.6878, "step": 13964 }, { "epoch": 0.2923260487314745, "grad_norm": 0.21453161537647247, "learning_rate": 0.00019536878268685976, "loss": 11.6775, "step": 13965 }, { "epoch": 0.29234698149543664, "grad_norm": 0.28516414761543274, "learning_rate": 0.00019536812315409802, "loss": 11.6938, "step": 13966 }, { "epoch": 0.2923679142593988, "grad_norm": 0.31667453050613403, "learning_rate": 0.00019536746357549093, "loss": 11.6822, "step": 13967 }, { "epoch": 0.292388847023361, "grad_norm": 0.26013824343681335, "learning_rate": 0.00019536680395103876, "loss": 11.6676, "step": 13968 }, { "epoch": 0.29240977978732313, "grad_norm": 0.2986094057559967, "learning_rate": 0.00019536614428074183, "loss": 11.6808, "step": 13969 }, { "epoch": 0.2924307125512853, "grad_norm": 0.27648448944091797, "learning_rate": 0.00019536548456460052, "loss": 11.6805, "step": 13970 }, { "epoch": 0.2924516453152474, "grad_norm": 0.23691725730895996, "learning_rate": 0.00019536482480261508, "loss": 11.6644, "step": 13971 }, { "epoch": 0.29247257807920957, "grad_norm": 0.2808305323123932, "learning_rate": 0.00019536416499478588, "loss": 11.6718, "step": 13972 }, { "epoch": 0.2924935108431717, "grad_norm": 0.23596873879432678, "learning_rate": 0.0001953635051411132, "loss": 11.6837, "step": 13973 }, { "epoch": 0.2925144436071339, "grad_norm": 0.27313676476478577, "learning_rate": 0.0001953628452415974, "loss": 11.6821, "step": 13974 }, { "epoch": 0.29253537637109606, "grad_norm": 0.3647119104862213, "learning_rate": 0.00019536218529623872, "loss": 11.6833, "step": 13975 }, { "epoch": 0.2925563091350582, "grad_norm": 0.2588866949081421, "learning_rate": 0.00019536152530503758, "loss": 11.6782, "step": 13976 }, { "epoch": 0.29257724189902035, "grad_norm": 0.2559397220611572, "learning_rate": 0.00019536086526799422, "loss": 11.6764, "step": 13977 }, { "epoch": 0.2925981746629825, "grad_norm": 0.3020115792751312, "learning_rate": 0.00019536020518510897, "loss": 11.6774, "step": 13978 }, { "epoch": 0.29261910742694464, "grad_norm": 0.2560478448867798, "learning_rate": 0.00019535954505638217, "loss": 11.6679, "step": 13979 }, { "epoch": 0.2926400401909068, "grad_norm": 0.2921086847782135, "learning_rate": 0.00019535888488181414, "loss": 11.6884, "step": 13980 }, { "epoch": 0.292660972954869, "grad_norm": 0.4631282091140747, "learning_rate": 0.00019535822466140514, "loss": 11.6659, "step": 13981 }, { "epoch": 0.2926819057188311, "grad_norm": 0.26542210578918457, "learning_rate": 0.0001953575643951556, "loss": 11.6815, "step": 13982 }, { "epoch": 0.29270283848279327, "grad_norm": 0.26101747155189514, "learning_rate": 0.0001953569040830657, "loss": 11.6782, "step": 13983 }, { "epoch": 0.2927237712467554, "grad_norm": 0.30665165185928345, "learning_rate": 0.00019535624372513585, "loss": 11.6808, "step": 13984 }, { "epoch": 0.29274470401071756, "grad_norm": 0.32136228680610657, "learning_rate": 0.00019535558332136636, "loss": 11.6771, "step": 13985 }, { "epoch": 0.2927656367746797, "grad_norm": 0.27952632308006287, "learning_rate": 0.0001953549228717575, "loss": 11.676, "step": 13986 }, { "epoch": 0.2927865695386419, "grad_norm": 0.22960899770259857, "learning_rate": 0.00019535426237630966, "loss": 11.6869, "step": 13987 }, { "epoch": 0.29280750230260405, "grad_norm": 0.30424800515174866, "learning_rate": 0.00019535360183502306, "loss": 11.6681, "step": 13988 }, { "epoch": 0.2928284350665662, "grad_norm": 0.3183937668800354, "learning_rate": 0.0001953529412478981, "loss": 11.674, "step": 13989 }, { "epoch": 0.29284936783052834, "grad_norm": 0.23957659304141998, "learning_rate": 0.00019535228061493508, "loss": 11.6587, "step": 13990 }, { "epoch": 0.2928703005944905, "grad_norm": 0.27526170015335083, "learning_rate": 0.00019535161993613432, "loss": 11.6685, "step": 13991 }, { "epoch": 0.29289123335845263, "grad_norm": 0.2558603584766388, "learning_rate": 0.0001953509592114961, "loss": 11.6787, "step": 13992 }, { "epoch": 0.29291216612241483, "grad_norm": 0.37438347935676575, "learning_rate": 0.00019535029844102076, "loss": 11.6578, "step": 13993 }, { "epoch": 0.292933098886377, "grad_norm": 0.33060047030448914, "learning_rate": 0.0001953496376247086, "loss": 11.6742, "step": 13994 }, { "epoch": 0.2929540316503391, "grad_norm": 0.2817925810813904, "learning_rate": 0.00019534897676256, "loss": 11.679, "step": 13995 }, { "epoch": 0.29297496441430126, "grad_norm": 0.2776479423046112, "learning_rate": 0.00019534831585457522, "loss": 11.6629, "step": 13996 }, { "epoch": 0.2929958971782634, "grad_norm": 0.31007108092308044, "learning_rate": 0.0001953476549007546, "loss": 11.692, "step": 13997 }, { "epoch": 0.29301682994222555, "grad_norm": 0.3215950131416321, "learning_rate": 0.00019534699390109846, "loss": 11.6855, "step": 13998 }, { "epoch": 0.2930377627061877, "grad_norm": 0.270306795835495, "learning_rate": 0.0001953463328556071, "loss": 11.6851, "step": 13999 }, { "epoch": 0.2930586954701499, "grad_norm": 0.25063270330429077, "learning_rate": 0.00019534567176428082, "loss": 11.6938, "step": 14000 }, { "epoch": 0.2930586954701499, "eval_loss": 11.676020622253418, "eval_runtime": 34.3153, "eval_samples_per_second": 28.005, "eval_steps_per_second": 7.023, "step": 14000 }, { "epoch": 0.29307962823411204, "grad_norm": 0.2789647877216339, "learning_rate": 0.00019534501062712001, "loss": 11.6743, "step": 14001 }, { "epoch": 0.2931005609980742, "grad_norm": 0.34684115648269653, "learning_rate": 0.0001953443494441249, "loss": 11.6666, "step": 14002 }, { "epoch": 0.29312149376203633, "grad_norm": 0.30588987469673157, "learning_rate": 0.00019534368821529587, "loss": 11.6775, "step": 14003 }, { "epoch": 0.2931424265259985, "grad_norm": 0.3654230535030365, "learning_rate": 0.00019534302694063324, "loss": 11.6888, "step": 14004 }, { "epoch": 0.2931633592899606, "grad_norm": 0.31622248888015747, "learning_rate": 0.0001953423656201373, "loss": 11.6728, "step": 14005 }, { "epoch": 0.2931842920539228, "grad_norm": 0.298191100358963, "learning_rate": 0.00019534170425380835, "loss": 11.66, "step": 14006 }, { "epoch": 0.29320522481788497, "grad_norm": 0.23911231756210327, "learning_rate": 0.0001953410428416467, "loss": 11.6911, "step": 14007 }, { "epoch": 0.2932261575818471, "grad_norm": 0.29735496640205383, "learning_rate": 0.00019534038138365278, "loss": 11.6729, "step": 14008 }, { "epoch": 0.29324709034580926, "grad_norm": 0.28214409947395325, "learning_rate": 0.0001953397198798268, "loss": 11.6663, "step": 14009 }, { "epoch": 0.2932680231097714, "grad_norm": 0.23515437543392181, "learning_rate": 0.00019533905833016909, "loss": 11.6728, "step": 14010 }, { "epoch": 0.29328895587373355, "grad_norm": 0.27417466044425964, "learning_rate": 0.00019533839673467997, "loss": 11.6789, "step": 14011 }, { "epoch": 0.29330988863769575, "grad_norm": 0.25853022933006287, "learning_rate": 0.0001953377350933598, "loss": 11.6736, "step": 14012 }, { "epoch": 0.2933308214016579, "grad_norm": 0.2754870057106018, "learning_rate": 0.00019533707340620886, "loss": 11.6829, "step": 14013 }, { "epoch": 0.29335175416562004, "grad_norm": 0.2840052545070648, "learning_rate": 0.0001953364116732275, "loss": 11.6798, "step": 14014 }, { "epoch": 0.2933726869295822, "grad_norm": 0.29879477620124817, "learning_rate": 0.000195335749894416, "loss": 11.6586, "step": 14015 }, { "epoch": 0.2933936196935443, "grad_norm": 0.31539469957351685, "learning_rate": 0.00019533508806977468, "loss": 11.6924, "step": 14016 }, { "epoch": 0.2934145524575065, "grad_norm": 0.2647011876106262, "learning_rate": 0.0001953344261993039, "loss": 11.6797, "step": 14017 }, { "epoch": 0.2934354852214686, "grad_norm": 0.25374433398246765, "learning_rate": 0.00019533376428300392, "loss": 11.6899, "step": 14018 }, { "epoch": 0.2934564179854308, "grad_norm": 0.27011367678642273, "learning_rate": 0.0001953331023208751, "loss": 11.669, "step": 14019 }, { "epoch": 0.29347735074939296, "grad_norm": 0.33346524834632874, "learning_rate": 0.00019533244031291777, "loss": 11.6724, "step": 14020 }, { "epoch": 0.2934982835133551, "grad_norm": 0.2854616940021515, "learning_rate": 0.0001953317782591322, "loss": 11.6749, "step": 14021 }, { "epoch": 0.29351921627731725, "grad_norm": 0.20834051072597504, "learning_rate": 0.00019533111615951872, "loss": 11.6768, "step": 14022 }, { "epoch": 0.2935401490412794, "grad_norm": 0.2595488429069519, "learning_rate": 0.0001953304540140777, "loss": 11.678, "step": 14023 }, { "epoch": 0.29356108180524154, "grad_norm": 0.23464445769786835, "learning_rate": 0.00019532979182280941, "loss": 11.6779, "step": 14024 }, { "epoch": 0.29358201456920374, "grad_norm": 0.2604767978191376, "learning_rate": 0.00019532912958571417, "loss": 11.6596, "step": 14025 }, { "epoch": 0.2936029473331659, "grad_norm": 0.21633145213127136, "learning_rate": 0.0001953284673027923, "loss": 11.6632, "step": 14026 }, { "epoch": 0.29362388009712803, "grad_norm": 0.2631068825721741, "learning_rate": 0.00019532780497404412, "loss": 11.6694, "step": 14027 }, { "epoch": 0.2936448128610902, "grad_norm": 0.3329166769981384, "learning_rate": 0.00019532714259947, "loss": 11.6807, "step": 14028 }, { "epoch": 0.2936657456250523, "grad_norm": 0.23906973004341125, "learning_rate": 0.00019532648017907015, "loss": 11.6895, "step": 14029 }, { "epoch": 0.29368667838901447, "grad_norm": 0.29043272137641907, "learning_rate": 0.000195325817712845, "loss": 11.6838, "step": 14030 }, { "epoch": 0.29370761115297667, "grad_norm": 0.2775129973888397, "learning_rate": 0.0001953251552007948, "loss": 11.675, "step": 14031 }, { "epoch": 0.2937285439169388, "grad_norm": 0.2959712743759155, "learning_rate": 0.0001953244926429199, "loss": 11.6725, "step": 14032 }, { "epoch": 0.29374947668090096, "grad_norm": 0.3298304080963135, "learning_rate": 0.00019532383003922056, "loss": 11.679, "step": 14033 }, { "epoch": 0.2937704094448631, "grad_norm": 0.3678252100944519, "learning_rate": 0.00019532316738969717, "loss": 11.6926, "step": 14034 }, { "epoch": 0.29379134220882525, "grad_norm": 0.3578104078769684, "learning_rate": 0.00019532250469435006, "loss": 11.6744, "step": 14035 }, { "epoch": 0.2938122749727874, "grad_norm": 0.2546052634716034, "learning_rate": 0.0001953218419531795, "loss": 11.6729, "step": 14036 }, { "epoch": 0.29383320773674954, "grad_norm": 0.28024348616600037, "learning_rate": 0.00019532117916618578, "loss": 11.6684, "step": 14037 }, { "epoch": 0.29385414050071174, "grad_norm": 0.2454255074262619, "learning_rate": 0.0001953205163333693, "loss": 11.6648, "step": 14038 }, { "epoch": 0.2938750732646739, "grad_norm": 0.2451622039079666, "learning_rate": 0.00019531985345473029, "loss": 11.6642, "step": 14039 }, { "epoch": 0.293896006028636, "grad_norm": 0.28601595759391785, "learning_rate": 0.00019531919053026914, "loss": 11.683, "step": 14040 }, { "epoch": 0.29391693879259817, "grad_norm": 0.29101213812828064, "learning_rate": 0.00019531852755998616, "loss": 11.6704, "step": 14041 }, { "epoch": 0.2939378715565603, "grad_norm": 0.26756882667541504, "learning_rate": 0.00019531786454388166, "loss": 11.6697, "step": 14042 }, { "epoch": 0.29395880432052246, "grad_norm": 0.537836492061615, "learning_rate": 0.00019531720148195593, "loss": 11.6883, "step": 14043 }, { "epoch": 0.29397973708448466, "grad_norm": 0.4087158441543579, "learning_rate": 0.00019531653837420932, "loss": 11.6671, "step": 14044 }, { "epoch": 0.2940006698484468, "grad_norm": 0.2302040159702301, "learning_rate": 0.00019531587522064213, "loss": 11.6729, "step": 14045 }, { "epoch": 0.29402160261240895, "grad_norm": 0.29698294401168823, "learning_rate": 0.0001953152120212547, "loss": 11.6706, "step": 14046 }, { "epoch": 0.2940425353763711, "grad_norm": 0.3885432481765747, "learning_rate": 0.00019531454877604734, "loss": 11.6856, "step": 14047 }, { "epoch": 0.29406346814033324, "grad_norm": 0.2535174489021301, "learning_rate": 0.00019531388548502037, "loss": 11.659, "step": 14048 }, { "epoch": 0.2940844009042954, "grad_norm": 0.2678017318248749, "learning_rate": 0.00019531322214817406, "loss": 11.6688, "step": 14049 }, { "epoch": 0.29410533366825753, "grad_norm": 0.2963904142379761, "learning_rate": 0.00019531255876550883, "loss": 11.683, "step": 14050 }, { "epoch": 0.29412626643221973, "grad_norm": 0.23359405994415283, "learning_rate": 0.0001953118953370249, "loss": 11.6791, "step": 14051 }, { "epoch": 0.2941471991961819, "grad_norm": 0.24353229999542236, "learning_rate": 0.0001953112318627227, "loss": 11.6561, "step": 14052 }, { "epoch": 0.294168131960144, "grad_norm": 0.291122704744339, "learning_rate": 0.00019531056834260242, "loss": 11.6657, "step": 14053 }, { "epoch": 0.29418906472410616, "grad_norm": 0.29031333327293396, "learning_rate": 0.00019530990477666443, "loss": 11.6734, "step": 14054 }, { "epoch": 0.2942099974880683, "grad_norm": 0.3028852045536041, "learning_rate": 0.0001953092411649091, "loss": 11.691, "step": 14055 }, { "epoch": 0.29423093025203045, "grad_norm": 0.2546762526035309, "learning_rate": 0.00019530857750733668, "loss": 11.6849, "step": 14056 }, { "epoch": 0.29425186301599265, "grad_norm": 0.34296900033950806, "learning_rate": 0.00019530791380394753, "loss": 11.6896, "step": 14057 }, { "epoch": 0.2942727957799548, "grad_norm": 0.26291030645370483, "learning_rate": 0.00019530725005474195, "loss": 11.681, "step": 14058 }, { "epoch": 0.29429372854391694, "grad_norm": 0.27324387431144714, "learning_rate": 0.00019530658625972026, "loss": 11.6529, "step": 14059 }, { "epoch": 0.2943146613078791, "grad_norm": 0.2965784966945648, "learning_rate": 0.00019530592241888281, "loss": 11.6825, "step": 14060 }, { "epoch": 0.29433559407184123, "grad_norm": 0.31334543228149414, "learning_rate": 0.0001953052585322299, "loss": 11.6813, "step": 14061 }, { "epoch": 0.2943565268358034, "grad_norm": 0.26712900400161743, "learning_rate": 0.0001953045945997618, "loss": 11.6783, "step": 14062 }, { "epoch": 0.2943774595997656, "grad_norm": 0.2795414626598358, "learning_rate": 0.00019530393062147888, "loss": 11.6789, "step": 14063 }, { "epoch": 0.2943983923637277, "grad_norm": 0.2722131907939911, "learning_rate": 0.00019530326659738148, "loss": 11.6592, "step": 14064 }, { "epoch": 0.29441932512768987, "grad_norm": 0.2210148274898529, "learning_rate": 0.00019530260252746985, "loss": 11.683, "step": 14065 }, { "epoch": 0.294440257891652, "grad_norm": 0.22676768898963928, "learning_rate": 0.00019530193841174435, "loss": 11.6788, "step": 14066 }, { "epoch": 0.29446119065561416, "grad_norm": 0.3142639994621277, "learning_rate": 0.00019530127425020536, "loss": 11.661, "step": 14067 }, { "epoch": 0.2944821234195763, "grad_norm": 0.24908782541751862, "learning_rate": 0.00019530061004285307, "loss": 11.6845, "step": 14068 }, { "epoch": 0.29450305618353845, "grad_norm": 0.2669205963611603, "learning_rate": 0.00019529994578968787, "loss": 11.6643, "step": 14069 }, { "epoch": 0.29452398894750065, "grad_norm": 0.2695760726928711, "learning_rate": 0.0001952992814907101, "loss": 11.6591, "step": 14070 }, { "epoch": 0.2945449217114628, "grad_norm": 0.30094674229621887, "learning_rate": 0.0001952986171459201, "loss": 11.6903, "step": 14071 }, { "epoch": 0.29456585447542494, "grad_norm": 0.2880893647670746, "learning_rate": 0.0001952979527553181, "loss": 11.6802, "step": 14072 }, { "epoch": 0.2945867872393871, "grad_norm": 0.2650144398212433, "learning_rate": 0.00019529728831890443, "loss": 11.6802, "step": 14073 }, { "epoch": 0.2946077200033492, "grad_norm": 0.28487077355384827, "learning_rate": 0.0001952966238366795, "loss": 11.6574, "step": 14074 }, { "epoch": 0.29462865276731137, "grad_norm": 0.2807990312576294, "learning_rate": 0.00019529595930864351, "loss": 11.6605, "step": 14075 }, { "epoch": 0.2946495855312736, "grad_norm": 0.26688259840011597, "learning_rate": 0.00019529529473479688, "loss": 11.6673, "step": 14076 }, { "epoch": 0.2946705182952357, "grad_norm": 0.23713217675685883, "learning_rate": 0.0001952946301151399, "loss": 11.6685, "step": 14077 }, { "epoch": 0.29469145105919786, "grad_norm": 0.27782270312309265, "learning_rate": 0.0001952939654496729, "loss": 11.683, "step": 14078 }, { "epoch": 0.29471238382316, "grad_norm": 0.30447515845298767, "learning_rate": 0.00019529330073839613, "loss": 11.6675, "step": 14079 }, { "epoch": 0.29473331658712215, "grad_norm": 0.29871076345443726, "learning_rate": 0.00019529263598131, "loss": 11.67, "step": 14080 }, { "epoch": 0.2947542493510843, "grad_norm": 0.32415708899497986, "learning_rate": 0.00019529197117841479, "loss": 11.6816, "step": 14081 }, { "epoch": 0.2947751821150465, "grad_norm": 0.28643277287483215, "learning_rate": 0.0001952913063297108, "loss": 11.693, "step": 14082 }, { "epoch": 0.29479611487900864, "grad_norm": 0.25629064440727234, "learning_rate": 0.00019529064143519837, "loss": 11.6722, "step": 14083 }, { "epoch": 0.2948170476429708, "grad_norm": 0.2807011604309082, "learning_rate": 0.00019528997649487784, "loss": 11.6778, "step": 14084 }, { "epoch": 0.29483798040693293, "grad_norm": 0.241326242685318, "learning_rate": 0.00019528931150874948, "loss": 11.6681, "step": 14085 }, { "epoch": 0.2948589131708951, "grad_norm": 0.3533110022544861, "learning_rate": 0.00019528864647681366, "loss": 11.6865, "step": 14086 }, { "epoch": 0.2948798459348572, "grad_norm": 0.2481815665960312, "learning_rate": 0.00019528798139907067, "loss": 11.6628, "step": 14087 }, { "epoch": 0.29490077869881937, "grad_norm": 0.2561781704425812, "learning_rate": 0.00019528731627552084, "loss": 11.6664, "step": 14088 }, { "epoch": 0.29492171146278157, "grad_norm": 0.2734224498271942, "learning_rate": 0.00019528665110616448, "loss": 11.686, "step": 14089 }, { "epoch": 0.2949426442267437, "grad_norm": 0.272233784198761, "learning_rate": 0.00019528598589100193, "loss": 11.6721, "step": 14090 }, { "epoch": 0.29496357699070586, "grad_norm": 0.24801160395145416, "learning_rate": 0.0001952853206300335, "loss": 11.6693, "step": 14091 }, { "epoch": 0.294984509754668, "grad_norm": 0.24082809686660767, "learning_rate": 0.0001952846553232595, "loss": 11.6734, "step": 14092 }, { "epoch": 0.29500544251863015, "grad_norm": 0.2878239154815674, "learning_rate": 0.00019528398997068025, "loss": 11.6694, "step": 14093 }, { "epoch": 0.2950263752825923, "grad_norm": 0.3266063332557678, "learning_rate": 0.00019528332457229608, "loss": 11.6772, "step": 14094 }, { "epoch": 0.2950473080465545, "grad_norm": 0.27872082591056824, "learning_rate": 0.0001952826591281073, "loss": 11.6858, "step": 14095 }, { "epoch": 0.29506824081051664, "grad_norm": 0.23858879506587982, "learning_rate": 0.00019528199363811423, "loss": 11.6802, "step": 14096 }, { "epoch": 0.2950891735744788, "grad_norm": 0.30336007475852966, "learning_rate": 0.00019528132810231724, "loss": 11.6701, "step": 14097 }, { "epoch": 0.2951101063384409, "grad_norm": 0.34766891598701477, "learning_rate": 0.00019528066252071655, "loss": 11.6779, "step": 14098 }, { "epoch": 0.29513103910240307, "grad_norm": 0.29092735052108765, "learning_rate": 0.00019527999689331255, "loss": 11.6875, "step": 14099 }, { "epoch": 0.2951519718663652, "grad_norm": 0.2928124964237213, "learning_rate": 0.00019527933122010557, "loss": 11.6811, "step": 14100 }, { "epoch": 0.2951729046303274, "grad_norm": 0.3121495246887207, "learning_rate": 0.0001952786655010959, "loss": 11.6831, "step": 14101 }, { "epoch": 0.29519383739428956, "grad_norm": 0.255238801240921, "learning_rate": 0.00019527799973628388, "loss": 11.6771, "step": 14102 }, { "epoch": 0.2952147701582517, "grad_norm": 0.22681760787963867, "learning_rate": 0.00019527733392566977, "loss": 11.6844, "step": 14103 }, { "epoch": 0.29523570292221385, "grad_norm": 0.3484715521335602, "learning_rate": 0.00019527666806925398, "loss": 11.6804, "step": 14104 }, { "epoch": 0.295256635686176, "grad_norm": 0.316858172416687, "learning_rate": 0.00019527600216703674, "loss": 11.6698, "step": 14105 }, { "epoch": 0.29527756845013814, "grad_norm": 0.23288403451442719, "learning_rate": 0.00019527533621901846, "loss": 11.6592, "step": 14106 }, { "epoch": 0.2952985012141003, "grad_norm": 0.3322683870792389, "learning_rate": 0.0001952746702251994, "loss": 11.6631, "step": 14107 }, { "epoch": 0.2953194339780625, "grad_norm": 0.3063843846321106, "learning_rate": 0.0001952740041855799, "loss": 11.6706, "step": 14108 }, { "epoch": 0.29534036674202463, "grad_norm": 0.26697811484336853, "learning_rate": 0.00019527333810016027, "loss": 11.667, "step": 14109 }, { "epoch": 0.2953612995059868, "grad_norm": 0.29196178913116455, "learning_rate": 0.00019527267196894083, "loss": 11.6798, "step": 14110 }, { "epoch": 0.2953822322699489, "grad_norm": 0.2861813008785248, "learning_rate": 0.00019527200579192192, "loss": 11.6713, "step": 14111 }, { "epoch": 0.29540316503391106, "grad_norm": 0.34837305545806885, "learning_rate": 0.00019527133956910385, "loss": 11.6627, "step": 14112 }, { "epoch": 0.2954240977978732, "grad_norm": 0.28408724069595337, "learning_rate": 0.00019527067330048694, "loss": 11.6726, "step": 14113 }, { "epoch": 0.2954450305618354, "grad_norm": 0.35784104466438293, "learning_rate": 0.00019527000698607152, "loss": 11.6891, "step": 14114 }, { "epoch": 0.29546596332579755, "grad_norm": 0.29911187291145325, "learning_rate": 0.00019526934062585787, "loss": 11.6783, "step": 14115 }, { "epoch": 0.2954868960897597, "grad_norm": 0.25760674476623535, "learning_rate": 0.00019526867421984636, "loss": 11.6698, "step": 14116 }, { "epoch": 0.29550782885372184, "grad_norm": 0.2634181082248688, "learning_rate": 0.00019526800776803726, "loss": 11.6849, "step": 14117 }, { "epoch": 0.295528761617684, "grad_norm": 0.24360093474388123, "learning_rate": 0.00019526734127043094, "loss": 11.6614, "step": 14118 }, { "epoch": 0.29554969438164613, "grad_norm": 0.4623471796512604, "learning_rate": 0.0001952666747270277, "loss": 11.6569, "step": 14119 }, { "epoch": 0.29557062714560833, "grad_norm": 0.24152255058288574, "learning_rate": 0.00019526600813782788, "loss": 11.6858, "step": 14120 }, { "epoch": 0.2955915599095705, "grad_norm": 0.238265722990036, "learning_rate": 0.00019526534150283173, "loss": 11.6616, "step": 14121 }, { "epoch": 0.2956124926735326, "grad_norm": 0.3271554112434387, "learning_rate": 0.00019526467482203964, "loss": 11.6769, "step": 14122 }, { "epoch": 0.29563342543749477, "grad_norm": 0.24058112502098083, "learning_rate": 0.00019526400809545194, "loss": 11.6841, "step": 14123 }, { "epoch": 0.2956543582014569, "grad_norm": 0.2564162015914917, "learning_rate": 0.0001952633413230689, "loss": 11.6535, "step": 14124 }, { "epoch": 0.29567529096541906, "grad_norm": 0.2599796950817108, "learning_rate": 0.00019526267450489088, "loss": 11.6677, "step": 14125 }, { "epoch": 0.2956962237293812, "grad_norm": 0.3069133758544922, "learning_rate": 0.00019526200764091817, "loss": 11.6698, "step": 14126 }, { "epoch": 0.2957171564933434, "grad_norm": 0.2911573052406311, "learning_rate": 0.0001952613407311511, "loss": 11.6599, "step": 14127 }, { "epoch": 0.29573808925730555, "grad_norm": 0.2603919804096222, "learning_rate": 0.00019526067377558998, "loss": 11.6655, "step": 14128 }, { "epoch": 0.2957590220212677, "grad_norm": 0.2938872277736664, "learning_rate": 0.00019526000677423517, "loss": 11.6769, "step": 14129 }, { "epoch": 0.29577995478522984, "grad_norm": 0.24106711149215698, "learning_rate": 0.00019525933972708697, "loss": 11.668, "step": 14130 }, { "epoch": 0.295800887549192, "grad_norm": 0.266252726316452, "learning_rate": 0.00019525867263414567, "loss": 11.6566, "step": 14131 }, { "epoch": 0.2958218203131541, "grad_norm": 0.3518604040145874, "learning_rate": 0.00019525800549541165, "loss": 11.6954, "step": 14132 }, { "epoch": 0.2958427530771163, "grad_norm": 0.25895795226097107, "learning_rate": 0.00019525733831088517, "loss": 11.6715, "step": 14133 }, { "epoch": 0.29586368584107847, "grad_norm": 0.32995906472206116, "learning_rate": 0.0001952566710805666, "loss": 11.6808, "step": 14134 }, { "epoch": 0.2958846186050406, "grad_norm": 0.24911074340343475, "learning_rate": 0.00019525600380445622, "loss": 11.6906, "step": 14135 }, { "epoch": 0.29590555136900276, "grad_norm": 0.3308204114437103, "learning_rate": 0.00019525533648255438, "loss": 11.6691, "step": 14136 }, { "epoch": 0.2959264841329649, "grad_norm": 0.3921905755996704, "learning_rate": 0.00019525466911486138, "loss": 11.6744, "step": 14137 }, { "epoch": 0.29594741689692705, "grad_norm": 0.29638925194740295, "learning_rate": 0.00019525400170137758, "loss": 11.6804, "step": 14138 }, { "epoch": 0.2959683496608892, "grad_norm": 0.29969459772109985, "learning_rate": 0.00019525333424210323, "loss": 11.6535, "step": 14139 }, { "epoch": 0.2959892824248514, "grad_norm": 0.2640223801136017, "learning_rate": 0.00019525266673703871, "loss": 11.6677, "step": 14140 }, { "epoch": 0.29601021518881354, "grad_norm": 0.27706316113471985, "learning_rate": 0.00019525199918618436, "loss": 11.6822, "step": 14141 }, { "epoch": 0.2960311479527757, "grad_norm": 0.23992404341697693, "learning_rate": 0.00019525133158954043, "loss": 11.6773, "step": 14142 }, { "epoch": 0.29605208071673783, "grad_norm": 0.19696879386901855, "learning_rate": 0.00019525066394710728, "loss": 11.6659, "step": 14143 }, { "epoch": 0.2960730134807, "grad_norm": 0.3300172984600067, "learning_rate": 0.00019524999625888522, "loss": 11.6833, "step": 14144 }, { "epoch": 0.2960939462446621, "grad_norm": 0.2893886864185333, "learning_rate": 0.0001952493285248746, "loss": 11.6572, "step": 14145 }, { "epoch": 0.2961148790086243, "grad_norm": 0.28662100434303284, "learning_rate": 0.00019524866074507568, "loss": 11.6816, "step": 14146 }, { "epoch": 0.29613581177258647, "grad_norm": 0.24932345747947693, "learning_rate": 0.00019524799291948885, "loss": 11.6632, "step": 14147 }, { "epoch": 0.2961567445365486, "grad_norm": 0.42003390192985535, "learning_rate": 0.00019524732504811444, "loss": 11.687, "step": 14148 }, { "epoch": 0.29617767730051076, "grad_norm": 0.29494544863700867, "learning_rate": 0.00019524665713095267, "loss": 11.6568, "step": 14149 }, { "epoch": 0.2961986100644729, "grad_norm": 0.2821323275566101, "learning_rate": 0.00019524598916800394, "loss": 11.6683, "step": 14150 }, { "epoch": 0.29621954282843505, "grad_norm": 0.22186248004436493, "learning_rate": 0.00019524532115926854, "loss": 11.6711, "step": 14151 }, { "epoch": 0.29624047559239725, "grad_norm": 0.3209567964076996, "learning_rate": 0.00019524465310474684, "loss": 11.6818, "step": 14152 }, { "epoch": 0.2962614083563594, "grad_norm": 0.35442277789115906, "learning_rate": 0.0001952439850044391, "loss": 11.6659, "step": 14153 }, { "epoch": 0.29628234112032154, "grad_norm": 0.2623220384120941, "learning_rate": 0.00019524331685834566, "loss": 11.6689, "step": 14154 }, { "epoch": 0.2963032738842837, "grad_norm": 0.2552596628665924, "learning_rate": 0.00019524264866646685, "loss": 11.6738, "step": 14155 }, { "epoch": 0.2963242066482458, "grad_norm": 0.3265700936317444, "learning_rate": 0.00019524198042880303, "loss": 11.677, "step": 14156 }, { "epoch": 0.29634513941220797, "grad_norm": 0.31712833046913147, "learning_rate": 0.00019524131214535446, "loss": 11.6849, "step": 14157 }, { "epoch": 0.2963660721761701, "grad_norm": 0.22985993325710297, "learning_rate": 0.0001952406438161215, "loss": 11.6752, "step": 14158 }, { "epoch": 0.2963870049401323, "grad_norm": 0.2827606797218323, "learning_rate": 0.00019523997544110443, "loss": 11.6551, "step": 14159 }, { "epoch": 0.29640793770409446, "grad_norm": 0.2894403040409088, "learning_rate": 0.00019523930702030357, "loss": 11.6608, "step": 14160 }, { "epoch": 0.2964288704680566, "grad_norm": 0.2980426549911499, "learning_rate": 0.00019523863855371932, "loss": 11.6772, "step": 14161 }, { "epoch": 0.29644980323201875, "grad_norm": 0.2676908075809479, "learning_rate": 0.0001952379700413519, "loss": 11.6757, "step": 14162 }, { "epoch": 0.2964707359959809, "grad_norm": 0.23393067717552185, "learning_rate": 0.00019523730148320173, "loss": 11.6942, "step": 14163 }, { "epoch": 0.29649166875994304, "grad_norm": 0.32960328459739685, "learning_rate": 0.000195236632879269, "loss": 11.6661, "step": 14164 }, { "epoch": 0.29651260152390524, "grad_norm": 0.27756571769714355, "learning_rate": 0.0001952359642295542, "loss": 11.6724, "step": 14165 }, { "epoch": 0.2965335342878674, "grad_norm": 0.3324277400970459, "learning_rate": 0.0001952352955340575, "loss": 11.6897, "step": 14166 }, { "epoch": 0.29655446705182953, "grad_norm": 0.2585180103778839, "learning_rate": 0.00019523462679277932, "loss": 11.6806, "step": 14167 }, { "epoch": 0.2965753998157917, "grad_norm": 0.2445530742406845, "learning_rate": 0.00019523395800571995, "loss": 11.6506, "step": 14168 }, { "epoch": 0.2965963325797538, "grad_norm": 0.2188372164964676, "learning_rate": 0.00019523328917287969, "loss": 11.6737, "step": 14169 }, { "epoch": 0.29661726534371596, "grad_norm": 0.2990824282169342, "learning_rate": 0.00019523262029425887, "loss": 11.6739, "step": 14170 }, { "epoch": 0.29663819810767816, "grad_norm": 0.27697959542274475, "learning_rate": 0.00019523195136985787, "loss": 11.6681, "step": 14171 }, { "epoch": 0.2966591308716403, "grad_norm": 0.33401769399642944, "learning_rate": 0.00019523128239967691, "loss": 11.6767, "step": 14172 }, { "epoch": 0.29668006363560245, "grad_norm": 0.2583075761795044, "learning_rate": 0.00019523061338371636, "loss": 11.6558, "step": 14173 }, { "epoch": 0.2967009963995646, "grad_norm": 0.2946791648864746, "learning_rate": 0.0001952299443219766, "loss": 11.6797, "step": 14174 }, { "epoch": 0.29672192916352674, "grad_norm": 0.29109445214271545, "learning_rate": 0.00019522927521445786, "loss": 11.6823, "step": 14175 }, { "epoch": 0.2967428619274889, "grad_norm": 0.2780664563179016, "learning_rate": 0.00019522860606116052, "loss": 11.6882, "step": 14176 }, { "epoch": 0.29676379469145103, "grad_norm": 0.24357642233371735, "learning_rate": 0.00019522793686208484, "loss": 11.6759, "step": 14177 }, { "epoch": 0.29678472745541323, "grad_norm": 0.2974433898925781, "learning_rate": 0.0001952272676172312, "loss": 11.6752, "step": 14178 }, { "epoch": 0.2968056602193754, "grad_norm": 0.3020283579826355, "learning_rate": 0.00019522659832659994, "loss": 11.6604, "step": 14179 }, { "epoch": 0.2968265929833375, "grad_norm": 0.2158190906047821, "learning_rate": 0.00019522592899019132, "loss": 11.6686, "step": 14180 }, { "epoch": 0.29684752574729967, "grad_norm": 0.23163145780563354, "learning_rate": 0.00019522525960800568, "loss": 11.6704, "step": 14181 }, { "epoch": 0.2968684585112618, "grad_norm": 0.23566675186157227, "learning_rate": 0.00019522459018004334, "loss": 11.6655, "step": 14182 }, { "epoch": 0.29688939127522396, "grad_norm": 0.27813246846199036, "learning_rate": 0.00019522392070630466, "loss": 11.6814, "step": 14183 }, { "epoch": 0.29691032403918616, "grad_norm": 0.29198846220970154, "learning_rate": 0.0001952232511867899, "loss": 11.666, "step": 14184 }, { "epoch": 0.2969312568031483, "grad_norm": 0.27863895893096924, "learning_rate": 0.00019522258162149947, "loss": 11.6871, "step": 14185 }, { "epoch": 0.29695218956711045, "grad_norm": 0.2930417060852051, "learning_rate": 0.00019522191201043357, "loss": 11.6588, "step": 14186 }, { "epoch": 0.2969731223310726, "grad_norm": 0.2937825322151184, "learning_rate": 0.00019522124235359263, "loss": 11.673, "step": 14187 }, { "epoch": 0.29699405509503474, "grad_norm": 0.22120636701583862, "learning_rate": 0.00019522057265097692, "loss": 11.6752, "step": 14188 }, { "epoch": 0.2970149878589969, "grad_norm": 0.45085105299949646, "learning_rate": 0.0001952199029025868, "loss": 11.6636, "step": 14189 }, { "epoch": 0.2970359206229591, "grad_norm": 0.2614618241786957, "learning_rate": 0.00019521923310842251, "loss": 11.686, "step": 14190 }, { "epoch": 0.2970568533869212, "grad_norm": 0.3611006438732147, "learning_rate": 0.00019521856326848447, "loss": 11.6765, "step": 14191 }, { "epoch": 0.29707778615088337, "grad_norm": 0.2610997259616852, "learning_rate": 0.00019521789338277295, "loss": 11.6838, "step": 14192 }, { "epoch": 0.2970987189148455, "grad_norm": 0.2179853469133377, "learning_rate": 0.00019521722345128826, "loss": 11.6836, "step": 14193 }, { "epoch": 0.29711965167880766, "grad_norm": 0.24607235193252563, "learning_rate": 0.00019521655347403075, "loss": 11.6776, "step": 14194 }, { "epoch": 0.2971405844427698, "grad_norm": 0.23400336503982544, "learning_rate": 0.00019521588345100072, "loss": 11.6592, "step": 14195 }, { "epoch": 0.29716151720673195, "grad_norm": 0.3976946473121643, "learning_rate": 0.00019521521338219854, "loss": 11.7009, "step": 14196 }, { "epoch": 0.29718244997069415, "grad_norm": 0.35858654975891113, "learning_rate": 0.0001952145432676245, "loss": 11.6692, "step": 14197 }, { "epoch": 0.2972033827346563, "grad_norm": 0.26913896203041077, "learning_rate": 0.0001952138731072789, "loss": 11.6642, "step": 14198 }, { "epoch": 0.29722431549861844, "grad_norm": 0.2818124294281006, "learning_rate": 0.0001952132029011621, "loss": 11.6816, "step": 14199 }, { "epoch": 0.2972452482625806, "grad_norm": 0.3670310080051422, "learning_rate": 0.00019521253264927438, "loss": 11.6776, "step": 14200 }, { "epoch": 0.29726618102654273, "grad_norm": 0.2568022310733795, "learning_rate": 0.0001952118623516161, "loss": 11.6602, "step": 14201 }, { "epoch": 0.2972871137905049, "grad_norm": 0.3102432191371918, "learning_rate": 0.00019521119200818757, "loss": 11.6866, "step": 14202 }, { "epoch": 0.2973080465544671, "grad_norm": 0.32270392775535583, "learning_rate": 0.00019521052161898913, "loss": 11.6847, "step": 14203 }, { "epoch": 0.2973289793184292, "grad_norm": 0.27250516414642334, "learning_rate": 0.00019520985118402106, "loss": 11.6905, "step": 14204 }, { "epoch": 0.29734991208239137, "grad_norm": 0.2672688066959381, "learning_rate": 0.00019520918070328372, "loss": 11.6641, "step": 14205 }, { "epoch": 0.2973708448463535, "grad_norm": 0.2985868752002716, "learning_rate": 0.00019520851017677742, "loss": 11.6711, "step": 14206 }, { "epoch": 0.29739177761031566, "grad_norm": 0.21850019693374634, "learning_rate": 0.00019520783960450246, "loss": 11.6672, "step": 14207 }, { "epoch": 0.2974127103742778, "grad_norm": 0.3202662765979767, "learning_rate": 0.0001952071689864592, "loss": 11.6679, "step": 14208 }, { "epoch": 0.29743364313824, "grad_norm": 0.22402292490005493, "learning_rate": 0.00019520649832264794, "loss": 11.6718, "step": 14209 }, { "epoch": 0.29745457590220215, "grad_norm": 0.23664048314094543, "learning_rate": 0.000195205827613069, "loss": 11.6688, "step": 14210 }, { "epoch": 0.2974755086661643, "grad_norm": 0.21759003400802612, "learning_rate": 0.00019520515685772272, "loss": 11.6817, "step": 14211 }, { "epoch": 0.29749644143012643, "grad_norm": 0.24103528261184692, "learning_rate": 0.00019520448605660944, "loss": 11.6659, "step": 14212 }, { "epoch": 0.2975173741940886, "grad_norm": 0.2303745448589325, "learning_rate": 0.00019520381520972944, "loss": 11.6711, "step": 14213 }, { "epoch": 0.2975383069580507, "grad_norm": 0.25772255659103394, "learning_rate": 0.00019520314431708302, "loss": 11.6746, "step": 14214 }, { "epoch": 0.29755923972201287, "grad_norm": 0.2713398337364197, "learning_rate": 0.0001952024733786706, "loss": 11.6637, "step": 14215 }, { "epoch": 0.29758017248597507, "grad_norm": 0.24420060217380524, "learning_rate": 0.0001952018023944924, "loss": 11.7008, "step": 14216 }, { "epoch": 0.2976011052499372, "grad_norm": 0.22742098569869995, "learning_rate": 0.00019520113136454879, "loss": 11.6729, "step": 14217 }, { "epoch": 0.29762203801389936, "grad_norm": 0.2970460057258606, "learning_rate": 0.00019520046028884008, "loss": 11.6891, "step": 14218 }, { "epoch": 0.2976429707778615, "grad_norm": 0.29848963022232056, "learning_rate": 0.00019519978916736664, "loss": 11.6804, "step": 14219 }, { "epoch": 0.29766390354182365, "grad_norm": 0.2738702893257141, "learning_rate": 0.00019519911800012872, "loss": 11.6824, "step": 14220 }, { "epoch": 0.2976848363057858, "grad_norm": 0.25133612751960754, "learning_rate": 0.00019519844678712672, "loss": 11.6629, "step": 14221 }, { "epoch": 0.297705769069748, "grad_norm": 0.2551265060901642, "learning_rate": 0.00019519777552836086, "loss": 11.6705, "step": 14222 }, { "epoch": 0.29772670183371014, "grad_norm": 0.26934871077537537, "learning_rate": 0.00019519710422383156, "loss": 11.6673, "step": 14223 }, { "epoch": 0.2977476345976723, "grad_norm": 0.25455257296562195, "learning_rate": 0.0001951964328735391, "loss": 11.6788, "step": 14224 }, { "epoch": 0.29776856736163443, "grad_norm": 0.2607925832271576, "learning_rate": 0.00019519576147748377, "loss": 11.6723, "step": 14225 }, { "epoch": 0.2977895001255966, "grad_norm": 0.247784823179245, "learning_rate": 0.00019519509003566597, "loss": 11.6636, "step": 14226 }, { "epoch": 0.2978104328895587, "grad_norm": 0.26919475197792053, "learning_rate": 0.000195194418548086, "loss": 11.6686, "step": 14227 }, { "epoch": 0.2978313656535209, "grad_norm": 0.2689642310142517, "learning_rate": 0.00019519374701474412, "loss": 11.6663, "step": 14228 }, { "epoch": 0.29785229841748306, "grad_norm": 0.20783206820487976, "learning_rate": 0.00019519307543564073, "loss": 11.6663, "step": 14229 }, { "epoch": 0.2978732311814452, "grad_norm": 0.27643895149230957, "learning_rate": 0.0001951924038107761, "loss": 11.6949, "step": 14230 }, { "epoch": 0.29789416394540735, "grad_norm": 0.25983840227127075, "learning_rate": 0.00019519173214015053, "loss": 11.6652, "step": 14231 }, { "epoch": 0.2979150967093695, "grad_norm": 0.2957914173603058, "learning_rate": 0.00019519106042376448, "loss": 11.6714, "step": 14232 }, { "epoch": 0.29793602947333164, "grad_norm": 0.2935206890106201, "learning_rate": 0.00019519038866161811, "loss": 11.6649, "step": 14233 }, { "epoch": 0.2979569622372938, "grad_norm": 0.37926754355430603, "learning_rate": 0.00019518971685371183, "loss": 11.6851, "step": 14234 }, { "epoch": 0.297977895001256, "grad_norm": 0.30057838559150696, "learning_rate": 0.00019518904500004597, "loss": 11.6656, "step": 14235 }, { "epoch": 0.29799882776521813, "grad_norm": 0.3416709005832672, "learning_rate": 0.00019518837310062082, "loss": 11.6888, "step": 14236 }, { "epoch": 0.2980197605291803, "grad_norm": 0.3062872290611267, "learning_rate": 0.0001951877011554367, "loss": 11.6861, "step": 14237 }, { "epoch": 0.2980406932931424, "grad_norm": 0.27806633710861206, "learning_rate": 0.00019518702916449394, "loss": 11.6755, "step": 14238 }, { "epoch": 0.29806162605710457, "grad_norm": 0.2697010040283203, "learning_rate": 0.00019518635712779288, "loss": 11.6733, "step": 14239 }, { "epoch": 0.2980825588210667, "grad_norm": 0.2645370066165924, "learning_rate": 0.00019518568504533384, "loss": 11.6581, "step": 14240 }, { "epoch": 0.2981034915850289, "grad_norm": 0.24942196905612946, "learning_rate": 0.0001951850129171171, "loss": 11.6644, "step": 14241 }, { "epoch": 0.29812442434899106, "grad_norm": 0.2773553133010864, "learning_rate": 0.00019518434074314306, "loss": 11.6737, "step": 14242 }, { "epoch": 0.2981453571129532, "grad_norm": 0.29436033964157104, "learning_rate": 0.00019518366852341195, "loss": 11.6644, "step": 14243 }, { "epoch": 0.29816628987691535, "grad_norm": 0.2540006935596466, "learning_rate": 0.00019518299625792418, "loss": 11.6745, "step": 14244 }, { "epoch": 0.2981872226408775, "grad_norm": 0.4102931320667267, "learning_rate": 0.00019518232394668004, "loss": 11.7124, "step": 14245 }, { "epoch": 0.29820815540483964, "grad_norm": 0.24980682134628296, "learning_rate": 0.00019518165158967984, "loss": 11.6764, "step": 14246 }, { "epoch": 0.2982290881688018, "grad_norm": 1.7042346000671387, "learning_rate": 0.0001951809791869239, "loss": 11.6507, "step": 14247 }, { "epoch": 0.298250020932764, "grad_norm": 0.29139411449432373, "learning_rate": 0.00019518030673841254, "loss": 11.6679, "step": 14248 }, { "epoch": 0.2982709536967261, "grad_norm": 0.31886646151542664, "learning_rate": 0.00019517963424414613, "loss": 11.671, "step": 14249 }, { "epoch": 0.29829188646068827, "grad_norm": 0.2523687481880188, "learning_rate": 0.00019517896170412495, "loss": 11.6786, "step": 14250 }, { "epoch": 0.2983128192246504, "grad_norm": 0.3009592890739441, "learning_rate": 0.00019517828911834933, "loss": 11.6627, "step": 14251 }, { "epoch": 0.29833375198861256, "grad_norm": 0.26354771852493286, "learning_rate": 0.00019517761648681962, "loss": 11.6728, "step": 14252 }, { "epoch": 0.2983546847525747, "grad_norm": 0.33564814925193787, "learning_rate": 0.00019517694380953612, "loss": 11.6829, "step": 14253 }, { "epoch": 0.2983756175165369, "grad_norm": 0.23346967995166779, "learning_rate": 0.00019517627108649914, "loss": 11.6852, "step": 14254 }, { "epoch": 0.29839655028049905, "grad_norm": 0.2805561125278473, "learning_rate": 0.00019517559831770903, "loss": 11.6732, "step": 14255 }, { "epoch": 0.2984174830444612, "grad_norm": 0.27947998046875, "learning_rate": 0.00019517492550316613, "loss": 11.6861, "step": 14256 }, { "epoch": 0.29843841580842334, "grad_norm": 0.29993587732315063, "learning_rate": 0.00019517425264287068, "loss": 11.6637, "step": 14257 }, { "epoch": 0.2984593485723855, "grad_norm": 0.2791336476802826, "learning_rate": 0.0001951735797368231, "loss": 11.675, "step": 14258 }, { "epoch": 0.29848028133634763, "grad_norm": 0.26409995555877686, "learning_rate": 0.00019517290678502364, "loss": 11.6796, "step": 14259 }, { "epoch": 0.29850121410030983, "grad_norm": 0.30133727192878723, "learning_rate": 0.0001951722337874727, "loss": 11.6605, "step": 14260 }, { "epoch": 0.298522146864272, "grad_norm": 0.27729642391204834, "learning_rate": 0.0001951715607441705, "loss": 11.6767, "step": 14261 }, { "epoch": 0.2985430796282341, "grad_norm": 0.2372683435678482, "learning_rate": 0.00019517088765511748, "loss": 11.6787, "step": 14262 }, { "epoch": 0.29856401239219627, "grad_norm": 0.25199174880981445, "learning_rate": 0.0001951702145203139, "loss": 11.6791, "step": 14263 }, { "epoch": 0.2985849451561584, "grad_norm": 0.30033546686172485, "learning_rate": 0.00019516954133976007, "loss": 11.6765, "step": 14264 }, { "epoch": 0.29860587792012055, "grad_norm": 0.2588929235935211, "learning_rate": 0.00019516886811345635, "loss": 11.6818, "step": 14265 }, { "epoch": 0.2986268106840827, "grad_norm": 0.23679837584495544, "learning_rate": 0.00019516819484140303, "loss": 11.6712, "step": 14266 }, { "epoch": 0.2986477434480449, "grad_norm": 0.35840651392936707, "learning_rate": 0.00019516752152360045, "loss": 11.6782, "step": 14267 }, { "epoch": 0.29866867621200704, "grad_norm": 0.27936264872550964, "learning_rate": 0.00019516684816004896, "loss": 11.6727, "step": 14268 }, { "epoch": 0.2986896089759692, "grad_norm": 0.25716254115104675, "learning_rate": 0.00019516617475074882, "loss": 11.6753, "step": 14269 }, { "epoch": 0.29871054173993133, "grad_norm": 0.2877010405063629, "learning_rate": 0.00019516550129570042, "loss": 11.6712, "step": 14270 }, { "epoch": 0.2987314745038935, "grad_norm": 0.2550688683986664, "learning_rate": 0.00019516482779490402, "loss": 11.6785, "step": 14271 }, { "epoch": 0.2987524072678556, "grad_norm": 0.27720075845718384, "learning_rate": 0.00019516415424836002, "loss": 11.6672, "step": 14272 }, { "epoch": 0.2987733400318178, "grad_norm": 0.33668673038482666, "learning_rate": 0.0001951634806560687, "loss": 11.6749, "step": 14273 }, { "epoch": 0.29879427279577997, "grad_norm": 0.2662079632282257, "learning_rate": 0.0001951628070180304, "loss": 11.6784, "step": 14274 }, { "epoch": 0.2988152055597421, "grad_norm": 0.3398618996143341, "learning_rate": 0.00019516213333424542, "loss": 11.697, "step": 14275 }, { "epoch": 0.29883613832370426, "grad_norm": 0.27403584122657776, "learning_rate": 0.00019516145960471408, "loss": 11.6613, "step": 14276 }, { "epoch": 0.2988570710876664, "grad_norm": 0.2497708946466446, "learning_rate": 0.00019516078582943673, "loss": 11.6764, "step": 14277 }, { "epoch": 0.29887800385162855, "grad_norm": 0.34002920985221863, "learning_rate": 0.00019516011200841372, "loss": 11.6744, "step": 14278 }, { "epoch": 0.29889893661559075, "grad_norm": 0.2140975296497345, "learning_rate": 0.00019515943814164529, "loss": 11.6837, "step": 14279 }, { "epoch": 0.2989198693795529, "grad_norm": 0.27515432238578796, "learning_rate": 0.0001951587642291318, "loss": 11.6813, "step": 14280 }, { "epoch": 0.29894080214351504, "grad_norm": 0.3782751262187958, "learning_rate": 0.0001951580902708736, "loss": 11.7055, "step": 14281 }, { "epoch": 0.2989617349074772, "grad_norm": 0.29449179768562317, "learning_rate": 0.00019515741626687102, "loss": 11.6885, "step": 14282 }, { "epoch": 0.29898266767143933, "grad_norm": 0.21828113496303558, "learning_rate": 0.00019515674221712436, "loss": 11.6733, "step": 14283 }, { "epoch": 0.2990036004354015, "grad_norm": 0.31618475914001465, "learning_rate": 0.00019515606812163392, "loss": 11.6483, "step": 14284 }, { "epoch": 0.2990245331993636, "grad_norm": 0.28813472390174866, "learning_rate": 0.0001951553939804001, "loss": 11.6853, "step": 14285 }, { "epoch": 0.2990454659633258, "grad_norm": 0.24854616820812225, "learning_rate": 0.00019515471979342312, "loss": 11.6849, "step": 14286 }, { "epoch": 0.29906639872728796, "grad_norm": 0.2606796622276306, "learning_rate": 0.00019515404556070339, "loss": 11.6823, "step": 14287 }, { "epoch": 0.2990873314912501, "grad_norm": 0.31990736722946167, "learning_rate": 0.0001951533712822412, "loss": 11.6794, "step": 14288 }, { "epoch": 0.29910826425521225, "grad_norm": 0.27550965547561646, "learning_rate": 0.00019515269695803686, "loss": 11.6697, "step": 14289 }, { "epoch": 0.2991291970191744, "grad_norm": 0.23509003221988678, "learning_rate": 0.00019515202258809074, "loss": 11.6761, "step": 14290 }, { "epoch": 0.29915012978313654, "grad_norm": 0.24321968853473663, "learning_rate": 0.00019515134817240312, "loss": 11.6629, "step": 14291 }, { "epoch": 0.29917106254709874, "grad_norm": 0.25087589025497437, "learning_rate": 0.00019515067371097432, "loss": 11.6776, "step": 14292 }, { "epoch": 0.2991919953110609, "grad_norm": 0.32103991508483887, "learning_rate": 0.0001951499992038047, "loss": 11.6749, "step": 14293 }, { "epoch": 0.29921292807502303, "grad_norm": 0.29530659317970276, "learning_rate": 0.00019514932465089459, "loss": 11.6719, "step": 14294 }, { "epoch": 0.2992338608389852, "grad_norm": 0.27315017580986023, "learning_rate": 0.0001951486500522443, "loss": 11.6781, "step": 14295 }, { "epoch": 0.2992547936029473, "grad_norm": 0.2609765827655792, "learning_rate": 0.0001951479754078541, "loss": 11.6741, "step": 14296 }, { "epoch": 0.29927572636690947, "grad_norm": 0.23137131333351135, "learning_rate": 0.00019514730071772438, "loss": 11.6867, "step": 14297 }, { "epoch": 0.29929665913087167, "grad_norm": 0.3200807571411133, "learning_rate": 0.00019514662598185545, "loss": 11.6634, "step": 14298 }, { "epoch": 0.2993175918948338, "grad_norm": 0.2825828194618225, "learning_rate": 0.00019514595120024764, "loss": 11.684, "step": 14299 }, { "epoch": 0.29933852465879596, "grad_norm": 0.2486337125301361, "learning_rate": 0.00019514527637290124, "loss": 11.6861, "step": 14300 }, { "epoch": 0.2993594574227581, "grad_norm": 0.2834842801094055, "learning_rate": 0.0001951446014998166, "loss": 11.6782, "step": 14301 }, { "epoch": 0.29938039018672025, "grad_norm": 0.26595035195350647, "learning_rate": 0.00019514392658099409, "loss": 11.6725, "step": 14302 }, { "epoch": 0.2994013229506824, "grad_norm": 0.30093181133270264, "learning_rate": 0.00019514325161643393, "loss": 11.6806, "step": 14303 }, { "epoch": 0.29942225571464454, "grad_norm": 0.42802274227142334, "learning_rate": 0.00019514257660613652, "loss": 11.6766, "step": 14304 }, { "epoch": 0.29944318847860674, "grad_norm": 0.25092706084251404, "learning_rate": 0.0001951419015501022, "loss": 11.669, "step": 14305 }, { "epoch": 0.2994641212425689, "grad_norm": 0.31985750794410706, "learning_rate": 0.00019514122644833122, "loss": 11.6569, "step": 14306 }, { "epoch": 0.299485054006531, "grad_norm": 0.3519078493118286, "learning_rate": 0.00019514055130082395, "loss": 11.6802, "step": 14307 }, { "epoch": 0.29950598677049317, "grad_norm": 0.2883269488811493, "learning_rate": 0.00019513987610758073, "loss": 11.6692, "step": 14308 }, { "epoch": 0.2995269195344553, "grad_norm": 0.24031448364257812, "learning_rate": 0.00019513920086860188, "loss": 11.6771, "step": 14309 }, { "epoch": 0.29954785229841746, "grad_norm": 0.2409193068742752, "learning_rate": 0.00019513852558388766, "loss": 11.6744, "step": 14310 }, { "epoch": 0.29956878506237966, "grad_norm": 0.24409787356853485, "learning_rate": 0.0001951378502534385, "loss": 11.6875, "step": 14311 }, { "epoch": 0.2995897178263418, "grad_norm": 0.31109362840652466, "learning_rate": 0.00019513717487725462, "loss": 11.6741, "step": 14312 }, { "epoch": 0.29961065059030395, "grad_norm": 0.2897537648677826, "learning_rate": 0.0001951364994553364, "loss": 11.6808, "step": 14313 }, { "epoch": 0.2996315833542661, "grad_norm": 0.24953776597976685, "learning_rate": 0.0001951358239876842, "loss": 11.6795, "step": 14314 }, { "epoch": 0.29965251611822824, "grad_norm": 0.2377653419971466, "learning_rate": 0.00019513514847429827, "loss": 11.6701, "step": 14315 }, { "epoch": 0.2996734488821904, "grad_norm": 0.24807175993919373, "learning_rate": 0.00019513447291517893, "loss": 11.6752, "step": 14316 }, { "epoch": 0.2996943816461526, "grad_norm": 0.2616880238056183, "learning_rate": 0.00019513379731032662, "loss": 11.6781, "step": 14317 }, { "epoch": 0.29971531441011473, "grad_norm": 0.2611749470233917, "learning_rate": 0.00019513312165974153, "loss": 11.6677, "step": 14318 }, { "epoch": 0.2997362471740769, "grad_norm": 0.2987658679485321, "learning_rate": 0.00019513244596342405, "loss": 11.668, "step": 14319 }, { "epoch": 0.299757179938039, "grad_norm": 0.23926720023155212, "learning_rate": 0.00019513177022137455, "loss": 11.6721, "step": 14320 }, { "epoch": 0.29977811270200116, "grad_norm": 0.2874240279197693, "learning_rate": 0.00019513109443359324, "loss": 11.6786, "step": 14321 }, { "epoch": 0.2997990454659633, "grad_norm": 0.2723373770713806, "learning_rate": 0.00019513041860008053, "loss": 11.6782, "step": 14322 }, { "epoch": 0.29981997822992545, "grad_norm": 0.32672661542892456, "learning_rate": 0.00019512974272083673, "loss": 11.6746, "step": 14323 }, { "epoch": 0.29984091099388765, "grad_norm": 0.2717154920101166, "learning_rate": 0.0001951290667958621, "loss": 11.6773, "step": 14324 }, { "epoch": 0.2998618437578498, "grad_norm": 0.2509196102619171, "learning_rate": 0.0001951283908251571, "loss": 11.6734, "step": 14325 }, { "epoch": 0.29988277652181194, "grad_norm": 0.36821863055229187, "learning_rate": 0.00019512771480872194, "loss": 11.6648, "step": 14326 }, { "epoch": 0.2999037092857741, "grad_norm": 0.3232712745666504, "learning_rate": 0.00019512703874655698, "loss": 11.6734, "step": 14327 }, { "epoch": 0.29992464204973623, "grad_norm": 0.2615736126899719, "learning_rate": 0.00019512636263866256, "loss": 11.69, "step": 14328 }, { "epoch": 0.2999455748136984, "grad_norm": 0.3083869218826294, "learning_rate": 0.00019512568648503897, "loss": 11.6922, "step": 14329 }, { "epoch": 0.2999665075776606, "grad_norm": 0.24644501507282257, "learning_rate": 0.00019512501028568657, "loss": 11.6784, "step": 14330 }, { "epoch": 0.2999874403416227, "grad_norm": 0.27955400943756104, "learning_rate": 0.00019512433404060567, "loss": 11.6564, "step": 14331 }, { "epoch": 0.30000837310558487, "grad_norm": 0.24066530168056488, "learning_rate": 0.00019512365774979659, "loss": 11.6734, "step": 14332 }, { "epoch": 0.300029305869547, "grad_norm": 0.29996249079704285, "learning_rate": 0.00019512298141325965, "loss": 11.6735, "step": 14333 }, { "epoch": 0.30005023863350916, "grad_norm": 0.24617494642734528, "learning_rate": 0.0001951223050309952, "loss": 11.6975, "step": 14334 }, { "epoch": 0.3000711713974713, "grad_norm": 0.34105566143989563, "learning_rate": 0.00019512162860300358, "loss": 11.6745, "step": 14335 }, { "epoch": 0.30009210416143345, "grad_norm": 0.29763948917388916, "learning_rate": 0.00019512095212928504, "loss": 11.6633, "step": 14336 }, { "epoch": 0.30011303692539565, "grad_norm": 0.24087457358837128, "learning_rate": 0.00019512027560983998, "loss": 11.6736, "step": 14337 }, { "epoch": 0.3001339696893578, "grad_norm": 0.24521587789058685, "learning_rate": 0.0001951195990446687, "loss": 11.6758, "step": 14338 }, { "epoch": 0.30015490245331994, "grad_norm": 0.2492453008890152, "learning_rate": 0.0001951189224337715, "loss": 11.6651, "step": 14339 }, { "epoch": 0.3001758352172821, "grad_norm": 0.352451354265213, "learning_rate": 0.00019511824577714875, "loss": 11.702, "step": 14340 }, { "epoch": 0.30019676798124423, "grad_norm": 0.2728957533836365, "learning_rate": 0.00019511756907480073, "loss": 11.6879, "step": 14341 }, { "epoch": 0.3002177007452064, "grad_norm": 0.26750022172927856, "learning_rate": 0.00019511689232672781, "loss": 11.6714, "step": 14342 }, { "epoch": 0.3002386335091686, "grad_norm": 0.22193413972854614, "learning_rate": 0.0001951162155329303, "loss": 11.668, "step": 14343 }, { "epoch": 0.3002595662731307, "grad_norm": 0.3365238308906555, "learning_rate": 0.00019511553869340848, "loss": 11.6708, "step": 14344 }, { "epoch": 0.30028049903709286, "grad_norm": 0.23652604222297668, "learning_rate": 0.00019511486180816273, "loss": 11.6635, "step": 14345 }, { "epoch": 0.300301431801055, "grad_norm": 0.3077215254306793, "learning_rate": 0.0001951141848771934, "loss": 11.6682, "step": 14346 }, { "epoch": 0.30032236456501715, "grad_norm": 0.23623280227184296, "learning_rate": 0.00019511350790050072, "loss": 11.6672, "step": 14347 }, { "epoch": 0.3003432973289793, "grad_norm": 0.2561962902545929, "learning_rate": 0.00019511283087808508, "loss": 11.6733, "step": 14348 }, { "epoch": 0.3003642300929415, "grad_norm": 0.33380693197250366, "learning_rate": 0.00019511215380994682, "loss": 11.6826, "step": 14349 }, { "epoch": 0.30038516285690364, "grad_norm": 0.248850017786026, "learning_rate": 0.00019511147669608626, "loss": 11.6746, "step": 14350 }, { "epoch": 0.3004060956208658, "grad_norm": 0.2377271205186844, "learning_rate": 0.00019511079953650368, "loss": 11.6616, "step": 14351 }, { "epoch": 0.30042702838482793, "grad_norm": 0.33237922191619873, "learning_rate": 0.00019511012233119942, "loss": 11.679, "step": 14352 }, { "epoch": 0.3004479611487901, "grad_norm": 0.3350328803062439, "learning_rate": 0.0001951094450801738, "loss": 11.6914, "step": 14353 }, { "epoch": 0.3004688939127522, "grad_norm": 0.3879082500934601, "learning_rate": 0.0001951087677834272, "loss": 11.6974, "step": 14354 }, { "epoch": 0.30048982667671437, "grad_norm": 0.21636703610420227, "learning_rate": 0.0001951080904409599, "loss": 11.6657, "step": 14355 }, { "epoch": 0.30051075944067657, "grad_norm": 0.22964060306549072, "learning_rate": 0.00019510741305277227, "loss": 11.681, "step": 14356 }, { "epoch": 0.3005316922046387, "grad_norm": 0.3098403811454773, "learning_rate": 0.00019510673561886455, "loss": 11.6745, "step": 14357 }, { "epoch": 0.30055262496860086, "grad_norm": 0.22420728206634521, "learning_rate": 0.00019510605813923714, "loss": 11.6775, "step": 14358 }, { "epoch": 0.300573557732563, "grad_norm": 0.3165685534477234, "learning_rate": 0.00019510538061389034, "loss": 11.6573, "step": 14359 }, { "epoch": 0.30059449049652515, "grad_norm": 0.26820915937423706, "learning_rate": 0.00019510470304282449, "loss": 11.667, "step": 14360 }, { "epoch": 0.3006154232604873, "grad_norm": 0.25267699360847473, "learning_rate": 0.00019510402542603986, "loss": 11.6816, "step": 14361 }, { "epoch": 0.3006363560244495, "grad_norm": 0.2929735481739044, "learning_rate": 0.00019510334776353685, "loss": 11.6842, "step": 14362 }, { "epoch": 0.30065728878841164, "grad_norm": 0.24955996870994568, "learning_rate": 0.00019510267005531576, "loss": 11.6898, "step": 14363 }, { "epoch": 0.3006782215523738, "grad_norm": 0.295685350894928, "learning_rate": 0.0001951019923013769, "loss": 11.6826, "step": 14364 }, { "epoch": 0.3006991543163359, "grad_norm": 0.29115477204322815, "learning_rate": 0.0001951013145017206, "loss": 11.6748, "step": 14365 }, { "epoch": 0.30072008708029807, "grad_norm": 0.39609479904174805, "learning_rate": 0.00019510063665634722, "loss": 11.6728, "step": 14366 }, { "epoch": 0.3007410198442602, "grad_norm": 0.2699738144874573, "learning_rate": 0.00019509995876525703, "loss": 11.6777, "step": 14367 }, { "epoch": 0.3007619526082224, "grad_norm": 0.30468013882637024, "learning_rate": 0.0001950992808284504, "loss": 11.6785, "step": 14368 }, { "epoch": 0.30078288537218456, "grad_norm": 0.2205219268798828, "learning_rate": 0.00019509860284592763, "loss": 11.674, "step": 14369 }, { "epoch": 0.3008038181361467, "grad_norm": 0.2819949984550476, "learning_rate": 0.00019509792481768904, "loss": 11.6718, "step": 14370 }, { "epoch": 0.30082475090010885, "grad_norm": 0.32770970463752747, "learning_rate": 0.000195097246743735, "loss": 11.6837, "step": 14371 }, { "epoch": 0.300845683664071, "grad_norm": 0.25912031531333923, "learning_rate": 0.0001950965686240658, "loss": 11.6676, "step": 14372 }, { "epoch": 0.30086661642803314, "grad_norm": 1.0597913265228271, "learning_rate": 0.00019509589045868178, "loss": 11.5841, "step": 14373 }, { "epoch": 0.3008875491919953, "grad_norm": 0.23344609141349792, "learning_rate": 0.00019509521224758325, "loss": 11.6618, "step": 14374 }, { "epoch": 0.3009084819559575, "grad_norm": 0.3020874261856079, "learning_rate": 0.00019509453399077056, "loss": 11.6789, "step": 14375 }, { "epoch": 0.30092941471991963, "grad_norm": 0.2912105321884155, "learning_rate": 0.000195093855688244, "loss": 11.6747, "step": 14376 }, { "epoch": 0.3009503474838818, "grad_norm": 0.2668238580226898, "learning_rate": 0.00019509317734000393, "loss": 11.6765, "step": 14377 }, { "epoch": 0.3009712802478439, "grad_norm": 0.26189467310905457, "learning_rate": 0.00019509249894605066, "loss": 11.679, "step": 14378 }, { "epoch": 0.30099221301180606, "grad_norm": 0.3958357274532318, "learning_rate": 0.00019509182050638454, "loss": 11.6764, "step": 14379 }, { "epoch": 0.3010131457757682, "grad_norm": 0.3106708228588104, "learning_rate": 0.00019509114202100582, "loss": 11.6807, "step": 14380 }, { "epoch": 0.3010340785397304, "grad_norm": 0.252106249332428, "learning_rate": 0.00019509046348991494, "loss": 11.6573, "step": 14381 }, { "epoch": 0.30105501130369255, "grad_norm": 0.2843324542045593, "learning_rate": 0.00019508978491311214, "loss": 11.6816, "step": 14382 }, { "epoch": 0.3010759440676547, "grad_norm": 0.26728659868240356, "learning_rate": 0.00019508910629059779, "loss": 11.6726, "step": 14383 }, { "epoch": 0.30109687683161684, "grad_norm": 0.3436752259731293, "learning_rate": 0.0001950884276223722, "loss": 11.6827, "step": 14384 }, { "epoch": 0.301117809595579, "grad_norm": 0.3495244085788727, "learning_rate": 0.00019508774890843568, "loss": 11.6882, "step": 14385 }, { "epoch": 0.30113874235954113, "grad_norm": 0.2704688310623169, "learning_rate": 0.00019508707014878856, "loss": 11.6647, "step": 14386 }, { "epoch": 0.30115967512350333, "grad_norm": 0.2697959244251251, "learning_rate": 0.00019508639134343123, "loss": 11.6711, "step": 14387 }, { "epoch": 0.3011806078874655, "grad_norm": 0.3403039276599884, "learning_rate": 0.00019508571249236394, "loss": 11.6587, "step": 14388 }, { "epoch": 0.3012015406514276, "grad_norm": 0.21243339776992798, "learning_rate": 0.00019508503359558704, "loss": 11.6618, "step": 14389 }, { "epoch": 0.30122247341538977, "grad_norm": 0.304618239402771, "learning_rate": 0.00019508435465310085, "loss": 11.6759, "step": 14390 }, { "epoch": 0.3012434061793519, "grad_norm": 0.3238202929496765, "learning_rate": 0.0001950836756649057, "loss": 11.6753, "step": 14391 }, { "epoch": 0.30126433894331406, "grad_norm": 0.25248557329177856, "learning_rate": 0.00019508299663100197, "loss": 11.6767, "step": 14392 }, { "epoch": 0.3012852717072762, "grad_norm": 0.2813955843448639, "learning_rate": 0.0001950823175513899, "loss": 11.6855, "step": 14393 }, { "epoch": 0.3013062044712384, "grad_norm": 0.2838180959224701, "learning_rate": 0.00019508163842606985, "loss": 11.6637, "step": 14394 }, { "epoch": 0.30132713723520055, "grad_norm": 0.23463332653045654, "learning_rate": 0.00019508095925504216, "loss": 11.6788, "step": 14395 }, { "epoch": 0.3013480699991627, "grad_norm": 0.27688178420066833, "learning_rate": 0.00019508028003830717, "loss": 11.6677, "step": 14396 }, { "epoch": 0.30136900276312484, "grad_norm": 0.2322741001844406, "learning_rate": 0.00019507960077586517, "loss": 11.6734, "step": 14397 }, { "epoch": 0.301389935527087, "grad_norm": 0.2529422640800476, "learning_rate": 0.00019507892146771648, "loss": 11.6806, "step": 14398 }, { "epoch": 0.3014108682910491, "grad_norm": 0.290473610162735, "learning_rate": 0.00019507824211386146, "loss": 11.6761, "step": 14399 }, { "epoch": 0.30143180105501133, "grad_norm": 0.3056032061576843, "learning_rate": 0.0001950775627143004, "loss": 11.6743, "step": 14400 }, { "epoch": 0.3014527338189735, "grad_norm": 0.23934322595596313, "learning_rate": 0.00019507688326903368, "loss": 11.6672, "step": 14401 }, { "epoch": 0.3014736665829356, "grad_norm": 0.25871458649635315, "learning_rate": 0.00019507620377806162, "loss": 11.6805, "step": 14402 }, { "epoch": 0.30149459934689776, "grad_norm": 0.3360487222671509, "learning_rate": 0.0001950755242413845, "loss": 11.6799, "step": 14403 }, { "epoch": 0.3015155321108599, "grad_norm": 0.30723002552986145, "learning_rate": 0.00019507484465900265, "loss": 11.6699, "step": 14404 }, { "epoch": 0.30153646487482205, "grad_norm": 0.24383330345153809, "learning_rate": 0.00019507416503091642, "loss": 11.6747, "step": 14405 }, { "epoch": 0.30155739763878425, "grad_norm": 0.2151031494140625, "learning_rate": 0.00019507348535712616, "loss": 11.6653, "step": 14406 }, { "epoch": 0.3015783304027464, "grad_norm": 0.284354567527771, "learning_rate": 0.00019507280563763215, "loss": 11.6771, "step": 14407 }, { "epoch": 0.30159926316670854, "grad_norm": 0.24031101167201996, "learning_rate": 0.00019507212587243475, "loss": 11.6771, "step": 14408 }, { "epoch": 0.3016201959306707, "grad_norm": 0.2616669535636902, "learning_rate": 0.00019507144606153426, "loss": 11.6894, "step": 14409 }, { "epoch": 0.30164112869463283, "grad_norm": 0.3520675003528595, "learning_rate": 0.00019507076620493103, "loss": 11.6732, "step": 14410 }, { "epoch": 0.301662061458595, "grad_norm": 0.23271644115447998, "learning_rate": 0.00019507008630262538, "loss": 11.6694, "step": 14411 }, { "epoch": 0.3016829942225571, "grad_norm": 0.39599671959877014, "learning_rate": 0.00019506940635461762, "loss": 11.6809, "step": 14412 }, { "epoch": 0.3017039269865193, "grad_norm": 0.31173738837242126, "learning_rate": 0.00019506872636090812, "loss": 11.6748, "step": 14413 }, { "epoch": 0.30172485975048147, "grad_norm": 0.2687187194824219, "learning_rate": 0.00019506804632149716, "loss": 11.6759, "step": 14414 }, { "epoch": 0.3017457925144436, "grad_norm": 0.2971753180027008, "learning_rate": 0.00019506736623638507, "loss": 11.6715, "step": 14415 }, { "epoch": 0.30176672527840576, "grad_norm": 0.36551138758659363, "learning_rate": 0.00019506668610557218, "loss": 11.6819, "step": 14416 }, { "epoch": 0.3017876580423679, "grad_norm": 0.31665968894958496, "learning_rate": 0.00019506600592905887, "loss": 11.6699, "step": 14417 }, { "epoch": 0.30180859080633005, "grad_norm": 0.33113357424736023, "learning_rate": 0.0001950653257068454, "loss": 11.6837, "step": 14418 }, { "epoch": 0.30182952357029225, "grad_norm": 0.2666895091533661, "learning_rate": 0.00019506464543893215, "loss": 11.6751, "step": 14419 }, { "epoch": 0.3018504563342544, "grad_norm": 0.3496197760105133, "learning_rate": 0.00019506396512531938, "loss": 11.6724, "step": 14420 }, { "epoch": 0.30187138909821654, "grad_norm": 0.2867651581764221, "learning_rate": 0.0001950632847660075, "loss": 11.6823, "step": 14421 }, { "epoch": 0.3018923218621787, "grad_norm": 0.258129358291626, "learning_rate": 0.0001950626043609968, "loss": 11.6691, "step": 14422 }, { "epoch": 0.3019132546261408, "grad_norm": 0.292510449886322, "learning_rate": 0.00019506192391028756, "loss": 11.6775, "step": 14423 }, { "epoch": 0.30193418739010297, "grad_norm": 0.28816381096839905, "learning_rate": 0.00019506124341388015, "loss": 11.6776, "step": 14424 }, { "epoch": 0.3019551201540651, "grad_norm": 1.4443068504333496, "learning_rate": 0.0001950605628717749, "loss": 11.7605, "step": 14425 }, { "epoch": 0.3019760529180273, "grad_norm": 0.2584558427333832, "learning_rate": 0.00019505988228397215, "loss": 11.6564, "step": 14426 }, { "epoch": 0.30199698568198946, "grad_norm": 0.2981829345226288, "learning_rate": 0.0001950592016504722, "loss": 11.677, "step": 14427 }, { "epoch": 0.3020179184459516, "grad_norm": 0.2979842722415924, "learning_rate": 0.00019505852097127536, "loss": 11.6736, "step": 14428 }, { "epoch": 0.30203885120991375, "grad_norm": 0.26390233635902405, "learning_rate": 0.00019505784024638202, "loss": 11.673, "step": 14429 }, { "epoch": 0.3020597839738759, "grad_norm": 0.21344000101089478, "learning_rate": 0.0001950571594757925, "loss": 11.676, "step": 14430 }, { "epoch": 0.30208071673783804, "grad_norm": 0.38321396708488464, "learning_rate": 0.00019505647865950703, "loss": 11.6734, "step": 14431 }, { "epoch": 0.30210164950180024, "grad_norm": 0.30178534984588623, "learning_rate": 0.00019505579779752604, "loss": 11.6886, "step": 14432 }, { "epoch": 0.3021225822657624, "grad_norm": 0.2533649206161499, "learning_rate": 0.00019505511688984982, "loss": 11.6847, "step": 14433 }, { "epoch": 0.30214351502972453, "grad_norm": 0.2283194363117218, "learning_rate": 0.0001950544359364787, "loss": 11.6604, "step": 14434 }, { "epoch": 0.3021644477936867, "grad_norm": 0.2719120383262634, "learning_rate": 0.000195053754937413, "loss": 11.6975, "step": 14435 }, { "epoch": 0.3021853805576488, "grad_norm": 0.2758350670337677, "learning_rate": 0.00019505307389265308, "loss": 11.6762, "step": 14436 }, { "epoch": 0.30220631332161096, "grad_norm": 0.2698848843574524, "learning_rate": 0.00019505239280219922, "loss": 11.6753, "step": 14437 }, { "epoch": 0.30222724608557316, "grad_norm": 0.28055721521377563, "learning_rate": 0.00019505171166605179, "loss": 11.6802, "step": 14438 }, { "epoch": 0.3022481788495353, "grad_norm": 0.2589755356311798, "learning_rate": 0.00019505103048421106, "loss": 11.6701, "step": 14439 }, { "epoch": 0.30226911161349745, "grad_norm": 0.2494654506444931, "learning_rate": 0.00019505034925667742, "loss": 11.6775, "step": 14440 }, { "epoch": 0.3022900443774596, "grad_norm": 0.29943740367889404, "learning_rate": 0.00019504966798345117, "loss": 11.6966, "step": 14441 }, { "epoch": 0.30231097714142174, "grad_norm": 0.22879847884178162, "learning_rate": 0.00019504898666453265, "loss": 11.6598, "step": 14442 }, { "epoch": 0.3023319099053839, "grad_norm": 0.28925612568855286, "learning_rate": 0.00019504830529992216, "loss": 11.6616, "step": 14443 }, { "epoch": 0.30235284266934603, "grad_norm": 0.2846951186656952, "learning_rate": 0.00019504762388962004, "loss": 11.6756, "step": 14444 }, { "epoch": 0.30237377543330823, "grad_norm": 0.2789570093154907, "learning_rate": 0.00019504694243362662, "loss": 11.6732, "step": 14445 }, { "epoch": 0.3023947081972704, "grad_norm": 0.2817361652851105, "learning_rate": 0.00019504626093194225, "loss": 11.679, "step": 14446 }, { "epoch": 0.3024156409612325, "grad_norm": 0.29060396552085876, "learning_rate": 0.00019504557938456723, "loss": 11.6771, "step": 14447 }, { "epoch": 0.30243657372519467, "grad_norm": 0.2882673442363739, "learning_rate": 0.0001950448977915019, "loss": 11.6802, "step": 14448 }, { "epoch": 0.3024575064891568, "grad_norm": 0.26325610280036926, "learning_rate": 0.0001950442161527466, "loss": 11.6628, "step": 14449 }, { "epoch": 0.30247843925311896, "grad_norm": 0.26533976197242737, "learning_rate": 0.00019504353446830162, "loss": 11.6628, "step": 14450 }, { "epoch": 0.30249937201708116, "grad_norm": 0.3042287826538086, "learning_rate": 0.0001950428527381673, "loss": 11.6704, "step": 14451 }, { "epoch": 0.3025203047810433, "grad_norm": 0.3369668424129486, "learning_rate": 0.00019504217096234395, "loss": 11.6767, "step": 14452 }, { "epoch": 0.30254123754500545, "grad_norm": 0.29615333676338196, "learning_rate": 0.00019504148914083197, "loss": 11.6684, "step": 14453 }, { "epoch": 0.3025621703089676, "grad_norm": 0.24557137489318848, "learning_rate": 0.00019504080727363165, "loss": 11.6685, "step": 14454 }, { "epoch": 0.30258310307292974, "grad_norm": 0.2535128593444824, "learning_rate": 0.00019504012536074327, "loss": 11.6739, "step": 14455 }, { "epoch": 0.3026040358368919, "grad_norm": 0.27004504203796387, "learning_rate": 0.0001950394434021672, "loss": 11.6755, "step": 14456 }, { "epoch": 0.3026249686008541, "grad_norm": 0.27892494201660156, "learning_rate": 0.0001950387613979038, "loss": 11.6743, "step": 14457 }, { "epoch": 0.3026459013648162, "grad_norm": 0.28638145327568054, "learning_rate": 0.00019503807934795336, "loss": 11.6776, "step": 14458 }, { "epoch": 0.3026668341287784, "grad_norm": 0.23758049309253693, "learning_rate": 0.00019503739725231618, "loss": 11.6752, "step": 14459 }, { "epoch": 0.3026877668927405, "grad_norm": 0.3174208998680115, "learning_rate": 0.00019503671511099263, "loss": 11.6886, "step": 14460 }, { "epoch": 0.30270869965670266, "grad_norm": 0.26637816429138184, "learning_rate": 0.00019503603292398306, "loss": 11.669, "step": 14461 }, { "epoch": 0.3027296324206648, "grad_norm": 0.26669731736183167, "learning_rate": 0.0001950353506912877, "loss": 11.6605, "step": 14462 }, { "epoch": 0.30275056518462695, "grad_norm": 0.28518787026405334, "learning_rate": 0.000195034668412907, "loss": 11.6702, "step": 14463 }, { "epoch": 0.30277149794858915, "grad_norm": 0.33734583854675293, "learning_rate": 0.00019503398608884117, "loss": 11.6649, "step": 14464 }, { "epoch": 0.3027924307125513, "grad_norm": 0.2376081496477127, "learning_rate": 0.00019503330371909065, "loss": 11.6771, "step": 14465 }, { "epoch": 0.30281336347651344, "grad_norm": 0.4184916019439697, "learning_rate": 0.0001950326213036557, "loss": 11.6751, "step": 14466 }, { "epoch": 0.3028342962404756, "grad_norm": 0.3418104648590088, "learning_rate": 0.00019503193884253667, "loss": 11.6858, "step": 14467 }, { "epoch": 0.30285522900443773, "grad_norm": 0.30382242798805237, "learning_rate": 0.00019503125633573389, "loss": 11.67, "step": 14468 }, { "epoch": 0.3028761617683999, "grad_norm": 0.2777794599533081, "learning_rate": 0.00019503057378324768, "loss": 11.6883, "step": 14469 }, { "epoch": 0.3028970945323621, "grad_norm": 0.26819348335266113, "learning_rate": 0.00019502989118507836, "loss": 11.6696, "step": 14470 }, { "epoch": 0.3029180272963242, "grad_norm": 0.2768038809299469, "learning_rate": 0.00019502920854122624, "loss": 11.6829, "step": 14471 }, { "epoch": 0.30293896006028637, "grad_norm": 0.2810209095478058, "learning_rate": 0.0001950285258516917, "loss": 11.6684, "step": 14472 }, { "epoch": 0.3029598928242485, "grad_norm": 0.27910712361335754, "learning_rate": 0.00019502784311647503, "loss": 11.6793, "step": 14473 }, { "epoch": 0.30298082558821066, "grad_norm": 0.2541058659553528, "learning_rate": 0.0001950271603355766, "loss": 11.6858, "step": 14474 }, { "epoch": 0.3030017583521728, "grad_norm": 0.29930752515792847, "learning_rate": 0.00019502647750899667, "loss": 11.673, "step": 14475 }, { "epoch": 0.303022691116135, "grad_norm": 0.27721887826919556, "learning_rate": 0.00019502579463673566, "loss": 11.6841, "step": 14476 }, { "epoch": 0.30304362388009715, "grad_norm": 0.3889343738555908, "learning_rate": 0.0001950251117187938, "loss": 11.6528, "step": 14477 }, { "epoch": 0.3030645566440593, "grad_norm": 0.27436691522598267, "learning_rate": 0.00019502442875517147, "loss": 11.6788, "step": 14478 }, { "epoch": 0.30308548940802144, "grad_norm": 0.3492986559867859, "learning_rate": 0.000195023745745869, "loss": 11.6679, "step": 14479 }, { "epoch": 0.3031064221719836, "grad_norm": 0.2718517780303955, "learning_rate": 0.00019502306269088675, "loss": 11.6577, "step": 14480 }, { "epoch": 0.3031273549359457, "grad_norm": 0.28939288854599, "learning_rate": 0.000195022379590225, "loss": 11.6768, "step": 14481 }, { "epoch": 0.30314828769990787, "grad_norm": 0.3363107442855835, "learning_rate": 0.000195021696443884, "loss": 11.6896, "step": 14482 }, { "epoch": 0.30316922046387007, "grad_norm": 0.23154103755950928, "learning_rate": 0.00019502101325186426, "loss": 11.6819, "step": 14483 }, { "epoch": 0.3031901532278322, "grad_norm": 0.29605618119239807, "learning_rate": 0.00019502033001416596, "loss": 11.6804, "step": 14484 }, { "epoch": 0.30321108599179436, "grad_norm": 0.3341352641582489, "learning_rate": 0.00019501964673078952, "loss": 11.6683, "step": 14485 }, { "epoch": 0.3032320187557565, "grad_norm": 0.2536396086215973, "learning_rate": 0.00019501896340173524, "loss": 11.6928, "step": 14486 }, { "epoch": 0.30325295151971865, "grad_norm": 0.2909761965274811, "learning_rate": 0.0001950182800270034, "loss": 11.7016, "step": 14487 }, { "epoch": 0.3032738842836808, "grad_norm": 0.3113972842693329, "learning_rate": 0.0001950175966065944, "loss": 11.6778, "step": 14488 }, { "epoch": 0.303294817047643, "grad_norm": 0.2881608307361603, "learning_rate": 0.00019501691314050852, "loss": 11.674, "step": 14489 }, { "epoch": 0.30331574981160514, "grad_norm": 0.3217620849609375, "learning_rate": 0.00019501622962874612, "loss": 11.6925, "step": 14490 }, { "epoch": 0.3033366825755673, "grad_norm": 0.35402747988700867, "learning_rate": 0.0001950155460713075, "loss": 11.6672, "step": 14491 }, { "epoch": 0.30335761533952943, "grad_norm": 0.33927032351493835, "learning_rate": 0.000195014862468193, "loss": 11.6773, "step": 14492 }, { "epoch": 0.3033785481034916, "grad_norm": 0.21953824162483215, "learning_rate": 0.00019501417881940297, "loss": 11.6777, "step": 14493 }, { "epoch": 0.3033994808674537, "grad_norm": 0.37922096252441406, "learning_rate": 0.0001950134951249377, "loss": 11.6776, "step": 14494 }, { "epoch": 0.3034204136314159, "grad_norm": 0.2390032410621643, "learning_rate": 0.00019501281138479753, "loss": 11.6732, "step": 14495 }, { "epoch": 0.30344134639537806, "grad_norm": 0.2508355379104614, "learning_rate": 0.00019501212759898283, "loss": 11.6625, "step": 14496 }, { "epoch": 0.3034622791593402, "grad_norm": 0.30509909987449646, "learning_rate": 0.00019501144376749387, "loss": 11.6837, "step": 14497 }, { "epoch": 0.30348321192330235, "grad_norm": 0.2749782204627991, "learning_rate": 0.00019501075989033102, "loss": 11.6723, "step": 14498 }, { "epoch": 0.3035041446872645, "grad_norm": 0.2483045756816864, "learning_rate": 0.0001950100759674946, "loss": 11.6757, "step": 14499 }, { "epoch": 0.30352507745122664, "grad_norm": 0.27497413754463196, "learning_rate": 0.00019500939199898492, "loss": 11.6942, "step": 14500 }, { "epoch": 0.3035460102151888, "grad_norm": 0.27985724806785583, "learning_rate": 0.00019500870798480234, "loss": 11.6786, "step": 14501 }, { "epoch": 0.303566942979151, "grad_norm": 0.3723919689655304, "learning_rate": 0.00019500802392494713, "loss": 11.6765, "step": 14502 }, { "epoch": 0.30358787574311313, "grad_norm": 0.2205641269683838, "learning_rate": 0.0001950073398194197, "loss": 11.6767, "step": 14503 }, { "epoch": 0.3036088085070753, "grad_norm": 0.268480509519577, "learning_rate": 0.00019500665566822033, "loss": 11.6811, "step": 14504 }, { "epoch": 0.3036297412710374, "grad_norm": 0.2531074285507202, "learning_rate": 0.00019500597147134935, "loss": 11.6587, "step": 14505 }, { "epoch": 0.30365067403499957, "grad_norm": 0.21045663952827454, "learning_rate": 0.00019500528722880706, "loss": 11.6672, "step": 14506 }, { "epoch": 0.3036716067989617, "grad_norm": 0.24161913990974426, "learning_rate": 0.00019500460294059386, "loss": 11.6865, "step": 14507 }, { "epoch": 0.3036925395629239, "grad_norm": 0.24989169836044312, "learning_rate": 0.00019500391860671006, "loss": 11.6728, "step": 14508 }, { "epoch": 0.30371347232688606, "grad_norm": 0.3264669179916382, "learning_rate": 0.00019500323422715594, "loss": 11.7009, "step": 14509 }, { "epoch": 0.3037344050908482, "grad_norm": 0.25030428171157837, "learning_rate": 0.0001950025498019319, "loss": 11.6806, "step": 14510 }, { "epoch": 0.30375533785481035, "grad_norm": 0.23812812566757202, "learning_rate": 0.0001950018653310382, "loss": 11.6612, "step": 14511 }, { "epoch": 0.3037762706187725, "grad_norm": 0.2821640074253082, "learning_rate": 0.0001950011808144752, "loss": 11.6727, "step": 14512 }, { "epoch": 0.30379720338273464, "grad_norm": 0.2753972113132477, "learning_rate": 0.00019500049625224323, "loss": 11.6866, "step": 14513 }, { "epoch": 0.3038181361466968, "grad_norm": 0.32341718673706055, "learning_rate": 0.00019499981164434262, "loss": 11.6687, "step": 14514 }, { "epoch": 0.303839068910659, "grad_norm": 0.23582123219966888, "learning_rate": 0.0001949991269907737, "loss": 11.6885, "step": 14515 }, { "epoch": 0.3038600016746211, "grad_norm": 0.29434624314308167, "learning_rate": 0.00019499844229153677, "loss": 11.6722, "step": 14516 }, { "epoch": 0.3038809344385833, "grad_norm": 0.29025763273239136, "learning_rate": 0.00019499775754663223, "loss": 11.6955, "step": 14517 }, { "epoch": 0.3039018672025454, "grad_norm": 0.2618962228298187, "learning_rate": 0.00019499707275606035, "loss": 11.6542, "step": 14518 }, { "epoch": 0.30392279996650756, "grad_norm": 0.37399545311927795, "learning_rate": 0.00019499638791982147, "loss": 11.674, "step": 14519 }, { "epoch": 0.3039437327304697, "grad_norm": 0.2695391774177551, "learning_rate": 0.00019499570303791591, "loss": 11.6971, "step": 14520 }, { "epoch": 0.3039646654944319, "grad_norm": 0.25119414925575256, "learning_rate": 0.000194995018110344, "loss": 11.6838, "step": 14521 }, { "epoch": 0.30398559825839405, "grad_norm": 0.339436799287796, "learning_rate": 0.0001949943331371061, "loss": 11.6648, "step": 14522 }, { "epoch": 0.3040065310223562, "grad_norm": 0.23767152428627014, "learning_rate": 0.00019499364811820255, "loss": 11.6595, "step": 14523 }, { "epoch": 0.30402746378631834, "grad_norm": 0.26106399297714233, "learning_rate": 0.00019499296305363362, "loss": 11.6711, "step": 14524 }, { "epoch": 0.3040483965502805, "grad_norm": 0.21909809112548828, "learning_rate": 0.00019499227794339964, "loss": 11.6825, "step": 14525 }, { "epoch": 0.30406932931424263, "grad_norm": 0.2717812657356262, "learning_rate": 0.00019499159278750102, "loss": 11.6649, "step": 14526 }, { "epoch": 0.30409026207820483, "grad_norm": 0.274566650390625, "learning_rate": 0.00019499090758593802, "loss": 11.6763, "step": 14527 }, { "epoch": 0.304111194842167, "grad_norm": 0.27992013096809387, "learning_rate": 0.00019499022233871097, "loss": 11.6857, "step": 14528 }, { "epoch": 0.3041321276061291, "grad_norm": 0.3372924029827118, "learning_rate": 0.00019498953704582024, "loss": 11.6611, "step": 14529 }, { "epoch": 0.30415306037009127, "grad_norm": 0.2391049712896347, "learning_rate": 0.00019498885170726613, "loss": 11.679, "step": 14530 }, { "epoch": 0.3041739931340534, "grad_norm": 0.2315891534090042, "learning_rate": 0.00019498816632304897, "loss": 11.6743, "step": 14531 }, { "epoch": 0.30419492589801556, "grad_norm": 0.295379102230072, "learning_rate": 0.0001949874808931691, "loss": 11.6903, "step": 14532 }, { "epoch": 0.3042158586619777, "grad_norm": 0.25181272625923157, "learning_rate": 0.00019498679541762683, "loss": 11.6695, "step": 14533 }, { "epoch": 0.3042367914259399, "grad_norm": 0.26319193840026855, "learning_rate": 0.00019498610989642252, "loss": 11.6693, "step": 14534 }, { "epoch": 0.30425772418990205, "grad_norm": 0.3618348240852356, "learning_rate": 0.0001949854243295565, "loss": 11.6864, "step": 14535 }, { "epoch": 0.3042786569538642, "grad_norm": 0.26094281673431396, "learning_rate": 0.00019498473871702902, "loss": 11.691, "step": 14536 }, { "epoch": 0.30429958971782634, "grad_norm": 0.28329482674598694, "learning_rate": 0.00019498405305884054, "loss": 11.6668, "step": 14537 }, { "epoch": 0.3043205224817885, "grad_norm": 0.28363147377967834, "learning_rate": 0.0001949833673549913, "loss": 11.6825, "step": 14538 }, { "epoch": 0.3043414552457506, "grad_norm": 0.3144206404685974, "learning_rate": 0.00019498268160548162, "loss": 11.6683, "step": 14539 }, { "epoch": 0.3043623880097128, "grad_norm": 0.2865639925003052, "learning_rate": 0.0001949819958103119, "loss": 11.6607, "step": 14540 }, { "epoch": 0.30438332077367497, "grad_norm": 0.27129119634628296, "learning_rate": 0.00019498130996948242, "loss": 11.6624, "step": 14541 }, { "epoch": 0.3044042535376371, "grad_norm": 0.3005031645298004, "learning_rate": 0.00019498062408299354, "loss": 11.6754, "step": 14542 }, { "epoch": 0.30442518630159926, "grad_norm": 0.29773396253585815, "learning_rate": 0.00019497993815084552, "loss": 11.6645, "step": 14543 }, { "epoch": 0.3044461190655614, "grad_norm": 0.33037787675857544, "learning_rate": 0.0001949792521730388, "loss": 11.6704, "step": 14544 }, { "epoch": 0.30446705182952355, "grad_norm": 0.24439214169979095, "learning_rate": 0.00019497856614957362, "loss": 11.6726, "step": 14545 }, { "epoch": 0.30448798459348575, "grad_norm": 0.266495019197464, "learning_rate": 0.00019497788008045032, "loss": 11.6763, "step": 14546 }, { "epoch": 0.3045089173574479, "grad_norm": 0.24645698070526123, "learning_rate": 0.00019497719396566928, "loss": 11.6593, "step": 14547 }, { "epoch": 0.30452985012141004, "grad_norm": 0.2386535406112671, "learning_rate": 0.0001949765078052308, "loss": 11.6747, "step": 14548 }, { "epoch": 0.3045507828853722, "grad_norm": 0.25608035922050476, "learning_rate": 0.0001949758215991352, "loss": 11.6595, "step": 14549 }, { "epoch": 0.30457171564933433, "grad_norm": 0.2502225935459137, "learning_rate": 0.0001949751353473828, "loss": 11.6734, "step": 14550 }, { "epoch": 0.3045926484132965, "grad_norm": 0.3301413655281067, "learning_rate": 0.00019497444904997398, "loss": 11.6665, "step": 14551 }, { "epoch": 0.3046135811772586, "grad_norm": 0.24711520969867706, "learning_rate": 0.00019497376270690903, "loss": 11.6714, "step": 14552 }, { "epoch": 0.3046345139412208, "grad_norm": 0.2706660032272339, "learning_rate": 0.0001949730763181883, "loss": 11.6789, "step": 14553 }, { "epoch": 0.30465544670518296, "grad_norm": 0.2952232360839844, "learning_rate": 0.00019497238988381207, "loss": 11.6679, "step": 14554 }, { "epoch": 0.3046763794691451, "grad_norm": 0.22873736917972565, "learning_rate": 0.00019497170340378075, "loss": 11.6786, "step": 14555 }, { "epoch": 0.30469731223310725, "grad_norm": 0.2626960575580597, "learning_rate": 0.00019497101687809463, "loss": 11.6637, "step": 14556 }, { "epoch": 0.3047182449970694, "grad_norm": 0.26422932744026184, "learning_rate": 0.000194970330306754, "loss": 11.6781, "step": 14557 }, { "epoch": 0.30473917776103154, "grad_norm": 0.3191347122192383, "learning_rate": 0.00019496964368975926, "loss": 11.6742, "step": 14558 }, { "epoch": 0.30476011052499374, "grad_norm": 0.23574335873126984, "learning_rate": 0.0001949689570271107, "loss": 11.681, "step": 14559 }, { "epoch": 0.3047810432889559, "grad_norm": 0.3350891172885895, "learning_rate": 0.00019496827031880866, "loss": 11.6473, "step": 14560 }, { "epoch": 0.30480197605291803, "grad_norm": 0.26960980892181396, "learning_rate": 0.00019496758356485344, "loss": 11.6753, "step": 14561 }, { "epoch": 0.3048229088168802, "grad_norm": 0.2798084020614624, "learning_rate": 0.00019496689676524545, "loss": 11.6651, "step": 14562 }, { "epoch": 0.3048438415808423, "grad_norm": 0.2225833386182785, "learning_rate": 0.00019496620991998494, "loss": 11.6742, "step": 14563 }, { "epoch": 0.30486477434480447, "grad_norm": 0.2493879795074463, "learning_rate": 0.0001949655230290723, "loss": 11.6732, "step": 14564 }, { "epoch": 0.30488570710876667, "grad_norm": 0.3016563653945923, "learning_rate": 0.00019496483609250778, "loss": 11.6746, "step": 14565 }, { "epoch": 0.3049066398727288, "grad_norm": 0.24675816297531128, "learning_rate": 0.0001949641491102918, "loss": 11.6833, "step": 14566 }, { "epoch": 0.30492757263669096, "grad_norm": 0.2520492374897003, "learning_rate": 0.00019496346208242463, "loss": 11.6795, "step": 14567 }, { "epoch": 0.3049485054006531, "grad_norm": 0.23138108849525452, "learning_rate": 0.00019496277500890665, "loss": 11.6672, "step": 14568 }, { "epoch": 0.30496943816461525, "grad_norm": 0.2661174237728119, "learning_rate": 0.0001949620878897381, "loss": 11.6682, "step": 14569 }, { "epoch": 0.3049903709285774, "grad_norm": 0.2766183018684387, "learning_rate": 0.00019496140072491945, "loss": 11.6877, "step": 14570 }, { "epoch": 0.30501130369253954, "grad_norm": 0.3044975697994232, "learning_rate": 0.00019496071351445088, "loss": 11.6676, "step": 14571 }, { "epoch": 0.30503223645650174, "grad_norm": 0.2327665388584137, "learning_rate": 0.00019496002625833282, "loss": 11.6635, "step": 14572 }, { "epoch": 0.3050531692204639, "grad_norm": 0.3237205743789673, "learning_rate": 0.0001949593389565656, "loss": 11.672, "step": 14573 }, { "epoch": 0.305074101984426, "grad_norm": 0.26346075534820557, "learning_rate": 0.0001949586516091495, "loss": 11.6812, "step": 14574 }, { "epoch": 0.30509503474838817, "grad_norm": 0.2864261865615845, "learning_rate": 0.00019495796421608488, "loss": 11.6687, "step": 14575 }, { "epoch": 0.3051159675123503, "grad_norm": 0.2429570108652115, "learning_rate": 0.00019495727677737207, "loss": 11.669, "step": 14576 }, { "epoch": 0.30513690027631246, "grad_norm": 0.224540114402771, "learning_rate": 0.00019495658929301137, "loss": 11.6839, "step": 14577 }, { "epoch": 0.30515783304027466, "grad_norm": 0.29226475954055786, "learning_rate": 0.00019495590176300313, "loss": 11.6789, "step": 14578 }, { "epoch": 0.3051787658042368, "grad_norm": 0.35261818766593933, "learning_rate": 0.0001949552141873477, "loss": 11.6779, "step": 14579 }, { "epoch": 0.30519969856819895, "grad_norm": 0.2663767635822296, "learning_rate": 0.0001949545265660454, "loss": 11.6845, "step": 14580 }, { "epoch": 0.3052206313321611, "grad_norm": 0.2846803069114685, "learning_rate": 0.00019495383889909655, "loss": 11.6594, "step": 14581 }, { "epoch": 0.30524156409612324, "grad_norm": 0.2993335425853729, "learning_rate": 0.00019495315118650148, "loss": 11.6835, "step": 14582 }, { "epoch": 0.3052624968600854, "grad_norm": 0.2398126721382141, "learning_rate": 0.00019495246342826056, "loss": 11.6679, "step": 14583 }, { "epoch": 0.3052834296240476, "grad_norm": 0.23230130970478058, "learning_rate": 0.00019495177562437406, "loss": 11.6497, "step": 14584 }, { "epoch": 0.30530436238800973, "grad_norm": 0.2874374985694885, "learning_rate": 0.00019495108777484234, "loss": 11.6586, "step": 14585 }, { "epoch": 0.3053252951519719, "grad_norm": 0.2965799570083618, "learning_rate": 0.00019495039987966573, "loss": 11.6714, "step": 14586 }, { "epoch": 0.305346227915934, "grad_norm": 0.27131885290145874, "learning_rate": 0.00019494971193884455, "loss": 11.6672, "step": 14587 }, { "epoch": 0.30536716067989617, "grad_norm": 0.3309672474861145, "learning_rate": 0.00019494902395237915, "loss": 11.672, "step": 14588 }, { "epoch": 0.3053880934438583, "grad_norm": 0.2639594078063965, "learning_rate": 0.00019494833592026986, "loss": 11.6782, "step": 14589 }, { "epoch": 0.30540902620782046, "grad_norm": 0.2660384178161621, "learning_rate": 0.000194947647842517, "loss": 11.675, "step": 14590 }, { "epoch": 0.30542995897178266, "grad_norm": 0.40006139874458313, "learning_rate": 0.00019494695971912092, "loss": 11.6925, "step": 14591 }, { "epoch": 0.3054508917357448, "grad_norm": 0.3321239650249481, "learning_rate": 0.00019494627155008189, "loss": 11.6857, "step": 14592 }, { "epoch": 0.30547182449970695, "grad_norm": 0.3326137065887451, "learning_rate": 0.00019494558333540033, "loss": 11.669, "step": 14593 }, { "epoch": 0.3054927572636691, "grad_norm": 0.3064870536327362, "learning_rate": 0.00019494489507507648, "loss": 11.6777, "step": 14594 }, { "epoch": 0.30551369002763124, "grad_norm": 0.29338538646698, "learning_rate": 0.00019494420676911074, "loss": 11.684, "step": 14595 }, { "epoch": 0.3055346227915934, "grad_norm": 0.2518981993198395, "learning_rate": 0.00019494351841750342, "loss": 11.6778, "step": 14596 }, { "epoch": 0.3055555555555556, "grad_norm": 0.2816438674926758, "learning_rate": 0.00019494283002025484, "loss": 11.6727, "step": 14597 }, { "epoch": 0.3055764883195177, "grad_norm": 0.23318780958652496, "learning_rate": 0.00019494214157736534, "loss": 11.6753, "step": 14598 }, { "epoch": 0.30559742108347987, "grad_norm": 0.2761250138282776, "learning_rate": 0.00019494145308883524, "loss": 11.6835, "step": 14599 }, { "epoch": 0.305618353847442, "grad_norm": 0.26266035437583923, "learning_rate": 0.0001949407645546649, "loss": 11.6855, "step": 14600 }, { "epoch": 0.30563928661140416, "grad_norm": 0.2807371914386749, "learning_rate": 0.0001949400759748546, "loss": 11.6658, "step": 14601 }, { "epoch": 0.3056602193753663, "grad_norm": 0.3106034994125366, "learning_rate": 0.00019493938734940476, "loss": 11.6744, "step": 14602 }, { "epoch": 0.30568115213932845, "grad_norm": 0.318865031003952, "learning_rate": 0.0001949386986783156, "loss": 11.6667, "step": 14603 }, { "epoch": 0.30570208490329065, "grad_norm": 0.2915935218334198, "learning_rate": 0.00019493800996158754, "loss": 11.6764, "step": 14604 }, { "epoch": 0.3057230176672528, "grad_norm": 0.3547446131706238, "learning_rate": 0.00019493732119922087, "loss": 11.6854, "step": 14605 }, { "epoch": 0.30574395043121494, "grad_norm": 0.2913258373737335, "learning_rate": 0.00019493663239121594, "loss": 11.6792, "step": 14606 }, { "epoch": 0.3057648831951771, "grad_norm": 0.3119773864746094, "learning_rate": 0.00019493594353757303, "loss": 11.6911, "step": 14607 }, { "epoch": 0.30578581595913923, "grad_norm": 0.24087724089622498, "learning_rate": 0.00019493525463829255, "loss": 11.6727, "step": 14608 }, { "epoch": 0.3058067487231014, "grad_norm": 0.28993144631385803, "learning_rate": 0.00019493456569337475, "loss": 11.6585, "step": 14609 }, { "epoch": 0.3058276814870636, "grad_norm": 0.3122946619987488, "learning_rate": 0.00019493387670282, "loss": 11.6671, "step": 14610 }, { "epoch": 0.3058486142510257, "grad_norm": 0.31482070684432983, "learning_rate": 0.00019493318766662868, "loss": 11.6804, "step": 14611 }, { "epoch": 0.30586954701498786, "grad_norm": 0.29773402214050293, "learning_rate": 0.00019493249858480103, "loss": 11.6812, "step": 14612 }, { "epoch": 0.30589047977895, "grad_norm": 0.24895866215229034, "learning_rate": 0.00019493180945733744, "loss": 11.6891, "step": 14613 }, { "epoch": 0.30591141254291215, "grad_norm": 0.3221995234489441, "learning_rate": 0.00019493112028423824, "loss": 11.6741, "step": 14614 }, { "epoch": 0.3059323453068743, "grad_norm": 0.24641506373882294, "learning_rate": 0.00019493043106550375, "loss": 11.6707, "step": 14615 }, { "epoch": 0.3059532780708365, "grad_norm": 0.28322935104370117, "learning_rate": 0.00019492974180113426, "loss": 11.6662, "step": 14616 }, { "epoch": 0.30597421083479864, "grad_norm": 0.2808358669281006, "learning_rate": 0.0001949290524911302, "loss": 11.6718, "step": 14617 }, { "epoch": 0.3059951435987608, "grad_norm": 0.2324131727218628, "learning_rate": 0.00019492836313549178, "loss": 11.6798, "step": 14618 }, { "epoch": 0.30601607636272293, "grad_norm": 0.30352431535720825, "learning_rate": 0.00019492767373421944, "loss": 11.6847, "step": 14619 }, { "epoch": 0.3060370091266851, "grad_norm": 0.6432470083236694, "learning_rate": 0.00019492698428731344, "loss": 11.609, "step": 14620 }, { "epoch": 0.3060579418906472, "grad_norm": 0.29725393652915955, "learning_rate": 0.00019492629479477415, "loss": 11.6719, "step": 14621 }, { "epoch": 0.30607887465460937, "grad_norm": 0.25125470757484436, "learning_rate": 0.00019492560525660187, "loss": 11.6789, "step": 14622 }, { "epoch": 0.30609980741857157, "grad_norm": 0.25104159116744995, "learning_rate": 0.000194924915672797, "loss": 11.6831, "step": 14623 }, { "epoch": 0.3061207401825337, "grad_norm": 0.3061666786670685, "learning_rate": 0.00019492422604335975, "loss": 11.6684, "step": 14624 }, { "epoch": 0.30614167294649586, "grad_norm": 0.32533299922943115, "learning_rate": 0.00019492353636829055, "loss": 11.6767, "step": 14625 }, { "epoch": 0.306162605710458, "grad_norm": 0.34731608629226685, "learning_rate": 0.0001949228466475897, "loss": 11.6676, "step": 14626 }, { "epoch": 0.30618353847442015, "grad_norm": 0.4490818679332733, "learning_rate": 0.00019492215688125754, "loss": 11.69, "step": 14627 }, { "epoch": 0.3062044712383823, "grad_norm": 0.23187106847763062, "learning_rate": 0.00019492146706929443, "loss": 11.6712, "step": 14628 }, { "epoch": 0.3062254040023445, "grad_norm": 0.2757343053817749, "learning_rate": 0.00019492077721170063, "loss": 11.6833, "step": 14629 }, { "epoch": 0.30624633676630664, "grad_norm": 0.2703264057636261, "learning_rate": 0.00019492008730847654, "loss": 11.6732, "step": 14630 }, { "epoch": 0.3062672695302688, "grad_norm": 0.34570324420928955, "learning_rate": 0.00019491939735962241, "loss": 11.6656, "step": 14631 }, { "epoch": 0.3062882022942309, "grad_norm": 0.23966771364212036, "learning_rate": 0.00019491870736513867, "loss": 11.6671, "step": 14632 }, { "epoch": 0.30630913505819307, "grad_norm": 0.2911324203014374, "learning_rate": 0.00019491801732502556, "loss": 11.659, "step": 14633 }, { "epoch": 0.3063300678221552, "grad_norm": 0.3063318431377411, "learning_rate": 0.0001949173272392835, "loss": 11.6835, "step": 14634 }, { "epoch": 0.3063510005861174, "grad_norm": 0.2518276870250702, "learning_rate": 0.00019491663710791276, "loss": 11.6804, "step": 14635 }, { "epoch": 0.30637193335007956, "grad_norm": 0.2934921085834503, "learning_rate": 0.0001949159469309137, "loss": 11.6718, "step": 14636 }, { "epoch": 0.3063928661140417, "grad_norm": 0.2938697636127472, "learning_rate": 0.00019491525670828664, "loss": 11.6684, "step": 14637 }, { "epoch": 0.30641379887800385, "grad_norm": 0.39624133706092834, "learning_rate": 0.0001949145664400319, "loss": 11.6634, "step": 14638 }, { "epoch": 0.306434731641966, "grad_norm": 0.2737155854701996, "learning_rate": 0.00019491387612614983, "loss": 11.6909, "step": 14639 }, { "epoch": 0.30645566440592814, "grad_norm": 0.2831268608570099, "learning_rate": 0.00019491318576664077, "loss": 11.673, "step": 14640 }, { "epoch": 0.3064765971698903, "grad_norm": 0.2644246518611908, "learning_rate": 0.00019491249536150504, "loss": 11.6788, "step": 14641 }, { "epoch": 0.3064975299338525, "grad_norm": 0.3332579433917999, "learning_rate": 0.00019491180491074296, "loss": 11.6821, "step": 14642 }, { "epoch": 0.30651846269781463, "grad_norm": 0.2759631872177124, "learning_rate": 0.0001949111144143549, "loss": 11.6617, "step": 14643 }, { "epoch": 0.3065393954617768, "grad_norm": 0.2637336254119873, "learning_rate": 0.00019491042387234116, "loss": 11.6835, "step": 14644 }, { "epoch": 0.3065603282257389, "grad_norm": 0.26067355275154114, "learning_rate": 0.00019490973328470203, "loss": 11.6876, "step": 14645 }, { "epoch": 0.30658126098970107, "grad_norm": 0.28389450907707214, "learning_rate": 0.00019490904265143794, "loss": 11.6665, "step": 14646 }, { "epoch": 0.3066021937536632, "grad_norm": 0.4407258927822113, "learning_rate": 0.00019490835197254915, "loss": 11.6785, "step": 14647 }, { "epoch": 0.3066231265176254, "grad_norm": 0.3541334569454193, "learning_rate": 0.000194907661248036, "loss": 11.6856, "step": 14648 }, { "epoch": 0.30664405928158756, "grad_norm": 0.29882219433784485, "learning_rate": 0.00019490697047789884, "loss": 11.6824, "step": 14649 }, { "epoch": 0.3066649920455497, "grad_norm": 0.3207789361476898, "learning_rate": 0.00019490627966213802, "loss": 11.6807, "step": 14650 }, { "epoch": 0.30668592480951185, "grad_norm": 0.23970957100391388, "learning_rate": 0.00019490558880075386, "loss": 11.6659, "step": 14651 }, { "epoch": 0.306706857573474, "grad_norm": 0.2441159337759018, "learning_rate": 0.00019490489789374663, "loss": 11.6892, "step": 14652 }, { "epoch": 0.30672779033743613, "grad_norm": 0.2913559377193451, "learning_rate": 0.00019490420694111675, "loss": 11.6648, "step": 14653 }, { "epoch": 0.30674872310139834, "grad_norm": 0.30441614985466003, "learning_rate": 0.0001949035159428645, "loss": 11.6602, "step": 14654 }, { "epoch": 0.3067696558653605, "grad_norm": 0.2974947690963745, "learning_rate": 0.00019490282489899023, "loss": 11.6835, "step": 14655 }, { "epoch": 0.3067905886293226, "grad_norm": 0.3263629078865051, "learning_rate": 0.0001949021338094943, "loss": 11.6689, "step": 14656 }, { "epoch": 0.30681152139328477, "grad_norm": 0.2655569314956665, "learning_rate": 0.00019490144267437698, "loss": 11.6673, "step": 14657 }, { "epoch": 0.3068324541572469, "grad_norm": 0.3011240065097809, "learning_rate": 0.00019490075149363863, "loss": 11.6658, "step": 14658 }, { "epoch": 0.30685338692120906, "grad_norm": 0.32687219977378845, "learning_rate": 0.0001949000602672796, "loss": 11.6789, "step": 14659 }, { "epoch": 0.3068743196851712, "grad_norm": 0.3650446832180023, "learning_rate": 0.00019489936899530022, "loss": 11.6735, "step": 14660 }, { "epoch": 0.3068952524491334, "grad_norm": 0.3801429569721222, "learning_rate": 0.0001948986776777008, "loss": 11.6714, "step": 14661 }, { "epoch": 0.30691618521309555, "grad_norm": 0.3262176513671875, "learning_rate": 0.0001948979863144817, "loss": 11.6637, "step": 14662 }, { "epoch": 0.3069371179770577, "grad_norm": 0.2533968985080719, "learning_rate": 0.0001948972949056432, "loss": 11.67, "step": 14663 }, { "epoch": 0.30695805074101984, "grad_norm": 0.29713135957717896, "learning_rate": 0.0001948966034511857, "loss": 11.6716, "step": 14664 }, { "epoch": 0.306978983504982, "grad_norm": 0.27264919877052307, "learning_rate": 0.0001948959119511095, "loss": 11.6661, "step": 14665 }, { "epoch": 0.30699991626894413, "grad_norm": 0.3987010717391968, "learning_rate": 0.0001948952204054149, "loss": 11.68, "step": 14666 }, { "epoch": 0.30702084903290633, "grad_norm": 0.2551250457763672, "learning_rate": 0.0001948945288141023, "loss": 11.6829, "step": 14667 }, { "epoch": 0.3070417817968685, "grad_norm": 0.3063429892063141, "learning_rate": 0.00019489383717717198, "loss": 11.6635, "step": 14668 }, { "epoch": 0.3070627145608306, "grad_norm": 0.33145272731781006, "learning_rate": 0.00019489314549462432, "loss": 11.6644, "step": 14669 }, { "epoch": 0.30708364732479276, "grad_norm": 0.26504701375961304, "learning_rate": 0.0001948924537664596, "loss": 11.6825, "step": 14670 }, { "epoch": 0.3071045800887549, "grad_norm": 0.47676005959510803, "learning_rate": 0.00019489176199267817, "loss": 11.6789, "step": 14671 }, { "epoch": 0.30712551285271705, "grad_norm": 0.30770888924598694, "learning_rate": 0.00019489107017328037, "loss": 11.6751, "step": 14672 }, { "epoch": 0.30714644561667925, "grad_norm": 0.31990474462509155, "learning_rate": 0.00019489037830826655, "loss": 11.6848, "step": 14673 }, { "epoch": 0.3071673783806414, "grad_norm": 0.25296902656555176, "learning_rate": 0.000194889686397637, "loss": 11.6794, "step": 14674 }, { "epoch": 0.30718831114460354, "grad_norm": 0.32944291830062866, "learning_rate": 0.00019488899444139207, "loss": 11.6881, "step": 14675 }, { "epoch": 0.3072092439085657, "grad_norm": 0.28293919563293457, "learning_rate": 0.00019488830243953213, "loss": 11.6644, "step": 14676 }, { "epoch": 0.30723017667252783, "grad_norm": 0.2501629889011383, "learning_rate": 0.00019488761039205746, "loss": 11.6792, "step": 14677 }, { "epoch": 0.30725110943649, "grad_norm": 0.26290765404701233, "learning_rate": 0.00019488691829896843, "loss": 11.6583, "step": 14678 }, { "epoch": 0.3072720422004521, "grad_norm": 0.29543933272361755, "learning_rate": 0.00019488622616026534, "loss": 11.6426, "step": 14679 }, { "epoch": 0.3072929749644143, "grad_norm": 0.28286969661712646, "learning_rate": 0.00019488553397594856, "loss": 11.6658, "step": 14680 }, { "epoch": 0.30731390772837647, "grad_norm": 0.2368829846382141, "learning_rate": 0.0001948848417460184, "loss": 11.6481, "step": 14681 }, { "epoch": 0.3073348404923386, "grad_norm": 0.2814379036426544, "learning_rate": 0.00019488414947047518, "loss": 11.6716, "step": 14682 }, { "epoch": 0.30735577325630076, "grad_norm": 0.3080003559589386, "learning_rate": 0.00019488345714931927, "loss": 11.6822, "step": 14683 }, { "epoch": 0.3073767060202629, "grad_norm": 0.2653907239437103, "learning_rate": 0.00019488276478255094, "loss": 11.6714, "step": 14684 }, { "epoch": 0.30739763878422505, "grad_norm": 0.2340192049741745, "learning_rate": 0.00019488207237017057, "loss": 11.66, "step": 14685 }, { "epoch": 0.30741857154818725, "grad_norm": 0.28434571623802185, "learning_rate": 0.00019488137991217853, "loss": 11.6642, "step": 14686 }, { "epoch": 0.3074395043121494, "grad_norm": 0.31543079018592834, "learning_rate": 0.00019488068740857508, "loss": 11.6798, "step": 14687 }, { "epoch": 0.30746043707611154, "grad_norm": 0.2563920021057129, "learning_rate": 0.00019487999485936057, "loss": 11.6845, "step": 14688 }, { "epoch": 0.3074813698400737, "grad_norm": 0.2932000160217285, "learning_rate": 0.00019487930226453536, "loss": 11.6802, "step": 14689 }, { "epoch": 0.3075023026040358, "grad_norm": 0.33440351486206055, "learning_rate": 0.0001948786096240998, "loss": 11.6819, "step": 14690 }, { "epoch": 0.30752323536799797, "grad_norm": 0.40660107135772705, "learning_rate": 0.00019487791693805412, "loss": 11.6886, "step": 14691 }, { "epoch": 0.30754416813196017, "grad_norm": 0.24521507322788239, "learning_rate": 0.00019487722420639877, "loss": 11.6764, "step": 14692 }, { "epoch": 0.3075651008959223, "grad_norm": 0.3380935788154602, "learning_rate": 0.00019487653142913404, "loss": 11.6867, "step": 14693 }, { "epoch": 0.30758603365988446, "grad_norm": 0.31239598989486694, "learning_rate": 0.00019487583860626026, "loss": 11.6851, "step": 14694 }, { "epoch": 0.3076069664238466, "grad_norm": 0.32012060284614563, "learning_rate": 0.00019487514573777776, "loss": 11.6879, "step": 14695 }, { "epoch": 0.30762789918780875, "grad_norm": 0.31700417399406433, "learning_rate": 0.00019487445282368685, "loss": 11.6889, "step": 14696 }, { "epoch": 0.3076488319517709, "grad_norm": 0.28537067770957947, "learning_rate": 0.0001948737598639879, "loss": 11.6665, "step": 14697 }, { "epoch": 0.30766976471573304, "grad_norm": 0.25676774978637695, "learning_rate": 0.00019487306685868125, "loss": 11.6685, "step": 14698 }, { "epoch": 0.30769069747969524, "grad_norm": 0.258202463388443, "learning_rate": 0.00019487237380776722, "loss": 11.6708, "step": 14699 }, { "epoch": 0.3077116302436574, "grad_norm": 0.268070787191391, "learning_rate": 0.0001948716807112461, "loss": 11.6571, "step": 14700 }, { "epoch": 0.30773256300761953, "grad_norm": 0.2840063273906708, "learning_rate": 0.0001948709875691183, "loss": 11.6864, "step": 14701 }, { "epoch": 0.3077534957715817, "grad_norm": 0.22761766612529755, "learning_rate": 0.00019487029438138408, "loss": 11.6738, "step": 14702 }, { "epoch": 0.3077744285355438, "grad_norm": 0.2732684910297394, "learning_rate": 0.00019486960114804385, "loss": 11.6877, "step": 14703 }, { "epoch": 0.30779536129950597, "grad_norm": 0.2253868132829666, "learning_rate": 0.00019486890786909788, "loss": 11.6804, "step": 14704 }, { "epoch": 0.30781629406346817, "grad_norm": 0.23151375353336334, "learning_rate": 0.00019486821454454652, "loss": 11.6744, "step": 14705 }, { "epoch": 0.3078372268274303, "grad_norm": 0.27941399812698364, "learning_rate": 0.00019486752117439012, "loss": 11.6774, "step": 14706 }, { "epoch": 0.30785815959139246, "grad_norm": 0.21670864522457123, "learning_rate": 0.000194866827758629, "loss": 11.6707, "step": 14707 }, { "epoch": 0.3078790923553546, "grad_norm": 0.7185338735580444, "learning_rate": 0.00019486613429726347, "loss": 11.6499, "step": 14708 }, { "epoch": 0.30790002511931674, "grad_norm": 0.27382975816726685, "learning_rate": 0.00019486544079029393, "loss": 11.6864, "step": 14709 }, { "epoch": 0.3079209578832789, "grad_norm": 0.30920329689979553, "learning_rate": 0.00019486474723772067, "loss": 11.6769, "step": 14710 }, { "epoch": 0.30794189064724103, "grad_norm": 0.2895990014076233, "learning_rate": 0.00019486405363954397, "loss": 11.6816, "step": 14711 }, { "epoch": 0.30796282341120323, "grad_norm": 0.22659744322299957, "learning_rate": 0.00019486335999576427, "loss": 11.677, "step": 14712 }, { "epoch": 0.3079837561751654, "grad_norm": 0.24175703525543213, "learning_rate": 0.0001948626663063818, "loss": 11.6906, "step": 14713 }, { "epoch": 0.3080046889391275, "grad_norm": 0.2727223038673401, "learning_rate": 0.000194861972571397, "loss": 11.6724, "step": 14714 }, { "epoch": 0.30802562170308967, "grad_norm": 0.33167192339897156, "learning_rate": 0.00019486127879081013, "loss": 11.6837, "step": 14715 }, { "epoch": 0.3080465544670518, "grad_norm": 0.3082716166973114, "learning_rate": 0.00019486058496462154, "loss": 11.6757, "step": 14716 }, { "epoch": 0.30806748723101396, "grad_norm": 0.32888758182525635, "learning_rate": 0.00019485989109283155, "loss": 11.6818, "step": 14717 }, { "epoch": 0.30808841999497616, "grad_norm": 0.20771414041519165, "learning_rate": 0.00019485919717544055, "loss": 11.6858, "step": 14718 }, { "epoch": 0.3081093527589383, "grad_norm": 0.24417398869991302, "learning_rate": 0.0001948585032124488, "loss": 11.6688, "step": 14719 }, { "epoch": 0.30813028552290045, "grad_norm": 0.2736372947692871, "learning_rate": 0.00019485780920385667, "loss": 11.6674, "step": 14720 }, { "epoch": 0.3081512182868626, "grad_norm": 0.2343885749578476, "learning_rate": 0.00019485711514966448, "loss": 11.6779, "step": 14721 }, { "epoch": 0.30817215105082474, "grad_norm": 0.37023499608039856, "learning_rate": 0.00019485642104987263, "loss": 11.6889, "step": 14722 }, { "epoch": 0.3081930838147869, "grad_norm": 0.23833006620407104, "learning_rate": 0.00019485572690448134, "loss": 11.6601, "step": 14723 }, { "epoch": 0.3082140165787491, "grad_norm": 0.25690707564353943, "learning_rate": 0.000194855032713491, "loss": 11.6616, "step": 14724 }, { "epoch": 0.30823494934271123, "grad_norm": 0.27082398533821106, "learning_rate": 0.00019485433847690197, "loss": 11.6849, "step": 14725 }, { "epoch": 0.3082558821066734, "grad_norm": 0.3487207889556885, "learning_rate": 0.00019485364419471454, "loss": 11.6625, "step": 14726 }, { "epoch": 0.3082768148706355, "grad_norm": 0.33994248509407043, "learning_rate": 0.00019485294986692911, "loss": 11.6757, "step": 14727 }, { "epoch": 0.30829774763459766, "grad_norm": 0.3058052957057953, "learning_rate": 0.0001948522554935459, "loss": 11.6653, "step": 14728 }, { "epoch": 0.3083186803985598, "grad_norm": 0.2531087398529053, "learning_rate": 0.00019485156107456537, "loss": 11.6884, "step": 14729 }, { "epoch": 0.30833961316252195, "grad_norm": 0.2867588698863983, "learning_rate": 0.00019485086660998776, "loss": 11.673, "step": 14730 }, { "epoch": 0.30836054592648415, "grad_norm": 0.22319568693637848, "learning_rate": 0.00019485017209981345, "loss": 11.67, "step": 14731 }, { "epoch": 0.3083814786904463, "grad_norm": 0.3030359148979187, "learning_rate": 0.00019484947754404273, "loss": 11.6783, "step": 14732 }, { "epoch": 0.30840241145440844, "grad_norm": 0.2691345810890198, "learning_rate": 0.00019484878294267599, "loss": 11.68, "step": 14733 }, { "epoch": 0.3084233442183706, "grad_norm": 0.24011676013469696, "learning_rate": 0.00019484808829571356, "loss": 11.6634, "step": 14734 }, { "epoch": 0.30844427698233273, "grad_norm": 0.2579514980316162, "learning_rate": 0.0001948473936031557, "loss": 11.672, "step": 14735 }, { "epoch": 0.3084652097462949, "grad_norm": 0.3040193021297455, "learning_rate": 0.00019484669886500285, "loss": 11.6655, "step": 14736 }, { "epoch": 0.3084861425102571, "grad_norm": 0.33624181151390076, "learning_rate": 0.00019484600408125523, "loss": 11.6806, "step": 14737 }, { "epoch": 0.3085070752742192, "grad_norm": 0.28600627183914185, "learning_rate": 0.0001948453092519133, "loss": 11.6759, "step": 14738 }, { "epoch": 0.30852800803818137, "grad_norm": 0.2561083436012268, "learning_rate": 0.0001948446143769773, "loss": 11.6666, "step": 14739 }, { "epoch": 0.3085489408021435, "grad_norm": 0.23624077439308167, "learning_rate": 0.0001948439194564476, "loss": 11.6722, "step": 14740 }, { "epoch": 0.30856987356610566, "grad_norm": 0.23531149327754974, "learning_rate": 0.00019484322449032451, "loss": 11.6617, "step": 14741 }, { "epoch": 0.3085908063300678, "grad_norm": 0.30663740634918213, "learning_rate": 0.0001948425294786084, "loss": 11.6579, "step": 14742 }, { "epoch": 0.30861173909403, "grad_norm": 0.44513005018234253, "learning_rate": 0.00019484183442129957, "loss": 11.6881, "step": 14743 }, { "epoch": 0.30863267185799215, "grad_norm": 0.24582891166210175, "learning_rate": 0.00019484113931839838, "loss": 11.6754, "step": 14744 }, { "epoch": 0.3086536046219543, "grad_norm": 0.26019448041915894, "learning_rate": 0.00019484044416990517, "loss": 11.6755, "step": 14745 }, { "epoch": 0.30867453738591644, "grad_norm": 0.2660120725631714, "learning_rate": 0.00019483974897582026, "loss": 11.6848, "step": 14746 }, { "epoch": 0.3086954701498786, "grad_norm": 0.2617216110229492, "learning_rate": 0.00019483905373614393, "loss": 11.6745, "step": 14747 }, { "epoch": 0.3087164029138407, "grad_norm": 0.26920294761657715, "learning_rate": 0.0001948383584508766, "loss": 11.6743, "step": 14748 }, { "epoch": 0.30873733567780287, "grad_norm": 0.3785596489906311, "learning_rate": 0.00019483766312001856, "loss": 11.6656, "step": 14749 }, { "epoch": 0.30875826844176507, "grad_norm": 0.2819083631038666, "learning_rate": 0.0001948369677435702, "loss": 11.6739, "step": 14750 }, { "epoch": 0.3087792012057272, "grad_norm": 0.2997968792915344, "learning_rate": 0.00019483627232153174, "loss": 11.6639, "step": 14751 }, { "epoch": 0.30880013396968936, "grad_norm": 0.24258187413215637, "learning_rate": 0.00019483557685390362, "loss": 11.6813, "step": 14752 }, { "epoch": 0.3088210667336515, "grad_norm": 0.2412858009338379, "learning_rate": 0.00019483488134068615, "loss": 11.6778, "step": 14753 }, { "epoch": 0.30884199949761365, "grad_norm": 0.2698395252227783, "learning_rate": 0.0001948341857818796, "loss": 11.6712, "step": 14754 }, { "epoch": 0.3088629322615758, "grad_norm": 0.28840935230255127, "learning_rate": 0.00019483349017748443, "loss": 11.6616, "step": 14755 }, { "epoch": 0.308883865025538, "grad_norm": 0.2668178677558899, "learning_rate": 0.00019483279452750085, "loss": 11.6867, "step": 14756 }, { "epoch": 0.30890479778950014, "grad_norm": 0.26628702878952026, "learning_rate": 0.00019483209883192925, "loss": 11.6823, "step": 14757 }, { "epoch": 0.3089257305534623, "grad_norm": 0.2653065621852875, "learning_rate": 0.00019483140309076998, "loss": 11.6826, "step": 14758 }, { "epoch": 0.30894666331742443, "grad_norm": 0.33309921622276306, "learning_rate": 0.00019483070730402335, "loss": 11.6803, "step": 14759 }, { "epoch": 0.3089675960813866, "grad_norm": 0.34470075368881226, "learning_rate": 0.00019483001147168968, "loss": 11.6808, "step": 14760 }, { "epoch": 0.3089885288453487, "grad_norm": 0.33875536918640137, "learning_rate": 0.00019482931559376937, "loss": 11.6734, "step": 14761 }, { "epoch": 0.3090094616093109, "grad_norm": 0.26266682147979736, "learning_rate": 0.00019482861967026264, "loss": 11.6554, "step": 14762 }, { "epoch": 0.30903039437327307, "grad_norm": 0.22357775270938873, "learning_rate": 0.00019482792370116992, "loss": 11.663, "step": 14763 }, { "epoch": 0.3090513271372352, "grad_norm": 0.26604512333869934, "learning_rate": 0.00019482722768649152, "loss": 11.677, "step": 14764 }, { "epoch": 0.30907225990119735, "grad_norm": 0.2444392889738083, "learning_rate": 0.0001948265316262278, "loss": 11.6737, "step": 14765 }, { "epoch": 0.3090931926651595, "grad_norm": 0.3266863524913788, "learning_rate": 0.00019482583552037902, "loss": 11.6712, "step": 14766 }, { "epoch": 0.30911412542912164, "grad_norm": 0.29965701699256897, "learning_rate": 0.00019482513936894556, "loss": 11.6841, "step": 14767 }, { "epoch": 0.3091350581930838, "grad_norm": 0.26964452862739563, "learning_rate": 0.0001948244431719278, "loss": 11.6757, "step": 14768 }, { "epoch": 0.309155990957046, "grad_norm": 0.39292478561401367, "learning_rate": 0.000194823746929326, "loss": 11.6756, "step": 14769 }, { "epoch": 0.30917692372100813, "grad_norm": 0.2302129566669464, "learning_rate": 0.00019482305064114053, "loss": 11.6636, "step": 14770 }, { "epoch": 0.3091978564849703, "grad_norm": 0.2534029483795166, "learning_rate": 0.00019482235430737173, "loss": 11.6656, "step": 14771 }, { "epoch": 0.3092187892489324, "grad_norm": 0.3839346766471863, "learning_rate": 0.0001948216579280199, "loss": 11.6674, "step": 14772 }, { "epoch": 0.30923972201289457, "grad_norm": 0.2546077072620392, "learning_rate": 0.0001948209615030854, "loss": 11.6829, "step": 14773 }, { "epoch": 0.3092606547768567, "grad_norm": 0.27700045704841614, "learning_rate": 0.00019482026503256857, "loss": 11.6842, "step": 14774 }, { "epoch": 0.3092815875408189, "grad_norm": 0.2697990834712982, "learning_rate": 0.00019481956851646976, "loss": 11.6573, "step": 14775 }, { "epoch": 0.30930252030478106, "grad_norm": 0.27371490001678467, "learning_rate": 0.00019481887195478925, "loss": 11.675, "step": 14776 }, { "epoch": 0.3093234530687432, "grad_norm": 0.28104233741760254, "learning_rate": 0.0001948181753475274, "loss": 11.6755, "step": 14777 }, { "epoch": 0.30934438583270535, "grad_norm": 0.29366135597229004, "learning_rate": 0.0001948174786946846, "loss": 11.6693, "step": 14778 }, { "epoch": 0.3093653185966675, "grad_norm": 0.2679881155490875, "learning_rate": 0.00019481678199626112, "loss": 11.6744, "step": 14779 }, { "epoch": 0.30938625136062964, "grad_norm": 0.31862500309944153, "learning_rate": 0.00019481608525225732, "loss": 11.6782, "step": 14780 }, { "epoch": 0.30940718412459184, "grad_norm": 0.24516230821609497, "learning_rate": 0.0001948153884626735, "loss": 11.6734, "step": 14781 }, { "epoch": 0.309428116888554, "grad_norm": 0.22425667941570282, "learning_rate": 0.00019481469162751003, "loss": 11.6636, "step": 14782 }, { "epoch": 0.30944904965251613, "grad_norm": 0.3384539484977722, "learning_rate": 0.00019481399474676725, "loss": 11.6729, "step": 14783 }, { "epoch": 0.3094699824164783, "grad_norm": 0.2842777371406555, "learning_rate": 0.00019481329782044548, "loss": 11.6938, "step": 14784 }, { "epoch": 0.3094909151804404, "grad_norm": 0.27078503370285034, "learning_rate": 0.00019481260084854504, "loss": 11.6776, "step": 14785 }, { "epoch": 0.30951184794440256, "grad_norm": 0.2450903207063675, "learning_rate": 0.0001948119038310663, "loss": 11.6798, "step": 14786 }, { "epoch": 0.3095327807083647, "grad_norm": 0.28761714696884155, "learning_rate": 0.0001948112067680096, "loss": 11.6611, "step": 14787 }, { "epoch": 0.3095537134723269, "grad_norm": 0.24955374002456665, "learning_rate": 0.0001948105096593752, "loss": 11.6715, "step": 14788 }, { "epoch": 0.30957464623628905, "grad_norm": 0.297866553068161, "learning_rate": 0.00019480981250516352, "loss": 11.6606, "step": 14789 }, { "epoch": 0.3095955790002512, "grad_norm": 0.28271225094795227, "learning_rate": 0.00019480911530537487, "loss": 11.6735, "step": 14790 }, { "epoch": 0.30961651176421334, "grad_norm": 0.30115240812301636, "learning_rate": 0.00019480841806000955, "loss": 11.6794, "step": 14791 }, { "epoch": 0.3096374445281755, "grad_norm": 0.27411019802093506, "learning_rate": 0.00019480772076906796, "loss": 11.6817, "step": 14792 }, { "epoch": 0.30965837729213763, "grad_norm": 0.2398994266986847, "learning_rate": 0.0001948070234325504, "loss": 11.679, "step": 14793 }, { "epoch": 0.30967931005609983, "grad_norm": 0.31262725591659546, "learning_rate": 0.00019480632605045717, "loss": 11.6797, "step": 14794 }, { "epoch": 0.309700242820062, "grad_norm": 0.3185744285583496, "learning_rate": 0.00019480562862278866, "loss": 11.6815, "step": 14795 }, { "epoch": 0.3097211755840241, "grad_norm": 0.2684844136238098, "learning_rate": 0.00019480493114954516, "loss": 11.6711, "step": 14796 }, { "epoch": 0.30974210834798627, "grad_norm": 0.279479056596756, "learning_rate": 0.00019480423363072707, "loss": 11.6763, "step": 14797 }, { "epoch": 0.3097630411119484, "grad_norm": 0.2676374614238739, "learning_rate": 0.00019480353606633464, "loss": 11.664, "step": 14798 }, { "epoch": 0.30978397387591056, "grad_norm": 0.2954254448413849, "learning_rate": 0.00019480283845636827, "loss": 11.6642, "step": 14799 }, { "epoch": 0.3098049066398727, "grad_norm": 0.295108824968338, "learning_rate": 0.00019480214080082827, "loss": 11.6756, "step": 14800 }, { "epoch": 0.3098258394038349, "grad_norm": 0.20167605578899384, "learning_rate": 0.000194801443099715, "loss": 11.6821, "step": 14801 }, { "epoch": 0.30984677216779705, "grad_norm": 0.28081685304641724, "learning_rate": 0.00019480074535302877, "loss": 11.6632, "step": 14802 }, { "epoch": 0.3098677049317592, "grad_norm": 0.369394451379776, "learning_rate": 0.0001948000475607699, "loss": 11.6926, "step": 14803 }, { "epoch": 0.30988863769572134, "grad_norm": 0.27804329991340637, "learning_rate": 0.0001947993497229388, "loss": 11.6628, "step": 14804 }, { "epoch": 0.3099095704596835, "grad_norm": 0.24055667221546173, "learning_rate": 0.0001947986518395357, "loss": 11.6819, "step": 14805 }, { "epoch": 0.3099305032236456, "grad_norm": 0.3103674650192261, "learning_rate": 0.00019479795391056103, "loss": 11.6704, "step": 14806 }, { "epoch": 0.3099514359876078, "grad_norm": 0.3250870406627655, "learning_rate": 0.00019479725593601503, "loss": 11.6828, "step": 14807 }, { "epoch": 0.30997236875156997, "grad_norm": 0.318633496761322, "learning_rate": 0.00019479655791589812, "loss": 11.662, "step": 14808 }, { "epoch": 0.3099933015155321, "grad_norm": 0.2734096050262451, "learning_rate": 0.0001947958598502106, "loss": 11.6786, "step": 14809 }, { "epoch": 0.31001423427949426, "grad_norm": 0.28562769293785095, "learning_rate": 0.00019479516173895282, "loss": 11.6818, "step": 14810 }, { "epoch": 0.3100351670434564, "grad_norm": 0.28387612104415894, "learning_rate": 0.00019479446358212509, "loss": 11.6668, "step": 14811 }, { "epoch": 0.31005609980741855, "grad_norm": 0.34866002202033997, "learning_rate": 0.00019479376537972777, "loss": 11.6573, "step": 14812 }, { "epoch": 0.31007703257138075, "grad_norm": 0.246104896068573, "learning_rate": 0.00019479306713176119, "loss": 11.6634, "step": 14813 }, { "epoch": 0.3100979653353429, "grad_norm": 0.22782056033611298, "learning_rate": 0.0001947923688382257, "loss": 11.6752, "step": 14814 }, { "epoch": 0.31011889809930504, "grad_norm": 0.3336051404476166, "learning_rate": 0.0001947916704991216, "loss": 11.6847, "step": 14815 }, { "epoch": 0.3101398308632672, "grad_norm": 0.2657643258571625, "learning_rate": 0.00019479097211444924, "loss": 11.6629, "step": 14816 }, { "epoch": 0.31016076362722933, "grad_norm": 0.26214343309402466, "learning_rate": 0.00019479027368420897, "loss": 11.6821, "step": 14817 }, { "epoch": 0.3101816963911915, "grad_norm": 0.26006636023521423, "learning_rate": 0.0001947895752084011, "loss": 11.6773, "step": 14818 }, { "epoch": 0.3102026291551536, "grad_norm": 0.24167096614837646, "learning_rate": 0.000194788876687026, "loss": 11.6702, "step": 14819 }, { "epoch": 0.3102235619191158, "grad_norm": 0.30309516191482544, "learning_rate": 0.000194788178120084, "loss": 11.6827, "step": 14820 }, { "epoch": 0.31024449468307796, "grad_norm": 0.2611919045448303, "learning_rate": 0.0001947874795075754, "loss": 11.6628, "step": 14821 }, { "epoch": 0.3102654274470401, "grad_norm": 0.2725133001804352, "learning_rate": 0.00019478678084950053, "loss": 11.6705, "step": 14822 }, { "epoch": 0.31028636021100225, "grad_norm": 0.2850258946418762, "learning_rate": 0.00019478608214585982, "loss": 11.6868, "step": 14823 }, { "epoch": 0.3103072929749644, "grad_norm": 0.32923373579978943, "learning_rate": 0.00019478538339665348, "loss": 11.6747, "step": 14824 }, { "epoch": 0.31032822573892654, "grad_norm": 0.372504860162735, "learning_rate": 0.00019478468460188192, "loss": 11.6813, "step": 14825 }, { "epoch": 0.31034915850288874, "grad_norm": 0.2579571604728699, "learning_rate": 0.0001947839857615455, "loss": 11.6717, "step": 14826 }, { "epoch": 0.3103700912668509, "grad_norm": 0.3197653591632843, "learning_rate": 0.0001947832868756445, "loss": 11.6826, "step": 14827 }, { "epoch": 0.31039102403081303, "grad_norm": 0.3382514715194702, "learning_rate": 0.00019478258794417927, "loss": 11.6779, "step": 14828 }, { "epoch": 0.3104119567947752, "grad_norm": 0.24714332818984985, "learning_rate": 0.00019478188896715013, "loss": 11.6592, "step": 14829 }, { "epoch": 0.3104328895587373, "grad_norm": 0.22577743232250214, "learning_rate": 0.00019478118994455747, "loss": 11.6869, "step": 14830 }, { "epoch": 0.31045382232269947, "grad_norm": 0.23186828196048737, "learning_rate": 0.00019478049087640157, "loss": 11.6619, "step": 14831 }, { "epoch": 0.31047475508666167, "grad_norm": 0.3572319746017456, "learning_rate": 0.0001947797917626828, "loss": 11.668, "step": 14832 }, { "epoch": 0.3104956878506238, "grad_norm": 0.29979485273361206, "learning_rate": 0.00019477909260340147, "loss": 11.6746, "step": 14833 }, { "epoch": 0.31051662061458596, "grad_norm": 0.2609461545944214, "learning_rate": 0.00019477839339855798, "loss": 11.6889, "step": 14834 }, { "epoch": 0.3105375533785481, "grad_norm": 0.23864103853702545, "learning_rate": 0.00019477769414815257, "loss": 11.6784, "step": 14835 }, { "epoch": 0.31055848614251025, "grad_norm": 0.2886154353618622, "learning_rate": 0.00019477699485218562, "loss": 11.6766, "step": 14836 }, { "epoch": 0.3105794189064724, "grad_norm": 1.3316611051559448, "learning_rate": 0.00019477629551065747, "loss": 11.5962, "step": 14837 }, { "epoch": 0.31060035167043454, "grad_norm": 0.3320976197719574, "learning_rate": 0.00019477559612356848, "loss": 11.6856, "step": 14838 }, { "epoch": 0.31062128443439674, "grad_norm": 0.38647162914276123, "learning_rate": 0.00019477489669091896, "loss": 11.6668, "step": 14839 }, { "epoch": 0.3106422171983589, "grad_norm": 0.26692864298820496, "learning_rate": 0.00019477419721270923, "loss": 11.6698, "step": 14840 }, { "epoch": 0.31066314996232103, "grad_norm": 0.255456805229187, "learning_rate": 0.00019477349768893968, "loss": 11.6593, "step": 14841 }, { "epoch": 0.3106840827262832, "grad_norm": 0.25237229466438293, "learning_rate": 0.0001947727981196106, "loss": 11.6881, "step": 14842 }, { "epoch": 0.3107050154902453, "grad_norm": 0.2689674198627472, "learning_rate": 0.0001947720985047223, "loss": 11.6592, "step": 14843 }, { "epoch": 0.31072594825420746, "grad_norm": 0.23269398510456085, "learning_rate": 0.00019477139884427515, "loss": 11.6676, "step": 14844 }, { "epoch": 0.31074688101816966, "grad_norm": 0.30342817306518555, "learning_rate": 0.00019477069913826953, "loss": 11.6683, "step": 14845 }, { "epoch": 0.3107678137821318, "grad_norm": 0.24796149134635925, "learning_rate": 0.0001947699993867057, "loss": 11.672, "step": 14846 }, { "epoch": 0.31078874654609395, "grad_norm": 0.3070346415042877, "learning_rate": 0.00019476929958958406, "loss": 11.6604, "step": 14847 }, { "epoch": 0.3108096793100561, "grad_norm": 0.33198249340057373, "learning_rate": 0.00019476859974690492, "loss": 11.6945, "step": 14848 }, { "epoch": 0.31083061207401824, "grad_norm": 0.309128999710083, "learning_rate": 0.00019476789985866858, "loss": 11.6777, "step": 14849 }, { "epoch": 0.3108515448379804, "grad_norm": 0.31972944736480713, "learning_rate": 0.00019476719992487542, "loss": 11.6777, "step": 14850 }, { "epoch": 0.3108724776019426, "grad_norm": 0.2824084758758545, "learning_rate": 0.00019476649994552579, "loss": 11.6823, "step": 14851 }, { "epoch": 0.31089341036590473, "grad_norm": 0.2306741178035736, "learning_rate": 0.00019476579992062001, "loss": 11.671, "step": 14852 }, { "epoch": 0.3109143431298669, "grad_norm": 0.26353052258491516, "learning_rate": 0.00019476509985015837, "loss": 11.6668, "step": 14853 }, { "epoch": 0.310935275893829, "grad_norm": 0.27196431159973145, "learning_rate": 0.0001947643997341413, "loss": 11.6898, "step": 14854 }, { "epoch": 0.31095620865779117, "grad_norm": 0.27652472257614136, "learning_rate": 0.00019476369957256907, "loss": 11.6654, "step": 14855 }, { "epoch": 0.3109771414217533, "grad_norm": 0.28303641080856323, "learning_rate": 0.000194762999365442, "loss": 11.6656, "step": 14856 }, { "epoch": 0.31099807418571546, "grad_norm": 0.28132298588752747, "learning_rate": 0.00019476229911276048, "loss": 11.6824, "step": 14857 }, { "epoch": 0.31101900694967766, "grad_norm": 0.25652799010276794, "learning_rate": 0.00019476159881452482, "loss": 11.6633, "step": 14858 }, { "epoch": 0.3110399397136398, "grad_norm": 0.2880677580833435, "learning_rate": 0.00019476089847073535, "loss": 11.6498, "step": 14859 }, { "epoch": 0.31106087247760195, "grad_norm": 0.341950386762619, "learning_rate": 0.00019476019808139242, "loss": 11.6807, "step": 14860 }, { "epoch": 0.3110818052415641, "grad_norm": 0.27947375178337097, "learning_rate": 0.00019475949764649638, "loss": 11.6823, "step": 14861 }, { "epoch": 0.31110273800552624, "grad_norm": 0.27373942732810974, "learning_rate": 0.00019475879716604754, "loss": 11.6742, "step": 14862 }, { "epoch": 0.3111236707694884, "grad_norm": 0.24709968268871307, "learning_rate": 0.00019475809664004626, "loss": 11.6724, "step": 14863 }, { "epoch": 0.3111446035334506, "grad_norm": 0.20914201438426971, "learning_rate": 0.00019475739606849283, "loss": 11.6693, "step": 14864 }, { "epoch": 0.3111655362974127, "grad_norm": 0.2081698179244995, "learning_rate": 0.00019475669545138767, "loss": 11.6702, "step": 14865 }, { "epoch": 0.31118646906137487, "grad_norm": 0.34752118587493896, "learning_rate": 0.00019475599478873103, "loss": 11.6824, "step": 14866 }, { "epoch": 0.311207401825337, "grad_norm": 0.24904516339302063, "learning_rate": 0.0001947552940805233, "loss": 11.6747, "step": 14867 }, { "epoch": 0.31122833458929916, "grad_norm": 0.2731974720954895, "learning_rate": 0.0001947545933267648, "loss": 11.6809, "step": 14868 }, { "epoch": 0.3112492673532613, "grad_norm": 0.2709328234195709, "learning_rate": 0.00019475389252745587, "loss": 11.6748, "step": 14869 }, { "epoch": 0.3112702001172235, "grad_norm": 0.3439209461212158, "learning_rate": 0.00019475319168259683, "loss": 11.6881, "step": 14870 }, { "epoch": 0.31129113288118565, "grad_norm": 0.2846078872680664, "learning_rate": 0.00019475249079218805, "loss": 11.6631, "step": 14871 }, { "epoch": 0.3113120656451478, "grad_norm": 0.2811219394207001, "learning_rate": 0.00019475178985622983, "loss": 11.6842, "step": 14872 }, { "epoch": 0.31133299840910994, "grad_norm": 0.25033900141716003, "learning_rate": 0.00019475108887472253, "loss": 11.6691, "step": 14873 }, { "epoch": 0.3113539311730721, "grad_norm": 0.23765766620635986, "learning_rate": 0.00019475038784766652, "loss": 11.663, "step": 14874 }, { "epoch": 0.31137486393703423, "grad_norm": 0.22105827927589417, "learning_rate": 0.00019474968677506203, "loss": 11.6638, "step": 14875 }, { "epoch": 0.3113957967009964, "grad_norm": 0.32011136412620544, "learning_rate": 0.0001947489856569095, "loss": 11.6861, "step": 14876 }, { "epoch": 0.3114167294649586, "grad_norm": 0.2857969105243683, "learning_rate": 0.00019474828449320925, "loss": 11.6716, "step": 14877 }, { "epoch": 0.3114376622289207, "grad_norm": 0.3580513000488281, "learning_rate": 0.0001947475832839616, "loss": 11.6718, "step": 14878 }, { "epoch": 0.31145859499288286, "grad_norm": 0.26434722542762756, "learning_rate": 0.00019474688202916686, "loss": 11.671, "step": 14879 }, { "epoch": 0.311479527756845, "grad_norm": 0.2866368591785431, "learning_rate": 0.00019474618072882541, "loss": 11.6666, "step": 14880 }, { "epoch": 0.31150046052080715, "grad_norm": 0.3225613832473755, "learning_rate": 0.00019474547938293758, "loss": 11.6468, "step": 14881 }, { "epoch": 0.3115213932847693, "grad_norm": 0.24080264568328857, "learning_rate": 0.0001947447779915037, "loss": 11.6821, "step": 14882 }, { "epoch": 0.3115423260487315, "grad_norm": 0.24011936783790588, "learning_rate": 0.0001947440765545241, "loss": 11.6738, "step": 14883 }, { "epoch": 0.31156325881269364, "grad_norm": 0.22786873579025269, "learning_rate": 0.00019474337507199914, "loss": 11.6895, "step": 14884 }, { "epoch": 0.3115841915766558, "grad_norm": 0.2674315273761749, "learning_rate": 0.0001947426735439291, "loss": 11.66, "step": 14885 }, { "epoch": 0.31160512434061793, "grad_norm": 0.3356002867221832, "learning_rate": 0.00019474197197031439, "loss": 11.6848, "step": 14886 }, { "epoch": 0.3116260571045801, "grad_norm": 0.23791515827178955, "learning_rate": 0.0001947412703511553, "loss": 11.6811, "step": 14887 }, { "epoch": 0.3116469898685422, "grad_norm": 0.23984824120998383, "learning_rate": 0.0001947405686864522, "loss": 11.6864, "step": 14888 }, { "epoch": 0.31166792263250437, "grad_norm": 0.26809370517730713, "learning_rate": 0.0001947398669762054, "loss": 11.6709, "step": 14889 }, { "epoch": 0.31168885539646657, "grad_norm": 0.4062759280204773, "learning_rate": 0.00019473916522041526, "loss": 11.6791, "step": 14890 }, { "epoch": 0.3117097881604287, "grad_norm": 0.32563555240631104, "learning_rate": 0.00019473846341908207, "loss": 11.6813, "step": 14891 }, { "epoch": 0.31173072092439086, "grad_norm": 0.3233560025691986, "learning_rate": 0.00019473776157220623, "loss": 11.6958, "step": 14892 }, { "epoch": 0.311751653688353, "grad_norm": 0.3303290903568268, "learning_rate": 0.00019473705967978808, "loss": 11.6948, "step": 14893 }, { "epoch": 0.31177258645231515, "grad_norm": 0.24351122975349426, "learning_rate": 0.00019473635774182787, "loss": 11.659, "step": 14894 }, { "epoch": 0.3117935192162773, "grad_norm": 0.2665739059448242, "learning_rate": 0.00019473565575832603, "loss": 11.6894, "step": 14895 }, { "epoch": 0.3118144519802395, "grad_norm": 0.3167171776294708, "learning_rate": 0.00019473495372928284, "loss": 11.6675, "step": 14896 }, { "epoch": 0.31183538474420164, "grad_norm": 0.2684140205383301, "learning_rate": 0.00019473425165469866, "loss": 11.6558, "step": 14897 }, { "epoch": 0.3118563175081638, "grad_norm": 0.2924800217151642, "learning_rate": 0.00019473354953457385, "loss": 11.6795, "step": 14898 }, { "epoch": 0.3118772502721259, "grad_norm": 0.28658872842788696, "learning_rate": 0.0001947328473689087, "loss": 11.6766, "step": 14899 }, { "epoch": 0.3118981830360881, "grad_norm": 0.3274477422237396, "learning_rate": 0.0001947321451577036, "loss": 11.6779, "step": 14900 }, { "epoch": 0.3119191158000502, "grad_norm": 0.2635740041732788, "learning_rate": 0.00019473144290095882, "loss": 11.6736, "step": 14901 }, { "epoch": 0.3119400485640124, "grad_norm": 0.3429754972457886, "learning_rate": 0.00019473074059867475, "loss": 11.6666, "step": 14902 }, { "epoch": 0.31196098132797456, "grad_norm": 0.26529523730278015, "learning_rate": 0.00019473003825085173, "loss": 11.6786, "step": 14903 }, { "epoch": 0.3119819140919367, "grad_norm": 0.2625921666622162, "learning_rate": 0.00019472933585749008, "loss": 11.6639, "step": 14904 }, { "epoch": 0.31200284685589885, "grad_norm": 0.2828129529953003, "learning_rate": 0.00019472863341859014, "loss": 11.6645, "step": 14905 }, { "epoch": 0.312023779619861, "grad_norm": 0.2639378011226654, "learning_rate": 0.00019472793093415223, "loss": 11.6872, "step": 14906 }, { "epoch": 0.31204471238382314, "grad_norm": 0.26163753867149353, "learning_rate": 0.0001947272284041767, "loss": 11.6833, "step": 14907 }, { "epoch": 0.3120656451477853, "grad_norm": 0.24434348940849304, "learning_rate": 0.00019472652582866391, "loss": 11.6772, "step": 14908 }, { "epoch": 0.3120865779117475, "grad_norm": 0.3191523551940918, "learning_rate": 0.0001947258232076142, "loss": 11.6839, "step": 14909 }, { "epoch": 0.31210751067570963, "grad_norm": 0.2167264223098755, "learning_rate": 0.00019472512054102787, "loss": 11.6702, "step": 14910 }, { "epoch": 0.3121284434396718, "grad_norm": 0.3329755365848541, "learning_rate": 0.00019472441782890525, "loss": 11.6825, "step": 14911 }, { "epoch": 0.3121493762036339, "grad_norm": 0.26307880878448486, "learning_rate": 0.00019472371507124673, "loss": 11.6892, "step": 14912 }, { "epoch": 0.31217030896759607, "grad_norm": 0.41731366515159607, "learning_rate": 0.00019472301226805262, "loss": 11.7062, "step": 14913 }, { "epoch": 0.3121912417315582, "grad_norm": 0.25722745060920715, "learning_rate": 0.00019472230941932326, "loss": 11.6857, "step": 14914 }, { "epoch": 0.3122121744955204, "grad_norm": 0.2628493309020996, "learning_rate": 0.00019472160652505899, "loss": 11.6693, "step": 14915 }, { "epoch": 0.31223310725948256, "grad_norm": 0.272086501121521, "learning_rate": 0.00019472090358526014, "loss": 11.6821, "step": 14916 }, { "epoch": 0.3122540400234447, "grad_norm": 0.2949190139770508, "learning_rate": 0.00019472020059992706, "loss": 11.6944, "step": 14917 }, { "epoch": 0.31227497278740685, "grad_norm": 1.4274778366088867, "learning_rate": 0.00019471949756906008, "loss": 11.6133, "step": 14918 }, { "epoch": 0.312295905551369, "grad_norm": 0.23239675164222717, "learning_rate": 0.00019471879449265952, "loss": 11.6684, "step": 14919 }, { "epoch": 0.31231683831533114, "grad_norm": 0.2757679224014282, "learning_rate": 0.0001947180913707258, "loss": 11.6734, "step": 14920 }, { "epoch": 0.31233777107929334, "grad_norm": 0.24269013106822968, "learning_rate": 0.00019471738820325913, "loss": 11.6686, "step": 14921 }, { "epoch": 0.3123587038432555, "grad_norm": 0.335561066865921, "learning_rate": 0.00019471668499025993, "loss": 11.679, "step": 14922 }, { "epoch": 0.3123796366072176, "grad_norm": 0.26330697536468506, "learning_rate": 0.00019471598173172853, "loss": 11.6716, "step": 14923 }, { "epoch": 0.31240056937117977, "grad_norm": 0.3113623261451721, "learning_rate": 0.00019471527842766526, "loss": 11.6715, "step": 14924 }, { "epoch": 0.3124215021351419, "grad_norm": 0.2985730767250061, "learning_rate": 0.00019471457507807042, "loss": 11.6862, "step": 14925 }, { "epoch": 0.31244243489910406, "grad_norm": 0.28500622510910034, "learning_rate": 0.00019471387168294442, "loss": 11.6574, "step": 14926 }, { "epoch": 0.3124633676630662, "grad_norm": 0.23369081318378448, "learning_rate": 0.0001947131682422876, "loss": 11.6771, "step": 14927 }, { "epoch": 0.3124843004270284, "grad_norm": 0.31773561239242554, "learning_rate": 0.00019471246475610018, "loss": 11.6677, "step": 14928 }, { "epoch": 0.31250523319099055, "grad_norm": 0.226937398314476, "learning_rate": 0.00019471176122438263, "loss": 11.6719, "step": 14929 }, { "epoch": 0.3125261659549527, "grad_norm": 0.25266751646995544, "learning_rate": 0.00019471105764713525, "loss": 11.6816, "step": 14930 }, { "epoch": 0.31254709871891484, "grad_norm": 0.26347461342811584, "learning_rate": 0.00019471035402435834, "loss": 11.6733, "step": 14931 }, { "epoch": 0.312568031482877, "grad_norm": 0.2487143576145172, "learning_rate": 0.0001947096503560523, "loss": 11.6753, "step": 14932 }, { "epoch": 0.31258896424683913, "grad_norm": 0.27325278520584106, "learning_rate": 0.00019470894664221737, "loss": 11.6759, "step": 14933 }, { "epoch": 0.31260989701080133, "grad_norm": 0.3107414245605469, "learning_rate": 0.000194708242882854, "loss": 11.6808, "step": 14934 }, { "epoch": 0.3126308297747635, "grad_norm": 0.2546670138835907, "learning_rate": 0.00019470753907796246, "loss": 11.668, "step": 14935 }, { "epoch": 0.3126517625387256, "grad_norm": 0.33425942063331604, "learning_rate": 0.0001947068352275431, "loss": 11.6797, "step": 14936 }, { "epoch": 0.31267269530268776, "grad_norm": 0.2798275351524353, "learning_rate": 0.0001947061313315963, "loss": 11.6741, "step": 14937 }, { "epoch": 0.3126936280666499, "grad_norm": 0.27117159962654114, "learning_rate": 0.00019470542739012233, "loss": 11.6749, "step": 14938 }, { "epoch": 0.31271456083061205, "grad_norm": 0.2541445791721344, "learning_rate": 0.00019470472340312158, "loss": 11.6812, "step": 14939 }, { "epoch": 0.31273549359457425, "grad_norm": 0.27610963582992554, "learning_rate": 0.00019470401937059437, "loss": 11.6683, "step": 14940 }, { "epoch": 0.3127564263585364, "grad_norm": 0.25374147295951843, "learning_rate": 0.00019470331529254105, "loss": 11.6725, "step": 14941 }, { "epoch": 0.31277735912249854, "grad_norm": 0.2440698742866516, "learning_rate": 0.00019470261116896192, "loss": 11.6771, "step": 14942 }, { "epoch": 0.3127982918864607, "grad_norm": 0.22696754336357117, "learning_rate": 0.00019470190699985739, "loss": 11.6753, "step": 14943 }, { "epoch": 0.31281922465042283, "grad_norm": 0.38908106088638306, "learning_rate": 0.00019470120278522772, "loss": 11.6797, "step": 14944 }, { "epoch": 0.312840157414385, "grad_norm": 0.21643106639385223, "learning_rate": 0.00019470049852507332, "loss": 11.6723, "step": 14945 }, { "epoch": 0.3128610901783471, "grad_norm": 0.2514513432979584, "learning_rate": 0.00019469979421939443, "loss": 11.6726, "step": 14946 }, { "epoch": 0.3128820229423093, "grad_norm": 0.30646151304244995, "learning_rate": 0.00019469908986819153, "loss": 11.6818, "step": 14947 }, { "epoch": 0.31290295570627147, "grad_norm": 0.3828476071357727, "learning_rate": 0.00019469838547146482, "loss": 11.6832, "step": 14948 }, { "epoch": 0.3129238884702336, "grad_norm": 0.2560596764087677, "learning_rate": 0.0001946976810292147, "loss": 11.6648, "step": 14949 }, { "epoch": 0.31294482123419576, "grad_norm": 1.2332466840744019, "learning_rate": 0.00019469697654144152, "loss": 11.6406, "step": 14950 }, { "epoch": 0.3129657539981579, "grad_norm": 0.25232937932014465, "learning_rate": 0.00019469627200814564, "loss": 11.6818, "step": 14951 }, { "epoch": 0.31298668676212005, "grad_norm": 0.2512381970882416, "learning_rate": 0.00019469556742932734, "loss": 11.6773, "step": 14952 }, { "epoch": 0.31300761952608225, "grad_norm": 0.2913872003555298, "learning_rate": 0.00019469486280498694, "loss": 11.6656, "step": 14953 }, { "epoch": 0.3130285522900444, "grad_norm": 0.26198244094848633, "learning_rate": 0.00019469415813512487, "loss": 11.6882, "step": 14954 }, { "epoch": 0.31304948505400654, "grad_norm": 0.2615044414997101, "learning_rate": 0.0001946934534197414, "loss": 11.6821, "step": 14955 }, { "epoch": 0.3130704178179687, "grad_norm": 0.2817179262638092, "learning_rate": 0.0001946927486588369, "loss": 11.6638, "step": 14956 }, { "epoch": 0.3130913505819308, "grad_norm": 0.24300502240657806, "learning_rate": 0.00019469204385241172, "loss": 11.659, "step": 14957 }, { "epoch": 0.31311228334589297, "grad_norm": 0.2948299050331116, "learning_rate": 0.00019469133900046614, "loss": 11.6734, "step": 14958 }, { "epoch": 0.3131332161098552, "grad_norm": 0.2740571200847626, "learning_rate": 0.00019469063410300054, "loss": 11.6711, "step": 14959 }, { "epoch": 0.3131541488738173, "grad_norm": 0.28177833557128906, "learning_rate": 0.00019468992916001526, "loss": 11.6628, "step": 14960 }, { "epoch": 0.31317508163777946, "grad_norm": 0.24750393629074097, "learning_rate": 0.00019468922417151065, "loss": 11.6729, "step": 14961 }, { "epoch": 0.3131960144017416, "grad_norm": 0.2908405661582947, "learning_rate": 0.00019468851913748703, "loss": 11.6912, "step": 14962 }, { "epoch": 0.31321694716570375, "grad_norm": 0.25103068351745605, "learning_rate": 0.00019468781405794473, "loss": 11.6638, "step": 14963 }, { "epoch": 0.3132378799296659, "grad_norm": 0.2117655724287033, "learning_rate": 0.0001946871089328841, "loss": 11.6706, "step": 14964 }, { "epoch": 0.31325881269362804, "grad_norm": 0.24752824008464813, "learning_rate": 0.00019468640376230548, "loss": 11.673, "step": 14965 }, { "epoch": 0.31327974545759024, "grad_norm": 0.35678088665008545, "learning_rate": 0.00019468569854620918, "loss": 11.671, "step": 14966 }, { "epoch": 0.3133006782215524, "grad_norm": 0.2749952971935272, "learning_rate": 0.00019468499328459561, "loss": 11.6859, "step": 14967 }, { "epoch": 0.31332161098551453, "grad_norm": 0.2657085359096527, "learning_rate": 0.00019468428797746504, "loss": 11.6885, "step": 14968 }, { "epoch": 0.3133425437494767, "grad_norm": 0.29604020714759827, "learning_rate": 0.00019468358262481785, "loss": 11.6835, "step": 14969 }, { "epoch": 0.3133634765134388, "grad_norm": 0.3296821415424347, "learning_rate": 0.00019468287722665436, "loss": 11.6926, "step": 14970 }, { "epoch": 0.31338440927740097, "grad_norm": 0.3013516962528229, "learning_rate": 0.00019468217178297495, "loss": 11.6777, "step": 14971 }, { "epoch": 0.31340534204136317, "grad_norm": 0.37301766872406006, "learning_rate": 0.0001946814662937799, "loss": 11.6847, "step": 14972 }, { "epoch": 0.3134262748053253, "grad_norm": 0.29397818446159363, "learning_rate": 0.00019468076075906954, "loss": 11.6807, "step": 14973 }, { "epoch": 0.31344720756928746, "grad_norm": 0.3795351982116699, "learning_rate": 0.00019468005517884424, "loss": 11.6627, "step": 14974 }, { "epoch": 0.3134681403332496, "grad_norm": 0.29173439741134644, "learning_rate": 0.00019467934955310435, "loss": 11.6883, "step": 14975 }, { "epoch": 0.31348907309721175, "grad_norm": 0.20969988405704498, "learning_rate": 0.00019467864388185023, "loss": 11.669, "step": 14976 }, { "epoch": 0.3135100058611739, "grad_norm": 0.3980846405029297, "learning_rate": 0.00019467793816508214, "loss": 11.6637, "step": 14977 }, { "epoch": 0.31353093862513604, "grad_norm": 0.35522985458374023, "learning_rate": 0.00019467723240280052, "loss": 11.7032, "step": 14978 }, { "epoch": 0.31355187138909824, "grad_norm": 0.28498953580856323, "learning_rate": 0.00019467652659500563, "loss": 11.6758, "step": 14979 }, { "epoch": 0.3135728041530604, "grad_norm": 0.21470065414905548, "learning_rate": 0.00019467582074169783, "loss": 11.6849, "step": 14980 }, { "epoch": 0.3135937369170225, "grad_norm": 0.21958018839359283, "learning_rate": 0.00019467511484287748, "loss": 11.6737, "step": 14981 }, { "epoch": 0.31361466968098467, "grad_norm": 0.3214702010154724, "learning_rate": 0.00019467440889854492, "loss": 11.6621, "step": 14982 }, { "epoch": 0.3136356024449468, "grad_norm": 0.3280898630619049, "learning_rate": 0.00019467370290870043, "loss": 11.6852, "step": 14983 }, { "epoch": 0.31365653520890896, "grad_norm": 0.30666759610176086, "learning_rate": 0.0001946729968733444, "loss": 11.6683, "step": 14984 }, { "epoch": 0.31367746797287116, "grad_norm": 0.28104063868522644, "learning_rate": 0.0001946722907924772, "loss": 11.6755, "step": 14985 }, { "epoch": 0.3136984007368333, "grad_norm": 0.2422475516796112, "learning_rate": 0.0001946715846660991, "loss": 11.6868, "step": 14986 }, { "epoch": 0.31371933350079545, "grad_norm": 0.33239397406578064, "learning_rate": 0.0001946708784942105, "loss": 11.6732, "step": 14987 }, { "epoch": 0.3137402662647576, "grad_norm": 0.2722398042678833, "learning_rate": 0.0001946701722768117, "loss": 11.6588, "step": 14988 }, { "epoch": 0.31376119902871974, "grad_norm": 0.2675677239894867, "learning_rate": 0.00019466946601390305, "loss": 11.6689, "step": 14989 }, { "epoch": 0.3137821317926819, "grad_norm": 0.36534610390663147, "learning_rate": 0.00019466875970548488, "loss": 11.6803, "step": 14990 }, { "epoch": 0.3138030645566441, "grad_norm": 0.239951953291893, "learning_rate": 0.00019466805335155755, "loss": 11.6691, "step": 14991 }, { "epoch": 0.31382399732060623, "grad_norm": 0.24476687610149384, "learning_rate": 0.0001946673469521214, "loss": 11.6787, "step": 14992 }, { "epoch": 0.3138449300845684, "grad_norm": 0.3454229235649109, "learning_rate": 0.00019466664050717673, "loss": 11.6895, "step": 14993 }, { "epoch": 0.3138658628485305, "grad_norm": 0.3224354386329651, "learning_rate": 0.00019466593401672392, "loss": 11.6861, "step": 14994 }, { "epoch": 0.31388679561249266, "grad_norm": 0.2799404561519623, "learning_rate": 0.0001946652274807633, "loss": 11.6772, "step": 14995 }, { "epoch": 0.3139077283764548, "grad_norm": 0.3098539412021637, "learning_rate": 0.0001946645208992952, "loss": 11.6758, "step": 14996 }, { "epoch": 0.31392866114041695, "grad_norm": 0.23417681455612183, "learning_rate": 0.00019466381427232, "loss": 11.6785, "step": 14997 }, { "epoch": 0.31394959390437915, "grad_norm": 0.30350321531295776, "learning_rate": 0.00019466310759983798, "loss": 11.6595, "step": 14998 }, { "epoch": 0.3139705266683413, "grad_norm": 0.2957707345485687, "learning_rate": 0.00019466240088184954, "loss": 11.6634, "step": 14999 }, { "epoch": 0.31399145943230344, "grad_norm": 0.2643541991710663, "learning_rate": 0.00019466169411835494, "loss": 11.6661, "step": 15000 }, { "epoch": 0.31399145943230344, "eval_loss": 11.675930976867676, "eval_runtime": 34.3913, "eval_samples_per_second": 27.943, "eval_steps_per_second": 7.008, "step": 15000 }, { "epoch": 0.3140123921962656, "grad_norm": 0.28638115525245667, "learning_rate": 0.00019466098730935462, "loss": 11.6624, "step": 15001 }, { "epoch": 0.31403332496022773, "grad_norm": 0.30783072113990784, "learning_rate": 0.0001946602804548488, "loss": 11.685, "step": 15002 }, { "epoch": 0.3140542577241899, "grad_norm": 0.2939872741699219, "learning_rate": 0.00019465957355483792, "loss": 11.6584, "step": 15003 }, { "epoch": 0.3140751904881521, "grad_norm": 0.2238638550043106, "learning_rate": 0.0001946588666093223, "loss": 11.677, "step": 15004 }, { "epoch": 0.3140961232521142, "grad_norm": 0.240862175822258, "learning_rate": 0.00019465815961830225, "loss": 11.6716, "step": 15005 }, { "epoch": 0.31411705601607637, "grad_norm": 0.4314655065536499, "learning_rate": 0.00019465745258177811, "loss": 11.6832, "step": 15006 }, { "epoch": 0.3141379887800385, "grad_norm": 0.28827914595603943, "learning_rate": 0.00019465674549975028, "loss": 11.6715, "step": 15007 }, { "epoch": 0.31415892154400066, "grad_norm": 0.2664807140827179, "learning_rate": 0.00019465603837221903, "loss": 11.6491, "step": 15008 }, { "epoch": 0.3141798543079628, "grad_norm": 0.21817153692245483, "learning_rate": 0.0001946553311991847, "loss": 11.6742, "step": 15009 }, { "epoch": 0.314200787071925, "grad_norm": 0.3440549969673157, "learning_rate": 0.0001946546239806477, "loss": 11.6704, "step": 15010 }, { "epoch": 0.31422171983588715, "grad_norm": 0.2846863269805908, "learning_rate": 0.00019465391671660828, "loss": 11.6738, "step": 15011 }, { "epoch": 0.3142426525998493, "grad_norm": 0.30271029472351074, "learning_rate": 0.00019465320940706686, "loss": 11.6731, "step": 15012 }, { "epoch": 0.31426358536381144, "grad_norm": 0.30248844623565674, "learning_rate": 0.00019465250205202375, "loss": 11.6761, "step": 15013 }, { "epoch": 0.3142845181277736, "grad_norm": 0.2292071133852005, "learning_rate": 0.00019465179465147925, "loss": 11.6723, "step": 15014 }, { "epoch": 0.3143054508917357, "grad_norm": 0.30055639147758484, "learning_rate": 0.00019465108720543377, "loss": 11.6861, "step": 15015 }, { "epoch": 0.31432638365569787, "grad_norm": 0.22795981168746948, "learning_rate": 0.0001946503797138876, "loss": 11.6639, "step": 15016 }, { "epoch": 0.31434731641966007, "grad_norm": 0.4393022358417511, "learning_rate": 0.00019464967217684112, "loss": 11.6957, "step": 15017 }, { "epoch": 0.3143682491836222, "grad_norm": 0.24623127281665802, "learning_rate": 0.00019464896459429461, "loss": 11.6578, "step": 15018 }, { "epoch": 0.31438918194758436, "grad_norm": 0.367342084646225, "learning_rate": 0.00019464825696624845, "loss": 11.6764, "step": 15019 }, { "epoch": 0.3144101147115465, "grad_norm": 0.3509686291217804, "learning_rate": 0.00019464754929270298, "loss": 11.6744, "step": 15020 }, { "epoch": 0.31443104747550865, "grad_norm": 0.24921180307865143, "learning_rate": 0.00019464684157365856, "loss": 11.6655, "step": 15021 }, { "epoch": 0.3144519802394708, "grad_norm": 0.2624623477458954, "learning_rate": 0.00019464613380911546, "loss": 11.6606, "step": 15022 }, { "epoch": 0.314472913003433, "grad_norm": 0.3440338373184204, "learning_rate": 0.00019464542599907411, "loss": 11.6647, "step": 15023 }, { "epoch": 0.31449384576739514, "grad_norm": 0.30752032995224, "learning_rate": 0.00019464471814353477, "loss": 11.6902, "step": 15024 }, { "epoch": 0.3145147785313573, "grad_norm": 0.29300740361213684, "learning_rate": 0.00019464401024249788, "loss": 11.6618, "step": 15025 }, { "epoch": 0.31453571129531943, "grad_norm": 0.24366793036460876, "learning_rate": 0.00019464330229596367, "loss": 11.6735, "step": 15026 }, { "epoch": 0.3145566440592816, "grad_norm": 0.2600533664226532, "learning_rate": 0.00019464259430393253, "loss": 11.6686, "step": 15027 }, { "epoch": 0.3145775768232437, "grad_norm": 0.2697519361972809, "learning_rate": 0.00019464188626640478, "loss": 11.6503, "step": 15028 }, { "epoch": 0.3145985095872059, "grad_norm": 0.32420453429222107, "learning_rate": 0.00019464117818338082, "loss": 11.6776, "step": 15029 }, { "epoch": 0.31461944235116807, "grad_norm": 0.350803941488266, "learning_rate": 0.0001946404700548609, "loss": 11.6743, "step": 15030 }, { "epoch": 0.3146403751151302, "grad_norm": 0.2910792827606201, "learning_rate": 0.00019463976188084547, "loss": 11.6616, "step": 15031 }, { "epoch": 0.31466130787909236, "grad_norm": 0.2698107957839966, "learning_rate": 0.00019463905366133477, "loss": 11.676, "step": 15032 }, { "epoch": 0.3146822406430545, "grad_norm": 0.3346019983291626, "learning_rate": 0.0001946383453963292, "loss": 11.6677, "step": 15033 }, { "epoch": 0.31470317340701665, "grad_norm": 0.3069971203804016, "learning_rate": 0.00019463763708582908, "loss": 11.6704, "step": 15034 }, { "epoch": 0.3147241061709788, "grad_norm": 0.24820636212825775, "learning_rate": 0.00019463692872983472, "loss": 11.665, "step": 15035 }, { "epoch": 0.314745038934941, "grad_norm": 0.28390803933143616, "learning_rate": 0.00019463622032834653, "loss": 11.677, "step": 15036 }, { "epoch": 0.31476597169890314, "grad_norm": 0.25958582758903503, "learning_rate": 0.0001946355118813648, "loss": 11.6956, "step": 15037 }, { "epoch": 0.3147869044628653, "grad_norm": 0.27782827615737915, "learning_rate": 0.0001946348033888899, "loss": 11.6814, "step": 15038 }, { "epoch": 0.3148078372268274, "grad_norm": 0.25702911615371704, "learning_rate": 0.0001946340948509221, "loss": 11.6805, "step": 15039 }, { "epoch": 0.31482876999078957, "grad_norm": 0.27433687448501587, "learning_rate": 0.00019463338626746183, "loss": 11.6612, "step": 15040 }, { "epoch": 0.3148497027547517, "grad_norm": 0.260428786277771, "learning_rate": 0.0001946326776385094, "loss": 11.6748, "step": 15041 }, { "epoch": 0.3148706355187139, "grad_norm": 0.28962185978889465, "learning_rate": 0.0001946319689640651, "loss": 11.6969, "step": 15042 }, { "epoch": 0.31489156828267606, "grad_norm": 0.28356143832206726, "learning_rate": 0.00019463126024412937, "loss": 11.652, "step": 15043 }, { "epoch": 0.3149125010466382, "grad_norm": 0.2236020416021347, "learning_rate": 0.0001946305514787025, "loss": 11.6633, "step": 15044 }, { "epoch": 0.31493343381060035, "grad_norm": 0.2349325269460678, "learning_rate": 0.0001946298426677848, "loss": 11.6794, "step": 15045 }, { "epoch": 0.3149543665745625, "grad_norm": 0.24012835323810577, "learning_rate": 0.00019462913381137665, "loss": 11.6667, "step": 15046 }, { "epoch": 0.31497529933852464, "grad_norm": 0.34864914417266846, "learning_rate": 0.00019462842490947836, "loss": 11.6604, "step": 15047 }, { "epoch": 0.31499623210248684, "grad_norm": 0.26596641540527344, "learning_rate": 0.00019462771596209032, "loss": 11.6617, "step": 15048 }, { "epoch": 0.315017164866449, "grad_norm": 0.30908888578414917, "learning_rate": 0.00019462700696921284, "loss": 11.6849, "step": 15049 }, { "epoch": 0.31503809763041113, "grad_norm": 0.2911056876182556, "learning_rate": 0.00019462629793084624, "loss": 11.6643, "step": 15050 }, { "epoch": 0.3150590303943733, "grad_norm": 0.27936238050460815, "learning_rate": 0.00019462558884699086, "loss": 11.6733, "step": 15051 }, { "epoch": 0.3150799631583354, "grad_norm": 0.2668870985507965, "learning_rate": 0.0001946248797176471, "loss": 11.6696, "step": 15052 }, { "epoch": 0.31510089592229756, "grad_norm": 0.3015175461769104, "learning_rate": 0.00019462417054281526, "loss": 11.666, "step": 15053 }, { "epoch": 0.3151218286862597, "grad_norm": 0.32845428586006165, "learning_rate": 0.00019462346132249566, "loss": 11.6688, "step": 15054 }, { "epoch": 0.3151427614502219, "grad_norm": 0.24486225843429565, "learning_rate": 0.0001946227520566887, "loss": 11.667, "step": 15055 }, { "epoch": 0.31516369421418405, "grad_norm": 0.3232421278953552, "learning_rate": 0.0001946220427453947, "loss": 11.673, "step": 15056 }, { "epoch": 0.3151846269781462, "grad_norm": 0.23899400234222412, "learning_rate": 0.00019462133338861394, "loss": 11.6808, "step": 15057 }, { "epoch": 0.31520555974210834, "grad_norm": 0.31014445424079895, "learning_rate": 0.00019462062398634684, "loss": 11.6806, "step": 15058 }, { "epoch": 0.3152264925060705, "grad_norm": 0.3184446394443512, "learning_rate": 0.0001946199145385937, "loss": 11.6674, "step": 15059 }, { "epoch": 0.31524742527003263, "grad_norm": 0.30779239535331726, "learning_rate": 0.00019461920504535487, "loss": 11.6774, "step": 15060 }, { "epoch": 0.31526835803399483, "grad_norm": 0.20562246441841125, "learning_rate": 0.00019461849550663068, "loss": 11.6779, "step": 15061 }, { "epoch": 0.315289290797957, "grad_norm": 0.2620249092578888, "learning_rate": 0.0001946177859224215, "loss": 11.6698, "step": 15062 }, { "epoch": 0.3153102235619191, "grad_norm": 0.2796851396560669, "learning_rate": 0.00019461707629272765, "loss": 11.6736, "step": 15063 }, { "epoch": 0.31533115632588127, "grad_norm": 0.23868028819561005, "learning_rate": 0.00019461636661754946, "loss": 11.6676, "step": 15064 }, { "epoch": 0.3153520890898434, "grad_norm": 0.2617427408695221, "learning_rate": 0.00019461565689688731, "loss": 11.6687, "step": 15065 }, { "epoch": 0.31537302185380556, "grad_norm": 0.29017892479896545, "learning_rate": 0.0001946149471307415, "loss": 11.6628, "step": 15066 }, { "epoch": 0.31539395461776776, "grad_norm": 0.4064517021179199, "learning_rate": 0.0001946142373191124, "loss": 11.6658, "step": 15067 }, { "epoch": 0.3154148873817299, "grad_norm": 0.2908094823360443, "learning_rate": 0.00019461352746200038, "loss": 11.6804, "step": 15068 }, { "epoch": 0.31543582014569205, "grad_norm": 0.2682220935821533, "learning_rate": 0.00019461281755940569, "loss": 11.6853, "step": 15069 }, { "epoch": 0.3154567529096542, "grad_norm": 0.3005874454975128, "learning_rate": 0.00019461210761132872, "loss": 11.6785, "step": 15070 }, { "epoch": 0.31547768567361634, "grad_norm": 0.2677670121192932, "learning_rate": 0.00019461139761776984, "loss": 11.6828, "step": 15071 }, { "epoch": 0.3154986184375785, "grad_norm": 0.3184657394886017, "learning_rate": 0.00019461068757872934, "loss": 11.6569, "step": 15072 }, { "epoch": 0.3155195512015406, "grad_norm": 0.2848223149776459, "learning_rate": 0.00019460997749420759, "loss": 11.6753, "step": 15073 }, { "epoch": 0.3155404839655028, "grad_norm": 0.24026712775230408, "learning_rate": 0.00019460926736420496, "loss": 11.6776, "step": 15074 }, { "epoch": 0.31556141672946497, "grad_norm": 0.4294634163379669, "learning_rate": 0.0001946085571887217, "loss": 11.6618, "step": 15075 }, { "epoch": 0.3155823494934271, "grad_norm": 0.31911134719848633, "learning_rate": 0.00019460784696775826, "loss": 11.6588, "step": 15076 }, { "epoch": 0.31560328225738926, "grad_norm": 0.2585475444793701, "learning_rate": 0.00019460713670131492, "loss": 11.6809, "step": 15077 }, { "epoch": 0.3156242150213514, "grad_norm": 0.29653340578079224, "learning_rate": 0.00019460642638939203, "loss": 11.6714, "step": 15078 }, { "epoch": 0.31564514778531355, "grad_norm": 0.25732725858688354, "learning_rate": 0.00019460571603198995, "loss": 11.6735, "step": 15079 }, { "epoch": 0.31566608054927575, "grad_norm": 0.2770039141178131, "learning_rate": 0.00019460500562910898, "loss": 11.6822, "step": 15080 }, { "epoch": 0.3156870133132379, "grad_norm": 0.2206275761127472, "learning_rate": 0.0001946042951807495, "loss": 11.657, "step": 15081 }, { "epoch": 0.31570794607720004, "grad_norm": 0.2999093532562256, "learning_rate": 0.00019460358468691182, "loss": 11.6564, "step": 15082 }, { "epoch": 0.3157288788411622, "grad_norm": 0.23718322813510895, "learning_rate": 0.0001946028741475963, "loss": 11.6705, "step": 15083 }, { "epoch": 0.31574981160512433, "grad_norm": 0.21123532950878143, "learning_rate": 0.00019460216356280335, "loss": 11.6768, "step": 15084 }, { "epoch": 0.3157707443690865, "grad_norm": 0.22173617780208588, "learning_rate": 0.00019460145293253317, "loss": 11.676, "step": 15085 }, { "epoch": 0.3157916771330486, "grad_norm": 0.30830448865890503, "learning_rate": 0.00019460074225678619, "loss": 11.6818, "step": 15086 }, { "epoch": 0.3158126098970108, "grad_norm": 0.26802825927734375, "learning_rate": 0.00019460003153556278, "loss": 11.6616, "step": 15087 }, { "epoch": 0.31583354266097297, "grad_norm": 0.2479083240032196, "learning_rate": 0.00019459932076886318, "loss": 11.6683, "step": 15088 }, { "epoch": 0.3158544754249351, "grad_norm": 0.3375890552997589, "learning_rate": 0.00019459860995668784, "loss": 11.6593, "step": 15089 }, { "epoch": 0.31587540818889726, "grad_norm": 0.300546258687973, "learning_rate": 0.00019459789909903702, "loss": 11.6922, "step": 15090 }, { "epoch": 0.3158963409528594, "grad_norm": 0.31136783957481384, "learning_rate": 0.0001945971881959111, "loss": 11.6681, "step": 15091 }, { "epoch": 0.31591727371682155, "grad_norm": 0.27221572399139404, "learning_rate": 0.00019459647724731042, "loss": 11.6657, "step": 15092 }, { "epoch": 0.31593820648078375, "grad_norm": 0.23766733705997467, "learning_rate": 0.00019459576625323536, "loss": 11.6703, "step": 15093 }, { "epoch": 0.3159591392447459, "grad_norm": 0.2638649344444275, "learning_rate": 0.00019459505521368615, "loss": 11.6761, "step": 15094 }, { "epoch": 0.31598007200870804, "grad_norm": 0.2767100930213928, "learning_rate": 0.00019459434412866323, "loss": 11.6716, "step": 15095 }, { "epoch": 0.3160010047726702, "grad_norm": 0.2976115047931671, "learning_rate": 0.00019459363299816692, "loss": 11.6809, "step": 15096 }, { "epoch": 0.3160219375366323, "grad_norm": 0.19834686815738678, "learning_rate": 0.00019459292182219754, "loss": 11.6602, "step": 15097 }, { "epoch": 0.31604287030059447, "grad_norm": 0.32049280405044556, "learning_rate": 0.00019459221060075545, "loss": 11.6781, "step": 15098 }, { "epoch": 0.31606380306455667, "grad_norm": 0.2682638466358185, "learning_rate": 0.000194591499333841, "loss": 11.6788, "step": 15099 }, { "epoch": 0.3160847358285188, "grad_norm": 0.28160929679870605, "learning_rate": 0.0001945907880214545, "loss": 11.6847, "step": 15100 }, { "epoch": 0.31610566859248096, "grad_norm": 0.26543739438056946, "learning_rate": 0.00019459007666359633, "loss": 11.6777, "step": 15101 }, { "epoch": 0.3161266013564431, "grad_norm": 0.2561396360397339, "learning_rate": 0.00019458936526026683, "loss": 11.6831, "step": 15102 }, { "epoch": 0.31614753412040525, "grad_norm": 0.32292819023132324, "learning_rate": 0.0001945886538114663, "loss": 11.676, "step": 15103 }, { "epoch": 0.3161684668843674, "grad_norm": 0.2792416512966156, "learning_rate": 0.0001945879423171951, "loss": 11.6682, "step": 15104 }, { "epoch": 0.31618939964832954, "grad_norm": 0.22731460630893707, "learning_rate": 0.00019458723077745362, "loss": 11.6718, "step": 15105 }, { "epoch": 0.31621033241229174, "grad_norm": 0.2435857057571411, "learning_rate": 0.00019458651919224213, "loss": 11.6651, "step": 15106 }, { "epoch": 0.3162312651762539, "grad_norm": 0.26421791315078735, "learning_rate": 0.00019458580756156102, "loss": 11.6642, "step": 15107 }, { "epoch": 0.31625219794021603, "grad_norm": 0.24544085562229156, "learning_rate": 0.0001945850958854106, "loss": 11.6733, "step": 15108 }, { "epoch": 0.3162731307041782, "grad_norm": 0.24963195621967316, "learning_rate": 0.00019458438416379122, "loss": 11.6718, "step": 15109 }, { "epoch": 0.3162940634681403, "grad_norm": 0.23188704252243042, "learning_rate": 0.00019458367239670327, "loss": 11.6699, "step": 15110 }, { "epoch": 0.31631499623210246, "grad_norm": 0.3017161786556244, "learning_rate": 0.00019458296058414706, "loss": 11.6714, "step": 15111 }, { "epoch": 0.31633592899606466, "grad_norm": 0.30681851506233215, "learning_rate": 0.0001945822487261229, "loss": 11.6795, "step": 15112 }, { "epoch": 0.3163568617600268, "grad_norm": 0.22296026349067688, "learning_rate": 0.00019458153682263115, "loss": 11.6741, "step": 15113 }, { "epoch": 0.31637779452398895, "grad_norm": 0.27758121490478516, "learning_rate": 0.00019458082487367216, "loss": 11.6842, "step": 15114 }, { "epoch": 0.3163987272879511, "grad_norm": 0.31535589694976807, "learning_rate": 0.00019458011287924628, "loss": 11.696, "step": 15115 }, { "epoch": 0.31641966005191324, "grad_norm": 0.26587310433387756, "learning_rate": 0.00019457940083935385, "loss": 11.6709, "step": 15116 }, { "epoch": 0.3164405928158754, "grad_norm": 0.2879132926464081, "learning_rate": 0.00019457868875399518, "loss": 11.6789, "step": 15117 }, { "epoch": 0.3164615255798376, "grad_norm": 0.3102549612522125, "learning_rate": 0.00019457797662317066, "loss": 11.6743, "step": 15118 }, { "epoch": 0.31648245834379973, "grad_norm": 0.2729518711566925, "learning_rate": 0.00019457726444688063, "loss": 11.6736, "step": 15119 }, { "epoch": 0.3165033911077619, "grad_norm": 0.23883159458637238, "learning_rate": 0.0001945765522251254, "loss": 11.6786, "step": 15120 }, { "epoch": 0.316524323871724, "grad_norm": 0.33819854259490967, "learning_rate": 0.0001945758399579053, "loss": 11.6771, "step": 15121 }, { "epoch": 0.31654525663568617, "grad_norm": 0.2883945405483246, "learning_rate": 0.00019457512764522073, "loss": 11.6757, "step": 15122 }, { "epoch": 0.3165661893996483, "grad_norm": 0.2900153398513794, "learning_rate": 0.00019457441528707197, "loss": 11.6757, "step": 15123 }, { "epoch": 0.31658712216361046, "grad_norm": 0.2955652177333832, "learning_rate": 0.0001945737028834594, "loss": 11.6832, "step": 15124 }, { "epoch": 0.31660805492757266, "grad_norm": 0.2537615895271301, "learning_rate": 0.00019457299043438339, "loss": 11.6741, "step": 15125 }, { "epoch": 0.3166289876915348, "grad_norm": 0.22595766186714172, "learning_rate": 0.00019457227793984422, "loss": 11.674, "step": 15126 }, { "epoch": 0.31664992045549695, "grad_norm": 0.30844560265541077, "learning_rate": 0.00019457156539984228, "loss": 11.6653, "step": 15127 }, { "epoch": 0.3166708532194591, "grad_norm": 0.27219703793525696, "learning_rate": 0.00019457085281437788, "loss": 11.649, "step": 15128 }, { "epoch": 0.31669178598342124, "grad_norm": 0.3350715637207031, "learning_rate": 0.00019457014018345136, "loss": 11.655, "step": 15129 }, { "epoch": 0.3167127187473834, "grad_norm": 0.30879566073417664, "learning_rate": 0.0001945694275070631, "loss": 11.6819, "step": 15130 }, { "epoch": 0.3167336515113456, "grad_norm": 0.2561064064502716, "learning_rate": 0.0001945687147852134, "loss": 11.6939, "step": 15131 }, { "epoch": 0.3167545842753077, "grad_norm": 0.35817912220954895, "learning_rate": 0.00019456800201790266, "loss": 11.6707, "step": 15132 }, { "epoch": 0.31677551703926987, "grad_norm": 0.2842707931995392, "learning_rate": 0.00019456728920513115, "loss": 11.6693, "step": 15133 }, { "epoch": 0.316796449803232, "grad_norm": 0.2551674246788025, "learning_rate": 0.00019456657634689926, "loss": 11.6754, "step": 15134 }, { "epoch": 0.31681738256719416, "grad_norm": 0.3528452515602112, "learning_rate": 0.00019456586344320735, "loss": 11.7005, "step": 15135 }, { "epoch": 0.3168383153311563, "grad_norm": 0.2531086802482605, "learning_rate": 0.0001945651504940557, "loss": 11.6746, "step": 15136 }, { "epoch": 0.3168592480951185, "grad_norm": 0.7854019999504089, "learning_rate": 0.00019456443749944467, "loss": 11.6517, "step": 15137 }, { "epoch": 0.31688018085908065, "grad_norm": 0.29450100660324097, "learning_rate": 0.00019456372445937468, "loss": 11.6759, "step": 15138 }, { "epoch": 0.3169011136230428, "grad_norm": 0.2891371548175812, "learning_rate": 0.00019456301137384596, "loss": 11.6909, "step": 15139 }, { "epoch": 0.31692204638700494, "grad_norm": 0.23114635050296783, "learning_rate": 0.00019456229824285894, "loss": 11.6642, "step": 15140 }, { "epoch": 0.3169429791509671, "grad_norm": 0.24887458980083466, "learning_rate": 0.0001945615850664139, "loss": 11.6928, "step": 15141 }, { "epoch": 0.31696391191492923, "grad_norm": 0.30680692195892334, "learning_rate": 0.00019456087184451125, "loss": 11.6682, "step": 15142 }, { "epoch": 0.3169848446788914, "grad_norm": 0.3162968158721924, "learning_rate": 0.00019456015857715125, "loss": 11.6679, "step": 15143 }, { "epoch": 0.3170057774428536, "grad_norm": 0.2460947036743164, "learning_rate": 0.00019455944526433432, "loss": 11.6862, "step": 15144 }, { "epoch": 0.3170267102068157, "grad_norm": 0.28373488783836365, "learning_rate": 0.00019455873190606074, "loss": 11.6806, "step": 15145 }, { "epoch": 0.31704764297077787, "grad_norm": 0.2828312814235687, "learning_rate": 0.0001945580185023309, "loss": 11.6784, "step": 15146 }, { "epoch": 0.31706857573474, "grad_norm": 0.30142784118652344, "learning_rate": 0.0001945573050531451, "loss": 11.6556, "step": 15147 }, { "epoch": 0.31708950849870216, "grad_norm": 0.2590794563293457, "learning_rate": 0.00019455659155850373, "loss": 11.6795, "step": 15148 }, { "epoch": 0.3171104412626643, "grad_norm": 0.2911652624607086, "learning_rate": 0.00019455587801840712, "loss": 11.6786, "step": 15149 }, { "epoch": 0.3171313740266265, "grad_norm": 0.26803338527679443, "learning_rate": 0.0001945551644328556, "loss": 11.6909, "step": 15150 }, { "epoch": 0.31715230679058865, "grad_norm": 0.3305721580982208, "learning_rate": 0.00019455445080184954, "loss": 11.6782, "step": 15151 }, { "epoch": 0.3171732395545508, "grad_norm": 0.4192490875720978, "learning_rate": 0.00019455373712538922, "loss": 11.6751, "step": 15152 }, { "epoch": 0.31719417231851293, "grad_norm": 0.3438527286052704, "learning_rate": 0.00019455302340347505, "loss": 11.6879, "step": 15153 }, { "epoch": 0.3172151050824751, "grad_norm": 0.27598005533218384, "learning_rate": 0.00019455230963610733, "loss": 11.6593, "step": 15154 }, { "epoch": 0.3172360378464372, "grad_norm": 0.32184529304504395, "learning_rate": 0.0001945515958232864, "loss": 11.6726, "step": 15155 }, { "epoch": 0.3172569706103994, "grad_norm": 0.3551407754421234, "learning_rate": 0.00019455088196501267, "loss": 11.6816, "step": 15156 }, { "epoch": 0.31727790337436157, "grad_norm": 0.3520251512527466, "learning_rate": 0.00019455016806128642, "loss": 11.6626, "step": 15157 }, { "epoch": 0.3172988361383237, "grad_norm": 0.3116525709629059, "learning_rate": 0.00019454945411210802, "loss": 11.6751, "step": 15158 }, { "epoch": 0.31731976890228586, "grad_norm": 0.29390010237693787, "learning_rate": 0.00019454874011747776, "loss": 11.6453, "step": 15159 }, { "epoch": 0.317340701666248, "grad_norm": 0.3445691764354706, "learning_rate": 0.00019454802607739606, "loss": 11.685, "step": 15160 }, { "epoch": 0.31736163443021015, "grad_norm": 0.26904693245887756, "learning_rate": 0.00019454731199186322, "loss": 11.6768, "step": 15161 }, { "epoch": 0.3173825671941723, "grad_norm": 0.25350895524024963, "learning_rate": 0.0001945465978608796, "loss": 11.6683, "step": 15162 }, { "epoch": 0.3174034999581345, "grad_norm": 0.23100733757019043, "learning_rate": 0.00019454588368444554, "loss": 11.6724, "step": 15163 }, { "epoch": 0.31742443272209664, "grad_norm": 0.32241806387901306, "learning_rate": 0.00019454516946256135, "loss": 11.6733, "step": 15164 }, { "epoch": 0.3174453654860588, "grad_norm": 0.3592630922794342, "learning_rate": 0.00019454445519522743, "loss": 11.673, "step": 15165 }, { "epoch": 0.31746629825002093, "grad_norm": 0.2278292030096054, "learning_rate": 0.00019454374088244408, "loss": 11.6644, "step": 15166 }, { "epoch": 0.3174872310139831, "grad_norm": 0.25514882802963257, "learning_rate": 0.00019454302652421168, "loss": 11.6704, "step": 15167 }, { "epoch": 0.3175081637779452, "grad_norm": 0.2875540256500244, "learning_rate": 0.00019454231212053053, "loss": 11.6825, "step": 15168 }, { "epoch": 0.3175290965419074, "grad_norm": 0.3126726746559143, "learning_rate": 0.000194541597671401, "loss": 11.6739, "step": 15169 }, { "epoch": 0.31755002930586956, "grad_norm": 0.2879978120326996, "learning_rate": 0.00019454088317682344, "loss": 11.6805, "step": 15170 }, { "epoch": 0.3175709620698317, "grad_norm": 0.2484140545129776, "learning_rate": 0.00019454016863679817, "loss": 11.6682, "step": 15171 }, { "epoch": 0.31759189483379385, "grad_norm": 0.29266658425331116, "learning_rate": 0.00019453945405132557, "loss": 11.675, "step": 15172 }, { "epoch": 0.317612827597756, "grad_norm": 0.3623802363872528, "learning_rate": 0.00019453873942040594, "loss": 11.6602, "step": 15173 }, { "epoch": 0.31763376036171814, "grad_norm": 0.24366404116153717, "learning_rate": 0.00019453802474403965, "loss": 11.6628, "step": 15174 }, { "epoch": 0.3176546931256803, "grad_norm": 0.3521310091018677, "learning_rate": 0.00019453731002222702, "loss": 11.6766, "step": 15175 }, { "epoch": 0.3176756258896425, "grad_norm": 0.3334820866584778, "learning_rate": 0.00019453659525496843, "loss": 11.6756, "step": 15176 }, { "epoch": 0.31769655865360463, "grad_norm": 0.30272892117500305, "learning_rate": 0.0001945358804422642, "loss": 11.671, "step": 15177 }, { "epoch": 0.3177174914175668, "grad_norm": 0.24885261058807373, "learning_rate": 0.00019453516558411466, "loss": 11.6858, "step": 15178 }, { "epoch": 0.3177384241815289, "grad_norm": 0.3095758557319641, "learning_rate": 0.0001945344506805202, "loss": 11.6744, "step": 15179 }, { "epoch": 0.31775935694549107, "grad_norm": 0.29622697830200195, "learning_rate": 0.0001945337357314811, "loss": 11.6619, "step": 15180 }, { "epoch": 0.3177802897094532, "grad_norm": 0.25921890139579773, "learning_rate": 0.0001945330207369978, "loss": 11.6989, "step": 15181 }, { "epoch": 0.3178012224734154, "grad_norm": 0.33207881450653076, "learning_rate": 0.00019453230569707052, "loss": 11.6896, "step": 15182 }, { "epoch": 0.31782215523737756, "grad_norm": 0.32874467968940735, "learning_rate": 0.0001945315906116997, "loss": 11.6852, "step": 15183 }, { "epoch": 0.3178430880013397, "grad_norm": 0.30699917674064636, "learning_rate": 0.00019453087548088562, "loss": 11.6746, "step": 15184 }, { "epoch": 0.31786402076530185, "grad_norm": 0.24491830170154572, "learning_rate": 0.0001945301603046287, "loss": 11.6821, "step": 15185 }, { "epoch": 0.317884953529264, "grad_norm": 0.2463272362947464, "learning_rate": 0.00019452944508292918, "loss": 11.6732, "step": 15186 }, { "epoch": 0.31790588629322614, "grad_norm": 0.32851442694664, "learning_rate": 0.0001945287298157875, "loss": 11.6824, "step": 15187 }, { "epoch": 0.31792681905718834, "grad_norm": 0.2977944016456604, "learning_rate": 0.00019452801450320397, "loss": 11.6776, "step": 15188 }, { "epoch": 0.3179477518211505, "grad_norm": 0.28420406579971313, "learning_rate": 0.0001945272991451789, "loss": 11.6854, "step": 15189 }, { "epoch": 0.3179686845851126, "grad_norm": 0.2878025472164154, "learning_rate": 0.0001945265837417127, "loss": 11.6813, "step": 15190 }, { "epoch": 0.31798961734907477, "grad_norm": 0.27577781677246094, "learning_rate": 0.00019452586829280563, "loss": 11.6564, "step": 15191 }, { "epoch": 0.3180105501130369, "grad_norm": 0.29370084404945374, "learning_rate": 0.00019452515279845813, "loss": 11.6854, "step": 15192 }, { "epoch": 0.31803148287699906, "grad_norm": 0.3935089707374573, "learning_rate": 0.00019452443725867047, "loss": 11.6764, "step": 15193 }, { "epoch": 0.3180524156409612, "grad_norm": 0.28822535276412964, "learning_rate": 0.000194523721673443, "loss": 11.6676, "step": 15194 }, { "epoch": 0.3180733484049234, "grad_norm": 0.21240714192390442, "learning_rate": 0.0001945230060427761, "loss": 11.6678, "step": 15195 }, { "epoch": 0.31809428116888555, "grad_norm": 0.28103503584861755, "learning_rate": 0.0001945222903666701, "loss": 11.6827, "step": 15196 }, { "epoch": 0.3181152139328477, "grad_norm": 0.2505146861076355, "learning_rate": 0.00019452157464512534, "loss": 11.6783, "step": 15197 }, { "epoch": 0.31813614669680984, "grad_norm": 0.28148478269577026, "learning_rate": 0.00019452085887814216, "loss": 11.68, "step": 15198 }, { "epoch": 0.318157079460772, "grad_norm": 0.306077778339386, "learning_rate": 0.00019452014306572095, "loss": 11.6613, "step": 15199 }, { "epoch": 0.31817801222473413, "grad_norm": 0.2959590554237366, "learning_rate": 0.00019451942720786195, "loss": 11.6819, "step": 15200 }, { "epoch": 0.31819894498869633, "grad_norm": 0.2594701647758484, "learning_rate": 0.0001945187113045656, "loss": 11.6809, "step": 15201 }, { "epoch": 0.3182198777526585, "grad_norm": 0.33727672696113586, "learning_rate": 0.00019451799535583222, "loss": 11.6653, "step": 15202 }, { "epoch": 0.3182408105166206, "grad_norm": 0.26617202162742615, "learning_rate": 0.00019451727936166212, "loss": 11.6845, "step": 15203 }, { "epoch": 0.31826174328058277, "grad_norm": 0.2922731935977936, "learning_rate": 0.0001945165633220557, "loss": 11.6775, "step": 15204 }, { "epoch": 0.3182826760445449, "grad_norm": 0.22388991713523865, "learning_rate": 0.00019451584723701324, "loss": 11.6724, "step": 15205 }, { "epoch": 0.31830360880850705, "grad_norm": 0.2089536190032959, "learning_rate": 0.00019451513110653514, "loss": 11.6887, "step": 15206 }, { "epoch": 0.31832454157246926, "grad_norm": 0.24051766097545624, "learning_rate": 0.0001945144149306217, "loss": 11.6705, "step": 15207 }, { "epoch": 0.3183454743364314, "grad_norm": 0.30341142416000366, "learning_rate": 0.00019451369870927333, "loss": 11.6739, "step": 15208 }, { "epoch": 0.31836640710039354, "grad_norm": 0.30070003867149353, "learning_rate": 0.00019451298244249027, "loss": 11.6575, "step": 15209 }, { "epoch": 0.3183873398643557, "grad_norm": 0.2529660761356354, "learning_rate": 0.00019451226613027297, "loss": 11.6639, "step": 15210 }, { "epoch": 0.31840827262831783, "grad_norm": 0.29489678144454956, "learning_rate": 0.0001945115497726217, "loss": 11.6649, "step": 15211 }, { "epoch": 0.31842920539228, "grad_norm": 0.24981312453746796, "learning_rate": 0.00019451083336953685, "loss": 11.664, "step": 15212 }, { "epoch": 0.3184501381562421, "grad_norm": 0.25972265005111694, "learning_rate": 0.00019451011692101877, "loss": 11.674, "step": 15213 }, { "epoch": 0.3184710709202043, "grad_norm": 0.28760403394699097, "learning_rate": 0.00019450940042706777, "loss": 11.6842, "step": 15214 }, { "epoch": 0.31849200368416647, "grad_norm": 0.24343466758728027, "learning_rate": 0.0001945086838876842, "loss": 11.6658, "step": 15215 }, { "epoch": 0.3185129364481286, "grad_norm": 0.31968387961387634, "learning_rate": 0.0001945079673028684, "loss": 11.6685, "step": 15216 }, { "epoch": 0.31853386921209076, "grad_norm": 0.31134679913520813, "learning_rate": 0.00019450725067262075, "loss": 11.6678, "step": 15217 }, { "epoch": 0.3185548019760529, "grad_norm": 0.2502870261669159, "learning_rate": 0.00019450653399694155, "loss": 11.6583, "step": 15218 }, { "epoch": 0.31857573474001505, "grad_norm": 0.28321006894111633, "learning_rate": 0.00019450581727583117, "loss": 11.6735, "step": 15219 }, { "epoch": 0.31859666750397725, "grad_norm": 0.2640046775341034, "learning_rate": 0.00019450510050928997, "loss": 11.683, "step": 15220 }, { "epoch": 0.3186176002679394, "grad_norm": 0.22896185517311096, "learning_rate": 0.00019450438369731824, "loss": 11.6781, "step": 15221 }, { "epoch": 0.31863853303190154, "grad_norm": 0.23389752209186554, "learning_rate": 0.00019450366683991635, "loss": 11.6718, "step": 15222 }, { "epoch": 0.3186594657958637, "grad_norm": 0.2600420415401459, "learning_rate": 0.0001945029499370847, "loss": 11.653, "step": 15223 }, { "epoch": 0.31868039855982583, "grad_norm": 0.2590087056159973, "learning_rate": 0.00019450223298882356, "loss": 11.6648, "step": 15224 }, { "epoch": 0.318701331323788, "grad_norm": 0.2662627100944519, "learning_rate": 0.00019450151599513334, "loss": 11.6989, "step": 15225 }, { "epoch": 0.3187222640877502, "grad_norm": 0.26115772128105164, "learning_rate": 0.00019450079895601431, "loss": 11.6747, "step": 15226 }, { "epoch": 0.3187431968517123, "grad_norm": 0.28055933117866516, "learning_rate": 0.00019450008187146684, "loss": 11.6669, "step": 15227 }, { "epoch": 0.31876412961567446, "grad_norm": 0.2461353838443756, "learning_rate": 0.00019449936474149132, "loss": 11.6862, "step": 15228 }, { "epoch": 0.3187850623796366, "grad_norm": 0.25974082946777344, "learning_rate": 0.00019449864756608805, "loss": 11.6804, "step": 15229 }, { "epoch": 0.31880599514359875, "grad_norm": 0.2981007695198059, "learning_rate": 0.0001944979303452574, "loss": 11.6773, "step": 15230 }, { "epoch": 0.3188269279075609, "grad_norm": 0.3414466977119446, "learning_rate": 0.00019449721307899967, "loss": 11.6773, "step": 15231 }, { "epoch": 0.31884786067152304, "grad_norm": 0.25842586159706116, "learning_rate": 0.00019449649576731525, "loss": 11.6696, "step": 15232 }, { "epoch": 0.31886879343548524, "grad_norm": 0.3161969482898712, "learning_rate": 0.00019449577841020447, "loss": 11.6762, "step": 15233 }, { "epoch": 0.3188897261994474, "grad_norm": 0.2574119567871094, "learning_rate": 0.00019449506100766768, "loss": 11.6707, "step": 15234 }, { "epoch": 0.31891065896340953, "grad_norm": 0.24825674295425415, "learning_rate": 0.0001944943435597052, "loss": 11.6807, "step": 15235 }, { "epoch": 0.3189315917273717, "grad_norm": 0.2982928454875946, "learning_rate": 0.00019449362606631745, "loss": 11.665, "step": 15236 }, { "epoch": 0.3189525244913338, "grad_norm": 0.28624460101127625, "learning_rate": 0.0001944929085275047, "loss": 11.6797, "step": 15237 }, { "epoch": 0.31897345725529597, "grad_norm": 0.3351481556892395, "learning_rate": 0.00019449219094326727, "loss": 11.6673, "step": 15238 }, { "epoch": 0.31899439001925817, "grad_norm": 0.2671806812286377, "learning_rate": 0.00019449147331360557, "loss": 11.6731, "step": 15239 }, { "epoch": 0.3190153227832203, "grad_norm": 0.29030731320381165, "learning_rate": 0.00019449075563851995, "loss": 11.6821, "step": 15240 }, { "epoch": 0.31903625554718246, "grad_norm": 0.2663205564022064, "learning_rate": 0.0001944900379180107, "loss": 11.6724, "step": 15241 }, { "epoch": 0.3190571883111446, "grad_norm": 0.23042915761470795, "learning_rate": 0.00019448932015207824, "loss": 11.6701, "step": 15242 }, { "epoch": 0.31907812107510675, "grad_norm": 0.2779068648815155, "learning_rate": 0.0001944886023407228, "loss": 11.6797, "step": 15243 }, { "epoch": 0.3190990538390689, "grad_norm": 0.25658655166625977, "learning_rate": 0.00019448788448394485, "loss": 11.6656, "step": 15244 }, { "epoch": 0.3191199866030311, "grad_norm": 0.2791410982608795, "learning_rate": 0.00019448716658174468, "loss": 11.6943, "step": 15245 }, { "epoch": 0.31914091936699324, "grad_norm": 0.2617979347705841, "learning_rate": 0.00019448644863412262, "loss": 11.6793, "step": 15246 }, { "epoch": 0.3191618521309554, "grad_norm": 0.27724412083625793, "learning_rate": 0.00019448573064107904, "loss": 11.6759, "step": 15247 }, { "epoch": 0.3191827848949175, "grad_norm": 0.2956693172454834, "learning_rate": 0.00019448501260261426, "loss": 11.6613, "step": 15248 }, { "epoch": 0.31920371765887967, "grad_norm": 0.27813395857810974, "learning_rate": 0.00019448429451872864, "loss": 11.6765, "step": 15249 }, { "epoch": 0.3192246504228418, "grad_norm": 0.28767338395118713, "learning_rate": 0.00019448357638942256, "loss": 11.6881, "step": 15250 }, { "epoch": 0.31924558318680396, "grad_norm": 0.2696995139122009, "learning_rate": 0.0001944828582146963, "loss": 11.6759, "step": 15251 }, { "epoch": 0.31926651595076616, "grad_norm": 0.2463035136461258, "learning_rate": 0.00019448213999455024, "loss": 11.6636, "step": 15252 }, { "epoch": 0.3192874487147283, "grad_norm": 0.25510814785957336, "learning_rate": 0.00019448142172898473, "loss": 11.6769, "step": 15253 }, { "epoch": 0.31930838147869045, "grad_norm": 0.2572256326675415, "learning_rate": 0.0001944807034180001, "loss": 11.671, "step": 15254 }, { "epoch": 0.3193293142426526, "grad_norm": 0.30876174569129944, "learning_rate": 0.0001944799850615967, "loss": 11.6711, "step": 15255 }, { "epoch": 0.31935024700661474, "grad_norm": 0.2915526032447815, "learning_rate": 0.00019447926665977486, "loss": 11.6876, "step": 15256 }, { "epoch": 0.3193711797705769, "grad_norm": 0.36871013045310974, "learning_rate": 0.00019447854821253498, "loss": 11.6556, "step": 15257 }, { "epoch": 0.3193921125345391, "grad_norm": 0.233287051320076, "learning_rate": 0.00019447782971987736, "loss": 11.6681, "step": 15258 }, { "epoch": 0.31941304529850123, "grad_norm": 0.24366940557956696, "learning_rate": 0.00019447711118180237, "loss": 11.6878, "step": 15259 }, { "epoch": 0.3194339780624634, "grad_norm": 0.2763477861881256, "learning_rate": 0.0001944763925983103, "loss": 11.6782, "step": 15260 }, { "epoch": 0.3194549108264255, "grad_norm": 0.2877899408340454, "learning_rate": 0.00019447567396940155, "loss": 11.6742, "step": 15261 }, { "epoch": 0.31947584359038766, "grad_norm": 0.2788149118423462, "learning_rate": 0.00019447495529507648, "loss": 11.6791, "step": 15262 }, { "epoch": 0.3194967763543498, "grad_norm": 0.2671200931072235, "learning_rate": 0.00019447423657533534, "loss": 11.6804, "step": 15263 }, { "epoch": 0.31951770911831195, "grad_norm": 0.2820013165473938, "learning_rate": 0.0001944735178101786, "loss": 11.6789, "step": 15264 }, { "epoch": 0.31953864188227415, "grad_norm": 0.24852539598941803, "learning_rate": 0.0001944727989996065, "loss": 11.6768, "step": 15265 }, { "epoch": 0.3195595746462363, "grad_norm": 0.30417943000793457, "learning_rate": 0.00019447208014361947, "loss": 11.6588, "step": 15266 }, { "epoch": 0.31958050741019844, "grad_norm": 0.3569223880767822, "learning_rate": 0.00019447136124221782, "loss": 11.6837, "step": 15267 }, { "epoch": 0.3196014401741606, "grad_norm": 0.28340375423431396, "learning_rate": 0.00019447064229540187, "loss": 11.67, "step": 15268 }, { "epoch": 0.31962237293812273, "grad_norm": 0.23567722737789154, "learning_rate": 0.000194469923303172, "loss": 11.6617, "step": 15269 }, { "epoch": 0.3196433057020849, "grad_norm": 0.2723785638809204, "learning_rate": 0.00019446920426552855, "loss": 11.6756, "step": 15270 }, { "epoch": 0.3196642384660471, "grad_norm": 0.27521851658821106, "learning_rate": 0.00019446848518247186, "loss": 11.6679, "step": 15271 }, { "epoch": 0.3196851712300092, "grad_norm": 0.21989403665065765, "learning_rate": 0.00019446776605400225, "loss": 11.6686, "step": 15272 }, { "epoch": 0.31970610399397137, "grad_norm": 0.3371773660182953, "learning_rate": 0.0001944670468801201, "loss": 11.7029, "step": 15273 }, { "epoch": 0.3197270367579335, "grad_norm": 0.3412780463695526, "learning_rate": 0.00019446632766082576, "loss": 11.6745, "step": 15274 }, { "epoch": 0.31974796952189566, "grad_norm": 0.32919323444366455, "learning_rate": 0.00019446560839611956, "loss": 11.6766, "step": 15275 }, { "epoch": 0.3197689022858578, "grad_norm": 0.27262741327285767, "learning_rate": 0.00019446488908600186, "loss": 11.675, "step": 15276 }, { "epoch": 0.31978983504982, "grad_norm": 0.27911576628685, "learning_rate": 0.000194464169730473, "loss": 11.6652, "step": 15277 }, { "epoch": 0.31981076781378215, "grad_norm": 0.2810066044330597, "learning_rate": 0.0001944634503295333, "loss": 11.6616, "step": 15278 }, { "epoch": 0.3198317005777443, "grad_norm": 0.30277079343795776, "learning_rate": 0.00019446273088318314, "loss": 11.6675, "step": 15279 }, { "epoch": 0.31985263334170644, "grad_norm": 0.25106343626976013, "learning_rate": 0.00019446201139142284, "loss": 11.6745, "step": 15280 }, { "epoch": 0.3198735661056686, "grad_norm": 0.2887028157711029, "learning_rate": 0.00019446129185425278, "loss": 11.6687, "step": 15281 }, { "epoch": 0.31989449886963073, "grad_norm": 0.24415989220142365, "learning_rate": 0.00019446057227167325, "loss": 11.6802, "step": 15282 }, { "epoch": 0.3199154316335929, "grad_norm": 0.26612287759780884, "learning_rate": 0.00019445985264368462, "loss": 11.6724, "step": 15283 }, { "epoch": 0.3199363643975551, "grad_norm": 0.29034608602523804, "learning_rate": 0.00019445913297028728, "loss": 11.6754, "step": 15284 }, { "epoch": 0.3199572971615172, "grad_norm": 0.31968754529953003, "learning_rate": 0.00019445841325148157, "loss": 11.685, "step": 15285 }, { "epoch": 0.31997822992547936, "grad_norm": 0.3009254038333893, "learning_rate": 0.00019445769348726773, "loss": 11.659, "step": 15286 }, { "epoch": 0.3199991626894415, "grad_norm": 0.6492106914520264, "learning_rate": 0.00019445697367764624, "loss": 11.6897, "step": 15287 }, { "epoch": 0.32002009545340365, "grad_norm": 0.25122812390327454, "learning_rate": 0.00019445625382261738, "loss": 11.6725, "step": 15288 }, { "epoch": 0.3200410282173658, "grad_norm": 0.4553796648979187, "learning_rate": 0.00019445553392218149, "loss": 11.6731, "step": 15289 }, { "epoch": 0.320061960981328, "grad_norm": 0.2859881818294525, "learning_rate": 0.00019445481397633896, "loss": 11.6779, "step": 15290 }, { "epoch": 0.32008289374529014, "grad_norm": 0.3601688742637634, "learning_rate": 0.00019445409398509007, "loss": 11.6641, "step": 15291 }, { "epoch": 0.3201038265092523, "grad_norm": 0.23237401247024536, "learning_rate": 0.00019445337394843524, "loss": 11.6752, "step": 15292 }, { "epoch": 0.32012475927321443, "grad_norm": 0.3106730878353119, "learning_rate": 0.00019445265386637477, "loss": 11.6785, "step": 15293 }, { "epoch": 0.3201456920371766, "grad_norm": 0.3327590823173523, "learning_rate": 0.000194451933738909, "loss": 11.6819, "step": 15294 }, { "epoch": 0.3201666248011387, "grad_norm": 1.0684560537338257, "learning_rate": 0.0001944512135660383, "loss": 11.6715, "step": 15295 }, { "epoch": 0.3201875575651009, "grad_norm": 0.2523183226585388, "learning_rate": 0.00019445049334776302, "loss": 11.6778, "step": 15296 }, { "epoch": 0.32020849032906307, "grad_norm": 0.26675301790237427, "learning_rate": 0.00019444977308408348, "loss": 11.6777, "step": 15297 }, { "epoch": 0.3202294230930252, "grad_norm": 0.27211621403694153, "learning_rate": 0.00019444905277500008, "loss": 11.6767, "step": 15298 }, { "epoch": 0.32025035585698736, "grad_norm": 0.3240286111831665, "learning_rate": 0.00019444833242051308, "loss": 11.6671, "step": 15299 }, { "epoch": 0.3202712886209495, "grad_norm": 0.3157002031803131, "learning_rate": 0.0001944476120206229, "loss": 11.6754, "step": 15300 }, { "epoch": 0.32029222138491165, "grad_norm": 0.2506087124347687, "learning_rate": 0.00019444689157532981, "loss": 11.6888, "step": 15301 }, { "epoch": 0.3203131541488738, "grad_norm": 0.28578001260757446, "learning_rate": 0.00019444617108463428, "loss": 11.669, "step": 15302 }, { "epoch": 0.320334086912836, "grad_norm": 0.27218079566955566, "learning_rate": 0.00019444545054853656, "loss": 11.6785, "step": 15303 }, { "epoch": 0.32035501967679814, "grad_norm": 0.29271358251571655, "learning_rate": 0.000194444729967037, "loss": 11.6735, "step": 15304 }, { "epoch": 0.3203759524407603, "grad_norm": 0.27079424262046814, "learning_rate": 0.00019444400934013598, "loss": 11.6723, "step": 15305 }, { "epoch": 0.3203968852047224, "grad_norm": 0.2543182075023651, "learning_rate": 0.0001944432886678338, "loss": 11.6843, "step": 15306 }, { "epoch": 0.32041781796868457, "grad_norm": 0.29395076632499695, "learning_rate": 0.0001944425679501309, "loss": 11.6698, "step": 15307 }, { "epoch": 0.3204387507326467, "grad_norm": 0.3320311903953552, "learning_rate": 0.0001944418471870275, "loss": 11.6859, "step": 15308 }, { "epoch": 0.3204596834966089, "grad_norm": 0.318998247385025, "learning_rate": 0.00019444112637852405, "loss": 11.6752, "step": 15309 }, { "epoch": 0.32048061626057106, "grad_norm": 0.31653308868408203, "learning_rate": 0.00019444040552462087, "loss": 11.6669, "step": 15310 }, { "epoch": 0.3205015490245332, "grad_norm": 0.33708226680755615, "learning_rate": 0.00019443968462531824, "loss": 11.6775, "step": 15311 }, { "epoch": 0.32052248178849535, "grad_norm": 0.26141592860221863, "learning_rate": 0.00019443896368061663, "loss": 11.6853, "step": 15312 }, { "epoch": 0.3205434145524575, "grad_norm": 0.3458884358406067, "learning_rate": 0.00019443824269051627, "loss": 11.6661, "step": 15313 }, { "epoch": 0.32056434731641964, "grad_norm": 0.2693050503730774, "learning_rate": 0.00019443752165501757, "loss": 11.6848, "step": 15314 }, { "epoch": 0.32058528008038184, "grad_norm": 0.291940838098526, "learning_rate": 0.00019443680057412086, "loss": 11.6804, "step": 15315 }, { "epoch": 0.320606212844344, "grad_norm": 0.2626797556877136, "learning_rate": 0.00019443607944782647, "loss": 11.6689, "step": 15316 }, { "epoch": 0.32062714560830613, "grad_norm": 0.2981417775154114, "learning_rate": 0.0001944353582761348, "loss": 11.6852, "step": 15317 }, { "epoch": 0.3206480783722683, "grad_norm": 0.25685593485832214, "learning_rate": 0.00019443463705904614, "loss": 11.6828, "step": 15318 }, { "epoch": 0.3206690111362304, "grad_norm": 0.2802070379257202, "learning_rate": 0.00019443391579656086, "loss": 11.6641, "step": 15319 }, { "epoch": 0.32068994390019256, "grad_norm": 0.2800169885158539, "learning_rate": 0.0001944331944886793, "loss": 11.6667, "step": 15320 }, { "epoch": 0.3207108766641547, "grad_norm": 0.29404953122138977, "learning_rate": 0.0001944324731354018, "loss": 11.6788, "step": 15321 }, { "epoch": 0.3207318094281169, "grad_norm": 0.24702775478363037, "learning_rate": 0.00019443175173672873, "loss": 11.6734, "step": 15322 }, { "epoch": 0.32075274219207905, "grad_norm": 0.33493417501449585, "learning_rate": 0.00019443103029266042, "loss": 11.6934, "step": 15323 }, { "epoch": 0.3207736749560412, "grad_norm": 0.21231912076473236, "learning_rate": 0.00019443030880319723, "loss": 11.6748, "step": 15324 }, { "epoch": 0.32079460772000334, "grad_norm": 0.2672772705554962, "learning_rate": 0.00019442958726833949, "loss": 11.6844, "step": 15325 }, { "epoch": 0.3208155404839655, "grad_norm": 0.270938515663147, "learning_rate": 0.00019442886568808758, "loss": 11.6741, "step": 15326 }, { "epoch": 0.32083647324792763, "grad_norm": 0.24914740025997162, "learning_rate": 0.00019442814406244182, "loss": 11.6839, "step": 15327 }, { "epoch": 0.32085740601188983, "grad_norm": 0.25854042172431946, "learning_rate": 0.00019442742239140254, "loss": 11.6802, "step": 15328 }, { "epoch": 0.320878338775852, "grad_norm": 0.2976126968860626, "learning_rate": 0.0001944267006749701, "loss": 11.6619, "step": 15329 }, { "epoch": 0.3208992715398141, "grad_norm": 0.3231934905052185, "learning_rate": 0.00019442597891314485, "loss": 11.6748, "step": 15330 }, { "epoch": 0.32092020430377627, "grad_norm": 0.28173595666885376, "learning_rate": 0.00019442525710592715, "loss": 11.6863, "step": 15331 }, { "epoch": 0.3209411370677384, "grad_norm": 0.25621455907821655, "learning_rate": 0.00019442453525331732, "loss": 11.6821, "step": 15332 }, { "epoch": 0.32096206983170056, "grad_norm": 0.26730793714523315, "learning_rate": 0.00019442381335531573, "loss": 11.6832, "step": 15333 }, { "epoch": 0.32098300259566276, "grad_norm": 0.24657262861728668, "learning_rate": 0.00019442309141192275, "loss": 11.6659, "step": 15334 }, { "epoch": 0.3210039353596249, "grad_norm": 0.2580723166465759, "learning_rate": 0.00019442236942313868, "loss": 11.6733, "step": 15335 }, { "epoch": 0.32102486812358705, "grad_norm": 0.35337069630622864, "learning_rate": 0.00019442164738896388, "loss": 11.671, "step": 15336 }, { "epoch": 0.3210458008875492, "grad_norm": 0.23070713877677917, "learning_rate": 0.0001944209253093987, "loss": 11.6869, "step": 15337 }, { "epoch": 0.32106673365151134, "grad_norm": 0.2676820755004883, "learning_rate": 0.0001944202031844435, "loss": 11.6712, "step": 15338 }, { "epoch": 0.3210876664154735, "grad_norm": 0.25140243768692017, "learning_rate": 0.0001944194810140986, "loss": 11.6849, "step": 15339 }, { "epoch": 0.3211085991794356, "grad_norm": 0.2690705358982086, "learning_rate": 0.00019441875879836436, "loss": 11.685, "step": 15340 }, { "epoch": 0.32112953194339783, "grad_norm": 0.2622624635696411, "learning_rate": 0.00019441803653724114, "loss": 11.6807, "step": 15341 }, { "epoch": 0.32115046470736, "grad_norm": 0.283407598733902, "learning_rate": 0.00019441731423072928, "loss": 11.6869, "step": 15342 }, { "epoch": 0.3211713974713221, "grad_norm": 0.2893807888031006, "learning_rate": 0.00019441659187882911, "loss": 11.6768, "step": 15343 }, { "epoch": 0.32119233023528426, "grad_norm": 0.38948729634284973, "learning_rate": 0.00019441586948154102, "loss": 11.6853, "step": 15344 }, { "epoch": 0.3212132629992464, "grad_norm": 0.34534355998039246, "learning_rate": 0.00019441514703886532, "loss": 11.6808, "step": 15345 }, { "epoch": 0.32123419576320855, "grad_norm": 0.4080726206302643, "learning_rate": 0.00019441442455080238, "loss": 11.6972, "step": 15346 }, { "epoch": 0.32125512852717075, "grad_norm": 0.24237646162509918, "learning_rate": 0.0001944137020173525, "loss": 11.6617, "step": 15347 }, { "epoch": 0.3212760612911329, "grad_norm": 0.3018282651901245, "learning_rate": 0.00019441297943851608, "loss": 11.6662, "step": 15348 }, { "epoch": 0.32129699405509504, "grad_norm": 0.3316105008125305, "learning_rate": 0.00019441225681429345, "loss": 11.6782, "step": 15349 }, { "epoch": 0.3213179268190572, "grad_norm": 0.2477363646030426, "learning_rate": 0.00019441153414468496, "loss": 11.6651, "step": 15350 }, { "epoch": 0.32133885958301933, "grad_norm": 0.2949419915676117, "learning_rate": 0.00019441081142969094, "loss": 11.6739, "step": 15351 }, { "epoch": 0.3213597923469815, "grad_norm": 0.3418828845024109, "learning_rate": 0.00019441008866931175, "loss": 11.6702, "step": 15352 }, { "epoch": 0.3213807251109436, "grad_norm": 0.3422885537147522, "learning_rate": 0.00019440936586354775, "loss": 11.6737, "step": 15353 }, { "epoch": 0.3214016578749058, "grad_norm": 0.2613753378391266, "learning_rate": 0.0001944086430123993, "loss": 11.6731, "step": 15354 }, { "epoch": 0.32142259063886797, "grad_norm": 0.26062890887260437, "learning_rate": 0.00019440792011586665, "loss": 11.6791, "step": 15355 }, { "epoch": 0.3214435234028301, "grad_norm": 0.2412729412317276, "learning_rate": 0.00019440719717395027, "loss": 11.6833, "step": 15356 }, { "epoch": 0.32146445616679226, "grad_norm": 0.28921613097190857, "learning_rate": 0.00019440647418665047, "loss": 11.6739, "step": 15357 }, { "epoch": 0.3214853889307544, "grad_norm": 0.2794100344181061, "learning_rate": 0.00019440575115396756, "loss": 11.6646, "step": 15358 }, { "epoch": 0.32150632169471655, "grad_norm": 0.28156206011772156, "learning_rate": 0.00019440502807590196, "loss": 11.6744, "step": 15359 }, { "epoch": 0.32152725445867875, "grad_norm": 0.2577860355377197, "learning_rate": 0.00019440430495245393, "loss": 11.6757, "step": 15360 }, { "epoch": 0.3215481872226409, "grad_norm": 0.25515663623809814, "learning_rate": 0.00019440358178362383, "loss": 11.6863, "step": 15361 }, { "epoch": 0.32156911998660304, "grad_norm": 0.2851530909538269, "learning_rate": 0.00019440285856941208, "loss": 11.6824, "step": 15362 }, { "epoch": 0.3215900527505652, "grad_norm": 0.27155643701553345, "learning_rate": 0.00019440213530981897, "loss": 11.6902, "step": 15363 }, { "epoch": 0.3216109855145273, "grad_norm": 0.28141728043556213, "learning_rate": 0.00019440141200484487, "loss": 11.6855, "step": 15364 }, { "epoch": 0.32163191827848947, "grad_norm": 0.27321749925613403, "learning_rate": 0.00019440068865449013, "loss": 11.6817, "step": 15365 }, { "epoch": 0.32165285104245167, "grad_norm": 0.24851641058921814, "learning_rate": 0.00019439996525875505, "loss": 11.6646, "step": 15366 }, { "epoch": 0.3216737838064138, "grad_norm": 0.24654585123062134, "learning_rate": 0.00019439924181764004, "loss": 11.6686, "step": 15367 }, { "epoch": 0.32169471657037596, "grad_norm": 0.25809359550476074, "learning_rate": 0.00019439851833114542, "loss": 11.6949, "step": 15368 }, { "epoch": 0.3217156493343381, "grad_norm": 0.22639161348342896, "learning_rate": 0.00019439779479927153, "loss": 11.665, "step": 15369 }, { "epoch": 0.32173658209830025, "grad_norm": 0.2898156940937042, "learning_rate": 0.00019439707122201875, "loss": 11.6737, "step": 15370 }, { "epoch": 0.3217575148622624, "grad_norm": 0.2950102388858795, "learning_rate": 0.0001943963475993874, "loss": 11.6537, "step": 15371 }, { "epoch": 0.32177844762622454, "grad_norm": 0.35115838050842285, "learning_rate": 0.00019439562393137782, "loss": 11.6866, "step": 15372 }, { "epoch": 0.32179938039018674, "grad_norm": 0.2392311990261078, "learning_rate": 0.00019439490021799037, "loss": 11.6763, "step": 15373 }, { "epoch": 0.3218203131541489, "grad_norm": 0.3254365622997284, "learning_rate": 0.00019439417645922543, "loss": 11.6732, "step": 15374 }, { "epoch": 0.32184124591811103, "grad_norm": 0.3150182068347931, "learning_rate": 0.00019439345265508332, "loss": 11.6761, "step": 15375 }, { "epoch": 0.3218621786820732, "grad_norm": 0.2324720323085785, "learning_rate": 0.00019439272880556434, "loss": 11.6623, "step": 15376 }, { "epoch": 0.3218831114460353, "grad_norm": 0.2651022672653198, "learning_rate": 0.00019439200491066892, "loss": 11.6795, "step": 15377 }, { "epoch": 0.32190404420999746, "grad_norm": 0.28191208839416504, "learning_rate": 0.00019439128097039737, "loss": 11.6657, "step": 15378 }, { "epoch": 0.32192497697395966, "grad_norm": 0.21501824259757996, "learning_rate": 0.00019439055698475, "loss": 11.6717, "step": 15379 }, { "epoch": 0.3219459097379218, "grad_norm": 0.2992693781852722, "learning_rate": 0.00019438983295372726, "loss": 11.6735, "step": 15380 }, { "epoch": 0.32196684250188395, "grad_norm": 0.24965377151966095, "learning_rate": 0.0001943891088773294, "loss": 11.6756, "step": 15381 }, { "epoch": 0.3219877752658461, "grad_norm": 0.2579738199710846, "learning_rate": 0.0001943883847555568, "loss": 11.6628, "step": 15382 }, { "epoch": 0.32200870802980824, "grad_norm": 0.2827592194080353, "learning_rate": 0.00019438766058840981, "loss": 11.6734, "step": 15383 }, { "epoch": 0.3220296407937704, "grad_norm": 0.25359871983528137, "learning_rate": 0.00019438693637588878, "loss": 11.6735, "step": 15384 }, { "epoch": 0.3220505735577326, "grad_norm": 0.4095180332660675, "learning_rate": 0.00019438621211799407, "loss": 11.6841, "step": 15385 }, { "epoch": 0.32207150632169473, "grad_norm": 0.28893589973449707, "learning_rate": 0.00019438548781472602, "loss": 11.6724, "step": 15386 }, { "epoch": 0.3220924390856569, "grad_norm": 0.26723721623420715, "learning_rate": 0.00019438476346608495, "loss": 11.6755, "step": 15387 }, { "epoch": 0.322113371849619, "grad_norm": 0.2356197088956833, "learning_rate": 0.00019438403907207126, "loss": 11.6677, "step": 15388 }, { "epoch": 0.32213430461358117, "grad_norm": 0.27853262424468994, "learning_rate": 0.00019438331463268526, "loss": 11.6687, "step": 15389 }, { "epoch": 0.3221552373775433, "grad_norm": 0.21466681361198425, "learning_rate": 0.0001943825901479273, "loss": 11.6849, "step": 15390 }, { "epoch": 0.32217617014150546, "grad_norm": 0.31078633666038513, "learning_rate": 0.00019438186561779777, "loss": 11.6789, "step": 15391 }, { "epoch": 0.32219710290546766, "grad_norm": 0.27509963512420654, "learning_rate": 0.00019438114104229696, "loss": 11.6802, "step": 15392 }, { "epoch": 0.3222180356694298, "grad_norm": 0.3565348982810974, "learning_rate": 0.00019438041642142525, "loss": 11.6618, "step": 15393 }, { "epoch": 0.32223896843339195, "grad_norm": 0.25118714570999146, "learning_rate": 0.00019437969175518295, "loss": 11.6629, "step": 15394 }, { "epoch": 0.3222599011973541, "grad_norm": 0.2591386139392853, "learning_rate": 0.0001943789670435705, "loss": 11.6939, "step": 15395 }, { "epoch": 0.32228083396131624, "grad_norm": 0.2188371866941452, "learning_rate": 0.00019437824228658816, "loss": 11.6854, "step": 15396 }, { "epoch": 0.3223017667252784, "grad_norm": 0.2878488600254059, "learning_rate": 0.0001943775174842363, "loss": 11.6732, "step": 15397 }, { "epoch": 0.3223226994892406, "grad_norm": 0.23676562309265137, "learning_rate": 0.0001943767926365153, "loss": 11.6676, "step": 15398 }, { "epoch": 0.3223436322532027, "grad_norm": 0.3054056167602539, "learning_rate": 0.00019437606774342544, "loss": 11.676, "step": 15399 }, { "epoch": 0.3223645650171649, "grad_norm": 0.24836242198944092, "learning_rate": 0.00019437534280496713, "loss": 11.6792, "step": 15400 }, { "epoch": 0.322385497781127, "grad_norm": 0.2857871949672699, "learning_rate": 0.00019437461782114072, "loss": 11.6792, "step": 15401 }, { "epoch": 0.32240643054508916, "grad_norm": 0.2785317897796631, "learning_rate": 0.00019437389279194654, "loss": 11.6647, "step": 15402 }, { "epoch": 0.3224273633090513, "grad_norm": 0.3077442944049835, "learning_rate": 0.00019437316771738492, "loss": 11.6763, "step": 15403 }, { "epoch": 0.3224482960730135, "grad_norm": 0.3523654639720917, "learning_rate": 0.00019437244259745625, "loss": 11.6811, "step": 15404 }, { "epoch": 0.32246922883697565, "grad_norm": 0.3363371193408966, "learning_rate": 0.00019437171743216084, "loss": 11.6721, "step": 15405 }, { "epoch": 0.3224901616009378, "grad_norm": 0.27205872535705566, "learning_rate": 0.00019437099222149903, "loss": 11.6761, "step": 15406 }, { "epoch": 0.32251109436489994, "grad_norm": 0.2594752609729767, "learning_rate": 0.00019437026696547124, "loss": 11.6686, "step": 15407 }, { "epoch": 0.3225320271288621, "grad_norm": 0.23721732199192047, "learning_rate": 0.00019436954166407775, "loss": 11.6692, "step": 15408 }, { "epoch": 0.32255295989282423, "grad_norm": 0.22204725444316864, "learning_rate": 0.00019436881631731894, "loss": 11.6741, "step": 15409 }, { "epoch": 0.3225738926567864, "grad_norm": 0.21418800950050354, "learning_rate": 0.00019436809092519513, "loss": 11.6872, "step": 15410 }, { "epoch": 0.3225948254207486, "grad_norm": 0.3048327565193176, "learning_rate": 0.0001943673654877067, "loss": 11.6752, "step": 15411 }, { "epoch": 0.3226157581847107, "grad_norm": 0.2481425404548645, "learning_rate": 0.00019436664000485398, "loss": 11.6438, "step": 15412 }, { "epoch": 0.32263669094867287, "grad_norm": 0.24773362278938293, "learning_rate": 0.00019436591447663735, "loss": 11.6829, "step": 15413 }, { "epoch": 0.322657623712635, "grad_norm": 0.28099963068962097, "learning_rate": 0.0001943651889030571, "loss": 11.6679, "step": 15414 }, { "epoch": 0.32267855647659716, "grad_norm": 0.25097522139549255, "learning_rate": 0.00019436446328411364, "loss": 11.6571, "step": 15415 }, { "epoch": 0.3226994892405593, "grad_norm": 0.24616935849189758, "learning_rate": 0.00019436373761980727, "loss": 11.6675, "step": 15416 }, { "epoch": 0.3227204220045215, "grad_norm": 0.21518494188785553, "learning_rate": 0.00019436301191013837, "loss": 11.667, "step": 15417 }, { "epoch": 0.32274135476848365, "grad_norm": 0.39363616704940796, "learning_rate": 0.00019436228615510727, "loss": 11.6805, "step": 15418 }, { "epoch": 0.3227622875324458, "grad_norm": 0.2695038318634033, "learning_rate": 0.00019436156035471434, "loss": 11.6922, "step": 15419 }, { "epoch": 0.32278322029640794, "grad_norm": 0.30071020126342773, "learning_rate": 0.0001943608345089599, "loss": 11.683, "step": 15420 }, { "epoch": 0.3228041530603701, "grad_norm": 0.24839237332344055, "learning_rate": 0.00019436010861784432, "loss": 11.6658, "step": 15421 }, { "epoch": 0.3228250858243322, "grad_norm": 0.29768767952919006, "learning_rate": 0.00019435938268136794, "loss": 11.6791, "step": 15422 }, { "epoch": 0.3228460185882944, "grad_norm": 0.33917707204818726, "learning_rate": 0.00019435865669953115, "loss": 11.6678, "step": 15423 }, { "epoch": 0.32286695135225657, "grad_norm": 0.22656165063381195, "learning_rate": 0.00019435793067233424, "loss": 11.6778, "step": 15424 }, { "epoch": 0.3228878841162187, "grad_norm": 0.29004114866256714, "learning_rate": 0.00019435720459977756, "loss": 11.6579, "step": 15425 }, { "epoch": 0.32290881688018086, "grad_norm": 0.27163970470428467, "learning_rate": 0.00019435647848186152, "loss": 11.6764, "step": 15426 }, { "epoch": 0.322929749644143, "grad_norm": 0.4446371793746948, "learning_rate": 0.0001943557523185864, "loss": 11.6596, "step": 15427 }, { "epoch": 0.32295068240810515, "grad_norm": 0.31760066747665405, "learning_rate": 0.00019435502610995258, "loss": 11.6864, "step": 15428 }, { "epoch": 0.3229716151720673, "grad_norm": 0.3141096532344818, "learning_rate": 0.00019435429985596042, "loss": 11.6589, "step": 15429 }, { "epoch": 0.3229925479360295, "grad_norm": 0.29394254088401794, "learning_rate": 0.00019435357355661025, "loss": 11.6814, "step": 15430 }, { "epoch": 0.32301348069999164, "grad_norm": 0.3198721706867218, "learning_rate": 0.00019435284721190245, "loss": 11.6678, "step": 15431 }, { "epoch": 0.3230344134639538, "grad_norm": 0.3188283443450928, "learning_rate": 0.00019435212082183733, "loss": 11.6639, "step": 15432 }, { "epoch": 0.32305534622791593, "grad_norm": 0.28411662578582764, "learning_rate": 0.00019435139438641526, "loss": 11.6849, "step": 15433 }, { "epoch": 0.3230762789918781, "grad_norm": 0.25803911685943604, "learning_rate": 0.00019435066790563657, "loss": 11.6702, "step": 15434 }, { "epoch": 0.3230972117558402, "grad_norm": 0.2814268469810486, "learning_rate": 0.00019434994137950163, "loss": 11.6701, "step": 15435 }, { "epoch": 0.3231181445198024, "grad_norm": 0.29663681983947754, "learning_rate": 0.00019434921480801075, "loss": 11.6742, "step": 15436 }, { "epoch": 0.32313907728376456, "grad_norm": 0.2759396731853485, "learning_rate": 0.00019434848819116436, "loss": 11.6851, "step": 15437 }, { "epoch": 0.3231600100477267, "grad_norm": 0.28386545181274414, "learning_rate": 0.00019434776152896273, "loss": 11.6845, "step": 15438 }, { "epoch": 0.32318094281168885, "grad_norm": 0.2830076813697815, "learning_rate": 0.00019434703482140627, "loss": 11.6797, "step": 15439 }, { "epoch": 0.323201875575651, "grad_norm": 0.3016425371170044, "learning_rate": 0.00019434630806849527, "loss": 11.6848, "step": 15440 }, { "epoch": 0.32322280833961314, "grad_norm": 0.30359894037246704, "learning_rate": 0.00019434558127023012, "loss": 11.6827, "step": 15441 }, { "epoch": 0.3232437411035753, "grad_norm": 0.23681922256946564, "learning_rate": 0.00019434485442661116, "loss": 11.6732, "step": 15442 }, { "epoch": 0.3232646738675375, "grad_norm": 0.26679712533950806, "learning_rate": 0.00019434412753763876, "loss": 11.665, "step": 15443 }, { "epoch": 0.32328560663149963, "grad_norm": 0.28109604120254517, "learning_rate": 0.00019434340060331322, "loss": 11.6788, "step": 15444 }, { "epoch": 0.3233065393954618, "grad_norm": 0.24689838290214539, "learning_rate": 0.0001943426736236349, "loss": 11.681, "step": 15445 }, { "epoch": 0.3233274721594239, "grad_norm": 0.23948970437049866, "learning_rate": 0.0001943419465986042, "loss": 11.6689, "step": 15446 }, { "epoch": 0.32334840492338607, "grad_norm": 0.2916176915168762, "learning_rate": 0.00019434121952822145, "loss": 11.6824, "step": 15447 }, { "epoch": 0.3233693376873482, "grad_norm": 0.3559313416481018, "learning_rate": 0.00019434049241248694, "loss": 11.6758, "step": 15448 }, { "epoch": 0.3233902704513104, "grad_norm": 0.2697136104106903, "learning_rate": 0.00019433976525140106, "loss": 11.6605, "step": 15449 }, { "epoch": 0.32341120321527256, "grad_norm": 0.3825431764125824, "learning_rate": 0.0001943390380449642, "loss": 11.6793, "step": 15450 }, { "epoch": 0.3234321359792347, "grad_norm": 0.27239990234375, "learning_rate": 0.00019433831079317666, "loss": 11.6911, "step": 15451 }, { "epoch": 0.32345306874319685, "grad_norm": 0.27975496649742126, "learning_rate": 0.0001943375834960388, "loss": 11.684, "step": 15452 }, { "epoch": 0.323474001507159, "grad_norm": 0.24619604647159576, "learning_rate": 0.000194336856153551, "loss": 11.6705, "step": 15453 }, { "epoch": 0.32349493427112114, "grad_norm": 0.2637394368648529, "learning_rate": 0.00019433612876571354, "loss": 11.6814, "step": 15454 }, { "epoch": 0.32351586703508334, "grad_norm": 0.28865671157836914, "learning_rate": 0.00019433540133252682, "loss": 11.6732, "step": 15455 }, { "epoch": 0.3235367997990455, "grad_norm": 0.2822772264480591, "learning_rate": 0.00019433467385399122, "loss": 11.6607, "step": 15456 }, { "epoch": 0.3235577325630076, "grad_norm": 0.2498978227376938, "learning_rate": 0.00019433394633010703, "loss": 11.6724, "step": 15457 }, { "epoch": 0.32357866532696977, "grad_norm": 0.2756820321083069, "learning_rate": 0.00019433321876087463, "loss": 11.6699, "step": 15458 }, { "epoch": 0.3235995980909319, "grad_norm": 0.2926572859287262, "learning_rate": 0.00019433249114629434, "loss": 11.6623, "step": 15459 }, { "epoch": 0.32362053085489406, "grad_norm": 0.2673548460006714, "learning_rate": 0.00019433176348636653, "loss": 11.659, "step": 15460 }, { "epoch": 0.3236414636188562, "grad_norm": 0.2870110869407654, "learning_rate": 0.00019433103578109158, "loss": 11.6603, "step": 15461 }, { "epoch": 0.3236623963828184, "grad_norm": 0.3713989853858948, "learning_rate": 0.00019433030803046977, "loss": 11.6736, "step": 15462 }, { "epoch": 0.32368332914678055, "grad_norm": 0.30032333731651306, "learning_rate": 0.00019432958023450153, "loss": 11.6658, "step": 15463 }, { "epoch": 0.3237042619107427, "grad_norm": 0.3005804121494293, "learning_rate": 0.00019432885239318714, "loss": 11.6988, "step": 15464 }, { "epoch": 0.32372519467470484, "grad_norm": 0.312602698802948, "learning_rate": 0.000194328124506527, "loss": 11.6967, "step": 15465 }, { "epoch": 0.323746127438667, "grad_norm": 0.2743452489376068, "learning_rate": 0.00019432739657452143, "loss": 11.6782, "step": 15466 }, { "epoch": 0.32376706020262913, "grad_norm": 0.2981812655925751, "learning_rate": 0.0001943266685971708, "loss": 11.668, "step": 15467 }, { "epoch": 0.32378799296659133, "grad_norm": 0.3849439024925232, "learning_rate": 0.00019432594057447545, "loss": 11.6903, "step": 15468 }, { "epoch": 0.3238089257305535, "grad_norm": 0.2949821949005127, "learning_rate": 0.00019432521250643573, "loss": 11.6792, "step": 15469 }, { "epoch": 0.3238298584945156, "grad_norm": 0.3246372938156128, "learning_rate": 0.000194324484393052, "loss": 11.66, "step": 15470 }, { "epoch": 0.32385079125847777, "grad_norm": 0.3182958960533142, "learning_rate": 0.00019432375623432458, "loss": 11.6778, "step": 15471 }, { "epoch": 0.3238717240224399, "grad_norm": 0.27940765023231506, "learning_rate": 0.0001943230280302538, "loss": 11.6662, "step": 15472 }, { "epoch": 0.32389265678640206, "grad_norm": 0.3490777909755707, "learning_rate": 0.00019432229978084013, "loss": 11.6806, "step": 15473 }, { "epoch": 0.32391358955036426, "grad_norm": 0.25449275970458984, "learning_rate": 0.00019432157148608378, "loss": 11.6692, "step": 15474 }, { "epoch": 0.3239345223143264, "grad_norm": 0.2950269281864166, "learning_rate": 0.0001943208431459852, "loss": 11.6716, "step": 15475 }, { "epoch": 0.32395545507828855, "grad_norm": 0.25313082337379456, "learning_rate": 0.00019432011476054468, "loss": 11.6722, "step": 15476 }, { "epoch": 0.3239763878422507, "grad_norm": 0.259310781955719, "learning_rate": 0.0001943193863297626, "loss": 11.666, "step": 15477 }, { "epoch": 0.32399732060621284, "grad_norm": 0.26533886790275574, "learning_rate": 0.00019431865785363929, "loss": 11.6585, "step": 15478 }, { "epoch": 0.324018253370175, "grad_norm": 0.28955966234207153, "learning_rate": 0.00019431792933217513, "loss": 11.6797, "step": 15479 }, { "epoch": 0.3240391861341371, "grad_norm": 0.266753613948822, "learning_rate": 0.00019431720076537042, "loss": 11.6648, "step": 15480 }, { "epoch": 0.3240601188980993, "grad_norm": 0.2874961197376251, "learning_rate": 0.00019431647215322558, "loss": 11.6699, "step": 15481 }, { "epoch": 0.32408105166206147, "grad_norm": 0.2994866669178009, "learning_rate": 0.0001943157434957409, "loss": 11.6707, "step": 15482 }, { "epoch": 0.3241019844260236, "grad_norm": 0.3230719566345215, "learning_rate": 0.00019431501479291673, "loss": 11.6654, "step": 15483 }, { "epoch": 0.32412291718998576, "grad_norm": 0.21220722794532776, "learning_rate": 0.0001943142860447535, "loss": 11.6625, "step": 15484 }, { "epoch": 0.3241438499539479, "grad_norm": 0.24477699398994446, "learning_rate": 0.00019431355725125146, "loss": 11.6736, "step": 15485 }, { "epoch": 0.32416478271791005, "grad_norm": 0.3388870656490326, "learning_rate": 0.000194312828412411, "loss": 11.6831, "step": 15486 }, { "epoch": 0.32418571548187225, "grad_norm": 0.31135326623916626, "learning_rate": 0.0001943120995282325, "loss": 11.6674, "step": 15487 }, { "epoch": 0.3242066482458344, "grad_norm": 0.3022449016571045, "learning_rate": 0.00019431137059871626, "loss": 11.6756, "step": 15488 }, { "epoch": 0.32422758100979654, "grad_norm": 0.2595624625682831, "learning_rate": 0.00019431064162386266, "loss": 11.6622, "step": 15489 }, { "epoch": 0.3242485137737587, "grad_norm": 0.3310447037220001, "learning_rate": 0.00019430991260367203, "loss": 11.6754, "step": 15490 }, { "epoch": 0.32426944653772083, "grad_norm": 0.29538658261299133, "learning_rate": 0.0001943091835381448, "loss": 11.6679, "step": 15491 }, { "epoch": 0.324290379301683, "grad_norm": 0.2869834303855896, "learning_rate": 0.00019430845442728117, "loss": 11.6575, "step": 15492 }, { "epoch": 0.3243113120656452, "grad_norm": 0.3414922058582306, "learning_rate": 0.00019430772527108164, "loss": 11.6893, "step": 15493 }, { "epoch": 0.3243322448296073, "grad_norm": 0.29369091987609863, "learning_rate": 0.00019430699606954644, "loss": 11.6855, "step": 15494 }, { "epoch": 0.32435317759356946, "grad_norm": 0.33104848861694336, "learning_rate": 0.00019430626682267603, "loss": 11.6688, "step": 15495 }, { "epoch": 0.3243741103575316, "grad_norm": 0.27961260080337524, "learning_rate": 0.00019430553753047066, "loss": 11.6717, "step": 15496 }, { "epoch": 0.32439504312149375, "grad_norm": 0.23905019462108612, "learning_rate": 0.0001943048081929308, "loss": 11.6813, "step": 15497 }, { "epoch": 0.3244159758854559, "grad_norm": 0.3447815775871277, "learning_rate": 0.00019430407881005667, "loss": 11.6645, "step": 15498 }, { "epoch": 0.32443690864941804, "grad_norm": 0.28707239031791687, "learning_rate": 0.00019430334938184867, "loss": 11.6738, "step": 15499 }, { "epoch": 0.32445784141338024, "grad_norm": 0.27892768383026123, "learning_rate": 0.0001943026199083072, "loss": 11.671, "step": 15500 }, { "epoch": 0.3244787741773424, "grad_norm": 0.2751528322696686, "learning_rate": 0.00019430189038943256, "loss": 11.666, "step": 15501 }, { "epoch": 0.32449970694130453, "grad_norm": 0.27298811078071594, "learning_rate": 0.00019430116082522512, "loss": 11.6825, "step": 15502 }, { "epoch": 0.3245206397052667, "grad_norm": 0.29437026381492615, "learning_rate": 0.00019430043121568518, "loss": 11.6677, "step": 15503 }, { "epoch": 0.3245415724692288, "grad_norm": 0.24599450826644897, "learning_rate": 0.00019429970156081315, "loss": 11.6773, "step": 15504 }, { "epoch": 0.32456250523319097, "grad_norm": 0.2852638065814972, "learning_rate": 0.00019429897186060938, "loss": 11.6675, "step": 15505 }, { "epoch": 0.32458343799715317, "grad_norm": 0.25730374455451965, "learning_rate": 0.00019429824211507422, "loss": 11.6745, "step": 15506 }, { "epoch": 0.3246043707611153, "grad_norm": 0.23887740075588226, "learning_rate": 0.00019429751232420797, "loss": 11.6504, "step": 15507 }, { "epoch": 0.32462530352507746, "grad_norm": 0.34663698077201843, "learning_rate": 0.00019429678248801104, "loss": 11.6724, "step": 15508 }, { "epoch": 0.3246462362890396, "grad_norm": 0.2670632302761078, "learning_rate": 0.00019429605260648373, "loss": 11.6626, "step": 15509 }, { "epoch": 0.32466716905300175, "grad_norm": 0.28946998715400696, "learning_rate": 0.00019429532267962644, "loss": 11.6753, "step": 15510 }, { "epoch": 0.3246881018169639, "grad_norm": 0.2443520724773407, "learning_rate": 0.0001942945927074395, "loss": 11.6741, "step": 15511 }, { "epoch": 0.3247090345809261, "grad_norm": 0.29282158613204956, "learning_rate": 0.00019429386268992325, "loss": 11.6752, "step": 15512 }, { "epoch": 0.32472996734488824, "grad_norm": 0.2832334041595459, "learning_rate": 0.00019429313262707805, "loss": 11.6738, "step": 15513 }, { "epoch": 0.3247509001088504, "grad_norm": 0.4530763328075409, "learning_rate": 0.00019429240251890426, "loss": 11.6818, "step": 15514 }, { "epoch": 0.3247718328728125, "grad_norm": 0.18920916318893433, "learning_rate": 0.0001942916723654022, "loss": 11.6616, "step": 15515 }, { "epoch": 0.32479276563677467, "grad_norm": 0.30447831749916077, "learning_rate": 0.00019429094216657226, "loss": 11.6761, "step": 15516 }, { "epoch": 0.3248136984007368, "grad_norm": 0.2673070728778839, "learning_rate": 0.00019429021192241477, "loss": 11.665, "step": 15517 }, { "epoch": 0.32483463116469896, "grad_norm": 0.24831467866897583, "learning_rate": 0.0001942894816329301, "loss": 11.6582, "step": 15518 }, { "epoch": 0.32485556392866116, "grad_norm": 0.2683151066303253, "learning_rate": 0.0001942887512981186, "loss": 11.6585, "step": 15519 }, { "epoch": 0.3248764966926233, "grad_norm": 0.28667306900024414, "learning_rate": 0.00019428802091798057, "loss": 11.6823, "step": 15520 }, { "epoch": 0.32489742945658545, "grad_norm": 0.34594637155532837, "learning_rate": 0.00019428729049251643, "loss": 11.6939, "step": 15521 }, { "epoch": 0.3249183622205476, "grad_norm": 0.22782999277114868, "learning_rate": 0.00019428656002172648, "loss": 11.6775, "step": 15522 }, { "epoch": 0.32493929498450974, "grad_norm": 0.33388039469718933, "learning_rate": 0.00019428582950561108, "loss": 11.6821, "step": 15523 }, { "epoch": 0.3249602277484719, "grad_norm": 0.3201602101325989, "learning_rate": 0.0001942850989441706, "loss": 11.6671, "step": 15524 }, { "epoch": 0.3249811605124341, "grad_norm": 0.2744583785533905, "learning_rate": 0.0001942843683374054, "loss": 11.6694, "step": 15525 }, { "epoch": 0.32500209327639623, "grad_norm": 0.3114674687385559, "learning_rate": 0.00019428363768531578, "loss": 11.679, "step": 15526 }, { "epoch": 0.3250230260403584, "grad_norm": 0.2817896604537964, "learning_rate": 0.00019428290698790215, "loss": 11.6884, "step": 15527 }, { "epoch": 0.3250439588043205, "grad_norm": 0.3710344135761261, "learning_rate": 0.00019428217624516486, "loss": 11.6946, "step": 15528 }, { "epoch": 0.32506489156828267, "grad_norm": 0.2912500202655792, "learning_rate": 0.0001942814454571042, "loss": 11.6723, "step": 15529 }, { "epoch": 0.3250858243322448, "grad_norm": 0.2530078589916229, "learning_rate": 0.00019428071462372057, "loss": 11.6741, "step": 15530 }, { "epoch": 0.325106757096207, "grad_norm": 0.3295880854129791, "learning_rate": 0.00019427998374501432, "loss": 11.6583, "step": 15531 }, { "epoch": 0.32512768986016916, "grad_norm": 0.33926165103912354, "learning_rate": 0.0001942792528209858, "loss": 11.6817, "step": 15532 }, { "epoch": 0.3251486226241313, "grad_norm": 0.2579435706138611, "learning_rate": 0.00019427852185163532, "loss": 11.6818, "step": 15533 }, { "epoch": 0.32516955538809345, "grad_norm": 0.27126389741897583, "learning_rate": 0.0001942777908369633, "loss": 11.6752, "step": 15534 }, { "epoch": 0.3251904881520556, "grad_norm": 0.23759526014328003, "learning_rate": 0.00019427705977697005, "loss": 11.674, "step": 15535 }, { "epoch": 0.32521142091601773, "grad_norm": 0.29344552755355835, "learning_rate": 0.00019427632867165593, "loss": 11.6744, "step": 15536 }, { "epoch": 0.3252323536799799, "grad_norm": 0.2826080024242401, "learning_rate": 0.00019427559752102126, "loss": 11.6823, "step": 15537 }, { "epoch": 0.3252532864439421, "grad_norm": 0.3109343647956848, "learning_rate": 0.00019427486632506646, "loss": 11.6786, "step": 15538 }, { "epoch": 0.3252742192079042, "grad_norm": 0.29261359572410583, "learning_rate": 0.0001942741350837918, "loss": 11.6726, "step": 15539 }, { "epoch": 0.32529515197186637, "grad_norm": 0.2721630334854126, "learning_rate": 0.00019427340379719773, "loss": 11.6692, "step": 15540 }, { "epoch": 0.3253160847358285, "grad_norm": 0.3692622780799866, "learning_rate": 0.00019427267246528452, "loss": 11.6666, "step": 15541 }, { "epoch": 0.32533701749979066, "grad_norm": 0.26571932435035706, "learning_rate": 0.00019427194108805252, "loss": 11.6763, "step": 15542 }, { "epoch": 0.3253579502637528, "grad_norm": 0.29173681139945984, "learning_rate": 0.00019427120966550215, "loss": 11.6897, "step": 15543 }, { "epoch": 0.325378883027715, "grad_norm": 0.24092696607112885, "learning_rate": 0.00019427047819763372, "loss": 11.6853, "step": 15544 }, { "epoch": 0.32539981579167715, "grad_norm": 0.24880929291248322, "learning_rate": 0.00019426974668444754, "loss": 11.6763, "step": 15545 }, { "epoch": 0.3254207485556393, "grad_norm": 0.2731962502002716, "learning_rate": 0.00019426901512594401, "loss": 11.6714, "step": 15546 }, { "epoch": 0.32544168131960144, "grad_norm": 0.24762354791164398, "learning_rate": 0.0001942682835221235, "loss": 11.6796, "step": 15547 }, { "epoch": 0.3254626140835636, "grad_norm": 0.27065309882164, "learning_rate": 0.00019426755187298633, "loss": 11.6725, "step": 15548 }, { "epoch": 0.32548354684752573, "grad_norm": 0.44987058639526367, "learning_rate": 0.00019426682017853285, "loss": 11.6752, "step": 15549 }, { "epoch": 0.3255044796114879, "grad_norm": 0.3629443347454071, "learning_rate": 0.00019426608843876343, "loss": 11.6786, "step": 15550 }, { "epoch": 0.3255254123754501, "grad_norm": 0.25316378474235535, "learning_rate": 0.00019426535665367843, "loss": 11.6582, "step": 15551 }, { "epoch": 0.3255463451394122, "grad_norm": 0.2364221066236496, "learning_rate": 0.00019426462482327814, "loss": 11.664, "step": 15552 }, { "epoch": 0.32556727790337436, "grad_norm": 0.28162604570388794, "learning_rate": 0.000194263892947563, "loss": 11.6804, "step": 15553 }, { "epoch": 0.3255882106673365, "grad_norm": 0.2783329486846924, "learning_rate": 0.0001942631610265333, "loss": 11.685, "step": 15554 }, { "epoch": 0.32560914343129865, "grad_norm": 0.3166045844554901, "learning_rate": 0.0001942624290601894, "loss": 11.6785, "step": 15555 }, { "epoch": 0.3256300761952608, "grad_norm": 0.3187616169452667, "learning_rate": 0.00019426169704853168, "loss": 11.6677, "step": 15556 }, { "epoch": 0.325651008959223, "grad_norm": 0.2616272568702698, "learning_rate": 0.00019426096499156046, "loss": 11.6836, "step": 15557 }, { "epoch": 0.32567194172318514, "grad_norm": 0.32758259773254395, "learning_rate": 0.00019426023288927611, "loss": 11.6728, "step": 15558 }, { "epoch": 0.3256928744871473, "grad_norm": 0.24426066875457764, "learning_rate": 0.00019425950074167895, "loss": 11.6764, "step": 15559 }, { "epoch": 0.32571380725110943, "grad_norm": 0.30697932839393616, "learning_rate": 0.00019425876854876938, "loss": 11.6683, "step": 15560 }, { "epoch": 0.3257347400150716, "grad_norm": 0.29164960980415344, "learning_rate": 0.00019425803631054772, "loss": 11.6712, "step": 15561 }, { "epoch": 0.3257556727790337, "grad_norm": 0.2151847779750824, "learning_rate": 0.00019425730402701437, "loss": 11.6668, "step": 15562 }, { "epoch": 0.3257766055429959, "grad_norm": 0.2579726278781891, "learning_rate": 0.00019425657169816962, "loss": 11.6717, "step": 15563 }, { "epoch": 0.32579753830695807, "grad_norm": 0.2433432638645172, "learning_rate": 0.00019425583932401386, "loss": 11.6862, "step": 15564 }, { "epoch": 0.3258184710709202, "grad_norm": 0.26163801550865173, "learning_rate": 0.00019425510690454742, "loss": 11.6711, "step": 15565 }, { "epoch": 0.32583940383488236, "grad_norm": 0.24867942929267883, "learning_rate": 0.00019425437443977063, "loss": 11.6796, "step": 15566 }, { "epoch": 0.3258603365988445, "grad_norm": 0.29429998993873596, "learning_rate": 0.0001942536419296839, "loss": 11.6682, "step": 15567 }, { "epoch": 0.32588126936280665, "grad_norm": 0.27749279141426086, "learning_rate": 0.00019425290937428758, "loss": 11.6888, "step": 15568 }, { "epoch": 0.3259022021267688, "grad_norm": 0.26090964674949646, "learning_rate": 0.00019425217677358197, "loss": 11.6686, "step": 15569 }, { "epoch": 0.325923134890731, "grad_norm": 0.23270946741104126, "learning_rate": 0.00019425144412756745, "loss": 11.667, "step": 15570 }, { "epoch": 0.32594406765469314, "grad_norm": 0.34418362379074097, "learning_rate": 0.0001942507114362444, "loss": 11.6736, "step": 15571 }, { "epoch": 0.3259650004186553, "grad_norm": 0.26248326897621155, "learning_rate": 0.00019424997869961312, "loss": 11.6657, "step": 15572 }, { "epoch": 0.3259859331826174, "grad_norm": 0.28999412059783936, "learning_rate": 0.00019424924591767396, "loss": 11.6645, "step": 15573 }, { "epoch": 0.32600686594657957, "grad_norm": 0.30687785148620605, "learning_rate": 0.00019424851309042732, "loss": 11.6813, "step": 15574 }, { "epoch": 0.3260277987105417, "grad_norm": 0.2512012720108032, "learning_rate": 0.00019424778021787358, "loss": 11.6518, "step": 15575 }, { "epoch": 0.3260487314745039, "grad_norm": 0.23792631924152374, "learning_rate": 0.00019424704730001299, "loss": 11.6764, "step": 15576 }, { "epoch": 0.32606966423846606, "grad_norm": 0.3163081109523773, "learning_rate": 0.00019424631433684594, "loss": 11.6708, "step": 15577 }, { "epoch": 0.3260905970024282, "grad_norm": 0.24080902338027954, "learning_rate": 0.00019424558132837282, "loss": 11.6858, "step": 15578 }, { "epoch": 0.32611152976639035, "grad_norm": 0.22845828533172607, "learning_rate": 0.000194244848274594, "loss": 11.6706, "step": 15579 }, { "epoch": 0.3261324625303525, "grad_norm": 0.33529627323150635, "learning_rate": 0.00019424411517550975, "loss": 11.7019, "step": 15580 }, { "epoch": 0.32615339529431464, "grad_norm": 0.30534109473228455, "learning_rate": 0.00019424338203112047, "loss": 11.6842, "step": 15581 }, { "epoch": 0.32617432805827684, "grad_norm": 0.23921427130699158, "learning_rate": 0.0001942426488414265, "loss": 11.6679, "step": 15582 }, { "epoch": 0.326195260822239, "grad_norm": 0.4464891254901886, "learning_rate": 0.0001942419156064282, "loss": 11.6958, "step": 15583 }, { "epoch": 0.32621619358620113, "grad_norm": 0.3750474750995636, "learning_rate": 0.00019424118232612596, "loss": 11.6892, "step": 15584 }, { "epoch": 0.3262371263501633, "grad_norm": 0.2898106873035431, "learning_rate": 0.00019424044900052007, "loss": 11.6713, "step": 15585 }, { "epoch": 0.3262580591141254, "grad_norm": 0.36047670245170593, "learning_rate": 0.0001942397156296109, "loss": 11.6612, "step": 15586 }, { "epoch": 0.32627899187808757, "grad_norm": 0.29239872097969055, "learning_rate": 0.0001942389822133988, "loss": 11.6656, "step": 15587 }, { "epoch": 0.3262999246420497, "grad_norm": 0.35747936367988586, "learning_rate": 0.00019423824875188417, "loss": 11.6851, "step": 15588 }, { "epoch": 0.3263208574060119, "grad_norm": 0.25795140862464905, "learning_rate": 0.00019423751524506728, "loss": 11.661, "step": 15589 }, { "epoch": 0.32634179016997406, "grad_norm": 0.25252699851989746, "learning_rate": 0.00019423678169294857, "loss": 11.6553, "step": 15590 }, { "epoch": 0.3263627229339362, "grad_norm": 0.2802503705024719, "learning_rate": 0.00019423604809552832, "loss": 11.6727, "step": 15591 }, { "epoch": 0.32638365569789834, "grad_norm": 0.26131686568260193, "learning_rate": 0.00019423531445280693, "loss": 11.6699, "step": 15592 }, { "epoch": 0.3264045884618605, "grad_norm": 0.2356129139661789, "learning_rate": 0.00019423458076478473, "loss": 11.6629, "step": 15593 }, { "epoch": 0.32642552122582263, "grad_norm": 0.350526362657547, "learning_rate": 0.00019423384703146207, "loss": 11.6762, "step": 15594 }, { "epoch": 0.32644645398978483, "grad_norm": 0.28837472200393677, "learning_rate": 0.00019423311325283933, "loss": 11.6788, "step": 15595 }, { "epoch": 0.326467386753747, "grad_norm": 0.30758845806121826, "learning_rate": 0.00019423237942891683, "loss": 11.6792, "step": 15596 }, { "epoch": 0.3264883195177091, "grad_norm": 0.29285457730293274, "learning_rate": 0.00019423164555969495, "loss": 11.6571, "step": 15597 }, { "epoch": 0.32650925228167127, "grad_norm": 0.30774959921836853, "learning_rate": 0.00019423091164517403, "loss": 11.663, "step": 15598 }, { "epoch": 0.3265301850456334, "grad_norm": 0.3483129143714905, "learning_rate": 0.00019423017768535437, "loss": 11.6485, "step": 15599 }, { "epoch": 0.32655111780959556, "grad_norm": 0.2892566919326782, "learning_rate": 0.00019422944368023646, "loss": 11.6562, "step": 15600 }, { "epoch": 0.32657205057355776, "grad_norm": 0.2546330690383911, "learning_rate": 0.00019422870962982052, "loss": 11.6729, "step": 15601 }, { "epoch": 0.3265929833375199, "grad_norm": 0.2400866448879242, "learning_rate": 0.00019422797553410694, "loss": 11.6785, "step": 15602 }, { "epoch": 0.32661391610148205, "grad_norm": 0.26524677872657776, "learning_rate": 0.00019422724139309613, "loss": 11.6731, "step": 15603 }, { "epoch": 0.3266348488654442, "grad_norm": 0.28949472308158875, "learning_rate": 0.00019422650720678838, "loss": 11.6736, "step": 15604 }, { "epoch": 0.32665578162940634, "grad_norm": 0.24520891904830933, "learning_rate": 0.00019422577297518401, "loss": 11.6686, "step": 15605 }, { "epoch": 0.3266767143933685, "grad_norm": 0.3288830816745758, "learning_rate": 0.00019422503869828347, "loss": 11.6907, "step": 15606 }, { "epoch": 0.32669764715733063, "grad_norm": 0.27067476511001587, "learning_rate": 0.00019422430437608705, "loss": 11.6863, "step": 15607 }, { "epoch": 0.32671857992129283, "grad_norm": 0.26141831278800964, "learning_rate": 0.00019422357000859513, "loss": 11.6663, "step": 15608 }, { "epoch": 0.326739512685255, "grad_norm": 0.2334112524986267, "learning_rate": 0.00019422283559580804, "loss": 11.6852, "step": 15609 }, { "epoch": 0.3267604454492171, "grad_norm": 0.2660447359085083, "learning_rate": 0.00019422210113772618, "loss": 11.6906, "step": 15610 }, { "epoch": 0.32678137821317926, "grad_norm": 0.3203671872615814, "learning_rate": 0.00019422136663434982, "loss": 11.6856, "step": 15611 }, { "epoch": 0.3268023109771414, "grad_norm": 0.29349520802497864, "learning_rate": 0.00019422063208567936, "loss": 11.6735, "step": 15612 }, { "epoch": 0.32682324374110355, "grad_norm": 0.2500372529029846, "learning_rate": 0.00019421989749171517, "loss": 11.6635, "step": 15613 }, { "epoch": 0.32684417650506575, "grad_norm": 0.36393481492996216, "learning_rate": 0.00019421916285245763, "loss": 11.6751, "step": 15614 }, { "epoch": 0.3268651092690279, "grad_norm": 0.2291146069765091, "learning_rate": 0.00019421842816790702, "loss": 11.6636, "step": 15615 }, { "epoch": 0.32688604203299004, "grad_norm": 0.29407814145088196, "learning_rate": 0.0001942176934380637, "loss": 11.6787, "step": 15616 }, { "epoch": 0.3269069747969522, "grad_norm": 0.29404863715171814, "learning_rate": 0.00019421695866292805, "loss": 11.6836, "step": 15617 }, { "epoch": 0.32692790756091433, "grad_norm": 0.2949507236480713, "learning_rate": 0.00019421622384250046, "loss": 11.6721, "step": 15618 }, { "epoch": 0.3269488403248765, "grad_norm": 0.27730217576026917, "learning_rate": 0.00019421548897678118, "loss": 11.6575, "step": 15619 }, { "epoch": 0.3269697730888387, "grad_norm": 0.5155344009399414, "learning_rate": 0.00019421475406577066, "loss": 11.6872, "step": 15620 }, { "epoch": 0.3269907058528008, "grad_norm": 0.24773162603378296, "learning_rate": 0.00019421401910946922, "loss": 11.6697, "step": 15621 }, { "epoch": 0.32701163861676297, "grad_norm": 0.299192875623703, "learning_rate": 0.0001942132841078772, "loss": 11.6968, "step": 15622 }, { "epoch": 0.3270325713807251, "grad_norm": 0.27153849601745605, "learning_rate": 0.000194212549060995, "loss": 11.6863, "step": 15623 }, { "epoch": 0.32705350414468726, "grad_norm": 0.2765059173107147, "learning_rate": 0.00019421181396882291, "loss": 11.687, "step": 15624 }, { "epoch": 0.3270744369086494, "grad_norm": 0.2424791157245636, "learning_rate": 0.00019421107883136132, "loss": 11.6795, "step": 15625 }, { "epoch": 0.32709536967261155, "grad_norm": 0.23470263183116913, "learning_rate": 0.0001942103436486106, "loss": 11.6751, "step": 15626 }, { "epoch": 0.32711630243657375, "grad_norm": 0.28740590810775757, "learning_rate": 0.00019420960842057104, "loss": 11.6597, "step": 15627 }, { "epoch": 0.3271372352005359, "grad_norm": 0.31773579120635986, "learning_rate": 0.00019420887314724306, "loss": 11.6784, "step": 15628 }, { "epoch": 0.32715816796449804, "grad_norm": 0.2222537100315094, "learning_rate": 0.00019420813782862698, "loss": 11.6773, "step": 15629 }, { "epoch": 0.3271791007284602, "grad_norm": 0.356436163187027, "learning_rate": 0.00019420740246472312, "loss": 11.6719, "step": 15630 }, { "epoch": 0.3272000334924223, "grad_norm": 0.3478642702102661, "learning_rate": 0.00019420666705553195, "loss": 11.6756, "step": 15631 }, { "epoch": 0.32722096625638447, "grad_norm": 0.24677327275276184, "learning_rate": 0.00019420593160105368, "loss": 11.6819, "step": 15632 }, { "epoch": 0.32724189902034667, "grad_norm": 0.29099199175834656, "learning_rate": 0.00019420519610128877, "loss": 11.6919, "step": 15633 }, { "epoch": 0.3272628317843088, "grad_norm": 0.3131590485572815, "learning_rate": 0.00019420446055623753, "loss": 11.6836, "step": 15634 }, { "epoch": 0.32728376454827096, "grad_norm": 0.42441877722740173, "learning_rate": 0.00019420372496590032, "loss": 11.6652, "step": 15635 }, { "epoch": 0.3273046973122331, "grad_norm": 0.31508979201316833, "learning_rate": 0.00019420298933027749, "loss": 11.6699, "step": 15636 }, { "epoch": 0.32732563007619525, "grad_norm": 0.2550795376300812, "learning_rate": 0.00019420225364936937, "loss": 11.6633, "step": 15637 }, { "epoch": 0.3273465628401574, "grad_norm": 0.30073082447052, "learning_rate": 0.00019420151792317634, "loss": 11.6727, "step": 15638 }, { "epoch": 0.32736749560411954, "grad_norm": 0.43325480818748474, "learning_rate": 0.0001942007821516988, "loss": 11.6813, "step": 15639 }, { "epoch": 0.32738842836808174, "grad_norm": 0.2919028699398041, "learning_rate": 0.00019420004633493703, "loss": 11.6727, "step": 15640 }, { "epoch": 0.3274093611320439, "grad_norm": 0.3101692199707031, "learning_rate": 0.0001941993104728914, "loss": 11.6553, "step": 15641 }, { "epoch": 0.32743029389600603, "grad_norm": 0.3094949722290039, "learning_rate": 0.00019419857456556228, "loss": 11.6713, "step": 15642 }, { "epoch": 0.3274512266599682, "grad_norm": 0.2981553077697754, "learning_rate": 0.00019419783861295004, "loss": 11.6672, "step": 15643 }, { "epoch": 0.3274721594239303, "grad_norm": 0.2649163603782654, "learning_rate": 0.00019419710261505495, "loss": 11.6765, "step": 15644 }, { "epoch": 0.32749309218789247, "grad_norm": 0.28925323486328125, "learning_rate": 0.00019419636657187748, "loss": 11.6553, "step": 15645 }, { "epoch": 0.32751402495185467, "grad_norm": 0.3285694718360901, "learning_rate": 0.00019419563048341792, "loss": 11.6761, "step": 15646 }, { "epoch": 0.3275349577158168, "grad_norm": 0.27769017219543457, "learning_rate": 0.0001941948943496766, "loss": 11.6817, "step": 15647 }, { "epoch": 0.32755589047977896, "grad_norm": 0.2787511646747589, "learning_rate": 0.00019419415817065395, "loss": 11.6828, "step": 15648 }, { "epoch": 0.3275768232437411, "grad_norm": 0.24349988996982574, "learning_rate": 0.00019419342194635023, "loss": 11.6679, "step": 15649 }, { "epoch": 0.32759775600770324, "grad_norm": 0.3073282539844513, "learning_rate": 0.0001941926856767659, "loss": 11.6803, "step": 15650 }, { "epoch": 0.3276186887716654, "grad_norm": 0.4449150860309601, "learning_rate": 0.00019419194936190124, "loss": 11.6804, "step": 15651 }, { "epoch": 0.3276396215356276, "grad_norm": 0.2638191282749176, "learning_rate": 0.0001941912130017566, "loss": 11.667, "step": 15652 }, { "epoch": 0.32766055429958973, "grad_norm": 0.25851306319236755, "learning_rate": 0.0001941904765963324, "loss": 11.6727, "step": 15653 }, { "epoch": 0.3276814870635519, "grad_norm": 0.3048706650733948, "learning_rate": 0.00019418974014562892, "loss": 11.6589, "step": 15654 }, { "epoch": 0.327702419827514, "grad_norm": 0.27928125858306885, "learning_rate": 0.00019418900364964655, "loss": 11.6805, "step": 15655 }, { "epoch": 0.32772335259147617, "grad_norm": 0.3148435950279236, "learning_rate": 0.00019418826710838563, "loss": 11.6723, "step": 15656 }, { "epoch": 0.3277442853554383, "grad_norm": 0.3552516996860504, "learning_rate": 0.00019418753052184652, "loss": 11.6796, "step": 15657 }, { "epoch": 0.32776521811940046, "grad_norm": 0.26493579149246216, "learning_rate": 0.0001941867938900296, "loss": 11.665, "step": 15658 }, { "epoch": 0.32778615088336266, "grad_norm": 0.3597473204135895, "learning_rate": 0.00019418605721293515, "loss": 11.6972, "step": 15659 }, { "epoch": 0.3278070836473248, "grad_norm": 0.3138962984085083, "learning_rate": 0.00019418532049056362, "loss": 11.6813, "step": 15660 }, { "epoch": 0.32782801641128695, "grad_norm": 0.25754716992378235, "learning_rate": 0.00019418458372291532, "loss": 11.6732, "step": 15661 }, { "epoch": 0.3278489491752491, "grad_norm": 0.3262658417224884, "learning_rate": 0.00019418384690999058, "loss": 11.6631, "step": 15662 }, { "epoch": 0.32786988193921124, "grad_norm": 0.3019630014896393, "learning_rate": 0.00019418311005178982, "loss": 11.6732, "step": 15663 }, { "epoch": 0.3278908147031734, "grad_norm": 0.2994644045829773, "learning_rate": 0.00019418237314831332, "loss": 11.682, "step": 15664 }, { "epoch": 0.3279117474671356, "grad_norm": 0.2708653509616852, "learning_rate": 0.00019418163619956146, "loss": 11.6514, "step": 15665 }, { "epoch": 0.32793268023109773, "grad_norm": 0.27253541350364685, "learning_rate": 0.0001941808992055346, "loss": 11.6631, "step": 15666 }, { "epoch": 0.3279536129950599, "grad_norm": 0.2969311773777008, "learning_rate": 0.0001941801621662331, "loss": 11.6755, "step": 15667 }, { "epoch": 0.327974545759022, "grad_norm": 0.23517602682113647, "learning_rate": 0.00019417942508165732, "loss": 11.6705, "step": 15668 }, { "epoch": 0.32799547852298416, "grad_norm": 0.2960970997810364, "learning_rate": 0.0001941786879518076, "loss": 11.6645, "step": 15669 }, { "epoch": 0.3280164112869463, "grad_norm": 0.250908762216568, "learning_rate": 0.0001941779507766843, "loss": 11.6833, "step": 15670 }, { "epoch": 0.3280373440509085, "grad_norm": 0.26573750376701355, "learning_rate": 0.00019417721355628772, "loss": 11.6657, "step": 15671 }, { "epoch": 0.32805827681487065, "grad_norm": 0.5130938291549683, "learning_rate": 0.00019417647629061833, "loss": 11.6724, "step": 15672 }, { "epoch": 0.3280792095788328, "grad_norm": 0.27942609786987305, "learning_rate": 0.0001941757389796764, "loss": 11.6538, "step": 15673 }, { "epoch": 0.32810014234279494, "grad_norm": 0.2722499370574951, "learning_rate": 0.0001941750016234623, "loss": 11.662, "step": 15674 }, { "epoch": 0.3281210751067571, "grad_norm": 0.3097854256629944, "learning_rate": 0.00019417426422197639, "loss": 11.6762, "step": 15675 }, { "epoch": 0.32814200787071923, "grad_norm": 0.24269802868366241, "learning_rate": 0.00019417352677521903, "loss": 11.6816, "step": 15676 }, { "epoch": 0.3281629406346814, "grad_norm": 0.36265942454338074, "learning_rate": 0.00019417278928319056, "loss": 11.6808, "step": 15677 }, { "epoch": 0.3281838733986436, "grad_norm": 0.2807983160018921, "learning_rate": 0.00019417205174589137, "loss": 11.6787, "step": 15678 }, { "epoch": 0.3282048061626057, "grad_norm": 0.25885671377182007, "learning_rate": 0.00019417131416332178, "loss": 11.6703, "step": 15679 }, { "epoch": 0.32822573892656787, "grad_norm": 0.28341588377952576, "learning_rate": 0.0001941705765354821, "loss": 11.6686, "step": 15680 }, { "epoch": 0.32824667169053, "grad_norm": 0.2900877594947815, "learning_rate": 0.0001941698388623728, "loss": 11.6867, "step": 15681 }, { "epoch": 0.32826760445449216, "grad_norm": 0.2823481857776642, "learning_rate": 0.00019416910114399414, "loss": 11.6758, "step": 15682 }, { "epoch": 0.3282885372184543, "grad_norm": 0.2815944254398346, "learning_rate": 0.00019416836338034653, "loss": 11.6637, "step": 15683 }, { "epoch": 0.3283094699824165, "grad_norm": 0.247785747051239, "learning_rate": 0.00019416762557143027, "loss": 11.6879, "step": 15684 }, { "epoch": 0.32833040274637865, "grad_norm": 0.22312068939208984, "learning_rate": 0.00019416688771724572, "loss": 11.681, "step": 15685 }, { "epoch": 0.3283513355103408, "grad_norm": 0.3473184108734131, "learning_rate": 0.00019416614981779333, "loss": 11.6716, "step": 15686 }, { "epoch": 0.32837226827430294, "grad_norm": 0.3529978096485138, "learning_rate": 0.00019416541187307335, "loss": 11.672, "step": 15687 }, { "epoch": 0.3283932010382651, "grad_norm": 0.2203454077243805, "learning_rate": 0.0001941646738830862, "loss": 11.6768, "step": 15688 }, { "epoch": 0.3284141338022272, "grad_norm": 0.3029811382293701, "learning_rate": 0.00019416393584783213, "loss": 11.6748, "step": 15689 }, { "epoch": 0.3284350665661894, "grad_norm": 0.2250567525625229, "learning_rate": 0.00019416319776731163, "loss": 11.661, "step": 15690 }, { "epoch": 0.32845599933015157, "grad_norm": 0.2793343663215637, "learning_rate": 0.00019416245964152498, "loss": 11.6706, "step": 15691 }, { "epoch": 0.3284769320941137, "grad_norm": 0.2711917459964752, "learning_rate": 0.00019416172147047257, "loss": 11.684, "step": 15692 }, { "epoch": 0.32849786485807586, "grad_norm": 0.3082917332649231, "learning_rate": 0.00019416098325415472, "loss": 11.6983, "step": 15693 }, { "epoch": 0.328518797622038, "grad_norm": 1.6072733402252197, "learning_rate": 0.00019416024499257177, "loss": 11.6445, "step": 15694 }, { "epoch": 0.32853973038600015, "grad_norm": 0.36479422450065613, "learning_rate": 0.00019415950668572412, "loss": 11.6806, "step": 15695 }, { "epoch": 0.3285606631499623, "grad_norm": 0.2649945318698883, "learning_rate": 0.0001941587683336121, "loss": 11.6743, "step": 15696 }, { "epoch": 0.3285815959139245, "grad_norm": 0.32892560958862305, "learning_rate": 0.00019415802993623607, "loss": 11.6755, "step": 15697 }, { "epoch": 0.32860252867788664, "grad_norm": 0.2890794277191162, "learning_rate": 0.00019415729149359644, "loss": 11.6676, "step": 15698 }, { "epoch": 0.3286234614418488, "grad_norm": 0.2781369984149933, "learning_rate": 0.00019415655300569347, "loss": 11.6775, "step": 15699 }, { "epoch": 0.32864439420581093, "grad_norm": 0.2295273393392563, "learning_rate": 0.00019415581447252756, "loss": 11.6736, "step": 15700 }, { "epoch": 0.3286653269697731, "grad_norm": 0.3477286100387573, "learning_rate": 0.00019415507589409906, "loss": 11.6731, "step": 15701 }, { "epoch": 0.3286862597337352, "grad_norm": 0.26119980216026306, "learning_rate": 0.00019415433727040834, "loss": 11.6684, "step": 15702 }, { "epoch": 0.3287071924976974, "grad_norm": 0.28441259264945984, "learning_rate": 0.00019415359860145573, "loss": 11.6762, "step": 15703 }, { "epoch": 0.32872812526165957, "grad_norm": 0.2582857012748718, "learning_rate": 0.0001941528598872416, "loss": 11.6845, "step": 15704 }, { "epoch": 0.3287490580256217, "grad_norm": 0.2598893642425537, "learning_rate": 0.00019415212112776633, "loss": 11.6805, "step": 15705 }, { "epoch": 0.32876999078958385, "grad_norm": 0.2355547398328781, "learning_rate": 0.00019415138232303021, "loss": 11.683, "step": 15706 }, { "epoch": 0.328790923553546, "grad_norm": 0.282232403755188, "learning_rate": 0.00019415064347303368, "loss": 11.6699, "step": 15707 }, { "epoch": 0.32881185631750814, "grad_norm": 0.37417834997177124, "learning_rate": 0.000194149904577777, "loss": 11.6617, "step": 15708 }, { "epoch": 0.32883278908147034, "grad_norm": 0.5128169059753418, "learning_rate": 0.00019414916563726057, "loss": 11.7009, "step": 15709 }, { "epoch": 0.3288537218454325, "grad_norm": 0.2745727002620697, "learning_rate": 0.0001941484266514848, "loss": 11.6718, "step": 15710 }, { "epoch": 0.32887465460939463, "grad_norm": 0.3335399627685547, "learning_rate": 0.00019414768762044996, "loss": 11.6938, "step": 15711 }, { "epoch": 0.3288955873733568, "grad_norm": 0.23119686543941498, "learning_rate": 0.00019414694854415644, "loss": 11.6751, "step": 15712 }, { "epoch": 0.3289165201373189, "grad_norm": 0.26387548446655273, "learning_rate": 0.00019414620942260463, "loss": 11.6579, "step": 15713 }, { "epoch": 0.32893745290128107, "grad_norm": 0.327343612909317, "learning_rate": 0.0001941454702557948, "loss": 11.6674, "step": 15714 }, { "epoch": 0.3289583856652432, "grad_norm": 0.265288770198822, "learning_rate": 0.0001941447310437274, "loss": 11.6789, "step": 15715 }, { "epoch": 0.3289793184292054, "grad_norm": 0.27981314063072205, "learning_rate": 0.0001941439917864027, "loss": 11.6643, "step": 15716 }, { "epoch": 0.32900025119316756, "grad_norm": 0.3118704855442047, "learning_rate": 0.00019414325248382112, "loss": 11.6617, "step": 15717 }, { "epoch": 0.3290211839571297, "grad_norm": 0.25805914402008057, "learning_rate": 0.000194142513135983, "loss": 11.6795, "step": 15718 }, { "epoch": 0.32904211672109185, "grad_norm": 0.2793330252170563, "learning_rate": 0.00019414177374288866, "loss": 11.6764, "step": 15719 }, { "epoch": 0.329063049485054, "grad_norm": 0.28767460584640503, "learning_rate": 0.00019414103430453848, "loss": 11.6967, "step": 15720 }, { "epoch": 0.32908398224901614, "grad_norm": 0.24660834670066833, "learning_rate": 0.00019414029482093287, "loss": 11.6578, "step": 15721 }, { "epoch": 0.32910491501297834, "grad_norm": 0.1950012445449829, "learning_rate": 0.00019413955529207208, "loss": 11.6791, "step": 15722 }, { "epoch": 0.3291258477769405, "grad_norm": 0.37479883432388306, "learning_rate": 0.00019413881571795656, "loss": 11.6586, "step": 15723 }, { "epoch": 0.32914678054090263, "grad_norm": 0.3077218234539032, "learning_rate": 0.0001941380760985866, "loss": 11.6796, "step": 15724 }, { "epoch": 0.3291677133048648, "grad_norm": 0.3539036810398102, "learning_rate": 0.00019413733643396257, "loss": 11.6735, "step": 15725 }, { "epoch": 0.3291886460688269, "grad_norm": 0.27228447794914246, "learning_rate": 0.00019413659672408486, "loss": 11.6771, "step": 15726 }, { "epoch": 0.32920957883278906, "grad_norm": 0.38266894221305847, "learning_rate": 0.0001941358569689538, "loss": 11.6814, "step": 15727 }, { "epoch": 0.3292305115967512, "grad_norm": 0.3978087306022644, "learning_rate": 0.00019413511716856972, "loss": 11.7001, "step": 15728 }, { "epoch": 0.3292514443607134, "grad_norm": 0.3542962074279785, "learning_rate": 0.00019413437732293304, "loss": 11.6524, "step": 15729 }, { "epoch": 0.32927237712467555, "grad_norm": 0.27705731987953186, "learning_rate": 0.00019413363743204406, "loss": 11.6809, "step": 15730 }, { "epoch": 0.3292933098886377, "grad_norm": 0.29718104004859924, "learning_rate": 0.00019413289749590316, "loss": 11.6763, "step": 15731 }, { "epoch": 0.32931424265259984, "grad_norm": 0.26526767015457153, "learning_rate": 0.00019413215751451065, "loss": 11.6809, "step": 15732 }, { "epoch": 0.329335175416562, "grad_norm": 0.2593597173690796, "learning_rate": 0.00019413141748786698, "loss": 11.676, "step": 15733 }, { "epoch": 0.32935610818052413, "grad_norm": 0.32871729135513306, "learning_rate": 0.00019413067741597245, "loss": 11.6913, "step": 15734 }, { "epoch": 0.32937704094448633, "grad_norm": 0.27393221855163574, "learning_rate": 0.0001941299372988274, "loss": 11.6667, "step": 15735 }, { "epoch": 0.3293979737084485, "grad_norm": 0.24645474553108215, "learning_rate": 0.00019412919713643218, "loss": 11.6869, "step": 15736 }, { "epoch": 0.3294189064724106, "grad_norm": 0.2966483533382416, "learning_rate": 0.00019412845692878721, "loss": 11.673, "step": 15737 }, { "epoch": 0.32943983923637277, "grad_norm": 0.2843824625015259, "learning_rate": 0.00019412771667589275, "loss": 11.6641, "step": 15738 }, { "epoch": 0.3294607720003349, "grad_norm": 0.24952936172485352, "learning_rate": 0.00019412697637774927, "loss": 11.6578, "step": 15739 }, { "epoch": 0.32948170476429706, "grad_norm": 0.39107611775398254, "learning_rate": 0.00019412623603435702, "loss": 11.6628, "step": 15740 }, { "epoch": 0.32950263752825926, "grad_norm": 0.2947896420955658, "learning_rate": 0.00019412549564571643, "loss": 11.6837, "step": 15741 }, { "epoch": 0.3295235702922214, "grad_norm": 0.2555946409702301, "learning_rate": 0.00019412475521182783, "loss": 11.6651, "step": 15742 }, { "epoch": 0.32954450305618355, "grad_norm": 0.23357440531253815, "learning_rate": 0.00019412401473269155, "loss": 11.6704, "step": 15743 }, { "epoch": 0.3295654358201457, "grad_norm": 0.2792518436908722, "learning_rate": 0.00019412327420830798, "loss": 11.6826, "step": 15744 }, { "epoch": 0.32958636858410784, "grad_norm": 0.2773728370666504, "learning_rate": 0.00019412253363867749, "loss": 11.6713, "step": 15745 }, { "epoch": 0.32960730134807, "grad_norm": 0.32399916648864746, "learning_rate": 0.0001941217930238004, "loss": 11.6786, "step": 15746 }, { "epoch": 0.3296282341120321, "grad_norm": 0.27923306822776794, "learning_rate": 0.00019412105236367704, "loss": 11.6585, "step": 15747 }, { "epoch": 0.3296491668759943, "grad_norm": 0.25468185544013977, "learning_rate": 0.00019412031165830785, "loss": 11.6756, "step": 15748 }, { "epoch": 0.32967009963995647, "grad_norm": 0.26207858324050903, "learning_rate": 0.0001941195709076931, "loss": 11.6661, "step": 15749 }, { "epoch": 0.3296910324039186, "grad_norm": 0.28430792689323425, "learning_rate": 0.0001941188301118332, "loss": 11.6764, "step": 15750 }, { "epoch": 0.32971196516788076, "grad_norm": 0.2702578902244568, "learning_rate": 0.00019411808927072852, "loss": 11.6808, "step": 15751 }, { "epoch": 0.3297328979318429, "grad_norm": 0.26773783564567566, "learning_rate": 0.00019411734838437937, "loss": 11.6743, "step": 15752 }, { "epoch": 0.32975383069580505, "grad_norm": 0.2993413209915161, "learning_rate": 0.00019411660745278613, "loss": 11.677, "step": 15753 }, { "epoch": 0.32977476345976725, "grad_norm": 0.28806763887405396, "learning_rate": 0.00019411586647594912, "loss": 11.661, "step": 15754 }, { "epoch": 0.3297956962237294, "grad_norm": 0.37811243534088135, "learning_rate": 0.00019411512545386875, "loss": 11.6831, "step": 15755 }, { "epoch": 0.32981662898769154, "grad_norm": 0.4196528494358063, "learning_rate": 0.00019411438438654539, "loss": 11.6618, "step": 15756 }, { "epoch": 0.3298375617516537, "grad_norm": 0.25262585282325745, "learning_rate": 0.00019411364327397932, "loss": 11.6872, "step": 15757 }, { "epoch": 0.32985849451561583, "grad_norm": 0.24955697357654572, "learning_rate": 0.00019411290211617094, "loss": 11.6644, "step": 15758 }, { "epoch": 0.329879427279578, "grad_norm": 0.24841955304145813, "learning_rate": 0.00019411216091312058, "loss": 11.6751, "step": 15759 }, { "epoch": 0.3299003600435402, "grad_norm": 0.2438874989748001, "learning_rate": 0.00019411141966482864, "loss": 11.6859, "step": 15760 }, { "epoch": 0.3299212928075023, "grad_norm": 0.36657440662384033, "learning_rate": 0.00019411067837129547, "loss": 11.6791, "step": 15761 }, { "epoch": 0.32994222557146446, "grad_norm": 0.36620453000068665, "learning_rate": 0.0001941099370325214, "loss": 11.6876, "step": 15762 }, { "epoch": 0.3299631583354266, "grad_norm": 0.2964290678501129, "learning_rate": 0.0001941091956485068, "loss": 11.6781, "step": 15763 }, { "epoch": 0.32998409109938875, "grad_norm": 0.28021150827407837, "learning_rate": 0.000194108454219252, "loss": 11.672, "step": 15764 }, { "epoch": 0.3300050238633509, "grad_norm": 0.2768596410751343, "learning_rate": 0.0001941077127447574, "loss": 11.6615, "step": 15765 }, { "epoch": 0.33002595662731304, "grad_norm": 0.33137252926826477, "learning_rate": 0.00019410697122502333, "loss": 11.6808, "step": 15766 }, { "epoch": 0.33004688939127524, "grad_norm": 0.31150591373443604, "learning_rate": 0.00019410622966005016, "loss": 11.6861, "step": 15767 }, { "epoch": 0.3300678221552374, "grad_norm": 0.28438159823417664, "learning_rate": 0.00019410548804983823, "loss": 11.6717, "step": 15768 }, { "epoch": 0.33008875491919953, "grad_norm": 0.31986743211746216, "learning_rate": 0.0001941047463943879, "loss": 11.688, "step": 15769 }, { "epoch": 0.3301096876831617, "grad_norm": 0.31489235162734985, "learning_rate": 0.00019410400469369954, "loss": 11.6955, "step": 15770 }, { "epoch": 0.3301306204471238, "grad_norm": 0.28871282935142517, "learning_rate": 0.00019410326294777352, "loss": 11.6744, "step": 15771 }, { "epoch": 0.33015155321108597, "grad_norm": 0.2204156517982483, "learning_rate": 0.00019410252115661014, "loss": 11.6796, "step": 15772 }, { "epoch": 0.33017248597504817, "grad_norm": 0.23064294457435608, "learning_rate": 0.00019410177932020984, "loss": 11.6746, "step": 15773 }, { "epoch": 0.3301934187390103, "grad_norm": 0.26998773217201233, "learning_rate": 0.00019410103743857288, "loss": 11.6654, "step": 15774 }, { "epoch": 0.33021435150297246, "grad_norm": 0.30610936880111694, "learning_rate": 0.0001941002955116997, "loss": 11.6776, "step": 15775 }, { "epoch": 0.3302352842669346, "grad_norm": 0.2288990169763565, "learning_rate": 0.0001940995535395906, "loss": 11.6632, "step": 15776 }, { "epoch": 0.33025621703089675, "grad_norm": 0.2831745743751526, "learning_rate": 0.00019409881152224597, "loss": 11.6888, "step": 15777 }, { "epoch": 0.3302771497948589, "grad_norm": 0.27398937940597534, "learning_rate": 0.00019409806945966612, "loss": 11.6665, "step": 15778 }, { "epoch": 0.3302980825588211, "grad_norm": 0.265961229801178, "learning_rate": 0.0001940973273518515, "loss": 11.6722, "step": 15779 }, { "epoch": 0.33031901532278324, "grad_norm": 0.25691458582878113, "learning_rate": 0.0001940965851988024, "loss": 11.6605, "step": 15780 }, { "epoch": 0.3303399480867454, "grad_norm": 0.3985527753829956, "learning_rate": 0.00019409584300051914, "loss": 11.6882, "step": 15781 }, { "epoch": 0.33036088085070753, "grad_norm": 0.2706790566444397, "learning_rate": 0.00019409510075700217, "loss": 11.6853, "step": 15782 }, { "epoch": 0.3303818136146697, "grad_norm": 0.27099788188934326, "learning_rate": 0.00019409435846825176, "loss": 11.6804, "step": 15783 }, { "epoch": 0.3304027463786318, "grad_norm": 0.2794772982597351, "learning_rate": 0.00019409361613426836, "loss": 11.6807, "step": 15784 }, { "epoch": 0.33042367914259396, "grad_norm": 0.3086702525615692, "learning_rate": 0.0001940928737550522, "loss": 11.688, "step": 15785 }, { "epoch": 0.33044461190655616, "grad_norm": 0.2639043927192688, "learning_rate": 0.00019409213133060377, "loss": 11.6643, "step": 15786 }, { "epoch": 0.3304655446705183, "grad_norm": 0.32709720730781555, "learning_rate": 0.00019409138886092336, "loss": 11.6584, "step": 15787 }, { "epoch": 0.33048647743448045, "grad_norm": 0.3141099810600281, "learning_rate": 0.0001940906463460113, "loss": 11.6796, "step": 15788 }, { "epoch": 0.3305074101984426, "grad_norm": 0.2445485144853592, "learning_rate": 0.00019408990378586802, "loss": 11.6902, "step": 15789 }, { "epoch": 0.33052834296240474, "grad_norm": 0.2549464702606201, "learning_rate": 0.0001940891611804938, "loss": 11.6773, "step": 15790 }, { "epoch": 0.3305492757263669, "grad_norm": 0.30106857419013977, "learning_rate": 0.00019408841852988905, "loss": 11.6761, "step": 15791 }, { "epoch": 0.3305702084903291, "grad_norm": 0.2750580608844757, "learning_rate": 0.00019408767583405413, "loss": 11.6841, "step": 15792 }, { "epoch": 0.33059114125429123, "grad_norm": 0.2291417121887207, "learning_rate": 0.00019408693309298935, "loss": 11.6744, "step": 15793 }, { "epoch": 0.3306120740182534, "grad_norm": 0.2670571208000183, "learning_rate": 0.0001940861903066951, "loss": 11.6848, "step": 15794 }, { "epoch": 0.3306330067822155, "grad_norm": 0.303190678358078, "learning_rate": 0.00019408544747517175, "loss": 11.6874, "step": 15795 }, { "epoch": 0.33065393954617767, "grad_norm": 0.25245124101638794, "learning_rate": 0.00019408470459841964, "loss": 11.6795, "step": 15796 }, { "epoch": 0.3306748723101398, "grad_norm": 0.23723304271697998, "learning_rate": 0.00019408396167643912, "loss": 11.6926, "step": 15797 }, { "epoch": 0.330695805074102, "grad_norm": 0.3415849208831787, "learning_rate": 0.00019408321870923055, "loss": 11.677, "step": 15798 }, { "epoch": 0.33071673783806416, "grad_norm": 0.26957887411117554, "learning_rate": 0.0001940824756967943, "loss": 11.6711, "step": 15799 }, { "epoch": 0.3307376706020263, "grad_norm": 0.23828689754009247, "learning_rate": 0.0001940817326391307, "loss": 11.6737, "step": 15800 }, { "epoch": 0.33075860336598845, "grad_norm": 0.28611624240875244, "learning_rate": 0.00019408098953624015, "loss": 11.6727, "step": 15801 }, { "epoch": 0.3307795361299506, "grad_norm": 0.2551301121711731, "learning_rate": 0.00019408024638812297, "loss": 11.6449, "step": 15802 }, { "epoch": 0.33080046889391274, "grad_norm": 0.2796227037906647, "learning_rate": 0.00019407950319477951, "loss": 11.6875, "step": 15803 }, { "epoch": 0.3308214016578749, "grad_norm": 0.2605123817920685, "learning_rate": 0.00019407875995621022, "loss": 11.6948, "step": 15804 }, { "epoch": 0.3308423344218371, "grad_norm": 0.25217726826667786, "learning_rate": 0.0001940780166724153, "loss": 11.6814, "step": 15805 }, { "epoch": 0.3308632671857992, "grad_norm": 0.31040769815444946, "learning_rate": 0.00019407727334339522, "loss": 11.6849, "step": 15806 }, { "epoch": 0.33088419994976137, "grad_norm": 0.2625413239002228, "learning_rate": 0.00019407652996915033, "loss": 11.6847, "step": 15807 }, { "epoch": 0.3309051327137235, "grad_norm": 0.2607378363609314, "learning_rate": 0.00019407578654968095, "loss": 11.6724, "step": 15808 }, { "epoch": 0.33092606547768566, "grad_norm": 0.309602290391922, "learning_rate": 0.00019407504308498744, "loss": 11.6612, "step": 15809 }, { "epoch": 0.3309469982416478, "grad_norm": 0.2933996319770813, "learning_rate": 0.0001940742995750702, "loss": 11.6734, "step": 15810 }, { "epoch": 0.33096793100561, "grad_norm": 0.30456310510635376, "learning_rate": 0.00019407355601992957, "loss": 11.6753, "step": 15811 }, { "epoch": 0.33098886376957215, "grad_norm": 0.3125596046447754, "learning_rate": 0.00019407281241956586, "loss": 11.6571, "step": 15812 }, { "epoch": 0.3310097965335343, "grad_norm": 0.3374766409397125, "learning_rate": 0.00019407206877397948, "loss": 11.6695, "step": 15813 }, { "epoch": 0.33103072929749644, "grad_norm": 0.3342602252960205, "learning_rate": 0.00019407132508317076, "loss": 11.6817, "step": 15814 }, { "epoch": 0.3310516620614586, "grad_norm": 0.2481159269809723, "learning_rate": 0.0001940705813471401, "loss": 11.6846, "step": 15815 }, { "epoch": 0.33107259482542073, "grad_norm": 0.2069099098443985, "learning_rate": 0.00019406983756588777, "loss": 11.6815, "step": 15816 }, { "epoch": 0.3310935275893829, "grad_norm": 0.6188452243804932, "learning_rate": 0.00019406909373941424, "loss": 11.6047, "step": 15817 }, { "epoch": 0.3311144603533451, "grad_norm": 0.2611764073371887, "learning_rate": 0.00019406834986771977, "loss": 11.6718, "step": 15818 }, { "epoch": 0.3311353931173072, "grad_norm": 0.30726560950279236, "learning_rate": 0.00019406760595080478, "loss": 11.669, "step": 15819 }, { "epoch": 0.33115632588126936, "grad_norm": 0.25984257459640503, "learning_rate": 0.0001940668619886696, "loss": 11.6705, "step": 15820 }, { "epoch": 0.3311772586452315, "grad_norm": 0.27839717268943787, "learning_rate": 0.00019406611798131461, "loss": 11.67, "step": 15821 }, { "epoch": 0.33119819140919365, "grad_norm": 0.24134644865989685, "learning_rate": 0.00019406537392874012, "loss": 11.6645, "step": 15822 }, { "epoch": 0.3312191241731558, "grad_norm": 0.27045392990112305, "learning_rate": 0.00019406462983094657, "loss": 11.6839, "step": 15823 }, { "epoch": 0.331240056937118, "grad_norm": 0.24411343038082123, "learning_rate": 0.00019406388568793422, "loss": 11.6862, "step": 15824 }, { "epoch": 0.33126098970108014, "grad_norm": 0.24223583936691284, "learning_rate": 0.00019406314149970352, "loss": 11.6839, "step": 15825 }, { "epoch": 0.3312819224650423, "grad_norm": 0.2856830060482025, "learning_rate": 0.00019406239726625475, "loss": 11.6685, "step": 15826 }, { "epoch": 0.33130285522900443, "grad_norm": 0.26179322600364685, "learning_rate": 0.0001940616529875883, "loss": 11.6902, "step": 15827 }, { "epoch": 0.3313237879929666, "grad_norm": 0.308153361082077, "learning_rate": 0.00019406090866370455, "loss": 11.6774, "step": 15828 }, { "epoch": 0.3313447207569287, "grad_norm": 0.26083478331565857, "learning_rate": 0.0001940601642946038, "loss": 11.6755, "step": 15829 }, { "epoch": 0.3313656535208909, "grad_norm": 0.29573705792427063, "learning_rate": 0.00019405941988028647, "loss": 11.6709, "step": 15830 }, { "epoch": 0.33138658628485307, "grad_norm": 0.2317112237215042, "learning_rate": 0.00019405867542075287, "loss": 11.6604, "step": 15831 }, { "epoch": 0.3314075190488152, "grad_norm": 0.25406187772750854, "learning_rate": 0.0001940579309160034, "loss": 11.6771, "step": 15832 }, { "epoch": 0.33142845181277736, "grad_norm": 0.2601199746131897, "learning_rate": 0.0001940571863660384, "loss": 11.6651, "step": 15833 }, { "epoch": 0.3314493845767395, "grad_norm": 0.2647240161895752, "learning_rate": 0.00019405644177085822, "loss": 11.6742, "step": 15834 }, { "epoch": 0.33147031734070165, "grad_norm": 0.29217153787612915, "learning_rate": 0.00019405569713046324, "loss": 11.6712, "step": 15835 }, { "epoch": 0.3314912501046638, "grad_norm": 0.28560101985931396, "learning_rate": 0.00019405495244485376, "loss": 11.6667, "step": 15836 }, { "epoch": 0.331512182868626, "grad_norm": 0.3162440359592438, "learning_rate": 0.0001940542077140302, "loss": 11.6668, "step": 15837 }, { "epoch": 0.33153311563258814, "grad_norm": 0.23310841619968414, "learning_rate": 0.00019405346293799292, "loss": 11.6922, "step": 15838 }, { "epoch": 0.3315540483965503, "grad_norm": 0.322187602519989, "learning_rate": 0.00019405271811674224, "loss": 11.6674, "step": 15839 }, { "epoch": 0.3315749811605124, "grad_norm": 0.24838514626026154, "learning_rate": 0.00019405197325027853, "loss": 11.683, "step": 15840 }, { "epoch": 0.3315959139244746, "grad_norm": 0.2546926438808441, "learning_rate": 0.00019405122833860214, "loss": 11.6773, "step": 15841 }, { "epoch": 0.3316168466884367, "grad_norm": 0.26619330048561096, "learning_rate": 0.00019405048338171345, "loss": 11.6776, "step": 15842 }, { "epoch": 0.3316377794523989, "grad_norm": 0.3100275695323944, "learning_rate": 0.00019404973837961284, "loss": 11.6936, "step": 15843 }, { "epoch": 0.33165871221636106, "grad_norm": 0.26891306042671204, "learning_rate": 0.0001940489933323006, "loss": 11.6757, "step": 15844 }, { "epoch": 0.3316796449803232, "grad_norm": 0.2641845941543579, "learning_rate": 0.00019404824823977712, "loss": 11.6816, "step": 15845 }, { "epoch": 0.33170057774428535, "grad_norm": 0.2881620526313782, "learning_rate": 0.00019404750310204278, "loss": 11.6856, "step": 15846 }, { "epoch": 0.3317215105082475, "grad_norm": 0.27062752842903137, "learning_rate": 0.0001940467579190979, "loss": 11.6743, "step": 15847 }, { "epoch": 0.33174244327220964, "grad_norm": 0.24246086180210114, "learning_rate": 0.00019404601269094288, "loss": 11.685, "step": 15848 }, { "epoch": 0.33176337603617184, "grad_norm": 0.24966883659362793, "learning_rate": 0.00019404526741757805, "loss": 11.6767, "step": 15849 }, { "epoch": 0.331784308800134, "grad_norm": 0.2993452548980713, "learning_rate": 0.00019404452209900377, "loss": 11.6737, "step": 15850 }, { "epoch": 0.33180524156409613, "grad_norm": 0.2300506830215454, "learning_rate": 0.0001940437767352204, "loss": 11.6724, "step": 15851 }, { "epoch": 0.3318261743280583, "grad_norm": 0.29000985622406006, "learning_rate": 0.0001940430313262283, "loss": 11.6932, "step": 15852 }, { "epoch": 0.3318471070920204, "grad_norm": 0.3679528534412384, "learning_rate": 0.00019404228587202783, "loss": 11.6904, "step": 15853 }, { "epoch": 0.33186803985598257, "grad_norm": 0.3733130991458893, "learning_rate": 0.00019404154037261937, "loss": 11.6755, "step": 15854 }, { "epoch": 0.3318889726199447, "grad_norm": 0.31028905510902405, "learning_rate": 0.00019404079482800325, "loss": 11.6635, "step": 15855 }, { "epoch": 0.3319099053839069, "grad_norm": 0.22939172387123108, "learning_rate": 0.00019404004923817984, "loss": 11.6756, "step": 15856 }, { "epoch": 0.33193083814786906, "grad_norm": 0.28990438580513, "learning_rate": 0.00019403930360314947, "loss": 11.6574, "step": 15857 }, { "epoch": 0.3319517709118312, "grad_norm": 0.3432672917842865, "learning_rate": 0.00019403855792291254, "loss": 11.6548, "step": 15858 }, { "epoch": 0.33197270367579335, "grad_norm": 0.23214946687221527, "learning_rate": 0.0001940378121974694, "loss": 11.6633, "step": 15859 }, { "epoch": 0.3319936364397555, "grad_norm": 0.3000122606754303, "learning_rate": 0.00019403706642682038, "loss": 11.6882, "step": 15860 }, { "epoch": 0.33201456920371764, "grad_norm": 0.3374806046485901, "learning_rate": 0.0001940363206109659, "loss": 11.6701, "step": 15861 }, { "epoch": 0.33203550196767984, "grad_norm": 0.34406253695487976, "learning_rate": 0.0001940355747499062, "loss": 11.6796, "step": 15862 }, { "epoch": 0.332056434731642, "grad_norm": 0.2702077627182007, "learning_rate": 0.00019403482884364177, "loss": 11.6719, "step": 15863 }, { "epoch": 0.3320773674956041, "grad_norm": 0.2548113763332367, "learning_rate": 0.00019403408289217288, "loss": 11.6792, "step": 15864 }, { "epoch": 0.33209830025956627, "grad_norm": 0.35813647508621216, "learning_rate": 0.00019403333689549995, "loss": 11.688, "step": 15865 }, { "epoch": 0.3321192330235284, "grad_norm": 0.23536811769008636, "learning_rate": 0.0001940325908536233, "loss": 11.6685, "step": 15866 }, { "epoch": 0.33214016578749056, "grad_norm": 0.33234021067619324, "learning_rate": 0.0001940318447665433, "loss": 11.671, "step": 15867 }, { "epoch": 0.33216109855145276, "grad_norm": 0.28726184368133545, "learning_rate": 0.0001940310986342603, "loss": 11.6883, "step": 15868 }, { "epoch": 0.3321820313154149, "grad_norm": 0.3100031018257141, "learning_rate": 0.0001940303524567747, "loss": 11.6756, "step": 15869 }, { "epoch": 0.33220296407937705, "grad_norm": 0.3389561176300049, "learning_rate": 0.00019402960623408677, "loss": 11.6807, "step": 15870 }, { "epoch": 0.3322238968433392, "grad_norm": 0.31312066316604614, "learning_rate": 0.00019402885996619697, "loss": 11.6676, "step": 15871 }, { "epoch": 0.33224482960730134, "grad_norm": 0.31994128227233887, "learning_rate": 0.0001940281136531056, "loss": 11.6702, "step": 15872 }, { "epoch": 0.3322657623712635, "grad_norm": 0.27702513337135315, "learning_rate": 0.00019402736729481302, "loss": 11.6509, "step": 15873 }, { "epoch": 0.33228669513522563, "grad_norm": 0.2446582168340683, "learning_rate": 0.0001940266208913196, "loss": 11.674, "step": 15874 }, { "epoch": 0.33230762789918783, "grad_norm": 0.32047319412231445, "learning_rate": 0.00019402587444262572, "loss": 11.6612, "step": 15875 }, { "epoch": 0.33232856066315, "grad_norm": 0.263247549533844, "learning_rate": 0.0001940251279487317, "loss": 11.6668, "step": 15876 }, { "epoch": 0.3323494934271121, "grad_norm": 0.22509701550006866, "learning_rate": 0.0001940243814096379, "loss": 11.6783, "step": 15877 }, { "epoch": 0.33237042619107426, "grad_norm": 0.308415025472641, "learning_rate": 0.00019402363482534473, "loss": 11.6766, "step": 15878 }, { "epoch": 0.3323913589550364, "grad_norm": 0.3188289403915405, "learning_rate": 0.0001940228881958525, "loss": 11.6924, "step": 15879 }, { "epoch": 0.33241229171899855, "grad_norm": 0.2331651747226715, "learning_rate": 0.0001940221415211616, "loss": 11.683, "step": 15880 }, { "epoch": 0.33243322448296075, "grad_norm": 0.26109379529953003, "learning_rate": 0.00019402139480127235, "loss": 11.6725, "step": 15881 }, { "epoch": 0.3324541572469229, "grad_norm": 0.2175064980983734, "learning_rate": 0.00019402064803618511, "loss": 11.6618, "step": 15882 }, { "epoch": 0.33247509001088504, "grad_norm": 0.3179900348186493, "learning_rate": 0.0001940199012259003, "loss": 11.674, "step": 15883 }, { "epoch": 0.3324960227748472, "grad_norm": 0.34056970477104187, "learning_rate": 0.00019401915437041822, "loss": 11.6616, "step": 15884 }, { "epoch": 0.33251695553880933, "grad_norm": 0.25367337465286255, "learning_rate": 0.00019401840746973928, "loss": 11.6677, "step": 15885 }, { "epoch": 0.3325378883027715, "grad_norm": 0.3326256573200226, "learning_rate": 0.00019401766052386376, "loss": 11.7095, "step": 15886 }, { "epoch": 0.3325588210667337, "grad_norm": 0.34781354665756226, "learning_rate": 0.0001940169135327921, "loss": 11.6863, "step": 15887 }, { "epoch": 0.3325797538306958, "grad_norm": 0.34516873955726624, "learning_rate": 0.00019401616649652464, "loss": 11.691, "step": 15888 }, { "epoch": 0.33260068659465797, "grad_norm": 0.2408992499113083, "learning_rate": 0.00019401541941506167, "loss": 11.6862, "step": 15889 }, { "epoch": 0.3326216193586201, "grad_norm": 0.19963572919368744, "learning_rate": 0.00019401467228840363, "loss": 11.6731, "step": 15890 }, { "epoch": 0.33264255212258226, "grad_norm": 0.2624397575855255, "learning_rate": 0.00019401392511655087, "loss": 11.6692, "step": 15891 }, { "epoch": 0.3326634848865444, "grad_norm": 0.33630943298339844, "learning_rate": 0.0001940131778995037, "loss": 11.68, "step": 15892 }, { "epoch": 0.33268441765050655, "grad_norm": 0.3557702898979187, "learning_rate": 0.00019401243063726256, "loss": 11.6723, "step": 15893 }, { "epoch": 0.33270535041446875, "grad_norm": 0.2384214699268341, "learning_rate": 0.00019401168332982774, "loss": 11.6799, "step": 15894 }, { "epoch": 0.3327262831784309, "grad_norm": 0.24679869413375854, "learning_rate": 0.0001940109359771996, "loss": 11.6739, "step": 15895 }, { "epoch": 0.33274721594239304, "grad_norm": 0.41554468870162964, "learning_rate": 0.0001940101885793785, "loss": 11.6765, "step": 15896 }, { "epoch": 0.3327681487063552, "grad_norm": 0.3370458781719208, "learning_rate": 0.00019400944113636488, "loss": 11.6867, "step": 15897 }, { "epoch": 0.3327890814703173, "grad_norm": 0.31241801381111145, "learning_rate": 0.000194008693648159, "loss": 11.664, "step": 15898 }, { "epoch": 0.33281001423427947, "grad_norm": 0.2796398401260376, "learning_rate": 0.00019400794611476124, "loss": 11.6889, "step": 15899 }, { "epoch": 0.3328309469982417, "grad_norm": 0.29045000672340393, "learning_rate": 0.00019400719853617201, "loss": 11.6788, "step": 15900 }, { "epoch": 0.3328518797622038, "grad_norm": 0.27283814549446106, "learning_rate": 0.0001940064509123916, "loss": 11.6786, "step": 15901 }, { "epoch": 0.33287281252616596, "grad_norm": 0.2681799829006195, "learning_rate": 0.00019400570324342047, "loss": 11.6741, "step": 15902 }, { "epoch": 0.3328937452901281, "grad_norm": 0.2828218936920166, "learning_rate": 0.00019400495552925886, "loss": 11.6712, "step": 15903 }, { "epoch": 0.33291467805409025, "grad_norm": 0.29128798842430115, "learning_rate": 0.0001940042077699072, "loss": 11.6639, "step": 15904 }, { "epoch": 0.3329356108180524, "grad_norm": 0.3068321645259857, "learning_rate": 0.0001940034599653658, "loss": 11.677, "step": 15905 }, { "epoch": 0.3329565435820146, "grad_norm": 0.4151850938796997, "learning_rate": 0.00019400271211563513, "loss": 11.7141, "step": 15906 }, { "epoch": 0.33297747634597674, "grad_norm": 0.28934091329574585, "learning_rate": 0.0001940019642207154, "loss": 11.6617, "step": 15907 }, { "epoch": 0.3329984091099389, "grad_norm": 0.2668772041797638, "learning_rate": 0.00019400121628060707, "loss": 11.6643, "step": 15908 }, { "epoch": 0.33301934187390103, "grad_norm": 0.2703882157802582, "learning_rate": 0.0001940004682953105, "loss": 11.6829, "step": 15909 }, { "epoch": 0.3330402746378632, "grad_norm": 0.25538909435272217, "learning_rate": 0.00019399972026482598, "loss": 11.6631, "step": 15910 }, { "epoch": 0.3330612074018253, "grad_norm": 0.2434965968132019, "learning_rate": 0.00019399897218915394, "loss": 11.6669, "step": 15911 }, { "epoch": 0.33308214016578747, "grad_norm": 0.42223283648490906, "learning_rate": 0.00019399822406829468, "loss": 11.6808, "step": 15912 }, { "epoch": 0.33310307292974967, "grad_norm": 0.23963741958141327, "learning_rate": 0.0001939974759022486, "loss": 11.6802, "step": 15913 }, { "epoch": 0.3331240056937118, "grad_norm": 0.265803724527359, "learning_rate": 0.00019399672769101605, "loss": 11.6754, "step": 15914 }, { "epoch": 0.33314493845767396, "grad_norm": 0.23786373436450958, "learning_rate": 0.0001939959794345974, "loss": 11.6821, "step": 15915 }, { "epoch": 0.3331658712216361, "grad_norm": 0.2505614757537842, "learning_rate": 0.00019399523113299298, "loss": 11.6765, "step": 15916 }, { "epoch": 0.33318680398559825, "grad_norm": 0.24288105964660645, "learning_rate": 0.00019399448278620317, "loss": 11.6732, "step": 15917 }, { "epoch": 0.3332077367495604, "grad_norm": 0.30160850286483765, "learning_rate": 0.00019399373439422834, "loss": 11.6655, "step": 15918 }, { "epoch": 0.3332286695135226, "grad_norm": 0.24393366277217865, "learning_rate": 0.00019399298595706882, "loss": 11.6835, "step": 15919 }, { "epoch": 0.33324960227748474, "grad_norm": 0.2843005359172821, "learning_rate": 0.00019399223747472503, "loss": 11.6633, "step": 15920 }, { "epoch": 0.3332705350414469, "grad_norm": 0.2728371024131775, "learning_rate": 0.00019399148894719726, "loss": 11.6718, "step": 15921 }, { "epoch": 0.333291467805409, "grad_norm": 0.34479719400405884, "learning_rate": 0.0001939907403744859, "loss": 11.6867, "step": 15922 }, { "epoch": 0.33331240056937117, "grad_norm": 0.31456753611564636, "learning_rate": 0.0001939899917565913, "loss": 11.6755, "step": 15923 }, { "epoch": 0.3333333333333333, "grad_norm": 0.2853509485721588, "learning_rate": 0.00019398924309351384, "loss": 11.6721, "step": 15924 }, { "epoch": 0.33335426609729546, "grad_norm": 0.2845878303050995, "learning_rate": 0.00019398849438525387, "loss": 11.688, "step": 15925 }, { "epoch": 0.33337519886125766, "grad_norm": 0.28340432047843933, "learning_rate": 0.00019398774563181173, "loss": 11.6748, "step": 15926 }, { "epoch": 0.3333961316252198, "grad_norm": 0.24289631843566895, "learning_rate": 0.00019398699683318782, "loss": 11.6655, "step": 15927 }, { "epoch": 0.33341706438918195, "grad_norm": 0.2927587628364563, "learning_rate": 0.00019398624798938244, "loss": 11.6784, "step": 15928 }, { "epoch": 0.3334379971531441, "grad_norm": 0.2836395800113678, "learning_rate": 0.00019398549910039603, "loss": 11.6512, "step": 15929 }, { "epoch": 0.33345892991710624, "grad_norm": 0.24310554563999176, "learning_rate": 0.00019398475016622888, "loss": 11.6716, "step": 15930 }, { "epoch": 0.3334798626810684, "grad_norm": 0.3109128475189209, "learning_rate": 0.00019398400118688137, "loss": 11.6774, "step": 15931 }, { "epoch": 0.3335007954450306, "grad_norm": 0.26695552468299866, "learning_rate": 0.00019398325216235388, "loss": 11.6742, "step": 15932 }, { "epoch": 0.33352172820899273, "grad_norm": 2.2176597118377686, "learning_rate": 0.00019398250309264677, "loss": 11.5924, "step": 15933 }, { "epoch": 0.3335426609729549, "grad_norm": 0.2777899205684662, "learning_rate": 0.00019398175397776038, "loss": 11.6692, "step": 15934 }, { "epoch": 0.333563593736917, "grad_norm": 0.2420423924922943, "learning_rate": 0.00019398100481769504, "loss": 11.6783, "step": 15935 }, { "epoch": 0.33358452650087916, "grad_norm": 0.2164268046617508, "learning_rate": 0.00019398025561245122, "loss": 11.6709, "step": 15936 }, { "epoch": 0.3336054592648413, "grad_norm": 0.35900700092315674, "learning_rate": 0.00019397950636202915, "loss": 11.666, "step": 15937 }, { "epoch": 0.3336263920288035, "grad_norm": 0.3886376917362213, "learning_rate": 0.00019397875706642927, "loss": 11.6941, "step": 15938 }, { "epoch": 0.33364732479276565, "grad_norm": 1.6011605262756348, "learning_rate": 0.00019397800772565194, "loss": 11.585, "step": 15939 }, { "epoch": 0.3336682575567278, "grad_norm": 0.25587180256843567, "learning_rate": 0.00019397725833969744, "loss": 11.672, "step": 15940 }, { "epoch": 0.33368919032068994, "grad_norm": 0.2930874526500702, "learning_rate": 0.00019397650890856625, "loss": 11.6758, "step": 15941 }, { "epoch": 0.3337101230846521, "grad_norm": 0.2885758578777313, "learning_rate": 0.00019397575943225863, "loss": 11.6552, "step": 15942 }, { "epoch": 0.33373105584861423, "grad_norm": 0.33858564496040344, "learning_rate": 0.00019397500991077499, "loss": 11.681, "step": 15943 }, { "epoch": 0.3337519886125764, "grad_norm": 0.3443588316440582, "learning_rate": 0.00019397426034411567, "loss": 11.6868, "step": 15944 }, { "epoch": 0.3337729213765386, "grad_norm": 0.3084399998188019, "learning_rate": 0.00019397351073228105, "loss": 11.6705, "step": 15945 }, { "epoch": 0.3337938541405007, "grad_norm": 0.2912517786026001, "learning_rate": 0.00019397276107527148, "loss": 11.6831, "step": 15946 }, { "epoch": 0.33381478690446287, "grad_norm": 0.31553828716278076, "learning_rate": 0.0001939720113730873, "loss": 11.6786, "step": 15947 }, { "epoch": 0.333835719668425, "grad_norm": 0.2472091168165207, "learning_rate": 0.00019397126162572895, "loss": 11.6621, "step": 15948 }, { "epoch": 0.33385665243238716, "grad_norm": 0.3300713896751404, "learning_rate": 0.0001939705118331967, "loss": 11.6847, "step": 15949 }, { "epoch": 0.3338775851963493, "grad_norm": 0.34503859281539917, "learning_rate": 0.0001939697619954909, "loss": 11.6679, "step": 15950 }, { "epoch": 0.3338985179603115, "grad_norm": 0.2389460802078247, "learning_rate": 0.000193969012112612, "loss": 11.6607, "step": 15951 }, { "epoch": 0.33391945072427365, "grad_norm": 0.4138774871826172, "learning_rate": 0.00019396826218456031, "loss": 11.6922, "step": 15952 }, { "epoch": 0.3339403834882358, "grad_norm": 0.2579207718372345, "learning_rate": 0.00019396751221133617, "loss": 11.6802, "step": 15953 }, { "epoch": 0.33396131625219794, "grad_norm": 0.2970564067363739, "learning_rate": 0.00019396676219293995, "loss": 11.6558, "step": 15954 }, { "epoch": 0.3339822490161601, "grad_norm": 0.29469868540763855, "learning_rate": 0.00019396601212937206, "loss": 11.6771, "step": 15955 }, { "epoch": 0.3340031817801222, "grad_norm": 0.2507772147655487, "learning_rate": 0.00019396526202063283, "loss": 11.6767, "step": 15956 }, { "epoch": 0.3340241145440844, "grad_norm": 0.3443230092525482, "learning_rate": 0.0001939645118667226, "loss": 11.6731, "step": 15957 }, { "epoch": 0.33404504730804657, "grad_norm": 0.24431589245796204, "learning_rate": 0.00019396376166764173, "loss": 11.6729, "step": 15958 }, { "epoch": 0.3340659800720087, "grad_norm": 0.3430330455303192, "learning_rate": 0.00019396301142339062, "loss": 11.6698, "step": 15959 }, { "epoch": 0.33408691283597086, "grad_norm": 0.2698568105697632, "learning_rate": 0.00019396226113396962, "loss": 11.6893, "step": 15960 }, { "epoch": 0.334107845599933, "grad_norm": 0.277171790599823, "learning_rate": 0.00019396151079937904, "loss": 11.6776, "step": 15961 }, { "epoch": 0.33412877836389515, "grad_norm": 0.25834494829177856, "learning_rate": 0.0001939607604196193, "loss": 11.6784, "step": 15962 }, { "epoch": 0.3341497111278573, "grad_norm": 0.33612966537475586, "learning_rate": 0.00019396000999469072, "loss": 11.6662, "step": 15963 }, { "epoch": 0.3341706438918195, "grad_norm": 0.3685480058193207, "learning_rate": 0.00019395925952459374, "loss": 11.6815, "step": 15964 }, { "epoch": 0.33419157665578164, "grad_norm": 0.25642913579940796, "learning_rate": 0.0001939585090093286, "loss": 11.68, "step": 15965 }, { "epoch": 0.3342125094197438, "grad_norm": 0.27110353112220764, "learning_rate": 0.00019395775844889573, "loss": 11.6675, "step": 15966 }, { "epoch": 0.33423344218370593, "grad_norm": 0.24364642798900604, "learning_rate": 0.00019395700784329548, "loss": 11.6803, "step": 15967 }, { "epoch": 0.3342543749476681, "grad_norm": 0.28685787320137024, "learning_rate": 0.00019395625719252825, "loss": 11.6805, "step": 15968 }, { "epoch": 0.3342753077116302, "grad_norm": 0.30406591296195984, "learning_rate": 0.00019395550649659434, "loss": 11.6783, "step": 15969 }, { "epoch": 0.3342962404755924, "grad_norm": 0.28032544255256653, "learning_rate": 0.0001939547557554941, "loss": 11.6824, "step": 15970 }, { "epoch": 0.33431717323955457, "grad_norm": 0.26349160075187683, "learning_rate": 0.00019395400496922797, "loss": 11.672, "step": 15971 }, { "epoch": 0.3343381060035167, "grad_norm": 0.24862267076969147, "learning_rate": 0.00019395325413779627, "loss": 11.6636, "step": 15972 }, { "epoch": 0.33435903876747886, "grad_norm": 0.2969316840171814, "learning_rate": 0.00019395250326119934, "loss": 11.6747, "step": 15973 }, { "epoch": 0.334379971531441, "grad_norm": 0.24680361151695251, "learning_rate": 0.0001939517523394376, "loss": 11.6711, "step": 15974 }, { "epoch": 0.33440090429540315, "grad_norm": 0.24117594957351685, "learning_rate": 0.0001939510013725113, "loss": 11.6748, "step": 15975 }, { "epoch": 0.33442183705936535, "grad_norm": 0.30065611004829407, "learning_rate": 0.00019395025036042092, "loss": 11.6719, "step": 15976 }, { "epoch": 0.3344427698233275, "grad_norm": 0.35103875398635864, "learning_rate": 0.00019394949930316675, "loss": 11.6666, "step": 15977 }, { "epoch": 0.33446370258728964, "grad_norm": 0.29872873425483704, "learning_rate": 0.00019394874820074918, "loss": 11.6646, "step": 15978 }, { "epoch": 0.3344846353512518, "grad_norm": 0.2616105079650879, "learning_rate": 0.00019394799705316858, "loss": 11.672, "step": 15979 }, { "epoch": 0.3345055681152139, "grad_norm": 0.3222949206829071, "learning_rate": 0.00019394724586042529, "loss": 11.6716, "step": 15980 }, { "epoch": 0.33452650087917607, "grad_norm": 0.2953726649284363, "learning_rate": 0.00019394649462251964, "loss": 11.6798, "step": 15981 }, { "epoch": 0.3345474336431382, "grad_norm": 0.2658580243587494, "learning_rate": 0.00019394574333945203, "loss": 11.6766, "step": 15982 }, { "epoch": 0.3345683664071004, "grad_norm": 0.2511758506298065, "learning_rate": 0.00019394499201122285, "loss": 11.6622, "step": 15983 }, { "epoch": 0.33458929917106256, "grad_norm": 0.25503817200660706, "learning_rate": 0.00019394424063783243, "loss": 11.6735, "step": 15984 }, { "epoch": 0.3346102319350247, "grad_norm": 0.248407781124115, "learning_rate": 0.0001939434892192811, "loss": 11.6905, "step": 15985 }, { "epoch": 0.33463116469898685, "grad_norm": 0.3877677619457245, "learning_rate": 0.0001939427377555693, "loss": 11.6748, "step": 15986 }, { "epoch": 0.334652097462949, "grad_norm": 0.3502494990825653, "learning_rate": 0.0001939419862466973, "loss": 11.6859, "step": 15987 }, { "epoch": 0.33467303022691114, "grad_norm": 0.2913818359375, "learning_rate": 0.00019394123469266555, "loss": 11.6787, "step": 15988 }, { "epoch": 0.33469396299087334, "grad_norm": 0.30608808994293213, "learning_rate": 0.00019394048309347433, "loss": 11.6773, "step": 15989 }, { "epoch": 0.3347148957548355, "grad_norm": 0.2633102834224701, "learning_rate": 0.00019393973144912402, "loss": 11.6661, "step": 15990 }, { "epoch": 0.33473582851879763, "grad_norm": 0.2726215720176697, "learning_rate": 0.00019393897975961504, "loss": 11.6622, "step": 15991 }, { "epoch": 0.3347567612827598, "grad_norm": 0.31942883133888245, "learning_rate": 0.00019393822802494766, "loss": 11.6725, "step": 15992 }, { "epoch": 0.3347776940467219, "grad_norm": 0.29354268312454224, "learning_rate": 0.00019393747624512234, "loss": 11.6936, "step": 15993 }, { "epoch": 0.33479862681068406, "grad_norm": 0.259237140417099, "learning_rate": 0.0001939367244201394, "loss": 11.6847, "step": 15994 }, { "epoch": 0.33481955957464626, "grad_norm": 0.2666240930557251, "learning_rate": 0.00019393597254999917, "loss": 11.6881, "step": 15995 }, { "epoch": 0.3348404923386084, "grad_norm": 0.29538530111312866, "learning_rate": 0.00019393522063470203, "loss": 11.6697, "step": 15996 }, { "epoch": 0.33486142510257055, "grad_norm": 0.27367252111434937, "learning_rate": 0.00019393446867424835, "loss": 11.6683, "step": 15997 }, { "epoch": 0.3348823578665327, "grad_norm": 0.31046274304389954, "learning_rate": 0.0001939337166686385, "loss": 11.6726, "step": 15998 }, { "epoch": 0.33490329063049484, "grad_norm": 0.28503474593162537, "learning_rate": 0.00019393296461787281, "loss": 11.6729, "step": 15999 }, { "epoch": 0.334924223394457, "grad_norm": 0.28641706705093384, "learning_rate": 0.00019393221252195168, "loss": 11.6974, "step": 16000 }, { "epoch": 0.334924223394457, "eval_loss": 11.675071716308594, "eval_runtime": 34.2549, "eval_samples_per_second": 28.054, "eval_steps_per_second": 7.035, "step": 16000 }, { "epoch": 0.33494515615841913, "grad_norm": 0.2726696729660034, "learning_rate": 0.00019393146038087545, "loss": 11.6786, "step": 16001 }, { "epoch": 0.33496608892238133, "grad_norm": 0.3539586067199707, "learning_rate": 0.00019393070819464448, "loss": 11.6691, "step": 16002 }, { "epoch": 0.3349870216863435, "grad_norm": 0.30465468764305115, "learning_rate": 0.00019392995596325915, "loss": 11.6826, "step": 16003 }, { "epoch": 0.3350079544503056, "grad_norm": 0.27413514256477356, "learning_rate": 0.00019392920368671977, "loss": 11.6898, "step": 16004 }, { "epoch": 0.33502888721426777, "grad_norm": 0.4564453363418579, "learning_rate": 0.00019392845136502677, "loss": 11.6669, "step": 16005 }, { "epoch": 0.3350498199782299, "grad_norm": 0.25111618638038635, "learning_rate": 0.00019392769899818048, "loss": 11.6875, "step": 16006 }, { "epoch": 0.33507075274219206, "grad_norm": 0.2592332065105438, "learning_rate": 0.00019392694658618126, "loss": 11.6657, "step": 16007 }, { "epoch": 0.33509168550615426, "grad_norm": 0.39531415700912476, "learning_rate": 0.00019392619412902947, "loss": 11.6816, "step": 16008 }, { "epoch": 0.3351126182701164, "grad_norm": 0.25230610370635986, "learning_rate": 0.0001939254416267255, "loss": 11.6803, "step": 16009 }, { "epoch": 0.33513355103407855, "grad_norm": 0.2685173749923706, "learning_rate": 0.00019392468907926963, "loss": 11.6654, "step": 16010 }, { "epoch": 0.3351544837980407, "grad_norm": 0.25924572348594666, "learning_rate": 0.00019392393648666234, "loss": 11.6638, "step": 16011 }, { "epoch": 0.33517541656200284, "grad_norm": 0.31861263513565063, "learning_rate": 0.0001939231838489039, "loss": 11.6787, "step": 16012 }, { "epoch": 0.335196349325965, "grad_norm": 0.3299262821674347, "learning_rate": 0.00019392243116599472, "loss": 11.6797, "step": 16013 }, { "epoch": 0.3352172820899271, "grad_norm": 0.2425946742296219, "learning_rate": 0.00019392167843793515, "loss": 11.6783, "step": 16014 }, { "epoch": 0.3352382148538893, "grad_norm": 0.3776840567588806, "learning_rate": 0.0001939209256647255, "loss": 11.6852, "step": 16015 }, { "epoch": 0.33525914761785147, "grad_norm": 0.29138174653053284, "learning_rate": 0.00019392017284636622, "loss": 11.6745, "step": 16016 }, { "epoch": 0.3352800803818136, "grad_norm": 0.3164568245410919, "learning_rate": 0.00019391941998285763, "loss": 11.6636, "step": 16017 }, { "epoch": 0.33530101314577576, "grad_norm": 0.2587098777294159, "learning_rate": 0.0001939186670742001, "loss": 11.6757, "step": 16018 }, { "epoch": 0.3353219459097379, "grad_norm": 0.29487383365631104, "learning_rate": 0.00019391791412039396, "loss": 11.6686, "step": 16019 }, { "epoch": 0.33534287867370005, "grad_norm": 0.2536908984184265, "learning_rate": 0.00019391716112143962, "loss": 11.6417, "step": 16020 }, { "epoch": 0.33536381143766225, "grad_norm": 0.3386850357055664, "learning_rate": 0.00019391640807733736, "loss": 11.688, "step": 16021 }, { "epoch": 0.3353847442016244, "grad_norm": 0.2590080201625824, "learning_rate": 0.00019391565498808765, "loss": 11.6701, "step": 16022 }, { "epoch": 0.33540567696558654, "grad_norm": 0.3055127263069153, "learning_rate": 0.00019391490185369082, "loss": 11.6833, "step": 16023 }, { "epoch": 0.3354266097295487, "grad_norm": 0.2415037453174591, "learning_rate": 0.00019391414867414719, "loss": 11.6587, "step": 16024 }, { "epoch": 0.33544754249351083, "grad_norm": 0.27341586351394653, "learning_rate": 0.00019391339544945715, "loss": 11.6554, "step": 16025 }, { "epoch": 0.335468475257473, "grad_norm": 0.2537500262260437, "learning_rate": 0.00019391264217962105, "loss": 11.6642, "step": 16026 }, { "epoch": 0.3354894080214352, "grad_norm": 0.32365283370018005, "learning_rate": 0.00019391188886463926, "loss": 11.6597, "step": 16027 }, { "epoch": 0.3355103407853973, "grad_norm": 0.26244261860847473, "learning_rate": 0.00019391113550451216, "loss": 11.6784, "step": 16028 }, { "epoch": 0.33553127354935947, "grad_norm": 0.3102896809577942, "learning_rate": 0.0001939103820992401, "loss": 11.6631, "step": 16029 }, { "epoch": 0.3355522063133216, "grad_norm": 0.30582395195961, "learning_rate": 0.00019390962864882343, "loss": 11.6814, "step": 16030 }, { "epoch": 0.33557313907728376, "grad_norm": 0.3551124930381775, "learning_rate": 0.0001939088751532625, "loss": 11.6812, "step": 16031 }, { "epoch": 0.3355940718412459, "grad_norm": 0.28249672055244446, "learning_rate": 0.0001939081216125577, "loss": 11.6883, "step": 16032 }, { "epoch": 0.33561500460520804, "grad_norm": 0.28587794303894043, "learning_rate": 0.0001939073680267094, "loss": 11.6782, "step": 16033 }, { "epoch": 0.33563593736917025, "grad_norm": 0.25113365054130554, "learning_rate": 0.00019390661439571793, "loss": 11.6752, "step": 16034 }, { "epoch": 0.3356568701331324, "grad_norm": 0.28875574469566345, "learning_rate": 0.00019390586071958368, "loss": 11.6822, "step": 16035 }, { "epoch": 0.33567780289709453, "grad_norm": 0.31144267320632935, "learning_rate": 0.000193905106998307, "loss": 11.6879, "step": 16036 }, { "epoch": 0.3356987356610567, "grad_norm": 0.26656028628349304, "learning_rate": 0.00019390435323188824, "loss": 11.673, "step": 16037 }, { "epoch": 0.3357196684250188, "grad_norm": 0.23876991868019104, "learning_rate": 0.0001939035994203278, "loss": 11.6715, "step": 16038 }, { "epoch": 0.33574060118898097, "grad_norm": 0.25062763690948486, "learning_rate": 0.00019390284556362598, "loss": 11.6749, "step": 16039 }, { "epoch": 0.33576153395294317, "grad_norm": 0.31283169984817505, "learning_rate": 0.00019390209166178323, "loss": 11.6552, "step": 16040 }, { "epoch": 0.3357824667169053, "grad_norm": 0.30716821551322937, "learning_rate": 0.00019390133771479982, "loss": 11.6767, "step": 16041 }, { "epoch": 0.33580339948086746, "grad_norm": 0.2538910210132599, "learning_rate": 0.00019390058372267618, "loss": 11.6693, "step": 16042 }, { "epoch": 0.3358243322448296, "grad_norm": 0.26266419887542725, "learning_rate": 0.00019389982968541264, "loss": 11.6716, "step": 16043 }, { "epoch": 0.33584526500879175, "grad_norm": 0.29149705171585083, "learning_rate": 0.00019389907560300954, "loss": 11.6908, "step": 16044 }, { "epoch": 0.3358661977727539, "grad_norm": 0.3582354485988617, "learning_rate": 0.0001938983214754673, "loss": 11.6956, "step": 16045 }, { "epoch": 0.3358871305367161, "grad_norm": 0.33995798230171204, "learning_rate": 0.00019389756730278627, "loss": 11.68, "step": 16046 }, { "epoch": 0.33590806330067824, "grad_norm": 0.2680727243423462, "learning_rate": 0.00019389681308496678, "loss": 11.6774, "step": 16047 }, { "epoch": 0.3359289960646404, "grad_norm": 0.2604110538959503, "learning_rate": 0.0001938960588220092, "loss": 11.6745, "step": 16048 }, { "epoch": 0.33594992882860253, "grad_norm": 0.3039074242115021, "learning_rate": 0.00019389530451391392, "loss": 11.6688, "step": 16049 }, { "epoch": 0.3359708615925647, "grad_norm": 0.302071213722229, "learning_rate": 0.0001938945501606813, "loss": 11.6862, "step": 16050 }, { "epoch": 0.3359917943565268, "grad_norm": 0.30483949184417725, "learning_rate": 0.00019389379576231167, "loss": 11.6791, "step": 16051 }, { "epoch": 0.33601272712048896, "grad_norm": 0.2516389787197113, "learning_rate": 0.00019389304131880541, "loss": 11.6639, "step": 16052 }, { "epoch": 0.33603365988445116, "grad_norm": 0.2486477494239807, "learning_rate": 0.00019389228683016287, "loss": 11.6739, "step": 16053 }, { "epoch": 0.3360545926484133, "grad_norm": 0.2625424265861511, "learning_rate": 0.00019389153229638442, "loss": 11.6768, "step": 16054 }, { "epoch": 0.33607552541237545, "grad_norm": 0.2511330842971802, "learning_rate": 0.00019389077771747045, "loss": 11.6728, "step": 16055 }, { "epoch": 0.3360964581763376, "grad_norm": 0.26889485120773315, "learning_rate": 0.00019389002309342133, "loss": 11.677, "step": 16056 }, { "epoch": 0.33611739094029974, "grad_norm": 0.22673411667346954, "learning_rate": 0.00019388926842423736, "loss": 11.6539, "step": 16057 }, { "epoch": 0.3361383237042619, "grad_norm": 0.3142753541469574, "learning_rate": 0.00019388851370991895, "loss": 11.6645, "step": 16058 }, { "epoch": 0.3361592564682241, "grad_norm": 0.32527533173561096, "learning_rate": 0.00019388775895046645, "loss": 11.6566, "step": 16059 }, { "epoch": 0.33618018923218623, "grad_norm": 0.2278107851743698, "learning_rate": 0.00019388700414588018, "loss": 11.6829, "step": 16060 }, { "epoch": 0.3362011219961484, "grad_norm": 0.30750468373298645, "learning_rate": 0.0001938862492961606, "loss": 11.6714, "step": 16061 }, { "epoch": 0.3362220547601105, "grad_norm": 0.26138338446617126, "learning_rate": 0.00019388549440130798, "loss": 11.6655, "step": 16062 }, { "epoch": 0.33624298752407267, "grad_norm": 0.2713371515274048, "learning_rate": 0.00019388473946132276, "loss": 11.6583, "step": 16063 }, { "epoch": 0.3362639202880348, "grad_norm": 0.28575223684310913, "learning_rate": 0.00019388398447620523, "loss": 11.6706, "step": 16064 }, { "epoch": 0.336284853051997, "grad_norm": 0.3062044084072113, "learning_rate": 0.00019388322944595583, "loss": 11.6892, "step": 16065 }, { "epoch": 0.33630578581595916, "grad_norm": 0.29495444893836975, "learning_rate": 0.00019388247437057484, "loss": 11.6744, "step": 16066 }, { "epoch": 0.3363267185799213, "grad_norm": 0.29410624504089355, "learning_rate": 0.00019388171925006266, "loss": 11.6919, "step": 16067 }, { "epoch": 0.33634765134388345, "grad_norm": 0.25881972908973694, "learning_rate": 0.0001938809640844197, "loss": 11.6734, "step": 16068 }, { "epoch": 0.3363685841078456, "grad_norm": 0.25864139199256897, "learning_rate": 0.00019388020887364622, "loss": 11.6773, "step": 16069 }, { "epoch": 0.33638951687180774, "grad_norm": 0.3048064112663269, "learning_rate": 0.00019387945361774267, "loss": 11.6835, "step": 16070 }, { "epoch": 0.3364104496357699, "grad_norm": 0.31416887044906616, "learning_rate": 0.0001938786983167094, "loss": 11.6779, "step": 16071 }, { "epoch": 0.3364313823997321, "grad_norm": 0.24649661779403687, "learning_rate": 0.00019387794297054677, "loss": 11.6809, "step": 16072 }, { "epoch": 0.3364523151636942, "grad_norm": 0.2570700943470001, "learning_rate": 0.0001938771875792551, "loss": 11.6589, "step": 16073 }, { "epoch": 0.33647324792765637, "grad_norm": 0.2585891783237457, "learning_rate": 0.00019387643214283478, "loss": 11.6648, "step": 16074 }, { "epoch": 0.3364941806916185, "grad_norm": 0.2774241864681244, "learning_rate": 0.00019387567666128618, "loss": 11.6602, "step": 16075 }, { "epoch": 0.33651511345558066, "grad_norm": 0.30601969361305237, "learning_rate": 0.0001938749211346097, "loss": 11.6602, "step": 16076 }, { "epoch": 0.3365360462195428, "grad_norm": 0.3947798013687134, "learning_rate": 0.00019387416556280562, "loss": 11.6691, "step": 16077 }, { "epoch": 0.336556978983505, "grad_norm": 0.21759000420570374, "learning_rate": 0.00019387340994587434, "loss": 11.6847, "step": 16078 }, { "epoch": 0.33657791174746715, "grad_norm": 0.2422817498445511, "learning_rate": 0.00019387265428381627, "loss": 11.6633, "step": 16079 }, { "epoch": 0.3365988445114293, "grad_norm": 0.3733188807964325, "learning_rate": 0.00019387189857663172, "loss": 11.6769, "step": 16080 }, { "epoch": 0.33661977727539144, "grad_norm": 0.2766168713569641, "learning_rate": 0.00019387114282432106, "loss": 11.6733, "step": 16081 }, { "epoch": 0.3366407100393536, "grad_norm": 0.32071739435195923, "learning_rate": 0.00019387038702688467, "loss": 11.6833, "step": 16082 }, { "epoch": 0.33666164280331573, "grad_norm": 0.28896063566207886, "learning_rate": 0.00019386963118432288, "loss": 11.6515, "step": 16083 }, { "epoch": 0.33668257556727793, "grad_norm": 0.32055288553237915, "learning_rate": 0.00019386887529663612, "loss": 11.6776, "step": 16084 }, { "epoch": 0.3367035083312401, "grad_norm": 0.24832560122013092, "learning_rate": 0.00019386811936382469, "loss": 11.6694, "step": 16085 }, { "epoch": 0.3367244410952022, "grad_norm": 0.27310407161712646, "learning_rate": 0.00019386736338588894, "loss": 11.6783, "step": 16086 }, { "epoch": 0.33674537385916437, "grad_norm": 0.2937227487564087, "learning_rate": 0.0001938666073628293, "loss": 11.667, "step": 16087 }, { "epoch": 0.3367663066231265, "grad_norm": 0.2566169798374176, "learning_rate": 0.00019386585129464612, "loss": 11.6836, "step": 16088 }, { "epoch": 0.33678723938708865, "grad_norm": 0.24488359689712524, "learning_rate": 0.0001938650951813397, "loss": 11.684, "step": 16089 }, { "epoch": 0.3368081721510508, "grad_norm": 0.3401803970336914, "learning_rate": 0.0001938643390229105, "loss": 11.67, "step": 16090 }, { "epoch": 0.336829104915013, "grad_norm": 0.2590148448944092, "learning_rate": 0.00019386358281935876, "loss": 11.6951, "step": 16091 }, { "epoch": 0.33685003767897514, "grad_norm": 0.30602529644966125, "learning_rate": 0.00019386282657068497, "loss": 11.6794, "step": 16092 }, { "epoch": 0.3368709704429373, "grad_norm": 0.31544172763824463, "learning_rate": 0.00019386207027688942, "loss": 11.6807, "step": 16093 }, { "epoch": 0.33689190320689943, "grad_norm": 0.2826894223690033, "learning_rate": 0.0001938613139379725, "loss": 11.6504, "step": 16094 }, { "epoch": 0.3369128359708616, "grad_norm": 0.32759761810302734, "learning_rate": 0.00019386055755393454, "loss": 11.6721, "step": 16095 }, { "epoch": 0.3369337687348237, "grad_norm": 0.26437410712242126, "learning_rate": 0.00019385980112477598, "loss": 11.6803, "step": 16096 }, { "epoch": 0.3369547014987859, "grad_norm": 0.24071232974529266, "learning_rate": 0.0001938590446504971, "loss": 11.6552, "step": 16097 }, { "epoch": 0.33697563426274807, "grad_norm": 0.3740476369857788, "learning_rate": 0.00019385828813109829, "loss": 11.6832, "step": 16098 }, { "epoch": 0.3369965670267102, "grad_norm": 0.28616228699684143, "learning_rate": 0.00019385753156657992, "loss": 11.69, "step": 16099 }, { "epoch": 0.33701749979067236, "grad_norm": 0.2818305194377899, "learning_rate": 0.00019385677495694238, "loss": 11.6904, "step": 16100 }, { "epoch": 0.3370384325546345, "grad_norm": 0.29992398619651794, "learning_rate": 0.00019385601830218597, "loss": 11.6657, "step": 16101 }, { "epoch": 0.33705936531859665, "grad_norm": 0.3128072917461395, "learning_rate": 0.00019385526160231112, "loss": 11.6689, "step": 16102 }, { "epoch": 0.3370802980825588, "grad_norm": 0.23401834070682526, "learning_rate": 0.00019385450485731815, "loss": 11.6775, "step": 16103 }, { "epoch": 0.337101230846521, "grad_norm": 0.24570538103580475, "learning_rate": 0.00019385374806720745, "loss": 11.6713, "step": 16104 }, { "epoch": 0.33712216361048314, "grad_norm": 0.3170951306819916, "learning_rate": 0.00019385299123197936, "loss": 11.6876, "step": 16105 }, { "epoch": 0.3371430963744453, "grad_norm": 0.2389523833990097, "learning_rate": 0.00019385223435163426, "loss": 11.6681, "step": 16106 }, { "epoch": 0.33716402913840743, "grad_norm": 0.2692157030105591, "learning_rate": 0.0001938514774261725, "loss": 11.6763, "step": 16107 }, { "epoch": 0.3371849619023696, "grad_norm": 0.24654173851013184, "learning_rate": 0.00019385072045559448, "loss": 11.6774, "step": 16108 }, { "epoch": 0.3372058946663317, "grad_norm": 0.27720192074775696, "learning_rate": 0.0001938499634399005, "loss": 11.6841, "step": 16109 }, { "epoch": 0.3372268274302939, "grad_norm": 0.2609979212284088, "learning_rate": 0.00019384920637909102, "loss": 11.6549, "step": 16110 }, { "epoch": 0.33724776019425606, "grad_norm": 0.29752111434936523, "learning_rate": 0.0001938484492731663, "loss": 11.6821, "step": 16111 }, { "epoch": 0.3372686929582182, "grad_norm": 0.3911050856113434, "learning_rate": 0.00019384769212212678, "loss": 11.6791, "step": 16112 }, { "epoch": 0.33728962572218035, "grad_norm": 0.29176685214042664, "learning_rate": 0.00019384693492597275, "loss": 11.6726, "step": 16113 }, { "epoch": 0.3373105584861425, "grad_norm": 0.25324681401252747, "learning_rate": 0.00019384617768470466, "loss": 11.6674, "step": 16114 }, { "epoch": 0.33733149125010464, "grad_norm": 0.26350200176239014, "learning_rate": 0.00019384542039832284, "loss": 11.6757, "step": 16115 }, { "epoch": 0.33735242401406684, "grad_norm": 0.23445317149162292, "learning_rate": 0.00019384466306682758, "loss": 11.6759, "step": 16116 }, { "epoch": 0.337373356778029, "grad_norm": 0.3733670711517334, "learning_rate": 0.00019384390569021935, "loss": 11.6839, "step": 16117 }, { "epoch": 0.33739428954199113, "grad_norm": 0.25739240646362305, "learning_rate": 0.0001938431482684985, "loss": 11.693, "step": 16118 }, { "epoch": 0.3374152223059533, "grad_norm": 0.315042108297348, "learning_rate": 0.00019384239080166534, "loss": 11.6417, "step": 16119 }, { "epoch": 0.3374361550699154, "grad_norm": 0.3985973596572876, "learning_rate": 0.00019384163328972024, "loss": 11.7008, "step": 16120 }, { "epoch": 0.33745708783387757, "grad_norm": 0.2950638234615326, "learning_rate": 0.00019384087573266363, "loss": 11.6698, "step": 16121 }, { "epoch": 0.3374780205978397, "grad_norm": 0.2559715211391449, "learning_rate": 0.0001938401181304958, "loss": 11.6811, "step": 16122 }, { "epoch": 0.3374989533618019, "grad_norm": 0.2755431532859802, "learning_rate": 0.00019383936048321715, "loss": 11.6837, "step": 16123 }, { "epoch": 0.33751988612576406, "grad_norm": 0.25901705026626587, "learning_rate": 0.00019383860279082807, "loss": 11.659, "step": 16124 }, { "epoch": 0.3375408188897262, "grad_norm": 0.2332252711057663, "learning_rate": 0.00019383784505332886, "loss": 11.6646, "step": 16125 }, { "epoch": 0.33756175165368835, "grad_norm": 0.2913053333759308, "learning_rate": 0.00019383708727071993, "loss": 11.6855, "step": 16126 }, { "epoch": 0.3375826844176505, "grad_norm": 0.32118120789527893, "learning_rate": 0.00019383632944300163, "loss": 11.6751, "step": 16127 }, { "epoch": 0.33760361718161264, "grad_norm": 0.25273367762565613, "learning_rate": 0.0001938355715701743, "loss": 11.6665, "step": 16128 }, { "epoch": 0.33762454994557484, "grad_norm": 0.2712266147136688, "learning_rate": 0.00019383481365223837, "loss": 11.6777, "step": 16129 }, { "epoch": 0.337645482709537, "grad_norm": 0.30800437927246094, "learning_rate": 0.00019383405568919416, "loss": 11.68, "step": 16130 }, { "epoch": 0.3376664154734991, "grad_norm": 0.2967412769794464, "learning_rate": 0.000193833297681042, "loss": 11.6813, "step": 16131 }, { "epoch": 0.33768734823746127, "grad_norm": 0.24216662347316742, "learning_rate": 0.00019383253962778233, "loss": 11.6662, "step": 16132 }, { "epoch": 0.3377082810014234, "grad_norm": 0.2824460566043854, "learning_rate": 0.00019383178152941545, "loss": 11.6724, "step": 16133 }, { "epoch": 0.33772921376538556, "grad_norm": 0.280326247215271, "learning_rate": 0.00019383102338594178, "loss": 11.6596, "step": 16134 }, { "epoch": 0.33775014652934776, "grad_norm": 0.2769678831100464, "learning_rate": 0.00019383026519736166, "loss": 11.6809, "step": 16135 }, { "epoch": 0.3377710792933099, "grad_norm": 0.26167649030685425, "learning_rate": 0.00019382950696367543, "loss": 11.6783, "step": 16136 }, { "epoch": 0.33779201205727205, "grad_norm": 0.3030230700969696, "learning_rate": 0.00019382874868488349, "loss": 11.6637, "step": 16137 }, { "epoch": 0.3378129448212342, "grad_norm": 0.34748807549476624, "learning_rate": 0.00019382799036098618, "loss": 11.6819, "step": 16138 }, { "epoch": 0.33783387758519634, "grad_norm": 0.293048232793808, "learning_rate": 0.00019382723199198388, "loss": 11.6758, "step": 16139 }, { "epoch": 0.3378548103491585, "grad_norm": 0.26205208897590637, "learning_rate": 0.00019382647357787694, "loss": 11.6547, "step": 16140 }, { "epoch": 0.33787574311312063, "grad_norm": 0.33539050817489624, "learning_rate": 0.00019382571511866574, "loss": 11.6713, "step": 16141 }, { "epoch": 0.33789667587708283, "grad_norm": 0.2843760848045349, "learning_rate": 0.00019382495661435066, "loss": 11.664, "step": 16142 }, { "epoch": 0.337917608641045, "grad_norm": 0.2818138599395752, "learning_rate": 0.00019382419806493202, "loss": 11.6824, "step": 16143 }, { "epoch": 0.3379385414050071, "grad_norm": 0.24269431829452515, "learning_rate": 0.0001938234394704102, "loss": 11.6734, "step": 16144 }, { "epoch": 0.33795947416896926, "grad_norm": 0.35702070593833923, "learning_rate": 0.0001938226808307856, "loss": 11.6886, "step": 16145 }, { "epoch": 0.3379804069329314, "grad_norm": 0.261539101600647, "learning_rate": 0.00019382192214605851, "loss": 11.6737, "step": 16146 }, { "epoch": 0.33800133969689355, "grad_norm": 0.25149932503700256, "learning_rate": 0.0001938211634162294, "loss": 11.6697, "step": 16147 }, { "epoch": 0.33802227246085575, "grad_norm": 0.29501041769981384, "learning_rate": 0.00019382040464129853, "loss": 11.666, "step": 16148 }, { "epoch": 0.3380432052248179, "grad_norm": 0.34389835596084595, "learning_rate": 0.00019381964582126633, "loss": 11.6655, "step": 16149 }, { "epoch": 0.33806413798878004, "grad_norm": 0.3335476815700531, "learning_rate": 0.00019381888695613314, "loss": 11.6586, "step": 16150 }, { "epoch": 0.3380850707527422, "grad_norm": 0.282400518655777, "learning_rate": 0.00019381812804589933, "loss": 11.6663, "step": 16151 }, { "epoch": 0.33810600351670433, "grad_norm": 0.27369028329849243, "learning_rate": 0.00019381736909056526, "loss": 11.678, "step": 16152 }, { "epoch": 0.3381269362806665, "grad_norm": 0.3600926995277405, "learning_rate": 0.00019381661009013136, "loss": 11.6651, "step": 16153 }, { "epoch": 0.3381478690446287, "grad_norm": 0.2713000178337097, "learning_rate": 0.00019381585104459784, "loss": 11.6816, "step": 16154 }, { "epoch": 0.3381688018085908, "grad_norm": 0.3116188943386078, "learning_rate": 0.00019381509195396524, "loss": 11.6604, "step": 16155 }, { "epoch": 0.33818973457255297, "grad_norm": 0.2671522796154022, "learning_rate": 0.0001938143328182338, "loss": 11.6776, "step": 16156 }, { "epoch": 0.3382106673365151, "grad_norm": 0.23333723843097687, "learning_rate": 0.00019381357363740394, "loss": 11.6754, "step": 16157 }, { "epoch": 0.33823160010047726, "grad_norm": 0.2481294870376587, "learning_rate": 0.00019381281441147602, "loss": 11.6754, "step": 16158 }, { "epoch": 0.3382525328644394, "grad_norm": 0.24859392642974854, "learning_rate": 0.00019381205514045039, "loss": 11.6704, "step": 16159 }, { "epoch": 0.33827346562840155, "grad_norm": 0.3488836884498596, "learning_rate": 0.00019381129582432744, "loss": 11.6641, "step": 16160 }, { "epoch": 0.33829439839236375, "grad_norm": 0.2631065845489502, "learning_rate": 0.00019381053646310754, "loss": 11.6615, "step": 16161 }, { "epoch": 0.3383153311563259, "grad_norm": 0.23170416057109833, "learning_rate": 0.000193809777056791, "loss": 11.6875, "step": 16162 }, { "epoch": 0.33833626392028804, "grad_norm": 0.3385789692401886, "learning_rate": 0.00019380901760537823, "loss": 11.6734, "step": 16163 }, { "epoch": 0.3383571966842502, "grad_norm": 0.3373786211013794, "learning_rate": 0.00019380825810886958, "loss": 11.6634, "step": 16164 }, { "epoch": 0.33837812944821233, "grad_norm": 0.3370153605937958, "learning_rate": 0.00019380749856726543, "loss": 11.6531, "step": 16165 }, { "epoch": 0.3383990622121745, "grad_norm": 0.22815507650375366, "learning_rate": 0.00019380673898056615, "loss": 11.6824, "step": 16166 }, { "epoch": 0.3384199949761367, "grad_norm": 0.2733933627605438, "learning_rate": 0.00019380597934877205, "loss": 11.6737, "step": 16167 }, { "epoch": 0.3384409277400988, "grad_norm": 0.24853649735450745, "learning_rate": 0.00019380521967188354, "loss": 11.6865, "step": 16168 }, { "epoch": 0.33846186050406096, "grad_norm": 0.28364649415016174, "learning_rate": 0.00019380445994990103, "loss": 11.6762, "step": 16169 }, { "epoch": 0.3384827932680231, "grad_norm": 0.2704371213912964, "learning_rate": 0.0001938037001828248, "loss": 11.6827, "step": 16170 }, { "epoch": 0.33850372603198525, "grad_norm": 0.23277004063129425, "learning_rate": 0.00019380294037065526, "loss": 11.6685, "step": 16171 }, { "epoch": 0.3385246587959474, "grad_norm": 0.2990517020225525, "learning_rate": 0.00019380218051339275, "loss": 11.6811, "step": 16172 }, { "epoch": 0.3385455915599096, "grad_norm": 0.3274298310279846, "learning_rate": 0.00019380142061103768, "loss": 11.6732, "step": 16173 }, { "epoch": 0.33856652432387174, "grad_norm": 0.32831713557243347, "learning_rate": 0.00019380066066359034, "loss": 11.7031, "step": 16174 }, { "epoch": 0.3385874570878339, "grad_norm": 0.2649492025375366, "learning_rate": 0.00019379990067105117, "loss": 11.673, "step": 16175 }, { "epoch": 0.33860838985179603, "grad_norm": 0.2513584494590759, "learning_rate": 0.00019379914063342052, "loss": 11.6675, "step": 16176 }, { "epoch": 0.3386293226157582, "grad_norm": 0.2689751088619232, "learning_rate": 0.00019379838055069874, "loss": 11.6735, "step": 16177 }, { "epoch": 0.3386502553797203, "grad_norm": 0.28330421447753906, "learning_rate": 0.00019379762042288622, "loss": 11.6676, "step": 16178 }, { "epoch": 0.33867118814368247, "grad_norm": 0.2601189613342285, "learning_rate": 0.00019379686024998324, "loss": 11.6697, "step": 16179 }, { "epoch": 0.33869212090764467, "grad_norm": 0.2545444965362549, "learning_rate": 0.0001937961000319903, "loss": 11.674, "step": 16180 }, { "epoch": 0.3387130536716068, "grad_norm": 0.29610082507133484, "learning_rate": 0.00019379533976890767, "loss": 11.6817, "step": 16181 }, { "epoch": 0.33873398643556896, "grad_norm": 0.2447909116744995, "learning_rate": 0.00019379457946073572, "loss": 11.67, "step": 16182 }, { "epoch": 0.3387549191995311, "grad_norm": 0.2736307680606842, "learning_rate": 0.00019379381910747485, "loss": 11.6873, "step": 16183 }, { "epoch": 0.33877585196349325, "grad_norm": 0.2467290461063385, "learning_rate": 0.00019379305870912542, "loss": 11.6698, "step": 16184 }, { "epoch": 0.3387967847274554, "grad_norm": 0.2778765857219696, "learning_rate": 0.00019379229826568776, "loss": 11.6748, "step": 16185 }, { "epoch": 0.3388177174914176, "grad_norm": 0.3244139552116394, "learning_rate": 0.00019379153777716228, "loss": 11.6774, "step": 16186 }, { "epoch": 0.33883865025537974, "grad_norm": 0.3375740051269531, "learning_rate": 0.00019379077724354936, "loss": 11.6774, "step": 16187 }, { "epoch": 0.3388595830193419, "grad_norm": 0.3029187023639679, "learning_rate": 0.0001937900166648493, "loss": 11.6775, "step": 16188 }, { "epoch": 0.338880515783304, "grad_norm": 0.2679896056652069, "learning_rate": 0.0001937892560410625, "loss": 11.6673, "step": 16189 }, { "epoch": 0.33890144854726617, "grad_norm": 0.25329121947288513, "learning_rate": 0.0001937884953721893, "loss": 11.6624, "step": 16190 }, { "epoch": 0.3389223813112283, "grad_norm": 0.23243515193462372, "learning_rate": 0.00019378773465823014, "loss": 11.6829, "step": 16191 }, { "epoch": 0.33894331407519046, "grad_norm": 0.2466219812631607, "learning_rate": 0.00019378697389918533, "loss": 11.6629, "step": 16192 }, { "epoch": 0.33896424683915266, "grad_norm": 0.29261720180511475, "learning_rate": 0.0001937862130950552, "loss": 11.6668, "step": 16193 }, { "epoch": 0.3389851796031148, "grad_norm": 0.3131009042263031, "learning_rate": 0.0001937854522458402, "loss": 11.6594, "step": 16194 }, { "epoch": 0.33900611236707695, "grad_norm": 0.2983933687210083, "learning_rate": 0.00019378469135154062, "loss": 11.6705, "step": 16195 }, { "epoch": 0.3390270451310391, "grad_norm": 0.27978867292404175, "learning_rate": 0.00019378393041215688, "loss": 11.6899, "step": 16196 }, { "epoch": 0.33904797789500124, "grad_norm": 0.27903690934181213, "learning_rate": 0.00019378316942768932, "loss": 11.676, "step": 16197 }, { "epoch": 0.3390689106589634, "grad_norm": 0.2457589954137802, "learning_rate": 0.00019378240839813831, "loss": 11.6829, "step": 16198 }, { "epoch": 0.3390898434229256, "grad_norm": 0.3513204753398895, "learning_rate": 0.00019378164732350423, "loss": 11.6766, "step": 16199 }, { "epoch": 0.33911077618688773, "grad_norm": 0.2909734845161438, "learning_rate": 0.00019378088620378741, "loss": 11.6405, "step": 16200 }, { "epoch": 0.3391317089508499, "grad_norm": 0.28212860226631165, "learning_rate": 0.00019378012503898826, "loss": 11.6622, "step": 16201 }, { "epoch": 0.339152641714812, "grad_norm": 0.30222341418266296, "learning_rate": 0.00019377936382910713, "loss": 11.6559, "step": 16202 }, { "epoch": 0.33917357447877416, "grad_norm": 0.2628610134124756, "learning_rate": 0.00019377860257414436, "loss": 11.6862, "step": 16203 }, { "epoch": 0.3391945072427363, "grad_norm": 0.319341242313385, "learning_rate": 0.00019377784127410033, "loss": 11.6532, "step": 16204 }, { "epoch": 0.3392154400066985, "grad_norm": 0.32412251830101013, "learning_rate": 0.00019377707992897545, "loss": 11.6846, "step": 16205 }, { "epoch": 0.33923637277066065, "grad_norm": 0.2977753281593323, "learning_rate": 0.00019377631853877004, "loss": 11.6711, "step": 16206 }, { "epoch": 0.3392573055346228, "grad_norm": 0.2671520709991455, "learning_rate": 0.00019377555710348442, "loss": 11.6696, "step": 16207 }, { "epoch": 0.33927823829858494, "grad_norm": 0.26141953468322754, "learning_rate": 0.00019377479562311906, "loss": 11.6583, "step": 16208 }, { "epoch": 0.3392991710625471, "grad_norm": 0.34288546442985535, "learning_rate": 0.00019377403409767427, "loss": 11.6649, "step": 16209 }, { "epoch": 0.33932010382650923, "grad_norm": 0.2357964813709259, "learning_rate": 0.00019377327252715043, "loss": 11.6599, "step": 16210 }, { "epoch": 0.3393410365904714, "grad_norm": 0.3154085874557495, "learning_rate": 0.00019377251091154786, "loss": 11.6752, "step": 16211 }, { "epoch": 0.3393619693544336, "grad_norm": 0.3302789330482483, "learning_rate": 0.000193771749250867, "loss": 11.6803, "step": 16212 }, { "epoch": 0.3393829021183957, "grad_norm": 0.27394071221351624, "learning_rate": 0.00019377098754510817, "loss": 11.6789, "step": 16213 }, { "epoch": 0.33940383488235787, "grad_norm": 0.2685302197933197, "learning_rate": 0.00019377022579427174, "loss": 11.6602, "step": 16214 }, { "epoch": 0.33942476764632, "grad_norm": 0.3396351635456085, "learning_rate": 0.0001937694639983581, "loss": 11.6724, "step": 16215 }, { "epoch": 0.33944570041028216, "grad_norm": 0.2850930094718933, "learning_rate": 0.0001937687021573676, "loss": 11.6746, "step": 16216 }, { "epoch": 0.3394666331742443, "grad_norm": 0.3513805866241455, "learning_rate": 0.0001937679402713006, "loss": 11.674, "step": 16217 }, { "epoch": 0.3394875659382065, "grad_norm": 0.2674286365509033, "learning_rate": 0.00019376717834015746, "loss": 11.6739, "step": 16218 }, { "epoch": 0.33950849870216865, "grad_norm": 0.26117029786109924, "learning_rate": 0.00019376641636393858, "loss": 11.6733, "step": 16219 }, { "epoch": 0.3395294314661308, "grad_norm": 0.3063511550426483, "learning_rate": 0.0001937656543426443, "loss": 11.6904, "step": 16220 }, { "epoch": 0.33955036423009294, "grad_norm": 0.32457777857780457, "learning_rate": 0.00019376489227627497, "loss": 11.6741, "step": 16221 }, { "epoch": 0.3395712969940551, "grad_norm": 0.2501375079154968, "learning_rate": 0.000193764130164831, "loss": 11.6625, "step": 16222 }, { "epoch": 0.33959222975801723, "grad_norm": 0.3072677552700043, "learning_rate": 0.00019376336800831271, "loss": 11.6987, "step": 16223 }, { "epoch": 0.33961316252197943, "grad_norm": 0.30224916338920593, "learning_rate": 0.0001937626058067205, "loss": 11.6775, "step": 16224 }, { "epoch": 0.3396340952859416, "grad_norm": 0.24290308356285095, "learning_rate": 0.00019376184356005474, "loss": 11.6568, "step": 16225 }, { "epoch": 0.3396550280499037, "grad_norm": 0.32108110189437866, "learning_rate": 0.00019376108126831576, "loss": 11.6653, "step": 16226 }, { "epoch": 0.33967596081386586, "grad_norm": 0.31113001704216003, "learning_rate": 0.000193760318931504, "loss": 11.6665, "step": 16227 }, { "epoch": 0.339696893577828, "grad_norm": 0.2905203104019165, "learning_rate": 0.0001937595565496197, "loss": 11.6691, "step": 16228 }, { "epoch": 0.33971782634179015, "grad_norm": 0.2847651541233063, "learning_rate": 0.00019375879412266336, "loss": 11.6628, "step": 16229 }, { "epoch": 0.3397387591057523, "grad_norm": 0.24889396131038666, "learning_rate": 0.00019375803165063528, "loss": 11.6806, "step": 16230 }, { "epoch": 0.3397596918697145, "grad_norm": 0.35322701930999756, "learning_rate": 0.00019375726913353582, "loss": 11.6683, "step": 16231 }, { "epoch": 0.33978062463367664, "grad_norm": 0.29550328850746155, "learning_rate": 0.00019375650657136535, "loss": 11.6966, "step": 16232 }, { "epoch": 0.3398015573976388, "grad_norm": 0.25985172390937805, "learning_rate": 0.00019375574396412426, "loss": 11.6603, "step": 16233 }, { "epoch": 0.33982249016160093, "grad_norm": 0.26632606983184814, "learning_rate": 0.0001937549813118129, "loss": 11.667, "step": 16234 }, { "epoch": 0.3398434229255631, "grad_norm": 0.2748858630657196, "learning_rate": 0.00019375421861443165, "loss": 11.6808, "step": 16235 }, { "epoch": 0.3398643556895252, "grad_norm": 0.26566362380981445, "learning_rate": 0.0001937534558719809, "loss": 11.6764, "step": 16236 }, { "epoch": 0.3398852884534874, "grad_norm": 0.2655895948410034, "learning_rate": 0.00019375269308446097, "loss": 11.6725, "step": 16237 }, { "epoch": 0.33990622121744957, "grad_norm": 0.2977735102176666, "learning_rate": 0.0001937519302518722, "loss": 11.6733, "step": 16238 }, { "epoch": 0.3399271539814117, "grad_norm": 0.34238407015800476, "learning_rate": 0.00019375116737421503, "loss": 11.6919, "step": 16239 }, { "epoch": 0.33994808674537386, "grad_norm": 0.23205822706222534, "learning_rate": 0.00019375040445148976, "loss": 11.6673, "step": 16240 }, { "epoch": 0.339969019509336, "grad_norm": 0.28417328000068665, "learning_rate": 0.00019374964148369684, "loss": 11.675, "step": 16241 }, { "epoch": 0.33998995227329815, "grad_norm": 0.26010459661483765, "learning_rate": 0.0001937488784708366, "loss": 11.6677, "step": 16242 }, { "epoch": 0.34001088503726035, "grad_norm": 0.3254680931568146, "learning_rate": 0.00019374811541290933, "loss": 11.6587, "step": 16243 }, { "epoch": 0.3400318178012225, "grad_norm": 0.24711106717586517, "learning_rate": 0.0001937473523099155, "loss": 11.701, "step": 16244 }, { "epoch": 0.34005275056518464, "grad_norm": 0.3018239736557007, "learning_rate": 0.00019374658916185546, "loss": 11.6844, "step": 16245 }, { "epoch": 0.3400736833291468, "grad_norm": 0.32243677973747253, "learning_rate": 0.00019374582596872952, "loss": 11.687, "step": 16246 }, { "epoch": 0.3400946160931089, "grad_norm": 0.35474225878715515, "learning_rate": 0.0001937450627305381, "loss": 11.6828, "step": 16247 }, { "epoch": 0.34011554885707107, "grad_norm": 0.3023838996887207, "learning_rate": 0.00019374429944728154, "loss": 11.6753, "step": 16248 }, { "epoch": 0.3401364816210332, "grad_norm": 0.2717426121234894, "learning_rate": 0.0001937435361189602, "loss": 11.6603, "step": 16249 }, { "epoch": 0.3401574143849954, "grad_norm": 0.2811192572116852, "learning_rate": 0.0001937427727455745, "loss": 11.6815, "step": 16250 }, { "epoch": 0.34017834714895756, "grad_norm": 0.2489369511604309, "learning_rate": 0.00019374200932712477, "loss": 11.6729, "step": 16251 }, { "epoch": 0.3401992799129197, "grad_norm": 0.2774256467819214, "learning_rate": 0.00019374124586361136, "loss": 11.6799, "step": 16252 }, { "epoch": 0.34022021267688185, "grad_norm": 0.2989460229873657, "learning_rate": 0.00019374048235503466, "loss": 11.6711, "step": 16253 }, { "epoch": 0.340241145440844, "grad_norm": 0.25003546476364136, "learning_rate": 0.00019373971880139502, "loss": 11.668, "step": 16254 }, { "epoch": 0.34026207820480614, "grad_norm": 0.3297734260559082, "learning_rate": 0.00019373895520269282, "loss": 11.6596, "step": 16255 }, { "epoch": 0.34028301096876834, "grad_norm": 0.33363765478134155, "learning_rate": 0.00019373819155892844, "loss": 11.6639, "step": 16256 }, { "epoch": 0.3403039437327305, "grad_norm": 0.2441415786743164, "learning_rate": 0.00019373742787010223, "loss": 11.6646, "step": 16257 }, { "epoch": 0.34032487649669263, "grad_norm": 0.299596905708313, "learning_rate": 0.00019373666413621457, "loss": 11.6852, "step": 16258 }, { "epoch": 0.3403458092606548, "grad_norm": 0.27280184626579285, "learning_rate": 0.0001937359003572658, "loss": 11.6656, "step": 16259 }, { "epoch": 0.3403667420246169, "grad_norm": 0.3240976929664612, "learning_rate": 0.0001937351365332563, "loss": 11.6875, "step": 16260 }, { "epoch": 0.34038767478857906, "grad_norm": 0.23447029292583466, "learning_rate": 0.00019373437266418644, "loss": 11.6794, "step": 16261 }, { "epoch": 0.34040860755254126, "grad_norm": 0.3253335654735565, "learning_rate": 0.0001937336087500566, "loss": 11.6863, "step": 16262 }, { "epoch": 0.3404295403165034, "grad_norm": 0.3183076083660126, "learning_rate": 0.00019373284479086712, "loss": 11.6685, "step": 16263 }, { "epoch": 0.34045047308046555, "grad_norm": 0.23649147152900696, "learning_rate": 0.0001937320807866184, "loss": 11.6738, "step": 16264 }, { "epoch": 0.3404714058444277, "grad_norm": 0.3087405860424042, "learning_rate": 0.0001937313167373108, "loss": 11.6876, "step": 16265 }, { "epoch": 0.34049233860838984, "grad_norm": 0.2607972323894501, "learning_rate": 0.00019373055264294468, "loss": 11.668, "step": 16266 }, { "epoch": 0.340513271372352, "grad_norm": 0.27558666467666626, "learning_rate": 0.00019372978850352036, "loss": 11.6784, "step": 16267 }, { "epoch": 0.34053420413631413, "grad_norm": 0.2911771237850189, "learning_rate": 0.0001937290243190383, "loss": 11.6616, "step": 16268 }, { "epoch": 0.34055513690027633, "grad_norm": 0.29230526089668274, "learning_rate": 0.0001937282600894988, "loss": 11.6792, "step": 16269 }, { "epoch": 0.3405760696642385, "grad_norm": 0.3194241225719452, "learning_rate": 0.00019372749581490225, "loss": 11.6826, "step": 16270 }, { "epoch": 0.3405970024282006, "grad_norm": 0.33793720602989197, "learning_rate": 0.000193726731495249, "loss": 11.6724, "step": 16271 }, { "epoch": 0.34061793519216277, "grad_norm": 0.34485551714897156, "learning_rate": 0.00019372596713053945, "loss": 11.6768, "step": 16272 }, { "epoch": 0.3406388679561249, "grad_norm": 0.25338104367256165, "learning_rate": 0.00019372520272077393, "loss": 11.6636, "step": 16273 }, { "epoch": 0.34065980072008706, "grad_norm": 0.35452800989151, "learning_rate": 0.00019372443826595284, "loss": 11.6683, "step": 16274 }, { "epoch": 0.34068073348404926, "grad_norm": 0.33319300413131714, "learning_rate": 0.00019372367376607654, "loss": 11.6864, "step": 16275 }, { "epoch": 0.3407016662480114, "grad_norm": 0.27494290471076965, "learning_rate": 0.0001937229092211454, "loss": 11.6642, "step": 16276 }, { "epoch": 0.34072259901197355, "grad_norm": 0.4185244143009186, "learning_rate": 0.00019372214463115976, "loss": 11.673, "step": 16277 }, { "epoch": 0.3407435317759357, "grad_norm": 0.2632211148738861, "learning_rate": 0.00019372137999612002, "loss": 11.6511, "step": 16278 }, { "epoch": 0.34076446453989784, "grad_norm": 0.28799569606781006, "learning_rate": 0.00019372061531602652, "loss": 11.6732, "step": 16279 }, { "epoch": 0.34078539730386, "grad_norm": 0.2700311541557312, "learning_rate": 0.00019371985059087965, "loss": 11.6835, "step": 16280 }, { "epoch": 0.3408063300678222, "grad_norm": 0.29262322187423706, "learning_rate": 0.00019371908582067974, "loss": 11.6739, "step": 16281 }, { "epoch": 0.34082726283178433, "grad_norm": 0.2944019138813019, "learning_rate": 0.00019371832100542724, "loss": 11.6675, "step": 16282 }, { "epoch": 0.3408481955957465, "grad_norm": 0.23664063215255737, "learning_rate": 0.00019371755614512243, "loss": 11.6683, "step": 16283 }, { "epoch": 0.3408691283597086, "grad_norm": 0.2681671380996704, "learning_rate": 0.00019371679123976574, "loss": 11.6692, "step": 16284 }, { "epoch": 0.34089006112367076, "grad_norm": 0.3277702033519745, "learning_rate": 0.0001937160262893575, "loss": 11.6724, "step": 16285 }, { "epoch": 0.3409109938876329, "grad_norm": 0.2988947629928589, "learning_rate": 0.00019371526129389807, "loss": 11.6579, "step": 16286 }, { "epoch": 0.34093192665159505, "grad_norm": 0.3292640447616577, "learning_rate": 0.00019371449625338785, "loss": 11.6787, "step": 16287 }, { "epoch": 0.34095285941555725, "grad_norm": 0.30765479803085327, "learning_rate": 0.0001937137311678272, "loss": 11.6746, "step": 16288 }, { "epoch": 0.3409737921795194, "grad_norm": 0.31914210319519043, "learning_rate": 0.00019371296603721647, "loss": 11.6879, "step": 16289 }, { "epoch": 0.34099472494348154, "grad_norm": 0.26567062735557556, "learning_rate": 0.00019371220086155603, "loss": 11.6645, "step": 16290 }, { "epoch": 0.3410156577074437, "grad_norm": 0.28816255927085876, "learning_rate": 0.00019371143564084626, "loss": 11.6638, "step": 16291 }, { "epoch": 0.34103659047140583, "grad_norm": 0.31883150339126587, "learning_rate": 0.00019371067037508754, "loss": 11.6887, "step": 16292 }, { "epoch": 0.341057523235368, "grad_norm": 0.31328561902046204, "learning_rate": 0.00019370990506428025, "loss": 11.6812, "step": 16293 }, { "epoch": 0.3410784559993302, "grad_norm": 0.26969343423843384, "learning_rate": 0.00019370913970842465, "loss": 11.6576, "step": 16294 }, { "epoch": 0.3410993887632923, "grad_norm": 0.2934074401855469, "learning_rate": 0.00019370837430752124, "loss": 11.6755, "step": 16295 }, { "epoch": 0.34112032152725447, "grad_norm": 0.27503547072410583, "learning_rate": 0.00019370760886157037, "loss": 11.6664, "step": 16296 }, { "epoch": 0.3411412542912166, "grad_norm": 0.24094128608703613, "learning_rate": 0.0001937068433705723, "loss": 11.6565, "step": 16297 }, { "epoch": 0.34116218705517876, "grad_norm": 0.24855567514896393, "learning_rate": 0.0001937060778345275, "loss": 11.681, "step": 16298 }, { "epoch": 0.3411831198191409, "grad_norm": 0.35775625705718994, "learning_rate": 0.00019370531225343634, "loss": 11.6777, "step": 16299 }, { "epoch": 0.34120405258310305, "grad_norm": 0.267648309469223, "learning_rate": 0.00019370454662729914, "loss": 11.6779, "step": 16300 }, { "epoch": 0.34122498534706525, "grad_norm": 0.2859273850917816, "learning_rate": 0.00019370378095611628, "loss": 11.6915, "step": 16301 }, { "epoch": 0.3412459181110274, "grad_norm": 0.27471816539764404, "learning_rate": 0.00019370301523988817, "loss": 11.6769, "step": 16302 }, { "epoch": 0.34126685087498954, "grad_norm": 0.26749294996261597, "learning_rate": 0.00019370224947861512, "loss": 11.6734, "step": 16303 }, { "epoch": 0.3412877836389517, "grad_norm": 0.3322632908821106, "learning_rate": 0.00019370148367229748, "loss": 11.6954, "step": 16304 }, { "epoch": 0.3413087164029138, "grad_norm": 0.34528642892837524, "learning_rate": 0.0001937007178209357, "loss": 11.6748, "step": 16305 }, { "epoch": 0.34132964916687597, "grad_norm": 0.26954028010368347, "learning_rate": 0.0001936999519245301, "loss": 11.663, "step": 16306 }, { "epoch": 0.34135058193083817, "grad_norm": 0.3400998115539551, "learning_rate": 0.00019369918598308104, "loss": 11.6711, "step": 16307 }, { "epoch": 0.3413715146948003, "grad_norm": 0.3169436454772949, "learning_rate": 0.00019369841999658893, "loss": 11.6967, "step": 16308 }, { "epoch": 0.34139244745876246, "grad_norm": 0.3809642493724823, "learning_rate": 0.0001936976539650541, "loss": 11.6613, "step": 16309 }, { "epoch": 0.3414133802227246, "grad_norm": 0.2941405773162842, "learning_rate": 0.00019369688788847692, "loss": 11.6717, "step": 16310 }, { "epoch": 0.34143431298668675, "grad_norm": 0.23178832232952118, "learning_rate": 0.00019369612176685778, "loss": 11.668, "step": 16311 }, { "epoch": 0.3414552457506489, "grad_norm": 0.28707629442214966, "learning_rate": 0.00019369535560019704, "loss": 11.6767, "step": 16312 }, { "epoch": 0.3414761785146111, "grad_norm": 0.25992339849472046, "learning_rate": 0.00019369458938849507, "loss": 11.6824, "step": 16313 }, { "epoch": 0.34149711127857324, "grad_norm": 0.33874666690826416, "learning_rate": 0.00019369382313175223, "loss": 11.6796, "step": 16314 }, { "epoch": 0.3415180440425354, "grad_norm": 0.3873659670352936, "learning_rate": 0.00019369305682996886, "loss": 11.682, "step": 16315 }, { "epoch": 0.34153897680649753, "grad_norm": 0.3092590868473053, "learning_rate": 0.00019369229048314538, "loss": 11.657, "step": 16316 }, { "epoch": 0.3415599095704597, "grad_norm": 0.3341062664985657, "learning_rate": 0.00019369152409128219, "loss": 11.698, "step": 16317 }, { "epoch": 0.3415808423344218, "grad_norm": 0.3698572516441345, "learning_rate": 0.00019369075765437953, "loss": 11.6703, "step": 16318 }, { "epoch": 0.34160177509838396, "grad_norm": 0.2768072783946991, "learning_rate": 0.00019368999117243786, "loss": 11.6676, "step": 16319 }, { "epoch": 0.34162270786234616, "grad_norm": 0.2864672839641571, "learning_rate": 0.00019368922464545755, "loss": 11.6773, "step": 16320 }, { "epoch": 0.3416436406263083, "grad_norm": 0.2760576009750366, "learning_rate": 0.00019368845807343895, "loss": 11.6765, "step": 16321 }, { "epoch": 0.34166457339027045, "grad_norm": 0.2968007028102875, "learning_rate": 0.00019368769145638247, "loss": 11.6769, "step": 16322 }, { "epoch": 0.3416855061542326, "grad_norm": 0.26255059242248535, "learning_rate": 0.0001936869247942884, "loss": 11.6575, "step": 16323 }, { "epoch": 0.34170643891819474, "grad_norm": 0.3006095290184021, "learning_rate": 0.00019368615808715712, "loss": 11.6832, "step": 16324 }, { "epoch": 0.3417273716821569, "grad_norm": 0.23866714537143707, "learning_rate": 0.00019368539133498907, "loss": 11.6859, "step": 16325 }, { "epoch": 0.3417483044461191, "grad_norm": 0.2925117611885071, "learning_rate": 0.00019368462453778454, "loss": 11.6921, "step": 16326 }, { "epoch": 0.34176923721008123, "grad_norm": 0.2754358947277069, "learning_rate": 0.00019368385769554395, "loss": 11.6784, "step": 16327 }, { "epoch": 0.3417901699740434, "grad_norm": 0.32858598232269287, "learning_rate": 0.00019368309080826767, "loss": 11.6715, "step": 16328 }, { "epoch": 0.3418111027380055, "grad_norm": 0.2872031629085541, "learning_rate": 0.000193682323875956, "loss": 11.6782, "step": 16329 }, { "epoch": 0.34183203550196767, "grad_norm": 0.2766225039958954, "learning_rate": 0.00019368155689860944, "loss": 11.6641, "step": 16330 }, { "epoch": 0.3418529682659298, "grad_norm": 0.32986342906951904, "learning_rate": 0.00019368078987622823, "loss": 11.6819, "step": 16331 }, { "epoch": 0.341873901029892, "grad_norm": 0.2813011407852173, "learning_rate": 0.00019368002280881278, "loss": 11.6752, "step": 16332 }, { "epoch": 0.34189483379385416, "grad_norm": 0.25758302211761475, "learning_rate": 0.00019367925569636348, "loss": 11.6771, "step": 16333 }, { "epoch": 0.3419157665578163, "grad_norm": 0.3002636134624481, "learning_rate": 0.00019367848853888065, "loss": 11.6847, "step": 16334 }, { "epoch": 0.34193669932177845, "grad_norm": 0.31310293078422546, "learning_rate": 0.00019367772133636475, "loss": 11.684, "step": 16335 }, { "epoch": 0.3419576320857406, "grad_norm": 0.2665090262889862, "learning_rate": 0.00019367695408881606, "loss": 11.6845, "step": 16336 }, { "epoch": 0.34197856484970274, "grad_norm": 0.26107099652290344, "learning_rate": 0.000193676186796235, "loss": 11.6858, "step": 16337 }, { "epoch": 0.3419994976136649, "grad_norm": 0.26818403601646423, "learning_rate": 0.0001936754194586219, "loss": 11.6828, "step": 16338 }, { "epoch": 0.3420204303776271, "grad_norm": 0.2635054290294647, "learning_rate": 0.00019367465207597716, "loss": 11.6796, "step": 16339 }, { "epoch": 0.3420413631415892, "grad_norm": 0.272776335477829, "learning_rate": 0.00019367388464830115, "loss": 11.6661, "step": 16340 }, { "epoch": 0.3420622959055514, "grad_norm": 0.29038408398628235, "learning_rate": 0.0001936731171755942, "loss": 11.6727, "step": 16341 }, { "epoch": 0.3420832286695135, "grad_norm": 0.2471991628408432, "learning_rate": 0.0001936723496578567, "loss": 11.6688, "step": 16342 }, { "epoch": 0.34210416143347566, "grad_norm": 0.333270400762558, "learning_rate": 0.00019367158209508903, "loss": 11.691, "step": 16343 }, { "epoch": 0.3421250941974378, "grad_norm": 0.34237122535705566, "learning_rate": 0.00019367081448729158, "loss": 11.6798, "step": 16344 }, { "epoch": 0.3421460269614, "grad_norm": 0.26861342787742615, "learning_rate": 0.00019367004683446467, "loss": 11.6682, "step": 16345 }, { "epoch": 0.34216695972536215, "grad_norm": 0.24485145509243011, "learning_rate": 0.00019366927913660868, "loss": 11.6674, "step": 16346 }, { "epoch": 0.3421878924893243, "grad_norm": 0.29300788044929504, "learning_rate": 0.00019366851139372403, "loss": 11.6912, "step": 16347 }, { "epoch": 0.34220882525328644, "grad_norm": 0.2773926258087158, "learning_rate": 0.000193667743605811, "loss": 11.6794, "step": 16348 }, { "epoch": 0.3422297580172486, "grad_norm": 0.26576921343803406, "learning_rate": 0.00019366697577287006, "loss": 11.6715, "step": 16349 }, { "epoch": 0.34225069078121073, "grad_norm": 0.2698068618774414, "learning_rate": 0.0001936662078949015, "loss": 11.6758, "step": 16350 }, { "epoch": 0.34227162354517293, "grad_norm": 0.2336575984954834, "learning_rate": 0.0001936654399719057, "loss": 11.6858, "step": 16351 }, { "epoch": 0.3422925563091351, "grad_norm": 0.24224497377872467, "learning_rate": 0.00019366467200388306, "loss": 11.6864, "step": 16352 }, { "epoch": 0.3423134890730972, "grad_norm": 0.2628777325153351, "learning_rate": 0.00019366390399083394, "loss": 11.6662, "step": 16353 }, { "epoch": 0.34233442183705937, "grad_norm": 0.2900276184082031, "learning_rate": 0.0001936631359327587, "loss": 11.6759, "step": 16354 }, { "epoch": 0.3423553546010215, "grad_norm": 0.2341100126504898, "learning_rate": 0.00019366236782965773, "loss": 11.6801, "step": 16355 }, { "epoch": 0.34237628736498366, "grad_norm": 0.3302159309387207, "learning_rate": 0.00019366159968153138, "loss": 11.6804, "step": 16356 }, { "epoch": 0.3423972201289458, "grad_norm": 0.31290578842163086, "learning_rate": 0.00019366083148838, "loss": 11.6803, "step": 16357 }, { "epoch": 0.342418152892908, "grad_norm": 0.24308258295059204, "learning_rate": 0.000193660063250204, "loss": 11.6662, "step": 16358 }, { "epoch": 0.34243908565687015, "grad_norm": 0.23990710079669952, "learning_rate": 0.0001936592949670037, "loss": 11.6803, "step": 16359 }, { "epoch": 0.3424600184208323, "grad_norm": 0.29754218459129333, "learning_rate": 0.00019365852663877955, "loss": 11.6861, "step": 16360 }, { "epoch": 0.34248095118479444, "grad_norm": 0.32690754532814026, "learning_rate": 0.00019365775826553182, "loss": 11.6706, "step": 16361 }, { "epoch": 0.3425018839487566, "grad_norm": 0.25146427750587463, "learning_rate": 0.00019365698984726096, "loss": 11.6713, "step": 16362 }, { "epoch": 0.3425228167127187, "grad_norm": 0.2801300287246704, "learning_rate": 0.00019365622138396731, "loss": 11.6909, "step": 16363 }, { "epoch": 0.3425437494766809, "grad_norm": 0.29790621995925903, "learning_rate": 0.00019365545287565122, "loss": 11.6686, "step": 16364 }, { "epoch": 0.34256468224064307, "grad_norm": 0.3173867464065552, "learning_rate": 0.0001936546843223131, "loss": 11.6877, "step": 16365 }, { "epoch": 0.3425856150046052, "grad_norm": 0.33941981196403503, "learning_rate": 0.00019365391572395327, "loss": 11.6985, "step": 16366 }, { "epoch": 0.34260654776856736, "grad_norm": 0.24631591141223907, "learning_rate": 0.00019365314708057215, "loss": 11.6745, "step": 16367 }, { "epoch": 0.3426274805325295, "grad_norm": 0.23456518352031708, "learning_rate": 0.00019365237839217005, "loss": 11.6868, "step": 16368 }, { "epoch": 0.34264841329649165, "grad_norm": 0.2586410939693451, "learning_rate": 0.0001936516096587474, "loss": 11.6873, "step": 16369 }, { "epoch": 0.34266934606045385, "grad_norm": 0.3047213554382324, "learning_rate": 0.00019365084088030456, "loss": 11.6824, "step": 16370 }, { "epoch": 0.342690278824416, "grad_norm": 0.28824925422668457, "learning_rate": 0.00019365007205684185, "loss": 11.7004, "step": 16371 }, { "epoch": 0.34271121158837814, "grad_norm": 0.3722909688949585, "learning_rate": 0.00019364930318835967, "loss": 11.6632, "step": 16372 }, { "epoch": 0.3427321443523403, "grad_norm": 0.2968967854976654, "learning_rate": 0.00019364853427485842, "loss": 11.6758, "step": 16373 }, { "epoch": 0.34275307711630243, "grad_norm": 0.2663840353488922, "learning_rate": 0.00019364776531633843, "loss": 11.6882, "step": 16374 }, { "epoch": 0.3427740098802646, "grad_norm": 0.29111596941947937, "learning_rate": 0.0001936469963128001, "loss": 11.6766, "step": 16375 }, { "epoch": 0.3427949426442267, "grad_norm": 0.24880166351795197, "learning_rate": 0.00019364622726424378, "loss": 11.6793, "step": 16376 }, { "epoch": 0.3428158754081889, "grad_norm": 0.2545192837715149, "learning_rate": 0.00019364545817066983, "loss": 11.6845, "step": 16377 }, { "epoch": 0.34283680817215106, "grad_norm": 0.26185232400894165, "learning_rate": 0.00019364468903207863, "loss": 11.6621, "step": 16378 }, { "epoch": 0.3428577409361132, "grad_norm": 0.27960336208343506, "learning_rate": 0.00019364391984847058, "loss": 11.6606, "step": 16379 }, { "epoch": 0.34287867370007535, "grad_norm": 0.20195236802101135, "learning_rate": 0.00019364315061984596, "loss": 11.6742, "step": 16380 }, { "epoch": 0.3428996064640375, "grad_norm": 0.27765896916389465, "learning_rate": 0.00019364238134620524, "loss": 11.6681, "step": 16381 }, { "epoch": 0.34292053922799964, "grad_norm": 0.26367536187171936, "learning_rate": 0.00019364161202754874, "loss": 11.6615, "step": 16382 }, { "epoch": 0.34294147199196184, "grad_norm": 0.30605602264404297, "learning_rate": 0.00019364084266387685, "loss": 11.669, "step": 16383 }, { "epoch": 0.342962404755924, "grad_norm": 0.27315476536750793, "learning_rate": 0.00019364007325518994, "loss": 11.6662, "step": 16384 }, { "epoch": 0.34298333751988613, "grad_norm": 0.27095165848731995, "learning_rate": 0.00019363930380148837, "loss": 11.6851, "step": 16385 }, { "epoch": 0.3430042702838483, "grad_norm": 0.2505572438240051, "learning_rate": 0.00019363853430277248, "loss": 11.6709, "step": 16386 }, { "epoch": 0.3430252030478104, "grad_norm": 0.22811676561832428, "learning_rate": 0.0001936377647590427, "loss": 11.6714, "step": 16387 }, { "epoch": 0.34304613581177257, "grad_norm": 0.3106546103954315, "learning_rate": 0.00019363699517029934, "loss": 11.6841, "step": 16388 }, { "epoch": 0.3430670685757347, "grad_norm": 0.4047892689704895, "learning_rate": 0.00019363622553654283, "loss": 11.6951, "step": 16389 }, { "epoch": 0.3430880013396969, "grad_norm": 0.30329519510269165, "learning_rate": 0.00019363545585777348, "loss": 11.6657, "step": 16390 }, { "epoch": 0.34310893410365906, "grad_norm": 0.30086854100227356, "learning_rate": 0.00019363468613399173, "loss": 11.6758, "step": 16391 }, { "epoch": 0.3431298668676212, "grad_norm": 0.2322869598865509, "learning_rate": 0.00019363391636519788, "loss": 11.6691, "step": 16392 }, { "epoch": 0.34315079963158335, "grad_norm": 0.2715480327606201, "learning_rate": 0.00019363314655139233, "loss": 11.6577, "step": 16393 }, { "epoch": 0.3431717323955455, "grad_norm": 0.2634119987487793, "learning_rate": 0.00019363237669257547, "loss": 11.6688, "step": 16394 }, { "epoch": 0.34319266515950764, "grad_norm": 0.2429312914609909, "learning_rate": 0.00019363160678874763, "loss": 11.6714, "step": 16395 }, { "epoch": 0.34321359792346984, "grad_norm": 0.28983965516090393, "learning_rate": 0.00019363083683990922, "loss": 11.6835, "step": 16396 }, { "epoch": 0.343234530687432, "grad_norm": 0.2504403591156006, "learning_rate": 0.0001936300668460606, "loss": 11.6565, "step": 16397 }, { "epoch": 0.3432554634513941, "grad_norm": 0.3135169744491577, "learning_rate": 0.00019362929680720207, "loss": 11.6732, "step": 16398 }, { "epoch": 0.34327639621535627, "grad_norm": 0.3052734136581421, "learning_rate": 0.0001936285267233341, "loss": 11.6912, "step": 16399 }, { "epoch": 0.3432973289793184, "grad_norm": 0.28936073184013367, "learning_rate": 0.00019362775659445702, "loss": 11.6744, "step": 16400 }, { "epoch": 0.34331826174328056, "grad_norm": 0.2548259496688843, "learning_rate": 0.0001936269864205712, "loss": 11.6584, "step": 16401 }, { "epoch": 0.34333919450724276, "grad_norm": 0.228358656167984, "learning_rate": 0.000193626216201677, "loss": 11.6734, "step": 16402 }, { "epoch": 0.3433601272712049, "grad_norm": 0.307979017496109, "learning_rate": 0.00019362544593777483, "loss": 11.6701, "step": 16403 }, { "epoch": 0.34338106003516705, "grad_norm": 0.2707976698875427, "learning_rate": 0.00019362467562886502, "loss": 11.6617, "step": 16404 }, { "epoch": 0.3434019927991292, "grad_norm": 0.8989408016204834, "learning_rate": 0.00019362390527494797, "loss": 11.7098, "step": 16405 }, { "epoch": 0.34342292556309134, "grad_norm": 0.3378587067127228, "learning_rate": 0.000193623134876024, "loss": 11.6747, "step": 16406 }, { "epoch": 0.3434438583270535, "grad_norm": 0.3305457532405853, "learning_rate": 0.00019362236443209348, "loss": 11.6858, "step": 16407 }, { "epoch": 0.34346479109101563, "grad_norm": 0.22527654469013214, "learning_rate": 0.00019362159394315689, "loss": 11.6897, "step": 16408 }, { "epoch": 0.34348572385497783, "grad_norm": 0.25894850492477417, "learning_rate": 0.0001936208234092145, "loss": 11.6746, "step": 16409 }, { "epoch": 0.34350665661894, "grad_norm": 0.2481926530599594, "learning_rate": 0.0001936200528302667, "loss": 11.6714, "step": 16410 }, { "epoch": 0.3435275893829021, "grad_norm": 0.2946898937225342, "learning_rate": 0.00019361928220631384, "loss": 11.6655, "step": 16411 }, { "epoch": 0.34354852214686427, "grad_norm": 0.24743081629276276, "learning_rate": 0.00019361851153735634, "loss": 11.6746, "step": 16412 }, { "epoch": 0.3435694549108264, "grad_norm": 0.29373446106910706, "learning_rate": 0.00019361774082339453, "loss": 11.6757, "step": 16413 }, { "epoch": 0.34359038767478856, "grad_norm": 0.24176271259784698, "learning_rate": 0.0001936169700644288, "loss": 11.6661, "step": 16414 }, { "epoch": 0.34361132043875076, "grad_norm": 0.3251970410346985, "learning_rate": 0.00019361619926045953, "loss": 11.6917, "step": 16415 }, { "epoch": 0.3436322532027129, "grad_norm": 0.26942217350006104, "learning_rate": 0.00019361542841148707, "loss": 11.6558, "step": 16416 }, { "epoch": 0.34365318596667505, "grad_norm": 0.2491481900215149, "learning_rate": 0.0001936146575175118, "loss": 11.6798, "step": 16417 }, { "epoch": 0.3436741187306372, "grad_norm": 0.2943958640098572, "learning_rate": 0.0001936138865785341, "loss": 11.6859, "step": 16418 }, { "epoch": 0.34369505149459934, "grad_norm": 0.24890945851802826, "learning_rate": 0.00019361311559455428, "loss": 11.6738, "step": 16419 }, { "epoch": 0.3437159842585615, "grad_norm": 0.22604766488075256, "learning_rate": 0.0001936123445655728, "loss": 11.6727, "step": 16420 }, { "epoch": 0.3437369170225237, "grad_norm": 0.2838890254497528, "learning_rate": 0.00019361157349159, "loss": 11.6686, "step": 16421 }, { "epoch": 0.3437578497864858, "grad_norm": 0.35917961597442627, "learning_rate": 0.0001936108023726062, "loss": 11.6822, "step": 16422 }, { "epoch": 0.34377878255044797, "grad_norm": 0.31969568133354187, "learning_rate": 0.00019361003120862185, "loss": 11.6673, "step": 16423 }, { "epoch": 0.3437997153144101, "grad_norm": 0.3450329303741455, "learning_rate": 0.00019360925999963727, "loss": 11.6748, "step": 16424 }, { "epoch": 0.34382064807837226, "grad_norm": 0.37733468413352966, "learning_rate": 0.00019360848874565282, "loss": 11.6714, "step": 16425 }, { "epoch": 0.3438415808423344, "grad_norm": 0.28238528966903687, "learning_rate": 0.00019360771744666893, "loss": 11.6654, "step": 16426 }, { "epoch": 0.34386251360629655, "grad_norm": 0.38076186180114746, "learning_rate": 0.00019360694610268592, "loss": 11.6756, "step": 16427 }, { "epoch": 0.34388344637025875, "grad_norm": 0.31791433691978455, "learning_rate": 0.00019360617471370417, "loss": 11.6734, "step": 16428 }, { "epoch": 0.3439043791342209, "grad_norm": 0.2711796760559082, "learning_rate": 0.00019360540327972406, "loss": 11.6697, "step": 16429 }, { "epoch": 0.34392531189818304, "grad_norm": 0.25148484110832214, "learning_rate": 0.00019360463180074591, "loss": 11.682, "step": 16430 }, { "epoch": 0.3439462446621452, "grad_norm": 0.2811480462551117, "learning_rate": 0.00019360386027677019, "loss": 11.6587, "step": 16431 }, { "epoch": 0.34396717742610733, "grad_norm": 0.2886736989021301, "learning_rate": 0.00019360308870779722, "loss": 11.6593, "step": 16432 }, { "epoch": 0.3439881101900695, "grad_norm": 0.2582201063632965, "learning_rate": 0.00019360231709382737, "loss": 11.6796, "step": 16433 }, { "epoch": 0.3440090429540317, "grad_norm": 0.3305763304233551, "learning_rate": 0.000193601545434861, "loss": 11.6804, "step": 16434 }, { "epoch": 0.3440299757179938, "grad_norm": 0.26024574041366577, "learning_rate": 0.0001936007737308985, "loss": 11.6691, "step": 16435 }, { "epoch": 0.34405090848195596, "grad_norm": 0.20510149002075195, "learning_rate": 0.0001936000019819402, "loss": 11.6738, "step": 16436 }, { "epoch": 0.3440718412459181, "grad_norm": 0.28400754928588867, "learning_rate": 0.00019359923018798654, "loss": 11.6684, "step": 16437 }, { "epoch": 0.34409277400988025, "grad_norm": 0.2831646203994751, "learning_rate": 0.00019359845834903783, "loss": 11.6839, "step": 16438 }, { "epoch": 0.3441137067738424, "grad_norm": 0.27878841757774353, "learning_rate": 0.00019359768646509448, "loss": 11.6774, "step": 16439 }, { "epoch": 0.3441346395378046, "grad_norm": 0.27947998046875, "learning_rate": 0.00019359691453615685, "loss": 11.6768, "step": 16440 }, { "epoch": 0.34415557230176674, "grad_norm": 0.3082318603992462, "learning_rate": 0.0001935961425622253, "loss": 11.6914, "step": 16441 }, { "epoch": 0.3441765050657289, "grad_norm": 0.3056885302066803, "learning_rate": 0.00019359537054330022, "loss": 11.6692, "step": 16442 }, { "epoch": 0.34419743782969103, "grad_norm": 0.29205504059791565, "learning_rate": 0.00019359459847938194, "loss": 11.6834, "step": 16443 }, { "epoch": 0.3442183705936532, "grad_norm": 0.27220243215560913, "learning_rate": 0.0001935938263704709, "loss": 11.6604, "step": 16444 }, { "epoch": 0.3442393033576153, "grad_norm": 0.2923957109451294, "learning_rate": 0.0001935930542165674, "loss": 11.6709, "step": 16445 }, { "epoch": 0.34426023612157747, "grad_norm": 0.27411088347435, "learning_rate": 0.00019359228201767186, "loss": 11.6719, "step": 16446 }, { "epoch": 0.34428116888553967, "grad_norm": 0.2556041181087494, "learning_rate": 0.0001935915097737846, "loss": 11.6854, "step": 16447 }, { "epoch": 0.3443021016495018, "grad_norm": 0.2935214340686798, "learning_rate": 0.00019359073748490606, "loss": 11.6579, "step": 16448 }, { "epoch": 0.34432303441346396, "grad_norm": 0.31767165660858154, "learning_rate": 0.00019358996515103657, "loss": 11.6957, "step": 16449 }, { "epoch": 0.3443439671774261, "grad_norm": 0.21639308333396912, "learning_rate": 0.00019358919277217654, "loss": 11.679, "step": 16450 }, { "epoch": 0.34436489994138825, "grad_norm": 0.2780183255672455, "learning_rate": 0.00019358842034832626, "loss": 11.6857, "step": 16451 }, { "epoch": 0.3443858327053504, "grad_norm": 0.34144601225852966, "learning_rate": 0.00019358764787948617, "loss": 11.68, "step": 16452 }, { "epoch": 0.3444067654693126, "grad_norm": 0.31189265847206116, "learning_rate": 0.00019358687536565663, "loss": 11.6781, "step": 16453 }, { "epoch": 0.34442769823327474, "grad_norm": 0.2769184708595276, "learning_rate": 0.000193586102806838, "loss": 11.672, "step": 16454 }, { "epoch": 0.3444486309972369, "grad_norm": 0.29630711674690247, "learning_rate": 0.00019358533020303063, "loss": 11.6694, "step": 16455 }, { "epoch": 0.344469563761199, "grad_norm": 0.28090786933898926, "learning_rate": 0.00019358455755423494, "loss": 11.6717, "step": 16456 }, { "epoch": 0.34449049652516117, "grad_norm": 0.3188472390174866, "learning_rate": 0.00019358378486045128, "loss": 11.6747, "step": 16457 }, { "epoch": 0.3445114292891233, "grad_norm": 0.26010745763778687, "learning_rate": 0.00019358301212168, "loss": 11.6757, "step": 16458 }, { "epoch": 0.3445323620530855, "grad_norm": 0.3465397357940674, "learning_rate": 0.00019358223933792153, "loss": 11.6699, "step": 16459 }, { "epoch": 0.34455329481704766, "grad_norm": 0.33547505736351013, "learning_rate": 0.00019358146650917617, "loss": 11.6611, "step": 16460 }, { "epoch": 0.3445742275810098, "grad_norm": 0.23955625295639038, "learning_rate": 0.00019358069363544433, "loss": 11.6642, "step": 16461 }, { "epoch": 0.34459516034497195, "grad_norm": 0.3388494551181793, "learning_rate": 0.00019357992071672637, "loss": 11.6782, "step": 16462 }, { "epoch": 0.3446160931089341, "grad_norm": 0.3972828984260559, "learning_rate": 0.0001935791477530227, "loss": 11.6843, "step": 16463 }, { "epoch": 0.34463702587289624, "grad_norm": 0.2687375545501709, "learning_rate": 0.0001935783747443336, "loss": 11.6732, "step": 16464 }, { "epoch": 0.3446579586368584, "grad_norm": 0.30933135747909546, "learning_rate": 0.00019357760169065954, "loss": 11.6764, "step": 16465 }, { "epoch": 0.3446788914008206, "grad_norm": 0.28025153279304504, "learning_rate": 0.00019357682859200086, "loss": 11.6754, "step": 16466 }, { "epoch": 0.34469982416478273, "grad_norm": 0.3278101980686188, "learning_rate": 0.0001935760554483579, "loss": 11.6738, "step": 16467 }, { "epoch": 0.3447207569287449, "grad_norm": 0.30142146348953247, "learning_rate": 0.00019357528225973105, "loss": 11.6857, "step": 16468 }, { "epoch": 0.344741689692707, "grad_norm": 0.29711583256721497, "learning_rate": 0.0001935745090261207, "loss": 11.6838, "step": 16469 }, { "epoch": 0.34476262245666917, "grad_norm": 0.38919344544410706, "learning_rate": 0.0001935737357475272, "loss": 11.671, "step": 16470 }, { "epoch": 0.3447835552206313, "grad_norm": 0.40700602531433105, "learning_rate": 0.00019357296242395093, "loss": 11.6885, "step": 16471 }, { "epoch": 0.3448044879845935, "grad_norm": 0.27511468529701233, "learning_rate": 0.00019357218905539228, "loss": 11.6757, "step": 16472 }, { "epoch": 0.34482542074855566, "grad_norm": 0.2913696765899658, "learning_rate": 0.0001935714156418516, "loss": 11.6776, "step": 16473 }, { "epoch": 0.3448463535125178, "grad_norm": 0.27621525526046753, "learning_rate": 0.00019357064218332922, "loss": 11.6728, "step": 16474 }, { "epoch": 0.34486728627647995, "grad_norm": 0.2643006145954132, "learning_rate": 0.0001935698686798256, "loss": 11.6764, "step": 16475 }, { "epoch": 0.3448882190404421, "grad_norm": 0.23485292494297028, "learning_rate": 0.00019356909513134108, "loss": 11.6733, "step": 16476 }, { "epoch": 0.34490915180440423, "grad_norm": 0.3479172885417938, "learning_rate": 0.000193568321537876, "loss": 11.6862, "step": 16477 }, { "epoch": 0.3449300845683664, "grad_norm": 0.3486018776893616, "learning_rate": 0.00019356754789943075, "loss": 11.68, "step": 16478 }, { "epoch": 0.3449510173323286, "grad_norm": 0.27165350317955017, "learning_rate": 0.0001935667742160057, "loss": 11.6667, "step": 16479 }, { "epoch": 0.3449719500962907, "grad_norm": 0.3585233688354492, "learning_rate": 0.00019356600048760125, "loss": 11.6886, "step": 16480 }, { "epoch": 0.34499288286025287, "grad_norm": 0.296201229095459, "learning_rate": 0.00019356522671421772, "loss": 11.6684, "step": 16481 }, { "epoch": 0.345013815624215, "grad_norm": 0.3496871292591095, "learning_rate": 0.00019356445289585552, "loss": 11.6618, "step": 16482 }, { "epoch": 0.34503474838817716, "grad_norm": 0.27457231283187866, "learning_rate": 0.00019356367903251503, "loss": 11.6779, "step": 16483 }, { "epoch": 0.3450556811521393, "grad_norm": 0.2427564114332199, "learning_rate": 0.00019356290512419658, "loss": 11.6581, "step": 16484 }, { "epoch": 0.3450766139161015, "grad_norm": 0.28369954228401184, "learning_rate": 0.0001935621311709006, "loss": 11.6835, "step": 16485 }, { "epoch": 0.34509754668006365, "grad_norm": 0.2622183561325073, "learning_rate": 0.0001935613571726274, "loss": 11.6802, "step": 16486 }, { "epoch": 0.3451184794440258, "grad_norm": 0.2691608965396881, "learning_rate": 0.00019356058312937737, "loss": 11.6688, "step": 16487 }, { "epoch": 0.34513941220798794, "grad_norm": 0.26590263843536377, "learning_rate": 0.0001935598090411509, "loss": 11.6625, "step": 16488 }, { "epoch": 0.3451603449719501, "grad_norm": 0.2360115796327591, "learning_rate": 0.0001935590349079484, "loss": 11.6689, "step": 16489 }, { "epoch": 0.34518127773591223, "grad_norm": 0.2485620230436325, "learning_rate": 0.00019355826072977016, "loss": 11.686, "step": 16490 }, { "epoch": 0.34520221049987443, "grad_norm": 0.2671261131763458, "learning_rate": 0.00019355748650661659, "loss": 11.6799, "step": 16491 }, { "epoch": 0.3452231432638366, "grad_norm": 0.3596493899822235, "learning_rate": 0.00019355671223848805, "loss": 11.6453, "step": 16492 }, { "epoch": 0.3452440760277987, "grad_norm": 0.3223181962966919, "learning_rate": 0.00019355593792538494, "loss": 11.6788, "step": 16493 }, { "epoch": 0.34526500879176086, "grad_norm": 0.31253859400749207, "learning_rate": 0.00019355516356730763, "loss": 11.6719, "step": 16494 }, { "epoch": 0.345285941555723, "grad_norm": 0.46764829754829407, "learning_rate": 0.00019355438916425643, "loss": 11.6928, "step": 16495 }, { "epoch": 0.34530687431968515, "grad_norm": 0.268313467502594, "learning_rate": 0.0001935536147162318, "loss": 11.6634, "step": 16496 }, { "epoch": 0.3453278070836473, "grad_norm": 0.261838436126709, "learning_rate": 0.00019355284022323405, "loss": 11.6833, "step": 16497 }, { "epoch": 0.3453487398476095, "grad_norm": 0.2836855947971344, "learning_rate": 0.00019355206568526362, "loss": 11.6738, "step": 16498 }, { "epoch": 0.34536967261157164, "grad_norm": 0.2960122227668762, "learning_rate": 0.0001935512911023208, "loss": 11.6975, "step": 16499 }, { "epoch": 0.3453906053755338, "grad_norm": 0.25830140709877014, "learning_rate": 0.00019355051647440598, "loss": 11.6815, "step": 16500 }, { "epoch": 0.34541153813949593, "grad_norm": 0.2850506603717804, "learning_rate": 0.0001935497418015196, "loss": 11.6767, "step": 16501 }, { "epoch": 0.3454324709034581, "grad_norm": 0.27961820363998413, "learning_rate": 0.00019354896708366197, "loss": 11.6731, "step": 16502 }, { "epoch": 0.3454534036674202, "grad_norm": 0.36117246747016907, "learning_rate": 0.00019354819232083345, "loss": 11.6756, "step": 16503 }, { "epoch": 0.3454743364313824, "grad_norm": 0.29903504252433777, "learning_rate": 0.00019354741751303448, "loss": 11.6733, "step": 16504 }, { "epoch": 0.34549526919534457, "grad_norm": 0.29007488489151, "learning_rate": 0.00019354664266026534, "loss": 11.6826, "step": 16505 }, { "epoch": 0.3455162019593067, "grad_norm": 0.28586485981941223, "learning_rate": 0.0001935458677625265, "loss": 11.6699, "step": 16506 }, { "epoch": 0.34553713472326886, "grad_norm": 0.23414777219295502, "learning_rate": 0.00019354509281981826, "loss": 11.6634, "step": 16507 }, { "epoch": 0.345558067487231, "grad_norm": 0.2912455201148987, "learning_rate": 0.00019354431783214103, "loss": 11.6762, "step": 16508 }, { "epoch": 0.34557900025119315, "grad_norm": 0.2236766666173935, "learning_rate": 0.00019354354279949517, "loss": 11.6797, "step": 16509 }, { "epoch": 0.34559993301515535, "grad_norm": 0.349639356136322, "learning_rate": 0.00019354276772188105, "loss": 11.6949, "step": 16510 }, { "epoch": 0.3456208657791175, "grad_norm": 0.32942914962768555, "learning_rate": 0.00019354199259929904, "loss": 11.6731, "step": 16511 }, { "epoch": 0.34564179854307964, "grad_norm": 0.33530518412590027, "learning_rate": 0.00019354121743174956, "loss": 11.6725, "step": 16512 }, { "epoch": 0.3456627313070418, "grad_norm": 0.30484601855278015, "learning_rate": 0.00019354044221923288, "loss": 11.6663, "step": 16513 }, { "epoch": 0.3456836640710039, "grad_norm": 0.2797049582004547, "learning_rate": 0.00019353966696174948, "loss": 11.6903, "step": 16514 }, { "epoch": 0.34570459683496607, "grad_norm": 0.21218597888946533, "learning_rate": 0.0001935388916592997, "loss": 11.6587, "step": 16515 }, { "epoch": 0.3457255295989282, "grad_norm": 0.3968314826488495, "learning_rate": 0.00019353811631188386, "loss": 11.6986, "step": 16516 }, { "epoch": 0.3457464623628904, "grad_norm": 0.2944835126399994, "learning_rate": 0.0001935373409195024, "loss": 11.6642, "step": 16517 }, { "epoch": 0.34576739512685256, "grad_norm": 0.28836098313331604, "learning_rate": 0.00019353656548215563, "loss": 11.6715, "step": 16518 }, { "epoch": 0.3457883278908147, "grad_norm": 0.27474936842918396, "learning_rate": 0.000193535789999844, "loss": 11.6776, "step": 16519 }, { "epoch": 0.34580926065477685, "grad_norm": 0.2785254418849945, "learning_rate": 0.0001935350144725678, "loss": 11.6664, "step": 16520 }, { "epoch": 0.345830193418739, "grad_norm": 0.2703991234302521, "learning_rate": 0.00019353423890032748, "loss": 11.6653, "step": 16521 }, { "epoch": 0.34585112618270114, "grad_norm": 0.23773126304149628, "learning_rate": 0.0001935334632831234, "loss": 11.6831, "step": 16522 }, { "epoch": 0.34587205894666334, "grad_norm": 0.3396669924259186, "learning_rate": 0.00019353268762095584, "loss": 11.6571, "step": 16523 }, { "epoch": 0.3458929917106255, "grad_norm": 0.35675379633903503, "learning_rate": 0.00019353191191382528, "loss": 11.6703, "step": 16524 }, { "epoch": 0.34591392447458763, "grad_norm": 0.2950794994831085, "learning_rate": 0.00019353113616173205, "loss": 11.6693, "step": 16525 }, { "epoch": 0.3459348572385498, "grad_norm": 0.31443169713020325, "learning_rate": 0.00019353036036467652, "loss": 11.6855, "step": 16526 }, { "epoch": 0.3459557900025119, "grad_norm": 0.22271674871444702, "learning_rate": 0.0001935295845226591, "loss": 11.6647, "step": 16527 }, { "epoch": 0.34597672276647407, "grad_norm": 0.24068032205104828, "learning_rate": 0.00019352880863568014, "loss": 11.6722, "step": 16528 }, { "epoch": 0.34599765553043627, "grad_norm": 0.31779465079307556, "learning_rate": 0.00019352803270373997, "loss": 11.6804, "step": 16529 }, { "epoch": 0.3460185882943984, "grad_norm": 0.3218877613544464, "learning_rate": 0.000193527256726839, "loss": 11.6591, "step": 16530 }, { "epoch": 0.34603952105836056, "grad_norm": 0.30071699619293213, "learning_rate": 0.00019352648070497763, "loss": 11.6722, "step": 16531 }, { "epoch": 0.3460604538223227, "grad_norm": 0.26811888813972473, "learning_rate": 0.00019352570463815617, "loss": 11.6666, "step": 16532 }, { "epoch": 0.34608138658628484, "grad_norm": 0.26472920179367065, "learning_rate": 0.00019352492852637508, "loss": 11.6728, "step": 16533 }, { "epoch": 0.346102319350247, "grad_norm": 0.3295767307281494, "learning_rate": 0.00019352415236963464, "loss": 11.6712, "step": 16534 }, { "epoch": 0.34612325211420913, "grad_norm": 0.2534242570400238, "learning_rate": 0.0001935233761679353, "loss": 11.6724, "step": 16535 }, { "epoch": 0.34614418487817133, "grad_norm": 0.3097379207611084, "learning_rate": 0.0001935225999212774, "loss": 11.6615, "step": 16536 }, { "epoch": 0.3461651176421335, "grad_norm": 0.2712955176830292, "learning_rate": 0.00019352182362966126, "loss": 11.6754, "step": 16537 }, { "epoch": 0.3461860504060956, "grad_norm": 0.25080516934394836, "learning_rate": 0.00019352104729308735, "loss": 11.6667, "step": 16538 }, { "epoch": 0.34620698317005777, "grad_norm": 0.2483019083738327, "learning_rate": 0.000193520270911556, "loss": 11.6904, "step": 16539 }, { "epoch": 0.3462279159340199, "grad_norm": 0.3510572910308838, "learning_rate": 0.00019351949448506757, "loss": 11.6738, "step": 16540 }, { "epoch": 0.34624884869798206, "grad_norm": 0.2959469258785248, "learning_rate": 0.00019351871801362243, "loss": 11.6738, "step": 16541 }, { "epoch": 0.34626978146194426, "grad_norm": 0.28875064849853516, "learning_rate": 0.000193517941497221, "loss": 11.6751, "step": 16542 }, { "epoch": 0.3462907142259064, "grad_norm": 0.2683071792125702, "learning_rate": 0.0001935171649358636, "loss": 11.6696, "step": 16543 }, { "epoch": 0.34631164698986855, "grad_norm": 0.3215905725955963, "learning_rate": 0.0001935163883295506, "loss": 11.6665, "step": 16544 }, { "epoch": 0.3463325797538307, "grad_norm": 0.29137271642684937, "learning_rate": 0.00019351561167828244, "loss": 11.6548, "step": 16545 }, { "epoch": 0.34635351251779284, "grad_norm": 0.2988457977771759, "learning_rate": 0.00019351483498205942, "loss": 11.6691, "step": 16546 }, { "epoch": 0.346374445281755, "grad_norm": 0.27233603596687317, "learning_rate": 0.00019351405824088197, "loss": 11.6738, "step": 16547 }, { "epoch": 0.3463953780457172, "grad_norm": 0.2616819441318512, "learning_rate": 0.00019351328145475046, "loss": 11.6813, "step": 16548 }, { "epoch": 0.34641631080967933, "grad_norm": 0.3020935654640198, "learning_rate": 0.0001935125046236652, "loss": 11.6833, "step": 16549 }, { "epoch": 0.3464372435736415, "grad_norm": 0.29479295015335083, "learning_rate": 0.0001935117277476266, "loss": 11.68, "step": 16550 }, { "epoch": 0.3464581763376036, "grad_norm": 0.3327769935131073, "learning_rate": 0.00019351095082663507, "loss": 11.6875, "step": 16551 }, { "epoch": 0.34647910910156576, "grad_norm": 0.31344544887542725, "learning_rate": 0.00019351017386069094, "loss": 11.6827, "step": 16552 }, { "epoch": 0.3465000418655279, "grad_norm": 0.2624429166316986, "learning_rate": 0.00019350939684979456, "loss": 11.6723, "step": 16553 }, { "epoch": 0.34652097462949005, "grad_norm": 0.2673267722129822, "learning_rate": 0.0001935086197939464, "loss": 11.6812, "step": 16554 }, { "epoch": 0.34654190739345225, "grad_norm": 0.252226322889328, "learning_rate": 0.00019350784269314675, "loss": 11.683, "step": 16555 }, { "epoch": 0.3465628401574144, "grad_norm": 0.22849701344966888, "learning_rate": 0.00019350706554739598, "loss": 11.6844, "step": 16556 }, { "epoch": 0.34658377292137654, "grad_norm": 0.2956079840660095, "learning_rate": 0.00019350628835669453, "loss": 11.6839, "step": 16557 }, { "epoch": 0.3466047056853387, "grad_norm": 0.28043633699417114, "learning_rate": 0.0001935055111210427, "loss": 11.6679, "step": 16558 }, { "epoch": 0.34662563844930083, "grad_norm": 0.24203833937644958, "learning_rate": 0.0001935047338404409, "loss": 11.6733, "step": 16559 }, { "epoch": 0.346646571213263, "grad_norm": 0.24462373554706573, "learning_rate": 0.00019350395651488952, "loss": 11.689, "step": 16560 }, { "epoch": 0.3466675039772252, "grad_norm": 0.3070599436759949, "learning_rate": 0.00019350317914438892, "loss": 11.6698, "step": 16561 }, { "epoch": 0.3466884367411873, "grad_norm": 0.2986792027950287, "learning_rate": 0.00019350240172893944, "loss": 11.663, "step": 16562 }, { "epoch": 0.34670936950514947, "grad_norm": 0.21408624947071075, "learning_rate": 0.0001935016242685415, "loss": 11.6598, "step": 16563 }, { "epoch": 0.3467303022691116, "grad_norm": 0.28676050901412964, "learning_rate": 0.00019350084676319544, "loss": 11.6732, "step": 16564 }, { "epoch": 0.34675123503307376, "grad_norm": 0.2255738526582718, "learning_rate": 0.00019350006921290165, "loss": 11.6862, "step": 16565 }, { "epoch": 0.3467721677970359, "grad_norm": 0.3210945725440979, "learning_rate": 0.00019349929161766052, "loss": 11.6862, "step": 16566 }, { "epoch": 0.34679310056099805, "grad_norm": 0.2681971490383148, "learning_rate": 0.0001934985139774724, "loss": 11.6839, "step": 16567 }, { "epoch": 0.34681403332496025, "grad_norm": 0.2529780864715576, "learning_rate": 0.00019349773629233767, "loss": 11.6762, "step": 16568 }, { "epoch": 0.3468349660889224, "grad_norm": 0.2672407031059265, "learning_rate": 0.0001934969585622567, "loss": 11.6771, "step": 16569 }, { "epoch": 0.34685589885288454, "grad_norm": 0.2683192789554596, "learning_rate": 0.0001934961807872299, "loss": 11.6771, "step": 16570 }, { "epoch": 0.3468768316168467, "grad_norm": 0.28875336050987244, "learning_rate": 0.00019349540296725756, "loss": 11.6743, "step": 16571 }, { "epoch": 0.3468977643808088, "grad_norm": 0.30927222967147827, "learning_rate": 0.00019349462510234014, "loss": 11.6651, "step": 16572 }, { "epoch": 0.34691869714477097, "grad_norm": 0.34723660349845886, "learning_rate": 0.00019349384719247797, "loss": 11.6749, "step": 16573 }, { "epoch": 0.34693962990873317, "grad_norm": 0.27596211433410645, "learning_rate": 0.00019349306923767144, "loss": 11.6753, "step": 16574 }, { "epoch": 0.3469605626726953, "grad_norm": 0.26214462518692017, "learning_rate": 0.0001934922912379209, "loss": 11.6891, "step": 16575 }, { "epoch": 0.34698149543665746, "grad_norm": 0.3265022039413452, "learning_rate": 0.00019349151319322675, "loss": 11.6753, "step": 16576 }, { "epoch": 0.3470024282006196, "grad_norm": 0.2589261829853058, "learning_rate": 0.00019349073510358937, "loss": 11.6825, "step": 16577 }, { "epoch": 0.34702336096458175, "grad_norm": 0.3002606928348541, "learning_rate": 0.0001934899569690091, "loss": 11.6686, "step": 16578 }, { "epoch": 0.3470442937285439, "grad_norm": 0.36838608980178833, "learning_rate": 0.00019348917878948637, "loss": 11.6721, "step": 16579 }, { "epoch": 0.3470652264925061, "grad_norm": 0.3426513075828552, "learning_rate": 0.0001934884005650215, "loss": 11.6759, "step": 16580 }, { "epoch": 0.34708615925646824, "grad_norm": 0.2817768156528473, "learning_rate": 0.00019348762229561483, "loss": 11.6899, "step": 16581 }, { "epoch": 0.3471070920204304, "grad_norm": 0.25069889426231384, "learning_rate": 0.00019348684398126683, "loss": 11.6641, "step": 16582 }, { "epoch": 0.34712802478439253, "grad_norm": 0.3004610538482666, "learning_rate": 0.00019348606562197786, "loss": 11.6679, "step": 16583 }, { "epoch": 0.3471489575483547, "grad_norm": 0.266617089509964, "learning_rate": 0.00019348528721774822, "loss": 11.6796, "step": 16584 }, { "epoch": 0.3471698903123168, "grad_norm": 0.30324506759643555, "learning_rate": 0.00019348450876857835, "loss": 11.6823, "step": 16585 }, { "epoch": 0.34719082307627896, "grad_norm": 0.26387491822242737, "learning_rate": 0.00019348373027446863, "loss": 11.68, "step": 16586 }, { "epoch": 0.34721175584024117, "grad_norm": 0.2534589171409607, "learning_rate": 0.00019348295173541934, "loss": 11.6688, "step": 16587 }, { "epoch": 0.3472326886042033, "grad_norm": 0.2930370271205902, "learning_rate": 0.00019348217315143097, "loss": 11.6787, "step": 16588 }, { "epoch": 0.34725362136816545, "grad_norm": 0.4278009533882141, "learning_rate": 0.00019348139452250384, "loss": 11.6729, "step": 16589 }, { "epoch": 0.3472745541321276, "grad_norm": 0.23578469455242157, "learning_rate": 0.00019348061584863833, "loss": 11.6751, "step": 16590 }, { "epoch": 0.34729548689608974, "grad_norm": 0.28405269980430603, "learning_rate": 0.0001934798371298348, "loss": 11.6823, "step": 16591 }, { "epoch": 0.3473164196600519, "grad_norm": 0.36375224590301514, "learning_rate": 0.00019347905836609365, "loss": 11.6799, "step": 16592 }, { "epoch": 0.3473373524240141, "grad_norm": 0.2943582832813263, "learning_rate": 0.00019347827955741524, "loss": 11.6721, "step": 16593 }, { "epoch": 0.34735828518797623, "grad_norm": 0.33839353919029236, "learning_rate": 0.00019347750070379996, "loss": 11.6725, "step": 16594 }, { "epoch": 0.3473792179519384, "grad_norm": 0.3030601441860199, "learning_rate": 0.00019347672180524813, "loss": 11.6603, "step": 16595 }, { "epoch": 0.3474001507159005, "grad_norm": 0.30990925431251526, "learning_rate": 0.0001934759428617602, "loss": 11.6744, "step": 16596 }, { "epoch": 0.34742108347986267, "grad_norm": 0.32072579860687256, "learning_rate": 0.00019347516387333653, "loss": 11.6777, "step": 16597 }, { "epoch": 0.3474420162438248, "grad_norm": 0.3076894283294678, "learning_rate": 0.00019347438483997747, "loss": 11.6929, "step": 16598 }, { "epoch": 0.347462949007787, "grad_norm": 0.4314691126346588, "learning_rate": 0.00019347360576168335, "loss": 11.6808, "step": 16599 }, { "epoch": 0.34748388177174916, "grad_norm": 0.3147174119949341, "learning_rate": 0.00019347282663845463, "loss": 11.6826, "step": 16600 }, { "epoch": 0.3475048145357113, "grad_norm": 0.2615439295768738, "learning_rate": 0.00019347204747029167, "loss": 11.6703, "step": 16601 }, { "epoch": 0.34752574729967345, "grad_norm": 0.2592655420303345, "learning_rate": 0.0001934712682571948, "loss": 11.6712, "step": 16602 }, { "epoch": 0.3475466800636356, "grad_norm": 0.26515790820121765, "learning_rate": 0.0001934704889991644, "loss": 11.6599, "step": 16603 }, { "epoch": 0.34756761282759774, "grad_norm": 0.26511308550834656, "learning_rate": 0.0001934697096962009, "loss": 11.6763, "step": 16604 }, { "epoch": 0.3475885455915599, "grad_norm": 0.28667324781417847, "learning_rate": 0.0001934689303483046, "loss": 11.674, "step": 16605 }, { "epoch": 0.3476094783555221, "grad_norm": 0.26644444465637207, "learning_rate": 0.00019346815095547596, "loss": 11.6945, "step": 16606 }, { "epoch": 0.34763041111948423, "grad_norm": 0.2457924634218216, "learning_rate": 0.00019346737151771528, "loss": 11.6793, "step": 16607 }, { "epoch": 0.3476513438834464, "grad_norm": 0.335984468460083, "learning_rate": 0.00019346659203502294, "loss": 11.6944, "step": 16608 }, { "epoch": 0.3476722766474085, "grad_norm": 0.37355661392211914, "learning_rate": 0.00019346581250739937, "loss": 11.6766, "step": 16609 }, { "epoch": 0.34769320941137066, "grad_norm": 0.268224835395813, "learning_rate": 0.0001934650329348449, "loss": 11.6645, "step": 16610 }, { "epoch": 0.3477141421753328, "grad_norm": 0.29375237226486206, "learning_rate": 0.0001934642533173599, "loss": 11.6828, "step": 16611 }, { "epoch": 0.347735074939295, "grad_norm": 0.2695609927177429, "learning_rate": 0.00019346347365494478, "loss": 11.6707, "step": 16612 }, { "epoch": 0.34775600770325715, "grad_norm": 0.2622312903404236, "learning_rate": 0.0001934626939475999, "loss": 11.6921, "step": 16613 }, { "epoch": 0.3477769404672193, "grad_norm": 0.283848375082016, "learning_rate": 0.0001934619141953256, "loss": 11.6662, "step": 16614 }, { "epoch": 0.34779787323118144, "grad_norm": 0.7322645783424377, "learning_rate": 0.0001934611343981223, "loss": 11.5735, "step": 16615 }, { "epoch": 0.3478188059951436, "grad_norm": 0.32425665855407715, "learning_rate": 0.00019346035455599037, "loss": 11.6784, "step": 16616 }, { "epoch": 0.34783973875910573, "grad_norm": 0.24882611632347107, "learning_rate": 0.00019345957466893016, "loss": 11.6994, "step": 16617 }, { "epoch": 0.34786067152306793, "grad_norm": 0.26083627343177795, "learning_rate": 0.00019345879473694206, "loss": 11.6798, "step": 16618 }, { "epoch": 0.3478816042870301, "grad_norm": 0.408690869808197, "learning_rate": 0.00019345801476002646, "loss": 11.684, "step": 16619 }, { "epoch": 0.3479025370509922, "grad_norm": 0.22006629407405853, "learning_rate": 0.0001934572347381837, "loss": 11.674, "step": 16620 }, { "epoch": 0.34792346981495437, "grad_norm": 0.24191737174987793, "learning_rate": 0.0001934564546714142, "loss": 11.6782, "step": 16621 }, { "epoch": 0.3479444025789165, "grad_norm": 0.3089815080165863, "learning_rate": 0.0001934556745597183, "loss": 11.668, "step": 16622 }, { "epoch": 0.34796533534287866, "grad_norm": 0.30725064873695374, "learning_rate": 0.00019345489440309637, "loss": 11.6823, "step": 16623 }, { "epoch": 0.3479862681068408, "grad_norm": 0.2753872275352478, "learning_rate": 0.0001934541142015488, "loss": 11.6753, "step": 16624 }, { "epoch": 0.348007200870803, "grad_norm": 0.30057498812675476, "learning_rate": 0.00019345333395507597, "loss": 11.6657, "step": 16625 }, { "epoch": 0.34802813363476515, "grad_norm": 0.2827196717262268, "learning_rate": 0.00019345255366367824, "loss": 11.6892, "step": 16626 }, { "epoch": 0.3480490663987273, "grad_norm": 0.3241328299045563, "learning_rate": 0.000193451773327356, "loss": 11.694, "step": 16627 }, { "epoch": 0.34806999916268944, "grad_norm": 0.25698068737983704, "learning_rate": 0.00019345099294610965, "loss": 11.6799, "step": 16628 }, { "epoch": 0.3480909319266516, "grad_norm": 0.3494913876056671, "learning_rate": 0.0001934502125199395, "loss": 11.6812, "step": 16629 }, { "epoch": 0.3481118646906137, "grad_norm": 0.2886520326137543, "learning_rate": 0.00019344943204884597, "loss": 11.6877, "step": 16630 }, { "epoch": 0.3481327974545759, "grad_norm": 0.32754960656166077, "learning_rate": 0.00019344865153282946, "loss": 11.6765, "step": 16631 }, { "epoch": 0.34815373021853807, "grad_norm": 0.34273624420166016, "learning_rate": 0.00019344787097189025, "loss": 11.6809, "step": 16632 }, { "epoch": 0.3481746629825002, "grad_norm": 0.2977084815502167, "learning_rate": 0.0001934470903660288, "loss": 11.6802, "step": 16633 }, { "epoch": 0.34819559574646236, "grad_norm": 0.24623902142047882, "learning_rate": 0.00019344630971524548, "loss": 11.6739, "step": 16634 }, { "epoch": 0.3482165285104245, "grad_norm": 0.2933329939842224, "learning_rate": 0.00019344552901954062, "loss": 11.6793, "step": 16635 }, { "epoch": 0.34823746127438665, "grad_norm": 0.2894226908683777, "learning_rate": 0.00019344474827891463, "loss": 11.6849, "step": 16636 }, { "epoch": 0.34825839403834885, "grad_norm": 0.25197237730026245, "learning_rate": 0.00019344396749336788, "loss": 11.6766, "step": 16637 }, { "epoch": 0.348279326802311, "grad_norm": 0.25041452050209045, "learning_rate": 0.00019344318666290072, "loss": 11.6757, "step": 16638 }, { "epoch": 0.34830025956627314, "grad_norm": 0.2712478041648865, "learning_rate": 0.00019344240578751359, "loss": 11.6714, "step": 16639 }, { "epoch": 0.3483211923302353, "grad_norm": 0.2390090376138687, "learning_rate": 0.0001934416248672068, "loss": 11.6743, "step": 16640 }, { "epoch": 0.34834212509419743, "grad_norm": 0.2637791335582733, "learning_rate": 0.00019344084390198076, "loss": 11.6612, "step": 16641 }, { "epoch": 0.3483630578581596, "grad_norm": 0.27831876277923584, "learning_rate": 0.0001934400628918358, "loss": 11.6793, "step": 16642 }, { "epoch": 0.3483839906221217, "grad_norm": 0.29140281677246094, "learning_rate": 0.00019343928183677237, "loss": 11.6904, "step": 16643 }, { "epoch": 0.3484049233860839, "grad_norm": 0.2642841041088104, "learning_rate": 0.0001934385007367908, "loss": 11.6707, "step": 16644 }, { "epoch": 0.34842585615004606, "grad_norm": 0.32077303528785706, "learning_rate": 0.00019343771959189144, "loss": 11.6514, "step": 16645 }, { "epoch": 0.3484467889140082, "grad_norm": 0.3195774555206299, "learning_rate": 0.0001934369384020747, "loss": 11.6868, "step": 16646 }, { "epoch": 0.34846772167797035, "grad_norm": 0.2430208921432495, "learning_rate": 0.00019343615716734095, "loss": 11.6593, "step": 16647 }, { "epoch": 0.3484886544419325, "grad_norm": 0.32755616307258606, "learning_rate": 0.0001934353758876906, "loss": 11.6989, "step": 16648 }, { "epoch": 0.34850958720589464, "grad_norm": 0.30249568819999695, "learning_rate": 0.00019343459456312397, "loss": 11.6612, "step": 16649 }, { "epoch": 0.34853051996985684, "grad_norm": 0.27797049283981323, "learning_rate": 0.00019343381319364146, "loss": 11.693, "step": 16650 }, { "epoch": 0.348551452733819, "grad_norm": 0.2901480793952942, "learning_rate": 0.00019343303177924345, "loss": 11.677, "step": 16651 }, { "epoch": 0.34857238549778113, "grad_norm": 0.31427279114723206, "learning_rate": 0.0001934322503199303, "loss": 11.6696, "step": 16652 }, { "epoch": 0.3485933182617433, "grad_norm": 0.3080577552318573, "learning_rate": 0.0001934314688157024, "loss": 11.6616, "step": 16653 }, { "epoch": 0.3486142510257054, "grad_norm": 0.3395240604877472, "learning_rate": 0.00019343068726656012, "loss": 11.6634, "step": 16654 }, { "epoch": 0.34863518378966757, "grad_norm": 0.28265807032585144, "learning_rate": 0.00019342990567250382, "loss": 11.6598, "step": 16655 }, { "epoch": 0.3486561165536297, "grad_norm": 0.3292485177516937, "learning_rate": 0.00019342912403353394, "loss": 11.6827, "step": 16656 }, { "epoch": 0.3486770493175919, "grad_norm": 0.3212994635105133, "learning_rate": 0.00019342834234965074, "loss": 11.684, "step": 16657 }, { "epoch": 0.34869798208155406, "grad_norm": 0.3140281140804291, "learning_rate": 0.00019342756062085473, "loss": 11.6666, "step": 16658 }, { "epoch": 0.3487189148455162, "grad_norm": 0.30244341492652893, "learning_rate": 0.00019342677884714616, "loss": 11.6778, "step": 16659 }, { "epoch": 0.34873984760947835, "grad_norm": 0.3648269474506378, "learning_rate": 0.0001934259970285255, "loss": 11.6729, "step": 16660 }, { "epoch": 0.3487607803734405, "grad_norm": 0.23540958762168884, "learning_rate": 0.0001934252151649931, "loss": 11.6798, "step": 16661 }, { "epoch": 0.34878171313740264, "grad_norm": 0.31206610798835754, "learning_rate": 0.0001934244332565493, "loss": 11.6532, "step": 16662 }, { "epoch": 0.34880264590136484, "grad_norm": 0.2723735570907593, "learning_rate": 0.00019342365130319453, "loss": 11.6678, "step": 16663 }, { "epoch": 0.348823578665327, "grad_norm": 0.3083888590335846, "learning_rate": 0.0001934228693049291, "loss": 11.6734, "step": 16664 }, { "epoch": 0.34884451142928913, "grad_norm": 0.29653242230415344, "learning_rate": 0.00019342208726175347, "loss": 11.6763, "step": 16665 }, { "epoch": 0.3488654441932513, "grad_norm": 0.26908859610557556, "learning_rate": 0.00019342130517366793, "loss": 11.6784, "step": 16666 }, { "epoch": 0.3488863769572134, "grad_norm": 0.250715047121048, "learning_rate": 0.00019342052304067292, "loss": 11.686, "step": 16667 }, { "epoch": 0.34890730972117556, "grad_norm": 0.30065634846687317, "learning_rate": 0.0001934197408627688, "loss": 11.6529, "step": 16668 }, { "epoch": 0.34892824248513776, "grad_norm": 0.31342628598213196, "learning_rate": 0.0001934189586399559, "loss": 11.6728, "step": 16669 }, { "epoch": 0.3489491752490999, "grad_norm": 0.2497672736644745, "learning_rate": 0.00019341817637223466, "loss": 11.6668, "step": 16670 }, { "epoch": 0.34897010801306205, "grad_norm": 0.2706489562988281, "learning_rate": 0.00019341739405960543, "loss": 11.6669, "step": 16671 }, { "epoch": 0.3489910407770242, "grad_norm": 0.22966259717941284, "learning_rate": 0.0001934166117020686, "loss": 11.6651, "step": 16672 }, { "epoch": 0.34901197354098634, "grad_norm": 0.28952619433403015, "learning_rate": 0.00019341582929962453, "loss": 11.6818, "step": 16673 }, { "epoch": 0.3490329063049485, "grad_norm": 0.2503104507923126, "learning_rate": 0.00019341504685227356, "loss": 11.6805, "step": 16674 }, { "epoch": 0.34905383906891063, "grad_norm": 0.38820409774780273, "learning_rate": 0.00019341426436001613, "loss": 11.6587, "step": 16675 }, { "epoch": 0.34907477183287283, "grad_norm": 0.2734255790710449, "learning_rate": 0.0001934134818228526, "loss": 11.6705, "step": 16676 }, { "epoch": 0.349095704596835, "grad_norm": 0.2840173542499542, "learning_rate": 0.00019341269924078334, "loss": 11.6625, "step": 16677 }, { "epoch": 0.3491166373607971, "grad_norm": 0.29304808378219604, "learning_rate": 0.00019341191661380872, "loss": 11.6936, "step": 16678 }, { "epoch": 0.34913757012475927, "grad_norm": 0.280789315700531, "learning_rate": 0.0001934111339419291, "loss": 11.6618, "step": 16679 }, { "epoch": 0.3491585028887214, "grad_norm": 0.2576916515827179, "learning_rate": 0.0001934103512251449, "loss": 11.6668, "step": 16680 }, { "epoch": 0.34917943565268356, "grad_norm": 0.2823030352592468, "learning_rate": 0.00019340956846345645, "loss": 11.6899, "step": 16681 }, { "epoch": 0.34920036841664576, "grad_norm": 0.28640609979629517, "learning_rate": 0.00019340878565686417, "loss": 11.6608, "step": 16682 }, { "epoch": 0.3492213011806079, "grad_norm": 0.248068168759346, "learning_rate": 0.00019340800280536838, "loss": 11.659, "step": 16683 }, { "epoch": 0.34924223394457005, "grad_norm": 0.31882721185684204, "learning_rate": 0.00019340721990896952, "loss": 11.6818, "step": 16684 }, { "epoch": 0.3492631667085322, "grad_norm": 0.22230029106140137, "learning_rate": 0.00019340643696766794, "loss": 11.6687, "step": 16685 }, { "epoch": 0.34928409947249434, "grad_norm": 0.3514837920665741, "learning_rate": 0.00019340565398146402, "loss": 11.6778, "step": 16686 }, { "epoch": 0.3493050322364565, "grad_norm": 0.28573259711265564, "learning_rate": 0.0001934048709503581, "loss": 11.6624, "step": 16687 }, { "epoch": 0.3493259650004187, "grad_norm": 0.3407416343688965, "learning_rate": 0.0001934040878743506, "loss": 11.6783, "step": 16688 }, { "epoch": 0.3493468977643808, "grad_norm": 0.2641488015651703, "learning_rate": 0.00019340330475344192, "loss": 11.6763, "step": 16689 }, { "epoch": 0.34936783052834297, "grad_norm": 0.2493032068014145, "learning_rate": 0.00019340252158763234, "loss": 11.6713, "step": 16690 }, { "epoch": 0.3493887632923051, "grad_norm": 0.2436344176530838, "learning_rate": 0.00019340173837692235, "loss": 11.6702, "step": 16691 }, { "epoch": 0.34940969605626726, "grad_norm": 0.2829970419406891, "learning_rate": 0.0001934009551213122, "loss": 11.6774, "step": 16692 }, { "epoch": 0.3494306288202294, "grad_norm": 0.2834765315055847, "learning_rate": 0.00019340017182080242, "loss": 11.6854, "step": 16693 }, { "epoch": 0.34945156158419155, "grad_norm": 0.2561899721622467, "learning_rate": 0.00019339938847539326, "loss": 11.6575, "step": 16694 }, { "epoch": 0.34947249434815375, "grad_norm": 0.3187577426433563, "learning_rate": 0.00019339860508508515, "loss": 11.6664, "step": 16695 }, { "epoch": 0.3494934271121159, "grad_norm": 0.2724819779396057, "learning_rate": 0.00019339782164987844, "loss": 11.6773, "step": 16696 }, { "epoch": 0.34951435987607804, "grad_norm": 0.29687902331352234, "learning_rate": 0.00019339703816977357, "loss": 11.6739, "step": 16697 }, { "epoch": 0.3495352926400402, "grad_norm": 0.2559053301811218, "learning_rate": 0.00019339625464477085, "loss": 11.6908, "step": 16698 }, { "epoch": 0.34955622540400233, "grad_norm": 0.3825598359107971, "learning_rate": 0.0001933954710748707, "loss": 11.6609, "step": 16699 }, { "epoch": 0.3495771581679645, "grad_norm": 0.246841698884964, "learning_rate": 0.00019339468746007343, "loss": 11.668, "step": 16700 }, { "epoch": 0.3495980909319267, "grad_norm": 0.22624236345291138, "learning_rate": 0.0001933939038003795, "loss": 11.6807, "step": 16701 }, { "epoch": 0.3496190236958888, "grad_norm": 0.21415580809116364, "learning_rate": 0.00019339312009578923, "loss": 11.678, "step": 16702 }, { "epoch": 0.34963995645985096, "grad_norm": 0.31340017914772034, "learning_rate": 0.00019339233634630302, "loss": 11.6618, "step": 16703 }, { "epoch": 0.3496608892238131, "grad_norm": 0.26160261034965515, "learning_rate": 0.00019339155255192122, "loss": 11.6771, "step": 16704 }, { "epoch": 0.34968182198777525, "grad_norm": 0.3729269802570343, "learning_rate": 0.00019339076871264426, "loss": 11.6758, "step": 16705 }, { "epoch": 0.3497027547517374, "grad_norm": 0.37398257851600647, "learning_rate": 0.0001933899848284725, "loss": 11.6709, "step": 16706 }, { "epoch": 0.3497236875156996, "grad_norm": 0.27647125720977783, "learning_rate": 0.00019338920089940627, "loss": 11.6782, "step": 16707 }, { "epoch": 0.34974462027966174, "grad_norm": 0.26169291138648987, "learning_rate": 0.00019338841692544598, "loss": 11.665, "step": 16708 }, { "epoch": 0.3497655530436239, "grad_norm": 0.35015636682510376, "learning_rate": 0.00019338763290659201, "loss": 11.6428, "step": 16709 }, { "epoch": 0.34978648580758603, "grad_norm": 0.38830631971359253, "learning_rate": 0.00019338684884284474, "loss": 11.6964, "step": 16710 }, { "epoch": 0.3498074185715482, "grad_norm": 0.3003973364830017, "learning_rate": 0.00019338606473420455, "loss": 11.6734, "step": 16711 }, { "epoch": 0.3498283513355103, "grad_norm": 0.35560208559036255, "learning_rate": 0.00019338528058067178, "loss": 11.6886, "step": 16712 }, { "epoch": 0.34984928409947247, "grad_norm": 0.33536508679389954, "learning_rate": 0.00019338449638224686, "loss": 11.6592, "step": 16713 }, { "epoch": 0.34987021686343467, "grad_norm": 0.253445029258728, "learning_rate": 0.00019338371213893014, "loss": 11.6831, "step": 16714 }, { "epoch": 0.3498911496273968, "grad_norm": 0.23393547534942627, "learning_rate": 0.00019338292785072198, "loss": 11.6776, "step": 16715 }, { "epoch": 0.34991208239135896, "grad_norm": 0.3240651488304138, "learning_rate": 0.00019338214351762278, "loss": 11.6828, "step": 16716 }, { "epoch": 0.3499330151553211, "grad_norm": 0.21874867379665375, "learning_rate": 0.00019338135913963292, "loss": 11.668, "step": 16717 }, { "epoch": 0.34995394791928325, "grad_norm": 0.2634436786174774, "learning_rate": 0.00019338057471675278, "loss": 11.6786, "step": 16718 }, { "epoch": 0.3499748806832454, "grad_norm": 0.28253430128097534, "learning_rate": 0.0001933797902489827, "loss": 11.6707, "step": 16719 }, { "epoch": 0.3499958134472076, "grad_norm": 0.2866402268409729, "learning_rate": 0.00019337900573632307, "loss": 11.6795, "step": 16720 }, { "epoch": 0.35001674621116974, "grad_norm": 0.3501662313938141, "learning_rate": 0.00019337822117877434, "loss": 11.6578, "step": 16721 }, { "epoch": 0.3500376789751319, "grad_norm": 0.2805338501930237, "learning_rate": 0.00019337743657633677, "loss": 11.6806, "step": 16722 }, { "epoch": 0.350058611739094, "grad_norm": 0.2872869074344635, "learning_rate": 0.00019337665192901084, "loss": 11.6549, "step": 16723 }, { "epoch": 0.3500795445030562, "grad_norm": 0.2974426746368408, "learning_rate": 0.00019337586723679686, "loss": 11.6615, "step": 16724 }, { "epoch": 0.3501004772670183, "grad_norm": 0.2506639063358307, "learning_rate": 0.00019337508249969522, "loss": 11.684, "step": 16725 }, { "epoch": 0.3501214100309805, "grad_norm": 1.7653285264968872, "learning_rate": 0.00019337429771770635, "loss": 11.5765, "step": 16726 }, { "epoch": 0.35014234279494266, "grad_norm": 0.2533940374851227, "learning_rate": 0.00019337351289083055, "loss": 11.678, "step": 16727 }, { "epoch": 0.3501632755589048, "grad_norm": 0.3464130461215973, "learning_rate": 0.0001933727280190682, "loss": 11.6897, "step": 16728 }, { "epoch": 0.35018420832286695, "grad_norm": 0.2918190658092499, "learning_rate": 0.00019337194310241974, "loss": 11.6805, "step": 16729 }, { "epoch": 0.3502051410868291, "grad_norm": 0.3446022868156433, "learning_rate": 0.00019337115814088552, "loss": 11.688, "step": 16730 }, { "epoch": 0.35022607385079124, "grad_norm": 0.30044084787368774, "learning_rate": 0.0001933703731344659, "loss": 11.6759, "step": 16731 }, { "epoch": 0.3502470066147534, "grad_norm": 0.28552302718162537, "learning_rate": 0.0001933695880831613, "loss": 11.6782, "step": 16732 }, { "epoch": 0.3502679393787156, "grad_norm": 0.2432354837656021, "learning_rate": 0.00019336880298697208, "loss": 11.6645, "step": 16733 }, { "epoch": 0.35028887214267773, "grad_norm": 0.2760658860206604, "learning_rate": 0.00019336801784589853, "loss": 11.6757, "step": 16734 }, { "epoch": 0.3503098049066399, "grad_norm": 0.28846240043640137, "learning_rate": 0.00019336723265994118, "loss": 11.6635, "step": 16735 }, { "epoch": 0.350330737670602, "grad_norm": 0.3189903497695923, "learning_rate": 0.00019336644742910027, "loss": 11.6967, "step": 16736 }, { "epoch": 0.35035167043456417, "grad_norm": 0.2753998339176178, "learning_rate": 0.0001933656621533763, "loss": 11.6683, "step": 16737 }, { "epoch": 0.3503726031985263, "grad_norm": 0.3049989640712738, "learning_rate": 0.00019336487683276952, "loss": 11.6742, "step": 16738 }, { "epoch": 0.3503935359624885, "grad_norm": 0.2935037910938263, "learning_rate": 0.00019336409146728038, "loss": 11.6628, "step": 16739 }, { "epoch": 0.35041446872645066, "grad_norm": 0.2614760398864746, "learning_rate": 0.00019336330605690928, "loss": 11.6784, "step": 16740 }, { "epoch": 0.3504354014904128, "grad_norm": 0.3148437440395355, "learning_rate": 0.00019336252060165657, "loss": 11.6793, "step": 16741 }, { "epoch": 0.35045633425437495, "grad_norm": 0.2354615032672882, "learning_rate": 0.00019336173510152262, "loss": 11.6765, "step": 16742 }, { "epoch": 0.3504772670183371, "grad_norm": 0.319060355424881, "learning_rate": 0.0001933609495565078, "loss": 11.6716, "step": 16743 }, { "epoch": 0.35049819978229924, "grad_norm": 0.3118121325969696, "learning_rate": 0.00019336016396661248, "loss": 11.6744, "step": 16744 }, { "epoch": 0.35051913254626144, "grad_norm": 0.28402554988861084, "learning_rate": 0.00019335937833183712, "loss": 11.6732, "step": 16745 }, { "epoch": 0.3505400653102236, "grad_norm": 0.27973315119743347, "learning_rate": 0.00019335859265218202, "loss": 11.6771, "step": 16746 }, { "epoch": 0.3505609980741857, "grad_norm": 0.26468607783317566, "learning_rate": 0.00019335780692764754, "loss": 11.6692, "step": 16747 }, { "epoch": 0.35058193083814787, "grad_norm": 0.3119930922985077, "learning_rate": 0.0001933570211582341, "loss": 11.6756, "step": 16748 }, { "epoch": 0.35060286360211, "grad_norm": 0.24043165147304535, "learning_rate": 0.00019335623534394208, "loss": 11.676, "step": 16749 }, { "epoch": 0.35062379636607216, "grad_norm": 0.2686154544353485, "learning_rate": 0.00019335544948477184, "loss": 11.6831, "step": 16750 }, { "epoch": 0.3506447291300343, "grad_norm": 0.2663232982158661, "learning_rate": 0.00019335466358072378, "loss": 11.6772, "step": 16751 }, { "epoch": 0.3506656618939965, "grad_norm": 0.2868266701698303, "learning_rate": 0.00019335387763179824, "loss": 11.6728, "step": 16752 }, { "epoch": 0.35068659465795865, "grad_norm": 0.24661290645599365, "learning_rate": 0.00019335309163799562, "loss": 11.6845, "step": 16753 }, { "epoch": 0.3507075274219208, "grad_norm": 0.38301026821136475, "learning_rate": 0.00019335230559931632, "loss": 11.6857, "step": 16754 }, { "epoch": 0.35072846018588294, "grad_norm": 0.39059901237487793, "learning_rate": 0.0001933515195157607, "loss": 11.6842, "step": 16755 }, { "epoch": 0.3507493929498451, "grad_norm": 0.5060957074165344, "learning_rate": 0.0001933507333873291, "loss": 11.6802, "step": 16756 }, { "epoch": 0.35077032571380723, "grad_norm": 0.27919113636016846, "learning_rate": 0.00019334994721402195, "loss": 11.6597, "step": 16757 }, { "epoch": 0.35079125847776943, "grad_norm": 0.27700474858283997, "learning_rate": 0.0001933491609958396, "loss": 11.6802, "step": 16758 }, { "epoch": 0.3508121912417316, "grad_norm": 0.3056204617023468, "learning_rate": 0.00019334837473278244, "loss": 11.6778, "step": 16759 }, { "epoch": 0.3508331240056937, "grad_norm": 0.34660959243774414, "learning_rate": 0.00019334758842485088, "loss": 11.6968, "step": 16760 }, { "epoch": 0.35085405676965586, "grad_norm": 0.31789976358413696, "learning_rate": 0.00019334680207204523, "loss": 11.6727, "step": 16761 }, { "epoch": 0.350874989533618, "grad_norm": 0.3241395652294159, "learning_rate": 0.0001933460156743659, "loss": 11.6608, "step": 16762 }, { "epoch": 0.35089592229758015, "grad_norm": 0.271829217672348, "learning_rate": 0.00019334522923181329, "loss": 11.6687, "step": 16763 }, { "epoch": 0.3509168550615423, "grad_norm": 0.2568420469760895, "learning_rate": 0.00019334444274438775, "loss": 11.675, "step": 16764 }, { "epoch": 0.3509377878255045, "grad_norm": 0.28009963035583496, "learning_rate": 0.00019334365621208964, "loss": 11.6674, "step": 16765 }, { "epoch": 0.35095872058946664, "grad_norm": 0.32699355483055115, "learning_rate": 0.00019334286963491938, "loss": 11.6868, "step": 16766 }, { "epoch": 0.3509796533534288, "grad_norm": 0.24566204845905304, "learning_rate": 0.00019334208301287735, "loss": 11.663, "step": 16767 }, { "epoch": 0.35100058611739093, "grad_norm": 0.323181688785553, "learning_rate": 0.00019334129634596389, "loss": 11.683, "step": 16768 }, { "epoch": 0.3510215188813531, "grad_norm": 0.2559824585914612, "learning_rate": 0.0001933405096341794, "loss": 11.6802, "step": 16769 }, { "epoch": 0.3510424516453152, "grad_norm": 0.2830560803413391, "learning_rate": 0.00019333972287752428, "loss": 11.6693, "step": 16770 }, { "epoch": 0.3510633844092774, "grad_norm": 0.293425053358078, "learning_rate": 0.00019333893607599887, "loss": 11.6752, "step": 16771 }, { "epoch": 0.35108431717323957, "grad_norm": 0.30396798253059387, "learning_rate": 0.00019333814922960354, "loss": 11.6886, "step": 16772 }, { "epoch": 0.3511052499372017, "grad_norm": 0.2646876275539398, "learning_rate": 0.00019333736233833872, "loss": 11.6687, "step": 16773 }, { "epoch": 0.35112618270116386, "grad_norm": 0.33401888608932495, "learning_rate": 0.00019333657540220473, "loss": 11.6775, "step": 16774 }, { "epoch": 0.351147115465126, "grad_norm": 0.28601962327957153, "learning_rate": 0.00019333578842120201, "loss": 11.6629, "step": 16775 }, { "epoch": 0.35116804822908815, "grad_norm": 0.28281858563423157, "learning_rate": 0.0001933350013953309, "loss": 11.6793, "step": 16776 }, { "epoch": 0.35118898099305035, "grad_norm": 0.357077419757843, "learning_rate": 0.00019333421432459177, "loss": 11.6658, "step": 16777 }, { "epoch": 0.3512099137570125, "grad_norm": 0.38032007217407227, "learning_rate": 0.00019333342720898503, "loss": 11.6802, "step": 16778 }, { "epoch": 0.35123084652097464, "grad_norm": 0.34616294503211975, "learning_rate": 0.00019333264004851102, "loss": 11.6775, "step": 16779 }, { "epoch": 0.3512517792849368, "grad_norm": 0.2754495441913605, "learning_rate": 0.00019333185284317015, "loss": 11.6687, "step": 16780 }, { "epoch": 0.3512727120488989, "grad_norm": 0.35662737488746643, "learning_rate": 0.0001933310655929628, "loss": 11.6776, "step": 16781 }, { "epoch": 0.3512936448128611, "grad_norm": 0.28250619769096375, "learning_rate": 0.0001933302782978893, "loss": 11.6675, "step": 16782 }, { "epoch": 0.3513145775768232, "grad_norm": 0.34260016679763794, "learning_rate": 0.0001933294909579501, "loss": 11.6629, "step": 16783 }, { "epoch": 0.3513355103407854, "grad_norm": 0.26954224705696106, "learning_rate": 0.00019332870357314553, "loss": 11.6724, "step": 16784 }, { "epoch": 0.35135644310474756, "grad_norm": 0.21669279038906097, "learning_rate": 0.000193327916143476, "loss": 11.674, "step": 16785 }, { "epoch": 0.3513773758687097, "grad_norm": 0.24406033754348755, "learning_rate": 0.00019332712866894185, "loss": 11.6757, "step": 16786 }, { "epoch": 0.35139830863267185, "grad_norm": 0.2224235087633133, "learning_rate": 0.0001933263411495435, "loss": 11.6849, "step": 16787 }, { "epoch": 0.351419241396634, "grad_norm": 0.2819780111312866, "learning_rate": 0.0001933255535852813, "loss": 11.6835, "step": 16788 }, { "epoch": 0.35144017416059614, "grad_norm": 0.24738182127475739, "learning_rate": 0.0001933247659761556, "loss": 11.6677, "step": 16789 }, { "epoch": 0.35146110692455834, "grad_norm": 0.2718347907066345, "learning_rate": 0.00019332397832216684, "loss": 11.663, "step": 16790 }, { "epoch": 0.3514820396885205, "grad_norm": 0.24238497018814087, "learning_rate": 0.00019332319062331538, "loss": 11.6925, "step": 16791 }, { "epoch": 0.35150297245248263, "grad_norm": 0.2926843762397766, "learning_rate": 0.0001933224028796016, "loss": 11.6776, "step": 16792 }, { "epoch": 0.3515239052164448, "grad_norm": 0.2966620922088623, "learning_rate": 0.00019332161509102584, "loss": 11.6674, "step": 16793 }, { "epoch": 0.3515448379804069, "grad_norm": 0.26243188977241516, "learning_rate": 0.00019332082725758855, "loss": 11.6649, "step": 16794 }, { "epoch": 0.35156577074436907, "grad_norm": 0.25172367691993713, "learning_rate": 0.00019332003937929003, "loss": 11.6722, "step": 16795 }, { "epoch": 0.35158670350833127, "grad_norm": 0.24031047523021698, "learning_rate": 0.00019331925145613072, "loss": 11.6721, "step": 16796 }, { "epoch": 0.3516076362722934, "grad_norm": 0.3260500431060791, "learning_rate": 0.00019331846348811098, "loss": 11.6827, "step": 16797 }, { "epoch": 0.35162856903625556, "grad_norm": 0.3419903814792633, "learning_rate": 0.00019331767547523117, "loss": 11.6716, "step": 16798 }, { "epoch": 0.3516495018002177, "grad_norm": 0.23904868960380554, "learning_rate": 0.00019331688741749167, "loss": 11.6788, "step": 16799 }, { "epoch": 0.35167043456417985, "grad_norm": 0.2795262634754181, "learning_rate": 0.0001933160993148929, "loss": 11.6653, "step": 16800 }, { "epoch": 0.351691367328142, "grad_norm": 0.2170346975326538, "learning_rate": 0.0001933153111674352, "loss": 11.6713, "step": 16801 }, { "epoch": 0.35171230009210414, "grad_norm": 0.3188835680484772, "learning_rate": 0.00019331452297511895, "loss": 11.6653, "step": 16802 }, { "epoch": 0.35173323285606634, "grad_norm": 0.27318575978279114, "learning_rate": 0.00019331373473794455, "loss": 11.6697, "step": 16803 }, { "epoch": 0.3517541656200285, "grad_norm": 0.2870616912841797, "learning_rate": 0.00019331294645591238, "loss": 11.6732, "step": 16804 }, { "epoch": 0.3517750983839906, "grad_norm": 0.31011733412742615, "learning_rate": 0.0001933121581290228, "loss": 11.6853, "step": 16805 }, { "epoch": 0.35179603114795277, "grad_norm": 0.36179354786872864, "learning_rate": 0.00019331136975727617, "loss": 11.6646, "step": 16806 }, { "epoch": 0.3518169639119149, "grad_norm": 0.29125407338142395, "learning_rate": 0.0001933105813406729, "loss": 11.671, "step": 16807 }, { "epoch": 0.35183789667587706, "grad_norm": 0.3613451421260834, "learning_rate": 0.0001933097928792134, "loss": 11.688, "step": 16808 }, { "epoch": 0.35185882943983926, "grad_norm": 0.2849053144454956, "learning_rate": 0.00019330900437289797, "loss": 11.6643, "step": 16809 }, { "epoch": 0.3518797622038014, "grad_norm": 0.2727099657058716, "learning_rate": 0.00019330821582172708, "loss": 11.676, "step": 16810 }, { "epoch": 0.35190069496776355, "grad_norm": 0.289549320936203, "learning_rate": 0.00019330742722570104, "loss": 11.6544, "step": 16811 }, { "epoch": 0.3519216277317257, "grad_norm": 0.30902761220932007, "learning_rate": 0.00019330663858482022, "loss": 11.6775, "step": 16812 }, { "epoch": 0.35194256049568784, "grad_norm": 0.2587846517562866, "learning_rate": 0.00019330584989908504, "loss": 11.6812, "step": 16813 }, { "epoch": 0.35196349325965, "grad_norm": 0.28955078125, "learning_rate": 0.0001933050611684959, "loss": 11.6688, "step": 16814 }, { "epoch": 0.3519844260236122, "grad_norm": 0.31137338280677795, "learning_rate": 0.00019330427239305312, "loss": 11.6801, "step": 16815 }, { "epoch": 0.35200535878757433, "grad_norm": 0.29580453038215637, "learning_rate": 0.0001933034835727571, "loss": 11.6916, "step": 16816 }, { "epoch": 0.3520262915515365, "grad_norm": 0.2792806923389435, "learning_rate": 0.00019330269470760824, "loss": 11.6746, "step": 16817 }, { "epoch": 0.3520472243154986, "grad_norm": 0.258407324552536, "learning_rate": 0.00019330190579760692, "loss": 11.6773, "step": 16818 }, { "epoch": 0.35206815707946076, "grad_norm": 0.2890965938568115, "learning_rate": 0.00019330111684275346, "loss": 11.6704, "step": 16819 }, { "epoch": 0.3520890898434229, "grad_norm": 0.2570069134235382, "learning_rate": 0.00019330032784304832, "loss": 11.6754, "step": 16820 }, { "epoch": 0.35211002260738505, "grad_norm": 0.3089180290699005, "learning_rate": 0.00019329953879849185, "loss": 11.6707, "step": 16821 }, { "epoch": 0.35213095537134725, "grad_norm": 0.22714649140834808, "learning_rate": 0.0001932987497090844, "loss": 11.6636, "step": 16822 }, { "epoch": 0.3521518881353094, "grad_norm": 0.36603426933288574, "learning_rate": 0.00019329796057482638, "loss": 11.6952, "step": 16823 }, { "epoch": 0.35217282089927154, "grad_norm": 0.28436797857284546, "learning_rate": 0.00019329717139571814, "loss": 11.6623, "step": 16824 }, { "epoch": 0.3521937536632337, "grad_norm": 0.23273661732673645, "learning_rate": 0.0001932963821717601, "loss": 11.6711, "step": 16825 }, { "epoch": 0.35221468642719583, "grad_norm": 0.2767435312271118, "learning_rate": 0.0001932955929029526, "loss": 11.6801, "step": 16826 }, { "epoch": 0.352235619191158, "grad_norm": 0.31540006399154663, "learning_rate": 0.00019329480358929607, "loss": 11.6977, "step": 16827 }, { "epoch": 0.3522565519551202, "grad_norm": 0.26854100823402405, "learning_rate": 0.00019329401423079086, "loss": 11.6633, "step": 16828 }, { "epoch": 0.3522774847190823, "grad_norm": 0.2365017980337143, "learning_rate": 0.00019329322482743733, "loss": 11.6875, "step": 16829 }, { "epoch": 0.35229841748304447, "grad_norm": 0.27936822175979614, "learning_rate": 0.00019329243537923586, "loss": 11.6958, "step": 16830 }, { "epoch": 0.3523193502470066, "grad_norm": 0.24930599331855774, "learning_rate": 0.00019329164588618688, "loss": 11.6751, "step": 16831 }, { "epoch": 0.35234028301096876, "grad_norm": 0.32759249210357666, "learning_rate": 0.0001932908563482907, "loss": 11.677, "step": 16832 }, { "epoch": 0.3523612157749309, "grad_norm": 0.2402517944574356, "learning_rate": 0.00019329006676554778, "loss": 11.6819, "step": 16833 }, { "epoch": 0.3523821485388931, "grad_norm": 0.3206425905227661, "learning_rate": 0.00019328927713795841, "loss": 11.6846, "step": 16834 }, { "epoch": 0.35240308130285525, "grad_norm": 0.2647460401058197, "learning_rate": 0.00019328848746552306, "loss": 11.6697, "step": 16835 }, { "epoch": 0.3524240140668174, "grad_norm": 0.32528606057167053, "learning_rate": 0.00019328769774824202, "loss": 11.6725, "step": 16836 }, { "epoch": 0.35244494683077954, "grad_norm": 0.25228267908096313, "learning_rate": 0.00019328690798611574, "loss": 11.6708, "step": 16837 }, { "epoch": 0.3524658795947417, "grad_norm": 0.28138911724090576, "learning_rate": 0.00019328611817914456, "loss": 11.681, "step": 16838 }, { "epoch": 0.3524868123587038, "grad_norm": 0.2486088126897812, "learning_rate": 0.0001932853283273289, "loss": 11.6541, "step": 16839 }, { "epoch": 0.35250774512266597, "grad_norm": 0.2969990372657776, "learning_rate": 0.00019328453843066908, "loss": 11.6773, "step": 16840 }, { "epoch": 0.3525286778866282, "grad_norm": 0.3790161907672882, "learning_rate": 0.00019328374848916554, "loss": 11.6695, "step": 16841 }, { "epoch": 0.3525496106505903, "grad_norm": 0.31138312816619873, "learning_rate": 0.00019328295850281864, "loss": 11.6783, "step": 16842 }, { "epoch": 0.35257054341455246, "grad_norm": 0.2643279433250427, "learning_rate": 0.00019328216847162873, "loss": 11.6538, "step": 16843 }, { "epoch": 0.3525914761785146, "grad_norm": 0.4288049340248108, "learning_rate": 0.0001932813783955962, "loss": 11.6802, "step": 16844 }, { "epoch": 0.35261240894247675, "grad_norm": 0.25408676266670227, "learning_rate": 0.00019328058827472146, "loss": 11.6798, "step": 16845 }, { "epoch": 0.3526333417064389, "grad_norm": 0.21067728102207184, "learning_rate": 0.00019327979810900486, "loss": 11.6824, "step": 16846 }, { "epoch": 0.3526542744704011, "grad_norm": 0.264961302280426, "learning_rate": 0.00019327900789844682, "loss": 11.6691, "step": 16847 }, { "epoch": 0.35267520723436324, "grad_norm": 0.24085696041584015, "learning_rate": 0.00019327821764304767, "loss": 11.6855, "step": 16848 }, { "epoch": 0.3526961399983254, "grad_norm": 0.30884453654289246, "learning_rate": 0.00019327742734280781, "loss": 11.6664, "step": 16849 }, { "epoch": 0.35271707276228753, "grad_norm": 0.38679417967796326, "learning_rate": 0.00019327663699772763, "loss": 11.6919, "step": 16850 }, { "epoch": 0.3527380055262497, "grad_norm": 0.32273322343826294, "learning_rate": 0.0001932758466078075, "loss": 11.6654, "step": 16851 }, { "epoch": 0.3527589382902118, "grad_norm": 0.2178211808204651, "learning_rate": 0.0001932750561730478, "loss": 11.6591, "step": 16852 }, { "epoch": 0.35277987105417397, "grad_norm": 0.29479268193244934, "learning_rate": 0.0001932742656934489, "loss": 11.6917, "step": 16853 }, { "epoch": 0.35280080381813617, "grad_norm": 0.359739750623703, "learning_rate": 0.0001932734751690112, "loss": 11.6911, "step": 16854 }, { "epoch": 0.3528217365820983, "grad_norm": 0.24177393317222595, "learning_rate": 0.00019327268459973508, "loss": 11.6606, "step": 16855 }, { "epoch": 0.35284266934606046, "grad_norm": 0.2781588137149811, "learning_rate": 0.00019327189398562092, "loss": 11.6548, "step": 16856 }, { "epoch": 0.3528636021100226, "grad_norm": 0.3138987720012665, "learning_rate": 0.00019327110332666907, "loss": 11.6811, "step": 16857 }, { "epoch": 0.35288453487398475, "grad_norm": 0.28549182415008545, "learning_rate": 0.00019327031262287995, "loss": 11.6741, "step": 16858 }, { "epoch": 0.3529054676379469, "grad_norm": 0.3048546016216278, "learning_rate": 0.0001932695218742539, "loss": 11.6823, "step": 16859 }, { "epoch": 0.3529264004019091, "grad_norm": 0.24469324946403503, "learning_rate": 0.00019326873108079133, "loss": 11.6754, "step": 16860 }, { "epoch": 0.35294733316587124, "grad_norm": 0.258283793926239, "learning_rate": 0.00019326794024249262, "loss": 11.6862, "step": 16861 }, { "epoch": 0.3529682659298334, "grad_norm": 0.2594655156135559, "learning_rate": 0.0001932671493593581, "loss": 11.6812, "step": 16862 }, { "epoch": 0.3529891986937955, "grad_norm": 0.25124672055244446, "learning_rate": 0.00019326635843138824, "loss": 11.6748, "step": 16863 }, { "epoch": 0.35301013145775767, "grad_norm": 0.31458210945129395, "learning_rate": 0.00019326556745858335, "loss": 11.6929, "step": 16864 }, { "epoch": 0.3530310642217198, "grad_norm": 0.304561048746109, "learning_rate": 0.00019326477644094386, "loss": 11.6766, "step": 16865 }, { "epoch": 0.353051996985682, "grad_norm": 1.191182255744934, "learning_rate": 0.00019326398537847009, "loss": 11.5787, "step": 16866 }, { "epoch": 0.35307292974964416, "grad_norm": 0.23556609451770782, "learning_rate": 0.00019326319427116247, "loss": 11.6517, "step": 16867 }, { "epoch": 0.3530938625136063, "grad_norm": 0.26132020354270935, "learning_rate": 0.00019326240311902138, "loss": 11.6626, "step": 16868 }, { "epoch": 0.35311479527756845, "grad_norm": 0.2568359971046448, "learning_rate": 0.00019326161192204715, "loss": 11.6703, "step": 16869 }, { "epoch": 0.3531357280415306, "grad_norm": 0.4158162772655487, "learning_rate": 0.0001932608206802402, "loss": 11.6984, "step": 16870 }, { "epoch": 0.35315666080549274, "grad_norm": 0.24568521976470947, "learning_rate": 0.00019326002939360092, "loss": 11.6627, "step": 16871 }, { "epoch": 0.3531775935694549, "grad_norm": 0.32044094800949097, "learning_rate": 0.00019325923806212968, "loss": 11.679, "step": 16872 }, { "epoch": 0.3531985263334171, "grad_norm": 0.24589145183563232, "learning_rate": 0.00019325844668582683, "loss": 11.6751, "step": 16873 }, { "epoch": 0.35321945909737923, "grad_norm": 0.2741810083389282, "learning_rate": 0.0001932576552646928, "loss": 11.6651, "step": 16874 }, { "epoch": 0.3532403918613414, "grad_norm": 0.34668004512786865, "learning_rate": 0.00019325686379872793, "loss": 11.6855, "step": 16875 }, { "epoch": 0.3532613246253035, "grad_norm": 0.2938081920146942, "learning_rate": 0.00019325607228793263, "loss": 11.6773, "step": 16876 }, { "epoch": 0.35328225738926566, "grad_norm": 0.27228912711143494, "learning_rate": 0.00019325528073230725, "loss": 11.6781, "step": 16877 }, { "epoch": 0.3533031901532278, "grad_norm": 0.2538076341152191, "learning_rate": 0.0001932544891318522, "loss": 11.6729, "step": 16878 }, { "epoch": 0.35332412291719, "grad_norm": 0.27840209007263184, "learning_rate": 0.00019325369748656783, "loss": 11.6631, "step": 16879 }, { "epoch": 0.35334505568115215, "grad_norm": 0.2790590822696686, "learning_rate": 0.0001932529057964546, "loss": 11.6735, "step": 16880 }, { "epoch": 0.3533659884451143, "grad_norm": 0.28720247745513916, "learning_rate": 0.00019325211406151277, "loss": 11.6698, "step": 16881 }, { "epoch": 0.35338692120907644, "grad_norm": 0.31871798634529114, "learning_rate": 0.0001932513222817428, "loss": 11.6764, "step": 16882 }, { "epoch": 0.3534078539730386, "grad_norm": 0.24490946531295776, "learning_rate": 0.00019325053045714504, "loss": 11.6537, "step": 16883 }, { "epoch": 0.35342878673700073, "grad_norm": 0.3010004460811615, "learning_rate": 0.00019324973858771987, "loss": 11.6821, "step": 16884 }, { "epoch": 0.35344971950096293, "grad_norm": 0.24300725758075714, "learning_rate": 0.0001932489466734677, "loss": 11.6827, "step": 16885 }, { "epoch": 0.3534706522649251, "grad_norm": 0.25912854075431824, "learning_rate": 0.00019324815471438887, "loss": 11.683, "step": 16886 }, { "epoch": 0.3534915850288872, "grad_norm": 0.23650850355625153, "learning_rate": 0.00019324736271048381, "loss": 11.6534, "step": 16887 }, { "epoch": 0.35351251779284937, "grad_norm": 0.26761534810066223, "learning_rate": 0.00019324657066175288, "loss": 11.6719, "step": 16888 }, { "epoch": 0.3535334505568115, "grad_norm": 0.2630567252635956, "learning_rate": 0.00019324577856819644, "loss": 11.6454, "step": 16889 }, { "epoch": 0.35355438332077366, "grad_norm": 0.36554861068725586, "learning_rate": 0.0001932449864298149, "loss": 11.6647, "step": 16890 }, { "epoch": 0.3535753160847358, "grad_norm": 0.33080223202705383, "learning_rate": 0.0001932441942466086, "loss": 11.6681, "step": 16891 }, { "epoch": 0.353596248848698, "grad_norm": 0.24689637124538422, "learning_rate": 0.00019324340201857797, "loss": 11.6789, "step": 16892 }, { "epoch": 0.35361718161266015, "grad_norm": 0.2710965573787689, "learning_rate": 0.00019324260974572336, "loss": 11.6789, "step": 16893 }, { "epoch": 0.3536381143766223, "grad_norm": 0.2784768044948578, "learning_rate": 0.00019324181742804515, "loss": 11.6697, "step": 16894 }, { "epoch": 0.35365904714058444, "grad_norm": 0.3327540159225464, "learning_rate": 0.00019324102506554375, "loss": 11.679, "step": 16895 }, { "epoch": 0.3536799799045466, "grad_norm": 0.3631344437599182, "learning_rate": 0.0001932402326582195, "loss": 11.6898, "step": 16896 }, { "epoch": 0.3537009126685087, "grad_norm": 0.28064244985580444, "learning_rate": 0.00019323944020607282, "loss": 11.6715, "step": 16897 }, { "epoch": 0.3537218454324709, "grad_norm": 0.2989894449710846, "learning_rate": 0.00019323864770910405, "loss": 11.6829, "step": 16898 }, { "epoch": 0.35374277819643307, "grad_norm": 0.27653253078460693, "learning_rate": 0.0001932378551673136, "loss": 11.6472, "step": 16899 }, { "epoch": 0.3537637109603952, "grad_norm": 0.36167648434638977, "learning_rate": 0.00019323706258070184, "loss": 11.6785, "step": 16900 }, { "epoch": 0.35378464372435736, "grad_norm": 0.30039626359939575, "learning_rate": 0.0001932362699492692, "loss": 11.6661, "step": 16901 }, { "epoch": 0.3538055764883195, "grad_norm": 0.3350807726383209, "learning_rate": 0.00019323547727301596, "loss": 11.687, "step": 16902 }, { "epoch": 0.35382650925228165, "grad_norm": 0.2810347080230713, "learning_rate": 0.0001932346845519426, "loss": 11.6768, "step": 16903 }, { "epoch": 0.35384744201624385, "grad_norm": 0.3033250868320465, "learning_rate": 0.00019323389178604942, "loss": 11.678, "step": 16904 }, { "epoch": 0.353868374780206, "grad_norm": 0.3143307566642761, "learning_rate": 0.00019323309897533686, "loss": 11.6953, "step": 16905 }, { "epoch": 0.35388930754416814, "grad_norm": 0.27717798948287964, "learning_rate": 0.00019323230611980528, "loss": 11.657, "step": 16906 }, { "epoch": 0.3539102403081303, "grad_norm": 0.3421151340007782, "learning_rate": 0.00019323151321945506, "loss": 11.6793, "step": 16907 }, { "epoch": 0.35393117307209243, "grad_norm": 0.25474855303764343, "learning_rate": 0.0001932307202742866, "loss": 11.6603, "step": 16908 }, { "epoch": 0.3539521058360546, "grad_norm": 0.2604868412017822, "learning_rate": 0.00019322992728430024, "loss": 11.6567, "step": 16909 }, { "epoch": 0.3539730386000167, "grad_norm": 0.23654845356941223, "learning_rate": 0.00019322913424949637, "loss": 11.6616, "step": 16910 }, { "epoch": 0.3539939713639789, "grad_norm": 0.2722037434577942, "learning_rate": 0.00019322834116987543, "loss": 11.6625, "step": 16911 }, { "epoch": 0.35401490412794107, "grad_norm": 0.2930489480495453, "learning_rate": 0.00019322754804543775, "loss": 11.6752, "step": 16912 }, { "epoch": 0.3540358368919032, "grad_norm": 0.20676226913928986, "learning_rate": 0.00019322675487618368, "loss": 11.6782, "step": 16913 }, { "epoch": 0.35405676965586536, "grad_norm": 0.3564967215061188, "learning_rate": 0.0001932259616621137, "loss": 11.6913, "step": 16914 }, { "epoch": 0.3540777024198275, "grad_norm": 0.23321972787380219, "learning_rate": 0.0001932251684032281, "loss": 11.652, "step": 16915 }, { "epoch": 0.35409863518378965, "grad_norm": 0.28552865982055664, "learning_rate": 0.0001932243750995273, "loss": 11.6889, "step": 16916 }, { "epoch": 0.35411956794775185, "grad_norm": 0.2986627519130707, "learning_rate": 0.00019322358175101164, "loss": 11.6785, "step": 16917 }, { "epoch": 0.354140500711714, "grad_norm": 0.28098803758621216, "learning_rate": 0.0001932227883576816, "loss": 11.681, "step": 16918 }, { "epoch": 0.35416143347567614, "grad_norm": 0.3552432656288147, "learning_rate": 0.00019322199491953745, "loss": 11.6628, "step": 16919 }, { "epoch": 0.3541823662396383, "grad_norm": 0.2145538628101349, "learning_rate": 0.00019322120143657964, "loss": 11.6562, "step": 16920 }, { "epoch": 0.3542032990036004, "grad_norm": 0.2616879343986511, "learning_rate": 0.0001932204079088085, "loss": 11.665, "step": 16921 }, { "epoch": 0.35422423176756257, "grad_norm": 0.23895038664340973, "learning_rate": 0.00019321961433622448, "loss": 11.6567, "step": 16922 }, { "epoch": 0.35424516453152477, "grad_norm": 0.9972710013389587, "learning_rate": 0.0001932188207188279, "loss": 11.6078, "step": 16923 }, { "epoch": 0.3542660972954869, "grad_norm": 0.29860246181488037, "learning_rate": 0.00019321802705661917, "loss": 11.6678, "step": 16924 }, { "epoch": 0.35428703005944906, "grad_norm": 0.280760794878006, "learning_rate": 0.00019321723334959868, "loss": 11.6697, "step": 16925 }, { "epoch": 0.3543079628234112, "grad_norm": 0.36348047852516174, "learning_rate": 0.00019321643959776679, "loss": 11.6735, "step": 16926 }, { "epoch": 0.35432889558737335, "grad_norm": 0.24239124357700348, "learning_rate": 0.00019321564580112387, "loss": 11.6642, "step": 16927 }, { "epoch": 0.3543498283513355, "grad_norm": 0.30177611112594604, "learning_rate": 0.00019321485195967036, "loss": 11.6744, "step": 16928 }, { "epoch": 0.35437076111529764, "grad_norm": 0.3530556559562683, "learning_rate": 0.00019321405807340658, "loss": 11.7047, "step": 16929 }, { "epoch": 0.35439169387925984, "grad_norm": 0.2620765268802643, "learning_rate": 0.00019321326414233295, "loss": 11.6851, "step": 16930 }, { "epoch": 0.354412626643222, "grad_norm": 0.3435528874397278, "learning_rate": 0.00019321247016644978, "loss": 11.652, "step": 16931 }, { "epoch": 0.35443355940718413, "grad_norm": 0.28861311078071594, "learning_rate": 0.00019321167614575754, "loss": 11.6809, "step": 16932 }, { "epoch": 0.3544544921711463, "grad_norm": 0.32447123527526855, "learning_rate": 0.0001932108820802566, "loss": 11.6766, "step": 16933 }, { "epoch": 0.3544754249351084, "grad_norm": 0.30570361018180847, "learning_rate": 0.0001932100879699473, "loss": 11.6764, "step": 16934 }, { "epoch": 0.35449635769907056, "grad_norm": 0.2906964123249054, "learning_rate": 0.00019320929381483006, "loss": 11.6861, "step": 16935 }, { "epoch": 0.35451729046303276, "grad_norm": 0.27628228068351746, "learning_rate": 0.00019320849961490524, "loss": 11.6568, "step": 16936 }, { "epoch": 0.3545382232269949, "grad_norm": 0.4305513799190521, "learning_rate": 0.0001932077053701732, "loss": 11.681, "step": 16937 }, { "epoch": 0.35455915599095705, "grad_norm": 0.42106565833091736, "learning_rate": 0.0001932069110806344, "loss": 11.6708, "step": 16938 }, { "epoch": 0.3545800887549192, "grad_norm": 0.30931970477104187, "learning_rate": 0.00019320611674628912, "loss": 11.6805, "step": 16939 }, { "epoch": 0.35460102151888134, "grad_norm": 0.324248343706131, "learning_rate": 0.00019320532236713782, "loss": 11.6602, "step": 16940 }, { "epoch": 0.3546219542828435, "grad_norm": 0.27783676981925964, "learning_rate": 0.00019320452794318083, "loss": 11.6818, "step": 16941 }, { "epoch": 0.35464288704680563, "grad_norm": 0.31753358244895935, "learning_rate": 0.00019320373347441857, "loss": 11.6774, "step": 16942 }, { "epoch": 0.35466381981076783, "grad_norm": 0.299620121717453, "learning_rate": 0.0001932029389608514, "loss": 11.6812, "step": 16943 }, { "epoch": 0.35468475257473, "grad_norm": 0.24112145602703094, "learning_rate": 0.00019320214440247976, "loss": 11.6652, "step": 16944 }, { "epoch": 0.3547056853386921, "grad_norm": 0.3004738986492157, "learning_rate": 0.00019320134979930394, "loss": 11.6841, "step": 16945 }, { "epoch": 0.35472661810265427, "grad_norm": 0.30115607380867004, "learning_rate": 0.00019320055515132433, "loss": 11.665, "step": 16946 }, { "epoch": 0.3547475508666164, "grad_norm": 0.27856674790382385, "learning_rate": 0.00019319976045854137, "loss": 11.6647, "step": 16947 }, { "epoch": 0.35476848363057856, "grad_norm": 0.284824013710022, "learning_rate": 0.00019319896572095544, "loss": 11.6599, "step": 16948 }, { "epoch": 0.35478941639454076, "grad_norm": 0.23040227591991425, "learning_rate": 0.00019319817093856688, "loss": 11.6517, "step": 16949 }, { "epoch": 0.3548103491585029, "grad_norm": 0.28093791007995605, "learning_rate": 0.0001931973761113761, "loss": 11.698, "step": 16950 }, { "epoch": 0.35483128192246505, "grad_norm": 0.2979304790496826, "learning_rate": 0.00019319658123938347, "loss": 11.6827, "step": 16951 }, { "epoch": 0.3548522146864272, "grad_norm": 0.37992313504219055, "learning_rate": 0.00019319578632258938, "loss": 11.6818, "step": 16952 }, { "epoch": 0.35487314745038934, "grad_norm": 0.30506259202957153, "learning_rate": 0.00019319499136099418, "loss": 11.671, "step": 16953 }, { "epoch": 0.3548940802143515, "grad_norm": 0.2664627134799957, "learning_rate": 0.00019319419635459832, "loss": 11.6821, "step": 16954 }, { "epoch": 0.3549150129783137, "grad_norm": 0.25895705819129944, "learning_rate": 0.00019319340130340214, "loss": 11.666, "step": 16955 }, { "epoch": 0.3549359457422758, "grad_norm": 0.24854131042957306, "learning_rate": 0.000193192606207406, "loss": 11.6919, "step": 16956 }, { "epoch": 0.35495687850623797, "grad_norm": 0.3092489540576935, "learning_rate": 0.0001931918110666103, "loss": 11.6829, "step": 16957 }, { "epoch": 0.3549778112702001, "grad_norm": 0.29494017362594604, "learning_rate": 0.00019319101588101547, "loss": 11.6775, "step": 16958 }, { "epoch": 0.35499874403416226, "grad_norm": 0.26267698407173157, "learning_rate": 0.0001931902206506218, "loss": 11.6744, "step": 16959 }, { "epoch": 0.3550196767981244, "grad_norm": 0.35072922706604004, "learning_rate": 0.00019318942537542975, "loss": 11.6636, "step": 16960 }, { "epoch": 0.35504060956208655, "grad_norm": 0.2591342329978943, "learning_rate": 0.0001931886300554397, "loss": 11.6891, "step": 16961 }, { "epoch": 0.35506154232604875, "grad_norm": 0.29006901383399963, "learning_rate": 0.00019318783469065198, "loss": 11.6663, "step": 16962 }, { "epoch": 0.3550824750900109, "grad_norm": 0.2780150771141052, "learning_rate": 0.00019318703928106697, "loss": 11.6775, "step": 16963 }, { "epoch": 0.35510340785397304, "grad_norm": 0.25455719232559204, "learning_rate": 0.00019318624382668516, "loss": 11.6793, "step": 16964 }, { "epoch": 0.3551243406179352, "grad_norm": 0.28267616033554077, "learning_rate": 0.0001931854483275068, "loss": 11.667, "step": 16965 }, { "epoch": 0.35514527338189733, "grad_norm": 0.2539859712123871, "learning_rate": 0.00019318465278353233, "loss": 11.6744, "step": 16966 }, { "epoch": 0.3551662061458595, "grad_norm": 0.3726375102996826, "learning_rate": 0.00019318385719476212, "loss": 11.6714, "step": 16967 }, { "epoch": 0.3551871389098217, "grad_norm": 0.3163251280784607, "learning_rate": 0.0001931830615611966, "loss": 11.6778, "step": 16968 }, { "epoch": 0.3552080716737838, "grad_norm": 0.332775741815567, "learning_rate": 0.0001931822658828361, "loss": 11.6904, "step": 16969 }, { "epoch": 0.35522900443774597, "grad_norm": 0.290806382894516, "learning_rate": 0.00019318147015968102, "loss": 11.6707, "step": 16970 }, { "epoch": 0.3552499372017081, "grad_norm": 0.2826460301876068, "learning_rate": 0.0001931806743917317, "loss": 11.682, "step": 16971 }, { "epoch": 0.35527086996567026, "grad_norm": 0.2971789538860321, "learning_rate": 0.00019317987857898862, "loss": 11.6705, "step": 16972 }, { "epoch": 0.3552918027296324, "grad_norm": 0.21691954135894775, "learning_rate": 0.00019317908272145209, "loss": 11.6696, "step": 16973 }, { "epoch": 0.3553127354935946, "grad_norm": 0.2920422852039337, "learning_rate": 0.0001931782868191225, "loss": 11.6658, "step": 16974 }, { "epoch": 0.35533366825755675, "grad_norm": 0.2502650022506714, "learning_rate": 0.00019317749087200024, "loss": 11.6868, "step": 16975 }, { "epoch": 0.3553546010215189, "grad_norm": 0.3114893436431885, "learning_rate": 0.00019317669488008566, "loss": 11.6816, "step": 16976 }, { "epoch": 0.35537553378548103, "grad_norm": 0.3162182867527008, "learning_rate": 0.00019317589884337922, "loss": 11.6874, "step": 16977 }, { "epoch": 0.3553964665494432, "grad_norm": 0.30142730474472046, "learning_rate": 0.00019317510276188125, "loss": 11.6764, "step": 16978 }, { "epoch": 0.3554173993134053, "grad_norm": 0.25636979937553406, "learning_rate": 0.00019317430663559214, "loss": 11.672, "step": 16979 }, { "epoch": 0.35543833207736747, "grad_norm": 0.277721643447876, "learning_rate": 0.00019317351046451226, "loss": 11.6789, "step": 16980 }, { "epoch": 0.35545926484132967, "grad_norm": 0.27305617928504944, "learning_rate": 0.000193172714248642, "loss": 11.6773, "step": 16981 }, { "epoch": 0.3554801976052918, "grad_norm": 0.3972371220588684, "learning_rate": 0.0001931719179879818, "loss": 11.6758, "step": 16982 }, { "epoch": 0.35550113036925396, "grad_norm": 0.3411576747894287, "learning_rate": 0.00019317112168253195, "loss": 11.6819, "step": 16983 }, { "epoch": 0.3555220631332161, "grad_norm": 0.2981320321559906, "learning_rate": 0.00019317032533229287, "loss": 11.6611, "step": 16984 }, { "epoch": 0.35554299589717825, "grad_norm": 0.22591908276081085, "learning_rate": 0.00019316952893726495, "loss": 11.6619, "step": 16985 }, { "epoch": 0.3555639286611404, "grad_norm": 0.2419668287038803, "learning_rate": 0.00019316873249744857, "loss": 11.6677, "step": 16986 }, { "epoch": 0.3555848614251026, "grad_norm": 0.245167538523674, "learning_rate": 0.00019316793601284414, "loss": 11.6586, "step": 16987 }, { "epoch": 0.35560579418906474, "grad_norm": 0.2519902288913727, "learning_rate": 0.000193167139483452, "loss": 11.6378, "step": 16988 }, { "epoch": 0.3556267269530269, "grad_norm": 0.3123528063297272, "learning_rate": 0.00019316634290927252, "loss": 11.6592, "step": 16989 }, { "epoch": 0.35564765971698903, "grad_norm": 0.2392304241657257, "learning_rate": 0.00019316554629030618, "loss": 11.6685, "step": 16990 }, { "epoch": 0.3556685924809512, "grad_norm": 0.3331544101238251, "learning_rate": 0.00019316474962655325, "loss": 11.6797, "step": 16991 }, { "epoch": 0.3556895252449133, "grad_norm": 0.29752838611602783, "learning_rate": 0.00019316395291801416, "loss": 11.6796, "step": 16992 }, { "epoch": 0.3557104580088755, "grad_norm": 0.2657117247581482, "learning_rate": 0.0001931631561646893, "loss": 11.6865, "step": 16993 }, { "epoch": 0.35573139077283766, "grad_norm": 0.2824183404445648, "learning_rate": 0.000193162359366579, "loss": 11.6526, "step": 16994 }, { "epoch": 0.3557523235367998, "grad_norm": 0.3847886323928833, "learning_rate": 0.00019316156252368374, "loss": 11.6901, "step": 16995 }, { "epoch": 0.35577325630076195, "grad_norm": 0.26770034432411194, "learning_rate": 0.00019316076563600384, "loss": 11.6699, "step": 16996 }, { "epoch": 0.3557941890647241, "grad_norm": 0.23826250433921814, "learning_rate": 0.0001931599687035397, "loss": 11.6791, "step": 16997 }, { "epoch": 0.35581512182868624, "grad_norm": 0.3055706322193146, "learning_rate": 0.00019315917172629167, "loss": 11.6772, "step": 16998 }, { "epoch": 0.3558360545926484, "grad_norm": 0.288818895816803, "learning_rate": 0.00019315837470426018, "loss": 11.6594, "step": 16999 }, { "epoch": 0.3558569873566106, "grad_norm": 0.3169967532157898, "learning_rate": 0.0001931575776374456, "loss": 11.6776, "step": 17000 }, { "epoch": 0.3558569873566106, "eval_loss": 11.67497444152832, "eval_runtime": 34.4019, "eval_samples_per_second": 27.935, "eval_steps_per_second": 7.005, "step": 17000 }, { "epoch": 0.35587792012057273, "grad_norm": 0.2695867717266083, "learning_rate": 0.00019315678052584824, "loss": 11.6995, "step": 17001 }, { "epoch": 0.3558988528845349, "grad_norm": 0.35392311215400696, "learning_rate": 0.00019315598336946862, "loss": 11.6833, "step": 17002 }, { "epoch": 0.355919785648497, "grad_norm": 0.23190844058990479, "learning_rate": 0.000193155186168307, "loss": 11.6835, "step": 17003 }, { "epoch": 0.35594071841245917, "grad_norm": 0.2925897538661957, "learning_rate": 0.0001931543889223639, "loss": 11.6932, "step": 17004 }, { "epoch": 0.3559616511764213, "grad_norm": 0.3201126754283905, "learning_rate": 0.00019315359163163954, "loss": 11.6802, "step": 17005 }, { "epoch": 0.3559825839403835, "grad_norm": 0.30185970664024353, "learning_rate": 0.0001931527942961344, "loss": 11.6741, "step": 17006 }, { "epoch": 0.35600351670434566, "grad_norm": 0.2626304626464844, "learning_rate": 0.00019315199691584883, "loss": 11.6826, "step": 17007 }, { "epoch": 0.3560244494683078, "grad_norm": 0.2637590765953064, "learning_rate": 0.00019315119949078326, "loss": 11.6709, "step": 17008 }, { "epoch": 0.35604538223226995, "grad_norm": 0.25749093294143677, "learning_rate": 0.00019315040202093803, "loss": 11.6593, "step": 17009 }, { "epoch": 0.3560663149962321, "grad_norm": 0.2559129595756531, "learning_rate": 0.00019314960450631354, "loss": 11.67, "step": 17010 }, { "epoch": 0.35608724776019424, "grad_norm": 0.3200802206993103, "learning_rate": 0.00019314880694691015, "loss": 11.666, "step": 17011 }, { "epoch": 0.35610818052415644, "grad_norm": 0.2766326665878296, "learning_rate": 0.0001931480093427283, "loss": 11.6815, "step": 17012 }, { "epoch": 0.3561291132881186, "grad_norm": 0.33183765411376953, "learning_rate": 0.0001931472116937683, "loss": 11.6843, "step": 17013 }, { "epoch": 0.3561500460520807, "grad_norm": 0.31533676385879517, "learning_rate": 0.00019314641400003057, "loss": 11.6659, "step": 17014 }, { "epoch": 0.35617097881604287, "grad_norm": 0.2428409904241562, "learning_rate": 0.0001931456162615155, "loss": 11.6657, "step": 17015 }, { "epoch": 0.356191911580005, "grad_norm": 0.3078097701072693, "learning_rate": 0.00019314481847822348, "loss": 11.6692, "step": 17016 }, { "epoch": 0.35621284434396716, "grad_norm": 0.27623361349105835, "learning_rate": 0.00019314402065015486, "loss": 11.6676, "step": 17017 }, { "epoch": 0.3562337771079293, "grad_norm": 0.2646852135658264, "learning_rate": 0.00019314322277731006, "loss": 11.6765, "step": 17018 }, { "epoch": 0.3562547098718915, "grad_norm": 0.2560790181159973, "learning_rate": 0.00019314242485968944, "loss": 11.6817, "step": 17019 }, { "epoch": 0.35627564263585365, "grad_norm": 0.26979711651802063, "learning_rate": 0.00019314162689729338, "loss": 11.6736, "step": 17020 }, { "epoch": 0.3562965753998158, "grad_norm": 0.23500588536262512, "learning_rate": 0.0001931408288901223, "loss": 11.6593, "step": 17021 }, { "epoch": 0.35631750816377794, "grad_norm": 0.27638453245162964, "learning_rate": 0.0001931400308381765, "loss": 11.6841, "step": 17022 }, { "epoch": 0.3563384409277401, "grad_norm": 0.26643845438957214, "learning_rate": 0.00019313923274145647, "loss": 11.6683, "step": 17023 }, { "epoch": 0.35635937369170223, "grad_norm": 0.25344929099082947, "learning_rate": 0.00019313843459996252, "loss": 11.6662, "step": 17024 }, { "epoch": 0.35638030645566443, "grad_norm": 0.2893896698951721, "learning_rate": 0.00019313763641369506, "loss": 11.6713, "step": 17025 }, { "epoch": 0.3564012392196266, "grad_norm": 0.30523762106895447, "learning_rate": 0.0001931368381826545, "loss": 11.6771, "step": 17026 }, { "epoch": 0.3564221719835887, "grad_norm": 0.2496262788772583, "learning_rate": 0.00019313603990684115, "loss": 11.6633, "step": 17027 }, { "epoch": 0.35644310474755087, "grad_norm": 0.3020300567150116, "learning_rate": 0.00019313524158625546, "loss": 11.6739, "step": 17028 }, { "epoch": 0.356464037511513, "grad_norm": 0.3019371032714844, "learning_rate": 0.00019313444322089783, "loss": 11.674, "step": 17029 }, { "epoch": 0.35648497027547515, "grad_norm": 0.3326088786125183, "learning_rate": 0.00019313364481076856, "loss": 11.6597, "step": 17030 }, { "epoch": 0.3565059030394373, "grad_norm": 0.24626658856868744, "learning_rate": 0.00019313284635586811, "loss": 11.6837, "step": 17031 }, { "epoch": 0.3565268358033995, "grad_norm": 0.35488343238830566, "learning_rate": 0.00019313204785619678, "loss": 11.6737, "step": 17032 }, { "epoch": 0.35654776856736164, "grad_norm": 0.30014607310295105, "learning_rate": 0.00019313124931175506, "loss": 11.6613, "step": 17033 }, { "epoch": 0.3565687013313238, "grad_norm": 0.2961876392364502, "learning_rate": 0.0001931304507225433, "loss": 11.6654, "step": 17034 }, { "epoch": 0.35658963409528593, "grad_norm": 0.2837608754634857, "learning_rate": 0.0001931296520885618, "loss": 11.6804, "step": 17035 }, { "epoch": 0.3566105668592481, "grad_norm": 0.2852199673652649, "learning_rate": 0.00019312885340981107, "loss": 11.6837, "step": 17036 }, { "epoch": 0.3566314996232102, "grad_norm": 0.2855612635612488, "learning_rate": 0.00019312805468629137, "loss": 11.6674, "step": 17037 }, { "epoch": 0.3566524323871724, "grad_norm": 0.2931860685348511, "learning_rate": 0.0001931272559180032, "loss": 11.6781, "step": 17038 }, { "epoch": 0.35667336515113457, "grad_norm": 0.34011542797088623, "learning_rate": 0.0001931264571049469, "loss": 11.6654, "step": 17039 }, { "epoch": 0.3566942979150967, "grad_norm": 0.28770601749420166, "learning_rate": 0.00019312565824712282, "loss": 11.6658, "step": 17040 }, { "epoch": 0.35671523067905886, "grad_norm": 0.3143799901008606, "learning_rate": 0.00019312485934453136, "loss": 11.6772, "step": 17041 }, { "epoch": 0.356736163443021, "grad_norm": 0.3074432909488678, "learning_rate": 0.00019312406039717294, "loss": 11.6968, "step": 17042 }, { "epoch": 0.35675709620698315, "grad_norm": 0.2203270047903061, "learning_rate": 0.0001931232614050479, "loss": 11.6674, "step": 17043 }, { "epoch": 0.35677802897094535, "grad_norm": 0.24812668561935425, "learning_rate": 0.00019312246236815662, "loss": 11.666, "step": 17044 }, { "epoch": 0.3567989617349075, "grad_norm": 0.2847230136394501, "learning_rate": 0.00019312166328649957, "loss": 11.6747, "step": 17045 }, { "epoch": 0.35681989449886964, "grad_norm": 0.2559318542480469, "learning_rate": 0.00019312086416007701, "loss": 11.658, "step": 17046 }, { "epoch": 0.3568408272628318, "grad_norm": 0.2841322720050812, "learning_rate": 0.00019312006498888943, "loss": 11.6522, "step": 17047 }, { "epoch": 0.35686176002679393, "grad_norm": 0.2317958027124405, "learning_rate": 0.00019311926577293714, "loss": 11.6664, "step": 17048 }, { "epoch": 0.3568826927907561, "grad_norm": 0.2792723774909973, "learning_rate": 0.00019311846651222055, "loss": 11.6641, "step": 17049 }, { "epoch": 0.3569036255547182, "grad_norm": 0.2825806438922882, "learning_rate": 0.00019311766720674008, "loss": 11.6833, "step": 17050 }, { "epoch": 0.3569245583186804, "grad_norm": 0.26730045676231384, "learning_rate": 0.00019311686785649604, "loss": 11.6869, "step": 17051 }, { "epoch": 0.35694549108264256, "grad_norm": 0.271392285823822, "learning_rate": 0.00019311606846148885, "loss": 11.6858, "step": 17052 }, { "epoch": 0.3569664238466047, "grad_norm": 0.31225094199180603, "learning_rate": 0.00019311526902171896, "loss": 11.6537, "step": 17053 }, { "epoch": 0.35698735661056685, "grad_norm": 0.30939334630966187, "learning_rate": 0.00019311446953718667, "loss": 11.6731, "step": 17054 }, { "epoch": 0.357008289374529, "grad_norm": 0.29921165108680725, "learning_rate": 0.00019311367000789237, "loss": 11.6752, "step": 17055 }, { "epoch": 0.35702922213849114, "grad_norm": 0.2413286417722702, "learning_rate": 0.00019311287043383645, "loss": 11.6649, "step": 17056 }, { "epoch": 0.35705015490245334, "grad_norm": 0.25070205330848694, "learning_rate": 0.00019311207081501933, "loss": 11.6822, "step": 17057 }, { "epoch": 0.3570710876664155, "grad_norm": 0.28036150336265564, "learning_rate": 0.00019311127115144138, "loss": 11.6691, "step": 17058 }, { "epoch": 0.35709202043037763, "grad_norm": 0.3217814564704895, "learning_rate": 0.00019311047144310295, "loss": 11.6519, "step": 17059 }, { "epoch": 0.3571129531943398, "grad_norm": 0.2734214663505554, "learning_rate": 0.00019310967169000444, "loss": 11.6676, "step": 17060 }, { "epoch": 0.3571338859583019, "grad_norm": 0.24445748329162598, "learning_rate": 0.00019310887189214629, "loss": 11.6624, "step": 17061 }, { "epoch": 0.35715481872226407, "grad_norm": 0.28972676396369934, "learning_rate": 0.0001931080720495288, "loss": 11.6776, "step": 17062 }, { "epoch": 0.35717575148622627, "grad_norm": 0.28886106610298157, "learning_rate": 0.00019310727216215244, "loss": 11.6752, "step": 17063 }, { "epoch": 0.3571966842501884, "grad_norm": 0.29411134123802185, "learning_rate": 0.0001931064722300175, "loss": 11.6616, "step": 17064 }, { "epoch": 0.35721761701415056, "grad_norm": 0.2571777403354645, "learning_rate": 0.00019310567225312445, "loss": 11.6758, "step": 17065 }, { "epoch": 0.3572385497781127, "grad_norm": 0.29835426807403564, "learning_rate": 0.0001931048722314736, "loss": 11.6736, "step": 17066 }, { "epoch": 0.35725948254207485, "grad_norm": 0.2536884546279907, "learning_rate": 0.0001931040721650654, "loss": 11.6727, "step": 17067 }, { "epoch": 0.357280415306037, "grad_norm": 0.269890159368515, "learning_rate": 0.00019310327205390018, "loss": 11.6766, "step": 17068 }, { "epoch": 0.35730134806999914, "grad_norm": 0.34103402495384216, "learning_rate": 0.0001931024718979784, "loss": 11.6727, "step": 17069 }, { "epoch": 0.35732228083396134, "grad_norm": 0.25542521476745605, "learning_rate": 0.00019310167169730037, "loss": 11.6711, "step": 17070 }, { "epoch": 0.3573432135979235, "grad_norm": 0.3062431216239929, "learning_rate": 0.0001931008714518665, "loss": 11.6813, "step": 17071 }, { "epoch": 0.3573641463618856, "grad_norm": 0.2834201753139496, "learning_rate": 0.00019310007116167717, "loss": 11.6853, "step": 17072 }, { "epoch": 0.35738507912584777, "grad_norm": 0.3151417076587677, "learning_rate": 0.00019309927082673277, "loss": 11.6603, "step": 17073 }, { "epoch": 0.3574060118898099, "grad_norm": 0.24714626371860504, "learning_rate": 0.00019309847044703368, "loss": 11.6591, "step": 17074 }, { "epoch": 0.35742694465377206, "grad_norm": 0.2810657322406769, "learning_rate": 0.0001930976700225803, "loss": 11.6838, "step": 17075 }, { "epoch": 0.35744787741773426, "grad_norm": 0.2502402663230896, "learning_rate": 0.00019309686955337301, "loss": 11.6721, "step": 17076 }, { "epoch": 0.3574688101816964, "grad_norm": 0.3461856544017792, "learning_rate": 0.00019309606903941218, "loss": 11.6705, "step": 17077 }, { "epoch": 0.35748974294565855, "grad_norm": 0.33535996079444885, "learning_rate": 0.00019309526848069818, "loss": 11.6877, "step": 17078 }, { "epoch": 0.3575106757096207, "grad_norm": 0.28525909781455994, "learning_rate": 0.00019309446787723147, "loss": 11.6976, "step": 17079 }, { "epoch": 0.35753160847358284, "grad_norm": 0.2777497470378876, "learning_rate": 0.00019309366722901236, "loss": 11.6722, "step": 17080 }, { "epoch": 0.357552541237545, "grad_norm": 0.3230760097503662, "learning_rate": 0.00019309286653604123, "loss": 11.6853, "step": 17081 }, { "epoch": 0.3575734740015072, "grad_norm": 0.27228468656539917, "learning_rate": 0.00019309206579831852, "loss": 11.6714, "step": 17082 }, { "epoch": 0.35759440676546933, "grad_norm": 0.2678815424442291, "learning_rate": 0.0001930912650158446, "loss": 11.6868, "step": 17083 }, { "epoch": 0.3576153395294315, "grad_norm": 0.24581246078014374, "learning_rate": 0.00019309046418861984, "loss": 11.6718, "step": 17084 }, { "epoch": 0.3576362722933936, "grad_norm": 0.27268266677856445, "learning_rate": 0.0001930896633166446, "loss": 11.6721, "step": 17085 }, { "epoch": 0.35765720505735576, "grad_norm": 0.25175127387046814, "learning_rate": 0.00019308886239991933, "loss": 11.692, "step": 17086 }, { "epoch": 0.3576781378213179, "grad_norm": 0.2853133976459503, "learning_rate": 0.00019308806143844434, "loss": 11.6763, "step": 17087 }, { "epoch": 0.35769907058528005, "grad_norm": 0.23808173835277557, "learning_rate": 0.00019308726043222008, "loss": 11.6702, "step": 17088 }, { "epoch": 0.35772000334924225, "grad_norm": 0.2927999794483185, "learning_rate": 0.0001930864593812469, "loss": 11.6634, "step": 17089 }, { "epoch": 0.3577409361132044, "grad_norm": 0.3009131848812103, "learning_rate": 0.0001930856582855252, "loss": 11.6642, "step": 17090 }, { "epoch": 0.35776186887716654, "grad_norm": 0.30868667364120483, "learning_rate": 0.00019308485714505537, "loss": 11.6491, "step": 17091 }, { "epoch": 0.3577828016411287, "grad_norm": 0.3408205807209015, "learning_rate": 0.00019308405595983775, "loss": 11.6696, "step": 17092 }, { "epoch": 0.35780373440509083, "grad_norm": 0.2471981942653656, "learning_rate": 0.00019308325472987278, "loss": 11.6746, "step": 17093 }, { "epoch": 0.357824667169053, "grad_norm": 0.25143080949783325, "learning_rate": 0.00019308245345516084, "loss": 11.684, "step": 17094 }, { "epoch": 0.3578455999330152, "grad_norm": 0.2569192945957184, "learning_rate": 0.00019308165213570227, "loss": 11.6629, "step": 17095 }, { "epoch": 0.3578665326969773, "grad_norm": 0.2652282118797302, "learning_rate": 0.00019308085077149748, "loss": 11.6768, "step": 17096 }, { "epoch": 0.35788746546093947, "grad_norm": 0.25342273712158203, "learning_rate": 0.0001930800493625469, "loss": 11.6745, "step": 17097 }, { "epoch": 0.3579083982249016, "grad_norm": 0.24802498519420624, "learning_rate": 0.00019307924790885084, "loss": 11.6722, "step": 17098 }, { "epoch": 0.35792933098886376, "grad_norm": 0.24301685392856598, "learning_rate": 0.0001930784464104097, "loss": 11.6557, "step": 17099 }, { "epoch": 0.3579502637528259, "grad_norm": 0.2682758867740631, "learning_rate": 0.00019307764486722393, "loss": 11.6733, "step": 17100 }, { "epoch": 0.3579711965167881, "grad_norm": 0.2582970857620239, "learning_rate": 0.00019307684327929384, "loss": 11.687, "step": 17101 }, { "epoch": 0.35799212928075025, "grad_norm": 0.274088591337204, "learning_rate": 0.0001930760416466199, "loss": 11.6844, "step": 17102 }, { "epoch": 0.3580130620447124, "grad_norm": 0.2591331899166107, "learning_rate": 0.0001930752399692024, "loss": 11.6665, "step": 17103 }, { "epoch": 0.35803399480867454, "grad_norm": 0.22266755998134613, "learning_rate": 0.00019307443824704177, "loss": 11.6711, "step": 17104 }, { "epoch": 0.3580549275726367, "grad_norm": 0.30714723467826843, "learning_rate": 0.00019307363648013838, "loss": 11.6757, "step": 17105 }, { "epoch": 0.35807586033659883, "grad_norm": 0.2878027856349945, "learning_rate": 0.00019307283466849263, "loss": 11.6773, "step": 17106 }, { "epoch": 0.358096793100561, "grad_norm": 0.251859575510025, "learning_rate": 0.00019307203281210492, "loss": 11.6536, "step": 17107 }, { "epoch": 0.3581177258645232, "grad_norm": 0.2569381892681122, "learning_rate": 0.00019307123091097562, "loss": 11.6637, "step": 17108 }, { "epoch": 0.3581386586284853, "grad_norm": 0.26685377955436707, "learning_rate": 0.00019307042896510513, "loss": 11.6761, "step": 17109 }, { "epoch": 0.35815959139244746, "grad_norm": 0.3008860945701599, "learning_rate": 0.00019306962697449378, "loss": 11.6899, "step": 17110 }, { "epoch": 0.3581805241564096, "grad_norm": 0.3100118041038513, "learning_rate": 0.000193068824939142, "loss": 11.6705, "step": 17111 }, { "epoch": 0.35820145692037175, "grad_norm": 0.308713436126709, "learning_rate": 0.0001930680228590502, "loss": 11.6755, "step": 17112 }, { "epoch": 0.3582223896843339, "grad_norm": 0.30462339520454407, "learning_rate": 0.0001930672207342187, "loss": 11.663, "step": 17113 }, { "epoch": 0.3582433224482961, "grad_norm": 0.2983757257461548, "learning_rate": 0.00019306641856464793, "loss": 11.6631, "step": 17114 }, { "epoch": 0.35826425521225824, "grad_norm": 0.27388378977775574, "learning_rate": 0.00019306561635033828, "loss": 11.6674, "step": 17115 }, { "epoch": 0.3582851879762204, "grad_norm": 0.3047347664833069, "learning_rate": 0.00019306481409129013, "loss": 11.6889, "step": 17116 }, { "epoch": 0.35830612074018253, "grad_norm": 0.3020009994506836, "learning_rate": 0.00019306401178750387, "loss": 11.6931, "step": 17117 }, { "epoch": 0.3583270535041447, "grad_norm": 0.30334416031837463, "learning_rate": 0.00019306320943897985, "loss": 11.6872, "step": 17118 }, { "epoch": 0.3583479862681068, "grad_norm": 0.2608238756656647, "learning_rate": 0.00019306240704571848, "loss": 11.6895, "step": 17119 }, { "epoch": 0.358368919032069, "grad_norm": 0.24861636757850647, "learning_rate": 0.00019306160460772013, "loss": 11.6703, "step": 17120 }, { "epoch": 0.35838985179603117, "grad_norm": 0.2560203969478607, "learning_rate": 0.00019306080212498522, "loss": 11.6778, "step": 17121 }, { "epoch": 0.3584107845599933, "grad_norm": 0.2682657837867737, "learning_rate": 0.00019305999959751412, "loss": 11.6698, "step": 17122 }, { "epoch": 0.35843171732395546, "grad_norm": 0.3043026924133301, "learning_rate": 0.00019305919702530724, "loss": 11.6745, "step": 17123 }, { "epoch": 0.3584526500879176, "grad_norm": 0.2003174126148224, "learning_rate": 0.0001930583944083649, "loss": 11.6658, "step": 17124 }, { "epoch": 0.35847358285187975, "grad_norm": 0.3628293573856354, "learning_rate": 0.00019305759174668754, "loss": 11.6698, "step": 17125 }, { "epoch": 0.3584945156158419, "grad_norm": 0.2896677255630493, "learning_rate": 0.00019305678904027552, "loss": 11.688, "step": 17126 }, { "epoch": 0.3585154483798041, "grad_norm": 0.262569785118103, "learning_rate": 0.00019305598628912925, "loss": 11.6843, "step": 17127 }, { "epoch": 0.35853638114376624, "grad_norm": 0.2639760375022888, "learning_rate": 0.00019305518349324908, "loss": 11.6633, "step": 17128 }, { "epoch": 0.3585573139077284, "grad_norm": 0.330436110496521, "learning_rate": 0.00019305438065263543, "loss": 11.6774, "step": 17129 }, { "epoch": 0.3585782466716905, "grad_norm": 0.32018885016441345, "learning_rate": 0.0001930535777672887, "loss": 11.6797, "step": 17130 }, { "epoch": 0.35859917943565267, "grad_norm": 0.3671424984931946, "learning_rate": 0.00019305277483720924, "loss": 11.675, "step": 17131 }, { "epoch": 0.3586201121996148, "grad_norm": 0.27164092659950256, "learning_rate": 0.0001930519718623974, "loss": 11.6607, "step": 17132 }, { "epoch": 0.358641044963577, "grad_norm": 0.3405766189098358, "learning_rate": 0.00019305116884285367, "loss": 11.6805, "step": 17133 }, { "epoch": 0.35866197772753916, "grad_norm": 0.32766586542129517, "learning_rate": 0.00019305036577857836, "loss": 11.676, "step": 17134 }, { "epoch": 0.3586829104915013, "grad_norm": 0.28312063217163086, "learning_rate": 0.00019304956266957187, "loss": 11.6772, "step": 17135 }, { "epoch": 0.35870384325546345, "grad_norm": 0.2873225808143616, "learning_rate": 0.00019304875951583458, "loss": 11.6732, "step": 17136 }, { "epoch": 0.3587247760194256, "grad_norm": 0.32836776971817017, "learning_rate": 0.0001930479563173669, "loss": 11.675, "step": 17137 }, { "epoch": 0.35874570878338774, "grad_norm": 0.2790750563144684, "learning_rate": 0.0001930471530741692, "loss": 11.6752, "step": 17138 }, { "epoch": 0.3587666415473499, "grad_norm": 0.2408442348241806, "learning_rate": 0.00019304634978624188, "loss": 11.6663, "step": 17139 }, { "epoch": 0.3587875743113121, "grad_norm": 0.24883349239826202, "learning_rate": 0.0001930455464535853, "loss": 11.6581, "step": 17140 }, { "epoch": 0.35880850707527423, "grad_norm": 0.2402714341878891, "learning_rate": 0.00019304474307619987, "loss": 11.6771, "step": 17141 }, { "epoch": 0.3588294398392364, "grad_norm": 0.2260150909423828, "learning_rate": 0.00019304393965408596, "loss": 11.6732, "step": 17142 }, { "epoch": 0.3588503726031985, "grad_norm": 0.34695833921432495, "learning_rate": 0.00019304313618724397, "loss": 11.6691, "step": 17143 }, { "epoch": 0.35887130536716066, "grad_norm": 0.2890873849391937, "learning_rate": 0.0001930423326756743, "loss": 11.6691, "step": 17144 }, { "epoch": 0.3588922381311228, "grad_norm": 0.3121226131916046, "learning_rate": 0.00019304152911937727, "loss": 11.6823, "step": 17145 }, { "epoch": 0.358913170895085, "grad_norm": 0.22888198494911194, "learning_rate": 0.00019304072551835333, "loss": 11.6769, "step": 17146 }, { "epoch": 0.35893410365904715, "grad_norm": 0.27152490615844727, "learning_rate": 0.00019303992187260285, "loss": 11.6714, "step": 17147 }, { "epoch": 0.3589550364230093, "grad_norm": 0.24241861701011658, "learning_rate": 0.00019303911818212624, "loss": 11.6725, "step": 17148 }, { "epoch": 0.35897596918697144, "grad_norm": 0.2272573858499527, "learning_rate": 0.00019303831444692385, "loss": 11.6901, "step": 17149 }, { "epoch": 0.3589969019509336, "grad_norm": 0.3123536705970764, "learning_rate": 0.00019303751066699604, "loss": 11.6803, "step": 17150 }, { "epoch": 0.35901783471489573, "grad_norm": 0.29681628942489624, "learning_rate": 0.00019303670684234328, "loss": 11.6686, "step": 17151 }, { "epoch": 0.35903876747885793, "grad_norm": 0.2848275601863861, "learning_rate": 0.0001930359029729659, "loss": 11.6642, "step": 17152 }, { "epoch": 0.3590597002428201, "grad_norm": 0.2799599766731262, "learning_rate": 0.00019303509905886426, "loss": 11.6547, "step": 17153 }, { "epoch": 0.3590806330067822, "grad_norm": 0.3033020794391632, "learning_rate": 0.00019303429510003883, "loss": 11.6741, "step": 17154 }, { "epoch": 0.35910156577074437, "grad_norm": 0.23876117169857025, "learning_rate": 0.00019303349109648995, "loss": 11.6564, "step": 17155 }, { "epoch": 0.3591224985347065, "grad_norm": 0.27123600244522095, "learning_rate": 0.00019303268704821798, "loss": 11.6651, "step": 17156 }, { "epoch": 0.35914343129866866, "grad_norm": 0.33490148186683655, "learning_rate": 0.00019303188295522332, "loss": 11.6828, "step": 17157 }, { "epoch": 0.3591643640626308, "grad_norm": 0.27482733130455017, "learning_rate": 0.0001930310788175064, "loss": 11.6851, "step": 17158 }, { "epoch": 0.359185296826593, "grad_norm": 0.2247827798128128, "learning_rate": 0.00019303027463506758, "loss": 11.6667, "step": 17159 }, { "epoch": 0.35920622959055515, "grad_norm": 0.2409963756799698, "learning_rate": 0.00019302947040790723, "loss": 11.6616, "step": 17160 }, { "epoch": 0.3592271623545173, "grad_norm": 0.25702694058418274, "learning_rate": 0.00019302866613602574, "loss": 11.6589, "step": 17161 }, { "epoch": 0.35924809511847944, "grad_norm": 0.2857266068458557, "learning_rate": 0.0001930278618194235, "loss": 11.6841, "step": 17162 }, { "epoch": 0.3592690278824416, "grad_norm": 0.3175048530101776, "learning_rate": 0.00019302705745810093, "loss": 11.6737, "step": 17163 }, { "epoch": 0.3592899606464037, "grad_norm": 0.28541576862335205, "learning_rate": 0.0001930262530520584, "loss": 11.6606, "step": 17164 }, { "epoch": 0.35931089341036593, "grad_norm": 0.25633570551872253, "learning_rate": 0.00019302544860129626, "loss": 11.6807, "step": 17165 }, { "epoch": 0.3593318261743281, "grad_norm": 0.2688116431236267, "learning_rate": 0.0001930246441058149, "loss": 11.6777, "step": 17166 }, { "epoch": 0.3593527589382902, "grad_norm": 0.3128076493740082, "learning_rate": 0.00019302383956561477, "loss": 11.6812, "step": 17167 }, { "epoch": 0.35937369170225236, "grad_norm": 0.28346696496009827, "learning_rate": 0.0001930230349806962, "loss": 11.6637, "step": 17168 }, { "epoch": 0.3593946244662145, "grad_norm": 0.2984893321990967, "learning_rate": 0.0001930222303510596, "loss": 11.6665, "step": 17169 }, { "epoch": 0.35941555723017665, "grad_norm": 0.28445887565612793, "learning_rate": 0.00019302142567670534, "loss": 11.6848, "step": 17170 }, { "epoch": 0.35943648999413885, "grad_norm": 0.33991414308547974, "learning_rate": 0.00019302062095763383, "loss": 11.6916, "step": 17171 }, { "epoch": 0.359457422758101, "grad_norm": 0.2986152470111847, "learning_rate": 0.00019301981619384542, "loss": 11.6662, "step": 17172 }, { "epoch": 0.35947835552206314, "grad_norm": 0.30258312821388245, "learning_rate": 0.00019301901138534054, "loss": 11.6794, "step": 17173 }, { "epoch": 0.3594992882860253, "grad_norm": 0.4618990421295166, "learning_rate": 0.00019301820653211955, "loss": 11.709, "step": 17174 }, { "epoch": 0.35952022104998743, "grad_norm": 0.2619754374027252, "learning_rate": 0.00019301740163418286, "loss": 11.6471, "step": 17175 }, { "epoch": 0.3595411538139496, "grad_norm": 0.23814961314201355, "learning_rate": 0.0001930165966915308, "loss": 11.6773, "step": 17176 }, { "epoch": 0.3595620865779117, "grad_norm": 0.2679876387119293, "learning_rate": 0.0001930157917041638, "loss": 11.676, "step": 17177 }, { "epoch": 0.3595830193418739, "grad_norm": 0.302447110414505, "learning_rate": 0.00019301498667208229, "loss": 11.6693, "step": 17178 }, { "epoch": 0.35960395210583607, "grad_norm": 0.2683567702770233, "learning_rate": 0.00019301418159528658, "loss": 11.6574, "step": 17179 }, { "epoch": 0.3596248848697982, "grad_norm": 0.27052080631256104, "learning_rate": 0.0001930133764737771, "loss": 11.6764, "step": 17180 }, { "epoch": 0.35964581763376036, "grad_norm": 0.2237369865179062, "learning_rate": 0.00019301257130755424, "loss": 11.6703, "step": 17181 }, { "epoch": 0.3596667503977225, "grad_norm": 0.27562475204467773, "learning_rate": 0.00019301176609661834, "loss": 11.6727, "step": 17182 }, { "epoch": 0.35968768316168465, "grad_norm": 0.3216896057128906, "learning_rate": 0.00019301096084096985, "loss": 11.6808, "step": 17183 }, { "epoch": 0.35970861592564685, "grad_norm": 0.24947622418403625, "learning_rate": 0.00019301015554060912, "loss": 11.6744, "step": 17184 }, { "epoch": 0.359729548689609, "grad_norm": 0.32601064443588257, "learning_rate": 0.00019300935019553653, "loss": 11.68, "step": 17185 }, { "epoch": 0.35975048145357114, "grad_norm": 0.32780423760414124, "learning_rate": 0.0001930085448057525, "loss": 11.6577, "step": 17186 }, { "epoch": 0.3597714142175333, "grad_norm": 0.2973897159099579, "learning_rate": 0.0001930077393712574, "loss": 11.669, "step": 17187 }, { "epoch": 0.3597923469814954, "grad_norm": 0.27493658661842346, "learning_rate": 0.0001930069338920516, "loss": 11.6648, "step": 17188 }, { "epoch": 0.35981327974545757, "grad_norm": 0.2447567582130432, "learning_rate": 0.0001930061283681355, "loss": 11.6639, "step": 17189 }, { "epoch": 0.35983421250941977, "grad_norm": 0.2785256505012512, "learning_rate": 0.00019300532279950948, "loss": 11.6726, "step": 17190 }, { "epoch": 0.3598551452733819, "grad_norm": 0.26198723912239075, "learning_rate": 0.00019300451718617397, "loss": 11.6849, "step": 17191 }, { "epoch": 0.35987607803734406, "grad_norm": 0.2735373079776764, "learning_rate": 0.0001930037115281293, "loss": 11.6732, "step": 17192 }, { "epoch": 0.3598970108013062, "grad_norm": 0.3398706316947937, "learning_rate": 0.0001930029058253759, "loss": 11.6752, "step": 17193 }, { "epoch": 0.35991794356526835, "grad_norm": 0.24772174656391144, "learning_rate": 0.00019300210007791414, "loss": 11.6668, "step": 17194 }, { "epoch": 0.3599388763292305, "grad_norm": 0.3143351376056671, "learning_rate": 0.00019300129428574442, "loss": 11.675, "step": 17195 }, { "epoch": 0.35995980909319264, "grad_norm": 0.2841035723686218, "learning_rate": 0.00019300048844886708, "loss": 11.6692, "step": 17196 }, { "epoch": 0.35998074185715484, "grad_norm": 0.2868373990058899, "learning_rate": 0.00019299968256728255, "loss": 11.6782, "step": 17197 }, { "epoch": 0.360001674621117, "grad_norm": 0.2640674114227295, "learning_rate": 0.00019299887664099121, "loss": 11.6645, "step": 17198 }, { "epoch": 0.36002260738507913, "grad_norm": 0.303508996963501, "learning_rate": 0.00019299807066999348, "loss": 11.6846, "step": 17199 }, { "epoch": 0.3600435401490413, "grad_norm": 0.3098019063472748, "learning_rate": 0.00019299726465428965, "loss": 11.6776, "step": 17200 }, { "epoch": 0.3600644729130034, "grad_norm": 0.26799285411834717, "learning_rate": 0.0001929964585938802, "loss": 11.6611, "step": 17201 }, { "epoch": 0.36008540567696556, "grad_norm": 0.2434607595205307, "learning_rate": 0.0001929956524887655, "loss": 11.6712, "step": 17202 }, { "epoch": 0.36010633844092776, "grad_norm": 0.2764805555343628, "learning_rate": 0.00019299484633894594, "loss": 11.6708, "step": 17203 }, { "epoch": 0.3601272712048899, "grad_norm": 0.3011089861392975, "learning_rate": 0.00019299404014442187, "loss": 11.6906, "step": 17204 }, { "epoch": 0.36014820396885205, "grad_norm": 0.2967984974384308, "learning_rate": 0.0001929932339051937, "loss": 11.6894, "step": 17205 }, { "epoch": 0.3601691367328142, "grad_norm": 0.4371606111526489, "learning_rate": 0.00019299242762126182, "loss": 11.6791, "step": 17206 }, { "epoch": 0.36019006949677634, "grad_norm": 0.24643665552139282, "learning_rate": 0.0001929916212926266, "loss": 11.6816, "step": 17207 }, { "epoch": 0.3602110022607385, "grad_norm": 0.27958258986473083, "learning_rate": 0.00019299081491928847, "loss": 11.6652, "step": 17208 }, { "epoch": 0.3602319350247007, "grad_norm": 0.2523527145385742, "learning_rate": 0.0001929900085012478, "loss": 11.6829, "step": 17209 }, { "epoch": 0.36025286778866283, "grad_norm": 0.3008050322532654, "learning_rate": 0.00019298920203850495, "loss": 11.6741, "step": 17210 }, { "epoch": 0.360273800552625, "grad_norm": 0.28775182366371155, "learning_rate": 0.00019298839553106033, "loss": 11.679, "step": 17211 }, { "epoch": 0.3602947333165871, "grad_norm": 0.2585199177265167, "learning_rate": 0.00019298758897891435, "loss": 11.6579, "step": 17212 }, { "epoch": 0.36031566608054927, "grad_norm": 0.2329792082309723, "learning_rate": 0.00019298678238206733, "loss": 11.6713, "step": 17213 }, { "epoch": 0.3603365988445114, "grad_norm": 0.2672267556190491, "learning_rate": 0.00019298597574051972, "loss": 11.6704, "step": 17214 }, { "epoch": 0.36035753160847356, "grad_norm": 0.29570820927619934, "learning_rate": 0.0001929851690542719, "loss": 11.6775, "step": 17215 }, { "epoch": 0.36037846437243576, "grad_norm": 0.2632077634334564, "learning_rate": 0.00019298436232332422, "loss": 11.6571, "step": 17216 }, { "epoch": 0.3603993971363979, "grad_norm": 0.2597660422325134, "learning_rate": 0.00019298355554767712, "loss": 11.6709, "step": 17217 }, { "epoch": 0.36042032990036005, "grad_norm": 0.3091462552547455, "learning_rate": 0.00019298274872733094, "loss": 11.6762, "step": 17218 }, { "epoch": 0.3604412626643222, "grad_norm": 0.2630300223827362, "learning_rate": 0.0001929819418622861, "loss": 11.6833, "step": 17219 }, { "epoch": 0.36046219542828434, "grad_norm": 0.2651437520980835, "learning_rate": 0.00019298113495254296, "loss": 11.6576, "step": 17220 }, { "epoch": 0.3604831281922465, "grad_norm": 0.21145427227020264, "learning_rate": 0.00019298032799810194, "loss": 11.6647, "step": 17221 }, { "epoch": 0.3605040609562087, "grad_norm": 0.2952377200126648, "learning_rate": 0.00019297952099896342, "loss": 11.6792, "step": 17222 }, { "epoch": 0.3605249937201708, "grad_norm": 0.27051037549972534, "learning_rate": 0.0001929787139551278, "loss": 11.6792, "step": 17223 }, { "epoch": 0.360545926484133, "grad_norm": 0.3159310221672058, "learning_rate": 0.00019297790686659537, "loss": 11.6745, "step": 17224 }, { "epoch": 0.3605668592480951, "grad_norm": 0.3246210217475891, "learning_rate": 0.00019297709973336669, "loss": 11.677, "step": 17225 }, { "epoch": 0.36058779201205726, "grad_norm": 0.3349575102329254, "learning_rate": 0.000192976292555442, "loss": 11.6776, "step": 17226 }, { "epoch": 0.3606087247760194, "grad_norm": 0.33363044261932373, "learning_rate": 0.00019297548533282174, "loss": 11.6767, "step": 17227 }, { "epoch": 0.36062965753998155, "grad_norm": 0.2803632915019989, "learning_rate": 0.00019297467806550636, "loss": 11.6704, "step": 17228 }, { "epoch": 0.36065059030394375, "grad_norm": 0.3187895119190216, "learning_rate": 0.00019297387075349615, "loss": 11.6716, "step": 17229 }, { "epoch": 0.3606715230679059, "grad_norm": 0.2784036695957184, "learning_rate": 0.00019297306339679152, "loss": 11.6655, "step": 17230 }, { "epoch": 0.36069245583186804, "grad_norm": 0.4749388098716736, "learning_rate": 0.00019297225599539287, "loss": 11.6699, "step": 17231 }, { "epoch": 0.3607133885958302, "grad_norm": 0.3218359649181366, "learning_rate": 0.00019297144854930063, "loss": 11.6739, "step": 17232 }, { "epoch": 0.36073432135979233, "grad_norm": 0.29051294922828674, "learning_rate": 0.00019297064105851513, "loss": 11.6566, "step": 17233 }, { "epoch": 0.3607552541237545, "grad_norm": 0.35458266735076904, "learning_rate": 0.00019296983352303678, "loss": 11.6722, "step": 17234 }, { "epoch": 0.3607761868877167, "grad_norm": 0.2894414961338043, "learning_rate": 0.000192969025942866, "loss": 11.6807, "step": 17235 }, { "epoch": 0.3607971196516788, "grad_norm": 0.27515384554862976, "learning_rate": 0.0001929682183180031, "loss": 11.6559, "step": 17236 }, { "epoch": 0.36081805241564097, "grad_norm": 0.2607389986515045, "learning_rate": 0.0001929674106484485, "loss": 11.6714, "step": 17237 }, { "epoch": 0.3608389851796031, "grad_norm": 0.2537914216518402, "learning_rate": 0.00019296660293420265, "loss": 11.674, "step": 17238 }, { "epoch": 0.36085991794356526, "grad_norm": 0.32898908853530884, "learning_rate": 0.00019296579517526588, "loss": 11.6851, "step": 17239 }, { "epoch": 0.3608808507075274, "grad_norm": 0.2678612768650055, "learning_rate": 0.00019296498737163858, "loss": 11.6771, "step": 17240 }, { "epoch": 0.3609017834714896, "grad_norm": 0.3135394752025604, "learning_rate": 0.00019296417952332115, "loss": 11.6717, "step": 17241 }, { "epoch": 0.36092271623545175, "grad_norm": 0.29110127687454224, "learning_rate": 0.000192963371630314, "loss": 11.6728, "step": 17242 }, { "epoch": 0.3609436489994139, "grad_norm": 0.24969029426574707, "learning_rate": 0.00019296256369261746, "loss": 11.6692, "step": 17243 }, { "epoch": 0.36096458176337604, "grad_norm": 0.27054744958877563, "learning_rate": 0.00019296175571023198, "loss": 11.6578, "step": 17244 }, { "epoch": 0.3609855145273382, "grad_norm": 0.3046121597290039, "learning_rate": 0.00019296094768315788, "loss": 11.6575, "step": 17245 }, { "epoch": 0.3610064472913003, "grad_norm": 0.28830477595329285, "learning_rate": 0.00019296013961139563, "loss": 11.6648, "step": 17246 }, { "epoch": 0.36102738005526247, "grad_norm": 0.3092663884162903, "learning_rate": 0.00019295933149494557, "loss": 11.6817, "step": 17247 }, { "epoch": 0.36104831281922467, "grad_norm": 0.26726284623146057, "learning_rate": 0.00019295852333380808, "loss": 11.6739, "step": 17248 }, { "epoch": 0.3610692455831868, "grad_norm": 0.2870306074619293, "learning_rate": 0.0001929577151279836, "loss": 11.6775, "step": 17249 }, { "epoch": 0.36109017834714896, "grad_norm": 0.24724805355072021, "learning_rate": 0.00019295690687747246, "loss": 11.6611, "step": 17250 }, { "epoch": 0.3611111111111111, "grad_norm": 0.37838783860206604, "learning_rate": 0.00019295609858227506, "loss": 11.6868, "step": 17251 }, { "epoch": 0.36113204387507325, "grad_norm": 0.2868368625640869, "learning_rate": 0.0001929552902423918, "loss": 11.6863, "step": 17252 }, { "epoch": 0.3611529766390354, "grad_norm": 0.3143414855003357, "learning_rate": 0.00019295448185782307, "loss": 11.6712, "step": 17253 }, { "epoch": 0.3611739094029976, "grad_norm": 0.2877773642539978, "learning_rate": 0.0001929536734285693, "loss": 11.6607, "step": 17254 }, { "epoch": 0.36119484216695974, "grad_norm": 0.3018067479133606, "learning_rate": 0.0001929528649546308, "loss": 11.6642, "step": 17255 }, { "epoch": 0.3612157749309219, "grad_norm": 0.26567575335502625, "learning_rate": 0.00019295205643600797, "loss": 11.6798, "step": 17256 }, { "epoch": 0.36123670769488403, "grad_norm": 0.3119374215602875, "learning_rate": 0.00019295124787270128, "loss": 11.6881, "step": 17257 }, { "epoch": 0.3612576404588462, "grad_norm": 0.3180207312107086, "learning_rate": 0.000192950439264711, "loss": 11.6723, "step": 17258 }, { "epoch": 0.3612785732228083, "grad_norm": 0.2334289252758026, "learning_rate": 0.00019294963061203764, "loss": 11.688, "step": 17259 }, { "epoch": 0.3612995059867705, "grad_norm": 0.27345219254493713, "learning_rate": 0.00019294882191468152, "loss": 11.6716, "step": 17260 }, { "epoch": 0.36132043875073266, "grad_norm": 0.30242064595222473, "learning_rate": 0.000192948013172643, "loss": 11.6863, "step": 17261 }, { "epoch": 0.3613413715146948, "grad_norm": 0.2566036581993103, "learning_rate": 0.00019294720438592253, "loss": 11.6975, "step": 17262 }, { "epoch": 0.36136230427865695, "grad_norm": 0.2593989074230194, "learning_rate": 0.0001929463955545205, "loss": 11.6581, "step": 17263 }, { "epoch": 0.3613832370426191, "grad_norm": 0.3297199606895447, "learning_rate": 0.00019294558667843723, "loss": 11.6691, "step": 17264 }, { "epoch": 0.36140416980658124, "grad_norm": 0.30920955538749695, "learning_rate": 0.0001929447777576732, "loss": 11.6767, "step": 17265 }, { "epoch": 0.3614251025705434, "grad_norm": 0.3443134129047394, "learning_rate": 0.0001929439687922287, "loss": 11.6868, "step": 17266 }, { "epoch": 0.3614460353345056, "grad_norm": 0.23990236222743988, "learning_rate": 0.0001929431597821042, "loss": 11.6736, "step": 17267 }, { "epoch": 0.36146696809846773, "grad_norm": 0.24737422168254852, "learning_rate": 0.0001929423507273001, "loss": 11.6648, "step": 17268 }, { "epoch": 0.3614879008624299, "grad_norm": 0.22353652119636536, "learning_rate": 0.0001929415416278167, "loss": 11.6781, "step": 17269 }, { "epoch": 0.361508833626392, "grad_norm": 0.2632157504558563, "learning_rate": 0.00019294073248365444, "loss": 11.6592, "step": 17270 }, { "epoch": 0.36152976639035417, "grad_norm": 0.27437013387680054, "learning_rate": 0.00019293992329481375, "loss": 11.6601, "step": 17271 }, { "epoch": 0.3615506991543163, "grad_norm": 0.2516850233078003, "learning_rate": 0.00019293911406129493, "loss": 11.6771, "step": 17272 }, { "epoch": 0.3615716319182785, "grad_norm": 0.28952282667160034, "learning_rate": 0.00019293830478309842, "loss": 11.6586, "step": 17273 }, { "epoch": 0.36159256468224066, "grad_norm": 0.32251614332199097, "learning_rate": 0.00019293749546022462, "loss": 11.6837, "step": 17274 }, { "epoch": 0.3616134974462028, "grad_norm": 0.26287782192230225, "learning_rate": 0.00019293668609267388, "loss": 11.6662, "step": 17275 }, { "epoch": 0.36163443021016495, "grad_norm": 0.28275033831596375, "learning_rate": 0.00019293587668044664, "loss": 11.6787, "step": 17276 }, { "epoch": 0.3616553629741271, "grad_norm": 0.32809725403785706, "learning_rate": 0.00019293506722354326, "loss": 11.67, "step": 17277 }, { "epoch": 0.36167629573808924, "grad_norm": 0.2992044687271118, "learning_rate": 0.00019293425772196412, "loss": 11.6617, "step": 17278 }, { "epoch": 0.36169722850205144, "grad_norm": 0.29213985800743103, "learning_rate": 0.0001929334481757096, "loss": 11.6923, "step": 17279 }, { "epoch": 0.3617181612660136, "grad_norm": 0.28517574071884155, "learning_rate": 0.00019293263858478015, "loss": 11.6581, "step": 17280 }, { "epoch": 0.3617390940299757, "grad_norm": 0.33123868703842163, "learning_rate": 0.0001929318289491761, "loss": 11.6734, "step": 17281 }, { "epoch": 0.36176002679393787, "grad_norm": 0.24505199491977692, "learning_rate": 0.00019293101926889784, "loss": 11.6725, "step": 17282 }, { "epoch": 0.3617809595579, "grad_norm": 0.31851711869239807, "learning_rate": 0.00019293020954394582, "loss": 11.6686, "step": 17283 }, { "epoch": 0.36180189232186216, "grad_norm": 0.2861378490924835, "learning_rate": 0.00019292939977432033, "loss": 11.6557, "step": 17284 }, { "epoch": 0.3618228250858243, "grad_norm": 0.34128695726394653, "learning_rate": 0.00019292858996002187, "loss": 11.6763, "step": 17285 }, { "epoch": 0.3618437578497865, "grad_norm": 0.30871593952178955, "learning_rate": 0.00019292778010105072, "loss": 11.6993, "step": 17286 }, { "epoch": 0.36186469061374865, "grad_norm": 0.2672678530216217, "learning_rate": 0.0001929269701974074, "loss": 11.6593, "step": 17287 }, { "epoch": 0.3618856233777108, "grad_norm": 0.32237911224365234, "learning_rate": 0.00019292616024909213, "loss": 11.702, "step": 17288 }, { "epoch": 0.36190655614167294, "grad_norm": 0.2964039444923401, "learning_rate": 0.00019292535025610546, "loss": 11.6712, "step": 17289 }, { "epoch": 0.3619274889056351, "grad_norm": 0.2546464502811432, "learning_rate": 0.0001929245402184477, "loss": 11.6734, "step": 17290 }, { "epoch": 0.36194842166959723, "grad_norm": 0.2863789498806, "learning_rate": 0.00019292373013611922, "loss": 11.6646, "step": 17291 }, { "epoch": 0.36196935443355943, "grad_norm": 0.26749104261398315, "learning_rate": 0.00019292292000912048, "loss": 11.667, "step": 17292 }, { "epoch": 0.3619902871975216, "grad_norm": 0.2736530601978302, "learning_rate": 0.0001929221098374518, "loss": 11.6813, "step": 17293 }, { "epoch": 0.3620112199614837, "grad_norm": 0.2736169397830963, "learning_rate": 0.0001929212996211136, "loss": 11.6851, "step": 17294 }, { "epoch": 0.36203215272544587, "grad_norm": 0.22756987810134888, "learning_rate": 0.0001929204893601063, "loss": 11.6779, "step": 17295 }, { "epoch": 0.362053085489408, "grad_norm": 0.2603537142276764, "learning_rate": 0.00019291967905443023, "loss": 11.6884, "step": 17296 }, { "epoch": 0.36207401825337016, "grad_norm": 0.27869006991386414, "learning_rate": 0.0001929188687040858, "loss": 11.6716, "step": 17297 }, { "epoch": 0.36209495101733236, "grad_norm": 0.2709595561027527, "learning_rate": 0.00019291805830907343, "loss": 11.6723, "step": 17298 }, { "epoch": 0.3621158837812945, "grad_norm": 0.27026134729385376, "learning_rate": 0.0001929172478693935, "loss": 11.6732, "step": 17299 }, { "epoch": 0.36213681654525665, "grad_norm": 0.264543741941452, "learning_rate": 0.00019291643738504637, "loss": 11.6779, "step": 17300 }, { "epoch": 0.3621577493092188, "grad_norm": 0.27585336565971375, "learning_rate": 0.00019291562685603243, "loss": 11.6804, "step": 17301 }, { "epoch": 0.36217868207318094, "grad_norm": 0.24487468600273132, "learning_rate": 0.0001929148162823521, "loss": 11.6641, "step": 17302 }, { "epoch": 0.3621996148371431, "grad_norm": 0.25924772024154663, "learning_rate": 0.00019291400566400578, "loss": 11.6658, "step": 17303 }, { "epoch": 0.3622205476011052, "grad_norm": 0.3167756199836731, "learning_rate": 0.00019291319500099377, "loss": 11.6784, "step": 17304 }, { "epoch": 0.3622414803650674, "grad_norm": 0.3325020968914032, "learning_rate": 0.0001929123842933166, "loss": 11.6793, "step": 17305 }, { "epoch": 0.36226241312902957, "grad_norm": 0.25818249583244324, "learning_rate": 0.00019291157354097454, "loss": 11.6477, "step": 17306 }, { "epoch": 0.3622833458929917, "grad_norm": 0.2523599863052368, "learning_rate": 0.00019291076274396805, "loss": 11.6742, "step": 17307 }, { "epoch": 0.36230427865695386, "grad_norm": 0.23976922035217285, "learning_rate": 0.00019290995190229747, "loss": 11.6829, "step": 17308 }, { "epoch": 0.362325211420916, "grad_norm": 0.35540732741355896, "learning_rate": 0.00019290914101596324, "loss": 11.6778, "step": 17309 }, { "epoch": 0.36234614418487815, "grad_norm": 0.24638502299785614, "learning_rate": 0.00019290833008496573, "loss": 11.662, "step": 17310 }, { "epoch": 0.36236707694884035, "grad_norm": 0.2495926022529602, "learning_rate": 0.00019290751910930527, "loss": 11.6663, "step": 17311 }, { "epoch": 0.3623880097128025, "grad_norm": 0.3816368281841278, "learning_rate": 0.00019290670808898237, "loss": 11.6717, "step": 17312 }, { "epoch": 0.36240894247676464, "grad_norm": 0.27708354592323303, "learning_rate": 0.00019290589702399731, "loss": 11.6727, "step": 17313 }, { "epoch": 0.3624298752407268, "grad_norm": 0.25743040442466736, "learning_rate": 0.00019290508591435053, "loss": 11.6781, "step": 17314 }, { "epoch": 0.36245080800468893, "grad_norm": 0.28790560364723206, "learning_rate": 0.00019290427476004242, "loss": 11.6709, "step": 17315 }, { "epoch": 0.3624717407686511, "grad_norm": 0.33309414982795715, "learning_rate": 0.00019290346356107336, "loss": 11.6884, "step": 17316 }, { "epoch": 0.3624926735326132, "grad_norm": 0.24698035418987274, "learning_rate": 0.00019290265231744374, "loss": 11.6683, "step": 17317 }, { "epoch": 0.3625136062965754, "grad_norm": 0.309897243976593, "learning_rate": 0.00019290184102915397, "loss": 11.6473, "step": 17318 }, { "epoch": 0.36253453906053756, "grad_norm": 0.3178090453147888, "learning_rate": 0.00019290102969620441, "loss": 11.7013, "step": 17319 }, { "epoch": 0.3625554718244997, "grad_norm": 0.29322126507759094, "learning_rate": 0.00019290021831859546, "loss": 11.662, "step": 17320 }, { "epoch": 0.36257640458846185, "grad_norm": 0.35111284255981445, "learning_rate": 0.00019289940689632752, "loss": 11.6748, "step": 17321 }, { "epoch": 0.362597337352424, "grad_norm": 0.2982501685619354, "learning_rate": 0.00019289859542940096, "loss": 11.67, "step": 17322 }, { "epoch": 0.36261827011638614, "grad_norm": 0.3195735514163971, "learning_rate": 0.0001928977839178162, "loss": 11.6606, "step": 17323 }, { "epoch": 0.36263920288034834, "grad_norm": 0.2591640055179596, "learning_rate": 0.00019289697236157363, "loss": 11.6586, "step": 17324 }, { "epoch": 0.3626601356443105, "grad_norm": 0.29877451062202454, "learning_rate": 0.0001928961607606736, "loss": 11.6698, "step": 17325 }, { "epoch": 0.36268106840827263, "grad_norm": 0.3142666220664978, "learning_rate": 0.0001928953491151165, "loss": 11.672, "step": 17326 }, { "epoch": 0.3627020011722348, "grad_norm": 0.3578208088874817, "learning_rate": 0.00019289453742490277, "loss": 11.6767, "step": 17327 }, { "epoch": 0.3627229339361969, "grad_norm": 0.3139943778514862, "learning_rate": 0.00019289372569003278, "loss": 11.6824, "step": 17328 }, { "epoch": 0.36274386670015907, "grad_norm": 0.6302174925804138, "learning_rate": 0.0001928929139105069, "loss": 11.5995, "step": 17329 }, { "epoch": 0.36276479946412127, "grad_norm": 0.24589498341083527, "learning_rate": 0.00019289210208632557, "loss": 11.6766, "step": 17330 }, { "epoch": 0.3627857322280834, "grad_norm": 0.2662030756473541, "learning_rate": 0.0001928912902174891, "loss": 11.6829, "step": 17331 }, { "epoch": 0.36280666499204556, "grad_norm": 0.2589862048625946, "learning_rate": 0.00019289047830399792, "loss": 11.6699, "step": 17332 }, { "epoch": 0.3628275977560077, "grad_norm": 0.3389517664909363, "learning_rate": 0.00019288966634585245, "loss": 11.7018, "step": 17333 }, { "epoch": 0.36284853051996985, "grad_norm": 0.2856941521167755, "learning_rate": 0.0001928888543430531, "loss": 11.6587, "step": 17334 }, { "epoch": 0.362869463283932, "grad_norm": 0.23672862350940704, "learning_rate": 0.00019288804229560013, "loss": 11.6846, "step": 17335 }, { "epoch": 0.36289039604789414, "grad_norm": 0.3282758295536041, "learning_rate": 0.00019288723020349408, "loss": 11.6816, "step": 17336 }, { "epoch": 0.36291132881185634, "grad_norm": 0.2553313076496124, "learning_rate": 0.00019288641806673525, "loss": 11.6723, "step": 17337 }, { "epoch": 0.3629322615758185, "grad_norm": 0.3610917925834656, "learning_rate": 0.00019288560588532407, "loss": 11.6746, "step": 17338 }, { "epoch": 0.3629531943397806, "grad_norm": 0.27159008383750916, "learning_rate": 0.00019288479365926092, "loss": 11.6728, "step": 17339 }, { "epoch": 0.36297412710374277, "grad_norm": 0.2492758184671402, "learning_rate": 0.00019288398138854617, "loss": 11.6877, "step": 17340 }, { "epoch": 0.3629950598677049, "grad_norm": 0.24840842187404633, "learning_rate": 0.00019288316907318024, "loss": 11.6859, "step": 17341 }, { "epoch": 0.36301599263166706, "grad_norm": 0.39801445603370667, "learning_rate": 0.0001928823567131635, "loss": 11.6919, "step": 17342 }, { "epoch": 0.36303692539562926, "grad_norm": 0.25314798951148987, "learning_rate": 0.00019288154430849636, "loss": 11.6952, "step": 17343 }, { "epoch": 0.3630578581595914, "grad_norm": 0.3081686496734619, "learning_rate": 0.0001928807318591792, "loss": 11.6531, "step": 17344 }, { "epoch": 0.36307879092355355, "grad_norm": 0.2632991671562195, "learning_rate": 0.0001928799193652124, "loss": 11.6809, "step": 17345 }, { "epoch": 0.3630997236875157, "grad_norm": 0.2797451317310333, "learning_rate": 0.00019287910682659636, "loss": 11.666, "step": 17346 }, { "epoch": 0.36312065645147784, "grad_norm": 0.29720863699913025, "learning_rate": 0.00019287829424333147, "loss": 11.673, "step": 17347 }, { "epoch": 0.36314158921544, "grad_norm": 0.27245959639549255, "learning_rate": 0.00019287748161541814, "loss": 11.6774, "step": 17348 }, { "epoch": 0.3631625219794022, "grad_norm": 0.3746148645877838, "learning_rate": 0.00019287666894285673, "loss": 11.6976, "step": 17349 }, { "epoch": 0.36318345474336433, "grad_norm": 0.26724860072135925, "learning_rate": 0.0001928758562256477, "loss": 11.6917, "step": 17350 }, { "epoch": 0.3632043875073265, "grad_norm": 0.274976521730423, "learning_rate": 0.00019287504346379128, "loss": 11.6674, "step": 17351 }, { "epoch": 0.3632253202712886, "grad_norm": 0.27629995346069336, "learning_rate": 0.00019287423065728803, "loss": 11.6741, "step": 17352 }, { "epoch": 0.36324625303525077, "grad_norm": 0.24607184529304504, "learning_rate": 0.00019287341780613827, "loss": 11.6613, "step": 17353 }, { "epoch": 0.3632671857992129, "grad_norm": 0.2865992784500122, "learning_rate": 0.00019287260491034238, "loss": 11.6754, "step": 17354 }, { "epoch": 0.36328811856317506, "grad_norm": 0.23981113731861115, "learning_rate": 0.0001928717919699008, "loss": 11.6714, "step": 17355 }, { "epoch": 0.36330905132713726, "grad_norm": 0.24205905199050903, "learning_rate": 0.00019287097898481386, "loss": 11.66, "step": 17356 }, { "epoch": 0.3633299840910994, "grad_norm": 0.22835451364517212, "learning_rate": 0.000192870165955082, "loss": 11.6842, "step": 17357 }, { "epoch": 0.36335091685506155, "grad_norm": 0.2319566160440445, "learning_rate": 0.00019286935288070559, "loss": 11.6832, "step": 17358 }, { "epoch": 0.3633718496190237, "grad_norm": 0.2927985191345215, "learning_rate": 0.00019286853976168498, "loss": 11.6547, "step": 17359 }, { "epoch": 0.36339278238298584, "grad_norm": 0.2626303434371948, "learning_rate": 0.00019286772659802062, "loss": 11.6774, "step": 17360 }, { "epoch": 0.363413715146948, "grad_norm": 0.30571088194847107, "learning_rate": 0.0001928669133897129, "loss": 11.6732, "step": 17361 }, { "epoch": 0.3634346479109102, "grad_norm": 0.2839353084564209, "learning_rate": 0.0001928661001367622, "loss": 11.6787, "step": 17362 }, { "epoch": 0.3634555806748723, "grad_norm": 0.2549206018447876, "learning_rate": 0.0001928652868391689, "loss": 11.6658, "step": 17363 }, { "epoch": 0.36347651343883447, "grad_norm": 0.2980014681816101, "learning_rate": 0.00019286447349693337, "loss": 11.6749, "step": 17364 }, { "epoch": 0.3634974462027966, "grad_norm": 0.28443238139152527, "learning_rate": 0.00019286366011005607, "loss": 11.6808, "step": 17365 }, { "epoch": 0.36351837896675876, "grad_norm": 0.6149141192436218, "learning_rate": 0.0001928628466785373, "loss": 11.7126, "step": 17366 }, { "epoch": 0.3635393117307209, "grad_norm": 0.3461689054965973, "learning_rate": 0.00019286203320237753, "loss": 11.6602, "step": 17367 }, { "epoch": 0.3635602444946831, "grad_norm": 0.3519173562526703, "learning_rate": 0.0001928612196815771, "loss": 11.6798, "step": 17368 }, { "epoch": 0.36358117725864525, "grad_norm": 0.3269363045692444, "learning_rate": 0.00019286040611613645, "loss": 11.6903, "step": 17369 }, { "epoch": 0.3636021100226074, "grad_norm": 0.2676350474357605, "learning_rate": 0.00019285959250605591, "loss": 11.682, "step": 17370 }, { "epoch": 0.36362304278656954, "grad_norm": 0.2940099537372589, "learning_rate": 0.00019285877885133593, "loss": 11.6816, "step": 17371 }, { "epoch": 0.3636439755505317, "grad_norm": 0.2677032947540283, "learning_rate": 0.00019285796515197684, "loss": 11.6718, "step": 17372 }, { "epoch": 0.36366490831449383, "grad_norm": 0.30637773871421814, "learning_rate": 0.00019285715140797912, "loss": 11.6793, "step": 17373 }, { "epoch": 0.363685841078456, "grad_norm": 0.28582480549812317, "learning_rate": 0.00019285633761934306, "loss": 11.659, "step": 17374 }, { "epoch": 0.3637067738424182, "grad_norm": 0.3119961619377136, "learning_rate": 0.0001928555237860691, "loss": 11.6657, "step": 17375 }, { "epoch": 0.3637277066063803, "grad_norm": 0.263584703207016, "learning_rate": 0.00019285470990815763, "loss": 11.6654, "step": 17376 }, { "epoch": 0.36374863937034246, "grad_norm": 0.29724106192588806, "learning_rate": 0.00019285389598560905, "loss": 11.68, "step": 17377 }, { "epoch": 0.3637695721343046, "grad_norm": 0.3307077884674072, "learning_rate": 0.00019285308201842373, "loss": 11.6832, "step": 17378 }, { "epoch": 0.36379050489826675, "grad_norm": 0.247174933552742, "learning_rate": 0.0001928522680066021, "loss": 11.6702, "step": 17379 }, { "epoch": 0.3638114376622289, "grad_norm": 0.2931458353996277, "learning_rate": 0.0001928514539501445, "loss": 11.6656, "step": 17380 }, { "epoch": 0.3638323704261911, "grad_norm": 0.3032531142234802, "learning_rate": 0.00019285063984905137, "loss": 11.6825, "step": 17381 }, { "epoch": 0.36385330319015324, "grad_norm": 0.3094694912433624, "learning_rate": 0.00019284982570332303, "loss": 11.6641, "step": 17382 }, { "epoch": 0.3638742359541154, "grad_norm": 0.21731264889240265, "learning_rate": 0.00019284901151295995, "loss": 11.6709, "step": 17383 }, { "epoch": 0.36389516871807753, "grad_norm": 0.29705682396888733, "learning_rate": 0.00019284819727796251, "loss": 11.6653, "step": 17384 }, { "epoch": 0.3639161014820397, "grad_norm": 0.3142188489437103, "learning_rate": 0.00019284738299833105, "loss": 11.6837, "step": 17385 }, { "epoch": 0.3639370342460018, "grad_norm": 0.30049824714660645, "learning_rate": 0.000192846568674066, "loss": 11.6845, "step": 17386 }, { "epoch": 0.363957967009964, "grad_norm": 0.24087852239608765, "learning_rate": 0.00019284575430516773, "loss": 11.6639, "step": 17387 }, { "epoch": 0.36397889977392617, "grad_norm": 0.2610989212989807, "learning_rate": 0.00019284493989163665, "loss": 11.6908, "step": 17388 }, { "epoch": 0.3639998325378883, "grad_norm": 0.30642974376678467, "learning_rate": 0.00019284412543347316, "loss": 11.6535, "step": 17389 }, { "epoch": 0.36402076530185046, "grad_norm": 0.3468562960624695, "learning_rate": 0.00019284331093067763, "loss": 11.6841, "step": 17390 }, { "epoch": 0.3640416980658126, "grad_norm": 0.29097604751586914, "learning_rate": 0.00019284249638325048, "loss": 11.6909, "step": 17391 }, { "epoch": 0.36406263082977475, "grad_norm": 0.31587228178977966, "learning_rate": 0.00019284168179119204, "loss": 11.6868, "step": 17392 }, { "epoch": 0.3640835635937369, "grad_norm": 0.30789312720298767, "learning_rate": 0.00019284086715450278, "loss": 11.683, "step": 17393 }, { "epoch": 0.3641044963576991, "grad_norm": 0.2252562940120697, "learning_rate": 0.00019284005247318303, "loss": 11.6725, "step": 17394 }, { "epoch": 0.36412542912166124, "grad_norm": 0.27977076172828674, "learning_rate": 0.00019283923774723324, "loss": 11.6632, "step": 17395 }, { "epoch": 0.3641463618856234, "grad_norm": 0.28585106134414673, "learning_rate": 0.00019283842297665373, "loss": 11.6748, "step": 17396 }, { "epoch": 0.3641672946495855, "grad_norm": 0.3035162091255188, "learning_rate": 0.00019283760816144496, "loss": 11.6749, "step": 17397 }, { "epoch": 0.36418822741354767, "grad_norm": 0.28863441944122314, "learning_rate": 0.00019283679330160726, "loss": 11.6768, "step": 17398 }, { "epoch": 0.3642091601775098, "grad_norm": 0.26362577080726624, "learning_rate": 0.0001928359783971411, "loss": 11.6572, "step": 17399 }, { "epoch": 0.364230092941472, "grad_norm": 0.28638073801994324, "learning_rate": 0.00019283516344804677, "loss": 11.6678, "step": 17400 }, { "epoch": 0.36425102570543416, "grad_norm": 0.28448620438575745, "learning_rate": 0.00019283434845432474, "loss": 11.6785, "step": 17401 }, { "epoch": 0.3642719584693963, "grad_norm": 0.21994231641292572, "learning_rate": 0.0001928335334159754, "loss": 11.662, "step": 17402 }, { "epoch": 0.36429289123335845, "grad_norm": 0.302413672208786, "learning_rate": 0.00019283271833299908, "loss": 11.6558, "step": 17403 }, { "epoch": 0.3643138239973206, "grad_norm": 0.21863651275634766, "learning_rate": 0.00019283190320539624, "loss": 11.6681, "step": 17404 }, { "epoch": 0.36433475676128274, "grad_norm": 0.3237805664539337, "learning_rate": 0.00019283108803316723, "loss": 11.6828, "step": 17405 }, { "epoch": 0.3643556895252449, "grad_norm": 0.2611425220966339, "learning_rate": 0.00019283027281631246, "loss": 11.665, "step": 17406 }, { "epoch": 0.3643766222892071, "grad_norm": 0.29050326347351074, "learning_rate": 0.0001928294575548323, "loss": 11.6718, "step": 17407 }, { "epoch": 0.36439755505316923, "grad_norm": 0.33285340666770935, "learning_rate": 0.0001928286422487272, "loss": 11.6817, "step": 17408 }, { "epoch": 0.3644184878171314, "grad_norm": 0.4693987965583801, "learning_rate": 0.0001928278268979975, "loss": 11.6776, "step": 17409 }, { "epoch": 0.3644394205810935, "grad_norm": 0.23610414564609528, "learning_rate": 0.0001928270115026436, "loss": 11.6785, "step": 17410 }, { "epoch": 0.36446035334505567, "grad_norm": 0.2723468542098999, "learning_rate": 0.00019282619606266589, "loss": 11.6891, "step": 17411 }, { "epoch": 0.3644812861090178, "grad_norm": 0.25484567880630493, "learning_rate": 0.00019282538057806473, "loss": 11.6749, "step": 17412 }, { "epoch": 0.36450221887298, "grad_norm": 0.3355158269405365, "learning_rate": 0.00019282456504884059, "loss": 11.6764, "step": 17413 }, { "epoch": 0.36452315163694216, "grad_norm": 0.2722611427307129, "learning_rate": 0.00019282374947499382, "loss": 11.6736, "step": 17414 }, { "epoch": 0.3645440844009043, "grad_norm": 0.3063303828239441, "learning_rate": 0.00019282293385652482, "loss": 11.6851, "step": 17415 }, { "epoch": 0.36456501716486645, "grad_norm": 0.27077001333236694, "learning_rate": 0.00019282211819343395, "loss": 11.6832, "step": 17416 }, { "epoch": 0.3645859499288286, "grad_norm": 0.3238333761692047, "learning_rate": 0.00019282130248572164, "loss": 11.6855, "step": 17417 }, { "epoch": 0.36460688269279073, "grad_norm": 0.3382206857204437, "learning_rate": 0.00019282048673338825, "loss": 11.6766, "step": 17418 }, { "epoch": 0.36462781545675294, "grad_norm": 0.30652689933776855, "learning_rate": 0.00019281967093643423, "loss": 11.6668, "step": 17419 }, { "epoch": 0.3646487482207151, "grad_norm": 0.28146520256996155, "learning_rate": 0.00019281885509485992, "loss": 11.6692, "step": 17420 }, { "epoch": 0.3646696809846772, "grad_norm": 0.30990466475486755, "learning_rate": 0.00019281803920866573, "loss": 11.7007, "step": 17421 }, { "epoch": 0.36469061374863937, "grad_norm": 0.3297424018383026, "learning_rate": 0.00019281722327785207, "loss": 11.6892, "step": 17422 }, { "epoch": 0.3647115465126015, "grad_norm": 0.27335497736930847, "learning_rate": 0.00019281640730241925, "loss": 11.6693, "step": 17423 }, { "epoch": 0.36473247927656366, "grad_norm": 0.35861653089523315, "learning_rate": 0.00019281559128236776, "loss": 11.6781, "step": 17424 }, { "epoch": 0.3647534120405258, "grad_norm": 0.2706791162490845, "learning_rate": 0.00019281477521769795, "loss": 11.6558, "step": 17425 }, { "epoch": 0.364774344804488, "grad_norm": 0.288557231426239, "learning_rate": 0.00019281395910841022, "loss": 11.6875, "step": 17426 }, { "epoch": 0.36479527756845015, "grad_norm": 0.342460960149765, "learning_rate": 0.00019281314295450496, "loss": 11.6785, "step": 17427 }, { "epoch": 0.3648162103324123, "grad_norm": 0.27376604080200195, "learning_rate": 0.00019281232675598257, "loss": 11.6739, "step": 17428 }, { "epoch": 0.36483714309637444, "grad_norm": 0.2322707176208496, "learning_rate": 0.0001928115105128434, "loss": 11.6732, "step": 17429 }, { "epoch": 0.3648580758603366, "grad_norm": 0.28789079189300537, "learning_rate": 0.00019281069422508792, "loss": 11.6684, "step": 17430 }, { "epoch": 0.36487900862429873, "grad_norm": 0.3599301874637604, "learning_rate": 0.00019280987789271645, "loss": 11.6796, "step": 17431 }, { "epoch": 0.36489994138826093, "grad_norm": 0.2731302082538605, "learning_rate": 0.00019280906151572944, "loss": 11.6583, "step": 17432 }, { "epoch": 0.3649208741522231, "grad_norm": 0.3300744891166687, "learning_rate": 0.00019280824509412722, "loss": 11.6815, "step": 17433 }, { "epoch": 0.3649418069161852, "grad_norm": 0.3338935673236847, "learning_rate": 0.00019280742862791024, "loss": 11.6609, "step": 17434 }, { "epoch": 0.36496273968014736, "grad_norm": 0.2951779067516327, "learning_rate": 0.00019280661211707888, "loss": 11.6823, "step": 17435 }, { "epoch": 0.3649836724441095, "grad_norm": 0.2783496677875519, "learning_rate": 0.0001928057955616335, "loss": 11.6774, "step": 17436 }, { "epoch": 0.36500460520807165, "grad_norm": 0.285146027803421, "learning_rate": 0.0001928049789615745, "loss": 11.6814, "step": 17437 }, { "epoch": 0.36502553797203385, "grad_norm": 0.25498464703559875, "learning_rate": 0.00019280416231690233, "loss": 11.6594, "step": 17438 }, { "epoch": 0.365046470735996, "grad_norm": 0.28107354044914246, "learning_rate": 0.0001928033456276173, "loss": 11.6869, "step": 17439 }, { "epoch": 0.36506740349995814, "grad_norm": 0.29159533977508545, "learning_rate": 0.00019280252889371986, "loss": 11.6702, "step": 17440 }, { "epoch": 0.3650883362639203, "grad_norm": 0.2792370319366455, "learning_rate": 0.0001928017121152104, "loss": 11.6833, "step": 17441 }, { "epoch": 0.36510926902788243, "grad_norm": 0.31155484914779663, "learning_rate": 0.00019280089529208927, "loss": 11.6623, "step": 17442 }, { "epoch": 0.3651302017918446, "grad_norm": 0.38185474276542664, "learning_rate": 0.00019280007842435688, "loss": 11.6681, "step": 17443 }, { "epoch": 0.3651511345558067, "grad_norm": 0.2653866708278656, "learning_rate": 0.00019279926151201366, "loss": 11.6683, "step": 17444 }, { "epoch": 0.3651720673197689, "grad_norm": 0.3401147127151489, "learning_rate": 0.00019279844455505997, "loss": 11.6811, "step": 17445 }, { "epoch": 0.36519300008373107, "grad_norm": 0.3239196538925171, "learning_rate": 0.0001927976275534962, "loss": 11.668, "step": 17446 }, { "epoch": 0.3652139328476932, "grad_norm": 0.3070553243160248, "learning_rate": 0.00019279681050732276, "loss": 11.671, "step": 17447 }, { "epoch": 0.36523486561165536, "grad_norm": 0.3299254775047302, "learning_rate": 0.00019279599341654003, "loss": 11.6784, "step": 17448 }, { "epoch": 0.3652557983756175, "grad_norm": 0.29117804765701294, "learning_rate": 0.0001927951762811484, "loss": 11.6763, "step": 17449 }, { "epoch": 0.36527673113957965, "grad_norm": 0.2510817050933838, "learning_rate": 0.0001927943591011483, "loss": 11.6902, "step": 17450 }, { "epoch": 0.36529766390354185, "grad_norm": 0.2606850862503052, "learning_rate": 0.00019279354187654005, "loss": 11.6822, "step": 17451 }, { "epoch": 0.365318596667504, "grad_norm": 0.29232266545295715, "learning_rate": 0.0001927927246073241, "loss": 11.6703, "step": 17452 }, { "epoch": 0.36533952943146614, "grad_norm": 0.31044721603393555, "learning_rate": 0.00019279190729350082, "loss": 11.6722, "step": 17453 }, { "epoch": 0.3653604621954283, "grad_norm": 0.2811093330383301, "learning_rate": 0.0001927910899350706, "loss": 11.6757, "step": 17454 }, { "epoch": 0.3653813949593904, "grad_norm": 0.2239370197057724, "learning_rate": 0.0001927902725320339, "loss": 11.6737, "step": 17455 }, { "epoch": 0.36540232772335257, "grad_norm": 0.21088382601737976, "learning_rate": 0.000192789455084391, "loss": 11.6752, "step": 17456 }, { "epoch": 0.36542326048731477, "grad_norm": 0.3279900848865509, "learning_rate": 0.00019278863759214238, "loss": 11.687, "step": 17457 }, { "epoch": 0.3654441932512769, "grad_norm": 0.28286558389663696, "learning_rate": 0.0001927878200552884, "loss": 11.6757, "step": 17458 }, { "epoch": 0.36546512601523906, "grad_norm": 0.3117018938064575, "learning_rate": 0.00019278700247382945, "loss": 11.684, "step": 17459 }, { "epoch": 0.3654860587792012, "grad_norm": 0.2917799651622772, "learning_rate": 0.0001927861848477659, "loss": 11.6813, "step": 17460 }, { "epoch": 0.36550699154316335, "grad_norm": 0.2711787521839142, "learning_rate": 0.00019278536717709822, "loss": 11.6808, "step": 17461 }, { "epoch": 0.3655279243071255, "grad_norm": 0.4703507721424103, "learning_rate": 0.00019278454946182675, "loss": 11.6785, "step": 17462 }, { "epoch": 0.36554885707108764, "grad_norm": 0.24144168198108673, "learning_rate": 0.00019278373170195185, "loss": 11.67, "step": 17463 }, { "epoch": 0.36556978983504984, "grad_norm": 0.3169805109500885, "learning_rate": 0.00019278291389747396, "loss": 11.6808, "step": 17464 }, { "epoch": 0.365590722599012, "grad_norm": 0.2451755702495575, "learning_rate": 0.00019278209604839348, "loss": 11.6733, "step": 17465 }, { "epoch": 0.36561165536297413, "grad_norm": 0.29705965518951416, "learning_rate": 0.0001927812781547108, "loss": 11.682, "step": 17466 }, { "epoch": 0.3656325881269363, "grad_norm": 0.2963055968284607, "learning_rate": 0.00019278046021642625, "loss": 11.6761, "step": 17467 }, { "epoch": 0.3656535208908984, "grad_norm": 0.24953322112560272, "learning_rate": 0.0001927796422335403, "loss": 11.6766, "step": 17468 }, { "epoch": 0.36567445365486057, "grad_norm": 0.2612346410751343, "learning_rate": 0.00019277882420605335, "loss": 11.6729, "step": 17469 }, { "epoch": 0.36569538641882277, "grad_norm": 0.3046565353870392, "learning_rate": 0.00019277800613396573, "loss": 11.6735, "step": 17470 }, { "epoch": 0.3657163191827849, "grad_norm": 0.3044291138648987, "learning_rate": 0.00019277718801727784, "loss": 11.6681, "step": 17471 }, { "epoch": 0.36573725194674706, "grad_norm": 0.27628254890441895, "learning_rate": 0.00019277636985599013, "loss": 11.6758, "step": 17472 }, { "epoch": 0.3657581847107092, "grad_norm": 0.23605114221572876, "learning_rate": 0.00019277555165010294, "loss": 11.6902, "step": 17473 }, { "epoch": 0.36577911747467134, "grad_norm": 0.25322166085243225, "learning_rate": 0.0001927747333996167, "loss": 11.6621, "step": 17474 }, { "epoch": 0.3658000502386335, "grad_norm": 0.2906867563724518, "learning_rate": 0.0001927739151045318, "loss": 11.678, "step": 17475 }, { "epoch": 0.3658209830025957, "grad_norm": 0.23988716304302216, "learning_rate": 0.00019277309676484858, "loss": 11.662, "step": 17476 }, { "epoch": 0.36584191576655783, "grad_norm": 0.27556219696998596, "learning_rate": 0.00019277227838056748, "loss": 11.6656, "step": 17477 }, { "epoch": 0.36586284853052, "grad_norm": 0.3957583010196686, "learning_rate": 0.0001927714599516889, "loss": 11.6625, "step": 17478 }, { "epoch": 0.3658837812944821, "grad_norm": 0.20650343596935272, "learning_rate": 0.00019277064147821325, "loss": 11.6648, "step": 17479 }, { "epoch": 0.36590471405844427, "grad_norm": 0.40494316816329956, "learning_rate": 0.00019276982296014087, "loss": 11.6742, "step": 17480 }, { "epoch": 0.3659256468224064, "grad_norm": 0.3076879382133484, "learning_rate": 0.00019276900439747213, "loss": 11.6645, "step": 17481 }, { "epoch": 0.36594657958636856, "grad_norm": 0.3040468990802765, "learning_rate": 0.00019276818579020753, "loss": 11.6566, "step": 17482 }, { "epoch": 0.36596751235033076, "grad_norm": 0.24279849231243134, "learning_rate": 0.00019276736713834739, "loss": 11.6729, "step": 17483 }, { "epoch": 0.3659884451142929, "grad_norm": 0.27656304836273193, "learning_rate": 0.0001927665484418921, "loss": 11.6859, "step": 17484 }, { "epoch": 0.36600937787825505, "grad_norm": 0.2600311040878296, "learning_rate": 0.00019276572970084208, "loss": 11.6683, "step": 17485 }, { "epoch": 0.3660303106422172, "grad_norm": 0.28058305382728577, "learning_rate": 0.0001927649109151977, "loss": 11.6764, "step": 17486 }, { "epoch": 0.36605124340617934, "grad_norm": 0.2731846570968628, "learning_rate": 0.0001927640920849594, "loss": 11.6755, "step": 17487 }, { "epoch": 0.3660721761701415, "grad_norm": 0.24457994103431702, "learning_rate": 0.00019276327321012752, "loss": 11.6596, "step": 17488 }, { "epoch": 0.3660931089341037, "grad_norm": 0.30968111753463745, "learning_rate": 0.0001927624542907025, "loss": 11.6637, "step": 17489 }, { "epoch": 0.36611404169806583, "grad_norm": 0.26477131247520447, "learning_rate": 0.0001927616353266847, "loss": 11.6615, "step": 17490 }, { "epoch": 0.366134974462028, "grad_norm": 0.3140282928943634, "learning_rate": 0.0001927608163180745, "loss": 11.6536, "step": 17491 }, { "epoch": 0.3661559072259901, "grad_norm": 0.33339574933052063, "learning_rate": 0.00019275999726487233, "loss": 11.6904, "step": 17492 }, { "epoch": 0.36617683998995226, "grad_norm": 0.2643621861934662, "learning_rate": 0.00019275917816707859, "loss": 11.6775, "step": 17493 }, { "epoch": 0.3661977727539144, "grad_norm": 0.21774783730506897, "learning_rate": 0.00019275835902469361, "loss": 11.6707, "step": 17494 }, { "epoch": 0.36621870551787655, "grad_norm": 0.32710376381874084, "learning_rate": 0.00019275753983771787, "loss": 11.6593, "step": 17495 }, { "epoch": 0.36623963828183875, "grad_norm": 0.2560828626155853, "learning_rate": 0.00019275672060615172, "loss": 11.6703, "step": 17496 }, { "epoch": 0.3662605710458009, "grad_norm": 0.24043403565883636, "learning_rate": 0.00019275590132999552, "loss": 11.668, "step": 17497 }, { "epoch": 0.36628150380976304, "grad_norm": 0.27386483550071716, "learning_rate": 0.00019275508200924973, "loss": 11.6608, "step": 17498 }, { "epoch": 0.3663024365737252, "grad_norm": 0.3006191551685333, "learning_rate": 0.0001927542626439147, "loss": 11.6822, "step": 17499 }, { "epoch": 0.36632336933768733, "grad_norm": 0.2527864873409271, "learning_rate": 0.00019275344323399084, "loss": 11.6695, "step": 17500 }, { "epoch": 0.3663443021016495, "grad_norm": 0.3192417621612549, "learning_rate": 0.00019275262377947856, "loss": 11.6777, "step": 17501 }, { "epoch": 0.3663652348656117, "grad_norm": 0.26126760244369507, "learning_rate": 0.0001927518042803782, "loss": 11.6845, "step": 17502 }, { "epoch": 0.3663861676295738, "grad_norm": 0.21593564748764038, "learning_rate": 0.00019275098473669022, "loss": 11.6583, "step": 17503 }, { "epoch": 0.36640710039353597, "grad_norm": 0.2643071413040161, "learning_rate": 0.00019275016514841499, "loss": 11.665, "step": 17504 }, { "epoch": 0.3664280331574981, "grad_norm": 0.29714080691337585, "learning_rate": 0.00019274934551555286, "loss": 11.6734, "step": 17505 }, { "epoch": 0.36644896592146026, "grad_norm": 0.28279492259025574, "learning_rate": 0.00019274852583810428, "loss": 11.6687, "step": 17506 }, { "epoch": 0.3664698986854224, "grad_norm": 0.24173954129219055, "learning_rate": 0.00019274770611606962, "loss": 11.6655, "step": 17507 }, { "epoch": 0.3664908314493846, "grad_norm": 0.29301777482032776, "learning_rate": 0.00019274688634944925, "loss": 11.6698, "step": 17508 }, { "epoch": 0.36651176421334675, "grad_norm": 0.25633710622787476, "learning_rate": 0.00019274606653824362, "loss": 11.6611, "step": 17509 }, { "epoch": 0.3665326969773089, "grad_norm": 0.25647950172424316, "learning_rate": 0.00019274524668245314, "loss": 11.6503, "step": 17510 }, { "epoch": 0.36655362974127104, "grad_norm": 0.32615935802459717, "learning_rate": 0.0001927444267820781, "loss": 11.6796, "step": 17511 }, { "epoch": 0.3665745625052332, "grad_norm": 0.29746559262275696, "learning_rate": 0.000192743606837119, "loss": 11.6736, "step": 17512 }, { "epoch": 0.3665954952691953, "grad_norm": 0.2680235803127289, "learning_rate": 0.00019274278684757614, "loss": 11.6732, "step": 17513 }, { "epoch": 0.36661642803315747, "grad_norm": 0.29404208064079285, "learning_rate": 0.00019274196681345, "loss": 11.7018, "step": 17514 }, { "epoch": 0.36663736079711967, "grad_norm": 0.25961384177207947, "learning_rate": 0.00019274114673474094, "loss": 11.6682, "step": 17515 }, { "epoch": 0.3666582935610818, "grad_norm": 0.3329431116580963, "learning_rate": 0.0001927403266114493, "loss": 11.6752, "step": 17516 }, { "epoch": 0.36667922632504396, "grad_norm": 0.33905264735221863, "learning_rate": 0.00019273950644357562, "loss": 11.6737, "step": 17517 }, { "epoch": 0.3667001590890061, "grad_norm": 0.315621018409729, "learning_rate": 0.00019273868623112016, "loss": 11.6882, "step": 17518 }, { "epoch": 0.36672109185296825, "grad_norm": 0.34519484639167786, "learning_rate": 0.00019273786597408333, "loss": 11.6572, "step": 17519 }, { "epoch": 0.3667420246169304, "grad_norm": 0.2862425446510315, "learning_rate": 0.00019273704567246552, "loss": 11.6835, "step": 17520 }, { "epoch": 0.3667629573808926, "grad_norm": 0.28255581855773926, "learning_rate": 0.00019273622532626724, "loss": 11.6782, "step": 17521 }, { "epoch": 0.36678389014485474, "grad_norm": 0.27738144993782043, "learning_rate": 0.00019273540493548874, "loss": 11.6678, "step": 17522 }, { "epoch": 0.3668048229088169, "grad_norm": 0.26057228446006775, "learning_rate": 0.0001927345845001305, "loss": 11.66, "step": 17523 }, { "epoch": 0.36682575567277903, "grad_norm": 0.3133874833583832, "learning_rate": 0.00019273376402019284, "loss": 11.6667, "step": 17524 }, { "epoch": 0.3668466884367412, "grad_norm": 0.250346302986145, "learning_rate": 0.00019273294349567626, "loss": 11.6628, "step": 17525 }, { "epoch": 0.3668676212007033, "grad_norm": 0.30129796266555786, "learning_rate": 0.00019273212292658106, "loss": 11.6811, "step": 17526 }, { "epoch": 0.3668885539646655, "grad_norm": 0.3442072868347168, "learning_rate": 0.00019273130231290767, "loss": 11.6701, "step": 17527 }, { "epoch": 0.36690948672862767, "grad_norm": 0.26009508967399597, "learning_rate": 0.00019273048165465651, "loss": 11.6733, "step": 17528 }, { "epoch": 0.3669304194925898, "grad_norm": 0.27167925238609314, "learning_rate": 0.00019272966095182793, "loss": 11.6542, "step": 17529 }, { "epoch": 0.36695135225655195, "grad_norm": 0.23433062434196472, "learning_rate": 0.00019272884020442237, "loss": 11.6814, "step": 17530 }, { "epoch": 0.3669722850205141, "grad_norm": 0.27080827951431274, "learning_rate": 0.00019272801941244016, "loss": 11.6649, "step": 17531 }, { "epoch": 0.36699321778447624, "grad_norm": 0.26545313000679016, "learning_rate": 0.00019272719857588174, "loss": 11.6637, "step": 17532 }, { "epoch": 0.3670141505484384, "grad_norm": 0.2814275920391083, "learning_rate": 0.00019272637769474752, "loss": 11.6782, "step": 17533 }, { "epoch": 0.3670350833124006, "grad_norm": 0.3401321768760681, "learning_rate": 0.00019272555676903784, "loss": 11.6731, "step": 17534 }, { "epoch": 0.36705601607636273, "grad_norm": 0.2887554466724396, "learning_rate": 0.00019272473579875316, "loss": 11.6897, "step": 17535 }, { "epoch": 0.3670769488403249, "grad_norm": 0.33748456835746765, "learning_rate": 0.00019272391478389382, "loss": 11.6814, "step": 17536 }, { "epoch": 0.367097881604287, "grad_norm": 0.28444400429725647, "learning_rate": 0.00019272309372446024, "loss": 11.6831, "step": 17537 }, { "epoch": 0.36711881436824917, "grad_norm": 0.23415258526802063, "learning_rate": 0.0001927222726204528, "loss": 11.6786, "step": 17538 }, { "epoch": 0.3671397471322113, "grad_norm": 0.24669872224330902, "learning_rate": 0.00019272145147187196, "loss": 11.6889, "step": 17539 }, { "epoch": 0.3671606798961735, "grad_norm": 0.2842104434967041, "learning_rate": 0.000192720630278718, "loss": 11.6714, "step": 17540 }, { "epoch": 0.36718161266013566, "grad_norm": 0.33018457889556885, "learning_rate": 0.0001927198090409914, "loss": 11.6721, "step": 17541 }, { "epoch": 0.3672025454240978, "grad_norm": 0.3392390310764313, "learning_rate": 0.00019271898775869253, "loss": 11.6877, "step": 17542 }, { "epoch": 0.36722347818805995, "grad_norm": 0.274530827999115, "learning_rate": 0.00019271816643182175, "loss": 11.659, "step": 17543 }, { "epoch": 0.3672444109520221, "grad_norm": 0.4054679274559021, "learning_rate": 0.00019271734506037954, "loss": 11.6707, "step": 17544 }, { "epoch": 0.36726534371598424, "grad_norm": 0.3098743259906769, "learning_rate": 0.0001927165236443662, "loss": 11.6619, "step": 17545 }, { "epoch": 0.36728627647994644, "grad_norm": 0.29924464225769043, "learning_rate": 0.00019271570218378222, "loss": 11.6893, "step": 17546 }, { "epoch": 0.3673072092439086, "grad_norm": 0.2630927860736847, "learning_rate": 0.0001927148806786279, "loss": 11.675, "step": 17547 }, { "epoch": 0.36732814200787073, "grad_norm": 0.2773263156414032, "learning_rate": 0.0001927140591289037, "loss": 11.6559, "step": 17548 }, { "epoch": 0.3673490747718329, "grad_norm": 0.3295728266239166, "learning_rate": 0.00019271323753461, "loss": 11.6799, "step": 17549 }, { "epoch": 0.367370007535795, "grad_norm": 0.39680448174476624, "learning_rate": 0.00019271241589574714, "loss": 11.6726, "step": 17550 }, { "epoch": 0.36739094029975716, "grad_norm": 0.28641656041145325, "learning_rate": 0.00019271159421231562, "loss": 11.6714, "step": 17551 }, { "epoch": 0.3674118730637193, "grad_norm": 0.294356107711792, "learning_rate": 0.00019271077248431576, "loss": 11.6634, "step": 17552 }, { "epoch": 0.3674328058276815, "grad_norm": 0.4779207110404968, "learning_rate": 0.000192709950711748, "loss": 11.6527, "step": 17553 }, { "epoch": 0.36745373859164365, "grad_norm": 0.26330435276031494, "learning_rate": 0.00019270912889461265, "loss": 11.6639, "step": 17554 }, { "epoch": 0.3674746713556058, "grad_norm": 0.35008201003074646, "learning_rate": 0.00019270830703291018, "loss": 11.6806, "step": 17555 }, { "epoch": 0.36749560411956794, "grad_norm": 0.4426853060722351, "learning_rate": 0.00019270748512664098, "loss": 11.6835, "step": 17556 }, { "epoch": 0.3675165368835301, "grad_norm": 0.32029983401298523, "learning_rate": 0.00019270666317580543, "loss": 11.6704, "step": 17557 }, { "epoch": 0.36753746964749223, "grad_norm": 0.23529234528541565, "learning_rate": 0.00019270584118040394, "loss": 11.6652, "step": 17558 }, { "epoch": 0.36755840241145443, "grad_norm": 0.2910207211971283, "learning_rate": 0.0001927050191404369, "loss": 11.6571, "step": 17559 }, { "epoch": 0.3675793351754166, "grad_norm": 0.21161378920078278, "learning_rate": 0.0001927041970559047, "loss": 11.6663, "step": 17560 }, { "epoch": 0.3676002679393787, "grad_norm": 0.2965371310710907, "learning_rate": 0.00019270337492680771, "loss": 11.6666, "step": 17561 }, { "epoch": 0.36762120070334087, "grad_norm": 0.2988393008708954, "learning_rate": 0.00019270255275314637, "loss": 11.68, "step": 17562 }, { "epoch": 0.367642133467303, "grad_norm": 0.26812514662742615, "learning_rate": 0.00019270173053492106, "loss": 11.6695, "step": 17563 }, { "epoch": 0.36766306623126516, "grad_norm": 0.2589428722858429, "learning_rate": 0.00019270090827213214, "loss": 11.6715, "step": 17564 }, { "epoch": 0.36768399899522736, "grad_norm": 0.2823813259601593, "learning_rate": 0.00019270008596478008, "loss": 11.6728, "step": 17565 }, { "epoch": 0.3677049317591895, "grad_norm": 0.32183709740638733, "learning_rate": 0.00019269926361286519, "loss": 11.66, "step": 17566 }, { "epoch": 0.36772586452315165, "grad_norm": 0.2683444321155548, "learning_rate": 0.0001926984412163879, "loss": 11.6766, "step": 17567 }, { "epoch": 0.3677467972871138, "grad_norm": 0.26076462864875793, "learning_rate": 0.00019269761877534867, "loss": 11.673, "step": 17568 }, { "epoch": 0.36776773005107594, "grad_norm": 0.3344903886318207, "learning_rate": 0.0001926967962897478, "loss": 11.6819, "step": 17569 }, { "epoch": 0.3677886628150381, "grad_norm": 0.28807732462882996, "learning_rate": 0.0001926959737595857, "loss": 11.6872, "step": 17570 }, { "epoch": 0.3678095955790002, "grad_norm": 0.3776153326034546, "learning_rate": 0.00019269515118486283, "loss": 11.6621, "step": 17571 }, { "epoch": 0.3678305283429624, "grad_norm": 0.3260062336921692, "learning_rate": 0.0001926943285655795, "loss": 11.671, "step": 17572 }, { "epoch": 0.36785146110692457, "grad_norm": 0.2641657590866089, "learning_rate": 0.0001926935059017362, "loss": 11.6767, "step": 17573 }, { "epoch": 0.3678723938708867, "grad_norm": 0.34742507338523865, "learning_rate": 0.00019269268319333324, "loss": 11.6962, "step": 17574 }, { "epoch": 0.36789332663484886, "grad_norm": 0.2665502429008484, "learning_rate": 0.00019269186044037107, "loss": 11.681, "step": 17575 }, { "epoch": 0.367914259398811, "grad_norm": 0.29137328267097473, "learning_rate": 0.00019269103764285003, "loss": 11.6793, "step": 17576 }, { "epoch": 0.36793519216277315, "grad_norm": 0.32724064588546753, "learning_rate": 0.0001926902148007706, "loss": 11.6775, "step": 17577 }, { "epoch": 0.36795612492673535, "grad_norm": 0.3336000144481659, "learning_rate": 0.00019268939191413308, "loss": 11.695, "step": 17578 }, { "epoch": 0.3679770576906975, "grad_norm": 0.3148568272590637, "learning_rate": 0.00019268856898293794, "loss": 11.6651, "step": 17579 }, { "epoch": 0.36799799045465964, "grad_norm": 0.31433361768722534, "learning_rate": 0.00019268774600718553, "loss": 11.6804, "step": 17580 }, { "epoch": 0.3680189232186218, "grad_norm": 0.2843415141105652, "learning_rate": 0.00019268692298687626, "loss": 11.668, "step": 17581 }, { "epoch": 0.36803985598258393, "grad_norm": 0.29542508721351624, "learning_rate": 0.00019268609992201054, "loss": 11.6707, "step": 17582 }, { "epoch": 0.3680607887465461, "grad_norm": 0.24019016325473785, "learning_rate": 0.00019268527681258878, "loss": 11.6817, "step": 17583 }, { "epoch": 0.3680817215105083, "grad_norm": 0.23668532073497772, "learning_rate": 0.0001926844536586113, "loss": 11.6764, "step": 17584 }, { "epoch": 0.3681026542744704, "grad_norm": 0.2866821587085724, "learning_rate": 0.0001926836304600786, "loss": 11.6668, "step": 17585 }, { "epoch": 0.36812358703843256, "grad_norm": 0.24663493037223816, "learning_rate": 0.00019268280721699096, "loss": 11.6908, "step": 17586 }, { "epoch": 0.3681445198023947, "grad_norm": 0.26715248823165894, "learning_rate": 0.00019268198392934888, "loss": 11.6874, "step": 17587 }, { "epoch": 0.36816545256635685, "grad_norm": 0.3283717632293701, "learning_rate": 0.0001926811605971527, "loss": 11.6804, "step": 17588 }, { "epoch": 0.368186385330319, "grad_norm": 0.34397247433662415, "learning_rate": 0.0001926803372204028, "loss": 11.6629, "step": 17589 }, { "epoch": 0.36820731809428114, "grad_norm": 0.3098047077655792, "learning_rate": 0.00019267951379909966, "loss": 11.6697, "step": 17590 }, { "epoch": 0.36822825085824334, "grad_norm": 0.30338433384895325, "learning_rate": 0.0001926786903332436, "loss": 11.6644, "step": 17591 }, { "epoch": 0.3682491836222055, "grad_norm": 0.2528296709060669, "learning_rate": 0.00019267786682283504, "loss": 11.6611, "step": 17592 }, { "epoch": 0.36827011638616763, "grad_norm": 0.29820019006729126, "learning_rate": 0.00019267704326787435, "loss": 11.666, "step": 17593 }, { "epoch": 0.3682910491501298, "grad_norm": 0.2662726044654846, "learning_rate": 0.00019267621966836197, "loss": 11.6649, "step": 17594 }, { "epoch": 0.3683119819140919, "grad_norm": 0.2751735746860504, "learning_rate": 0.00019267539602429827, "loss": 11.6695, "step": 17595 }, { "epoch": 0.36833291467805407, "grad_norm": 0.34383517503738403, "learning_rate": 0.00019267457233568364, "loss": 11.6867, "step": 17596 }, { "epoch": 0.36835384744201627, "grad_norm": 0.30419427156448364, "learning_rate": 0.00019267374860251847, "loss": 11.6686, "step": 17597 }, { "epoch": 0.3683747802059784, "grad_norm": 0.2810532748699188, "learning_rate": 0.0001926729248248032, "loss": 11.6687, "step": 17598 }, { "epoch": 0.36839571296994056, "grad_norm": 0.39437034726142883, "learning_rate": 0.00019267210100253822, "loss": 11.6812, "step": 17599 }, { "epoch": 0.3684166457339027, "grad_norm": 0.25770482420921326, "learning_rate": 0.00019267127713572385, "loss": 11.6629, "step": 17600 }, { "epoch": 0.36843757849786485, "grad_norm": 0.2677479088306427, "learning_rate": 0.00019267045322436056, "loss": 11.6872, "step": 17601 }, { "epoch": 0.368458511261827, "grad_norm": 0.30105939507484436, "learning_rate": 0.00019266962926844876, "loss": 11.6641, "step": 17602 }, { "epoch": 0.36847944402578914, "grad_norm": 0.29109153151512146, "learning_rate": 0.00019266880526798877, "loss": 11.6751, "step": 17603 }, { "epoch": 0.36850037678975134, "grad_norm": 0.30338820815086365, "learning_rate": 0.00019266798122298103, "loss": 11.6829, "step": 17604 }, { "epoch": 0.3685213095537135, "grad_norm": 0.3227078318595886, "learning_rate": 0.00019266715713342594, "loss": 11.68, "step": 17605 }, { "epoch": 0.36854224231767563, "grad_norm": 0.297719806432724, "learning_rate": 0.00019266633299932392, "loss": 11.6811, "step": 17606 }, { "epoch": 0.3685631750816378, "grad_norm": 0.24839425086975098, "learning_rate": 0.00019266550882067528, "loss": 11.6595, "step": 17607 }, { "epoch": 0.3685841078455999, "grad_norm": 0.28510239720344543, "learning_rate": 0.00019266468459748054, "loss": 11.6917, "step": 17608 }, { "epoch": 0.36860504060956206, "grad_norm": 0.2508524954319, "learning_rate": 0.00019266386032973997, "loss": 11.6524, "step": 17609 }, { "epoch": 0.36862597337352426, "grad_norm": 0.31609463691711426, "learning_rate": 0.00019266303601745408, "loss": 11.6789, "step": 17610 }, { "epoch": 0.3686469061374864, "grad_norm": 0.2960744798183441, "learning_rate": 0.00019266221166062316, "loss": 11.6867, "step": 17611 }, { "epoch": 0.36866783890144855, "grad_norm": 0.3255254626274109, "learning_rate": 0.00019266138725924768, "loss": 11.659, "step": 17612 }, { "epoch": 0.3686887716654107, "grad_norm": 0.3836366832256317, "learning_rate": 0.000192660562813328, "loss": 11.6877, "step": 17613 }, { "epoch": 0.36870970442937284, "grad_norm": 0.3288120925426483, "learning_rate": 0.00019265973832286454, "loss": 11.675, "step": 17614 }, { "epoch": 0.368730637193335, "grad_norm": 0.25650840997695923, "learning_rate": 0.0001926589137878577, "loss": 11.6817, "step": 17615 }, { "epoch": 0.3687515699572972, "grad_norm": 0.37601983547210693, "learning_rate": 0.00019265808920830785, "loss": 11.696, "step": 17616 }, { "epoch": 0.36877250272125933, "grad_norm": 0.3181994557380676, "learning_rate": 0.00019265726458421543, "loss": 11.681, "step": 17617 }, { "epoch": 0.3687934354852215, "grad_norm": 0.32486283779144287, "learning_rate": 0.00019265643991558075, "loss": 11.6807, "step": 17618 }, { "epoch": 0.3688143682491836, "grad_norm": 0.2735552191734314, "learning_rate": 0.0001926556152024043, "loss": 11.6629, "step": 17619 }, { "epoch": 0.36883530101314577, "grad_norm": 0.31917470693588257, "learning_rate": 0.00019265479044468643, "loss": 11.6895, "step": 17620 }, { "epoch": 0.3688562337771079, "grad_norm": 0.2981281876564026, "learning_rate": 0.00019265396564242755, "loss": 11.6703, "step": 17621 }, { "epoch": 0.36887716654107006, "grad_norm": 0.2679312527179718, "learning_rate": 0.00019265314079562807, "loss": 11.6711, "step": 17622 }, { "epoch": 0.36889809930503226, "grad_norm": 0.440339058637619, "learning_rate": 0.00019265231590428835, "loss": 11.6814, "step": 17623 }, { "epoch": 0.3689190320689944, "grad_norm": 0.20878705382347107, "learning_rate": 0.0001926514909684088, "loss": 11.6653, "step": 17624 }, { "epoch": 0.36893996483295655, "grad_norm": 0.2966923713684082, "learning_rate": 0.00019265066598798982, "loss": 11.6719, "step": 17625 }, { "epoch": 0.3689608975969187, "grad_norm": 0.2836446762084961, "learning_rate": 0.0001926498409630318, "loss": 11.6717, "step": 17626 }, { "epoch": 0.36898183036088084, "grad_norm": 0.31384769082069397, "learning_rate": 0.00019264901589353514, "loss": 11.668, "step": 17627 }, { "epoch": 0.369002763124843, "grad_norm": 0.3145841658115387, "learning_rate": 0.00019264819077950026, "loss": 11.662, "step": 17628 }, { "epoch": 0.3690236958888052, "grad_norm": 0.2950732707977295, "learning_rate": 0.00019264736562092755, "loss": 11.6903, "step": 17629 }, { "epoch": 0.3690446286527673, "grad_norm": 0.3205551505088806, "learning_rate": 0.0001926465404178174, "loss": 11.6707, "step": 17630 }, { "epoch": 0.36906556141672947, "grad_norm": 0.23980829119682312, "learning_rate": 0.00019264571517017015, "loss": 11.6767, "step": 17631 }, { "epoch": 0.3690864941806916, "grad_norm": 0.30014222860336304, "learning_rate": 0.00019264488987798627, "loss": 11.6658, "step": 17632 }, { "epoch": 0.36910742694465376, "grad_norm": 0.3406776189804077, "learning_rate": 0.00019264406454126616, "loss": 11.67, "step": 17633 }, { "epoch": 0.3691283597086159, "grad_norm": 0.2840489149093628, "learning_rate": 0.0001926432391600102, "loss": 11.679, "step": 17634 }, { "epoch": 0.3691492924725781, "grad_norm": 0.29486578702926636, "learning_rate": 0.00019264241373421874, "loss": 11.6655, "step": 17635 }, { "epoch": 0.36917022523654025, "grad_norm": 0.262782484292984, "learning_rate": 0.00019264158826389223, "loss": 11.6559, "step": 17636 }, { "epoch": 0.3691911580005024, "grad_norm": 0.27705973386764526, "learning_rate": 0.00019264076274903104, "loss": 11.6826, "step": 17637 }, { "epoch": 0.36921209076446454, "grad_norm": 0.2746037542819977, "learning_rate": 0.0001926399371896356, "loss": 11.6838, "step": 17638 }, { "epoch": 0.3692330235284267, "grad_norm": 0.25343939661979675, "learning_rate": 0.0001926391115857063, "loss": 11.6669, "step": 17639 }, { "epoch": 0.36925395629238883, "grad_norm": 0.26708126068115234, "learning_rate": 0.0001926382859372435, "loss": 11.6689, "step": 17640 }, { "epoch": 0.369274889056351, "grad_norm": 0.266525000333786, "learning_rate": 0.0001926374602442476, "loss": 11.6635, "step": 17641 }, { "epoch": 0.3692958218203132, "grad_norm": 0.26881343126296997, "learning_rate": 0.00019263663450671902, "loss": 11.6702, "step": 17642 }, { "epoch": 0.3693167545842753, "grad_norm": 0.25790926814079285, "learning_rate": 0.00019263580872465815, "loss": 11.6664, "step": 17643 }, { "epoch": 0.36933768734823746, "grad_norm": 0.36075133085250854, "learning_rate": 0.00019263498289806544, "loss": 11.688, "step": 17644 }, { "epoch": 0.3693586201121996, "grad_norm": 0.2867531180381775, "learning_rate": 0.0001926341570269412, "loss": 11.6834, "step": 17645 }, { "epoch": 0.36937955287616175, "grad_norm": 0.33019304275512695, "learning_rate": 0.00019263333111128587, "loss": 11.6884, "step": 17646 }, { "epoch": 0.3694004856401239, "grad_norm": 0.3274680972099304, "learning_rate": 0.00019263250515109986, "loss": 11.679, "step": 17647 }, { "epoch": 0.3694214184040861, "grad_norm": 0.27381837368011475, "learning_rate": 0.0001926316791463835, "loss": 11.6916, "step": 17648 }, { "epoch": 0.36944235116804824, "grad_norm": 0.27843064069747925, "learning_rate": 0.0001926308530971373, "loss": 11.676, "step": 17649 }, { "epoch": 0.3694632839320104, "grad_norm": 0.33194050192832947, "learning_rate": 0.00019263002700336152, "loss": 11.6712, "step": 17650 }, { "epoch": 0.36948421669597253, "grad_norm": 0.3929242789745331, "learning_rate": 0.00019262920086505667, "loss": 11.6771, "step": 17651 }, { "epoch": 0.3695051494599347, "grad_norm": 0.28516101837158203, "learning_rate": 0.0001926283746822231, "loss": 11.6775, "step": 17652 }, { "epoch": 0.3695260822238968, "grad_norm": 0.28715813159942627, "learning_rate": 0.00019262754845486124, "loss": 11.6697, "step": 17653 }, { "epoch": 0.369547014987859, "grad_norm": 0.36074578762054443, "learning_rate": 0.00019262672218297143, "loss": 11.6715, "step": 17654 }, { "epoch": 0.36956794775182117, "grad_norm": 0.3249436020851135, "learning_rate": 0.0001926258958665541, "loss": 11.6857, "step": 17655 }, { "epoch": 0.3695888805157833, "grad_norm": 0.3149310350418091, "learning_rate": 0.00019262506950560966, "loss": 11.6713, "step": 17656 }, { "epoch": 0.36960981327974546, "grad_norm": 0.3071182370185852, "learning_rate": 0.00019262424310013848, "loss": 11.6641, "step": 17657 }, { "epoch": 0.3696307460437076, "grad_norm": 0.23505495488643646, "learning_rate": 0.00019262341665014096, "loss": 11.6814, "step": 17658 }, { "epoch": 0.36965167880766975, "grad_norm": 0.3556511700153351, "learning_rate": 0.00019262259015561753, "loss": 11.6634, "step": 17659 }, { "epoch": 0.3696726115716319, "grad_norm": 0.2560129761695862, "learning_rate": 0.00019262176361656858, "loss": 11.66, "step": 17660 }, { "epoch": 0.3696935443355941, "grad_norm": 0.26937243342399597, "learning_rate": 0.00019262093703299447, "loss": 11.6738, "step": 17661 }, { "epoch": 0.36971447709955624, "grad_norm": 0.3503726124763489, "learning_rate": 0.0001926201104048956, "loss": 11.6905, "step": 17662 }, { "epoch": 0.3697354098635184, "grad_norm": 0.2704465091228485, "learning_rate": 0.0001926192837322724, "loss": 11.6755, "step": 17663 }, { "epoch": 0.3697563426274805, "grad_norm": 0.2683663070201874, "learning_rate": 0.00019261845701512527, "loss": 11.693, "step": 17664 }, { "epoch": 0.3697772753914427, "grad_norm": 0.2778322696685791, "learning_rate": 0.00019261763025345458, "loss": 11.6801, "step": 17665 }, { "epoch": 0.3697982081554048, "grad_norm": 0.3641083240509033, "learning_rate": 0.00019261680344726076, "loss": 11.6695, "step": 17666 }, { "epoch": 0.369819140919367, "grad_norm": 0.44350460171699524, "learning_rate": 0.00019261597659654415, "loss": 11.6708, "step": 17667 }, { "epoch": 0.36984007368332916, "grad_norm": 0.2754305303096771, "learning_rate": 0.0001926151497013052, "loss": 11.6721, "step": 17668 }, { "epoch": 0.3698610064472913, "grad_norm": 0.3117747902870178, "learning_rate": 0.00019261432276154432, "loss": 11.6806, "step": 17669 }, { "epoch": 0.36988193921125345, "grad_norm": 0.3634875416755676, "learning_rate": 0.00019261349577726183, "loss": 11.6756, "step": 17670 }, { "epoch": 0.3699028719752156, "grad_norm": 0.28023552894592285, "learning_rate": 0.0001926126687484582, "loss": 11.6637, "step": 17671 }, { "epoch": 0.36992380473917774, "grad_norm": 0.39790382981300354, "learning_rate": 0.0001926118416751338, "loss": 11.6735, "step": 17672 }, { "epoch": 0.36994473750313994, "grad_norm": 0.30921387672424316, "learning_rate": 0.00019261101455728903, "loss": 11.6827, "step": 17673 }, { "epoch": 0.3699656702671021, "grad_norm": 0.28135907649993896, "learning_rate": 0.0001926101873949243, "loss": 11.6744, "step": 17674 }, { "epoch": 0.36998660303106423, "grad_norm": 0.35348182916641235, "learning_rate": 0.00019260936018804, "loss": 11.6708, "step": 17675 }, { "epoch": 0.3700075357950264, "grad_norm": 0.28114622831344604, "learning_rate": 0.0001926085329366365, "loss": 11.6712, "step": 17676 }, { "epoch": 0.3700284685589885, "grad_norm": 0.25364238023757935, "learning_rate": 0.00019260770564071423, "loss": 11.6653, "step": 17677 }, { "epoch": 0.37004940132295067, "grad_norm": 0.26466044783592224, "learning_rate": 0.0001926068783002736, "loss": 11.6667, "step": 17678 }, { "epoch": 0.3700703340869128, "grad_norm": 0.3254198729991913, "learning_rate": 0.000192606050915315, "loss": 11.6736, "step": 17679 }, { "epoch": 0.370091266850875, "grad_norm": 0.3046332597732544, "learning_rate": 0.0001926052234858388, "loss": 11.6898, "step": 17680 }, { "epoch": 0.37011219961483716, "grad_norm": 0.28273719549179077, "learning_rate": 0.00019260439601184536, "loss": 11.6632, "step": 17681 }, { "epoch": 0.3701331323787993, "grad_norm": 0.2753434479236603, "learning_rate": 0.0001926035684933352, "loss": 11.6658, "step": 17682 }, { "epoch": 0.37015406514276145, "grad_norm": 0.28106632828712463, "learning_rate": 0.0001926027409303086, "loss": 11.6795, "step": 17683 }, { "epoch": 0.3701749979067236, "grad_norm": 0.2291097640991211, "learning_rate": 0.00019260191332276606, "loss": 11.6821, "step": 17684 }, { "epoch": 0.37019593067068574, "grad_norm": 0.24320535361766815, "learning_rate": 0.0001926010856707079, "loss": 11.6716, "step": 17685 }, { "epoch": 0.37021686343464794, "grad_norm": 0.29680123925209045, "learning_rate": 0.00019260025797413453, "loss": 11.6824, "step": 17686 }, { "epoch": 0.3702377961986101, "grad_norm": 0.2564712166786194, "learning_rate": 0.00019259943023304635, "loss": 11.6755, "step": 17687 }, { "epoch": 0.3702587289625722, "grad_norm": 0.3302239775657654, "learning_rate": 0.0001925986024474438, "loss": 11.6658, "step": 17688 }, { "epoch": 0.37027966172653437, "grad_norm": 0.35695284605026245, "learning_rate": 0.00019259777461732723, "loss": 11.6778, "step": 17689 }, { "epoch": 0.3703005944904965, "grad_norm": 0.3055529296398163, "learning_rate": 0.00019259694674269705, "loss": 11.6611, "step": 17690 }, { "epoch": 0.37032152725445866, "grad_norm": 0.2840222120285034, "learning_rate": 0.00019259611882355365, "loss": 11.6913, "step": 17691 }, { "epoch": 0.3703424600184208, "grad_norm": 0.3418848514556885, "learning_rate": 0.0001925952908598975, "loss": 11.6939, "step": 17692 }, { "epoch": 0.370363392782383, "grad_norm": 0.2665480673313141, "learning_rate": 0.00019259446285172887, "loss": 11.6649, "step": 17693 }, { "epoch": 0.37038432554634515, "grad_norm": 0.306525856256485, "learning_rate": 0.00019259363479904825, "loss": 11.6738, "step": 17694 }, { "epoch": 0.3704052583103073, "grad_norm": 0.2854554057121277, "learning_rate": 0.00019259280670185603, "loss": 11.6619, "step": 17695 }, { "epoch": 0.37042619107426944, "grad_norm": 0.2610689699649811, "learning_rate": 0.00019259197856015257, "loss": 11.6884, "step": 17696 }, { "epoch": 0.3704471238382316, "grad_norm": 0.24602927267551422, "learning_rate": 0.0001925911503739383, "loss": 11.6687, "step": 17697 }, { "epoch": 0.37046805660219373, "grad_norm": 0.31112220883369446, "learning_rate": 0.00019259032214321363, "loss": 11.653, "step": 17698 }, { "epoch": 0.37048898936615593, "grad_norm": 0.28439491987228394, "learning_rate": 0.0001925894938679789, "loss": 11.6694, "step": 17699 }, { "epoch": 0.3705099221301181, "grad_norm": 0.3161987066268921, "learning_rate": 0.00019258866554823455, "loss": 11.6796, "step": 17700 }, { "epoch": 0.3705308548940802, "grad_norm": 0.23372913897037506, "learning_rate": 0.00019258783718398103, "loss": 11.6893, "step": 17701 }, { "epoch": 0.37055178765804236, "grad_norm": 0.29960742592811584, "learning_rate": 0.0001925870087752186, "loss": 11.68, "step": 17702 }, { "epoch": 0.3705727204220045, "grad_norm": 0.2572924494743347, "learning_rate": 0.00019258618032194776, "loss": 11.6643, "step": 17703 }, { "epoch": 0.37059365318596665, "grad_norm": 0.3360368311405182, "learning_rate": 0.00019258535182416892, "loss": 11.6823, "step": 17704 }, { "epoch": 0.37061458594992885, "grad_norm": 0.2995077967643738, "learning_rate": 0.0001925845232818824, "loss": 11.6824, "step": 17705 }, { "epoch": 0.370635518713891, "grad_norm": 0.28227725625038147, "learning_rate": 0.0001925836946950887, "loss": 11.6802, "step": 17706 }, { "epoch": 0.37065645147785314, "grad_norm": 0.30704811215400696, "learning_rate": 0.00019258286606378813, "loss": 11.6623, "step": 17707 }, { "epoch": 0.3706773842418153, "grad_norm": 0.30946123600006104, "learning_rate": 0.00019258203738798112, "loss": 11.6795, "step": 17708 }, { "epoch": 0.37069831700577743, "grad_norm": 0.339839369058609, "learning_rate": 0.00019258120866766807, "loss": 11.6557, "step": 17709 }, { "epoch": 0.3707192497697396, "grad_norm": 0.2995203733444214, "learning_rate": 0.0001925803799028494, "loss": 11.6781, "step": 17710 }, { "epoch": 0.3707401825337017, "grad_norm": 0.26547548174858093, "learning_rate": 0.00019257955109352546, "loss": 11.6619, "step": 17711 }, { "epoch": 0.3707611152976639, "grad_norm": 0.32452353835105896, "learning_rate": 0.00019257872223969668, "loss": 11.688, "step": 17712 }, { "epoch": 0.37078204806162607, "grad_norm": 0.34682708978652954, "learning_rate": 0.00019257789334136346, "loss": 11.6689, "step": 17713 }, { "epoch": 0.3708029808255882, "grad_norm": 0.29155972599983215, "learning_rate": 0.00019257706439852615, "loss": 11.6861, "step": 17714 }, { "epoch": 0.37082391358955036, "grad_norm": 0.32227852940559387, "learning_rate": 0.00019257623541118525, "loss": 11.6702, "step": 17715 }, { "epoch": 0.3708448463535125, "grad_norm": 0.39418578147888184, "learning_rate": 0.00019257540637934107, "loss": 11.6729, "step": 17716 }, { "epoch": 0.37086577911747465, "grad_norm": 0.2383851259946823, "learning_rate": 0.00019257457730299405, "loss": 11.669, "step": 17717 }, { "epoch": 0.37088671188143685, "grad_norm": 0.3158298432826996, "learning_rate": 0.00019257374818214458, "loss": 11.6918, "step": 17718 }, { "epoch": 0.370907644645399, "grad_norm": 0.25270530581474304, "learning_rate": 0.00019257291901679302, "loss": 11.6772, "step": 17719 }, { "epoch": 0.37092857740936114, "grad_norm": 0.26737359166145325, "learning_rate": 0.0001925720898069398, "loss": 11.678, "step": 17720 }, { "epoch": 0.3709495101733233, "grad_norm": 0.25743401050567627, "learning_rate": 0.00019257126055258536, "loss": 11.6622, "step": 17721 }, { "epoch": 0.3709704429372854, "grad_norm": 0.2401367425918579, "learning_rate": 0.00019257043125373005, "loss": 11.6896, "step": 17722 }, { "epoch": 0.37099137570124757, "grad_norm": 0.2424694150686264, "learning_rate": 0.00019256960191037426, "loss": 11.683, "step": 17723 }, { "epoch": 0.3710123084652098, "grad_norm": 0.38432908058166504, "learning_rate": 0.00019256877252251843, "loss": 11.6876, "step": 17724 }, { "epoch": 0.3710332412291719, "grad_norm": 0.24729475378990173, "learning_rate": 0.00019256794309016292, "loss": 11.669, "step": 17725 }, { "epoch": 0.37105417399313406, "grad_norm": 0.2629285752773285, "learning_rate": 0.00019256711361330818, "loss": 11.6729, "step": 17726 }, { "epoch": 0.3710751067570962, "grad_norm": 0.30879053473472595, "learning_rate": 0.00019256628409195452, "loss": 11.672, "step": 17727 }, { "epoch": 0.37109603952105835, "grad_norm": 0.32982194423675537, "learning_rate": 0.00019256545452610242, "loss": 11.6617, "step": 17728 }, { "epoch": 0.3711169722850205, "grad_norm": 0.30575987696647644, "learning_rate": 0.00019256462491575225, "loss": 11.6515, "step": 17729 }, { "epoch": 0.37113790504898264, "grad_norm": 0.2774678170681, "learning_rate": 0.00019256379526090442, "loss": 11.6651, "step": 17730 }, { "epoch": 0.37115883781294484, "grad_norm": 0.2829032838344574, "learning_rate": 0.00019256296556155927, "loss": 11.6862, "step": 17731 }, { "epoch": 0.371179770576907, "grad_norm": 0.2817457914352417, "learning_rate": 0.0001925621358177173, "loss": 11.6656, "step": 17732 }, { "epoch": 0.37120070334086913, "grad_norm": 0.2576284110546112, "learning_rate": 0.00019256130602937884, "loss": 11.6706, "step": 17733 }, { "epoch": 0.3712216361048313, "grad_norm": 0.28219953179359436, "learning_rate": 0.00019256047619654431, "loss": 11.6751, "step": 17734 }, { "epoch": 0.3712425688687934, "grad_norm": 0.2649763524532318, "learning_rate": 0.0001925596463192141, "loss": 11.6501, "step": 17735 }, { "epoch": 0.37126350163275557, "grad_norm": 0.25457146763801575, "learning_rate": 0.00019255881639738858, "loss": 11.6838, "step": 17736 }, { "epoch": 0.37128443439671777, "grad_norm": 0.29697874188423157, "learning_rate": 0.00019255798643106825, "loss": 11.6674, "step": 17737 }, { "epoch": 0.3713053671606799, "grad_norm": 0.2716732323169708, "learning_rate": 0.0001925571564202534, "loss": 11.6862, "step": 17738 }, { "epoch": 0.37132629992464206, "grad_norm": 0.2458890825510025, "learning_rate": 0.00019255632636494446, "loss": 11.6678, "step": 17739 }, { "epoch": 0.3713472326886042, "grad_norm": 0.4223184287548065, "learning_rate": 0.00019255549626514187, "loss": 11.6763, "step": 17740 }, { "epoch": 0.37136816545256635, "grad_norm": 0.3313099145889282, "learning_rate": 0.00019255466612084598, "loss": 11.6916, "step": 17741 }, { "epoch": 0.3713890982165285, "grad_norm": 0.4517689347267151, "learning_rate": 0.0001925538359320572, "loss": 11.6767, "step": 17742 }, { "epoch": 0.3714100309804907, "grad_norm": 0.2837156653404236, "learning_rate": 0.00019255300569877596, "loss": 11.6482, "step": 17743 }, { "epoch": 0.37143096374445284, "grad_norm": 0.22279037535190582, "learning_rate": 0.00019255217542100264, "loss": 11.6661, "step": 17744 }, { "epoch": 0.371451896508415, "grad_norm": 0.2671840786933899, "learning_rate": 0.0001925513450987376, "loss": 11.6757, "step": 17745 }, { "epoch": 0.3714728292723771, "grad_norm": 0.26038897037506104, "learning_rate": 0.00019255051473198129, "loss": 11.6832, "step": 17746 }, { "epoch": 0.37149376203633927, "grad_norm": 0.26522448658943176, "learning_rate": 0.0001925496843207341, "loss": 11.6763, "step": 17747 }, { "epoch": 0.3715146948003014, "grad_norm": 0.26304149627685547, "learning_rate": 0.00019254885386499638, "loss": 11.6625, "step": 17748 }, { "epoch": 0.37153562756426356, "grad_norm": 0.5665823817253113, "learning_rate": 0.00019254802336476863, "loss": 11.5897, "step": 17749 }, { "epoch": 0.37155656032822576, "grad_norm": 0.26416313648223877, "learning_rate": 0.00019254719282005116, "loss": 11.676, "step": 17750 }, { "epoch": 0.3715774930921879, "grad_norm": 0.3210226893424988, "learning_rate": 0.00019254636223084442, "loss": 11.6895, "step": 17751 }, { "epoch": 0.37159842585615005, "grad_norm": 0.24915826320648193, "learning_rate": 0.00019254553159714875, "loss": 11.6763, "step": 17752 }, { "epoch": 0.3716193586201122, "grad_norm": 0.301840603351593, "learning_rate": 0.0001925447009189646, "loss": 11.6726, "step": 17753 }, { "epoch": 0.37164029138407434, "grad_norm": 0.26791536808013916, "learning_rate": 0.0001925438701962924, "loss": 11.687, "step": 17754 }, { "epoch": 0.3716612241480365, "grad_norm": 0.27645057439804077, "learning_rate": 0.00019254303942913247, "loss": 11.6596, "step": 17755 }, { "epoch": 0.3716821569119987, "grad_norm": 0.34006375074386597, "learning_rate": 0.00019254220861748527, "loss": 11.6791, "step": 17756 }, { "epoch": 0.37170308967596083, "grad_norm": 0.21710698306560516, "learning_rate": 0.00019254137776135116, "loss": 11.6762, "step": 17757 }, { "epoch": 0.371724022439923, "grad_norm": 0.2975142002105713, "learning_rate": 0.00019254054686073055, "loss": 11.6541, "step": 17758 }, { "epoch": 0.3717449552038851, "grad_norm": 0.3136174976825714, "learning_rate": 0.00019253971591562383, "loss": 11.6805, "step": 17759 }, { "epoch": 0.37176588796784726, "grad_norm": 0.30980727076530457, "learning_rate": 0.00019253888492603145, "loss": 11.6689, "step": 17760 }, { "epoch": 0.3717868207318094, "grad_norm": 0.28843092918395996, "learning_rate": 0.00019253805389195377, "loss": 11.6828, "step": 17761 }, { "epoch": 0.3718077534957716, "grad_norm": 0.23456580936908722, "learning_rate": 0.00019253722281339117, "loss": 11.6741, "step": 17762 }, { "epoch": 0.37182868625973375, "grad_norm": 0.3052934408187866, "learning_rate": 0.00019253639169034407, "loss": 11.6737, "step": 17763 }, { "epoch": 0.3718496190236959, "grad_norm": 0.31338179111480713, "learning_rate": 0.0001925355605228129, "loss": 11.691, "step": 17764 }, { "epoch": 0.37187055178765804, "grad_norm": 0.2788873612880707, "learning_rate": 0.00019253472931079802, "loss": 11.6745, "step": 17765 }, { "epoch": 0.3718914845516202, "grad_norm": 0.29840654134750366, "learning_rate": 0.0001925338980542998, "loss": 11.6805, "step": 17766 }, { "epoch": 0.37191241731558233, "grad_norm": 0.539242148399353, "learning_rate": 0.00019253306675331874, "loss": 11.6987, "step": 17767 }, { "epoch": 0.3719333500795445, "grad_norm": 0.2669370472431183, "learning_rate": 0.00019253223540785516, "loss": 11.6742, "step": 17768 }, { "epoch": 0.3719542828435067, "grad_norm": 0.264845073223114, "learning_rate": 0.00019253140401790948, "loss": 11.6827, "step": 17769 }, { "epoch": 0.3719752156074688, "grad_norm": 0.3436167538166046, "learning_rate": 0.0001925305725834821, "loss": 11.6643, "step": 17770 }, { "epoch": 0.37199614837143097, "grad_norm": 0.270455002784729, "learning_rate": 0.0001925297411045734, "loss": 11.6665, "step": 17771 }, { "epoch": 0.3720170811353931, "grad_norm": 0.2673594057559967, "learning_rate": 0.00019252890958118383, "loss": 11.6698, "step": 17772 }, { "epoch": 0.37203801389935526, "grad_norm": 0.319570928812027, "learning_rate": 0.00019252807801331377, "loss": 11.6784, "step": 17773 }, { "epoch": 0.3720589466633174, "grad_norm": 0.26096105575561523, "learning_rate": 0.00019252724640096358, "loss": 11.6564, "step": 17774 }, { "epoch": 0.3720798794272796, "grad_norm": 0.2584330141544342, "learning_rate": 0.0001925264147441337, "loss": 11.6589, "step": 17775 }, { "epoch": 0.37210081219124175, "grad_norm": 0.2950060963630676, "learning_rate": 0.00019252558304282447, "loss": 11.6653, "step": 17776 }, { "epoch": 0.3721217449552039, "grad_norm": 0.34396833181381226, "learning_rate": 0.0001925247512970364, "loss": 11.6885, "step": 17777 }, { "epoch": 0.37214267771916604, "grad_norm": 0.26217955350875854, "learning_rate": 0.0001925239195067698, "loss": 11.6702, "step": 17778 }, { "epoch": 0.3721636104831282, "grad_norm": 0.2505326271057129, "learning_rate": 0.0001925230876720251, "loss": 11.6603, "step": 17779 }, { "epoch": 0.3721845432470903, "grad_norm": 0.3123444616794586, "learning_rate": 0.00019252225579280272, "loss": 11.6676, "step": 17780 }, { "epoch": 0.37220547601105247, "grad_norm": 0.39042502641677856, "learning_rate": 0.000192521423869103, "loss": 11.6661, "step": 17781 }, { "epoch": 0.37222640877501467, "grad_norm": 0.30394092202186584, "learning_rate": 0.0001925205919009264, "loss": 11.6731, "step": 17782 }, { "epoch": 0.3722473415389768, "grad_norm": 0.3139370381832123, "learning_rate": 0.0001925197598882733, "loss": 11.6826, "step": 17783 }, { "epoch": 0.37226827430293896, "grad_norm": 0.27239659428596497, "learning_rate": 0.00019251892783114408, "loss": 11.675, "step": 17784 }, { "epoch": 0.3722892070669011, "grad_norm": 0.3009944260120392, "learning_rate": 0.00019251809572953918, "loss": 11.6801, "step": 17785 }, { "epoch": 0.37231013983086325, "grad_norm": 3.5741240978240967, "learning_rate": 0.00019251726358345897, "loss": 11.6802, "step": 17786 }, { "epoch": 0.3723310725948254, "grad_norm": 0.36416560411453247, "learning_rate": 0.00019251643139290385, "loss": 11.6754, "step": 17787 }, { "epoch": 0.3723520053587876, "grad_norm": 0.3336300551891327, "learning_rate": 0.00019251559915787426, "loss": 11.6533, "step": 17788 }, { "epoch": 0.37237293812274974, "grad_norm": 0.29392755031585693, "learning_rate": 0.0001925147668783705, "loss": 11.6501, "step": 17789 }, { "epoch": 0.3723938708867119, "grad_norm": 0.2917764186859131, "learning_rate": 0.0001925139345543931, "loss": 11.6581, "step": 17790 }, { "epoch": 0.37241480365067403, "grad_norm": 0.2920847237110138, "learning_rate": 0.00019251310218594236, "loss": 11.6626, "step": 17791 }, { "epoch": 0.3724357364146362, "grad_norm": 0.3165396451950073, "learning_rate": 0.00019251226977301875, "loss": 11.6664, "step": 17792 }, { "epoch": 0.3724566691785983, "grad_norm": 0.2848545014858246, "learning_rate": 0.0001925114373156226, "loss": 11.6729, "step": 17793 }, { "epoch": 0.3724776019425605, "grad_norm": 0.3193669617176056, "learning_rate": 0.00019251060481375438, "loss": 11.6535, "step": 17794 }, { "epoch": 0.37249853470652267, "grad_norm": 0.4108915328979492, "learning_rate": 0.00019250977226741443, "loss": 11.6844, "step": 17795 }, { "epoch": 0.3725194674704848, "grad_norm": 0.3455093204975128, "learning_rate": 0.0001925089396766032, "loss": 11.699, "step": 17796 }, { "epoch": 0.37254040023444696, "grad_norm": 0.3481331169605255, "learning_rate": 0.00019250810704132105, "loss": 11.675, "step": 17797 }, { "epoch": 0.3725613329984091, "grad_norm": 0.2751075029373169, "learning_rate": 0.00019250727436156842, "loss": 11.6787, "step": 17798 }, { "epoch": 0.37258226576237125, "grad_norm": 0.24136926233768463, "learning_rate": 0.0001925064416373457, "loss": 11.6672, "step": 17799 }, { "epoch": 0.3726031985263334, "grad_norm": 0.29109054803848267, "learning_rate": 0.00019250560886865326, "loss": 11.6675, "step": 17800 }, { "epoch": 0.3726241312902956, "grad_norm": 0.2892839014530182, "learning_rate": 0.00019250477605549152, "loss": 11.6662, "step": 17801 }, { "epoch": 0.37264506405425774, "grad_norm": 0.3410956561565399, "learning_rate": 0.0001925039431978609, "loss": 11.6659, "step": 17802 }, { "epoch": 0.3726659968182199, "grad_norm": 0.3450641632080078, "learning_rate": 0.00019250311029576174, "loss": 11.6886, "step": 17803 }, { "epoch": 0.372686929582182, "grad_norm": 0.2071351259946823, "learning_rate": 0.0001925022773491945, "loss": 11.6788, "step": 17804 }, { "epoch": 0.37270786234614417, "grad_norm": 0.2952772378921509, "learning_rate": 0.00019250144435815957, "loss": 11.6701, "step": 17805 }, { "epoch": 0.3727287951101063, "grad_norm": 0.24554482102394104, "learning_rate": 0.00019250061132265733, "loss": 11.6729, "step": 17806 }, { "epoch": 0.3727497278740685, "grad_norm": 0.2804144024848938, "learning_rate": 0.00019249977824268818, "loss": 11.6747, "step": 17807 }, { "epoch": 0.37277066063803066, "grad_norm": 0.24844145774841309, "learning_rate": 0.00019249894511825258, "loss": 11.6776, "step": 17808 }, { "epoch": 0.3727915934019928, "grad_norm": 0.36279296875, "learning_rate": 0.00019249811194935084, "loss": 11.6826, "step": 17809 }, { "epoch": 0.37281252616595495, "grad_norm": 0.30695730447769165, "learning_rate": 0.00019249727873598343, "loss": 11.6691, "step": 17810 }, { "epoch": 0.3728334589299171, "grad_norm": 0.2706131637096405, "learning_rate": 0.00019249644547815071, "loss": 11.6634, "step": 17811 }, { "epoch": 0.37285439169387924, "grad_norm": 0.2436893880367279, "learning_rate": 0.0001924956121758531, "loss": 11.6639, "step": 17812 }, { "epoch": 0.37287532445784144, "grad_norm": 0.3022702932357788, "learning_rate": 0.00019249477882909099, "loss": 11.679, "step": 17813 }, { "epoch": 0.3728962572218036, "grad_norm": 0.24898430705070496, "learning_rate": 0.00019249394543786478, "loss": 11.6763, "step": 17814 }, { "epoch": 0.37291718998576573, "grad_norm": 0.38189586997032166, "learning_rate": 0.0001924931120021749, "loss": 11.6692, "step": 17815 }, { "epoch": 0.3729381227497279, "grad_norm": 0.31963014602661133, "learning_rate": 0.00019249227852202172, "loss": 11.6628, "step": 17816 }, { "epoch": 0.37295905551369, "grad_norm": 0.31336939334869385, "learning_rate": 0.00019249144499740566, "loss": 11.6732, "step": 17817 }, { "epoch": 0.37297998827765216, "grad_norm": 0.3077103793621063, "learning_rate": 0.00019249061142832707, "loss": 11.6585, "step": 17818 }, { "epoch": 0.3730009210416143, "grad_norm": 0.2816663384437561, "learning_rate": 0.0001924897778147864, "loss": 11.6631, "step": 17819 }, { "epoch": 0.3730218538055765, "grad_norm": 0.3069625198841095, "learning_rate": 0.00019248894415678408, "loss": 11.6705, "step": 17820 }, { "epoch": 0.37304278656953865, "grad_norm": 0.3322453796863556, "learning_rate": 0.00019248811045432043, "loss": 11.6746, "step": 17821 }, { "epoch": 0.3730637193335008, "grad_norm": 0.3018379211425781, "learning_rate": 0.0001924872767073959, "loss": 11.6896, "step": 17822 }, { "epoch": 0.37308465209746294, "grad_norm": 0.38952136039733887, "learning_rate": 0.00019248644291601087, "loss": 11.6797, "step": 17823 }, { "epoch": 0.3731055848614251, "grad_norm": 0.2705842852592468, "learning_rate": 0.0001924856090801658, "loss": 11.6952, "step": 17824 }, { "epoch": 0.37312651762538723, "grad_norm": 0.2229498028755188, "learning_rate": 0.00019248477519986102, "loss": 11.66, "step": 17825 }, { "epoch": 0.37314745038934943, "grad_norm": 0.27940917015075684, "learning_rate": 0.00019248394127509695, "loss": 11.6726, "step": 17826 }, { "epoch": 0.3731683831533116, "grad_norm": 0.24630284309387207, "learning_rate": 0.00019248310730587398, "loss": 11.6572, "step": 17827 }, { "epoch": 0.3731893159172737, "grad_norm": 0.242630273103714, "learning_rate": 0.00019248227329219254, "loss": 11.6644, "step": 17828 }, { "epoch": 0.37321024868123587, "grad_norm": 0.23487086594104767, "learning_rate": 0.00019248143923405304, "loss": 11.6803, "step": 17829 }, { "epoch": 0.373231181445198, "grad_norm": 0.3277173638343811, "learning_rate": 0.00019248060513145582, "loss": 11.6676, "step": 17830 }, { "epoch": 0.37325211420916016, "grad_norm": 0.48313555121421814, "learning_rate": 0.00019247977098440133, "loss": 11.6922, "step": 17831 }, { "epoch": 0.37327304697312236, "grad_norm": 0.33566251397132874, "learning_rate": 0.00019247893679289, "loss": 11.658, "step": 17832 }, { "epoch": 0.3732939797370845, "grad_norm": 0.30122581124305725, "learning_rate": 0.00019247810255692212, "loss": 11.6651, "step": 17833 }, { "epoch": 0.37331491250104665, "grad_norm": 0.34028536081314087, "learning_rate": 0.00019247726827649825, "loss": 11.6564, "step": 17834 }, { "epoch": 0.3733358452650088, "grad_norm": 0.23106235265731812, "learning_rate": 0.00019247643395161866, "loss": 11.6735, "step": 17835 }, { "epoch": 0.37335677802897094, "grad_norm": 0.2517492175102234, "learning_rate": 0.00019247559958228378, "loss": 11.6905, "step": 17836 }, { "epoch": 0.3733777107929331, "grad_norm": 0.36108964681625366, "learning_rate": 0.00019247476516849404, "loss": 11.6934, "step": 17837 }, { "epoch": 0.3733986435568952, "grad_norm": 0.29417523741722107, "learning_rate": 0.0001924739307102498, "loss": 11.6746, "step": 17838 }, { "epoch": 0.3734195763208574, "grad_norm": 0.319936066865921, "learning_rate": 0.00019247309620755153, "loss": 11.6731, "step": 17839 }, { "epoch": 0.37344050908481957, "grad_norm": 0.33511418104171753, "learning_rate": 0.00019247226166039957, "loss": 11.6809, "step": 17840 }, { "epoch": 0.3734614418487817, "grad_norm": 0.4209447503089905, "learning_rate": 0.00019247142706879436, "loss": 11.6893, "step": 17841 }, { "epoch": 0.37348237461274386, "grad_norm": 0.31603744626045227, "learning_rate": 0.00019247059243273626, "loss": 11.6582, "step": 17842 }, { "epoch": 0.373503307376706, "grad_norm": 0.28854435682296753, "learning_rate": 0.00019246975775222567, "loss": 11.6703, "step": 17843 }, { "epoch": 0.37352424014066815, "grad_norm": 0.2642650008201599, "learning_rate": 0.00019246892302726308, "loss": 11.6761, "step": 17844 }, { "epoch": 0.37354517290463035, "grad_norm": 0.27653175592422485, "learning_rate": 0.00019246808825784878, "loss": 11.6604, "step": 17845 }, { "epoch": 0.3735661056685925, "grad_norm": 0.2488936483860016, "learning_rate": 0.0001924672534439832, "loss": 11.6666, "step": 17846 }, { "epoch": 0.37358703843255464, "grad_norm": 0.2798581123352051, "learning_rate": 0.00019246641858566683, "loss": 11.6765, "step": 17847 }, { "epoch": 0.3736079711965168, "grad_norm": 0.26042380928993225, "learning_rate": 0.00019246558368289995, "loss": 11.6818, "step": 17848 }, { "epoch": 0.37362890396047893, "grad_norm": 0.2777658998966217, "learning_rate": 0.000192464748735683, "loss": 11.6641, "step": 17849 }, { "epoch": 0.3736498367244411, "grad_norm": 0.3445165753364563, "learning_rate": 0.00019246391374401644, "loss": 11.6656, "step": 17850 }, { "epoch": 0.3736707694884033, "grad_norm": 0.43221476674079895, "learning_rate": 0.00019246307870790058, "loss": 11.6818, "step": 17851 }, { "epoch": 0.3736917022523654, "grad_norm": 0.29372912645339966, "learning_rate": 0.00019246224362733588, "loss": 11.6767, "step": 17852 }, { "epoch": 0.37371263501632757, "grad_norm": 0.28308621048927307, "learning_rate": 0.00019246140850232273, "loss": 11.6824, "step": 17853 }, { "epoch": 0.3737335677802897, "grad_norm": 0.34425580501556396, "learning_rate": 0.0001924605733328615, "loss": 11.6614, "step": 17854 }, { "epoch": 0.37375450054425186, "grad_norm": 0.2749102711677551, "learning_rate": 0.00019245973811895267, "loss": 11.672, "step": 17855 }, { "epoch": 0.373775433308214, "grad_norm": 0.3542536795139313, "learning_rate": 0.00019245890286059654, "loss": 11.668, "step": 17856 }, { "epoch": 0.37379636607217614, "grad_norm": 0.33319684863090515, "learning_rate": 0.00019245806755779357, "loss": 11.6753, "step": 17857 }, { "epoch": 0.37381729883613835, "grad_norm": 0.29380103945732117, "learning_rate": 0.0001924572322105442, "loss": 11.6631, "step": 17858 }, { "epoch": 0.3738382316001005, "grad_norm": 0.2623545229434967, "learning_rate": 0.00019245639681884872, "loss": 11.6738, "step": 17859 }, { "epoch": 0.37385916436406263, "grad_norm": 0.24203407764434814, "learning_rate": 0.00019245556138270764, "loss": 11.6788, "step": 17860 }, { "epoch": 0.3738800971280248, "grad_norm": 0.30243945121765137, "learning_rate": 0.00019245472590212131, "loss": 11.643, "step": 17861 }, { "epoch": 0.3739010298919869, "grad_norm": 0.30011653900146484, "learning_rate": 0.00019245389037709013, "loss": 11.6735, "step": 17862 }, { "epoch": 0.37392196265594907, "grad_norm": 0.3043409585952759, "learning_rate": 0.00019245305480761454, "loss": 11.6647, "step": 17863 }, { "epoch": 0.37394289541991127, "grad_norm": 0.2828151285648346, "learning_rate": 0.0001924522191936949, "loss": 11.6659, "step": 17864 }, { "epoch": 0.3739638281838734, "grad_norm": 0.249469593167305, "learning_rate": 0.00019245138353533162, "loss": 11.6711, "step": 17865 }, { "epoch": 0.37398476094783556, "grad_norm": 0.2736554443836212, "learning_rate": 0.0001924505478325251, "loss": 11.6713, "step": 17866 }, { "epoch": 0.3740056937117977, "grad_norm": 0.27214959263801575, "learning_rate": 0.00019244971208527575, "loss": 11.6793, "step": 17867 }, { "epoch": 0.37402662647575985, "grad_norm": 0.343568354845047, "learning_rate": 0.00019244887629358399, "loss": 11.6713, "step": 17868 }, { "epoch": 0.374047559239722, "grad_norm": 0.2855481505393982, "learning_rate": 0.0001924480404574502, "loss": 11.6713, "step": 17869 }, { "epoch": 0.37406849200368414, "grad_norm": 0.27086305618286133, "learning_rate": 0.00019244720457687478, "loss": 11.6628, "step": 17870 }, { "epoch": 0.37408942476764634, "grad_norm": 0.31779375672340393, "learning_rate": 0.0001924463686518581, "loss": 11.6727, "step": 17871 }, { "epoch": 0.3741103575316085, "grad_norm": 0.2444971650838852, "learning_rate": 0.00019244553268240065, "loss": 11.6655, "step": 17872 }, { "epoch": 0.37413129029557063, "grad_norm": 0.30919042229652405, "learning_rate": 0.00019244469666850276, "loss": 11.6647, "step": 17873 }, { "epoch": 0.3741522230595328, "grad_norm": 0.311626672744751, "learning_rate": 0.00019244386061016485, "loss": 11.6602, "step": 17874 }, { "epoch": 0.3741731558234949, "grad_norm": 0.2960524559020996, "learning_rate": 0.00019244302450738732, "loss": 11.6801, "step": 17875 }, { "epoch": 0.37419408858745706, "grad_norm": 0.29841670393943787, "learning_rate": 0.00019244218836017059, "loss": 11.6856, "step": 17876 }, { "epoch": 0.37421502135141926, "grad_norm": 0.3494950532913208, "learning_rate": 0.000192441352168515, "loss": 11.6818, "step": 17877 }, { "epoch": 0.3742359541153814, "grad_norm": 0.31374916434288025, "learning_rate": 0.00019244051593242105, "loss": 11.6697, "step": 17878 }, { "epoch": 0.37425688687934355, "grad_norm": 0.30663958191871643, "learning_rate": 0.00019243967965188908, "loss": 11.6692, "step": 17879 }, { "epoch": 0.3742778196433057, "grad_norm": 0.3047415614128113, "learning_rate": 0.00019243884332691952, "loss": 11.6549, "step": 17880 }, { "epoch": 0.37429875240726784, "grad_norm": 0.25746333599090576, "learning_rate": 0.00019243800695751273, "loss": 11.6937, "step": 17881 }, { "epoch": 0.37431968517123, "grad_norm": 0.25079190731048584, "learning_rate": 0.00019243717054366914, "loss": 11.676, "step": 17882 }, { "epoch": 0.3743406179351922, "grad_norm": 0.22777831554412842, "learning_rate": 0.00019243633408538914, "loss": 11.6671, "step": 17883 }, { "epoch": 0.37436155069915433, "grad_norm": 0.2720332443714142, "learning_rate": 0.00019243549758267317, "loss": 11.6726, "step": 17884 }, { "epoch": 0.3743824834631165, "grad_norm": 0.2338872253894806, "learning_rate": 0.0001924346610355216, "loss": 11.6726, "step": 17885 }, { "epoch": 0.3744034162270786, "grad_norm": 0.35056376457214355, "learning_rate": 0.0001924338244439348, "loss": 11.6724, "step": 17886 }, { "epoch": 0.37442434899104077, "grad_norm": 0.27544406056404114, "learning_rate": 0.00019243298780791327, "loss": 11.6783, "step": 17887 }, { "epoch": 0.3744452817550029, "grad_norm": 0.23076438903808594, "learning_rate": 0.00019243215112745729, "loss": 11.6831, "step": 17888 }, { "epoch": 0.37446621451896506, "grad_norm": 0.2915836274623871, "learning_rate": 0.00019243131440256734, "loss": 11.6591, "step": 17889 }, { "epoch": 0.37448714728292726, "grad_norm": 0.2760770320892334, "learning_rate": 0.00019243047763324383, "loss": 11.668, "step": 17890 }, { "epoch": 0.3745080800468894, "grad_norm": 0.21644873917102814, "learning_rate": 0.00019242964081948714, "loss": 11.6615, "step": 17891 }, { "epoch": 0.37452901281085155, "grad_norm": 0.29034239053726196, "learning_rate": 0.00019242880396129762, "loss": 11.6747, "step": 17892 }, { "epoch": 0.3745499455748137, "grad_norm": 0.28518062829971313, "learning_rate": 0.00019242796705867577, "loss": 11.6747, "step": 17893 }, { "epoch": 0.37457087833877584, "grad_norm": 0.31227052211761475, "learning_rate": 0.00019242713011162192, "loss": 11.6875, "step": 17894 }, { "epoch": 0.374591811102738, "grad_norm": 0.3041297495365143, "learning_rate": 0.0001924262931201365, "loss": 11.6841, "step": 17895 }, { "epoch": 0.3746127438667002, "grad_norm": 0.2834971249103546, "learning_rate": 0.0001924254560842199, "loss": 11.673, "step": 17896 }, { "epoch": 0.3746336766306623, "grad_norm": 0.3349420130252838, "learning_rate": 0.00019242461900387253, "loss": 11.6696, "step": 17897 }, { "epoch": 0.37465460939462447, "grad_norm": 0.24582059681415558, "learning_rate": 0.00019242378187909483, "loss": 11.672, "step": 17898 }, { "epoch": 0.3746755421585866, "grad_norm": 0.28408682346343994, "learning_rate": 0.00019242294470988712, "loss": 11.6721, "step": 17899 }, { "epoch": 0.37469647492254876, "grad_norm": 0.26625534892082214, "learning_rate": 0.00019242210749624987, "loss": 11.6654, "step": 17900 }, { "epoch": 0.3747174076865109, "grad_norm": 0.300697922706604, "learning_rate": 0.00019242127023818347, "loss": 11.6752, "step": 17901 }, { "epoch": 0.3747383404504731, "grad_norm": 0.3793579339981079, "learning_rate": 0.00019242043293568828, "loss": 11.6785, "step": 17902 }, { "epoch": 0.37475927321443525, "grad_norm": 0.37864139676094055, "learning_rate": 0.00019241959558876476, "loss": 11.6848, "step": 17903 }, { "epoch": 0.3747802059783974, "grad_norm": 0.35445377230644226, "learning_rate": 0.00019241875819741327, "loss": 11.6714, "step": 17904 }, { "epoch": 0.37480113874235954, "grad_norm": 0.2928647994995117, "learning_rate": 0.00019241792076163427, "loss": 11.6647, "step": 17905 }, { "epoch": 0.3748220715063217, "grad_norm": 0.3652775287628174, "learning_rate": 0.0001924170832814281, "loss": 11.7009, "step": 17906 }, { "epoch": 0.37484300427028383, "grad_norm": 0.2783944308757782, "learning_rate": 0.0001924162457567952, "loss": 11.6729, "step": 17907 }, { "epoch": 0.374863937034246, "grad_norm": 0.3117271065711975, "learning_rate": 0.00019241540818773592, "loss": 11.6911, "step": 17908 }, { "epoch": 0.3748848697982082, "grad_norm": 0.38099968433380127, "learning_rate": 0.00019241457057425073, "loss": 11.6752, "step": 17909 }, { "epoch": 0.3749058025621703, "grad_norm": 0.27751755714416504, "learning_rate": 0.00019241373291634, "loss": 11.6691, "step": 17910 }, { "epoch": 0.37492673532613247, "grad_norm": 0.3863565921783447, "learning_rate": 0.00019241289521400415, "loss": 11.6833, "step": 17911 }, { "epoch": 0.3749476680900946, "grad_norm": 0.30416515469551086, "learning_rate": 0.00019241205746724354, "loss": 11.6873, "step": 17912 }, { "epoch": 0.37496860085405676, "grad_norm": 0.2541596293449402, "learning_rate": 0.00019241121967605862, "loss": 11.672, "step": 17913 }, { "epoch": 0.3749895336180189, "grad_norm": 0.23907038569450378, "learning_rate": 0.00019241038184044978, "loss": 11.6731, "step": 17914 }, { "epoch": 0.3750104663819811, "grad_norm": 0.379793256521225, "learning_rate": 0.0001924095439604174, "loss": 11.6501, "step": 17915 }, { "epoch": 0.37503139914594324, "grad_norm": 0.3149658739566803, "learning_rate": 0.00019240870603596193, "loss": 11.6644, "step": 17916 }, { "epoch": 0.3750523319099054, "grad_norm": 0.24893997609615326, "learning_rate": 0.0001924078680670837, "loss": 11.671, "step": 17917 }, { "epoch": 0.37507326467386753, "grad_norm": 0.3269589841365814, "learning_rate": 0.0001924070300537832, "loss": 11.7022, "step": 17918 }, { "epoch": 0.3750941974378297, "grad_norm": 0.2493656426668167, "learning_rate": 0.00019240619199606076, "loss": 11.6631, "step": 17919 }, { "epoch": 0.3751151302017918, "grad_norm": 0.25711899995803833, "learning_rate": 0.00019240535389391684, "loss": 11.6693, "step": 17920 }, { "epoch": 0.375136062965754, "grad_norm": 0.2698284685611725, "learning_rate": 0.00019240451574735178, "loss": 11.6685, "step": 17921 }, { "epoch": 0.37515699572971617, "grad_norm": 0.36585649847984314, "learning_rate": 0.000192403677556366, "loss": 11.6684, "step": 17922 }, { "epoch": 0.3751779284936783, "grad_norm": 0.2645397186279297, "learning_rate": 0.00019240283932095997, "loss": 11.6913, "step": 17923 }, { "epoch": 0.37519886125764046, "grad_norm": 0.27532005310058594, "learning_rate": 0.00019240200104113405, "loss": 11.6603, "step": 17924 }, { "epoch": 0.3752197940216026, "grad_norm": 0.25747495889663696, "learning_rate": 0.0001924011627168886, "loss": 11.6602, "step": 17925 }, { "epoch": 0.37524072678556475, "grad_norm": 0.28799471259117126, "learning_rate": 0.00019240032434822405, "loss": 11.6726, "step": 17926 }, { "epoch": 0.3752616595495269, "grad_norm": 0.2511344850063324, "learning_rate": 0.00019239948593514084, "loss": 11.6678, "step": 17927 }, { "epoch": 0.3752825923134891, "grad_norm": 0.2607882022857666, "learning_rate": 0.00019239864747763936, "loss": 11.6903, "step": 17928 }, { "epoch": 0.37530352507745124, "grad_norm": 0.2563789188861847, "learning_rate": 0.00019239780897571996, "loss": 11.6796, "step": 17929 }, { "epoch": 0.3753244578414134, "grad_norm": 0.4096130132675171, "learning_rate": 0.0001923969704293831, "loss": 11.6807, "step": 17930 }, { "epoch": 0.37534539060537553, "grad_norm": 0.2879255414009094, "learning_rate": 0.00019239613183862916, "loss": 11.6635, "step": 17931 }, { "epoch": 0.3753663233693377, "grad_norm": 0.2938454747200012, "learning_rate": 0.00019239529320345855, "loss": 11.6562, "step": 17932 }, { "epoch": 0.3753872561332998, "grad_norm": 0.28457558155059814, "learning_rate": 0.0001923944545238717, "loss": 11.6675, "step": 17933 }, { "epoch": 0.375408188897262, "grad_norm": 0.264627069234848, "learning_rate": 0.00019239361579986893, "loss": 11.6713, "step": 17934 }, { "epoch": 0.37542912166122416, "grad_norm": 0.28270137310028076, "learning_rate": 0.00019239277703145074, "loss": 11.6907, "step": 17935 }, { "epoch": 0.3754500544251863, "grad_norm": 0.21206262707710266, "learning_rate": 0.00019239193821861744, "loss": 11.6855, "step": 17936 }, { "epoch": 0.37547098718914845, "grad_norm": 0.2711130380630493, "learning_rate": 0.00019239109936136954, "loss": 11.6779, "step": 17937 }, { "epoch": 0.3754919199531106, "grad_norm": 0.3430188298225403, "learning_rate": 0.00019239026045970737, "loss": 11.6748, "step": 17938 }, { "epoch": 0.37551285271707274, "grad_norm": 0.3575153946876526, "learning_rate": 0.00019238942151363135, "loss": 11.6626, "step": 17939 }, { "epoch": 0.37553378548103494, "grad_norm": 0.2987390160560608, "learning_rate": 0.00019238858252314188, "loss": 11.6795, "step": 17940 }, { "epoch": 0.3755547182449971, "grad_norm": 0.29533132910728455, "learning_rate": 0.00019238774348823937, "loss": 11.6799, "step": 17941 }, { "epoch": 0.37557565100895923, "grad_norm": 0.24249562621116638, "learning_rate": 0.0001923869044089242, "loss": 11.6728, "step": 17942 }, { "epoch": 0.3755965837729214, "grad_norm": 0.39510414004325867, "learning_rate": 0.0001923860652851968, "loss": 11.6773, "step": 17943 }, { "epoch": 0.3756175165368835, "grad_norm": 0.30705809593200684, "learning_rate": 0.0001923852261170576, "loss": 11.693, "step": 17944 }, { "epoch": 0.37563844930084567, "grad_norm": 0.2847978472709656, "learning_rate": 0.00019238438690450692, "loss": 11.6797, "step": 17945 }, { "epoch": 0.3756593820648078, "grad_norm": 0.2932620346546173, "learning_rate": 0.00019238354764754525, "loss": 11.6808, "step": 17946 }, { "epoch": 0.37568031482877, "grad_norm": 0.26223939657211304, "learning_rate": 0.00019238270834617295, "loss": 11.6802, "step": 17947 }, { "epoch": 0.37570124759273216, "grad_norm": 0.2880858778953552, "learning_rate": 0.00019238186900039043, "loss": 11.669, "step": 17948 }, { "epoch": 0.3757221803566943, "grad_norm": 0.30498653650283813, "learning_rate": 0.00019238102961019807, "loss": 11.6687, "step": 17949 }, { "epoch": 0.37574311312065645, "grad_norm": 0.232888862490654, "learning_rate": 0.00019238019017559635, "loss": 11.6836, "step": 17950 }, { "epoch": 0.3757640458846186, "grad_norm": 0.3830397129058838, "learning_rate": 0.0001923793506965856, "loss": 11.6697, "step": 17951 }, { "epoch": 0.37578497864858074, "grad_norm": 0.2190600484609604, "learning_rate": 0.00019237851117316623, "loss": 11.6701, "step": 17952 }, { "epoch": 0.37580591141254294, "grad_norm": 0.23016682267189026, "learning_rate": 0.00019237767160533865, "loss": 11.6626, "step": 17953 }, { "epoch": 0.3758268441765051, "grad_norm": 0.2523225247859955, "learning_rate": 0.0001923768319931033, "loss": 11.6693, "step": 17954 }, { "epoch": 0.3758477769404672, "grad_norm": 0.27306827902793884, "learning_rate": 0.00019237599233646054, "loss": 11.6829, "step": 17955 }, { "epoch": 0.37586870970442937, "grad_norm": 0.27276840806007385, "learning_rate": 0.0001923751526354108, "loss": 11.6745, "step": 17956 }, { "epoch": 0.3758896424683915, "grad_norm": 0.3125632703304291, "learning_rate": 0.00019237431288995444, "loss": 11.6669, "step": 17957 }, { "epoch": 0.37591057523235366, "grad_norm": 0.3813784420490265, "learning_rate": 0.00019237347310009195, "loss": 11.6661, "step": 17958 }, { "epoch": 0.37593150799631586, "grad_norm": 0.2564711272716522, "learning_rate": 0.00019237263326582363, "loss": 11.6594, "step": 17959 }, { "epoch": 0.375952440760278, "grad_norm": 0.2962082326412201, "learning_rate": 0.00019237179338714997, "loss": 11.6809, "step": 17960 }, { "epoch": 0.37597337352424015, "grad_norm": 0.21488401293754578, "learning_rate": 0.00019237095346407132, "loss": 11.6741, "step": 17961 }, { "epoch": 0.3759943062882023, "grad_norm": 0.28523480892181396, "learning_rate": 0.0001923701134965881, "loss": 11.7003, "step": 17962 }, { "epoch": 0.37601523905216444, "grad_norm": 0.3025227189064026, "learning_rate": 0.00019236927348470072, "loss": 11.6704, "step": 17963 }, { "epoch": 0.3760361718161266, "grad_norm": 0.4111042022705078, "learning_rate": 0.0001923684334284096, "loss": 11.6825, "step": 17964 }, { "epoch": 0.37605710458008873, "grad_norm": 0.24752970039844513, "learning_rate": 0.0001923675933277151, "loss": 11.6805, "step": 17965 }, { "epoch": 0.37607803734405093, "grad_norm": 0.32257139682769775, "learning_rate": 0.00019236675318261764, "loss": 11.6803, "step": 17966 }, { "epoch": 0.3760989701080131, "grad_norm": 0.313630074262619, "learning_rate": 0.00019236591299311764, "loss": 11.6653, "step": 17967 }, { "epoch": 0.3761199028719752, "grad_norm": 0.31755828857421875, "learning_rate": 0.0001923650727592155, "loss": 11.6683, "step": 17968 }, { "epoch": 0.37614083563593737, "grad_norm": 0.2666832208633423, "learning_rate": 0.0001923642324809116, "loss": 11.6691, "step": 17969 }, { "epoch": 0.3761617683998995, "grad_norm": 0.3740176260471344, "learning_rate": 0.00019236339215820638, "loss": 11.6849, "step": 17970 }, { "epoch": 0.37618270116386165, "grad_norm": 0.29774829745292664, "learning_rate": 0.0001923625517911002, "loss": 11.6808, "step": 17971 }, { "epoch": 0.37620363392782386, "grad_norm": 0.31791290640830994, "learning_rate": 0.00019236171137959352, "loss": 11.6806, "step": 17972 }, { "epoch": 0.376224566691786, "grad_norm": 0.2344653308391571, "learning_rate": 0.00019236087092368672, "loss": 11.6707, "step": 17973 }, { "epoch": 0.37624549945574814, "grad_norm": 0.2698647975921631, "learning_rate": 0.00019236003042338019, "loss": 11.6669, "step": 17974 }, { "epoch": 0.3762664322197103, "grad_norm": 0.2676132619380951, "learning_rate": 0.00019235918987867433, "loss": 11.6676, "step": 17975 }, { "epoch": 0.37628736498367243, "grad_norm": 0.24895623326301575, "learning_rate": 0.00019235834928956958, "loss": 11.6692, "step": 17976 }, { "epoch": 0.3763082977476346, "grad_norm": 0.2754514813423157, "learning_rate": 0.00019235750865606628, "loss": 11.6693, "step": 17977 }, { "epoch": 0.3763292305115967, "grad_norm": 0.2888352870941162, "learning_rate": 0.0001923566679781649, "loss": 11.6785, "step": 17978 }, { "epoch": 0.3763501632755589, "grad_norm": 0.257558137178421, "learning_rate": 0.00019235582725586582, "loss": 11.6762, "step": 17979 }, { "epoch": 0.37637109603952107, "grad_norm": 0.35190141201019287, "learning_rate": 0.00019235498648916945, "loss": 11.6911, "step": 17980 }, { "epoch": 0.3763920288034832, "grad_norm": 0.29233235120773315, "learning_rate": 0.0001923541456780762, "loss": 11.6772, "step": 17981 }, { "epoch": 0.37641296156744536, "grad_norm": 0.26963555812835693, "learning_rate": 0.00019235330482258642, "loss": 11.674, "step": 17982 }, { "epoch": 0.3764338943314075, "grad_norm": 0.28559795022010803, "learning_rate": 0.00019235246392270057, "loss": 11.6667, "step": 17983 }, { "epoch": 0.37645482709536965, "grad_norm": 0.2637408971786499, "learning_rate": 0.00019235162297841904, "loss": 11.6701, "step": 17984 }, { "epoch": 0.37647575985933185, "grad_norm": 0.3184886872768402, "learning_rate": 0.00019235078198974223, "loss": 11.6982, "step": 17985 }, { "epoch": 0.376496692623294, "grad_norm": 0.28972092270851135, "learning_rate": 0.00019234994095667057, "loss": 11.6784, "step": 17986 }, { "epoch": 0.37651762538725614, "grad_norm": 0.2996479570865631, "learning_rate": 0.00019234909987920443, "loss": 11.6816, "step": 17987 }, { "epoch": 0.3765385581512183, "grad_norm": 0.37924158573150635, "learning_rate": 0.00019234825875734423, "loss": 11.6751, "step": 17988 }, { "epoch": 0.37655949091518043, "grad_norm": 0.30699223279953003, "learning_rate": 0.00019234741759109036, "loss": 11.6786, "step": 17989 }, { "epoch": 0.3765804236791426, "grad_norm": 0.29999101161956787, "learning_rate": 0.00019234657638044324, "loss": 11.6762, "step": 17990 }, { "epoch": 0.3766013564431048, "grad_norm": 0.26692572236061096, "learning_rate": 0.0001923457351254033, "loss": 11.6896, "step": 17991 }, { "epoch": 0.3766222892070669, "grad_norm": 0.3020786643028259, "learning_rate": 0.00019234489382597087, "loss": 11.6756, "step": 17992 }, { "epoch": 0.37664322197102906, "grad_norm": 0.25238561630249023, "learning_rate": 0.00019234405248214645, "loss": 11.6729, "step": 17993 }, { "epoch": 0.3766641547349912, "grad_norm": 0.2744004726409912, "learning_rate": 0.00019234321109393035, "loss": 11.6564, "step": 17994 }, { "epoch": 0.37668508749895335, "grad_norm": 0.23795956373214722, "learning_rate": 0.000192342369661323, "loss": 11.6568, "step": 17995 }, { "epoch": 0.3767060202629155, "grad_norm": 0.26840364933013916, "learning_rate": 0.00019234152818432487, "loss": 11.6769, "step": 17996 }, { "epoch": 0.37672695302687764, "grad_norm": 0.2880818843841553, "learning_rate": 0.0001923406866629363, "loss": 11.666, "step": 17997 }, { "epoch": 0.37674788579083984, "grad_norm": 0.34121719002723694, "learning_rate": 0.0001923398450971577, "loss": 11.6588, "step": 17998 }, { "epoch": 0.376768818554802, "grad_norm": 0.25856077671051025, "learning_rate": 0.0001923390034869895, "loss": 11.6868, "step": 17999 }, { "epoch": 0.37678975131876413, "grad_norm": 0.31327110528945923, "learning_rate": 0.00019233816183243207, "loss": 11.6676, "step": 18000 }, { "epoch": 0.37678975131876413, "eval_loss": 11.67434024810791, "eval_runtime": 34.3348, "eval_samples_per_second": 27.989, "eval_steps_per_second": 7.019, "step": 18000 }, { "epoch": 0.3768106840827263, "grad_norm": 0.2639787793159485, "learning_rate": 0.00019233732013348585, "loss": 11.6687, "step": 18001 }, { "epoch": 0.3768316168466884, "grad_norm": 0.23706059157848358, "learning_rate": 0.00019233647839015126, "loss": 11.6822, "step": 18002 }, { "epoch": 0.37685254961065057, "grad_norm": 0.24676980078220367, "learning_rate": 0.00019233563660242864, "loss": 11.6795, "step": 18003 }, { "epoch": 0.37687348237461277, "grad_norm": 0.28305187821388245, "learning_rate": 0.0001923347947703184, "loss": 11.6835, "step": 18004 }, { "epoch": 0.3768944151385749, "grad_norm": 0.2572258412837982, "learning_rate": 0.00019233395289382102, "loss": 11.6805, "step": 18005 }, { "epoch": 0.37691534790253706, "grad_norm": 0.2451065480709076, "learning_rate": 0.00019233311097293686, "loss": 11.6657, "step": 18006 }, { "epoch": 0.3769362806664992, "grad_norm": 0.2922796607017517, "learning_rate": 0.00019233226900766632, "loss": 11.6744, "step": 18007 }, { "epoch": 0.37695721343046135, "grad_norm": 0.2664565145969391, "learning_rate": 0.0001923314269980098, "loss": 11.6558, "step": 18008 }, { "epoch": 0.3769781461944235, "grad_norm": 0.31835824251174927, "learning_rate": 0.0001923305849439677, "loss": 11.6677, "step": 18009 }, { "epoch": 0.3769990789583857, "grad_norm": 0.28284889459609985, "learning_rate": 0.00019232974284554043, "loss": 11.6579, "step": 18010 }, { "epoch": 0.37702001172234784, "grad_norm": 0.3104635775089264, "learning_rate": 0.00019232890070272843, "loss": 11.6859, "step": 18011 }, { "epoch": 0.37704094448631, "grad_norm": 0.323759526014328, "learning_rate": 0.0001923280585155321, "loss": 11.6674, "step": 18012 }, { "epoch": 0.3770618772502721, "grad_norm": 0.2884000837802887, "learning_rate": 0.00019232721628395177, "loss": 11.649, "step": 18013 }, { "epoch": 0.37708281001423427, "grad_norm": 0.2935855984687805, "learning_rate": 0.00019232637400798793, "loss": 11.6791, "step": 18014 }, { "epoch": 0.3771037427781964, "grad_norm": 0.3494332432746887, "learning_rate": 0.0001923255316876409, "loss": 11.6792, "step": 18015 }, { "epoch": 0.37712467554215856, "grad_norm": 0.31187665462493896, "learning_rate": 0.00019232468932291118, "loss": 11.6841, "step": 18016 }, { "epoch": 0.37714560830612076, "grad_norm": 0.39420485496520996, "learning_rate": 0.00019232384691379912, "loss": 11.6822, "step": 18017 }, { "epoch": 0.3771665410700829, "grad_norm": 0.3273184597492218, "learning_rate": 0.00019232300446030517, "loss": 11.67, "step": 18018 }, { "epoch": 0.37718747383404505, "grad_norm": 0.29598239064216614, "learning_rate": 0.00019232216196242967, "loss": 11.6752, "step": 18019 }, { "epoch": 0.3772084065980072, "grad_norm": 0.2800162434577942, "learning_rate": 0.00019232131942017303, "loss": 11.6629, "step": 18020 }, { "epoch": 0.37722933936196934, "grad_norm": 0.2524474859237671, "learning_rate": 0.00019232047683353574, "loss": 11.6714, "step": 18021 }, { "epoch": 0.3772502721259315, "grad_norm": 0.2741273045539856, "learning_rate": 0.0001923196342025181, "loss": 11.6624, "step": 18022 }, { "epoch": 0.3772712048898937, "grad_norm": 0.24311503767967224, "learning_rate": 0.0001923187915271206, "loss": 11.6681, "step": 18023 }, { "epoch": 0.37729213765385583, "grad_norm": 0.2467663437128067, "learning_rate": 0.00019231794880734357, "loss": 11.6885, "step": 18024 }, { "epoch": 0.377313070417818, "grad_norm": 0.26947078108787537, "learning_rate": 0.00019231710604318748, "loss": 11.6853, "step": 18025 }, { "epoch": 0.3773340031817801, "grad_norm": 0.32478901743888855, "learning_rate": 0.0001923162632346527, "loss": 11.6641, "step": 18026 }, { "epoch": 0.37735493594574226, "grad_norm": 0.36410248279571533, "learning_rate": 0.00019231542038173965, "loss": 11.7009, "step": 18027 }, { "epoch": 0.3773758687097044, "grad_norm": 0.2888876497745514, "learning_rate": 0.00019231457748444873, "loss": 11.6787, "step": 18028 }, { "epoch": 0.3773968014736666, "grad_norm": 0.4733099937438965, "learning_rate": 0.0001923137345427803, "loss": 11.686, "step": 18029 }, { "epoch": 0.37741773423762875, "grad_norm": 0.28614383935928345, "learning_rate": 0.00019231289155673484, "loss": 11.6755, "step": 18030 }, { "epoch": 0.3774386670015909, "grad_norm": 0.21174563467502594, "learning_rate": 0.00019231204852631273, "loss": 11.6644, "step": 18031 }, { "epoch": 0.37745959976555304, "grad_norm": 0.2726001739501953, "learning_rate": 0.00019231120545151436, "loss": 11.6852, "step": 18032 }, { "epoch": 0.3774805325295152, "grad_norm": 0.3525823652744293, "learning_rate": 0.0001923103623323401, "loss": 11.6748, "step": 18033 }, { "epoch": 0.37750146529347733, "grad_norm": 0.28873518109321594, "learning_rate": 0.00019230951916879047, "loss": 11.6688, "step": 18034 }, { "epoch": 0.3775223980574395, "grad_norm": 0.29543599486351013, "learning_rate": 0.0001923086759608658, "loss": 11.6689, "step": 18035 }, { "epoch": 0.3775433308214017, "grad_norm": 0.3025944232940674, "learning_rate": 0.00019230783270856648, "loss": 11.678, "step": 18036 }, { "epoch": 0.3775642635853638, "grad_norm": 0.21497713029384613, "learning_rate": 0.0001923069894118929, "loss": 11.6875, "step": 18037 }, { "epoch": 0.37758519634932597, "grad_norm": 0.4069598913192749, "learning_rate": 0.00019230614607084555, "loss": 11.6772, "step": 18038 }, { "epoch": 0.3776061291132881, "grad_norm": 0.2621471583843231, "learning_rate": 0.00019230530268542475, "loss": 11.6706, "step": 18039 }, { "epoch": 0.37762706187725026, "grad_norm": 0.27131983637809753, "learning_rate": 0.00019230445925563094, "loss": 11.6762, "step": 18040 }, { "epoch": 0.3776479946412124, "grad_norm": 0.24750575423240662, "learning_rate": 0.00019230361578146456, "loss": 11.6622, "step": 18041 }, { "epoch": 0.3776689274051746, "grad_norm": 0.3091298043727875, "learning_rate": 0.00019230277226292596, "loss": 11.6827, "step": 18042 }, { "epoch": 0.37768986016913675, "grad_norm": 0.31058359146118164, "learning_rate": 0.00019230192870001557, "loss": 11.6662, "step": 18043 }, { "epoch": 0.3777107929330989, "grad_norm": 0.26794567704200745, "learning_rate": 0.0001923010850927338, "loss": 11.6573, "step": 18044 }, { "epoch": 0.37773172569706104, "grad_norm": 0.27084779739379883, "learning_rate": 0.00019230024144108106, "loss": 11.6596, "step": 18045 }, { "epoch": 0.3777526584610232, "grad_norm": 0.3260512351989746, "learning_rate": 0.00019229939774505772, "loss": 11.673, "step": 18046 }, { "epoch": 0.37777359122498533, "grad_norm": 0.2347078025341034, "learning_rate": 0.00019229855400466423, "loss": 11.6581, "step": 18047 }, { "epoch": 0.37779452398894753, "grad_norm": 0.2668546736240387, "learning_rate": 0.00019229771021990095, "loss": 11.674, "step": 18048 }, { "epoch": 0.3778154567529097, "grad_norm": 0.33357274532318115, "learning_rate": 0.00019229686639076832, "loss": 11.6801, "step": 18049 }, { "epoch": 0.3778363895168718, "grad_norm": 0.33642706274986267, "learning_rate": 0.00019229602251726675, "loss": 11.6878, "step": 18050 }, { "epoch": 0.37785732228083396, "grad_norm": 0.3050215542316437, "learning_rate": 0.00019229517859939663, "loss": 11.6764, "step": 18051 }, { "epoch": 0.3778782550447961, "grad_norm": 0.2578217685222626, "learning_rate": 0.00019229433463715837, "loss": 11.6856, "step": 18052 }, { "epoch": 0.37789918780875825, "grad_norm": 0.27899378538131714, "learning_rate": 0.00019229349063055236, "loss": 11.671, "step": 18053 }, { "epoch": 0.3779201205727204, "grad_norm": 0.29606109857559204, "learning_rate": 0.00019229264657957902, "loss": 11.664, "step": 18054 }, { "epoch": 0.3779410533366826, "grad_norm": 0.30579257011413574, "learning_rate": 0.00019229180248423876, "loss": 11.6744, "step": 18055 }, { "epoch": 0.37796198610064474, "grad_norm": 0.2679774761199951, "learning_rate": 0.000192290958344532, "loss": 11.668, "step": 18056 }, { "epoch": 0.3779829188646069, "grad_norm": 0.2594255805015564, "learning_rate": 0.00019229011416045907, "loss": 11.6659, "step": 18057 }, { "epoch": 0.37800385162856903, "grad_norm": 0.2587157189846039, "learning_rate": 0.0001922892699320205, "loss": 11.6735, "step": 18058 }, { "epoch": 0.3780247843925312, "grad_norm": 0.2976538836956024, "learning_rate": 0.0001922884256592166, "loss": 11.6824, "step": 18059 }, { "epoch": 0.3780457171564933, "grad_norm": 0.3022438585758209, "learning_rate": 0.0001922875813420478, "loss": 11.6812, "step": 18060 }, { "epoch": 0.3780666499204555, "grad_norm": 0.35717734694480896, "learning_rate": 0.0001922867369805145, "loss": 11.6689, "step": 18061 }, { "epoch": 0.37808758268441767, "grad_norm": 0.24636210501194, "learning_rate": 0.00019228589257461713, "loss": 11.6705, "step": 18062 }, { "epoch": 0.3781085154483798, "grad_norm": 0.23304253816604614, "learning_rate": 0.0001922850481243561, "loss": 11.6716, "step": 18063 }, { "epoch": 0.37812944821234196, "grad_norm": 0.2822917699813843, "learning_rate": 0.0001922842036297318, "loss": 11.6876, "step": 18064 }, { "epoch": 0.3781503809763041, "grad_norm": 0.28195783495903015, "learning_rate": 0.0001922833590907446, "loss": 11.6647, "step": 18065 }, { "epoch": 0.37817131374026625, "grad_norm": 0.40102675557136536, "learning_rate": 0.00019228251450739494, "loss": 11.673, "step": 18066 }, { "epoch": 0.3781922465042284, "grad_norm": 0.33246099948883057, "learning_rate": 0.00019228166987968323, "loss": 11.683, "step": 18067 }, { "epoch": 0.3782131792681906, "grad_norm": 0.3195124864578247, "learning_rate": 0.0001922808252076099, "loss": 11.6844, "step": 18068 }, { "epoch": 0.37823411203215274, "grad_norm": 0.3433782756328583, "learning_rate": 0.0001922799804911753, "loss": 11.6899, "step": 18069 }, { "epoch": 0.3782550447961149, "grad_norm": 0.2953408360481262, "learning_rate": 0.00019227913573037988, "loss": 11.6881, "step": 18070 }, { "epoch": 0.378275977560077, "grad_norm": 0.33549538254737854, "learning_rate": 0.000192278290925224, "loss": 11.6836, "step": 18071 }, { "epoch": 0.37829691032403917, "grad_norm": 0.30524682998657227, "learning_rate": 0.00019227744607570814, "loss": 11.6611, "step": 18072 }, { "epoch": 0.3783178430880013, "grad_norm": 0.2785753905773163, "learning_rate": 0.00019227660118183263, "loss": 11.679, "step": 18073 }, { "epoch": 0.3783387758519635, "grad_norm": 0.24909207224845886, "learning_rate": 0.00019227575624359793, "loss": 11.6794, "step": 18074 }, { "epoch": 0.37835970861592566, "grad_norm": 0.31479525566101074, "learning_rate": 0.0001922749112610044, "loss": 11.6818, "step": 18075 }, { "epoch": 0.3783806413798878, "grad_norm": 0.27288973331451416, "learning_rate": 0.0001922740662340525, "loss": 11.663, "step": 18076 }, { "epoch": 0.37840157414384995, "grad_norm": 0.27137959003448486, "learning_rate": 0.0001922732211627426, "loss": 11.6805, "step": 18077 }, { "epoch": 0.3784225069078121, "grad_norm": 0.2960326373577118, "learning_rate": 0.0001922723760470751, "loss": 11.6627, "step": 18078 }, { "epoch": 0.37844343967177424, "grad_norm": 0.2908734679222107, "learning_rate": 0.00019227153088705043, "loss": 11.6691, "step": 18079 }, { "epoch": 0.37846437243573644, "grad_norm": 0.276556134223938, "learning_rate": 0.00019227068568266898, "loss": 11.6753, "step": 18080 }, { "epoch": 0.3784853051996986, "grad_norm": 0.3280506730079651, "learning_rate": 0.00019226984043393117, "loss": 11.653, "step": 18081 }, { "epoch": 0.37850623796366073, "grad_norm": 0.30517512559890747, "learning_rate": 0.0001922689951408374, "loss": 11.6765, "step": 18082 }, { "epoch": 0.3785271707276229, "grad_norm": 0.3180367350578308, "learning_rate": 0.0001922681498033881, "loss": 11.6765, "step": 18083 }, { "epoch": 0.378548103491585, "grad_norm": 0.2844317853450775, "learning_rate": 0.00019226730442158357, "loss": 11.6818, "step": 18084 }, { "epoch": 0.37856903625554716, "grad_norm": 0.33167487382888794, "learning_rate": 0.00019226645899542434, "loss": 11.673, "step": 18085 }, { "epoch": 0.3785899690195093, "grad_norm": 0.2984837293624878, "learning_rate": 0.00019226561352491078, "loss": 11.6731, "step": 18086 }, { "epoch": 0.3786109017834715, "grad_norm": 0.32490071654319763, "learning_rate": 0.00019226476801004333, "loss": 11.6786, "step": 18087 }, { "epoch": 0.37863183454743365, "grad_norm": 0.23371869325637817, "learning_rate": 0.0001922639224508223, "loss": 11.677, "step": 18088 }, { "epoch": 0.3786527673113958, "grad_norm": 0.3200162351131439, "learning_rate": 0.00019226307684724818, "loss": 11.6652, "step": 18089 }, { "epoch": 0.37867370007535794, "grad_norm": 0.2688886523246765, "learning_rate": 0.00019226223119932134, "loss": 11.6696, "step": 18090 }, { "epoch": 0.3786946328393201, "grad_norm": 0.3171798884868622, "learning_rate": 0.0001922613855070422, "loss": 11.679, "step": 18091 }, { "epoch": 0.37871556560328223, "grad_norm": 0.2826230525970459, "learning_rate": 0.00019226053977041115, "loss": 11.6722, "step": 18092 }, { "epoch": 0.37873649836724443, "grad_norm": 0.32635363936424255, "learning_rate": 0.00019225969398942866, "loss": 11.6795, "step": 18093 }, { "epoch": 0.3787574311312066, "grad_norm": 0.35164275765419006, "learning_rate": 0.00019225884816409503, "loss": 11.6889, "step": 18094 }, { "epoch": 0.3787783638951687, "grad_norm": 0.22111919522285461, "learning_rate": 0.00019225800229441073, "loss": 11.6798, "step": 18095 }, { "epoch": 0.37879929665913087, "grad_norm": 0.32319512963294983, "learning_rate": 0.0001922571563803762, "loss": 11.6873, "step": 18096 }, { "epoch": 0.378820229423093, "grad_norm": 0.2303595095872879, "learning_rate": 0.00019225631042199174, "loss": 11.6642, "step": 18097 }, { "epoch": 0.37884116218705516, "grad_norm": 0.24916952848434448, "learning_rate": 0.0001922554644192579, "loss": 11.6772, "step": 18098 }, { "epoch": 0.37886209495101736, "grad_norm": 0.2982698678970337, "learning_rate": 0.00019225461837217496, "loss": 11.6803, "step": 18099 }, { "epoch": 0.3788830277149795, "grad_norm": 0.29239457845687866, "learning_rate": 0.00019225377228074337, "loss": 11.6728, "step": 18100 }, { "epoch": 0.37890396047894165, "grad_norm": 0.269674152135849, "learning_rate": 0.00019225292614496355, "loss": 11.6669, "step": 18101 }, { "epoch": 0.3789248932429038, "grad_norm": 0.35402193665504456, "learning_rate": 0.00019225207996483593, "loss": 11.6771, "step": 18102 }, { "epoch": 0.37894582600686594, "grad_norm": 0.25548598170280457, "learning_rate": 0.00019225123374036086, "loss": 11.6798, "step": 18103 }, { "epoch": 0.3789667587708281, "grad_norm": 0.23319534957408905, "learning_rate": 0.00019225038747153875, "loss": 11.6589, "step": 18104 }, { "epoch": 0.3789876915347902, "grad_norm": 0.3343031704425812, "learning_rate": 0.00019224954115837006, "loss": 11.6706, "step": 18105 }, { "epoch": 0.37900862429875243, "grad_norm": 0.2675975561141968, "learning_rate": 0.00019224869480085518, "loss": 11.6713, "step": 18106 }, { "epoch": 0.3790295570627146, "grad_norm": 0.2857496738433838, "learning_rate": 0.00019224784839899447, "loss": 11.6537, "step": 18107 }, { "epoch": 0.3790504898266767, "grad_norm": 0.2375916689634323, "learning_rate": 0.0001922470019527884, "loss": 11.6723, "step": 18108 }, { "epoch": 0.37907142259063886, "grad_norm": 0.23354484140872955, "learning_rate": 0.00019224615546223735, "loss": 11.6709, "step": 18109 }, { "epoch": 0.379092355354601, "grad_norm": 0.3365464210510254, "learning_rate": 0.0001922453089273417, "loss": 11.6697, "step": 18110 }, { "epoch": 0.37911328811856315, "grad_norm": 0.26889941096305847, "learning_rate": 0.00019224446234810189, "loss": 11.6514, "step": 18111 }, { "epoch": 0.37913422088252535, "grad_norm": 0.23719218373298645, "learning_rate": 0.0001922436157245183, "loss": 11.6633, "step": 18112 }, { "epoch": 0.3791551536464875, "grad_norm": 0.3784976303577423, "learning_rate": 0.0001922427690565914, "loss": 11.6754, "step": 18113 }, { "epoch": 0.37917608641044964, "grad_norm": 0.36801716685295105, "learning_rate": 0.00019224192234432153, "loss": 11.6737, "step": 18114 }, { "epoch": 0.3791970191744118, "grad_norm": 0.32900193333625793, "learning_rate": 0.00019224107558770908, "loss": 11.6646, "step": 18115 }, { "epoch": 0.37921795193837393, "grad_norm": 0.24194832146167755, "learning_rate": 0.00019224022878675456, "loss": 11.6715, "step": 18116 }, { "epoch": 0.3792388847023361, "grad_norm": 0.32520705461502075, "learning_rate": 0.0001922393819414583, "loss": 11.6733, "step": 18117 }, { "epoch": 0.3792598174662983, "grad_norm": 0.4168156385421753, "learning_rate": 0.0001922385350518207, "loss": 11.6765, "step": 18118 }, { "epoch": 0.3792807502302604, "grad_norm": 0.2864810824394226, "learning_rate": 0.0001922376881178422, "loss": 11.671, "step": 18119 }, { "epoch": 0.37930168299422257, "grad_norm": 0.3743269145488739, "learning_rate": 0.0001922368411395232, "loss": 11.6915, "step": 18120 }, { "epoch": 0.3793226157581847, "grad_norm": 0.2598487436771393, "learning_rate": 0.00019223599411686407, "loss": 11.6611, "step": 18121 }, { "epoch": 0.37934354852214686, "grad_norm": 0.28806453943252563, "learning_rate": 0.00019223514704986528, "loss": 11.6795, "step": 18122 }, { "epoch": 0.379364481286109, "grad_norm": 0.255776584148407, "learning_rate": 0.0001922342999385272, "loss": 11.6676, "step": 18123 }, { "epoch": 0.37938541405007115, "grad_norm": 0.29479020833969116, "learning_rate": 0.00019223345278285024, "loss": 11.6708, "step": 18124 }, { "epoch": 0.37940634681403335, "grad_norm": 0.2593539357185364, "learning_rate": 0.00019223260558283483, "loss": 11.6656, "step": 18125 }, { "epoch": 0.3794272795779955, "grad_norm": 0.2754042148590088, "learning_rate": 0.0001922317583384813, "loss": 11.6842, "step": 18126 }, { "epoch": 0.37944821234195764, "grad_norm": 0.33686771988868713, "learning_rate": 0.00019223091104979018, "loss": 11.6713, "step": 18127 }, { "epoch": 0.3794691451059198, "grad_norm": 0.33100590109825134, "learning_rate": 0.0001922300637167618, "loss": 11.677, "step": 18128 }, { "epoch": 0.3794900778698819, "grad_norm": 0.3040931522846222, "learning_rate": 0.00019222921633939654, "loss": 11.6764, "step": 18129 }, { "epoch": 0.37951101063384407, "grad_norm": 0.26830437779426575, "learning_rate": 0.00019222836891769489, "loss": 11.6793, "step": 18130 }, { "epoch": 0.37953194339780627, "grad_norm": 0.24724052846431732, "learning_rate": 0.00019222752145165722, "loss": 11.6855, "step": 18131 }, { "epoch": 0.3795528761617684, "grad_norm": 0.3051457107067108, "learning_rate": 0.0001922266739412839, "loss": 11.6695, "step": 18132 }, { "epoch": 0.37957380892573056, "grad_norm": 0.2401677668094635, "learning_rate": 0.00019222582638657535, "loss": 11.684, "step": 18133 }, { "epoch": 0.3795947416896927, "grad_norm": 0.21111756563186646, "learning_rate": 0.00019222497878753204, "loss": 11.6646, "step": 18134 }, { "epoch": 0.37961567445365485, "grad_norm": 0.30028265714645386, "learning_rate": 0.00019222413114415433, "loss": 11.6716, "step": 18135 }, { "epoch": 0.379636607217617, "grad_norm": 0.25958195328712463, "learning_rate": 0.00019222328345644264, "loss": 11.6764, "step": 18136 }, { "epoch": 0.3796575399815792, "grad_norm": 0.2560279667377472, "learning_rate": 0.00019222243572439735, "loss": 11.6723, "step": 18137 }, { "epoch": 0.37967847274554134, "grad_norm": 0.25420552492141724, "learning_rate": 0.00019222158794801887, "loss": 11.6702, "step": 18138 }, { "epoch": 0.3796994055095035, "grad_norm": 0.3548143804073334, "learning_rate": 0.00019222074012730767, "loss": 11.6697, "step": 18139 }, { "epoch": 0.37972033827346563, "grad_norm": 0.2673226594924927, "learning_rate": 0.00019221989226226408, "loss": 11.6696, "step": 18140 }, { "epoch": 0.3797412710374278, "grad_norm": 0.23589597642421722, "learning_rate": 0.00019221904435288856, "loss": 11.6691, "step": 18141 }, { "epoch": 0.3797622038013899, "grad_norm": 0.2451375275850296, "learning_rate": 0.00019221819639918147, "loss": 11.6555, "step": 18142 }, { "epoch": 0.37978313656535206, "grad_norm": 0.38410183787345886, "learning_rate": 0.00019221734840114328, "loss": 11.6665, "step": 18143 }, { "epoch": 0.37980406932931426, "grad_norm": 0.2934235632419586, "learning_rate": 0.00019221650035877432, "loss": 11.6669, "step": 18144 }, { "epoch": 0.3798250020932764, "grad_norm": 0.30729344487190247, "learning_rate": 0.00019221565227207509, "loss": 11.6841, "step": 18145 }, { "epoch": 0.37984593485723855, "grad_norm": 0.2717798948287964, "learning_rate": 0.00019221480414104592, "loss": 11.6787, "step": 18146 }, { "epoch": 0.3798668676212007, "grad_norm": 0.24541229009628296, "learning_rate": 0.00019221395596568725, "loss": 11.6684, "step": 18147 }, { "epoch": 0.37988780038516284, "grad_norm": 0.23612216114997864, "learning_rate": 0.00019221310774599948, "loss": 11.6667, "step": 18148 }, { "epoch": 0.379908733149125, "grad_norm": 0.31096556782722473, "learning_rate": 0.00019221225948198303, "loss": 11.6761, "step": 18149 }, { "epoch": 0.3799296659130872, "grad_norm": 0.28563541173934937, "learning_rate": 0.00019221141117363826, "loss": 11.6717, "step": 18150 }, { "epoch": 0.37995059867704933, "grad_norm": 0.3109777867794037, "learning_rate": 0.00019221056282096565, "loss": 11.6694, "step": 18151 }, { "epoch": 0.3799715314410115, "grad_norm": 0.23242390155792236, "learning_rate": 0.00019220971442396559, "loss": 11.6627, "step": 18152 }, { "epoch": 0.3799924642049736, "grad_norm": 0.28317707777023315, "learning_rate": 0.00019220886598263846, "loss": 11.6599, "step": 18153 }, { "epoch": 0.38001339696893577, "grad_norm": 0.2987602651119232, "learning_rate": 0.00019220801749698466, "loss": 11.672, "step": 18154 }, { "epoch": 0.3800343297328979, "grad_norm": 0.287335067987442, "learning_rate": 0.00019220716896700462, "loss": 11.6657, "step": 18155 }, { "epoch": 0.38005526249686006, "grad_norm": 0.22566251456737518, "learning_rate": 0.00019220632039269878, "loss": 11.6561, "step": 18156 }, { "epoch": 0.38007619526082226, "grad_norm": 0.2618602514266968, "learning_rate": 0.00019220547177406746, "loss": 11.6636, "step": 18157 }, { "epoch": 0.3800971280247844, "grad_norm": 0.3272906243801117, "learning_rate": 0.00019220462311111118, "loss": 11.6667, "step": 18158 }, { "epoch": 0.38011806078874655, "grad_norm": 0.31141701340675354, "learning_rate": 0.00019220377440383026, "loss": 11.6386, "step": 18159 }, { "epoch": 0.3801389935527087, "grad_norm": 0.24023424088954926, "learning_rate": 0.00019220292565222513, "loss": 11.6679, "step": 18160 }, { "epoch": 0.38015992631667084, "grad_norm": 0.25119492411613464, "learning_rate": 0.00019220207685629624, "loss": 11.6554, "step": 18161 }, { "epoch": 0.380180859080633, "grad_norm": 0.2843497097492218, "learning_rate": 0.00019220122801604395, "loss": 11.6837, "step": 18162 }, { "epoch": 0.3802017918445952, "grad_norm": 0.24520325660705566, "learning_rate": 0.00019220037913146866, "loss": 11.6674, "step": 18163 }, { "epoch": 0.3802227246085573, "grad_norm": 0.3088889718055725, "learning_rate": 0.00019219953020257083, "loss": 11.6839, "step": 18164 }, { "epoch": 0.3802436573725195, "grad_norm": 0.26593324542045593, "learning_rate": 0.0001921986812293508, "loss": 11.6693, "step": 18165 }, { "epoch": 0.3802645901364816, "grad_norm": 0.23458650708198547, "learning_rate": 0.00019219783221180906, "loss": 11.6845, "step": 18166 }, { "epoch": 0.38028552290044376, "grad_norm": 0.2472279816865921, "learning_rate": 0.00019219698314994595, "loss": 11.69, "step": 18167 }, { "epoch": 0.3803064556644059, "grad_norm": 0.254300594329834, "learning_rate": 0.00019219613404376188, "loss": 11.6723, "step": 18168 }, { "epoch": 0.3803273884283681, "grad_norm": 0.28825703263282776, "learning_rate": 0.00019219528489325733, "loss": 11.6698, "step": 18169 }, { "epoch": 0.38034832119233025, "grad_norm": 0.3062630891799927, "learning_rate": 0.0001921944356984326, "loss": 11.6667, "step": 18170 }, { "epoch": 0.3803692539562924, "grad_norm": 0.25847378373146057, "learning_rate": 0.00019219358645928823, "loss": 11.6713, "step": 18171 }, { "epoch": 0.38039018672025454, "grad_norm": 0.274936318397522, "learning_rate": 0.0001921927371758245, "loss": 11.6786, "step": 18172 }, { "epoch": 0.3804111194842167, "grad_norm": 0.22787627577781677, "learning_rate": 0.0001921918878480419, "loss": 11.667, "step": 18173 }, { "epoch": 0.38043205224817883, "grad_norm": 0.3405802249908447, "learning_rate": 0.0001921910384759408, "loss": 11.6855, "step": 18174 }, { "epoch": 0.380452985012141, "grad_norm": 0.26599302887916565, "learning_rate": 0.0001921901890595216, "loss": 11.6771, "step": 18175 }, { "epoch": 0.3804739177761032, "grad_norm": 0.26785653829574585, "learning_rate": 0.00019218933959878477, "loss": 11.6855, "step": 18176 }, { "epoch": 0.3804948505400653, "grad_norm": 0.2755815088748932, "learning_rate": 0.00019218849009373066, "loss": 11.6635, "step": 18177 }, { "epoch": 0.38051578330402747, "grad_norm": 0.27136537432670593, "learning_rate": 0.00019218764054435968, "loss": 11.6953, "step": 18178 }, { "epoch": 0.3805367160679896, "grad_norm": 0.24845673143863678, "learning_rate": 0.0001921867909506723, "loss": 11.6611, "step": 18179 }, { "epoch": 0.38055764883195176, "grad_norm": 0.341525137424469, "learning_rate": 0.00019218594131266886, "loss": 11.6768, "step": 18180 }, { "epoch": 0.3805785815959139, "grad_norm": 0.24460521340370178, "learning_rate": 0.00019218509163034976, "loss": 11.6719, "step": 18181 }, { "epoch": 0.3805995143598761, "grad_norm": 0.2572365403175354, "learning_rate": 0.00019218424190371546, "loss": 11.6724, "step": 18182 }, { "epoch": 0.38062044712383825, "grad_norm": 0.28233960270881653, "learning_rate": 0.00019218339213276635, "loss": 11.6728, "step": 18183 }, { "epoch": 0.3806413798878004, "grad_norm": 0.32036063075065613, "learning_rate": 0.0001921825423175028, "loss": 11.6628, "step": 18184 }, { "epoch": 0.38066231265176254, "grad_norm": 0.3010023832321167, "learning_rate": 0.0001921816924579253, "loss": 11.664, "step": 18185 }, { "epoch": 0.3806832454157247, "grad_norm": 0.29439929127693176, "learning_rate": 0.0001921808425540342, "loss": 11.6849, "step": 18186 }, { "epoch": 0.3807041781796868, "grad_norm": 0.2892305552959442, "learning_rate": 0.00019217999260582995, "loss": 11.6745, "step": 18187 }, { "epoch": 0.380725110943649, "grad_norm": 0.24805377423763275, "learning_rate": 0.0001921791426133129, "loss": 11.6749, "step": 18188 }, { "epoch": 0.38074604370761117, "grad_norm": 0.2763669788837433, "learning_rate": 0.0001921782925764835, "loss": 11.6754, "step": 18189 }, { "epoch": 0.3807669764715733, "grad_norm": 0.3324623107910156, "learning_rate": 0.0001921774424953421, "loss": 11.6818, "step": 18190 }, { "epoch": 0.38078790923553546, "grad_norm": 0.3288341164588928, "learning_rate": 0.00019217659236988924, "loss": 11.681, "step": 18191 }, { "epoch": 0.3808088419994976, "grad_norm": 0.2921867370605469, "learning_rate": 0.00019217574220012517, "loss": 11.6886, "step": 18192 }, { "epoch": 0.38082977476345975, "grad_norm": 0.24526049196720123, "learning_rate": 0.0001921748919860504, "loss": 11.6637, "step": 18193 }, { "epoch": 0.3808507075274219, "grad_norm": 0.3436034619808197, "learning_rate": 0.0001921740417276653, "loss": 11.6826, "step": 18194 }, { "epoch": 0.3808716402913841, "grad_norm": 0.2555503845214844, "learning_rate": 0.00019217319142497035, "loss": 11.6774, "step": 18195 }, { "epoch": 0.38089257305534624, "grad_norm": 0.261605829000473, "learning_rate": 0.00019217234107796584, "loss": 11.6823, "step": 18196 }, { "epoch": 0.3809135058193084, "grad_norm": 0.25318193435668945, "learning_rate": 0.00019217149068665227, "loss": 11.6688, "step": 18197 }, { "epoch": 0.38093443858327053, "grad_norm": 0.2746317684650421, "learning_rate": 0.00019217064025102998, "loss": 11.6774, "step": 18198 }, { "epoch": 0.3809553713472327, "grad_norm": 0.3097745180130005, "learning_rate": 0.00019216978977109944, "loss": 11.6648, "step": 18199 }, { "epoch": 0.3809763041111948, "grad_norm": 0.2930777966976166, "learning_rate": 0.00019216893924686103, "loss": 11.672, "step": 18200 }, { "epoch": 0.380997236875157, "grad_norm": 0.3256111443042755, "learning_rate": 0.00019216808867831517, "loss": 11.6792, "step": 18201 }, { "epoch": 0.38101816963911916, "grad_norm": 0.24501588940620422, "learning_rate": 0.00019216723806546226, "loss": 11.6861, "step": 18202 }, { "epoch": 0.3810391024030813, "grad_norm": 0.2576456665992737, "learning_rate": 0.0001921663874083027, "loss": 11.6661, "step": 18203 }, { "epoch": 0.38106003516704345, "grad_norm": 0.30255427956581116, "learning_rate": 0.00019216553670683693, "loss": 11.6721, "step": 18204 }, { "epoch": 0.3810809679310056, "grad_norm": 0.2650638222694397, "learning_rate": 0.00019216468596106533, "loss": 11.6717, "step": 18205 }, { "epoch": 0.38110190069496774, "grad_norm": 0.2924027144908905, "learning_rate": 0.00019216383517098833, "loss": 11.6759, "step": 18206 }, { "epoch": 0.38112283345892994, "grad_norm": 0.2724054157733917, "learning_rate": 0.00019216298433660632, "loss": 11.6627, "step": 18207 }, { "epoch": 0.3811437662228921, "grad_norm": 0.25601568818092346, "learning_rate": 0.00019216213345791972, "loss": 11.6773, "step": 18208 }, { "epoch": 0.38116469898685423, "grad_norm": 0.2518558204174042, "learning_rate": 0.0001921612825349289, "loss": 11.6745, "step": 18209 }, { "epoch": 0.3811856317508164, "grad_norm": 0.2723217010498047, "learning_rate": 0.00019216043156763436, "loss": 11.6695, "step": 18210 }, { "epoch": 0.3812065645147785, "grad_norm": 0.2772351801395416, "learning_rate": 0.00019215958055603642, "loss": 11.6886, "step": 18211 }, { "epoch": 0.38122749727874067, "grad_norm": 0.3130882680416107, "learning_rate": 0.00019215872950013554, "loss": 11.6648, "step": 18212 }, { "epoch": 0.3812484300427028, "grad_norm": 0.2602485716342926, "learning_rate": 0.0001921578783999321, "loss": 11.6811, "step": 18213 }, { "epoch": 0.381269362806665, "grad_norm": 0.4400673806667328, "learning_rate": 0.0001921570272554265, "loss": 11.6664, "step": 18214 }, { "epoch": 0.38129029557062716, "grad_norm": 0.276811420917511, "learning_rate": 0.0001921561760666192, "loss": 11.6949, "step": 18215 }, { "epoch": 0.3813112283345893, "grad_norm": 0.2773332893848419, "learning_rate": 0.00019215532483351057, "loss": 11.6631, "step": 18216 }, { "epoch": 0.38133216109855145, "grad_norm": 0.2777496874332428, "learning_rate": 0.00019215447355610103, "loss": 11.6863, "step": 18217 }, { "epoch": 0.3813530938625136, "grad_norm": 0.28994497656822205, "learning_rate": 0.00019215362223439102, "loss": 11.6806, "step": 18218 }, { "epoch": 0.38137402662647574, "grad_norm": 0.3094833791255951, "learning_rate": 0.0001921527708683809, "loss": 11.6571, "step": 18219 }, { "epoch": 0.38139495939043794, "grad_norm": 0.35277703404426575, "learning_rate": 0.00019215191945807104, "loss": 11.6623, "step": 18220 }, { "epoch": 0.3814158921544001, "grad_norm": 0.3179609775543213, "learning_rate": 0.00019215106800346196, "loss": 11.6694, "step": 18221 }, { "epoch": 0.3814368249183622, "grad_norm": 0.24102617800235748, "learning_rate": 0.000192150216504554, "loss": 11.6724, "step": 18222 }, { "epoch": 0.38145775768232437, "grad_norm": 0.27031540870666504, "learning_rate": 0.0001921493649613476, "loss": 11.677, "step": 18223 }, { "epoch": 0.3814786904462865, "grad_norm": 0.4377216696739197, "learning_rate": 0.00019214851337384311, "loss": 11.6658, "step": 18224 }, { "epoch": 0.38149962321024866, "grad_norm": 0.292521208524704, "learning_rate": 0.000192147661742041, "loss": 11.6732, "step": 18225 }, { "epoch": 0.38152055597421086, "grad_norm": 0.24513296782970428, "learning_rate": 0.0001921468100659417, "loss": 11.6573, "step": 18226 }, { "epoch": 0.381541488738173, "grad_norm": 0.346788227558136, "learning_rate": 0.00019214595834554556, "loss": 11.68, "step": 18227 }, { "epoch": 0.38156242150213515, "grad_norm": 0.2687370181083679, "learning_rate": 0.00019214510658085301, "loss": 11.6767, "step": 18228 }, { "epoch": 0.3815833542660973, "grad_norm": 0.2578287124633789, "learning_rate": 0.00019214425477186446, "loss": 11.6902, "step": 18229 }, { "epoch": 0.38160428703005944, "grad_norm": 0.3608991801738739, "learning_rate": 0.00019214340291858032, "loss": 11.6687, "step": 18230 }, { "epoch": 0.3816252197940216, "grad_norm": 0.3009984791278839, "learning_rate": 0.000192142551021001, "loss": 11.6766, "step": 18231 }, { "epoch": 0.38164615255798373, "grad_norm": 0.3099294900894165, "learning_rate": 0.00019214169907912692, "loss": 11.6568, "step": 18232 }, { "epoch": 0.38166708532194593, "grad_norm": 0.26412031054496765, "learning_rate": 0.00019214084709295848, "loss": 11.6807, "step": 18233 }, { "epoch": 0.3816880180859081, "grad_norm": 0.28106412291526794, "learning_rate": 0.00019213999506249608, "loss": 11.6796, "step": 18234 }, { "epoch": 0.3817089508498702, "grad_norm": 0.29719415307044983, "learning_rate": 0.0001921391429877401, "loss": 11.6902, "step": 18235 }, { "epoch": 0.38172988361383237, "grad_norm": 0.3013495206832886, "learning_rate": 0.000192138290868691, "loss": 11.6811, "step": 18236 }, { "epoch": 0.3817508163777945, "grad_norm": 0.2453051060438156, "learning_rate": 0.00019213743870534922, "loss": 11.6681, "step": 18237 }, { "epoch": 0.38177174914175666, "grad_norm": 0.31543639302253723, "learning_rate": 0.00019213658649771512, "loss": 11.6804, "step": 18238 }, { "epoch": 0.38179268190571886, "grad_norm": 0.28973570466041565, "learning_rate": 0.0001921357342457891, "loss": 11.6587, "step": 18239 }, { "epoch": 0.381813614669681, "grad_norm": 0.26238036155700684, "learning_rate": 0.0001921348819495716, "loss": 11.6839, "step": 18240 }, { "epoch": 0.38183454743364315, "grad_norm": 0.2820311486721039, "learning_rate": 0.00019213402960906298, "loss": 11.6664, "step": 18241 }, { "epoch": 0.3818554801976053, "grad_norm": 0.35297295451164246, "learning_rate": 0.0001921331772242637, "loss": 11.6727, "step": 18242 }, { "epoch": 0.38187641296156744, "grad_norm": 0.28352704644203186, "learning_rate": 0.00019213232479517415, "loss": 11.6913, "step": 18243 }, { "epoch": 0.3818973457255296, "grad_norm": 0.27129295468330383, "learning_rate": 0.00019213147232179477, "loss": 11.6775, "step": 18244 }, { "epoch": 0.3819182784894917, "grad_norm": 0.26901790499687195, "learning_rate": 0.00019213061980412596, "loss": 11.6871, "step": 18245 }, { "epoch": 0.3819392112534539, "grad_norm": 0.2948000431060791, "learning_rate": 0.00019212976724216808, "loss": 11.68, "step": 18246 }, { "epoch": 0.38196014401741607, "grad_norm": 0.34674426913261414, "learning_rate": 0.00019212891463592157, "loss": 11.6904, "step": 18247 }, { "epoch": 0.3819810767813782, "grad_norm": 0.36427563428878784, "learning_rate": 0.00019212806198538686, "loss": 11.6837, "step": 18248 }, { "epoch": 0.38200200954534036, "grad_norm": 0.23590686917304993, "learning_rate": 0.00019212720929056434, "loss": 11.666, "step": 18249 }, { "epoch": 0.3820229423093025, "grad_norm": 0.2813338339328766, "learning_rate": 0.00019212635655145444, "loss": 11.6772, "step": 18250 }, { "epoch": 0.38204387507326465, "grad_norm": 0.27228057384490967, "learning_rate": 0.00019212550376805754, "loss": 11.6584, "step": 18251 }, { "epoch": 0.38206480783722685, "grad_norm": 0.29217541217803955, "learning_rate": 0.00019212465094037406, "loss": 11.677, "step": 18252 }, { "epoch": 0.382085740601189, "grad_norm": 0.2618221640586853, "learning_rate": 0.0001921237980684044, "loss": 11.6674, "step": 18253 }, { "epoch": 0.38210667336515114, "grad_norm": 0.31023862957954407, "learning_rate": 0.00019212294515214904, "loss": 11.6831, "step": 18254 }, { "epoch": 0.3821276061291133, "grad_norm": 0.4072962999343872, "learning_rate": 0.00019212209219160827, "loss": 11.6932, "step": 18255 }, { "epoch": 0.38214853889307543, "grad_norm": 0.33970215916633606, "learning_rate": 0.0001921212391867826, "loss": 11.6781, "step": 18256 }, { "epoch": 0.3821694716570376, "grad_norm": 0.3448624014854431, "learning_rate": 0.0001921203861376724, "loss": 11.6971, "step": 18257 }, { "epoch": 0.3821904044209998, "grad_norm": 0.3590269982814789, "learning_rate": 0.00019211953304427808, "loss": 11.6911, "step": 18258 }, { "epoch": 0.3822113371849619, "grad_norm": 0.29711246490478516, "learning_rate": 0.00019211867990660003, "loss": 11.6659, "step": 18259 }, { "epoch": 0.38223226994892406, "grad_norm": 0.2922630310058594, "learning_rate": 0.00019211782672463873, "loss": 11.6912, "step": 18260 }, { "epoch": 0.3822532027128862, "grad_norm": 0.27859562635421753, "learning_rate": 0.00019211697349839451, "loss": 11.6587, "step": 18261 }, { "epoch": 0.38227413547684835, "grad_norm": 0.31857961416244507, "learning_rate": 0.00019211612022786785, "loss": 11.6703, "step": 18262 }, { "epoch": 0.3822950682408105, "grad_norm": 0.28431886434555054, "learning_rate": 0.00019211526691305906, "loss": 11.6723, "step": 18263 }, { "epoch": 0.38231600100477264, "grad_norm": 0.3695206344127655, "learning_rate": 0.0001921144135539687, "loss": 11.6894, "step": 18264 }, { "epoch": 0.38233693376873484, "grad_norm": 0.2944813072681427, "learning_rate": 0.00019211356015059704, "loss": 11.6949, "step": 18265 }, { "epoch": 0.382357866532697, "grad_norm": 0.2831137776374817, "learning_rate": 0.00019211270670294454, "loss": 11.6785, "step": 18266 }, { "epoch": 0.38237879929665913, "grad_norm": 0.3045814633369446, "learning_rate": 0.00019211185321101163, "loss": 11.6758, "step": 18267 }, { "epoch": 0.3823997320606213, "grad_norm": 0.281607449054718, "learning_rate": 0.00019211099967479873, "loss": 11.6727, "step": 18268 }, { "epoch": 0.3824206648245834, "grad_norm": 0.39869359135627747, "learning_rate": 0.00019211014609430622, "loss": 11.6786, "step": 18269 }, { "epoch": 0.38244159758854557, "grad_norm": 0.30204474925994873, "learning_rate": 0.0001921092924695345, "loss": 11.661, "step": 18270 }, { "epoch": 0.38246253035250777, "grad_norm": 0.3126935362815857, "learning_rate": 0.00019210843880048398, "loss": 11.6828, "step": 18271 }, { "epoch": 0.3824834631164699, "grad_norm": 0.24742166697978973, "learning_rate": 0.0001921075850871551, "loss": 11.6666, "step": 18272 }, { "epoch": 0.38250439588043206, "grad_norm": 0.3394683599472046, "learning_rate": 0.0001921067313295483, "loss": 11.6652, "step": 18273 }, { "epoch": 0.3825253286443942, "grad_norm": 0.28049197793006897, "learning_rate": 0.0001921058775276639, "loss": 11.6752, "step": 18274 }, { "epoch": 0.38254626140835635, "grad_norm": 0.2357763648033142, "learning_rate": 0.00019210502368150237, "loss": 11.665, "step": 18275 }, { "epoch": 0.3825671941723185, "grad_norm": 0.2935357987880707, "learning_rate": 0.00019210416979106412, "loss": 11.6715, "step": 18276 }, { "epoch": 0.3825881269362807, "grad_norm": 0.33350205421447754, "learning_rate": 0.00019210331585634954, "loss": 11.6838, "step": 18277 }, { "epoch": 0.38260905970024284, "grad_norm": 0.3030647933483124, "learning_rate": 0.00019210246187735907, "loss": 11.6875, "step": 18278 }, { "epoch": 0.382629992464205, "grad_norm": 0.34607070684432983, "learning_rate": 0.0001921016078540931, "loss": 11.6874, "step": 18279 }, { "epoch": 0.3826509252281671, "grad_norm": 0.2811428904533386, "learning_rate": 0.000192100753786552, "loss": 11.6785, "step": 18280 }, { "epoch": 0.38267185799212927, "grad_norm": 0.29527953267097473, "learning_rate": 0.00019209989967473626, "loss": 11.6516, "step": 18281 }, { "epoch": 0.3826927907560914, "grad_norm": 0.24882429838180542, "learning_rate": 0.00019209904551864622, "loss": 11.6663, "step": 18282 }, { "epoch": 0.38271372352005356, "grad_norm": 0.29415711760520935, "learning_rate": 0.00019209819131828235, "loss": 11.67, "step": 18283 }, { "epoch": 0.38273465628401576, "grad_norm": 0.36412808299064636, "learning_rate": 0.00019209733707364502, "loss": 11.6586, "step": 18284 }, { "epoch": 0.3827555890479779, "grad_norm": 0.3353389799594879, "learning_rate": 0.0001920964827847347, "loss": 11.6585, "step": 18285 }, { "epoch": 0.38277652181194005, "grad_norm": 0.3065551221370697, "learning_rate": 0.00019209562845155169, "loss": 11.6775, "step": 18286 }, { "epoch": 0.3827974545759022, "grad_norm": 0.25281497836112976, "learning_rate": 0.0001920947740740965, "loss": 11.6785, "step": 18287 }, { "epoch": 0.38281838733986434, "grad_norm": 0.34929922223091125, "learning_rate": 0.00019209391965236948, "loss": 11.6614, "step": 18288 }, { "epoch": 0.3828393201038265, "grad_norm": 0.38294345140457153, "learning_rate": 0.0001920930651863711, "loss": 11.6818, "step": 18289 }, { "epoch": 0.3828602528677887, "grad_norm": 0.32138752937316895, "learning_rate": 0.00019209221067610172, "loss": 11.6856, "step": 18290 }, { "epoch": 0.38288118563175083, "grad_norm": 0.2733157277107239, "learning_rate": 0.00019209135612156177, "loss": 11.6656, "step": 18291 }, { "epoch": 0.382902118395713, "grad_norm": 0.29253023862838745, "learning_rate": 0.00019209050152275164, "loss": 11.6769, "step": 18292 }, { "epoch": 0.3829230511596751, "grad_norm": 0.24296627938747406, "learning_rate": 0.00019208964687967181, "loss": 11.6882, "step": 18293 }, { "epoch": 0.38294398392363727, "grad_norm": 0.27280285954475403, "learning_rate": 0.0001920887921923226, "loss": 11.6694, "step": 18294 }, { "epoch": 0.3829649166875994, "grad_norm": 0.25386303663253784, "learning_rate": 0.0001920879374607045, "loss": 11.676, "step": 18295 }, { "epoch": 0.3829858494515616, "grad_norm": 0.2889464497566223, "learning_rate": 0.00019208708268481786, "loss": 11.6781, "step": 18296 }, { "epoch": 0.38300678221552376, "grad_norm": 0.39417266845703125, "learning_rate": 0.0001920862278646631, "loss": 11.6698, "step": 18297 }, { "epoch": 0.3830277149794859, "grad_norm": 0.29232481122016907, "learning_rate": 0.00019208537300024068, "loss": 11.6661, "step": 18298 }, { "epoch": 0.38304864774344805, "grad_norm": 0.2427581399679184, "learning_rate": 0.00019208451809155092, "loss": 11.6738, "step": 18299 }, { "epoch": 0.3830695805074102, "grad_norm": 0.2630794048309326, "learning_rate": 0.00019208366313859434, "loss": 11.6812, "step": 18300 }, { "epoch": 0.38309051327137233, "grad_norm": 0.2514550983905792, "learning_rate": 0.00019208280814137128, "loss": 11.6789, "step": 18301 }, { "epoch": 0.3831114460353345, "grad_norm": 0.30618253350257874, "learning_rate": 0.00019208195309988217, "loss": 11.6571, "step": 18302 }, { "epoch": 0.3831323787992967, "grad_norm": 0.2580065131187439, "learning_rate": 0.0001920810980141274, "loss": 11.6655, "step": 18303 }, { "epoch": 0.3831533115632588, "grad_norm": 0.2905290722846985, "learning_rate": 0.00019208024288410743, "loss": 11.6701, "step": 18304 }, { "epoch": 0.38317424432722097, "grad_norm": 0.27523642778396606, "learning_rate": 0.00019207938770982264, "loss": 11.6717, "step": 18305 }, { "epoch": 0.3831951770911831, "grad_norm": 0.22440315783023834, "learning_rate": 0.00019207853249127342, "loss": 11.6674, "step": 18306 }, { "epoch": 0.38321610985514526, "grad_norm": 0.2792311906814575, "learning_rate": 0.0001920776772284602, "loss": 11.6592, "step": 18307 }, { "epoch": 0.3832370426191074, "grad_norm": 0.31435975432395935, "learning_rate": 0.00019207682192138342, "loss": 11.6805, "step": 18308 }, { "epoch": 0.3832579753830696, "grad_norm": 0.2910655736923218, "learning_rate": 0.00019207596657004348, "loss": 11.68, "step": 18309 }, { "epoch": 0.38327890814703175, "grad_norm": 0.30620190501213074, "learning_rate": 0.0001920751111744407, "loss": 11.6814, "step": 18310 }, { "epoch": 0.3832998409109939, "grad_norm": 0.2799266576766968, "learning_rate": 0.00019207425573457566, "loss": 11.6761, "step": 18311 }, { "epoch": 0.38332077367495604, "grad_norm": 0.34116408228874207, "learning_rate": 0.00019207340025044863, "loss": 11.6819, "step": 18312 }, { "epoch": 0.3833417064389182, "grad_norm": 0.30859583616256714, "learning_rate": 0.0001920725447220601, "loss": 11.6674, "step": 18313 }, { "epoch": 0.38336263920288033, "grad_norm": 0.255447119474411, "learning_rate": 0.00019207168914941046, "loss": 11.6776, "step": 18314 }, { "epoch": 0.38338357196684253, "grad_norm": 0.32575979828834534, "learning_rate": 0.00019207083353250007, "loss": 11.6879, "step": 18315 }, { "epoch": 0.3834045047308047, "grad_norm": 0.3013212978839874, "learning_rate": 0.00019206997787132944, "loss": 11.6812, "step": 18316 }, { "epoch": 0.3834254374947668, "grad_norm": 0.28278324007987976, "learning_rate": 0.00019206912216589888, "loss": 11.6697, "step": 18317 }, { "epoch": 0.38344637025872896, "grad_norm": 0.2950734496116638, "learning_rate": 0.00019206826641620886, "loss": 11.689, "step": 18318 }, { "epoch": 0.3834673030226911, "grad_norm": 0.3329458236694336, "learning_rate": 0.0001920674106222598, "loss": 11.6642, "step": 18319 }, { "epoch": 0.38348823578665325, "grad_norm": 0.31139785051345825, "learning_rate": 0.00019206655478405207, "loss": 11.6597, "step": 18320 }, { "epoch": 0.3835091685506154, "grad_norm": 0.3961757719516754, "learning_rate": 0.0001920656989015861, "loss": 11.6784, "step": 18321 }, { "epoch": 0.3835301013145776, "grad_norm": 0.2735694348812103, "learning_rate": 0.0001920648429748623, "loss": 11.6866, "step": 18322 }, { "epoch": 0.38355103407853974, "grad_norm": 0.2898713946342468, "learning_rate": 0.00019206398700388107, "loss": 11.6722, "step": 18323 }, { "epoch": 0.3835719668425019, "grad_norm": 0.3042747974395752, "learning_rate": 0.00019206313098864289, "loss": 11.6816, "step": 18324 }, { "epoch": 0.38359289960646403, "grad_norm": 0.25636914372444153, "learning_rate": 0.00019206227492914808, "loss": 11.6805, "step": 18325 }, { "epoch": 0.3836138323704262, "grad_norm": 0.31332799792289734, "learning_rate": 0.00019206141882539707, "loss": 11.6879, "step": 18326 }, { "epoch": 0.3836347651343883, "grad_norm": 0.23607857525348663, "learning_rate": 0.00019206056267739033, "loss": 11.675, "step": 18327 }, { "epoch": 0.3836556978983505, "grad_norm": 0.33789268136024475, "learning_rate": 0.00019205970648512822, "loss": 11.6705, "step": 18328 }, { "epoch": 0.38367663066231267, "grad_norm": 0.27780407667160034, "learning_rate": 0.00019205885024861118, "loss": 11.6784, "step": 18329 }, { "epoch": 0.3836975634262748, "grad_norm": 0.3305244743824005, "learning_rate": 0.0001920579939678396, "loss": 11.6718, "step": 18330 }, { "epoch": 0.38371849619023696, "grad_norm": 0.2713267505168915, "learning_rate": 0.00019205713764281388, "loss": 11.6672, "step": 18331 }, { "epoch": 0.3837394289541991, "grad_norm": 0.2926989793777466, "learning_rate": 0.00019205628127353446, "loss": 11.6625, "step": 18332 }, { "epoch": 0.38376036171816125, "grad_norm": 0.2630443274974823, "learning_rate": 0.00019205542486000173, "loss": 11.6744, "step": 18333 }, { "epoch": 0.38378129448212345, "grad_norm": 0.243269681930542, "learning_rate": 0.00019205456840221612, "loss": 11.6727, "step": 18334 }, { "epoch": 0.3838022272460856, "grad_norm": 0.3132655918598175, "learning_rate": 0.00019205371190017805, "loss": 11.6797, "step": 18335 }, { "epoch": 0.38382316001004774, "grad_norm": 0.35957881808280945, "learning_rate": 0.00019205285535388788, "loss": 11.6807, "step": 18336 }, { "epoch": 0.3838440927740099, "grad_norm": 0.2652471661567688, "learning_rate": 0.0001920519987633461, "loss": 11.6538, "step": 18337 }, { "epoch": 0.383865025537972, "grad_norm": 0.2990303337574005, "learning_rate": 0.00019205114212855303, "loss": 11.6695, "step": 18338 }, { "epoch": 0.38388595830193417, "grad_norm": 0.26648762822151184, "learning_rate": 0.00019205028544950917, "loss": 11.6748, "step": 18339 }, { "epoch": 0.3839068910658963, "grad_norm": 0.3119064271450043, "learning_rate": 0.0001920494287262149, "loss": 11.6821, "step": 18340 }, { "epoch": 0.3839278238298585, "grad_norm": 0.2449491322040558, "learning_rate": 0.00019204857195867059, "loss": 11.6787, "step": 18341 }, { "epoch": 0.38394875659382066, "grad_norm": 0.2700040638446808, "learning_rate": 0.0001920477151468767, "loss": 11.6678, "step": 18342 }, { "epoch": 0.3839696893577828, "grad_norm": 0.26869577169418335, "learning_rate": 0.00019204685829083364, "loss": 11.6804, "step": 18343 }, { "epoch": 0.38399062212174495, "grad_norm": 0.27032768726348877, "learning_rate": 0.0001920460013905418, "loss": 11.6668, "step": 18344 }, { "epoch": 0.3840115548857071, "grad_norm": 0.2876708507537842, "learning_rate": 0.00019204514444600158, "loss": 11.6797, "step": 18345 }, { "epoch": 0.38403248764966924, "grad_norm": 0.2777102291584015, "learning_rate": 0.00019204428745721346, "loss": 11.6726, "step": 18346 }, { "epoch": 0.38405342041363144, "grad_norm": 0.28773191571235657, "learning_rate": 0.00019204343042417778, "loss": 11.6657, "step": 18347 }, { "epoch": 0.3840743531775936, "grad_norm": 0.30407074093818665, "learning_rate": 0.000192042573346895, "loss": 11.6677, "step": 18348 }, { "epoch": 0.38409528594155573, "grad_norm": 0.27294817566871643, "learning_rate": 0.00019204171622536547, "loss": 11.6648, "step": 18349 }, { "epoch": 0.3841162187055179, "grad_norm": 0.2636427879333496, "learning_rate": 0.0001920408590595897, "loss": 11.6531, "step": 18350 }, { "epoch": 0.38413715146948, "grad_norm": 0.3092135488986969, "learning_rate": 0.000192040001849568, "loss": 11.6715, "step": 18351 }, { "epoch": 0.38415808423344217, "grad_norm": 0.25857463479042053, "learning_rate": 0.00019203914459530084, "loss": 11.6669, "step": 18352 }, { "epoch": 0.3841790169974043, "grad_norm": 0.26173311471939087, "learning_rate": 0.0001920382872967886, "loss": 11.6644, "step": 18353 }, { "epoch": 0.3841999497613665, "grad_norm": 0.27956950664520264, "learning_rate": 0.00019203742995403173, "loss": 11.6749, "step": 18354 }, { "epoch": 0.38422088252532866, "grad_norm": 0.2623101770877838, "learning_rate": 0.00019203657256703066, "loss": 11.6779, "step": 18355 }, { "epoch": 0.3842418152892908, "grad_norm": 0.24905812740325928, "learning_rate": 0.0001920357151357857, "loss": 11.6929, "step": 18356 }, { "epoch": 0.38426274805325294, "grad_norm": 0.29691705107688904, "learning_rate": 0.00019203485766029735, "loss": 11.6747, "step": 18357 }, { "epoch": 0.3842836808172151, "grad_norm": 0.27860555052757263, "learning_rate": 0.000192034000140566, "loss": 11.6539, "step": 18358 }, { "epoch": 0.38430461358117723, "grad_norm": 0.2670356035232544, "learning_rate": 0.0001920331425765921, "loss": 11.6484, "step": 18359 }, { "epoch": 0.38432554634513943, "grad_norm": 0.2660782039165497, "learning_rate": 0.00019203228496837596, "loss": 11.6583, "step": 18360 }, { "epoch": 0.3843464791091016, "grad_norm": 0.2380618005990982, "learning_rate": 0.0001920314273159181, "loss": 11.6705, "step": 18361 }, { "epoch": 0.3843674118730637, "grad_norm": 0.25319981575012207, "learning_rate": 0.00019203056961921886, "loss": 11.6653, "step": 18362 }, { "epoch": 0.38438834463702587, "grad_norm": 0.36668145656585693, "learning_rate": 0.00019202971187827868, "loss": 11.6774, "step": 18363 }, { "epoch": 0.384409277400988, "grad_norm": 0.26398709416389465, "learning_rate": 0.000192028854093098, "loss": 11.6651, "step": 18364 }, { "epoch": 0.38443021016495016, "grad_norm": 0.2855193018913269, "learning_rate": 0.0001920279962636772, "loss": 11.6647, "step": 18365 }, { "epoch": 0.38445114292891236, "grad_norm": 0.28281092643737793, "learning_rate": 0.0001920271383900167, "loss": 11.6876, "step": 18366 }, { "epoch": 0.3844720756928745, "grad_norm": 0.3578912019729614, "learning_rate": 0.0001920262804721169, "loss": 11.6748, "step": 18367 }, { "epoch": 0.38449300845683665, "grad_norm": 0.32056522369384766, "learning_rate": 0.00019202542250997822, "loss": 11.6807, "step": 18368 }, { "epoch": 0.3845139412207988, "grad_norm": 0.4456508457660675, "learning_rate": 0.0001920245645036011, "loss": 11.6627, "step": 18369 }, { "epoch": 0.38453487398476094, "grad_norm": 0.2576320171356201, "learning_rate": 0.0001920237064529859, "loss": 11.6643, "step": 18370 }, { "epoch": 0.3845558067487231, "grad_norm": 0.28802356123924255, "learning_rate": 0.00019202284835813304, "loss": 11.6841, "step": 18371 }, { "epoch": 0.38457673951268523, "grad_norm": 0.3033284544944763, "learning_rate": 0.000192021990219043, "loss": 11.6684, "step": 18372 }, { "epoch": 0.38459767227664743, "grad_norm": 0.23708312213420868, "learning_rate": 0.0001920211320357161, "loss": 11.667, "step": 18373 }, { "epoch": 0.3846186050406096, "grad_norm": 0.21530082821846008, "learning_rate": 0.00019202027380815285, "loss": 11.6659, "step": 18374 }, { "epoch": 0.3846395378045717, "grad_norm": 0.36394140124320984, "learning_rate": 0.0001920194155363536, "loss": 11.6843, "step": 18375 }, { "epoch": 0.38466047056853386, "grad_norm": 0.30570656061172485, "learning_rate": 0.0001920185572203187, "loss": 11.6664, "step": 18376 }, { "epoch": 0.384681403332496, "grad_norm": 0.2619967758655548, "learning_rate": 0.00019201769886004872, "loss": 11.6727, "step": 18377 }, { "epoch": 0.38470233609645815, "grad_norm": 0.25084036588668823, "learning_rate": 0.00019201684045554396, "loss": 11.6885, "step": 18378 }, { "epoch": 0.38472326886042035, "grad_norm": 0.33939725160598755, "learning_rate": 0.00019201598200680486, "loss": 11.6843, "step": 18379 }, { "epoch": 0.3847442016243825, "grad_norm": 0.23919832706451416, "learning_rate": 0.00019201512351383184, "loss": 11.6587, "step": 18380 }, { "epoch": 0.38476513438834464, "grad_norm": 0.3186259865760803, "learning_rate": 0.00019201426497662529, "loss": 11.6731, "step": 18381 }, { "epoch": 0.3847860671523068, "grad_norm": 0.38229790329933167, "learning_rate": 0.00019201340639518564, "loss": 11.6669, "step": 18382 }, { "epoch": 0.38480699991626893, "grad_norm": 0.49113789200782776, "learning_rate": 0.00019201254776951332, "loss": 11.6854, "step": 18383 }, { "epoch": 0.3848279326802311, "grad_norm": 0.315475195646286, "learning_rate": 0.00019201168909960872, "loss": 11.6659, "step": 18384 }, { "epoch": 0.3848488654441933, "grad_norm": 0.3427392542362213, "learning_rate": 0.00019201083038547222, "loss": 11.6732, "step": 18385 }, { "epoch": 0.3848697982081554, "grad_norm": 0.2820205092430115, "learning_rate": 0.00019200997162710433, "loss": 11.684, "step": 18386 }, { "epoch": 0.38489073097211757, "grad_norm": 0.25899821519851685, "learning_rate": 0.00019200911282450536, "loss": 11.6652, "step": 18387 }, { "epoch": 0.3849116637360797, "grad_norm": 0.3704025149345398, "learning_rate": 0.0001920082539776758, "loss": 11.6727, "step": 18388 }, { "epoch": 0.38493259650004186, "grad_norm": 0.3883669972419739, "learning_rate": 0.00019200739508661602, "loss": 11.6737, "step": 18389 }, { "epoch": 0.384953529264004, "grad_norm": 0.34515705704689026, "learning_rate": 0.0001920065361513264, "loss": 11.661, "step": 18390 }, { "epoch": 0.38497446202796615, "grad_norm": 0.27307480573654175, "learning_rate": 0.00019200567717180743, "loss": 11.6771, "step": 18391 }, { "epoch": 0.38499539479192835, "grad_norm": 0.28293728828430176, "learning_rate": 0.0001920048181480595, "loss": 11.6851, "step": 18392 }, { "epoch": 0.3850163275558905, "grad_norm": 0.353496789932251, "learning_rate": 0.00019200395908008298, "loss": 11.688, "step": 18393 }, { "epoch": 0.38503726031985264, "grad_norm": 0.26461726427078247, "learning_rate": 0.00019200309996787832, "loss": 11.682, "step": 18394 }, { "epoch": 0.3850581930838148, "grad_norm": 0.30735886096954346, "learning_rate": 0.00019200224081144593, "loss": 11.6631, "step": 18395 }, { "epoch": 0.3850791258477769, "grad_norm": 0.32925981283187866, "learning_rate": 0.00019200138161078623, "loss": 11.6873, "step": 18396 }, { "epoch": 0.38510005861173907, "grad_norm": 0.3066357672214508, "learning_rate": 0.0001920005223658996, "loss": 11.6837, "step": 18397 }, { "epoch": 0.38512099137570127, "grad_norm": 0.3571334183216095, "learning_rate": 0.0001919996630767865, "loss": 11.6682, "step": 18398 }, { "epoch": 0.3851419241396634, "grad_norm": 0.2570094168186188, "learning_rate": 0.00019199880374344729, "loss": 11.6611, "step": 18399 }, { "epoch": 0.38516285690362556, "grad_norm": 0.2664920389652252, "learning_rate": 0.00019199794436588243, "loss": 11.6653, "step": 18400 }, { "epoch": 0.3851837896675877, "grad_norm": 0.29700514674186707, "learning_rate": 0.00019199708494409233, "loss": 11.6683, "step": 18401 }, { "epoch": 0.38520472243154985, "grad_norm": 0.3555777072906494, "learning_rate": 0.00019199622547807734, "loss": 11.6881, "step": 18402 }, { "epoch": 0.385225655195512, "grad_norm": 1.1257824897766113, "learning_rate": 0.00019199536596783796, "loss": 11.6579, "step": 18403 }, { "epoch": 0.3852465879594742, "grad_norm": 0.22849443554878235, "learning_rate": 0.00019199450641337455, "loss": 11.6652, "step": 18404 }, { "epoch": 0.38526752072343634, "grad_norm": 0.2428765445947647, "learning_rate": 0.00019199364681468753, "loss": 11.6749, "step": 18405 }, { "epoch": 0.3852884534873985, "grad_norm": 0.2813017964363098, "learning_rate": 0.00019199278717177735, "loss": 11.6589, "step": 18406 }, { "epoch": 0.38530938625136063, "grad_norm": 0.2672468423843384, "learning_rate": 0.00019199192748464437, "loss": 11.6719, "step": 18407 }, { "epoch": 0.3853303190153228, "grad_norm": 0.3189615309238434, "learning_rate": 0.00019199106775328903, "loss": 11.6717, "step": 18408 }, { "epoch": 0.3853512517792849, "grad_norm": 0.7060273289680481, "learning_rate": 0.00019199020797771175, "loss": 11.6464, "step": 18409 }, { "epoch": 0.38537218454324706, "grad_norm": 0.27237454056739807, "learning_rate": 0.00019198934815791292, "loss": 11.6861, "step": 18410 }, { "epoch": 0.38539311730720927, "grad_norm": 0.2599242925643921, "learning_rate": 0.00019198848829389298, "loss": 11.684, "step": 18411 }, { "epoch": 0.3854140500711714, "grad_norm": 0.2806360125541687, "learning_rate": 0.0001919876283856523, "loss": 11.6766, "step": 18412 }, { "epoch": 0.38543498283513355, "grad_norm": 0.27736103534698486, "learning_rate": 0.00019198676843319135, "loss": 11.6856, "step": 18413 }, { "epoch": 0.3854559155990957, "grad_norm": 0.27901217341423035, "learning_rate": 0.00019198590843651052, "loss": 11.6618, "step": 18414 }, { "epoch": 0.38547684836305784, "grad_norm": 0.3577668368816376, "learning_rate": 0.00019198504839561023, "loss": 11.6941, "step": 18415 }, { "epoch": 0.38549778112702, "grad_norm": 0.3168961703777313, "learning_rate": 0.00019198418831049084, "loss": 11.6825, "step": 18416 }, { "epoch": 0.3855187138909822, "grad_norm": 0.2912035584449768, "learning_rate": 0.00019198332818115286, "loss": 11.6748, "step": 18417 }, { "epoch": 0.38553964665494433, "grad_norm": 0.29557475447654724, "learning_rate": 0.00019198246800759662, "loss": 11.6588, "step": 18418 }, { "epoch": 0.3855605794189065, "grad_norm": 0.2926892936229706, "learning_rate": 0.00019198160778982257, "loss": 11.6898, "step": 18419 }, { "epoch": 0.3855815121828686, "grad_norm": 0.2837664484977722, "learning_rate": 0.0001919807475278311, "loss": 11.6693, "step": 18420 }, { "epoch": 0.38560244494683077, "grad_norm": 0.24468401074409485, "learning_rate": 0.00019197988722162267, "loss": 11.6681, "step": 18421 }, { "epoch": 0.3856233777107929, "grad_norm": 0.31173616647720337, "learning_rate": 0.00019197902687119765, "loss": 11.6852, "step": 18422 }, { "epoch": 0.3856443104747551, "grad_norm": 0.25503653287887573, "learning_rate": 0.00019197816647655646, "loss": 11.6711, "step": 18423 }, { "epoch": 0.38566524323871726, "grad_norm": 0.3065040111541748, "learning_rate": 0.00019197730603769954, "loss": 11.6782, "step": 18424 }, { "epoch": 0.3856861760026794, "grad_norm": 0.2947550117969513, "learning_rate": 0.00019197644555462727, "loss": 11.6593, "step": 18425 }, { "epoch": 0.38570710876664155, "grad_norm": 0.3284755051136017, "learning_rate": 0.0001919755850273401, "loss": 11.6569, "step": 18426 }, { "epoch": 0.3857280415306037, "grad_norm": 0.2582494020462036, "learning_rate": 0.00019197472445583842, "loss": 11.6744, "step": 18427 }, { "epoch": 0.38574897429456584, "grad_norm": 0.25659647583961487, "learning_rate": 0.00019197386384012263, "loss": 11.6859, "step": 18428 }, { "epoch": 0.385769907058528, "grad_norm": 0.2796685993671417, "learning_rate": 0.00019197300318019318, "loss": 11.6699, "step": 18429 }, { "epoch": 0.3857908398224902, "grad_norm": 0.305939644575119, "learning_rate": 0.00019197214247605043, "loss": 11.6743, "step": 18430 }, { "epoch": 0.38581177258645233, "grad_norm": 0.30413198471069336, "learning_rate": 0.00019197128172769484, "loss": 11.6727, "step": 18431 }, { "epoch": 0.3858327053504145, "grad_norm": 0.2887078523635864, "learning_rate": 0.00019197042093512684, "loss": 11.6762, "step": 18432 }, { "epoch": 0.3858536381143766, "grad_norm": 0.4116668999195099, "learning_rate": 0.0001919695600983468, "loss": 11.6603, "step": 18433 }, { "epoch": 0.38587457087833876, "grad_norm": 0.2990817427635193, "learning_rate": 0.00019196869921735515, "loss": 11.6661, "step": 18434 }, { "epoch": 0.3858955036423009, "grad_norm": 0.2752944827079773, "learning_rate": 0.00019196783829215233, "loss": 11.6566, "step": 18435 }, { "epoch": 0.3859164364062631, "grad_norm": 0.27727967500686646, "learning_rate": 0.00019196697732273867, "loss": 11.6738, "step": 18436 }, { "epoch": 0.38593736917022525, "grad_norm": 0.2997409999370575, "learning_rate": 0.00019196611630911467, "loss": 11.67, "step": 18437 }, { "epoch": 0.3859583019341874, "grad_norm": 0.34599465131759644, "learning_rate": 0.00019196525525128072, "loss": 11.6861, "step": 18438 }, { "epoch": 0.38597923469814954, "grad_norm": 0.25541597604751587, "learning_rate": 0.00019196439414923724, "loss": 11.6723, "step": 18439 }, { "epoch": 0.3860001674621117, "grad_norm": 0.33616068959236145, "learning_rate": 0.0001919635330029846, "loss": 11.668, "step": 18440 }, { "epoch": 0.38602110022607383, "grad_norm": 0.257942259311676, "learning_rate": 0.00019196267181252329, "loss": 11.6525, "step": 18441 }, { "epoch": 0.386042032990036, "grad_norm": 0.3215440511703491, "learning_rate": 0.00019196181057785363, "loss": 11.6601, "step": 18442 }, { "epoch": 0.3860629657539982, "grad_norm": 0.24782250821590424, "learning_rate": 0.0001919609492989761, "loss": 11.6692, "step": 18443 }, { "epoch": 0.3860838985179603, "grad_norm": 0.27942532300949097, "learning_rate": 0.0001919600879758911, "loss": 11.677, "step": 18444 }, { "epoch": 0.38610483128192247, "grad_norm": 0.29276400804519653, "learning_rate": 0.00019195922660859906, "loss": 11.6626, "step": 18445 }, { "epoch": 0.3861257640458846, "grad_norm": 0.3263302147388458, "learning_rate": 0.00019195836519710036, "loss": 11.6922, "step": 18446 }, { "epoch": 0.38614669680984676, "grad_norm": 0.6201602220535278, "learning_rate": 0.00019195750374139542, "loss": 11.6801, "step": 18447 }, { "epoch": 0.3861676295738089, "grad_norm": 0.34465861320495605, "learning_rate": 0.0001919566422414847, "loss": 11.6797, "step": 18448 }, { "epoch": 0.3861885623377711, "grad_norm": 0.3506636619567871, "learning_rate": 0.00019195578069736857, "loss": 11.6968, "step": 18449 }, { "epoch": 0.38620949510173325, "grad_norm": 0.32599085569381714, "learning_rate": 0.00019195491910904743, "loss": 11.6644, "step": 18450 }, { "epoch": 0.3862304278656954, "grad_norm": 0.28493815660476685, "learning_rate": 0.00019195405747652174, "loss": 11.6812, "step": 18451 }, { "epoch": 0.38625136062965754, "grad_norm": 0.25081539154052734, "learning_rate": 0.0001919531957997919, "loss": 11.6873, "step": 18452 }, { "epoch": 0.3862722933936197, "grad_norm": 0.33151450753211975, "learning_rate": 0.00019195233407885828, "loss": 11.6802, "step": 18453 }, { "epoch": 0.3862932261575818, "grad_norm": 0.26416611671447754, "learning_rate": 0.00019195147231372138, "loss": 11.6647, "step": 18454 }, { "epoch": 0.386314158921544, "grad_norm": 0.3003356456756592, "learning_rate": 0.00019195061050438154, "loss": 11.6788, "step": 18455 }, { "epoch": 0.38633509168550617, "grad_norm": 0.26780614256858826, "learning_rate": 0.00019194974865083917, "loss": 11.6772, "step": 18456 }, { "epoch": 0.3863560244494683, "grad_norm": 0.3206152319908142, "learning_rate": 0.00019194888675309475, "loss": 11.6721, "step": 18457 }, { "epoch": 0.38637695721343046, "grad_norm": 0.31247735023498535, "learning_rate": 0.00019194802481114863, "loss": 11.6586, "step": 18458 }, { "epoch": 0.3863978899773926, "grad_norm": 0.27535921335220337, "learning_rate": 0.00019194716282500127, "loss": 11.6901, "step": 18459 }, { "epoch": 0.38641882274135475, "grad_norm": 0.2964020073413849, "learning_rate": 0.00019194630079465307, "loss": 11.6788, "step": 18460 }, { "epoch": 0.3864397555053169, "grad_norm": 0.3964436650276184, "learning_rate": 0.00019194543872010441, "loss": 11.6794, "step": 18461 }, { "epoch": 0.3864606882692791, "grad_norm": 0.27932411432266235, "learning_rate": 0.00019194457660135576, "loss": 11.6822, "step": 18462 }, { "epoch": 0.38648162103324124, "grad_norm": 0.23391424119472504, "learning_rate": 0.00019194371443840752, "loss": 11.68, "step": 18463 }, { "epoch": 0.3865025537972034, "grad_norm": 0.3471236824989319, "learning_rate": 0.00019194285223126008, "loss": 11.6648, "step": 18464 }, { "epoch": 0.38652348656116553, "grad_norm": 0.3153994083404541, "learning_rate": 0.00019194198997991388, "loss": 11.668, "step": 18465 }, { "epoch": 0.3865444193251277, "grad_norm": 1.81692636013031, "learning_rate": 0.00019194112768436931, "loss": 11.6906, "step": 18466 }, { "epoch": 0.3865653520890898, "grad_norm": 0.3074084222316742, "learning_rate": 0.00019194026534462679, "loss": 11.6659, "step": 18467 }, { "epoch": 0.386586284853052, "grad_norm": 0.2505762279033661, "learning_rate": 0.00019193940296068676, "loss": 11.6703, "step": 18468 }, { "epoch": 0.38660721761701416, "grad_norm": 0.2668132483959198, "learning_rate": 0.0001919385405325496, "loss": 11.6761, "step": 18469 }, { "epoch": 0.3866281503809763, "grad_norm": 0.3701975345611572, "learning_rate": 0.00019193767806021574, "loss": 11.6767, "step": 18470 }, { "epoch": 0.38664908314493845, "grad_norm": 0.31753847002983093, "learning_rate": 0.00019193681554368564, "loss": 11.6857, "step": 18471 }, { "epoch": 0.3866700159089006, "grad_norm": 0.2663976848125458, "learning_rate": 0.00019193595298295962, "loss": 11.6573, "step": 18472 }, { "epoch": 0.38669094867286274, "grad_norm": 0.3013160824775696, "learning_rate": 0.00019193509037803815, "loss": 11.6695, "step": 18473 }, { "epoch": 0.38671188143682494, "grad_norm": 0.36810946464538574, "learning_rate": 0.00019193422772892165, "loss": 11.6927, "step": 18474 }, { "epoch": 0.3867328142007871, "grad_norm": 0.27510732412338257, "learning_rate": 0.00019193336503561055, "loss": 11.662, "step": 18475 }, { "epoch": 0.38675374696474923, "grad_norm": 0.262198805809021, "learning_rate": 0.0001919325022981052, "loss": 11.6706, "step": 18476 }, { "epoch": 0.3867746797287114, "grad_norm": 0.34911128878593445, "learning_rate": 0.00019193163951640606, "loss": 11.6731, "step": 18477 }, { "epoch": 0.3867956124926735, "grad_norm": 0.24321888387203217, "learning_rate": 0.00019193077669051354, "loss": 11.6668, "step": 18478 }, { "epoch": 0.38681654525663567, "grad_norm": 0.34379053115844727, "learning_rate": 0.00019192991382042807, "loss": 11.6752, "step": 18479 }, { "epoch": 0.3868374780205978, "grad_norm": 0.2822454571723938, "learning_rate": 0.00019192905090615003, "loss": 11.6665, "step": 18480 }, { "epoch": 0.38685841078456, "grad_norm": 0.2748052179813385, "learning_rate": 0.0001919281879476799, "loss": 11.6601, "step": 18481 }, { "epoch": 0.38687934354852216, "grad_norm": 0.31575414538383484, "learning_rate": 0.00019192732494501796, "loss": 11.6742, "step": 18482 }, { "epoch": 0.3869002763124843, "grad_norm": 0.6460968255996704, "learning_rate": 0.0001919264618981648, "loss": 11.5857, "step": 18483 }, { "epoch": 0.38692120907644645, "grad_norm": 0.2859669029712677, "learning_rate": 0.00019192559880712069, "loss": 11.6683, "step": 18484 }, { "epoch": 0.3869421418404086, "grad_norm": 0.2911761701107025, "learning_rate": 0.00019192473567188612, "loss": 11.6715, "step": 18485 }, { "epoch": 0.38696307460437074, "grad_norm": 0.2587744891643524, "learning_rate": 0.00019192387249246148, "loss": 11.674, "step": 18486 }, { "epoch": 0.38698400736833294, "grad_norm": 0.34470120072364807, "learning_rate": 0.0001919230092688472, "loss": 11.685, "step": 18487 }, { "epoch": 0.3870049401322951, "grad_norm": 0.4281754195690155, "learning_rate": 0.0001919221460010437, "loss": 11.6827, "step": 18488 }, { "epoch": 0.38702587289625723, "grad_norm": 0.3107337951660156, "learning_rate": 0.00019192128268905137, "loss": 11.6741, "step": 18489 }, { "epoch": 0.3870468056602194, "grad_norm": 0.33518439531326294, "learning_rate": 0.00019192041933287064, "loss": 11.6651, "step": 18490 }, { "epoch": 0.3870677384241815, "grad_norm": 0.24813641607761383, "learning_rate": 0.00019191955593250191, "loss": 11.6644, "step": 18491 }, { "epoch": 0.38708867118814366, "grad_norm": 0.5551936030387878, "learning_rate": 0.00019191869248794564, "loss": 11.6527, "step": 18492 }, { "epoch": 0.38710960395210586, "grad_norm": 0.27075573801994324, "learning_rate": 0.00019191782899920216, "loss": 11.6707, "step": 18493 }, { "epoch": 0.387130536716068, "grad_norm": 0.3139132261276245, "learning_rate": 0.000191916965466272, "loss": 11.6957, "step": 18494 }, { "epoch": 0.38715146948003015, "grad_norm": 0.32978495955467224, "learning_rate": 0.00019191610188915546, "loss": 11.6709, "step": 18495 }, { "epoch": 0.3871724022439923, "grad_norm": 0.34370362758636475, "learning_rate": 0.00019191523826785303, "loss": 11.6674, "step": 18496 }, { "epoch": 0.38719333500795444, "grad_norm": 0.2563905715942383, "learning_rate": 0.0001919143746023651, "loss": 11.673, "step": 18497 }, { "epoch": 0.3872142677719166, "grad_norm": 0.24067841470241547, "learning_rate": 0.00019191351089269207, "loss": 11.689, "step": 18498 }, { "epoch": 0.38723520053587873, "grad_norm": 0.2572672963142395, "learning_rate": 0.0001919126471388344, "loss": 11.6766, "step": 18499 }, { "epoch": 0.38725613329984093, "grad_norm": 0.2232656180858612, "learning_rate": 0.00019191178334079247, "loss": 11.6698, "step": 18500 }, { "epoch": 0.3872770660638031, "grad_norm": 0.24435338377952576, "learning_rate": 0.0001919109194985667, "loss": 11.6594, "step": 18501 }, { "epoch": 0.3872979988277652, "grad_norm": 0.30513423681259155, "learning_rate": 0.00019191005561215754, "loss": 11.6861, "step": 18502 }, { "epoch": 0.38731893159172737, "grad_norm": 0.3392195701599121, "learning_rate": 0.00019190919168156535, "loss": 11.6617, "step": 18503 }, { "epoch": 0.3873398643556895, "grad_norm": 0.31967395544052124, "learning_rate": 0.00019190832770679057, "loss": 11.6832, "step": 18504 }, { "epoch": 0.38736079711965166, "grad_norm": 0.2979945242404938, "learning_rate": 0.00019190746368783358, "loss": 11.6927, "step": 18505 }, { "epoch": 0.38738172988361386, "grad_norm": 0.2044730931520462, "learning_rate": 0.00019190659962469488, "loss": 11.6676, "step": 18506 }, { "epoch": 0.387402662647576, "grad_norm": 0.25284287333488464, "learning_rate": 0.00019190573551737482, "loss": 11.6608, "step": 18507 }, { "epoch": 0.38742359541153815, "grad_norm": 0.3027273714542389, "learning_rate": 0.00019190487136587384, "loss": 11.6729, "step": 18508 }, { "epoch": 0.3874445281755003, "grad_norm": 0.3128526508808136, "learning_rate": 0.00019190400717019232, "loss": 11.6781, "step": 18509 }, { "epoch": 0.38746546093946244, "grad_norm": 0.2860357463359833, "learning_rate": 0.0001919031429303307, "loss": 11.6654, "step": 18510 }, { "epoch": 0.3874863937034246, "grad_norm": 0.3003471791744232, "learning_rate": 0.00019190227864628945, "loss": 11.6876, "step": 18511 }, { "epoch": 0.3875073264673868, "grad_norm": 0.30091115832328796, "learning_rate": 0.00019190141431806889, "loss": 11.6855, "step": 18512 }, { "epoch": 0.3875282592313489, "grad_norm": 0.27160781621932983, "learning_rate": 0.0001919005499456695, "loss": 11.6716, "step": 18513 }, { "epoch": 0.38754919199531107, "grad_norm": 0.2997470498085022, "learning_rate": 0.00019189968552909162, "loss": 11.6763, "step": 18514 }, { "epoch": 0.3875701247592732, "grad_norm": 0.31223440170288086, "learning_rate": 0.0001918988210683358, "loss": 11.6741, "step": 18515 }, { "epoch": 0.38759105752323536, "grad_norm": 0.9406543374061584, "learning_rate": 0.0001918979565634023, "loss": 11.6786, "step": 18516 }, { "epoch": 0.3876119902871975, "grad_norm": 0.25607436895370483, "learning_rate": 0.00019189709201429168, "loss": 11.6771, "step": 18517 }, { "epoch": 0.38763292305115965, "grad_norm": 0.4062292277812958, "learning_rate": 0.00019189622742100425, "loss": 11.678, "step": 18518 }, { "epoch": 0.38765385581512185, "grad_norm": 0.2976676821708679, "learning_rate": 0.00019189536278354046, "loss": 11.6634, "step": 18519 }, { "epoch": 0.387674788579084, "grad_norm": 0.3289982080459595, "learning_rate": 0.00019189449810190072, "loss": 11.685, "step": 18520 }, { "epoch": 0.38769572134304614, "grad_norm": 0.2394544780254364, "learning_rate": 0.00019189363337608547, "loss": 11.678, "step": 18521 }, { "epoch": 0.3877166541070083, "grad_norm": 0.28724968433380127, "learning_rate": 0.0001918927686060951, "loss": 11.6751, "step": 18522 }, { "epoch": 0.38773758687097043, "grad_norm": 0.25681567192077637, "learning_rate": 0.00019189190379193005, "loss": 11.6558, "step": 18523 }, { "epoch": 0.3877585196349326, "grad_norm": 0.2529838979244232, "learning_rate": 0.0001918910389335907, "loss": 11.6593, "step": 18524 }, { "epoch": 0.3877794523988948, "grad_norm": 0.32286834716796875, "learning_rate": 0.0001918901740310775, "loss": 11.6634, "step": 18525 }, { "epoch": 0.3878003851628569, "grad_norm": 0.30067867040634155, "learning_rate": 0.00019188930908439083, "loss": 11.669, "step": 18526 }, { "epoch": 0.38782131792681906, "grad_norm": 0.29961293935775757, "learning_rate": 0.00019188844409353116, "loss": 11.6759, "step": 18527 }, { "epoch": 0.3878422506907812, "grad_norm": 0.2549994885921478, "learning_rate": 0.00019188757905849886, "loss": 11.6642, "step": 18528 }, { "epoch": 0.38786318345474335, "grad_norm": 0.2773214876651764, "learning_rate": 0.00019188671397929436, "loss": 11.6933, "step": 18529 }, { "epoch": 0.3878841162187055, "grad_norm": 0.2989468276500702, "learning_rate": 0.00019188584885591805, "loss": 11.6741, "step": 18530 }, { "epoch": 0.38790504898266764, "grad_norm": 0.2547402083873749, "learning_rate": 0.0001918849836883704, "loss": 11.6636, "step": 18531 }, { "epoch": 0.38792598174662984, "grad_norm": 0.2811352610588074, "learning_rate": 0.0001918841184766518, "loss": 11.6668, "step": 18532 }, { "epoch": 0.387946914510592, "grad_norm": 0.25932103395462036, "learning_rate": 0.00019188325322076267, "loss": 11.6658, "step": 18533 }, { "epoch": 0.38796784727455413, "grad_norm": 0.26211708784103394, "learning_rate": 0.00019188238792070336, "loss": 11.6747, "step": 18534 }, { "epoch": 0.3879887800385163, "grad_norm": 0.3238229751586914, "learning_rate": 0.0001918815225764744, "loss": 11.6765, "step": 18535 }, { "epoch": 0.3880097128024784, "grad_norm": 0.3123135268688202, "learning_rate": 0.00019188065718807616, "loss": 11.6848, "step": 18536 }, { "epoch": 0.38803064556644057, "grad_norm": 0.2755253314971924, "learning_rate": 0.000191879791755509, "loss": 11.658, "step": 18537 }, { "epoch": 0.38805157833040277, "grad_norm": 0.319793164730072, "learning_rate": 0.00019187892627877342, "loss": 11.679, "step": 18538 }, { "epoch": 0.3880725110943649, "grad_norm": 0.2812769114971161, "learning_rate": 0.00019187806075786977, "loss": 11.6665, "step": 18539 }, { "epoch": 0.38809344385832706, "grad_norm": 0.2446509450674057, "learning_rate": 0.00019187719519279852, "loss": 11.6621, "step": 18540 }, { "epoch": 0.3881143766222892, "grad_norm": 0.2978024184703827, "learning_rate": 0.00019187632958356005, "loss": 11.6527, "step": 18541 }, { "epoch": 0.38813530938625135, "grad_norm": 0.3306892216205597, "learning_rate": 0.0001918754639301548, "loss": 11.6722, "step": 18542 }, { "epoch": 0.3881562421502135, "grad_norm": 0.23539885878562927, "learning_rate": 0.00019187459823258315, "loss": 11.6759, "step": 18543 }, { "epoch": 0.3881771749141757, "grad_norm": 0.25072935223579407, "learning_rate": 0.00019187373249084555, "loss": 11.6602, "step": 18544 }, { "epoch": 0.38819810767813784, "grad_norm": 0.2800005376338959, "learning_rate": 0.0001918728667049424, "loss": 11.6812, "step": 18545 }, { "epoch": 0.3882190404421, "grad_norm": 0.31092536449432373, "learning_rate": 0.00019187200087487414, "loss": 11.6669, "step": 18546 }, { "epoch": 0.38823997320606213, "grad_norm": 0.30316102504730225, "learning_rate": 0.00019187113500064115, "loss": 11.6657, "step": 18547 }, { "epoch": 0.3882609059700243, "grad_norm": 0.31652480363845825, "learning_rate": 0.0001918702690822439, "loss": 11.6854, "step": 18548 }, { "epoch": 0.3882818387339864, "grad_norm": 0.2833079695701599, "learning_rate": 0.00019186940311968274, "loss": 11.659, "step": 18549 }, { "epoch": 0.38830277149794856, "grad_norm": 0.31108060479164124, "learning_rate": 0.0001918685371129581, "loss": 11.6634, "step": 18550 }, { "epoch": 0.38832370426191076, "grad_norm": 0.2957475483417511, "learning_rate": 0.0001918676710620704, "loss": 11.6733, "step": 18551 }, { "epoch": 0.3883446370258729, "grad_norm": 0.25194093585014343, "learning_rate": 0.00019186680496702013, "loss": 11.6593, "step": 18552 }, { "epoch": 0.38836556978983505, "grad_norm": 0.2756614685058594, "learning_rate": 0.00019186593882780764, "loss": 11.6637, "step": 18553 }, { "epoch": 0.3883865025537972, "grad_norm": 0.2752130627632141, "learning_rate": 0.00019186507264443332, "loss": 11.676, "step": 18554 }, { "epoch": 0.38840743531775934, "grad_norm": 0.3495985269546509, "learning_rate": 0.00019186420641689763, "loss": 11.6882, "step": 18555 }, { "epoch": 0.3884283680817215, "grad_norm": 0.26140737533569336, "learning_rate": 0.00019186334014520098, "loss": 11.6721, "step": 18556 }, { "epoch": 0.3884493008456837, "grad_norm": 0.34688466787338257, "learning_rate": 0.00019186247382934374, "loss": 11.6733, "step": 18557 }, { "epoch": 0.38847023360964583, "grad_norm": 0.356687068939209, "learning_rate": 0.00019186160746932643, "loss": 11.6702, "step": 18558 }, { "epoch": 0.388491166373608, "grad_norm": 0.38154202699661255, "learning_rate": 0.00019186074106514935, "loss": 11.6745, "step": 18559 }, { "epoch": 0.3885120991375701, "grad_norm": 0.24459125101566315, "learning_rate": 0.000191859874616813, "loss": 11.6795, "step": 18560 }, { "epoch": 0.38853303190153227, "grad_norm": 0.2933008372783661, "learning_rate": 0.00019185900812431777, "loss": 11.6729, "step": 18561 }, { "epoch": 0.3885539646654944, "grad_norm": 0.28476011753082275, "learning_rate": 0.00019185814158766408, "loss": 11.6855, "step": 18562 }, { "epoch": 0.3885748974294566, "grad_norm": 0.2925805449485779, "learning_rate": 0.0001918572750068523, "loss": 11.6617, "step": 18563 }, { "epoch": 0.38859583019341876, "grad_norm": 0.2528415024280548, "learning_rate": 0.00019185640838188296, "loss": 11.6657, "step": 18564 }, { "epoch": 0.3886167629573809, "grad_norm": 0.2875519096851349, "learning_rate": 0.00019185554171275636, "loss": 11.669, "step": 18565 }, { "epoch": 0.38863769572134305, "grad_norm": 0.32853007316589355, "learning_rate": 0.00019185467499947294, "loss": 11.6707, "step": 18566 }, { "epoch": 0.3886586284853052, "grad_norm": 0.2613454759120941, "learning_rate": 0.00019185380824203316, "loss": 11.6802, "step": 18567 }, { "epoch": 0.38867956124926734, "grad_norm": 0.3429446518421173, "learning_rate": 0.00019185294144043739, "loss": 11.691, "step": 18568 }, { "epoch": 0.3887004940132295, "grad_norm": 0.3368200957775116, "learning_rate": 0.00019185207459468612, "loss": 11.6705, "step": 18569 }, { "epoch": 0.3887214267771917, "grad_norm": 0.3837263286113739, "learning_rate": 0.0001918512077047797, "loss": 11.6636, "step": 18570 }, { "epoch": 0.3887423595411538, "grad_norm": 0.2678878903388977, "learning_rate": 0.00019185034077071853, "loss": 11.6619, "step": 18571 }, { "epoch": 0.38876329230511597, "grad_norm": 0.31012946367263794, "learning_rate": 0.0001918494737925031, "loss": 11.6713, "step": 18572 }, { "epoch": 0.3887842250690781, "grad_norm": 0.36608415842056274, "learning_rate": 0.00019184860677013375, "loss": 11.6769, "step": 18573 }, { "epoch": 0.38880515783304026, "grad_norm": 0.25249046087265015, "learning_rate": 0.00019184773970361095, "loss": 11.67, "step": 18574 }, { "epoch": 0.3888260905970024, "grad_norm": 0.3217807710170746, "learning_rate": 0.00019184687259293513, "loss": 11.6631, "step": 18575 }, { "epoch": 0.3888470233609646, "grad_norm": 0.3378514051437378, "learning_rate": 0.00019184600543810664, "loss": 11.6727, "step": 18576 }, { "epoch": 0.38886795612492675, "grad_norm": 0.31650182604789734, "learning_rate": 0.00019184513823912593, "loss": 11.6824, "step": 18577 }, { "epoch": 0.3888888888888889, "grad_norm": 0.29588374495506287, "learning_rate": 0.00019184427099599346, "loss": 11.6676, "step": 18578 }, { "epoch": 0.38890982165285104, "grad_norm": 0.3552842438220978, "learning_rate": 0.00019184340370870956, "loss": 11.683, "step": 18579 }, { "epoch": 0.3889307544168132, "grad_norm": 0.3411242961883545, "learning_rate": 0.00019184253637727475, "loss": 11.6784, "step": 18580 }, { "epoch": 0.38895168718077533, "grad_norm": 0.281684011220932, "learning_rate": 0.00019184166900168935, "loss": 11.6743, "step": 18581 }, { "epoch": 0.38897261994473753, "grad_norm": 0.21844729781150818, "learning_rate": 0.00019184080158195384, "loss": 11.6703, "step": 18582 }, { "epoch": 0.3889935527086997, "grad_norm": 0.26861774921417236, "learning_rate": 0.00019183993411806859, "loss": 11.6865, "step": 18583 }, { "epoch": 0.3890144854726618, "grad_norm": 0.2730664312839508, "learning_rate": 0.00019183906661003408, "loss": 11.6726, "step": 18584 }, { "epoch": 0.38903541823662396, "grad_norm": 0.2302883118391037, "learning_rate": 0.00019183819905785066, "loss": 11.6692, "step": 18585 }, { "epoch": 0.3890563510005861, "grad_norm": 0.38538306951522827, "learning_rate": 0.0001918373314615188, "loss": 11.7024, "step": 18586 }, { "epoch": 0.38907728376454825, "grad_norm": 0.32019370794296265, "learning_rate": 0.0001918364638210389, "loss": 11.6793, "step": 18587 }, { "epoch": 0.3890982165285104, "grad_norm": 0.31863048672676086, "learning_rate": 0.00019183559613641132, "loss": 11.6881, "step": 18588 }, { "epoch": 0.3891191492924726, "grad_norm": 0.2972007393836975, "learning_rate": 0.00019183472840763658, "loss": 11.6629, "step": 18589 }, { "epoch": 0.38914008205643474, "grad_norm": 0.279814749956131, "learning_rate": 0.00019183386063471503, "loss": 11.6771, "step": 18590 }, { "epoch": 0.3891610148203969, "grad_norm": 0.2599724531173706, "learning_rate": 0.0001918329928176471, "loss": 11.6652, "step": 18591 }, { "epoch": 0.38918194758435903, "grad_norm": 0.29197558760643005, "learning_rate": 0.0001918321249564332, "loss": 11.6885, "step": 18592 }, { "epoch": 0.3892028803483212, "grad_norm": 0.25495484471321106, "learning_rate": 0.00019183125705107379, "loss": 11.6699, "step": 18593 }, { "epoch": 0.3892238131122833, "grad_norm": 0.27265387773513794, "learning_rate": 0.00019183038910156923, "loss": 11.6654, "step": 18594 }, { "epoch": 0.3892447458762455, "grad_norm": 0.31216081976890564, "learning_rate": 0.00019182952110791997, "loss": 11.6732, "step": 18595 }, { "epoch": 0.38926567864020767, "grad_norm": 0.32923486828804016, "learning_rate": 0.0001918286530701264, "loss": 11.686, "step": 18596 }, { "epoch": 0.3892866114041698, "grad_norm": 0.28750866651535034, "learning_rate": 0.000191827784988189, "loss": 11.6704, "step": 18597 }, { "epoch": 0.38930754416813196, "grad_norm": 0.2494051605463028, "learning_rate": 0.0001918269168621081, "loss": 11.6759, "step": 18598 }, { "epoch": 0.3893284769320941, "grad_norm": 0.30040574073791504, "learning_rate": 0.00019182604869188419, "loss": 11.6551, "step": 18599 }, { "epoch": 0.38934940969605625, "grad_norm": 0.31219184398651123, "learning_rate": 0.00019182518047751763, "loss": 11.679, "step": 18600 }, { "epoch": 0.38937034246001845, "grad_norm": 0.2755618095397949, "learning_rate": 0.00019182431221900886, "loss": 11.6695, "step": 18601 }, { "epoch": 0.3893912752239806, "grad_norm": 0.32901573181152344, "learning_rate": 0.00019182344391635833, "loss": 11.6811, "step": 18602 }, { "epoch": 0.38941220798794274, "grad_norm": 0.32760000228881836, "learning_rate": 0.00019182257556956645, "loss": 11.6935, "step": 18603 }, { "epoch": 0.3894331407519049, "grad_norm": 0.28092214465141296, "learning_rate": 0.00019182170717863358, "loss": 11.6829, "step": 18604 }, { "epoch": 0.389454073515867, "grad_norm": 0.3097468316555023, "learning_rate": 0.0001918208387435602, "loss": 11.6855, "step": 18605 }, { "epoch": 0.3894750062798292, "grad_norm": 0.263698935508728, "learning_rate": 0.00019181997026434665, "loss": 11.6671, "step": 18606 }, { "epoch": 0.3894959390437913, "grad_norm": 0.3305168151855469, "learning_rate": 0.00019181910174099348, "loss": 11.6679, "step": 18607 }, { "epoch": 0.3895168718077535, "grad_norm": 0.23853717744350433, "learning_rate": 0.00019181823317350097, "loss": 11.6573, "step": 18608 }, { "epoch": 0.38953780457171566, "grad_norm": 0.26882368326187134, "learning_rate": 0.0001918173645618696, "loss": 11.6669, "step": 18609 }, { "epoch": 0.3895587373356778, "grad_norm": 0.2640392482280731, "learning_rate": 0.00019181649590609978, "loss": 11.6813, "step": 18610 }, { "epoch": 0.38957967009963995, "grad_norm": 0.2640511393547058, "learning_rate": 0.00019181562720619195, "loss": 11.6693, "step": 18611 }, { "epoch": 0.3896006028636021, "grad_norm": 0.30463817715644836, "learning_rate": 0.00019181475846214653, "loss": 11.6769, "step": 18612 }, { "epoch": 0.38962153562756424, "grad_norm": 0.2549448311328888, "learning_rate": 0.00019181388967396388, "loss": 11.6722, "step": 18613 }, { "epoch": 0.38964246839152644, "grad_norm": 0.2996290326118469, "learning_rate": 0.00019181302084164445, "loss": 11.6674, "step": 18614 }, { "epoch": 0.3896634011554886, "grad_norm": 0.23595976829528809, "learning_rate": 0.00019181215196518867, "loss": 11.676, "step": 18615 }, { "epoch": 0.38968433391945073, "grad_norm": 0.354748398065567, "learning_rate": 0.00019181128304459697, "loss": 11.6881, "step": 18616 }, { "epoch": 0.3897052666834129, "grad_norm": 0.3387853503227234, "learning_rate": 0.00019181041407986973, "loss": 11.673, "step": 18617 }, { "epoch": 0.389726199447375, "grad_norm": 0.25951752066612244, "learning_rate": 0.00019180954507100738, "loss": 11.6757, "step": 18618 }, { "epoch": 0.38974713221133717, "grad_norm": 0.2677544355392456, "learning_rate": 0.0001918086760180103, "loss": 11.6804, "step": 18619 }, { "epoch": 0.3897680649752993, "grad_norm": 0.26489830017089844, "learning_rate": 0.00019180780692087902, "loss": 11.6864, "step": 18620 }, { "epoch": 0.3897889977392615, "grad_norm": 0.2744029462337494, "learning_rate": 0.00019180693777961385, "loss": 11.6588, "step": 18621 }, { "epoch": 0.38980993050322366, "grad_norm": 0.2907766103744507, "learning_rate": 0.00019180606859421525, "loss": 11.6675, "step": 18622 }, { "epoch": 0.3898308632671858, "grad_norm": 0.28637003898620605, "learning_rate": 0.00019180519936468367, "loss": 11.6832, "step": 18623 }, { "epoch": 0.38985179603114795, "grad_norm": 0.2875422239303589, "learning_rate": 0.00019180433009101947, "loss": 11.6726, "step": 18624 }, { "epoch": 0.3898727287951101, "grad_norm": 0.29558566212654114, "learning_rate": 0.00019180346077322304, "loss": 11.6592, "step": 18625 }, { "epoch": 0.38989366155907224, "grad_norm": 0.2959875464439392, "learning_rate": 0.00019180259141129491, "loss": 11.6808, "step": 18626 }, { "epoch": 0.38991459432303444, "grad_norm": 0.3875056207180023, "learning_rate": 0.0001918017220052354, "loss": 11.6619, "step": 18627 }, { "epoch": 0.3899355270869966, "grad_norm": 0.2961406707763672, "learning_rate": 0.00019180085255504495, "loss": 11.6763, "step": 18628 }, { "epoch": 0.3899564598509587, "grad_norm": 0.28333449363708496, "learning_rate": 0.00019179998306072404, "loss": 11.672, "step": 18629 }, { "epoch": 0.38997739261492087, "grad_norm": 0.2608977258205414, "learning_rate": 0.000191799113522273, "loss": 11.6521, "step": 18630 }, { "epoch": 0.389998325378883, "grad_norm": 0.3224067687988281, "learning_rate": 0.0001917982439396923, "loss": 11.6802, "step": 18631 }, { "epoch": 0.39001925814284516, "grad_norm": 0.2819446623325348, "learning_rate": 0.00019179737431298234, "loss": 11.6674, "step": 18632 }, { "epoch": 0.39004019090680736, "grad_norm": 0.27086758613586426, "learning_rate": 0.00019179650464214355, "loss": 11.6761, "step": 18633 }, { "epoch": 0.3900611236707695, "grad_norm": 0.2973104417324066, "learning_rate": 0.00019179563492717635, "loss": 11.672, "step": 18634 }, { "epoch": 0.39008205643473165, "grad_norm": 0.26747986674308777, "learning_rate": 0.0001917947651680811, "loss": 11.6792, "step": 18635 }, { "epoch": 0.3901029891986938, "grad_norm": 0.29057547450065613, "learning_rate": 0.00019179389536485832, "loss": 11.6702, "step": 18636 }, { "epoch": 0.39012392196265594, "grad_norm": 0.29172268509864807, "learning_rate": 0.00019179302551750835, "loss": 11.6601, "step": 18637 }, { "epoch": 0.3901448547266181, "grad_norm": 0.32646334171295166, "learning_rate": 0.00019179215562603163, "loss": 11.6694, "step": 18638 }, { "epoch": 0.39016578749058023, "grad_norm": 0.34850454330444336, "learning_rate": 0.00019179128569042858, "loss": 11.679, "step": 18639 }, { "epoch": 0.39018672025454243, "grad_norm": 0.2741352617740631, "learning_rate": 0.00019179041571069965, "loss": 11.6756, "step": 18640 }, { "epoch": 0.3902076530185046, "grad_norm": 0.2454475611448288, "learning_rate": 0.0001917895456868452, "loss": 11.666, "step": 18641 }, { "epoch": 0.3902285857824667, "grad_norm": 0.25414666533470154, "learning_rate": 0.00019178867561886568, "loss": 11.6944, "step": 18642 }, { "epoch": 0.39024951854642886, "grad_norm": 0.2626791000366211, "learning_rate": 0.0001917878055067615, "loss": 11.6736, "step": 18643 }, { "epoch": 0.390270451310391, "grad_norm": 0.26652535796165466, "learning_rate": 0.00019178693535053308, "loss": 11.6642, "step": 18644 }, { "epoch": 0.39029138407435315, "grad_norm": 0.34225547313690186, "learning_rate": 0.00019178606515018087, "loss": 11.6825, "step": 18645 }, { "epoch": 0.39031231683831535, "grad_norm": 0.3166179358959198, "learning_rate": 0.00019178519490570524, "loss": 11.6841, "step": 18646 }, { "epoch": 0.3903332496022775, "grad_norm": 0.33072710037231445, "learning_rate": 0.00019178432461710662, "loss": 11.6768, "step": 18647 }, { "epoch": 0.39035418236623964, "grad_norm": 0.29412490129470825, "learning_rate": 0.00019178345428438545, "loss": 11.6951, "step": 18648 }, { "epoch": 0.3903751151302018, "grad_norm": 0.27098116278648376, "learning_rate": 0.00019178258390754212, "loss": 11.6746, "step": 18649 }, { "epoch": 0.39039604789416393, "grad_norm": 0.287431538105011, "learning_rate": 0.00019178171348657706, "loss": 11.6715, "step": 18650 }, { "epoch": 0.3904169806581261, "grad_norm": 0.30295923352241516, "learning_rate": 0.00019178084302149068, "loss": 11.6552, "step": 18651 }, { "epoch": 0.3904379134220883, "grad_norm": 0.29695048928260803, "learning_rate": 0.00019177997251228345, "loss": 11.6823, "step": 18652 }, { "epoch": 0.3904588461860504, "grad_norm": 0.19277071952819824, "learning_rate": 0.00019177910195895573, "loss": 11.6661, "step": 18653 }, { "epoch": 0.39047977895001257, "grad_norm": 0.2674564719200134, "learning_rate": 0.00019177823136150797, "loss": 11.6789, "step": 18654 }, { "epoch": 0.3905007117139747, "grad_norm": 0.3001430332660675, "learning_rate": 0.00019177736071994053, "loss": 11.6696, "step": 18655 }, { "epoch": 0.39052164447793686, "grad_norm": 0.2599974274635315, "learning_rate": 0.0001917764900342539, "loss": 11.6623, "step": 18656 }, { "epoch": 0.390542577241899, "grad_norm": 0.2708991467952728, "learning_rate": 0.00019177561930444848, "loss": 11.6662, "step": 18657 }, { "epoch": 0.39056351000586115, "grad_norm": 0.2309952974319458, "learning_rate": 0.0001917747485305247, "loss": 11.6707, "step": 18658 }, { "epoch": 0.39058444276982335, "grad_norm": 0.3073175549507141, "learning_rate": 0.0001917738777124829, "loss": 11.6821, "step": 18659 }, { "epoch": 0.3906053755337855, "grad_norm": 0.3258190155029297, "learning_rate": 0.0001917730068503236, "loss": 11.6801, "step": 18660 }, { "epoch": 0.39062630829774764, "grad_norm": 0.28456950187683105, "learning_rate": 0.00019177213594404716, "loss": 11.6985, "step": 18661 }, { "epoch": 0.3906472410617098, "grad_norm": 0.2701529860496521, "learning_rate": 0.00019177126499365403, "loss": 11.6654, "step": 18662 }, { "epoch": 0.3906681738256719, "grad_norm": 1.2780849933624268, "learning_rate": 0.0001917703939991446, "loss": 11.6534, "step": 18663 }, { "epoch": 0.39068910658963407, "grad_norm": 0.33429446816444397, "learning_rate": 0.00019176952296051934, "loss": 11.685, "step": 18664 }, { "epoch": 0.3907100393535963, "grad_norm": 0.33334222435951233, "learning_rate": 0.0001917686518777786, "loss": 11.6809, "step": 18665 }, { "epoch": 0.3907309721175584, "grad_norm": 0.3342861235141754, "learning_rate": 0.0001917677807509228, "loss": 11.6666, "step": 18666 }, { "epoch": 0.39075190488152056, "grad_norm": 0.24370408058166504, "learning_rate": 0.00019176690957995243, "loss": 11.6596, "step": 18667 }, { "epoch": 0.3907728376454827, "grad_norm": 0.2674315869808197, "learning_rate": 0.00019176603836486785, "loss": 11.6658, "step": 18668 }, { "epoch": 0.39079377040944485, "grad_norm": 0.22664444148540497, "learning_rate": 0.0001917651671056695, "loss": 11.6731, "step": 18669 }, { "epoch": 0.390814703173407, "grad_norm": 0.24108250439167023, "learning_rate": 0.00019176429580235782, "loss": 11.6836, "step": 18670 }, { "epoch": 0.3908356359373692, "grad_norm": 0.3010008633136749, "learning_rate": 0.00019176342445493316, "loss": 11.6754, "step": 18671 }, { "epoch": 0.39085656870133134, "grad_norm": 0.2643817961215973, "learning_rate": 0.00019176255306339602, "loss": 11.6649, "step": 18672 }, { "epoch": 0.3908775014652935, "grad_norm": 0.27398180961608887, "learning_rate": 0.00019176168162774677, "loss": 11.6659, "step": 18673 }, { "epoch": 0.39089843422925563, "grad_norm": 0.28745126724243164, "learning_rate": 0.00019176081014798583, "loss": 11.668, "step": 18674 }, { "epoch": 0.3909193669932178, "grad_norm": 0.2814176082611084, "learning_rate": 0.00019175993862411363, "loss": 11.6772, "step": 18675 }, { "epoch": 0.3909402997571799, "grad_norm": 0.9099265933036804, "learning_rate": 0.0001917590670561306, "loss": 11.7114, "step": 18676 }, { "epoch": 0.39096123252114207, "grad_norm": 0.272626668214798, "learning_rate": 0.00019175819544403714, "loss": 11.6604, "step": 18677 }, { "epoch": 0.39098216528510427, "grad_norm": 0.24249117076396942, "learning_rate": 0.00019175732378783368, "loss": 11.6818, "step": 18678 }, { "epoch": 0.3910030980490664, "grad_norm": 0.2952238917350769, "learning_rate": 0.00019175645208752064, "loss": 11.6643, "step": 18679 }, { "epoch": 0.39102403081302856, "grad_norm": 0.2901279926300049, "learning_rate": 0.00019175558034309844, "loss": 11.6829, "step": 18680 }, { "epoch": 0.3910449635769907, "grad_norm": 0.2888962924480438, "learning_rate": 0.00019175470855456748, "loss": 11.6731, "step": 18681 }, { "epoch": 0.39106589634095285, "grad_norm": 0.2670581042766571, "learning_rate": 0.0001917538367219282, "loss": 11.6818, "step": 18682 }, { "epoch": 0.391086829104915, "grad_norm": 0.2379913479089737, "learning_rate": 0.00019175296484518102, "loss": 11.6865, "step": 18683 }, { "epoch": 0.3911077618688772, "grad_norm": 0.24506334960460663, "learning_rate": 0.00019175209292432633, "loss": 11.6555, "step": 18684 }, { "epoch": 0.39112869463283934, "grad_norm": 0.31547728180885315, "learning_rate": 0.00019175122095936456, "loss": 11.6608, "step": 18685 }, { "epoch": 0.3911496273968015, "grad_norm": 0.41908442974090576, "learning_rate": 0.00019175034895029617, "loss": 11.6744, "step": 18686 }, { "epoch": 0.3911705601607636, "grad_norm": 0.25101661682128906, "learning_rate": 0.00019174947689712152, "loss": 11.6677, "step": 18687 }, { "epoch": 0.39119149292472577, "grad_norm": 0.3191535770893097, "learning_rate": 0.0001917486047998411, "loss": 11.6754, "step": 18688 }, { "epoch": 0.3912124256886879, "grad_norm": 0.2743088901042938, "learning_rate": 0.00019174773265845526, "loss": 11.6722, "step": 18689 }, { "epoch": 0.3912333584526501, "grad_norm": 0.2664576470851898, "learning_rate": 0.00019174686047296446, "loss": 11.6701, "step": 18690 }, { "epoch": 0.39125429121661226, "grad_norm": 0.29464998841285706, "learning_rate": 0.00019174598824336906, "loss": 11.6538, "step": 18691 }, { "epoch": 0.3912752239805744, "grad_norm": 0.2474200427532196, "learning_rate": 0.00019174511596966958, "loss": 11.6692, "step": 18692 }, { "epoch": 0.39129615674453655, "grad_norm": 0.2765200436115265, "learning_rate": 0.00019174424365186636, "loss": 11.6596, "step": 18693 }, { "epoch": 0.3913170895084987, "grad_norm": 0.27860739827156067, "learning_rate": 0.00019174337128995985, "loss": 11.6732, "step": 18694 }, { "epoch": 0.39133802227246084, "grad_norm": 0.28771528601646423, "learning_rate": 0.00019174249888395046, "loss": 11.6686, "step": 18695 }, { "epoch": 0.391358955036423, "grad_norm": 0.3361299932003021, "learning_rate": 0.0001917416264338386, "loss": 11.6972, "step": 18696 }, { "epoch": 0.3913798878003852, "grad_norm": 0.3178459703922272, "learning_rate": 0.0001917407539396247, "loss": 11.6636, "step": 18697 }, { "epoch": 0.39140082056434733, "grad_norm": 0.2638225555419922, "learning_rate": 0.0001917398814013092, "loss": 11.6775, "step": 18698 }, { "epoch": 0.3914217533283095, "grad_norm": 0.3264216482639313, "learning_rate": 0.00019173900881889252, "loss": 11.6807, "step": 18699 }, { "epoch": 0.3914426860922716, "grad_norm": 0.2968601882457733, "learning_rate": 0.00019173813619237503, "loss": 11.6815, "step": 18700 }, { "epoch": 0.39146361885623376, "grad_norm": 0.2814139425754547, "learning_rate": 0.0001917372635217572, "loss": 11.6782, "step": 18701 }, { "epoch": 0.3914845516201959, "grad_norm": 0.32398009300231934, "learning_rate": 0.0001917363908070394, "loss": 11.6867, "step": 18702 }, { "epoch": 0.3915054843841581, "grad_norm": 0.2412613183259964, "learning_rate": 0.00019173551804822208, "loss": 11.6743, "step": 18703 }, { "epoch": 0.39152641714812025, "grad_norm": 0.27247217297554016, "learning_rate": 0.00019173464524530566, "loss": 11.6893, "step": 18704 }, { "epoch": 0.3915473499120824, "grad_norm": 0.31375113129615784, "learning_rate": 0.0001917337723982906, "loss": 11.6851, "step": 18705 }, { "epoch": 0.39156828267604454, "grad_norm": 0.23678967356681824, "learning_rate": 0.00019173289950717723, "loss": 11.6602, "step": 18706 }, { "epoch": 0.3915892154400067, "grad_norm": 0.3748861253261566, "learning_rate": 0.00019173202657196602, "loss": 11.659, "step": 18707 }, { "epoch": 0.39161014820396883, "grad_norm": 0.28435343503952026, "learning_rate": 0.00019173115359265742, "loss": 11.684, "step": 18708 }, { "epoch": 0.391631080967931, "grad_norm": 0.32797911763191223, "learning_rate": 0.0001917302805692518, "loss": 11.6647, "step": 18709 }, { "epoch": 0.3916520137318932, "grad_norm": 0.25214049220085144, "learning_rate": 0.0001917294075017496, "loss": 11.6788, "step": 18710 }, { "epoch": 0.3916729464958553, "grad_norm": 0.2813698649406433, "learning_rate": 0.0001917285343901512, "loss": 11.6683, "step": 18711 }, { "epoch": 0.39169387925981747, "grad_norm": 0.2520250082015991, "learning_rate": 0.0001917276612344571, "loss": 11.6678, "step": 18712 }, { "epoch": 0.3917148120237796, "grad_norm": 0.29764989018440247, "learning_rate": 0.00019172678803466763, "loss": 11.6785, "step": 18713 }, { "epoch": 0.39173574478774176, "grad_norm": 0.331531286239624, "learning_rate": 0.0001917259147907833, "loss": 11.6923, "step": 18714 }, { "epoch": 0.3917566775517039, "grad_norm": 0.2684990167617798, "learning_rate": 0.00019172504150280446, "loss": 11.6699, "step": 18715 }, { "epoch": 0.3917776103156661, "grad_norm": 0.25391799211502075, "learning_rate": 0.00019172416817073157, "loss": 11.662, "step": 18716 }, { "epoch": 0.39179854307962825, "grad_norm": 0.2182372808456421, "learning_rate": 0.00019172329479456504, "loss": 11.6789, "step": 18717 }, { "epoch": 0.3918194758435904, "grad_norm": 0.2842418849468231, "learning_rate": 0.00019172242137430526, "loss": 11.6543, "step": 18718 }, { "epoch": 0.39184040860755254, "grad_norm": 0.2559264004230499, "learning_rate": 0.00019172154790995268, "loss": 11.6686, "step": 18719 }, { "epoch": 0.3918613413715147, "grad_norm": 0.2934030592441559, "learning_rate": 0.0001917206744015077, "loss": 11.6662, "step": 18720 }, { "epoch": 0.3918822741354768, "grad_norm": 0.318442702293396, "learning_rate": 0.00019171980084897077, "loss": 11.6854, "step": 18721 }, { "epoch": 0.391903206899439, "grad_norm": 0.32225170731544495, "learning_rate": 0.0001917189272523423, "loss": 11.6651, "step": 18722 }, { "epoch": 0.39192413966340117, "grad_norm": 0.22651837766170502, "learning_rate": 0.0001917180536116227, "loss": 11.6713, "step": 18723 }, { "epoch": 0.3919450724273633, "grad_norm": 0.29865869879722595, "learning_rate": 0.0001917171799268124, "loss": 11.6817, "step": 18724 }, { "epoch": 0.39196600519132546, "grad_norm": 0.2457541525363922, "learning_rate": 0.00019171630619791179, "loss": 11.6914, "step": 18725 }, { "epoch": 0.3919869379552876, "grad_norm": 0.2641479969024658, "learning_rate": 0.00019171543242492133, "loss": 11.6641, "step": 18726 }, { "epoch": 0.39200787071924975, "grad_norm": 0.2684461772441864, "learning_rate": 0.0001917145586078414, "loss": 11.669, "step": 18727 }, { "epoch": 0.3920288034832119, "grad_norm": 0.22840237617492676, "learning_rate": 0.0001917136847466725, "loss": 11.6684, "step": 18728 }, { "epoch": 0.3920497362471741, "grad_norm": 0.28408971428871155, "learning_rate": 0.00019171281084141495, "loss": 11.6848, "step": 18729 }, { "epoch": 0.39207066901113624, "grad_norm": 0.309507817029953, "learning_rate": 0.00019171193689206922, "loss": 11.6988, "step": 18730 }, { "epoch": 0.3920916017750984, "grad_norm": 0.25628983974456787, "learning_rate": 0.00019171106289863572, "loss": 11.6719, "step": 18731 }, { "epoch": 0.39211253453906053, "grad_norm": 0.28164756298065186, "learning_rate": 0.0001917101888611149, "loss": 11.672, "step": 18732 }, { "epoch": 0.3921334673030227, "grad_norm": 0.3365744352340698, "learning_rate": 0.00019170931477950712, "loss": 11.6559, "step": 18733 }, { "epoch": 0.3921544000669848, "grad_norm": 0.2535492479801178, "learning_rate": 0.00019170844065381285, "loss": 11.6725, "step": 18734 }, { "epoch": 0.392175332830947, "grad_norm": 0.2891874611377716, "learning_rate": 0.0001917075664840325, "loss": 11.6746, "step": 18735 }, { "epoch": 0.39219626559490917, "grad_norm": 0.2767772376537323, "learning_rate": 0.00019170669227016647, "loss": 11.6774, "step": 18736 }, { "epoch": 0.3922171983588713, "grad_norm": 0.26237040758132935, "learning_rate": 0.0001917058180122152, "loss": 11.6754, "step": 18737 }, { "epoch": 0.39223813112283346, "grad_norm": 0.21555331349372864, "learning_rate": 0.00019170494371017913, "loss": 11.6711, "step": 18738 }, { "epoch": 0.3922590638867956, "grad_norm": 0.3275409936904907, "learning_rate": 0.00019170406936405864, "loss": 11.6716, "step": 18739 }, { "epoch": 0.39227999665075775, "grad_norm": 0.31314370036125183, "learning_rate": 0.00019170319497385414, "loss": 11.6661, "step": 18740 }, { "epoch": 0.39230092941471995, "grad_norm": 0.33273211121559143, "learning_rate": 0.00019170232053956608, "loss": 11.6602, "step": 18741 }, { "epoch": 0.3923218621786821, "grad_norm": 0.4176032245159149, "learning_rate": 0.0001917014460611949, "loss": 11.6821, "step": 18742 }, { "epoch": 0.39234279494264424, "grad_norm": 0.3778906762599945, "learning_rate": 0.000191700571538741, "loss": 11.694, "step": 18743 }, { "epoch": 0.3923637277066064, "grad_norm": 0.25745511054992676, "learning_rate": 0.0001916996969722048, "loss": 11.6752, "step": 18744 }, { "epoch": 0.3923846604705685, "grad_norm": 0.3251444399356842, "learning_rate": 0.00019169882236158673, "loss": 11.6784, "step": 18745 }, { "epoch": 0.39240559323453067, "grad_norm": 0.3909689784049988, "learning_rate": 0.00019169794770688715, "loss": 11.6654, "step": 18746 }, { "epoch": 0.3924265259984928, "grad_norm": 0.3141250014305115, "learning_rate": 0.00019169707300810656, "loss": 11.6931, "step": 18747 }, { "epoch": 0.392447458762455, "grad_norm": 0.2894217371940613, "learning_rate": 0.00019169619826524533, "loss": 11.6731, "step": 18748 }, { "epoch": 0.39246839152641716, "grad_norm": 0.23735474050045013, "learning_rate": 0.00019169532347830393, "loss": 11.6657, "step": 18749 }, { "epoch": 0.3924893242903793, "grad_norm": 0.34305912256240845, "learning_rate": 0.00019169444864728275, "loss": 11.6672, "step": 18750 }, { "epoch": 0.39251025705434145, "grad_norm": 0.34731894731521606, "learning_rate": 0.0001916935737721822, "loss": 11.6895, "step": 18751 }, { "epoch": 0.3925311898183036, "grad_norm": 0.3390064537525177, "learning_rate": 0.00019169269885300268, "loss": 11.6626, "step": 18752 }, { "epoch": 0.39255212258226574, "grad_norm": 0.3068358302116394, "learning_rate": 0.00019169182388974467, "loss": 11.6752, "step": 18753 }, { "epoch": 0.39257305534622794, "grad_norm": 0.29249095916748047, "learning_rate": 0.00019169094888240858, "loss": 11.668, "step": 18754 }, { "epoch": 0.3925939881101901, "grad_norm": 0.30209049582481384, "learning_rate": 0.0001916900738309948, "loss": 11.6975, "step": 18755 }, { "epoch": 0.39261492087415223, "grad_norm": 0.21955934166908264, "learning_rate": 0.00019168919873550376, "loss": 11.6565, "step": 18756 }, { "epoch": 0.3926358536381144, "grad_norm": 0.2700099050998688, "learning_rate": 0.00019168832359593588, "loss": 11.6632, "step": 18757 }, { "epoch": 0.3926567864020765, "grad_norm": 1.7575018405914307, "learning_rate": 0.00019168744841229155, "loss": 11.6953, "step": 18758 }, { "epoch": 0.39267771916603866, "grad_norm": 0.3037737309932709, "learning_rate": 0.00019168657318457128, "loss": 11.6856, "step": 18759 }, { "epoch": 0.39269865193000086, "grad_norm": 0.3255596458911896, "learning_rate": 0.00019168569791277543, "loss": 11.6814, "step": 18760 }, { "epoch": 0.392719584693963, "grad_norm": 0.2971564829349518, "learning_rate": 0.00019168482259690443, "loss": 11.6718, "step": 18761 }, { "epoch": 0.39274051745792515, "grad_norm": 0.26950058341026306, "learning_rate": 0.00019168394723695868, "loss": 11.663, "step": 18762 }, { "epoch": 0.3927614502218873, "grad_norm": 0.30716365575790405, "learning_rate": 0.00019168307183293863, "loss": 11.6753, "step": 18763 }, { "epoch": 0.39278238298584944, "grad_norm": 0.26905620098114014, "learning_rate": 0.00019168219638484468, "loss": 11.6821, "step": 18764 }, { "epoch": 0.3928033157498116, "grad_norm": 0.29528486728668213, "learning_rate": 0.00019168132089267728, "loss": 11.661, "step": 18765 }, { "epoch": 0.39282424851377373, "grad_norm": 0.2484077662229538, "learning_rate": 0.00019168044535643682, "loss": 11.6761, "step": 18766 }, { "epoch": 0.39284518127773593, "grad_norm": 0.31749215722084045, "learning_rate": 0.00019167956977612374, "loss": 11.6763, "step": 18767 }, { "epoch": 0.3928661140416981, "grad_norm": 0.26568126678466797, "learning_rate": 0.00019167869415173843, "loss": 11.6674, "step": 18768 }, { "epoch": 0.3928870468056602, "grad_norm": 0.26533618569374084, "learning_rate": 0.00019167781848328136, "loss": 11.6641, "step": 18769 }, { "epoch": 0.39290797956962237, "grad_norm": 0.37285923957824707, "learning_rate": 0.0001916769427707529, "loss": 11.6676, "step": 18770 }, { "epoch": 0.3929289123335845, "grad_norm": 0.3010789453983307, "learning_rate": 0.00019167606701415353, "loss": 11.6925, "step": 18771 }, { "epoch": 0.39294984509754666, "grad_norm": 0.2692829370498657, "learning_rate": 0.00019167519121348362, "loss": 11.6669, "step": 18772 }, { "epoch": 0.39297077786150886, "grad_norm": 0.30240583419799805, "learning_rate": 0.00019167431536874362, "loss": 11.6575, "step": 18773 }, { "epoch": 0.392991710625471, "grad_norm": 0.21910914778709412, "learning_rate": 0.0001916734394799339, "loss": 11.6477, "step": 18774 }, { "epoch": 0.39301264338943315, "grad_norm": 0.27084946632385254, "learning_rate": 0.00019167256354705496, "loss": 11.6707, "step": 18775 }, { "epoch": 0.3930335761533953, "grad_norm": 0.25755393505096436, "learning_rate": 0.00019167168757010716, "loss": 11.6674, "step": 18776 }, { "epoch": 0.39305450891735744, "grad_norm": 0.3137299418449402, "learning_rate": 0.00019167081154909096, "loss": 11.6732, "step": 18777 }, { "epoch": 0.3930754416813196, "grad_norm": 0.27592021226882935, "learning_rate": 0.00019166993548400677, "loss": 11.6662, "step": 18778 }, { "epoch": 0.3930963744452818, "grad_norm": 0.25512129068374634, "learning_rate": 0.00019166905937485496, "loss": 11.6807, "step": 18779 }, { "epoch": 0.3931173072092439, "grad_norm": 0.28077879548072815, "learning_rate": 0.00019166818322163603, "loss": 11.6851, "step": 18780 }, { "epoch": 0.39313823997320607, "grad_norm": 0.3624906837940216, "learning_rate": 0.00019166730702435038, "loss": 11.6875, "step": 18781 }, { "epoch": 0.3931591727371682, "grad_norm": 0.2784394323825836, "learning_rate": 0.0001916664307829984, "loss": 11.6789, "step": 18782 }, { "epoch": 0.39318010550113036, "grad_norm": 0.3603552281856537, "learning_rate": 0.00019166555449758054, "loss": 11.6714, "step": 18783 }, { "epoch": 0.3932010382650925, "grad_norm": 0.5993039011955261, "learning_rate": 0.0001916646781680972, "loss": 11.585, "step": 18784 }, { "epoch": 0.39322197102905465, "grad_norm": 0.2707367539405823, "learning_rate": 0.00019166380179454882, "loss": 11.6842, "step": 18785 }, { "epoch": 0.39324290379301685, "grad_norm": 0.28192436695098877, "learning_rate": 0.0001916629253769358, "loss": 11.6816, "step": 18786 }, { "epoch": 0.393263836556979, "grad_norm": 0.40387675166130066, "learning_rate": 0.00019166204891525859, "loss": 11.6918, "step": 18787 }, { "epoch": 0.39328476932094114, "grad_norm": 0.33814650774002075, "learning_rate": 0.0001916611724095176, "loss": 11.6647, "step": 18788 }, { "epoch": 0.3933057020849033, "grad_norm": 0.290018230676651, "learning_rate": 0.0001916602958597132, "loss": 11.6638, "step": 18789 }, { "epoch": 0.39332663484886543, "grad_norm": 0.31961458921432495, "learning_rate": 0.00019165941926584595, "loss": 11.6784, "step": 18790 }, { "epoch": 0.3933475676128276, "grad_norm": 0.2756035029888153, "learning_rate": 0.00019165854262791608, "loss": 11.6672, "step": 18791 }, { "epoch": 0.3933685003767898, "grad_norm": 0.22995269298553467, "learning_rate": 0.0001916576659459242, "loss": 11.6572, "step": 18792 }, { "epoch": 0.3933894331407519, "grad_norm": 0.21790024638175964, "learning_rate": 0.0001916567892198706, "loss": 11.6642, "step": 18793 }, { "epoch": 0.39341036590471407, "grad_norm": 0.34275639057159424, "learning_rate": 0.00019165591244975572, "loss": 11.6821, "step": 18794 }, { "epoch": 0.3934312986686762, "grad_norm": 0.2550158202648163, "learning_rate": 0.00019165503563558004, "loss": 11.664, "step": 18795 }, { "epoch": 0.39345223143263836, "grad_norm": 0.3193528950214386, "learning_rate": 0.00019165415877734395, "loss": 11.6702, "step": 18796 }, { "epoch": 0.3934731641966005, "grad_norm": 0.3091302812099457, "learning_rate": 0.00019165328187504784, "loss": 11.6806, "step": 18797 }, { "epoch": 0.3934940969605627, "grad_norm": 0.28279662132263184, "learning_rate": 0.0001916524049286922, "loss": 11.6801, "step": 18798 }, { "epoch": 0.39351502972452485, "grad_norm": 0.3049365282058716, "learning_rate": 0.0001916515279382774, "loss": 11.6844, "step": 18799 }, { "epoch": 0.393535962488487, "grad_norm": 0.3188205659389496, "learning_rate": 0.00019165065090380386, "loss": 11.6512, "step": 18800 }, { "epoch": 0.39355689525244913, "grad_norm": 0.28499656915664673, "learning_rate": 0.00019164977382527202, "loss": 11.6763, "step": 18801 }, { "epoch": 0.3935778280164113, "grad_norm": 0.41971611976623535, "learning_rate": 0.0001916488967026823, "loss": 11.6786, "step": 18802 }, { "epoch": 0.3935987607803734, "grad_norm": 0.24853043258190155, "learning_rate": 0.0001916480195360351, "loss": 11.6662, "step": 18803 }, { "epoch": 0.39361969354433557, "grad_norm": 0.2768370509147644, "learning_rate": 0.00019164714232533089, "loss": 11.6737, "step": 18804 }, { "epoch": 0.39364062630829777, "grad_norm": 0.29007184505462646, "learning_rate": 0.00019164626507057005, "loss": 11.6547, "step": 18805 }, { "epoch": 0.3936615590722599, "grad_norm": 0.3749313950538635, "learning_rate": 0.000191645387771753, "loss": 11.6863, "step": 18806 }, { "epoch": 0.39368249183622206, "grad_norm": 0.2936974763870239, "learning_rate": 0.0001916445104288802, "loss": 11.679, "step": 18807 }, { "epoch": 0.3937034246001842, "grad_norm": 0.26684653759002686, "learning_rate": 0.00019164363304195202, "loss": 11.6711, "step": 18808 }, { "epoch": 0.39372435736414635, "grad_norm": 0.29461076855659485, "learning_rate": 0.00019164275561096893, "loss": 11.6809, "step": 18809 }, { "epoch": 0.3937452901281085, "grad_norm": 0.32378053665161133, "learning_rate": 0.00019164187813593134, "loss": 11.6706, "step": 18810 }, { "epoch": 0.3937662228920707, "grad_norm": 0.3695918619632721, "learning_rate": 0.00019164100061683966, "loss": 11.6774, "step": 18811 }, { "epoch": 0.39378715565603284, "grad_norm": 0.2713177502155304, "learning_rate": 0.0001916401230536943, "loss": 11.6572, "step": 18812 }, { "epoch": 0.393808088419995, "grad_norm": 0.30286046862602234, "learning_rate": 0.0001916392454464957, "loss": 11.6709, "step": 18813 }, { "epoch": 0.39382902118395713, "grad_norm": 0.2863549590110779, "learning_rate": 0.00019163836779524424, "loss": 11.6783, "step": 18814 }, { "epoch": 0.3938499539479193, "grad_norm": 0.3213571012020111, "learning_rate": 0.00019163749009994043, "loss": 11.6653, "step": 18815 }, { "epoch": 0.3938708867118814, "grad_norm": 0.2692621052265167, "learning_rate": 0.00019163661236058462, "loss": 11.6627, "step": 18816 }, { "epoch": 0.39389181947584356, "grad_norm": 0.22315293550491333, "learning_rate": 0.00019163573457717727, "loss": 11.6725, "step": 18817 }, { "epoch": 0.39391275223980576, "grad_norm": 0.29214224219322205, "learning_rate": 0.00019163485674971877, "loss": 11.674, "step": 18818 }, { "epoch": 0.3939336850037679, "grad_norm": 0.26995161175727844, "learning_rate": 0.0001916339788782096, "loss": 11.672, "step": 18819 }, { "epoch": 0.39395461776773005, "grad_norm": 0.28652939200401306, "learning_rate": 0.0001916331009626501, "loss": 11.6693, "step": 18820 }, { "epoch": 0.3939755505316922, "grad_norm": 0.31569352746009827, "learning_rate": 0.00019163222300304074, "loss": 11.6896, "step": 18821 }, { "epoch": 0.39399648329565434, "grad_norm": 0.22774454951286316, "learning_rate": 0.00019163134499938192, "loss": 11.6765, "step": 18822 }, { "epoch": 0.3940174160596165, "grad_norm": 0.29186105728149414, "learning_rate": 0.0001916304669516741, "loss": 11.6633, "step": 18823 }, { "epoch": 0.3940383488235787, "grad_norm": 0.2729244828224182, "learning_rate": 0.00019162958885991766, "loss": 11.6818, "step": 18824 }, { "epoch": 0.39405928158754083, "grad_norm": 0.2578391432762146, "learning_rate": 0.00019162871072411303, "loss": 11.6738, "step": 18825 }, { "epoch": 0.394080214351503, "grad_norm": 0.29569777846336365, "learning_rate": 0.00019162783254426068, "loss": 11.6727, "step": 18826 }, { "epoch": 0.3941011471154651, "grad_norm": 0.31957703828811646, "learning_rate": 0.000191626954320361, "loss": 11.7023, "step": 18827 }, { "epoch": 0.39412207987942727, "grad_norm": 0.2219352275133133, "learning_rate": 0.00019162607605241436, "loss": 11.6606, "step": 18828 }, { "epoch": 0.3941430126433894, "grad_norm": 0.3232387602329254, "learning_rate": 0.00019162519774042127, "loss": 11.6773, "step": 18829 }, { "epoch": 0.3941639454073516, "grad_norm": 0.23121224343776703, "learning_rate": 0.00019162431938438208, "loss": 11.6674, "step": 18830 }, { "epoch": 0.39418487817131376, "grad_norm": 0.37593594193458557, "learning_rate": 0.00019162344098429726, "loss": 11.6857, "step": 18831 }, { "epoch": 0.3942058109352759, "grad_norm": 0.24500465393066406, "learning_rate": 0.00019162256254016722, "loss": 11.6625, "step": 18832 }, { "epoch": 0.39422674369923805, "grad_norm": 0.3146151900291443, "learning_rate": 0.00019162168405199235, "loss": 11.6846, "step": 18833 }, { "epoch": 0.3942476764632002, "grad_norm": 0.2190091907978058, "learning_rate": 0.00019162080551977313, "loss": 11.6752, "step": 18834 }, { "epoch": 0.39426860922716234, "grad_norm": 0.25138410925865173, "learning_rate": 0.00019161992694350996, "loss": 11.6757, "step": 18835 }, { "epoch": 0.3942895419911245, "grad_norm": 0.2757253050804138, "learning_rate": 0.0001916190483232032, "loss": 11.6698, "step": 18836 }, { "epoch": 0.3943104747550867, "grad_norm": 0.38136324286460876, "learning_rate": 0.0001916181696588534, "loss": 11.6768, "step": 18837 }, { "epoch": 0.3943314075190488, "grad_norm": 0.2651193141937256, "learning_rate": 0.00019161729095046087, "loss": 11.6806, "step": 18838 }, { "epoch": 0.39435234028301097, "grad_norm": 0.25675880908966064, "learning_rate": 0.00019161641219802607, "loss": 11.6788, "step": 18839 }, { "epoch": 0.3943732730469731, "grad_norm": 0.3223661482334137, "learning_rate": 0.00019161553340154943, "loss": 11.6811, "step": 18840 }, { "epoch": 0.39439420581093526, "grad_norm": 0.22730429470539093, "learning_rate": 0.00019161465456103138, "loss": 11.659, "step": 18841 }, { "epoch": 0.3944151385748974, "grad_norm": 0.3102183938026428, "learning_rate": 0.0001916137756764723, "loss": 11.6784, "step": 18842 }, { "epoch": 0.3944360713388596, "grad_norm": 0.32899776101112366, "learning_rate": 0.00019161289674787265, "loss": 11.6462, "step": 18843 }, { "epoch": 0.39445700410282175, "grad_norm": 0.2733626961708069, "learning_rate": 0.00019161201777523289, "loss": 11.6706, "step": 18844 }, { "epoch": 0.3944779368667839, "grad_norm": 0.2493123859167099, "learning_rate": 0.00019161113875855335, "loss": 11.6736, "step": 18845 }, { "epoch": 0.39449886963074604, "grad_norm": 0.23537766933441162, "learning_rate": 0.00019161025969783452, "loss": 11.6762, "step": 18846 }, { "epoch": 0.3945198023947082, "grad_norm": 0.34008172154426575, "learning_rate": 0.00019160938059307678, "loss": 11.6758, "step": 18847 }, { "epoch": 0.39454073515867033, "grad_norm": 0.2572660744190216, "learning_rate": 0.0001916085014442806, "loss": 11.6543, "step": 18848 }, { "epoch": 0.39456166792263253, "grad_norm": 0.3620525300502777, "learning_rate": 0.00019160762225144638, "loss": 11.6689, "step": 18849 }, { "epoch": 0.3945826006865947, "grad_norm": 0.2653718888759613, "learning_rate": 0.00019160674301457452, "loss": 11.6679, "step": 18850 }, { "epoch": 0.3946035334505568, "grad_norm": 0.34069377183914185, "learning_rate": 0.00019160586373366547, "loss": 11.6649, "step": 18851 }, { "epoch": 0.39462446621451897, "grad_norm": 0.26975393295288086, "learning_rate": 0.00019160498440871964, "loss": 11.6621, "step": 18852 }, { "epoch": 0.3946453989784811, "grad_norm": 0.41317835450172424, "learning_rate": 0.00019160410503973746, "loss": 11.6821, "step": 18853 }, { "epoch": 0.39466633174244325, "grad_norm": 0.27313560247421265, "learning_rate": 0.00019160322562671936, "loss": 11.6722, "step": 18854 }, { "epoch": 0.3946872645064054, "grad_norm": 0.2928408682346344, "learning_rate": 0.00019160234616966574, "loss": 11.6631, "step": 18855 }, { "epoch": 0.3947081972703676, "grad_norm": 0.23220187425613403, "learning_rate": 0.00019160146666857703, "loss": 11.6654, "step": 18856 }, { "epoch": 0.39472913003432974, "grad_norm": 0.29116982221603394, "learning_rate": 0.0001916005871234537, "loss": 11.6932, "step": 18857 }, { "epoch": 0.3947500627982919, "grad_norm": 0.3145011365413666, "learning_rate": 0.00019159970753429608, "loss": 11.6737, "step": 18858 }, { "epoch": 0.39477099556225403, "grad_norm": 0.3358250856399536, "learning_rate": 0.00019159882790110468, "loss": 11.6831, "step": 18859 }, { "epoch": 0.3947919283262162, "grad_norm": 0.325153648853302, "learning_rate": 0.0001915979482238799, "loss": 11.6721, "step": 18860 }, { "epoch": 0.3948128610901783, "grad_norm": 0.26483479142189026, "learning_rate": 0.00019159706850262212, "loss": 11.662, "step": 18861 }, { "epoch": 0.3948337938541405, "grad_norm": 0.30138254165649414, "learning_rate": 0.0001915961887373318, "loss": 11.6704, "step": 18862 }, { "epoch": 0.39485472661810267, "grad_norm": 0.27600201964378357, "learning_rate": 0.00019159530892800937, "loss": 11.6722, "step": 18863 }, { "epoch": 0.3948756593820648, "grad_norm": 0.2895861566066742, "learning_rate": 0.0001915944290746552, "loss": 11.671, "step": 18864 }, { "epoch": 0.39489659214602696, "grad_norm": 0.3652314245700836, "learning_rate": 0.0001915935491772698, "loss": 11.6662, "step": 18865 }, { "epoch": 0.3949175249099891, "grad_norm": 0.2713547945022583, "learning_rate": 0.00019159266923585353, "loss": 11.6774, "step": 18866 }, { "epoch": 0.39493845767395125, "grad_norm": 0.28172919154167175, "learning_rate": 0.00019159178925040683, "loss": 11.6594, "step": 18867 }, { "epoch": 0.39495939043791345, "grad_norm": 0.33334478735923767, "learning_rate": 0.0001915909092209301, "loss": 11.6722, "step": 18868 }, { "epoch": 0.3949803232018756, "grad_norm": 0.3080471158027649, "learning_rate": 0.0001915900291474238, "loss": 11.6763, "step": 18869 }, { "epoch": 0.39500125596583774, "grad_norm": 0.3011902868747711, "learning_rate": 0.00019158914902988835, "loss": 11.6852, "step": 18870 }, { "epoch": 0.3950221887297999, "grad_norm": 0.2799787223339081, "learning_rate": 0.00019158826886832414, "loss": 11.6721, "step": 18871 }, { "epoch": 0.39504312149376203, "grad_norm": 0.34564319252967834, "learning_rate": 0.00019158738866273162, "loss": 11.6577, "step": 18872 }, { "epoch": 0.3950640542577242, "grad_norm": 0.2797090411186218, "learning_rate": 0.00019158650841311122, "loss": 11.6886, "step": 18873 }, { "epoch": 0.3950849870216863, "grad_norm": 0.40491998195648193, "learning_rate": 0.00019158562811946332, "loss": 11.6623, "step": 18874 }, { "epoch": 0.3951059197856485, "grad_norm": 0.34412720799446106, "learning_rate": 0.0001915847477817884, "loss": 11.6885, "step": 18875 }, { "epoch": 0.39512685254961066, "grad_norm": 0.3878549039363861, "learning_rate": 0.00019158386740008684, "loss": 11.6706, "step": 18876 }, { "epoch": 0.3951477853135728, "grad_norm": 0.2107759416103363, "learning_rate": 0.0001915829869743591, "loss": 11.6713, "step": 18877 }, { "epoch": 0.39516871807753495, "grad_norm": 0.3046669363975525, "learning_rate": 0.00019158210650460557, "loss": 11.658, "step": 18878 }, { "epoch": 0.3951896508414971, "grad_norm": 0.3190741539001465, "learning_rate": 0.0001915812259908267, "loss": 11.6562, "step": 18879 }, { "epoch": 0.39521058360545924, "grad_norm": 0.3901616334915161, "learning_rate": 0.00019158034543302288, "loss": 11.6736, "step": 18880 }, { "epoch": 0.39523151636942144, "grad_norm": 0.28669479489326477, "learning_rate": 0.00019157946483119455, "loss": 11.6837, "step": 18881 }, { "epoch": 0.3952524491333836, "grad_norm": 0.25989454984664917, "learning_rate": 0.00019157858418534216, "loss": 11.6759, "step": 18882 }, { "epoch": 0.39527338189734573, "grad_norm": 0.26249685883522034, "learning_rate": 0.00019157770349546608, "loss": 11.6551, "step": 18883 }, { "epoch": 0.3952943146613079, "grad_norm": 0.382722407579422, "learning_rate": 0.0001915768227615668, "loss": 11.6711, "step": 18884 }, { "epoch": 0.39531524742527, "grad_norm": 0.30978745222091675, "learning_rate": 0.00019157594198364466, "loss": 11.6707, "step": 18885 }, { "epoch": 0.39533618018923217, "grad_norm": 0.3223610818386078, "learning_rate": 0.00019157506116170017, "loss": 11.6743, "step": 18886 }, { "epoch": 0.39535711295319437, "grad_norm": 0.2749709486961365, "learning_rate": 0.00019157418029573368, "loss": 11.6607, "step": 18887 }, { "epoch": 0.3953780457171565, "grad_norm": 0.2700234651565552, "learning_rate": 0.00019157329938574566, "loss": 11.6548, "step": 18888 }, { "epoch": 0.39539897848111866, "grad_norm": 0.3144732415676117, "learning_rate": 0.00019157241843173654, "loss": 11.6747, "step": 18889 }, { "epoch": 0.3954199112450808, "grad_norm": 0.2569822371006012, "learning_rate": 0.0001915715374337067, "loss": 11.6564, "step": 18890 }, { "epoch": 0.39544084400904295, "grad_norm": 0.27990013360977173, "learning_rate": 0.0001915706563916566, "loss": 11.6731, "step": 18891 }, { "epoch": 0.3954617767730051, "grad_norm": 0.3093789517879486, "learning_rate": 0.00019156977530558665, "loss": 11.6862, "step": 18892 }, { "epoch": 0.39548270953696724, "grad_norm": 0.3443552851676941, "learning_rate": 0.00019156889417549727, "loss": 11.6846, "step": 18893 }, { "epoch": 0.39550364230092944, "grad_norm": 0.28803253173828125, "learning_rate": 0.00019156801300138887, "loss": 11.6591, "step": 18894 }, { "epoch": 0.3955245750648916, "grad_norm": 0.2545841932296753, "learning_rate": 0.00019156713178326192, "loss": 11.6747, "step": 18895 }, { "epoch": 0.3955455078288537, "grad_norm": 0.30948689579963684, "learning_rate": 0.0001915662505211168, "loss": 11.6832, "step": 18896 }, { "epoch": 0.39556644059281587, "grad_norm": 0.2921221852302551, "learning_rate": 0.00019156536921495393, "loss": 11.6733, "step": 18897 }, { "epoch": 0.395587373356778, "grad_norm": 0.24100780487060547, "learning_rate": 0.00019156448786477378, "loss": 11.6693, "step": 18898 }, { "epoch": 0.39560830612074016, "grad_norm": 0.3934009075164795, "learning_rate": 0.00019156360647057673, "loss": 11.6815, "step": 18899 }, { "epoch": 0.39562923888470236, "grad_norm": 0.33336836099624634, "learning_rate": 0.00019156272503236321, "loss": 11.6639, "step": 18900 }, { "epoch": 0.3956501716486645, "grad_norm": 0.2583090662956238, "learning_rate": 0.00019156184355013367, "loss": 11.6849, "step": 18901 }, { "epoch": 0.39567110441262665, "grad_norm": 0.31983721256256104, "learning_rate": 0.00019156096202388852, "loss": 11.6744, "step": 18902 }, { "epoch": 0.3956920371765888, "grad_norm": 0.33659884333610535, "learning_rate": 0.00019156008045362818, "loss": 11.6877, "step": 18903 }, { "epoch": 0.39571296994055094, "grad_norm": 0.3243785500526428, "learning_rate": 0.00019155919883935308, "loss": 11.6767, "step": 18904 }, { "epoch": 0.3957339027045131, "grad_norm": 0.30144771933555603, "learning_rate": 0.00019155831718106362, "loss": 11.6673, "step": 18905 }, { "epoch": 0.39575483546847523, "grad_norm": 0.2975177764892578, "learning_rate": 0.00019155743547876023, "loss": 11.6708, "step": 18906 }, { "epoch": 0.39577576823243743, "grad_norm": 0.3384913504123688, "learning_rate": 0.0001915565537324434, "loss": 11.6689, "step": 18907 }, { "epoch": 0.3957967009963996, "grad_norm": 0.3041480779647827, "learning_rate": 0.00019155567194211343, "loss": 11.6765, "step": 18908 }, { "epoch": 0.3958176337603617, "grad_norm": 0.2772604823112488, "learning_rate": 0.00019155479010777085, "loss": 11.6734, "step": 18909 }, { "epoch": 0.39583856652432386, "grad_norm": 0.3167145252227783, "learning_rate": 0.00019155390822941609, "loss": 11.6755, "step": 18910 }, { "epoch": 0.395859499288286, "grad_norm": 0.3062995970249176, "learning_rate": 0.0001915530263070495, "loss": 11.671, "step": 18911 }, { "epoch": 0.39588043205224815, "grad_norm": 0.2714833617210388, "learning_rate": 0.0001915521443406715, "loss": 11.6823, "step": 18912 }, { "epoch": 0.39590136481621035, "grad_norm": 0.2980509400367737, "learning_rate": 0.00019155126233028256, "loss": 11.6667, "step": 18913 }, { "epoch": 0.3959222975801725, "grad_norm": 0.3384474217891693, "learning_rate": 0.00019155038027588312, "loss": 11.6739, "step": 18914 }, { "epoch": 0.39594323034413464, "grad_norm": 0.29640617966651917, "learning_rate": 0.00019154949817747356, "loss": 11.6801, "step": 18915 }, { "epoch": 0.3959641631080968, "grad_norm": 0.29929712414741516, "learning_rate": 0.0001915486160350543, "loss": 11.6819, "step": 18916 }, { "epoch": 0.39598509587205893, "grad_norm": 0.2686936855316162, "learning_rate": 0.00019154773384862583, "loss": 11.6731, "step": 18917 }, { "epoch": 0.3960060286360211, "grad_norm": 0.292256236076355, "learning_rate": 0.0001915468516181885, "loss": 11.676, "step": 18918 }, { "epoch": 0.3960269613999833, "grad_norm": 0.34312763810157776, "learning_rate": 0.00019154596934374278, "loss": 11.6669, "step": 18919 }, { "epoch": 0.3960478941639454, "grad_norm": 0.2611798346042633, "learning_rate": 0.0001915450870252891, "loss": 11.6723, "step": 18920 }, { "epoch": 0.39606882692790757, "grad_norm": 0.32659974694252014, "learning_rate": 0.00019154420466282784, "loss": 11.6689, "step": 18921 }, { "epoch": 0.3960897596918697, "grad_norm": 0.4073292911052704, "learning_rate": 0.00019154332225635942, "loss": 11.6747, "step": 18922 }, { "epoch": 0.39611069245583186, "grad_norm": 0.31123456358909607, "learning_rate": 0.00019154243980588431, "loss": 11.6862, "step": 18923 }, { "epoch": 0.396131625219794, "grad_norm": 0.35198232531547546, "learning_rate": 0.00019154155731140293, "loss": 11.6695, "step": 18924 }, { "epoch": 0.39615255798375615, "grad_norm": 0.28562575578689575, "learning_rate": 0.00019154067477291568, "loss": 11.6845, "step": 18925 }, { "epoch": 0.39617349074771835, "grad_norm": 0.28040027618408203, "learning_rate": 0.000191539792190423, "loss": 11.659, "step": 18926 }, { "epoch": 0.3961944235116805, "grad_norm": 0.25618550181388855, "learning_rate": 0.00019153890956392528, "loss": 11.6687, "step": 18927 }, { "epoch": 0.39621535627564264, "grad_norm": 0.28431376814842224, "learning_rate": 0.000191538026893423, "loss": 11.6796, "step": 18928 }, { "epoch": 0.3962362890396048, "grad_norm": 0.2729148268699646, "learning_rate": 0.00019153714417891653, "loss": 11.6686, "step": 18929 }, { "epoch": 0.39625722180356693, "grad_norm": 0.3117508292198181, "learning_rate": 0.00019153626142040634, "loss": 11.6782, "step": 18930 }, { "epoch": 0.3962781545675291, "grad_norm": 0.3136942386627197, "learning_rate": 0.00019153537861789284, "loss": 11.6883, "step": 18931 }, { "epoch": 0.3962990873314913, "grad_norm": 0.2653926908969879, "learning_rate": 0.0001915344957713764, "loss": 11.6941, "step": 18932 }, { "epoch": 0.3963200200954534, "grad_norm": 0.3011096715927124, "learning_rate": 0.00019153361288085753, "loss": 11.6613, "step": 18933 }, { "epoch": 0.39634095285941556, "grad_norm": 0.3621598780155182, "learning_rate": 0.00019153272994633665, "loss": 11.6583, "step": 18934 }, { "epoch": 0.3963618856233777, "grad_norm": 0.2767977714538574, "learning_rate": 0.00019153184696781413, "loss": 11.6706, "step": 18935 }, { "epoch": 0.39638281838733985, "grad_norm": 0.3051969110965729, "learning_rate": 0.0001915309639452904, "loss": 11.6801, "step": 18936 }, { "epoch": 0.396403751151302, "grad_norm": 0.2584758996963501, "learning_rate": 0.0001915300808787659, "loss": 11.664, "step": 18937 }, { "epoch": 0.3964246839152642, "grad_norm": 0.2564418911933899, "learning_rate": 0.00019152919776824106, "loss": 11.6677, "step": 18938 }, { "epoch": 0.39644561667922634, "grad_norm": 0.3737073242664337, "learning_rate": 0.00019152831461371632, "loss": 11.6963, "step": 18939 }, { "epoch": 0.3964665494431885, "grad_norm": 0.26730895042419434, "learning_rate": 0.00019152743141519207, "loss": 11.6534, "step": 18940 }, { "epoch": 0.39648748220715063, "grad_norm": 0.2749719023704529, "learning_rate": 0.00019152654817266874, "loss": 11.6634, "step": 18941 }, { "epoch": 0.3965084149711128, "grad_norm": 0.4029703438282013, "learning_rate": 0.0001915256648861468, "loss": 11.6844, "step": 18942 }, { "epoch": 0.3965293477350749, "grad_norm": 0.3392343521118164, "learning_rate": 0.0001915247815556266, "loss": 11.6665, "step": 18943 }, { "epoch": 0.39655028049903707, "grad_norm": 0.2901957035064697, "learning_rate": 0.00019152389818110862, "loss": 11.6928, "step": 18944 }, { "epoch": 0.39657121326299927, "grad_norm": 0.24393406510353088, "learning_rate": 0.00019152301476259328, "loss": 11.6649, "step": 18945 }, { "epoch": 0.3965921460269614, "grad_norm": 0.2917926609516144, "learning_rate": 0.00019152213130008096, "loss": 11.6641, "step": 18946 }, { "epoch": 0.39661307879092356, "grad_norm": 0.28569188714027405, "learning_rate": 0.00019152124779357214, "loss": 11.678, "step": 18947 }, { "epoch": 0.3966340115548857, "grad_norm": 0.25431907176971436, "learning_rate": 0.00019152036424306722, "loss": 11.6681, "step": 18948 }, { "epoch": 0.39665494431884785, "grad_norm": 0.3107373118400574, "learning_rate": 0.00019151948064856665, "loss": 11.685, "step": 18949 }, { "epoch": 0.39667587708281, "grad_norm": 0.27832087874412537, "learning_rate": 0.00019151859701007078, "loss": 11.6664, "step": 18950 }, { "epoch": 0.3966968098467722, "grad_norm": 0.26547864079475403, "learning_rate": 0.0001915177133275801, "loss": 11.6709, "step": 18951 }, { "epoch": 0.39671774261073434, "grad_norm": 0.28355836868286133, "learning_rate": 0.00019151682960109504, "loss": 11.6641, "step": 18952 }, { "epoch": 0.3967386753746965, "grad_norm": 0.3120417594909668, "learning_rate": 0.00019151594583061602, "loss": 11.6705, "step": 18953 }, { "epoch": 0.3967596081386586, "grad_norm": 0.26655101776123047, "learning_rate": 0.00019151506201614343, "loss": 11.6802, "step": 18954 }, { "epoch": 0.39678054090262077, "grad_norm": 0.25582602620124817, "learning_rate": 0.00019151417815767768, "loss": 11.6761, "step": 18955 }, { "epoch": 0.3968014736665829, "grad_norm": 0.38074991106987, "learning_rate": 0.0001915132942552193, "loss": 11.6853, "step": 18956 }, { "epoch": 0.3968224064305451, "grad_norm": 0.2458983063697815, "learning_rate": 0.00019151241030876862, "loss": 11.6818, "step": 18957 }, { "epoch": 0.39684333919450726, "grad_norm": 0.3129073679447174, "learning_rate": 0.00019151152631832608, "loss": 11.6553, "step": 18958 }, { "epoch": 0.3968642719584694, "grad_norm": 0.3038809895515442, "learning_rate": 0.0001915106422838921, "loss": 11.6673, "step": 18959 }, { "epoch": 0.39688520472243155, "grad_norm": 0.2348141223192215, "learning_rate": 0.00019150975820546716, "loss": 11.695, "step": 18960 }, { "epoch": 0.3969061374863937, "grad_norm": 0.3074004054069519, "learning_rate": 0.0001915088740830516, "loss": 11.6821, "step": 18961 }, { "epoch": 0.39692707025035584, "grad_norm": 0.24404515326023102, "learning_rate": 0.0001915079899166459, "loss": 11.6725, "step": 18962 }, { "epoch": 0.396948003014318, "grad_norm": 0.253204882144928, "learning_rate": 0.0001915071057062505, "loss": 11.6692, "step": 18963 }, { "epoch": 0.3969689357782802, "grad_norm": 0.2854006588459015, "learning_rate": 0.00019150622145186578, "loss": 11.668, "step": 18964 }, { "epoch": 0.39698986854224233, "grad_norm": 0.257307231426239, "learning_rate": 0.0001915053371534922, "loss": 11.675, "step": 18965 }, { "epoch": 0.3970108013062045, "grad_norm": 0.2790180444717407, "learning_rate": 0.00019150445281113017, "loss": 11.6727, "step": 18966 }, { "epoch": 0.3970317340701666, "grad_norm": 0.2687712013721466, "learning_rate": 0.00019150356842478013, "loss": 11.6704, "step": 18967 }, { "epoch": 0.39705266683412876, "grad_norm": 0.34790554642677307, "learning_rate": 0.00019150268399444244, "loss": 11.6695, "step": 18968 }, { "epoch": 0.3970735995980909, "grad_norm": 0.3035637438297272, "learning_rate": 0.00019150179952011765, "loss": 11.6798, "step": 18969 }, { "epoch": 0.3970945323620531, "grad_norm": 0.2643408477306366, "learning_rate": 0.00019150091500180606, "loss": 11.6879, "step": 18970 }, { "epoch": 0.39711546512601525, "grad_norm": 0.3032224774360657, "learning_rate": 0.00019150003043950814, "loss": 11.6704, "step": 18971 }, { "epoch": 0.3971363978899774, "grad_norm": 0.2712016999721527, "learning_rate": 0.00019149914583322435, "loss": 11.6822, "step": 18972 }, { "epoch": 0.39715733065393954, "grad_norm": 0.2957266569137573, "learning_rate": 0.00019149826118295505, "loss": 11.6912, "step": 18973 }, { "epoch": 0.3971782634179017, "grad_norm": 0.2666143774986267, "learning_rate": 0.00019149737648870073, "loss": 11.642, "step": 18974 }, { "epoch": 0.39719919618186383, "grad_norm": 0.3839825391769409, "learning_rate": 0.00019149649175046181, "loss": 11.6764, "step": 18975 }, { "epoch": 0.39722012894582603, "grad_norm": 0.39043352007865906, "learning_rate": 0.00019149560696823866, "loss": 11.6778, "step": 18976 }, { "epoch": 0.3972410617097882, "grad_norm": 0.2466183453798294, "learning_rate": 0.00019149472214203175, "loss": 11.6757, "step": 18977 }, { "epoch": 0.3972619944737503, "grad_norm": 0.29865211248397827, "learning_rate": 0.00019149383727184148, "loss": 11.6701, "step": 18978 }, { "epoch": 0.39728292723771247, "grad_norm": 0.2755659222602844, "learning_rate": 0.0001914929523576683, "loss": 11.6771, "step": 18979 }, { "epoch": 0.3973038600016746, "grad_norm": 0.3506315350532532, "learning_rate": 0.00019149206739951262, "loss": 11.6817, "step": 18980 }, { "epoch": 0.39732479276563676, "grad_norm": 0.3033738434314728, "learning_rate": 0.00019149118239737486, "loss": 11.6648, "step": 18981 }, { "epoch": 0.3973457255295989, "grad_norm": 0.2888173758983612, "learning_rate": 0.0001914902973512555, "loss": 11.6726, "step": 18982 }, { "epoch": 0.3973666582935611, "grad_norm": 0.28425970673561096, "learning_rate": 0.00019148941226115486, "loss": 11.6598, "step": 18983 }, { "epoch": 0.39738759105752325, "grad_norm": 0.33520078659057617, "learning_rate": 0.00019148852712707346, "loss": 11.6792, "step": 18984 }, { "epoch": 0.3974085238214854, "grad_norm": 0.35289227962493896, "learning_rate": 0.0001914876419490117, "loss": 11.6694, "step": 18985 }, { "epoch": 0.39742945658544754, "grad_norm": 0.2258092314004898, "learning_rate": 0.00019148675672696997, "loss": 11.6746, "step": 18986 }, { "epoch": 0.3974503893494097, "grad_norm": 0.3276665210723877, "learning_rate": 0.00019148587146094873, "loss": 11.6673, "step": 18987 }, { "epoch": 0.3974713221133718, "grad_norm": 0.35502415895462036, "learning_rate": 0.0001914849861509484, "loss": 11.6732, "step": 18988 }, { "epoch": 0.39749225487733403, "grad_norm": 0.29181385040283203, "learning_rate": 0.00019148410079696942, "loss": 11.6694, "step": 18989 }, { "epoch": 0.3975131876412962, "grad_norm": 0.2442161589860916, "learning_rate": 0.00019148321539901218, "loss": 11.662, "step": 18990 }, { "epoch": 0.3975341204052583, "grad_norm": 0.31679829955101013, "learning_rate": 0.00019148232995707713, "loss": 11.6612, "step": 18991 }, { "epoch": 0.39755505316922046, "grad_norm": 0.27335983514785767, "learning_rate": 0.0001914814444711647, "loss": 11.6832, "step": 18992 }, { "epoch": 0.3975759859331826, "grad_norm": 0.26138028502464294, "learning_rate": 0.00019148055894127527, "loss": 11.6626, "step": 18993 }, { "epoch": 0.39759691869714475, "grad_norm": 0.3404761850833893, "learning_rate": 0.00019147967336740935, "loss": 11.6704, "step": 18994 }, { "epoch": 0.3976178514611069, "grad_norm": 0.3228626251220703, "learning_rate": 0.00019147878774956732, "loss": 11.6771, "step": 18995 }, { "epoch": 0.3976387842250691, "grad_norm": 0.36165040731430054, "learning_rate": 0.00019147790208774956, "loss": 11.6855, "step": 18996 }, { "epoch": 0.39765971698903124, "grad_norm": 0.2457798570394516, "learning_rate": 0.00019147701638195657, "loss": 11.6673, "step": 18997 }, { "epoch": 0.3976806497529934, "grad_norm": 0.2853536605834961, "learning_rate": 0.0001914761306321887, "loss": 11.6665, "step": 18998 }, { "epoch": 0.39770158251695553, "grad_norm": 0.34922751784324646, "learning_rate": 0.00019147524483844648, "loss": 11.6677, "step": 18999 }, { "epoch": 0.3977225152809177, "grad_norm": 0.2909804880619049, "learning_rate": 0.00019147435900073026, "loss": 11.6664, "step": 19000 }, { "epoch": 0.3977225152809177, "eval_loss": 11.673826217651367, "eval_runtime": 34.3518, "eval_samples_per_second": 27.975, "eval_steps_per_second": 7.016, "step": 19000 }, { "epoch": 0.3977434480448798, "grad_norm": 0.3336133062839508, "learning_rate": 0.00019147347311904047, "loss": 11.6565, "step": 19001 }, { "epoch": 0.397764380808842, "grad_norm": 0.27328330278396606, "learning_rate": 0.00019147258719337754, "loss": 11.6733, "step": 19002 }, { "epoch": 0.39778531357280417, "grad_norm": 0.3336730897426605, "learning_rate": 0.0001914717012237419, "loss": 11.6694, "step": 19003 }, { "epoch": 0.3978062463367663, "grad_norm": 0.31767475605010986, "learning_rate": 0.00019147081521013402, "loss": 11.682, "step": 19004 }, { "epoch": 0.39782717910072846, "grad_norm": 0.25440242886543274, "learning_rate": 0.00019146992915255426, "loss": 11.6705, "step": 19005 }, { "epoch": 0.3978481118646906, "grad_norm": 0.264981746673584, "learning_rate": 0.0001914690430510031, "loss": 11.6901, "step": 19006 }, { "epoch": 0.39786904462865275, "grad_norm": 0.4023740291595459, "learning_rate": 0.0001914681569054809, "loss": 11.6948, "step": 19007 }, { "epoch": 0.39788997739261495, "grad_norm": 0.3888242840766907, "learning_rate": 0.00019146727071598813, "loss": 11.6656, "step": 19008 }, { "epoch": 0.3979109101565771, "grad_norm": 0.26874685287475586, "learning_rate": 0.00019146638448252522, "loss": 11.659, "step": 19009 }, { "epoch": 0.39793184292053924, "grad_norm": 0.24100381135940552, "learning_rate": 0.00019146549820509262, "loss": 11.6662, "step": 19010 }, { "epoch": 0.3979527756845014, "grad_norm": 0.25892359018325806, "learning_rate": 0.00019146461188369066, "loss": 11.6917, "step": 19011 }, { "epoch": 0.3979737084484635, "grad_norm": 0.30286741256713867, "learning_rate": 0.00019146372551831985, "loss": 11.6581, "step": 19012 }, { "epoch": 0.39799464121242567, "grad_norm": 0.28320077061653137, "learning_rate": 0.00019146283910898059, "loss": 11.6675, "step": 19013 }, { "epoch": 0.3980155739763878, "grad_norm": 0.3137003183364868, "learning_rate": 0.00019146195265567334, "loss": 11.6606, "step": 19014 }, { "epoch": 0.39803650674035, "grad_norm": 0.28531453013420105, "learning_rate": 0.00019146106615839848, "loss": 11.6723, "step": 19015 }, { "epoch": 0.39805743950431216, "grad_norm": 0.28832054138183594, "learning_rate": 0.00019146017961715642, "loss": 11.6751, "step": 19016 }, { "epoch": 0.3980783722682743, "grad_norm": 0.27151572704315186, "learning_rate": 0.00019145929303194766, "loss": 11.6689, "step": 19017 }, { "epoch": 0.39809930503223645, "grad_norm": 0.25049328804016113, "learning_rate": 0.00019145840640277258, "loss": 11.6835, "step": 19018 }, { "epoch": 0.3981202377961986, "grad_norm": 0.27853837609291077, "learning_rate": 0.00019145751972963159, "loss": 11.6753, "step": 19019 }, { "epoch": 0.39814117056016074, "grad_norm": 0.2389262169599533, "learning_rate": 0.00019145663301252513, "loss": 11.6781, "step": 19020 }, { "epoch": 0.39816210332412294, "grad_norm": 0.3439844250679016, "learning_rate": 0.0001914557462514537, "loss": 11.6853, "step": 19021 }, { "epoch": 0.3981830360880851, "grad_norm": 0.28812751173973083, "learning_rate": 0.0001914548594464176, "loss": 11.6803, "step": 19022 }, { "epoch": 0.39820396885204723, "grad_norm": 0.24467416107654572, "learning_rate": 0.00019145397259741733, "loss": 11.6744, "step": 19023 }, { "epoch": 0.3982249016160094, "grad_norm": 0.3988525867462158, "learning_rate": 0.00019145308570445328, "loss": 11.666, "step": 19024 }, { "epoch": 0.3982458343799715, "grad_norm": 0.31526800990104675, "learning_rate": 0.00019145219876752593, "loss": 11.6652, "step": 19025 }, { "epoch": 0.39826676714393366, "grad_norm": 0.31921589374542236, "learning_rate": 0.00019145131178663566, "loss": 11.6858, "step": 19026 }, { "epoch": 0.39828769990789586, "grad_norm": 0.2702678143978119, "learning_rate": 0.00019145042476178295, "loss": 11.6825, "step": 19027 }, { "epoch": 0.398308632671858, "grad_norm": 0.310420960187912, "learning_rate": 0.00019144953769296814, "loss": 11.6773, "step": 19028 }, { "epoch": 0.39832956543582015, "grad_norm": 0.33155909180641174, "learning_rate": 0.00019144865058019173, "loss": 11.6697, "step": 19029 }, { "epoch": 0.3983504981997823, "grad_norm": 0.2563263475894928, "learning_rate": 0.00019144776342345412, "loss": 11.6822, "step": 19030 }, { "epoch": 0.39837143096374444, "grad_norm": 0.36229556798934937, "learning_rate": 0.0001914468762227557, "loss": 11.6882, "step": 19031 }, { "epoch": 0.3983923637277066, "grad_norm": 0.25592631101608276, "learning_rate": 0.00019144598897809697, "loss": 11.6774, "step": 19032 }, { "epoch": 0.39841329649166873, "grad_norm": 0.2874368727207184, "learning_rate": 0.0001914451016894783, "loss": 11.6683, "step": 19033 }, { "epoch": 0.39843422925563093, "grad_norm": 0.21999086439609528, "learning_rate": 0.00019144421435690016, "loss": 11.6632, "step": 19034 }, { "epoch": 0.3984551620195931, "grad_norm": 0.29298070073127747, "learning_rate": 0.00019144332698036297, "loss": 11.6542, "step": 19035 }, { "epoch": 0.3984760947835552, "grad_norm": 0.2512110769748688, "learning_rate": 0.0001914424395598671, "loss": 11.6625, "step": 19036 }, { "epoch": 0.39849702754751737, "grad_norm": 0.3079002797603607, "learning_rate": 0.00019144155209541305, "loss": 11.6796, "step": 19037 }, { "epoch": 0.3985179603114795, "grad_norm": 0.29463768005371094, "learning_rate": 0.00019144066458700118, "loss": 11.664, "step": 19038 }, { "epoch": 0.39853889307544166, "grad_norm": 0.2846987545490265, "learning_rate": 0.00019143977703463196, "loss": 11.6728, "step": 19039 }, { "epoch": 0.39855982583940386, "grad_norm": 0.2153492271900177, "learning_rate": 0.00019143888943830582, "loss": 11.672, "step": 19040 }, { "epoch": 0.398580758603366, "grad_norm": 0.26914188265800476, "learning_rate": 0.00019143800179802315, "loss": 11.6828, "step": 19041 }, { "epoch": 0.39860169136732815, "grad_norm": 0.2784264385700226, "learning_rate": 0.00019143711411378442, "loss": 11.683, "step": 19042 }, { "epoch": 0.3986226241312903, "grad_norm": 0.25446218252182007, "learning_rate": 0.00019143622638559006, "loss": 11.6775, "step": 19043 }, { "epoch": 0.39864355689525244, "grad_norm": 0.25983133912086487, "learning_rate": 0.00019143533861344043, "loss": 11.6672, "step": 19044 }, { "epoch": 0.3986644896592146, "grad_norm": 0.4459591507911682, "learning_rate": 0.00019143445079733602, "loss": 11.6657, "step": 19045 }, { "epoch": 0.3986854224231768, "grad_norm": 0.345823734998703, "learning_rate": 0.00019143356293727722, "loss": 11.674, "step": 19046 }, { "epoch": 0.3987063551871389, "grad_norm": 0.31216928362846375, "learning_rate": 0.0001914326750332645, "loss": 11.6731, "step": 19047 }, { "epoch": 0.3987272879511011, "grad_norm": 0.3149070739746094, "learning_rate": 0.00019143178708529824, "loss": 11.6743, "step": 19048 }, { "epoch": 0.3987482207150632, "grad_norm": 0.26567649841308594, "learning_rate": 0.0001914308990933789, "loss": 11.6602, "step": 19049 }, { "epoch": 0.39876915347902536, "grad_norm": 0.28025737404823303, "learning_rate": 0.00019143001105750688, "loss": 11.6729, "step": 19050 }, { "epoch": 0.3987900862429875, "grad_norm": 0.2963455617427826, "learning_rate": 0.00019142912297768263, "loss": 11.6512, "step": 19051 }, { "epoch": 0.39881101900694965, "grad_norm": 0.31389909982681274, "learning_rate": 0.00019142823485390658, "loss": 11.6827, "step": 19052 }, { "epoch": 0.39883195177091185, "grad_norm": 0.2729823589324951, "learning_rate": 0.00019142734668617912, "loss": 11.6585, "step": 19053 }, { "epoch": 0.398852884534874, "grad_norm": 0.3178640305995941, "learning_rate": 0.00019142645847450073, "loss": 11.6858, "step": 19054 }, { "epoch": 0.39887381729883614, "grad_norm": 0.25564295053482056, "learning_rate": 0.0001914255702188718, "loss": 11.6783, "step": 19055 }, { "epoch": 0.3988947500627983, "grad_norm": 0.30750828981399536, "learning_rate": 0.00019142468191929278, "loss": 11.682, "step": 19056 }, { "epoch": 0.39891568282676043, "grad_norm": 0.32312777638435364, "learning_rate": 0.00019142379357576402, "loss": 11.6765, "step": 19057 }, { "epoch": 0.3989366155907226, "grad_norm": 0.37383753061294556, "learning_rate": 0.00019142290518828607, "loss": 11.6699, "step": 19058 }, { "epoch": 0.3989575483546848, "grad_norm": 0.4092901945114136, "learning_rate": 0.00019142201675685929, "loss": 11.6708, "step": 19059 }, { "epoch": 0.3989784811186469, "grad_norm": 0.2304297387599945, "learning_rate": 0.00019142112828148412, "loss": 11.661, "step": 19060 }, { "epoch": 0.39899941388260907, "grad_norm": 0.31971535086631775, "learning_rate": 0.00019142023976216095, "loss": 11.6852, "step": 19061 }, { "epoch": 0.3990203466465712, "grad_norm": 0.25959011912345886, "learning_rate": 0.00019141935119889025, "loss": 11.6759, "step": 19062 }, { "epoch": 0.39904127941053336, "grad_norm": 0.2557916045188904, "learning_rate": 0.00019141846259167246, "loss": 11.6758, "step": 19063 }, { "epoch": 0.3990622121744955, "grad_norm": 0.25154101848602295, "learning_rate": 0.00019141757394050795, "loss": 11.6714, "step": 19064 }, { "epoch": 0.3990831449384577, "grad_norm": 0.38171252608299255, "learning_rate": 0.0001914166852453972, "loss": 11.6934, "step": 19065 }, { "epoch": 0.39910407770241985, "grad_norm": 0.3737812936306, "learning_rate": 0.00019141579650634063, "loss": 11.6824, "step": 19066 }, { "epoch": 0.399125010466382, "grad_norm": 0.283687949180603, "learning_rate": 0.00019141490772333865, "loss": 11.674, "step": 19067 }, { "epoch": 0.39914594323034414, "grad_norm": 0.24801300466060638, "learning_rate": 0.0001914140188963917, "loss": 11.6784, "step": 19068 }, { "epoch": 0.3991668759943063, "grad_norm": 0.3073553740978241, "learning_rate": 0.00019141313002550016, "loss": 11.6827, "step": 19069 }, { "epoch": 0.3991878087582684, "grad_norm": 0.2990429401397705, "learning_rate": 0.00019141224111066452, "loss": 11.6616, "step": 19070 }, { "epoch": 0.39920874152223057, "grad_norm": 0.22240634262561798, "learning_rate": 0.00019141135215188518, "loss": 11.6824, "step": 19071 }, { "epoch": 0.39922967428619277, "grad_norm": 0.2521466016769409, "learning_rate": 0.00019141046314916256, "loss": 11.6663, "step": 19072 }, { "epoch": 0.3992506070501549, "grad_norm": 0.3078365623950958, "learning_rate": 0.00019140957410249712, "loss": 11.6673, "step": 19073 }, { "epoch": 0.39927153981411706, "grad_norm": 0.33625662326812744, "learning_rate": 0.00019140868501188923, "loss": 11.6664, "step": 19074 }, { "epoch": 0.3992924725780792, "grad_norm": 0.25881677865982056, "learning_rate": 0.0001914077958773394, "loss": 11.6807, "step": 19075 }, { "epoch": 0.39931340534204135, "grad_norm": 0.30977344512939453, "learning_rate": 0.00019140690669884798, "loss": 11.6746, "step": 19076 }, { "epoch": 0.3993343381060035, "grad_norm": 0.28913623094558716, "learning_rate": 0.00019140601747641545, "loss": 11.6891, "step": 19077 }, { "epoch": 0.3993552708699657, "grad_norm": 0.2770788073539734, "learning_rate": 0.0001914051282100422, "loss": 11.6714, "step": 19078 }, { "epoch": 0.39937620363392784, "grad_norm": 0.2937484681606293, "learning_rate": 0.00019140423889972867, "loss": 11.6812, "step": 19079 }, { "epoch": 0.39939713639789, "grad_norm": 0.35384511947631836, "learning_rate": 0.00019140334954547528, "loss": 11.6773, "step": 19080 }, { "epoch": 0.39941806916185213, "grad_norm": 0.2813098728656769, "learning_rate": 0.0001914024601472825, "loss": 11.6877, "step": 19081 }, { "epoch": 0.3994390019258143, "grad_norm": 0.29567307233810425, "learning_rate": 0.00019140157070515073, "loss": 11.6692, "step": 19082 }, { "epoch": 0.3994599346897764, "grad_norm": 0.24920348823070526, "learning_rate": 0.00019140068121908038, "loss": 11.6596, "step": 19083 }, { "epoch": 0.39948086745373856, "grad_norm": 0.26838162541389465, "learning_rate": 0.00019139979168907189, "loss": 11.6691, "step": 19084 }, { "epoch": 0.39950180021770076, "grad_norm": 0.2996504008769989, "learning_rate": 0.00019139890211512567, "loss": 11.6751, "step": 19085 }, { "epoch": 0.3995227329816629, "grad_norm": 0.3577755391597748, "learning_rate": 0.00019139801249724219, "loss": 11.6774, "step": 19086 }, { "epoch": 0.39954366574562505, "grad_norm": 0.2736586034297943, "learning_rate": 0.00019139712283542182, "loss": 11.6653, "step": 19087 }, { "epoch": 0.3995645985095872, "grad_norm": 0.26668301224708557, "learning_rate": 0.00019139623312966505, "loss": 11.6699, "step": 19088 }, { "epoch": 0.39958553127354934, "grad_norm": 0.28991058468818665, "learning_rate": 0.00019139534337997226, "loss": 11.6739, "step": 19089 }, { "epoch": 0.3996064640375115, "grad_norm": 0.2481715977191925, "learning_rate": 0.00019139445358634394, "loss": 11.6726, "step": 19090 }, { "epoch": 0.3996273968014737, "grad_norm": 0.3405316174030304, "learning_rate": 0.00019139356374878045, "loss": 11.6549, "step": 19091 }, { "epoch": 0.39964832956543583, "grad_norm": 0.25960469245910645, "learning_rate": 0.0001913926738672822, "loss": 11.6727, "step": 19092 }, { "epoch": 0.399669262329398, "grad_norm": 0.2786807417869568, "learning_rate": 0.0001913917839418497, "loss": 11.6775, "step": 19093 }, { "epoch": 0.3996901950933601, "grad_norm": 0.22739176452159882, "learning_rate": 0.00019139089397248333, "loss": 11.671, "step": 19094 }, { "epoch": 0.39971112785732227, "grad_norm": 0.3045566976070404, "learning_rate": 0.00019139000395918356, "loss": 11.67, "step": 19095 }, { "epoch": 0.3997320606212844, "grad_norm": 0.2779538631439209, "learning_rate": 0.00019138911390195076, "loss": 11.6676, "step": 19096 }, { "epoch": 0.3997529933852466, "grad_norm": 0.3125237822532654, "learning_rate": 0.00019138822380078535, "loss": 11.6674, "step": 19097 }, { "epoch": 0.39977392614920876, "grad_norm": 0.2178695946931839, "learning_rate": 0.00019138733365568782, "loss": 11.6882, "step": 19098 }, { "epoch": 0.3997948589131709, "grad_norm": 0.29469671845436096, "learning_rate": 0.00019138644346665856, "loss": 11.678, "step": 19099 }, { "epoch": 0.39981579167713305, "grad_norm": 0.25995200872421265, "learning_rate": 0.00019138555323369803, "loss": 11.6496, "step": 19100 }, { "epoch": 0.3998367244410952, "grad_norm": 0.26447585225105286, "learning_rate": 0.00019138466295680662, "loss": 11.6698, "step": 19101 }, { "epoch": 0.39985765720505734, "grad_norm": 0.296528160572052, "learning_rate": 0.00019138377263598475, "loss": 11.6691, "step": 19102 }, { "epoch": 0.3998785899690195, "grad_norm": 0.35357099771499634, "learning_rate": 0.00019138288227123286, "loss": 11.6759, "step": 19103 }, { "epoch": 0.3998995227329817, "grad_norm": 0.2807397246360779, "learning_rate": 0.00019138199186255142, "loss": 11.6728, "step": 19104 }, { "epoch": 0.3999204554969438, "grad_norm": 0.26037490367889404, "learning_rate": 0.00019138110140994085, "loss": 11.6546, "step": 19105 }, { "epoch": 0.399941388260906, "grad_norm": 0.3147982656955719, "learning_rate": 0.0001913802109134015, "loss": 11.6712, "step": 19106 }, { "epoch": 0.3999623210248681, "grad_norm": 0.2602373957633972, "learning_rate": 0.00019137932037293384, "loss": 11.6601, "step": 19107 }, { "epoch": 0.39998325378883026, "grad_norm": 0.23311488330364227, "learning_rate": 0.00019137842978853836, "loss": 11.6808, "step": 19108 }, { "epoch": 0.4000041865527924, "grad_norm": 0.2839830219745636, "learning_rate": 0.00019137753916021542, "loss": 11.6654, "step": 19109 }, { "epoch": 0.4000251193167546, "grad_norm": 0.22950533032417297, "learning_rate": 0.00019137664848796547, "loss": 11.6674, "step": 19110 }, { "epoch": 0.40004605208071675, "grad_norm": 0.33355897665023804, "learning_rate": 0.0001913757577717889, "loss": 11.6797, "step": 19111 }, { "epoch": 0.4000669848446789, "grad_norm": 0.23585288226604462, "learning_rate": 0.0001913748670116862, "loss": 11.6672, "step": 19112 }, { "epoch": 0.40008791760864104, "grad_norm": 0.27269741892814636, "learning_rate": 0.00019137397620765776, "loss": 11.6859, "step": 19113 }, { "epoch": 0.4001088503726032, "grad_norm": 0.39532554149627686, "learning_rate": 0.00019137308535970403, "loss": 11.6835, "step": 19114 }, { "epoch": 0.40012978313656533, "grad_norm": 0.21823452413082123, "learning_rate": 0.00019137219446782543, "loss": 11.6734, "step": 19115 }, { "epoch": 0.40015071590052753, "grad_norm": 0.2928224205970764, "learning_rate": 0.00019137130353202237, "loss": 11.667, "step": 19116 }, { "epoch": 0.4001716486644897, "grad_norm": 0.26535001397132874, "learning_rate": 0.0001913704125522953, "loss": 11.6737, "step": 19117 }, { "epoch": 0.4001925814284518, "grad_norm": 0.31821539998054504, "learning_rate": 0.00019136952152864467, "loss": 11.6628, "step": 19118 }, { "epoch": 0.40021351419241397, "grad_norm": 0.24768134951591492, "learning_rate": 0.00019136863046107086, "loss": 11.686, "step": 19119 }, { "epoch": 0.4002344469563761, "grad_norm": 0.27656763792037964, "learning_rate": 0.00019136773934957428, "loss": 11.6763, "step": 19120 }, { "epoch": 0.40025537972033826, "grad_norm": 0.384478896856308, "learning_rate": 0.00019136684819415543, "loss": 11.6717, "step": 19121 }, { "epoch": 0.4002763124843004, "grad_norm": 0.2531653642654419, "learning_rate": 0.0001913659569948147, "loss": 11.6747, "step": 19122 }, { "epoch": 0.4002972452482626, "grad_norm": 0.2943190932273865, "learning_rate": 0.00019136506575155254, "loss": 11.6915, "step": 19123 }, { "epoch": 0.40031817801222475, "grad_norm": 0.24692638218402863, "learning_rate": 0.00019136417446436935, "loss": 11.6732, "step": 19124 }, { "epoch": 0.4003391107761869, "grad_norm": 0.2819637060165405, "learning_rate": 0.00019136328313326553, "loss": 11.6747, "step": 19125 }, { "epoch": 0.40036004354014904, "grad_norm": 0.2928975224494934, "learning_rate": 0.0001913623917582416, "loss": 11.6676, "step": 19126 }, { "epoch": 0.4003809763041112, "grad_norm": 0.3135569989681244, "learning_rate": 0.00019136150033929793, "loss": 11.6616, "step": 19127 }, { "epoch": 0.4004019090680733, "grad_norm": 0.2709248661994934, "learning_rate": 0.00019136060887643496, "loss": 11.6504, "step": 19128 }, { "epoch": 0.4004228418320355, "grad_norm": 0.2780870795249939, "learning_rate": 0.00019135971736965312, "loss": 11.6676, "step": 19129 }, { "epoch": 0.40044377459599767, "grad_norm": 0.22206604480743408, "learning_rate": 0.0001913588258189528, "loss": 11.6821, "step": 19130 }, { "epoch": 0.4004647073599598, "grad_norm": 0.2864975929260254, "learning_rate": 0.00019135793422433447, "loss": 11.6872, "step": 19131 }, { "epoch": 0.40048564012392196, "grad_norm": 0.34817731380462646, "learning_rate": 0.0001913570425857986, "loss": 11.6795, "step": 19132 }, { "epoch": 0.4005065728878841, "grad_norm": 0.32552632689476013, "learning_rate": 0.0001913561509033455, "loss": 11.6844, "step": 19133 }, { "epoch": 0.40052750565184625, "grad_norm": 0.31035298109054565, "learning_rate": 0.0001913552591769757, "loss": 11.6895, "step": 19134 }, { "epoch": 0.40054843841580845, "grad_norm": 0.3378925919532776, "learning_rate": 0.0001913543674066896, "loss": 11.6823, "step": 19135 }, { "epoch": 0.4005693711797706, "grad_norm": 0.2761833667755127, "learning_rate": 0.00019135347559248763, "loss": 11.6773, "step": 19136 }, { "epoch": 0.40059030394373274, "grad_norm": 0.27960675954818726, "learning_rate": 0.0001913525837343702, "loss": 11.6652, "step": 19137 }, { "epoch": 0.4006112367076949, "grad_norm": 0.5342798829078674, "learning_rate": 0.00019135169183233773, "loss": 11.6744, "step": 19138 }, { "epoch": 0.40063216947165703, "grad_norm": 0.28573599457740784, "learning_rate": 0.00019135079988639067, "loss": 11.6827, "step": 19139 }, { "epoch": 0.4006531022356192, "grad_norm": 0.2672635316848755, "learning_rate": 0.00019134990789652952, "loss": 11.6903, "step": 19140 }, { "epoch": 0.4006740349995813, "grad_norm": 0.31023988127708435, "learning_rate": 0.00019134901586275458, "loss": 11.6847, "step": 19141 }, { "epoch": 0.4006949677635435, "grad_norm": 0.28389567136764526, "learning_rate": 0.00019134812378506636, "loss": 11.6749, "step": 19142 }, { "epoch": 0.40071590052750566, "grad_norm": 0.26204878091812134, "learning_rate": 0.00019134723166346524, "loss": 11.6587, "step": 19143 }, { "epoch": 0.4007368332914678, "grad_norm": 0.4480503499507904, "learning_rate": 0.00019134633949795168, "loss": 11.6816, "step": 19144 }, { "epoch": 0.40075776605542995, "grad_norm": 0.38289037346839905, "learning_rate": 0.00019134544728852611, "loss": 11.6642, "step": 19145 }, { "epoch": 0.4007786988193921, "grad_norm": 0.20809689164161682, "learning_rate": 0.00019134455503518898, "loss": 11.6538, "step": 19146 }, { "epoch": 0.40079963158335424, "grad_norm": 0.3137246072292328, "learning_rate": 0.00019134366273794066, "loss": 11.683, "step": 19147 }, { "epoch": 0.40082056434731644, "grad_norm": 0.31587356328964233, "learning_rate": 0.0001913427703967816, "loss": 11.6814, "step": 19148 }, { "epoch": 0.4008414971112786, "grad_norm": 0.2888236939907074, "learning_rate": 0.00019134187801171228, "loss": 11.6765, "step": 19149 }, { "epoch": 0.40086242987524073, "grad_norm": 0.2770266532897949, "learning_rate": 0.00019134098558273307, "loss": 11.6852, "step": 19150 }, { "epoch": 0.4008833626392029, "grad_norm": 0.3130655884742737, "learning_rate": 0.00019134009310984443, "loss": 11.679, "step": 19151 }, { "epoch": 0.400904295403165, "grad_norm": 0.3214876055717468, "learning_rate": 0.00019133920059304676, "loss": 11.6671, "step": 19152 }, { "epoch": 0.40092522816712717, "grad_norm": 0.3665456473827362, "learning_rate": 0.00019133830803234048, "loss": 11.6816, "step": 19153 }, { "epoch": 0.40094616093108937, "grad_norm": 0.4304959774017334, "learning_rate": 0.0001913374154277261, "loss": 11.6677, "step": 19154 }, { "epoch": 0.4009670936950515, "grad_norm": 0.28883397579193115, "learning_rate": 0.00019133652277920397, "loss": 11.6741, "step": 19155 }, { "epoch": 0.40098802645901366, "grad_norm": 0.28965339064598083, "learning_rate": 0.00019133563008677453, "loss": 11.6755, "step": 19156 }, { "epoch": 0.4010089592229758, "grad_norm": 0.24438536167144775, "learning_rate": 0.00019133473735043826, "loss": 11.6728, "step": 19157 }, { "epoch": 0.40102989198693795, "grad_norm": 0.30580779910087585, "learning_rate": 0.0001913338445701955, "loss": 11.6793, "step": 19158 }, { "epoch": 0.4010508247509001, "grad_norm": 0.30786386132240295, "learning_rate": 0.00019133295174604675, "loss": 11.6839, "step": 19159 }, { "epoch": 0.40107175751486224, "grad_norm": 0.24970830976963043, "learning_rate": 0.00019133205887799244, "loss": 11.6646, "step": 19160 }, { "epoch": 0.40109269027882444, "grad_norm": 0.2860870659351349, "learning_rate": 0.00019133116596603298, "loss": 11.6748, "step": 19161 }, { "epoch": 0.4011136230427866, "grad_norm": 0.25090575218200684, "learning_rate": 0.00019133027301016875, "loss": 11.6668, "step": 19162 }, { "epoch": 0.4011345558067487, "grad_norm": 0.23513680696487427, "learning_rate": 0.00019132938001040027, "loss": 11.6758, "step": 19163 }, { "epoch": 0.40115548857071087, "grad_norm": 0.2633773684501648, "learning_rate": 0.00019132848696672792, "loss": 11.6669, "step": 19164 }, { "epoch": 0.401176421334673, "grad_norm": 0.29528725147247314, "learning_rate": 0.00019132759387915213, "loss": 11.6672, "step": 19165 }, { "epoch": 0.40119735409863516, "grad_norm": 0.323307067155838, "learning_rate": 0.00019132670074767334, "loss": 11.6797, "step": 19166 }, { "epoch": 0.40121828686259736, "grad_norm": 0.2761850357055664, "learning_rate": 0.00019132580757229197, "loss": 11.6727, "step": 19167 }, { "epoch": 0.4012392196265595, "grad_norm": 0.3086599111557007, "learning_rate": 0.00019132491435300847, "loss": 11.6642, "step": 19168 }, { "epoch": 0.40126015239052165, "grad_norm": 0.3500676453113556, "learning_rate": 0.00019132402108982323, "loss": 11.6853, "step": 19169 }, { "epoch": 0.4012810851544838, "grad_norm": 0.22091634571552277, "learning_rate": 0.0001913231277827367, "loss": 11.6688, "step": 19170 }, { "epoch": 0.40130201791844594, "grad_norm": 0.24157381057739258, "learning_rate": 0.00019132223443174936, "loss": 11.6667, "step": 19171 }, { "epoch": 0.4013229506824081, "grad_norm": 2.2828922271728516, "learning_rate": 0.00019132134103686156, "loss": 11.6413, "step": 19172 }, { "epoch": 0.4013438834463703, "grad_norm": 0.22538292407989502, "learning_rate": 0.00019132044759807374, "loss": 11.6663, "step": 19173 }, { "epoch": 0.40136481621033243, "grad_norm": 0.2976123094558716, "learning_rate": 0.00019131955411538635, "loss": 11.6775, "step": 19174 }, { "epoch": 0.4013857489742946, "grad_norm": 0.2582504153251648, "learning_rate": 0.00019131866058879986, "loss": 11.6891, "step": 19175 }, { "epoch": 0.4014066817382567, "grad_norm": 0.30745911598205566, "learning_rate": 0.00019131776701831462, "loss": 11.6709, "step": 19176 }, { "epoch": 0.40142761450221887, "grad_norm": 0.2581102252006531, "learning_rate": 0.00019131687340393115, "loss": 11.6631, "step": 19177 }, { "epoch": 0.401448547266181, "grad_norm": 0.26777246594429016, "learning_rate": 0.0001913159797456498, "loss": 11.6632, "step": 19178 }, { "epoch": 0.40146948003014316, "grad_norm": 0.3509569764137268, "learning_rate": 0.00019131508604347102, "loss": 11.6895, "step": 19179 }, { "epoch": 0.40149041279410536, "grad_norm": 0.28351157903671265, "learning_rate": 0.00019131419229739526, "loss": 11.6601, "step": 19180 }, { "epoch": 0.4015113455580675, "grad_norm": 0.23692627251148224, "learning_rate": 0.0001913132985074229, "loss": 11.6771, "step": 19181 }, { "epoch": 0.40153227832202965, "grad_norm": 0.2998722791671753, "learning_rate": 0.00019131240467355447, "loss": 11.6654, "step": 19182 }, { "epoch": 0.4015532110859918, "grad_norm": 1.0090937614440918, "learning_rate": 0.0001913115107957903, "loss": 11.6402, "step": 19183 }, { "epoch": 0.40157414384995394, "grad_norm": 0.2565263509750366, "learning_rate": 0.00019131061687413086, "loss": 11.6654, "step": 19184 }, { "epoch": 0.4015950766139161, "grad_norm": 0.27749741077423096, "learning_rate": 0.00019130972290857658, "loss": 11.6764, "step": 19185 }, { "epoch": 0.4016160093778783, "grad_norm": 0.27914053201675415, "learning_rate": 0.00019130882889912786, "loss": 11.6817, "step": 19186 }, { "epoch": 0.4016369421418404, "grad_norm": 0.3139823079109192, "learning_rate": 0.00019130793484578522, "loss": 11.6758, "step": 19187 }, { "epoch": 0.40165787490580257, "grad_norm": 0.2703968584537506, "learning_rate": 0.00019130704074854897, "loss": 11.6723, "step": 19188 }, { "epoch": 0.4016788076697647, "grad_norm": 0.2812999486923218, "learning_rate": 0.00019130614660741962, "loss": 11.6598, "step": 19189 }, { "epoch": 0.40169974043372686, "grad_norm": 0.2627095580101013, "learning_rate": 0.00019130525242239759, "loss": 11.6764, "step": 19190 }, { "epoch": 0.401720673197689, "grad_norm": 0.31973785161972046, "learning_rate": 0.00019130435819348325, "loss": 11.6784, "step": 19191 }, { "epoch": 0.40174160596165115, "grad_norm": 0.331352561712265, "learning_rate": 0.0001913034639206771, "loss": 11.6775, "step": 19192 }, { "epoch": 0.40176253872561335, "grad_norm": 0.23828920722007751, "learning_rate": 0.00019130256960397954, "loss": 11.6643, "step": 19193 }, { "epoch": 0.4017834714895755, "grad_norm": 0.29185751080513, "learning_rate": 0.00019130167524339102, "loss": 11.6732, "step": 19194 }, { "epoch": 0.40180440425353764, "grad_norm": 0.273907333612442, "learning_rate": 0.00019130078083891196, "loss": 11.6844, "step": 19195 }, { "epoch": 0.4018253370174998, "grad_norm": 0.31712257862091064, "learning_rate": 0.00019129988639054278, "loss": 11.6714, "step": 19196 }, { "epoch": 0.40184626978146193, "grad_norm": 0.24435846507549286, "learning_rate": 0.0001912989918982839, "loss": 11.681, "step": 19197 }, { "epoch": 0.4018672025454241, "grad_norm": 0.2305413782596588, "learning_rate": 0.00019129809736213575, "loss": 11.6592, "step": 19198 }, { "epoch": 0.4018881353093863, "grad_norm": 0.27140098810195923, "learning_rate": 0.0001912972027820988, "loss": 11.682, "step": 19199 }, { "epoch": 0.4019090680733484, "grad_norm": 0.3243195712566376, "learning_rate": 0.00019129630815817346, "loss": 11.681, "step": 19200 }, { "epoch": 0.40193000083731056, "grad_norm": 0.26245930790901184, "learning_rate": 0.00019129541349036013, "loss": 11.6692, "step": 19201 }, { "epoch": 0.4019509336012727, "grad_norm": 0.24836166203022003, "learning_rate": 0.00019129451877865927, "loss": 11.6728, "step": 19202 }, { "epoch": 0.40197186636523485, "grad_norm": 0.32812750339508057, "learning_rate": 0.00019129362402307135, "loss": 11.6977, "step": 19203 }, { "epoch": 0.401992799129197, "grad_norm": 0.3183629810810089, "learning_rate": 0.0001912927292235967, "loss": 11.6721, "step": 19204 }, { "epoch": 0.4020137318931592, "grad_norm": 0.26112526655197144, "learning_rate": 0.00019129183438023584, "loss": 11.6794, "step": 19205 }, { "epoch": 0.40203466465712134, "grad_norm": 0.2953752875328064, "learning_rate": 0.00019129093949298915, "loss": 11.6797, "step": 19206 }, { "epoch": 0.4020555974210835, "grad_norm": 0.3745753765106201, "learning_rate": 0.00019129004456185707, "loss": 11.6776, "step": 19207 }, { "epoch": 0.40207653018504563, "grad_norm": 0.24271202087402344, "learning_rate": 0.00019128914958684004, "loss": 11.6683, "step": 19208 }, { "epoch": 0.4020974629490078, "grad_norm": 0.3475382924079895, "learning_rate": 0.0001912882545679385, "loss": 11.6682, "step": 19209 }, { "epoch": 0.4021183957129699, "grad_norm": 0.2811582684516907, "learning_rate": 0.00019128735950515286, "loss": 11.6638, "step": 19210 }, { "epoch": 0.40213932847693207, "grad_norm": 0.22925066947937012, "learning_rate": 0.00019128646439848357, "loss": 11.6661, "step": 19211 }, { "epoch": 0.40216026124089427, "grad_norm": 0.2870859205722809, "learning_rate": 0.00019128556924793101, "loss": 11.6596, "step": 19212 }, { "epoch": 0.4021811940048564, "grad_norm": 0.32139819860458374, "learning_rate": 0.00019128467405349567, "loss": 11.6868, "step": 19213 }, { "epoch": 0.40220212676881856, "grad_norm": 0.2599199712276459, "learning_rate": 0.00019128377881517795, "loss": 11.6905, "step": 19214 }, { "epoch": 0.4022230595327807, "grad_norm": 0.2666713297367096, "learning_rate": 0.0001912828835329783, "loss": 11.6817, "step": 19215 }, { "epoch": 0.40224399229674285, "grad_norm": 0.27322807908058167, "learning_rate": 0.00019128198820689716, "loss": 11.6658, "step": 19216 }, { "epoch": 0.402264925060705, "grad_norm": 0.3071306645870209, "learning_rate": 0.0001912810928369349, "loss": 11.6882, "step": 19217 }, { "epoch": 0.4022858578246672, "grad_norm": 0.23619654774665833, "learning_rate": 0.000191280197423092, "loss": 11.6776, "step": 19218 }, { "epoch": 0.40230679058862934, "grad_norm": 0.28122371435165405, "learning_rate": 0.00019127930196536887, "loss": 11.6758, "step": 19219 }, { "epoch": 0.4023277233525915, "grad_norm": 0.2711189389228821, "learning_rate": 0.00019127840646376596, "loss": 11.6689, "step": 19220 }, { "epoch": 0.4023486561165536, "grad_norm": 0.2886359691619873, "learning_rate": 0.0001912775109182837, "loss": 11.6835, "step": 19221 }, { "epoch": 0.40236958888051577, "grad_norm": 0.47290143370628357, "learning_rate": 0.0001912766153289225, "loss": 11.6985, "step": 19222 }, { "epoch": 0.4023905216444779, "grad_norm": 0.2961130142211914, "learning_rate": 0.0001912757196956828, "loss": 11.6471, "step": 19223 }, { "epoch": 0.4024114544084401, "grad_norm": 0.2544567584991455, "learning_rate": 0.00019127482401856504, "loss": 11.6813, "step": 19224 }, { "epoch": 0.40243238717240226, "grad_norm": 0.27163928747177124, "learning_rate": 0.00019127392829756964, "loss": 11.678, "step": 19225 }, { "epoch": 0.4024533199363644, "grad_norm": 0.29487401247024536, "learning_rate": 0.00019127303253269707, "loss": 11.6706, "step": 19226 }, { "epoch": 0.40247425270032655, "grad_norm": 0.29761433601379395, "learning_rate": 0.00019127213672394766, "loss": 11.6775, "step": 19227 }, { "epoch": 0.4024951854642887, "grad_norm": 0.3383238613605499, "learning_rate": 0.00019127124087132193, "loss": 11.6821, "step": 19228 }, { "epoch": 0.40251611822825084, "grad_norm": 0.3199462294578552, "learning_rate": 0.0001912703449748203, "loss": 11.6766, "step": 19229 }, { "epoch": 0.402537050992213, "grad_norm": 0.27757105231285095, "learning_rate": 0.00019126944903444316, "loss": 11.664, "step": 19230 }, { "epoch": 0.4025579837561752, "grad_norm": 0.37367865443229675, "learning_rate": 0.00019126855305019098, "loss": 11.6866, "step": 19231 }, { "epoch": 0.40257891652013733, "grad_norm": 0.38702598214149475, "learning_rate": 0.0001912676570220642, "loss": 11.6872, "step": 19232 }, { "epoch": 0.4025998492840995, "grad_norm": 0.2717192471027374, "learning_rate": 0.0001912667609500632, "loss": 11.6605, "step": 19233 }, { "epoch": 0.4026207820480616, "grad_norm": 0.28361934423446655, "learning_rate": 0.00019126586483418845, "loss": 11.6767, "step": 19234 }, { "epoch": 0.40264171481202377, "grad_norm": 0.2382739931344986, "learning_rate": 0.00019126496867444037, "loss": 11.6673, "step": 19235 }, { "epoch": 0.4026626475759859, "grad_norm": 0.3778284788131714, "learning_rate": 0.00019126407247081937, "loss": 11.6567, "step": 19236 }, { "epoch": 0.4026835803399481, "grad_norm": 0.27556008100509644, "learning_rate": 0.00019126317622332595, "loss": 11.6896, "step": 19237 }, { "epoch": 0.40270451310391026, "grad_norm": 0.3325798809528351, "learning_rate": 0.00019126227993196044, "loss": 11.672, "step": 19238 }, { "epoch": 0.4027254458678724, "grad_norm": 0.2676265835762024, "learning_rate": 0.00019126138359672334, "loss": 11.6718, "step": 19239 }, { "epoch": 0.40274637863183455, "grad_norm": 0.30189359188079834, "learning_rate": 0.00019126048721761506, "loss": 11.6702, "step": 19240 }, { "epoch": 0.4027673113957967, "grad_norm": 0.3102656304836273, "learning_rate": 0.00019125959079463605, "loss": 11.6737, "step": 19241 }, { "epoch": 0.40278824415975883, "grad_norm": 0.266507625579834, "learning_rate": 0.0001912586943277867, "loss": 11.6489, "step": 19242 }, { "epoch": 0.40280917692372104, "grad_norm": 0.3043871819972992, "learning_rate": 0.0001912577978170675, "loss": 11.6804, "step": 19243 }, { "epoch": 0.4028301096876832, "grad_norm": 0.42602723836898804, "learning_rate": 0.00019125690126247885, "loss": 11.6668, "step": 19244 }, { "epoch": 0.4028510424516453, "grad_norm": 0.3184007704257965, "learning_rate": 0.00019125600466402115, "loss": 11.6682, "step": 19245 }, { "epoch": 0.40287197521560747, "grad_norm": 0.3126644194126129, "learning_rate": 0.00019125510802169487, "loss": 11.6672, "step": 19246 }, { "epoch": 0.4028929079795696, "grad_norm": 0.30377593636512756, "learning_rate": 0.00019125421133550045, "loss": 11.6608, "step": 19247 }, { "epoch": 0.40291384074353176, "grad_norm": 0.3295704424381256, "learning_rate": 0.00019125331460543828, "loss": 11.6675, "step": 19248 }, { "epoch": 0.4029347735074939, "grad_norm": 0.26410627365112305, "learning_rate": 0.0001912524178315088, "loss": 11.6645, "step": 19249 }, { "epoch": 0.4029557062714561, "grad_norm": 0.2728229761123657, "learning_rate": 0.00019125152101371248, "loss": 11.6678, "step": 19250 }, { "epoch": 0.40297663903541825, "grad_norm": 0.29832977056503296, "learning_rate": 0.00019125062415204972, "loss": 11.6995, "step": 19251 }, { "epoch": 0.4029975717993804, "grad_norm": 0.32590252161026, "learning_rate": 0.00019124972724652095, "loss": 11.7005, "step": 19252 }, { "epoch": 0.40301850456334254, "grad_norm": 0.2628290057182312, "learning_rate": 0.0001912488302971266, "loss": 11.685, "step": 19253 }, { "epoch": 0.4030394373273047, "grad_norm": 0.3449271023273468, "learning_rate": 0.00019124793330386713, "loss": 11.6771, "step": 19254 }, { "epoch": 0.40306037009126683, "grad_norm": 0.28555798530578613, "learning_rate": 0.00019124703626674294, "loss": 11.6725, "step": 19255 }, { "epoch": 0.40308130285522903, "grad_norm": 0.3145686089992523, "learning_rate": 0.00019124613918575448, "loss": 11.6789, "step": 19256 }, { "epoch": 0.4031022356191912, "grad_norm": 0.36992496252059937, "learning_rate": 0.00019124524206090215, "loss": 11.6641, "step": 19257 }, { "epoch": 0.4031231683831533, "grad_norm": 0.28906869888305664, "learning_rate": 0.0001912443448921864, "loss": 11.6693, "step": 19258 }, { "epoch": 0.40314410114711546, "grad_norm": 0.259519100189209, "learning_rate": 0.0001912434476796077, "loss": 11.6687, "step": 19259 }, { "epoch": 0.4031650339110776, "grad_norm": 0.3339080512523651, "learning_rate": 0.00019124255042316643, "loss": 11.665, "step": 19260 }, { "epoch": 0.40318596667503975, "grad_norm": 0.26431217789649963, "learning_rate": 0.00019124165312286304, "loss": 11.665, "step": 19261 }, { "epoch": 0.40320689943900195, "grad_norm": 0.24719253182411194, "learning_rate": 0.00019124075577869794, "loss": 11.6798, "step": 19262 }, { "epoch": 0.4032278322029641, "grad_norm": 0.2665022015571594, "learning_rate": 0.0001912398583906716, "loss": 11.651, "step": 19263 }, { "epoch": 0.40324876496692624, "grad_norm": 0.27828744053840637, "learning_rate": 0.00019123896095878443, "loss": 11.6934, "step": 19264 }, { "epoch": 0.4032696977308884, "grad_norm": 0.2711038589477539, "learning_rate": 0.00019123806348303685, "loss": 11.6748, "step": 19265 }, { "epoch": 0.40329063049485053, "grad_norm": 0.28123536705970764, "learning_rate": 0.0001912371659634293, "loss": 11.6679, "step": 19266 }, { "epoch": 0.4033115632588127, "grad_norm": 0.2981744706630707, "learning_rate": 0.00019123626839996223, "loss": 11.6763, "step": 19267 }, { "epoch": 0.4033324960227748, "grad_norm": 0.3552071452140808, "learning_rate": 0.00019123537079263606, "loss": 11.6921, "step": 19268 }, { "epoch": 0.403353428786737, "grad_norm": 0.25566408038139343, "learning_rate": 0.0001912344731414512, "loss": 11.6522, "step": 19269 }, { "epoch": 0.40337436155069917, "grad_norm": 0.29777538776397705, "learning_rate": 0.00019123357544640812, "loss": 11.6771, "step": 19270 }, { "epoch": 0.4033952943146613, "grad_norm": 0.34092697501182556, "learning_rate": 0.00019123267770750725, "loss": 11.668, "step": 19271 }, { "epoch": 0.40341622707862346, "grad_norm": 0.2961982786655426, "learning_rate": 0.00019123177992474896, "loss": 11.6762, "step": 19272 }, { "epoch": 0.4034371598425856, "grad_norm": 0.28692203760147095, "learning_rate": 0.00019123088209813376, "loss": 11.6811, "step": 19273 }, { "epoch": 0.40345809260654775, "grad_norm": 0.24767599999904633, "learning_rate": 0.000191229984227662, "loss": 11.6728, "step": 19274 }, { "epoch": 0.40347902537050995, "grad_norm": 0.32891160249710083, "learning_rate": 0.00019122908631333423, "loss": 11.6855, "step": 19275 }, { "epoch": 0.4034999581344721, "grad_norm": 0.281000018119812, "learning_rate": 0.00019122818835515075, "loss": 11.6623, "step": 19276 }, { "epoch": 0.40352089089843424, "grad_norm": 0.35124242305755615, "learning_rate": 0.00019122729035311208, "loss": 11.6749, "step": 19277 }, { "epoch": 0.4035418236623964, "grad_norm": 0.315455824136734, "learning_rate": 0.00019122639230721863, "loss": 11.6651, "step": 19278 }, { "epoch": 0.4035627564263585, "grad_norm": 0.2928553819656372, "learning_rate": 0.0001912254942174708, "loss": 11.6751, "step": 19279 }, { "epoch": 0.40358368919032067, "grad_norm": 0.25995075702667236, "learning_rate": 0.00019122459608386904, "loss": 11.6672, "step": 19280 }, { "epoch": 0.4036046219542828, "grad_norm": 0.2936914563179016, "learning_rate": 0.0001912236979064138, "loss": 11.6811, "step": 19281 }, { "epoch": 0.403625554718245, "grad_norm": 0.29623448848724365, "learning_rate": 0.00019122279968510552, "loss": 11.6719, "step": 19282 }, { "epoch": 0.40364648748220716, "grad_norm": 0.2605574131011963, "learning_rate": 0.0001912219014199446, "loss": 11.6821, "step": 19283 }, { "epoch": 0.4036674202461693, "grad_norm": 0.288757860660553, "learning_rate": 0.00019122100311093148, "loss": 11.6886, "step": 19284 }, { "epoch": 0.40368835301013145, "grad_norm": 0.23080593347549438, "learning_rate": 0.0001912201047580666, "loss": 11.6623, "step": 19285 }, { "epoch": 0.4037092857740936, "grad_norm": 0.284420371055603, "learning_rate": 0.00019121920636135038, "loss": 11.6737, "step": 19286 }, { "epoch": 0.40373021853805574, "grad_norm": 0.2549879550933838, "learning_rate": 0.00019121830792078327, "loss": 11.6624, "step": 19287 }, { "epoch": 0.40375115130201794, "grad_norm": 0.27260521054267883, "learning_rate": 0.00019121740943636568, "loss": 11.6706, "step": 19288 }, { "epoch": 0.4037720840659801, "grad_norm": 0.27116501331329346, "learning_rate": 0.00019121651090809808, "loss": 11.6807, "step": 19289 }, { "epoch": 0.40379301682994223, "grad_norm": 0.30337944626808167, "learning_rate": 0.0001912156123359808, "loss": 11.6727, "step": 19290 }, { "epoch": 0.4038139495939044, "grad_norm": 0.31123605370521545, "learning_rate": 0.00019121471372001442, "loss": 11.6617, "step": 19291 }, { "epoch": 0.4038348823578665, "grad_norm": 0.28994986414909363, "learning_rate": 0.0001912138150601993, "loss": 11.6627, "step": 19292 }, { "epoch": 0.40385581512182867, "grad_norm": 0.29250863194465637, "learning_rate": 0.00019121291635653583, "loss": 11.6716, "step": 19293 }, { "epoch": 0.40387674788579087, "grad_norm": 0.2866871953010559, "learning_rate": 0.00019121201760902452, "loss": 11.674, "step": 19294 }, { "epoch": 0.403897680649753, "grad_norm": 0.24894295632839203, "learning_rate": 0.00019121111881766574, "loss": 11.6712, "step": 19295 }, { "epoch": 0.40391861341371516, "grad_norm": 0.24834267795085907, "learning_rate": 0.00019121021998245996, "loss": 11.6633, "step": 19296 }, { "epoch": 0.4039395461776773, "grad_norm": 0.2525475323200226, "learning_rate": 0.0001912093211034076, "loss": 11.6798, "step": 19297 }, { "epoch": 0.40396047894163944, "grad_norm": 0.29846224188804626, "learning_rate": 0.0001912084221805091, "loss": 11.6619, "step": 19298 }, { "epoch": 0.4039814117056016, "grad_norm": 0.26055705547332764, "learning_rate": 0.00019120752321376486, "loss": 11.6709, "step": 19299 }, { "epoch": 0.40400234446956373, "grad_norm": 0.2304876148700714, "learning_rate": 0.00019120662420317537, "loss": 11.6868, "step": 19300 }, { "epoch": 0.40402327723352593, "grad_norm": 0.35230693221092224, "learning_rate": 0.000191205725148741, "loss": 11.6662, "step": 19301 }, { "epoch": 0.4040442099974881, "grad_norm": 0.2653972804546356, "learning_rate": 0.00019120482605046218, "loss": 11.6898, "step": 19302 }, { "epoch": 0.4040651427614502, "grad_norm": 0.24120517075061798, "learning_rate": 0.00019120392690833945, "loss": 11.6623, "step": 19303 }, { "epoch": 0.40408607552541237, "grad_norm": 0.27736735343933105, "learning_rate": 0.0001912030277223731, "loss": 11.6703, "step": 19304 }, { "epoch": 0.4041070082893745, "grad_norm": 0.298306405544281, "learning_rate": 0.00019120212849256364, "loss": 11.6896, "step": 19305 }, { "epoch": 0.40412794105333666, "grad_norm": 0.333624929189682, "learning_rate": 0.0001912012292189115, "loss": 11.6598, "step": 19306 }, { "epoch": 0.40414887381729886, "grad_norm": 0.21260257065296173, "learning_rate": 0.00019120032990141712, "loss": 11.6699, "step": 19307 }, { "epoch": 0.404169806581261, "grad_norm": 0.25948411226272583, "learning_rate": 0.00019119943054008088, "loss": 11.6805, "step": 19308 }, { "epoch": 0.40419073934522315, "grad_norm": 0.2610640823841095, "learning_rate": 0.00019119853113490324, "loss": 11.6528, "step": 19309 }, { "epoch": 0.4042116721091853, "grad_norm": 0.27459052205085754, "learning_rate": 0.0001911976316858847, "loss": 11.6721, "step": 19310 }, { "epoch": 0.40423260487314744, "grad_norm": 0.32204076647758484, "learning_rate": 0.00019119673219302556, "loss": 11.6918, "step": 19311 }, { "epoch": 0.4042535376371096, "grad_norm": 0.35300859808921814, "learning_rate": 0.00019119583265632634, "loss": 11.6728, "step": 19312 }, { "epoch": 0.4042744704010718, "grad_norm": 0.31100597977638245, "learning_rate": 0.00019119493307578748, "loss": 11.6694, "step": 19313 }, { "epoch": 0.40429540316503393, "grad_norm": 0.2854349613189697, "learning_rate": 0.0001911940334514094, "loss": 11.6656, "step": 19314 }, { "epoch": 0.4043163359289961, "grad_norm": 0.33507058024406433, "learning_rate": 0.0001911931337831925, "loss": 11.6704, "step": 19315 }, { "epoch": 0.4043372686929582, "grad_norm": 0.42600709199905396, "learning_rate": 0.00019119223407113722, "loss": 11.6822, "step": 19316 }, { "epoch": 0.40435820145692036, "grad_norm": 0.2681053876876831, "learning_rate": 0.000191191334315244, "loss": 11.6593, "step": 19317 }, { "epoch": 0.4043791342208825, "grad_norm": 0.28852203488349915, "learning_rate": 0.0001911904345155133, "loss": 11.6769, "step": 19318 }, { "epoch": 0.40440006698484465, "grad_norm": 0.23912066221237183, "learning_rate": 0.00019118953467194553, "loss": 11.6589, "step": 19319 }, { "epoch": 0.40442099974880685, "grad_norm": 0.31889820098876953, "learning_rate": 0.00019118863478454113, "loss": 11.6591, "step": 19320 }, { "epoch": 0.404441932512769, "grad_norm": 0.36077821254730225, "learning_rate": 0.0001911877348533005, "loss": 11.6774, "step": 19321 }, { "epoch": 0.40446286527673114, "grad_norm": 0.2609040439128876, "learning_rate": 0.0001911868348782241, "loss": 11.664, "step": 19322 }, { "epoch": 0.4044837980406933, "grad_norm": 0.24488170444965363, "learning_rate": 0.00019118593485931238, "loss": 11.6809, "step": 19323 }, { "epoch": 0.40450473080465543, "grad_norm": 0.2791001796722412, "learning_rate": 0.00019118503479656574, "loss": 11.6773, "step": 19324 }, { "epoch": 0.4045256635686176, "grad_norm": 0.2499622404575348, "learning_rate": 0.00019118413468998465, "loss": 11.6744, "step": 19325 }, { "epoch": 0.4045465963325798, "grad_norm": 0.22879931330680847, "learning_rate": 0.00019118323453956948, "loss": 11.6823, "step": 19326 }, { "epoch": 0.4045675290965419, "grad_norm": 0.2656625211238861, "learning_rate": 0.00019118233434532074, "loss": 11.6702, "step": 19327 }, { "epoch": 0.40458846186050407, "grad_norm": 0.22481106221675873, "learning_rate": 0.0001911814341072388, "loss": 11.6764, "step": 19328 }, { "epoch": 0.4046093946244662, "grad_norm": 0.2260395884513855, "learning_rate": 0.00019118053382532414, "loss": 11.6612, "step": 19329 }, { "epoch": 0.40463032738842836, "grad_norm": 0.2651435136795044, "learning_rate": 0.00019117963349957716, "loss": 11.6696, "step": 19330 }, { "epoch": 0.4046512601523905, "grad_norm": 0.2421732097864151, "learning_rate": 0.00019117873312999827, "loss": 11.6763, "step": 19331 }, { "epoch": 0.4046721929163527, "grad_norm": 0.26043620705604553, "learning_rate": 0.000191177832716588, "loss": 11.6853, "step": 19332 }, { "epoch": 0.40469312568031485, "grad_norm": 0.23043030500411987, "learning_rate": 0.00019117693225934667, "loss": 11.6581, "step": 19333 }, { "epoch": 0.404714058444277, "grad_norm": 0.3050472140312195, "learning_rate": 0.00019117603175827474, "loss": 11.6889, "step": 19334 }, { "epoch": 0.40473499120823914, "grad_norm": 0.2758740782737732, "learning_rate": 0.0001911751312133727, "loss": 11.6771, "step": 19335 }, { "epoch": 0.4047559239722013, "grad_norm": 0.26661497354507446, "learning_rate": 0.00019117423062464095, "loss": 11.6865, "step": 19336 }, { "epoch": 0.4047768567361634, "grad_norm": 0.26986163854599, "learning_rate": 0.00019117332999207993, "loss": 11.675, "step": 19337 }, { "epoch": 0.40479778950012557, "grad_norm": 0.24729996919631958, "learning_rate": 0.00019117242931569007, "loss": 11.659, "step": 19338 }, { "epoch": 0.40481872226408777, "grad_norm": 0.27459433674812317, "learning_rate": 0.00019117152859547175, "loss": 11.6819, "step": 19339 }, { "epoch": 0.4048396550280499, "grad_norm": 0.25672444701194763, "learning_rate": 0.00019117062783142547, "loss": 11.6745, "step": 19340 }, { "epoch": 0.40486058779201206, "grad_norm": 0.31407812237739563, "learning_rate": 0.00019116972702355164, "loss": 11.6744, "step": 19341 }, { "epoch": 0.4048815205559742, "grad_norm": 0.26704835891723633, "learning_rate": 0.00019116882617185074, "loss": 11.6676, "step": 19342 }, { "epoch": 0.40490245331993635, "grad_norm": 0.22987554967403412, "learning_rate": 0.00019116792527632307, "loss": 11.6685, "step": 19343 }, { "epoch": 0.4049233860838985, "grad_norm": 0.3028585910797119, "learning_rate": 0.00019116702433696922, "loss": 11.6763, "step": 19344 }, { "epoch": 0.4049443188478607, "grad_norm": 0.2928241491317749, "learning_rate": 0.00019116612335378955, "loss": 11.6825, "step": 19345 }, { "epoch": 0.40496525161182284, "grad_norm": 0.23949342966079712, "learning_rate": 0.00019116522232678448, "loss": 11.6705, "step": 19346 }, { "epoch": 0.404986184375785, "grad_norm": 0.29138749837875366, "learning_rate": 0.00019116432125595444, "loss": 11.6565, "step": 19347 }, { "epoch": 0.40500711713974713, "grad_norm": 0.3089809715747833, "learning_rate": 0.0001911634201412999, "loss": 11.6762, "step": 19348 }, { "epoch": 0.4050280499037093, "grad_norm": 0.2423427253961563, "learning_rate": 0.00019116251898282133, "loss": 11.6666, "step": 19349 }, { "epoch": 0.4050489826676714, "grad_norm": 0.27605655789375305, "learning_rate": 0.00019116161778051904, "loss": 11.6733, "step": 19350 }, { "epoch": 0.4050699154316336, "grad_norm": 0.32047414779663086, "learning_rate": 0.00019116071653439357, "loss": 11.6766, "step": 19351 }, { "epoch": 0.40509084819559577, "grad_norm": 0.2249285876750946, "learning_rate": 0.0001911598152444453, "loss": 11.6796, "step": 19352 }, { "epoch": 0.4051117809595579, "grad_norm": 0.4169480800628662, "learning_rate": 0.00019115891391067468, "loss": 11.6819, "step": 19353 }, { "epoch": 0.40513271372352005, "grad_norm": 0.2818051278591156, "learning_rate": 0.00019115801253308216, "loss": 11.6812, "step": 19354 }, { "epoch": 0.4051536464874822, "grad_norm": 0.3418147563934326, "learning_rate": 0.0001911571111116681, "loss": 11.6765, "step": 19355 }, { "epoch": 0.40517457925144434, "grad_norm": 0.2473854124546051, "learning_rate": 0.00019115620964643308, "loss": 11.6736, "step": 19356 }, { "epoch": 0.4051955120154065, "grad_norm": 0.2758273780345917, "learning_rate": 0.00019115530813737738, "loss": 11.6712, "step": 19357 }, { "epoch": 0.4052164447793687, "grad_norm": 0.27965405583381653, "learning_rate": 0.00019115440658450152, "loss": 11.6496, "step": 19358 }, { "epoch": 0.40523737754333083, "grad_norm": 0.2946639657020569, "learning_rate": 0.00019115350498780592, "loss": 11.6801, "step": 19359 }, { "epoch": 0.405258310307293, "grad_norm": 0.2604827284812927, "learning_rate": 0.00019115260334729098, "loss": 11.6778, "step": 19360 }, { "epoch": 0.4052792430712551, "grad_norm": 0.22379569709300995, "learning_rate": 0.00019115170166295715, "loss": 11.6655, "step": 19361 }, { "epoch": 0.40530017583521727, "grad_norm": 0.31686723232269287, "learning_rate": 0.0001911507999348049, "loss": 11.66, "step": 19362 }, { "epoch": 0.4053211085991794, "grad_norm": 0.27800658345222473, "learning_rate": 0.00019114989816283463, "loss": 11.6808, "step": 19363 }, { "epoch": 0.4053420413631416, "grad_norm": 0.2675074636936188, "learning_rate": 0.00019114899634704676, "loss": 11.6728, "step": 19364 }, { "epoch": 0.40536297412710376, "grad_norm": 0.3879280984401703, "learning_rate": 0.00019114809448744172, "loss": 11.6638, "step": 19365 }, { "epoch": 0.4053839068910659, "grad_norm": 0.26017752289772034, "learning_rate": 0.00019114719258402, "loss": 11.6706, "step": 19366 }, { "epoch": 0.40540483965502805, "grad_norm": 0.2383706420660019, "learning_rate": 0.00019114629063678198, "loss": 11.6634, "step": 19367 }, { "epoch": 0.4054257724189902, "grad_norm": 0.3869055211544037, "learning_rate": 0.00019114538864572814, "loss": 11.686, "step": 19368 }, { "epoch": 0.40544670518295234, "grad_norm": 0.24741433560848236, "learning_rate": 0.00019114448661085884, "loss": 11.6585, "step": 19369 }, { "epoch": 0.4054676379469145, "grad_norm": 0.318450391292572, "learning_rate": 0.0001911435845321746, "loss": 11.6866, "step": 19370 }, { "epoch": 0.4054885707108767, "grad_norm": 0.33089500665664673, "learning_rate": 0.0001911426824096758, "loss": 11.6688, "step": 19371 }, { "epoch": 0.40550950347483883, "grad_norm": 0.3499387800693512, "learning_rate": 0.00019114178024336287, "loss": 11.6759, "step": 19372 }, { "epoch": 0.405530436238801, "grad_norm": 0.34182974696159363, "learning_rate": 0.00019114087803323628, "loss": 11.6809, "step": 19373 }, { "epoch": 0.4055513690027631, "grad_norm": 0.31659770011901855, "learning_rate": 0.00019113997577929642, "loss": 11.6781, "step": 19374 }, { "epoch": 0.40557230176672526, "grad_norm": 0.25512680411338806, "learning_rate": 0.0001911390734815438, "loss": 11.6676, "step": 19375 }, { "epoch": 0.4055932345306874, "grad_norm": 0.23628771305084229, "learning_rate": 0.00019113817113997877, "loss": 11.6667, "step": 19376 }, { "epoch": 0.4056141672946496, "grad_norm": 0.28089427947998047, "learning_rate": 0.00019113726875460177, "loss": 11.678, "step": 19377 }, { "epoch": 0.40563510005861175, "grad_norm": 0.27834007143974304, "learning_rate": 0.00019113636632541326, "loss": 11.6586, "step": 19378 }, { "epoch": 0.4056560328225739, "grad_norm": 0.3724096417427063, "learning_rate": 0.0001911354638524137, "loss": 11.6845, "step": 19379 }, { "epoch": 0.40567696558653604, "grad_norm": 0.3115440011024475, "learning_rate": 0.0001911345613356035, "loss": 11.6778, "step": 19380 }, { "epoch": 0.4056978983504982, "grad_norm": 0.2874268889427185, "learning_rate": 0.00019113365877498307, "loss": 11.6575, "step": 19381 }, { "epoch": 0.40571883111446033, "grad_norm": 0.2700720727443695, "learning_rate": 0.00019113275617055285, "loss": 11.6732, "step": 19382 }, { "epoch": 0.40573976387842253, "grad_norm": 0.2602531313896179, "learning_rate": 0.0001911318535223133, "loss": 11.6766, "step": 19383 }, { "epoch": 0.4057606966423847, "grad_norm": 0.29222339391708374, "learning_rate": 0.00019113095083026486, "loss": 11.6785, "step": 19384 }, { "epoch": 0.4057816294063468, "grad_norm": 0.23323790729045868, "learning_rate": 0.00019113004809440795, "loss": 11.6795, "step": 19385 }, { "epoch": 0.40580256217030897, "grad_norm": 0.31385889649391174, "learning_rate": 0.00019112914531474296, "loss": 11.6764, "step": 19386 }, { "epoch": 0.4058234949342711, "grad_norm": 0.27725279331207275, "learning_rate": 0.0001911282424912704, "loss": 11.6744, "step": 19387 }, { "epoch": 0.40584442769823326, "grad_norm": 0.2739395499229431, "learning_rate": 0.00019112733962399066, "loss": 11.6747, "step": 19388 }, { "epoch": 0.4058653604621954, "grad_norm": 0.28909623622894287, "learning_rate": 0.00019112643671290416, "loss": 11.6837, "step": 19389 }, { "epoch": 0.4058862932261576, "grad_norm": 0.27150383591651917, "learning_rate": 0.0001911255337580114, "loss": 11.662, "step": 19390 }, { "epoch": 0.40590722599011975, "grad_norm": 0.2277749627828598, "learning_rate": 0.00019112463075931274, "loss": 11.675, "step": 19391 }, { "epoch": 0.4059281587540819, "grad_norm": 0.25911617279052734, "learning_rate": 0.00019112372771680863, "loss": 11.6624, "step": 19392 }, { "epoch": 0.40594909151804404, "grad_norm": 0.3211026191711426, "learning_rate": 0.00019112282463049955, "loss": 11.6954, "step": 19393 }, { "epoch": 0.4059700242820062, "grad_norm": 0.30888503789901733, "learning_rate": 0.0001911219215003859, "loss": 11.6661, "step": 19394 }, { "epoch": 0.4059909570459683, "grad_norm": 0.3133237659931183, "learning_rate": 0.00019112101832646807, "loss": 11.6963, "step": 19395 }, { "epoch": 0.4060118898099305, "grad_norm": 0.3337234556674957, "learning_rate": 0.0001911201151087466, "loss": 11.6722, "step": 19396 }, { "epoch": 0.40603282257389267, "grad_norm": 0.2684558629989624, "learning_rate": 0.00019111921184722182, "loss": 11.6811, "step": 19397 }, { "epoch": 0.4060537553378548, "grad_norm": 0.2762398421764374, "learning_rate": 0.0001911183085418942, "loss": 11.6739, "step": 19398 }, { "epoch": 0.40607468810181696, "grad_norm": 0.3245835304260254, "learning_rate": 0.00019111740519276422, "loss": 11.6762, "step": 19399 }, { "epoch": 0.4060956208657791, "grad_norm": 0.27487650513648987, "learning_rate": 0.00019111650179983228, "loss": 11.6604, "step": 19400 }, { "epoch": 0.40611655362974125, "grad_norm": 0.4405742287635803, "learning_rate": 0.0001911155983630988, "loss": 11.6839, "step": 19401 }, { "epoch": 0.40613748639370345, "grad_norm": 0.3237990438938141, "learning_rate": 0.0001911146948825642, "loss": 11.6819, "step": 19402 }, { "epoch": 0.4061584191576656, "grad_norm": 0.2736937403678894, "learning_rate": 0.000191113791358229, "loss": 11.6732, "step": 19403 }, { "epoch": 0.40617935192162774, "grad_norm": 0.26954951882362366, "learning_rate": 0.00019111288779009354, "loss": 11.6822, "step": 19404 }, { "epoch": 0.4062002846855899, "grad_norm": 0.3559613823890686, "learning_rate": 0.0001911119841781583, "loss": 11.6722, "step": 19405 }, { "epoch": 0.40622121744955203, "grad_norm": 0.21462953090667725, "learning_rate": 0.00019111108052242366, "loss": 11.6664, "step": 19406 }, { "epoch": 0.4062421502135142, "grad_norm": 0.2813960611820221, "learning_rate": 0.00019111017682289013, "loss": 11.6827, "step": 19407 }, { "epoch": 0.4062630829774763, "grad_norm": 0.29318854212760925, "learning_rate": 0.00019110927307955811, "loss": 11.6871, "step": 19408 }, { "epoch": 0.4062840157414385, "grad_norm": 0.2780715227127075, "learning_rate": 0.00019110836929242805, "loss": 11.6632, "step": 19409 }, { "epoch": 0.40630494850540066, "grad_norm": 0.38740116357803345, "learning_rate": 0.00019110746546150036, "loss": 11.6634, "step": 19410 }, { "epoch": 0.4063258812693628, "grad_norm": 0.2986586093902588, "learning_rate": 0.00019110656158677548, "loss": 11.661, "step": 19411 }, { "epoch": 0.40634681403332495, "grad_norm": 0.34065181016921997, "learning_rate": 0.00019110565766825385, "loss": 11.6849, "step": 19412 }, { "epoch": 0.4063677467972871, "grad_norm": 0.27032262086868286, "learning_rate": 0.00019110475370593593, "loss": 11.6735, "step": 19413 }, { "epoch": 0.40638867956124924, "grad_norm": 0.29329150915145874, "learning_rate": 0.0001911038496998221, "loss": 11.6534, "step": 19414 }, { "epoch": 0.40640961232521144, "grad_norm": 0.3303200602531433, "learning_rate": 0.00019110294564991283, "loss": 11.656, "step": 19415 }, { "epoch": 0.4064305450891736, "grad_norm": 0.2986126244068146, "learning_rate": 0.00019110204155620855, "loss": 11.6847, "step": 19416 }, { "epoch": 0.40645147785313573, "grad_norm": 0.2443348467350006, "learning_rate": 0.00019110113741870968, "loss": 11.6653, "step": 19417 }, { "epoch": 0.4064724106170979, "grad_norm": 0.2732298672199249, "learning_rate": 0.0001911002332374167, "loss": 11.6787, "step": 19418 }, { "epoch": 0.40649334338106, "grad_norm": 0.2650921642780304, "learning_rate": 0.00019109932901232996, "loss": 11.6751, "step": 19419 }, { "epoch": 0.40651427614502217, "grad_norm": 0.2711637318134308, "learning_rate": 0.00019109842474345, "loss": 11.6732, "step": 19420 }, { "epoch": 0.40653520890898437, "grad_norm": 0.2912948429584503, "learning_rate": 0.00019109752043077719, "loss": 11.6825, "step": 19421 }, { "epoch": 0.4065561416729465, "grad_norm": 0.2718961238861084, "learning_rate": 0.00019109661607431196, "loss": 11.6906, "step": 19422 }, { "epoch": 0.40657707443690866, "grad_norm": 0.3199320435523987, "learning_rate": 0.00019109571167405474, "loss": 11.6683, "step": 19423 }, { "epoch": 0.4065980072008708, "grad_norm": 0.34924086928367615, "learning_rate": 0.00019109480723000603, "loss": 11.6708, "step": 19424 }, { "epoch": 0.40661893996483295, "grad_norm": 0.23321565985679626, "learning_rate": 0.0001910939027421662, "loss": 11.6681, "step": 19425 }, { "epoch": 0.4066398727287951, "grad_norm": 0.3371816873550415, "learning_rate": 0.00019109299821053573, "loss": 11.6742, "step": 19426 }, { "epoch": 0.40666080549275724, "grad_norm": 0.28257128596305847, "learning_rate": 0.000191092093635115, "loss": 11.6739, "step": 19427 }, { "epoch": 0.40668173825671944, "grad_norm": 0.27348658442497253, "learning_rate": 0.0001910911890159045, "loss": 11.6749, "step": 19428 }, { "epoch": 0.4067026710206816, "grad_norm": 0.30549952387809753, "learning_rate": 0.0001910902843529046, "loss": 11.6752, "step": 19429 }, { "epoch": 0.40672360378464373, "grad_norm": 0.28714993596076965, "learning_rate": 0.00019108937964611581, "loss": 11.663, "step": 19430 }, { "epoch": 0.4067445365486059, "grad_norm": 0.26913222670555115, "learning_rate": 0.0001910884748955385, "loss": 11.6753, "step": 19431 }, { "epoch": 0.406765469312568, "grad_norm": 0.24883656203746796, "learning_rate": 0.0001910875701011732, "loss": 11.6701, "step": 19432 }, { "epoch": 0.40678640207653016, "grad_norm": 0.26194682717323303, "learning_rate": 0.0001910866652630202, "loss": 11.676, "step": 19433 }, { "epoch": 0.40680733484049236, "grad_norm": 0.3891434371471405, "learning_rate": 0.00019108576038108006, "loss": 11.6726, "step": 19434 }, { "epoch": 0.4068282676044545, "grad_norm": 0.3103387951850891, "learning_rate": 0.00019108485545535315, "loss": 11.6711, "step": 19435 }, { "epoch": 0.40684920036841665, "grad_norm": 0.3095760643482208, "learning_rate": 0.00019108395048583992, "loss": 11.6835, "step": 19436 }, { "epoch": 0.4068701331323788, "grad_norm": 0.3723523020744324, "learning_rate": 0.00019108304547254084, "loss": 11.6691, "step": 19437 }, { "epoch": 0.40689106589634094, "grad_norm": 0.24476827681064606, "learning_rate": 0.0001910821404154563, "loss": 11.6862, "step": 19438 }, { "epoch": 0.4069119986603031, "grad_norm": 0.32876473665237427, "learning_rate": 0.00019108123531458675, "loss": 11.6642, "step": 19439 }, { "epoch": 0.4069329314242653, "grad_norm": 0.3221859335899353, "learning_rate": 0.00019108033016993262, "loss": 11.6805, "step": 19440 }, { "epoch": 0.40695386418822743, "grad_norm": 0.3720267415046692, "learning_rate": 0.00019107942498149436, "loss": 11.6722, "step": 19441 }, { "epoch": 0.4069747969521896, "grad_norm": 0.24931061267852783, "learning_rate": 0.0001910785197492724, "loss": 11.6898, "step": 19442 }, { "epoch": 0.4069957297161517, "grad_norm": 0.25066933035850525, "learning_rate": 0.00019107761447326716, "loss": 11.6742, "step": 19443 }, { "epoch": 0.40701666248011387, "grad_norm": 0.21175074577331543, "learning_rate": 0.00019107670915347906, "loss": 11.6877, "step": 19444 }, { "epoch": 0.407037595244076, "grad_norm": 0.27336394786834717, "learning_rate": 0.00019107580378990857, "loss": 11.6721, "step": 19445 }, { "epoch": 0.40705852800803816, "grad_norm": 0.24308806657791138, "learning_rate": 0.00019107489838255615, "loss": 11.6604, "step": 19446 }, { "epoch": 0.40707946077200036, "grad_norm": 0.32859480381011963, "learning_rate": 0.00019107399293142216, "loss": 11.6626, "step": 19447 }, { "epoch": 0.4071003935359625, "grad_norm": 0.30021387338638306, "learning_rate": 0.0001910730874365071, "loss": 11.6606, "step": 19448 }, { "epoch": 0.40712132629992465, "grad_norm": 0.2907654345035553, "learning_rate": 0.0001910721818978114, "loss": 11.6827, "step": 19449 }, { "epoch": 0.4071422590638868, "grad_norm": 0.34245210886001587, "learning_rate": 0.00019107127631533545, "loss": 11.6773, "step": 19450 }, { "epoch": 0.40716319182784894, "grad_norm": 0.2700614631175995, "learning_rate": 0.00019107037068907972, "loss": 11.6664, "step": 19451 }, { "epoch": 0.4071841245918111, "grad_norm": 0.2921738028526306, "learning_rate": 0.00019106946501904463, "loss": 11.6729, "step": 19452 }, { "epoch": 0.4072050573557733, "grad_norm": 0.2556048631668091, "learning_rate": 0.00019106855930523065, "loss": 11.6749, "step": 19453 }, { "epoch": 0.4072259901197354, "grad_norm": 0.272633820772171, "learning_rate": 0.00019106765354763816, "loss": 11.6643, "step": 19454 }, { "epoch": 0.40724692288369757, "grad_norm": 0.3120223581790924, "learning_rate": 0.00019106674774626762, "loss": 11.6684, "step": 19455 }, { "epoch": 0.4072678556476597, "grad_norm": 0.25224852561950684, "learning_rate": 0.00019106584190111949, "loss": 11.6811, "step": 19456 }, { "epoch": 0.40728878841162186, "grad_norm": 0.27081432938575745, "learning_rate": 0.00019106493601219414, "loss": 11.6672, "step": 19457 }, { "epoch": 0.407309721175584, "grad_norm": 0.25253987312316895, "learning_rate": 0.0001910640300794921, "loss": 11.6828, "step": 19458 }, { "epoch": 0.40733065393954615, "grad_norm": 0.2533012330532074, "learning_rate": 0.00019106312410301378, "loss": 11.6527, "step": 19459 }, { "epoch": 0.40735158670350835, "grad_norm": 0.3560492992401123, "learning_rate": 0.00019106221808275951, "loss": 11.684, "step": 19460 }, { "epoch": 0.4073725194674705, "grad_norm": 0.3147757649421692, "learning_rate": 0.00019106131201872986, "loss": 11.6679, "step": 19461 }, { "epoch": 0.40739345223143264, "grad_norm": 0.24908886849880219, "learning_rate": 0.0001910604059109252, "loss": 11.6788, "step": 19462 }, { "epoch": 0.4074143849953948, "grad_norm": 0.330235093832016, "learning_rate": 0.000191059499759346, "loss": 11.6764, "step": 19463 }, { "epoch": 0.40743531775935693, "grad_norm": 0.3286157250404358, "learning_rate": 0.00019105859356399266, "loss": 11.6752, "step": 19464 }, { "epoch": 0.4074562505233191, "grad_norm": 0.30686554312705994, "learning_rate": 0.0001910576873248656, "loss": 11.6748, "step": 19465 }, { "epoch": 0.4074771832872813, "grad_norm": 0.3816978931427002, "learning_rate": 0.0001910567810419653, "loss": 11.6946, "step": 19466 }, { "epoch": 0.4074981160512434, "grad_norm": 0.31512922048568726, "learning_rate": 0.0001910558747152922, "loss": 11.6639, "step": 19467 }, { "epoch": 0.40751904881520556, "grad_norm": 0.3208017945289612, "learning_rate": 0.00019105496834484674, "loss": 11.6776, "step": 19468 }, { "epoch": 0.4075399815791677, "grad_norm": 0.24398063123226166, "learning_rate": 0.00019105406193062926, "loss": 11.6634, "step": 19469 }, { "epoch": 0.40756091434312985, "grad_norm": 0.37664252519607544, "learning_rate": 0.00019105315547264032, "loss": 11.6911, "step": 19470 }, { "epoch": 0.407581847107092, "grad_norm": 0.2462492734193802, "learning_rate": 0.00019105224897088028, "loss": 11.6621, "step": 19471 }, { "epoch": 0.4076027798710542, "grad_norm": 0.2451564371585846, "learning_rate": 0.0001910513424253496, "loss": 11.68, "step": 19472 }, { "epoch": 0.40762371263501634, "grad_norm": 0.3590337336063385, "learning_rate": 0.00019105043583604874, "loss": 11.6924, "step": 19473 }, { "epoch": 0.4076446453989785, "grad_norm": 0.21656319499015808, "learning_rate": 0.0001910495292029781, "loss": 11.6743, "step": 19474 }, { "epoch": 0.40766557816294063, "grad_norm": 0.2672189176082611, "learning_rate": 0.00019104862252613812, "loss": 11.6675, "step": 19475 }, { "epoch": 0.4076865109269028, "grad_norm": 0.2613832950592041, "learning_rate": 0.00019104771580552923, "loss": 11.6708, "step": 19476 }, { "epoch": 0.4077074436908649, "grad_norm": 0.2846944332122803, "learning_rate": 0.0001910468090411519, "loss": 11.6696, "step": 19477 }, { "epoch": 0.40772837645482707, "grad_norm": 0.31682106852531433, "learning_rate": 0.00019104590223300652, "loss": 11.675, "step": 19478 }, { "epoch": 0.40774930921878927, "grad_norm": 0.2270352691411972, "learning_rate": 0.00019104499538109357, "loss": 11.6625, "step": 19479 }, { "epoch": 0.4077702419827514, "grad_norm": 0.31325414776802063, "learning_rate": 0.00019104408848541346, "loss": 11.6927, "step": 19480 }, { "epoch": 0.40779117474671356, "grad_norm": 0.23165912926197052, "learning_rate": 0.00019104318154596666, "loss": 11.6786, "step": 19481 }, { "epoch": 0.4078121075106757, "grad_norm": 0.23819930851459503, "learning_rate": 0.00019104227456275355, "loss": 11.6631, "step": 19482 }, { "epoch": 0.40783304027463785, "grad_norm": 0.27385029196739197, "learning_rate": 0.00019104136753577458, "loss": 11.6773, "step": 19483 }, { "epoch": 0.4078539730386, "grad_norm": 0.3009251356124878, "learning_rate": 0.00019104046046503021, "loss": 11.6734, "step": 19484 }, { "epoch": 0.4078749058025622, "grad_norm": 0.2968297302722931, "learning_rate": 0.0001910395533505209, "loss": 11.6786, "step": 19485 }, { "epoch": 0.40789583856652434, "grad_norm": 0.3071300983428955, "learning_rate": 0.00019103864619224702, "loss": 11.6635, "step": 19486 }, { "epoch": 0.4079167713304865, "grad_norm": 0.2542865574359894, "learning_rate": 0.00019103773899020903, "loss": 11.6717, "step": 19487 }, { "epoch": 0.4079377040944486, "grad_norm": 0.22502905130386353, "learning_rate": 0.00019103683174440738, "loss": 11.677, "step": 19488 }, { "epoch": 0.4079586368584108, "grad_norm": 0.5858476161956787, "learning_rate": 0.0001910359244548425, "loss": 11.7069, "step": 19489 }, { "epoch": 0.4079795696223729, "grad_norm": 0.23749621212482452, "learning_rate": 0.00019103501712151483, "loss": 11.6637, "step": 19490 }, { "epoch": 0.4080005023863351, "grad_norm": 0.3103274703025818, "learning_rate": 0.0001910341097444248, "loss": 11.6735, "step": 19491 }, { "epoch": 0.40802143515029726, "grad_norm": 0.293694406747818, "learning_rate": 0.0001910332023235729, "loss": 11.6622, "step": 19492 }, { "epoch": 0.4080423679142594, "grad_norm": 0.3767867684364319, "learning_rate": 0.00019103229485895943, "loss": 11.6716, "step": 19493 }, { "epoch": 0.40806330067822155, "grad_norm": 0.28915393352508545, "learning_rate": 0.00019103138735058496, "loss": 11.6859, "step": 19494 }, { "epoch": 0.4080842334421837, "grad_norm": 0.2855812609195709, "learning_rate": 0.00019103047979844988, "loss": 11.6545, "step": 19495 }, { "epoch": 0.40810516620614584, "grad_norm": 0.4430766701698303, "learning_rate": 0.00019102957220255463, "loss": 11.6964, "step": 19496 }, { "epoch": 0.408126098970108, "grad_norm": 0.25728535652160645, "learning_rate": 0.00019102866456289963, "loss": 11.659, "step": 19497 }, { "epoch": 0.4081470317340702, "grad_norm": 0.3162117302417755, "learning_rate": 0.0001910277568794853, "loss": 11.6706, "step": 19498 }, { "epoch": 0.40816796449803233, "grad_norm": 0.3238479495048523, "learning_rate": 0.00019102684915231212, "loss": 11.6762, "step": 19499 }, { "epoch": 0.4081888972619945, "grad_norm": 0.3921191990375519, "learning_rate": 0.00019102594138138053, "loss": 11.6859, "step": 19500 }, { "epoch": 0.4082098300259566, "grad_norm": 0.26569074392318726, "learning_rate": 0.00019102503356669093, "loss": 11.6692, "step": 19501 }, { "epoch": 0.40823076278991877, "grad_norm": 0.3891879618167877, "learning_rate": 0.00019102412570824376, "loss": 11.7004, "step": 19502 }, { "epoch": 0.4082516955538809, "grad_norm": 0.32956039905548096, "learning_rate": 0.0001910232178060395, "loss": 11.6794, "step": 19503 }, { "epoch": 0.4082726283178431, "grad_norm": 0.4181137979030609, "learning_rate": 0.00019102230986007852, "loss": 11.6655, "step": 19504 }, { "epoch": 0.40829356108180526, "grad_norm": 0.28250423073768616, "learning_rate": 0.00019102140187036133, "loss": 11.681, "step": 19505 }, { "epoch": 0.4083144938457674, "grad_norm": 0.2882595360279083, "learning_rate": 0.0001910204938368883, "loss": 11.6724, "step": 19506 }, { "epoch": 0.40833542660972955, "grad_norm": 0.281080961227417, "learning_rate": 0.00019101958575965992, "loss": 11.6769, "step": 19507 }, { "epoch": 0.4083563593736917, "grad_norm": 0.30892014503479004, "learning_rate": 0.00019101867763867656, "loss": 11.6582, "step": 19508 }, { "epoch": 0.40837729213765384, "grad_norm": 0.3157646954059601, "learning_rate": 0.0001910177694739387, "loss": 11.6716, "step": 19509 }, { "epoch": 0.40839822490161604, "grad_norm": 0.4656490981578827, "learning_rate": 0.00019101686126544683, "loss": 11.6657, "step": 19510 }, { "epoch": 0.4084191576655782, "grad_norm": 0.242603600025177, "learning_rate": 0.0001910159530132013, "loss": 11.6809, "step": 19511 }, { "epoch": 0.4084400904295403, "grad_norm": 0.3267386853694916, "learning_rate": 0.00019101504471720254, "loss": 11.6637, "step": 19512 }, { "epoch": 0.40846102319350247, "grad_norm": 0.25736528635025024, "learning_rate": 0.00019101413637745104, "loss": 11.6781, "step": 19513 }, { "epoch": 0.4084819559574646, "grad_norm": 0.2940478026866913, "learning_rate": 0.00019101322799394726, "loss": 11.6842, "step": 19514 }, { "epoch": 0.40850288872142676, "grad_norm": 0.2682988941669464, "learning_rate": 0.00019101231956669156, "loss": 11.6623, "step": 19515 }, { "epoch": 0.4085238214853889, "grad_norm": 0.266995370388031, "learning_rate": 0.00019101141109568442, "loss": 11.6887, "step": 19516 }, { "epoch": 0.4085447542493511, "grad_norm": 0.23955108225345612, "learning_rate": 0.0001910105025809263, "loss": 11.692, "step": 19517 }, { "epoch": 0.40856568701331325, "grad_norm": 0.3165567219257355, "learning_rate": 0.00019100959402241759, "loss": 11.6779, "step": 19518 }, { "epoch": 0.4085866197772754, "grad_norm": 0.3567744195461273, "learning_rate": 0.00019100868542015875, "loss": 11.6688, "step": 19519 }, { "epoch": 0.40860755254123754, "grad_norm": 0.3058020770549774, "learning_rate": 0.0001910077767741502, "loss": 11.6845, "step": 19520 }, { "epoch": 0.4086284853051997, "grad_norm": 0.5013641119003296, "learning_rate": 0.0001910068680843924, "loss": 11.6759, "step": 19521 }, { "epoch": 0.40864941806916183, "grad_norm": 0.2576732039451599, "learning_rate": 0.00019100595935088574, "loss": 11.6783, "step": 19522 }, { "epoch": 0.40867035083312403, "grad_norm": 0.2753080129623413, "learning_rate": 0.0001910050505736307, "loss": 11.6805, "step": 19523 }, { "epoch": 0.4086912835970862, "grad_norm": 0.3210597336292267, "learning_rate": 0.00019100414175262772, "loss": 11.6845, "step": 19524 }, { "epoch": 0.4087122163610483, "grad_norm": 0.31845933198928833, "learning_rate": 0.00019100323288787725, "loss": 11.6851, "step": 19525 }, { "epoch": 0.40873314912501046, "grad_norm": 0.24459576606750488, "learning_rate": 0.00019100232397937966, "loss": 11.6695, "step": 19526 }, { "epoch": 0.4087540818889726, "grad_norm": 0.3982303738594055, "learning_rate": 0.00019100141502713546, "loss": 11.6737, "step": 19527 }, { "epoch": 0.40877501465293475, "grad_norm": 0.2611059844493866, "learning_rate": 0.00019100050603114505, "loss": 11.6628, "step": 19528 }, { "epoch": 0.40879594741689695, "grad_norm": 0.35319462418556213, "learning_rate": 0.00019099959699140888, "loss": 11.6606, "step": 19529 }, { "epoch": 0.4088168801808591, "grad_norm": 0.2621401846408844, "learning_rate": 0.00019099868790792737, "loss": 11.6775, "step": 19530 }, { "epoch": 0.40883781294482124, "grad_norm": 0.27946627140045166, "learning_rate": 0.00019099777878070097, "loss": 11.6644, "step": 19531 }, { "epoch": 0.4088587457087834, "grad_norm": 0.28457027673721313, "learning_rate": 0.00019099686960973012, "loss": 11.6693, "step": 19532 }, { "epoch": 0.40887967847274553, "grad_norm": 0.2755642831325531, "learning_rate": 0.00019099596039501523, "loss": 11.6664, "step": 19533 }, { "epoch": 0.4089006112367077, "grad_norm": 0.23294398188591003, "learning_rate": 0.00019099505113655675, "loss": 11.6629, "step": 19534 }, { "epoch": 0.4089215440006698, "grad_norm": 0.394069105386734, "learning_rate": 0.00019099414183435518, "loss": 11.6695, "step": 19535 }, { "epoch": 0.408942476764632, "grad_norm": 0.3052396774291992, "learning_rate": 0.00019099323248841085, "loss": 11.6626, "step": 19536 }, { "epoch": 0.40896340952859417, "grad_norm": 0.32875823974609375, "learning_rate": 0.00019099232309872428, "loss": 11.6644, "step": 19537 }, { "epoch": 0.4089843422925563, "grad_norm": 0.2509830892086029, "learning_rate": 0.00019099141366529584, "loss": 11.6652, "step": 19538 }, { "epoch": 0.40900527505651846, "grad_norm": 0.35133326053619385, "learning_rate": 0.00019099050418812605, "loss": 11.6862, "step": 19539 }, { "epoch": 0.4090262078204806, "grad_norm": 0.33815446496009827, "learning_rate": 0.0001909895946672153, "loss": 11.674, "step": 19540 }, { "epoch": 0.40904714058444275, "grad_norm": 0.28622278571128845, "learning_rate": 0.000190988685102564, "loss": 11.6934, "step": 19541 }, { "epoch": 0.40906807334840495, "grad_norm": 0.2781476378440857, "learning_rate": 0.00019098777549417261, "loss": 11.6809, "step": 19542 }, { "epoch": 0.4090890061123671, "grad_norm": 0.4143672287464142, "learning_rate": 0.0001909868658420416, "loss": 11.6913, "step": 19543 }, { "epoch": 0.40910993887632924, "grad_norm": 0.31401318311691284, "learning_rate": 0.00019098595614617137, "loss": 11.6912, "step": 19544 }, { "epoch": 0.4091308716402914, "grad_norm": 0.30664920806884766, "learning_rate": 0.00019098504640656234, "loss": 11.6827, "step": 19545 }, { "epoch": 0.4091518044042535, "grad_norm": 0.29406821727752686, "learning_rate": 0.00019098413662321502, "loss": 11.6671, "step": 19546 }, { "epoch": 0.40917273716821567, "grad_norm": 0.33266812562942505, "learning_rate": 0.0001909832267961298, "loss": 11.6794, "step": 19547 }, { "epoch": 0.4091936699321778, "grad_norm": 0.33044829964637756, "learning_rate": 0.00019098231692530707, "loss": 11.6778, "step": 19548 }, { "epoch": 0.40921460269614, "grad_norm": 0.2875364124774933, "learning_rate": 0.00019098140701074735, "loss": 11.6736, "step": 19549 }, { "epoch": 0.40923553546010216, "grad_norm": 0.38050276041030884, "learning_rate": 0.00019098049705245106, "loss": 11.6874, "step": 19550 }, { "epoch": 0.4092564682240643, "grad_norm": 0.2963218688964844, "learning_rate": 0.0001909795870504186, "loss": 11.6656, "step": 19551 }, { "epoch": 0.40927740098802645, "grad_norm": 0.25893813371658325, "learning_rate": 0.0001909786770046504, "loss": 11.6642, "step": 19552 }, { "epoch": 0.4092983337519886, "grad_norm": 0.23307378590106964, "learning_rate": 0.000190977766915147, "loss": 11.6738, "step": 19553 }, { "epoch": 0.40931926651595074, "grad_norm": 0.25514230132102966, "learning_rate": 0.00019097685678190872, "loss": 11.6542, "step": 19554 }, { "epoch": 0.40934019927991294, "grad_norm": 0.27505993843078613, "learning_rate": 0.00019097594660493603, "loss": 11.6577, "step": 19555 }, { "epoch": 0.4093611320438751, "grad_norm": 0.2940889298915863, "learning_rate": 0.0001909750363842294, "loss": 11.6536, "step": 19556 }, { "epoch": 0.40938206480783723, "grad_norm": 0.24301452934741974, "learning_rate": 0.00019097412611978923, "loss": 11.6816, "step": 19557 }, { "epoch": 0.4094029975717994, "grad_norm": 0.2445344626903534, "learning_rate": 0.000190973215811616, "loss": 11.6789, "step": 19558 }, { "epoch": 0.4094239303357615, "grad_norm": 0.26432332396507263, "learning_rate": 0.00019097230545971013, "loss": 11.6721, "step": 19559 }, { "epoch": 0.40944486309972367, "grad_norm": 0.30995693802833557, "learning_rate": 0.000190971395064072, "loss": 11.6606, "step": 19560 }, { "epoch": 0.40946579586368587, "grad_norm": 0.2560977637767792, "learning_rate": 0.00019097048462470213, "loss": 11.6638, "step": 19561 }, { "epoch": 0.409486728627648, "grad_norm": 0.3078520596027374, "learning_rate": 0.00019096957414160091, "loss": 11.6608, "step": 19562 }, { "epoch": 0.40950766139161016, "grad_norm": 0.30632534623146057, "learning_rate": 0.00019096866361476881, "loss": 11.6673, "step": 19563 }, { "epoch": 0.4095285941555723, "grad_norm": 0.39898064732551575, "learning_rate": 0.00019096775304420626, "loss": 11.6694, "step": 19564 }, { "epoch": 0.40954952691953445, "grad_norm": 0.24135346710681915, "learning_rate": 0.00019096684242991368, "loss": 11.6907, "step": 19565 }, { "epoch": 0.4095704596834966, "grad_norm": 0.28811338543891907, "learning_rate": 0.0001909659317718915, "loss": 11.6644, "step": 19566 }, { "epoch": 0.40959139244745874, "grad_norm": 0.29452142119407654, "learning_rate": 0.00019096502107014018, "loss": 11.6836, "step": 19567 }, { "epoch": 0.40961232521142094, "grad_norm": 0.2734185755252838, "learning_rate": 0.00019096411032466014, "loss": 11.6827, "step": 19568 }, { "epoch": 0.4096332579753831, "grad_norm": 0.3450280427932739, "learning_rate": 0.00019096319953545185, "loss": 11.6822, "step": 19569 }, { "epoch": 0.4096541907393452, "grad_norm": 0.31944364309310913, "learning_rate": 0.0001909622887025157, "loss": 11.6843, "step": 19570 }, { "epoch": 0.40967512350330737, "grad_norm": 0.30948740243911743, "learning_rate": 0.0001909613778258522, "loss": 11.6892, "step": 19571 }, { "epoch": 0.4096960562672695, "grad_norm": 0.2716006934642792, "learning_rate": 0.0001909604669054617, "loss": 11.6693, "step": 19572 }, { "epoch": 0.40971698903123166, "grad_norm": 0.30075082182884216, "learning_rate": 0.0001909595559413447, "loss": 11.6812, "step": 19573 }, { "epoch": 0.40973792179519386, "grad_norm": 0.2643094062805176, "learning_rate": 0.00019095864493350162, "loss": 11.67, "step": 19574 }, { "epoch": 0.409758854559156, "grad_norm": 0.3236771821975708, "learning_rate": 0.0001909577338819329, "loss": 11.6751, "step": 19575 }, { "epoch": 0.40977978732311815, "grad_norm": 0.2832843065261841, "learning_rate": 0.00019095682278663894, "loss": 11.6641, "step": 19576 }, { "epoch": 0.4098007200870803, "grad_norm": 0.2864574193954468, "learning_rate": 0.00019095591164762025, "loss": 11.6753, "step": 19577 }, { "epoch": 0.40982165285104244, "grad_norm": 0.2744738459587097, "learning_rate": 0.00019095500046487725, "loss": 11.6778, "step": 19578 }, { "epoch": 0.4098425856150046, "grad_norm": 0.39357683062553406, "learning_rate": 0.0001909540892384103, "loss": 11.6798, "step": 19579 }, { "epoch": 0.4098635183789668, "grad_norm": 0.2988497316837311, "learning_rate": 0.0001909531779682199, "loss": 11.6866, "step": 19580 }, { "epoch": 0.40988445114292893, "grad_norm": 0.37501442432403564, "learning_rate": 0.00019095226665430656, "loss": 11.6857, "step": 19581 }, { "epoch": 0.4099053839068911, "grad_norm": 0.33781707286834717, "learning_rate": 0.00019095135529667058, "loss": 11.6682, "step": 19582 }, { "epoch": 0.4099263166708532, "grad_norm": 0.34652844071388245, "learning_rate": 0.00019095044389531247, "loss": 11.6846, "step": 19583 }, { "epoch": 0.40994724943481536, "grad_norm": 0.2555338144302368, "learning_rate": 0.00019094953245023266, "loss": 11.6609, "step": 19584 }, { "epoch": 0.4099681821987775, "grad_norm": 0.29177454113960266, "learning_rate": 0.0001909486209614316, "loss": 11.6592, "step": 19585 }, { "epoch": 0.40998911496273965, "grad_norm": 0.31731751561164856, "learning_rate": 0.0001909477094289097, "loss": 11.6972, "step": 19586 }, { "epoch": 0.41001004772670185, "grad_norm": 0.3159114420413971, "learning_rate": 0.00019094679785266742, "loss": 11.6692, "step": 19587 }, { "epoch": 0.410030980490664, "grad_norm": 0.2959292531013489, "learning_rate": 0.0001909458862327052, "loss": 11.6934, "step": 19588 }, { "epoch": 0.41005191325462614, "grad_norm": 0.25531089305877686, "learning_rate": 0.00019094497456902346, "loss": 11.6842, "step": 19589 }, { "epoch": 0.4100728460185883, "grad_norm": 0.31905558705329895, "learning_rate": 0.00019094406286162264, "loss": 11.6783, "step": 19590 }, { "epoch": 0.41009377878255043, "grad_norm": 0.2511715590953827, "learning_rate": 0.00019094315111050318, "loss": 11.6555, "step": 19591 }, { "epoch": 0.4101147115465126, "grad_norm": 0.2593907117843628, "learning_rate": 0.00019094223931566555, "loss": 11.6649, "step": 19592 }, { "epoch": 0.4101356443104748, "grad_norm": 0.3925720751285553, "learning_rate": 0.00019094132747711013, "loss": 11.6769, "step": 19593 }, { "epoch": 0.4101565770744369, "grad_norm": 0.258428692817688, "learning_rate": 0.00019094041559483742, "loss": 11.6777, "step": 19594 }, { "epoch": 0.41017750983839907, "grad_norm": 0.27872005105018616, "learning_rate": 0.00019093950366884786, "loss": 11.679, "step": 19595 }, { "epoch": 0.4101984426023612, "grad_norm": 0.2543196380138397, "learning_rate": 0.00019093859169914178, "loss": 11.6837, "step": 19596 }, { "epoch": 0.41021937536632336, "grad_norm": 0.29897481203079224, "learning_rate": 0.00019093767968571975, "loss": 11.6722, "step": 19597 }, { "epoch": 0.4102403081302855, "grad_norm": 0.2716052234172821, "learning_rate": 0.00019093676762858215, "loss": 11.6802, "step": 19598 }, { "epoch": 0.4102612408942477, "grad_norm": 0.29782018065452576, "learning_rate": 0.0001909358555277294, "loss": 11.6864, "step": 19599 }, { "epoch": 0.41028217365820985, "grad_norm": 0.21293103694915771, "learning_rate": 0.00019093494338316198, "loss": 11.664, "step": 19600 }, { "epoch": 0.410303106422172, "grad_norm": 0.26039451360702515, "learning_rate": 0.0001909340311948803, "loss": 11.6658, "step": 19601 }, { "epoch": 0.41032403918613414, "grad_norm": 0.244327574968338, "learning_rate": 0.00019093311896288483, "loss": 11.6858, "step": 19602 }, { "epoch": 0.4103449719500963, "grad_norm": 0.2766466438770294, "learning_rate": 0.00019093220668717597, "loss": 11.6871, "step": 19603 }, { "epoch": 0.4103659047140584, "grad_norm": 0.30772098898887634, "learning_rate": 0.0001909312943677542, "loss": 11.6762, "step": 19604 }, { "epoch": 0.41038683747802057, "grad_norm": 0.3194458782672882, "learning_rate": 0.0001909303820046199, "loss": 11.6599, "step": 19605 }, { "epoch": 0.41040777024198277, "grad_norm": 0.2652152180671692, "learning_rate": 0.00019092946959777356, "loss": 11.6836, "step": 19606 }, { "epoch": 0.4104287030059449, "grad_norm": 0.30930349230766296, "learning_rate": 0.0001909285571472156, "loss": 11.6673, "step": 19607 }, { "epoch": 0.41044963576990706, "grad_norm": 0.23739397525787354, "learning_rate": 0.00019092764465294647, "loss": 11.6649, "step": 19608 }, { "epoch": 0.4104705685338692, "grad_norm": 0.2942639887332916, "learning_rate": 0.00019092673211496659, "loss": 11.6588, "step": 19609 }, { "epoch": 0.41049150129783135, "grad_norm": 0.2661329507827759, "learning_rate": 0.0001909258195332764, "loss": 11.6742, "step": 19610 }, { "epoch": 0.4105124340617935, "grad_norm": 0.35695064067840576, "learning_rate": 0.00019092490690787636, "loss": 11.6852, "step": 19611 }, { "epoch": 0.4105333668257557, "grad_norm": 0.3420618772506714, "learning_rate": 0.00019092399423876688, "loss": 11.6907, "step": 19612 }, { "epoch": 0.41055429958971784, "grad_norm": 0.3087829053401947, "learning_rate": 0.00019092308152594844, "loss": 11.6676, "step": 19613 }, { "epoch": 0.41057523235368, "grad_norm": 0.3155747056007385, "learning_rate": 0.00019092216876942142, "loss": 11.6668, "step": 19614 }, { "epoch": 0.41059616511764213, "grad_norm": 0.2905408442020416, "learning_rate": 0.00019092125596918631, "loss": 11.6658, "step": 19615 }, { "epoch": 0.4106170978816043, "grad_norm": 0.36129236221313477, "learning_rate": 0.00019092034312524355, "loss": 11.6742, "step": 19616 }, { "epoch": 0.4106380306455664, "grad_norm": 0.308838427066803, "learning_rate": 0.00019091943023759352, "loss": 11.6446, "step": 19617 }, { "epoch": 0.4106589634095286, "grad_norm": 0.33902406692504883, "learning_rate": 0.0001909185173062367, "loss": 11.6617, "step": 19618 }, { "epoch": 0.41067989617349077, "grad_norm": 0.22430302202701569, "learning_rate": 0.00019091760433117357, "loss": 11.6664, "step": 19619 }, { "epoch": 0.4107008289374529, "grad_norm": 0.2863880395889282, "learning_rate": 0.00019091669131240447, "loss": 11.6827, "step": 19620 }, { "epoch": 0.41072176170141506, "grad_norm": 0.2840779721736908, "learning_rate": 0.0001909157782499299, "loss": 11.6784, "step": 19621 }, { "epoch": 0.4107426944653772, "grad_norm": 0.30333107709884644, "learning_rate": 0.00019091486514375032, "loss": 11.6948, "step": 19622 }, { "epoch": 0.41076362722933935, "grad_norm": 0.28296253085136414, "learning_rate": 0.00019091395199386614, "loss": 11.6763, "step": 19623 }, { "epoch": 0.4107845599933015, "grad_norm": 0.27693042159080505, "learning_rate": 0.00019091303880027782, "loss": 11.6766, "step": 19624 }, { "epoch": 0.4108054927572637, "grad_norm": 0.3222489655017853, "learning_rate": 0.00019091212556298574, "loss": 11.6666, "step": 19625 }, { "epoch": 0.41082642552122584, "grad_norm": 0.25969329476356506, "learning_rate": 0.0001909112122819904, "loss": 11.6615, "step": 19626 }, { "epoch": 0.410847358285188, "grad_norm": 0.2738885283470154, "learning_rate": 0.00019091029895729222, "loss": 11.6743, "step": 19627 }, { "epoch": 0.4108682910491501, "grad_norm": 0.22898487746715546, "learning_rate": 0.0001909093855888916, "loss": 11.6577, "step": 19628 }, { "epoch": 0.41088922381311227, "grad_norm": 0.2730991542339325, "learning_rate": 0.0001909084721767891, "loss": 11.6669, "step": 19629 }, { "epoch": 0.4109101565770744, "grad_norm": 0.297597199678421, "learning_rate": 0.000190907558720985, "loss": 11.6796, "step": 19630 }, { "epoch": 0.4109310893410366, "grad_norm": 0.3100835382938385, "learning_rate": 0.00019090664522147983, "loss": 11.6681, "step": 19631 }, { "epoch": 0.41095202210499876, "grad_norm": 0.28708335757255554, "learning_rate": 0.00019090573167827404, "loss": 11.6691, "step": 19632 }, { "epoch": 0.4109729548689609, "grad_norm": 0.294416218996048, "learning_rate": 0.00019090481809136802, "loss": 11.6483, "step": 19633 }, { "epoch": 0.41099388763292305, "grad_norm": 0.2572370171546936, "learning_rate": 0.00019090390446076225, "loss": 11.6682, "step": 19634 }, { "epoch": 0.4110148203968852, "grad_norm": 0.2569344639778137, "learning_rate": 0.00019090299078645715, "loss": 11.6811, "step": 19635 }, { "epoch": 0.41103575316084734, "grad_norm": 0.2923959493637085, "learning_rate": 0.00019090207706845314, "loss": 11.6602, "step": 19636 }, { "epoch": 0.41105668592480954, "grad_norm": 0.2698462903499603, "learning_rate": 0.00019090116330675068, "loss": 11.6663, "step": 19637 }, { "epoch": 0.4110776186887717, "grad_norm": 0.37828969955444336, "learning_rate": 0.00019090024950135025, "loss": 11.6721, "step": 19638 }, { "epoch": 0.41109855145273383, "grad_norm": 0.25559860467910767, "learning_rate": 0.0001908993356522522, "loss": 11.6717, "step": 19639 }, { "epoch": 0.411119484216696, "grad_norm": 0.24893061816692352, "learning_rate": 0.00019089842175945703, "loss": 11.6814, "step": 19640 }, { "epoch": 0.4111404169806581, "grad_norm": 0.37115225195884705, "learning_rate": 0.00019089750782296517, "loss": 11.6849, "step": 19641 }, { "epoch": 0.41116134974462026, "grad_norm": 0.2614109516143799, "learning_rate": 0.00019089659384277706, "loss": 11.6598, "step": 19642 }, { "epoch": 0.4111822825085824, "grad_norm": 0.27137991786003113, "learning_rate": 0.00019089567981889314, "loss": 11.6569, "step": 19643 }, { "epoch": 0.4112032152725446, "grad_norm": 0.30728548765182495, "learning_rate": 0.00019089476575131383, "loss": 11.6844, "step": 19644 }, { "epoch": 0.41122414803650675, "grad_norm": 0.26412513852119446, "learning_rate": 0.00019089385164003958, "loss": 11.6952, "step": 19645 }, { "epoch": 0.4112450808004689, "grad_norm": 0.23672594130039215, "learning_rate": 0.00019089293748507087, "loss": 11.6684, "step": 19646 }, { "epoch": 0.41126601356443104, "grad_norm": 0.29381030797958374, "learning_rate": 0.00019089202328640808, "loss": 11.6686, "step": 19647 }, { "epoch": 0.4112869463283932, "grad_norm": 0.2873271405696869, "learning_rate": 0.00019089110904405167, "loss": 11.6695, "step": 19648 }, { "epoch": 0.41130787909235533, "grad_norm": 0.31320372223854065, "learning_rate": 0.0001908901947580021, "loss": 11.6676, "step": 19649 }, { "epoch": 0.41132881185631753, "grad_norm": 0.3167835474014282, "learning_rate": 0.00019088928042825976, "loss": 11.6687, "step": 19650 }, { "epoch": 0.4113497446202797, "grad_norm": 0.2324686050415039, "learning_rate": 0.00019088836605482516, "loss": 11.6733, "step": 19651 }, { "epoch": 0.4113706773842418, "grad_norm": 0.24200665950775146, "learning_rate": 0.00019088745163769867, "loss": 11.6733, "step": 19652 }, { "epoch": 0.41139161014820397, "grad_norm": 0.2697261869907379, "learning_rate": 0.00019088653717688078, "loss": 11.6692, "step": 19653 }, { "epoch": 0.4114125429121661, "grad_norm": 0.295582115650177, "learning_rate": 0.0001908856226723719, "loss": 11.6729, "step": 19654 }, { "epoch": 0.41143347567612826, "grad_norm": 0.2699007987976074, "learning_rate": 0.00019088470812417247, "loss": 11.6647, "step": 19655 }, { "epoch": 0.4114544084400904, "grad_norm": 0.321526437997818, "learning_rate": 0.00019088379353228297, "loss": 11.6778, "step": 19656 }, { "epoch": 0.4114753412040526, "grad_norm": 0.27406007051467896, "learning_rate": 0.00019088287889670378, "loss": 11.6798, "step": 19657 }, { "epoch": 0.41149627396801475, "grad_norm": 0.3284604847431183, "learning_rate": 0.0001908819642174354, "loss": 11.6687, "step": 19658 }, { "epoch": 0.4115172067319769, "grad_norm": 0.24716182053089142, "learning_rate": 0.0001908810494944782, "loss": 11.6609, "step": 19659 }, { "epoch": 0.41153813949593904, "grad_norm": 0.2395942360162735, "learning_rate": 0.00019088013472783267, "loss": 11.6652, "step": 19660 }, { "epoch": 0.4115590722599012, "grad_norm": 0.22387415170669556, "learning_rate": 0.00019087921991749925, "loss": 11.6804, "step": 19661 }, { "epoch": 0.4115800050238633, "grad_norm": 0.28807035088539124, "learning_rate": 0.00019087830506347837, "loss": 11.6768, "step": 19662 }, { "epoch": 0.4116009377878255, "grad_norm": 0.31968843936920166, "learning_rate": 0.00019087739016577046, "loss": 11.6527, "step": 19663 }, { "epoch": 0.41162187055178767, "grad_norm": 0.2605865001678467, "learning_rate": 0.00019087647522437594, "loss": 11.6886, "step": 19664 }, { "epoch": 0.4116428033157498, "grad_norm": 0.27776604890823364, "learning_rate": 0.00019087556023929532, "loss": 11.6623, "step": 19665 }, { "epoch": 0.41166373607971196, "grad_norm": 0.27449601888656616, "learning_rate": 0.00019087464521052897, "loss": 11.6751, "step": 19666 }, { "epoch": 0.4116846688436741, "grad_norm": 0.2730898857116699, "learning_rate": 0.00019087373013807737, "loss": 11.6695, "step": 19667 }, { "epoch": 0.41170560160763625, "grad_norm": 0.32325392961502075, "learning_rate": 0.00019087281502194094, "loss": 11.6779, "step": 19668 }, { "epoch": 0.41172653437159845, "grad_norm": 0.28766095638275146, "learning_rate": 0.00019087189986212013, "loss": 11.6753, "step": 19669 }, { "epoch": 0.4117474671355606, "grad_norm": 0.24793361127376556, "learning_rate": 0.00019087098465861538, "loss": 11.684, "step": 19670 }, { "epoch": 0.41176839989952274, "grad_norm": 0.28318047523498535, "learning_rate": 0.00019087006941142715, "loss": 11.6545, "step": 19671 }, { "epoch": 0.4117893326634849, "grad_norm": 0.2889116406440735, "learning_rate": 0.00019086915412055584, "loss": 11.6704, "step": 19672 }, { "epoch": 0.41181026542744703, "grad_norm": 0.24587491154670715, "learning_rate": 0.0001908682387860019, "loss": 11.6738, "step": 19673 }, { "epoch": 0.4118311981914092, "grad_norm": 0.285725474357605, "learning_rate": 0.00019086732340776576, "loss": 11.6858, "step": 19674 }, { "epoch": 0.4118521309553713, "grad_norm": 0.3900924623012543, "learning_rate": 0.0001908664079858479, "loss": 11.6667, "step": 19675 }, { "epoch": 0.4118730637193335, "grad_norm": 0.3102843165397644, "learning_rate": 0.0001908654925202487, "loss": 11.6788, "step": 19676 }, { "epoch": 0.41189399648329567, "grad_norm": 0.2548796832561493, "learning_rate": 0.00019086457701096866, "loss": 11.6777, "step": 19677 }, { "epoch": 0.4119149292472578, "grad_norm": 0.24158819019794464, "learning_rate": 0.00019086366145800823, "loss": 11.6726, "step": 19678 }, { "epoch": 0.41193586201121996, "grad_norm": 0.26131671667099, "learning_rate": 0.0001908627458613678, "loss": 11.6778, "step": 19679 }, { "epoch": 0.4119567947751821, "grad_norm": 0.28639987111091614, "learning_rate": 0.00019086183022104777, "loss": 11.6714, "step": 19680 }, { "epoch": 0.41197772753914425, "grad_norm": 0.26849979162216187, "learning_rate": 0.0001908609145370487, "loss": 11.6697, "step": 19681 }, { "epoch": 0.41199866030310645, "grad_norm": 0.27714163064956665, "learning_rate": 0.00019085999880937094, "loss": 11.6862, "step": 19682 }, { "epoch": 0.4120195930670686, "grad_norm": 0.33682048320770264, "learning_rate": 0.00019085908303801498, "loss": 11.6802, "step": 19683 }, { "epoch": 0.41204052583103074, "grad_norm": 0.2909316420555115, "learning_rate": 0.00019085816722298122, "loss": 11.6584, "step": 19684 }, { "epoch": 0.4120614585949929, "grad_norm": 0.4380275011062622, "learning_rate": 0.00019085725136427012, "loss": 11.6891, "step": 19685 }, { "epoch": 0.412082391358955, "grad_norm": 0.24830739200115204, "learning_rate": 0.00019085633546188214, "loss": 11.6675, "step": 19686 }, { "epoch": 0.41210332412291717, "grad_norm": 0.29453524947166443, "learning_rate": 0.0001908554195158177, "loss": 11.6687, "step": 19687 }, { "epoch": 0.41212425688687937, "grad_norm": 0.24316896498203278, "learning_rate": 0.00019085450352607718, "loss": 11.6534, "step": 19688 }, { "epoch": 0.4121451896508415, "grad_norm": 0.32137995958328247, "learning_rate": 0.00019085358749266112, "loss": 11.6566, "step": 19689 }, { "epoch": 0.41216612241480366, "grad_norm": 0.3106651306152344, "learning_rate": 0.00019085267141556993, "loss": 11.6694, "step": 19690 }, { "epoch": 0.4121870551787658, "grad_norm": 0.3339098393917084, "learning_rate": 0.00019085175529480404, "loss": 11.6891, "step": 19691 }, { "epoch": 0.41220798794272795, "grad_norm": 0.26188424229621887, "learning_rate": 0.00019085083913036389, "loss": 11.6714, "step": 19692 }, { "epoch": 0.4122289207066901, "grad_norm": 0.37087923288345337, "learning_rate": 0.0001908499229222499, "loss": 11.6801, "step": 19693 }, { "epoch": 0.41224985347065224, "grad_norm": 0.31393861770629883, "learning_rate": 0.00019084900667046254, "loss": 11.6872, "step": 19694 }, { "epoch": 0.41227078623461444, "grad_norm": 0.27501678466796875, "learning_rate": 0.00019084809037500224, "loss": 11.6717, "step": 19695 }, { "epoch": 0.4122917189985766, "grad_norm": 0.22066736221313477, "learning_rate": 0.00019084717403586944, "loss": 11.6695, "step": 19696 }, { "epoch": 0.41231265176253873, "grad_norm": 0.2856481671333313, "learning_rate": 0.0001908462576530646, "loss": 11.675, "step": 19697 }, { "epoch": 0.4123335845265009, "grad_norm": 0.27460306882858276, "learning_rate": 0.00019084534122658812, "loss": 11.671, "step": 19698 }, { "epoch": 0.412354517290463, "grad_norm": 0.25601640343666077, "learning_rate": 0.0001908444247564405, "loss": 11.6667, "step": 19699 }, { "epoch": 0.41237545005442516, "grad_norm": 0.24485236406326294, "learning_rate": 0.00019084350824262212, "loss": 11.6693, "step": 19700 }, { "epoch": 0.41239638281838736, "grad_norm": 0.2840944826602936, "learning_rate": 0.00019084259168513344, "loss": 11.6827, "step": 19701 }, { "epoch": 0.4124173155823495, "grad_norm": 0.2762194275856018, "learning_rate": 0.0001908416750839749, "loss": 11.6713, "step": 19702 }, { "epoch": 0.41243824834631165, "grad_norm": 0.28686198592185974, "learning_rate": 0.00019084075843914697, "loss": 11.6688, "step": 19703 }, { "epoch": 0.4124591811102738, "grad_norm": 0.2831454873085022, "learning_rate": 0.00019083984175065005, "loss": 11.66, "step": 19704 }, { "epoch": 0.41248011387423594, "grad_norm": 0.2671646773815155, "learning_rate": 0.0001908389250184846, "loss": 11.681, "step": 19705 }, { "epoch": 0.4125010466381981, "grad_norm": 0.2703233063220978, "learning_rate": 0.0001908380082426511, "loss": 11.6636, "step": 19706 }, { "epoch": 0.4125219794021603, "grad_norm": 0.34975460171699524, "learning_rate": 0.00019083709142314988, "loss": 11.6626, "step": 19707 }, { "epoch": 0.41254291216612243, "grad_norm": 0.21186132729053497, "learning_rate": 0.0001908361745599815, "loss": 11.6829, "step": 19708 }, { "epoch": 0.4125638449300846, "grad_norm": 0.301907479763031, "learning_rate": 0.0001908352576531463, "loss": 11.674, "step": 19709 }, { "epoch": 0.4125847776940467, "grad_norm": 0.2283841222524643, "learning_rate": 0.0001908343407026448, "loss": 11.6724, "step": 19710 }, { "epoch": 0.41260571045800887, "grad_norm": 0.445571631193161, "learning_rate": 0.00019083342370847744, "loss": 11.6771, "step": 19711 }, { "epoch": 0.412626643221971, "grad_norm": 0.2446443736553192, "learning_rate": 0.0001908325066706446, "loss": 11.6734, "step": 19712 }, { "epoch": 0.41264757598593316, "grad_norm": 0.3359247148036957, "learning_rate": 0.00019083158958914674, "loss": 11.6731, "step": 19713 }, { "epoch": 0.41266850874989536, "grad_norm": 0.29587605595588684, "learning_rate": 0.00019083067246398433, "loss": 11.6662, "step": 19714 }, { "epoch": 0.4126894415138575, "grad_norm": 0.24173983931541443, "learning_rate": 0.00019082975529515782, "loss": 11.6744, "step": 19715 }, { "epoch": 0.41271037427781965, "grad_norm": 0.320767879486084, "learning_rate": 0.00019082883808266762, "loss": 11.6751, "step": 19716 }, { "epoch": 0.4127313070417818, "grad_norm": 0.31308260560035706, "learning_rate": 0.00019082792082651415, "loss": 11.6754, "step": 19717 }, { "epoch": 0.41275223980574394, "grad_norm": 0.2785224914550781, "learning_rate": 0.00019082700352669792, "loss": 11.6563, "step": 19718 }, { "epoch": 0.4127731725697061, "grad_norm": 0.2400587648153305, "learning_rate": 0.00019082608618321926, "loss": 11.6664, "step": 19719 }, { "epoch": 0.4127941053336683, "grad_norm": 0.24135491251945496, "learning_rate": 0.00019082516879607875, "loss": 11.6775, "step": 19720 }, { "epoch": 0.4128150380976304, "grad_norm": 0.30685317516326904, "learning_rate": 0.00019082425136527674, "loss": 11.6735, "step": 19721 }, { "epoch": 0.41283597086159257, "grad_norm": 0.21984225511550903, "learning_rate": 0.0001908233338908137, "loss": 11.6628, "step": 19722 }, { "epoch": 0.4128569036255547, "grad_norm": 0.30625855922698975, "learning_rate": 0.00019082241637269004, "loss": 11.6794, "step": 19723 }, { "epoch": 0.41287783638951686, "grad_norm": 0.2507808804512024, "learning_rate": 0.00019082149881090622, "loss": 11.6736, "step": 19724 }, { "epoch": 0.412898769153479, "grad_norm": 0.2767013907432556, "learning_rate": 0.00019082058120546268, "loss": 11.668, "step": 19725 }, { "epoch": 0.4129197019174412, "grad_norm": 0.2999260425567627, "learning_rate": 0.0001908196635563599, "loss": 11.6758, "step": 19726 }, { "epoch": 0.41294063468140335, "grad_norm": 0.30780279636383057, "learning_rate": 0.00019081874586359828, "loss": 11.6712, "step": 19727 }, { "epoch": 0.4129615674453655, "grad_norm": 0.3183727264404297, "learning_rate": 0.00019081782812717827, "loss": 11.683, "step": 19728 }, { "epoch": 0.41298250020932764, "grad_norm": 0.295097678899765, "learning_rate": 0.00019081691034710028, "loss": 11.6651, "step": 19729 }, { "epoch": 0.4130034329732898, "grad_norm": 0.2954424023628235, "learning_rate": 0.0001908159925233648, "loss": 11.6768, "step": 19730 }, { "epoch": 0.41302436573725193, "grad_norm": 0.2917608320713043, "learning_rate": 0.00019081507465597226, "loss": 11.6846, "step": 19731 }, { "epoch": 0.4130452985012141, "grad_norm": 0.23626026511192322, "learning_rate": 0.0001908141567449231, "loss": 11.6585, "step": 19732 }, { "epoch": 0.4130662312651763, "grad_norm": 0.30618759989738464, "learning_rate": 0.00019081323879021773, "loss": 11.6637, "step": 19733 }, { "epoch": 0.4130871640291384, "grad_norm": 0.3261905610561371, "learning_rate": 0.00019081232079185662, "loss": 11.6704, "step": 19734 }, { "epoch": 0.41310809679310057, "grad_norm": 0.23942406475543976, "learning_rate": 0.00019081140274984022, "loss": 11.6709, "step": 19735 }, { "epoch": 0.4131290295570627, "grad_norm": 0.40190503001213074, "learning_rate": 0.00019081048466416896, "loss": 11.6795, "step": 19736 }, { "epoch": 0.41314996232102486, "grad_norm": 0.2923485338687897, "learning_rate": 0.00019080956653484327, "loss": 11.6589, "step": 19737 }, { "epoch": 0.413170895084987, "grad_norm": 0.27757528424263, "learning_rate": 0.00019080864836186362, "loss": 11.6549, "step": 19738 }, { "epoch": 0.4131918278489492, "grad_norm": 0.23976802825927734, "learning_rate": 0.00019080773014523041, "loss": 11.645, "step": 19739 }, { "epoch": 0.41321276061291135, "grad_norm": 1.9873422384262085, "learning_rate": 0.0001908068118849441, "loss": 11.6562, "step": 19740 }, { "epoch": 0.4132336933768735, "grad_norm": 0.30788180232048035, "learning_rate": 0.00019080589358100517, "loss": 11.6755, "step": 19741 }, { "epoch": 0.41325462614083563, "grad_norm": 0.24810025095939636, "learning_rate": 0.000190804975233414, "loss": 11.6752, "step": 19742 }, { "epoch": 0.4132755589047978, "grad_norm": 0.29158133268356323, "learning_rate": 0.0001908040568421711, "loss": 11.6716, "step": 19743 }, { "epoch": 0.4132964916687599, "grad_norm": 0.3141266405582428, "learning_rate": 0.00019080313840727682, "loss": 11.6566, "step": 19744 }, { "epoch": 0.41331742443272207, "grad_norm": 0.2436762899160385, "learning_rate": 0.00019080221992873167, "loss": 11.671, "step": 19745 }, { "epoch": 0.41333835719668427, "grad_norm": 0.3100895583629608, "learning_rate": 0.00019080130140653606, "loss": 11.6749, "step": 19746 }, { "epoch": 0.4133592899606464, "grad_norm": 0.33483392000198364, "learning_rate": 0.0001908003828406905, "loss": 11.6671, "step": 19747 }, { "epoch": 0.41338022272460856, "grad_norm": 0.32153698801994324, "learning_rate": 0.0001907994642311953, "loss": 11.674, "step": 19748 }, { "epoch": 0.4134011554885707, "grad_norm": 0.3437776267528534, "learning_rate": 0.00019079854557805102, "loss": 11.6704, "step": 19749 }, { "epoch": 0.41342208825253285, "grad_norm": 0.31840378046035767, "learning_rate": 0.00019079762688125804, "loss": 11.6755, "step": 19750 }, { "epoch": 0.413443021016495, "grad_norm": 0.3611869513988495, "learning_rate": 0.00019079670814081684, "loss": 11.673, "step": 19751 }, { "epoch": 0.4134639537804572, "grad_norm": 0.22825726866722107, "learning_rate": 0.00019079578935672783, "loss": 11.6926, "step": 19752 }, { "epoch": 0.41348488654441934, "grad_norm": 0.29368454217910767, "learning_rate": 0.00019079487052899147, "loss": 11.6763, "step": 19753 }, { "epoch": 0.4135058193083815, "grad_norm": 0.2967759072780609, "learning_rate": 0.0001907939516576082, "loss": 11.6794, "step": 19754 }, { "epoch": 0.41352675207234363, "grad_norm": 0.2638140916824341, "learning_rate": 0.00019079303274257845, "loss": 11.6736, "step": 19755 }, { "epoch": 0.4135476848363058, "grad_norm": 0.27547043561935425, "learning_rate": 0.00019079211378390267, "loss": 11.6809, "step": 19756 }, { "epoch": 0.4135686176002679, "grad_norm": 0.3223177492618561, "learning_rate": 0.0001907911947815813, "loss": 11.6918, "step": 19757 }, { "epoch": 0.4135895503642301, "grad_norm": 0.3269215524196625, "learning_rate": 0.0001907902757356148, "loss": 11.6613, "step": 19758 }, { "epoch": 0.41361048312819226, "grad_norm": 0.38452550768852234, "learning_rate": 0.00019078935664600357, "loss": 11.6639, "step": 19759 }, { "epoch": 0.4136314158921544, "grad_norm": 0.26528629660606384, "learning_rate": 0.0001907884375127481, "loss": 11.6684, "step": 19760 }, { "epoch": 0.41365234865611655, "grad_norm": 0.2713168263435364, "learning_rate": 0.00019078751833584879, "loss": 11.6641, "step": 19761 }, { "epoch": 0.4136732814200787, "grad_norm": 0.3590140640735626, "learning_rate": 0.00019078659911530608, "loss": 11.6801, "step": 19762 }, { "epoch": 0.41369421418404084, "grad_norm": 0.32348373532295227, "learning_rate": 0.00019078567985112048, "loss": 11.6794, "step": 19763 }, { "epoch": 0.413715146948003, "grad_norm": 0.32146817445755005, "learning_rate": 0.00019078476054329237, "loss": 11.6655, "step": 19764 }, { "epoch": 0.4137360797119652, "grad_norm": 0.29856598377227783, "learning_rate": 0.0001907838411918222, "loss": 11.6791, "step": 19765 }, { "epoch": 0.41375701247592733, "grad_norm": 0.2853759825229645, "learning_rate": 0.00019078292179671043, "loss": 11.6739, "step": 19766 }, { "epoch": 0.4137779452398895, "grad_norm": 0.24654467403888702, "learning_rate": 0.00019078200235795746, "loss": 11.6526, "step": 19767 }, { "epoch": 0.4137988780038516, "grad_norm": 0.294952392578125, "learning_rate": 0.0001907810828755638, "loss": 11.6736, "step": 19768 }, { "epoch": 0.41381981076781377, "grad_norm": 0.30159083008766174, "learning_rate": 0.00019078016334952983, "loss": 11.6729, "step": 19769 }, { "epoch": 0.4138407435317759, "grad_norm": 0.2332662045955658, "learning_rate": 0.00019077924377985603, "loss": 11.6803, "step": 19770 }, { "epoch": 0.4138616762957381, "grad_norm": 0.3288039267063141, "learning_rate": 0.0001907783241665428, "loss": 11.6771, "step": 19771 }, { "epoch": 0.41388260905970026, "grad_norm": 0.2550630569458008, "learning_rate": 0.00019077740450959064, "loss": 11.6722, "step": 19772 }, { "epoch": 0.4139035418236624, "grad_norm": 0.2928078770637512, "learning_rate": 0.00019077648480899994, "loss": 11.6579, "step": 19773 }, { "epoch": 0.41392447458762455, "grad_norm": 0.7346647381782532, "learning_rate": 0.0001907755650647712, "loss": 11.6875, "step": 19774 }, { "epoch": 0.4139454073515867, "grad_norm": 0.23116149008274078, "learning_rate": 0.0001907746452769048, "loss": 11.6811, "step": 19775 }, { "epoch": 0.41396634011554884, "grad_norm": 0.23363475501537323, "learning_rate": 0.00019077372544540122, "loss": 11.6688, "step": 19776 }, { "epoch": 0.41398727287951104, "grad_norm": 0.3826017379760742, "learning_rate": 0.00019077280557026088, "loss": 11.6856, "step": 19777 }, { "epoch": 0.4140082056434732, "grad_norm": 0.2996879816055298, "learning_rate": 0.00019077188565148423, "loss": 11.6954, "step": 19778 }, { "epoch": 0.4140291384074353, "grad_norm": 0.26559579372406006, "learning_rate": 0.00019077096568907172, "loss": 11.6676, "step": 19779 }, { "epoch": 0.41405007117139747, "grad_norm": 0.3441770374774933, "learning_rate": 0.0001907700456830238, "loss": 11.6777, "step": 19780 }, { "epoch": 0.4140710039353596, "grad_norm": 0.30411478877067566, "learning_rate": 0.0001907691256333409, "loss": 11.6693, "step": 19781 }, { "epoch": 0.41409193669932176, "grad_norm": 0.2561984658241272, "learning_rate": 0.00019076820554002344, "loss": 11.6671, "step": 19782 }, { "epoch": 0.4141128694632839, "grad_norm": 0.32860514521598816, "learning_rate": 0.0001907672854030719, "loss": 11.6944, "step": 19783 }, { "epoch": 0.4141338022272461, "grad_norm": 0.25370606780052185, "learning_rate": 0.0001907663652224867, "loss": 11.6775, "step": 19784 }, { "epoch": 0.41415473499120825, "grad_norm": 0.3228239417076111, "learning_rate": 0.0001907654449982683, "loss": 11.6927, "step": 19785 }, { "epoch": 0.4141756677551704, "grad_norm": 0.24959155917167664, "learning_rate": 0.0001907645247304171, "loss": 11.6684, "step": 19786 }, { "epoch": 0.41419660051913254, "grad_norm": 0.2944454848766327, "learning_rate": 0.00019076360441893362, "loss": 11.6651, "step": 19787 }, { "epoch": 0.4142175332830947, "grad_norm": 0.23775136470794678, "learning_rate": 0.00019076268406381823, "loss": 11.6745, "step": 19788 }, { "epoch": 0.41423846604705683, "grad_norm": 0.33718401193618774, "learning_rate": 0.0001907617636650714, "loss": 11.6623, "step": 19789 }, { "epoch": 0.41425939881101903, "grad_norm": 0.29095813632011414, "learning_rate": 0.0001907608432226936, "loss": 11.6601, "step": 19790 }, { "epoch": 0.4142803315749812, "grad_norm": 0.19512340426445007, "learning_rate": 0.0001907599227366852, "loss": 11.6743, "step": 19791 }, { "epoch": 0.4143012643389433, "grad_norm": 0.2629339098930359, "learning_rate": 0.00019075900220704673, "loss": 11.6733, "step": 19792 }, { "epoch": 0.41432219710290547, "grad_norm": 0.24992673099040985, "learning_rate": 0.00019075808163377853, "loss": 11.6783, "step": 19793 }, { "epoch": 0.4143431298668676, "grad_norm": 0.3358423411846161, "learning_rate": 0.00019075716101688115, "loss": 11.6665, "step": 19794 }, { "epoch": 0.41436406263082975, "grad_norm": 0.3196238875389099, "learning_rate": 0.00019075624035635498, "loss": 11.6647, "step": 19795 }, { "epoch": 0.41438499539479196, "grad_norm": 0.3374142646789551, "learning_rate": 0.00019075531965220046, "loss": 11.6923, "step": 19796 }, { "epoch": 0.4144059281587541, "grad_norm": 0.26107528805732727, "learning_rate": 0.00019075439890441803, "loss": 11.6798, "step": 19797 }, { "epoch": 0.41442686092271624, "grad_norm": 0.38533177971839905, "learning_rate": 0.0001907534781130081, "loss": 11.6984, "step": 19798 }, { "epoch": 0.4144477936866784, "grad_norm": 0.23071198165416718, "learning_rate": 0.00019075255727797123, "loss": 11.6595, "step": 19799 }, { "epoch": 0.41446872645064053, "grad_norm": 0.28259557485580444, "learning_rate": 0.00019075163639930778, "loss": 11.6787, "step": 19800 }, { "epoch": 0.4144896592146027, "grad_norm": 0.35491707921028137, "learning_rate": 0.00019075071547701818, "loss": 11.6666, "step": 19801 }, { "epoch": 0.4145105919785648, "grad_norm": 0.31983283162117004, "learning_rate": 0.0001907497945111029, "loss": 11.6634, "step": 19802 }, { "epoch": 0.414531524742527, "grad_norm": 0.4238777160644531, "learning_rate": 0.00019074887350156234, "loss": 11.6729, "step": 19803 }, { "epoch": 0.41455245750648917, "grad_norm": 0.3809691369533539, "learning_rate": 0.00019074795244839702, "loss": 11.6845, "step": 19804 }, { "epoch": 0.4145733902704513, "grad_norm": 0.3499848544597626, "learning_rate": 0.00019074703135160733, "loss": 11.677, "step": 19805 }, { "epoch": 0.41459432303441346, "grad_norm": 0.2865459620952606, "learning_rate": 0.00019074611021119368, "loss": 11.6593, "step": 19806 }, { "epoch": 0.4146152557983756, "grad_norm": 0.2773614525794983, "learning_rate": 0.00019074518902715662, "loss": 11.674, "step": 19807 }, { "epoch": 0.41463618856233775, "grad_norm": 0.3143217861652374, "learning_rate": 0.0001907442677994965, "loss": 11.6711, "step": 19808 }, { "epoch": 0.41465712132629995, "grad_norm": 0.2914440929889679, "learning_rate": 0.00019074334652821378, "loss": 11.675, "step": 19809 }, { "epoch": 0.4146780540902621, "grad_norm": 0.2450336366891861, "learning_rate": 0.00019074242521330894, "loss": 11.6703, "step": 19810 }, { "epoch": 0.41469898685422424, "grad_norm": 0.3528762757778168, "learning_rate": 0.0001907415038547824, "loss": 11.6769, "step": 19811 }, { "epoch": 0.4147199196181864, "grad_norm": 0.2964296340942383, "learning_rate": 0.00019074058245263458, "loss": 11.6698, "step": 19812 }, { "epoch": 0.41474085238214853, "grad_norm": 0.3344893753528595, "learning_rate": 0.00019073966100686596, "loss": 11.668, "step": 19813 }, { "epoch": 0.4147617851461107, "grad_norm": 0.30048689246177673, "learning_rate": 0.00019073873951747693, "loss": 11.6995, "step": 19814 }, { "epoch": 0.4147827179100729, "grad_norm": 0.37053096294403076, "learning_rate": 0.00019073781798446802, "loss": 11.6672, "step": 19815 }, { "epoch": 0.414803650674035, "grad_norm": 0.3176805078983307, "learning_rate": 0.00019073689640783958, "loss": 11.6774, "step": 19816 }, { "epoch": 0.41482458343799716, "grad_norm": 0.2824370265007019, "learning_rate": 0.00019073597478759213, "loss": 11.6657, "step": 19817 }, { "epoch": 0.4148455162019593, "grad_norm": 0.24258378148078918, "learning_rate": 0.00019073505312372605, "loss": 11.6748, "step": 19818 }, { "epoch": 0.41486644896592145, "grad_norm": 0.2588724195957184, "learning_rate": 0.00019073413141624187, "loss": 11.6763, "step": 19819 }, { "epoch": 0.4148873817298836, "grad_norm": 0.28880050778388977, "learning_rate": 0.0001907332096651399, "loss": 11.6697, "step": 19820 }, { "epoch": 0.41490831449384574, "grad_norm": 0.4122743010520935, "learning_rate": 0.00019073228787042071, "loss": 11.674, "step": 19821 }, { "epoch": 0.41492924725780794, "grad_norm": 0.3667631149291992, "learning_rate": 0.00019073136603208466, "loss": 11.6772, "step": 19822 }, { "epoch": 0.4149501800217701, "grad_norm": 0.26441240310668945, "learning_rate": 0.0001907304441501322, "loss": 11.6825, "step": 19823 }, { "epoch": 0.41497111278573223, "grad_norm": 0.3579091727733612, "learning_rate": 0.00019072952222456383, "loss": 11.6874, "step": 19824 }, { "epoch": 0.4149920455496944, "grad_norm": 0.2623949646949768, "learning_rate": 0.00019072860025538, "loss": 11.6695, "step": 19825 }, { "epoch": 0.4150129783136565, "grad_norm": 0.2636190354824066, "learning_rate": 0.00019072767824258107, "loss": 11.6733, "step": 19826 }, { "epoch": 0.41503391107761867, "grad_norm": 0.24561458826065063, "learning_rate": 0.0001907267561861675, "loss": 11.6783, "step": 19827 }, { "epoch": 0.41505484384158087, "grad_norm": 0.3698432743549347, "learning_rate": 0.0001907258340861398, "loss": 11.6863, "step": 19828 }, { "epoch": 0.415075776605543, "grad_norm": 0.234842449426651, "learning_rate": 0.00019072491194249838, "loss": 11.6547, "step": 19829 }, { "epoch": 0.41509670936950516, "grad_norm": 0.30930912494659424, "learning_rate": 0.00019072398975524365, "loss": 11.6861, "step": 19830 }, { "epoch": 0.4151176421334673, "grad_norm": 0.3819533586502075, "learning_rate": 0.0001907230675243761, "loss": 11.6626, "step": 19831 }, { "epoch": 0.41513857489742945, "grad_norm": 0.42807722091674805, "learning_rate": 0.00019072214524989614, "loss": 11.6754, "step": 19832 }, { "epoch": 0.4151595076613916, "grad_norm": 0.289777934551239, "learning_rate": 0.00019072122293180423, "loss": 11.6652, "step": 19833 }, { "epoch": 0.41518044042535374, "grad_norm": 0.3017142713069916, "learning_rate": 0.0001907203005701008, "loss": 11.6686, "step": 19834 }, { "epoch": 0.41520137318931594, "grad_norm": 0.3874691426753998, "learning_rate": 0.00019071937816478632, "loss": 11.6959, "step": 19835 }, { "epoch": 0.4152223059532781, "grad_norm": 0.2575210928916931, "learning_rate": 0.00019071845571586122, "loss": 11.6735, "step": 19836 }, { "epoch": 0.4152432387172402, "grad_norm": 0.24002283811569214, "learning_rate": 0.00019071753322332592, "loss": 11.661, "step": 19837 }, { "epoch": 0.41526417148120237, "grad_norm": 0.305785208940506, "learning_rate": 0.00019071661068718093, "loss": 11.6769, "step": 19838 }, { "epoch": 0.4152851042451645, "grad_norm": 0.33560484647750854, "learning_rate": 0.00019071568810742658, "loss": 11.6566, "step": 19839 }, { "epoch": 0.41530603700912666, "grad_norm": 0.286266028881073, "learning_rate": 0.00019071476548406343, "loss": 11.6657, "step": 19840 }, { "epoch": 0.41532696977308886, "grad_norm": 0.3516821265220642, "learning_rate": 0.00019071384281709185, "loss": 11.6748, "step": 19841 }, { "epoch": 0.415347902537051, "grad_norm": 0.28831544518470764, "learning_rate": 0.00019071292010651233, "loss": 11.671, "step": 19842 }, { "epoch": 0.41536883530101315, "grad_norm": 0.2975551187992096, "learning_rate": 0.00019071199735232528, "loss": 11.6873, "step": 19843 }, { "epoch": 0.4153897680649753, "grad_norm": 0.3395232856273651, "learning_rate": 0.00019071107455453118, "loss": 11.6733, "step": 19844 }, { "epoch": 0.41541070082893744, "grad_norm": 0.2492370903491974, "learning_rate": 0.0001907101517131304, "loss": 11.6762, "step": 19845 }, { "epoch": 0.4154316335928996, "grad_norm": 1.4104300737380981, "learning_rate": 0.00019070922882812345, "loss": 11.636, "step": 19846 }, { "epoch": 0.4154525663568618, "grad_norm": 0.21633391082286835, "learning_rate": 0.00019070830589951076, "loss": 11.6627, "step": 19847 }, { "epoch": 0.41547349912082393, "grad_norm": 0.30578187108039856, "learning_rate": 0.00019070738292729277, "loss": 11.6714, "step": 19848 }, { "epoch": 0.4154944318847861, "grad_norm": 0.248399555683136, "learning_rate": 0.00019070645991146992, "loss": 11.6778, "step": 19849 }, { "epoch": 0.4155153646487482, "grad_norm": 0.29975202679634094, "learning_rate": 0.00019070553685204264, "loss": 11.6816, "step": 19850 }, { "epoch": 0.41553629741271036, "grad_norm": 0.29364460706710815, "learning_rate": 0.00019070461374901142, "loss": 11.6683, "step": 19851 }, { "epoch": 0.4155572301766725, "grad_norm": 0.2518980801105499, "learning_rate": 0.00019070369060237666, "loss": 11.6768, "step": 19852 }, { "epoch": 0.41557816294063465, "grad_norm": 0.25803473591804504, "learning_rate": 0.00019070276741213883, "loss": 11.6773, "step": 19853 }, { "epoch": 0.41559909570459685, "grad_norm": 0.2587244510650635, "learning_rate": 0.00019070184417829835, "loss": 11.6711, "step": 19854 }, { "epoch": 0.415620028468559, "grad_norm": 0.2514171004295349, "learning_rate": 0.00019070092090085568, "loss": 11.6751, "step": 19855 }, { "epoch": 0.41564096123252114, "grad_norm": 0.26620984077453613, "learning_rate": 0.00019069999757981125, "loss": 11.6616, "step": 19856 }, { "epoch": 0.4156618939964833, "grad_norm": 0.3114781379699707, "learning_rate": 0.00019069907421516552, "loss": 11.6587, "step": 19857 }, { "epoch": 0.41568282676044543, "grad_norm": 0.31042781472206116, "learning_rate": 0.00019069815080691895, "loss": 11.6797, "step": 19858 }, { "epoch": 0.4157037595244076, "grad_norm": 0.2446472942829132, "learning_rate": 0.00019069722735507195, "loss": 11.6807, "step": 19859 }, { "epoch": 0.4157246922883698, "grad_norm": 0.42829132080078125, "learning_rate": 0.00019069630385962495, "loss": 11.7011, "step": 19860 }, { "epoch": 0.4157456250523319, "grad_norm": 0.25863534212112427, "learning_rate": 0.00019069538032057842, "loss": 11.6735, "step": 19861 }, { "epoch": 0.41576655781629407, "grad_norm": 0.32837599515914917, "learning_rate": 0.00019069445673793284, "loss": 11.6524, "step": 19862 }, { "epoch": 0.4157874905802562, "grad_norm": 0.3071647584438324, "learning_rate": 0.0001906935331116886, "loss": 11.674, "step": 19863 }, { "epoch": 0.41580842334421836, "grad_norm": 0.33924600481987, "learning_rate": 0.00019069260944184615, "loss": 11.6599, "step": 19864 }, { "epoch": 0.4158293561081805, "grad_norm": 0.35244470834732056, "learning_rate": 0.00019069168572840596, "loss": 11.687, "step": 19865 }, { "epoch": 0.4158502888721427, "grad_norm": 0.25622987747192383, "learning_rate": 0.00019069076197136844, "loss": 11.6682, "step": 19866 }, { "epoch": 0.41587122163610485, "grad_norm": 0.2725464403629303, "learning_rate": 0.0001906898381707341, "loss": 11.6476, "step": 19867 }, { "epoch": 0.415892154400067, "grad_norm": 0.2575601637363434, "learning_rate": 0.0001906889143265033, "loss": 11.6568, "step": 19868 }, { "epoch": 0.41591308716402914, "grad_norm": 0.39066609740257263, "learning_rate": 0.0001906879904386765, "loss": 11.675, "step": 19869 }, { "epoch": 0.4159340199279913, "grad_norm": 0.36191028356552124, "learning_rate": 0.0001906870665072542, "loss": 11.6856, "step": 19870 }, { "epoch": 0.41595495269195343, "grad_norm": 0.2977895736694336, "learning_rate": 0.00019068614253223677, "loss": 11.6751, "step": 19871 }, { "epoch": 0.4159758854559156, "grad_norm": 0.27682316303253174, "learning_rate": 0.00019068521851362476, "loss": 11.6606, "step": 19872 }, { "epoch": 0.4159968182198778, "grad_norm": 0.2637793719768524, "learning_rate": 0.00019068429445141848, "loss": 11.6952, "step": 19873 }, { "epoch": 0.4160177509838399, "grad_norm": 0.3400776982307434, "learning_rate": 0.0001906833703456185, "loss": 11.6753, "step": 19874 }, { "epoch": 0.41603868374780206, "grad_norm": 0.36377495527267456, "learning_rate": 0.00019068244619622517, "loss": 11.6629, "step": 19875 }, { "epoch": 0.4160596165117642, "grad_norm": 0.23455579578876495, "learning_rate": 0.000190681522003239, "loss": 11.652, "step": 19876 }, { "epoch": 0.41608054927572635, "grad_norm": 0.28039586544036865, "learning_rate": 0.00019068059776666037, "loss": 11.663, "step": 19877 }, { "epoch": 0.4161014820396885, "grad_norm": 0.27225589752197266, "learning_rate": 0.0001906796734864898, "loss": 11.6741, "step": 19878 }, { "epoch": 0.4161224148036507, "grad_norm": 0.2754122018814087, "learning_rate": 0.00019067874916272763, "loss": 11.6681, "step": 19879 }, { "epoch": 0.41614334756761284, "grad_norm": 0.22659792006015778, "learning_rate": 0.00019067782479537443, "loss": 11.6695, "step": 19880 }, { "epoch": 0.416164280331575, "grad_norm": 0.3104671239852905, "learning_rate": 0.00019067690038443056, "loss": 11.6744, "step": 19881 }, { "epoch": 0.41618521309553713, "grad_norm": 0.3217677175998688, "learning_rate": 0.00019067597592989648, "loss": 11.684, "step": 19882 }, { "epoch": 0.4162061458594993, "grad_norm": 0.2815398871898651, "learning_rate": 0.00019067505143177265, "loss": 11.6631, "step": 19883 }, { "epoch": 0.4162270786234614, "grad_norm": 0.44492796063423157, "learning_rate": 0.0001906741268900595, "loss": 11.6548, "step": 19884 }, { "epoch": 0.4162480113874236, "grad_norm": 0.3548087775707245, "learning_rate": 0.00019067320230475748, "loss": 11.6682, "step": 19885 }, { "epoch": 0.41626894415138577, "grad_norm": 0.2691611647605896, "learning_rate": 0.00019067227767586705, "loss": 11.6788, "step": 19886 }, { "epoch": 0.4162898769153479, "grad_norm": 0.2458062618970871, "learning_rate": 0.0001906713530033886, "loss": 11.6654, "step": 19887 }, { "epoch": 0.41631080967931006, "grad_norm": 0.37229853868484497, "learning_rate": 0.00019067042828732267, "loss": 11.6826, "step": 19888 }, { "epoch": 0.4163317424432722, "grad_norm": 0.2963806688785553, "learning_rate": 0.0001906695035276696, "loss": 11.6749, "step": 19889 }, { "epoch": 0.41635267520723435, "grad_norm": 0.3310701251029968, "learning_rate": 0.0001906685787244299, "loss": 11.6673, "step": 19890 }, { "epoch": 0.4163736079711965, "grad_norm": 0.3669862747192383, "learning_rate": 0.00019066765387760402, "loss": 11.6794, "step": 19891 }, { "epoch": 0.4163945407351587, "grad_norm": 0.29001185297966003, "learning_rate": 0.00019066672898719237, "loss": 11.6793, "step": 19892 }, { "epoch": 0.41641547349912084, "grad_norm": 0.2711872160434723, "learning_rate": 0.00019066580405319535, "loss": 11.6569, "step": 19893 }, { "epoch": 0.416436406263083, "grad_norm": 0.293148398399353, "learning_rate": 0.00019066487907561355, "loss": 11.689, "step": 19894 }, { "epoch": 0.4164573390270451, "grad_norm": 0.33072033524513245, "learning_rate": 0.00019066395405444725, "loss": 11.6535, "step": 19895 }, { "epoch": 0.41647827179100727, "grad_norm": 0.28998231887817383, "learning_rate": 0.00019066302898969702, "loss": 11.6675, "step": 19896 }, { "epoch": 0.4164992045549694, "grad_norm": 0.26812735199928284, "learning_rate": 0.00019066210388136323, "loss": 11.6874, "step": 19897 }, { "epoch": 0.4165201373189316, "grad_norm": 0.31508636474609375, "learning_rate": 0.00019066117872944637, "loss": 11.6616, "step": 19898 }, { "epoch": 0.41654107008289376, "grad_norm": 0.3189955949783325, "learning_rate": 0.00019066025353394684, "loss": 11.6567, "step": 19899 }, { "epoch": 0.4165620028468559, "grad_norm": 0.2624196410179138, "learning_rate": 0.00019065932829486512, "loss": 11.6739, "step": 19900 }, { "epoch": 0.41658293561081805, "grad_norm": 0.29165223240852356, "learning_rate": 0.00019065840301220167, "loss": 11.6934, "step": 19901 }, { "epoch": 0.4166038683747802, "grad_norm": 0.26296454668045044, "learning_rate": 0.00019065747768595687, "loss": 11.6792, "step": 19902 }, { "epoch": 0.41662480113874234, "grad_norm": 0.2672307789325714, "learning_rate": 0.0001906565523161312, "loss": 11.6767, "step": 19903 }, { "epoch": 0.41664573390270454, "grad_norm": 0.2634826898574829, "learning_rate": 0.00019065562690272515, "loss": 11.6592, "step": 19904 }, { "epoch": 0.4166666666666667, "grad_norm": 0.2810455858707428, "learning_rate": 0.00019065470144573912, "loss": 11.6678, "step": 19905 }, { "epoch": 0.41668759943062883, "grad_norm": 0.32978054881095886, "learning_rate": 0.0001906537759451735, "loss": 11.6622, "step": 19906 }, { "epoch": 0.416708532194591, "grad_norm": 0.310439795255661, "learning_rate": 0.00019065285040102884, "loss": 11.664, "step": 19907 }, { "epoch": 0.4167294649585531, "grad_norm": 0.27152058482170105, "learning_rate": 0.00019065192481330554, "loss": 11.6564, "step": 19908 }, { "epoch": 0.41675039772251526, "grad_norm": 0.31145626306533813, "learning_rate": 0.00019065099918200403, "loss": 11.6808, "step": 19909 }, { "epoch": 0.4167713304864774, "grad_norm": 0.2417410910129547, "learning_rate": 0.00019065007350712478, "loss": 11.6697, "step": 19910 }, { "epoch": 0.4167922632504396, "grad_norm": 0.2966710031032562, "learning_rate": 0.00019064914778866822, "loss": 11.6625, "step": 19911 }, { "epoch": 0.41681319601440175, "grad_norm": 0.25912266969680786, "learning_rate": 0.00019064822202663478, "loss": 11.6665, "step": 19912 }, { "epoch": 0.4168341287783639, "grad_norm": 0.26736512780189514, "learning_rate": 0.00019064729622102493, "loss": 11.6784, "step": 19913 }, { "epoch": 0.41685506154232604, "grad_norm": 0.25109946727752686, "learning_rate": 0.00019064637037183912, "loss": 11.6716, "step": 19914 }, { "epoch": 0.4168759943062882, "grad_norm": 0.24513685703277588, "learning_rate": 0.00019064544447907779, "loss": 11.675, "step": 19915 }, { "epoch": 0.41689692707025033, "grad_norm": 0.26672905683517456, "learning_rate": 0.00019064451854274137, "loss": 11.6627, "step": 19916 }, { "epoch": 0.41691785983421253, "grad_norm": 0.5106838941574097, "learning_rate": 0.00019064359256283032, "loss": 11.6691, "step": 19917 }, { "epoch": 0.4169387925981747, "grad_norm": 0.330885112285614, "learning_rate": 0.00019064266653934505, "loss": 11.669, "step": 19918 }, { "epoch": 0.4169597253621368, "grad_norm": 0.27022019028663635, "learning_rate": 0.00019064174047228605, "loss": 11.6713, "step": 19919 }, { "epoch": 0.41698065812609897, "grad_norm": 0.3018261790275574, "learning_rate": 0.00019064081436165372, "loss": 11.6817, "step": 19920 }, { "epoch": 0.4170015908900611, "grad_norm": 0.3121267259120941, "learning_rate": 0.00019063988820744858, "loss": 11.6843, "step": 19921 }, { "epoch": 0.41702252365402326, "grad_norm": 0.23883341252803802, "learning_rate": 0.000190638962009671, "loss": 11.6658, "step": 19922 }, { "epoch": 0.4170434564179854, "grad_norm": 0.4005714952945709, "learning_rate": 0.0001906380357683215, "loss": 11.6604, "step": 19923 }, { "epoch": 0.4170643891819476, "grad_norm": 0.2969123423099518, "learning_rate": 0.00019063710948340044, "loss": 11.6771, "step": 19924 }, { "epoch": 0.41708532194590975, "grad_norm": 0.2732032239437103, "learning_rate": 0.0001906361831549083, "loss": 11.681, "step": 19925 }, { "epoch": 0.4171062547098719, "grad_norm": 0.22740589082241058, "learning_rate": 0.00019063525678284556, "loss": 11.6799, "step": 19926 }, { "epoch": 0.41712718747383404, "grad_norm": 0.3502745032310486, "learning_rate": 0.0001906343303672126, "loss": 11.6807, "step": 19927 }, { "epoch": 0.4171481202377962, "grad_norm": 0.2679480314254761, "learning_rate": 0.00019063340390800993, "loss": 11.6584, "step": 19928 }, { "epoch": 0.4171690530017583, "grad_norm": 0.24924050271511078, "learning_rate": 0.00019063247740523794, "loss": 11.6571, "step": 19929 }, { "epoch": 0.41718998576572053, "grad_norm": 0.31960976123809814, "learning_rate": 0.00019063155085889712, "loss": 11.6708, "step": 19930 }, { "epoch": 0.4172109185296827, "grad_norm": 0.33670496940612793, "learning_rate": 0.0001906306242689879, "loss": 11.668, "step": 19931 }, { "epoch": 0.4172318512936448, "grad_norm": 0.29812952876091003, "learning_rate": 0.00019062969763551074, "loss": 11.6769, "step": 19932 }, { "epoch": 0.41725278405760696, "grad_norm": 0.3125721514225006, "learning_rate": 0.00019062877095846602, "loss": 11.6889, "step": 19933 }, { "epoch": 0.4172737168215691, "grad_norm": 0.4215731620788574, "learning_rate": 0.00019062784423785426, "loss": 11.6759, "step": 19934 }, { "epoch": 0.41729464958553125, "grad_norm": 0.2432103008031845, "learning_rate": 0.0001906269174736759, "loss": 11.666, "step": 19935 }, { "epoch": 0.41731558234949345, "grad_norm": 0.2794901132583618, "learning_rate": 0.00019062599066593135, "loss": 11.6488, "step": 19936 }, { "epoch": 0.4173365151134556, "grad_norm": 0.3217996060848236, "learning_rate": 0.00019062506381462105, "loss": 11.6684, "step": 19937 }, { "epoch": 0.41735744787741774, "grad_norm": 0.3245691657066345, "learning_rate": 0.00019062413691974545, "loss": 11.6726, "step": 19938 }, { "epoch": 0.4173783806413799, "grad_norm": 0.3029713034629822, "learning_rate": 0.00019062320998130506, "loss": 11.6675, "step": 19939 }, { "epoch": 0.41739931340534203, "grad_norm": 0.26089170575141907, "learning_rate": 0.00019062228299930026, "loss": 11.6819, "step": 19940 }, { "epoch": 0.4174202461693042, "grad_norm": 0.26147541403770447, "learning_rate": 0.0001906213559737315, "loss": 11.651, "step": 19941 }, { "epoch": 0.4174411789332663, "grad_norm": 0.3320203125476837, "learning_rate": 0.00019062042890459927, "loss": 11.6849, "step": 19942 }, { "epoch": 0.4174621116972285, "grad_norm": 0.42436274886131287, "learning_rate": 0.00019061950179190395, "loss": 11.6828, "step": 19943 }, { "epoch": 0.41748304446119067, "grad_norm": 0.3185548186302185, "learning_rate": 0.00019061857463564602, "loss": 11.664, "step": 19944 }, { "epoch": 0.4175039772251528, "grad_norm": 0.2944299876689911, "learning_rate": 0.00019061764743582592, "loss": 11.6736, "step": 19945 }, { "epoch": 0.41752490998911496, "grad_norm": 0.27075257897377014, "learning_rate": 0.00019061672019244415, "loss": 11.6698, "step": 19946 }, { "epoch": 0.4175458427530771, "grad_norm": 0.3669733703136444, "learning_rate": 0.00019061579290550107, "loss": 11.6765, "step": 19947 }, { "epoch": 0.41756677551703925, "grad_norm": 0.4007703959941864, "learning_rate": 0.00019061486557499718, "loss": 11.6781, "step": 19948 }, { "epoch": 0.41758770828100145, "grad_norm": 0.35997408628463745, "learning_rate": 0.00019061393820093286, "loss": 11.6821, "step": 19949 }, { "epoch": 0.4176086410449636, "grad_norm": 0.3551355302333832, "learning_rate": 0.00019061301078330864, "loss": 11.6811, "step": 19950 }, { "epoch": 0.41762957380892574, "grad_norm": 0.25751546025276184, "learning_rate": 0.00019061208332212494, "loss": 11.6527, "step": 19951 }, { "epoch": 0.4176505065728879, "grad_norm": 0.36978816986083984, "learning_rate": 0.00019061115581738217, "loss": 11.6716, "step": 19952 }, { "epoch": 0.41767143933685, "grad_norm": 0.36741212010383606, "learning_rate": 0.00019061022826908085, "loss": 11.6669, "step": 19953 }, { "epoch": 0.41769237210081217, "grad_norm": 0.2640628218650818, "learning_rate": 0.00019060930067722132, "loss": 11.6583, "step": 19954 }, { "epoch": 0.41771330486477437, "grad_norm": 0.27761492133140564, "learning_rate": 0.0001906083730418041, "loss": 11.6497, "step": 19955 }, { "epoch": 0.4177342376287365, "grad_norm": 0.31852632761001587, "learning_rate": 0.00019060744536282962, "loss": 11.6843, "step": 19956 }, { "epoch": 0.41775517039269866, "grad_norm": 0.2906469404697418, "learning_rate": 0.00019060651764029834, "loss": 11.6782, "step": 19957 }, { "epoch": 0.4177761031566608, "grad_norm": 0.2936926484107971, "learning_rate": 0.0001906055898742107, "loss": 11.6643, "step": 19958 }, { "epoch": 0.41779703592062295, "grad_norm": 0.2842041254043579, "learning_rate": 0.00019060466206456708, "loss": 11.6584, "step": 19959 }, { "epoch": 0.4178179686845851, "grad_norm": 0.33662712574005127, "learning_rate": 0.00019060373421136804, "loss": 11.6886, "step": 19960 }, { "epoch": 0.41783890144854724, "grad_norm": 0.25777581334114075, "learning_rate": 0.00019060280631461396, "loss": 11.6731, "step": 19961 }, { "epoch": 0.41785983421250944, "grad_norm": 0.27507802844047546, "learning_rate": 0.00019060187837430527, "loss": 11.6755, "step": 19962 }, { "epoch": 0.4178807669764716, "grad_norm": 0.3580242693424225, "learning_rate": 0.00019060095039044245, "loss": 11.6787, "step": 19963 }, { "epoch": 0.41790169974043373, "grad_norm": 0.21984447538852692, "learning_rate": 0.00019060002236302595, "loss": 11.6694, "step": 19964 }, { "epoch": 0.4179226325043959, "grad_norm": 0.25059381127357483, "learning_rate": 0.00019059909429205621, "loss": 11.674, "step": 19965 }, { "epoch": 0.417943565268358, "grad_norm": 0.31787970662117004, "learning_rate": 0.00019059816617753364, "loss": 11.6719, "step": 19966 }, { "epoch": 0.41796449803232016, "grad_norm": 0.31726959347724915, "learning_rate": 0.00019059723801945875, "loss": 11.6701, "step": 19967 }, { "epoch": 0.41798543079628236, "grad_norm": 0.2970918118953705, "learning_rate": 0.00019059630981783191, "loss": 11.6617, "step": 19968 }, { "epoch": 0.4180063635602445, "grad_norm": 0.29299452900886536, "learning_rate": 0.00019059538157265366, "loss": 11.6825, "step": 19969 }, { "epoch": 0.41802729632420665, "grad_norm": 0.3354470729827881, "learning_rate": 0.00019059445328392438, "loss": 11.6972, "step": 19970 }, { "epoch": 0.4180482290881688, "grad_norm": 0.35608553886413574, "learning_rate": 0.00019059352495164451, "loss": 11.6578, "step": 19971 }, { "epoch": 0.41806916185213094, "grad_norm": 0.2747066617012024, "learning_rate": 0.00019059259657581453, "loss": 11.6797, "step": 19972 }, { "epoch": 0.4180900946160931, "grad_norm": 0.2600780427455902, "learning_rate": 0.00019059166815643485, "loss": 11.6704, "step": 19973 }, { "epoch": 0.4181110273800553, "grad_norm": 0.30592548847198486, "learning_rate": 0.000190590739693506, "loss": 11.6827, "step": 19974 }, { "epoch": 0.41813196014401743, "grad_norm": 0.24958083033561707, "learning_rate": 0.0001905898111870283, "loss": 11.6656, "step": 19975 }, { "epoch": 0.4181528929079796, "grad_norm": 0.28216710686683655, "learning_rate": 0.00019058888263700228, "loss": 11.6755, "step": 19976 }, { "epoch": 0.4181738256719417, "grad_norm": 0.28194206953048706, "learning_rate": 0.00019058795404342836, "loss": 11.6672, "step": 19977 }, { "epoch": 0.41819475843590387, "grad_norm": 0.3134080767631531, "learning_rate": 0.00019058702540630703, "loss": 11.6641, "step": 19978 }, { "epoch": 0.418215691199866, "grad_norm": 0.273574560880661, "learning_rate": 0.0001905860967256387, "loss": 11.6649, "step": 19979 }, { "epoch": 0.41823662396382816, "grad_norm": 0.2693462371826172, "learning_rate": 0.0001905851680014238, "loss": 11.668, "step": 19980 }, { "epoch": 0.41825755672779036, "grad_norm": 0.23241430521011353, "learning_rate": 0.0001905842392336628, "loss": 11.6756, "step": 19981 }, { "epoch": 0.4182784894917525, "grad_norm": 0.305372029542923, "learning_rate": 0.00019058331042235614, "loss": 11.6749, "step": 19982 }, { "epoch": 0.41829942225571465, "grad_norm": 0.25592195987701416, "learning_rate": 0.00019058238156750425, "loss": 11.6532, "step": 19983 }, { "epoch": 0.4183203550196768, "grad_norm": 0.33765581250190735, "learning_rate": 0.0001905814526691076, "loss": 11.6778, "step": 19984 }, { "epoch": 0.41834128778363894, "grad_norm": 0.2978449761867523, "learning_rate": 0.00019058052372716665, "loss": 11.6713, "step": 19985 }, { "epoch": 0.4183622205476011, "grad_norm": 0.2934790551662445, "learning_rate": 0.0001905795947416818, "loss": 11.6659, "step": 19986 }, { "epoch": 0.4183831533115633, "grad_norm": 0.3151269257068634, "learning_rate": 0.00019057866571265354, "loss": 11.6521, "step": 19987 }, { "epoch": 0.4184040860755254, "grad_norm": 0.23717451095581055, "learning_rate": 0.0001905777366400823, "loss": 11.667, "step": 19988 }, { "epoch": 0.4184250188394876, "grad_norm": 0.28372564911842346, "learning_rate": 0.00019057680752396854, "loss": 11.6664, "step": 19989 }, { "epoch": 0.4184459516034497, "grad_norm": 0.2635188102722168, "learning_rate": 0.00019057587836431266, "loss": 11.6614, "step": 19990 }, { "epoch": 0.41846688436741186, "grad_norm": 0.28051912784576416, "learning_rate": 0.00019057494916111518, "loss": 11.6633, "step": 19991 }, { "epoch": 0.418487817131374, "grad_norm": 0.2753032445907593, "learning_rate": 0.00019057401991437649, "loss": 11.6673, "step": 19992 }, { "epoch": 0.4185087498953362, "grad_norm": 0.3559580445289612, "learning_rate": 0.00019057309062409705, "loss": 11.6624, "step": 19993 }, { "epoch": 0.41852968265929835, "grad_norm": 0.32276687026023865, "learning_rate": 0.0001905721612902773, "loss": 11.676, "step": 19994 }, { "epoch": 0.4185506154232605, "grad_norm": 0.36473217606544495, "learning_rate": 0.00019057123191291772, "loss": 11.6664, "step": 19995 }, { "epoch": 0.41857154818722264, "grad_norm": 0.3024616539478302, "learning_rate": 0.00019057030249201872, "loss": 11.6526, "step": 19996 }, { "epoch": 0.4185924809511848, "grad_norm": 0.2615247666835785, "learning_rate": 0.00019056937302758075, "loss": 11.6766, "step": 19997 }, { "epoch": 0.41861341371514693, "grad_norm": 0.2893396019935608, "learning_rate": 0.00019056844351960427, "loss": 11.6572, "step": 19998 }, { "epoch": 0.4186343464791091, "grad_norm": 0.3538023829460144, "learning_rate": 0.00019056751396808973, "loss": 11.6787, "step": 19999 }, { "epoch": 0.4186552792430713, "grad_norm": 0.29776352643966675, "learning_rate": 0.0001905665843730376, "loss": 11.685, "step": 20000 }, { "epoch": 0.4186552792430713, "eval_loss": 11.673357009887695, "eval_runtime": 34.3397, "eval_samples_per_second": 27.985, "eval_steps_per_second": 7.018, "step": 20000 }, { "epoch": 0.4186762120070334, "grad_norm": 0.3423495888710022, "learning_rate": 0.00019056565473444826, "loss": 11.6587, "step": 20001 }, { "epoch": 0.41869714477099557, "grad_norm": 0.29350897669792175, "learning_rate": 0.0001905647250523222, "loss": 11.6876, "step": 20002 }, { "epoch": 0.4187180775349577, "grad_norm": 0.28399795293807983, "learning_rate": 0.00019056379532665988, "loss": 11.6715, "step": 20003 }, { "epoch": 0.41873901029891986, "grad_norm": 0.28734543919563293, "learning_rate": 0.0001905628655574617, "loss": 11.6742, "step": 20004 }, { "epoch": 0.418759943062882, "grad_norm": 0.24904923141002655, "learning_rate": 0.00019056193574472816, "loss": 11.6791, "step": 20005 }, { "epoch": 0.4187808758268442, "grad_norm": 0.24748633801937103, "learning_rate": 0.00019056100588845968, "loss": 11.6688, "step": 20006 }, { "epoch": 0.41880180859080635, "grad_norm": 0.30974724888801575, "learning_rate": 0.0001905600759886567, "loss": 11.6761, "step": 20007 }, { "epoch": 0.4188227413547685, "grad_norm": 0.26599961519241333, "learning_rate": 0.0001905591460453197, "loss": 11.6699, "step": 20008 }, { "epoch": 0.41884367411873064, "grad_norm": 0.3016759157180786, "learning_rate": 0.00019055821605844906, "loss": 11.677, "step": 20009 }, { "epoch": 0.4188646068826928, "grad_norm": 0.26733294129371643, "learning_rate": 0.0001905572860280453, "loss": 11.657, "step": 20010 }, { "epoch": 0.4188855396466549, "grad_norm": 0.39506325125694275, "learning_rate": 0.00019055635595410884, "loss": 11.687, "step": 20011 }, { "epoch": 0.4189064724106171, "grad_norm": 0.2552371621131897, "learning_rate": 0.00019055542583664011, "loss": 11.661, "step": 20012 }, { "epoch": 0.41892740517457927, "grad_norm": 0.30492037534713745, "learning_rate": 0.0001905544956756396, "loss": 11.6804, "step": 20013 }, { "epoch": 0.4189483379385414, "grad_norm": 0.32747405767440796, "learning_rate": 0.0001905535654711077, "loss": 11.6832, "step": 20014 }, { "epoch": 0.41896927070250356, "grad_norm": 0.2921220660209656, "learning_rate": 0.0001905526352230449, "loss": 11.6592, "step": 20015 }, { "epoch": 0.4189902034664657, "grad_norm": 0.24449242651462555, "learning_rate": 0.00019055170493145163, "loss": 11.6696, "step": 20016 }, { "epoch": 0.41901113623042785, "grad_norm": 0.30119654536247253, "learning_rate": 0.00019055077459632832, "loss": 11.6686, "step": 20017 }, { "epoch": 0.41903206899439, "grad_norm": 0.29474925994873047, "learning_rate": 0.00019054984421767548, "loss": 11.6872, "step": 20018 }, { "epoch": 0.4190530017583522, "grad_norm": 0.27201375365257263, "learning_rate": 0.00019054891379549348, "loss": 11.6648, "step": 20019 }, { "epoch": 0.41907393452231434, "grad_norm": 0.26115158200263977, "learning_rate": 0.00019054798332978284, "loss": 11.6891, "step": 20020 }, { "epoch": 0.4190948672862765, "grad_norm": 0.3506361246109009, "learning_rate": 0.00019054705282054392, "loss": 11.6786, "step": 20021 }, { "epoch": 0.41911580005023863, "grad_norm": 0.2970269024372101, "learning_rate": 0.00019054612226777727, "loss": 11.6814, "step": 20022 }, { "epoch": 0.4191367328142008, "grad_norm": 0.2978886663913727, "learning_rate": 0.00019054519167148324, "loss": 11.6743, "step": 20023 }, { "epoch": 0.4191576655781629, "grad_norm": 0.23342670500278473, "learning_rate": 0.00019054426103166233, "loss": 11.6575, "step": 20024 }, { "epoch": 0.4191785983421251, "grad_norm": 0.3368737995624542, "learning_rate": 0.000190543330348315, "loss": 11.6834, "step": 20025 }, { "epoch": 0.41919953110608726, "grad_norm": 0.32518813014030457, "learning_rate": 0.00019054239962144166, "loss": 11.6841, "step": 20026 }, { "epoch": 0.4192204638700494, "grad_norm": 0.3250269889831543, "learning_rate": 0.00019054146885104278, "loss": 11.6679, "step": 20027 }, { "epoch": 0.41924139663401155, "grad_norm": 0.2824965715408325, "learning_rate": 0.0001905405380371188, "loss": 11.6704, "step": 20028 }, { "epoch": 0.4192623293979737, "grad_norm": 0.31963154673576355, "learning_rate": 0.00019053960717967016, "loss": 11.6713, "step": 20029 }, { "epoch": 0.41928326216193584, "grad_norm": 0.3073025941848755, "learning_rate": 0.00019053867627869732, "loss": 11.6707, "step": 20030 }, { "epoch": 0.419304194925898, "grad_norm": 0.2798648476600647, "learning_rate": 0.00019053774533420075, "loss": 11.664, "step": 20031 }, { "epoch": 0.4193251276898602, "grad_norm": 0.24909840524196625, "learning_rate": 0.00019053681434618084, "loss": 11.6699, "step": 20032 }, { "epoch": 0.41934606045382233, "grad_norm": 0.2972845137119293, "learning_rate": 0.0001905358833146381, "loss": 11.673, "step": 20033 }, { "epoch": 0.4193669932177845, "grad_norm": 0.23482012748718262, "learning_rate": 0.0001905349522395729, "loss": 11.6777, "step": 20034 }, { "epoch": 0.4193879259817466, "grad_norm": 0.29145488142967224, "learning_rate": 0.00019053402112098578, "loss": 11.6645, "step": 20035 }, { "epoch": 0.41940885874570877, "grad_norm": 0.3304387331008911, "learning_rate": 0.00019053308995887713, "loss": 11.6733, "step": 20036 }, { "epoch": 0.4194297915096709, "grad_norm": 0.2714085876941681, "learning_rate": 0.0001905321587532474, "loss": 11.6662, "step": 20037 }, { "epoch": 0.4194507242736331, "grad_norm": 0.25373026728630066, "learning_rate": 0.00019053122750409705, "loss": 11.6597, "step": 20038 }, { "epoch": 0.41947165703759526, "grad_norm": 0.29418623447418213, "learning_rate": 0.00019053029621142651, "loss": 11.6774, "step": 20039 }, { "epoch": 0.4194925898015574, "grad_norm": 0.293291300535202, "learning_rate": 0.00019052936487523624, "loss": 11.6899, "step": 20040 }, { "epoch": 0.41951352256551955, "grad_norm": 0.3729376792907715, "learning_rate": 0.00019052843349552672, "loss": 11.6597, "step": 20041 }, { "epoch": 0.4195344553294817, "grad_norm": 0.29693448543548584, "learning_rate": 0.00019052750207229836, "loss": 11.6829, "step": 20042 }, { "epoch": 0.41955538809344384, "grad_norm": 0.26606056094169617, "learning_rate": 0.0001905265706055516, "loss": 11.6837, "step": 20043 }, { "epoch": 0.41957632085740604, "grad_norm": 0.2690543830394745, "learning_rate": 0.00019052563909528693, "loss": 11.6735, "step": 20044 }, { "epoch": 0.4195972536213682, "grad_norm": 0.30920839309692383, "learning_rate": 0.00019052470754150474, "loss": 11.6756, "step": 20045 }, { "epoch": 0.4196181863853303, "grad_norm": 0.2891378402709961, "learning_rate": 0.00019052377594420551, "loss": 11.675, "step": 20046 }, { "epoch": 0.41963911914929247, "grad_norm": 0.3952689468860626, "learning_rate": 0.00019052284430338972, "loss": 11.6816, "step": 20047 }, { "epoch": 0.4196600519132546, "grad_norm": 0.3498287796974182, "learning_rate": 0.00019052191261905777, "loss": 11.6617, "step": 20048 }, { "epoch": 0.41968098467721676, "grad_norm": 0.3259216248989105, "learning_rate": 0.00019052098089121012, "loss": 11.6792, "step": 20049 }, { "epoch": 0.4197019174411789, "grad_norm": 0.23512069880962372, "learning_rate": 0.0001905200491198472, "loss": 11.6769, "step": 20050 }, { "epoch": 0.4197228502051411, "grad_norm": 0.3375770151615143, "learning_rate": 0.0001905191173049695, "loss": 11.6643, "step": 20051 }, { "epoch": 0.41974378296910325, "grad_norm": 0.2897188067436218, "learning_rate": 0.00019051818544657745, "loss": 11.671, "step": 20052 }, { "epoch": 0.4197647157330654, "grad_norm": 0.24607667326927185, "learning_rate": 0.0001905172535446715, "loss": 11.6637, "step": 20053 }, { "epoch": 0.41978564849702754, "grad_norm": 0.34415456652641296, "learning_rate": 0.00019051632159925206, "loss": 11.6761, "step": 20054 }, { "epoch": 0.4198065812609897, "grad_norm": 0.3487740457057953, "learning_rate": 0.00019051538961031963, "loss": 11.6713, "step": 20055 }, { "epoch": 0.41982751402495183, "grad_norm": 0.2786464989185333, "learning_rate": 0.00019051445757787465, "loss": 11.6672, "step": 20056 }, { "epoch": 0.41984844678891403, "grad_norm": 0.2779599726200104, "learning_rate": 0.00019051352550191752, "loss": 11.6808, "step": 20057 }, { "epoch": 0.4198693795528762, "grad_norm": 0.2833392918109894, "learning_rate": 0.00019051259338244875, "loss": 11.6768, "step": 20058 }, { "epoch": 0.4198903123168383, "grad_norm": 0.329191118478775, "learning_rate": 0.00019051166121946875, "loss": 11.6739, "step": 20059 }, { "epoch": 0.41991124508080047, "grad_norm": 0.2569153606891632, "learning_rate": 0.00019051072901297799, "loss": 11.6868, "step": 20060 }, { "epoch": 0.4199321778447626, "grad_norm": 0.2783067524433136, "learning_rate": 0.00019050979676297694, "loss": 11.6663, "step": 20061 }, { "epoch": 0.41995311060872476, "grad_norm": 0.2813999056816101, "learning_rate": 0.00019050886446946597, "loss": 11.6632, "step": 20062 }, { "epoch": 0.41997404337268696, "grad_norm": 0.2566015422344208, "learning_rate": 0.00019050793213244558, "loss": 11.6699, "step": 20063 }, { "epoch": 0.4199949761366491, "grad_norm": 0.26817548274993896, "learning_rate": 0.00019050699975191624, "loss": 11.6734, "step": 20064 }, { "epoch": 0.42001590890061125, "grad_norm": 0.27978041768074036, "learning_rate": 0.00019050606732787833, "loss": 11.6808, "step": 20065 }, { "epoch": 0.4200368416645734, "grad_norm": 0.2976657748222351, "learning_rate": 0.0001905051348603324, "loss": 11.6641, "step": 20066 }, { "epoch": 0.42005777442853554, "grad_norm": 0.31968075037002563, "learning_rate": 0.0001905042023492788, "loss": 11.6735, "step": 20067 }, { "epoch": 0.4200787071924977, "grad_norm": 0.2811531722545624, "learning_rate": 0.000190503269794718, "loss": 11.691, "step": 20068 }, { "epoch": 0.4200996399564598, "grad_norm": 0.27272123098373413, "learning_rate": 0.00019050233719665048, "loss": 11.6703, "step": 20069 }, { "epoch": 0.420120572720422, "grad_norm": 0.31804075837135315, "learning_rate": 0.00019050140455507668, "loss": 11.6743, "step": 20070 }, { "epoch": 0.42014150548438417, "grad_norm": 0.2855881452560425, "learning_rate": 0.00019050047186999704, "loss": 11.6815, "step": 20071 }, { "epoch": 0.4201624382483463, "grad_norm": 0.32015934586524963, "learning_rate": 0.00019049953914141202, "loss": 11.6856, "step": 20072 }, { "epoch": 0.42018337101230846, "grad_norm": 0.34250545501708984, "learning_rate": 0.00019049860636932204, "loss": 11.6909, "step": 20073 }, { "epoch": 0.4202043037762706, "grad_norm": 0.26744401454925537, "learning_rate": 0.00019049767355372755, "loss": 11.6689, "step": 20074 }, { "epoch": 0.42022523654023275, "grad_norm": 0.3030986487865448, "learning_rate": 0.00019049674069462907, "loss": 11.6764, "step": 20075 }, { "epoch": 0.42024616930419495, "grad_norm": 0.3137882649898529, "learning_rate": 0.00019049580779202695, "loss": 11.6724, "step": 20076 }, { "epoch": 0.4202671020681571, "grad_norm": 0.2665514647960663, "learning_rate": 0.0001904948748459217, "loss": 11.6935, "step": 20077 }, { "epoch": 0.42028803483211924, "grad_norm": 0.28932759165763855, "learning_rate": 0.00019049394185631372, "loss": 11.6767, "step": 20078 }, { "epoch": 0.4203089675960814, "grad_norm": 0.26825645565986633, "learning_rate": 0.00019049300882320353, "loss": 11.6617, "step": 20079 }, { "epoch": 0.42032990036004353, "grad_norm": 0.26774224638938904, "learning_rate": 0.0001904920757465915, "loss": 11.6672, "step": 20080 }, { "epoch": 0.4203508331240057, "grad_norm": 0.4022795557975769, "learning_rate": 0.00019049114262647814, "loss": 11.6587, "step": 20081 }, { "epoch": 0.4203717658879679, "grad_norm": 0.3101721704006195, "learning_rate": 0.00019049020946286386, "loss": 11.68, "step": 20082 }, { "epoch": 0.42039269865193, "grad_norm": 0.3743285834789276, "learning_rate": 0.00019048927625574914, "loss": 11.6757, "step": 20083 }, { "epoch": 0.42041363141589216, "grad_norm": 0.2719149887561798, "learning_rate": 0.00019048834300513438, "loss": 11.6873, "step": 20084 }, { "epoch": 0.4204345641798543, "grad_norm": 0.25745904445648193, "learning_rate": 0.0001904874097110201, "loss": 11.6446, "step": 20085 }, { "epoch": 0.42045549694381645, "grad_norm": 0.41782793402671814, "learning_rate": 0.0001904864763734067, "loss": 11.66, "step": 20086 }, { "epoch": 0.4204764297077786, "grad_norm": 0.25915002822875977, "learning_rate": 0.00019048554299229463, "loss": 11.6752, "step": 20087 }, { "epoch": 0.42049736247174074, "grad_norm": 0.31668558716773987, "learning_rate": 0.00019048460956768435, "loss": 11.6733, "step": 20088 }, { "epoch": 0.42051829523570294, "grad_norm": 0.24902909994125366, "learning_rate": 0.00019048367609957628, "loss": 11.6711, "step": 20089 }, { "epoch": 0.4205392279996651, "grad_norm": 0.31412869691848755, "learning_rate": 0.00019048274258797092, "loss": 11.6734, "step": 20090 }, { "epoch": 0.42056016076362723, "grad_norm": 0.25006476044654846, "learning_rate": 0.0001904818090328687, "loss": 11.6753, "step": 20091 }, { "epoch": 0.4205810935275894, "grad_norm": 0.3295828700065613, "learning_rate": 0.00019048087543427003, "loss": 11.6759, "step": 20092 }, { "epoch": 0.4206020262915515, "grad_norm": 0.24626989662647247, "learning_rate": 0.00019047994179217538, "loss": 11.6436, "step": 20093 }, { "epoch": 0.42062295905551367, "grad_norm": 0.25768765807151794, "learning_rate": 0.00019047900810658524, "loss": 11.6614, "step": 20094 }, { "epoch": 0.42064389181947587, "grad_norm": 0.2572149932384491, "learning_rate": 0.00019047807437750004, "loss": 11.6737, "step": 20095 }, { "epoch": 0.420664824583438, "grad_norm": 0.2749441862106323, "learning_rate": 0.00019047714060492022, "loss": 11.6656, "step": 20096 }, { "epoch": 0.42068575734740016, "grad_norm": 0.255827397108078, "learning_rate": 0.0001904762067888462, "loss": 11.656, "step": 20097 }, { "epoch": 0.4207066901113623, "grad_norm": 0.2818969488143921, "learning_rate": 0.00019047527292927842, "loss": 11.6833, "step": 20098 }, { "epoch": 0.42072762287532445, "grad_norm": 0.35494115948677063, "learning_rate": 0.0001904743390262174, "loss": 11.6776, "step": 20099 }, { "epoch": 0.4207485556392866, "grad_norm": 0.2777784466743469, "learning_rate": 0.00019047340507966358, "loss": 11.6731, "step": 20100 }, { "epoch": 0.4207694884032488, "grad_norm": 0.26976466178894043, "learning_rate": 0.00019047247108961734, "loss": 11.6738, "step": 20101 }, { "epoch": 0.42079042116721094, "grad_norm": 0.262491375207901, "learning_rate": 0.0001904715370560792, "loss": 11.6727, "step": 20102 }, { "epoch": 0.4208113539311731, "grad_norm": 0.2828463315963745, "learning_rate": 0.00019047060297904954, "loss": 11.6771, "step": 20103 }, { "epoch": 0.4208322866951352, "grad_norm": 0.3053388297557831, "learning_rate": 0.0001904696688585289, "loss": 11.6566, "step": 20104 }, { "epoch": 0.42085321945909737, "grad_norm": 0.26022806763648987, "learning_rate": 0.00019046873469451763, "loss": 11.6595, "step": 20105 }, { "epoch": 0.4208741522230595, "grad_norm": 0.27381017804145813, "learning_rate": 0.00019046780048701626, "loss": 11.6794, "step": 20106 }, { "epoch": 0.42089508498702166, "grad_norm": 0.3122251331806183, "learning_rate": 0.0001904668662360252, "loss": 11.6766, "step": 20107 }, { "epoch": 0.42091601775098386, "grad_norm": 2.1177704334259033, "learning_rate": 0.00019046593194154486, "loss": 11.6309, "step": 20108 }, { "epoch": 0.420936950514946, "grad_norm": 0.25455978512763977, "learning_rate": 0.00019046499760357575, "loss": 11.6724, "step": 20109 }, { "epoch": 0.42095788327890815, "grad_norm": 0.29987984895706177, "learning_rate": 0.00019046406322211835, "loss": 11.6933, "step": 20110 }, { "epoch": 0.4209788160428703, "grad_norm": 0.26996421813964844, "learning_rate": 0.00019046312879717302, "loss": 11.6712, "step": 20111 }, { "epoch": 0.42099974880683244, "grad_norm": 0.25315001606941223, "learning_rate": 0.0001904621943287403, "loss": 11.6723, "step": 20112 }, { "epoch": 0.4210206815707946, "grad_norm": 0.25177139043807983, "learning_rate": 0.00019046125981682054, "loss": 11.6616, "step": 20113 }, { "epoch": 0.4210416143347568, "grad_norm": 0.3396899104118347, "learning_rate": 0.00019046032526141425, "loss": 11.68, "step": 20114 }, { "epoch": 0.42106254709871893, "grad_norm": 0.31213700771331787, "learning_rate": 0.00019045939066252186, "loss": 11.686, "step": 20115 }, { "epoch": 0.4210834798626811, "grad_norm": 0.2957036793231964, "learning_rate": 0.00019045845602014386, "loss": 11.702, "step": 20116 }, { "epoch": 0.4211044126266432, "grad_norm": 0.3379497230052948, "learning_rate": 0.00019045752133428065, "loss": 11.6494, "step": 20117 }, { "epoch": 0.42112534539060537, "grad_norm": 0.32969945669174194, "learning_rate": 0.00019045658660493269, "loss": 11.6555, "step": 20118 }, { "epoch": 0.4211462781545675, "grad_norm": 0.30271631479263306, "learning_rate": 0.00019045565183210044, "loss": 11.6811, "step": 20119 }, { "epoch": 0.42116721091852966, "grad_norm": 0.3204614818096161, "learning_rate": 0.00019045471701578434, "loss": 11.6628, "step": 20120 }, { "epoch": 0.42118814368249186, "grad_norm": 0.3062838613986969, "learning_rate": 0.00019045378215598485, "loss": 11.6834, "step": 20121 }, { "epoch": 0.421209076446454, "grad_norm": 0.3046727478504181, "learning_rate": 0.00019045284725270243, "loss": 11.6753, "step": 20122 }, { "epoch": 0.42123000921041615, "grad_norm": 0.25744181871414185, "learning_rate": 0.00019045191230593748, "loss": 11.6782, "step": 20123 }, { "epoch": 0.4212509419743783, "grad_norm": 0.373614102602005, "learning_rate": 0.00019045097731569053, "loss": 11.6819, "step": 20124 }, { "epoch": 0.42127187473834043, "grad_norm": 0.34893617033958435, "learning_rate": 0.00019045004228196192, "loss": 11.6704, "step": 20125 }, { "epoch": 0.4212928075023026, "grad_norm": 0.30233117938041687, "learning_rate": 0.0001904491072047522, "loss": 11.6759, "step": 20126 }, { "epoch": 0.4213137402662648, "grad_norm": 0.3916962444782257, "learning_rate": 0.00019044817208406178, "loss": 11.6832, "step": 20127 }, { "epoch": 0.4213346730302269, "grad_norm": 0.29932528734207153, "learning_rate": 0.00019044723691989112, "loss": 11.6558, "step": 20128 }, { "epoch": 0.42135560579418907, "grad_norm": 0.3224540054798126, "learning_rate": 0.00019044630171224065, "loss": 11.6717, "step": 20129 }, { "epoch": 0.4213765385581512, "grad_norm": 0.2776283621788025, "learning_rate": 0.00019044536646111083, "loss": 11.6717, "step": 20130 }, { "epoch": 0.42139747132211336, "grad_norm": 0.21799641847610474, "learning_rate": 0.00019044443116650211, "loss": 11.667, "step": 20131 }, { "epoch": 0.4214184040860755, "grad_norm": 0.3208913803100586, "learning_rate": 0.0001904434958284149, "loss": 11.6808, "step": 20132 }, { "epoch": 0.4214393368500377, "grad_norm": 0.3304367959499359, "learning_rate": 0.00019044256044684977, "loss": 11.676, "step": 20133 }, { "epoch": 0.42146026961399985, "grad_norm": 0.24967996776103973, "learning_rate": 0.00019044162502180706, "loss": 11.6707, "step": 20134 }, { "epoch": 0.421481202377962, "grad_norm": 0.2693294882774353, "learning_rate": 0.00019044068955328722, "loss": 11.6618, "step": 20135 }, { "epoch": 0.42150213514192414, "grad_norm": 0.36754125356674194, "learning_rate": 0.00019043975404129074, "loss": 11.6814, "step": 20136 }, { "epoch": 0.4215230679058863, "grad_norm": 0.2793561518192291, "learning_rate": 0.00019043881848581807, "loss": 11.6731, "step": 20137 }, { "epoch": 0.42154400066984843, "grad_norm": 0.256857305765152, "learning_rate": 0.00019043788288686963, "loss": 11.6729, "step": 20138 }, { "epoch": 0.4215649334338106, "grad_norm": 0.2836311161518097, "learning_rate": 0.0001904369472444459, "loss": 11.6803, "step": 20139 }, { "epoch": 0.4215858661977728, "grad_norm": 0.2869178354740143, "learning_rate": 0.00019043601155854732, "loss": 11.679, "step": 20140 }, { "epoch": 0.4216067989617349, "grad_norm": 0.3651684522628784, "learning_rate": 0.00019043507582917432, "loss": 11.6631, "step": 20141 }, { "epoch": 0.42162773172569706, "grad_norm": 0.31259554624557495, "learning_rate": 0.0001904341400563274, "loss": 11.6914, "step": 20142 }, { "epoch": 0.4216486644896592, "grad_norm": 0.2950044274330139, "learning_rate": 0.00019043320424000693, "loss": 11.6752, "step": 20143 }, { "epoch": 0.42166959725362135, "grad_norm": 0.3015673756599426, "learning_rate": 0.0001904322683802134, "loss": 11.6806, "step": 20144 }, { "epoch": 0.4216905300175835, "grad_norm": 0.2974456548690796, "learning_rate": 0.00019043133247694733, "loss": 11.6731, "step": 20145 }, { "epoch": 0.4217114627815457, "grad_norm": 0.3042478859424591, "learning_rate": 0.00019043039653020906, "loss": 11.6713, "step": 20146 }, { "epoch": 0.42173239554550784, "grad_norm": 0.3417010009288788, "learning_rate": 0.0001904294605399991, "loss": 11.677, "step": 20147 }, { "epoch": 0.42175332830947, "grad_norm": 0.31662535667419434, "learning_rate": 0.0001904285245063179, "loss": 11.6809, "step": 20148 }, { "epoch": 0.42177426107343213, "grad_norm": 0.2355518639087677, "learning_rate": 0.0001904275884291659, "loss": 11.663, "step": 20149 }, { "epoch": 0.4217951938373943, "grad_norm": 0.2667733132839203, "learning_rate": 0.0001904266523085435, "loss": 11.6758, "step": 20150 }, { "epoch": 0.4218161266013564, "grad_norm": 0.2865027189254761, "learning_rate": 0.00019042571614445123, "loss": 11.6538, "step": 20151 }, { "epoch": 0.4218370593653186, "grad_norm": 0.2862221896648407, "learning_rate": 0.0001904247799368895, "loss": 11.6726, "step": 20152 }, { "epoch": 0.42185799212928077, "grad_norm": 0.2841642200946808, "learning_rate": 0.00019042384368585875, "loss": 11.6729, "step": 20153 }, { "epoch": 0.4218789248932429, "grad_norm": 0.31627100706100464, "learning_rate": 0.00019042290739135947, "loss": 11.6663, "step": 20154 }, { "epoch": 0.42189985765720506, "grad_norm": 0.3276827335357666, "learning_rate": 0.00019042197105339205, "loss": 11.6482, "step": 20155 }, { "epoch": 0.4219207904211672, "grad_norm": 0.2649567127227783, "learning_rate": 0.000190421034671957, "loss": 11.6735, "step": 20156 }, { "epoch": 0.42194172318512935, "grad_norm": 0.32730934023857117, "learning_rate": 0.00019042009824705476, "loss": 11.6701, "step": 20157 }, { "epoch": 0.4219626559490915, "grad_norm": 0.33799469470977783, "learning_rate": 0.00019041916177868577, "loss": 11.6911, "step": 20158 }, { "epoch": 0.4219835887130537, "grad_norm": 0.3521634042263031, "learning_rate": 0.00019041822526685045, "loss": 11.691, "step": 20159 }, { "epoch": 0.42200452147701584, "grad_norm": 0.3213501572608948, "learning_rate": 0.0001904172887115493, "loss": 11.6689, "step": 20160 }, { "epoch": 0.422025454240978, "grad_norm": 0.3420467972755432, "learning_rate": 0.00019041635211278275, "loss": 11.6657, "step": 20161 }, { "epoch": 0.4220463870049401, "grad_norm": 0.28574836254119873, "learning_rate": 0.0001904154154705512, "loss": 11.6665, "step": 20162 }, { "epoch": 0.42206731976890227, "grad_norm": 0.49692556262016296, "learning_rate": 0.0001904144787848552, "loss": 11.6662, "step": 20163 }, { "epoch": 0.4220882525328644, "grad_norm": 0.2927764654159546, "learning_rate": 0.00019041354205569512, "loss": 11.685, "step": 20164 }, { "epoch": 0.4221091852968266, "grad_norm": 0.24936287105083466, "learning_rate": 0.00019041260528307149, "loss": 11.6819, "step": 20165 }, { "epoch": 0.42213011806078876, "grad_norm": 0.2768065631389618, "learning_rate": 0.00019041166846698465, "loss": 11.6587, "step": 20166 }, { "epoch": 0.4221510508247509, "grad_norm": 0.2462977170944214, "learning_rate": 0.00019041073160743516, "loss": 11.6536, "step": 20167 }, { "epoch": 0.42217198358871305, "grad_norm": 0.27421438694000244, "learning_rate": 0.00019040979470442336, "loss": 11.6738, "step": 20168 }, { "epoch": 0.4221929163526752, "grad_norm": 0.30774182081222534, "learning_rate": 0.00019040885775794978, "loss": 11.6802, "step": 20169 }, { "epoch": 0.42221384911663734, "grad_norm": 0.26983147859573364, "learning_rate": 0.00019040792076801486, "loss": 11.6714, "step": 20170 }, { "epoch": 0.42223478188059954, "grad_norm": 0.2779233753681183, "learning_rate": 0.00019040698373461905, "loss": 11.6739, "step": 20171 }, { "epoch": 0.4222557146445617, "grad_norm": 0.282694935798645, "learning_rate": 0.00019040604665776276, "loss": 11.6692, "step": 20172 }, { "epoch": 0.42227664740852383, "grad_norm": 0.3168267607688904, "learning_rate": 0.00019040510953744651, "loss": 11.681, "step": 20173 }, { "epoch": 0.422297580172486, "grad_norm": 0.24157020449638367, "learning_rate": 0.0001904041723736707, "loss": 11.6757, "step": 20174 }, { "epoch": 0.4223185129364481, "grad_norm": 0.4000272750854492, "learning_rate": 0.00019040323516643576, "loss": 11.6951, "step": 20175 }, { "epoch": 0.42233944570041027, "grad_norm": 0.27676472067832947, "learning_rate": 0.00019040229791574223, "loss": 11.6913, "step": 20176 }, { "epoch": 0.4223603784643724, "grad_norm": 0.2852681577205658, "learning_rate": 0.00019040136062159046, "loss": 11.667, "step": 20177 }, { "epoch": 0.4223813112283346, "grad_norm": 0.27510902285575867, "learning_rate": 0.00019040042328398097, "loss": 11.6588, "step": 20178 }, { "epoch": 0.42240224399229676, "grad_norm": 0.261959046125412, "learning_rate": 0.00019039948590291415, "loss": 11.66, "step": 20179 }, { "epoch": 0.4224231767562589, "grad_norm": 0.2997737228870392, "learning_rate": 0.00019039854847839052, "loss": 11.6683, "step": 20180 }, { "epoch": 0.42244410952022104, "grad_norm": 0.6012257933616638, "learning_rate": 0.00019039761101041048, "loss": 11.6616, "step": 20181 }, { "epoch": 0.4224650422841832, "grad_norm": 0.26362574100494385, "learning_rate": 0.0001903966734989745, "loss": 11.6705, "step": 20182 }, { "epoch": 0.42248597504814533, "grad_norm": 0.2591893672943115, "learning_rate": 0.00019039573594408301, "loss": 11.6855, "step": 20183 }, { "epoch": 0.42250690781210753, "grad_norm": 0.22061039507389069, "learning_rate": 0.0001903947983457365, "loss": 11.6535, "step": 20184 }, { "epoch": 0.4225278405760697, "grad_norm": 0.32746753096580505, "learning_rate": 0.00019039386070393542, "loss": 11.6813, "step": 20185 }, { "epoch": 0.4225487733400318, "grad_norm": 0.3430730104446411, "learning_rate": 0.00019039292301868015, "loss": 11.6763, "step": 20186 }, { "epoch": 0.42256970610399397, "grad_norm": 0.3152962327003479, "learning_rate": 0.0001903919852899712, "loss": 11.6641, "step": 20187 }, { "epoch": 0.4225906388679561, "grad_norm": 0.3166406750679016, "learning_rate": 0.00019039104751780906, "loss": 11.671, "step": 20188 }, { "epoch": 0.42261157163191826, "grad_norm": 0.2943207025527954, "learning_rate": 0.00019039010970219408, "loss": 11.6721, "step": 20189 }, { "epoch": 0.42263250439588046, "grad_norm": 0.26945608854293823, "learning_rate": 0.0001903891718431268, "loss": 11.6848, "step": 20190 }, { "epoch": 0.4226534371598426, "grad_norm": 0.31424084305763245, "learning_rate": 0.0001903882339406076, "loss": 11.6706, "step": 20191 }, { "epoch": 0.42267436992380475, "grad_norm": 0.3509442210197449, "learning_rate": 0.00019038729599463698, "loss": 11.6782, "step": 20192 }, { "epoch": 0.4226953026877669, "grad_norm": 0.2659956216812134, "learning_rate": 0.0001903863580052154, "loss": 11.6806, "step": 20193 }, { "epoch": 0.42271623545172904, "grad_norm": 0.3244294822216034, "learning_rate": 0.00019038541997234325, "loss": 11.6714, "step": 20194 }, { "epoch": 0.4227371682156912, "grad_norm": 0.25726935267448425, "learning_rate": 0.00019038448189602104, "loss": 11.6703, "step": 20195 }, { "epoch": 0.42275810097965333, "grad_norm": 0.23909813165664673, "learning_rate": 0.00019038354377624915, "loss": 11.6693, "step": 20196 }, { "epoch": 0.42277903374361553, "grad_norm": 0.2695252299308777, "learning_rate": 0.00019038260561302813, "loss": 11.6657, "step": 20197 }, { "epoch": 0.4227999665075777, "grad_norm": 0.22952210903167725, "learning_rate": 0.00019038166740635836, "loss": 11.6633, "step": 20198 }, { "epoch": 0.4228208992715398, "grad_norm": 0.2777538299560547, "learning_rate": 0.00019038072915624033, "loss": 11.68, "step": 20199 }, { "epoch": 0.42284183203550196, "grad_norm": 0.289614737033844, "learning_rate": 0.00019037979086267446, "loss": 11.6693, "step": 20200 }, { "epoch": 0.4228627647994641, "grad_norm": 0.26503798365592957, "learning_rate": 0.0001903788525256612, "loss": 11.6766, "step": 20201 }, { "epoch": 0.42288369756342625, "grad_norm": 0.33431074023246765, "learning_rate": 0.00019037791414520103, "loss": 11.6782, "step": 20202 }, { "epoch": 0.42290463032738845, "grad_norm": 0.2876477539539337, "learning_rate": 0.0001903769757212944, "loss": 11.666, "step": 20203 }, { "epoch": 0.4229255630913506, "grad_norm": 0.28300780057907104, "learning_rate": 0.0001903760372539417, "loss": 11.6731, "step": 20204 }, { "epoch": 0.42294649585531274, "grad_norm": 0.2733275890350342, "learning_rate": 0.00019037509874314344, "loss": 11.6645, "step": 20205 }, { "epoch": 0.4229674286192749, "grad_norm": 0.29811209440231323, "learning_rate": 0.0001903741601889001, "loss": 11.664, "step": 20206 }, { "epoch": 0.42298836138323703, "grad_norm": 0.30915990471839905, "learning_rate": 0.00019037322159121207, "loss": 11.6626, "step": 20207 }, { "epoch": 0.4230092941471992, "grad_norm": 0.32453641295433044, "learning_rate": 0.0001903722829500798, "loss": 11.7002, "step": 20208 }, { "epoch": 0.4230302269111613, "grad_norm": 0.32819056510925293, "learning_rate": 0.00019037134426550377, "loss": 11.6561, "step": 20209 }, { "epoch": 0.4230511596751235, "grad_norm": 0.2609371244907379, "learning_rate": 0.00019037040553748446, "loss": 11.6737, "step": 20210 }, { "epoch": 0.42307209243908567, "grad_norm": 0.23544202744960785, "learning_rate": 0.00019036946676602226, "loss": 11.6664, "step": 20211 }, { "epoch": 0.4230930252030478, "grad_norm": 0.3591926693916321, "learning_rate": 0.00019036852795111764, "loss": 11.6749, "step": 20212 }, { "epoch": 0.42311395796700996, "grad_norm": 0.3581501245498657, "learning_rate": 0.00019036758909277105, "loss": 11.6804, "step": 20213 }, { "epoch": 0.4231348907309721, "grad_norm": 0.35915663838386536, "learning_rate": 0.00019036665019098298, "loss": 11.6808, "step": 20214 }, { "epoch": 0.42315582349493425, "grad_norm": 0.27564501762390137, "learning_rate": 0.00019036571124575382, "loss": 11.6615, "step": 20215 }, { "epoch": 0.42317675625889645, "grad_norm": 0.26551175117492676, "learning_rate": 0.00019036477225708407, "loss": 11.6778, "step": 20216 }, { "epoch": 0.4231976890228586, "grad_norm": 0.3350493013858795, "learning_rate": 0.00019036383322497416, "loss": 11.6771, "step": 20217 }, { "epoch": 0.42321862178682074, "grad_norm": 0.3518497347831726, "learning_rate": 0.00019036289414942452, "loss": 11.6942, "step": 20218 }, { "epoch": 0.4232395545507829, "grad_norm": 0.28815171122550964, "learning_rate": 0.00019036195503043565, "loss": 11.6697, "step": 20219 }, { "epoch": 0.423260487314745, "grad_norm": 0.32462358474731445, "learning_rate": 0.00019036101586800797, "loss": 11.6637, "step": 20220 }, { "epoch": 0.42328142007870717, "grad_norm": 0.2708267271518707, "learning_rate": 0.00019036007666214195, "loss": 11.6586, "step": 20221 }, { "epoch": 0.42330235284266937, "grad_norm": 0.2959258258342743, "learning_rate": 0.00019035913741283805, "loss": 11.6896, "step": 20222 }, { "epoch": 0.4233232856066315, "grad_norm": 0.3548925817012787, "learning_rate": 0.00019035819812009665, "loss": 11.6756, "step": 20223 }, { "epoch": 0.42334421837059366, "grad_norm": 0.2665382921695709, "learning_rate": 0.00019035725878391828, "loss": 11.6736, "step": 20224 }, { "epoch": 0.4233651511345558, "grad_norm": 0.29485654830932617, "learning_rate": 0.00019035631940430336, "loss": 11.6749, "step": 20225 }, { "epoch": 0.42338608389851795, "grad_norm": 0.24091781675815582, "learning_rate": 0.00019035537998125234, "loss": 11.6579, "step": 20226 }, { "epoch": 0.4234070166624801, "grad_norm": 0.24016153812408447, "learning_rate": 0.0001903544405147657, "loss": 11.6713, "step": 20227 }, { "epoch": 0.42342794942644224, "grad_norm": 0.24788294732570648, "learning_rate": 0.00019035350100484385, "loss": 11.6557, "step": 20228 }, { "epoch": 0.42344888219040444, "grad_norm": 0.27967211604118347, "learning_rate": 0.00019035256145148727, "loss": 11.6733, "step": 20229 }, { "epoch": 0.4234698149543666, "grad_norm": 0.2780088186264038, "learning_rate": 0.0001903516218546964, "loss": 11.6652, "step": 20230 }, { "epoch": 0.42349074771832873, "grad_norm": 0.335686057806015, "learning_rate": 0.0001903506822144717, "loss": 11.6886, "step": 20231 }, { "epoch": 0.4235116804822909, "grad_norm": 0.27011415362358093, "learning_rate": 0.0001903497425308136, "loss": 11.6927, "step": 20232 }, { "epoch": 0.423532613246253, "grad_norm": 0.28327417373657227, "learning_rate": 0.0001903488028037226, "loss": 11.6665, "step": 20233 }, { "epoch": 0.42355354601021517, "grad_norm": 0.26485562324523926, "learning_rate": 0.0001903478630331991, "loss": 11.6503, "step": 20234 }, { "epoch": 0.42357447877417737, "grad_norm": 0.2975393831729889, "learning_rate": 0.00019034692321924358, "loss": 11.671, "step": 20235 }, { "epoch": 0.4235954115381395, "grad_norm": 0.3743925392627716, "learning_rate": 0.00019034598336185648, "loss": 11.6708, "step": 20236 }, { "epoch": 0.42361634430210166, "grad_norm": 0.24540846049785614, "learning_rate": 0.00019034504346103823, "loss": 11.6715, "step": 20237 }, { "epoch": 0.4236372770660638, "grad_norm": 0.2483040690422058, "learning_rate": 0.00019034410351678936, "loss": 11.6645, "step": 20238 }, { "epoch": 0.42365820983002594, "grad_norm": 0.32977280020713806, "learning_rate": 0.00019034316352911022, "loss": 11.6627, "step": 20239 }, { "epoch": 0.4236791425939881, "grad_norm": 0.265322208404541, "learning_rate": 0.00019034222349800133, "loss": 11.6712, "step": 20240 }, { "epoch": 0.4237000753579503, "grad_norm": 0.5195993781089783, "learning_rate": 0.00019034128342346312, "loss": 11.6542, "step": 20241 }, { "epoch": 0.42372100812191243, "grad_norm": 0.2915489673614502, "learning_rate": 0.00019034034330549603, "loss": 11.6663, "step": 20242 }, { "epoch": 0.4237419408858746, "grad_norm": 0.3206271827220917, "learning_rate": 0.00019033940314410055, "loss": 11.6676, "step": 20243 }, { "epoch": 0.4237628736498367, "grad_norm": 0.28531524538993835, "learning_rate": 0.00019033846293927707, "loss": 11.6747, "step": 20244 }, { "epoch": 0.42378380641379887, "grad_norm": 0.25499993562698364, "learning_rate": 0.00019033752269102613, "loss": 11.6823, "step": 20245 }, { "epoch": 0.423804739177761, "grad_norm": 0.27371376752853394, "learning_rate": 0.0001903365823993481, "loss": 11.6642, "step": 20246 }, { "epoch": 0.42382567194172316, "grad_norm": 0.42972004413604736, "learning_rate": 0.00019033564206424351, "loss": 11.6835, "step": 20247 }, { "epoch": 0.42384660470568536, "grad_norm": 0.2954034209251404, "learning_rate": 0.00019033470168571272, "loss": 11.6824, "step": 20248 }, { "epoch": 0.4238675374696475, "grad_norm": 0.25374650955200195, "learning_rate": 0.00019033376126375627, "loss": 11.678, "step": 20249 }, { "epoch": 0.42388847023360965, "grad_norm": 0.35523611307144165, "learning_rate": 0.0001903328207983745, "loss": 11.6862, "step": 20250 }, { "epoch": 0.4239094029975718, "grad_norm": 0.2608833611011505, "learning_rate": 0.00019033188028956798, "loss": 11.6791, "step": 20251 }, { "epoch": 0.42393033576153394, "grad_norm": 0.2805303931236267, "learning_rate": 0.00019033093973733713, "loss": 11.6698, "step": 20252 }, { "epoch": 0.4239512685254961, "grad_norm": 0.24738715589046478, "learning_rate": 0.00019032999914168235, "loss": 11.6673, "step": 20253 }, { "epoch": 0.4239722012894583, "grad_norm": 0.2700577676296234, "learning_rate": 0.00019032905850260417, "loss": 11.6718, "step": 20254 }, { "epoch": 0.42399313405342043, "grad_norm": 0.300204873085022, "learning_rate": 0.00019032811782010298, "loss": 11.651, "step": 20255 }, { "epoch": 0.4240140668173826, "grad_norm": 0.3392237722873688, "learning_rate": 0.00019032717709417926, "loss": 11.6763, "step": 20256 }, { "epoch": 0.4240349995813447, "grad_norm": 0.2562854290008545, "learning_rate": 0.00019032623632483345, "loss": 11.6839, "step": 20257 }, { "epoch": 0.42405593234530686, "grad_norm": 0.324179470539093, "learning_rate": 0.00019032529551206597, "loss": 11.6762, "step": 20258 }, { "epoch": 0.424076865109269, "grad_norm": 0.3315499722957611, "learning_rate": 0.00019032435465587738, "loss": 11.6575, "step": 20259 }, { "epoch": 0.4240977978732312, "grad_norm": 0.26624324917793274, "learning_rate": 0.00019032341375626802, "loss": 11.6707, "step": 20260 }, { "epoch": 0.42411873063719335, "grad_norm": 0.31161290407180786, "learning_rate": 0.0001903224728132384, "loss": 11.6637, "step": 20261 }, { "epoch": 0.4241396634011555, "grad_norm": 0.25094228982925415, "learning_rate": 0.00019032153182678893, "loss": 11.6733, "step": 20262 }, { "epoch": 0.42416059616511764, "grad_norm": 0.30972421169281006, "learning_rate": 0.0001903205907969201, "loss": 11.6838, "step": 20263 }, { "epoch": 0.4241815289290798, "grad_norm": 0.23525266349315643, "learning_rate": 0.00019031964972363236, "loss": 11.6681, "step": 20264 }, { "epoch": 0.42420246169304193, "grad_norm": 0.2943527102470398, "learning_rate": 0.00019031870860692618, "loss": 11.6773, "step": 20265 }, { "epoch": 0.4242233944570041, "grad_norm": 0.3620046079158783, "learning_rate": 0.00019031776744680198, "loss": 11.6802, "step": 20266 }, { "epoch": 0.4242443272209663, "grad_norm": 0.2668781876564026, "learning_rate": 0.0001903168262432602, "loss": 11.6681, "step": 20267 }, { "epoch": 0.4242652599849284, "grad_norm": 0.3418181240558624, "learning_rate": 0.0001903158849963013, "loss": 11.6691, "step": 20268 }, { "epoch": 0.42428619274889057, "grad_norm": 0.3189254403114319, "learning_rate": 0.00019031494370592576, "loss": 11.691, "step": 20269 }, { "epoch": 0.4243071255128527, "grad_norm": 0.3101271986961365, "learning_rate": 0.000190314002372134, "loss": 11.6562, "step": 20270 }, { "epoch": 0.42432805827681486, "grad_norm": 0.228016197681427, "learning_rate": 0.0001903130609949265, "loss": 11.6659, "step": 20271 }, { "epoch": 0.424348991040777, "grad_norm": 0.25532209873199463, "learning_rate": 0.0001903121195743037, "loss": 11.6812, "step": 20272 }, { "epoch": 0.4243699238047392, "grad_norm": 0.26055434346199036, "learning_rate": 0.0001903111781102661, "loss": 11.6765, "step": 20273 }, { "epoch": 0.42439085656870135, "grad_norm": 0.31789180636405945, "learning_rate": 0.00019031023660281404, "loss": 11.6812, "step": 20274 }, { "epoch": 0.4244117893326635, "grad_norm": 0.2616533637046814, "learning_rate": 0.00019030929505194807, "loss": 11.6902, "step": 20275 }, { "epoch": 0.42443272209662564, "grad_norm": 0.27894431352615356, "learning_rate": 0.00019030835345766863, "loss": 11.6524, "step": 20276 }, { "epoch": 0.4244536548605878, "grad_norm": 0.25528115034103394, "learning_rate": 0.00019030741181997612, "loss": 11.6735, "step": 20277 }, { "epoch": 0.4244745876245499, "grad_norm": 0.2771439552307129, "learning_rate": 0.00019030647013887103, "loss": 11.6738, "step": 20278 }, { "epoch": 0.4244955203885121, "grad_norm": 0.29652824997901917, "learning_rate": 0.00019030552841435382, "loss": 11.6707, "step": 20279 }, { "epoch": 0.42451645315247427, "grad_norm": 0.31472450494766235, "learning_rate": 0.00019030458664642493, "loss": 11.6781, "step": 20280 }, { "epoch": 0.4245373859164364, "grad_norm": 0.2843270003795624, "learning_rate": 0.00019030364483508483, "loss": 11.6603, "step": 20281 }, { "epoch": 0.42455831868039856, "grad_norm": 0.3284306526184082, "learning_rate": 0.00019030270298033396, "loss": 11.6577, "step": 20282 }, { "epoch": 0.4245792514443607, "grad_norm": 0.31004467606544495, "learning_rate": 0.00019030176108217274, "loss": 11.6633, "step": 20283 }, { "epoch": 0.42460018420832285, "grad_norm": 0.2212507724761963, "learning_rate": 0.00019030081914060167, "loss": 11.6651, "step": 20284 }, { "epoch": 0.424621116972285, "grad_norm": 0.24638132750988007, "learning_rate": 0.0001902998771556212, "loss": 11.6748, "step": 20285 }, { "epoch": 0.4246420497362472, "grad_norm": 0.27177944779396057, "learning_rate": 0.00019029893512723174, "loss": 11.6618, "step": 20286 }, { "epoch": 0.42466298250020934, "grad_norm": 0.26018837094306946, "learning_rate": 0.00019029799305543378, "loss": 11.6615, "step": 20287 }, { "epoch": 0.4246839152641715, "grad_norm": 0.34595218300819397, "learning_rate": 0.0001902970509402278, "loss": 11.6901, "step": 20288 }, { "epoch": 0.42470484802813363, "grad_norm": 0.2916131615638733, "learning_rate": 0.00019029610878161418, "loss": 11.6675, "step": 20289 }, { "epoch": 0.4247257807920958, "grad_norm": 0.2620432674884796, "learning_rate": 0.00019029516657959343, "loss": 11.6569, "step": 20290 }, { "epoch": 0.4247467135560579, "grad_norm": 0.3431771397590637, "learning_rate": 0.00019029422433416597, "loss": 11.6609, "step": 20291 }, { "epoch": 0.4247676463200201, "grad_norm": 0.34697628021240234, "learning_rate": 0.0001902932820453323, "loss": 11.7003, "step": 20292 }, { "epoch": 0.42478857908398227, "grad_norm": 0.31315040588378906, "learning_rate": 0.0001902923397130928, "loss": 11.6582, "step": 20293 }, { "epoch": 0.4248095118479444, "grad_norm": 0.24518190324306488, "learning_rate": 0.00019029139733744798, "loss": 11.6633, "step": 20294 }, { "epoch": 0.42483044461190655, "grad_norm": 0.21280620992183685, "learning_rate": 0.00019029045491839828, "loss": 11.6614, "step": 20295 }, { "epoch": 0.4248513773758687, "grad_norm": 0.29861631989479065, "learning_rate": 0.00019028951245594414, "loss": 11.6504, "step": 20296 }, { "epoch": 0.42487231013983084, "grad_norm": 0.2906394302845001, "learning_rate": 0.000190288569950086, "loss": 11.6705, "step": 20297 }, { "epoch": 0.424893242903793, "grad_norm": 0.32110661268234253, "learning_rate": 0.00019028762740082438, "loss": 11.6758, "step": 20298 }, { "epoch": 0.4249141756677552, "grad_norm": 0.30566561222076416, "learning_rate": 0.00019028668480815966, "loss": 11.6859, "step": 20299 }, { "epoch": 0.42493510843171733, "grad_norm": 0.2993954122066498, "learning_rate": 0.0001902857421720923, "loss": 11.6679, "step": 20300 }, { "epoch": 0.4249560411956795, "grad_norm": 0.2488877773284912, "learning_rate": 0.00019028479949262283, "loss": 11.6663, "step": 20301 }, { "epoch": 0.4249769739596416, "grad_norm": 0.2917160391807556, "learning_rate": 0.00019028385676975163, "loss": 11.6615, "step": 20302 }, { "epoch": 0.42499790672360377, "grad_norm": 0.25956404209136963, "learning_rate": 0.00019028291400347916, "loss": 11.6793, "step": 20303 }, { "epoch": 0.4250188394875659, "grad_norm": 0.28829389810562134, "learning_rate": 0.0001902819711938059, "loss": 11.684, "step": 20304 }, { "epoch": 0.4250397722515281, "grad_norm": 0.28436991572380066, "learning_rate": 0.00019028102834073226, "loss": 11.6932, "step": 20305 }, { "epoch": 0.42506070501549026, "grad_norm": 0.27989181876182556, "learning_rate": 0.00019028008544425874, "loss": 11.674, "step": 20306 }, { "epoch": 0.4250816377794524, "grad_norm": 0.2993091940879822, "learning_rate": 0.00019027914250438577, "loss": 11.6758, "step": 20307 }, { "epoch": 0.42510257054341455, "grad_norm": 0.26690322160720825, "learning_rate": 0.0001902781995211138, "loss": 11.6698, "step": 20308 }, { "epoch": 0.4251235033073767, "grad_norm": 0.3343530297279358, "learning_rate": 0.0001902772564944433, "loss": 11.684, "step": 20309 }, { "epoch": 0.42514443607133884, "grad_norm": 0.2618630826473236, "learning_rate": 0.0001902763134243747, "loss": 11.6647, "step": 20310 }, { "epoch": 0.42516536883530104, "grad_norm": 0.42081159353256226, "learning_rate": 0.00019027537031090846, "loss": 11.6798, "step": 20311 }, { "epoch": 0.4251863015992632, "grad_norm": 0.30312737822532654, "learning_rate": 0.00019027442715404503, "loss": 11.6888, "step": 20312 }, { "epoch": 0.42520723436322533, "grad_norm": 0.26309892535209656, "learning_rate": 0.00019027348395378492, "loss": 11.6537, "step": 20313 }, { "epoch": 0.4252281671271875, "grad_norm": 0.2708231508731842, "learning_rate": 0.0001902725407101285, "loss": 11.6656, "step": 20314 }, { "epoch": 0.4252490998911496, "grad_norm": 0.2857815623283386, "learning_rate": 0.0001902715974230763, "loss": 11.6805, "step": 20315 }, { "epoch": 0.42527003265511176, "grad_norm": 0.2975136339664459, "learning_rate": 0.00019027065409262873, "loss": 11.6667, "step": 20316 }, { "epoch": 0.4252909654190739, "grad_norm": 0.3187563717365265, "learning_rate": 0.00019026971071878622, "loss": 11.6793, "step": 20317 }, { "epoch": 0.4253118981830361, "grad_norm": 0.24113047122955322, "learning_rate": 0.00019026876730154926, "loss": 11.6629, "step": 20318 }, { "epoch": 0.42533283094699825, "grad_norm": 0.2558184266090393, "learning_rate": 0.00019026782384091827, "loss": 11.66, "step": 20319 }, { "epoch": 0.4253537637109604, "grad_norm": 0.2763369381427765, "learning_rate": 0.00019026688033689374, "loss": 11.6915, "step": 20320 }, { "epoch": 0.42537469647492254, "grad_norm": 0.264283150434494, "learning_rate": 0.00019026593678947613, "loss": 11.6554, "step": 20321 }, { "epoch": 0.4253956292388847, "grad_norm": 0.2705507278442383, "learning_rate": 0.00019026499319866588, "loss": 11.6626, "step": 20322 }, { "epoch": 0.42541656200284683, "grad_norm": 0.24525940418243408, "learning_rate": 0.00019026404956446342, "loss": 11.6747, "step": 20323 }, { "epoch": 0.42543749476680903, "grad_norm": 0.2948329746723175, "learning_rate": 0.00019026310588686924, "loss": 11.6638, "step": 20324 }, { "epoch": 0.4254584275307712, "grad_norm": 0.2946721613407135, "learning_rate": 0.00019026216216588376, "loss": 11.6912, "step": 20325 }, { "epoch": 0.4254793602947333, "grad_norm": 0.30440396070480347, "learning_rate": 0.00019026121840150746, "loss": 11.6633, "step": 20326 }, { "epoch": 0.42550029305869547, "grad_norm": 0.32620319724082947, "learning_rate": 0.0001902602745937408, "loss": 11.6696, "step": 20327 }, { "epoch": 0.4255212258226576, "grad_norm": 0.2645588219165802, "learning_rate": 0.00019025933074258417, "loss": 11.6703, "step": 20328 }, { "epoch": 0.42554215858661976, "grad_norm": 0.26764774322509766, "learning_rate": 0.0001902583868480381, "loss": 11.6686, "step": 20329 }, { "epoch": 0.42556309135058196, "grad_norm": 0.31046316027641296, "learning_rate": 0.000190257442910103, "loss": 11.6737, "step": 20330 }, { "epoch": 0.4255840241145441, "grad_norm": 0.2683459520339966, "learning_rate": 0.00019025649892877934, "loss": 11.6815, "step": 20331 }, { "epoch": 0.42560495687850625, "grad_norm": 0.3134269118309021, "learning_rate": 0.00019025555490406762, "loss": 11.6827, "step": 20332 }, { "epoch": 0.4256258896424684, "grad_norm": 0.3056780695915222, "learning_rate": 0.00019025461083596818, "loss": 11.6932, "step": 20333 }, { "epoch": 0.42564682240643054, "grad_norm": 0.2844698131084442, "learning_rate": 0.0001902536667244816, "loss": 11.6678, "step": 20334 }, { "epoch": 0.4256677551703927, "grad_norm": 0.3189803957939148, "learning_rate": 0.00019025272256960823, "loss": 11.6695, "step": 20335 }, { "epoch": 0.4256886879343548, "grad_norm": 0.2431335151195526, "learning_rate": 0.00019025177837134858, "loss": 11.6851, "step": 20336 }, { "epoch": 0.425709620698317, "grad_norm": 0.2998037040233612, "learning_rate": 0.00019025083412970307, "loss": 11.6851, "step": 20337 }, { "epoch": 0.42573055346227917, "grad_norm": 0.43965843319892883, "learning_rate": 0.0001902498898446722, "loss": 11.6645, "step": 20338 }, { "epoch": 0.4257514862262413, "grad_norm": 0.2547926902770996, "learning_rate": 0.0001902489455162564, "loss": 11.6735, "step": 20339 }, { "epoch": 0.42577241899020346, "grad_norm": 0.33520975708961487, "learning_rate": 0.00019024800114445612, "loss": 11.6752, "step": 20340 }, { "epoch": 0.4257933517541656, "grad_norm": 0.26968279480934143, "learning_rate": 0.00019024705672927183, "loss": 11.6591, "step": 20341 }, { "epoch": 0.42581428451812775, "grad_norm": 0.26773151755332947, "learning_rate": 0.00019024611227070394, "loss": 11.6658, "step": 20342 }, { "epoch": 0.42583521728208995, "grad_norm": 0.2809949815273285, "learning_rate": 0.00019024516776875296, "loss": 11.674, "step": 20343 }, { "epoch": 0.4258561500460521, "grad_norm": 0.28381097316741943, "learning_rate": 0.0001902442232234193, "loss": 11.6853, "step": 20344 }, { "epoch": 0.42587708281001424, "grad_norm": 0.2613206207752228, "learning_rate": 0.00019024327863470346, "loss": 11.6649, "step": 20345 }, { "epoch": 0.4258980155739764, "grad_norm": 0.25923386216163635, "learning_rate": 0.00019024233400260585, "loss": 11.6701, "step": 20346 }, { "epoch": 0.42591894833793853, "grad_norm": 0.2580215632915497, "learning_rate": 0.00019024138932712694, "loss": 11.69, "step": 20347 }, { "epoch": 0.4259398811019007, "grad_norm": 0.27602720260620117, "learning_rate": 0.0001902404446082672, "loss": 11.6832, "step": 20348 }, { "epoch": 0.4259608138658629, "grad_norm": 0.2830759584903717, "learning_rate": 0.00019023949984602707, "loss": 11.6704, "step": 20349 }, { "epoch": 0.425981746629825, "grad_norm": 0.2447669357061386, "learning_rate": 0.00019023855504040696, "loss": 11.6764, "step": 20350 }, { "epoch": 0.42600267939378716, "grad_norm": 0.28243255615234375, "learning_rate": 0.0001902376101914074, "loss": 11.6739, "step": 20351 }, { "epoch": 0.4260236121577493, "grad_norm": 0.2864817678928375, "learning_rate": 0.00019023666529902883, "loss": 11.678, "step": 20352 }, { "epoch": 0.42604454492171145, "grad_norm": 0.24996909499168396, "learning_rate": 0.0001902357203632717, "loss": 11.6847, "step": 20353 }, { "epoch": 0.4260654776856736, "grad_norm": 0.2822587192058563, "learning_rate": 0.00019023477538413638, "loss": 11.6897, "step": 20354 }, { "epoch": 0.42608641044963574, "grad_norm": 0.3259688913822174, "learning_rate": 0.00019023383036162345, "loss": 11.6826, "step": 20355 }, { "epoch": 0.42610734321359794, "grad_norm": 0.3209255337715149, "learning_rate": 0.00019023288529573332, "loss": 11.6722, "step": 20356 }, { "epoch": 0.4261282759775601, "grad_norm": 0.38055863976478577, "learning_rate": 0.00019023194018646638, "loss": 11.6839, "step": 20357 }, { "epoch": 0.42614920874152223, "grad_norm": 0.2674378454685211, "learning_rate": 0.00019023099503382319, "loss": 11.6784, "step": 20358 }, { "epoch": 0.4261701415054844, "grad_norm": 0.3219608962535858, "learning_rate": 0.0001902300498378041, "loss": 11.6845, "step": 20359 }, { "epoch": 0.4261910742694465, "grad_norm": 0.32270631194114685, "learning_rate": 0.00019022910459840967, "loss": 11.6696, "step": 20360 }, { "epoch": 0.42621200703340867, "grad_norm": 0.3115996718406677, "learning_rate": 0.00019022815931564027, "loss": 11.6749, "step": 20361 }, { "epoch": 0.42623293979737087, "grad_norm": 0.2428983598947525, "learning_rate": 0.0001902272139894964, "loss": 11.672, "step": 20362 }, { "epoch": 0.426253872561333, "grad_norm": 0.3175920248031616, "learning_rate": 0.0001902262686199785, "loss": 11.6667, "step": 20363 }, { "epoch": 0.42627480532529516, "grad_norm": 0.7871426343917847, "learning_rate": 0.00019022532320708702, "loss": 11.5621, "step": 20364 }, { "epoch": 0.4262957380892573, "grad_norm": 0.297191321849823, "learning_rate": 0.0001902243777508224, "loss": 11.64, "step": 20365 }, { "epoch": 0.42631667085321945, "grad_norm": 0.39349648356437683, "learning_rate": 0.00019022343225118515, "loss": 11.6591, "step": 20366 }, { "epoch": 0.4263376036171816, "grad_norm": 0.3027323782444, "learning_rate": 0.00019022248670817567, "loss": 11.6846, "step": 20367 }, { "epoch": 0.4263585363811438, "grad_norm": 0.27248871326446533, "learning_rate": 0.00019022154112179442, "loss": 11.679, "step": 20368 }, { "epoch": 0.42637946914510594, "grad_norm": 0.2637699842453003, "learning_rate": 0.00019022059549204189, "loss": 11.6791, "step": 20369 }, { "epoch": 0.4264004019090681, "grad_norm": 0.32533830404281616, "learning_rate": 0.0001902196498189185, "loss": 11.6863, "step": 20370 }, { "epoch": 0.42642133467303023, "grad_norm": 0.24833612143993378, "learning_rate": 0.00019021870410242472, "loss": 11.6659, "step": 20371 }, { "epoch": 0.4264422674369924, "grad_norm": 0.31443533301353455, "learning_rate": 0.000190217758342561, "loss": 11.6717, "step": 20372 }, { "epoch": 0.4264632002009545, "grad_norm": 0.3128231167793274, "learning_rate": 0.00019021681253932778, "loss": 11.668, "step": 20373 }, { "epoch": 0.42648413296491666, "grad_norm": 0.2479614019393921, "learning_rate": 0.00019021586669272554, "loss": 11.6832, "step": 20374 }, { "epoch": 0.42650506572887886, "grad_norm": 0.2906326353549957, "learning_rate": 0.00019021492080275473, "loss": 11.6657, "step": 20375 }, { "epoch": 0.426525998492841, "grad_norm": 0.35248830914497375, "learning_rate": 0.0001902139748694158, "loss": 11.6786, "step": 20376 }, { "epoch": 0.42654693125680315, "grad_norm": 0.2917509377002716, "learning_rate": 0.00019021302889270922, "loss": 11.6694, "step": 20377 }, { "epoch": 0.4265678640207653, "grad_norm": 0.3551805913448334, "learning_rate": 0.0001902120828726354, "loss": 11.6731, "step": 20378 }, { "epoch": 0.42658879678472744, "grad_norm": 0.2522903084754944, "learning_rate": 0.00019021113680919483, "loss": 11.6615, "step": 20379 }, { "epoch": 0.4266097295486896, "grad_norm": 0.29715150594711304, "learning_rate": 0.00019021019070238797, "loss": 11.6602, "step": 20380 }, { "epoch": 0.4266306623126518, "grad_norm": 0.317038893699646, "learning_rate": 0.00019020924455221528, "loss": 11.6735, "step": 20381 }, { "epoch": 0.42665159507661393, "grad_norm": 0.2636875510215759, "learning_rate": 0.00019020829835867715, "loss": 11.6587, "step": 20382 }, { "epoch": 0.4266725278405761, "grad_norm": 0.2876491844654083, "learning_rate": 0.0001902073521217741, "loss": 11.6771, "step": 20383 }, { "epoch": 0.4266934606045382, "grad_norm": 1.584131121635437, "learning_rate": 0.0001902064058415066, "loss": 11.7167, "step": 20384 }, { "epoch": 0.42671439336850037, "grad_norm": 0.2645118534564972, "learning_rate": 0.00019020545951787504, "loss": 11.6892, "step": 20385 }, { "epoch": 0.4267353261324625, "grad_norm": 0.24440252780914307, "learning_rate": 0.00019020451315087994, "loss": 11.6696, "step": 20386 }, { "epoch": 0.4267562588964247, "grad_norm": 0.31130626797676086, "learning_rate": 0.00019020356674052165, "loss": 11.6805, "step": 20387 }, { "epoch": 0.42677719166038686, "grad_norm": 0.3426171541213989, "learning_rate": 0.00019020262028680078, "loss": 11.6976, "step": 20388 }, { "epoch": 0.426798124424349, "grad_norm": 0.34226930141448975, "learning_rate": 0.00019020167378971768, "loss": 11.6603, "step": 20389 }, { "epoch": 0.42681905718831115, "grad_norm": 0.24109400808811188, "learning_rate": 0.00019020072724927284, "loss": 11.6822, "step": 20390 }, { "epoch": 0.4268399899522733, "grad_norm": 0.3358055353164673, "learning_rate": 0.00019019978066546665, "loss": 11.6832, "step": 20391 }, { "epoch": 0.42686092271623544, "grad_norm": 0.22060270607471466, "learning_rate": 0.00019019883403829964, "loss": 11.663, "step": 20392 }, { "epoch": 0.4268818554801976, "grad_norm": 0.3226190209388733, "learning_rate": 0.00019019788736777227, "loss": 11.6621, "step": 20393 }, { "epoch": 0.4269027882441598, "grad_norm": 0.2810724377632141, "learning_rate": 0.00019019694065388495, "loss": 11.6662, "step": 20394 }, { "epoch": 0.4269237210081219, "grad_norm": 0.3197990357875824, "learning_rate": 0.00019019599389663813, "loss": 11.6678, "step": 20395 }, { "epoch": 0.42694465377208407, "grad_norm": 0.3046116232872009, "learning_rate": 0.00019019504709603233, "loss": 11.6719, "step": 20396 }, { "epoch": 0.4269655865360462, "grad_norm": 0.2921418249607086, "learning_rate": 0.00019019410025206796, "loss": 11.6808, "step": 20397 }, { "epoch": 0.42698651930000836, "grad_norm": 0.22905664145946503, "learning_rate": 0.00019019315336474545, "loss": 11.6698, "step": 20398 }, { "epoch": 0.4270074520639705, "grad_norm": 0.3198992908000946, "learning_rate": 0.0001901922064340653, "loss": 11.6656, "step": 20399 }, { "epoch": 0.4270283848279327, "grad_norm": 0.3713543117046356, "learning_rate": 0.00019019125946002796, "loss": 11.6692, "step": 20400 }, { "epoch": 0.42704931759189485, "grad_norm": 0.3321746587753296, "learning_rate": 0.00019019031244263385, "loss": 11.6541, "step": 20401 }, { "epoch": 0.427070250355857, "grad_norm": 0.2906089425086975, "learning_rate": 0.00019018936538188345, "loss": 11.6727, "step": 20402 }, { "epoch": 0.42709118311981914, "grad_norm": 0.34190550446510315, "learning_rate": 0.00019018841827777726, "loss": 11.6664, "step": 20403 }, { "epoch": 0.4271121158837813, "grad_norm": 0.287992000579834, "learning_rate": 0.00019018747113031564, "loss": 11.6724, "step": 20404 }, { "epoch": 0.42713304864774343, "grad_norm": 0.29663822054862976, "learning_rate": 0.00019018652393949908, "loss": 11.6824, "step": 20405 }, { "epoch": 0.4271539814117056, "grad_norm": 0.23739682137966156, "learning_rate": 0.00019018557670532807, "loss": 11.6627, "step": 20406 }, { "epoch": 0.4271749141756678, "grad_norm": 0.22291167080402374, "learning_rate": 0.00019018462942780307, "loss": 11.6789, "step": 20407 }, { "epoch": 0.4271958469396299, "grad_norm": 0.27166688442230225, "learning_rate": 0.0001901836821069245, "loss": 11.6781, "step": 20408 }, { "epoch": 0.42721677970359206, "grad_norm": 0.26349884271621704, "learning_rate": 0.00019018273474269282, "loss": 11.6769, "step": 20409 }, { "epoch": 0.4272377124675542, "grad_norm": 0.26767095923423767, "learning_rate": 0.00019018178733510853, "loss": 11.6723, "step": 20410 }, { "epoch": 0.42725864523151635, "grad_norm": 0.2758007049560547, "learning_rate": 0.000190180839884172, "loss": 11.6793, "step": 20411 }, { "epoch": 0.4272795779954785, "grad_norm": 0.2831670045852661, "learning_rate": 0.00019017989238988373, "loss": 11.6878, "step": 20412 }, { "epoch": 0.4273005107594407, "grad_norm": 0.24043972790241241, "learning_rate": 0.0001901789448522442, "loss": 11.6642, "step": 20413 }, { "epoch": 0.42732144352340284, "grad_norm": 0.34371984004974365, "learning_rate": 0.00019017799727125384, "loss": 11.6679, "step": 20414 }, { "epoch": 0.427342376287365, "grad_norm": 0.2739797830581665, "learning_rate": 0.00019017704964691313, "loss": 11.6879, "step": 20415 }, { "epoch": 0.42736330905132713, "grad_norm": 0.3816547393798828, "learning_rate": 0.00019017610197922247, "loss": 11.6668, "step": 20416 }, { "epoch": 0.4273842418152893, "grad_norm": 0.3091026246547699, "learning_rate": 0.00019017515426818235, "loss": 11.6793, "step": 20417 }, { "epoch": 0.4274051745792514, "grad_norm": 0.3676305413246155, "learning_rate": 0.00019017420651379325, "loss": 11.678, "step": 20418 }, { "epoch": 0.4274261073432136, "grad_norm": 0.2567495107650757, "learning_rate": 0.0001901732587160556, "loss": 11.687, "step": 20419 }, { "epoch": 0.42744704010717577, "grad_norm": 0.2701069712638855, "learning_rate": 0.00019017231087496984, "loss": 11.697, "step": 20420 }, { "epoch": 0.4274679728711379, "grad_norm": 0.27778297662734985, "learning_rate": 0.00019017136299053647, "loss": 11.6969, "step": 20421 }, { "epoch": 0.42748890563510006, "grad_norm": 0.28368234634399414, "learning_rate": 0.0001901704150627559, "loss": 11.6693, "step": 20422 }, { "epoch": 0.4275098383990622, "grad_norm": 0.2888929843902588, "learning_rate": 0.00019016946709162863, "loss": 11.6739, "step": 20423 }, { "epoch": 0.42753077116302435, "grad_norm": 0.29461807012557983, "learning_rate": 0.00019016851907715504, "loss": 11.6874, "step": 20424 }, { "epoch": 0.4275517039269865, "grad_norm": 0.28033575415611267, "learning_rate": 0.00019016757101933567, "loss": 11.6765, "step": 20425 }, { "epoch": 0.4275726366909487, "grad_norm": 0.4004790186882019, "learning_rate": 0.00019016662291817092, "loss": 11.6937, "step": 20426 }, { "epoch": 0.42759356945491084, "grad_norm": 0.3049621880054474, "learning_rate": 0.00019016567477366127, "loss": 11.6738, "step": 20427 }, { "epoch": 0.427614502218873, "grad_norm": 0.25258031487464905, "learning_rate": 0.0001901647265858072, "loss": 11.6782, "step": 20428 }, { "epoch": 0.4276354349828351, "grad_norm": 0.24536347389221191, "learning_rate": 0.00019016377835460912, "loss": 11.6811, "step": 20429 }, { "epoch": 0.4276563677467973, "grad_norm": 0.36375561356544495, "learning_rate": 0.00019016283008006752, "loss": 11.6721, "step": 20430 }, { "epoch": 0.4276773005107594, "grad_norm": 0.26808273792266846, "learning_rate": 0.00019016188176218283, "loss": 11.6547, "step": 20431 }, { "epoch": 0.4276982332747216, "grad_norm": 0.2783339321613312, "learning_rate": 0.00019016093340095552, "loss": 11.6776, "step": 20432 }, { "epoch": 0.42771916603868376, "grad_norm": 0.3143589496612549, "learning_rate": 0.00019015998499638604, "loss": 11.7008, "step": 20433 }, { "epoch": 0.4277400988026459, "grad_norm": 0.3587454557418823, "learning_rate": 0.00019015903654847485, "loss": 11.6662, "step": 20434 }, { "epoch": 0.42776103156660805, "grad_norm": 0.3597831428050995, "learning_rate": 0.00019015808805722238, "loss": 11.6707, "step": 20435 }, { "epoch": 0.4277819643305702, "grad_norm": 0.2881494164466858, "learning_rate": 0.00019015713952262914, "loss": 11.69, "step": 20436 }, { "epoch": 0.42780289709453234, "grad_norm": 0.26871952414512634, "learning_rate": 0.00019015619094469555, "loss": 11.6795, "step": 20437 }, { "epoch": 0.42782382985849454, "grad_norm": 0.2774913012981415, "learning_rate": 0.00019015524232342208, "loss": 11.6603, "step": 20438 }, { "epoch": 0.4278447626224567, "grad_norm": 0.25354239344596863, "learning_rate": 0.00019015429365880918, "loss": 11.6496, "step": 20439 }, { "epoch": 0.42786569538641883, "grad_norm": 0.25159966945648193, "learning_rate": 0.00019015334495085727, "loss": 11.6719, "step": 20440 }, { "epoch": 0.427886628150381, "grad_norm": 0.2473030686378479, "learning_rate": 0.00019015239619956685, "loss": 11.6613, "step": 20441 }, { "epoch": 0.4279075609143431, "grad_norm": 0.25264525413513184, "learning_rate": 0.0001901514474049384, "loss": 11.6937, "step": 20442 }, { "epoch": 0.42792849367830527, "grad_norm": 0.291535884141922, "learning_rate": 0.0001901504985669723, "loss": 11.6701, "step": 20443 }, { "epoch": 0.4279494264422674, "grad_norm": 0.2551214098930359, "learning_rate": 0.00019014954968566908, "loss": 11.6635, "step": 20444 }, { "epoch": 0.4279703592062296, "grad_norm": 0.3116401135921478, "learning_rate": 0.00019014860076102915, "loss": 11.6541, "step": 20445 }, { "epoch": 0.42799129197019176, "grad_norm": 0.3776739537715912, "learning_rate": 0.00019014765179305298, "loss": 11.677, "step": 20446 }, { "epoch": 0.4280122247341539, "grad_norm": 0.3132983446121216, "learning_rate": 0.00019014670278174104, "loss": 11.6838, "step": 20447 }, { "epoch": 0.42803315749811605, "grad_norm": 0.2727983593940735, "learning_rate": 0.00019014575372709378, "loss": 11.6666, "step": 20448 }, { "epoch": 0.4280540902620782, "grad_norm": 0.30403217673301697, "learning_rate": 0.0001901448046291116, "loss": 11.6836, "step": 20449 }, { "epoch": 0.42807502302604034, "grad_norm": 0.3031008243560791, "learning_rate": 0.00019014385548779506, "loss": 11.6798, "step": 20450 }, { "epoch": 0.42809595579000254, "grad_norm": 0.28006091713905334, "learning_rate": 0.00019014290630314456, "loss": 11.6609, "step": 20451 }, { "epoch": 0.4281168885539647, "grad_norm": 0.26636597514152527, "learning_rate": 0.00019014195707516053, "loss": 11.6812, "step": 20452 }, { "epoch": 0.4281378213179268, "grad_norm": 0.28311458230018616, "learning_rate": 0.00019014100780384346, "loss": 11.6635, "step": 20453 }, { "epoch": 0.42815875408188897, "grad_norm": 0.36202970147132874, "learning_rate": 0.00019014005848919384, "loss": 11.6702, "step": 20454 }, { "epoch": 0.4281796868458511, "grad_norm": 0.34033486247062683, "learning_rate": 0.00019013910913121205, "loss": 11.6502, "step": 20455 }, { "epoch": 0.42820061960981326, "grad_norm": 0.327114999294281, "learning_rate": 0.00019013815972989857, "loss": 11.6738, "step": 20456 }, { "epoch": 0.42822155237377546, "grad_norm": 0.29640960693359375, "learning_rate": 0.00019013721028525392, "loss": 11.6761, "step": 20457 }, { "epoch": 0.4282424851377376, "grad_norm": 0.31663814187049866, "learning_rate": 0.00019013626079727848, "loss": 11.6723, "step": 20458 }, { "epoch": 0.42826341790169975, "grad_norm": 0.27652740478515625, "learning_rate": 0.00019013531126597274, "loss": 11.6684, "step": 20459 }, { "epoch": 0.4282843506656619, "grad_norm": 0.2817951738834381, "learning_rate": 0.00019013436169133712, "loss": 11.6681, "step": 20460 }, { "epoch": 0.42830528342962404, "grad_norm": 0.24663995206356049, "learning_rate": 0.0001901334120733721, "loss": 11.6741, "step": 20461 }, { "epoch": 0.4283262161935862, "grad_norm": 0.3043328523635864, "learning_rate": 0.0001901324624120782, "loss": 11.6812, "step": 20462 }, { "epoch": 0.42834714895754833, "grad_norm": 0.2532672882080078, "learning_rate": 0.00019013151270745577, "loss": 11.6786, "step": 20463 }, { "epoch": 0.42836808172151053, "grad_norm": 0.31571778655052185, "learning_rate": 0.00019013056295950534, "loss": 11.6875, "step": 20464 }, { "epoch": 0.4283890144854727, "grad_norm": 0.3206906318664551, "learning_rate": 0.00019012961316822733, "loss": 11.6761, "step": 20465 }, { "epoch": 0.4284099472494348, "grad_norm": 0.3326411843299866, "learning_rate": 0.0001901286633336222, "loss": 11.6663, "step": 20466 }, { "epoch": 0.42843088001339696, "grad_norm": 0.33248332142829895, "learning_rate": 0.00019012771345569045, "loss": 11.664, "step": 20467 }, { "epoch": 0.4284518127773591, "grad_norm": 0.28026536107063293, "learning_rate": 0.00019012676353443247, "loss": 11.6507, "step": 20468 }, { "epoch": 0.42847274554132125, "grad_norm": 0.24093300104141235, "learning_rate": 0.00019012581356984875, "loss": 11.6748, "step": 20469 }, { "epoch": 0.42849367830528345, "grad_norm": 0.29076114296913147, "learning_rate": 0.00019012486356193977, "loss": 11.675, "step": 20470 }, { "epoch": 0.4285146110692456, "grad_norm": 0.262173593044281, "learning_rate": 0.00019012391351070593, "loss": 11.6729, "step": 20471 }, { "epoch": 0.42853554383320774, "grad_norm": 0.3033019006252289, "learning_rate": 0.00019012296341614776, "loss": 11.6713, "step": 20472 }, { "epoch": 0.4285564765971699, "grad_norm": 0.24903923273086548, "learning_rate": 0.00019012201327826565, "loss": 11.6703, "step": 20473 }, { "epoch": 0.42857740936113203, "grad_norm": 0.26011860370635986, "learning_rate": 0.0001901210630970601, "loss": 11.6735, "step": 20474 }, { "epoch": 0.4285983421250942, "grad_norm": 0.31377896666526794, "learning_rate": 0.0001901201128725315, "loss": 11.6777, "step": 20475 }, { "epoch": 0.4286192748890564, "grad_norm": 0.27175289392471313, "learning_rate": 0.00019011916260468039, "loss": 11.6711, "step": 20476 }, { "epoch": 0.4286402076530185, "grad_norm": 0.21422840654850006, "learning_rate": 0.00019011821229350718, "loss": 11.6718, "step": 20477 }, { "epoch": 0.42866114041698067, "grad_norm": 0.2972899377346039, "learning_rate": 0.00019011726193901235, "loss": 11.6681, "step": 20478 }, { "epoch": 0.4286820731809428, "grad_norm": 0.28232601284980774, "learning_rate": 0.00019011631154119634, "loss": 11.6864, "step": 20479 }, { "epoch": 0.42870300594490496, "grad_norm": 0.2593918740749359, "learning_rate": 0.00019011536110005964, "loss": 11.6736, "step": 20480 }, { "epoch": 0.4287239387088671, "grad_norm": 0.32686150074005127, "learning_rate": 0.00019011441061560268, "loss": 11.6837, "step": 20481 }, { "epoch": 0.42874487147282925, "grad_norm": 0.25290966033935547, "learning_rate": 0.00019011346008782586, "loss": 11.6684, "step": 20482 }, { "epoch": 0.42876580423679145, "grad_norm": 0.3543376624584198, "learning_rate": 0.00019011250951672974, "loss": 11.6665, "step": 20483 }, { "epoch": 0.4287867370007536, "grad_norm": 0.407452791929245, "learning_rate": 0.00019011155890231472, "loss": 11.6741, "step": 20484 }, { "epoch": 0.42880766976471574, "grad_norm": 0.2870527505874634, "learning_rate": 0.00019011060824458128, "loss": 11.6739, "step": 20485 }, { "epoch": 0.4288286025286779, "grad_norm": 0.29322412610054016, "learning_rate": 0.00019010965754352984, "loss": 11.6718, "step": 20486 }, { "epoch": 0.42884953529264, "grad_norm": 0.2814720571041107, "learning_rate": 0.0001901087067991609, "loss": 11.6655, "step": 20487 }, { "epoch": 0.42887046805660217, "grad_norm": 0.31345778703689575, "learning_rate": 0.00019010775601147487, "loss": 11.677, "step": 20488 }, { "epoch": 0.4288914008205644, "grad_norm": 0.38512328267097473, "learning_rate": 0.0001901068051804723, "loss": 11.6777, "step": 20489 }, { "epoch": 0.4289123335845265, "grad_norm": 0.25254708528518677, "learning_rate": 0.00019010585430615353, "loss": 11.6851, "step": 20490 }, { "epoch": 0.42893326634848866, "grad_norm": 0.3523057997226715, "learning_rate": 0.00019010490338851907, "loss": 11.6586, "step": 20491 }, { "epoch": 0.4289541991124508, "grad_norm": 0.22564579546451569, "learning_rate": 0.0001901039524275694, "loss": 11.6683, "step": 20492 }, { "epoch": 0.42897513187641295, "grad_norm": 0.26831886172294617, "learning_rate": 0.00019010300142330495, "loss": 11.6687, "step": 20493 }, { "epoch": 0.4289960646403751, "grad_norm": 0.24558821320533752, "learning_rate": 0.00019010205037572618, "loss": 11.675, "step": 20494 }, { "epoch": 0.42901699740433724, "grad_norm": 0.22434300184249878, "learning_rate": 0.00019010109928483356, "loss": 11.6666, "step": 20495 }, { "epoch": 0.42903793016829944, "grad_norm": 0.3134113848209381, "learning_rate": 0.0001901001481506275, "loss": 11.6727, "step": 20496 }, { "epoch": 0.4290588629322616, "grad_norm": 0.2903362810611725, "learning_rate": 0.00019009919697310853, "loss": 11.6826, "step": 20497 }, { "epoch": 0.42907979569622373, "grad_norm": 0.40208306908607483, "learning_rate": 0.00019009824575227704, "loss": 11.6796, "step": 20498 }, { "epoch": 0.4291007284601859, "grad_norm": 0.3022923171520233, "learning_rate": 0.00019009729448813352, "loss": 11.6708, "step": 20499 }, { "epoch": 0.429121661224148, "grad_norm": 0.31459227204322815, "learning_rate": 0.00019009634318067846, "loss": 11.687, "step": 20500 }, { "epoch": 0.42914259398811017, "grad_norm": 0.35283657908439636, "learning_rate": 0.00019009539182991225, "loss": 11.693, "step": 20501 }, { "epoch": 0.42916352675207237, "grad_norm": 0.3021894097328186, "learning_rate": 0.0001900944404358354, "loss": 11.6746, "step": 20502 }, { "epoch": 0.4291844595160345, "grad_norm": 0.28857988119125366, "learning_rate": 0.0001900934889984483, "loss": 11.6611, "step": 20503 }, { "epoch": 0.42920539227999666, "grad_norm": 0.29877719283103943, "learning_rate": 0.0001900925375177515, "loss": 11.6602, "step": 20504 }, { "epoch": 0.4292263250439588, "grad_norm": 0.3161042034626007, "learning_rate": 0.00019009158599374542, "loss": 11.6856, "step": 20505 }, { "epoch": 0.42924725780792095, "grad_norm": 0.3647085726261139, "learning_rate": 0.00019009063442643046, "loss": 11.6833, "step": 20506 }, { "epoch": 0.4292681905718831, "grad_norm": 0.33289018273353577, "learning_rate": 0.00019008968281580718, "loss": 11.6855, "step": 20507 }, { "epoch": 0.4292891233358453, "grad_norm": 0.32257387042045593, "learning_rate": 0.00019008873116187596, "loss": 11.6874, "step": 20508 }, { "epoch": 0.42931005609980744, "grad_norm": 0.35081225633621216, "learning_rate": 0.00019008777946463728, "loss": 11.6947, "step": 20509 }, { "epoch": 0.4293309888637696, "grad_norm": 0.2807978391647339, "learning_rate": 0.00019008682772409162, "loss": 11.681, "step": 20510 }, { "epoch": 0.4293519216277317, "grad_norm": 0.3294611871242523, "learning_rate": 0.0001900858759402394, "loss": 11.6643, "step": 20511 }, { "epoch": 0.42937285439169387, "grad_norm": 0.2781130373477936, "learning_rate": 0.00019008492411308107, "loss": 11.6682, "step": 20512 }, { "epoch": 0.429393787155656, "grad_norm": 0.28436774015426636, "learning_rate": 0.00019008397224261713, "loss": 11.6647, "step": 20513 }, { "epoch": 0.42941471991961816, "grad_norm": 0.3674072027206421, "learning_rate": 0.00019008302032884805, "loss": 11.6656, "step": 20514 }, { "epoch": 0.42943565268358036, "grad_norm": 0.35310864448547363, "learning_rate": 0.0001900820683717742, "loss": 11.6716, "step": 20515 }, { "epoch": 0.4294565854475425, "grad_norm": 0.28934669494628906, "learning_rate": 0.00019008111637139615, "loss": 11.677, "step": 20516 }, { "epoch": 0.42947751821150465, "grad_norm": 0.29599443078041077, "learning_rate": 0.00019008016432771427, "loss": 11.6698, "step": 20517 }, { "epoch": 0.4294984509754668, "grad_norm": 0.2648240029811859, "learning_rate": 0.00019007921224072907, "loss": 11.6782, "step": 20518 }, { "epoch": 0.42951938373942894, "grad_norm": 0.2985423803329468, "learning_rate": 0.00019007826011044094, "loss": 11.6764, "step": 20519 }, { "epoch": 0.4295403165033911, "grad_norm": 0.30219000577926636, "learning_rate": 0.00019007730793685045, "loss": 11.674, "step": 20520 }, { "epoch": 0.4295612492673533, "grad_norm": 0.3366326689720154, "learning_rate": 0.00019007635571995795, "loss": 11.667, "step": 20521 }, { "epoch": 0.42958218203131543, "grad_norm": 0.2686101794242859, "learning_rate": 0.00019007540345976397, "loss": 11.6574, "step": 20522 }, { "epoch": 0.4296031147952776, "grad_norm": 0.2930244505405426, "learning_rate": 0.0001900744511562689, "loss": 11.673, "step": 20523 }, { "epoch": 0.4296240475592397, "grad_norm": 0.2735714316368103, "learning_rate": 0.00019007349880947327, "loss": 11.6728, "step": 20524 }, { "epoch": 0.42964498032320186, "grad_norm": 0.26770827174186707, "learning_rate": 0.00019007254641937747, "loss": 11.6764, "step": 20525 }, { "epoch": 0.429665913087164, "grad_norm": 0.27127695083618164, "learning_rate": 0.00019007159398598203, "loss": 11.6736, "step": 20526 }, { "epoch": 0.4296868458511262, "grad_norm": 0.2632148861885071, "learning_rate": 0.00019007064150928734, "loss": 11.6644, "step": 20527 }, { "epoch": 0.42970777861508835, "grad_norm": 0.24514606595039368, "learning_rate": 0.00019006968898929388, "loss": 11.6736, "step": 20528 }, { "epoch": 0.4297287113790505, "grad_norm": 0.33568423986434937, "learning_rate": 0.00019006873642600215, "loss": 11.6712, "step": 20529 }, { "epoch": 0.42974964414301264, "grad_norm": 0.2520271837711334, "learning_rate": 0.00019006778381941257, "loss": 11.6639, "step": 20530 }, { "epoch": 0.4297705769069748, "grad_norm": 0.2972407341003418, "learning_rate": 0.0001900668311695256, "loss": 11.6743, "step": 20531 }, { "epoch": 0.42979150967093693, "grad_norm": 0.3761979937553406, "learning_rate": 0.0001900658784763417, "loss": 11.6887, "step": 20532 }, { "epoch": 0.4298124424348991, "grad_norm": 0.32401520013809204, "learning_rate": 0.0001900649257398613, "loss": 11.6866, "step": 20533 }, { "epoch": 0.4298333751988613, "grad_norm": 0.34070566296577454, "learning_rate": 0.00019006397296008492, "loss": 11.6846, "step": 20534 }, { "epoch": 0.4298543079628234, "grad_norm": 0.30894726514816284, "learning_rate": 0.00019006302013701296, "loss": 11.6706, "step": 20535 }, { "epoch": 0.42987524072678557, "grad_norm": 0.27988654375076294, "learning_rate": 0.0001900620672706459, "loss": 11.6653, "step": 20536 }, { "epoch": 0.4298961734907477, "grad_norm": 0.28899359703063965, "learning_rate": 0.0001900611143609842, "loss": 11.6868, "step": 20537 }, { "epoch": 0.42991710625470986, "grad_norm": 0.35319337248802185, "learning_rate": 0.0001900601614080283, "loss": 11.6657, "step": 20538 }, { "epoch": 0.429938039018672, "grad_norm": 0.2873304784297943, "learning_rate": 0.0001900592084117787, "loss": 11.6688, "step": 20539 }, { "epoch": 0.4299589717826342, "grad_norm": 0.32418084144592285, "learning_rate": 0.00019005825537223587, "loss": 11.6752, "step": 20540 }, { "epoch": 0.42997990454659635, "grad_norm": 0.29653558135032654, "learning_rate": 0.0001900573022894002, "loss": 11.6559, "step": 20541 }, { "epoch": 0.4300008373105585, "grad_norm": 0.263872355222702, "learning_rate": 0.00019005634916327219, "loss": 11.6564, "step": 20542 }, { "epoch": 0.43002177007452064, "grad_norm": 0.2872530221939087, "learning_rate": 0.00019005539599385225, "loss": 11.6783, "step": 20543 }, { "epoch": 0.4300427028384828, "grad_norm": 0.2919337749481201, "learning_rate": 0.0001900544427811409, "loss": 11.6738, "step": 20544 }, { "epoch": 0.4300636356024449, "grad_norm": 0.243285670876503, "learning_rate": 0.00019005348952513858, "loss": 11.6689, "step": 20545 }, { "epoch": 0.4300845683664071, "grad_norm": 0.30564504861831665, "learning_rate": 0.00019005253622584572, "loss": 11.6697, "step": 20546 }, { "epoch": 0.43010550113036927, "grad_norm": 0.2973926067352295, "learning_rate": 0.00019005158288326284, "loss": 11.6772, "step": 20547 }, { "epoch": 0.4301264338943314, "grad_norm": 0.2651679217815399, "learning_rate": 0.00019005062949739032, "loss": 11.653, "step": 20548 }, { "epoch": 0.43014736665829356, "grad_norm": 0.359210729598999, "learning_rate": 0.00019004967606822869, "loss": 11.7042, "step": 20549 }, { "epoch": 0.4301682994222557, "grad_norm": 0.33233821392059326, "learning_rate": 0.00019004872259577836, "loss": 11.6794, "step": 20550 }, { "epoch": 0.43018923218621785, "grad_norm": 0.27351537346839905, "learning_rate": 0.00019004776908003977, "loss": 11.6753, "step": 20551 }, { "epoch": 0.43021016495018, "grad_norm": 0.26798614859580994, "learning_rate": 0.00019004681552101346, "loss": 11.6759, "step": 20552 }, { "epoch": 0.4302310977141422, "grad_norm": 0.3427196443080902, "learning_rate": 0.00019004586191869987, "loss": 11.6706, "step": 20553 }, { "epoch": 0.43025203047810434, "grad_norm": 0.3876521587371826, "learning_rate": 0.00019004490827309936, "loss": 11.6836, "step": 20554 }, { "epoch": 0.4302729632420665, "grad_norm": 0.32687872648239136, "learning_rate": 0.0001900439545842125, "loss": 11.6731, "step": 20555 }, { "epoch": 0.43029389600602863, "grad_norm": 0.33345481753349304, "learning_rate": 0.0001900430008520397, "loss": 11.6816, "step": 20556 }, { "epoch": 0.4303148287699908, "grad_norm": 0.27404096722602844, "learning_rate": 0.0001900420470765814, "loss": 11.6623, "step": 20557 }, { "epoch": 0.4303357615339529, "grad_norm": 0.29113245010375977, "learning_rate": 0.00019004109325783812, "loss": 11.6744, "step": 20558 }, { "epoch": 0.4303566942979151, "grad_norm": 0.3186984062194824, "learning_rate": 0.00019004013939581026, "loss": 11.6545, "step": 20559 }, { "epoch": 0.43037762706187727, "grad_norm": 0.32687851786613464, "learning_rate": 0.00019003918549049831, "loss": 11.6774, "step": 20560 }, { "epoch": 0.4303985598258394, "grad_norm": 0.28851091861724854, "learning_rate": 0.0001900382315419027, "loss": 11.669, "step": 20561 }, { "epoch": 0.43041949258980156, "grad_norm": 0.2957193851470947, "learning_rate": 0.00019003727755002395, "loss": 11.6927, "step": 20562 }, { "epoch": 0.4304404253537637, "grad_norm": 0.3947747051715851, "learning_rate": 0.00019003632351486242, "loss": 11.685, "step": 20563 }, { "epoch": 0.43046135811772585, "grad_norm": 0.2828271687030792, "learning_rate": 0.00019003536943641865, "loss": 11.6846, "step": 20564 }, { "epoch": 0.43048229088168805, "grad_norm": 0.26136526465415955, "learning_rate": 0.0001900344153146931, "loss": 11.6745, "step": 20565 }, { "epoch": 0.4305032236456502, "grad_norm": 0.28346332907676697, "learning_rate": 0.00019003346114968616, "loss": 11.6703, "step": 20566 }, { "epoch": 0.43052415640961234, "grad_norm": 0.2632717788219452, "learning_rate": 0.00019003250694139836, "loss": 11.6632, "step": 20567 }, { "epoch": 0.4305450891735745, "grad_norm": 0.24964484572410583, "learning_rate": 0.00019003155268983014, "loss": 11.6943, "step": 20568 }, { "epoch": 0.4305660219375366, "grad_norm": 0.2437438666820526, "learning_rate": 0.0001900305983949819, "loss": 11.6796, "step": 20569 }, { "epoch": 0.43058695470149877, "grad_norm": 0.2604215145111084, "learning_rate": 0.00019002964405685416, "loss": 11.6779, "step": 20570 }, { "epoch": 0.4306078874654609, "grad_norm": 0.31162452697753906, "learning_rate": 0.0001900286896754474, "loss": 11.6781, "step": 20571 }, { "epoch": 0.4306288202294231, "grad_norm": 0.2823929190635681, "learning_rate": 0.00019002773525076205, "loss": 11.685, "step": 20572 }, { "epoch": 0.43064975299338526, "grad_norm": 0.25279349088668823, "learning_rate": 0.00019002678078279854, "loss": 11.6866, "step": 20573 }, { "epoch": 0.4306706857573474, "grad_norm": 0.31667470932006836, "learning_rate": 0.00019002582627155733, "loss": 11.6627, "step": 20574 }, { "epoch": 0.43069161852130955, "grad_norm": 0.330826073884964, "learning_rate": 0.00019002487171703892, "loss": 11.6562, "step": 20575 }, { "epoch": 0.4307125512852717, "grad_norm": 0.29944443702697754, "learning_rate": 0.00019002391711924377, "loss": 11.6782, "step": 20576 }, { "epoch": 0.43073348404923384, "grad_norm": 0.2705652415752411, "learning_rate": 0.00019002296247817228, "loss": 11.6731, "step": 20577 }, { "epoch": 0.43075441681319604, "grad_norm": 0.2932930886745453, "learning_rate": 0.00019002200779382497, "loss": 11.6677, "step": 20578 }, { "epoch": 0.4307753495771582, "grad_norm": 0.29439181089401245, "learning_rate": 0.00019002105306620227, "loss": 11.6771, "step": 20579 }, { "epoch": 0.43079628234112033, "grad_norm": 0.2584019899368286, "learning_rate": 0.00019002009829530468, "loss": 11.671, "step": 20580 }, { "epoch": 0.4308172151050825, "grad_norm": 0.29950883984565735, "learning_rate": 0.00019001914348113259, "loss": 11.6867, "step": 20581 }, { "epoch": 0.4308381478690446, "grad_norm": 0.25500839948654175, "learning_rate": 0.00019001818862368652, "loss": 11.6689, "step": 20582 }, { "epoch": 0.43085908063300676, "grad_norm": 0.29926589131355286, "learning_rate": 0.00019001723372296688, "loss": 11.6786, "step": 20583 }, { "epoch": 0.4308800133969689, "grad_norm": 0.27846670150756836, "learning_rate": 0.00019001627877897415, "loss": 11.6827, "step": 20584 }, { "epoch": 0.4309009461609311, "grad_norm": 0.34108564257621765, "learning_rate": 0.00019001532379170878, "loss": 11.6779, "step": 20585 }, { "epoch": 0.43092187892489325, "grad_norm": 0.3120829463005066, "learning_rate": 0.00019001436876117127, "loss": 11.6538, "step": 20586 }, { "epoch": 0.4309428116888554, "grad_norm": 0.2868315875530243, "learning_rate": 0.00019001341368736203, "loss": 11.6729, "step": 20587 }, { "epoch": 0.43096374445281754, "grad_norm": 0.2565523087978363, "learning_rate": 0.00019001245857028156, "loss": 11.6678, "step": 20588 }, { "epoch": 0.4309846772167797, "grad_norm": 0.22228659689426422, "learning_rate": 0.00019001150340993024, "loss": 11.6664, "step": 20589 }, { "epoch": 0.43100560998074183, "grad_norm": 0.25085070729255676, "learning_rate": 0.00019001054820630867, "loss": 11.6691, "step": 20590 }, { "epoch": 0.43102654274470403, "grad_norm": 0.2727035880088806, "learning_rate": 0.00019000959295941718, "loss": 11.6722, "step": 20591 }, { "epoch": 0.4310474755086662, "grad_norm": 0.2740151286125183, "learning_rate": 0.00019000863766925624, "loss": 11.6837, "step": 20592 }, { "epoch": 0.4310684082726283, "grad_norm": 0.405072420835495, "learning_rate": 0.00019000768233582642, "loss": 11.668, "step": 20593 }, { "epoch": 0.43108934103659047, "grad_norm": 0.37797811627388, "learning_rate": 0.00019000672695912804, "loss": 11.6692, "step": 20594 }, { "epoch": 0.4311102738005526, "grad_norm": 0.36798515915870667, "learning_rate": 0.00019000577153916167, "loss": 11.6728, "step": 20595 }, { "epoch": 0.43113120656451476, "grad_norm": 0.2545943260192871, "learning_rate": 0.0001900048160759277, "loss": 11.6629, "step": 20596 }, { "epoch": 0.43115213932847696, "grad_norm": 0.2694210708141327, "learning_rate": 0.0001900038605694266, "loss": 11.6823, "step": 20597 }, { "epoch": 0.4311730720924391, "grad_norm": 0.27139008045196533, "learning_rate": 0.00019000290501965884, "loss": 11.674, "step": 20598 }, { "epoch": 0.43119400485640125, "grad_norm": 0.24800077080726624, "learning_rate": 0.0001900019494266249, "loss": 11.6775, "step": 20599 }, { "epoch": 0.4312149376203634, "grad_norm": 0.28181713819503784, "learning_rate": 0.00019000099379032522, "loss": 11.6909, "step": 20600 }, { "epoch": 0.43123587038432554, "grad_norm": 0.29295316338539124, "learning_rate": 0.00019000003811076024, "loss": 11.6705, "step": 20601 }, { "epoch": 0.4312568031482877, "grad_norm": 0.31223833560943604, "learning_rate": 0.00018999908238793046, "loss": 11.6484, "step": 20602 }, { "epoch": 0.4312777359122498, "grad_norm": 0.34700247645378113, "learning_rate": 0.00018999812662183629, "loss": 11.6806, "step": 20603 }, { "epoch": 0.431298668676212, "grad_norm": 0.2760709524154663, "learning_rate": 0.00018999717081247822, "loss": 11.6683, "step": 20604 }, { "epoch": 0.43131960144017417, "grad_norm": 0.3054122030735016, "learning_rate": 0.00018999621495985672, "loss": 11.6803, "step": 20605 }, { "epoch": 0.4313405342041363, "grad_norm": 0.30992820858955383, "learning_rate": 0.00018999525906397225, "loss": 11.6813, "step": 20606 }, { "epoch": 0.43136146696809846, "grad_norm": 0.360782265663147, "learning_rate": 0.00018999430312482523, "loss": 11.6721, "step": 20607 }, { "epoch": 0.4313823997320606, "grad_norm": 0.42464566230773926, "learning_rate": 0.00018999334714241617, "loss": 11.6618, "step": 20608 }, { "epoch": 0.43140333249602275, "grad_norm": 0.2644120156764984, "learning_rate": 0.00018999239111674548, "loss": 11.6641, "step": 20609 }, { "epoch": 0.43142426525998495, "grad_norm": 0.30774766206741333, "learning_rate": 0.00018999143504781367, "loss": 11.6685, "step": 20610 }, { "epoch": 0.4314451980239471, "grad_norm": 0.3426184058189392, "learning_rate": 0.00018999047893562113, "loss": 11.6762, "step": 20611 }, { "epoch": 0.43146613078790924, "grad_norm": 0.2999902367591858, "learning_rate": 0.0001899895227801684, "loss": 11.6672, "step": 20612 }, { "epoch": 0.4314870635518714, "grad_norm": 0.2929171025753021, "learning_rate": 0.0001899885665814559, "loss": 11.6753, "step": 20613 }, { "epoch": 0.43150799631583353, "grad_norm": 0.288379430770874, "learning_rate": 0.00018998761033948408, "loss": 11.6763, "step": 20614 }, { "epoch": 0.4315289290797957, "grad_norm": 0.24681395292282104, "learning_rate": 0.00018998665405425342, "loss": 11.664, "step": 20615 }, { "epoch": 0.4315498618437579, "grad_norm": 0.30587589740753174, "learning_rate": 0.00018998569772576436, "loss": 11.6668, "step": 20616 }, { "epoch": 0.43157079460772, "grad_norm": 0.28666847944259644, "learning_rate": 0.0001899847413540174, "loss": 11.6744, "step": 20617 }, { "epoch": 0.43159172737168217, "grad_norm": 0.26229575276374817, "learning_rate": 0.00018998378493901294, "loss": 11.6836, "step": 20618 }, { "epoch": 0.4316126601356443, "grad_norm": 0.27940183877944946, "learning_rate": 0.00018998282848075148, "loss": 11.6875, "step": 20619 }, { "epoch": 0.43163359289960646, "grad_norm": 0.3027454614639282, "learning_rate": 0.00018998187197923348, "loss": 11.6821, "step": 20620 }, { "epoch": 0.4316545256635686, "grad_norm": 0.32981443405151367, "learning_rate": 0.0001899809154344594, "loss": 11.6736, "step": 20621 }, { "epoch": 0.43167545842753074, "grad_norm": 0.24022583663463593, "learning_rate": 0.00018997995884642967, "loss": 11.6743, "step": 20622 }, { "epoch": 0.43169639119149295, "grad_norm": 0.25165578722953796, "learning_rate": 0.00018997900221514477, "loss": 11.6787, "step": 20623 }, { "epoch": 0.4317173239554551, "grad_norm": 0.3833416700363159, "learning_rate": 0.00018997804554060518, "loss": 11.6724, "step": 20624 }, { "epoch": 0.43173825671941723, "grad_norm": 0.23284709453582764, "learning_rate": 0.0001899770888228113, "loss": 11.6604, "step": 20625 }, { "epoch": 0.4317591894833794, "grad_norm": 0.32055580615997314, "learning_rate": 0.0001899761320617637, "loss": 11.671, "step": 20626 }, { "epoch": 0.4317801222473415, "grad_norm": 0.2898392975330353, "learning_rate": 0.0001899751752574627, "loss": 11.6696, "step": 20627 }, { "epoch": 0.43180105501130367, "grad_norm": 0.2991567552089691, "learning_rate": 0.00018997421840990887, "loss": 11.672, "step": 20628 }, { "epoch": 0.43182198777526587, "grad_norm": 0.26589763164520264, "learning_rate": 0.00018997326151910264, "loss": 11.6538, "step": 20629 }, { "epoch": 0.431842920539228, "grad_norm": 0.2908901572227478, "learning_rate": 0.00018997230458504442, "loss": 11.6899, "step": 20630 }, { "epoch": 0.43186385330319016, "grad_norm": 0.25904664397239685, "learning_rate": 0.0001899713476077347, "loss": 11.6684, "step": 20631 }, { "epoch": 0.4318847860671523, "grad_norm": 0.3738067150115967, "learning_rate": 0.000189970390587174, "loss": 11.6552, "step": 20632 }, { "epoch": 0.43190571883111445, "grad_norm": 0.23587149381637573, "learning_rate": 0.00018996943352336273, "loss": 11.6728, "step": 20633 }, { "epoch": 0.4319266515950766, "grad_norm": 0.2527922987937927, "learning_rate": 0.00018996847641630131, "loss": 11.6699, "step": 20634 }, { "epoch": 0.4319475843590388, "grad_norm": 0.27001556754112244, "learning_rate": 0.00018996751926599027, "loss": 11.6693, "step": 20635 }, { "epoch": 0.43196851712300094, "grad_norm": 0.28562861680984497, "learning_rate": 0.00018996656207243, "loss": 11.6699, "step": 20636 }, { "epoch": 0.4319894498869631, "grad_norm": 0.2732062339782715, "learning_rate": 0.00018996560483562107, "loss": 11.6576, "step": 20637 }, { "epoch": 0.43201038265092523, "grad_norm": 0.36040714383125305, "learning_rate": 0.00018996464755556382, "loss": 11.6849, "step": 20638 }, { "epoch": 0.4320313154148874, "grad_norm": 0.6642906069755554, "learning_rate": 0.0001899636902322588, "loss": 11.6858, "step": 20639 }, { "epoch": 0.4320522481788495, "grad_norm": 0.4071578085422516, "learning_rate": 0.0001899627328657064, "loss": 11.695, "step": 20640 }, { "epoch": 0.43207318094281166, "grad_norm": 0.31836700439453125, "learning_rate": 0.0001899617754559071, "loss": 11.6786, "step": 20641 }, { "epoch": 0.43209411370677386, "grad_norm": 0.3286045491695404, "learning_rate": 0.0001899608180028614, "loss": 11.6892, "step": 20642 }, { "epoch": 0.432115046470736, "grad_norm": 0.24184776842594147, "learning_rate": 0.00018995986050656972, "loss": 11.6758, "step": 20643 }, { "epoch": 0.43213597923469815, "grad_norm": 0.400275319814682, "learning_rate": 0.00018995890296703253, "loss": 11.6776, "step": 20644 }, { "epoch": 0.4321569119986603, "grad_norm": 0.309275358915329, "learning_rate": 0.00018995794538425028, "loss": 11.6697, "step": 20645 }, { "epoch": 0.43217784476262244, "grad_norm": 0.33502793312072754, "learning_rate": 0.00018995698775822346, "loss": 11.6727, "step": 20646 }, { "epoch": 0.4321987775265846, "grad_norm": 0.2840519845485687, "learning_rate": 0.0001899560300889525, "loss": 11.6629, "step": 20647 }, { "epoch": 0.4322197102905468, "grad_norm": 0.35753726959228516, "learning_rate": 0.0001899550723764379, "loss": 11.6639, "step": 20648 }, { "epoch": 0.43224064305450893, "grad_norm": 0.2784659266471863, "learning_rate": 0.00018995411462068007, "loss": 11.6625, "step": 20649 }, { "epoch": 0.4322615758184711, "grad_norm": 0.25082775950431824, "learning_rate": 0.0001899531568216795, "loss": 11.6777, "step": 20650 }, { "epoch": 0.4322825085824332, "grad_norm": 0.29393669962882996, "learning_rate": 0.00018995219897943665, "loss": 11.6675, "step": 20651 }, { "epoch": 0.43230344134639537, "grad_norm": 0.30733242630958557, "learning_rate": 0.00018995124109395198, "loss": 11.6723, "step": 20652 }, { "epoch": 0.4323243741103575, "grad_norm": 0.28007903695106506, "learning_rate": 0.00018995028316522595, "loss": 11.664, "step": 20653 }, { "epoch": 0.4323453068743197, "grad_norm": 0.329324871301651, "learning_rate": 0.000189949325193259, "loss": 11.6987, "step": 20654 }, { "epoch": 0.43236623963828186, "grad_norm": 0.2899172306060791, "learning_rate": 0.0001899483671780516, "loss": 11.6574, "step": 20655 }, { "epoch": 0.432387172402244, "grad_norm": 0.3015861511230469, "learning_rate": 0.00018994740911960426, "loss": 11.6691, "step": 20656 }, { "epoch": 0.43240810516620615, "grad_norm": 0.31645458936691284, "learning_rate": 0.00018994645101791732, "loss": 11.6666, "step": 20657 }, { "epoch": 0.4324290379301683, "grad_norm": 0.2678534984588623, "learning_rate": 0.0001899454928729914, "loss": 11.6552, "step": 20658 }, { "epoch": 0.43244997069413044, "grad_norm": 0.29891473054885864, "learning_rate": 0.00018994453468482682, "loss": 11.6566, "step": 20659 }, { "epoch": 0.4324709034580926, "grad_norm": 0.25319430232048035, "learning_rate": 0.00018994357645342412, "loss": 11.6835, "step": 20660 }, { "epoch": 0.4324918362220548, "grad_norm": 0.22974298894405365, "learning_rate": 0.00018994261817878376, "loss": 11.6775, "step": 20661 }, { "epoch": 0.4325127689860169, "grad_norm": 0.3090200126171112, "learning_rate": 0.00018994165986090615, "loss": 11.6664, "step": 20662 }, { "epoch": 0.43253370174997907, "grad_norm": 0.27748921513557434, "learning_rate": 0.0001899407014997918, "loss": 11.6685, "step": 20663 }, { "epoch": 0.4325546345139412, "grad_norm": 0.3470871150493622, "learning_rate": 0.00018993974309544116, "loss": 11.6548, "step": 20664 }, { "epoch": 0.43257556727790336, "grad_norm": 0.23545093834400177, "learning_rate": 0.00018993878464785468, "loss": 11.6669, "step": 20665 }, { "epoch": 0.4325965000418655, "grad_norm": 0.3022078275680542, "learning_rate": 0.0001899378261570328, "loss": 11.679, "step": 20666 }, { "epoch": 0.4326174328058277, "grad_norm": 0.27516043186187744, "learning_rate": 0.00018993686762297602, "loss": 11.6728, "step": 20667 }, { "epoch": 0.43263836556978985, "grad_norm": 0.24612225592136383, "learning_rate": 0.0001899359090456848, "loss": 11.6736, "step": 20668 }, { "epoch": 0.432659298333752, "grad_norm": 0.2955547273159027, "learning_rate": 0.00018993495042515955, "loss": 11.6718, "step": 20669 }, { "epoch": 0.43268023109771414, "grad_norm": 0.30609530210494995, "learning_rate": 0.0001899339917614008, "loss": 11.6941, "step": 20670 }, { "epoch": 0.4327011638616763, "grad_norm": 0.32847359776496887, "learning_rate": 0.000189933033054409, "loss": 11.6693, "step": 20671 }, { "epoch": 0.43272209662563843, "grad_norm": 0.34913402795791626, "learning_rate": 0.00018993207430418455, "loss": 11.6911, "step": 20672 }, { "epoch": 0.4327430293896006, "grad_norm": 0.28904226422309875, "learning_rate": 0.00018993111551072794, "loss": 11.6692, "step": 20673 }, { "epoch": 0.4327639621535628, "grad_norm": 0.2827743887901306, "learning_rate": 0.00018993015667403965, "loss": 11.6675, "step": 20674 }, { "epoch": 0.4327848949175249, "grad_norm": 0.2968321144580841, "learning_rate": 0.00018992919779412012, "loss": 11.6739, "step": 20675 }, { "epoch": 0.43280582768148707, "grad_norm": 0.29363396763801575, "learning_rate": 0.00018992823887096987, "loss": 11.6787, "step": 20676 }, { "epoch": 0.4328267604454492, "grad_norm": 0.30801108479499817, "learning_rate": 0.00018992727990458928, "loss": 11.6687, "step": 20677 }, { "epoch": 0.43284769320941135, "grad_norm": 0.265241414308548, "learning_rate": 0.00018992632089497887, "loss": 11.6786, "step": 20678 }, { "epoch": 0.4328686259733735, "grad_norm": 0.23613624274730682, "learning_rate": 0.00018992536184213904, "loss": 11.6679, "step": 20679 }, { "epoch": 0.4328895587373357, "grad_norm": 0.3436969816684723, "learning_rate": 0.00018992440274607028, "loss": 11.6801, "step": 20680 }, { "epoch": 0.43291049150129784, "grad_norm": 0.28617167472839355, "learning_rate": 0.00018992344360677308, "loss": 11.695, "step": 20681 }, { "epoch": 0.43293142426526, "grad_norm": 0.27940475940704346, "learning_rate": 0.0001899224844242479, "loss": 11.6753, "step": 20682 }, { "epoch": 0.43295235702922213, "grad_norm": 0.28524723649024963, "learning_rate": 0.00018992152519849514, "loss": 11.6782, "step": 20683 }, { "epoch": 0.4329732897931843, "grad_norm": 0.30875661969184875, "learning_rate": 0.0001899205659295153, "loss": 11.6664, "step": 20684 }, { "epoch": 0.4329942225571464, "grad_norm": 0.3352207541465759, "learning_rate": 0.00018991960661730888, "loss": 11.6775, "step": 20685 }, { "epoch": 0.4330151553211086, "grad_norm": 0.3017655909061432, "learning_rate": 0.00018991864726187628, "loss": 11.6558, "step": 20686 }, { "epoch": 0.43303608808507077, "grad_norm": 0.2930501103401184, "learning_rate": 0.00018991768786321797, "loss": 11.6716, "step": 20687 }, { "epoch": 0.4330570208490329, "grad_norm": 0.34882572293281555, "learning_rate": 0.00018991672842133444, "loss": 11.6508, "step": 20688 }, { "epoch": 0.43307795361299506, "grad_norm": 0.2868849039077759, "learning_rate": 0.00018991576893622613, "loss": 11.6818, "step": 20689 }, { "epoch": 0.4330988863769572, "grad_norm": 0.39409419894218445, "learning_rate": 0.00018991480940789353, "loss": 11.6847, "step": 20690 }, { "epoch": 0.43311981914091935, "grad_norm": 0.2654271721839905, "learning_rate": 0.00018991384983633704, "loss": 11.6644, "step": 20691 }, { "epoch": 0.4331407519048815, "grad_norm": 0.2509596049785614, "learning_rate": 0.0001899128902215572, "loss": 11.6747, "step": 20692 }, { "epoch": 0.4331616846688437, "grad_norm": 0.34082892537117004, "learning_rate": 0.00018991193056355443, "loss": 11.6701, "step": 20693 }, { "epoch": 0.43318261743280584, "grad_norm": 0.25395557284355164, "learning_rate": 0.00018991097086232917, "loss": 11.6601, "step": 20694 }, { "epoch": 0.433203550196768, "grad_norm": 0.2498232126235962, "learning_rate": 0.00018991001111788193, "loss": 11.685, "step": 20695 }, { "epoch": 0.43322448296073013, "grad_norm": 0.2754209637641907, "learning_rate": 0.00018990905133021312, "loss": 11.6667, "step": 20696 }, { "epoch": 0.4332454157246923, "grad_norm": 0.26139703392982483, "learning_rate": 0.00018990809149932325, "loss": 11.6679, "step": 20697 }, { "epoch": 0.4332663484886544, "grad_norm": 0.3034261465072632, "learning_rate": 0.00018990713162521275, "loss": 11.6507, "step": 20698 }, { "epoch": 0.4332872812526166, "grad_norm": 0.34363242983818054, "learning_rate": 0.00018990617170788206, "loss": 11.6587, "step": 20699 }, { "epoch": 0.43330821401657876, "grad_norm": 0.25370103120803833, "learning_rate": 0.00018990521174733172, "loss": 11.6715, "step": 20700 }, { "epoch": 0.4333291467805409, "grad_norm": 0.27728772163391113, "learning_rate": 0.0001899042517435621, "loss": 11.6633, "step": 20701 }, { "epoch": 0.43335007954450305, "grad_norm": 0.2964633107185364, "learning_rate": 0.00018990329169657372, "loss": 11.6706, "step": 20702 }, { "epoch": 0.4333710123084652, "grad_norm": 0.3412077724933624, "learning_rate": 0.00018990233160636705, "loss": 11.6752, "step": 20703 }, { "epoch": 0.43339194507242734, "grad_norm": 0.2525392174720764, "learning_rate": 0.00018990137147294252, "loss": 11.6863, "step": 20704 }, { "epoch": 0.43341287783638954, "grad_norm": 0.26880666613578796, "learning_rate": 0.00018990041129630057, "loss": 11.6686, "step": 20705 }, { "epoch": 0.4334338106003517, "grad_norm": 0.26692119240760803, "learning_rate": 0.0001898994510764417, "loss": 11.6827, "step": 20706 }, { "epoch": 0.43345474336431383, "grad_norm": 0.28600558638572693, "learning_rate": 0.00018989849081336638, "loss": 11.6776, "step": 20707 }, { "epoch": 0.433475676128276, "grad_norm": 0.2503957152366638, "learning_rate": 0.00018989753050707505, "loss": 11.6676, "step": 20708 }, { "epoch": 0.4334966088922381, "grad_norm": 0.2696111798286438, "learning_rate": 0.00018989657015756816, "loss": 11.6647, "step": 20709 }, { "epoch": 0.43351754165620027, "grad_norm": 0.331892728805542, "learning_rate": 0.00018989560976484618, "loss": 11.674, "step": 20710 }, { "epoch": 0.4335384744201624, "grad_norm": 0.30570492148399353, "learning_rate": 0.00018989464932890965, "loss": 11.6671, "step": 20711 }, { "epoch": 0.4335594071841246, "grad_norm": 0.42628827691078186, "learning_rate": 0.00018989368884975887, "loss": 11.679, "step": 20712 }, { "epoch": 0.43358033994808676, "grad_norm": 0.26883721351623535, "learning_rate": 0.00018989272832739445, "loss": 11.6697, "step": 20713 }, { "epoch": 0.4336012727120489, "grad_norm": 0.2767021358013153, "learning_rate": 0.00018989176776181674, "loss": 11.6763, "step": 20714 }, { "epoch": 0.43362220547601105, "grad_norm": 0.3154783844947815, "learning_rate": 0.00018989080715302631, "loss": 11.6855, "step": 20715 }, { "epoch": 0.4336431382399732, "grad_norm": 1.8670207262039185, "learning_rate": 0.00018988984650102352, "loss": 11.6498, "step": 20716 }, { "epoch": 0.43366407100393534, "grad_norm": 0.28799328207969666, "learning_rate": 0.00018988888580580892, "loss": 11.6779, "step": 20717 }, { "epoch": 0.43368500376789754, "grad_norm": 0.353744775056839, "learning_rate": 0.00018988792506738293, "loss": 11.6939, "step": 20718 }, { "epoch": 0.4337059365318597, "grad_norm": 0.3161364495754242, "learning_rate": 0.00018988696428574598, "loss": 11.6725, "step": 20719 }, { "epoch": 0.4337268692958218, "grad_norm": 0.25606000423431396, "learning_rate": 0.0001898860034608986, "loss": 11.6759, "step": 20720 }, { "epoch": 0.43374780205978397, "grad_norm": 0.39585286378860474, "learning_rate": 0.00018988504259284122, "loss": 11.696, "step": 20721 }, { "epoch": 0.4337687348237461, "grad_norm": 0.2764904499053955, "learning_rate": 0.00018988408168157426, "loss": 11.6565, "step": 20722 }, { "epoch": 0.43378966758770826, "grad_norm": 0.3106691837310791, "learning_rate": 0.00018988312072709825, "loss": 11.6647, "step": 20723 }, { "epoch": 0.43381060035167046, "grad_norm": 0.21851710975170135, "learning_rate": 0.0001898821597294136, "loss": 11.6727, "step": 20724 }, { "epoch": 0.4338315331156326, "grad_norm": 0.37030667066574097, "learning_rate": 0.0001898811986885208, "loss": 11.6609, "step": 20725 }, { "epoch": 0.43385246587959475, "grad_norm": 0.28648412227630615, "learning_rate": 0.00018988023760442034, "loss": 11.676, "step": 20726 }, { "epoch": 0.4338733986435569, "grad_norm": 0.2889202833175659, "learning_rate": 0.0001898792764771126, "loss": 11.6766, "step": 20727 }, { "epoch": 0.43389433140751904, "grad_norm": 0.2440352588891983, "learning_rate": 0.00018987831530659812, "loss": 11.682, "step": 20728 }, { "epoch": 0.4339152641714812, "grad_norm": 0.30266430974006653, "learning_rate": 0.00018987735409287733, "loss": 11.6758, "step": 20729 }, { "epoch": 0.43393619693544333, "grad_norm": 0.335069477558136, "learning_rate": 0.00018987639283595068, "loss": 11.6747, "step": 20730 }, { "epoch": 0.43395712969940553, "grad_norm": 0.29261139035224915, "learning_rate": 0.00018987543153581866, "loss": 11.6818, "step": 20731 }, { "epoch": 0.4339780624633677, "grad_norm": 0.29610270261764526, "learning_rate": 0.00018987447019248172, "loss": 11.6638, "step": 20732 }, { "epoch": 0.4339989952273298, "grad_norm": 0.2801278233528137, "learning_rate": 0.00018987350880594032, "loss": 11.6629, "step": 20733 }, { "epoch": 0.43401992799129196, "grad_norm": 0.3016170859336853, "learning_rate": 0.00018987254737619494, "loss": 11.675, "step": 20734 }, { "epoch": 0.4340408607552541, "grad_norm": 0.27547743916511536, "learning_rate": 0.00018987158590324602, "loss": 11.6689, "step": 20735 }, { "epoch": 0.43406179351921625, "grad_norm": 0.28156381845474243, "learning_rate": 0.00018987062438709397, "loss": 11.6677, "step": 20736 }, { "epoch": 0.43408272628317845, "grad_norm": 0.33946096897125244, "learning_rate": 0.0001898696628277394, "loss": 11.681, "step": 20737 }, { "epoch": 0.4341036590471406, "grad_norm": 0.3945106267929077, "learning_rate": 0.00018986870122518262, "loss": 11.6591, "step": 20738 }, { "epoch": 0.43412459181110274, "grad_norm": 0.2512127161026001, "learning_rate": 0.00018986773957942415, "loss": 11.6724, "step": 20739 }, { "epoch": 0.4341455245750649, "grad_norm": 0.257373183965683, "learning_rate": 0.0001898667778904645, "loss": 11.6676, "step": 20740 }, { "epoch": 0.43416645733902703, "grad_norm": 0.27872928977012634, "learning_rate": 0.00018986581615830405, "loss": 11.6693, "step": 20741 }, { "epoch": 0.4341873901029892, "grad_norm": 0.27860066294670105, "learning_rate": 0.00018986485438294334, "loss": 11.6591, "step": 20742 }, { "epoch": 0.4342083228669514, "grad_norm": 0.2553057372570038, "learning_rate": 0.00018986389256438275, "loss": 11.6637, "step": 20743 }, { "epoch": 0.4342292556309135, "grad_norm": 0.287532776594162, "learning_rate": 0.00018986293070262283, "loss": 11.6753, "step": 20744 }, { "epoch": 0.43425018839487567, "grad_norm": 0.3139355480670929, "learning_rate": 0.00018986196879766396, "loss": 11.6865, "step": 20745 }, { "epoch": 0.4342711211588378, "grad_norm": 0.26805898547172546, "learning_rate": 0.00018986100684950666, "loss": 11.6858, "step": 20746 }, { "epoch": 0.43429205392279996, "grad_norm": 0.33052346110343933, "learning_rate": 0.00018986004485815137, "loss": 11.6854, "step": 20747 }, { "epoch": 0.4343129866867621, "grad_norm": 0.24796274304389954, "learning_rate": 0.00018985908282359855, "loss": 11.6824, "step": 20748 }, { "epoch": 0.43433391945072425, "grad_norm": 0.2662505805492401, "learning_rate": 0.00018985812074584865, "loss": 11.6568, "step": 20749 }, { "epoch": 0.43435485221468645, "grad_norm": 0.41030874848365784, "learning_rate": 0.00018985715862490218, "loss": 11.6768, "step": 20750 }, { "epoch": 0.4343757849786486, "grad_norm": 0.2920226752758026, "learning_rate": 0.00018985619646075957, "loss": 11.6803, "step": 20751 }, { "epoch": 0.43439671774261074, "grad_norm": 0.28288623690605164, "learning_rate": 0.0001898552342534213, "loss": 11.6646, "step": 20752 }, { "epoch": 0.4344176505065729, "grad_norm": 0.2765235900878906, "learning_rate": 0.00018985427200288779, "loss": 11.6742, "step": 20753 }, { "epoch": 0.43443858327053503, "grad_norm": 0.30223244428634644, "learning_rate": 0.00018985330970915952, "loss": 11.6618, "step": 20754 }, { "epoch": 0.4344595160344972, "grad_norm": 0.2340482473373413, "learning_rate": 0.000189852347372237, "loss": 11.6676, "step": 20755 }, { "epoch": 0.4344804487984594, "grad_norm": 0.3309233784675598, "learning_rate": 0.00018985138499212062, "loss": 11.6653, "step": 20756 }, { "epoch": 0.4345013815624215, "grad_norm": 0.3162977397441864, "learning_rate": 0.00018985042256881092, "loss": 11.6744, "step": 20757 }, { "epoch": 0.43452231432638366, "grad_norm": 0.31350070238113403, "learning_rate": 0.0001898494601023083, "loss": 11.666, "step": 20758 }, { "epoch": 0.4345432470903458, "grad_norm": 0.36318641901016235, "learning_rate": 0.00018984849759261325, "loss": 11.6603, "step": 20759 }, { "epoch": 0.43456417985430795, "grad_norm": 0.3122495114803314, "learning_rate": 0.00018984753503972624, "loss": 11.6765, "step": 20760 }, { "epoch": 0.4345851126182701, "grad_norm": 0.28129342198371887, "learning_rate": 0.00018984657244364768, "loss": 11.6512, "step": 20761 }, { "epoch": 0.43460604538223224, "grad_norm": 0.29737111926078796, "learning_rate": 0.0001898456098043781, "loss": 11.6566, "step": 20762 }, { "epoch": 0.43462697814619444, "grad_norm": 0.2481434941291809, "learning_rate": 0.00018984464712191792, "loss": 11.6786, "step": 20763 }, { "epoch": 0.4346479109101566, "grad_norm": 0.322435200214386, "learning_rate": 0.0001898436843962676, "loss": 11.6763, "step": 20764 }, { "epoch": 0.43466884367411873, "grad_norm": 0.31894680857658386, "learning_rate": 0.00018984272162742767, "loss": 11.6825, "step": 20765 }, { "epoch": 0.4346897764380809, "grad_norm": 0.29125744104385376, "learning_rate": 0.0001898417588153985, "loss": 11.6886, "step": 20766 }, { "epoch": 0.434710709202043, "grad_norm": 0.31791943311691284, "learning_rate": 0.00018984079596018062, "loss": 11.6601, "step": 20767 }, { "epoch": 0.43473164196600517, "grad_norm": 0.2744191884994507, "learning_rate": 0.00018983983306177445, "loss": 11.6644, "step": 20768 }, { "epoch": 0.43475257472996737, "grad_norm": 0.26055407524108887, "learning_rate": 0.0001898388701201805, "loss": 11.6704, "step": 20769 }, { "epoch": 0.4347735074939295, "grad_norm": 0.2934418022632599, "learning_rate": 0.0001898379071353992, "loss": 11.6887, "step": 20770 }, { "epoch": 0.43479444025789166, "grad_norm": 0.3047645092010498, "learning_rate": 0.000189836944107431, "loss": 11.6742, "step": 20771 }, { "epoch": 0.4348153730218538, "grad_norm": 0.21209712326526642, "learning_rate": 0.00018983598103627637, "loss": 11.6561, "step": 20772 }, { "epoch": 0.43483630578581595, "grad_norm": 0.35075199604034424, "learning_rate": 0.00018983501792193584, "loss": 11.6815, "step": 20773 }, { "epoch": 0.4348572385497781, "grad_norm": 0.28668248653411865, "learning_rate": 0.00018983405476440978, "loss": 11.6546, "step": 20774 }, { "epoch": 0.4348781713137403, "grad_norm": 0.28794869780540466, "learning_rate": 0.0001898330915636987, "loss": 11.6836, "step": 20775 }, { "epoch": 0.43489910407770244, "grad_norm": 0.3510243594646454, "learning_rate": 0.00018983212831980302, "loss": 11.6666, "step": 20776 }, { "epoch": 0.4349200368416646, "grad_norm": 0.2925921082496643, "learning_rate": 0.00018983116503272325, "loss": 11.6784, "step": 20777 }, { "epoch": 0.4349409696056267, "grad_norm": 0.28679370880126953, "learning_rate": 0.00018983020170245985, "loss": 11.6634, "step": 20778 }, { "epoch": 0.43496190236958887, "grad_norm": 0.2919705808162689, "learning_rate": 0.00018982923832901326, "loss": 11.6699, "step": 20779 }, { "epoch": 0.434982835133551, "grad_norm": 0.3808642327785492, "learning_rate": 0.00018982827491238394, "loss": 11.6912, "step": 20780 }, { "epoch": 0.43500376789751316, "grad_norm": 0.3028031587600708, "learning_rate": 0.00018982731145257242, "loss": 11.669, "step": 20781 }, { "epoch": 0.43502470066147536, "grad_norm": 0.2563742399215698, "learning_rate": 0.00018982634794957907, "loss": 11.6652, "step": 20782 }, { "epoch": 0.4350456334254375, "grad_norm": 0.2671441435813904, "learning_rate": 0.00018982538440340442, "loss": 11.6668, "step": 20783 }, { "epoch": 0.43506656618939965, "grad_norm": 0.3039293587207794, "learning_rate": 0.00018982442081404887, "loss": 11.6721, "step": 20784 }, { "epoch": 0.4350874989533618, "grad_norm": 0.33703848719596863, "learning_rate": 0.00018982345718151296, "loss": 11.6672, "step": 20785 }, { "epoch": 0.43510843171732394, "grad_norm": 0.34268543124198914, "learning_rate": 0.0001898224935057971, "loss": 11.6751, "step": 20786 }, { "epoch": 0.4351293644812861, "grad_norm": 0.3466280400753021, "learning_rate": 0.00018982152978690175, "loss": 11.6579, "step": 20787 }, { "epoch": 0.4351502972452483, "grad_norm": 0.24512232840061188, "learning_rate": 0.00018982056602482743, "loss": 11.6575, "step": 20788 }, { "epoch": 0.43517123000921043, "grad_norm": 0.3062930703163147, "learning_rate": 0.00018981960221957453, "loss": 11.6629, "step": 20789 }, { "epoch": 0.4351921627731726, "grad_norm": 0.3769836723804474, "learning_rate": 0.00018981863837114356, "loss": 11.6822, "step": 20790 }, { "epoch": 0.4352130955371347, "grad_norm": 0.2827107012271881, "learning_rate": 0.00018981767447953497, "loss": 11.6603, "step": 20791 }, { "epoch": 0.43523402830109686, "grad_norm": 0.23619744181632996, "learning_rate": 0.00018981671054474922, "loss": 11.662, "step": 20792 }, { "epoch": 0.435254961065059, "grad_norm": 0.2509779930114746, "learning_rate": 0.00018981574656678677, "loss": 11.671, "step": 20793 }, { "epoch": 0.4352758938290212, "grad_norm": 0.3148321807384491, "learning_rate": 0.00018981478254564808, "loss": 11.6695, "step": 20794 }, { "epoch": 0.43529682659298335, "grad_norm": 0.27693110704421997, "learning_rate": 0.00018981381848133367, "loss": 11.6795, "step": 20795 }, { "epoch": 0.4353177593569455, "grad_norm": 0.30721405148506165, "learning_rate": 0.0001898128543738439, "loss": 11.6757, "step": 20796 }, { "epoch": 0.43533869212090764, "grad_norm": 0.2773950695991516, "learning_rate": 0.00018981189022317937, "loss": 11.66, "step": 20797 }, { "epoch": 0.4353596248848698, "grad_norm": 0.34267982840538025, "learning_rate": 0.0001898109260293404, "loss": 11.6736, "step": 20798 }, { "epoch": 0.43538055764883193, "grad_norm": 0.25049567222595215, "learning_rate": 0.00018980996179232751, "loss": 11.6635, "step": 20799 }, { "epoch": 0.4354014904127941, "grad_norm": 0.32078665494918823, "learning_rate": 0.00018980899751214122, "loss": 11.6656, "step": 20800 }, { "epoch": 0.4354224231767563, "grad_norm": 0.29734161496162415, "learning_rate": 0.0001898080331887819, "loss": 11.6721, "step": 20801 }, { "epoch": 0.4354433559407184, "grad_norm": 0.36551111936569214, "learning_rate": 0.0001898070688222501, "loss": 11.6779, "step": 20802 }, { "epoch": 0.43546428870468057, "grad_norm": 0.3643476665019989, "learning_rate": 0.00018980610441254623, "loss": 11.6842, "step": 20803 }, { "epoch": 0.4354852214686427, "grad_norm": 0.3424210548400879, "learning_rate": 0.00018980513995967075, "loss": 11.6706, "step": 20804 }, { "epoch": 0.43550615423260486, "grad_norm": 0.33307790756225586, "learning_rate": 0.0001898041754636241, "loss": 11.6606, "step": 20805 }, { "epoch": 0.435527086996567, "grad_norm": 0.3297097980976105, "learning_rate": 0.00018980321092440687, "loss": 11.6616, "step": 20806 }, { "epoch": 0.4355480197605292, "grad_norm": 0.34121689200401306, "learning_rate": 0.00018980224634201938, "loss": 11.6637, "step": 20807 }, { "epoch": 0.43556895252449135, "grad_norm": 0.24478958547115326, "learning_rate": 0.0001898012817164622, "loss": 11.6631, "step": 20808 }, { "epoch": 0.4355898852884535, "grad_norm": 0.308817595243454, "learning_rate": 0.0001898003170477357, "loss": 11.6745, "step": 20809 }, { "epoch": 0.43561081805241564, "grad_norm": 0.24570555984973907, "learning_rate": 0.0001897993523358404, "loss": 11.6726, "step": 20810 }, { "epoch": 0.4356317508163778, "grad_norm": 0.30873697996139526, "learning_rate": 0.00018979838758077672, "loss": 11.6638, "step": 20811 }, { "epoch": 0.43565268358033993, "grad_norm": 0.3073810935020447, "learning_rate": 0.0001897974227825452, "loss": 11.6716, "step": 20812 }, { "epoch": 0.43567361634430213, "grad_norm": 0.25272616744041443, "learning_rate": 0.00018979645794114624, "loss": 11.6707, "step": 20813 }, { "epoch": 0.4356945491082643, "grad_norm": 0.31167733669281006, "learning_rate": 0.00018979549305658035, "loss": 11.6699, "step": 20814 }, { "epoch": 0.4357154818722264, "grad_norm": 0.35842305421829224, "learning_rate": 0.00018979452812884794, "loss": 11.6805, "step": 20815 }, { "epoch": 0.43573641463618856, "grad_norm": 0.2528513967990875, "learning_rate": 0.0001897935631579495, "loss": 11.6822, "step": 20816 }, { "epoch": 0.4357573474001507, "grad_norm": 0.3440033793449402, "learning_rate": 0.0001897925981438855, "loss": 11.6802, "step": 20817 }, { "epoch": 0.43577828016411285, "grad_norm": 0.36714956164360046, "learning_rate": 0.00018979163308665642, "loss": 11.6823, "step": 20818 }, { "epoch": 0.435799212928075, "grad_norm": 0.31649985909461975, "learning_rate": 0.0001897906679862627, "loss": 11.6738, "step": 20819 }, { "epoch": 0.4358201456920372, "grad_norm": 0.3019639253616333, "learning_rate": 0.00018978970284270477, "loss": 11.6784, "step": 20820 }, { "epoch": 0.43584107845599934, "grad_norm": 0.25789082050323486, "learning_rate": 0.00018978873765598315, "loss": 11.6663, "step": 20821 }, { "epoch": 0.4358620112199615, "grad_norm": 0.24951185286045074, "learning_rate": 0.00018978777242609825, "loss": 11.6793, "step": 20822 }, { "epoch": 0.43588294398392363, "grad_norm": 0.34192362427711487, "learning_rate": 0.00018978680715305064, "loss": 11.6933, "step": 20823 }, { "epoch": 0.4359038767478858, "grad_norm": 0.2967914342880249, "learning_rate": 0.00018978584183684067, "loss": 11.6668, "step": 20824 }, { "epoch": 0.4359248095118479, "grad_norm": 0.2543134093284607, "learning_rate": 0.00018978487647746886, "loss": 11.6633, "step": 20825 }, { "epoch": 0.4359457422758101, "grad_norm": 0.26933741569519043, "learning_rate": 0.00018978391107493564, "loss": 11.6726, "step": 20826 }, { "epoch": 0.43596667503977227, "grad_norm": 0.2670445442199707, "learning_rate": 0.00018978294562924152, "loss": 11.6646, "step": 20827 }, { "epoch": 0.4359876078037344, "grad_norm": 0.2925131320953369, "learning_rate": 0.00018978198014038693, "loss": 11.6766, "step": 20828 }, { "epoch": 0.43600854056769656, "grad_norm": 0.26515135169029236, "learning_rate": 0.00018978101460837232, "loss": 11.6669, "step": 20829 }, { "epoch": 0.4360294733316587, "grad_norm": 0.25460100173950195, "learning_rate": 0.0001897800490331982, "loss": 11.6609, "step": 20830 }, { "epoch": 0.43605040609562085, "grad_norm": 0.3105486035346985, "learning_rate": 0.00018977908341486502, "loss": 11.6766, "step": 20831 }, { "epoch": 0.43607133885958305, "grad_norm": 0.3028774559497833, "learning_rate": 0.00018977811775337323, "loss": 11.659, "step": 20832 }, { "epoch": 0.4360922716235452, "grad_norm": 0.32726988196372986, "learning_rate": 0.00018977715204872327, "loss": 11.6684, "step": 20833 }, { "epoch": 0.43611320438750734, "grad_norm": 0.25001829862594604, "learning_rate": 0.00018977618630091567, "loss": 11.6706, "step": 20834 }, { "epoch": 0.4361341371514695, "grad_norm": 0.3044244945049286, "learning_rate": 0.0001897752205099508, "loss": 11.6686, "step": 20835 }, { "epoch": 0.4361550699154316, "grad_norm": 0.26754090189933777, "learning_rate": 0.00018977425467582926, "loss": 11.6665, "step": 20836 }, { "epoch": 0.43617600267939377, "grad_norm": 0.2580946683883667, "learning_rate": 0.0001897732887985514, "loss": 11.6731, "step": 20837 }, { "epoch": 0.4361969354433559, "grad_norm": 0.3326525092124939, "learning_rate": 0.00018977232287811771, "loss": 11.6701, "step": 20838 }, { "epoch": 0.4362178682073181, "grad_norm": 0.30134129524230957, "learning_rate": 0.0001897713569145287, "loss": 11.6627, "step": 20839 }, { "epoch": 0.43623880097128026, "grad_norm": 0.36156395077705383, "learning_rate": 0.00018977039090778476, "loss": 11.6792, "step": 20840 }, { "epoch": 0.4362597337352424, "grad_norm": 0.2582399249076843, "learning_rate": 0.00018976942485788643, "loss": 11.6838, "step": 20841 }, { "epoch": 0.43628066649920455, "grad_norm": 0.27528902888298035, "learning_rate": 0.0001897684587648341, "loss": 11.6805, "step": 20842 }, { "epoch": 0.4363015992631667, "grad_norm": 0.2928130626678467, "learning_rate": 0.00018976749262862832, "loss": 11.6766, "step": 20843 }, { "epoch": 0.43632253202712884, "grad_norm": 0.23270638287067413, "learning_rate": 0.0001897665264492695, "loss": 11.6766, "step": 20844 }, { "epoch": 0.43634346479109104, "grad_norm": 0.36414292454719543, "learning_rate": 0.00018976556022675807, "loss": 11.671, "step": 20845 }, { "epoch": 0.4363643975550532, "grad_norm": 0.24781811237335205, "learning_rate": 0.00018976459396109456, "loss": 11.6823, "step": 20846 }, { "epoch": 0.43638533031901533, "grad_norm": 0.2670518159866333, "learning_rate": 0.00018976362765227943, "loss": 11.6759, "step": 20847 }, { "epoch": 0.4364062630829775, "grad_norm": 0.2891790568828583, "learning_rate": 0.00018976266130031307, "loss": 11.6643, "step": 20848 }, { "epoch": 0.4364271958469396, "grad_norm": 0.2841739356517792, "learning_rate": 0.00018976169490519604, "loss": 11.6726, "step": 20849 }, { "epoch": 0.43644812861090176, "grad_norm": 0.25988996028900146, "learning_rate": 0.00018976072846692876, "loss": 11.667, "step": 20850 }, { "epoch": 0.43646906137486396, "grad_norm": 0.3477168083190918, "learning_rate": 0.00018975976198551173, "loss": 11.6723, "step": 20851 }, { "epoch": 0.4364899941388261, "grad_norm": 0.3580903708934784, "learning_rate": 0.00018975879546094534, "loss": 11.6757, "step": 20852 }, { "epoch": 0.43651092690278825, "grad_norm": 0.27116578817367554, "learning_rate": 0.0001897578288932301, "loss": 11.6692, "step": 20853 }, { "epoch": 0.4365318596667504, "grad_norm": 0.25919458270072937, "learning_rate": 0.00018975686228236649, "loss": 11.66, "step": 20854 }, { "epoch": 0.43655279243071254, "grad_norm": 0.31694018840789795, "learning_rate": 0.00018975589562835493, "loss": 11.6778, "step": 20855 }, { "epoch": 0.4365737251946747, "grad_norm": 0.23310093581676483, "learning_rate": 0.00018975492893119595, "loss": 11.6714, "step": 20856 }, { "epoch": 0.43659465795863683, "grad_norm": 0.2913830876350403, "learning_rate": 0.00018975396219088998, "loss": 11.6684, "step": 20857 }, { "epoch": 0.43661559072259903, "grad_norm": 0.280356764793396, "learning_rate": 0.00018975299540743745, "loss": 11.6743, "step": 20858 }, { "epoch": 0.4366365234865612, "grad_norm": 0.37195873260498047, "learning_rate": 0.00018975202858083888, "loss": 11.6763, "step": 20859 }, { "epoch": 0.4366574562505233, "grad_norm": 0.3321485221385956, "learning_rate": 0.0001897510617110947, "loss": 11.6766, "step": 20860 }, { "epoch": 0.43667838901448547, "grad_norm": 0.2615862190723419, "learning_rate": 0.00018975009479820537, "loss": 11.6654, "step": 20861 }, { "epoch": 0.4366993217784476, "grad_norm": 0.308846652507782, "learning_rate": 0.00018974912784217138, "loss": 11.6934, "step": 20862 }, { "epoch": 0.43672025454240976, "grad_norm": 0.3455450236797333, "learning_rate": 0.0001897481608429932, "loss": 11.6712, "step": 20863 }, { "epoch": 0.43674118730637196, "grad_norm": 0.32029443979263306, "learning_rate": 0.00018974719380067127, "loss": 11.671, "step": 20864 }, { "epoch": 0.4367621200703341, "grad_norm": 0.3060269355773926, "learning_rate": 0.0001897462267152061, "loss": 11.675, "step": 20865 }, { "epoch": 0.43678305283429625, "grad_norm": 0.36510011553764343, "learning_rate": 0.00018974525958659805, "loss": 11.6839, "step": 20866 }, { "epoch": 0.4368039855982584, "grad_norm": 0.26295924186706543, "learning_rate": 0.00018974429241484773, "loss": 11.6605, "step": 20867 }, { "epoch": 0.43682491836222054, "grad_norm": 0.2626013457775116, "learning_rate": 0.00018974332519995548, "loss": 11.6699, "step": 20868 }, { "epoch": 0.4368458511261827, "grad_norm": 0.3126820921897888, "learning_rate": 0.00018974235794192183, "loss": 11.684, "step": 20869 }, { "epoch": 0.4368667838901448, "grad_norm": 0.2966277599334717, "learning_rate": 0.00018974139064074723, "loss": 11.6733, "step": 20870 }, { "epoch": 0.43688771665410703, "grad_norm": 0.28169283270835876, "learning_rate": 0.00018974042329643212, "loss": 11.6708, "step": 20871 }, { "epoch": 0.4369086494180692, "grad_norm": 0.3016032576560974, "learning_rate": 0.00018973945590897704, "loss": 11.6782, "step": 20872 }, { "epoch": 0.4369295821820313, "grad_norm": 0.2811288833618164, "learning_rate": 0.00018973848847838237, "loss": 11.6613, "step": 20873 }, { "epoch": 0.43695051494599346, "grad_norm": 0.29487547278404236, "learning_rate": 0.00018973752100464862, "loss": 11.6785, "step": 20874 }, { "epoch": 0.4369714477099556, "grad_norm": 0.3027171790599823, "learning_rate": 0.00018973655348777623, "loss": 11.6644, "step": 20875 }, { "epoch": 0.43699238047391775, "grad_norm": 0.2703615725040436, "learning_rate": 0.00018973558592776574, "loss": 11.6597, "step": 20876 }, { "epoch": 0.43701331323787995, "grad_norm": 0.29764190316200256, "learning_rate": 0.0001897346183246175, "loss": 11.6798, "step": 20877 }, { "epoch": 0.4370342460018421, "grad_norm": 0.32187020778656006, "learning_rate": 0.00018973365067833205, "loss": 11.6846, "step": 20878 }, { "epoch": 0.43705517876580424, "grad_norm": 0.3516574203968048, "learning_rate": 0.0001897326829889098, "loss": 11.6777, "step": 20879 }, { "epoch": 0.4370761115297664, "grad_norm": 0.33715465664863586, "learning_rate": 0.00018973171525635127, "loss": 11.6765, "step": 20880 }, { "epoch": 0.43709704429372853, "grad_norm": 0.28647753596305847, "learning_rate": 0.00018973074748065692, "loss": 11.6726, "step": 20881 }, { "epoch": 0.4371179770576907, "grad_norm": 0.31383925676345825, "learning_rate": 0.00018972977966182718, "loss": 11.6793, "step": 20882 }, { "epoch": 0.4371389098216529, "grad_norm": 0.3361402153968811, "learning_rate": 0.00018972881179986256, "loss": 11.6696, "step": 20883 }, { "epoch": 0.437159842585615, "grad_norm": 0.30936378240585327, "learning_rate": 0.00018972784389476353, "loss": 11.6668, "step": 20884 }, { "epoch": 0.43718077534957717, "grad_norm": 0.2677799165248871, "learning_rate": 0.00018972687594653048, "loss": 11.6574, "step": 20885 }, { "epoch": 0.4372017081135393, "grad_norm": 0.3207727074623108, "learning_rate": 0.00018972590795516392, "loss": 11.6776, "step": 20886 }, { "epoch": 0.43722264087750146, "grad_norm": 0.3016665279865265, "learning_rate": 0.00018972493992066433, "loss": 11.6682, "step": 20887 }, { "epoch": 0.4372435736414636, "grad_norm": 0.37029120326042175, "learning_rate": 0.0001897239718430322, "loss": 11.6598, "step": 20888 }, { "epoch": 0.43726450640542575, "grad_norm": 0.30572405457496643, "learning_rate": 0.0001897230037222679, "loss": 11.6762, "step": 20889 }, { "epoch": 0.43728543916938795, "grad_norm": 0.3213483989238739, "learning_rate": 0.00018972203555837197, "loss": 11.6633, "step": 20890 }, { "epoch": 0.4373063719333501, "grad_norm": 0.2813907563686371, "learning_rate": 0.0001897210673513449, "loss": 11.6939, "step": 20891 }, { "epoch": 0.43732730469731224, "grad_norm": 0.32641708850860596, "learning_rate": 0.00018972009910118707, "loss": 11.6775, "step": 20892 }, { "epoch": 0.4373482374612744, "grad_norm": 0.2874467074871063, "learning_rate": 0.000189719130807899, "loss": 11.6934, "step": 20893 }, { "epoch": 0.4373691702252365, "grad_norm": 0.2356303483247757, "learning_rate": 0.00018971816247148114, "loss": 11.6806, "step": 20894 }, { "epoch": 0.43739010298919867, "grad_norm": 0.3173784017562866, "learning_rate": 0.00018971719409193396, "loss": 11.6763, "step": 20895 }, { "epoch": 0.43741103575316087, "grad_norm": 0.3316507637500763, "learning_rate": 0.00018971622566925795, "loss": 11.6753, "step": 20896 }, { "epoch": 0.437431968517123, "grad_norm": 0.3473229706287384, "learning_rate": 0.00018971525720345354, "loss": 11.6728, "step": 20897 }, { "epoch": 0.43745290128108516, "grad_norm": 0.25438547134399414, "learning_rate": 0.0001897142886945212, "loss": 11.6724, "step": 20898 }, { "epoch": 0.4374738340450473, "grad_norm": 0.2515142858028412, "learning_rate": 0.00018971332014246141, "loss": 11.6608, "step": 20899 }, { "epoch": 0.43749476680900945, "grad_norm": 0.23850473761558533, "learning_rate": 0.00018971235154727464, "loss": 11.6652, "step": 20900 }, { "epoch": 0.4375156995729716, "grad_norm": 0.38736656308174133, "learning_rate": 0.00018971138290896133, "loss": 11.6858, "step": 20901 }, { "epoch": 0.4375366323369338, "grad_norm": 0.24937668442726135, "learning_rate": 0.000189710414227522, "loss": 11.6679, "step": 20902 }, { "epoch": 0.43755756510089594, "grad_norm": 0.3833576440811157, "learning_rate": 0.00018970944550295702, "loss": 11.6911, "step": 20903 }, { "epoch": 0.4375784978648581, "grad_norm": 0.3268835246562958, "learning_rate": 0.00018970847673526693, "loss": 11.6686, "step": 20904 }, { "epoch": 0.43759943062882023, "grad_norm": 0.48484909534454346, "learning_rate": 0.00018970750792445218, "loss": 11.6767, "step": 20905 }, { "epoch": 0.4376203633927824, "grad_norm": 0.300377756357193, "learning_rate": 0.00018970653907051323, "loss": 11.6843, "step": 20906 }, { "epoch": 0.4376412961567445, "grad_norm": 0.2951982915401459, "learning_rate": 0.00018970557017345055, "loss": 11.6781, "step": 20907 }, { "epoch": 0.43766222892070666, "grad_norm": 0.2672124207019806, "learning_rate": 0.0001897046012332646, "loss": 11.6669, "step": 20908 }, { "epoch": 0.43768316168466886, "grad_norm": 0.30157583951950073, "learning_rate": 0.00018970363224995586, "loss": 11.6717, "step": 20909 }, { "epoch": 0.437704094448631, "grad_norm": 0.28591814637184143, "learning_rate": 0.0001897026632235248, "loss": 11.6754, "step": 20910 }, { "epoch": 0.43772502721259315, "grad_norm": 0.39893651008605957, "learning_rate": 0.00018970169415397184, "loss": 11.6644, "step": 20911 }, { "epoch": 0.4377459599765553, "grad_norm": 0.3742942810058594, "learning_rate": 0.0001897007250412975, "loss": 11.6791, "step": 20912 }, { "epoch": 0.43776689274051744, "grad_norm": 0.2633489966392517, "learning_rate": 0.0001896997558855022, "loss": 11.6536, "step": 20913 }, { "epoch": 0.4377878255044796, "grad_norm": 0.3114969730377197, "learning_rate": 0.00018969878668658648, "loss": 11.6845, "step": 20914 }, { "epoch": 0.4378087582684418, "grad_norm": 0.28197190165519714, "learning_rate": 0.00018969781744455073, "loss": 11.684, "step": 20915 }, { "epoch": 0.43782969103240393, "grad_norm": 0.2788107991218567, "learning_rate": 0.00018969684815939544, "loss": 11.6761, "step": 20916 }, { "epoch": 0.4378506237963661, "grad_norm": 0.2592238187789917, "learning_rate": 0.00018969587883112105, "loss": 11.6519, "step": 20917 }, { "epoch": 0.4378715565603282, "grad_norm": 0.2565328776836395, "learning_rate": 0.00018969490945972808, "loss": 11.6773, "step": 20918 }, { "epoch": 0.43789248932429037, "grad_norm": 0.2618654668331146, "learning_rate": 0.00018969394004521696, "loss": 11.6682, "step": 20919 }, { "epoch": 0.4379134220882525, "grad_norm": 0.34637585282325745, "learning_rate": 0.0001896929705875882, "loss": 11.683, "step": 20920 }, { "epoch": 0.4379343548522147, "grad_norm": 0.26415860652923584, "learning_rate": 0.00018969200108684216, "loss": 11.6706, "step": 20921 }, { "epoch": 0.43795528761617686, "grad_norm": 0.29133567214012146, "learning_rate": 0.00018969103154297943, "loss": 11.681, "step": 20922 }, { "epoch": 0.437976220380139, "grad_norm": 0.29234740138053894, "learning_rate": 0.00018969006195600042, "loss": 11.696, "step": 20923 }, { "epoch": 0.43799715314410115, "grad_norm": 0.32726144790649414, "learning_rate": 0.0001896890923259056, "loss": 11.6836, "step": 20924 }, { "epoch": 0.4380180859080633, "grad_norm": 0.26280492544174194, "learning_rate": 0.00018968812265269543, "loss": 11.6656, "step": 20925 }, { "epoch": 0.43803901867202544, "grad_norm": 0.24755924940109253, "learning_rate": 0.00018968715293637037, "loss": 11.6684, "step": 20926 }, { "epoch": 0.4380599514359876, "grad_norm": 0.33829358220100403, "learning_rate": 0.00018968618317693093, "loss": 11.6729, "step": 20927 }, { "epoch": 0.4380808841999498, "grad_norm": 0.26708483695983887, "learning_rate": 0.00018968521337437753, "loss": 11.6624, "step": 20928 }, { "epoch": 0.4381018169639119, "grad_norm": 0.33554235100746155, "learning_rate": 0.00018968424352871065, "loss": 11.6864, "step": 20929 }, { "epoch": 0.4381227497278741, "grad_norm": 0.27462875843048096, "learning_rate": 0.00018968327363993074, "loss": 11.6811, "step": 20930 }, { "epoch": 0.4381436824918362, "grad_norm": 0.4196169376373291, "learning_rate": 0.00018968230370803828, "loss": 11.6682, "step": 20931 }, { "epoch": 0.43816461525579836, "grad_norm": 0.3044993281364441, "learning_rate": 0.00018968133373303375, "loss": 11.6779, "step": 20932 }, { "epoch": 0.4381855480197605, "grad_norm": 0.27665460109710693, "learning_rate": 0.00018968036371491763, "loss": 11.6576, "step": 20933 }, { "epoch": 0.4382064807837227, "grad_norm": 0.27373006939888, "learning_rate": 0.00018967939365369034, "loss": 11.6877, "step": 20934 }, { "epoch": 0.43822741354768485, "grad_norm": 0.31356722116470337, "learning_rate": 0.00018967842354935237, "loss": 11.6861, "step": 20935 }, { "epoch": 0.438248346311647, "grad_norm": 0.32026490569114685, "learning_rate": 0.00018967745340190418, "loss": 11.6832, "step": 20936 }, { "epoch": 0.43826927907560914, "grad_norm": 0.25606393814086914, "learning_rate": 0.00018967648321134624, "loss": 11.6778, "step": 20937 }, { "epoch": 0.4382902118395713, "grad_norm": 0.2646309733390808, "learning_rate": 0.00018967551297767901, "loss": 11.6671, "step": 20938 }, { "epoch": 0.43831114460353343, "grad_norm": 0.2737692892551422, "learning_rate": 0.000189674542700903, "loss": 11.6717, "step": 20939 }, { "epoch": 0.43833207736749563, "grad_norm": 0.3397323489189148, "learning_rate": 0.00018967357238101862, "loss": 11.6718, "step": 20940 }, { "epoch": 0.4383530101314578, "grad_norm": 0.22330698370933533, "learning_rate": 0.00018967260201802635, "loss": 11.6709, "step": 20941 }, { "epoch": 0.4383739428954199, "grad_norm": 1.4449846744537354, "learning_rate": 0.00018967163161192667, "loss": 11.6468, "step": 20942 }, { "epoch": 0.43839487565938207, "grad_norm": 0.40950992703437805, "learning_rate": 0.00018967066116272002, "loss": 11.6917, "step": 20943 }, { "epoch": 0.4384158084233442, "grad_norm": 0.2835026681423187, "learning_rate": 0.0001896696906704069, "loss": 11.668, "step": 20944 }, { "epoch": 0.43843674118730636, "grad_norm": 0.31358054280281067, "learning_rate": 0.0001896687201349878, "loss": 11.6674, "step": 20945 }, { "epoch": 0.4384576739512685, "grad_norm": 0.26614588499069214, "learning_rate": 0.0001896677495564631, "loss": 11.6817, "step": 20946 }, { "epoch": 0.4384786067152307, "grad_norm": 0.35007956624031067, "learning_rate": 0.00018966677893483334, "loss": 11.6836, "step": 20947 }, { "epoch": 0.43849953947919285, "grad_norm": 0.30522510409355164, "learning_rate": 0.00018966580827009896, "loss": 11.6693, "step": 20948 }, { "epoch": 0.438520472243155, "grad_norm": 0.31843966245651245, "learning_rate": 0.0001896648375622604, "loss": 11.663, "step": 20949 }, { "epoch": 0.43854140500711714, "grad_norm": 0.28027966618537903, "learning_rate": 0.0001896638668113182, "loss": 11.6762, "step": 20950 }, { "epoch": 0.4385623377710793, "grad_norm": 0.319369912147522, "learning_rate": 0.00018966289601727275, "loss": 11.6564, "step": 20951 }, { "epoch": 0.4385832705350414, "grad_norm": 0.289368599653244, "learning_rate": 0.00018966192518012455, "loss": 11.6556, "step": 20952 }, { "epoch": 0.4386042032990036, "grad_norm": 0.2847442030906677, "learning_rate": 0.00018966095429987406, "loss": 11.6655, "step": 20953 }, { "epoch": 0.43862513606296577, "grad_norm": 0.3478136956691742, "learning_rate": 0.00018965998337652177, "loss": 11.6877, "step": 20954 }, { "epoch": 0.4386460688269279, "grad_norm": 0.3447815775871277, "learning_rate": 0.00018965901241006813, "loss": 11.6992, "step": 20955 }, { "epoch": 0.43866700159089006, "grad_norm": 0.32130154967308044, "learning_rate": 0.00018965804140051362, "loss": 11.6849, "step": 20956 }, { "epoch": 0.4386879343548522, "grad_norm": 0.39008596539497375, "learning_rate": 0.00018965707034785867, "loss": 11.6827, "step": 20957 }, { "epoch": 0.43870886711881435, "grad_norm": 0.26520511507987976, "learning_rate": 0.00018965609925210376, "loss": 11.6742, "step": 20958 }, { "epoch": 0.4387297998827765, "grad_norm": 0.30134207010269165, "learning_rate": 0.0001896551281132494, "loss": 11.6625, "step": 20959 }, { "epoch": 0.4387507326467387, "grad_norm": 0.2847816050052643, "learning_rate": 0.000189654156931296, "loss": 11.6682, "step": 20960 }, { "epoch": 0.43877166541070084, "grad_norm": 0.2436036765575409, "learning_rate": 0.00018965318570624405, "loss": 11.6699, "step": 20961 }, { "epoch": 0.438792598174663, "grad_norm": 0.23959654569625854, "learning_rate": 0.00018965221443809402, "loss": 11.6793, "step": 20962 }, { "epoch": 0.43881353093862513, "grad_norm": 0.3574361205101013, "learning_rate": 0.00018965124312684636, "loss": 11.6759, "step": 20963 }, { "epoch": 0.4388344637025873, "grad_norm": 0.34618961811065674, "learning_rate": 0.00018965027177250159, "loss": 11.676, "step": 20964 }, { "epoch": 0.4388553964665494, "grad_norm": 0.30572259426116943, "learning_rate": 0.00018964930037506012, "loss": 11.6782, "step": 20965 }, { "epoch": 0.4388763292305116, "grad_norm": 0.26885178685188293, "learning_rate": 0.0001896483289345224, "loss": 11.6704, "step": 20966 }, { "epoch": 0.43889726199447376, "grad_norm": 0.2852385640144348, "learning_rate": 0.00018964735745088897, "loss": 11.6718, "step": 20967 }, { "epoch": 0.4389181947584359, "grad_norm": 0.2331550568342209, "learning_rate": 0.00018964638592416027, "loss": 11.6575, "step": 20968 }, { "epoch": 0.43893912752239805, "grad_norm": 0.27066248655319214, "learning_rate": 0.00018964541435433674, "loss": 11.6789, "step": 20969 }, { "epoch": 0.4389600602863602, "grad_norm": 0.299983412027359, "learning_rate": 0.00018964444274141885, "loss": 11.672, "step": 20970 }, { "epoch": 0.43898099305032234, "grad_norm": 0.31644126772880554, "learning_rate": 0.00018964347108540707, "loss": 11.6833, "step": 20971 }, { "epoch": 0.43900192581428454, "grad_norm": 0.3688512146472931, "learning_rate": 0.0001896424993863019, "loss": 11.6731, "step": 20972 }, { "epoch": 0.4390228585782467, "grad_norm": 0.27447962760925293, "learning_rate": 0.00018964152764410378, "loss": 11.6656, "step": 20973 }, { "epoch": 0.43904379134220883, "grad_norm": 0.3500136137008667, "learning_rate": 0.0001896405558588132, "loss": 11.6569, "step": 20974 }, { "epoch": 0.439064724106171, "grad_norm": 0.41535550355911255, "learning_rate": 0.0001896395840304306, "loss": 11.6722, "step": 20975 }, { "epoch": 0.4390856568701331, "grad_norm": 0.2663990557193756, "learning_rate": 0.0001896386121589564, "loss": 11.6846, "step": 20976 }, { "epoch": 0.43910658963409527, "grad_norm": 0.2869042158126831, "learning_rate": 0.00018963764024439115, "loss": 11.6768, "step": 20977 }, { "epoch": 0.4391275223980574, "grad_norm": 0.2761960029602051, "learning_rate": 0.00018963666828673533, "loss": 11.6722, "step": 20978 }, { "epoch": 0.4391484551620196, "grad_norm": 0.2692911922931671, "learning_rate": 0.00018963569628598935, "loss": 11.6647, "step": 20979 }, { "epoch": 0.43916938792598176, "grad_norm": 0.2627401351928711, "learning_rate": 0.00018963472424215368, "loss": 11.6799, "step": 20980 }, { "epoch": 0.4391903206899439, "grad_norm": 0.3099290132522583, "learning_rate": 0.0001896337521552288, "loss": 11.6718, "step": 20981 }, { "epoch": 0.43921125345390605, "grad_norm": 0.27693527936935425, "learning_rate": 0.0001896327800252152, "loss": 11.6585, "step": 20982 }, { "epoch": 0.4392321862178682, "grad_norm": 0.3665941059589386, "learning_rate": 0.00018963180785211333, "loss": 11.6746, "step": 20983 }, { "epoch": 0.43925311898183034, "grad_norm": 0.2671438157558441, "learning_rate": 0.00018963083563592364, "loss": 11.6773, "step": 20984 }, { "epoch": 0.43927405174579254, "grad_norm": 0.2915148437023163, "learning_rate": 0.0001896298633766466, "loss": 11.6774, "step": 20985 }, { "epoch": 0.4392949845097547, "grad_norm": 0.300857812166214, "learning_rate": 0.0001896288910742827, "loss": 11.6785, "step": 20986 }, { "epoch": 0.4393159172737168, "grad_norm": 0.2963988482952118, "learning_rate": 0.0001896279187288324, "loss": 11.6707, "step": 20987 }, { "epoch": 0.43933685003767897, "grad_norm": 0.3357778489589691, "learning_rate": 0.00018962694634029614, "loss": 11.6785, "step": 20988 }, { "epoch": 0.4393577828016411, "grad_norm": 0.2378886193037033, "learning_rate": 0.00018962597390867445, "loss": 11.6628, "step": 20989 }, { "epoch": 0.43937871556560326, "grad_norm": 0.2477119266986847, "learning_rate": 0.00018962500143396773, "loss": 11.6719, "step": 20990 }, { "epoch": 0.43939964832956546, "grad_norm": 0.3928779065608978, "learning_rate": 0.00018962402891617647, "loss": 11.6811, "step": 20991 }, { "epoch": 0.4394205810935276, "grad_norm": 0.25681284070014954, "learning_rate": 0.00018962305635530116, "loss": 11.6651, "step": 20992 }, { "epoch": 0.43944151385748975, "grad_norm": 0.2880609631538391, "learning_rate": 0.00018962208375134225, "loss": 11.674, "step": 20993 }, { "epoch": 0.4394624466214519, "grad_norm": 0.3873435854911804, "learning_rate": 0.00018962111110430022, "loss": 11.6854, "step": 20994 }, { "epoch": 0.43948337938541404, "grad_norm": 0.23244813084602356, "learning_rate": 0.00018962013841417548, "loss": 11.6761, "step": 20995 }, { "epoch": 0.4395043121493762, "grad_norm": 0.2860332429409027, "learning_rate": 0.0001896191656809686, "loss": 11.6733, "step": 20996 }, { "epoch": 0.43952524491333833, "grad_norm": 0.24026335775852203, "learning_rate": 0.00018961819290467995, "loss": 11.6636, "step": 20997 }, { "epoch": 0.43954617767730053, "grad_norm": 0.5746130347251892, "learning_rate": 0.00018961722008531005, "loss": 11.6768, "step": 20998 }, { "epoch": 0.4395671104412627, "grad_norm": 0.3083670735359192, "learning_rate": 0.00018961624722285935, "loss": 11.6862, "step": 20999 }, { "epoch": 0.4395880432052248, "grad_norm": 0.2677271068096161, "learning_rate": 0.00018961527431732834, "loss": 11.6769, "step": 21000 }, { "epoch": 0.4395880432052248, "eval_loss": 11.67326831817627, "eval_runtime": 34.3713, "eval_samples_per_second": 27.959, "eval_steps_per_second": 7.012, "step": 21000 }, { "epoch": 0.43960897596918697, "grad_norm": 0.279666930437088, "learning_rate": 0.00018961430136871747, "loss": 11.6652, "step": 21001 }, { "epoch": 0.4396299087331491, "grad_norm": 0.364093542098999, "learning_rate": 0.00018961332837702722, "loss": 11.6789, "step": 21002 }, { "epoch": 0.43965084149711126, "grad_norm": 0.2999173700809479, "learning_rate": 0.000189612355342258, "loss": 11.6632, "step": 21003 }, { "epoch": 0.43967177426107346, "grad_norm": 0.302212119102478, "learning_rate": 0.00018961138226441038, "loss": 11.6692, "step": 21004 }, { "epoch": 0.4396927070250356, "grad_norm": 0.2907705307006836, "learning_rate": 0.00018961040914348477, "loss": 11.6636, "step": 21005 }, { "epoch": 0.43971363978899775, "grad_norm": 0.28897568583488464, "learning_rate": 0.0001896094359794816, "loss": 11.6664, "step": 21006 }, { "epoch": 0.4397345725529599, "grad_norm": 0.40721413493156433, "learning_rate": 0.0001896084627724014, "loss": 11.6873, "step": 21007 }, { "epoch": 0.43975550531692204, "grad_norm": 0.2616070806980133, "learning_rate": 0.0001896074895222446, "loss": 11.662, "step": 21008 }, { "epoch": 0.4397764380808842, "grad_norm": 0.3225703835487366, "learning_rate": 0.0001896065162290117, "loss": 11.6744, "step": 21009 }, { "epoch": 0.4397973708448464, "grad_norm": 0.2966058850288391, "learning_rate": 0.00018960554289270315, "loss": 11.6643, "step": 21010 }, { "epoch": 0.4398183036088085, "grad_norm": 0.30576181411743164, "learning_rate": 0.0001896045695133194, "loss": 11.6549, "step": 21011 }, { "epoch": 0.43983923637277067, "grad_norm": 0.2579127550125122, "learning_rate": 0.000189603596090861, "loss": 11.6637, "step": 21012 }, { "epoch": 0.4398601691367328, "grad_norm": 0.3924944996833801, "learning_rate": 0.0001896026226253283, "loss": 11.6701, "step": 21013 }, { "epoch": 0.43988110190069496, "grad_norm": 0.3368734121322632, "learning_rate": 0.00018960164911672184, "loss": 11.6699, "step": 21014 }, { "epoch": 0.4399020346646571, "grad_norm": 0.30894705653190613, "learning_rate": 0.0001896006755650421, "loss": 11.6845, "step": 21015 }, { "epoch": 0.43992296742861925, "grad_norm": 0.2737897038459778, "learning_rate": 0.00018959970197028947, "loss": 11.6619, "step": 21016 }, { "epoch": 0.43994390019258145, "grad_norm": 0.30635690689086914, "learning_rate": 0.00018959872833246449, "loss": 11.6727, "step": 21017 }, { "epoch": 0.4399648329565436, "grad_norm": 0.2872515916824341, "learning_rate": 0.0001895977546515676, "loss": 11.6596, "step": 21018 }, { "epoch": 0.43998576572050574, "grad_norm": 0.2810025215148926, "learning_rate": 0.00018959678092759927, "loss": 11.6638, "step": 21019 }, { "epoch": 0.4400066984844679, "grad_norm": 0.30407819151878357, "learning_rate": 0.00018959580716056, "loss": 11.6752, "step": 21020 }, { "epoch": 0.44002763124843003, "grad_norm": 0.28919780254364014, "learning_rate": 0.00018959483335045018, "loss": 11.6709, "step": 21021 }, { "epoch": 0.4400485640123922, "grad_norm": 0.3422236442565918, "learning_rate": 0.00018959385949727037, "loss": 11.6874, "step": 21022 }, { "epoch": 0.4400694967763544, "grad_norm": 0.26695629954338074, "learning_rate": 0.00018959288560102097, "loss": 11.6708, "step": 21023 }, { "epoch": 0.4400904295403165, "grad_norm": 0.2777155935764313, "learning_rate": 0.0001895919116617025, "loss": 11.6619, "step": 21024 }, { "epoch": 0.44011136230427866, "grad_norm": 0.3340117633342743, "learning_rate": 0.00018959093767931538, "loss": 11.6753, "step": 21025 }, { "epoch": 0.4401322950682408, "grad_norm": 0.2646929919719696, "learning_rate": 0.0001895899636538601, "loss": 11.6615, "step": 21026 }, { "epoch": 0.44015322783220295, "grad_norm": 0.23989804089069366, "learning_rate": 0.00018958898958533715, "loss": 11.663, "step": 21027 }, { "epoch": 0.4401741605961651, "grad_norm": 0.2869051396846771, "learning_rate": 0.00018958801547374696, "loss": 11.6633, "step": 21028 }, { "epoch": 0.4401950933601273, "grad_norm": 0.34081828594207764, "learning_rate": 0.00018958704131909, "loss": 11.6752, "step": 21029 }, { "epoch": 0.44021602612408944, "grad_norm": 0.39443472027778625, "learning_rate": 0.0001895860671213668, "loss": 11.6754, "step": 21030 }, { "epoch": 0.4402369588880516, "grad_norm": 0.25845661759376526, "learning_rate": 0.00018958509288057775, "loss": 11.6628, "step": 21031 }, { "epoch": 0.44025789165201373, "grad_norm": 0.2941831946372986, "learning_rate": 0.00018958411859672336, "loss": 11.6797, "step": 21032 }, { "epoch": 0.4402788244159759, "grad_norm": 0.28099411725997925, "learning_rate": 0.00018958314426980412, "loss": 11.6817, "step": 21033 }, { "epoch": 0.440299757179938, "grad_norm": 0.3276028335094452, "learning_rate": 0.0001895821698998204, "loss": 11.6922, "step": 21034 }, { "epoch": 0.44032068994390017, "grad_norm": 0.34888845682144165, "learning_rate": 0.00018958119548677277, "loss": 11.6743, "step": 21035 }, { "epoch": 0.44034162270786237, "grad_norm": 0.2568283975124359, "learning_rate": 0.0001895802210306617, "loss": 11.6694, "step": 21036 }, { "epoch": 0.4403625554718245, "grad_norm": 0.238752081990242, "learning_rate": 0.00018957924653148758, "loss": 11.6718, "step": 21037 }, { "epoch": 0.44038348823578666, "grad_norm": 0.26441097259521484, "learning_rate": 0.0001895782719892509, "loss": 11.6661, "step": 21038 }, { "epoch": 0.4404044209997488, "grad_norm": 0.2812551259994507, "learning_rate": 0.0001895772974039522, "loss": 11.6726, "step": 21039 }, { "epoch": 0.44042535376371095, "grad_norm": 0.2905908524990082, "learning_rate": 0.00018957632277559187, "loss": 11.6642, "step": 21040 }, { "epoch": 0.4404462865276731, "grad_norm": 0.26967012882232666, "learning_rate": 0.00018957534810417042, "loss": 11.6528, "step": 21041 }, { "epoch": 0.4404672192916353, "grad_norm": 0.30362409353256226, "learning_rate": 0.00018957437338968828, "loss": 11.6834, "step": 21042 }, { "epoch": 0.44048815205559744, "grad_norm": 0.25943732261657715, "learning_rate": 0.00018957339863214597, "loss": 11.6776, "step": 21043 }, { "epoch": 0.4405090848195596, "grad_norm": 0.3098885715007782, "learning_rate": 0.0001895724238315439, "loss": 11.6724, "step": 21044 }, { "epoch": 0.4405300175835217, "grad_norm": 0.38215184211730957, "learning_rate": 0.0001895714489878826, "loss": 11.6875, "step": 21045 }, { "epoch": 0.44055095034748387, "grad_norm": 0.2769298553466797, "learning_rate": 0.00018957047410116253, "loss": 11.6609, "step": 21046 }, { "epoch": 0.440571883111446, "grad_norm": 0.2979268729686737, "learning_rate": 0.0001895694991713841, "loss": 11.6789, "step": 21047 }, { "epoch": 0.44059281587540816, "grad_norm": 0.26794740557670593, "learning_rate": 0.00018956852419854783, "loss": 11.6773, "step": 21048 }, { "epoch": 0.44061374863937036, "grad_norm": 0.3420507609844208, "learning_rate": 0.00018956754918265416, "loss": 11.6666, "step": 21049 }, { "epoch": 0.4406346814033325, "grad_norm": 0.2926306426525116, "learning_rate": 0.0001895665741237036, "loss": 11.6708, "step": 21050 }, { "epoch": 0.44065561416729465, "grad_norm": 0.296340674161911, "learning_rate": 0.0001895655990216966, "loss": 11.677, "step": 21051 }, { "epoch": 0.4406765469312568, "grad_norm": 0.30975642800331116, "learning_rate": 0.00018956462387663357, "loss": 11.6639, "step": 21052 }, { "epoch": 0.44069747969521894, "grad_norm": 0.33763521909713745, "learning_rate": 0.00018956364868851505, "loss": 11.6664, "step": 21053 }, { "epoch": 0.4407184124591811, "grad_norm": 0.20768563449382782, "learning_rate": 0.00018956267345734154, "loss": 11.6696, "step": 21054 }, { "epoch": 0.4407393452231433, "grad_norm": 0.28737783432006836, "learning_rate": 0.0001895616981831134, "loss": 11.6722, "step": 21055 }, { "epoch": 0.44076027798710543, "grad_norm": 0.3112737834453583, "learning_rate": 0.0001895607228658312, "loss": 11.6766, "step": 21056 }, { "epoch": 0.4407812107510676, "grad_norm": 0.30823928117752075, "learning_rate": 0.00018955974750549533, "loss": 11.666, "step": 21057 }, { "epoch": 0.4408021435150297, "grad_norm": 0.3297508656978607, "learning_rate": 0.0001895587721021063, "loss": 11.6741, "step": 21058 }, { "epoch": 0.44082307627899187, "grad_norm": 0.26658958196640015, "learning_rate": 0.0001895577966556646, "loss": 11.6634, "step": 21059 }, { "epoch": 0.440844009042954, "grad_norm": 0.3834414482116699, "learning_rate": 0.00018955682116617064, "loss": 11.6581, "step": 21060 }, { "epoch": 0.4408649418069162, "grad_norm": 0.2521013617515564, "learning_rate": 0.00018955584563362494, "loss": 11.6745, "step": 21061 }, { "epoch": 0.44088587457087836, "grad_norm": 0.44128021597862244, "learning_rate": 0.00018955487005802794, "loss": 11.6895, "step": 21062 }, { "epoch": 0.4409068073348405, "grad_norm": 0.32769641280174255, "learning_rate": 0.0001895538944393801, "loss": 11.6761, "step": 21063 }, { "epoch": 0.44092774009880265, "grad_norm": 0.3235449492931366, "learning_rate": 0.00018955291877768195, "loss": 11.677, "step": 21064 }, { "epoch": 0.4409486728627648, "grad_norm": 0.2732012867927551, "learning_rate": 0.00018955194307293392, "loss": 11.6775, "step": 21065 }, { "epoch": 0.44096960562672693, "grad_norm": 0.3007630407810211, "learning_rate": 0.00018955096732513644, "loss": 11.6865, "step": 21066 }, { "epoch": 0.4409905383906891, "grad_norm": 0.31280091404914856, "learning_rate": 0.00018954999153429004, "loss": 11.6695, "step": 21067 }, { "epoch": 0.4410114711546513, "grad_norm": 0.29410386085510254, "learning_rate": 0.00018954901570039515, "loss": 11.6603, "step": 21068 }, { "epoch": 0.4410324039186134, "grad_norm": 0.35280585289001465, "learning_rate": 0.00018954803982345226, "loss": 11.6667, "step": 21069 }, { "epoch": 0.44105333668257557, "grad_norm": 0.2612230181694031, "learning_rate": 0.00018954706390346182, "loss": 11.678, "step": 21070 }, { "epoch": 0.4410742694465377, "grad_norm": 0.2851015627384186, "learning_rate": 0.00018954608794042433, "loss": 11.6744, "step": 21071 }, { "epoch": 0.44109520221049986, "grad_norm": 0.3061785399913788, "learning_rate": 0.00018954511193434024, "loss": 11.6759, "step": 21072 }, { "epoch": 0.441116134974462, "grad_norm": 0.31482887268066406, "learning_rate": 0.00018954413588521, "loss": 11.6806, "step": 21073 }, { "epoch": 0.4411370677384242, "grad_norm": 0.31131497025489807, "learning_rate": 0.0001895431597930341, "loss": 11.6736, "step": 21074 }, { "epoch": 0.44115800050238635, "grad_norm": 0.31863105297088623, "learning_rate": 0.00018954218365781303, "loss": 11.6802, "step": 21075 }, { "epoch": 0.4411789332663485, "grad_norm": 0.2696164846420288, "learning_rate": 0.00018954120747954724, "loss": 11.6797, "step": 21076 }, { "epoch": 0.44119986603031064, "grad_norm": 0.2810399830341339, "learning_rate": 0.0001895402312582372, "loss": 11.6614, "step": 21077 }, { "epoch": 0.4412207987942728, "grad_norm": 0.2988928258419037, "learning_rate": 0.00018953925499388337, "loss": 11.687, "step": 21078 }, { "epoch": 0.44124173155823493, "grad_norm": 0.23505663871765137, "learning_rate": 0.00018953827868648621, "loss": 11.672, "step": 21079 }, { "epoch": 0.44126266432219713, "grad_norm": 0.33645734190940857, "learning_rate": 0.00018953730233604624, "loss": 11.682, "step": 21080 }, { "epoch": 0.4412835970861593, "grad_norm": 0.3382484018802643, "learning_rate": 0.00018953632594256384, "loss": 11.6626, "step": 21081 }, { "epoch": 0.4413045298501214, "grad_norm": 0.4093626141548157, "learning_rate": 0.00018953534950603957, "loss": 11.6555, "step": 21082 }, { "epoch": 0.44132546261408356, "grad_norm": 0.329685777425766, "learning_rate": 0.00018953437302647385, "loss": 11.6702, "step": 21083 }, { "epoch": 0.4413463953780457, "grad_norm": 0.2969816327095032, "learning_rate": 0.00018953339650386718, "loss": 11.6854, "step": 21084 }, { "epoch": 0.44136732814200785, "grad_norm": 0.2303885519504547, "learning_rate": 0.00018953241993822, "loss": 11.6778, "step": 21085 }, { "epoch": 0.44138826090597, "grad_norm": 0.29536381363868713, "learning_rate": 0.00018953144332953278, "loss": 11.6739, "step": 21086 }, { "epoch": 0.4414091936699322, "grad_norm": 0.2965189814567566, "learning_rate": 0.00018953046667780603, "loss": 11.676, "step": 21087 }, { "epoch": 0.44143012643389434, "grad_norm": 0.294283390045166, "learning_rate": 0.00018952948998304017, "loss": 11.6859, "step": 21088 }, { "epoch": 0.4414510591978565, "grad_norm": 0.3024871349334717, "learning_rate": 0.0001895285132452357, "loss": 11.6857, "step": 21089 }, { "epoch": 0.44147199196181863, "grad_norm": 0.2760540843009949, "learning_rate": 0.00018952753646439305, "loss": 11.6658, "step": 21090 }, { "epoch": 0.4414929247257808, "grad_norm": 0.2675997018814087, "learning_rate": 0.00018952655964051275, "loss": 11.683, "step": 21091 }, { "epoch": 0.4415138574897429, "grad_norm": 0.29355356097221375, "learning_rate": 0.00018952558277359524, "loss": 11.6587, "step": 21092 }, { "epoch": 0.4415347902537051, "grad_norm": 0.23211655020713806, "learning_rate": 0.000189524605863641, "loss": 11.6666, "step": 21093 }, { "epoch": 0.44155572301766727, "grad_norm": 0.28258970379829407, "learning_rate": 0.00018952362891065043, "loss": 11.6722, "step": 21094 }, { "epoch": 0.4415766557816294, "grad_norm": 0.2612331509590149, "learning_rate": 0.00018952265191462414, "loss": 11.6504, "step": 21095 }, { "epoch": 0.44159758854559156, "grad_norm": 0.2623770236968994, "learning_rate": 0.00018952167487556242, "loss": 11.6773, "step": 21096 }, { "epoch": 0.4416185213095537, "grad_norm": 0.2763356566429138, "learning_rate": 0.0001895206977934659, "loss": 11.68, "step": 21097 }, { "epoch": 0.44163945407351585, "grad_norm": 0.33802172541618347, "learning_rate": 0.00018951972066833498, "loss": 11.6821, "step": 21098 }, { "epoch": 0.44166038683747805, "grad_norm": 0.26508164405822754, "learning_rate": 0.00018951874350017013, "loss": 11.669, "step": 21099 }, { "epoch": 0.4416813196014402, "grad_norm": 0.37496882677078247, "learning_rate": 0.00018951776628897183, "loss": 11.7029, "step": 21100 }, { "epoch": 0.44170225236540234, "grad_norm": 0.2533247768878937, "learning_rate": 0.00018951678903474055, "loss": 11.6753, "step": 21101 }, { "epoch": 0.4417231851293645, "grad_norm": 0.3176623582839966, "learning_rate": 0.00018951581173747673, "loss": 11.6701, "step": 21102 }, { "epoch": 0.4417441178933266, "grad_norm": 0.27531158924102783, "learning_rate": 0.0001895148343971809, "loss": 11.6806, "step": 21103 }, { "epoch": 0.44176505065728877, "grad_norm": 0.35460150241851807, "learning_rate": 0.00018951385701385348, "loss": 11.667, "step": 21104 }, { "epoch": 0.4417859834212509, "grad_norm": 0.3061393201351166, "learning_rate": 0.000189512879587495, "loss": 11.6709, "step": 21105 }, { "epoch": 0.4418069161852131, "grad_norm": 0.28169411420822144, "learning_rate": 0.0001895119021181058, "loss": 11.6794, "step": 21106 }, { "epoch": 0.44182784894917526, "grad_norm": 0.2483358383178711, "learning_rate": 0.00018951092460568647, "loss": 11.6758, "step": 21107 }, { "epoch": 0.4418487817131374, "grad_norm": 0.2688100039958954, "learning_rate": 0.00018950994705023744, "loss": 11.6809, "step": 21108 }, { "epoch": 0.44186971447709955, "grad_norm": 0.2941876947879791, "learning_rate": 0.0001895089694517592, "loss": 11.6731, "step": 21109 }, { "epoch": 0.4418906472410617, "grad_norm": 0.30313000082969666, "learning_rate": 0.0001895079918102522, "loss": 11.6736, "step": 21110 }, { "epoch": 0.44191158000502384, "grad_norm": 0.28376832604408264, "learning_rate": 0.00018950701412571693, "loss": 11.6694, "step": 21111 }, { "epoch": 0.44193251276898604, "grad_norm": 0.27193865180015564, "learning_rate": 0.00018950603639815382, "loss": 11.6697, "step": 21112 }, { "epoch": 0.4419534455329482, "grad_norm": 0.2526479959487915, "learning_rate": 0.00018950505862756336, "loss": 11.6756, "step": 21113 }, { "epoch": 0.44197437829691033, "grad_norm": 0.29268142580986023, "learning_rate": 0.00018950408081394604, "loss": 11.6644, "step": 21114 }, { "epoch": 0.4419953110608725, "grad_norm": 0.25610342621803284, "learning_rate": 0.00018950310295730232, "loss": 11.6607, "step": 21115 }, { "epoch": 0.4420162438248346, "grad_norm": 0.2944444417953491, "learning_rate": 0.00018950212505763265, "loss": 11.6616, "step": 21116 }, { "epoch": 0.44203717658879677, "grad_norm": 0.2336348593235016, "learning_rate": 0.00018950114711493752, "loss": 11.6673, "step": 21117 }, { "epoch": 0.44205810935275897, "grad_norm": 0.3013952970504761, "learning_rate": 0.00018950016912921738, "loss": 11.6721, "step": 21118 }, { "epoch": 0.4420790421167211, "grad_norm": 0.24755004048347473, "learning_rate": 0.00018949919110047275, "loss": 11.6679, "step": 21119 }, { "epoch": 0.44209997488068326, "grad_norm": 0.2719251215457916, "learning_rate": 0.00018949821302870405, "loss": 11.6779, "step": 21120 }, { "epoch": 0.4421209076446454, "grad_norm": 0.30239924788475037, "learning_rate": 0.00018949723491391176, "loss": 11.6726, "step": 21121 }, { "epoch": 0.44214184040860754, "grad_norm": 0.3050389587879181, "learning_rate": 0.00018949625675609638, "loss": 11.6692, "step": 21122 }, { "epoch": 0.4421627731725697, "grad_norm": 0.24699126183986664, "learning_rate": 0.00018949527855525831, "loss": 11.6563, "step": 21123 }, { "epoch": 0.44218370593653183, "grad_norm": 0.3245773911476135, "learning_rate": 0.00018949430031139808, "loss": 11.6755, "step": 21124 }, { "epoch": 0.44220463870049403, "grad_norm": 0.2607215940952301, "learning_rate": 0.00018949332202451618, "loss": 11.6843, "step": 21125 }, { "epoch": 0.4422255714644562, "grad_norm": 0.35225486755371094, "learning_rate": 0.000189492343694613, "loss": 11.6841, "step": 21126 }, { "epoch": 0.4422465042284183, "grad_norm": 0.2625313401222229, "learning_rate": 0.00018949136532168907, "loss": 11.6644, "step": 21127 }, { "epoch": 0.44226743699238047, "grad_norm": 0.2975631058216095, "learning_rate": 0.00018949038690574486, "loss": 11.6797, "step": 21128 }, { "epoch": 0.4422883697563426, "grad_norm": 0.2937226891517639, "learning_rate": 0.00018948940844678083, "loss": 11.6718, "step": 21129 }, { "epoch": 0.44230930252030476, "grad_norm": 0.3000645637512207, "learning_rate": 0.00018948842994479743, "loss": 11.6858, "step": 21130 }, { "epoch": 0.44233023528426696, "grad_norm": 0.3170596659183502, "learning_rate": 0.00018948745139979518, "loss": 11.6712, "step": 21131 }, { "epoch": 0.4423511680482291, "grad_norm": 0.3230098485946655, "learning_rate": 0.0001894864728117745, "loss": 11.6635, "step": 21132 }, { "epoch": 0.44237210081219125, "grad_norm": 0.44561779499053955, "learning_rate": 0.00018948549418073586, "loss": 11.6834, "step": 21133 }, { "epoch": 0.4423930335761534, "grad_norm": 0.23915298283100128, "learning_rate": 0.0001894845155066798, "loss": 11.6717, "step": 21134 }, { "epoch": 0.44241396634011554, "grad_norm": 0.3399195671081543, "learning_rate": 0.0001894835367896067, "loss": 11.6705, "step": 21135 }, { "epoch": 0.4424348991040777, "grad_norm": 0.2762373685836792, "learning_rate": 0.00018948255802951706, "loss": 11.6642, "step": 21136 }, { "epoch": 0.44245583186803983, "grad_norm": 0.27970707416534424, "learning_rate": 0.00018948157922641138, "loss": 11.6798, "step": 21137 }, { "epoch": 0.44247676463200203, "grad_norm": 0.24266216158866882, "learning_rate": 0.00018948060038029012, "loss": 11.6548, "step": 21138 }, { "epoch": 0.4424976973959642, "grad_norm": 0.2852736711502075, "learning_rate": 0.00018947962149115375, "loss": 11.6658, "step": 21139 }, { "epoch": 0.4425186301599263, "grad_norm": 0.2909334897994995, "learning_rate": 0.0001894786425590027, "loss": 11.6775, "step": 21140 }, { "epoch": 0.44253956292388846, "grad_norm": 0.3621373474597931, "learning_rate": 0.00018947766358383747, "loss": 11.6722, "step": 21141 }, { "epoch": 0.4425604956878506, "grad_norm": 0.2524076998233795, "learning_rate": 0.00018947668456565858, "loss": 11.6737, "step": 21142 }, { "epoch": 0.44258142845181275, "grad_norm": 0.31937113404273987, "learning_rate": 0.00018947570550446644, "loss": 11.6704, "step": 21143 }, { "epoch": 0.44260236121577495, "grad_norm": 0.33377930521965027, "learning_rate": 0.00018947472640026152, "loss": 11.6641, "step": 21144 }, { "epoch": 0.4426232939797371, "grad_norm": 0.2834923267364502, "learning_rate": 0.0001894737472530443, "loss": 11.6892, "step": 21145 }, { "epoch": 0.44264422674369924, "grad_norm": 0.36266881227493286, "learning_rate": 0.00018947276806281527, "loss": 11.675, "step": 21146 }, { "epoch": 0.4426651595076614, "grad_norm": 0.25856253504753113, "learning_rate": 0.00018947178882957485, "loss": 11.6673, "step": 21147 }, { "epoch": 0.44268609227162353, "grad_norm": 0.244625985622406, "learning_rate": 0.00018947080955332362, "loss": 11.6807, "step": 21148 }, { "epoch": 0.4427070250355857, "grad_norm": 0.2870715260505676, "learning_rate": 0.00018946983023406192, "loss": 11.6714, "step": 21149 }, { "epoch": 0.4427279577995479, "grad_norm": 0.22371898591518402, "learning_rate": 0.0001894688508717903, "loss": 11.6747, "step": 21150 }, { "epoch": 0.44274889056351, "grad_norm": 0.26321589946746826, "learning_rate": 0.0001894678714665092, "loss": 11.671, "step": 21151 }, { "epoch": 0.44276982332747217, "grad_norm": 0.27207043766975403, "learning_rate": 0.00018946689201821911, "loss": 11.677, "step": 21152 }, { "epoch": 0.4427907560914343, "grad_norm": 0.261472225189209, "learning_rate": 0.00018946591252692053, "loss": 11.6475, "step": 21153 }, { "epoch": 0.44281168885539646, "grad_norm": 0.2750278413295746, "learning_rate": 0.00018946493299261383, "loss": 11.6728, "step": 21154 }, { "epoch": 0.4428326216193586, "grad_norm": 0.34356027841567993, "learning_rate": 0.0001894639534152996, "loss": 11.6674, "step": 21155 }, { "epoch": 0.44285355438332075, "grad_norm": 0.2506602704524994, "learning_rate": 0.0001894629737949782, "loss": 11.6756, "step": 21156 }, { "epoch": 0.44287448714728295, "grad_norm": 0.3037704527378082, "learning_rate": 0.0001894619941316502, "loss": 11.6803, "step": 21157 }, { "epoch": 0.4428954199112451, "grad_norm": 0.3000098764896393, "learning_rate": 0.00018946101442531598, "loss": 11.6944, "step": 21158 }, { "epoch": 0.44291635267520724, "grad_norm": 0.2637934684753418, "learning_rate": 0.0001894600346759761, "loss": 11.6717, "step": 21159 }, { "epoch": 0.4429372854391694, "grad_norm": 0.5141655206680298, "learning_rate": 0.00018945905488363098, "loss": 11.6805, "step": 21160 }, { "epoch": 0.4429582182031315, "grad_norm": 0.3428511917591095, "learning_rate": 0.00018945807504828108, "loss": 11.6596, "step": 21161 }, { "epoch": 0.44297915096709367, "grad_norm": 0.27817273139953613, "learning_rate": 0.00018945709516992692, "loss": 11.6735, "step": 21162 }, { "epoch": 0.44300008373105587, "grad_norm": 0.26068538427352905, "learning_rate": 0.0001894561152485689, "loss": 11.6676, "step": 21163 }, { "epoch": 0.443021016495018, "grad_norm": 0.2943366467952728, "learning_rate": 0.00018945513528420755, "loss": 11.6718, "step": 21164 }, { "epoch": 0.44304194925898016, "grad_norm": 0.4024045467376709, "learning_rate": 0.00018945415527684336, "loss": 11.684, "step": 21165 }, { "epoch": 0.4430628820229423, "grad_norm": 0.27881574630737305, "learning_rate": 0.0001894531752264767, "loss": 11.6741, "step": 21166 }, { "epoch": 0.44308381478690445, "grad_norm": 0.2993021309375763, "learning_rate": 0.00018945219513310816, "loss": 11.6691, "step": 21167 }, { "epoch": 0.4431047475508666, "grad_norm": 0.32008838653564453, "learning_rate": 0.00018945121499673814, "loss": 11.6584, "step": 21168 }, { "epoch": 0.4431256803148288, "grad_norm": 0.31307947635650635, "learning_rate": 0.00018945023481736715, "loss": 11.6605, "step": 21169 }, { "epoch": 0.44314661307879094, "grad_norm": 0.3170872628688812, "learning_rate": 0.0001894492545949956, "loss": 11.6587, "step": 21170 }, { "epoch": 0.4431675458427531, "grad_norm": 0.3641200065612793, "learning_rate": 0.00018944827432962403, "loss": 11.6822, "step": 21171 }, { "epoch": 0.44318847860671523, "grad_norm": 0.2345181256532669, "learning_rate": 0.00018944729402125288, "loss": 11.6637, "step": 21172 }, { "epoch": 0.4432094113706774, "grad_norm": 0.30058515071868896, "learning_rate": 0.0001894463136698826, "loss": 11.6644, "step": 21173 }, { "epoch": 0.4432303441346395, "grad_norm": 0.3353045582771301, "learning_rate": 0.00018944533327551369, "loss": 11.6774, "step": 21174 }, { "epoch": 0.44325127689860166, "grad_norm": 0.3403203785419464, "learning_rate": 0.00018944435283814662, "loss": 11.6747, "step": 21175 }, { "epoch": 0.44327220966256387, "grad_norm": 0.32612377405166626, "learning_rate": 0.00018944337235778184, "loss": 11.6771, "step": 21176 }, { "epoch": 0.443293142426526, "grad_norm": 0.33308112621307373, "learning_rate": 0.00018944239183441987, "loss": 11.6698, "step": 21177 }, { "epoch": 0.44331407519048815, "grad_norm": 0.2638007402420044, "learning_rate": 0.00018944141126806113, "loss": 11.6673, "step": 21178 }, { "epoch": 0.4433350079544503, "grad_norm": 0.3779484033584595, "learning_rate": 0.00018944043065870613, "loss": 11.6704, "step": 21179 }, { "epoch": 0.44335594071841244, "grad_norm": 0.291810005903244, "learning_rate": 0.00018943945000635533, "loss": 11.6657, "step": 21180 }, { "epoch": 0.4433768734823746, "grad_norm": 0.30273935198783875, "learning_rate": 0.00018943846931100916, "loss": 11.673, "step": 21181 }, { "epoch": 0.4433978062463368, "grad_norm": 0.23920638859272003, "learning_rate": 0.00018943748857266814, "loss": 11.6617, "step": 21182 }, { "epoch": 0.44341873901029893, "grad_norm": 0.2718697190284729, "learning_rate": 0.00018943650779133274, "loss": 11.6946, "step": 21183 }, { "epoch": 0.4434396717742611, "grad_norm": 0.2770156264305115, "learning_rate": 0.0001894355269670034, "loss": 11.6746, "step": 21184 }, { "epoch": 0.4434606045382232, "grad_norm": 0.42167025804519653, "learning_rate": 0.0001894345460996806, "loss": 11.6827, "step": 21185 }, { "epoch": 0.44348153730218537, "grad_norm": 0.29394960403442383, "learning_rate": 0.00018943356518936485, "loss": 11.6747, "step": 21186 }, { "epoch": 0.4435024700661475, "grad_norm": 0.3011140823364258, "learning_rate": 0.0001894325842360566, "loss": 11.6822, "step": 21187 }, { "epoch": 0.4435234028301097, "grad_norm": 0.28351929783821106, "learning_rate": 0.0001894316032397563, "loss": 11.6643, "step": 21188 }, { "epoch": 0.44354433559407186, "grad_norm": 0.3582117557525635, "learning_rate": 0.00018943062220046444, "loss": 11.6859, "step": 21189 }, { "epoch": 0.443565268358034, "grad_norm": 0.25981953740119934, "learning_rate": 0.00018942964111818146, "loss": 11.6787, "step": 21190 }, { "epoch": 0.44358620112199615, "grad_norm": 0.2533097267150879, "learning_rate": 0.00018942865999290788, "loss": 11.6807, "step": 21191 }, { "epoch": 0.4436071338859583, "grad_norm": 0.3183544874191284, "learning_rate": 0.00018942767882464416, "loss": 11.6864, "step": 21192 }, { "epoch": 0.44362806664992044, "grad_norm": 0.3258306682109833, "learning_rate": 0.00018942669761339075, "loss": 11.673, "step": 21193 }, { "epoch": 0.4436489994138826, "grad_norm": 0.296953022480011, "learning_rate": 0.00018942571635914814, "loss": 11.6741, "step": 21194 }, { "epoch": 0.4436699321778448, "grad_norm": 0.28095903992652893, "learning_rate": 0.0001894247350619168, "loss": 11.67, "step": 21195 }, { "epoch": 0.44369086494180693, "grad_norm": 0.28100255131721497, "learning_rate": 0.00018942375372169718, "loss": 11.667, "step": 21196 }, { "epoch": 0.4437117977057691, "grad_norm": 0.3077211081981659, "learning_rate": 0.00018942277233848978, "loss": 11.661, "step": 21197 }, { "epoch": 0.4437327304697312, "grad_norm": 0.309909462928772, "learning_rate": 0.00018942179091229505, "loss": 11.6656, "step": 21198 }, { "epoch": 0.44375366323369336, "grad_norm": 0.25710055232048035, "learning_rate": 0.0001894208094431135, "loss": 11.6676, "step": 21199 }, { "epoch": 0.4437745959976555, "grad_norm": 0.32466185092926025, "learning_rate": 0.00018941982793094556, "loss": 11.6621, "step": 21200 }, { "epoch": 0.4437955287616177, "grad_norm": 0.3923531770706177, "learning_rate": 0.0001894188463757917, "loss": 11.6802, "step": 21201 }, { "epoch": 0.44381646152557985, "grad_norm": 0.38188496232032776, "learning_rate": 0.00018941786477765242, "loss": 11.686, "step": 21202 }, { "epoch": 0.443837394289542, "grad_norm": 0.26257357001304626, "learning_rate": 0.0001894168831365282, "loss": 11.6871, "step": 21203 }, { "epoch": 0.44385832705350414, "grad_norm": 0.2867746949195862, "learning_rate": 0.00018941590145241948, "loss": 11.6816, "step": 21204 }, { "epoch": 0.4438792598174663, "grad_norm": 0.3049759268760681, "learning_rate": 0.0001894149197253267, "loss": 11.6719, "step": 21205 }, { "epoch": 0.44390019258142843, "grad_norm": 0.38429880142211914, "learning_rate": 0.00018941393795525045, "loss": 11.6831, "step": 21206 }, { "epoch": 0.44392112534539063, "grad_norm": 0.3305627703666687, "learning_rate": 0.0001894129561421911, "loss": 11.683, "step": 21207 }, { "epoch": 0.4439420581093528, "grad_norm": 0.2869848310947418, "learning_rate": 0.00018941197428614913, "loss": 11.6648, "step": 21208 }, { "epoch": 0.4439629908733149, "grad_norm": 0.24030442535877228, "learning_rate": 0.00018941099238712505, "loss": 11.6662, "step": 21209 }, { "epoch": 0.44398392363727707, "grad_norm": 0.2662321925163269, "learning_rate": 0.00018941001044511934, "loss": 11.6823, "step": 21210 }, { "epoch": 0.4440048564012392, "grad_norm": 0.33987581729888916, "learning_rate": 0.00018940902846013242, "loss": 11.6855, "step": 21211 }, { "epoch": 0.44402578916520136, "grad_norm": 0.2749675214290619, "learning_rate": 0.00018940804643216477, "loss": 11.6717, "step": 21212 }, { "epoch": 0.4440467219291635, "grad_norm": 0.29048460721969604, "learning_rate": 0.0001894070643612169, "loss": 11.6605, "step": 21213 }, { "epoch": 0.4440676546931257, "grad_norm": 0.2744426429271698, "learning_rate": 0.00018940608224728926, "loss": 11.6661, "step": 21214 }, { "epoch": 0.44408858745708785, "grad_norm": 0.2667234539985657, "learning_rate": 0.00018940510009038232, "loss": 11.6643, "step": 21215 }, { "epoch": 0.44410952022105, "grad_norm": 0.3523109555244446, "learning_rate": 0.00018940411789049658, "loss": 11.6595, "step": 21216 }, { "epoch": 0.44413045298501214, "grad_norm": 0.26671963930130005, "learning_rate": 0.00018940313564763247, "loss": 11.6708, "step": 21217 }, { "epoch": 0.4441513857489743, "grad_norm": 0.2709190249443054, "learning_rate": 0.00018940215336179047, "loss": 11.6944, "step": 21218 }, { "epoch": 0.4441723185129364, "grad_norm": 0.2623409032821655, "learning_rate": 0.00018940117103297105, "loss": 11.6646, "step": 21219 }, { "epoch": 0.4441932512768986, "grad_norm": 0.27423161268234253, "learning_rate": 0.00018940018866117476, "loss": 11.6751, "step": 21220 }, { "epoch": 0.44421418404086077, "grad_norm": 0.2930365204811096, "learning_rate": 0.00018939920624640196, "loss": 11.6533, "step": 21221 }, { "epoch": 0.4442351168048229, "grad_norm": 0.30395832657814026, "learning_rate": 0.00018939822378865318, "loss": 11.6787, "step": 21222 }, { "epoch": 0.44425604956878506, "grad_norm": 0.28937414288520813, "learning_rate": 0.00018939724128792885, "loss": 11.6714, "step": 21223 }, { "epoch": 0.4442769823327472, "grad_norm": 0.326608270406723, "learning_rate": 0.0001893962587442295, "loss": 11.662, "step": 21224 }, { "epoch": 0.44429791509670935, "grad_norm": 0.276517778635025, "learning_rate": 0.00018939527615755562, "loss": 11.6656, "step": 21225 }, { "epoch": 0.44431884786067155, "grad_norm": 0.29045283794403076, "learning_rate": 0.00018939429352790758, "loss": 11.6702, "step": 21226 }, { "epoch": 0.4443397806246337, "grad_norm": 0.28672999143600464, "learning_rate": 0.00018939331085528592, "loss": 11.6743, "step": 21227 }, { "epoch": 0.44436071338859584, "grad_norm": 0.24162331223487854, "learning_rate": 0.00018939232813969112, "loss": 11.6823, "step": 21228 }, { "epoch": 0.444381646152558, "grad_norm": 0.25021880865097046, "learning_rate": 0.00018939134538112365, "loss": 11.6821, "step": 21229 }, { "epoch": 0.44440257891652013, "grad_norm": 0.24275103211402893, "learning_rate": 0.00018939036257958393, "loss": 11.6853, "step": 21230 }, { "epoch": 0.4444235116804823, "grad_norm": 0.3543091118335724, "learning_rate": 0.0001893893797350725, "loss": 11.6707, "step": 21231 }, { "epoch": 0.4444444444444444, "grad_norm": 0.26211249828338623, "learning_rate": 0.0001893883968475898, "loss": 11.6762, "step": 21232 }, { "epoch": 0.4444653772084066, "grad_norm": 0.2199893295764923, "learning_rate": 0.0001893874139171363, "loss": 11.6787, "step": 21233 }, { "epoch": 0.44448630997236876, "grad_norm": 0.3998139202594757, "learning_rate": 0.00018938643094371246, "loss": 11.6927, "step": 21234 }, { "epoch": 0.4445072427363309, "grad_norm": 0.2796092927455902, "learning_rate": 0.0001893854479273188, "loss": 11.6711, "step": 21235 }, { "epoch": 0.44452817550029305, "grad_norm": 0.28700801730155945, "learning_rate": 0.00018938446486795578, "loss": 11.6802, "step": 21236 }, { "epoch": 0.4445491082642552, "grad_norm": 0.3219740092754364, "learning_rate": 0.00018938348176562384, "loss": 11.6749, "step": 21237 }, { "epoch": 0.44457004102821734, "grad_norm": 0.257481187582016, "learning_rate": 0.00018938249862032345, "loss": 11.6545, "step": 21238 }, { "epoch": 0.44459097379217954, "grad_norm": 0.37422654032707214, "learning_rate": 0.0001893815154320551, "loss": 11.6869, "step": 21239 }, { "epoch": 0.4446119065561417, "grad_norm": 0.20516538619995117, "learning_rate": 0.0001893805322008193, "loss": 11.6713, "step": 21240 }, { "epoch": 0.44463283932010383, "grad_norm": 0.3309917151927948, "learning_rate": 0.00018937954892661645, "loss": 11.6756, "step": 21241 }, { "epoch": 0.444653772084066, "grad_norm": 0.2741445004940033, "learning_rate": 0.00018937856560944706, "loss": 11.6735, "step": 21242 }, { "epoch": 0.4446747048480281, "grad_norm": 0.26021650433540344, "learning_rate": 0.00018937758224931163, "loss": 11.6676, "step": 21243 }, { "epoch": 0.44469563761199027, "grad_norm": 0.33909380435943604, "learning_rate": 0.0001893765988462106, "loss": 11.6867, "step": 21244 }, { "epoch": 0.4447165703759524, "grad_norm": 0.25669577717781067, "learning_rate": 0.00018937561540014443, "loss": 11.6771, "step": 21245 }, { "epoch": 0.4447375031399146, "grad_norm": 0.21982628107070923, "learning_rate": 0.00018937463191111365, "loss": 11.6713, "step": 21246 }, { "epoch": 0.44475843590387676, "grad_norm": 0.27479153871536255, "learning_rate": 0.00018937364837911863, "loss": 11.6826, "step": 21247 }, { "epoch": 0.4447793686678389, "grad_norm": 0.3629983961582184, "learning_rate": 0.00018937266480415995, "loss": 11.6625, "step": 21248 }, { "epoch": 0.44480030143180105, "grad_norm": 0.3082209527492523, "learning_rate": 0.00018937168118623803, "loss": 11.6743, "step": 21249 }, { "epoch": 0.4448212341957632, "grad_norm": 0.25321298837661743, "learning_rate": 0.00018937069752535335, "loss": 11.6544, "step": 21250 }, { "epoch": 0.44484216695972534, "grad_norm": 0.25531065464019775, "learning_rate": 0.0001893697138215064, "loss": 11.6834, "step": 21251 }, { "epoch": 0.44486309972368754, "grad_norm": 0.3274916112422943, "learning_rate": 0.00018936873007469762, "loss": 11.6872, "step": 21252 }, { "epoch": 0.4448840324876497, "grad_norm": 0.26215070486068726, "learning_rate": 0.00018936774628492748, "loss": 11.6745, "step": 21253 }, { "epoch": 0.44490496525161183, "grad_norm": 0.29990410804748535, "learning_rate": 0.00018936676245219651, "loss": 11.6621, "step": 21254 }, { "epoch": 0.444925898015574, "grad_norm": 0.3786063492298126, "learning_rate": 0.00018936577857650515, "loss": 11.6788, "step": 21255 }, { "epoch": 0.4449468307795361, "grad_norm": 0.2853335738182068, "learning_rate": 0.00018936479465785384, "loss": 11.6804, "step": 21256 }, { "epoch": 0.44496776354349826, "grad_norm": 0.24878470599651337, "learning_rate": 0.00018936381069624308, "loss": 11.6806, "step": 21257 }, { "epoch": 0.44498869630746046, "grad_norm": 0.3047530949115753, "learning_rate": 0.0001893628266916734, "loss": 11.6547, "step": 21258 }, { "epoch": 0.4450096290714226, "grad_norm": 0.33059826493263245, "learning_rate": 0.00018936184264414517, "loss": 11.6761, "step": 21259 }, { "epoch": 0.44503056183538475, "grad_norm": 0.23404039442539215, "learning_rate": 0.0001893608585536589, "loss": 11.671, "step": 21260 }, { "epoch": 0.4450514945993469, "grad_norm": 0.31474044919013977, "learning_rate": 0.0001893598744202151, "loss": 11.6864, "step": 21261 }, { "epoch": 0.44507242736330904, "grad_norm": 0.24299514293670654, "learning_rate": 0.0001893588902438142, "loss": 11.6647, "step": 21262 }, { "epoch": 0.4450933601272712, "grad_norm": 0.22817134857177734, "learning_rate": 0.0001893579060244567, "loss": 11.666, "step": 21263 }, { "epoch": 0.44511429289123333, "grad_norm": 0.21702373027801514, "learning_rate": 0.00018935692176214305, "loss": 11.6669, "step": 21264 }, { "epoch": 0.44513522565519553, "grad_norm": 0.28664571046829224, "learning_rate": 0.00018935593745687376, "loss": 11.6827, "step": 21265 }, { "epoch": 0.4451561584191577, "grad_norm": 0.2146456092596054, "learning_rate": 0.00018935495310864927, "loss": 11.6772, "step": 21266 }, { "epoch": 0.4451770911831198, "grad_norm": 0.3316196799278259, "learning_rate": 0.00018935396871747008, "loss": 11.6787, "step": 21267 }, { "epoch": 0.44519802394708197, "grad_norm": 0.3517417907714844, "learning_rate": 0.00018935298428333664, "loss": 11.6899, "step": 21268 }, { "epoch": 0.4452189567110441, "grad_norm": 0.2800183594226837, "learning_rate": 0.0001893519998062494, "loss": 11.6768, "step": 21269 }, { "epoch": 0.44523988947500626, "grad_norm": 0.29397687315940857, "learning_rate": 0.00018935101528620891, "loss": 11.6786, "step": 21270 }, { "epoch": 0.44526082223896846, "grad_norm": 0.3073714077472687, "learning_rate": 0.00018935003072321558, "loss": 11.6685, "step": 21271 }, { "epoch": 0.4452817550029306, "grad_norm": 0.35415953397750854, "learning_rate": 0.00018934904611726987, "loss": 11.6648, "step": 21272 }, { "epoch": 0.44530268776689275, "grad_norm": 0.2629932761192322, "learning_rate": 0.00018934806146837232, "loss": 11.6825, "step": 21273 }, { "epoch": 0.4453236205308549, "grad_norm": 0.2467486411333084, "learning_rate": 0.00018934707677652335, "loss": 11.671, "step": 21274 }, { "epoch": 0.44534455329481704, "grad_norm": 0.31286194920539856, "learning_rate": 0.00018934609204172346, "loss": 11.6571, "step": 21275 }, { "epoch": 0.4453654860587792, "grad_norm": 0.30802032351493835, "learning_rate": 0.0001893451072639731, "loss": 11.6589, "step": 21276 }, { "epoch": 0.4453864188227414, "grad_norm": 0.32430487871170044, "learning_rate": 0.00018934412244327276, "loss": 11.6766, "step": 21277 }, { "epoch": 0.4454073515867035, "grad_norm": 0.30369460582733154, "learning_rate": 0.0001893431375796229, "loss": 11.664, "step": 21278 }, { "epoch": 0.44542828435066567, "grad_norm": 0.3037029802799225, "learning_rate": 0.00018934215267302402, "loss": 11.6975, "step": 21279 }, { "epoch": 0.4454492171146278, "grad_norm": 0.344014048576355, "learning_rate": 0.00018934116772347656, "loss": 11.6727, "step": 21280 }, { "epoch": 0.44547014987858996, "grad_norm": 0.23780176043510437, "learning_rate": 0.00018934018273098104, "loss": 11.6734, "step": 21281 }, { "epoch": 0.4454910826425521, "grad_norm": 0.21594761312007904, "learning_rate": 0.00018933919769553788, "loss": 11.6801, "step": 21282 }, { "epoch": 0.44551201540651425, "grad_norm": 0.28150975704193115, "learning_rate": 0.00018933821261714757, "loss": 11.667, "step": 21283 }, { "epoch": 0.44553294817047645, "grad_norm": 0.29335060715675354, "learning_rate": 0.0001893372274958106, "loss": 11.6512, "step": 21284 }, { "epoch": 0.4455538809344386, "grad_norm": 0.2509273290634155, "learning_rate": 0.00018933624233152745, "loss": 11.672, "step": 21285 }, { "epoch": 0.44557481369840074, "grad_norm": 0.2464289665222168, "learning_rate": 0.00018933525712429855, "loss": 11.6808, "step": 21286 }, { "epoch": 0.4455957464623629, "grad_norm": 0.2587243914604187, "learning_rate": 0.00018933427187412444, "loss": 11.6782, "step": 21287 }, { "epoch": 0.44561667922632503, "grad_norm": 0.24209944903850555, "learning_rate": 0.00018933328658100553, "loss": 11.6705, "step": 21288 }, { "epoch": 0.4456376119902872, "grad_norm": 0.29828527569770813, "learning_rate": 0.0001893323012449423, "loss": 11.6523, "step": 21289 }, { "epoch": 0.4456585447542494, "grad_norm": 0.25303226709365845, "learning_rate": 0.0001893313158659353, "loss": 11.6837, "step": 21290 }, { "epoch": 0.4456794775182115, "grad_norm": 0.26973792910575867, "learning_rate": 0.0001893303304439849, "loss": 11.6791, "step": 21291 }, { "epoch": 0.44570041028217366, "grad_norm": 0.2936934232711792, "learning_rate": 0.00018932934497909161, "loss": 11.6794, "step": 21292 }, { "epoch": 0.4457213430461358, "grad_norm": 0.2710544764995575, "learning_rate": 0.00018932835947125593, "loss": 11.6734, "step": 21293 }, { "epoch": 0.44574227581009795, "grad_norm": 0.23430398106575012, "learning_rate": 0.00018932737392047833, "loss": 11.6863, "step": 21294 }, { "epoch": 0.4457632085740601, "grad_norm": 0.25583863258361816, "learning_rate": 0.00018932638832675925, "loss": 11.6754, "step": 21295 }, { "epoch": 0.4457841413380223, "grad_norm": 0.2941770851612091, "learning_rate": 0.00018932540269009918, "loss": 11.6648, "step": 21296 }, { "epoch": 0.44580507410198444, "grad_norm": 0.43335747718811035, "learning_rate": 0.00018932441701049863, "loss": 11.6828, "step": 21297 }, { "epoch": 0.4458260068659466, "grad_norm": 0.35905730724334717, "learning_rate": 0.00018932343128795804, "loss": 11.6962, "step": 21298 }, { "epoch": 0.44584693962990873, "grad_norm": 0.31303149461746216, "learning_rate": 0.0001893224455224779, "loss": 11.6658, "step": 21299 }, { "epoch": 0.4458678723938709, "grad_norm": 0.2652457356452942, "learning_rate": 0.00018932145971405865, "loss": 11.6635, "step": 21300 }, { "epoch": 0.445888805157833, "grad_norm": 0.2657967805862427, "learning_rate": 0.00018932047386270074, "loss": 11.6637, "step": 21301 }, { "epoch": 0.44590973792179517, "grad_norm": 0.2675762474536896, "learning_rate": 0.00018931948796840472, "loss": 11.6615, "step": 21302 }, { "epoch": 0.44593067068575737, "grad_norm": 0.2891416549682617, "learning_rate": 0.00018931850203117106, "loss": 11.6731, "step": 21303 }, { "epoch": 0.4459516034497195, "grad_norm": 0.23022392392158508, "learning_rate": 0.00018931751605100018, "loss": 11.6686, "step": 21304 }, { "epoch": 0.44597253621368166, "grad_norm": 0.21457147598266602, "learning_rate": 0.0001893165300278926, "loss": 11.6701, "step": 21305 }, { "epoch": 0.4459934689776438, "grad_norm": 0.32444337010383606, "learning_rate": 0.00018931554396184875, "loss": 11.6791, "step": 21306 }, { "epoch": 0.44601440174160595, "grad_norm": 0.2910580337047577, "learning_rate": 0.00018931455785286915, "loss": 11.6735, "step": 21307 }, { "epoch": 0.4460353345055681, "grad_norm": 0.29521554708480835, "learning_rate": 0.00018931357170095422, "loss": 11.6732, "step": 21308 }, { "epoch": 0.4460562672695303, "grad_norm": 0.27286550402641296, "learning_rate": 0.0001893125855061045, "loss": 11.6821, "step": 21309 }, { "epoch": 0.44607720003349244, "grad_norm": 0.2862206995487213, "learning_rate": 0.0001893115992683204, "loss": 11.6875, "step": 21310 }, { "epoch": 0.4460981327974546, "grad_norm": 0.3367970883846283, "learning_rate": 0.00018931061298760245, "loss": 11.6848, "step": 21311 }, { "epoch": 0.4461190655614167, "grad_norm": 0.7544874548912048, "learning_rate": 0.0001893096266639511, "loss": 11.7038, "step": 21312 }, { "epoch": 0.4461399983253789, "grad_norm": 0.277029424905777, "learning_rate": 0.00018930864029736681, "loss": 11.6759, "step": 21313 }, { "epoch": 0.446160931089341, "grad_norm": 0.24006836116313934, "learning_rate": 0.00018930765388785008, "loss": 11.6732, "step": 21314 }, { "epoch": 0.4461818638533032, "grad_norm": 0.26377347111701965, "learning_rate": 0.00018930666743540133, "loss": 11.6813, "step": 21315 }, { "epoch": 0.44620279661726536, "grad_norm": 0.26547226309776306, "learning_rate": 0.0001893056809400211, "loss": 11.6711, "step": 21316 }, { "epoch": 0.4462237293812275, "grad_norm": 0.3075162470340729, "learning_rate": 0.00018930469440170985, "loss": 11.6414, "step": 21317 }, { "epoch": 0.44624466214518965, "grad_norm": 0.24849970638751984, "learning_rate": 0.00018930370782046808, "loss": 11.6717, "step": 21318 }, { "epoch": 0.4462655949091518, "grad_norm": 0.24245573580265045, "learning_rate": 0.00018930272119629615, "loss": 11.6501, "step": 21319 }, { "epoch": 0.44628652767311394, "grad_norm": 0.29703083634376526, "learning_rate": 0.00018930173452919467, "loss": 11.685, "step": 21320 }, { "epoch": 0.4463074604370761, "grad_norm": 0.27097347378730774, "learning_rate": 0.00018930074781916404, "loss": 11.6613, "step": 21321 }, { "epoch": 0.4463283932010383, "grad_norm": 0.2446761578321457, "learning_rate": 0.00018929976106620472, "loss": 11.6617, "step": 21322 }, { "epoch": 0.44634932596500043, "grad_norm": 0.31795480847358704, "learning_rate": 0.00018929877427031726, "loss": 11.6803, "step": 21323 }, { "epoch": 0.4463702587289626, "grad_norm": 0.29662439227104187, "learning_rate": 0.00018929778743150206, "loss": 11.6696, "step": 21324 }, { "epoch": 0.4463911914929247, "grad_norm": 0.2830296456813812, "learning_rate": 0.00018929680054975963, "loss": 11.6883, "step": 21325 }, { "epoch": 0.44641212425688687, "grad_norm": 0.5472791194915771, "learning_rate": 0.00018929581362509045, "loss": 11.69, "step": 21326 }, { "epoch": 0.446433057020849, "grad_norm": 0.3054348826408386, "learning_rate": 0.000189294826657495, "loss": 11.6804, "step": 21327 }, { "epoch": 0.4464539897848112, "grad_norm": 0.30692440271377563, "learning_rate": 0.00018929383964697369, "loss": 11.6678, "step": 21328 }, { "epoch": 0.44647492254877336, "grad_norm": 0.2651227116584778, "learning_rate": 0.00018929285259352706, "loss": 11.6752, "step": 21329 }, { "epoch": 0.4464958553127355, "grad_norm": 0.2698759138584137, "learning_rate": 0.0001892918654971556, "loss": 11.6828, "step": 21330 }, { "epoch": 0.44651678807669765, "grad_norm": 0.27569422125816345, "learning_rate": 0.0001892908783578597, "loss": 11.6724, "step": 21331 }, { "epoch": 0.4465377208406598, "grad_norm": 0.29966944456100464, "learning_rate": 0.0001892898911756399, "loss": 11.682, "step": 21332 }, { "epoch": 0.44655865360462194, "grad_norm": 0.33083376288414, "learning_rate": 0.00018928890395049667, "loss": 11.6629, "step": 21333 }, { "epoch": 0.4465795863685841, "grad_norm": 0.24810628592967987, "learning_rate": 0.0001892879166824305, "loss": 11.685, "step": 21334 }, { "epoch": 0.4466005191325463, "grad_norm": 0.27367904782295227, "learning_rate": 0.0001892869293714418, "loss": 11.6788, "step": 21335 }, { "epoch": 0.4466214518965084, "grad_norm": 0.2988954186439514, "learning_rate": 0.0001892859420175311, "loss": 11.6557, "step": 21336 }, { "epoch": 0.44664238466047057, "grad_norm": 0.34616175293922424, "learning_rate": 0.00018928495462069885, "loss": 11.6635, "step": 21337 }, { "epoch": 0.4466633174244327, "grad_norm": 0.27080878615379333, "learning_rate": 0.00018928396718094552, "loss": 11.6754, "step": 21338 }, { "epoch": 0.44668425018839486, "grad_norm": 0.2646944224834442, "learning_rate": 0.00018928297969827163, "loss": 11.6887, "step": 21339 }, { "epoch": 0.446705182952357, "grad_norm": 0.3395771384239197, "learning_rate": 0.0001892819921726776, "loss": 11.6741, "step": 21340 }, { "epoch": 0.4467261157163192, "grad_norm": 0.26387521624565125, "learning_rate": 0.00018928100460416394, "loss": 11.6669, "step": 21341 }, { "epoch": 0.44674704848028135, "grad_norm": 0.26742643117904663, "learning_rate": 0.00018928001699273112, "loss": 11.6682, "step": 21342 }, { "epoch": 0.4467679812442435, "grad_norm": 0.3462267816066742, "learning_rate": 0.0001892790293383796, "loss": 11.6668, "step": 21343 }, { "epoch": 0.44678891400820564, "grad_norm": 0.3722924292087555, "learning_rate": 0.00018927804164110983, "loss": 11.6751, "step": 21344 }, { "epoch": 0.4468098467721678, "grad_norm": 0.23627592623233795, "learning_rate": 0.00018927705390092234, "loss": 11.6736, "step": 21345 }, { "epoch": 0.44683077953612993, "grad_norm": 0.26164430379867554, "learning_rate": 0.0001892760661178176, "loss": 11.6828, "step": 21346 }, { "epoch": 0.44685171230009213, "grad_norm": 0.2816019058227539, "learning_rate": 0.00018927507829179604, "loss": 11.6636, "step": 21347 }, { "epoch": 0.4468726450640543, "grad_norm": 0.28533869981765747, "learning_rate": 0.00018927409042285818, "loss": 11.6767, "step": 21348 }, { "epoch": 0.4468935778280164, "grad_norm": 0.2987167239189148, "learning_rate": 0.00018927310251100443, "loss": 11.6673, "step": 21349 }, { "epoch": 0.44691451059197856, "grad_norm": 0.26386716961860657, "learning_rate": 0.00018927211455623536, "loss": 11.6723, "step": 21350 }, { "epoch": 0.4469354433559407, "grad_norm": 0.2660342752933502, "learning_rate": 0.0001892711265585514, "loss": 11.6711, "step": 21351 }, { "epoch": 0.44695637611990285, "grad_norm": 0.2738059163093567, "learning_rate": 0.00018927013851795298, "loss": 11.6673, "step": 21352 }, { "epoch": 0.446977308883865, "grad_norm": 0.28184425830841064, "learning_rate": 0.00018926915043444064, "loss": 11.6642, "step": 21353 }, { "epoch": 0.4469982416478272, "grad_norm": 1.679849624633789, "learning_rate": 0.0001892681623080148, "loss": 11.6386, "step": 21354 }, { "epoch": 0.44701917441178934, "grad_norm": 0.25638294219970703, "learning_rate": 0.00018926717413867602, "loss": 11.6562, "step": 21355 }, { "epoch": 0.4470401071757515, "grad_norm": 0.29372870922088623, "learning_rate": 0.0001892661859264247, "loss": 11.6819, "step": 21356 }, { "epoch": 0.44706103993971363, "grad_norm": 0.2698098421096802, "learning_rate": 0.00018926519767126132, "loss": 11.6685, "step": 21357 }, { "epoch": 0.4470819727036758, "grad_norm": 0.31012919545173645, "learning_rate": 0.00018926420937318637, "loss": 11.6537, "step": 21358 }, { "epoch": 0.4471029054676379, "grad_norm": 0.2963670492172241, "learning_rate": 0.00018926322103220034, "loss": 11.6563, "step": 21359 }, { "epoch": 0.4471238382316001, "grad_norm": 0.3442734181880951, "learning_rate": 0.0001892622326483037, "loss": 11.665, "step": 21360 }, { "epoch": 0.44714477099556227, "grad_norm": 0.3226338326931, "learning_rate": 0.0001892612442214969, "loss": 11.6782, "step": 21361 }, { "epoch": 0.4471657037595244, "grad_norm": 0.30989590287208557, "learning_rate": 0.00018926025575178042, "loss": 11.6875, "step": 21362 }, { "epoch": 0.44718663652348656, "grad_norm": 0.276542067527771, "learning_rate": 0.00018925926723915474, "loss": 11.6758, "step": 21363 }, { "epoch": 0.4472075692874487, "grad_norm": 0.3408272862434387, "learning_rate": 0.00018925827868362038, "loss": 11.6798, "step": 21364 }, { "epoch": 0.44722850205141085, "grad_norm": 0.30916696786880493, "learning_rate": 0.00018925729008517774, "loss": 11.6698, "step": 21365 }, { "epoch": 0.44724943481537305, "grad_norm": 0.25884923338890076, "learning_rate": 0.00018925630144382734, "loss": 11.6787, "step": 21366 }, { "epoch": 0.4472703675793352, "grad_norm": 0.23899000883102417, "learning_rate": 0.00018925531275956967, "loss": 11.6764, "step": 21367 }, { "epoch": 0.44729130034329734, "grad_norm": 0.27977699041366577, "learning_rate": 0.00018925432403240518, "loss": 11.6494, "step": 21368 }, { "epoch": 0.4473122331072595, "grad_norm": 0.26096636056900024, "learning_rate": 0.00018925333526233432, "loss": 11.6608, "step": 21369 }, { "epoch": 0.4473331658712216, "grad_norm": 0.2665013372898102, "learning_rate": 0.00018925234644935762, "loss": 11.6784, "step": 21370 }, { "epoch": 0.4473540986351838, "grad_norm": 0.31651684641838074, "learning_rate": 0.0001892513575934755, "loss": 11.6717, "step": 21371 }, { "epoch": 0.4473750313991459, "grad_norm": 0.2809101343154907, "learning_rate": 0.0001892503686946885, "loss": 11.6649, "step": 21372 }, { "epoch": 0.4473959641631081, "grad_norm": 0.2958921790122986, "learning_rate": 0.000189249379752997, "loss": 11.6676, "step": 21373 }, { "epoch": 0.44741689692707026, "grad_norm": 0.3353157937526703, "learning_rate": 0.0001892483907684016, "loss": 11.6892, "step": 21374 }, { "epoch": 0.4474378296910324, "grad_norm": 0.4930305778980255, "learning_rate": 0.00018924740174090267, "loss": 11.6836, "step": 21375 }, { "epoch": 0.44745876245499455, "grad_norm": 0.31906959414482117, "learning_rate": 0.00018924641267050073, "loss": 11.6654, "step": 21376 }, { "epoch": 0.4474796952189567, "grad_norm": 0.2729511260986328, "learning_rate": 0.00018924542355719628, "loss": 11.6568, "step": 21377 }, { "epoch": 0.44750062798291884, "grad_norm": 0.31357431411743164, "learning_rate": 0.00018924443440098977, "loss": 11.6519, "step": 21378 }, { "epoch": 0.44752156074688104, "grad_norm": 0.2800358831882477, "learning_rate": 0.00018924344520188164, "loss": 11.6799, "step": 21379 }, { "epoch": 0.4475424935108432, "grad_norm": 0.3689824044704437, "learning_rate": 0.0001892424559598724, "loss": 11.7069, "step": 21380 }, { "epoch": 0.44756342627480533, "grad_norm": 0.275823712348938, "learning_rate": 0.00018924146667496255, "loss": 11.6782, "step": 21381 }, { "epoch": 0.4475843590387675, "grad_norm": 0.2925076484680176, "learning_rate": 0.0001892404773471525, "loss": 11.6739, "step": 21382 }, { "epoch": 0.4476052918027296, "grad_norm": 0.254755437374115, "learning_rate": 0.00018923948797644276, "loss": 11.6834, "step": 21383 }, { "epoch": 0.44762622456669177, "grad_norm": 0.2761676609516144, "learning_rate": 0.00018923849856283383, "loss": 11.6621, "step": 21384 }, { "epoch": 0.44764715733065397, "grad_norm": 0.2809046506881714, "learning_rate": 0.0001892375091063262, "loss": 11.6688, "step": 21385 }, { "epoch": 0.4476680900946161, "grad_norm": 0.29088708758354187, "learning_rate": 0.00018923651960692025, "loss": 11.6848, "step": 21386 }, { "epoch": 0.44768902285857826, "grad_norm": 0.2963188886642456, "learning_rate": 0.00018923553006461655, "loss": 11.661, "step": 21387 }, { "epoch": 0.4477099556225404, "grad_norm": 0.2634637653827667, "learning_rate": 0.00018923454047941555, "loss": 11.6561, "step": 21388 }, { "epoch": 0.44773088838650255, "grad_norm": 0.2743471562862396, "learning_rate": 0.0001892335508513177, "loss": 11.6803, "step": 21389 }, { "epoch": 0.4477518211504647, "grad_norm": 0.2916618585586548, "learning_rate": 0.0001892325611803235, "loss": 11.676, "step": 21390 }, { "epoch": 0.44777275391442684, "grad_norm": 0.30456456542015076, "learning_rate": 0.0001892315714664334, "loss": 11.6784, "step": 21391 }, { "epoch": 0.44779368667838904, "grad_norm": 0.3005047142505646, "learning_rate": 0.00018923058170964793, "loss": 11.6753, "step": 21392 }, { "epoch": 0.4478146194423512, "grad_norm": 0.3500272333621979, "learning_rate": 0.0001892295919099675, "loss": 11.6753, "step": 21393 }, { "epoch": 0.4478355522063133, "grad_norm": 0.2332836240530014, "learning_rate": 0.00018922860206739266, "loss": 11.671, "step": 21394 }, { "epoch": 0.44785648497027547, "grad_norm": 0.3235696852207184, "learning_rate": 0.00018922761218192382, "loss": 11.6573, "step": 21395 }, { "epoch": 0.4478774177342376, "grad_norm": 0.2781974673271179, "learning_rate": 0.00018922662225356147, "loss": 11.6531, "step": 21396 }, { "epoch": 0.44789835049819976, "grad_norm": 0.3819087743759155, "learning_rate": 0.0001892256322823061, "loss": 11.673, "step": 21397 }, { "epoch": 0.44791928326216196, "grad_norm": 0.26485684514045715, "learning_rate": 0.00018922464226815816, "loss": 11.6479, "step": 21398 }, { "epoch": 0.4479402160261241, "grad_norm": 0.3028925061225891, "learning_rate": 0.00018922365221111818, "loss": 11.683, "step": 21399 }, { "epoch": 0.44796114879008625, "grad_norm": 0.2737548351287842, "learning_rate": 0.00018922266211118658, "loss": 11.6636, "step": 21400 }, { "epoch": 0.4479820815540484, "grad_norm": 0.23835356533527374, "learning_rate": 0.00018922167196836387, "loss": 11.6765, "step": 21401 }, { "epoch": 0.44800301431801054, "grad_norm": 0.23123523592948914, "learning_rate": 0.0001892206817826505, "loss": 11.68, "step": 21402 }, { "epoch": 0.4480239470819727, "grad_norm": 0.3949930965900421, "learning_rate": 0.00018921969155404698, "loss": 11.6675, "step": 21403 }, { "epoch": 0.4480448798459349, "grad_norm": 0.23707690834999084, "learning_rate": 0.00018921870128255373, "loss": 11.6766, "step": 21404 }, { "epoch": 0.44806581260989703, "grad_norm": 0.40856418013572693, "learning_rate": 0.0001892177109681713, "loss": 11.6794, "step": 21405 }, { "epoch": 0.4480867453738592, "grad_norm": 0.2664295732975006, "learning_rate": 0.00018921672061090013, "loss": 11.6734, "step": 21406 }, { "epoch": 0.4481076781378213, "grad_norm": 0.2794065475463867, "learning_rate": 0.00018921573021074068, "loss": 11.6673, "step": 21407 }, { "epoch": 0.44812861090178346, "grad_norm": 0.3243619203567505, "learning_rate": 0.00018921473976769347, "loss": 11.6713, "step": 21408 }, { "epoch": 0.4481495436657456, "grad_norm": 0.29621008038520813, "learning_rate": 0.0001892137492817589, "loss": 11.6676, "step": 21409 }, { "epoch": 0.44817047642970775, "grad_norm": 0.24110844731330872, "learning_rate": 0.0001892127587529375, "loss": 11.6642, "step": 21410 }, { "epoch": 0.44819140919366995, "grad_norm": 0.31040772795677185, "learning_rate": 0.00018921176818122976, "loss": 11.6691, "step": 21411 }, { "epoch": 0.4482123419576321, "grad_norm": 0.31651929020881653, "learning_rate": 0.00018921077756663612, "loss": 11.6701, "step": 21412 }, { "epoch": 0.44823327472159424, "grad_norm": 0.30496570467948914, "learning_rate": 0.00018920978690915708, "loss": 11.6852, "step": 21413 }, { "epoch": 0.4482542074855564, "grad_norm": 0.2719418704509735, "learning_rate": 0.0001892087962087931, "loss": 11.6769, "step": 21414 }, { "epoch": 0.44827514024951853, "grad_norm": 0.2948617935180664, "learning_rate": 0.00018920780546554465, "loss": 11.6641, "step": 21415 }, { "epoch": 0.4482960730134807, "grad_norm": 0.28286516666412354, "learning_rate": 0.0001892068146794123, "loss": 11.6683, "step": 21416 }, { "epoch": 0.4483170057774429, "grad_norm": 0.3379034399986267, "learning_rate": 0.00018920582385039637, "loss": 11.6686, "step": 21417 }, { "epoch": 0.448337938541405, "grad_norm": 0.31596118211746216, "learning_rate": 0.0001892048329784974, "loss": 11.6809, "step": 21418 }, { "epoch": 0.44835887130536717, "grad_norm": 0.28409162163734436, "learning_rate": 0.00018920384206371594, "loss": 11.6719, "step": 21419 }, { "epoch": 0.4483798040693293, "grad_norm": 0.28412026166915894, "learning_rate": 0.00018920285110605236, "loss": 11.6578, "step": 21420 }, { "epoch": 0.44840073683329146, "grad_norm": 0.3352002501487732, "learning_rate": 0.00018920186010550717, "loss": 11.6699, "step": 21421 }, { "epoch": 0.4484216695972536, "grad_norm": 0.3465154469013214, "learning_rate": 0.0001892008690620809, "loss": 11.6769, "step": 21422 }, { "epoch": 0.44844260236121575, "grad_norm": 0.3097838759422302, "learning_rate": 0.00018919987797577395, "loss": 11.6554, "step": 21423 }, { "epoch": 0.44846353512517795, "grad_norm": 0.32265040278434753, "learning_rate": 0.00018919888684658688, "loss": 11.6819, "step": 21424 }, { "epoch": 0.4484844678891401, "grad_norm": 0.307140588760376, "learning_rate": 0.0001891978956745201, "loss": 11.6789, "step": 21425 }, { "epoch": 0.44850540065310224, "grad_norm": 0.3857576847076416, "learning_rate": 0.00018919690445957406, "loss": 11.6757, "step": 21426 }, { "epoch": 0.4485263334170644, "grad_norm": 0.29853135347366333, "learning_rate": 0.00018919591320174932, "loss": 11.6644, "step": 21427 }, { "epoch": 0.4485472661810265, "grad_norm": 0.2678581476211548, "learning_rate": 0.0001891949219010463, "loss": 11.6789, "step": 21428 }, { "epoch": 0.44856819894498867, "grad_norm": 0.28047314286231995, "learning_rate": 0.0001891939305574655, "loss": 11.6672, "step": 21429 }, { "epoch": 0.4485891317089509, "grad_norm": 0.2938995063304901, "learning_rate": 0.0001891929391710074, "loss": 11.6678, "step": 21430 }, { "epoch": 0.448610064472913, "grad_norm": 0.2705388069152832, "learning_rate": 0.00018919194774167245, "loss": 11.6727, "step": 21431 }, { "epoch": 0.44863099723687516, "grad_norm": 0.34746021032333374, "learning_rate": 0.00018919095626946115, "loss": 11.6846, "step": 21432 }, { "epoch": 0.4486519300008373, "grad_norm": 0.36109039187431335, "learning_rate": 0.00018918996475437397, "loss": 11.6925, "step": 21433 }, { "epoch": 0.44867286276479945, "grad_norm": 0.3598836660385132, "learning_rate": 0.0001891889731964114, "loss": 11.6608, "step": 21434 }, { "epoch": 0.4486937955287616, "grad_norm": 0.5827775597572327, "learning_rate": 0.0001891879815955739, "loss": 11.7022, "step": 21435 }, { "epoch": 0.4487147282927238, "grad_norm": 0.30424216389656067, "learning_rate": 0.00018918698995186193, "loss": 11.6598, "step": 21436 }, { "epoch": 0.44873566105668594, "grad_norm": 0.2830260097980499, "learning_rate": 0.000189185998265276, "loss": 11.6559, "step": 21437 }, { "epoch": 0.4487565938206481, "grad_norm": 0.3458728790283203, "learning_rate": 0.00018918500653581656, "loss": 11.6632, "step": 21438 }, { "epoch": 0.44877752658461023, "grad_norm": 0.2882139980792999, "learning_rate": 0.00018918401476348412, "loss": 11.6724, "step": 21439 }, { "epoch": 0.4487984593485724, "grad_norm": 0.22925418615341187, "learning_rate": 0.00018918302294827912, "loss": 11.659, "step": 21440 }, { "epoch": 0.4488193921125345, "grad_norm": 0.35205087065696716, "learning_rate": 0.00018918203109020206, "loss": 11.6722, "step": 21441 }, { "epoch": 0.44884032487649667, "grad_norm": 0.30370596051216125, "learning_rate": 0.0001891810391892534, "loss": 11.6837, "step": 21442 }, { "epoch": 0.44886125764045887, "grad_norm": 0.26929911971092224, "learning_rate": 0.00018918004724543367, "loss": 11.6726, "step": 21443 }, { "epoch": 0.448882190404421, "grad_norm": 0.27681225538253784, "learning_rate": 0.00018917905525874326, "loss": 11.6558, "step": 21444 }, { "epoch": 0.44890312316838316, "grad_norm": 0.3282683491706848, "learning_rate": 0.0001891780632291827, "loss": 11.6612, "step": 21445 }, { "epoch": 0.4489240559323453, "grad_norm": 0.29029297828674316, "learning_rate": 0.00018917707115675248, "loss": 11.6706, "step": 21446 }, { "epoch": 0.44894498869630745, "grad_norm": 0.3325234353542328, "learning_rate": 0.00018917607904145304, "loss": 11.6678, "step": 21447 }, { "epoch": 0.4489659214602696, "grad_norm": 0.2547006607055664, "learning_rate": 0.00018917508688328486, "loss": 11.6768, "step": 21448 }, { "epoch": 0.4489868542242318, "grad_norm": 0.27481213212013245, "learning_rate": 0.00018917409468224844, "loss": 11.6659, "step": 21449 }, { "epoch": 0.44900778698819394, "grad_norm": 0.30635160207748413, "learning_rate": 0.00018917310243834424, "loss": 11.6748, "step": 21450 }, { "epoch": 0.4490287197521561, "grad_norm": 0.3187256455421448, "learning_rate": 0.00018917211015157274, "loss": 11.6676, "step": 21451 }, { "epoch": 0.4490496525161182, "grad_norm": 0.548668384552002, "learning_rate": 0.00018917111782193441, "loss": 11.669, "step": 21452 }, { "epoch": 0.44907058528008037, "grad_norm": 0.30590859055519104, "learning_rate": 0.00018917012544942977, "loss": 11.6842, "step": 21453 }, { "epoch": 0.4490915180440425, "grad_norm": 0.2899090051651001, "learning_rate": 0.00018916913303405926, "loss": 11.651, "step": 21454 }, { "epoch": 0.4491124508080047, "grad_norm": 0.26340755820274353, "learning_rate": 0.00018916814057582334, "loss": 11.674, "step": 21455 }, { "epoch": 0.44913338357196686, "grad_norm": 0.3457535207271576, "learning_rate": 0.0001891671480747225, "loss": 11.6815, "step": 21456 }, { "epoch": 0.449154316335929, "grad_norm": 0.8786822557449341, "learning_rate": 0.00018916615553075724, "loss": 11.6402, "step": 21457 }, { "epoch": 0.44917524909989115, "grad_norm": 0.25406837463378906, "learning_rate": 0.00018916516294392803, "loss": 11.6593, "step": 21458 }, { "epoch": 0.4491961818638533, "grad_norm": 0.2632687985897064, "learning_rate": 0.0001891641703142353, "loss": 11.672, "step": 21459 }, { "epoch": 0.44921711462781544, "grad_norm": 0.2669701874256134, "learning_rate": 0.0001891631776416796, "loss": 11.6801, "step": 21460 }, { "epoch": 0.4492380473917776, "grad_norm": 0.2592906653881073, "learning_rate": 0.00018916218492626139, "loss": 11.6653, "step": 21461 }, { "epoch": 0.4492589801557398, "grad_norm": 0.3028452396392822, "learning_rate": 0.00018916119216798113, "loss": 11.6579, "step": 21462 }, { "epoch": 0.44927991291970193, "grad_norm": 0.2607097029685974, "learning_rate": 0.00018916019936683927, "loss": 11.6672, "step": 21463 }, { "epoch": 0.4493008456836641, "grad_norm": 0.3521401286125183, "learning_rate": 0.00018915920652283632, "loss": 11.6733, "step": 21464 }, { "epoch": 0.4493217784476262, "grad_norm": 0.3402923345565796, "learning_rate": 0.00018915821363597274, "loss": 11.6779, "step": 21465 }, { "epoch": 0.44934271121158836, "grad_norm": 0.2736791670322418, "learning_rate": 0.00018915722070624903, "loss": 11.6666, "step": 21466 }, { "epoch": 0.4493636439755505, "grad_norm": 0.2682402431964874, "learning_rate": 0.00018915622773366567, "loss": 11.6857, "step": 21467 }, { "epoch": 0.4493845767395127, "grad_norm": 0.287826269865036, "learning_rate": 0.00018915523471822314, "loss": 11.6551, "step": 21468 }, { "epoch": 0.44940550950347485, "grad_norm": 0.33827513456344604, "learning_rate": 0.0001891542416599219, "loss": 11.6685, "step": 21469 }, { "epoch": 0.449426442267437, "grad_norm": 0.34312915802001953, "learning_rate": 0.00018915324855876237, "loss": 11.6927, "step": 21470 }, { "epoch": 0.44944737503139914, "grad_norm": 0.2579359710216522, "learning_rate": 0.00018915225541474515, "loss": 11.6736, "step": 21471 }, { "epoch": 0.4494683077953613, "grad_norm": 0.3543703556060791, "learning_rate": 0.00018915126222787064, "loss": 11.6809, "step": 21472 }, { "epoch": 0.44948924055932343, "grad_norm": 0.2582569718360901, "learning_rate": 0.0001891502689981393, "loss": 11.6621, "step": 21473 }, { "epoch": 0.44951017332328563, "grad_norm": 0.2799060642719269, "learning_rate": 0.00018914927572555169, "loss": 11.6772, "step": 21474 }, { "epoch": 0.4495311060872478, "grad_norm": 0.31951844692230225, "learning_rate": 0.0001891482824101082, "loss": 11.6742, "step": 21475 }, { "epoch": 0.4495520388512099, "grad_norm": 0.27526718378067017, "learning_rate": 0.00018914728905180934, "loss": 11.6902, "step": 21476 }, { "epoch": 0.44957297161517207, "grad_norm": 0.2638216018676758, "learning_rate": 0.0001891462956506556, "loss": 11.6752, "step": 21477 }, { "epoch": 0.4495939043791342, "grad_norm": 0.29892563819885254, "learning_rate": 0.00018914530220664748, "loss": 11.6707, "step": 21478 }, { "epoch": 0.44961483714309636, "grad_norm": 0.33638471364974976, "learning_rate": 0.0001891443087197854, "loss": 11.6665, "step": 21479 }, { "epoch": 0.4496357699070585, "grad_norm": 0.3058552145957947, "learning_rate": 0.00018914331519006986, "loss": 11.6813, "step": 21480 }, { "epoch": 0.4496567026710207, "grad_norm": 0.3032788336277008, "learning_rate": 0.00018914232161750135, "loss": 11.6677, "step": 21481 }, { "epoch": 0.44967763543498285, "grad_norm": 0.2766560912132263, "learning_rate": 0.00018914132800208031, "loss": 11.6837, "step": 21482 }, { "epoch": 0.449698568198945, "grad_norm": 0.3202318847179413, "learning_rate": 0.0001891403343438073, "loss": 11.6659, "step": 21483 }, { "epoch": 0.44971950096290714, "grad_norm": 0.3766794204711914, "learning_rate": 0.00018913934064268271, "loss": 11.6461, "step": 21484 }, { "epoch": 0.4497404337268693, "grad_norm": 0.26716870069503784, "learning_rate": 0.00018913834689870707, "loss": 11.6727, "step": 21485 }, { "epoch": 0.4497613664908314, "grad_norm": 0.24529096484184265, "learning_rate": 0.00018913735311188083, "loss": 11.6578, "step": 21486 }, { "epoch": 0.4497822992547936, "grad_norm": 0.32596591114997864, "learning_rate": 0.00018913635928220445, "loss": 11.6803, "step": 21487 }, { "epoch": 0.44980323201875577, "grad_norm": 0.2777644395828247, "learning_rate": 0.00018913536540967848, "loss": 11.6657, "step": 21488 }, { "epoch": 0.4498241647827179, "grad_norm": 0.24406956136226654, "learning_rate": 0.00018913437149430332, "loss": 11.6614, "step": 21489 }, { "epoch": 0.44984509754668006, "grad_norm": 0.3032730221748352, "learning_rate": 0.0001891333775360795, "loss": 11.6459, "step": 21490 }, { "epoch": 0.4498660303106422, "grad_norm": 0.2577829957008362, "learning_rate": 0.0001891323835350075, "loss": 11.6758, "step": 21491 }, { "epoch": 0.44988696307460435, "grad_norm": 0.33855605125427246, "learning_rate": 0.00018913138949108778, "loss": 11.672, "step": 21492 }, { "epoch": 0.44990789583856655, "grad_norm": 0.3984677493572235, "learning_rate": 0.00018913039540432078, "loss": 11.6816, "step": 21493 }, { "epoch": 0.4499288286025287, "grad_norm": 0.3432130217552185, "learning_rate": 0.00018912940127470702, "loss": 11.672, "step": 21494 }, { "epoch": 0.44994976136649084, "grad_norm": 0.29419082403182983, "learning_rate": 0.00018912840710224698, "loss": 11.6725, "step": 21495 }, { "epoch": 0.449970694130453, "grad_norm": 0.28817611932754517, "learning_rate": 0.00018912741288694112, "loss": 11.6766, "step": 21496 }, { "epoch": 0.44999162689441513, "grad_norm": 0.31980809569358826, "learning_rate": 0.0001891264186287899, "loss": 11.6746, "step": 21497 }, { "epoch": 0.4500125596583773, "grad_norm": 0.2481209933757782, "learning_rate": 0.00018912542432779388, "loss": 11.6621, "step": 21498 }, { "epoch": 0.4500334924223394, "grad_norm": 0.3467421531677246, "learning_rate": 0.00018912442998395346, "loss": 11.6777, "step": 21499 }, { "epoch": 0.4500544251863016, "grad_norm": 0.2606998085975647, "learning_rate": 0.00018912343559726914, "loss": 11.6689, "step": 21500 }, { "epoch": 0.45007535795026377, "grad_norm": 0.29101189970970154, "learning_rate": 0.00018912244116774137, "loss": 11.6673, "step": 21501 }, { "epoch": 0.4500962907142259, "grad_norm": 0.30281689763069153, "learning_rate": 0.0001891214466953707, "loss": 11.6576, "step": 21502 }, { "epoch": 0.45011722347818806, "grad_norm": 0.23179838061332703, "learning_rate": 0.00018912045218015755, "loss": 11.677, "step": 21503 }, { "epoch": 0.4501381562421502, "grad_norm": 0.27340659499168396, "learning_rate": 0.0001891194576221024, "loss": 11.666, "step": 21504 }, { "epoch": 0.45015908900611235, "grad_norm": 0.24922513961791992, "learning_rate": 0.00018911846302120574, "loss": 11.673, "step": 21505 }, { "epoch": 0.45018002177007455, "grad_norm": 0.23316024243831635, "learning_rate": 0.00018911746837746806, "loss": 11.6708, "step": 21506 }, { "epoch": 0.4502009545340367, "grad_norm": 0.412290096282959, "learning_rate": 0.00018911647369088984, "loss": 11.6735, "step": 21507 }, { "epoch": 0.45022188729799884, "grad_norm": 0.3074858486652374, "learning_rate": 0.00018911547896147154, "loss": 11.6705, "step": 21508 }, { "epoch": 0.450242820061961, "grad_norm": 0.34780263900756836, "learning_rate": 0.00018911448418921362, "loss": 11.6753, "step": 21509 }, { "epoch": 0.4502637528259231, "grad_norm": 0.31086280941963196, "learning_rate": 0.0001891134893741166, "loss": 11.6778, "step": 21510 }, { "epoch": 0.45028468558988527, "grad_norm": 0.3188018500804901, "learning_rate": 0.00018911249451618095, "loss": 11.6783, "step": 21511 }, { "epoch": 0.4503056183538474, "grad_norm": 0.36370083689689636, "learning_rate": 0.0001891114996154071, "loss": 11.6817, "step": 21512 }, { "epoch": 0.4503265511178096, "grad_norm": 0.27791696786880493, "learning_rate": 0.00018911050467179558, "loss": 11.6918, "step": 21513 }, { "epoch": 0.45034748388177176, "grad_norm": 0.31784293055534363, "learning_rate": 0.00018910950968534687, "loss": 11.6774, "step": 21514 }, { "epoch": 0.4503684166457339, "grad_norm": 0.2962940037250519, "learning_rate": 0.00018910851465606143, "loss": 11.684, "step": 21515 }, { "epoch": 0.45038934940969605, "grad_norm": 0.23608319461345673, "learning_rate": 0.00018910751958393975, "loss": 11.6686, "step": 21516 }, { "epoch": 0.4504102821736582, "grad_norm": 0.28983449935913086, "learning_rate": 0.0001891065244689823, "loss": 11.6828, "step": 21517 }, { "epoch": 0.45043121493762034, "grad_norm": 0.3905700445175171, "learning_rate": 0.00018910552931118954, "loss": 11.6814, "step": 21518 }, { "epoch": 0.45045214770158254, "grad_norm": 0.24209244549274445, "learning_rate": 0.00018910453411056196, "loss": 11.6718, "step": 21519 }, { "epoch": 0.4504730804655447, "grad_norm": 0.2939150333404541, "learning_rate": 0.00018910353886710007, "loss": 11.6655, "step": 21520 }, { "epoch": 0.45049401322950683, "grad_norm": 0.28651314973831177, "learning_rate": 0.0001891025435808043, "loss": 11.6639, "step": 21521 }, { "epoch": 0.450514945993469, "grad_norm": 0.2854863107204437, "learning_rate": 0.00018910154825167516, "loss": 11.6672, "step": 21522 }, { "epoch": 0.4505358787574311, "grad_norm": 0.32722902297973633, "learning_rate": 0.00018910055287971314, "loss": 11.6836, "step": 21523 }, { "epoch": 0.45055681152139326, "grad_norm": 0.335568368434906, "learning_rate": 0.00018909955746491868, "loss": 11.6701, "step": 21524 }, { "epoch": 0.45057774428535546, "grad_norm": 0.28274255990982056, "learning_rate": 0.00018909856200729225, "loss": 11.6975, "step": 21525 }, { "epoch": 0.4505986770493176, "grad_norm": 0.3622835874557495, "learning_rate": 0.0001890975665068344, "loss": 11.6861, "step": 21526 }, { "epoch": 0.45061960981327975, "grad_norm": 0.23559847474098206, "learning_rate": 0.00018909657096354555, "loss": 11.6658, "step": 21527 }, { "epoch": 0.4506405425772419, "grad_norm": 0.27738186717033386, "learning_rate": 0.0001890955753774262, "loss": 11.6702, "step": 21528 }, { "epoch": 0.45066147534120404, "grad_norm": 0.30882778763771057, "learning_rate": 0.0001890945797484768, "loss": 11.6881, "step": 21529 }, { "epoch": 0.4506824081051662, "grad_norm": 0.35032111406326294, "learning_rate": 0.00018909358407669786, "loss": 11.6735, "step": 21530 }, { "epoch": 0.45070334086912833, "grad_norm": 0.30854833126068115, "learning_rate": 0.00018909258836208984, "loss": 11.6716, "step": 21531 }, { "epoch": 0.45072427363309053, "grad_norm": 0.36339250206947327, "learning_rate": 0.00018909159260465327, "loss": 11.6542, "step": 21532 }, { "epoch": 0.4507452063970527, "grad_norm": 0.27374714612960815, "learning_rate": 0.00018909059680438854, "loss": 11.6644, "step": 21533 }, { "epoch": 0.4507661391610148, "grad_norm": 0.2655998468399048, "learning_rate": 0.00018908960096129617, "loss": 11.6878, "step": 21534 }, { "epoch": 0.45078707192497697, "grad_norm": 0.2799598276615143, "learning_rate": 0.00018908860507537667, "loss": 11.6597, "step": 21535 }, { "epoch": 0.4508080046889391, "grad_norm": 0.31029531359672546, "learning_rate": 0.0001890876091466305, "loss": 11.6801, "step": 21536 }, { "epoch": 0.45082893745290126, "grad_norm": 0.28463196754455566, "learning_rate": 0.0001890866131750581, "loss": 11.6677, "step": 21537 }, { "epoch": 0.45084987021686346, "grad_norm": 0.2773348391056061, "learning_rate": 0.00018908561716066, "loss": 11.6732, "step": 21538 }, { "epoch": 0.4508708029808256, "grad_norm": 0.2762213945388794, "learning_rate": 0.00018908462110343667, "loss": 11.6671, "step": 21539 }, { "epoch": 0.45089173574478775, "grad_norm": 0.2529512047767639, "learning_rate": 0.00018908362500338855, "loss": 11.669, "step": 21540 }, { "epoch": 0.4509126685087499, "grad_norm": 0.24463284015655518, "learning_rate": 0.00018908262886051615, "loss": 11.6716, "step": 21541 }, { "epoch": 0.45093360127271204, "grad_norm": 0.28893816471099854, "learning_rate": 0.00018908163267481996, "loss": 11.6606, "step": 21542 }, { "epoch": 0.4509545340366742, "grad_norm": 0.4193623661994934, "learning_rate": 0.00018908063644630042, "loss": 11.6657, "step": 21543 }, { "epoch": 0.4509754668006364, "grad_norm": 0.371541291475296, "learning_rate": 0.00018907964017495808, "loss": 11.686, "step": 21544 }, { "epoch": 0.4509963995645985, "grad_norm": 0.35088714957237244, "learning_rate": 0.00018907864386079332, "loss": 11.6863, "step": 21545 }, { "epoch": 0.45101733232856067, "grad_norm": 0.31792178750038147, "learning_rate": 0.0001890776475038067, "loss": 11.674, "step": 21546 }, { "epoch": 0.4510382650925228, "grad_norm": 0.25070494413375854, "learning_rate": 0.00018907665110399867, "loss": 11.6573, "step": 21547 }, { "epoch": 0.45105919785648496, "grad_norm": 0.34857437014579773, "learning_rate": 0.00018907565466136969, "loss": 11.6812, "step": 21548 }, { "epoch": 0.4510801306204471, "grad_norm": 0.3204667270183563, "learning_rate": 0.00018907465817592027, "loss": 11.6637, "step": 21549 }, { "epoch": 0.45110106338440925, "grad_norm": 0.3596816062927246, "learning_rate": 0.0001890736616476509, "loss": 11.6705, "step": 21550 }, { "epoch": 0.45112199614837145, "grad_norm": 0.30876559019088745, "learning_rate": 0.00018907266507656197, "loss": 11.6827, "step": 21551 }, { "epoch": 0.4511429289123336, "grad_norm": 0.2532414197921753, "learning_rate": 0.00018907166846265406, "loss": 11.6736, "step": 21552 }, { "epoch": 0.45116386167629574, "grad_norm": 0.28820323944091797, "learning_rate": 0.00018907067180592762, "loss": 11.6756, "step": 21553 }, { "epoch": 0.4511847944402579, "grad_norm": 0.2706407308578491, "learning_rate": 0.00018906967510638314, "loss": 11.6479, "step": 21554 }, { "epoch": 0.45120572720422003, "grad_norm": 0.26799243688583374, "learning_rate": 0.00018906867836402107, "loss": 11.6894, "step": 21555 }, { "epoch": 0.4512266599681822, "grad_norm": 0.24067465960979462, "learning_rate": 0.0001890676815788419, "loss": 11.6898, "step": 21556 }, { "epoch": 0.4512475927321444, "grad_norm": 0.3243268132209778, "learning_rate": 0.0001890666847508461, "loss": 11.6705, "step": 21557 }, { "epoch": 0.4512685254961065, "grad_norm": 0.24494631588459015, "learning_rate": 0.00018906568788003416, "loss": 11.6758, "step": 21558 }, { "epoch": 0.45128945826006867, "grad_norm": 0.33405473828315735, "learning_rate": 0.0001890646909664066, "loss": 11.6805, "step": 21559 }, { "epoch": 0.4513103910240308, "grad_norm": 0.296042799949646, "learning_rate": 0.0001890636940099638, "loss": 11.6555, "step": 21560 }, { "epoch": 0.45133132378799296, "grad_norm": 0.2627186179161072, "learning_rate": 0.0001890626970107063, "loss": 11.6787, "step": 21561 }, { "epoch": 0.4513522565519551, "grad_norm": 0.32121485471725464, "learning_rate": 0.00018906169996863462, "loss": 11.6707, "step": 21562 }, { "epoch": 0.4513731893159173, "grad_norm": 0.29455673694610596, "learning_rate": 0.00018906070288374918, "loss": 11.6702, "step": 21563 }, { "epoch": 0.45139412207987945, "grad_norm": 0.24398325383663177, "learning_rate": 0.00018905970575605046, "loss": 11.6811, "step": 21564 }, { "epoch": 0.4514150548438416, "grad_norm": 0.32068827748298645, "learning_rate": 0.00018905870858553896, "loss": 11.6852, "step": 21565 }, { "epoch": 0.45143598760780373, "grad_norm": 0.3220686912536621, "learning_rate": 0.00018905771137221514, "loss": 11.6749, "step": 21566 }, { "epoch": 0.4514569203717659, "grad_norm": 0.38838890194892883, "learning_rate": 0.00018905671411607953, "loss": 11.6749, "step": 21567 }, { "epoch": 0.451477853135728, "grad_norm": 0.33682525157928467, "learning_rate": 0.00018905571681713255, "loss": 11.68, "step": 21568 }, { "epoch": 0.45149878589969017, "grad_norm": 0.2734024226665497, "learning_rate": 0.0001890547194753747, "loss": 11.6828, "step": 21569 }, { "epoch": 0.45151971866365237, "grad_norm": 0.22783111035823822, "learning_rate": 0.00018905372209080648, "loss": 11.6698, "step": 21570 }, { "epoch": 0.4515406514276145, "grad_norm": 0.26475048065185547, "learning_rate": 0.00018905272466342835, "loss": 11.6699, "step": 21571 }, { "epoch": 0.45156158419157666, "grad_norm": 0.3356756567955017, "learning_rate": 0.00018905172719324077, "loss": 11.6821, "step": 21572 }, { "epoch": 0.4515825169555388, "grad_norm": 0.3111588656902313, "learning_rate": 0.00018905072968024425, "loss": 11.6595, "step": 21573 }, { "epoch": 0.45160344971950095, "grad_norm": 0.3313611149787903, "learning_rate": 0.00018904973212443925, "loss": 11.6832, "step": 21574 }, { "epoch": 0.4516243824834631, "grad_norm": 0.30357858538627625, "learning_rate": 0.00018904873452582627, "loss": 11.6777, "step": 21575 }, { "epoch": 0.4516453152474253, "grad_norm": 0.27175644040107727, "learning_rate": 0.00018904773688440578, "loss": 11.6787, "step": 21576 }, { "epoch": 0.45166624801138744, "grad_norm": 0.268118292093277, "learning_rate": 0.00018904673920017823, "loss": 11.6618, "step": 21577 }, { "epoch": 0.4516871807753496, "grad_norm": 0.26890328526496887, "learning_rate": 0.00018904574147314419, "loss": 11.6714, "step": 21578 }, { "epoch": 0.45170811353931173, "grad_norm": 0.2962628901004791, "learning_rate": 0.00018904474370330402, "loss": 11.6884, "step": 21579 }, { "epoch": 0.4517290463032739, "grad_norm": 0.2619118392467499, "learning_rate": 0.0001890437458906583, "loss": 11.6851, "step": 21580 }, { "epoch": 0.451749979067236, "grad_norm": 0.32136422395706177, "learning_rate": 0.00018904274803520742, "loss": 11.6756, "step": 21581 }, { "epoch": 0.4517709118311982, "grad_norm": 0.2719637155532837, "learning_rate": 0.00018904175013695194, "loss": 11.6805, "step": 21582 }, { "epoch": 0.45179184459516036, "grad_norm": 0.36282265186309814, "learning_rate": 0.0001890407521958923, "loss": 11.6803, "step": 21583 }, { "epoch": 0.4518127773591225, "grad_norm": 0.3312236964702606, "learning_rate": 0.00018903975421202896, "loss": 11.685, "step": 21584 }, { "epoch": 0.45183371012308465, "grad_norm": 0.26303645968437195, "learning_rate": 0.00018903875618536247, "loss": 11.6603, "step": 21585 }, { "epoch": 0.4518546428870468, "grad_norm": 0.23820115625858307, "learning_rate": 0.00018903775811589325, "loss": 11.663, "step": 21586 }, { "epoch": 0.45187557565100894, "grad_norm": 0.22613871097564697, "learning_rate": 0.0001890367600036218, "loss": 11.6897, "step": 21587 }, { "epoch": 0.4518965084149711, "grad_norm": 0.2888454496860504, "learning_rate": 0.0001890357618485486, "loss": 11.6767, "step": 21588 }, { "epoch": 0.4519174411789333, "grad_norm": 0.29052284359931946, "learning_rate": 0.0001890347636506741, "loss": 11.6715, "step": 21589 }, { "epoch": 0.45193837394289543, "grad_norm": 0.2447662353515625, "learning_rate": 0.0001890337654099988, "loss": 11.6925, "step": 21590 }, { "epoch": 0.4519593067068576, "grad_norm": 0.2650241553783417, "learning_rate": 0.0001890327671265232, "loss": 11.6871, "step": 21591 }, { "epoch": 0.4519802394708197, "grad_norm": 0.28357452154159546, "learning_rate": 0.0001890317688002478, "loss": 11.671, "step": 21592 }, { "epoch": 0.45200117223478187, "grad_norm": 0.33156946301460266, "learning_rate": 0.00018903077043117298, "loss": 11.6819, "step": 21593 }, { "epoch": 0.452022104998744, "grad_norm": 0.2874455749988556, "learning_rate": 0.00018902977201929933, "loss": 11.6732, "step": 21594 }, { "epoch": 0.4520430377627062, "grad_norm": 0.28653737902641296, "learning_rate": 0.00018902877356462725, "loss": 11.6667, "step": 21595 }, { "epoch": 0.45206397052666836, "grad_norm": 0.2835582494735718, "learning_rate": 0.00018902777506715727, "loss": 11.6556, "step": 21596 }, { "epoch": 0.4520849032906305, "grad_norm": 0.4034874737262726, "learning_rate": 0.00018902677652688984, "loss": 11.6777, "step": 21597 }, { "epoch": 0.45210583605459265, "grad_norm": 0.2730374038219452, "learning_rate": 0.0001890257779438255, "loss": 11.6637, "step": 21598 }, { "epoch": 0.4521267688185548, "grad_norm": 0.38618242740631104, "learning_rate": 0.00018902477931796464, "loss": 11.6812, "step": 21599 }, { "epoch": 0.45214770158251694, "grad_norm": 0.33619213104248047, "learning_rate": 0.0001890237806493078, "loss": 11.6713, "step": 21600 }, { "epoch": 0.4521686343464791, "grad_norm": 0.2960231602191925, "learning_rate": 0.00018902278193785543, "loss": 11.6531, "step": 21601 }, { "epoch": 0.4521895671104413, "grad_norm": 0.3061409294605255, "learning_rate": 0.00018902178318360805, "loss": 11.6696, "step": 21602 }, { "epoch": 0.4522104998744034, "grad_norm": 0.27911093831062317, "learning_rate": 0.00018902078438656612, "loss": 11.6654, "step": 21603 }, { "epoch": 0.45223143263836557, "grad_norm": 0.2792257070541382, "learning_rate": 0.00018901978554673008, "loss": 11.6786, "step": 21604 }, { "epoch": 0.4522523654023277, "grad_norm": 0.32826879620552063, "learning_rate": 0.00018901878666410048, "loss": 11.6751, "step": 21605 }, { "epoch": 0.45227329816628986, "grad_norm": 0.27152273058891296, "learning_rate": 0.00018901778773867776, "loss": 11.6572, "step": 21606 }, { "epoch": 0.452294230930252, "grad_norm": 0.30367836356163025, "learning_rate": 0.00018901678877046238, "loss": 11.6796, "step": 21607 }, { "epoch": 0.4523151636942142, "grad_norm": 0.34368273615837097, "learning_rate": 0.00018901578975945486, "loss": 11.6831, "step": 21608 }, { "epoch": 0.45233609645817635, "grad_norm": 0.2624940574169159, "learning_rate": 0.00018901479070565567, "loss": 11.6835, "step": 21609 }, { "epoch": 0.4523570292221385, "grad_norm": 0.29234176874160767, "learning_rate": 0.0001890137916090653, "loss": 11.6725, "step": 21610 }, { "epoch": 0.45237796198610064, "grad_norm": 0.2687334418296814, "learning_rate": 0.0001890127924696842, "loss": 11.6843, "step": 21611 }, { "epoch": 0.4523988947500628, "grad_norm": 0.27307623624801636, "learning_rate": 0.00018901179328751285, "loss": 11.6601, "step": 21612 }, { "epoch": 0.45241982751402493, "grad_norm": 0.30219775438308716, "learning_rate": 0.00018901079406255175, "loss": 11.6672, "step": 21613 }, { "epoch": 0.45244076027798713, "grad_norm": 0.27516457438468933, "learning_rate": 0.0001890097947948014, "loss": 11.6617, "step": 21614 }, { "epoch": 0.4524616930419493, "grad_norm": 0.2940851151943207, "learning_rate": 0.00018900879548426226, "loss": 11.674, "step": 21615 }, { "epoch": 0.4524826258059114, "grad_norm": 0.2543698251247406, "learning_rate": 0.0001890077961309348, "loss": 11.6799, "step": 21616 }, { "epoch": 0.45250355856987357, "grad_norm": 0.30087071657180786, "learning_rate": 0.0001890067967348195, "loss": 11.678, "step": 21617 }, { "epoch": 0.4525244913338357, "grad_norm": 0.25169113278388977, "learning_rate": 0.00018900579729591685, "loss": 11.662, "step": 21618 }, { "epoch": 0.45254542409779785, "grad_norm": 0.24844782054424286, "learning_rate": 0.00018900479781422733, "loss": 11.6667, "step": 21619 }, { "epoch": 0.45256635686176, "grad_norm": 0.2798621356487274, "learning_rate": 0.00018900379828975142, "loss": 11.6833, "step": 21620 }, { "epoch": 0.4525872896257222, "grad_norm": 0.2905048429965973, "learning_rate": 0.00018900279872248961, "loss": 11.6583, "step": 21621 }, { "epoch": 0.45260822238968434, "grad_norm": 0.29222914576530457, "learning_rate": 0.00018900179911244237, "loss": 11.6671, "step": 21622 }, { "epoch": 0.4526291551536465, "grad_norm": 0.36152026057243347, "learning_rate": 0.00018900079945961018, "loss": 11.6767, "step": 21623 }, { "epoch": 0.45265008791760863, "grad_norm": 0.3004939556121826, "learning_rate": 0.0001889997997639935, "loss": 11.6898, "step": 21624 }, { "epoch": 0.4526710206815708, "grad_norm": 0.23639480769634247, "learning_rate": 0.00018899880002559287, "loss": 11.6617, "step": 21625 }, { "epoch": 0.4526919534455329, "grad_norm": 0.28254541754722595, "learning_rate": 0.0001889978002444087, "loss": 11.6762, "step": 21626 }, { "epoch": 0.4527128862094951, "grad_norm": 0.2665945291519165, "learning_rate": 0.00018899680042044152, "loss": 11.6699, "step": 21627 }, { "epoch": 0.45273381897345727, "grad_norm": 0.34369784593582153, "learning_rate": 0.00018899580055369182, "loss": 11.6876, "step": 21628 }, { "epoch": 0.4527547517374194, "grad_norm": 0.32127508521080017, "learning_rate": 0.00018899480064416002, "loss": 11.6857, "step": 21629 }, { "epoch": 0.45277568450138156, "grad_norm": 0.29639676213264465, "learning_rate": 0.00018899380069184666, "loss": 11.6701, "step": 21630 }, { "epoch": 0.4527966172653437, "grad_norm": 0.2807198762893677, "learning_rate": 0.00018899280069675217, "loss": 11.6665, "step": 21631 }, { "epoch": 0.45281755002930585, "grad_norm": 0.2799309194087982, "learning_rate": 0.00018899180065887707, "loss": 11.6661, "step": 21632 }, { "epoch": 0.45283848279326805, "grad_norm": 0.30263736844062805, "learning_rate": 0.00018899080057822182, "loss": 11.6604, "step": 21633 }, { "epoch": 0.4528594155572302, "grad_norm": 0.2660185396671295, "learning_rate": 0.0001889898004547869, "loss": 11.6741, "step": 21634 }, { "epoch": 0.45288034832119234, "grad_norm": 0.42409273982048035, "learning_rate": 0.0001889888002885728, "loss": 11.6683, "step": 21635 }, { "epoch": 0.4529012810851545, "grad_norm": 0.3465460240840912, "learning_rate": 0.00018898780007958003, "loss": 11.6918, "step": 21636 }, { "epoch": 0.45292221384911663, "grad_norm": 0.2360767424106598, "learning_rate": 0.00018898679982780906, "loss": 11.6653, "step": 21637 }, { "epoch": 0.4529431466130788, "grad_norm": 0.30212637782096863, "learning_rate": 0.0001889857995332603, "loss": 11.676, "step": 21638 }, { "epoch": 0.4529640793770409, "grad_norm": 0.27466607093811035, "learning_rate": 0.0001889847991959343, "loss": 11.6755, "step": 21639 }, { "epoch": 0.4529850121410031, "grad_norm": 0.26172375679016113, "learning_rate": 0.00018898379881583153, "loss": 11.6824, "step": 21640 }, { "epoch": 0.45300594490496526, "grad_norm": 0.22936978936195374, "learning_rate": 0.00018898279839295245, "loss": 11.6752, "step": 21641 }, { "epoch": 0.4530268776689274, "grad_norm": 0.3765406310558319, "learning_rate": 0.00018898179792729757, "loss": 11.6737, "step": 21642 }, { "epoch": 0.45304781043288955, "grad_norm": 0.3300883173942566, "learning_rate": 0.00018898079741886737, "loss": 11.6736, "step": 21643 }, { "epoch": 0.4530687431968517, "grad_norm": 0.24779215455055237, "learning_rate": 0.00018897979686766228, "loss": 11.6743, "step": 21644 }, { "epoch": 0.45308967596081384, "grad_norm": 0.41274169087409973, "learning_rate": 0.00018897879627368283, "loss": 11.6632, "step": 21645 }, { "epoch": 0.45311060872477604, "grad_norm": 0.293459951877594, "learning_rate": 0.0001889777956369295, "loss": 11.6881, "step": 21646 }, { "epoch": 0.4531315414887382, "grad_norm": 0.2553148865699768, "learning_rate": 0.00018897679495740276, "loss": 11.6751, "step": 21647 }, { "epoch": 0.45315247425270033, "grad_norm": 0.3057815432548523, "learning_rate": 0.00018897579423510309, "loss": 11.6692, "step": 21648 }, { "epoch": 0.4531734070166625, "grad_norm": 0.2988266944885254, "learning_rate": 0.00018897479347003098, "loss": 11.6713, "step": 21649 }, { "epoch": 0.4531943397806246, "grad_norm": 0.2549281120300293, "learning_rate": 0.0001889737926621869, "loss": 11.6754, "step": 21650 }, { "epoch": 0.45321527254458677, "grad_norm": 0.337426096200943, "learning_rate": 0.0001889727918115713, "loss": 11.6785, "step": 21651 }, { "epoch": 0.45323620530854897, "grad_norm": 0.3076252043247223, "learning_rate": 0.00018897179091818475, "loss": 11.6644, "step": 21652 }, { "epoch": 0.4532571380725111, "grad_norm": 0.3436274230480194, "learning_rate": 0.00018897078998202766, "loss": 11.669, "step": 21653 }, { "epoch": 0.45327807083647326, "grad_norm": 0.28828397393226624, "learning_rate": 0.00018896978900310052, "loss": 11.6664, "step": 21654 }, { "epoch": 0.4532990036004354, "grad_norm": 0.38548922538757324, "learning_rate": 0.00018896878798140384, "loss": 11.6806, "step": 21655 }, { "epoch": 0.45331993636439755, "grad_norm": 0.3228669762611389, "learning_rate": 0.00018896778691693803, "loss": 11.6752, "step": 21656 }, { "epoch": 0.4533408691283597, "grad_norm": 0.2879173159599304, "learning_rate": 0.00018896678580970365, "loss": 11.6505, "step": 21657 }, { "epoch": 0.45336180189232184, "grad_norm": 0.27703621983528137, "learning_rate": 0.00018896578465970118, "loss": 11.6639, "step": 21658 }, { "epoch": 0.45338273465628404, "grad_norm": 0.44082218408584595, "learning_rate": 0.00018896478346693105, "loss": 11.6735, "step": 21659 }, { "epoch": 0.4534036674202462, "grad_norm": 0.3469211757183075, "learning_rate": 0.00018896378223139375, "loss": 11.6692, "step": 21660 }, { "epoch": 0.4534246001842083, "grad_norm": 0.28410249948501587, "learning_rate": 0.0001889627809530898, "loss": 11.6722, "step": 21661 }, { "epoch": 0.45344553294817047, "grad_norm": 0.29259195923805237, "learning_rate": 0.00018896177963201964, "loss": 11.6788, "step": 21662 }, { "epoch": 0.4534664657121326, "grad_norm": 0.36147361993789673, "learning_rate": 0.0001889607782681838, "loss": 11.6912, "step": 21663 }, { "epoch": 0.45348739847609476, "grad_norm": 0.2777954638004303, "learning_rate": 0.00018895977686158273, "loss": 11.6579, "step": 21664 }, { "epoch": 0.45350833124005696, "grad_norm": 0.3495441675186157, "learning_rate": 0.0001889587754122169, "loss": 11.6549, "step": 21665 }, { "epoch": 0.4535292640040191, "grad_norm": 0.3050740361213684, "learning_rate": 0.00018895777392008678, "loss": 11.6836, "step": 21666 }, { "epoch": 0.45355019676798125, "grad_norm": 0.279729425907135, "learning_rate": 0.00018895677238519288, "loss": 11.6882, "step": 21667 }, { "epoch": 0.4535711295319434, "grad_norm": 0.3009134829044342, "learning_rate": 0.0001889557708075357, "loss": 11.677, "step": 21668 }, { "epoch": 0.45359206229590554, "grad_norm": 0.2775167226791382, "learning_rate": 0.0001889547691871157, "loss": 11.6852, "step": 21669 }, { "epoch": 0.4536129950598677, "grad_norm": 0.28854861855506897, "learning_rate": 0.00018895376752393336, "loss": 11.6664, "step": 21670 }, { "epoch": 0.4536339278238299, "grad_norm": 0.31815508008003235, "learning_rate": 0.00018895276581798912, "loss": 11.6644, "step": 21671 }, { "epoch": 0.45365486058779203, "grad_norm": 0.2782124876976013, "learning_rate": 0.00018895176406928354, "loss": 11.6748, "step": 21672 }, { "epoch": 0.4536757933517542, "grad_norm": 0.24713268876075745, "learning_rate": 0.00018895076227781708, "loss": 11.6715, "step": 21673 }, { "epoch": 0.4536967261157163, "grad_norm": 0.3321259915828705, "learning_rate": 0.00018894976044359017, "loss": 11.674, "step": 21674 }, { "epoch": 0.45371765887967846, "grad_norm": 0.27327555418014526, "learning_rate": 0.00018894875856660334, "loss": 11.664, "step": 21675 }, { "epoch": 0.4537385916436406, "grad_norm": 0.3340146243572235, "learning_rate": 0.00018894775664685706, "loss": 11.6611, "step": 21676 }, { "epoch": 0.45375952440760275, "grad_norm": 0.31698888540267944, "learning_rate": 0.00018894675468435183, "loss": 11.6747, "step": 21677 }, { "epoch": 0.45378045717156495, "grad_norm": 0.29194843769073486, "learning_rate": 0.0001889457526790881, "loss": 11.6754, "step": 21678 }, { "epoch": 0.4538013899355271, "grad_norm": 0.24904699623584747, "learning_rate": 0.00018894475063106634, "loss": 11.6763, "step": 21679 }, { "epoch": 0.45382232269948924, "grad_norm": 0.24360546469688416, "learning_rate": 0.00018894374854028708, "loss": 11.6883, "step": 21680 }, { "epoch": 0.4538432554634514, "grad_norm": 0.2891647517681122, "learning_rate": 0.00018894274640675075, "loss": 11.6921, "step": 21681 }, { "epoch": 0.45386418822741353, "grad_norm": 0.364374577999115, "learning_rate": 0.00018894174423045792, "loss": 11.6806, "step": 21682 }, { "epoch": 0.4538851209913757, "grad_norm": 0.23403096199035645, "learning_rate": 0.00018894074201140896, "loss": 11.6531, "step": 21683 }, { "epoch": 0.4539060537553379, "grad_norm": 0.26797911524772644, "learning_rate": 0.0001889397397496044, "loss": 11.6708, "step": 21684 }, { "epoch": 0.4539269865193, "grad_norm": 0.307356059551239, "learning_rate": 0.00018893873744504474, "loss": 11.6857, "step": 21685 }, { "epoch": 0.45394791928326217, "grad_norm": 0.36673596501350403, "learning_rate": 0.00018893773509773046, "loss": 11.6856, "step": 21686 }, { "epoch": 0.4539688520472243, "grad_norm": 0.32653388381004333, "learning_rate": 0.00018893673270766202, "loss": 11.6794, "step": 21687 }, { "epoch": 0.45398978481118646, "grad_norm": 0.33647894859313965, "learning_rate": 0.00018893573027483991, "loss": 11.687, "step": 21688 }, { "epoch": 0.4540107175751486, "grad_norm": 0.29841119050979614, "learning_rate": 0.0001889347277992646, "loss": 11.6544, "step": 21689 }, { "epoch": 0.4540316503391108, "grad_norm": 0.3321656286716461, "learning_rate": 0.0001889337252809366, "loss": 11.641, "step": 21690 }, { "epoch": 0.45405258310307295, "grad_norm": 0.3054236173629761, "learning_rate": 0.00018893272271985635, "loss": 11.6666, "step": 21691 }, { "epoch": 0.4540735158670351, "grad_norm": 0.31124576926231384, "learning_rate": 0.00018893172011602438, "loss": 11.6596, "step": 21692 }, { "epoch": 0.45409444863099724, "grad_norm": 0.26458194851875305, "learning_rate": 0.00018893071746944117, "loss": 11.6757, "step": 21693 }, { "epoch": 0.4541153813949594, "grad_norm": 0.27948370575904846, "learning_rate": 0.00018892971478010715, "loss": 11.6794, "step": 21694 }, { "epoch": 0.45413631415892153, "grad_norm": 0.3006136417388916, "learning_rate": 0.00018892871204802283, "loss": 11.6714, "step": 21695 }, { "epoch": 0.4541572469228837, "grad_norm": 0.27504029870033264, "learning_rate": 0.0001889277092731887, "loss": 11.6564, "step": 21696 }, { "epoch": 0.4541781796868459, "grad_norm": 0.2639707922935486, "learning_rate": 0.00018892670645560524, "loss": 11.681, "step": 21697 }, { "epoch": 0.454199112450808, "grad_norm": 0.3570880889892578, "learning_rate": 0.00018892570359527294, "loss": 11.6813, "step": 21698 }, { "epoch": 0.45422004521477016, "grad_norm": 0.2676793336868286, "learning_rate": 0.0001889247006921923, "loss": 11.6679, "step": 21699 }, { "epoch": 0.4542409779787323, "grad_norm": 0.2564201354980469, "learning_rate": 0.00018892369774636372, "loss": 11.6877, "step": 21700 }, { "epoch": 0.45426191074269445, "grad_norm": 0.35377630591392517, "learning_rate": 0.00018892269475778774, "loss": 11.666, "step": 21701 }, { "epoch": 0.4542828435066566, "grad_norm": 0.2777289152145386, "learning_rate": 0.0001889216917264649, "loss": 11.6676, "step": 21702 }, { "epoch": 0.4543037762706188, "grad_norm": 0.2925809323787689, "learning_rate": 0.00018892068865239553, "loss": 11.6757, "step": 21703 }, { "epoch": 0.45432470903458094, "grad_norm": 0.30083906650543213, "learning_rate": 0.00018891968553558027, "loss": 11.6561, "step": 21704 }, { "epoch": 0.4543456417985431, "grad_norm": 0.2918069660663605, "learning_rate": 0.00018891868237601954, "loss": 11.6736, "step": 21705 }, { "epoch": 0.45436657456250523, "grad_norm": 0.3952076733112335, "learning_rate": 0.00018891767917371378, "loss": 11.6833, "step": 21706 }, { "epoch": 0.4543875073264674, "grad_norm": 0.3359938859939575, "learning_rate": 0.0001889166759286635, "loss": 11.6731, "step": 21707 }, { "epoch": 0.4544084400904295, "grad_norm": 0.3730461895465851, "learning_rate": 0.00018891567264086922, "loss": 11.6766, "step": 21708 }, { "epoch": 0.45442937285439167, "grad_norm": 0.3141666054725647, "learning_rate": 0.00018891466931033143, "loss": 11.6743, "step": 21709 }, { "epoch": 0.45445030561835387, "grad_norm": 0.3355102837085724, "learning_rate": 0.0001889136659370505, "loss": 11.6698, "step": 21710 }, { "epoch": 0.454471238382316, "grad_norm": 0.27807900309562683, "learning_rate": 0.00018891266252102704, "loss": 11.6806, "step": 21711 }, { "epoch": 0.45449217114627816, "grad_norm": 0.3326846957206726, "learning_rate": 0.00018891165906226144, "loss": 11.6818, "step": 21712 }, { "epoch": 0.4545131039102403, "grad_norm": 0.3914431929588318, "learning_rate": 0.00018891065556075426, "loss": 11.7026, "step": 21713 }, { "epoch": 0.45453403667420245, "grad_norm": 0.26742029190063477, "learning_rate": 0.00018890965201650593, "loss": 11.6859, "step": 21714 }, { "epoch": 0.4545549694381646, "grad_norm": 0.26238498091697693, "learning_rate": 0.00018890864842951698, "loss": 11.6544, "step": 21715 }, { "epoch": 0.4545759022021268, "grad_norm": 0.2944073975086212, "learning_rate": 0.00018890764479978782, "loss": 11.6667, "step": 21716 }, { "epoch": 0.45459683496608894, "grad_norm": 0.34784308075904846, "learning_rate": 0.00018890664112731898, "loss": 11.667, "step": 21717 }, { "epoch": 0.4546177677300511, "grad_norm": 0.30763357877731323, "learning_rate": 0.00018890563741211095, "loss": 11.6566, "step": 21718 }, { "epoch": 0.4546387004940132, "grad_norm": 0.2972346544265747, "learning_rate": 0.0001889046336541642, "loss": 11.6753, "step": 21719 }, { "epoch": 0.45465963325797537, "grad_norm": 0.8696576356887817, "learning_rate": 0.0001889036298534792, "loss": 11.6587, "step": 21720 }, { "epoch": 0.4546805660219375, "grad_norm": 0.3174327313899994, "learning_rate": 0.00018890262601005647, "loss": 11.6723, "step": 21721 }, { "epoch": 0.4547014987858997, "grad_norm": 0.28123587369918823, "learning_rate": 0.00018890162212389642, "loss": 11.6748, "step": 21722 }, { "epoch": 0.45472243154986186, "grad_norm": 0.3014366626739502, "learning_rate": 0.00018890061819499962, "loss": 11.6762, "step": 21723 }, { "epoch": 0.454743364313824, "grad_norm": 0.29570943117141724, "learning_rate": 0.00018889961422336647, "loss": 11.6633, "step": 21724 }, { "epoch": 0.45476429707778615, "grad_norm": 0.2869911193847656, "learning_rate": 0.00018889861020899754, "loss": 11.6656, "step": 21725 }, { "epoch": 0.4547852298417483, "grad_norm": 0.374884694814682, "learning_rate": 0.00018889760615189324, "loss": 11.6936, "step": 21726 }, { "epoch": 0.45480616260571044, "grad_norm": 0.280094176530838, "learning_rate": 0.00018889660205205407, "loss": 11.6787, "step": 21727 }, { "epoch": 0.4548270953696726, "grad_norm": 0.25534501671791077, "learning_rate": 0.00018889559790948054, "loss": 11.6708, "step": 21728 }, { "epoch": 0.4548480281336348, "grad_norm": 0.34638410806655884, "learning_rate": 0.0001888945937241731, "loss": 11.6637, "step": 21729 }, { "epoch": 0.45486896089759693, "grad_norm": 0.34966498613357544, "learning_rate": 0.00018889358949613228, "loss": 11.6889, "step": 21730 }, { "epoch": 0.4548898936615591, "grad_norm": 0.23240415751934052, "learning_rate": 0.0001888925852253585, "loss": 11.6917, "step": 21731 }, { "epoch": 0.4549108264255212, "grad_norm": 0.45051056146621704, "learning_rate": 0.0001888915809118523, "loss": 11.681, "step": 21732 }, { "epoch": 0.45493175918948336, "grad_norm": 0.32277897000312805, "learning_rate": 0.00018889057655561409, "loss": 11.6795, "step": 21733 }, { "epoch": 0.4549526919534455, "grad_norm": 0.3061024248600006, "learning_rate": 0.0001888895721566444, "loss": 11.6713, "step": 21734 }, { "epoch": 0.4549736247174077, "grad_norm": 0.2941191494464874, "learning_rate": 0.00018888856771494375, "loss": 11.6804, "step": 21735 }, { "epoch": 0.45499455748136985, "grad_norm": 0.2609707713127136, "learning_rate": 0.00018888756323051257, "loss": 11.6789, "step": 21736 }, { "epoch": 0.455015490245332, "grad_norm": 0.27523645758628845, "learning_rate": 0.00018888655870335136, "loss": 11.668, "step": 21737 }, { "epoch": 0.45503642300929414, "grad_norm": 0.2628413140773773, "learning_rate": 0.0001888855541334606, "loss": 11.6778, "step": 21738 }, { "epoch": 0.4550573557732563, "grad_norm": 0.2514254152774811, "learning_rate": 0.00018888454952084075, "loss": 11.6844, "step": 21739 }, { "epoch": 0.45507828853721843, "grad_norm": 0.29286545515060425, "learning_rate": 0.00018888354486549237, "loss": 11.6612, "step": 21740 }, { "epoch": 0.45509922130118063, "grad_norm": 0.26106908917427063, "learning_rate": 0.00018888254016741584, "loss": 11.6851, "step": 21741 }, { "epoch": 0.4551201540651428, "grad_norm": 0.3117753863334656, "learning_rate": 0.00018888153542661172, "loss": 11.6696, "step": 21742 }, { "epoch": 0.4551410868291049, "grad_norm": 0.3085542321205139, "learning_rate": 0.00018888053064308044, "loss": 11.6763, "step": 21743 }, { "epoch": 0.45516201959306707, "grad_norm": 0.2883136570453644, "learning_rate": 0.00018887952581682252, "loss": 11.6787, "step": 21744 }, { "epoch": 0.4551829523570292, "grad_norm": 0.3339313268661499, "learning_rate": 0.00018887852094783843, "loss": 11.6743, "step": 21745 }, { "epoch": 0.45520388512099136, "grad_norm": 0.2605014443397522, "learning_rate": 0.00018887751603612865, "loss": 11.675, "step": 21746 }, { "epoch": 0.4552248178849535, "grad_norm": 0.2668396234512329, "learning_rate": 0.00018887651108169366, "loss": 11.6705, "step": 21747 }, { "epoch": 0.4552457506489157, "grad_norm": 0.3652734160423279, "learning_rate": 0.00018887550608453398, "loss": 11.6932, "step": 21748 }, { "epoch": 0.45526668341287785, "grad_norm": 0.25339633226394653, "learning_rate": 0.00018887450104465003, "loss": 11.6892, "step": 21749 }, { "epoch": 0.45528761617684, "grad_norm": 0.29129865765571594, "learning_rate": 0.00018887349596204234, "loss": 11.6749, "step": 21750 }, { "epoch": 0.45530854894080214, "grad_norm": 0.28154927492141724, "learning_rate": 0.00018887249083671137, "loss": 11.6704, "step": 21751 }, { "epoch": 0.4553294817047643, "grad_norm": 0.26078683137893677, "learning_rate": 0.00018887148566865762, "loss": 11.6731, "step": 21752 }, { "epoch": 0.4553504144687264, "grad_norm": 0.3773198425769806, "learning_rate": 0.0001888704804578816, "loss": 11.6585, "step": 21753 }, { "epoch": 0.45537134723268863, "grad_norm": 0.32918381690979004, "learning_rate": 0.0001888694752043837, "loss": 11.6756, "step": 21754 }, { "epoch": 0.4553922799966508, "grad_norm": 0.31604379415512085, "learning_rate": 0.00018886846990816446, "loss": 11.6767, "step": 21755 }, { "epoch": 0.4554132127606129, "grad_norm": 0.3011374771595001, "learning_rate": 0.0001888674645692244, "loss": 11.6647, "step": 21756 }, { "epoch": 0.45543414552457506, "grad_norm": 0.3347846567630768, "learning_rate": 0.000188866459187564, "loss": 11.652, "step": 21757 }, { "epoch": 0.4554550782885372, "grad_norm": 0.29658031463623047, "learning_rate": 0.00018886545376318366, "loss": 11.6847, "step": 21758 }, { "epoch": 0.45547601105249935, "grad_norm": 0.33310142159461975, "learning_rate": 0.00018886444829608392, "loss": 11.6825, "step": 21759 }, { "epoch": 0.45549694381646155, "grad_norm": 0.3490639626979828, "learning_rate": 0.00018886344278626528, "loss": 11.6869, "step": 21760 }, { "epoch": 0.4555178765804237, "grad_norm": 0.35924410820007324, "learning_rate": 0.00018886243723372818, "loss": 11.6828, "step": 21761 }, { "epoch": 0.45553880934438584, "grad_norm": 0.28660571575164795, "learning_rate": 0.00018886143163847314, "loss": 11.6574, "step": 21762 }, { "epoch": 0.455559742108348, "grad_norm": 0.3058992028236389, "learning_rate": 0.00018886042600050064, "loss": 11.6843, "step": 21763 }, { "epoch": 0.45558067487231013, "grad_norm": 0.3305911719799042, "learning_rate": 0.00018885942031981112, "loss": 11.6543, "step": 21764 }, { "epoch": 0.4556016076362723, "grad_norm": 0.3132169246673584, "learning_rate": 0.0001888584145964051, "loss": 11.6773, "step": 21765 }, { "epoch": 0.4556225404002344, "grad_norm": 0.3118550777435303, "learning_rate": 0.0001888574088302831, "loss": 11.6746, "step": 21766 }, { "epoch": 0.4556434731641966, "grad_norm": 0.34492382407188416, "learning_rate": 0.0001888564030214455, "loss": 11.6682, "step": 21767 }, { "epoch": 0.45566440592815877, "grad_norm": 0.27768340706825256, "learning_rate": 0.0001888553971698929, "loss": 11.6644, "step": 21768 }, { "epoch": 0.4556853386921209, "grad_norm": 0.28026553988456726, "learning_rate": 0.00018885439127562571, "loss": 11.6816, "step": 21769 }, { "epoch": 0.45570627145608306, "grad_norm": 0.25250136852264404, "learning_rate": 0.00018885338533864446, "loss": 11.6875, "step": 21770 }, { "epoch": 0.4557272042200452, "grad_norm": 0.3010562062263489, "learning_rate": 0.00018885237935894957, "loss": 11.667, "step": 21771 }, { "epoch": 0.45574813698400735, "grad_norm": 0.2285565882921219, "learning_rate": 0.00018885137333654156, "loss": 11.672, "step": 21772 }, { "epoch": 0.45576906974796955, "grad_norm": 0.36540600657463074, "learning_rate": 0.00018885036727142094, "loss": 11.6654, "step": 21773 }, { "epoch": 0.4557900025119317, "grad_norm": 0.24908941984176636, "learning_rate": 0.00018884936116358815, "loss": 11.66, "step": 21774 }, { "epoch": 0.45581093527589384, "grad_norm": 0.3342739939689636, "learning_rate": 0.0001888483550130437, "loss": 11.678, "step": 21775 }, { "epoch": 0.455831868039856, "grad_norm": 0.26545026898384094, "learning_rate": 0.00018884734881978807, "loss": 11.6678, "step": 21776 }, { "epoch": 0.4558528008038181, "grad_norm": 0.28118446469306946, "learning_rate": 0.00018884634258382174, "loss": 11.6807, "step": 21777 }, { "epoch": 0.45587373356778027, "grad_norm": 0.25714975595474243, "learning_rate": 0.00018884533630514518, "loss": 11.6762, "step": 21778 }, { "epoch": 0.45589466633174247, "grad_norm": 0.27617523074150085, "learning_rate": 0.0001888443299837589, "loss": 11.6791, "step": 21779 }, { "epoch": 0.4559155990957046, "grad_norm": 0.34872856736183167, "learning_rate": 0.00018884332361966336, "loss": 11.6567, "step": 21780 }, { "epoch": 0.45593653185966676, "grad_norm": 0.38322317600250244, "learning_rate": 0.00018884231721285905, "loss": 11.6711, "step": 21781 }, { "epoch": 0.4559574646236289, "grad_norm": 0.3765984773635864, "learning_rate": 0.00018884131076334646, "loss": 11.6812, "step": 21782 }, { "epoch": 0.45597839738759105, "grad_norm": 0.2796625792980194, "learning_rate": 0.00018884030427112608, "loss": 11.6709, "step": 21783 }, { "epoch": 0.4559993301515532, "grad_norm": 0.31371819972991943, "learning_rate": 0.00018883929773619838, "loss": 11.6576, "step": 21784 }, { "epoch": 0.45602026291551534, "grad_norm": 0.46196988224983215, "learning_rate": 0.00018883829115856385, "loss": 11.6878, "step": 21785 }, { "epoch": 0.45604119567947754, "grad_norm": 0.25408098101615906, "learning_rate": 0.00018883728453822295, "loss": 11.6626, "step": 21786 }, { "epoch": 0.4560621284434397, "grad_norm": 0.30101725459098816, "learning_rate": 0.00018883627787517622, "loss": 11.6642, "step": 21787 }, { "epoch": 0.45608306120740183, "grad_norm": 0.3671081066131592, "learning_rate": 0.00018883527116942406, "loss": 11.6894, "step": 21788 }, { "epoch": 0.456103993971364, "grad_norm": 0.3201002776622772, "learning_rate": 0.00018883426442096708, "loss": 11.6756, "step": 21789 }, { "epoch": 0.4561249267353261, "grad_norm": 0.34448662400245667, "learning_rate": 0.00018883325762980566, "loss": 11.6811, "step": 21790 }, { "epoch": 0.45614585949928826, "grad_norm": 0.36061331629753113, "learning_rate": 0.0001888322507959403, "loss": 11.6484, "step": 21791 }, { "epoch": 0.45616679226325046, "grad_norm": 0.3371369540691376, "learning_rate": 0.0001888312439193715, "loss": 11.6721, "step": 21792 }, { "epoch": 0.4561877250272126, "grad_norm": 0.26824748516082764, "learning_rate": 0.00018883023700009974, "loss": 11.6922, "step": 21793 }, { "epoch": 0.45620865779117475, "grad_norm": 0.28228649497032166, "learning_rate": 0.00018882923003812552, "loss": 11.6815, "step": 21794 }, { "epoch": 0.4562295905551369, "grad_norm": 0.2600443959236145, "learning_rate": 0.00018882822303344928, "loss": 11.671, "step": 21795 }, { "epoch": 0.45625052331909904, "grad_norm": 0.2762095034122467, "learning_rate": 0.00018882721598607156, "loss": 11.6596, "step": 21796 }, { "epoch": 0.4562714560830612, "grad_norm": 0.28505271673202515, "learning_rate": 0.0001888262088959928, "loss": 11.6738, "step": 21797 }, { "epoch": 0.45629238884702333, "grad_norm": 0.3199472427368164, "learning_rate": 0.00018882520176321353, "loss": 11.6954, "step": 21798 }, { "epoch": 0.45631332161098553, "grad_norm": 0.2960132658481598, "learning_rate": 0.00018882419458773417, "loss": 11.6705, "step": 21799 }, { "epoch": 0.4563342543749477, "grad_norm": 0.31537872552871704, "learning_rate": 0.00018882318736955527, "loss": 11.6777, "step": 21800 }, { "epoch": 0.4563551871389098, "grad_norm": 0.30258163809776306, "learning_rate": 0.00018882218010867724, "loss": 11.6657, "step": 21801 }, { "epoch": 0.45637611990287197, "grad_norm": 0.34909650683403015, "learning_rate": 0.00018882117280510062, "loss": 11.6901, "step": 21802 }, { "epoch": 0.4563970526668341, "grad_norm": 0.3055303692817688, "learning_rate": 0.00018882016545882592, "loss": 11.6766, "step": 21803 }, { "epoch": 0.45641798543079626, "grad_norm": 0.24136966466903687, "learning_rate": 0.00018881915806985355, "loss": 11.6553, "step": 21804 }, { "epoch": 0.45643891819475846, "grad_norm": 0.3477758765220642, "learning_rate": 0.00018881815063818408, "loss": 11.6629, "step": 21805 }, { "epoch": 0.4564598509587206, "grad_norm": 0.26709529757499695, "learning_rate": 0.00018881714316381788, "loss": 11.6679, "step": 21806 }, { "epoch": 0.45648078372268275, "grad_norm": 0.27129748463630676, "learning_rate": 0.00018881613564675554, "loss": 11.6653, "step": 21807 }, { "epoch": 0.4565017164866449, "grad_norm": 0.27843543887138367, "learning_rate": 0.0001888151280869975, "loss": 11.6622, "step": 21808 }, { "epoch": 0.45652264925060704, "grad_norm": 0.3774593770503998, "learning_rate": 0.00018881412048454426, "loss": 11.6808, "step": 21809 }, { "epoch": 0.4565435820145692, "grad_norm": 0.34852465987205505, "learning_rate": 0.00018881311283939627, "loss": 11.6823, "step": 21810 }, { "epoch": 0.4565645147785314, "grad_norm": 0.31264981627464294, "learning_rate": 0.00018881210515155403, "loss": 11.6761, "step": 21811 }, { "epoch": 0.4565854475424935, "grad_norm": 0.2748584747314453, "learning_rate": 0.00018881109742101802, "loss": 11.6598, "step": 21812 }, { "epoch": 0.4566063803064557, "grad_norm": 0.2881123423576355, "learning_rate": 0.00018881008964778876, "loss": 11.6768, "step": 21813 }, { "epoch": 0.4566273130704178, "grad_norm": 0.30811619758605957, "learning_rate": 0.0001888090818318667, "loss": 11.6743, "step": 21814 }, { "epoch": 0.45664824583437996, "grad_norm": 0.31280872225761414, "learning_rate": 0.00018880807397325235, "loss": 11.6832, "step": 21815 }, { "epoch": 0.4566691785983421, "grad_norm": 0.2610252797603607, "learning_rate": 0.00018880706607194618, "loss": 11.6604, "step": 21816 }, { "epoch": 0.45669011136230425, "grad_norm": 0.29713189601898193, "learning_rate": 0.00018880605812794868, "loss": 11.6552, "step": 21817 }, { "epoch": 0.45671104412626645, "grad_norm": 0.24444520473480225, "learning_rate": 0.0001888050501412603, "loss": 11.664, "step": 21818 }, { "epoch": 0.4567319768902286, "grad_norm": 0.3109418451786041, "learning_rate": 0.00018880404211188157, "loss": 11.6674, "step": 21819 }, { "epoch": 0.45675290965419074, "grad_norm": 0.26301562786102295, "learning_rate": 0.00018880303403981293, "loss": 11.6638, "step": 21820 }, { "epoch": 0.4567738424181529, "grad_norm": 0.2959780991077423, "learning_rate": 0.00018880202592505492, "loss": 11.6554, "step": 21821 }, { "epoch": 0.45679477518211503, "grad_norm": 0.37628641724586487, "learning_rate": 0.00018880101776760802, "loss": 11.6845, "step": 21822 }, { "epoch": 0.4568157079460772, "grad_norm": 0.30509325861930847, "learning_rate": 0.00018880000956747264, "loss": 11.6894, "step": 21823 }, { "epoch": 0.4568366407100394, "grad_norm": 0.30338767170906067, "learning_rate": 0.00018879900132464937, "loss": 11.682, "step": 21824 }, { "epoch": 0.4568575734740015, "grad_norm": 0.25699326395988464, "learning_rate": 0.0001887979930391386, "loss": 11.6637, "step": 21825 }, { "epoch": 0.45687850623796367, "grad_norm": 0.2278367280960083, "learning_rate": 0.00018879698471094085, "loss": 11.659, "step": 21826 }, { "epoch": 0.4568994390019258, "grad_norm": 0.29097291827201843, "learning_rate": 0.00018879597634005664, "loss": 11.6695, "step": 21827 }, { "epoch": 0.45692037176588796, "grad_norm": 0.3494194447994232, "learning_rate": 0.00018879496792648638, "loss": 11.6589, "step": 21828 }, { "epoch": 0.4569413045298501, "grad_norm": 0.24957922101020813, "learning_rate": 0.00018879395947023065, "loss": 11.666, "step": 21829 }, { "epoch": 0.4569622372938123, "grad_norm": 0.2855454087257385, "learning_rate": 0.00018879295097128985, "loss": 11.6779, "step": 21830 }, { "epoch": 0.45698317005777445, "grad_norm": 0.35729700326919556, "learning_rate": 0.0001887919424296645, "loss": 11.6667, "step": 21831 }, { "epoch": 0.4570041028217366, "grad_norm": 0.29705023765563965, "learning_rate": 0.00018879093384535512, "loss": 11.6801, "step": 21832 }, { "epoch": 0.45702503558569874, "grad_norm": 0.3585953414440155, "learning_rate": 0.00018878992521836213, "loss": 11.673, "step": 21833 }, { "epoch": 0.4570459683496609, "grad_norm": 0.2772950232028961, "learning_rate": 0.00018878891654868602, "loss": 11.6758, "step": 21834 }, { "epoch": 0.457066901113623, "grad_norm": 0.3089914321899414, "learning_rate": 0.00018878790783632735, "loss": 11.6734, "step": 21835 }, { "epoch": 0.45708783387758517, "grad_norm": 0.3075222074985504, "learning_rate": 0.00018878689908128653, "loss": 11.6737, "step": 21836 }, { "epoch": 0.45710876664154737, "grad_norm": 0.289071649312973, "learning_rate": 0.00018878589028356406, "loss": 11.6554, "step": 21837 }, { "epoch": 0.4571296994055095, "grad_norm": 0.4656452238559723, "learning_rate": 0.00018878488144316043, "loss": 11.6699, "step": 21838 }, { "epoch": 0.45715063216947166, "grad_norm": 0.3591330349445343, "learning_rate": 0.00018878387256007615, "loss": 11.6685, "step": 21839 }, { "epoch": 0.4571715649334338, "grad_norm": 0.28427228331565857, "learning_rate": 0.00018878286363431168, "loss": 11.6555, "step": 21840 }, { "epoch": 0.45719249769739595, "grad_norm": 0.2333904206752777, "learning_rate": 0.00018878185466586752, "loss": 11.6583, "step": 21841 }, { "epoch": 0.4572134304613581, "grad_norm": 0.3126945495605469, "learning_rate": 0.0001887808456547441, "loss": 11.6434, "step": 21842 }, { "epoch": 0.4572343632253203, "grad_norm": 0.2850308418273926, "learning_rate": 0.00018877983660094198, "loss": 11.6913, "step": 21843 }, { "epoch": 0.45725529598928244, "grad_norm": 0.33197951316833496, "learning_rate": 0.00018877882750446162, "loss": 11.6949, "step": 21844 }, { "epoch": 0.4572762287532446, "grad_norm": 0.3187541663646698, "learning_rate": 0.00018877781836530347, "loss": 11.6594, "step": 21845 }, { "epoch": 0.45729716151720673, "grad_norm": 0.3391052484512329, "learning_rate": 0.00018877680918346804, "loss": 11.6929, "step": 21846 }, { "epoch": 0.4573180942811689, "grad_norm": 0.2866363823413849, "learning_rate": 0.00018877579995895584, "loss": 11.6746, "step": 21847 }, { "epoch": 0.457339027045131, "grad_norm": 0.2539520859718323, "learning_rate": 0.00018877479069176733, "loss": 11.6644, "step": 21848 }, { "epoch": 0.4573599598090932, "grad_norm": 0.34164902567863464, "learning_rate": 0.000188773781381903, "loss": 11.6653, "step": 21849 }, { "epoch": 0.45738089257305536, "grad_norm": 0.27641981840133667, "learning_rate": 0.00018877277202936332, "loss": 11.6807, "step": 21850 }, { "epoch": 0.4574018253370175, "grad_norm": 0.32517990469932556, "learning_rate": 0.00018877176263414882, "loss": 11.6671, "step": 21851 }, { "epoch": 0.45742275810097965, "grad_norm": 0.2605948746204376, "learning_rate": 0.00018877075319625994, "loss": 11.681, "step": 21852 }, { "epoch": 0.4574436908649418, "grad_norm": 0.2998851537704468, "learning_rate": 0.0001887697437156972, "loss": 11.691, "step": 21853 }, { "epoch": 0.45746462362890394, "grad_norm": 0.28270378708839417, "learning_rate": 0.00018876873419246102, "loss": 11.6863, "step": 21854 }, { "epoch": 0.4574855563928661, "grad_norm": 0.2317037433385849, "learning_rate": 0.00018876772462655194, "loss": 11.6706, "step": 21855 }, { "epoch": 0.4575064891568283, "grad_norm": 0.3963499963283539, "learning_rate": 0.00018876671501797047, "loss": 11.6654, "step": 21856 }, { "epoch": 0.45752742192079043, "grad_norm": 0.3309951424598694, "learning_rate": 0.00018876570536671702, "loss": 11.6833, "step": 21857 }, { "epoch": 0.4575483546847526, "grad_norm": 0.2519954741001129, "learning_rate": 0.00018876469567279217, "loss": 11.6759, "step": 21858 }, { "epoch": 0.4575692874487147, "grad_norm": 0.2624714970588684, "learning_rate": 0.0001887636859361963, "loss": 11.6647, "step": 21859 }, { "epoch": 0.45759022021267687, "grad_norm": 0.3551154136657715, "learning_rate": 0.00018876267615692999, "loss": 11.6786, "step": 21860 }, { "epoch": 0.457611152976639, "grad_norm": 0.2864747643470764, "learning_rate": 0.00018876166633499363, "loss": 11.6698, "step": 21861 }, { "epoch": 0.4576320857406012, "grad_norm": 0.3806246817111969, "learning_rate": 0.0001887606564703878, "loss": 11.6776, "step": 21862 }, { "epoch": 0.45765301850456336, "grad_norm": 0.2881670296192169, "learning_rate": 0.0001887596465631129, "loss": 11.6737, "step": 21863 }, { "epoch": 0.4576739512685255, "grad_norm": 0.2585080862045288, "learning_rate": 0.0001887586366131695, "loss": 11.6867, "step": 21864 }, { "epoch": 0.45769488403248765, "grad_norm": 0.2968406677246094, "learning_rate": 0.00018875762662055805, "loss": 11.6834, "step": 21865 }, { "epoch": 0.4577158167964498, "grad_norm": 0.3539183437824249, "learning_rate": 0.000188756616585279, "loss": 11.6634, "step": 21866 }, { "epoch": 0.45773674956041194, "grad_norm": 0.28742516040802, "learning_rate": 0.0001887556065073329, "loss": 11.6619, "step": 21867 }, { "epoch": 0.45775768232437414, "grad_norm": 0.2591840922832489, "learning_rate": 0.00018875459638672016, "loss": 11.6741, "step": 21868 }, { "epoch": 0.4577786150883363, "grad_norm": 0.2813364863395691, "learning_rate": 0.00018875358622344135, "loss": 11.6709, "step": 21869 }, { "epoch": 0.4577995478522984, "grad_norm": 0.29898321628570557, "learning_rate": 0.00018875257601749687, "loss": 11.6627, "step": 21870 }, { "epoch": 0.45782048061626057, "grad_norm": 0.30867311358451843, "learning_rate": 0.00018875156576888727, "loss": 11.66, "step": 21871 }, { "epoch": 0.4578414133802227, "grad_norm": 0.2997869849205017, "learning_rate": 0.00018875055547761302, "loss": 11.6833, "step": 21872 }, { "epoch": 0.45786234614418486, "grad_norm": 0.3257051408290863, "learning_rate": 0.00018874954514367458, "loss": 11.6807, "step": 21873 }, { "epoch": 0.457883278908147, "grad_norm": 0.3418075144290924, "learning_rate": 0.00018874853476707248, "loss": 11.6676, "step": 21874 }, { "epoch": 0.4579042116721092, "grad_norm": 0.3326992690563202, "learning_rate": 0.00018874752434780719, "loss": 11.6777, "step": 21875 }, { "epoch": 0.45792514443607135, "grad_norm": 0.29126113653182983, "learning_rate": 0.00018874651388587913, "loss": 11.6825, "step": 21876 }, { "epoch": 0.4579460772000335, "grad_norm": 0.2610612213611603, "learning_rate": 0.0001887455033812889, "loss": 11.6717, "step": 21877 }, { "epoch": 0.45796700996399564, "grad_norm": 0.3654901087284088, "learning_rate": 0.0001887444928340369, "loss": 11.6649, "step": 21878 }, { "epoch": 0.4579879427279578, "grad_norm": 0.2857566475868225, "learning_rate": 0.00018874348224412363, "loss": 11.678, "step": 21879 }, { "epoch": 0.45800887549191993, "grad_norm": 0.2811874449253082, "learning_rate": 0.0001887424716115496, "loss": 11.6749, "step": 21880 }, { "epoch": 0.45802980825588213, "grad_norm": 0.30918896198272705, "learning_rate": 0.0001887414609363153, "loss": 11.6659, "step": 21881 }, { "epoch": 0.4580507410198443, "grad_norm": 0.31932586431503296, "learning_rate": 0.0001887404502184212, "loss": 11.6801, "step": 21882 }, { "epoch": 0.4580716737838064, "grad_norm": 0.3005916178226471, "learning_rate": 0.00018873943945786777, "loss": 11.6773, "step": 21883 }, { "epoch": 0.45809260654776857, "grad_norm": 0.32998228073120117, "learning_rate": 0.00018873842865465553, "loss": 11.6752, "step": 21884 }, { "epoch": 0.4581135393117307, "grad_norm": 0.3119273781776428, "learning_rate": 0.00018873741780878498, "loss": 11.6745, "step": 21885 }, { "epoch": 0.45813447207569286, "grad_norm": 0.2859856188297272, "learning_rate": 0.00018873640692025653, "loss": 11.6739, "step": 21886 }, { "epoch": 0.458155404839655, "grad_norm": 0.3114682137966156, "learning_rate": 0.0001887353959890707, "loss": 11.6621, "step": 21887 }, { "epoch": 0.4581763376036172, "grad_norm": 0.29965895414352417, "learning_rate": 0.00018873438501522803, "loss": 11.6771, "step": 21888 }, { "epoch": 0.45819727036757935, "grad_norm": 0.3185892403125763, "learning_rate": 0.00018873337399872892, "loss": 11.6647, "step": 21889 }, { "epoch": 0.4582182031315415, "grad_norm": 0.27684909105300903, "learning_rate": 0.00018873236293957392, "loss": 11.6785, "step": 21890 }, { "epoch": 0.45823913589550364, "grad_norm": 0.3051165044307709, "learning_rate": 0.0001887313518377635, "loss": 11.6896, "step": 21891 }, { "epoch": 0.4582600686594658, "grad_norm": 0.3532334566116333, "learning_rate": 0.00018873034069329813, "loss": 11.6711, "step": 21892 }, { "epoch": 0.4582810014234279, "grad_norm": 0.30947932600975037, "learning_rate": 0.0001887293295061783, "loss": 11.6841, "step": 21893 }, { "epoch": 0.4583019341873901, "grad_norm": 0.24107471108436584, "learning_rate": 0.00018872831827640454, "loss": 11.6773, "step": 21894 }, { "epoch": 0.45832286695135227, "grad_norm": 0.27061575651168823, "learning_rate": 0.00018872730700397728, "loss": 11.6706, "step": 21895 }, { "epoch": 0.4583437997153144, "grad_norm": 0.26377415657043457, "learning_rate": 0.00018872629568889701, "loss": 11.675, "step": 21896 }, { "epoch": 0.45836473247927656, "grad_norm": 0.34254753589630127, "learning_rate": 0.00018872528433116426, "loss": 11.6653, "step": 21897 }, { "epoch": 0.4583856652432387, "grad_norm": 0.29567041993141174, "learning_rate": 0.00018872427293077944, "loss": 11.6621, "step": 21898 }, { "epoch": 0.45840659800720085, "grad_norm": 0.2761980891227722, "learning_rate": 0.00018872326148774312, "loss": 11.6611, "step": 21899 }, { "epoch": 0.45842753077116305, "grad_norm": 0.2975751757621765, "learning_rate": 0.00018872225000205573, "loss": 11.6579, "step": 21900 }, { "epoch": 0.4584484635351252, "grad_norm": 0.3333378732204437, "learning_rate": 0.0001887212384737178, "loss": 11.6923, "step": 21901 }, { "epoch": 0.45846939629908734, "grad_norm": 0.26675158739089966, "learning_rate": 0.00018872022690272978, "loss": 11.6734, "step": 21902 }, { "epoch": 0.4584903290630495, "grad_norm": 0.3061411380767822, "learning_rate": 0.00018871921528909214, "loss": 11.6778, "step": 21903 }, { "epoch": 0.45851126182701163, "grad_norm": 0.3314933776855469, "learning_rate": 0.00018871820363280546, "loss": 11.6648, "step": 21904 }, { "epoch": 0.4585321945909738, "grad_norm": 0.3328557014465332, "learning_rate": 0.0001887171919338701, "loss": 11.6792, "step": 21905 }, { "epoch": 0.4585531273549359, "grad_norm": 0.28237301111221313, "learning_rate": 0.00018871618019228667, "loss": 11.6668, "step": 21906 }, { "epoch": 0.4585740601188981, "grad_norm": 0.3874939978122711, "learning_rate": 0.00018871516840805557, "loss": 11.6927, "step": 21907 }, { "epoch": 0.45859499288286026, "grad_norm": 0.3092658221721649, "learning_rate": 0.0001887141565811773, "loss": 11.6719, "step": 21908 }, { "epoch": 0.4586159256468224, "grad_norm": 0.2833627462387085, "learning_rate": 0.00018871314471165232, "loss": 11.6674, "step": 21909 }, { "epoch": 0.45863685841078455, "grad_norm": 0.32078418135643005, "learning_rate": 0.00018871213279948122, "loss": 11.666, "step": 21910 }, { "epoch": 0.4586577911747467, "grad_norm": 0.2767561376094818, "learning_rate": 0.00018871112084466437, "loss": 11.6837, "step": 21911 }, { "epoch": 0.45867872393870884, "grad_norm": 0.3318597376346588, "learning_rate": 0.00018871010884720236, "loss": 11.6706, "step": 21912 }, { "epoch": 0.45869965670267104, "grad_norm": 0.2796907126903534, "learning_rate": 0.00018870909680709556, "loss": 11.6799, "step": 21913 }, { "epoch": 0.4587205894666332, "grad_norm": 0.30689695477485657, "learning_rate": 0.00018870808472434456, "loss": 11.6569, "step": 21914 }, { "epoch": 0.45874152223059533, "grad_norm": 0.25554296374320984, "learning_rate": 0.0001887070725989498, "loss": 11.6649, "step": 21915 }, { "epoch": 0.4587624549945575, "grad_norm": 0.3597114682197571, "learning_rate": 0.00018870606043091176, "loss": 11.694, "step": 21916 }, { "epoch": 0.4587833877585196, "grad_norm": 0.27005669474601746, "learning_rate": 0.00018870504822023095, "loss": 11.6511, "step": 21917 }, { "epoch": 0.45880432052248177, "grad_norm": 0.31423231959342957, "learning_rate": 0.00018870403596690785, "loss": 11.6866, "step": 21918 }, { "epoch": 0.45882525328644397, "grad_norm": 0.30803948640823364, "learning_rate": 0.00018870302367094292, "loss": 11.6774, "step": 21919 }, { "epoch": 0.4588461860504061, "grad_norm": 0.3061732351779938, "learning_rate": 0.0001887020113323367, "loss": 11.6859, "step": 21920 }, { "epoch": 0.45886711881436826, "grad_norm": 0.37009397149086, "learning_rate": 0.00018870099895108963, "loss": 11.6992, "step": 21921 }, { "epoch": 0.4588880515783304, "grad_norm": 0.29519373178482056, "learning_rate": 0.0001886999865272022, "loss": 11.6687, "step": 21922 }, { "epoch": 0.45890898434229255, "grad_norm": 0.29128339886665344, "learning_rate": 0.00018869897406067492, "loss": 11.6862, "step": 21923 }, { "epoch": 0.4589299171062547, "grad_norm": 0.3341315686702728, "learning_rate": 0.00018869796155150827, "loss": 11.6729, "step": 21924 }, { "epoch": 0.45895084987021684, "grad_norm": 0.30508914589881897, "learning_rate": 0.00018869694899970274, "loss": 11.6733, "step": 21925 }, { "epoch": 0.45897178263417904, "grad_norm": 0.24081826210021973, "learning_rate": 0.0001886959364052588, "loss": 11.6766, "step": 21926 }, { "epoch": 0.4589927153981412, "grad_norm": 0.268444687128067, "learning_rate": 0.00018869492376817694, "loss": 11.6746, "step": 21927 }, { "epoch": 0.4590136481621033, "grad_norm": 0.2535601258277893, "learning_rate": 0.00018869391108845767, "loss": 11.6837, "step": 21928 }, { "epoch": 0.45903458092606547, "grad_norm": 0.2547558844089508, "learning_rate": 0.00018869289836610141, "loss": 11.6675, "step": 21929 }, { "epoch": 0.4590555136900276, "grad_norm": 0.4043368101119995, "learning_rate": 0.00018869188560110875, "loss": 11.6675, "step": 21930 }, { "epoch": 0.45907644645398976, "grad_norm": 0.28870734572410583, "learning_rate": 0.0001886908727934801, "loss": 11.6643, "step": 21931 }, { "epoch": 0.45909737921795196, "grad_norm": 0.32742592692375183, "learning_rate": 0.000188689859943216, "loss": 11.6659, "step": 21932 }, { "epoch": 0.4591183119819141, "grad_norm": 0.33598583936691284, "learning_rate": 0.00018868884705031686, "loss": 11.6556, "step": 21933 }, { "epoch": 0.45913924474587625, "grad_norm": 0.29151302576065063, "learning_rate": 0.00018868783411478326, "loss": 11.6819, "step": 21934 }, { "epoch": 0.4591601775098384, "grad_norm": 0.38994529843330383, "learning_rate": 0.0001886868211366156, "loss": 11.6798, "step": 21935 }, { "epoch": 0.45918111027380054, "grad_norm": 0.35995200276374817, "learning_rate": 0.00018868580811581443, "loss": 11.677, "step": 21936 }, { "epoch": 0.4592020430377627, "grad_norm": 0.2945590913295746, "learning_rate": 0.00018868479505238018, "loss": 11.669, "step": 21937 }, { "epoch": 0.4592229758017249, "grad_norm": 0.35457339882850647, "learning_rate": 0.00018868378194631343, "loss": 11.6944, "step": 21938 }, { "epoch": 0.45924390856568703, "grad_norm": 0.3482419550418854, "learning_rate": 0.00018868276879761457, "loss": 11.6874, "step": 21939 }, { "epoch": 0.4592648413296492, "grad_norm": 0.2697840631008148, "learning_rate": 0.00018868175560628414, "loss": 11.6752, "step": 21940 }, { "epoch": 0.4592857740936113, "grad_norm": 0.2969824969768524, "learning_rate": 0.0001886807423723226, "loss": 11.6762, "step": 21941 }, { "epoch": 0.45930670685757347, "grad_norm": 0.30266469717025757, "learning_rate": 0.00018867972909573046, "loss": 11.6843, "step": 21942 }, { "epoch": 0.4593276396215356, "grad_norm": 0.29395464062690735, "learning_rate": 0.0001886787157765082, "loss": 11.6778, "step": 21943 }, { "epoch": 0.45934857238549776, "grad_norm": 0.3303399384021759, "learning_rate": 0.0001886777024146563, "loss": 11.6642, "step": 21944 }, { "epoch": 0.45936950514945996, "grad_norm": 0.25655025243759155, "learning_rate": 0.00018867668901017526, "loss": 11.6586, "step": 21945 }, { "epoch": 0.4593904379134221, "grad_norm": 0.30898502469062805, "learning_rate": 0.00018867567556306555, "loss": 11.6785, "step": 21946 }, { "epoch": 0.45941137067738425, "grad_norm": 0.2564287483692169, "learning_rate": 0.00018867466207332764, "loss": 11.6657, "step": 21947 }, { "epoch": 0.4594323034413464, "grad_norm": 0.345828115940094, "learning_rate": 0.0001886736485409621, "loss": 11.6828, "step": 21948 }, { "epoch": 0.45945323620530854, "grad_norm": 0.25920867919921875, "learning_rate": 0.00018867263496596932, "loss": 11.6659, "step": 21949 }, { "epoch": 0.4594741689692707, "grad_norm": 0.38751330971717834, "learning_rate": 0.00018867162134834984, "loss": 11.677, "step": 21950 }, { "epoch": 0.4594951017332329, "grad_norm": 0.37387415766716003, "learning_rate": 0.00018867060768810415, "loss": 11.683, "step": 21951 }, { "epoch": 0.459516034497195, "grad_norm": 0.29752713441848755, "learning_rate": 0.00018866959398523267, "loss": 11.6581, "step": 21952 }, { "epoch": 0.45953696726115717, "grad_norm": 0.3212079107761383, "learning_rate": 0.00018866858023973597, "loss": 11.6818, "step": 21953 }, { "epoch": 0.4595579000251193, "grad_norm": 0.3523966073989868, "learning_rate": 0.0001886675664516145, "loss": 11.678, "step": 21954 }, { "epoch": 0.45957883278908146, "grad_norm": 0.23884402215480804, "learning_rate": 0.00018866655262086878, "loss": 11.6716, "step": 21955 }, { "epoch": 0.4595997655530436, "grad_norm": 0.29055845737457275, "learning_rate": 0.00018866553874749927, "loss": 11.6709, "step": 21956 }, { "epoch": 0.4596206983170058, "grad_norm": 0.2617270052433014, "learning_rate": 0.00018866452483150644, "loss": 11.6784, "step": 21957 }, { "epoch": 0.45964163108096795, "grad_norm": 0.3404637277126312, "learning_rate": 0.00018866351087289078, "loss": 11.677, "step": 21958 }, { "epoch": 0.4596625638449301, "grad_norm": 0.2825193703174591, "learning_rate": 0.00018866249687165282, "loss": 11.6748, "step": 21959 }, { "epoch": 0.45968349660889224, "grad_norm": 0.337856262922287, "learning_rate": 0.000188661482827793, "loss": 11.6795, "step": 21960 }, { "epoch": 0.4597044293728544, "grad_norm": 0.2971823513507843, "learning_rate": 0.00018866046874131185, "loss": 11.6927, "step": 21961 }, { "epoch": 0.45972536213681653, "grad_norm": 0.2925710380077362, "learning_rate": 0.00018865945461220984, "loss": 11.6504, "step": 21962 }, { "epoch": 0.4597462949007787, "grad_norm": 0.32710665464401245, "learning_rate": 0.0001886584404404874, "loss": 11.6713, "step": 21963 }, { "epoch": 0.4597672276647409, "grad_norm": 0.24472573399543762, "learning_rate": 0.00018865742622614512, "loss": 11.681, "step": 21964 }, { "epoch": 0.459788160428703, "grad_norm": 0.2716286778450012, "learning_rate": 0.00018865641196918345, "loss": 11.6698, "step": 21965 }, { "epoch": 0.45980909319266516, "grad_norm": 0.3861333429813385, "learning_rate": 0.00018865539766960282, "loss": 11.6382, "step": 21966 }, { "epoch": 0.4598300259566273, "grad_norm": 0.2880171537399292, "learning_rate": 0.00018865438332740382, "loss": 11.6508, "step": 21967 }, { "epoch": 0.45985095872058945, "grad_norm": 0.3281328082084656, "learning_rate": 0.00018865336894258685, "loss": 11.6681, "step": 21968 }, { "epoch": 0.4598718914845516, "grad_norm": 0.27389761805534363, "learning_rate": 0.0001886523545151524, "loss": 11.676, "step": 21969 }, { "epoch": 0.4598928242485138, "grad_norm": 0.4171067178249359, "learning_rate": 0.00018865134004510104, "loss": 11.6754, "step": 21970 }, { "epoch": 0.45991375701247594, "grad_norm": 0.2922537922859192, "learning_rate": 0.00018865032553243318, "loss": 11.6801, "step": 21971 }, { "epoch": 0.4599346897764381, "grad_norm": 0.28749606013298035, "learning_rate": 0.00018864931097714932, "loss": 11.6712, "step": 21972 }, { "epoch": 0.45995562254040023, "grad_norm": 0.3331944942474365, "learning_rate": 0.00018864829637925, "loss": 11.6519, "step": 21973 }, { "epoch": 0.4599765553043624, "grad_norm": 0.26717713475227356, "learning_rate": 0.00018864728173873562, "loss": 11.6624, "step": 21974 }, { "epoch": 0.4599974880683245, "grad_norm": 0.30847370624542236, "learning_rate": 0.00018864626705560676, "loss": 11.6713, "step": 21975 }, { "epoch": 0.46001842083228667, "grad_norm": 0.28516295552253723, "learning_rate": 0.0001886452523298638, "loss": 11.6761, "step": 21976 }, { "epoch": 0.46003935359624887, "grad_norm": 0.3195432126522064, "learning_rate": 0.00018864423756150735, "loss": 11.674, "step": 21977 }, { "epoch": 0.460060286360211, "grad_norm": 0.2903648316860199, "learning_rate": 0.0001886432227505378, "loss": 11.6828, "step": 21978 }, { "epoch": 0.46008121912417316, "grad_norm": 0.2895720601081848, "learning_rate": 0.00018864220789695571, "loss": 11.6663, "step": 21979 }, { "epoch": 0.4601021518881353, "grad_norm": 0.28590503334999084, "learning_rate": 0.0001886411930007615, "loss": 11.6626, "step": 21980 }, { "epoch": 0.46012308465209745, "grad_norm": 0.3527304530143738, "learning_rate": 0.00018864017806195573, "loss": 11.6838, "step": 21981 }, { "epoch": 0.4601440174160596, "grad_norm": 0.24927543103694916, "learning_rate": 0.0001886391630805388, "loss": 11.6697, "step": 21982 }, { "epoch": 0.4601649501800218, "grad_norm": 0.31872352957725525, "learning_rate": 0.0001886381480565113, "loss": 11.6779, "step": 21983 }, { "epoch": 0.46018588294398394, "grad_norm": 0.26560157537460327, "learning_rate": 0.00018863713298987363, "loss": 11.6735, "step": 21984 }, { "epoch": 0.4602068157079461, "grad_norm": 0.2783028483390808, "learning_rate": 0.00018863611788062635, "loss": 11.6789, "step": 21985 }, { "epoch": 0.4602277484719082, "grad_norm": 0.3364846706390381, "learning_rate": 0.00018863510272876987, "loss": 11.6683, "step": 21986 }, { "epoch": 0.46024868123587037, "grad_norm": 0.27913570404052734, "learning_rate": 0.00018863408753430475, "loss": 11.6528, "step": 21987 }, { "epoch": 0.4602696139998325, "grad_norm": 0.3925877511501312, "learning_rate": 0.00018863307229723141, "loss": 11.6802, "step": 21988 }, { "epoch": 0.4602905467637947, "grad_norm": 0.36154642701148987, "learning_rate": 0.00018863205701755042, "loss": 11.6643, "step": 21989 }, { "epoch": 0.46031147952775686, "grad_norm": 0.2558402419090271, "learning_rate": 0.0001886310416952622, "loss": 11.6759, "step": 21990 }, { "epoch": 0.460332412291719, "grad_norm": 0.2670508623123169, "learning_rate": 0.0001886300263303673, "loss": 11.662, "step": 21991 }, { "epoch": 0.46035334505568115, "grad_norm": 0.30258044600486755, "learning_rate": 0.0001886290109228661, "loss": 11.6713, "step": 21992 }, { "epoch": 0.4603742778196433, "grad_norm": 0.31858542561531067, "learning_rate": 0.00018862799547275923, "loss": 11.6765, "step": 21993 }, { "epoch": 0.46039521058360544, "grad_norm": 0.26074299216270447, "learning_rate": 0.00018862697998004707, "loss": 11.6649, "step": 21994 }, { "epoch": 0.4604161433475676, "grad_norm": 0.2836098074913025, "learning_rate": 0.00018862596444473012, "loss": 11.6654, "step": 21995 }, { "epoch": 0.4604370761115298, "grad_norm": 0.2774820923805237, "learning_rate": 0.00018862494886680894, "loss": 11.6868, "step": 21996 }, { "epoch": 0.46045800887549193, "grad_norm": 0.31003937125205994, "learning_rate": 0.00018862393324628395, "loss": 11.6664, "step": 21997 }, { "epoch": 0.4604789416394541, "grad_norm": 0.2650837004184723, "learning_rate": 0.00018862291758315567, "loss": 11.6689, "step": 21998 }, { "epoch": 0.4604998744034162, "grad_norm": 0.2739868760108948, "learning_rate": 0.00018862190187742456, "loss": 11.6744, "step": 21999 }, { "epoch": 0.46052080716737837, "grad_norm": 0.4013059735298157, "learning_rate": 0.00018862088612909116, "loss": 11.6697, "step": 22000 }, { "epoch": 0.46052080716737837, "eval_loss": 11.673083305358887, "eval_runtime": 34.3557, "eval_samples_per_second": 27.972, "eval_steps_per_second": 7.015, "step": 22000 }, { "epoch": 0.4605417399313405, "grad_norm": 0.2769084870815277, "learning_rate": 0.00018861987033815588, "loss": 11.6822, "step": 22001 }, { "epoch": 0.4605626726953027, "grad_norm": 0.30025362968444824, "learning_rate": 0.00018861885450461926, "loss": 11.6626, "step": 22002 }, { "epoch": 0.46058360545926486, "grad_norm": 0.27027082443237305, "learning_rate": 0.00018861783862848182, "loss": 11.6729, "step": 22003 }, { "epoch": 0.460604538223227, "grad_norm": 0.29623275995254517, "learning_rate": 0.00018861682270974396, "loss": 11.6867, "step": 22004 }, { "epoch": 0.46062547098718915, "grad_norm": 0.3016257882118225, "learning_rate": 0.00018861580674840625, "loss": 11.68, "step": 22005 }, { "epoch": 0.4606464037511513, "grad_norm": 0.3498779833316803, "learning_rate": 0.00018861479074446912, "loss": 11.6793, "step": 22006 }, { "epoch": 0.46066733651511343, "grad_norm": 0.435878187417984, "learning_rate": 0.0001886137746979331, "loss": 11.6806, "step": 22007 }, { "epoch": 0.46068826927907564, "grad_norm": 0.23546750843524933, "learning_rate": 0.00018861275860879867, "loss": 11.6733, "step": 22008 }, { "epoch": 0.4607092020430378, "grad_norm": 0.23021356761455536, "learning_rate": 0.00018861174247706628, "loss": 11.6901, "step": 22009 }, { "epoch": 0.4607301348069999, "grad_norm": 0.34340357780456543, "learning_rate": 0.00018861072630273648, "loss": 11.6725, "step": 22010 }, { "epoch": 0.46075106757096207, "grad_norm": 0.26609861850738525, "learning_rate": 0.00018860971008580974, "loss": 11.673, "step": 22011 }, { "epoch": 0.4607720003349242, "grad_norm": 0.27090638875961304, "learning_rate": 0.00018860869382628648, "loss": 11.6836, "step": 22012 }, { "epoch": 0.46079293309888636, "grad_norm": 0.33672478795051575, "learning_rate": 0.0001886076775241673, "loss": 11.6743, "step": 22013 }, { "epoch": 0.4608138658628485, "grad_norm": 0.24376250803470612, "learning_rate": 0.00018860666117945262, "loss": 11.6717, "step": 22014 }, { "epoch": 0.4608347986268107, "grad_norm": 0.33451175689697266, "learning_rate": 0.00018860564479214294, "loss": 11.6843, "step": 22015 }, { "epoch": 0.46085573139077285, "grad_norm": 0.2831849157810211, "learning_rate": 0.00018860462836223875, "loss": 11.6755, "step": 22016 }, { "epoch": 0.460876664154735, "grad_norm": 0.30953237414360046, "learning_rate": 0.00018860361188974053, "loss": 11.6924, "step": 22017 }, { "epoch": 0.46089759691869714, "grad_norm": 0.34542492032051086, "learning_rate": 0.0001886025953746488, "loss": 11.6743, "step": 22018 }, { "epoch": 0.4609185296826593, "grad_norm": 0.3750676214694977, "learning_rate": 0.000188601578816964, "loss": 11.6767, "step": 22019 }, { "epoch": 0.46093946244662143, "grad_norm": 0.2424800992012024, "learning_rate": 0.00018860056221668665, "loss": 11.6683, "step": 22020 }, { "epoch": 0.46096039521058363, "grad_norm": 0.2623203694820404, "learning_rate": 0.00018859954557381726, "loss": 11.6775, "step": 22021 }, { "epoch": 0.4609813279745458, "grad_norm": 0.28366392850875854, "learning_rate": 0.0001885985288883563, "loss": 11.6687, "step": 22022 }, { "epoch": 0.4610022607385079, "grad_norm": 0.2987498342990875, "learning_rate": 0.0001885975121603042, "loss": 11.6805, "step": 22023 }, { "epoch": 0.46102319350247006, "grad_norm": 0.3208693563938141, "learning_rate": 0.0001885964953896615, "loss": 11.6771, "step": 22024 }, { "epoch": 0.4610441262664322, "grad_norm": 0.34326136112213135, "learning_rate": 0.00018859547857642874, "loss": 11.6934, "step": 22025 }, { "epoch": 0.46106505903039435, "grad_norm": 0.3046259582042694, "learning_rate": 0.00018859446172060632, "loss": 11.6873, "step": 22026 }, { "epoch": 0.46108599179435655, "grad_norm": 0.3601052761077881, "learning_rate": 0.0001885934448221948, "loss": 11.6854, "step": 22027 }, { "epoch": 0.4611069245583187, "grad_norm": 0.25571420788764954, "learning_rate": 0.00018859242788119463, "loss": 11.672, "step": 22028 }, { "epoch": 0.46112785732228084, "grad_norm": 0.21857735514640808, "learning_rate": 0.00018859141089760625, "loss": 11.669, "step": 22029 }, { "epoch": 0.461148790086243, "grad_norm": 0.28456223011016846, "learning_rate": 0.00018859039387143027, "loss": 11.6644, "step": 22030 }, { "epoch": 0.46116972285020513, "grad_norm": 0.27712690830230713, "learning_rate": 0.00018858937680266708, "loss": 11.6789, "step": 22031 }, { "epoch": 0.4611906556141673, "grad_norm": 0.294819176197052, "learning_rate": 0.0001885883596913172, "loss": 11.6743, "step": 22032 }, { "epoch": 0.4612115883781294, "grad_norm": 0.26163849234580994, "learning_rate": 0.00018858734253738115, "loss": 11.6655, "step": 22033 }, { "epoch": 0.4612325211420916, "grad_norm": 0.2541413903236389, "learning_rate": 0.00018858632534085937, "loss": 11.6683, "step": 22034 }, { "epoch": 0.46125345390605377, "grad_norm": 0.24696512520313263, "learning_rate": 0.00018858530810175235, "loss": 11.6702, "step": 22035 }, { "epoch": 0.4612743866700159, "grad_norm": 0.32444632053375244, "learning_rate": 0.00018858429082006063, "loss": 11.6747, "step": 22036 }, { "epoch": 0.46129531943397806, "grad_norm": 0.2427724301815033, "learning_rate": 0.00018858327349578465, "loss": 11.6783, "step": 22037 }, { "epoch": 0.4613162521979402, "grad_norm": 0.3422025740146637, "learning_rate": 0.00018858225612892492, "loss": 11.6608, "step": 22038 }, { "epoch": 0.46133718496190235, "grad_norm": 0.2133411467075348, "learning_rate": 0.0001885812387194819, "loss": 11.6841, "step": 22039 }, { "epoch": 0.46135811772586455, "grad_norm": 0.3133181929588318, "learning_rate": 0.0001885802212674561, "loss": 11.6699, "step": 22040 }, { "epoch": 0.4613790504898267, "grad_norm": 0.34032750129699707, "learning_rate": 0.00018857920377284807, "loss": 11.6837, "step": 22041 }, { "epoch": 0.46139998325378884, "grad_norm": 0.26832348108291626, "learning_rate": 0.0001885781862356582, "loss": 11.6829, "step": 22042 }, { "epoch": 0.461420916017751, "grad_norm": 0.3202778398990631, "learning_rate": 0.00018857716865588702, "loss": 11.687, "step": 22043 }, { "epoch": 0.4614418487817131, "grad_norm": 0.35429102182388306, "learning_rate": 0.00018857615103353502, "loss": 11.676, "step": 22044 }, { "epoch": 0.46146278154567527, "grad_norm": 0.38226038217544556, "learning_rate": 0.0001885751333686027, "loss": 11.686, "step": 22045 }, { "epoch": 0.46148371430963747, "grad_norm": 0.266623318195343, "learning_rate": 0.00018857411566109054, "loss": 11.6548, "step": 22046 }, { "epoch": 0.4615046470735996, "grad_norm": 0.3442099094390869, "learning_rate": 0.000188573097910999, "loss": 11.6697, "step": 22047 }, { "epoch": 0.46152557983756176, "grad_norm": 0.31355878710746765, "learning_rate": 0.00018857208011832863, "loss": 11.6523, "step": 22048 }, { "epoch": 0.4615465126015239, "grad_norm": 0.2737636864185333, "learning_rate": 0.00018857106228307988, "loss": 11.6626, "step": 22049 }, { "epoch": 0.46156744536548605, "grad_norm": 0.26368075609207153, "learning_rate": 0.00018857004440525323, "loss": 11.6612, "step": 22050 }, { "epoch": 0.4615883781294482, "grad_norm": 0.32336288690567017, "learning_rate": 0.0001885690264848492, "loss": 11.6688, "step": 22051 }, { "epoch": 0.46160931089341034, "grad_norm": 0.28894639015197754, "learning_rate": 0.00018856800852186826, "loss": 11.6769, "step": 22052 }, { "epoch": 0.46163024365737254, "grad_norm": 0.3448181748390198, "learning_rate": 0.0001885669905163109, "loss": 11.6854, "step": 22053 }, { "epoch": 0.4616511764213347, "grad_norm": 0.3085100054740906, "learning_rate": 0.0001885659724681776, "loss": 11.6804, "step": 22054 }, { "epoch": 0.46167210918529683, "grad_norm": 0.29821228981018066, "learning_rate": 0.00018856495437746888, "loss": 11.6558, "step": 22055 }, { "epoch": 0.461693041949259, "grad_norm": 0.27025192975997925, "learning_rate": 0.00018856393624418522, "loss": 11.6632, "step": 22056 }, { "epoch": 0.4617139747132211, "grad_norm": 0.31659984588623047, "learning_rate": 0.0001885629180683271, "loss": 11.6731, "step": 22057 }, { "epoch": 0.46173490747718327, "grad_norm": 0.273749977350235, "learning_rate": 0.000188561899849895, "loss": 11.6738, "step": 22058 }, { "epoch": 0.46175584024114547, "grad_norm": 0.30369052290916443, "learning_rate": 0.0001885608815888894, "loss": 11.6706, "step": 22059 }, { "epoch": 0.4617767730051076, "grad_norm": 0.3389925956726074, "learning_rate": 0.00018855986328531084, "loss": 11.6605, "step": 22060 }, { "epoch": 0.46179770576906976, "grad_norm": 0.2734467089176178, "learning_rate": 0.00018855884493915976, "loss": 11.6622, "step": 22061 }, { "epoch": 0.4618186385330319, "grad_norm": 0.25151199102401733, "learning_rate": 0.0001885578265504367, "loss": 11.6806, "step": 22062 }, { "epoch": 0.46183957129699404, "grad_norm": 0.2962892949581146, "learning_rate": 0.00018855680811914207, "loss": 11.6633, "step": 22063 }, { "epoch": 0.4618605040609562, "grad_norm": 0.21503427624702454, "learning_rate": 0.00018855578964527644, "loss": 11.6688, "step": 22064 }, { "epoch": 0.4618814368249184, "grad_norm": 0.34998828172683716, "learning_rate": 0.00018855477112884026, "loss": 11.6891, "step": 22065 }, { "epoch": 0.46190236958888053, "grad_norm": 0.26386603713035583, "learning_rate": 0.00018855375256983403, "loss": 11.6899, "step": 22066 }, { "epoch": 0.4619233023528427, "grad_norm": 0.3112855851650238, "learning_rate": 0.00018855273396825822, "loss": 11.6618, "step": 22067 }, { "epoch": 0.4619442351168048, "grad_norm": 0.29854220151901245, "learning_rate": 0.00018855171532411336, "loss": 11.6635, "step": 22068 }, { "epoch": 0.46196516788076697, "grad_norm": 0.28397855162620544, "learning_rate": 0.0001885506966373999, "loss": 11.6611, "step": 22069 }, { "epoch": 0.4619861006447291, "grad_norm": 0.487463116645813, "learning_rate": 0.00018854967790811835, "loss": 11.6898, "step": 22070 }, { "epoch": 0.46200703340869126, "grad_norm": 0.2477360963821411, "learning_rate": 0.0001885486591362692, "loss": 11.6673, "step": 22071 }, { "epoch": 0.46202796617265346, "grad_norm": 0.26702067255973816, "learning_rate": 0.00018854764032185292, "loss": 11.6989, "step": 22072 }, { "epoch": 0.4620488989366156, "grad_norm": 0.358974814414978, "learning_rate": 0.00018854662146487004, "loss": 11.6643, "step": 22073 }, { "epoch": 0.46206983170057775, "grad_norm": 0.31316015124320984, "learning_rate": 0.000188545602565321, "loss": 11.6694, "step": 22074 }, { "epoch": 0.4620907644645399, "grad_norm": 0.2725955843925476, "learning_rate": 0.00018854458362320634, "loss": 11.6734, "step": 22075 }, { "epoch": 0.46211169722850204, "grad_norm": 0.3453049957752228, "learning_rate": 0.0001885435646385265, "loss": 11.6726, "step": 22076 }, { "epoch": 0.4621326299924642, "grad_norm": 0.30796417593955994, "learning_rate": 0.000188542545611282, "loss": 11.6753, "step": 22077 }, { "epoch": 0.4621535627564264, "grad_norm": 0.26446694135665894, "learning_rate": 0.00018854152654147331, "loss": 11.6665, "step": 22078 }, { "epoch": 0.46217449552038853, "grad_norm": 0.31480932235717773, "learning_rate": 0.00018854050742910094, "loss": 11.6823, "step": 22079 }, { "epoch": 0.4621954282843507, "grad_norm": 0.5139731764793396, "learning_rate": 0.0001885394882741654, "loss": 11.6673, "step": 22080 }, { "epoch": 0.4622163610483128, "grad_norm": 0.31725403666496277, "learning_rate": 0.00018853846907666713, "loss": 11.6764, "step": 22081 }, { "epoch": 0.46223729381227496, "grad_norm": 0.2555696964263916, "learning_rate": 0.00018853744983660667, "loss": 11.6679, "step": 22082 }, { "epoch": 0.4622582265762371, "grad_norm": 0.32930752635002136, "learning_rate": 0.00018853643055398447, "loss": 11.6734, "step": 22083 }, { "epoch": 0.46227915934019925, "grad_norm": 0.27844998240470886, "learning_rate": 0.000188535411228801, "loss": 11.6647, "step": 22084 }, { "epoch": 0.46230009210416145, "grad_norm": 0.3534616231918335, "learning_rate": 0.00018853439186105684, "loss": 11.6797, "step": 22085 }, { "epoch": 0.4623210248681236, "grad_norm": 0.23145431280136108, "learning_rate": 0.0001885333724507524, "loss": 11.6665, "step": 22086 }, { "epoch": 0.46234195763208574, "grad_norm": 0.3487635850906372, "learning_rate": 0.00018853235299788818, "loss": 11.6811, "step": 22087 }, { "epoch": 0.4623628903960479, "grad_norm": 0.29810991883277893, "learning_rate": 0.0001885313335024647, "loss": 11.6819, "step": 22088 }, { "epoch": 0.46238382316001003, "grad_norm": 0.28647804260253906, "learning_rate": 0.0001885303139644824, "loss": 11.6564, "step": 22089 }, { "epoch": 0.4624047559239722, "grad_norm": 0.31240251660346985, "learning_rate": 0.00018852929438394184, "loss": 11.6768, "step": 22090 }, { "epoch": 0.4624256886879344, "grad_norm": 0.242624431848526, "learning_rate": 0.00018852827476084348, "loss": 11.6789, "step": 22091 }, { "epoch": 0.4624466214518965, "grad_norm": 0.30504974722862244, "learning_rate": 0.00018852725509518775, "loss": 11.6839, "step": 22092 }, { "epoch": 0.46246755421585867, "grad_norm": 0.3268035352230072, "learning_rate": 0.00018852623538697527, "loss": 11.6934, "step": 22093 }, { "epoch": 0.4624884869798208, "grad_norm": 0.27527084946632385, "learning_rate": 0.00018852521563620642, "loss": 11.6577, "step": 22094 }, { "epoch": 0.46250941974378296, "grad_norm": 0.23138703405857086, "learning_rate": 0.0001885241958428817, "loss": 11.671, "step": 22095 }, { "epoch": 0.4625303525077451, "grad_norm": 0.4064381420612335, "learning_rate": 0.00018852317600700165, "loss": 11.6605, "step": 22096 }, { "epoch": 0.4625512852717073, "grad_norm": 0.24568119645118713, "learning_rate": 0.00018852215612856675, "loss": 11.6638, "step": 22097 }, { "epoch": 0.46257221803566945, "grad_norm": 0.250789612531662, "learning_rate": 0.00018852113620757742, "loss": 11.6745, "step": 22098 }, { "epoch": 0.4625931507996316, "grad_norm": 0.2857488989830017, "learning_rate": 0.00018852011624403427, "loss": 11.6755, "step": 22099 }, { "epoch": 0.46261408356359374, "grad_norm": 0.28029584884643555, "learning_rate": 0.00018851909623793768, "loss": 11.6659, "step": 22100 }, { "epoch": 0.4626350163275559, "grad_norm": 0.2896706163883209, "learning_rate": 0.00018851807618928824, "loss": 11.6679, "step": 22101 }, { "epoch": 0.462655949091518, "grad_norm": 0.33763402700424194, "learning_rate": 0.00018851705609808635, "loss": 11.6802, "step": 22102 }, { "epoch": 0.46267688185548017, "grad_norm": 0.44771260023117065, "learning_rate": 0.00018851603596433254, "loss": 11.6889, "step": 22103 }, { "epoch": 0.46269781461944237, "grad_norm": 0.2565808594226837, "learning_rate": 0.0001885150157880273, "loss": 11.6615, "step": 22104 }, { "epoch": 0.4627187473834045, "grad_norm": 0.23700463771820068, "learning_rate": 0.00018851399556917112, "loss": 11.6707, "step": 22105 }, { "epoch": 0.46273968014736666, "grad_norm": 0.23416219651699066, "learning_rate": 0.00018851297530776448, "loss": 11.6773, "step": 22106 }, { "epoch": 0.4627606129113288, "grad_norm": 0.29365500807762146, "learning_rate": 0.00018851195500380787, "loss": 11.6615, "step": 22107 }, { "epoch": 0.46278154567529095, "grad_norm": 0.22771446406841278, "learning_rate": 0.0001885109346573018, "loss": 11.6685, "step": 22108 }, { "epoch": 0.4628024784392531, "grad_norm": 0.30255669355392456, "learning_rate": 0.00018850991426824676, "loss": 11.6615, "step": 22109 }, { "epoch": 0.4628234112032153, "grad_norm": 0.26229873299598694, "learning_rate": 0.00018850889383664323, "loss": 11.6686, "step": 22110 }, { "epoch": 0.46284434396717744, "grad_norm": 0.277047723531723, "learning_rate": 0.0001885078733624917, "loss": 11.6447, "step": 22111 }, { "epoch": 0.4628652767311396, "grad_norm": 0.3010862469673157, "learning_rate": 0.00018850685284579265, "loss": 11.6739, "step": 22112 }, { "epoch": 0.46288620949510173, "grad_norm": 0.28870201110839844, "learning_rate": 0.0001885058322865466, "loss": 11.6784, "step": 22113 }, { "epoch": 0.4629071422590639, "grad_norm": 0.2953394949436188, "learning_rate": 0.000188504811684754, "loss": 11.6793, "step": 22114 }, { "epoch": 0.462928075023026, "grad_norm": 0.26011088490486145, "learning_rate": 0.00018850379104041538, "loss": 11.6568, "step": 22115 }, { "epoch": 0.4629490077869882, "grad_norm": 0.3315095603466034, "learning_rate": 0.0001885027703535312, "loss": 11.6767, "step": 22116 }, { "epoch": 0.46296994055095037, "grad_norm": 0.2973644733428955, "learning_rate": 0.00018850174962410199, "loss": 11.6601, "step": 22117 }, { "epoch": 0.4629908733149125, "grad_norm": 0.27111122012138367, "learning_rate": 0.0001885007288521282, "loss": 11.6663, "step": 22118 }, { "epoch": 0.46301180607887465, "grad_norm": 0.25422394275665283, "learning_rate": 0.0001884997080376103, "loss": 11.6581, "step": 22119 }, { "epoch": 0.4630327388428368, "grad_norm": 0.24725010991096497, "learning_rate": 0.00018849868718054887, "loss": 11.661, "step": 22120 }, { "epoch": 0.46305367160679894, "grad_norm": 0.26709631085395813, "learning_rate": 0.00018849766628094435, "loss": 11.6585, "step": 22121 }, { "epoch": 0.4630746043707611, "grad_norm": 0.23115754127502441, "learning_rate": 0.00018849664533879718, "loss": 11.6775, "step": 22122 }, { "epoch": 0.4630955371347233, "grad_norm": 0.3948686122894287, "learning_rate": 0.00018849562435410791, "loss": 11.6837, "step": 22123 }, { "epoch": 0.46311646989868543, "grad_norm": 0.2972264885902405, "learning_rate": 0.00018849460332687705, "loss": 11.6733, "step": 22124 }, { "epoch": 0.4631374026626476, "grad_norm": 0.276664137840271, "learning_rate": 0.00018849358225710505, "loss": 11.6781, "step": 22125 }, { "epoch": 0.4631583354266097, "grad_norm": 0.3245091736316681, "learning_rate": 0.0001884925611447924, "loss": 11.684, "step": 22126 }, { "epoch": 0.46317926819057187, "grad_norm": 0.3529561460018158, "learning_rate": 0.0001884915399899396, "loss": 11.6889, "step": 22127 }, { "epoch": 0.463200200954534, "grad_norm": 0.30592575669288635, "learning_rate": 0.00018849051879254718, "loss": 11.6612, "step": 22128 }, { "epoch": 0.4632211337184962, "grad_norm": 0.2793337404727936, "learning_rate": 0.00018848949755261555, "loss": 11.6636, "step": 22129 }, { "epoch": 0.46324206648245836, "grad_norm": 0.34970274567604065, "learning_rate": 0.00018848847627014529, "loss": 11.6809, "step": 22130 }, { "epoch": 0.4632629992464205, "grad_norm": 0.33976760506629944, "learning_rate": 0.0001884874549451368, "loss": 11.6799, "step": 22131 }, { "epoch": 0.46328393201038265, "grad_norm": 0.3505564033985138, "learning_rate": 0.00018848643357759066, "loss": 11.6773, "step": 22132 }, { "epoch": 0.4633048647743448, "grad_norm": 0.36194586753845215, "learning_rate": 0.00018848541216750727, "loss": 11.6828, "step": 22133 }, { "epoch": 0.46332579753830694, "grad_norm": 0.4707992672920227, "learning_rate": 0.00018848439071488722, "loss": 11.6642, "step": 22134 }, { "epoch": 0.46334673030226914, "grad_norm": 0.2568676769733429, "learning_rate": 0.0001884833692197309, "loss": 11.671, "step": 22135 }, { "epoch": 0.4633676630662313, "grad_norm": 0.5331347584724426, "learning_rate": 0.00018848234768203888, "loss": 11.6691, "step": 22136 }, { "epoch": 0.46338859583019343, "grad_norm": 0.2748161554336548, "learning_rate": 0.00018848132610181162, "loss": 11.6671, "step": 22137 }, { "epoch": 0.4634095285941556, "grad_norm": 0.272896409034729, "learning_rate": 0.0001884803044790496, "loss": 11.663, "step": 22138 }, { "epoch": 0.4634304613581177, "grad_norm": 0.35215142369270325, "learning_rate": 0.00018847928281375334, "loss": 11.6698, "step": 22139 }, { "epoch": 0.46345139412207986, "grad_norm": 0.27977490425109863, "learning_rate": 0.0001884782611059233, "loss": 11.6749, "step": 22140 }, { "epoch": 0.463472326886042, "grad_norm": 0.2901759147644043, "learning_rate": 0.00018847723935556002, "loss": 11.6891, "step": 22141 }, { "epoch": 0.4634932596500042, "grad_norm": 0.27649638056755066, "learning_rate": 0.00018847621756266393, "loss": 11.6809, "step": 22142 }, { "epoch": 0.46351419241396635, "grad_norm": 0.2918977439403534, "learning_rate": 0.00018847519572723557, "loss": 11.6792, "step": 22143 }, { "epoch": 0.4635351251779285, "grad_norm": 0.3414539694786072, "learning_rate": 0.00018847417384927536, "loss": 11.687, "step": 22144 }, { "epoch": 0.46355605794189064, "grad_norm": 0.27427634596824646, "learning_rate": 0.0001884731519287839, "loss": 11.671, "step": 22145 }, { "epoch": 0.4635769907058528, "grad_norm": 0.27896761894226074, "learning_rate": 0.0001884721299657616, "loss": 11.6807, "step": 22146 }, { "epoch": 0.46359792346981493, "grad_norm": 0.23230378329753876, "learning_rate": 0.00018847110796020897, "loss": 11.6696, "step": 22147 }, { "epoch": 0.46361885623377713, "grad_norm": 0.32187795639038086, "learning_rate": 0.00018847008591212652, "loss": 11.6809, "step": 22148 }, { "epoch": 0.4636397889977393, "grad_norm": 0.2787691354751587, "learning_rate": 0.0001884690638215147, "loss": 11.6771, "step": 22149 }, { "epoch": 0.4636607217617014, "grad_norm": 0.3452812135219574, "learning_rate": 0.00018846804168837404, "loss": 11.68, "step": 22150 }, { "epoch": 0.46368165452566357, "grad_norm": 0.29437553882598877, "learning_rate": 0.00018846701951270503, "loss": 11.6704, "step": 22151 }, { "epoch": 0.4637025872896257, "grad_norm": 0.3133672773838043, "learning_rate": 0.00018846599729450814, "loss": 11.6929, "step": 22152 }, { "epoch": 0.46372352005358786, "grad_norm": 0.3008211553096771, "learning_rate": 0.00018846497503378389, "loss": 11.693, "step": 22153 }, { "epoch": 0.46374445281755006, "grad_norm": 0.2696363031864166, "learning_rate": 0.00018846395273053275, "loss": 11.6748, "step": 22154 }, { "epoch": 0.4637653855815122, "grad_norm": 0.30175289511680603, "learning_rate": 0.00018846293038475519, "loss": 11.6813, "step": 22155 }, { "epoch": 0.46378631834547435, "grad_norm": 0.3839713931083679, "learning_rate": 0.00018846190799645175, "loss": 11.6833, "step": 22156 }, { "epoch": 0.4638072511094365, "grad_norm": 0.30572766065597534, "learning_rate": 0.00018846088556562286, "loss": 11.6728, "step": 22157 }, { "epoch": 0.46382818387339864, "grad_norm": 0.3100731074810028, "learning_rate": 0.0001884598630922691, "loss": 11.6813, "step": 22158 }, { "epoch": 0.4638491166373608, "grad_norm": 0.31903934478759766, "learning_rate": 0.0001884588405763909, "loss": 11.6639, "step": 22159 }, { "epoch": 0.4638700494013229, "grad_norm": 0.3357463479042053, "learning_rate": 0.00018845781801798874, "loss": 11.6529, "step": 22160 }, { "epoch": 0.4638909821652851, "grad_norm": 0.27361905574798584, "learning_rate": 0.00018845679541706315, "loss": 11.671, "step": 22161 }, { "epoch": 0.46391191492924727, "grad_norm": 0.35128211975097656, "learning_rate": 0.0001884557727736146, "loss": 11.6658, "step": 22162 }, { "epoch": 0.4639328476932094, "grad_norm": 0.22630679607391357, "learning_rate": 0.0001884547500876436, "loss": 11.6619, "step": 22163 }, { "epoch": 0.46395378045717156, "grad_norm": 0.26898518204689026, "learning_rate": 0.00018845372735915063, "loss": 11.6921, "step": 22164 }, { "epoch": 0.4639747132211337, "grad_norm": 0.3206913471221924, "learning_rate": 0.00018845270458813616, "loss": 11.6718, "step": 22165 }, { "epoch": 0.46399564598509585, "grad_norm": 0.3623162806034088, "learning_rate": 0.0001884516817746007, "loss": 11.6742, "step": 22166 }, { "epoch": 0.46401657874905805, "grad_norm": 0.3157522678375244, "learning_rate": 0.00018845065891854475, "loss": 11.6701, "step": 22167 }, { "epoch": 0.4640375115130202, "grad_norm": 0.2599746286869049, "learning_rate": 0.0001884496360199688, "loss": 11.681, "step": 22168 }, { "epoch": 0.46405844427698234, "grad_norm": 0.34811931848526, "learning_rate": 0.00018844861307887336, "loss": 11.6695, "step": 22169 }, { "epoch": 0.4640793770409445, "grad_norm": 0.2665504515171051, "learning_rate": 0.00018844759009525888, "loss": 11.673, "step": 22170 }, { "epoch": 0.46410030980490663, "grad_norm": 0.26761215925216675, "learning_rate": 0.00018844656706912586, "loss": 11.6687, "step": 22171 }, { "epoch": 0.4641212425688688, "grad_norm": 0.3096291124820709, "learning_rate": 0.00018844554400047482, "loss": 11.6695, "step": 22172 }, { "epoch": 0.4641421753328309, "grad_norm": 0.27777430415153503, "learning_rate": 0.00018844452088930623, "loss": 11.6818, "step": 22173 }, { "epoch": 0.4641631080967931, "grad_norm": 0.2750869393348694, "learning_rate": 0.00018844349773562057, "loss": 11.6827, "step": 22174 }, { "epoch": 0.46418404086075526, "grad_norm": 0.28190821409225464, "learning_rate": 0.00018844247453941838, "loss": 11.675, "step": 22175 }, { "epoch": 0.4642049736247174, "grad_norm": 0.2757490873336792, "learning_rate": 0.0001884414513007001, "loss": 11.6739, "step": 22176 }, { "epoch": 0.46422590638867955, "grad_norm": 0.3352755904197693, "learning_rate": 0.00018844042801946623, "loss": 11.6781, "step": 22177 }, { "epoch": 0.4642468391526417, "grad_norm": 0.24640551209449768, "learning_rate": 0.00018843940469571728, "loss": 11.6693, "step": 22178 }, { "epoch": 0.46426777191660384, "grad_norm": 0.3007480800151825, "learning_rate": 0.00018843838132945375, "loss": 11.6784, "step": 22179 }, { "epoch": 0.46428870468056604, "grad_norm": 0.33400583267211914, "learning_rate": 0.00018843735792067613, "loss": 11.6495, "step": 22180 }, { "epoch": 0.4643096374445282, "grad_norm": 0.24303001165390015, "learning_rate": 0.00018843633446938486, "loss": 11.6846, "step": 22181 }, { "epoch": 0.46433057020849033, "grad_norm": 0.40514981746673584, "learning_rate": 0.0001884353109755805, "loss": 11.6647, "step": 22182 }, { "epoch": 0.4643515029724525, "grad_norm": 0.36228033900260925, "learning_rate": 0.0001884342874392635, "loss": 11.6799, "step": 22183 }, { "epoch": 0.4643724357364146, "grad_norm": 0.26637524366378784, "learning_rate": 0.00018843326386043437, "loss": 11.672, "step": 22184 }, { "epoch": 0.46439336850037677, "grad_norm": 0.2598152160644531, "learning_rate": 0.0001884322402390936, "loss": 11.6652, "step": 22185 }, { "epoch": 0.46441430126433897, "grad_norm": 0.30915167927742004, "learning_rate": 0.00018843121657524168, "loss": 11.6967, "step": 22186 }, { "epoch": 0.4644352340283011, "grad_norm": 0.3044302463531494, "learning_rate": 0.0001884301928688791, "loss": 11.6827, "step": 22187 }, { "epoch": 0.46445616679226326, "grad_norm": 0.3468281328678131, "learning_rate": 0.00018842916912000638, "loss": 11.6752, "step": 22188 }, { "epoch": 0.4644770995562254, "grad_norm": 0.2901882231235504, "learning_rate": 0.00018842814532862395, "loss": 11.6722, "step": 22189 }, { "epoch": 0.46449803232018755, "grad_norm": 0.28355780243873596, "learning_rate": 0.00018842712149473234, "loss": 11.6597, "step": 22190 }, { "epoch": 0.4645189650841497, "grad_norm": 0.4171440899372101, "learning_rate": 0.00018842609761833207, "loss": 11.6702, "step": 22191 }, { "epoch": 0.46453989784811184, "grad_norm": 0.37587475776672363, "learning_rate": 0.00018842507369942358, "loss": 11.674, "step": 22192 }, { "epoch": 0.46456083061207404, "grad_norm": 0.33425605297088623, "learning_rate": 0.00018842404973800738, "loss": 11.6769, "step": 22193 }, { "epoch": 0.4645817633760362, "grad_norm": 0.30288293957710266, "learning_rate": 0.00018842302573408398, "loss": 11.6701, "step": 22194 }, { "epoch": 0.46460269613999833, "grad_norm": 0.29949963092803955, "learning_rate": 0.00018842200168765386, "loss": 11.6643, "step": 22195 }, { "epoch": 0.4646236289039605, "grad_norm": 0.2684624195098877, "learning_rate": 0.0001884209775987175, "loss": 11.6777, "step": 22196 }, { "epoch": 0.4646445616679226, "grad_norm": 0.22965607047080994, "learning_rate": 0.0001884199534672754, "loss": 11.6731, "step": 22197 }, { "epoch": 0.46466549443188476, "grad_norm": 0.31343522667884827, "learning_rate": 0.00018841892929332806, "loss": 11.6622, "step": 22198 }, { "epoch": 0.46468642719584696, "grad_norm": 0.32884249091148376, "learning_rate": 0.000188417905076876, "loss": 11.6719, "step": 22199 }, { "epoch": 0.4647073599598091, "grad_norm": 0.2745945453643799, "learning_rate": 0.00018841688081791964, "loss": 11.6822, "step": 22200 }, { "epoch": 0.46472829272377125, "grad_norm": 0.27387261390686035, "learning_rate": 0.00018841585651645956, "loss": 11.681, "step": 22201 }, { "epoch": 0.4647492254877334, "grad_norm": 0.30352115631103516, "learning_rate": 0.00018841483217249617, "loss": 11.6684, "step": 22202 }, { "epoch": 0.46477015825169554, "grad_norm": 0.33896583318710327, "learning_rate": 0.00018841380778602997, "loss": 11.6551, "step": 22203 }, { "epoch": 0.4647910910156577, "grad_norm": 0.34049779176712036, "learning_rate": 0.0001884127833570615, "loss": 11.6795, "step": 22204 }, { "epoch": 0.4648120237796199, "grad_norm": 0.23301514983177185, "learning_rate": 0.00018841175888559129, "loss": 11.6807, "step": 22205 }, { "epoch": 0.46483295654358203, "grad_norm": 0.2627216875553131, "learning_rate": 0.00018841073437161972, "loss": 11.6831, "step": 22206 }, { "epoch": 0.4648538893075442, "grad_norm": 0.40617355704307556, "learning_rate": 0.00018840970981514737, "loss": 11.6744, "step": 22207 }, { "epoch": 0.4648748220715063, "grad_norm": 0.3778330683708191, "learning_rate": 0.00018840868521617467, "loss": 11.7013, "step": 22208 }, { "epoch": 0.46489575483546847, "grad_norm": 0.32983872294425964, "learning_rate": 0.00018840766057470217, "loss": 11.6674, "step": 22209 }, { "epoch": 0.4649166875994306, "grad_norm": 0.3501836657524109, "learning_rate": 0.0001884066358907303, "loss": 11.6741, "step": 22210 }, { "epoch": 0.46493762036339276, "grad_norm": 0.35625001788139343, "learning_rate": 0.00018840561116425963, "loss": 11.6527, "step": 22211 }, { "epoch": 0.46495855312735496, "grad_norm": 0.3692608177661896, "learning_rate": 0.00018840458639529057, "loss": 11.6637, "step": 22212 }, { "epoch": 0.4649794858913171, "grad_norm": 0.24141153693199158, "learning_rate": 0.00018840356158382368, "loss": 11.6871, "step": 22213 }, { "epoch": 0.46500041865527925, "grad_norm": 0.24053515493869781, "learning_rate": 0.00018840253672985943, "loss": 11.6506, "step": 22214 }, { "epoch": 0.4650213514192414, "grad_norm": 0.33497151732444763, "learning_rate": 0.0001884015118333983, "loss": 11.6583, "step": 22215 }, { "epoch": 0.46504228418320354, "grad_norm": 0.2796624004840851, "learning_rate": 0.0001884004868944408, "loss": 11.6751, "step": 22216 }, { "epoch": 0.4650632169471657, "grad_norm": 0.2928310036659241, "learning_rate": 0.00018839946191298742, "loss": 11.6703, "step": 22217 }, { "epoch": 0.4650841497111279, "grad_norm": 0.3078731894493103, "learning_rate": 0.0001883984368890386, "loss": 11.6785, "step": 22218 }, { "epoch": 0.46510508247509, "grad_norm": 0.4265209138393402, "learning_rate": 0.00018839741182259494, "loss": 11.6801, "step": 22219 }, { "epoch": 0.46512601523905217, "grad_norm": 0.30153191089630127, "learning_rate": 0.00018839638671365686, "loss": 11.6814, "step": 22220 }, { "epoch": 0.4651469480030143, "grad_norm": 0.2954835295677185, "learning_rate": 0.00018839536156222485, "loss": 11.675, "step": 22221 }, { "epoch": 0.46516788076697646, "grad_norm": 0.32564327120780945, "learning_rate": 0.00018839433636829938, "loss": 11.6543, "step": 22222 }, { "epoch": 0.4651888135309386, "grad_norm": 0.24355082213878632, "learning_rate": 0.00018839331113188105, "loss": 11.6768, "step": 22223 }, { "epoch": 0.4652097462949008, "grad_norm": 0.2792263925075531, "learning_rate": 0.00018839228585297026, "loss": 11.6783, "step": 22224 }, { "epoch": 0.46523067905886295, "grad_norm": 0.3039611876010895, "learning_rate": 0.0001883912605315675, "loss": 11.6721, "step": 22225 }, { "epoch": 0.4652516118228251, "grad_norm": 0.2675219774246216, "learning_rate": 0.00018839023516767332, "loss": 11.6627, "step": 22226 }, { "epoch": 0.46527254458678724, "grad_norm": 0.2716315686702728, "learning_rate": 0.0001883892097612882, "loss": 11.6783, "step": 22227 }, { "epoch": 0.4652934773507494, "grad_norm": 0.2902853190898895, "learning_rate": 0.00018838818431241258, "loss": 11.6553, "step": 22228 }, { "epoch": 0.46531441011471153, "grad_norm": 0.29114818572998047, "learning_rate": 0.00018838715882104698, "loss": 11.6846, "step": 22229 }, { "epoch": 0.4653353428786737, "grad_norm": 0.25938814878463745, "learning_rate": 0.00018838613328719193, "loss": 11.679, "step": 22230 }, { "epoch": 0.4653562756426359, "grad_norm": 2.529103994369507, "learning_rate": 0.00018838510771084787, "loss": 11.6323, "step": 22231 }, { "epoch": 0.465377208406598, "grad_norm": 0.26091477274894714, "learning_rate": 0.00018838408209201536, "loss": 11.6662, "step": 22232 }, { "epoch": 0.46539814117056016, "grad_norm": 0.27795469760894775, "learning_rate": 0.0001883830564306948, "loss": 11.6724, "step": 22233 }, { "epoch": 0.4654190739345223, "grad_norm": 0.33141621947288513, "learning_rate": 0.0001883820307268868, "loss": 11.6918, "step": 22234 }, { "epoch": 0.46544000669848445, "grad_norm": 0.25401267409324646, "learning_rate": 0.00018838100498059173, "loss": 11.6691, "step": 22235 }, { "epoch": 0.4654609394624466, "grad_norm": 0.4000544250011444, "learning_rate": 0.00018837997919181012, "loss": 11.6665, "step": 22236 }, { "epoch": 0.4654818722264088, "grad_norm": 0.3264957666397095, "learning_rate": 0.00018837895336054253, "loss": 11.6755, "step": 22237 }, { "epoch": 0.46550280499037094, "grad_norm": 0.3294881582260132, "learning_rate": 0.0001883779274867894, "loss": 11.6738, "step": 22238 }, { "epoch": 0.4655237377543331, "grad_norm": 0.2817206382751465, "learning_rate": 0.0001883769015705512, "loss": 11.6722, "step": 22239 }, { "epoch": 0.46554467051829523, "grad_norm": 0.36431974172592163, "learning_rate": 0.00018837587561182847, "loss": 11.6807, "step": 22240 }, { "epoch": 0.4655656032822574, "grad_norm": 0.31066223978996277, "learning_rate": 0.0001883748496106217, "loss": 11.6875, "step": 22241 }, { "epoch": 0.4655865360462195, "grad_norm": 0.26923784613609314, "learning_rate": 0.00018837382356693137, "loss": 11.6769, "step": 22242 }, { "epoch": 0.4656074688101817, "grad_norm": 0.31790462136268616, "learning_rate": 0.00018837279748075796, "loss": 11.6761, "step": 22243 }, { "epoch": 0.46562840157414387, "grad_norm": 0.25069791078567505, "learning_rate": 0.00018837177135210196, "loss": 11.6734, "step": 22244 }, { "epoch": 0.465649334338106, "grad_norm": 0.2609540820121765, "learning_rate": 0.0001883707451809639, "loss": 11.6801, "step": 22245 }, { "epoch": 0.46567026710206816, "grad_norm": 0.228155717253685, "learning_rate": 0.00018836971896734426, "loss": 11.6651, "step": 22246 }, { "epoch": 0.4656911998660303, "grad_norm": 0.2932340204715729, "learning_rate": 0.0001883686927112435, "loss": 11.6747, "step": 22247 }, { "epoch": 0.46571213262999245, "grad_norm": 0.3632166385650635, "learning_rate": 0.00018836766641266214, "loss": 11.6797, "step": 22248 }, { "epoch": 0.4657330653939546, "grad_norm": 0.26132798194885254, "learning_rate": 0.00018836664007160068, "loss": 11.6774, "step": 22249 }, { "epoch": 0.4657539981579168, "grad_norm": 0.2859576642513275, "learning_rate": 0.00018836561368805962, "loss": 11.6597, "step": 22250 }, { "epoch": 0.46577493092187894, "grad_norm": 0.26252663135528564, "learning_rate": 0.0001883645872620394, "loss": 11.6857, "step": 22251 }, { "epoch": 0.4657958636858411, "grad_norm": 0.24626298248767853, "learning_rate": 0.00018836356079354058, "loss": 11.6768, "step": 22252 }, { "epoch": 0.4658167964498032, "grad_norm": 0.2866884768009186, "learning_rate": 0.00018836253428256364, "loss": 11.6683, "step": 22253 }, { "epoch": 0.4658377292137654, "grad_norm": 0.26487037539482117, "learning_rate": 0.00018836150772910904, "loss": 11.6753, "step": 22254 }, { "epoch": 0.4658586619777275, "grad_norm": 0.38186371326446533, "learning_rate": 0.0001883604811331773, "loss": 11.6831, "step": 22255 }, { "epoch": 0.4658795947416897, "grad_norm": 0.2543579339981079, "learning_rate": 0.0001883594544947689, "loss": 11.6618, "step": 22256 }, { "epoch": 0.46590052750565186, "grad_norm": 0.2796942889690399, "learning_rate": 0.0001883584278138843, "loss": 11.6823, "step": 22257 }, { "epoch": 0.465921460269614, "grad_norm": 0.29514068365097046, "learning_rate": 0.0001883574010905241, "loss": 11.6693, "step": 22258 }, { "epoch": 0.46594239303357615, "grad_norm": 0.2910032570362091, "learning_rate": 0.0001883563743246887, "loss": 11.6554, "step": 22259 }, { "epoch": 0.4659633257975383, "grad_norm": 0.2348002940416336, "learning_rate": 0.0001883553475163786, "loss": 11.671, "step": 22260 }, { "epoch": 0.46598425856150044, "grad_norm": 0.3693271279335022, "learning_rate": 0.00018835432066559432, "loss": 11.6688, "step": 22261 }, { "epoch": 0.4660051913254626, "grad_norm": 0.3062680661678314, "learning_rate": 0.00018835329377233636, "loss": 11.6662, "step": 22262 }, { "epoch": 0.4660261240894248, "grad_norm": 0.2857762277126312, "learning_rate": 0.0001883522668366052, "loss": 11.6595, "step": 22263 }, { "epoch": 0.46604705685338693, "grad_norm": 0.26865506172180176, "learning_rate": 0.00018835123985840136, "loss": 11.6753, "step": 22264 }, { "epoch": 0.4660679896173491, "grad_norm": 0.2929082214832306, "learning_rate": 0.00018835021283772528, "loss": 11.6756, "step": 22265 }, { "epoch": 0.4660889223813112, "grad_norm": 0.29690009355545044, "learning_rate": 0.00018834918577457748, "loss": 11.6649, "step": 22266 }, { "epoch": 0.46610985514527337, "grad_norm": 0.30338218808174133, "learning_rate": 0.00018834815866895844, "loss": 11.6711, "step": 22267 }, { "epoch": 0.4661307879092355, "grad_norm": 0.3211306631565094, "learning_rate": 0.00018834713152086872, "loss": 11.6837, "step": 22268 }, { "epoch": 0.4661517206731977, "grad_norm": 0.34603872895240784, "learning_rate": 0.00018834610433030873, "loss": 11.6808, "step": 22269 }, { "epoch": 0.46617265343715986, "grad_norm": 0.2787310779094696, "learning_rate": 0.000188345077097279, "loss": 11.6768, "step": 22270 }, { "epoch": 0.466193586201122, "grad_norm": 0.35523322224617004, "learning_rate": 0.00018834404982178001, "loss": 11.6685, "step": 22271 }, { "epoch": 0.46621451896508415, "grad_norm": 0.3139185309410095, "learning_rate": 0.00018834302250381229, "loss": 11.6758, "step": 22272 }, { "epoch": 0.4662354517290463, "grad_norm": 0.3481951355934143, "learning_rate": 0.0001883419951433763, "loss": 11.6686, "step": 22273 }, { "epoch": 0.46625638449300844, "grad_norm": 0.33397427201271057, "learning_rate": 0.00018834096774047253, "loss": 11.6697, "step": 22274 }, { "epoch": 0.46627731725697064, "grad_norm": 0.28722456097602844, "learning_rate": 0.00018833994029510148, "loss": 11.6689, "step": 22275 }, { "epoch": 0.4662982500209328, "grad_norm": 0.2767803966999054, "learning_rate": 0.00018833891280726367, "loss": 11.6437, "step": 22276 }, { "epoch": 0.4663191827848949, "grad_norm": 0.26390108466148376, "learning_rate": 0.0001883378852769596, "loss": 11.6737, "step": 22277 }, { "epoch": 0.46634011554885707, "grad_norm": 0.3848054111003876, "learning_rate": 0.0001883368577041897, "loss": 11.6757, "step": 22278 }, { "epoch": 0.4663610483128192, "grad_norm": 0.36688515543937683, "learning_rate": 0.0001883358300889545, "loss": 11.66, "step": 22279 }, { "epoch": 0.46638198107678136, "grad_norm": 0.22083193063735962, "learning_rate": 0.00018833480243125452, "loss": 11.6759, "step": 22280 }, { "epoch": 0.4664029138407435, "grad_norm": 0.25952741503715515, "learning_rate": 0.0001883337747310902, "loss": 11.666, "step": 22281 }, { "epoch": 0.4664238466047057, "grad_norm": 0.347054660320282, "learning_rate": 0.0001883327469884621, "loss": 11.6664, "step": 22282 }, { "epoch": 0.46644477936866785, "grad_norm": 0.27745139598846436, "learning_rate": 0.00018833171920337063, "loss": 11.6789, "step": 22283 }, { "epoch": 0.46646571213263, "grad_norm": 0.2685125768184662, "learning_rate": 0.00018833069137581636, "loss": 11.6845, "step": 22284 }, { "epoch": 0.46648664489659214, "grad_norm": 0.2562650144100189, "learning_rate": 0.00018832966350579977, "loss": 11.6529, "step": 22285 }, { "epoch": 0.4665075776605543, "grad_norm": 0.25975555181503296, "learning_rate": 0.00018832863559332133, "loss": 11.6795, "step": 22286 }, { "epoch": 0.46652851042451643, "grad_norm": 0.2939459979534149, "learning_rate": 0.00018832760763838152, "loss": 11.6789, "step": 22287 }, { "epoch": 0.46654944318847863, "grad_norm": 0.2777861952781677, "learning_rate": 0.0001883265796409809, "loss": 11.6689, "step": 22288 }, { "epoch": 0.4665703759524408, "grad_norm": 0.2407669872045517, "learning_rate": 0.0001883255516011199, "loss": 11.6758, "step": 22289 }, { "epoch": 0.4665913087164029, "grad_norm": 0.26500532031059265, "learning_rate": 0.00018832452351879905, "loss": 11.6738, "step": 22290 }, { "epoch": 0.46661224148036506, "grad_norm": 0.2673848271369934, "learning_rate": 0.00018832349539401884, "loss": 11.6862, "step": 22291 }, { "epoch": 0.4666331742443272, "grad_norm": 0.24630145728588104, "learning_rate": 0.00018832246722677974, "loss": 11.6688, "step": 22292 }, { "epoch": 0.46665410700828935, "grad_norm": 0.30227452516555786, "learning_rate": 0.00018832143901708227, "loss": 11.6801, "step": 22293 }, { "epoch": 0.46667503977225155, "grad_norm": 2.3995120525360107, "learning_rate": 0.0001883204107649269, "loss": 11.7308, "step": 22294 }, { "epoch": 0.4666959725362137, "grad_norm": 0.2350531816482544, "learning_rate": 0.00018831938247031412, "loss": 11.6539, "step": 22295 }, { "epoch": 0.46671690530017584, "grad_norm": 0.3681320548057556, "learning_rate": 0.00018831835413324447, "loss": 11.6841, "step": 22296 }, { "epoch": 0.466737838064138, "grad_norm": 0.2495521903038025, "learning_rate": 0.0001883173257537184, "loss": 11.6539, "step": 22297 }, { "epoch": 0.46675877082810013, "grad_norm": 0.2637363374233246, "learning_rate": 0.00018831629733173647, "loss": 11.6754, "step": 22298 }, { "epoch": 0.4667797035920623, "grad_norm": 0.29943084716796875, "learning_rate": 0.00018831526886729907, "loss": 11.6705, "step": 22299 }, { "epoch": 0.4668006363560244, "grad_norm": 0.2987369894981384, "learning_rate": 0.00018831424036040676, "loss": 11.6712, "step": 22300 }, { "epoch": 0.4668215691199866, "grad_norm": 0.2452995777130127, "learning_rate": 0.00018831321181106002, "loss": 11.6666, "step": 22301 }, { "epoch": 0.46684250188394877, "grad_norm": 0.25192365050315857, "learning_rate": 0.0001883121832192594, "loss": 11.6783, "step": 22302 }, { "epoch": 0.4668634346479109, "grad_norm": 0.24244236946105957, "learning_rate": 0.0001883111545850053, "loss": 11.666, "step": 22303 }, { "epoch": 0.46688436741187306, "grad_norm": 0.2435237616300583, "learning_rate": 0.00018831012590829826, "loss": 11.6909, "step": 22304 }, { "epoch": 0.4669053001758352, "grad_norm": 0.2651016414165497, "learning_rate": 0.0001883090971891388, "loss": 11.6603, "step": 22305 }, { "epoch": 0.46692623293979735, "grad_norm": 0.2651173174381256, "learning_rate": 0.00018830806842752736, "loss": 11.6662, "step": 22306 }, { "epoch": 0.46694716570375955, "grad_norm": 0.36641576886177063, "learning_rate": 0.00018830703962346447, "loss": 11.6912, "step": 22307 }, { "epoch": 0.4669680984677217, "grad_norm": 0.5672101378440857, "learning_rate": 0.0001883060107769506, "loss": 11.6176, "step": 22308 }, { "epoch": 0.46698903123168384, "grad_norm": 0.2900274097919464, "learning_rate": 0.0001883049818879863, "loss": 11.6675, "step": 22309 }, { "epoch": 0.467009963995646, "grad_norm": 0.24413779377937317, "learning_rate": 0.000188303952956572, "loss": 11.6704, "step": 22310 }, { "epoch": 0.4670308967596081, "grad_norm": 0.3409479856491089, "learning_rate": 0.0001883029239827082, "loss": 11.6846, "step": 22311 }, { "epoch": 0.46705182952357027, "grad_norm": 0.29059603810310364, "learning_rate": 0.00018830189496639547, "loss": 11.6789, "step": 22312 }, { "epoch": 0.4670727622875325, "grad_norm": 0.31314757466316223, "learning_rate": 0.0001883008659076342, "loss": 11.6658, "step": 22313 }, { "epoch": 0.4670936950514946, "grad_norm": 0.27907824516296387, "learning_rate": 0.00018829983680642494, "loss": 11.6678, "step": 22314 }, { "epoch": 0.46711462781545676, "grad_norm": 0.29271262884140015, "learning_rate": 0.00018829880766276817, "loss": 11.6887, "step": 22315 }, { "epoch": 0.4671355605794189, "grad_norm": 0.23365309834480286, "learning_rate": 0.0001882977784766644, "loss": 11.6596, "step": 22316 }, { "epoch": 0.46715649334338105, "grad_norm": 0.2857743501663208, "learning_rate": 0.00018829674924811417, "loss": 11.6751, "step": 22317 }, { "epoch": 0.4671774261073432, "grad_norm": 0.38517457246780396, "learning_rate": 0.00018829571997711788, "loss": 11.6899, "step": 22318 }, { "epoch": 0.46719835887130534, "grad_norm": 0.3454497158527374, "learning_rate": 0.00018829469066367605, "loss": 11.6684, "step": 22319 }, { "epoch": 0.46721929163526754, "grad_norm": 0.2856758236885071, "learning_rate": 0.00018829366130778923, "loss": 11.6802, "step": 22320 }, { "epoch": 0.4672402243992297, "grad_norm": 0.32808852195739746, "learning_rate": 0.00018829263190945787, "loss": 11.6691, "step": 22321 }, { "epoch": 0.46726115716319183, "grad_norm": 0.275694876909256, "learning_rate": 0.00018829160246868244, "loss": 11.6748, "step": 22322 }, { "epoch": 0.467282089927154, "grad_norm": 0.2340281903743744, "learning_rate": 0.00018829057298546352, "loss": 11.6637, "step": 22323 }, { "epoch": 0.4673030226911161, "grad_norm": 0.30018049478530884, "learning_rate": 0.00018828954345980153, "loss": 11.6701, "step": 22324 }, { "epoch": 0.46732395545507827, "grad_norm": 0.3350431025028229, "learning_rate": 0.00018828851389169696, "loss": 11.6726, "step": 22325 }, { "epoch": 0.46734488821904047, "grad_norm": 0.24801714718341827, "learning_rate": 0.00018828748428115035, "loss": 11.6721, "step": 22326 }, { "epoch": 0.4673658209830026, "grad_norm": 0.29767045378685, "learning_rate": 0.0001882864546281622, "loss": 11.6496, "step": 22327 }, { "epoch": 0.46738675374696476, "grad_norm": 0.29379764199256897, "learning_rate": 0.00018828542493273297, "loss": 11.6659, "step": 22328 }, { "epoch": 0.4674076865109269, "grad_norm": 0.3813900947570801, "learning_rate": 0.00018828439519486315, "loss": 11.6594, "step": 22329 }, { "epoch": 0.46742861927488905, "grad_norm": 0.3171677589416504, "learning_rate": 0.00018828336541455327, "loss": 11.6745, "step": 22330 }, { "epoch": 0.4674495520388512, "grad_norm": 0.32039782404899597, "learning_rate": 0.0001882823355918038, "loss": 11.669, "step": 22331 }, { "epoch": 0.4674704848028134, "grad_norm": 0.30688023567199707, "learning_rate": 0.00018828130572661522, "loss": 11.67, "step": 22332 }, { "epoch": 0.46749141756677554, "grad_norm": 0.3265455663204193, "learning_rate": 0.00018828027581898807, "loss": 11.6725, "step": 22333 }, { "epoch": 0.4675123503307377, "grad_norm": 0.31754979491233826, "learning_rate": 0.00018827924586892281, "loss": 11.6725, "step": 22334 }, { "epoch": 0.4675332830946998, "grad_norm": 0.2518472671508789, "learning_rate": 0.00018827821587642, "loss": 11.6625, "step": 22335 }, { "epoch": 0.46755421585866197, "grad_norm": 0.4289446771144867, "learning_rate": 0.00018827718584148002, "loss": 11.6715, "step": 22336 }, { "epoch": 0.4675751486226241, "grad_norm": 0.2696029841899872, "learning_rate": 0.00018827615576410344, "loss": 11.6831, "step": 22337 }, { "epoch": 0.46759608138658626, "grad_norm": 0.29201364517211914, "learning_rate": 0.00018827512564429074, "loss": 11.6697, "step": 22338 }, { "epoch": 0.46761701415054846, "grad_norm": 0.3150271773338318, "learning_rate": 0.00018827409548204243, "loss": 11.668, "step": 22339 }, { "epoch": 0.4676379469145106, "grad_norm": 0.26449212431907654, "learning_rate": 0.000188273065277359, "loss": 11.6712, "step": 22340 }, { "epoch": 0.46765887967847275, "grad_norm": 0.2822475731372833, "learning_rate": 0.00018827203503024093, "loss": 11.6808, "step": 22341 }, { "epoch": 0.4676798124424349, "grad_norm": 0.2850726842880249, "learning_rate": 0.00018827100474068872, "loss": 11.6605, "step": 22342 }, { "epoch": 0.46770074520639704, "grad_norm": 0.26337701082229614, "learning_rate": 0.00018826997440870286, "loss": 11.6863, "step": 22343 }, { "epoch": 0.4677216779703592, "grad_norm": 0.2664280831813812, "learning_rate": 0.00018826894403428389, "loss": 11.6509, "step": 22344 }, { "epoch": 0.4677426107343214, "grad_norm": 0.3086702823638916, "learning_rate": 0.00018826791361743224, "loss": 11.679, "step": 22345 }, { "epoch": 0.46776354349828353, "grad_norm": 0.25823673605918884, "learning_rate": 0.00018826688315814845, "loss": 11.6732, "step": 22346 }, { "epoch": 0.4677844762622457, "grad_norm": 0.3887316882610321, "learning_rate": 0.00018826585265643297, "loss": 11.6651, "step": 22347 }, { "epoch": 0.4678054090262078, "grad_norm": 0.2658684253692627, "learning_rate": 0.00018826482211228637, "loss": 11.6806, "step": 22348 }, { "epoch": 0.46782634179016996, "grad_norm": 0.25079861283302307, "learning_rate": 0.00018826379152570905, "loss": 11.6682, "step": 22349 }, { "epoch": 0.4678472745541321, "grad_norm": 0.25552797317504883, "learning_rate": 0.0001882627608967016, "loss": 11.6691, "step": 22350 }, { "epoch": 0.46786820731809425, "grad_norm": 0.24897173047065735, "learning_rate": 0.00018826173022526446, "loss": 11.6773, "step": 22351 }, { "epoch": 0.46788914008205645, "grad_norm": 0.2949374318122864, "learning_rate": 0.00018826069951139815, "loss": 11.6639, "step": 22352 }, { "epoch": 0.4679100728460186, "grad_norm": 0.2377578765153885, "learning_rate": 0.00018825966875510314, "loss": 11.6528, "step": 22353 }, { "epoch": 0.46793100560998074, "grad_norm": 0.2715628445148468, "learning_rate": 0.00018825863795637993, "loss": 11.661, "step": 22354 }, { "epoch": 0.4679519383739429, "grad_norm": 0.2397366166114807, "learning_rate": 0.00018825760711522902, "loss": 11.6693, "step": 22355 }, { "epoch": 0.46797287113790503, "grad_norm": 0.33882439136505127, "learning_rate": 0.00018825657623165092, "loss": 11.6906, "step": 22356 }, { "epoch": 0.4679938039018672, "grad_norm": 0.2718672454357147, "learning_rate": 0.00018825554530564614, "loss": 11.6859, "step": 22357 }, { "epoch": 0.4680147366658294, "grad_norm": 0.2850590646266937, "learning_rate": 0.0001882545143372151, "loss": 11.6783, "step": 22358 }, { "epoch": 0.4680356694297915, "grad_norm": 0.2528208792209625, "learning_rate": 0.00018825348332635836, "loss": 11.6758, "step": 22359 }, { "epoch": 0.46805660219375367, "grad_norm": 0.3054655194282532, "learning_rate": 0.00018825245227307644, "loss": 11.6843, "step": 22360 }, { "epoch": 0.4680775349577158, "grad_norm": 0.3622815012931824, "learning_rate": 0.0001882514211773698, "loss": 11.6782, "step": 22361 }, { "epoch": 0.46809846772167796, "grad_norm": 0.3158501088619232, "learning_rate": 0.00018825039003923888, "loss": 11.6757, "step": 22362 }, { "epoch": 0.4681194004856401, "grad_norm": 0.2636098265647888, "learning_rate": 0.00018824935885868426, "loss": 11.6857, "step": 22363 }, { "epoch": 0.4681403332496023, "grad_norm": 0.2365824282169342, "learning_rate": 0.0001882483276357064, "loss": 11.6639, "step": 22364 }, { "epoch": 0.46816126601356445, "grad_norm": 0.3470541834831238, "learning_rate": 0.0001882472963703058, "loss": 11.677, "step": 22365 }, { "epoch": 0.4681821987775266, "grad_norm": 0.2851426601409912, "learning_rate": 0.00018824626506248296, "loss": 11.6851, "step": 22366 }, { "epoch": 0.46820313154148874, "grad_norm": 0.24983225762844086, "learning_rate": 0.00018824523371223836, "loss": 11.6678, "step": 22367 }, { "epoch": 0.4682240643054509, "grad_norm": 0.2985585033893585, "learning_rate": 0.0001882442023195725, "loss": 11.6776, "step": 22368 }, { "epoch": 0.468244997069413, "grad_norm": 0.29635879397392273, "learning_rate": 0.00018824317088448592, "loss": 11.6515, "step": 22369 }, { "epoch": 0.46826592983337517, "grad_norm": 0.2657132148742676, "learning_rate": 0.00018824213940697905, "loss": 11.667, "step": 22370 }, { "epoch": 0.46828686259733737, "grad_norm": 0.2753105163574219, "learning_rate": 0.00018824110788705242, "loss": 11.6807, "step": 22371 }, { "epoch": 0.4683077953612995, "grad_norm": 0.289507120847702, "learning_rate": 0.00018824007632470654, "loss": 11.6652, "step": 22372 }, { "epoch": 0.46832872812526166, "grad_norm": 0.2965911328792572, "learning_rate": 0.00018823904471994185, "loss": 11.6833, "step": 22373 }, { "epoch": 0.4683496608892238, "grad_norm": 0.3444971442222595, "learning_rate": 0.0001882380130727589, "loss": 11.6802, "step": 22374 }, { "epoch": 0.46837059365318595, "grad_norm": 0.3113265931606293, "learning_rate": 0.00018823698138315816, "loss": 11.6862, "step": 22375 }, { "epoch": 0.4683915264171481, "grad_norm": 0.28206104040145874, "learning_rate": 0.00018823594965114014, "loss": 11.6769, "step": 22376 }, { "epoch": 0.4684124591811103, "grad_norm": 0.4054640531539917, "learning_rate": 0.00018823491787670537, "loss": 11.6646, "step": 22377 }, { "epoch": 0.46843339194507244, "grad_norm": 0.3084072470664978, "learning_rate": 0.00018823388605985426, "loss": 11.6859, "step": 22378 }, { "epoch": 0.4684543247090346, "grad_norm": 0.27381637692451477, "learning_rate": 0.0001882328542005874, "loss": 11.6663, "step": 22379 }, { "epoch": 0.46847525747299673, "grad_norm": 0.24988333880901337, "learning_rate": 0.0001882318222989052, "loss": 11.6737, "step": 22380 }, { "epoch": 0.4684961902369589, "grad_norm": 0.3048274517059326, "learning_rate": 0.00018823079035480816, "loss": 11.6641, "step": 22381 }, { "epoch": 0.468517123000921, "grad_norm": 0.2392805963754654, "learning_rate": 0.00018822975836829686, "loss": 11.6912, "step": 22382 }, { "epoch": 0.4685380557648832, "grad_norm": 0.3664546310901642, "learning_rate": 0.00018822872633937173, "loss": 11.6658, "step": 22383 }, { "epoch": 0.46855898852884537, "grad_norm": 0.2450728863477707, "learning_rate": 0.0001882276942680333, "loss": 11.6769, "step": 22384 }, { "epoch": 0.4685799212928075, "grad_norm": 0.35909998416900635, "learning_rate": 0.00018822666215428206, "loss": 11.6734, "step": 22385 }, { "epoch": 0.46860085405676966, "grad_norm": 0.3241637647151947, "learning_rate": 0.0001882256299981185, "loss": 11.6713, "step": 22386 }, { "epoch": 0.4686217868207318, "grad_norm": 0.3009476959705353, "learning_rate": 0.00018822459779954307, "loss": 11.68, "step": 22387 }, { "epoch": 0.46864271958469395, "grad_norm": 0.39344102144241333, "learning_rate": 0.00018822356555855634, "loss": 11.6598, "step": 22388 }, { "epoch": 0.4686636523486561, "grad_norm": 0.3096662163734436, "learning_rate": 0.00018822253327515872, "loss": 11.6735, "step": 22389 }, { "epoch": 0.4686845851126183, "grad_norm": 0.4014548063278198, "learning_rate": 0.00018822150094935082, "loss": 11.6713, "step": 22390 }, { "epoch": 0.46870551787658044, "grad_norm": 0.35506463050842285, "learning_rate": 0.00018822046858113308, "loss": 11.6608, "step": 22391 }, { "epoch": 0.4687264506405426, "grad_norm": 0.2886616289615631, "learning_rate": 0.00018821943617050596, "loss": 11.6779, "step": 22392 }, { "epoch": 0.4687473834045047, "grad_norm": 0.34719035029411316, "learning_rate": 0.00018821840371747, "loss": 11.6832, "step": 22393 }, { "epoch": 0.46876831616846687, "grad_norm": 0.28203317523002625, "learning_rate": 0.00018821737122202568, "loss": 11.6768, "step": 22394 }, { "epoch": 0.468789248932429, "grad_norm": 0.2922407388687134, "learning_rate": 0.0001882163386841735, "loss": 11.665, "step": 22395 }, { "epoch": 0.4688101816963912, "grad_norm": 0.27947402000427246, "learning_rate": 0.00018821530610391399, "loss": 11.6667, "step": 22396 }, { "epoch": 0.46883111446035336, "grad_norm": 0.2670000195503235, "learning_rate": 0.00018821427348124757, "loss": 11.6752, "step": 22397 }, { "epoch": 0.4688520472243155, "grad_norm": 1.6494944095611572, "learning_rate": 0.0001882132408161748, "loss": 11.5762, "step": 22398 }, { "epoch": 0.46887297998827765, "grad_norm": 0.24227093160152435, "learning_rate": 0.00018821220810869615, "loss": 11.6621, "step": 22399 }, { "epoch": 0.4688939127522398, "grad_norm": 0.29904791712760925, "learning_rate": 0.00018821117535881214, "loss": 11.6862, "step": 22400 }, { "epoch": 0.46891484551620194, "grad_norm": 0.26950904726982117, "learning_rate": 0.00018821014256652325, "loss": 11.6722, "step": 22401 }, { "epoch": 0.46893577828016414, "grad_norm": 0.2983117699623108, "learning_rate": 0.00018820910973182996, "loss": 11.6822, "step": 22402 }, { "epoch": 0.4689567110441263, "grad_norm": 0.3680393695831299, "learning_rate": 0.0001882080768547328, "loss": 11.6855, "step": 22403 }, { "epoch": 0.46897764380808843, "grad_norm": 0.27386683225631714, "learning_rate": 0.00018820704393523224, "loss": 11.6702, "step": 22404 }, { "epoch": 0.4689985765720506, "grad_norm": 0.2966345250606537, "learning_rate": 0.00018820601097332878, "loss": 11.6817, "step": 22405 }, { "epoch": 0.4690195093360127, "grad_norm": 0.3313226103782654, "learning_rate": 0.00018820497796902293, "loss": 11.6509, "step": 22406 }, { "epoch": 0.46904044209997486, "grad_norm": 0.282956600189209, "learning_rate": 0.00018820394492231519, "loss": 11.6713, "step": 22407 }, { "epoch": 0.469061374863937, "grad_norm": 1.3770294189453125, "learning_rate": 0.00018820291183320603, "loss": 11.6357, "step": 22408 }, { "epoch": 0.4690823076278992, "grad_norm": 0.3272992968559265, "learning_rate": 0.00018820187870169597, "loss": 11.6785, "step": 22409 }, { "epoch": 0.46910324039186135, "grad_norm": 0.30128055810928345, "learning_rate": 0.0001882008455277855, "loss": 11.6673, "step": 22410 }, { "epoch": 0.4691241731558235, "grad_norm": 0.28837522864341736, "learning_rate": 0.0001881998123114751, "loss": 11.6635, "step": 22411 }, { "epoch": 0.46914510591978564, "grad_norm": 0.3148269057273865, "learning_rate": 0.0001881987790527653, "loss": 11.6899, "step": 22412 }, { "epoch": 0.4691660386837478, "grad_norm": 0.33219778537750244, "learning_rate": 0.00018819774575165655, "loss": 11.6638, "step": 22413 }, { "epoch": 0.46918697144770993, "grad_norm": 0.3032262325286865, "learning_rate": 0.0001881967124081494, "loss": 11.6717, "step": 22414 }, { "epoch": 0.46920790421167213, "grad_norm": 0.36751362681388855, "learning_rate": 0.00018819567902224433, "loss": 11.6777, "step": 22415 }, { "epoch": 0.4692288369756343, "grad_norm": 0.2526295781135559, "learning_rate": 0.00018819464559394184, "loss": 11.6683, "step": 22416 }, { "epoch": 0.4692497697395964, "grad_norm": 0.2831745147705078, "learning_rate": 0.0001881936121232424, "loss": 11.661, "step": 22417 }, { "epoch": 0.46927070250355857, "grad_norm": 0.25799837708473206, "learning_rate": 0.0001881925786101465, "loss": 11.6773, "step": 22418 }, { "epoch": 0.4692916352675207, "grad_norm": 0.2712961733341217, "learning_rate": 0.00018819154505465472, "loss": 11.6807, "step": 22419 }, { "epoch": 0.46931256803148286, "grad_norm": 0.3899930715560913, "learning_rate": 0.00018819051145676745, "loss": 11.6727, "step": 22420 }, { "epoch": 0.46933350079544506, "grad_norm": 0.26573315262794495, "learning_rate": 0.00018818947781648523, "loss": 11.6588, "step": 22421 }, { "epoch": 0.4693544335594072, "grad_norm": 0.3178359568119049, "learning_rate": 0.00018818844413380858, "loss": 11.6393, "step": 22422 }, { "epoch": 0.46937536632336935, "grad_norm": 0.25545844435691833, "learning_rate": 0.000188187410408738, "loss": 11.6706, "step": 22423 }, { "epoch": 0.4693962990873315, "grad_norm": 0.25578945875167847, "learning_rate": 0.00018818637664127393, "loss": 11.6547, "step": 22424 }, { "epoch": 0.46941723185129364, "grad_norm": 0.23750746250152588, "learning_rate": 0.0001881853428314169, "loss": 11.6663, "step": 22425 }, { "epoch": 0.4694381646152558, "grad_norm": 0.26381388306617737, "learning_rate": 0.0001881843089791674, "loss": 11.6547, "step": 22426 }, { "epoch": 0.4694590973792179, "grad_norm": 0.27639684081077576, "learning_rate": 0.00018818327508452596, "loss": 11.6807, "step": 22427 }, { "epoch": 0.4694800301431801, "grad_norm": 0.2833544611930847, "learning_rate": 0.00018818224114749304, "loss": 11.6703, "step": 22428 }, { "epoch": 0.46950096290714227, "grad_norm": 0.2565378248691559, "learning_rate": 0.00018818120716806918, "loss": 11.6736, "step": 22429 }, { "epoch": 0.4695218956711044, "grad_norm": 0.31136512756347656, "learning_rate": 0.0001881801731462548, "loss": 11.6861, "step": 22430 }, { "epoch": 0.46954282843506656, "grad_norm": 0.33615753054618835, "learning_rate": 0.00018817913908205047, "loss": 11.6585, "step": 22431 }, { "epoch": 0.4695637611990287, "grad_norm": 0.2542175054550171, "learning_rate": 0.00018817810497545666, "loss": 11.6623, "step": 22432 }, { "epoch": 0.46958469396299085, "grad_norm": 0.2350992113351822, "learning_rate": 0.00018817707082647387, "loss": 11.6673, "step": 22433 }, { "epoch": 0.46960562672695305, "grad_norm": 0.26560646295547485, "learning_rate": 0.00018817603663510258, "loss": 11.6798, "step": 22434 }, { "epoch": 0.4696265594909152, "grad_norm": 0.24882540106773376, "learning_rate": 0.00018817500240134332, "loss": 11.6759, "step": 22435 }, { "epoch": 0.46964749225487734, "grad_norm": 0.2654358744621277, "learning_rate": 0.00018817396812519655, "loss": 11.684, "step": 22436 }, { "epoch": 0.4696684250188395, "grad_norm": 0.285067081451416, "learning_rate": 0.00018817293380666281, "loss": 11.6692, "step": 22437 }, { "epoch": 0.46968935778280163, "grad_norm": 0.25334012508392334, "learning_rate": 0.00018817189944574255, "loss": 11.6753, "step": 22438 }, { "epoch": 0.4697102905467638, "grad_norm": 0.32826560735702515, "learning_rate": 0.00018817086504243632, "loss": 11.673, "step": 22439 }, { "epoch": 0.4697312233107259, "grad_norm": 0.30366745591163635, "learning_rate": 0.00018816983059674453, "loss": 11.6737, "step": 22440 }, { "epoch": 0.4697521560746881, "grad_norm": 0.2674807608127594, "learning_rate": 0.0001881687961086678, "loss": 11.6586, "step": 22441 }, { "epoch": 0.46977308883865027, "grad_norm": 0.3325756788253784, "learning_rate": 0.00018816776157820652, "loss": 11.686, "step": 22442 }, { "epoch": 0.4697940216026124, "grad_norm": 0.3458734154701233, "learning_rate": 0.00018816672700536127, "loss": 11.6596, "step": 22443 }, { "epoch": 0.46981495436657456, "grad_norm": 0.395728200674057, "learning_rate": 0.0001881656923901325, "loss": 11.6983, "step": 22444 }, { "epoch": 0.4698358871305367, "grad_norm": 0.2469976544380188, "learning_rate": 0.00018816465773252068, "loss": 11.6758, "step": 22445 }, { "epoch": 0.46985681989449884, "grad_norm": 0.3589913249015808, "learning_rate": 0.00018816362303252635, "loss": 11.6712, "step": 22446 }, { "epoch": 0.46987775265846105, "grad_norm": 0.3133672773838043, "learning_rate": 0.00018816258829015, "loss": 11.6635, "step": 22447 }, { "epoch": 0.4698986854224232, "grad_norm": 0.2646941840648651, "learning_rate": 0.00018816155350539216, "loss": 11.6832, "step": 22448 }, { "epoch": 0.46991961818638533, "grad_norm": 0.2664491832256317, "learning_rate": 0.00018816051867825325, "loss": 11.6808, "step": 22449 }, { "epoch": 0.4699405509503475, "grad_norm": 0.33223313093185425, "learning_rate": 0.00018815948380873385, "loss": 11.6621, "step": 22450 }, { "epoch": 0.4699614837143096, "grad_norm": 0.320563942193985, "learning_rate": 0.0001881584488968344, "loss": 11.6656, "step": 22451 }, { "epoch": 0.46998241647827177, "grad_norm": 0.3149555027484894, "learning_rate": 0.00018815741394255544, "loss": 11.667, "step": 22452 }, { "epoch": 0.47000334924223397, "grad_norm": 0.29581624269485474, "learning_rate": 0.00018815637894589743, "loss": 11.6743, "step": 22453 }, { "epoch": 0.4700242820061961, "grad_norm": 0.2777519226074219, "learning_rate": 0.00018815534390686085, "loss": 11.6744, "step": 22454 }, { "epoch": 0.47004521477015826, "grad_norm": 0.31201866269111633, "learning_rate": 0.00018815430882544626, "loss": 11.686, "step": 22455 }, { "epoch": 0.4700661475341204, "grad_norm": 0.24484367668628693, "learning_rate": 0.00018815327370165413, "loss": 11.6724, "step": 22456 }, { "epoch": 0.47008708029808255, "grad_norm": 0.258389413356781, "learning_rate": 0.00018815223853548493, "loss": 11.6666, "step": 22457 }, { "epoch": 0.4701080130620447, "grad_norm": 0.31738758087158203, "learning_rate": 0.0001881512033269392, "loss": 11.6659, "step": 22458 }, { "epoch": 0.47012894582600684, "grad_norm": 0.35640057921409607, "learning_rate": 0.00018815016807601745, "loss": 11.6733, "step": 22459 }, { "epoch": 0.47014987858996904, "grad_norm": 0.2730557918548584, "learning_rate": 0.0001881491327827201, "loss": 11.6808, "step": 22460 }, { "epoch": 0.4701708113539312, "grad_norm": 0.3967924118041992, "learning_rate": 0.0001881480974470477, "loss": 11.6779, "step": 22461 }, { "epoch": 0.47019174411789333, "grad_norm": 0.23817794024944305, "learning_rate": 0.00018814706206900076, "loss": 11.6567, "step": 22462 }, { "epoch": 0.4702126768818555, "grad_norm": 0.8620221018791199, "learning_rate": 0.00018814602664857976, "loss": 11.6852, "step": 22463 }, { "epoch": 0.4702336096458176, "grad_norm": 0.27883851528167725, "learning_rate": 0.0001881449911857852, "loss": 11.6744, "step": 22464 }, { "epoch": 0.47025454240977976, "grad_norm": 0.2695262134075165, "learning_rate": 0.0001881439556806176, "loss": 11.6801, "step": 22465 }, { "epoch": 0.47027547517374196, "grad_norm": 0.2722018361091614, "learning_rate": 0.0001881429201330774, "loss": 11.6683, "step": 22466 }, { "epoch": 0.4702964079377041, "grad_norm": 0.30834004282951355, "learning_rate": 0.00018814188454316515, "loss": 11.678, "step": 22467 }, { "epoch": 0.47031734070166625, "grad_norm": 0.21898643672466278, "learning_rate": 0.0001881408489108813, "loss": 11.6608, "step": 22468 }, { "epoch": 0.4703382734656284, "grad_norm": 0.26940298080444336, "learning_rate": 0.0001881398132362264, "loss": 11.6703, "step": 22469 }, { "epoch": 0.47035920622959054, "grad_norm": 0.25579607486724854, "learning_rate": 0.0001881387775192009, "loss": 11.6583, "step": 22470 }, { "epoch": 0.4703801389935527, "grad_norm": 0.35870862007141113, "learning_rate": 0.00018813774175980536, "loss": 11.6704, "step": 22471 }, { "epoch": 0.4704010717575149, "grad_norm": 0.3923488259315491, "learning_rate": 0.00018813670595804022, "loss": 11.6856, "step": 22472 }, { "epoch": 0.47042200452147703, "grad_norm": 0.30613669753074646, "learning_rate": 0.00018813567011390602, "loss": 11.6869, "step": 22473 }, { "epoch": 0.4704429372854392, "grad_norm": 0.32482966780662537, "learning_rate": 0.00018813463422740322, "loss": 11.6769, "step": 22474 }, { "epoch": 0.4704638700494013, "grad_norm": 0.27199676632881165, "learning_rate": 0.00018813359829853235, "loss": 11.6721, "step": 22475 }, { "epoch": 0.47048480281336347, "grad_norm": 0.3570830225944519, "learning_rate": 0.00018813256232729387, "loss": 11.6655, "step": 22476 }, { "epoch": 0.4705057355773256, "grad_norm": 0.25092098116874695, "learning_rate": 0.00018813152631368832, "loss": 11.684, "step": 22477 }, { "epoch": 0.47052666834128776, "grad_norm": 0.2966982126235962, "learning_rate": 0.00018813049025771615, "loss": 11.6638, "step": 22478 }, { "epoch": 0.47054760110524996, "grad_norm": 0.2453794628381729, "learning_rate": 0.00018812945415937793, "loss": 11.6687, "step": 22479 }, { "epoch": 0.4705685338692121, "grad_norm": 0.35128188133239746, "learning_rate": 0.00018812841801867406, "loss": 11.6833, "step": 22480 }, { "epoch": 0.47058946663317425, "grad_norm": 0.29174748063087463, "learning_rate": 0.00018812738183560514, "loss": 11.6765, "step": 22481 }, { "epoch": 0.4706103993971364, "grad_norm": 0.22586089372634888, "learning_rate": 0.0001881263456101716, "loss": 11.6789, "step": 22482 }, { "epoch": 0.47063133216109854, "grad_norm": 0.28355512022972107, "learning_rate": 0.000188125309342374, "loss": 11.6799, "step": 22483 }, { "epoch": 0.4706522649250607, "grad_norm": 0.31245070695877075, "learning_rate": 0.00018812427303221277, "loss": 11.6619, "step": 22484 }, { "epoch": 0.4706731976890229, "grad_norm": 0.2704978585243225, "learning_rate": 0.0001881232366796884, "loss": 11.6834, "step": 22485 }, { "epoch": 0.470694130452985, "grad_norm": 0.25774216651916504, "learning_rate": 0.00018812220028480145, "loss": 11.6817, "step": 22486 }, { "epoch": 0.47071506321694717, "grad_norm": 0.29405054450035095, "learning_rate": 0.0001881211638475524, "loss": 11.6717, "step": 22487 }, { "epoch": 0.4707359959809093, "grad_norm": 0.30363699793815613, "learning_rate": 0.00018812012736794172, "loss": 11.6756, "step": 22488 }, { "epoch": 0.47075692874487146, "grad_norm": 0.37442174553871155, "learning_rate": 0.00018811909084596998, "loss": 11.6786, "step": 22489 }, { "epoch": 0.4707778615088336, "grad_norm": 0.3524031639099121, "learning_rate": 0.0001881180542816376, "loss": 11.6683, "step": 22490 }, { "epoch": 0.4707987942727958, "grad_norm": 0.3066371977329254, "learning_rate": 0.00018811701767494507, "loss": 11.6784, "step": 22491 }, { "epoch": 0.47081972703675795, "grad_norm": 0.33341434597969055, "learning_rate": 0.00018811598102589296, "loss": 11.6858, "step": 22492 }, { "epoch": 0.4708406598007201, "grad_norm": 0.2564990520477295, "learning_rate": 0.00018811494433448173, "loss": 11.6711, "step": 22493 }, { "epoch": 0.47086159256468224, "grad_norm": 0.3065997064113617, "learning_rate": 0.00018811390760071188, "loss": 11.6735, "step": 22494 }, { "epoch": 0.4708825253286444, "grad_norm": 0.26542484760284424, "learning_rate": 0.0001881128708245839, "loss": 11.6636, "step": 22495 }, { "epoch": 0.47090345809260653, "grad_norm": 0.23678435385227203, "learning_rate": 0.00018811183400609832, "loss": 11.6719, "step": 22496 }, { "epoch": 0.4709243908565687, "grad_norm": 0.2678017020225525, "learning_rate": 0.00018811079714525558, "loss": 11.656, "step": 22497 }, { "epoch": 0.4709453236205309, "grad_norm": 0.25985074043273926, "learning_rate": 0.00018810976024205624, "loss": 11.6749, "step": 22498 }, { "epoch": 0.470966256384493, "grad_norm": 0.2919669449329376, "learning_rate": 0.00018810872329650077, "loss": 11.6671, "step": 22499 }, { "epoch": 0.47098718914845517, "grad_norm": 0.40581297874450684, "learning_rate": 0.00018810768630858965, "loss": 11.6814, "step": 22500 }, { "epoch": 0.4710081219124173, "grad_norm": 0.21895143389701843, "learning_rate": 0.0001881066492783234, "loss": 11.6474, "step": 22501 }, { "epoch": 0.47102905467637946, "grad_norm": 0.3591240644454956, "learning_rate": 0.00018810561220570254, "loss": 11.6844, "step": 22502 }, { "epoch": 0.4710499874403416, "grad_norm": 0.3073590099811554, "learning_rate": 0.00018810457509072754, "loss": 11.6665, "step": 22503 }, { "epoch": 0.4710709202043038, "grad_norm": 0.27536118030548096, "learning_rate": 0.0001881035379333989, "loss": 11.6683, "step": 22504 }, { "epoch": 0.47109185296826595, "grad_norm": 0.3000878095626831, "learning_rate": 0.00018810250073371715, "loss": 11.6571, "step": 22505 }, { "epoch": 0.4711127857322281, "grad_norm": 0.2606451213359833, "learning_rate": 0.00018810146349168273, "loss": 11.684, "step": 22506 }, { "epoch": 0.47113371849619023, "grad_norm": 0.28582295775413513, "learning_rate": 0.0001881004262072962, "loss": 11.6628, "step": 22507 }, { "epoch": 0.4711546512601524, "grad_norm": 0.26518499851226807, "learning_rate": 0.000188099388880558, "loss": 11.6765, "step": 22508 }, { "epoch": 0.4711755840241145, "grad_norm": 0.26724568009376526, "learning_rate": 0.00018809835151146866, "loss": 11.6676, "step": 22509 }, { "epoch": 0.4711965167880767, "grad_norm": 0.4711725115776062, "learning_rate": 0.0001880973141000287, "loss": 11.6672, "step": 22510 }, { "epoch": 0.47121744955203887, "grad_norm": 0.2752038240432739, "learning_rate": 0.00018809627664623856, "loss": 11.6621, "step": 22511 }, { "epoch": 0.471238382316001, "grad_norm": 0.4090932309627533, "learning_rate": 0.0001880952391500988, "loss": 11.6687, "step": 22512 }, { "epoch": 0.47125931507996316, "grad_norm": 0.25684764981269836, "learning_rate": 0.0001880942016116099, "loss": 11.6631, "step": 22513 }, { "epoch": 0.4712802478439253, "grad_norm": 0.25852397084236145, "learning_rate": 0.00018809316403077233, "loss": 11.6391, "step": 22514 }, { "epoch": 0.47130118060788745, "grad_norm": 0.26380982995033264, "learning_rate": 0.00018809212640758666, "loss": 11.6677, "step": 22515 }, { "epoch": 0.4713221133718496, "grad_norm": 0.2721475660800934, "learning_rate": 0.00018809108874205327, "loss": 11.6813, "step": 22516 }, { "epoch": 0.4713430461358118, "grad_norm": 0.303759902715683, "learning_rate": 0.00018809005103417276, "loss": 11.6755, "step": 22517 }, { "epoch": 0.47136397889977394, "grad_norm": 0.26487430930137634, "learning_rate": 0.0001880890132839456, "loss": 11.6562, "step": 22518 }, { "epoch": 0.4713849116637361, "grad_norm": 0.33114928007125854, "learning_rate": 0.00018808797549137228, "loss": 11.6808, "step": 22519 }, { "epoch": 0.47140584442769823, "grad_norm": 0.24294906854629517, "learning_rate": 0.0001880869376564533, "loss": 11.6888, "step": 22520 }, { "epoch": 0.4714267771916604, "grad_norm": 0.30351099371910095, "learning_rate": 0.00018808589977918917, "loss": 11.6733, "step": 22521 }, { "epoch": 0.4714477099556225, "grad_norm": 0.3093310594558716, "learning_rate": 0.00018808486185958038, "loss": 11.6897, "step": 22522 }, { "epoch": 0.4714686427195847, "grad_norm": 0.2676098942756653, "learning_rate": 0.00018808382389762744, "loss": 11.6597, "step": 22523 }, { "epoch": 0.47148957548354686, "grad_norm": 0.39458343386650085, "learning_rate": 0.00018808278589333083, "loss": 11.6705, "step": 22524 }, { "epoch": 0.471510508247509, "grad_norm": 0.46425867080688477, "learning_rate": 0.00018808174784669106, "loss": 11.6725, "step": 22525 }, { "epoch": 0.47153144101147115, "grad_norm": 0.3027702867984772, "learning_rate": 0.00018808070975770863, "loss": 11.6855, "step": 22526 }, { "epoch": 0.4715523737754333, "grad_norm": 0.2712778151035309, "learning_rate": 0.00018807967162638405, "loss": 11.6715, "step": 22527 }, { "epoch": 0.47157330653939544, "grad_norm": 0.27930882573127747, "learning_rate": 0.00018807863345271782, "loss": 11.6695, "step": 22528 }, { "epoch": 0.47159423930335764, "grad_norm": 0.2738517224788666, "learning_rate": 0.00018807759523671038, "loss": 11.6679, "step": 22529 }, { "epoch": 0.4716151720673198, "grad_norm": 0.32054322957992554, "learning_rate": 0.0001880765569783623, "loss": 11.6651, "step": 22530 }, { "epoch": 0.47163610483128193, "grad_norm": 0.2448633313179016, "learning_rate": 0.00018807551867767406, "loss": 11.6724, "step": 22531 }, { "epoch": 0.4716570375952441, "grad_norm": 0.27733081579208374, "learning_rate": 0.00018807448033464615, "loss": 11.672, "step": 22532 }, { "epoch": 0.4716779703592062, "grad_norm": 0.2605484127998352, "learning_rate": 0.00018807344194927907, "loss": 11.6627, "step": 22533 }, { "epoch": 0.47169890312316837, "grad_norm": 0.3172968924045563, "learning_rate": 0.00018807240352157332, "loss": 11.6684, "step": 22534 }, { "epoch": 0.4717198358871305, "grad_norm": 0.3118008077144623, "learning_rate": 0.0001880713650515294, "loss": 11.6767, "step": 22535 }, { "epoch": 0.4717407686510927, "grad_norm": 0.2845461964607239, "learning_rate": 0.0001880703265391478, "loss": 11.6552, "step": 22536 }, { "epoch": 0.47176170141505486, "grad_norm": 0.3179340660572052, "learning_rate": 0.00018806928798442905, "loss": 11.6652, "step": 22537 }, { "epoch": 0.471782634179017, "grad_norm": 0.35236287117004395, "learning_rate": 0.00018806824938737363, "loss": 11.6684, "step": 22538 }, { "epoch": 0.47180356694297915, "grad_norm": 0.2728155553340912, "learning_rate": 0.00018806721074798201, "loss": 11.6535, "step": 22539 }, { "epoch": 0.4718244997069413, "grad_norm": 0.2792477011680603, "learning_rate": 0.00018806617206625474, "loss": 11.6794, "step": 22540 }, { "epoch": 0.47184543247090344, "grad_norm": 0.2906886637210846, "learning_rate": 0.00018806513334219228, "loss": 11.6682, "step": 22541 }, { "epoch": 0.47186636523486564, "grad_norm": 0.29474779963493347, "learning_rate": 0.00018806409457579516, "loss": 11.6627, "step": 22542 }, { "epoch": 0.4718872979988278, "grad_norm": 0.2856033742427826, "learning_rate": 0.00018806305576706388, "loss": 11.6818, "step": 22543 }, { "epoch": 0.4719082307627899, "grad_norm": 0.3180253505706787, "learning_rate": 0.00018806201691599886, "loss": 11.6834, "step": 22544 }, { "epoch": 0.47192916352675207, "grad_norm": 0.3276878893375397, "learning_rate": 0.00018806097802260072, "loss": 11.6797, "step": 22545 }, { "epoch": 0.4719500962907142, "grad_norm": 0.30509838461875916, "learning_rate": 0.0001880599390868699, "loss": 11.6844, "step": 22546 }, { "epoch": 0.47197102905467636, "grad_norm": 0.24027079343795776, "learning_rate": 0.0001880589001088069, "loss": 11.6677, "step": 22547 }, { "epoch": 0.4719919618186385, "grad_norm": 0.23987966775894165, "learning_rate": 0.00018805786108841218, "loss": 11.6554, "step": 22548 }, { "epoch": 0.4720128945826007, "grad_norm": 0.27612271904945374, "learning_rate": 0.00018805682202568632, "loss": 11.6805, "step": 22549 }, { "epoch": 0.47203382734656285, "grad_norm": 0.26749387383461, "learning_rate": 0.0001880557829206298, "loss": 11.6619, "step": 22550 }, { "epoch": 0.472054760110525, "grad_norm": 0.26622065901756287, "learning_rate": 0.00018805474377324308, "loss": 11.6509, "step": 22551 }, { "epoch": 0.47207569287448714, "grad_norm": 0.29287782311439514, "learning_rate": 0.00018805370458352668, "loss": 11.6699, "step": 22552 }, { "epoch": 0.4720966256384493, "grad_norm": 0.3167881369590759, "learning_rate": 0.0001880526653514811, "loss": 11.695, "step": 22553 }, { "epoch": 0.47211755840241143, "grad_norm": 0.2753313183784485, "learning_rate": 0.0001880516260771068, "loss": 11.6848, "step": 22554 }, { "epoch": 0.47213849116637363, "grad_norm": 0.3041151463985443, "learning_rate": 0.00018805058676040436, "loss": 11.6868, "step": 22555 }, { "epoch": 0.4721594239303358, "grad_norm": 0.2460290640592575, "learning_rate": 0.00018804954740137423, "loss": 11.6664, "step": 22556 }, { "epoch": 0.4721803566942979, "grad_norm": 0.2704801559448242, "learning_rate": 0.00018804850800001696, "loss": 11.6725, "step": 22557 }, { "epoch": 0.47220128945826007, "grad_norm": 0.3077446222305298, "learning_rate": 0.00018804746855633292, "loss": 11.6687, "step": 22558 }, { "epoch": 0.4722222222222222, "grad_norm": 0.28453928232192993, "learning_rate": 0.00018804642907032276, "loss": 11.6762, "step": 22559 }, { "epoch": 0.47224315498618435, "grad_norm": 0.27350133657455444, "learning_rate": 0.0001880453895419869, "loss": 11.6788, "step": 22560 }, { "epoch": 0.47226408775014656, "grad_norm": 0.2863449454307556, "learning_rate": 0.00018804434997132588, "loss": 11.6668, "step": 22561 }, { "epoch": 0.4722850205141087, "grad_norm": 0.29824575781822205, "learning_rate": 0.00018804331035834014, "loss": 11.6717, "step": 22562 }, { "epoch": 0.47230595327807084, "grad_norm": 0.3160450756549835, "learning_rate": 0.00018804227070303025, "loss": 11.6683, "step": 22563 }, { "epoch": 0.472326886042033, "grad_norm": 0.28855830430984497, "learning_rate": 0.00018804123100539665, "loss": 11.6778, "step": 22564 }, { "epoch": 0.47234781880599513, "grad_norm": 0.2548512816429138, "learning_rate": 0.00018804019126543987, "loss": 11.6605, "step": 22565 }, { "epoch": 0.4723687515699573, "grad_norm": 0.30459219217300415, "learning_rate": 0.0001880391514831604, "loss": 11.6732, "step": 22566 }, { "epoch": 0.4723896843339194, "grad_norm": 0.30470767617225647, "learning_rate": 0.00018803811165855877, "loss": 11.6692, "step": 22567 }, { "epoch": 0.4724106170978816, "grad_norm": 0.265573114156723, "learning_rate": 0.00018803707179163546, "loss": 11.6475, "step": 22568 }, { "epoch": 0.47243154986184377, "grad_norm": 0.26523563265800476, "learning_rate": 0.00018803603188239095, "loss": 11.6563, "step": 22569 }, { "epoch": 0.4724524826258059, "grad_norm": 0.23561547696590424, "learning_rate": 0.00018803499193082574, "loss": 11.6693, "step": 22570 }, { "epoch": 0.47247341538976806, "grad_norm": 0.3000730276107788, "learning_rate": 0.00018803395193694035, "loss": 11.6711, "step": 22571 }, { "epoch": 0.4724943481537302, "grad_norm": 0.28370770812034607, "learning_rate": 0.0001880329119007353, "loss": 11.6733, "step": 22572 }, { "epoch": 0.47251528091769235, "grad_norm": 0.33800360560417175, "learning_rate": 0.00018803187182221104, "loss": 11.6654, "step": 22573 }, { "epoch": 0.47253621368165455, "grad_norm": 0.2974274158477783, "learning_rate": 0.0001880308317013681, "loss": 11.668, "step": 22574 }, { "epoch": 0.4725571464456167, "grad_norm": 0.3328206241130829, "learning_rate": 0.00018802979153820698, "loss": 11.6649, "step": 22575 }, { "epoch": 0.47257807920957884, "grad_norm": 0.3702801465988159, "learning_rate": 0.00018802875133272818, "loss": 11.6887, "step": 22576 }, { "epoch": 0.472599011973541, "grad_norm": 0.3142812252044678, "learning_rate": 0.00018802771108493218, "loss": 11.6845, "step": 22577 }, { "epoch": 0.47261994473750313, "grad_norm": 0.28472205996513367, "learning_rate": 0.0001880266707948195, "loss": 11.6662, "step": 22578 }, { "epoch": 0.4726408775014653, "grad_norm": 0.24168036878108978, "learning_rate": 0.00018802563046239063, "loss": 11.6764, "step": 22579 }, { "epoch": 0.4726618102654275, "grad_norm": 0.3114398717880249, "learning_rate": 0.0001880245900876461, "loss": 11.6802, "step": 22580 }, { "epoch": 0.4726827430293896, "grad_norm": 0.3664034903049469, "learning_rate": 0.00018802354967058636, "loss": 11.6814, "step": 22581 }, { "epoch": 0.47270367579335176, "grad_norm": 0.2686602473258972, "learning_rate": 0.00018802250921121197, "loss": 11.6595, "step": 22582 }, { "epoch": 0.4727246085573139, "grad_norm": 0.2858474850654602, "learning_rate": 0.00018802146870952336, "loss": 11.6718, "step": 22583 }, { "epoch": 0.47274554132127605, "grad_norm": 0.27541694045066833, "learning_rate": 0.00018802042816552106, "loss": 11.6744, "step": 22584 }, { "epoch": 0.4727664740852382, "grad_norm": 0.31120947003364563, "learning_rate": 0.00018801938757920562, "loss": 11.6819, "step": 22585 }, { "epoch": 0.47278740684920034, "grad_norm": 0.3326769173145294, "learning_rate": 0.00018801834695057745, "loss": 11.687, "step": 22586 }, { "epoch": 0.47280833961316254, "grad_norm": 0.31396612524986267, "learning_rate": 0.0001880173062796371, "loss": 11.6574, "step": 22587 }, { "epoch": 0.4728292723771247, "grad_norm": 0.2939000129699707, "learning_rate": 0.0001880162655663851, "loss": 11.662, "step": 22588 }, { "epoch": 0.47285020514108683, "grad_norm": 0.30099543929100037, "learning_rate": 0.00018801522481082188, "loss": 11.6551, "step": 22589 }, { "epoch": 0.472871137905049, "grad_norm": 0.22940661013126373, "learning_rate": 0.00018801418401294802, "loss": 11.6783, "step": 22590 }, { "epoch": 0.4728920706690111, "grad_norm": 0.31676337122917175, "learning_rate": 0.00018801314317276396, "loss": 11.6733, "step": 22591 }, { "epoch": 0.47291300343297327, "grad_norm": 0.2748604714870453, "learning_rate": 0.0001880121022902702, "loss": 11.674, "step": 22592 }, { "epoch": 0.47293393619693547, "grad_norm": 0.31656673550605774, "learning_rate": 0.00018801106136546726, "loss": 11.6686, "step": 22593 }, { "epoch": 0.4729548689608976, "grad_norm": 0.25559568405151367, "learning_rate": 0.00018801002039835568, "loss": 11.6634, "step": 22594 }, { "epoch": 0.47297580172485976, "grad_norm": 0.31910625100135803, "learning_rate": 0.00018800897938893587, "loss": 11.681, "step": 22595 }, { "epoch": 0.4729967344888219, "grad_norm": 0.29468241333961487, "learning_rate": 0.00018800793833720843, "loss": 11.671, "step": 22596 }, { "epoch": 0.47301766725278405, "grad_norm": 0.262393981218338, "learning_rate": 0.00018800689724317375, "loss": 11.6775, "step": 22597 }, { "epoch": 0.4730386000167462, "grad_norm": 0.2938395142555237, "learning_rate": 0.00018800585610683244, "loss": 11.6801, "step": 22598 }, { "epoch": 0.4730595327807084, "grad_norm": 0.2672419846057892, "learning_rate": 0.0001880048149281849, "loss": 11.686, "step": 22599 }, { "epoch": 0.47308046554467054, "grad_norm": 0.3848819136619568, "learning_rate": 0.00018800377370723173, "loss": 11.6667, "step": 22600 }, { "epoch": 0.4731013983086327, "grad_norm": 0.3756254017353058, "learning_rate": 0.00018800273244397339, "loss": 11.6755, "step": 22601 }, { "epoch": 0.4731223310725948, "grad_norm": 0.34435105323791504, "learning_rate": 0.00018800169113841034, "loss": 11.6801, "step": 22602 }, { "epoch": 0.47314326383655697, "grad_norm": 0.42199236154556274, "learning_rate": 0.00018800064979054314, "loss": 11.6647, "step": 22603 }, { "epoch": 0.4731641966005191, "grad_norm": 0.27321678400039673, "learning_rate": 0.00018799960840037224, "loss": 11.6824, "step": 22604 }, { "epoch": 0.47318512936448126, "grad_norm": 0.21038031578063965, "learning_rate": 0.00018799856696789818, "loss": 11.6675, "step": 22605 }, { "epoch": 0.47320606212844346, "grad_norm": 0.26433148980140686, "learning_rate": 0.00018799752549312143, "loss": 11.6743, "step": 22606 }, { "epoch": 0.4732269948924056, "grad_norm": 0.31944048404693604, "learning_rate": 0.0001879964839760425, "loss": 11.6661, "step": 22607 }, { "epoch": 0.47324792765636775, "grad_norm": 0.2596692442893982, "learning_rate": 0.00018799544241666192, "loss": 11.6755, "step": 22608 }, { "epoch": 0.4732688604203299, "grad_norm": 0.25039952993392944, "learning_rate": 0.00018799440081498014, "loss": 11.6629, "step": 22609 }, { "epoch": 0.47328979318429204, "grad_norm": 0.2513730823993683, "learning_rate": 0.00018799335917099773, "loss": 11.6745, "step": 22610 }, { "epoch": 0.4733107259482542, "grad_norm": 0.26030051708221436, "learning_rate": 0.00018799231748471514, "loss": 11.6691, "step": 22611 }, { "epoch": 0.4733316587122164, "grad_norm": 0.29429563879966736, "learning_rate": 0.00018799127575613284, "loss": 11.6921, "step": 22612 }, { "epoch": 0.47335259147617853, "grad_norm": 0.3489474952220917, "learning_rate": 0.0001879902339852514, "loss": 11.6725, "step": 22613 }, { "epoch": 0.4733735242401407, "grad_norm": 0.23059065639972687, "learning_rate": 0.00018798919217207128, "loss": 11.6671, "step": 22614 }, { "epoch": 0.4733944570041028, "grad_norm": 0.22642885148525238, "learning_rate": 0.000187988150316593, "loss": 11.6777, "step": 22615 }, { "epoch": 0.47341538976806496, "grad_norm": 0.3630163073539734, "learning_rate": 0.00018798710841881706, "loss": 11.6747, "step": 22616 }, { "epoch": 0.4734363225320271, "grad_norm": 0.31225448846817017, "learning_rate": 0.00018798606647874394, "loss": 11.6769, "step": 22617 }, { "epoch": 0.4734572552959893, "grad_norm": 0.3055388033390045, "learning_rate": 0.00018798502449637416, "loss": 11.6694, "step": 22618 }, { "epoch": 0.47347818805995145, "grad_norm": 0.2842172086238861, "learning_rate": 0.00018798398247170823, "loss": 11.6843, "step": 22619 }, { "epoch": 0.4734991208239136, "grad_norm": 0.2590784430503845, "learning_rate": 0.0001879829404047466, "loss": 11.6663, "step": 22620 }, { "epoch": 0.47352005358787574, "grad_norm": 0.3528597056865692, "learning_rate": 0.00018798189829548982, "loss": 11.6676, "step": 22621 }, { "epoch": 0.4735409863518379, "grad_norm": 0.2416629195213318, "learning_rate": 0.0001879808561439384, "loss": 11.6722, "step": 22622 }, { "epoch": 0.47356191911580003, "grad_norm": 0.2686825394630432, "learning_rate": 0.0001879798139500928, "loss": 11.6654, "step": 22623 }, { "epoch": 0.4735828518797622, "grad_norm": 0.31666988134384155, "learning_rate": 0.00018797877171395356, "loss": 11.6644, "step": 22624 }, { "epoch": 0.4736037846437244, "grad_norm": 0.2621922791004181, "learning_rate": 0.00018797772943552113, "loss": 11.678, "step": 22625 }, { "epoch": 0.4736247174076865, "grad_norm": 0.29642969369888306, "learning_rate": 0.00018797668711479607, "loss": 11.6607, "step": 22626 }, { "epoch": 0.47364565017164867, "grad_norm": 0.2516375780105591, "learning_rate": 0.00018797564475177885, "loss": 11.6637, "step": 22627 }, { "epoch": 0.4736665829356108, "grad_norm": 0.3749326467514038, "learning_rate": 0.00018797460234646994, "loss": 11.659, "step": 22628 }, { "epoch": 0.47368751569957296, "grad_norm": 0.3428674042224884, "learning_rate": 0.00018797355989886994, "loss": 11.6752, "step": 22629 }, { "epoch": 0.4737084484635351, "grad_norm": 0.3161408305168152, "learning_rate": 0.00018797251740897925, "loss": 11.6757, "step": 22630 }, { "epoch": 0.4737293812274973, "grad_norm": 0.27896925806999207, "learning_rate": 0.0001879714748767984, "loss": 11.6625, "step": 22631 }, { "epoch": 0.47375031399145945, "grad_norm": 0.2852464020252228, "learning_rate": 0.00018797043230232792, "loss": 11.689, "step": 22632 }, { "epoch": 0.4737712467554216, "grad_norm": 0.3149731159210205, "learning_rate": 0.00018796938968556828, "loss": 11.6764, "step": 22633 }, { "epoch": 0.47379217951938374, "grad_norm": 0.2822987139225006, "learning_rate": 0.00018796834702652, "loss": 11.6591, "step": 22634 }, { "epoch": 0.4738131122833459, "grad_norm": 0.384255975484848, "learning_rate": 0.00018796730432518355, "loss": 11.6577, "step": 22635 }, { "epoch": 0.47383404504730803, "grad_norm": 0.30002984404563904, "learning_rate": 0.00018796626158155947, "loss": 11.6741, "step": 22636 }, { "epoch": 0.4738549778112702, "grad_norm": 0.24179553985595703, "learning_rate": 0.00018796521879564822, "loss": 11.6618, "step": 22637 }, { "epoch": 0.4738759105752324, "grad_norm": 0.2534274458885193, "learning_rate": 0.00018796417596745035, "loss": 11.6687, "step": 22638 }, { "epoch": 0.4738968433391945, "grad_norm": 0.3072269558906555, "learning_rate": 0.0001879631330969663, "loss": 11.665, "step": 22639 }, { "epoch": 0.47391777610315666, "grad_norm": 0.2835659682750702, "learning_rate": 0.00018796209018419669, "loss": 11.6734, "step": 22640 }, { "epoch": 0.4739387088671188, "grad_norm": 0.30773234367370605, "learning_rate": 0.0001879610472291419, "loss": 11.6762, "step": 22641 }, { "epoch": 0.47395964163108095, "grad_norm": 0.23358307778835297, "learning_rate": 0.00018796000423180247, "loss": 11.6905, "step": 22642 }, { "epoch": 0.4739805743950431, "grad_norm": 0.33668968081474304, "learning_rate": 0.0001879589611921789, "loss": 11.6575, "step": 22643 }, { "epoch": 0.4740015071590053, "grad_norm": 0.2857474684715271, "learning_rate": 0.00018795791811027166, "loss": 11.6845, "step": 22644 }, { "epoch": 0.47402243992296744, "grad_norm": 0.22279779613018036, "learning_rate": 0.00018795687498608134, "loss": 11.6756, "step": 22645 }, { "epoch": 0.4740433726869296, "grad_norm": 0.26080480217933655, "learning_rate": 0.00018795583181960835, "loss": 11.6552, "step": 22646 }, { "epoch": 0.47406430545089173, "grad_norm": 0.27996692061424255, "learning_rate": 0.00018795478861085324, "loss": 11.6915, "step": 22647 }, { "epoch": 0.4740852382148539, "grad_norm": 0.32269835472106934, "learning_rate": 0.00018795374535981655, "loss": 11.666, "step": 22648 }, { "epoch": 0.474106170978816, "grad_norm": 0.31250855326652527, "learning_rate": 0.00018795270206649865, "loss": 11.6604, "step": 22649 }, { "epoch": 0.4741271037427782, "grad_norm": 0.3487653136253357, "learning_rate": 0.00018795165873090018, "loss": 11.6861, "step": 22650 }, { "epoch": 0.47414803650674037, "grad_norm": 0.46145719289779663, "learning_rate": 0.00018795061535302156, "loss": 11.6847, "step": 22651 }, { "epoch": 0.4741689692707025, "grad_norm": 0.26112884283065796, "learning_rate": 0.00018794957193286335, "loss": 11.6611, "step": 22652 }, { "epoch": 0.47418990203466466, "grad_norm": 0.4867704510688782, "learning_rate": 0.00018794852847042596, "loss": 11.6659, "step": 22653 }, { "epoch": 0.4742108347986268, "grad_norm": 0.31384000182151794, "learning_rate": 0.00018794748496571, "loss": 11.6589, "step": 22654 }, { "epoch": 0.47423176756258895, "grad_norm": 0.3088774085044861, "learning_rate": 0.0001879464414187159, "loss": 11.6772, "step": 22655 }, { "epoch": 0.4742527003265511, "grad_norm": 0.2555939853191376, "learning_rate": 0.00018794539782944418, "loss": 11.6565, "step": 22656 }, { "epoch": 0.4742736330905133, "grad_norm": 0.3513321578502655, "learning_rate": 0.00018794435419789535, "loss": 11.6693, "step": 22657 }, { "epoch": 0.47429456585447544, "grad_norm": 0.3146328330039978, "learning_rate": 0.00018794331052406993, "loss": 11.7134, "step": 22658 }, { "epoch": 0.4743154986184376, "grad_norm": 0.4342935383319855, "learning_rate": 0.00018794226680796835, "loss": 11.6915, "step": 22659 }, { "epoch": 0.4743364313823997, "grad_norm": 0.34820061922073364, "learning_rate": 0.0001879412230495912, "loss": 11.6629, "step": 22660 }, { "epoch": 0.47435736414636187, "grad_norm": 0.25448015332221985, "learning_rate": 0.00018794017924893895, "loss": 11.6779, "step": 22661 }, { "epoch": 0.474378296910324, "grad_norm": 0.33450576663017273, "learning_rate": 0.00018793913540601204, "loss": 11.6809, "step": 22662 }, { "epoch": 0.4743992296742862, "grad_norm": 0.36891570687294006, "learning_rate": 0.00018793809152081107, "loss": 11.6864, "step": 22663 }, { "epoch": 0.47442016243824836, "grad_norm": 0.3461296260356903, "learning_rate": 0.0001879370475933365, "loss": 11.663, "step": 22664 }, { "epoch": 0.4744410952022105, "grad_norm": 0.38775932788848877, "learning_rate": 0.0001879360036235888, "loss": 11.6732, "step": 22665 }, { "epoch": 0.47446202796617265, "grad_norm": 0.30825117230415344, "learning_rate": 0.00018793495961156852, "loss": 11.687, "step": 22666 }, { "epoch": 0.4744829607301348, "grad_norm": 0.32650116086006165, "learning_rate": 0.00018793391555727614, "loss": 11.6817, "step": 22667 }, { "epoch": 0.47450389349409694, "grad_norm": 0.3097555935382843, "learning_rate": 0.00018793287146071216, "loss": 11.6794, "step": 22668 }, { "epoch": 0.47452482625805914, "grad_norm": 0.2805294394493103, "learning_rate": 0.0001879318273218771, "loss": 11.6716, "step": 22669 }, { "epoch": 0.4745457590220213, "grad_norm": 0.30403628945350647, "learning_rate": 0.00018793078314077141, "loss": 11.6719, "step": 22670 }, { "epoch": 0.47456669178598343, "grad_norm": 0.317117303609848, "learning_rate": 0.00018792973891739568, "loss": 11.6671, "step": 22671 }, { "epoch": 0.4745876245499456, "grad_norm": 0.3187028765678406, "learning_rate": 0.0001879286946517503, "loss": 11.6799, "step": 22672 }, { "epoch": 0.4746085573139077, "grad_norm": 0.31548336148262024, "learning_rate": 0.0001879276503438359, "loss": 11.6919, "step": 22673 }, { "epoch": 0.47462949007786986, "grad_norm": 0.38828352093696594, "learning_rate": 0.0001879266059936529, "loss": 11.6864, "step": 22674 }, { "epoch": 0.474650422841832, "grad_norm": 0.26357945799827576, "learning_rate": 0.0001879255616012018, "loss": 11.6649, "step": 22675 }, { "epoch": 0.4746713556057942, "grad_norm": 0.2332025021314621, "learning_rate": 0.00018792451716648313, "loss": 11.6523, "step": 22676 }, { "epoch": 0.47469228836975635, "grad_norm": 0.25542977452278137, "learning_rate": 0.0001879234726894974, "loss": 11.6738, "step": 22677 }, { "epoch": 0.4747132211337185, "grad_norm": 0.3984454274177551, "learning_rate": 0.00018792242817024506, "loss": 11.6642, "step": 22678 }, { "epoch": 0.47473415389768064, "grad_norm": 0.28923219442367554, "learning_rate": 0.00018792138360872667, "loss": 11.6558, "step": 22679 }, { "epoch": 0.4747550866616428, "grad_norm": 0.32434147596359253, "learning_rate": 0.0001879203390049427, "loss": 11.6628, "step": 22680 }, { "epoch": 0.47477601942560493, "grad_norm": 0.25263795256614685, "learning_rate": 0.00018791929435889367, "loss": 11.6834, "step": 22681 }, { "epoch": 0.47479695218956713, "grad_norm": 0.35767802596092224, "learning_rate": 0.00018791824967058005, "loss": 11.69, "step": 22682 }, { "epoch": 0.4748178849535293, "grad_norm": 0.26124000549316406, "learning_rate": 0.00018791720494000237, "loss": 11.6751, "step": 22683 }, { "epoch": 0.4748388177174914, "grad_norm": 0.3087700605392456, "learning_rate": 0.00018791616016716116, "loss": 11.6657, "step": 22684 }, { "epoch": 0.47485975048145357, "grad_norm": 0.2988753616809845, "learning_rate": 0.00018791511535205685, "loss": 11.6852, "step": 22685 }, { "epoch": 0.4748806832454157, "grad_norm": 0.24465453624725342, "learning_rate": 0.00018791407049469002, "loss": 11.6695, "step": 22686 }, { "epoch": 0.47490161600937786, "grad_norm": 0.2805047333240509, "learning_rate": 0.0001879130255950611, "loss": 11.6494, "step": 22687 }, { "epoch": 0.47492254877334006, "grad_norm": 0.2904994785785675, "learning_rate": 0.00018791198065317067, "loss": 11.6769, "step": 22688 }, { "epoch": 0.4749434815373022, "grad_norm": 0.2925604283809662, "learning_rate": 0.00018791093566901913, "loss": 11.6571, "step": 22689 }, { "epoch": 0.47496441430126435, "grad_norm": 0.25435882806777954, "learning_rate": 0.0001879098906426071, "loss": 11.6597, "step": 22690 }, { "epoch": 0.4749853470652265, "grad_norm": 0.27659499645233154, "learning_rate": 0.00018790884557393499, "loss": 11.6673, "step": 22691 }, { "epoch": 0.47500627982918864, "grad_norm": 0.23923712968826294, "learning_rate": 0.00018790780046300335, "loss": 11.6516, "step": 22692 }, { "epoch": 0.4750272125931508, "grad_norm": 0.2632901072502136, "learning_rate": 0.00018790675530981266, "loss": 11.6694, "step": 22693 }, { "epoch": 0.4750481453571129, "grad_norm": 0.3318113684654236, "learning_rate": 0.00018790571011436343, "loss": 11.6708, "step": 22694 }, { "epoch": 0.47506907812107513, "grad_norm": 0.2961253225803375, "learning_rate": 0.00018790466487665617, "loss": 11.6513, "step": 22695 }, { "epoch": 0.4750900108850373, "grad_norm": 0.3224110007286072, "learning_rate": 0.00018790361959669137, "loss": 11.6889, "step": 22696 }, { "epoch": 0.4751109436489994, "grad_norm": 0.3227931261062622, "learning_rate": 0.00018790257427446952, "loss": 11.6731, "step": 22697 }, { "epoch": 0.47513187641296156, "grad_norm": 0.29021090269088745, "learning_rate": 0.00018790152890999116, "loss": 11.693, "step": 22698 }, { "epoch": 0.4751528091769237, "grad_norm": 0.30869388580322266, "learning_rate": 0.0001879004835032568, "loss": 11.6739, "step": 22699 }, { "epoch": 0.47517374194088585, "grad_norm": 0.30147844552993774, "learning_rate": 0.00018789943805426688, "loss": 11.6751, "step": 22700 }, { "epoch": 0.47519467470484805, "grad_norm": 0.23130053281784058, "learning_rate": 0.00018789839256302195, "loss": 11.6708, "step": 22701 }, { "epoch": 0.4752156074688102, "grad_norm": 0.31443342566490173, "learning_rate": 0.00018789734702952252, "loss": 11.6596, "step": 22702 }, { "epoch": 0.47523654023277234, "grad_norm": 0.2668442130088806, "learning_rate": 0.00018789630145376904, "loss": 11.6811, "step": 22703 }, { "epoch": 0.4752574729967345, "grad_norm": 0.26023802161216736, "learning_rate": 0.00018789525583576207, "loss": 11.6719, "step": 22704 }, { "epoch": 0.47527840576069663, "grad_norm": 0.3413623869419098, "learning_rate": 0.00018789421017550208, "loss": 11.6796, "step": 22705 }, { "epoch": 0.4752993385246588, "grad_norm": 0.2540220022201538, "learning_rate": 0.0001878931644729896, "loss": 11.6831, "step": 22706 }, { "epoch": 0.475320271288621, "grad_norm": 0.29617568850517273, "learning_rate": 0.00018789211872822508, "loss": 11.6604, "step": 22707 }, { "epoch": 0.4753412040525831, "grad_norm": 0.355650395154953, "learning_rate": 0.00018789107294120907, "loss": 11.6845, "step": 22708 }, { "epoch": 0.47536213681654527, "grad_norm": 0.31404754519462585, "learning_rate": 0.00018789002711194205, "loss": 11.6724, "step": 22709 }, { "epoch": 0.4753830695805074, "grad_norm": 0.30696624517440796, "learning_rate": 0.00018788898124042457, "loss": 11.6646, "step": 22710 }, { "epoch": 0.47540400234446956, "grad_norm": 0.276476114988327, "learning_rate": 0.00018788793532665708, "loss": 11.6521, "step": 22711 }, { "epoch": 0.4754249351084317, "grad_norm": 0.3028222918510437, "learning_rate": 0.00018788688937064008, "loss": 11.6595, "step": 22712 }, { "epoch": 0.47544586787239385, "grad_norm": 0.28603288531303406, "learning_rate": 0.0001878858433723741, "loss": 11.6903, "step": 22713 }, { "epoch": 0.47546680063635605, "grad_norm": 0.3064691126346588, "learning_rate": 0.00018788479733185964, "loss": 11.6657, "step": 22714 }, { "epoch": 0.4754877334003182, "grad_norm": 0.29588839411735535, "learning_rate": 0.00018788375124909718, "loss": 11.6632, "step": 22715 }, { "epoch": 0.47550866616428034, "grad_norm": 0.3167496919631958, "learning_rate": 0.00018788270512408724, "loss": 11.6622, "step": 22716 }, { "epoch": 0.4755295989282425, "grad_norm": 0.2897570729255676, "learning_rate": 0.00018788165895683033, "loss": 11.6667, "step": 22717 }, { "epoch": 0.4755505316922046, "grad_norm": 0.23668372631072998, "learning_rate": 0.00018788061274732697, "loss": 11.6829, "step": 22718 }, { "epoch": 0.47557146445616677, "grad_norm": 0.3481641113758087, "learning_rate": 0.00018787956649557761, "loss": 11.665, "step": 22719 }, { "epoch": 0.47559239722012897, "grad_norm": 0.30926668643951416, "learning_rate": 0.00018787852020158278, "loss": 11.6912, "step": 22720 }, { "epoch": 0.4756133299840911, "grad_norm": 0.31175580620765686, "learning_rate": 0.000187877473865343, "loss": 11.6686, "step": 22721 }, { "epoch": 0.47563426274805326, "grad_norm": 0.27854275703430176, "learning_rate": 0.00018787642748685875, "loss": 11.6692, "step": 22722 }, { "epoch": 0.4756551955120154, "grad_norm": 0.31229308247566223, "learning_rate": 0.00018787538106613053, "loss": 11.6547, "step": 22723 }, { "epoch": 0.47567612827597755, "grad_norm": 0.3329567611217499, "learning_rate": 0.00018787433460315888, "loss": 11.6716, "step": 22724 }, { "epoch": 0.4756970610399397, "grad_norm": 0.2680988311767578, "learning_rate": 0.00018787328809794426, "loss": 11.6717, "step": 22725 }, { "epoch": 0.47571799380390184, "grad_norm": 0.2629312574863434, "learning_rate": 0.00018787224155048717, "loss": 11.6672, "step": 22726 }, { "epoch": 0.47573892656786404, "grad_norm": 0.29747143387794495, "learning_rate": 0.00018787119496078817, "loss": 11.6694, "step": 22727 }, { "epoch": 0.4757598593318262, "grad_norm": 0.2785819172859192, "learning_rate": 0.00018787014832884771, "loss": 11.6749, "step": 22728 }, { "epoch": 0.47578079209578833, "grad_norm": 0.2899424731731415, "learning_rate": 0.0001878691016546663, "loss": 11.6611, "step": 22729 }, { "epoch": 0.4758017248597505, "grad_norm": 0.2418231964111328, "learning_rate": 0.00018786805493824446, "loss": 11.6726, "step": 22730 }, { "epoch": 0.4758226576237126, "grad_norm": 0.24973949790000916, "learning_rate": 0.00018786700817958267, "loss": 11.6912, "step": 22731 }, { "epoch": 0.47584359038767476, "grad_norm": 0.31637588143348694, "learning_rate": 0.00018786596137868147, "loss": 11.6751, "step": 22732 }, { "epoch": 0.47586452315163696, "grad_norm": 0.2881300151348114, "learning_rate": 0.00018786491453554133, "loss": 11.676, "step": 22733 }, { "epoch": 0.4758854559155991, "grad_norm": 0.42562365531921387, "learning_rate": 0.00018786386765016276, "loss": 11.6755, "step": 22734 }, { "epoch": 0.47590638867956125, "grad_norm": 0.25681036710739136, "learning_rate": 0.00018786282072254628, "loss": 11.6673, "step": 22735 }, { "epoch": 0.4759273214435234, "grad_norm": 0.2654401361942291, "learning_rate": 0.0001878617737526924, "loss": 11.6682, "step": 22736 }, { "epoch": 0.47594825420748554, "grad_norm": 0.2934921681880951, "learning_rate": 0.00018786072674060157, "loss": 11.6709, "step": 22737 }, { "epoch": 0.4759691869714477, "grad_norm": 0.3129715025424957, "learning_rate": 0.00018785967968627435, "loss": 11.6846, "step": 22738 }, { "epoch": 0.4759901197354099, "grad_norm": 0.2567601799964905, "learning_rate": 0.0001878586325897112, "loss": 11.6748, "step": 22739 }, { "epoch": 0.47601105249937203, "grad_norm": 0.28590142726898193, "learning_rate": 0.00018785758545091264, "loss": 11.6797, "step": 22740 }, { "epoch": 0.4760319852633342, "grad_norm": 0.334611177444458, "learning_rate": 0.00018785653826987922, "loss": 11.6856, "step": 22741 }, { "epoch": 0.4760529180272963, "grad_norm": 0.3299802839756012, "learning_rate": 0.00018785549104661134, "loss": 11.6746, "step": 22742 }, { "epoch": 0.47607385079125847, "grad_norm": 0.3808075189590454, "learning_rate": 0.00018785444378110962, "loss": 11.6933, "step": 22743 }, { "epoch": 0.4760947835552206, "grad_norm": 0.4337894022464752, "learning_rate": 0.0001878533964733745, "loss": 11.6822, "step": 22744 }, { "epoch": 0.47611571631918276, "grad_norm": 0.31545525789260864, "learning_rate": 0.00018785234912340644, "loss": 11.6701, "step": 22745 }, { "epoch": 0.47613664908314496, "grad_norm": 0.26012712717056274, "learning_rate": 0.00018785130173120608, "loss": 11.6741, "step": 22746 }, { "epoch": 0.4761575818471071, "grad_norm": 0.24591942131519318, "learning_rate": 0.00018785025429677376, "loss": 11.6613, "step": 22747 }, { "epoch": 0.47617851461106925, "grad_norm": 0.3511987030506134, "learning_rate": 0.0001878492068201101, "loss": 11.6824, "step": 22748 }, { "epoch": 0.4761994473750314, "grad_norm": 0.2641492784023285, "learning_rate": 0.00018784815930121556, "loss": 11.6731, "step": 22749 }, { "epoch": 0.47622038013899354, "grad_norm": 0.2791118323802948, "learning_rate": 0.00018784711174009067, "loss": 11.6789, "step": 22750 }, { "epoch": 0.4762413129029557, "grad_norm": 0.31999197602272034, "learning_rate": 0.00018784606413673588, "loss": 11.6618, "step": 22751 }, { "epoch": 0.4762622456669179, "grad_norm": 0.32783669233322144, "learning_rate": 0.00018784501649115175, "loss": 11.6784, "step": 22752 }, { "epoch": 0.47628317843088, "grad_norm": 0.27317067980766296, "learning_rate": 0.00018784396880333875, "loss": 11.6731, "step": 22753 }, { "epoch": 0.4763041111948422, "grad_norm": 0.2951315939426422, "learning_rate": 0.0001878429210732974, "loss": 11.6631, "step": 22754 }, { "epoch": 0.4763250439588043, "grad_norm": 0.27868545055389404, "learning_rate": 0.00018784187330102818, "loss": 11.6791, "step": 22755 }, { "epoch": 0.47634597672276646, "grad_norm": 0.30584773421287537, "learning_rate": 0.00018784082548653163, "loss": 11.66, "step": 22756 }, { "epoch": 0.4763669094867286, "grad_norm": 0.21971170604228973, "learning_rate": 0.00018783977762980823, "loss": 11.6513, "step": 22757 }, { "epoch": 0.4763878422506908, "grad_norm": 0.3558363914489746, "learning_rate": 0.0001878387297308585, "loss": 11.6766, "step": 22758 }, { "epoch": 0.47640877501465295, "grad_norm": 0.3045653700828552, "learning_rate": 0.0001878376817896829, "loss": 11.6658, "step": 22759 }, { "epoch": 0.4764297077786151, "grad_norm": 0.3247604966163635, "learning_rate": 0.000187836633806282, "loss": 11.6744, "step": 22760 }, { "epoch": 0.47645064054257724, "grad_norm": 0.29288700222969055, "learning_rate": 0.00018783558578065623, "loss": 11.6752, "step": 22761 }, { "epoch": 0.4764715733065394, "grad_norm": 0.2983766496181488, "learning_rate": 0.00018783453771280617, "loss": 11.6718, "step": 22762 }, { "epoch": 0.47649250607050153, "grad_norm": 0.32283344864845276, "learning_rate": 0.0001878334896027323, "loss": 11.6916, "step": 22763 }, { "epoch": 0.4765134388344637, "grad_norm": 0.30265453457832336, "learning_rate": 0.00018783244145043507, "loss": 11.682, "step": 22764 }, { "epoch": 0.4765343715984259, "grad_norm": 0.3176627457141876, "learning_rate": 0.00018783139325591504, "loss": 11.6836, "step": 22765 }, { "epoch": 0.476555304362388, "grad_norm": 0.28051236271858215, "learning_rate": 0.0001878303450191727, "loss": 11.6674, "step": 22766 }, { "epoch": 0.47657623712635017, "grad_norm": 0.2731901705265045, "learning_rate": 0.00018782929674020856, "loss": 11.6587, "step": 22767 }, { "epoch": 0.4765971698903123, "grad_norm": 0.34383395314216614, "learning_rate": 0.0001878282484190231, "loss": 11.6828, "step": 22768 }, { "epoch": 0.47661810265427446, "grad_norm": 0.3163991868495941, "learning_rate": 0.00018782720005561685, "loss": 11.6737, "step": 22769 }, { "epoch": 0.4766390354182366, "grad_norm": 0.3070794939994812, "learning_rate": 0.00018782615164999032, "loss": 11.6675, "step": 22770 }, { "epoch": 0.4766599681821988, "grad_norm": 0.33013221621513367, "learning_rate": 0.00018782510320214396, "loss": 11.673, "step": 22771 }, { "epoch": 0.47668090094616095, "grad_norm": 0.32181310653686523, "learning_rate": 0.00018782405471207833, "loss": 11.6641, "step": 22772 }, { "epoch": 0.4767018337101231, "grad_norm": 0.2944192886352539, "learning_rate": 0.00018782300617979395, "loss": 11.6806, "step": 22773 }, { "epoch": 0.47672276647408524, "grad_norm": 0.2651884853839874, "learning_rate": 0.00018782195760529128, "loss": 11.6564, "step": 22774 }, { "epoch": 0.4767436992380474, "grad_norm": 0.29908570647239685, "learning_rate": 0.0001878209089885708, "loss": 11.6858, "step": 22775 }, { "epoch": 0.4767646320020095, "grad_norm": 0.28210461139678955, "learning_rate": 0.00018781986032963307, "loss": 11.6738, "step": 22776 }, { "epoch": 0.4767855647659717, "grad_norm": 0.20189060270786285, "learning_rate": 0.00018781881162847856, "loss": 11.6811, "step": 22777 }, { "epoch": 0.47680649752993387, "grad_norm": 0.3002531826496124, "learning_rate": 0.00018781776288510782, "loss": 11.6806, "step": 22778 }, { "epoch": 0.476827430293896, "grad_norm": 0.3060319125652313, "learning_rate": 0.0001878167140995213, "loss": 11.6908, "step": 22779 }, { "epoch": 0.47684836305785816, "grad_norm": 0.2553081214427948, "learning_rate": 0.00018781566527171953, "loss": 11.6525, "step": 22780 }, { "epoch": 0.4768692958218203, "grad_norm": 0.2882572412490845, "learning_rate": 0.000187814616401703, "loss": 11.6743, "step": 22781 }, { "epoch": 0.47689022858578245, "grad_norm": 0.26147398352622986, "learning_rate": 0.00018781356748947224, "loss": 11.6503, "step": 22782 }, { "epoch": 0.4769111613497446, "grad_norm": 0.35695263743400574, "learning_rate": 0.00018781251853502772, "loss": 11.6676, "step": 22783 }, { "epoch": 0.4769320941137068, "grad_norm": 0.3217935860157013, "learning_rate": 0.00018781146953836996, "loss": 11.6722, "step": 22784 }, { "epoch": 0.47695302687766894, "grad_norm": 0.26783815026283264, "learning_rate": 0.00018781042049949948, "loss": 11.6722, "step": 22785 }, { "epoch": 0.4769739596416311, "grad_norm": 0.36503833532333374, "learning_rate": 0.0001878093714184168, "loss": 11.658, "step": 22786 }, { "epoch": 0.47699489240559323, "grad_norm": 0.3198072016239166, "learning_rate": 0.00018780832229512234, "loss": 11.6766, "step": 22787 }, { "epoch": 0.4770158251695554, "grad_norm": 0.2531244456768036, "learning_rate": 0.0001878072731296167, "loss": 11.6898, "step": 22788 }, { "epoch": 0.4770367579335175, "grad_norm": 0.3675127625465393, "learning_rate": 0.00018780622392190033, "loss": 11.6719, "step": 22789 }, { "epoch": 0.4770576906974797, "grad_norm": 0.27366653084754944, "learning_rate": 0.00018780517467197373, "loss": 11.6714, "step": 22790 }, { "epoch": 0.47707862346144186, "grad_norm": 0.23208372294902802, "learning_rate": 0.00018780412537983745, "loss": 11.6595, "step": 22791 }, { "epoch": 0.477099556225404, "grad_norm": 0.34063521027565, "learning_rate": 0.00018780307604549198, "loss": 11.6701, "step": 22792 }, { "epoch": 0.47712048898936615, "grad_norm": 0.257270872592926, "learning_rate": 0.00018780202666893777, "loss": 11.6637, "step": 22793 }, { "epoch": 0.4771414217533283, "grad_norm": 0.2594361901283264, "learning_rate": 0.0001878009772501754, "loss": 11.6704, "step": 22794 }, { "epoch": 0.47716235451729044, "grad_norm": 0.30403900146484375, "learning_rate": 0.00018779992778920535, "loss": 11.6626, "step": 22795 }, { "epoch": 0.47718328728125264, "grad_norm": 0.26884394884109497, "learning_rate": 0.00018779887828602806, "loss": 11.6764, "step": 22796 }, { "epoch": 0.4772042200452148, "grad_norm": 0.27416712045669556, "learning_rate": 0.00018779782874064413, "loss": 11.6799, "step": 22797 }, { "epoch": 0.47722515280917693, "grad_norm": 0.24018359184265137, "learning_rate": 0.00018779677915305404, "loss": 11.6618, "step": 22798 }, { "epoch": 0.4772460855731391, "grad_norm": 0.2823917269706726, "learning_rate": 0.00018779572952325822, "loss": 11.662, "step": 22799 }, { "epoch": 0.4772670183371012, "grad_norm": 0.3463839590549469, "learning_rate": 0.00018779467985125727, "loss": 11.6593, "step": 22800 }, { "epoch": 0.47728795110106337, "grad_norm": 0.2988215982913971, "learning_rate": 0.00018779363013705168, "loss": 11.6869, "step": 22801 }, { "epoch": 0.4773088838650255, "grad_norm": 0.28403523564338684, "learning_rate": 0.00018779258038064192, "loss": 11.669, "step": 22802 }, { "epoch": 0.4773298166289877, "grad_norm": 0.3281152546405792, "learning_rate": 0.00018779153058202849, "loss": 11.6636, "step": 22803 }, { "epoch": 0.47735074939294986, "grad_norm": 0.26856309175491333, "learning_rate": 0.0001877904807412119, "loss": 11.6681, "step": 22804 }, { "epoch": 0.477371682156912, "grad_norm": 0.2929779291152954, "learning_rate": 0.00018778943085819268, "loss": 11.6791, "step": 22805 }, { "epoch": 0.47739261492087415, "grad_norm": 0.342214435338974, "learning_rate": 0.00018778838093297132, "loss": 11.6808, "step": 22806 }, { "epoch": 0.4774135476848363, "grad_norm": 0.2563576400279999, "learning_rate": 0.00018778733096554834, "loss": 11.6791, "step": 22807 }, { "epoch": 0.47743448044879844, "grad_norm": 0.3550954759120941, "learning_rate": 0.00018778628095592422, "loss": 11.6838, "step": 22808 }, { "epoch": 0.47745541321276064, "grad_norm": 0.27769437432289124, "learning_rate": 0.0001877852309040995, "loss": 11.6835, "step": 22809 }, { "epoch": 0.4774763459767228, "grad_norm": 0.2937360405921936, "learning_rate": 0.00018778418081007463, "loss": 11.6871, "step": 22810 }, { "epoch": 0.4774972787406849, "grad_norm": 0.30047476291656494, "learning_rate": 0.00018778313067385015, "loss": 11.6573, "step": 22811 }, { "epoch": 0.47751821150464707, "grad_norm": 0.25515010952949524, "learning_rate": 0.0001877820804954266, "loss": 11.673, "step": 22812 }, { "epoch": 0.4775391442686092, "grad_norm": 0.27601394057273865, "learning_rate": 0.00018778103027480438, "loss": 11.6874, "step": 22813 }, { "epoch": 0.47756007703257136, "grad_norm": 0.2834547460079193, "learning_rate": 0.00018777998001198406, "loss": 11.6673, "step": 22814 }, { "epoch": 0.4775810097965335, "grad_norm": 0.3207767605781555, "learning_rate": 0.0001877789297069662, "loss": 11.6798, "step": 22815 }, { "epoch": 0.4776019425604957, "grad_norm": 0.27260059118270874, "learning_rate": 0.00018777787935975122, "loss": 11.658, "step": 22816 }, { "epoch": 0.47762287532445785, "grad_norm": 0.3056793212890625, "learning_rate": 0.00018777682897033965, "loss": 11.6614, "step": 22817 }, { "epoch": 0.47764380808842, "grad_norm": 0.2695004940032959, "learning_rate": 0.00018777577853873201, "loss": 11.6665, "step": 22818 }, { "epoch": 0.47766474085238214, "grad_norm": 0.2878819406032562, "learning_rate": 0.00018777472806492877, "loss": 11.6843, "step": 22819 }, { "epoch": 0.4776856736163443, "grad_norm": 0.2482096254825592, "learning_rate": 0.0001877736775489305, "loss": 11.6686, "step": 22820 }, { "epoch": 0.47770660638030643, "grad_norm": 0.2674465775489807, "learning_rate": 0.00018777262699073763, "loss": 11.6715, "step": 22821 }, { "epoch": 0.47772753914426863, "grad_norm": 0.2786672115325928, "learning_rate": 0.00018777157639035072, "loss": 11.6711, "step": 22822 }, { "epoch": 0.4777484719082308, "grad_norm": 0.2674393057823181, "learning_rate": 0.00018777052574777024, "loss": 11.6772, "step": 22823 }, { "epoch": 0.4777694046721929, "grad_norm": 0.2585185766220093, "learning_rate": 0.0001877694750629967, "loss": 11.6522, "step": 22824 }, { "epoch": 0.47779033743615507, "grad_norm": 0.2594294846057892, "learning_rate": 0.00018776842433603066, "loss": 11.6756, "step": 22825 }, { "epoch": 0.4778112702001172, "grad_norm": 0.34178251028060913, "learning_rate": 0.00018776737356687254, "loss": 11.6953, "step": 22826 }, { "epoch": 0.47783220296407936, "grad_norm": 0.330487996339798, "learning_rate": 0.0001877663227555229, "loss": 11.6747, "step": 22827 }, { "epoch": 0.47785313572804156, "grad_norm": 0.23919445276260376, "learning_rate": 0.00018776527190198224, "loss": 11.684, "step": 22828 }, { "epoch": 0.4778740684920037, "grad_norm": 0.24811218678951263, "learning_rate": 0.00018776422100625102, "loss": 11.6689, "step": 22829 }, { "epoch": 0.47789500125596585, "grad_norm": 0.3618161678314209, "learning_rate": 0.0001877631700683298, "loss": 11.6771, "step": 22830 }, { "epoch": 0.477915934019928, "grad_norm": 0.33549439907073975, "learning_rate": 0.00018776211908821906, "loss": 11.6829, "step": 22831 }, { "epoch": 0.47793686678389014, "grad_norm": 0.28356045484542847, "learning_rate": 0.00018776106806591932, "loss": 11.6681, "step": 22832 }, { "epoch": 0.4779577995478523, "grad_norm": 0.2617662847042084, "learning_rate": 0.00018776001700143106, "loss": 11.6739, "step": 22833 }, { "epoch": 0.4779787323118144, "grad_norm": 0.306096613407135, "learning_rate": 0.0001877589658947548, "loss": 11.6811, "step": 22834 }, { "epoch": 0.4779996650757766, "grad_norm": 0.26760709285736084, "learning_rate": 0.00018775791474589108, "loss": 11.6715, "step": 22835 }, { "epoch": 0.47802059783973877, "grad_norm": 0.30122512578964233, "learning_rate": 0.00018775686355484034, "loss": 11.6771, "step": 22836 }, { "epoch": 0.4780415306037009, "grad_norm": 0.32344359159469604, "learning_rate": 0.00018775581232160314, "loss": 11.674, "step": 22837 }, { "epoch": 0.47806246336766306, "grad_norm": 0.24322116374969482, "learning_rate": 0.00018775476104617995, "loss": 11.6621, "step": 22838 }, { "epoch": 0.4780833961316252, "grad_norm": 0.3078628182411194, "learning_rate": 0.0001877537097285713, "loss": 11.6452, "step": 22839 }, { "epoch": 0.47810432889558735, "grad_norm": 0.25605371594429016, "learning_rate": 0.00018775265836877764, "loss": 11.6731, "step": 22840 }, { "epoch": 0.47812526165954955, "grad_norm": 0.30898210406303406, "learning_rate": 0.00018775160696679957, "loss": 11.6659, "step": 22841 }, { "epoch": 0.4781461944235117, "grad_norm": 0.28676146268844604, "learning_rate": 0.0001877505555226375, "loss": 11.6615, "step": 22842 }, { "epoch": 0.47816712718747384, "grad_norm": 0.3062434494495392, "learning_rate": 0.00018774950403629203, "loss": 11.6614, "step": 22843 }, { "epoch": 0.478188059951436, "grad_norm": 0.24424265325069427, "learning_rate": 0.00018774845250776358, "loss": 11.6788, "step": 22844 }, { "epoch": 0.47820899271539813, "grad_norm": 0.3253147602081299, "learning_rate": 0.0001877474009370527, "loss": 11.6731, "step": 22845 }, { "epoch": 0.4782299254793603, "grad_norm": 0.284091591835022, "learning_rate": 0.00018774634932415988, "loss": 11.6609, "step": 22846 }, { "epoch": 0.4782508582433225, "grad_norm": 0.3002007007598877, "learning_rate": 0.00018774529766908563, "loss": 11.6762, "step": 22847 }, { "epoch": 0.4782717910072846, "grad_norm": 0.30009448528289795, "learning_rate": 0.00018774424597183047, "loss": 11.677, "step": 22848 }, { "epoch": 0.47829272377124676, "grad_norm": 0.2906728982925415, "learning_rate": 0.00018774319423239488, "loss": 11.658, "step": 22849 }, { "epoch": 0.4783136565352089, "grad_norm": 0.28675445914268494, "learning_rate": 0.00018774214245077938, "loss": 11.6538, "step": 22850 }, { "epoch": 0.47833458929917105, "grad_norm": 0.2971836030483246, "learning_rate": 0.0001877410906269845, "loss": 11.6781, "step": 22851 }, { "epoch": 0.4783555220631332, "grad_norm": 0.20813675224781036, "learning_rate": 0.00018774003876101067, "loss": 11.6692, "step": 22852 }, { "epoch": 0.47837645482709534, "grad_norm": 0.23491212725639343, "learning_rate": 0.00018773898685285848, "loss": 11.6781, "step": 22853 }, { "epoch": 0.47839738759105754, "grad_norm": 0.3549332618713379, "learning_rate": 0.00018773793490252838, "loss": 11.6854, "step": 22854 }, { "epoch": 0.4784183203550197, "grad_norm": 0.26102569699287415, "learning_rate": 0.00018773688291002093, "loss": 11.6686, "step": 22855 }, { "epoch": 0.47843925311898183, "grad_norm": 0.27697131037712097, "learning_rate": 0.00018773583087533657, "loss": 11.6687, "step": 22856 }, { "epoch": 0.478460185882944, "grad_norm": 0.2907339334487915, "learning_rate": 0.00018773477879847582, "loss": 11.6576, "step": 22857 }, { "epoch": 0.4784811186469061, "grad_norm": 0.30887964367866516, "learning_rate": 0.00018773372667943924, "loss": 11.669, "step": 22858 }, { "epoch": 0.47850205141086827, "grad_norm": 0.2719583511352539, "learning_rate": 0.0001877326745182273, "loss": 11.6551, "step": 22859 }, { "epoch": 0.47852298417483047, "grad_norm": 0.2614380121231079, "learning_rate": 0.0001877316223148405, "loss": 11.6713, "step": 22860 }, { "epoch": 0.4785439169387926, "grad_norm": 0.34724438190460205, "learning_rate": 0.00018773057006927935, "loss": 11.6963, "step": 22861 }, { "epoch": 0.47856484970275476, "grad_norm": 0.5105869770050049, "learning_rate": 0.00018772951778154434, "loss": 11.6865, "step": 22862 }, { "epoch": 0.4785857824667169, "grad_norm": 0.3398074805736542, "learning_rate": 0.00018772846545163602, "loss": 11.6756, "step": 22863 }, { "epoch": 0.47860671523067905, "grad_norm": 0.3148927390575409, "learning_rate": 0.00018772741307955486, "loss": 11.6722, "step": 22864 }, { "epoch": 0.4786276479946412, "grad_norm": 0.3560509979724884, "learning_rate": 0.00018772636066530135, "loss": 11.6793, "step": 22865 }, { "epoch": 0.4786485807586034, "grad_norm": 0.26669374108314514, "learning_rate": 0.00018772530820887603, "loss": 11.6625, "step": 22866 }, { "epoch": 0.47866951352256554, "grad_norm": 0.31412968039512634, "learning_rate": 0.00018772425571027943, "loss": 11.6741, "step": 22867 }, { "epoch": 0.4786904462865277, "grad_norm": 0.29533329606056213, "learning_rate": 0.000187723203169512, "loss": 11.675, "step": 22868 }, { "epoch": 0.4787113790504898, "grad_norm": 0.2882516086101532, "learning_rate": 0.00018772215058657427, "loss": 11.6723, "step": 22869 }, { "epoch": 0.47873231181445197, "grad_norm": 0.3919163644313812, "learning_rate": 0.00018772109796146672, "loss": 11.6905, "step": 22870 }, { "epoch": 0.4787532445784141, "grad_norm": 0.33386924862861633, "learning_rate": 0.00018772004529418992, "loss": 11.6711, "step": 22871 }, { "epoch": 0.47877417734237626, "grad_norm": 0.347109854221344, "learning_rate": 0.00018771899258474432, "loss": 11.6742, "step": 22872 }, { "epoch": 0.47879511010633846, "grad_norm": 0.3323626220226288, "learning_rate": 0.00018771793983313043, "loss": 11.6702, "step": 22873 }, { "epoch": 0.4788160428703006, "grad_norm": 0.3135290741920471, "learning_rate": 0.00018771688703934878, "loss": 11.6641, "step": 22874 }, { "epoch": 0.47883697563426275, "grad_norm": 0.23937314748764038, "learning_rate": 0.00018771583420339985, "loss": 11.6903, "step": 22875 }, { "epoch": 0.4788579083982249, "grad_norm": 0.35888293385505676, "learning_rate": 0.0001877147813252842, "loss": 11.6716, "step": 22876 }, { "epoch": 0.47887884116218704, "grad_norm": 0.42627206444740295, "learning_rate": 0.00018771372840500225, "loss": 11.666, "step": 22877 }, { "epoch": 0.4788997739261492, "grad_norm": 0.3234027624130249, "learning_rate": 0.00018771267544255457, "loss": 11.6718, "step": 22878 }, { "epoch": 0.4789207066901114, "grad_norm": 0.31843453645706177, "learning_rate": 0.00018771162243794165, "loss": 11.6771, "step": 22879 }, { "epoch": 0.47894163945407353, "grad_norm": 0.384534627199173, "learning_rate": 0.000187710569391164, "loss": 11.6765, "step": 22880 }, { "epoch": 0.4789625722180357, "grad_norm": 0.279366672039032, "learning_rate": 0.00018770951630222213, "loss": 11.6729, "step": 22881 }, { "epoch": 0.4789835049819978, "grad_norm": 0.3297729790210724, "learning_rate": 0.00018770846317111653, "loss": 11.676, "step": 22882 }, { "epoch": 0.47900443774595997, "grad_norm": 0.3389797806739807, "learning_rate": 0.0001877074099978477, "loss": 11.6881, "step": 22883 }, { "epoch": 0.4790253705099221, "grad_norm": 0.2971450984477997, "learning_rate": 0.00018770635678241616, "loss": 11.6853, "step": 22884 }, { "epoch": 0.4790463032738843, "grad_norm": 0.33006030321121216, "learning_rate": 0.00018770530352482244, "loss": 11.6611, "step": 22885 }, { "epoch": 0.47906723603784646, "grad_norm": 0.2329561412334442, "learning_rate": 0.000187704250225067, "loss": 11.6737, "step": 22886 }, { "epoch": 0.4790881688018086, "grad_norm": 0.2612408995628357, "learning_rate": 0.00018770319688315038, "loss": 11.6789, "step": 22887 }, { "epoch": 0.47910910156577075, "grad_norm": 0.3166901767253876, "learning_rate": 0.0001877021434990731, "loss": 11.6742, "step": 22888 }, { "epoch": 0.4791300343297329, "grad_norm": 0.39836856722831726, "learning_rate": 0.0001877010900728356, "loss": 11.6789, "step": 22889 }, { "epoch": 0.47915096709369503, "grad_norm": 0.27742648124694824, "learning_rate": 0.00018770003660443846, "loss": 11.6755, "step": 22890 }, { "epoch": 0.4791718998576572, "grad_norm": 0.27433517575263977, "learning_rate": 0.00018769898309388213, "loss": 11.6799, "step": 22891 }, { "epoch": 0.4791928326216194, "grad_norm": 0.30396896600723267, "learning_rate": 0.00018769792954116717, "loss": 11.6638, "step": 22892 }, { "epoch": 0.4792137653855815, "grad_norm": 0.25016480684280396, "learning_rate": 0.00018769687594629404, "loss": 11.6753, "step": 22893 }, { "epoch": 0.47923469814954367, "grad_norm": 0.40519267320632935, "learning_rate": 0.00018769582230926326, "loss": 11.6746, "step": 22894 }, { "epoch": 0.4792556309135058, "grad_norm": 0.2328868955373764, "learning_rate": 0.00018769476863007535, "loss": 11.6655, "step": 22895 }, { "epoch": 0.47927656367746796, "grad_norm": 0.34261608123779297, "learning_rate": 0.00018769371490873077, "loss": 11.6672, "step": 22896 }, { "epoch": 0.4792974964414301, "grad_norm": 0.346601665019989, "learning_rate": 0.0001876926611452301, "loss": 11.6735, "step": 22897 }, { "epoch": 0.4793184292053923, "grad_norm": 0.3254995346069336, "learning_rate": 0.00018769160733957382, "loss": 11.6714, "step": 22898 }, { "epoch": 0.47933936196935445, "grad_norm": 0.29254645109176636, "learning_rate": 0.00018769055349176245, "loss": 11.664, "step": 22899 }, { "epoch": 0.4793602947333166, "grad_norm": 0.25894472002983093, "learning_rate": 0.0001876894996017964, "loss": 11.6688, "step": 22900 }, { "epoch": 0.47938122749727874, "grad_norm": 3.106740713119507, "learning_rate": 0.0001876884456696763, "loss": 11.6939, "step": 22901 }, { "epoch": 0.4794021602612409, "grad_norm": 0.2884463369846344, "learning_rate": 0.0001876873916954026, "loss": 11.6786, "step": 22902 }, { "epoch": 0.47942309302520303, "grad_norm": 0.23738113045692444, "learning_rate": 0.0001876863376789758, "loss": 11.6734, "step": 22903 }, { "epoch": 0.47944402578916523, "grad_norm": 0.2936582565307617, "learning_rate": 0.00018768528362039644, "loss": 11.6729, "step": 22904 }, { "epoch": 0.4794649585531274, "grad_norm": 0.29239359498023987, "learning_rate": 0.00018768422951966497, "loss": 11.6821, "step": 22905 }, { "epoch": 0.4794858913170895, "grad_norm": 0.23878514766693115, "learning_rate": 0.00018768317537678198, "loss": 11.6812, "step": 22906 }, { "epoch": 0.47950682408105166, "grad_norm": 0.2660822868347168, "learning_rate": 0.0001876821211917479, "loss": 11.6791, "step": 22907 }, { "epoch": 0.4795277568450138, "grad_norm": 0.3469545841217041, "learning_rate": 0.00018768106696456327, "loss": 11.6834, "step": 22908 }, { "epoch": 0.47954868960897595, "grad_norm": 0.2737770080566406, "learning_rate": 0.0001876800126952286, "loss": 11.6495, "step": 22909 }, { "epoch": 0.4795696223729381, "grad_norm": 0.29657796025276184, "learning_rate": 0.0001876789583837444, "loss": 11.6748, "step": 22910 }, { "epoch": 0.4795905551369003, "grad_norm": 0.27485954761505127, "learning_rate": 0.00018767790403011113, "loss": 11.6798, "step": 22911 }, { "epoch": 0.47961148790086244, "grad_norm": 0.3258057236671448, "learning_rate": 0.0001876768496343294, "loss": 11.6563, "step": 22912 }, { "epoch": 0.4796324206648246, "grad_norm": 0.32663431763648987, "learning_rate": 0.0001876757951963996, "loss": 11.6772, "step": 22913 }, { "epoch": 0.47965335342878673, "grad_norm": 0.2920365333557129, "learning_rate": 0.00018767474071632232, "loss": 11.6874, "step": 22914 }, { "epoch": 0.4796742861927489, "grad_norm": 0.3010875880718231, "learning_rate": 0.000187673686194098, "loss": 11.6817, "step": 22915 }, { "epoch": 0.479695218956711, "grad_norm": 0.28749406337738037, "learning_rate": 0.0001876726316297272, "loss": 11.6855, "step": 22916 }, { "epoch": 0.4797161517206732, "grad_norm": 0.2463325411081314, "learning_rate": 0.00018767157702321042, "loss": 11.6728, "step": 22917 }, { "epoch": 0.47973708448463537, "grad_norm": 0.3207795321941376, "learning_rate": 0.00018767052237454813, "loss": 11.6669, "step": 22918 }, { "epoch": 0.4797580172485975, "grad_norm": 0.28140032291412354, "learning_rate": 0.0001876694676837409, "loss": 11.6806, "step": 22919 }, { "epoch": 0.47977895001255966, "grad_norm": 0.31233978271484375, "learning_rate": 0.00018766841295078917, "loss": 11.6618, "step": 22920 }, { "epoch": 0.4797998827765218, "grad_norm": 0.30931052565574646, "learning_rate": 0.00018766735817569348, "loss": 11.6939, "step": 22921 }, { "epoch": 0.47982081554048395, "grad_norm": 0.363038569688797, "learning_rate": 0.00018766630335845434, "loss": 11.6844, "step": 22922 }, { "epoch": 0.4798417483044461, "grad_norm": 0.2819781005382538, "learning_rate": 0.00018766524849907223, "loss": 11.673, "step": 22923 }, { "epoch": 0.4798626810684083, "grad_norm": 0.35694000124931335, "learning_rate": 0.00018766419359754773, "loss": 11.6865, "step": 22924 }, { "epoch": 0.47988361383237044, "grad_norm": 0.2716168463230133, "learning_rate": 0.00018766313865388123, "loss": 11.6855, "step": 22925 }, { "epoch": 0.4799045465963326, "grad_norm": 0.3470551073551178, "learning_rate": 0.00018766208366807337, "loss": 11.6824, "step": 22926 }, { "epoch": 0.4799254793602947, "grad_norm": 0.25019004940986633, "learning_rate": 0.00018766102864012452, "loss": 11.686, "step": 22927 }, { "epoch": 0.47994641212425687, "grad_norm": 0.2837730348110199, "learning_rate": 0.0001876599735700353, "loss": 11.6765, "step": 22928 }, { "epoch": 0.479967344888219, "grad_norm": 0.25512489676475525, "learning_rate": 0.0001876589184578062, "loss": 11.6795, "step": 22929 }, { "epoch": 0.4799882776521812, "grad_norm": 0.33718761801719666, "learning_rate": 0.00018765786330343764, "loss": 11.6826, "step": 22930 }, { "epoch": 0.48000921041614336, "grad_norm": 0.3015182316303253, "learning_rate": 0.00018765680810693022, "loss": 11.679, "step": 22931 }, { "epoch": 0.4800301431801055, "grad_norm": 0.36769285798072815, "learning_rate": 0.00018765575286828443, "loss": 11.6918, "step": 22932 }, { "epoch": 0.48005107594406765, "grad_norm": 0.3371788561344147, "learning_rate": 0.00018765469758750075, "loss": 11.6804, "step": 22933 }, { "epoch": 0.4800720087080298, "grad_norm": 0.279013067483902, "learning_rate": 0.00018765364226457968, "loss": 11.6694, "step": 22934 }, { "epoch": 0.48009294147199194, "grad_norm": 0.31777986884117126, "learning_rate": 0.00018765258689952178, "loss": 11.6744, "step": 22935 }, { "epoch": 0.48011387423595414, "grad_norm": 0.27937042713165283, "learning_rate": 0.0001876515314923275, "loss": 11.6778, "step": 22936 }, { "epoch": 0.4801348069999163, "grad_norm": 0.33150964975357056, "learning_rate": 0.00018765047604299737, "loss": 11.6716, "step": 22937 }, { "epoch": 0.48015573976387843, "grad_norm": 0.26746582984924316, "learning_rate": 0.00018764942055153193, "loss": 11.6553, "step": 22938 }, { "epoch": 0.4801766725278406, "grad_norm": 0.29601922631263733, "learning_rate": 0.00018764836501793163, "loss": 11.6804, "step": 22939 }, { "epoch": 0.4801976052918027, "grad_norm": 0.2718822956085205, "learning_rate": 0.00018764730944219698, "loss": 11.6594, "step": 22940 }, { "epoch": 0.48021853805576487, "grad_norm": 0.33329498767852783, "learning_rate": 0.00018764625382432858, "loss": 11.678, "step": 22941 }, { "epoch": 0.480239470819727, "grad_norm": 0.3310244083404541, "learning_rate": 0.0001876451981643268, "loss": 11.6888, "step": 22942 }, { "epoch": 0.4802604035836892, "grad_norm": 0.3349851965904236, "learning_rate": 0.00018764414246219224, "loss": 11.685, "step": 22943 }, { "epoch": 0.48028133634765136, "grad_norm": 0.29203447699546814, "learning_rate": 0.0001876430867179254, "loss": 11.6842, "step": 22944 }, { "epoch": 0.4803022691116135, "grad_norm": 0.3080912232398987, "learning_rate": 0.00018764203093152678, "loss": 11.6655, "step": 22945 }, { "epoch": 0.48032320187557564, "grad_norm": 0.24919283390045166, "learning_rate": 0.00018764097510299684, "loss": 11.6675, "step": 22946 }, { "epoch": 0.4803441346395378, "grad_norm": 0.23795345425605774, "learning_rate": 0.00018763991923233616, "loss": 11.6671, "step": 22947 }, { "epoch": 0.48036506740349993, "grad_norm": 0.31412866711616516, "learning_rate": 0.0001876388633195452, "loss": 11.6603, "step": 22948 }, { "epoch": 0.48038600016746213, "grad_norm": 0.3471277952194214, "learning_rate": 0.0001876378073646245, "loss": 11.6609, "step": 22949 }, { "epoch": 0.4804069329314243, "grad_norm": 0.2670344114303589, "learning_rate": 0.00018763675136757453, "loss": 11.674, "step": 22950 }, { "epoch": 0.4804278656953864, "grad_norm": 0.33898159861564636, "learning_rate": 0.00018763569532839584, "loss": 11.6588, "step": 22951 }, { "epoch": 0.48044879845934857, "grad_norm": 0.2816368341445923, "learning_rate": 0.0001876346392470889, "loss": 11.6982, "step": 22952 }, { "epoch": 0.4804697312233107, "grad_norm": 0.2087712287902832, "learning_rate": 0.00018763358312365422, "loss": 11.6796, "step": 22953 }, { "epoch": 0.48049066398727286, "grad_norm": 0.35190531611442566, "learning_rate": 0.00018763252695809234, "loss": 11.6813, "step": 22954 }, { "epoch": 0.48051159675123506, "grad_norm": 0.3878549635410309, "learning_rate": 0.00018763147075040375, "loss": 11.6757, "step": 22955 }, { "epoch": 0.4805325295151972, "grad_norm": 0.28003886342048645, "learning_rate": 0.00018763041450058893, "loss": 11.678, "step": 22956 }, { "epoch": 0.48055346227915935, "grad_norm": 0.2939552962779999, "learning_rate": 0.00018762935820864846, "loss": 11.681, "step": 22957 }, { "epoch": 0.4805743950431215, "grad_norm": 0.24232827126979828, "learning_rate": 0.00018762830187458274, "loss": 11.6846, "step": 22958 }, { "epoch": 0.48059532780708364, "grad_norm": 0.3876703679561615, "learning_rate": 0.00018762724549839236, "loss": 11.668, "step": 22959 }, { "epoch": 0.4806162605710458, "grad_norm": 0.2745521068572998, "learning_rate": 0.00018762618908007786, "loss": 11.6735, "step": 22960 }, { "epoch": 0.48063719333500793, "grad_norm": 0.3426967263221741, "learning_rate": 0.00018762513261963963, "loss": 11.6673, "step": 22961 }, { "epoch": 0.48065812609897013, "grad_norm": 0.27161192893981934, "learning_rate": 0.00018762407611707828, "loss": 11.6664, "step": 22962 }, { "epoch": 0.4806790588629323, "grad_norm": 0.30869290232658386, "learning_rate": 0.00018762301957239426, "loss": 11.6885, "step": 22963 }, { "epoch": 0.4806999916268944, "grad_norm": 0.2793419063091278, "learning_rate": 0.0001876219629855881, "loss": 11.6951, "step": 22964 }, { "epoch": 0.48072092439085656, "grad_norm": 0.24176126718521118, "learning_rate": 0.0001876209063566603, "loss": 11.6927, "step": 22965 }, { "epoch": 0.4807418571548187, "grad_norm": 0.26498204469680786, "learning_rate": 0.0001876198496856114, "loss": 11.6577, "step": 22966 }, { "epoch": 0.48076278991878085, "grad_norm": 0.4216649532318115, "learning_rate": 0.00018761879297244188, "loss": 11.7008, "step": 22967 }, { "epoch": 0.48078372268274305, "grad_norm": 0.27547481656074524, "learning_rate": 0.0001876177362171522, "loss": 11.6624, "step": 22968 }, { "epoch": 0.4808046554467052, "grad_norm": 0.24717459082603455, "learning_rate": 0.00018761667941974297, "loss": 11.692, "step": 22969 }, { "epoch": 0.48082558821066734, "grad_norm": 0.26064029335975647, "learning_rate": 0.00018761562258021462, "loss": 11.6645, "step": 22970 }, { "epoch": 0.4808465209746295, "grad_norm": 0.3299933671951294, "learning_rate": 0.00018761456569856772, "loss": 11.6717, "step": 22971 }, { "epoch": 0.48086745373859163, "grad_norm": 0.24343962967395782, "learning_rate": 0.00018761350877480272, "loss": 11.6606, "step": 22972 }, { "epoch": 0.4808883865025538, "grad_norm": 0.21688544750213623, "learning_rate": 0.00018761245180892014, "loss": 11.6673, "step": 22973 }, { "epoch": 0.480909319266516, "grad_norm": 0.2686517536640167, "learning_rate": 0.00018761139480092052, "loss": 11.6724, "step": 22974 }, { "epoch": 0.4809302520304781, "grad_norm": 0.2518264353275299, "learning_rate": 0.00018761033775080432, "loss": 11.6642, "step": 22975 }, { "epoch": 0.48095118479444027, "grad_norm": 0.3601730167865753, "learning_rate": 0.00018760928065857208, "loss": 11.664, "step": 22976 }, { "epoch": 0.4809721175584024, "grad_norm": 0.2507925033569336, "learning_rate": 0.00018760822352422433, "loss": 11.6732, "step": 22977 }, { "epoch": 0.48099305032236456, "grad_norm": 0.24548570811748505, "learning_rate": 0.00018760716634776156, "loss": 11.6823, "step": 22978 }, { "epoch": 0.4810139830863267, "grad_norm": 0.23946520686149597, "learning_rate": 0.00018760610912918422, "loss": 11.6611, "step": 22979 }, { "epoch": 0.48103491585028885, "grad_norm": 0.21527762711048126, "learning_rate": 0.00018760505186849288, "loss": 11.6578, "step": 22980 }, { "epoch": 0.48105584861425105, "grad_norm": 0.3158716857433319, "learning_rate": 0.00018760399456568805, "loss": 11.6818, "step": 22981 }, { "epoch": 0.4810767813782132, "grad_norm": 0.25679129362106323, "learning_rate": 0.00018760293722077024, "loss": 11.673, "step": 22982 }, { "epoch": 0.48109771414217534, "grad_norm": 0.23238727450370789, "learning_rate": 0.0001876018798337399, "loss": 11.6718, "step": 22983 }, { "epoch": 0.4811186469061375, "grad_norm": 0.25827908515930176, "learning_rate": 0.0001876008224045976, "loss": 11.6619, "step": 22984 }, { "epoch": 0.4811395796700996, "grad_norm": 0.29198747873306274, "learning_rate": 0.00018759976493334387, "loss": 11.658, "step": 22985 }, { "epoch": 0.48116051243406177, "grad_norm": 0.27587220072746277, "learning_rate": 0.00018759870741997912, "loss": 11.6609, "step": 22986 }, { "epoch": 0.48118144519802397, "grad_norm": 0.30159351229667664, "learning_rate": 0.00018759764986450394, "loss": 11.6764, "step": 22987 }, { "epoch": 0.4812023779619861, "grad_norm": 0.26314568519592285, "learning_rate": 0.0001875965922669188, "loss": 11.6691, "step": 22988 }, { "epoch": 0.48122331072594826, "grad_norm": 0.3063417673110962, "learning_rate": 0.00018759553462722425, "loss": 11.6819, "step": 22989 }, { "epoch": 0.4812442434899104, "grad_norm": 0.40678104758262634, "learning_rate": 0.00018759447694542076, "loss": 11.6742, "step": 22990 }, { "epoch": 0.48126517625387255, "grad_norm": 0.3474055826663971, "learning_rate": 0.00018759341922150883, "loss": 11.6606, "step": 22991 }, { "epoch": 0.4812861090178347, "grad_norm": 0.26774877309799194, "learning_rate": 0.000187592361455489, "loss": 11.6698, "step": 22992 }, { "epoch": 0.4813070417817969, "grad_norm": 0.27860212326049805, "learning_rate": 0.00018759130364736176, "loss": 11.6741, "step": 22993 }, { "epoch": 0.48132797454575904, "grad_norm": 0.28775691986083984, "learning_rate": 0.00018759024579712764, "loss": 11.6555, "step": 22994 }, { "epoch": 0.4813489073097212, "grad_norm": 0.4013303816318512, "learning_rate": 0.00018758918790478716, "loss": 11.6828, "step": 22995 }, { "epoch": 0.48136984007368333, "grad_norm": 0.31751549243927, "learning_rate": 0.00018758812997034076, "loss": 11.6862, "step": 22996 }, { "epoch": 0.4813907728376455, "grad_norm": 0.2758055031299591, "learning_rate": 0.000187587071993789, "loss": 11.6727, "step": 22997 }, { "epoch": 0.4814117056016076, "grad_norm": 0.3932637572288513, "learning_rate": 0.0001875860139751324, "loss": 11.6706, "step": 22998 }, { "epoch": 0.48143263836556976, "grad_norm": 0.42599228024482727, "learning_rate": 0.00018758495591437142, "loss": 11.679, "step": 22999 }, { "epoch": 0.48145357112953197, "grad_norm": 0.31440502405166626, "learning_rate": 0.00018758389781150657, "loss": 11.6759, "step": 23000 }, { "epoch": 0.48145357112953197, "eval_loss": 11.672798156738281, "eval_runtime": 34.4029, "eval_samples_per_second": 27.934, "eval_steps_per_second": 7.005, "step": 23000 }, { "epoch": 0.4814745038934941, "grad_norm": 0.3967822194099426, "learning_rate": 0.00018758283966653844, "loss": 11.6753, "step": 23001 }, { "epoch": 0.48149543665745625, "grad_norm": 0.2641587555408478, "learning_rate": 0.00018758178147946746, "loss": 11.6727, "step": 23002 }, { "epoch": 0.4815163694214184, "grad_norm": 0.26223570108413696, "learning_rate": 0.00018758072325029417, "loss": 11.6753, "step": 23003 }, { "epoch": 0.48153730218538054, "grad_norm": 0.31822001934051514, "learning_rate": 0.00018757966497901906, "loss": 11.6839, "step": 23004 }, { "epoch": 0.4815582349493427, "grad_norm": 0.37281671166419983, "learning_rate": 0.00018757860666564267, "loss": 11.6819, "step": 23005 }, { "epoch": 0.4815791677133049, "grad_norm": 0.3594178259372711, "learning_rate": 0.00018757754831016547, "loss": 11.6722, "step": 23006 }, { "epoch": 0.48160010047726703, "grad_norm": 0.24617734551429749, "learning_rate": 0.000187576489912588, "loss": 11.6634, "step": 23007 }, { "epoch": 0.4816210332412292, "grad_norm": 0.3249469995498657, "learning_rate": 0.00018757543147291075, "loss": 11.6718, "step": 23008 }, { "epoch": 0.4816419660051913, "grad_norm": 0.2617143988609314, "learning_rate": 0.00018757437299113424, "loss": 11.6601, "step": 23009 }, { "epoch": 0.48166289876915347, "grad_norm": 0.3413342535495758, "learning_rate": 0.00018757331446725894, "loss": 11.6812, "step": 23010 }, { "epoch": 0.4816838315331156, "grad_norm": 0.26189273595809937, "learning_rate": 0.00018757225590128543, "loss": 11.6739, "step": 23011 }, { "epoch": 0.48170476429707776, "grad_norm": 0.30586203932762146, "learning_rate": 0.00018757119729321417, "loss": 11.6561, "step": 23012 }, { "epoch": 0.48172569706103996, "grad_norm": 0.2985815405845642, "learning_rate": 0.0001875701386430457, "loss": 11.6868, "step": 23013 }, { "epoch": 0.4817466298250021, "grad_norm": 0.2540142238140106, "learning_rate": 0.00018756907995078049, "loss": 11.6724, "step": 23014 }, { "epoch": 0.48176756258896425, "grad_norm": 0.32475006580352783, "learning_rate": 0.00018756802121641907, "loss": 11.6548, "step": 23015 }, { "epoch": 0.4817884953529264, "grad_norm": 0.29097166657447815, "learning_rate": 0.00018756696243996194, "loss": 11.6646, "step": 23016 }, { "epoch": 0.48180942811688854, "grad_norm": 0.3642539381980896, "learning_rate": 0.00018756590362140966, "loss": 11.6768, "step": 23017 }, { "epoch": 0.4818303608808507, "grad_norm": 0.26741519570350647, "learning_rate": 0.00018756484476076263, "loss": 11.6798, "step": 23018 }, { "epoch": 0.4818512936448129, "grad_norm": 0.3608637750148773, "learning_rate": 0.00018756378585802145, "loss": 11.6511, "step": 23019 }, { "epoch": 0.48187222640877503, "grad_norm": 0.4394356310367584, "learning_rate": 0.0001875627269131866, "loss": 11.6694, "step": 23020 }, { "epoch": 0.4818931591727372, "grad_norm": 0.2791418433189392, "learning_rate": 0.00018756166792625862, "loss": 11.674, "step": 23021 }, { "epoch": 0.4819140919366993, "grad_norm": 0.2709904611110687, "learning_rate": 0.00018756060889723797, "loss": 11.6749, "step": 23022 }, { "epoch": 0.48193502470066146, "grad_norm": 0.25002017617225647, "learning_rate": 0.00018755954982612515, "loss": 11.674, "step": 23023 }, { "epoch": 0.4819559574646236, "grad_norm": 0.4033272862434387, "learning_rate": 0.00018755849071292076, "loss": 11.6769, "step": 23024 }, { "epoch": 0.4819768902285858, "grad_norm": 0.3371627628803253, "learning_rate": 0.00018755743155762523, "loss": 11.6891, "step": 23025 }, { "epoch": 0.48199782299254795, "grad_norm": 0.3436896502971649, "learning_rate": 0.00018755637236023907, "loss": 11.6605, "step": 23026 }, { "epoch": 0.4820187557565101, "grad_norm": 0.4234274923801422, "learning_rate": 0.0001875553131207628, "loss": 11.69, "step": 23027 }, { "epoch": 0.48203968852047224, "grad_norm": 0.3344680368900299, "learning_rate": 0.00018755425383919696, "loss": 11.6704, "step": 23028 }, { "epoch": 0.4820606212844344, "grad_norm": 0.3383749723434448, "learning_rate": 0.00018755319451554202, "loss": 11.6767, "step": 23029 }, { "epoch": 0.48208155404839653, "grad_norm": 0.3451346457004547, "learning_rate": 0.00018755213514979853, "loss": 11.6774, "step": 23030 }, { "epoch": 0.4821024868123587, "grad_norm": 0.22452975809574127, "learning_rate": 0.00018755107574196695, "loss": 11.6707, "step": 23031 }, { "epoch": 0.4821234195763209, "grad_norm": 0.2967364192008972, "learning_rate": 0.00018755001629204782, "loss": 11.6714, "step": 23032 }, { "epoch": 0.482144352340283, "grad_norm": 0.29965680837631226, "learning_rate": 0.00018754895680004165, "loss": 11.6897, "step": 23033 }, { "epoch": 0.48216528510424517, "grad_norm": 0.29322755336761475, "learning_rate": 0.00018754789726594892, "loss": 11.6962, "step": 23034 }, { "epoch": 0.4821862178682073, "grad_norm": 0.2621724009513855, "learning_rate": 0.00018754683768977017, "loss": 11.6875, "step": 23035 }, { "epoch": 0.48220715063216946, "grad_norm": 0.29039686918258667, "learning_rate": 0.00018754577807150594, "loss": 11.6642, "step": 23036 }, { "epoch": 0.4822280833961316, "grad_norm": 0.3323022425174713, "learning_rate": 0.00018754471841115665, "loss": 11.6684, "step": 23037 }, { "epoch": 0.4822490161600938, "grad_norm": 0.2616903781890869, "learning_rate": 0.00018754365870872288, "loss": 11.6719, "step": 23038 }, { "epoch": 0.48226994892405595, "grad_norm": 0.258184552192688, "learning_rate": 0.0001875425989642051, "loss": 11.684, "step": 23039 }, { "epoch": 0.4822908816880181, "grad_norm": 0.3713914155960083, "learning_rate": 0.00018754153917760388, "loss": 11.6717, "step": 23040 }, { "epoch": 0.48231181445198024, "grad_norm": 0.2787509560585022, "learning_rate": 0.00018754047934891962, "loss": 11.6772, "step": 23041 }, { "epoch": 0.4823327472159424, "grad_norm": 0.3206593990325928, "learning_rate": 0.00018753941947815297, "loss": 11.6644, "step": 23042 }, { "epoch": 0.4823536799799045, "grad_norm": 0.2644414007663727, "learning_rate": 0.00018753835956530432, "loss": 11.6745, "step": 23043 }, { "epoch": 0.4823746127438667, "grad_norm": 0.33867576718330383, "learning_rate": 0.00018753729961037423, "loss": 11.6892, "step": 23044 }, { "epoch": 0.48239554550782887, "grad_norm": 0.29328858852386475, "learning_rate": 0.00018753623961336322, "loss": 11.6728, "step": 23045 }, { "epoch": 0.482416478271791, "grad_norm": 0.3002723753452301, "learning_rate": 0.00018753517957427178, "loss": 11.673, "step": 23046 }, { "epoch": 0.48243741103575316, "grad_norm": 0.3309028148651123, "learning_rate": 0.00018753411949310044, "loss": 11.6637, "step": 23047 }, { "epoch": 0.4824583437997153, "grad_norm": 0.27256277203559875, "learning_rate": 0.00018753305936984965, "loss": 11.6702, "step": 23048 }, { "epoch": 0.48247927656367745, "grad_norm": 0.29581496119499207, "learning_rate": 0.00018753199920452002, "loss": 11.6781, "step": 23049 }, { "epoch": 0.4825002093276396, "grad_norm": 0.3055002987384796, "learning_rate": 0.000187530938997112, "loss": 11.6814, "step": 23050 }, { "epoch": 0.4825211420916018, "grad_norm": 0.263152539730072, "learning_rate": 0.00018752987874762607, "loss": 11.67, "step": 23051 }, { "epoch": 0.48254207485556394, "grad_norm": 0.3274497091770172, "learning_rate": 0.00018752881845606278, "loss": 11.6736, "step": 23052 }, { "epoch": 0.4825630076195261, "grad_norm": 0.3666684925556183, "learning_rate": 0.00018752775812242264, "loss": 11.6769, "step": 23053 }, { "epoch": 0.48258394038348823, "grad_norm": 0.2694213390350342, "learning_rate": 0.00018752669774670615, "loss": 11.6567, "step": 23054 }, { "epoch": 0.4826048731474504, "grad_norm": 0.24750488996505737, "learning_rate": 0.0001875256373289138, "loss": 11.6735, "step": 23055 }, { "epoch": 0.4826258059114125, "grad_norm": 0.2841242849826813, "learning_rate": 0.00018752457686904616, "loss": 11.6694, "step": 23056 }, { "epoch": 0.4826467386753747, "grad_norm": 0.2900471091270447, "learning_rate": 0.0001875235163671037, "loss": 11.6595, "step": 23057 }, { "epoch": 0.48266767143933687, "grad_norm": 0.22793562710285187, "learning_rate": 0.00018752245582308693, "loss": 11.6739, "step": 23058 }, { "epoch": 0.482688604203299, "grad_norm": 0.3210887014865875, "learning_rate": 0.00018752139523699635, "loss": 11.6794, "step": 23059 }, { "epoch": 0.48270953696726115, "grad_norm": 0.2683303654193878, "learning_rate": 0.00018752033460883247, "loss": 11.664, "step": 23060 }, { "epoch": 0.4827304697312233, "grad_norm": 0.3083636164665222, "learning_rate": 0.00018751927393859582, "loss": 11.6763, "step": 23061 }, { "epoch": 0.48275140249518544, "grad_norm": 0.24380841851234436, "learning_rate": 0.0001875182132262869, "loss": 11.6662, "step": 23062 }, { "epoch": 0.48277233525914764, "grad_norm": 0.24650432169437408, "learning_rate": 0.00018751715247190625, "loss": 11.6743, "step": 23063 }, { "epoch": 0.4827932680231098, "grad_norm": 0.25101035833358765, "learning_rate": 0.00018751609167545432, "loss": 11.6623, "step": 23064 }, { "epoch": 0.48281420078707193, "grad_norm": 0.278716504573822, "learning_rate": 0.00018751503083693164, "loss": 11.659, "step": 23065 }, { "epoch": 0.4828351335510341, "grad_norm": 0.3107578754425049, "learning_rate": 0.00018751396995633876, "loss": 11.6642, "step": 23066 }, { "epoch": 0.4828560663149962, "grad_norm": 0.23901303112506866, "learning_rate": 0.00018751290903367616, "loss": 11.6766, "step": 23067 }, { "epoch": 0.48287699907895837, "grad_norm": 0.31713297963142395, "learning_rate": 0.00018751184806894432, "loss": 11.6737, "step": 23068 }, { "epoch": 0.4828979318429205, "grad_norm": 0.30002808570861816, "learning_rate": 0.0001875107870621438, "loss": 11.6705, "step": 23069 }, { "epoch": 0.4829188646068827, "grad_norm": 0.24150487780570984, "learning_rate": 0.00018750972601327512, "loss": 11.685, "step": 23070 }, { "epoch": 0.48293979737084486, "grad_norm": 0.28712376952171326, "learning_rate": 0.00018750866492233873, "loss": 11.6681, "step": 23071 }, { "epoch": 0.482960730134807, "grad_norm": 0.34567609429359436, "learning_rate": 0.00018750760378933516, "loss": 11.6643, "step": 23072 }, { "epoch": 0.48298166289876915, "grad_norm": 0.3981100916862488, "learning_rate": 0.00018750654261426495, "loss": 11.6595, "step": 23073 }, { "epoch": 0.4830025956627313, "grad_norm": 0.27906909584999084, "learning_rate": 0.00018750548139712857, "loss": 11.6671, "step": 23074 }, { "epoch": 0.48302352842669344, "grad_norm": 0.37683242559432983, "learning_rate": 0.00018750442013792658, "loss": 11.6926, "step": 23075 }, { "epoch": 0.48304446119065564, "grad_norm": 0.4598816931247711, "learning_rate": 0.00018750335883665947, "loss": 11.6758, "step": 23076 }, { "epoch": 0.4830653939546178, "grad_norm": 0.26931139826774597, "learning_rate": 0.0001875022974933277, "loss": 11.6673, "step": 23077 }, { "epoch": 0.48308632671857993, "grad_norm": 0.44010981917381287, "learning_rate": 0.00018750123610793184, "loss": 11.69, "step": 23078 }, { "epoch": 0.4831072594825421, "grad_norm": 0.283617228269577, "learning_rate": 0.0001875001746804724, "loss": 11.6573, "step": 23079 }, { "epoch": 0.4831281922465042, "grad_norm": 0.2505033612251282, "learning_rate": 0.00018749911321094985, "loss": 11.6832, "step": 23080 }, { "epoch": 0.48314912501046636, "grad_norm": 0.30119073390960693, "learning_rate": 0.00018749805169936473, "loss": 11.6842, "step": 23081 }, { "epoch": 0.48317005777442856, "grad_norm": 0.4006045460700989, "learning_rate": 0.00018749699014571755, "loss": 11.677, "step": 23082 }, { "epoch": 0.4831909905383907, "grad_norm": 0.2691454291343689, "learning_rate": 0.0001874959285500088, "loss": 11.6648, "step": 23083 }, { "epoch": 0.48321192330235285, "grad_norm": 0.3191007673740387, "learning_rate": 0.00018749486691223903, "loss": 11.6646, "step": 23084 }, { "epoch": 0.483232856066315, "grad_norm": 0.31958630681037903, "learning_rate": 0.0001874938052324087, "loss": 11.679, "step": 23085 }, { "epoch": 0.48325378883027714, "grad_norm": 0.3327661454677582, "learning_rate": 0.00018749274351051836, "loss": 11.6784, "step": 23086 }, { "epoch": 0.4832747215942393, "grad_norm": 0.3391300439834595, "learning_rate": 0.00018749168174656848, "loss": 11.6853, "step": 23087 }, { "epoch": 0.48329565435820143, "grad_norm": 0.35299447178840637, "learning_rate": 0.00018749061994055962, "loss": 11.6828, "step": 23088 }, { "epoch": 0.48331658712216363, "grad_norm": 0.3680001199245453, "learning_rate": 0.00018748955809249224, "loss": 11.6891, "step": 23089 }, { "epoch": 0.4833375198861258, "grad_norm": 0.2789643108844757, "learning_rate": 0.0001874884962023669, "loss": 11.6763, "step": 23090 }, { "epoch": 0.4833584526500879, "grad_norm": 0.38926631212234497, "learning_rate": 0.00018748743427018407, "loss": 11.695, "step": 23091 }, { "epoch": 0.48337938541405007, "grad_norm": 0.2865353524684906, "learning_rate": 0.0001874863722959443, "loss": 11.6725, "step": 23092 }, { "epoch": 0.4834003181780122, "grad_norm": 0.3100188672542572, "learning_rate": 0.00018748531027964807, "loss": 11.6791, "step": 23093 }, { "epoch": 0.48342125094197436, "grad_norm": 0.2953590452671051, "learning_rate": 0.00018748424822129586, "loss": 11.6837, "step": 23094 }, { "epoch": 0.48344218370593656, "grad_norm": 0.23614263534545898, "learning_rate": 0.00018748318612088824, "loss": 11.6745, "step": 23095 }, { "epoch": 0.4834631164698987, "grad_norm": 0.2760331332683563, "learning_rate": 0.00018748212397842572, "loss": 11.6765, "step": 23096 }, { "epoch": 0.48348404923386085, "grad_norm": 0.31000372767448425, "learning_rate": 0.00018748106179390878, "loss": 11.6609, "step": 23097 }, { "epoch": 0.483504981997823, "grad_norm": 0.2777958810329437, "learning_rate": 0.00018747999956733794, "loss": 11.6663, "step": 23098 }, { "epoch": 0.48352591476178514, "grad_norm": 0.3149730861186981, "learning_rate": 0.0001874789372987137, "loss": 11.6642, "step": 23099 }, { "epoch": 0.4835468475257473, "grad_norm": 0.2688911557197571, "learning_rate": 0.00018747787498803658, "loss": 11.6687, "step": 23100 }, { "epoch": 0.4835677802897094, "grad_norm": 0.29501596093177795, "learning_rate": 0.00018747681263530711, "loss": 11.6737, "step": 23101 }, { "epoch": 0.4835887130536716, "grad_norm": 0.21984729170799255, "learning_rate": 0.00018747575024052578, "loss": 11.6692, "step": 23102 }, { "epoch": 0.48360964581763377, "grad_norm": 0.35762616991996765, "learning_rate": 0.00018747468780369312, "loss": 11.6833, "step": 23103 }, { "epoch": 0.4836305785815959, "grad_norm": 0.3059385120868683, "learning_rate": 0.0001874736253248096, "loss": 11.6759, "step": 23104 }, { "epoch": 0.48365151134555806, "grad_norm": 0.2707158029079437, "learning_rate": 0.00018747256280387577, "loss": 11.6665, "step": 23105 }, { "epoch": 0.4836724441095202, "grad_norm": 0.2835439145565033, "learning_rate": 0.00018747150024089212, "loss": 11.6791, "step": 23106 }, { "epoch": 0.48369337687348235, "grad_norm": 0.27284175157546997, "learning_rate": 0.00018747043763585913, "loss": 11.6619, "step": 23107 }, { "epoch": 0.48371430963744455, "grad_norm": 0.3349902629852295, "learning_rate": 0.00018746937498877742, "loss": 11.675, "step": 23108 }, { "epoch": 0.4837352424014067, "grad_norm": 0.3633590638637543, "learning_rate": 0.0001874683122996474, "loss": 11.6712, "step": 23109 }, { "epoch": 0.48375617516536884, "grad_norm": 0.2433118373155594, "learning_rate": 0.0001874672495684696, "loss": 11.6646, "step": 23110 }, { "epoch": 0.483777107929331, "grad_norm": 0.3115747570991516, "learning_rate": 0.0001874661867952445, "loss": 11.6871, "step": 23111 }, { "epoch": 0.48379804069329313, "grad_norm": 0.24317947030067444, "learning_rate": 0.0001874651239799727, "loss": 11.6717, "step": 23112 }, { "epoch": 0.4838189734572553, "grad_norm": 0.31884872913360596, "learning_rate": 0.00018746406112265466, "loss": 11.6673, "step": 23113 }, { "epoch": 0.4838399062212175, "grad_norm": 0.26086243987083435, "learning_rate": 0.0001874629982232909, "loss": 11.6568, "step": 23114 }, { "epoch": 0.4838608389851796, "grad_norm": 0.255472332239151, "learning_rate": 0.00018746193528188193, "loss": 11.6793, "step": 23115 }, { "epoch": 0.48388177174914176, "grad_norm": 0.30459898710250854, "learning_rate": 0.0001874608722984282, "loss": 11.6607, "step": 23116 }, { "epoch": 0.4839027045131039, "grad_norm": 0.27715349197387695, "learning_rate": 0.0001874598092729303, "loss": 11.6707, "step": 23117 }, { "epoch": 0.48392363727706605, "grad_norm": 0.2970345914363861, "learning_rate": 0.00018745874620538876, "loss": 11.664, "step": 23118 }, { "epoch": 0.4839445700410282, "grad_norm": 0.3179298937320709, "learning_rate": 0.000187457683095804, "loss": 11.6598, "step": 23119 }, { "epoch": 0.48396550280499034, "grad_norm": 0.272522896528244, "learning_rate": 0.0001874566199441766, "loss": 11.6549, "step": 23120 }, { "epoch": 0.48398643556895254, "grad_norm": 0.283446729183197, "learning_rate": 0.00018745555675050705, "loss": 11.6926, "step": 23121 }, { "epoch": 0.4840073683329147, "grad_norm": 0.2936835289001465, "learning_rate": 0.00018745449351479585, "loss": 11.6609, "step": 23122 }, { "epoch": 0.48402830109687683, "grad_norm": 0.2623324990272522, "learning_rate": 0.00018745343023704352, "loss": 11.6737, "step": 23123 }, { "epoch": 0.484049233860839, "grad_norm": 0.2978895306587219, "learning_rate": 0.00018745236691725058, "loss": 11.6772, "step": 23124 }, { "epoch": 0.4840701666248011, "grad_norm": 0.25843545794487, "learning_rate": 0.00018745130355541756, "loss": 11.6696, "step": 23125 }, { "epoch": 0.48409109938876327, "grad_norm": 0.338157594203949, "learning_rate": 0.00018745024015154492, "loss": 11.6732, "step": 23126 }, { "epoch": 0.48411203215272547, "grad_norm": 0.2872112989425659, "learning_rate": 0.00018744917670563319, "loss": 11.6777, "step": 23127 }, { "epoch": 0.4841329649166876, "grad_norm": 0.23547329008579254, "learning_rate": 0.0001874481132176829, "loss": 11.6738, "step": 23128 }, { "epoch": 0.48415389768064976, "grad_norm": 0.3119536340236664, "learning_rate": 0.00018744704968769456, "loss": 11.6752, "step": 23129 }, { "epoch": 0.4841748304446119, "grad_norm": 0.3486892580986023, "learning_rate": 0.00018744598611566865, "loss": 11.6778, "step": 23130 }, { "epoch": 0.48419576320857405, "grad_norm": 0.2539327144622803, "learning_rate": 0.0001874449225016057, "loss": 11.6484, "step": 23131 }, { "epoch": 0.4842166959725362, "grad_norm": 0.28497618436813354, "learning_rate": 0.00018744385884550624, "loss": 11.6793, "step": 23132 }, { "epoch": 0.4842376287364984, "grad_norm": 0.3232634365558624, "learning_rate": 0.00018744279514737076, "loss": 11.6824, "step": 23133 }, { "epoch": 0.48425856150046054, "grad_norm": 0.3534935712814331, "learning_rate": 0.00018744173140719975, "loss": 11.6628, "step": 23134 }, { "epoch": 0.4842794942644227, "grad_norm": 0.3166423439979553, "learning_rate": 0.0001874406676249938, "loss": 11.6664, "step": 23135 }, { "epoch": 0.48430042702838483, "grad_norm": 0.321380078792572, "learning_rate": 0.0001874396038007533, "loss": 11.6745, "step": 23136 }, { "epoch": 0.484321359792347, "grad_norm": 0.2813299298286438, "learning_rate": 0.0001874385399344789, "loss": 11.6662, "step": 23137 }, { "epoch": 0.4843422925563091, "grad_norm": 0.3138861060142517, "learning_rate": 0.000187437476026171, "loss": 11.6809, "step": 23138 }, { "epoch": 0.48436322532027126, "grad_norm": 0.8177711367607117, "learning_rate": 0.0001874364120758302, "loss": 11.6053, "step": 23139 }, { "epoch": 0.48438415808423346, "grad_norm": 0.3262370228767395, "learning_rate": 0.0001874353480834569, "loss": 11.6758, "step": 23140 }, { "epoch": 0.4844050908481956, "grad_norm": 0.27061739563941956, "learning_rate": 0.00018743428404905172, "loss": 11.6611, "step": 23141 }, { "epoch": 0.48442602361215775, "grad_norm": 0.24849267303943634, "learning_rate": 0.0001874332199726151, "loss": 11.6703, "step": 23142 }, { "epoch": 0.4844469563761199, "grad_norm": 0.31252819299697876, "learning_rate": 0.00018743215585414758, "loss": 11.6651, "step": 23143 }, { "epoch": 0.48446788914008204, "grad_norm": 0.30267226696014404, "learning_rate": 0.0001874310916936497, "loss": 11.6644, "step": 23144 }, { "epoch": 0.4844888219040442, "grad_norm": 0.31827038526535034, "learning_rate": 0.00018743002749112194, "loss": 11.6898, "step": 23145 }, { "epoch": 0.4845097546680064, "grad_norm": 0.2740052044391632, "learning_rate": 0.00018742896324656477, "loss": 11.6615, "step": 23146 }, { "epoch": 0.48453068743196853, "grad_norm": 0.24855278432369232, "learning_rate": 0.0001874278989599788, "loss": 11.665, "step": 23147 }, { "epoch": 0.4845516201959307, "grad_norm": 0.36598366498947144, "learning_rate": 0.00018742683463136446, "loss": 11.6842, "step": 23148 }, { "epoch": 0.4845725529598928, "grad_norm": 0.29693758487701416, "learning_rate": 0.00018742577026072227, "loss": 11.6634, "step": 23149 }, { "epoch": 0.48459348572385497, "grad_norm": 0.2856135070323944, "learning_rate": 0.00018742470584805278, "loss": 11.6671, "step": 23150 }, { "epoch": 0.4846144184878171, "grad_norm": 0.33319607377052307, "learning_rate": 0.0001874236413933565, "loss": 11.6586, "step": 23151 }, { "epoch": 0.4846353512517793, "grad_norm": 0.28715917468070984, "learning_rate": 0.0001874225768966339, "loss": 11.6838, "step": 23152 }, { "epoch": 0.48465628401574146, "grad_norm": 0.25632938742637634, "learning_rate": 0.0001874215123578855, "loss": 11.669, "step": 23153 }, { "epoch": 0.4846772167797036, "grad_norm": 0.36472374200820923, "learning_rate": 0.00018742044777711185, "loss": 11.6747, "step": 23154 }, { "epoch": 0.48469814954366575, "grad_norm": 0.2585846483707428, "learning_rate": 0.00018741938315431346, "loss": 11.6863, "step": 23155 }, { "epoch": 0.4847190823076279, "grad_norm": 0.2664570212364197, "learning_rate": 0.0001874183184894908, "loss": 11.6762, "step": 23156 }, { "epoch": 0.48474001507159004, "grad_norm": 0.2705550491809845, "learning_rate": 0.0001874172537826444, "loss": 11.6766, "step": 23157 }, { "epoch": 0.4847609478355522, "grad_norm": 0.27400705218315125, "learning_rate": 0.00018741618903377475, "loss": 11.6765, "step": 23158 }, { "epoch": 0.4847818805995144, "grad_norm": 0.2845613956451416, "learning_rate": 0.00018741512424288241, "loss": 11.6736, "step": 23159 }, { "epoch": 0.4848028133634765, "grad_norm": 0.2610466182231903, "learning_rate": 0.00018741405940996786, "loss": 11.6899, "step": 23160 }, { "epoch": 0.48482374612743867, "grad_norm": 0.23199887573719025, "learning_rate": 0.00018741299453503164, "loss": 11.6862, "step": 23161 }, { "epoch": 0.4848446788914008, "grad_norm": 0.2646264433860779, "learning_rate": 0.0001874119296180742, "loss": 11.6946, "step": 23162 }, { "epoch": 0.48486561165536296, "grad_norm": 0.3612876534461975, "learning_rate": 0.00018741086465909614, "loss": 11.6761, "step": 23163 }, { "epoch": 0.4848865444193251, "grad_norm": 0.3065730333328247, "learning_rate": 0.0001874097996580979, "loss": 11.6734, "step": 23164 }, { "epoch": 0.4849074771832873, "grad_norm": 0.3344798982143402, "learning_rate": 0.00018740873461508005, "loss": 11.6677, "step": 23165 }, { "epoch": 0.48492840994724945, "grad_norm": 0.35352957248687744, "learning_rate": 0.00018740766953004304, "loss": 11.6788, "step": 23166 }, { "epoch": 0.4849493427112116, "grad_norm": 0.30346575379371643, "learning_rate": 0.00018740660440298742, "loss": 11.6638, "step": 23167 }, { "epoch": 0.48497027547517374, "grad_norm": 0.3542238473892212, "learning_rate": 0.00018740553923391368, "loss": 11.686, "step": 23168 }, { "epoch": 0.4849912082391359, "grad_norm": 0.3195563852787018, "learning_rate": 0.00018740447402282236, "loss": 11.6657, "step": 23169 }, { "epoch": 0.48501214100309803, "grad_norm": 0.2859773337841034, "learning_rate": 0.00018740340876971395, "loss": 11.6876, "step": 23170 }, { "epoch": 0.48503307376706023, "grad_norm": 0.22558698058128357, "learning_rate": 0.00018740234347458897, "loss": 11.6852, "step": 23171 }, { "epoch": 0.4850540065310224, "grad_norm": 0.24911357462406158, "learning_rate": 0.00018740127813744793, "loss": 11.6745, "step": 23172 }, { "epoch": 0.4850749392949845, "grad_norm": 0.25320965051651, "learning_rate": 0.00018740021275829137, "loss": 11.6551, "step": 23173 }, { "epoch": 0.48509587205894666, "grad_norm": 0.3039044737815857, "learning_rate": 0.00018739914733711976, "loss": 11.6593, "step": 23174 }, { "epoch": 0.4851168048229088, "grad_norm": 0.24931848049163818, "learning_rate": 0.00018739808187393363, "loss": 11.6794, "step": 23175 }, { "epoch": 0.48513773758687095, "grad_norm": 0.2934132516384125, "learning_rate": 0.00018739701636873347, "loss": 11.6741, "step": 23176 }, { "epoch": 0.4851586703508331, "grad_norm": 0.37959352135658264, "learning_rate": 0.00018739595082151983, "loss": 11.6878, "step": 23177 }, { "epoch": 0.4851796031147953, "grad_norm": 0.25755634903907776, "learning_rate": 0.0001873948852322932, "loss": 11.6698, "step": 23178 }, { "epoch": 0.48520053587875744, "grad_norm": 0.33530333638191223, "learning_rate": 0.0001873938196010541, "loss": 11.6751, "step": 23179 }, { "epoch": 0.4852214686427196, "grad_norm": 0.3790358603000641, "learning_rate": 0.00018739275392780304, "loss": 11.6844, "step": 23180 }, { "epoch": 0.48524240140668173, "grad_norm": 0.25917869806289673, "learning_rate": 0.00018739168821254053, "loss": 11.6676, "step": 23181 }, { "epoch": 0.4852633341706439, "grad_norm": 0.21531035006046295, "learning_rate": 0.00018739062245526708, "loss": 11.6704, "step": 23182 }, { "epoch": 0.485284266934606, "grad_norm": 0.2588796019554138, "learning_rate": 0.0001873895566559832, "loss": 11.6621, "step": 23183 }, { "epoch": 0.4853051996985682, "grad_norm": 0.37423813343048096, "learning_rate": 0.00018738849081468946, "loss": 11.6711, "step": 23184 }, { "epoch": 0.48532613246253037, "grad_norm": 0.24011264741420746, "learning_rate": 0.0001873874249313863, "loss": 11.6862, "step": 23185 }, { "epoch": 0.4853470652264925, "grad_norm": 0.3009319305419922, "learning_rate": 0.0001873863590060742, "loss": 11.6681, "step": 23186 }, { "epoch": 0.48536799799045466, "grad_norm": 0.24514581263065338, "learning_rate": 0.00018738529303875375, "loss": 11.6668, "step": 23187 }, { "epoch": 0.4853889307544168, "grad_norm": 0.24974580109119415, "learning_rate": 0.00018738422702942544, "loss": 11.6714, "step": 23188 }, { "epoch": 0.48540986351837895, "grad_norm": 0.2841390073299408, "learning_rate": 0.0001873831609780898, "loss": 11.6723, "step": 23189 }, { "epoch": 0.4854307962823411, "grad_norm": 0.29314807057380676, "learning_rate": 0.00018738209488474735, "loss": 11.6782, "step": 23190 }, { "epoch": 0.4854517290463033, "grad_norm": 0.2634229362010956, "learning_rate": 0.0001873810287493985, "loss": 11.6631, "step": 23191 }, { "epoch": 0.48547266181026544, "grad_norm": 0.452583372592926, "learning_rate": 0.00018737996257204388, "loss": 11.6795, "step": 23192 }, { "epoch": 0.4854935945742276, "grad_norm": 0.33844420313835144, "learning_rate": 0.00018737889635268395, "loss": 11.6779, "step": 23193 }, { "epoch": 0.4855145273381897, "grad_norm": 0.3600393831729889, "learning_rate": 0.00018737783009131927, "loss": 11.6559, "step": 23194 }, { "epoch": 0.4855354601021519, "grad_norm": 0.27183544635772705, "learning_rate": 0.0001873767637879503, "loss": 11.6669, "step": 23195 }, { "epoch": 0.485556392866114, "grad_norm": 0.2879028618335724, "learning_rate": 0.00018737569744257756, "loss": 11.6715, "step": 23196 }, { "epoch": 0.4855773256300762, "grad_norm": 0.3632150888442993, "learning_rate": 0.00018737463105520156, "loss": 11.6886, "step": 23197 }, { "epoch": 0.48559825839403836, "grad_norm": 0.2783125638961792, "learning_rate": 0.00018737356462582283, "loss": 11.6749, "step": 23198 }, { "epoch": 0.4856191911580005, "grad_norm": 0.25846603512763977, "learning_rate": 0.00018737249815444187, "loss": 11.6479, "step": 23199 }, { "epoch": 0.48564012392196265, "grad_norm": 0.36611050367355347, "learning_rate": 0.00018737143164105922, "loss": 11.674, "step": 23200 }, { "epoch": 0.4856610566859248, "grad_norm": 0.31500762701034546, "learning_rate": 0.00018737036508567533, "loss": 11.6826, "step": 23201 }, { "epoch": 0.48568198944988694, "grad_norm": 0.23841631412506104, "learning_rate": 0.00018736929848829078, "loss": 11.6644, "step": 23202 }, { "epoch": 0.48570292221384914, "grad_norm": 0.27744340896606445, "learning_rate": 0.00018736823184890608, "loss": 11.6697, "step": 23203 }, { "epoch": 0.4857238549778113, "grad_norm": 0.24205327033996582, "learning_rate": 0.0001873671651675217, "loss": 11.6623, "step": 23204 }, { "epoch": 0.48574478774177343, "grad_norm": 0.2982630133628845, "learning_rate": 0.00018736609844413818, "loss": 11.6757, "step": 23205 }, { "epoch": 0.4857657205057356, "grad_norm": 0.32635077834129333, "learning_rate": 0.00018736503167875602, "loss": 11.6417, "step": 23206 }, { "epoch": 0.4857866532696977, "grad_norm": 0.355957567691803, "learning_rate": 0.00018736396487137574, "loss": 11.6714, "step": 23207 }, { "epoch": 0.48580758603365987, "grad_norm": 0.3337365388870239, "learning_rate": 0.00018736289802199785, "loss": 11.6634, "step": 23208 }, { "epoch": 0.485828518797622, "grad_norm": 0.28484871983528137, "learning_rate": 0.00018736183113062283, "loss": 11.6738, "step": 23209 }, { "epoch": 0.4858494515615842, "grad_norm": 0.2688170373439789, "learning_rate": 0.0001873607641972513, "loss": 11.6612, "step": 23210 }, { "epoch": 0.48587038432554636, "grad_norm": 0.2493082582950592, "learning_rate": 0.00018735969722188364, "loss": 11.6754, "step": 23211 }, { "epoch": 0.4858913170895085, "grad_norm": 0.32775983214378357, "learning_rate": 0.00018735863020452045, "loss": 11.6748, "step": 23212 }, { "epoch": 0.48591224985347065, "grad_norm": 0.32338958978652954, "learning_rate": 0.0001873575631451622, "loss": 11.6662, "step": 23213 }, { "epoch": 0.4859331826174328, "grad_norm": 0.24334809184074402, "learning_rate": 0.00018735649604380945, "loss": 11.6846, "step": 23214 }, { "epoch": 0.48595411538139494, "grad_norm": 0.27177807688713074, "learning_rate": 0.00018735542890046264, "loss": 11.6631, "step": 23215 }, { "epoch": 0.48597504814535714, "grad_norm": 0.25788652896881104, "learning_rate": 0.00018735436171512235, "loss": 11.6651, "step": 23216 }, { "epoch": 0.4859959809093193, "grad_norm": 0.3036087155342102, "learning_rate": 0.00018735329448778907, "loss": 11.6649, "step": 23217 }, { "epoch": 0.4860169136732814, "grad_norm": 0.26681092381477356, "learning_rate": 0.00018735222721846331, "loss": 11.681, "step": 23218 }, { "epoch": 0.48603784643724357, "grad_norm": 0.3074435591697693, "learning_rate": 0.0001873511599071456, "loss": 11.6744, "step": 23219 }, { "epoch": 0.4860587792012057, "grad_norm": 0.37764763832092285, "learning_rate": 0.00018735009255383637, "loss": 11.6821, "step": 23220 }, { "epoch": 0.48607971196516786, "grad_norm": 0.2870463728904724, "learning_rate": 0.00018734902515853626, "loss": 11.6723, "step": 23221 }, { "epoch": 0.48610064472913006, "grad_norm": 0.3448704779148102, "learning_rate": 0.0001873479577212457, "loss": 11.6691, "step": 23222 }, { "epoch": 0.4861215774930922, "grad_norm": 0.27261796593666077, "learning_rate": 0.00018734689024196523, "loss": 11.7004, "step": 23223 }, { "epoch": 0.48614251025705435, "grad_norm": 0.4580230414867401, "learning_rate": 0.00018734582272069537, "loss": 11.6886, "step": 23224 }, { "epoch": 0.4861634430210165, "grad_norm": 0.2905726432800293, "learning_rate": 0.00018734475515743662, "loss": 11.6821, "step": 23225 }, { "epoch": 0.48618437578497864, "grad_norm": 0.2906368374824524, "learning_rate": 0.00018734368755218947, "loss": 11.677, "step": 23226 }, { "epoch": 0.4862053085489408, "grad_norm": 0.2680981159210205, "learning_rate": 0.00018734261990495447, "loss": 11.6641, "step": 23227 }, { "epoch": 0.48622624131290293, "grad_norm": 0.23927664756774902, "learning_rate": 0.00018734155221573215, "loss": 11.6801, "step": 23228 }, { "epoch": 0.48624717407686513, "grad_norm": 0.26950350403785706, "learning_rate": 0.00018734048448452298, "loss": 11.6766, "step": 23229 }, { "epoch": 0.4862681068408273, "grad_norm": 0.2236575335264206, "learning_rate": 0.0001873394167113275, "loss": 11.6764, "step": 23230 }, { "epoch": 0.4862890396047894, "grad_norm": 0.2433343380689621, "learning_rate": 0.0001873383488961462, "loss": 11.6827, "step": 23231 }, { "epoch": 0.48630997236875156, "grad_norm": 0.2964015603065491, "learning_rate": 0.0001873372810389796, "loss": 11.6656, "step": 23232 }, { "epoch": 0.4863309051327137, "grad_norm": 0.255687952041626, "learning_rate": 0.0001873362131398282, "loss": 11.6758, "step": 23233 }, { "epoch": 0.48635183789667585, "grad_norm": 0.2967378497123718, "learning_rate": 0.00018733514519869257, "loss": 11.6823, "step": 23234 }, { "epoch": 0.48637277066063805, "grad_norm": 0.30565428733825684, "learning_rate": 0.00018733407721557317, "loss": 11.6673, "step": 23235 }, { "epoch": 0.4863937034246002, "grad_norm": 0.26119694113731384, "learning_rate": 0.00018733300919047052, "loss": 11.6694, "step": 23236 }, { "epoch": 0.48641463618856234, "grad_norm": 0.2343454509973526, "learning_rate": 0.00018733194112338514, "loss": 11.6692, "step": 23237 }, { "epoch": 0.4864355689525245, "grad_norm": 0.2755407691001892, "learning_rate": 0.00018733087301431759, "loss": 11.6678, "step": 23238 }, { "epoch": 0.48645650171648663, "grad_norm": 0.257750928401947, "learning_rate": 0.00018732980486326829, "loss": 11.6555, "step": 23239 }, { "epoch": 0.4864774344804488, "grad_norm": 0.26002463698387146, "learning_rate": 0.00018732873667023784, "loss": 11.6667, "step": 23240 }, { "epoch": 0.486498367244411, "grad_norm": 0.25061580538749695, "learning_rate": 0.00018732766843522667, "loss": 11.6841, "step": 23241 }, { "epoch": 0.4865193000083731, "grad_norm": 0.2517584562301636, "learning_rate": 0.00018732660015823537, "loss": 11.6673, "step": 23242 }, { "epoch": 0.48654023277233527, "grad_norm": 0.25733500719070435, "learning_rate": 0.00018732553183926443, "loss": 11.6626, "step": 23243 }, { "epoch": 0.4865611655362974, "grad_norm": 0.2658090591430664, "learning_rate": 0.00018732446347831432, "loss": 11.6735, "step": 23244 }, { "epoch": 0.48658209830025956, "grad_norm": 0.2885769307613373, "learning_rate": 0.0001873233950753856, "loss": 11.6611, "step": 23245 }, { "epoch": 0.4866030310642217, "grad_norm": 0.2892962098121643, "learning_rate": 0.00018732232663047881, "loss": 11.6677, "step": 23246 }, { "epoch": 0.48662396382818385, "grad_norm": 0.22557874023914337, "learning_rate": 0.0001873212581435944, "loss": 11.6771, "step": 23247 }, { "epoch": 0.48664489659214605, "grad_norm": 0.353834331035614, "learning_rate": 0.00018732018961473292, "loss": 11.6877, "step": 23248 }, { "epoch": 0.4866658293561082, "grad_norm": 0.31041115522384644, "learning_rate": 0.00018731912104389488, "loss": 11.6573, "step": 23249 }, { "epoch": 0.48668676212007034, "grad_norm": 0.3064602315425873, "learning_rate": 0.00018731805243108078, "loss": 11.6552, "step": 23250 }, { "epoch": 0.4867076948840325, "grad_norm": 0.34125518798828125, "learning_rate": 0.00018731698377629115, "loss": 11.6633, "step": 23251 }, { "epoch": 0.4867286276479946, "grad_norm": 0.2751064598560333, "learning_rate": 0.0001873159150795265, "loss": 11.6735, "step": 23252 }, { "epoch": 0.48674956041195677, "grad_norm": 0.27164122462272644, "learning_rate": 0.0001873148463407873, "loss": 11.6666, "step": 23253 }, { "epoch": 0.486770493175919, "grad_norm": 0.2323557585477829, "learning_rate": 0.00018731377756007415, "loss": 11.6669, "step": 23254 }, { "epoch": 0.4867914259398811, "grad_norm": 0.2506927251815796, "learning_rate": 0.00018731270873738752, "loss": 11.6671, "step": 23255 }, { "epoch": 0.48681235870384326, "grad_norm": 0.3412230908870697, "learning_rate": 0.0001873116398727279, "loss": 11.6775, "step": 23256 }, { "epoch": 0.4868332914678054, "grad_norm": 0.2725544273853302, "learning_rate": 0.00018731057096609582, "loss": 11.6788, "step": 23257 }, { "epoch": 0.48685422423176755, "grad_norm": 0.2449747920036316, "learning_rate": 0.0001873095020174918, "loss": 11.6648, "step": 23258 }, { "epoch": 0.4868751569957297, "grad_norm": 0.3743863105773926, "learning_rate": 0.00018730843302691637, "loss": 11.6811, "step": 23259 }, { "epoch": 0.4868960897596919, "grad_norm": 0.35850754380226135, "learning_rate": 0.00018730736399437002, "loss": 11.6722, "step": 23260 }, { "epoch": 0.48691702252365404, "grad_norm": 0.23267404735088348, "learning_rate": 0.00018730629491985323, "loss": 11.6704, "step": 23261 }, { "epoch": 0.4869379552876162, "grad_norm": 0.43362200260162354, "learning_rate": 0.00018730522580336662, "loss": 11.672, "step": 23262 }, { "epoch": 0.48695888805157833, "grad_norm": 0.3146047294139862, "learning_rate": 0.00018730415664491062, "loss": 11.6797, "step": 23263 }, { "epoch": 0.4869798208155405, "grad_norm": 0.28748348355293274, "learning_rate": 0.00018730308744448573, "loss": 11.6867, "step": 23264 }, { "epoch": 0.4870007535795026, "grad_norm": 0.3235490620136261, "learning_rate": 0.0001873020182020925, "loss": 11.675, "step": 23265 }, { "epoch": 0.48702168634346477, "grad_norm": 0.2914654314517975, "learning_rate": 0.00018730094891773147, "loss": 11.682, "step": 23266 }, { "epoch": 0.48704261910742697, "grad_norm": 0.30736786127090454, "learning_rate": 0.00018729987959140311, "loss": 11.6722, "step": 23267 }, { "epoch": 0.4870635518713891, "grad_norm": 0.28521832823753357, "learning_rate": 0.00018729881022310795, "loss": 11.6647, "step": 23268 }, { "epoch": 0.48708448463535126, "grad_norm": 0.298977792263031, "learning_rate": 0.0001872977408128465, "loss": 11.6749, "step": 23269 }, { "epoch": 0.4871054173993134, "grad_norm": 0.3073488175868988, "learning_rate": 0.00018729667136061927, "loss": 11.6698, "step": 23270 }, { "epoch": 0.48712635016327555, "grad_norm": 0.3294498920440674, "learning_rate": 0.0001872956018664268, "loss": 11.666, "step": 23271 }, { "epoch": 0.4871472829272377, "grad_norm": 0.3509836196899414, "learning_rate": 0.00018729453233026959, "loss": 11.6665, "step": 23272 }, { "epoch": 0.4871682156911999, "grad_norm": 0.4233187735080719, "learning_rate": 0.00018729346275214812, "loss": 11.6611, "step": 23273 }, { "epoch": 0.48718914845516204, "grad_norm": 0.3238881230354309, "learning_rate": 0.00018729239313206294, "loss": 11.6819, "step": 23274 }, { "epoch": 0.4872100812191242, "grad_norm": 0.28296250104904175, "learning_rate": 0.00018729132347001457, "loss": 11.6687, "step": 23275 }, { "epoch": 0.4872310139830863, "grad_norm": 0.2763999104499817, "learning_rate": 0.00018729025376600352, "loss": 11.6932, "step": 23276 }, { "epoch": 0.48725194674704847, "grad_norm": 0.38729923963546753, "learning_rate": 0.0001872891840200303, "loss": 11.6822, "step": 23277 }, { "epoch": 0.4872728795110106, "grad_norm": 0.2925293445587158, "learning_rate": 0.00018728811423209538, "loss": 11.6775, "step": 23278 }, { "epoch": 0.4872938122749728, "grad_norm": 0.3266369700431824, "learning_rate": 0.00018728704440219932, "loss": 11.6661, "step": 23279 }, { "epoch": 0.48731474503893496, "grad_norm": 0.272478312253952, "learning_rate": 0.00018728597453034263, "loss": 11.6766, "step": 23280 }, { "epoch": 0.4873356778028971, "grad_norm": 0.4247235655784607, "learning_rate": 0.00018728490461652583, "loss": 11.6723, "step": 23281 }, { "epoch": 0.48735661056685925, "grad_norm": 4.130082607269287, "learning_rate": 0.00018728383466074945, "loss": 11.6863, "step": 23282 }, { "epoch": 0.4873775433308214, "grad_norm": 0.3135052025318146, "learning_rate": 0.00018728276466301397, "loss": 11.6684, "step": 23283 }, { "epoch": 0.48739847609478354, "grad_norm": 0.250352680683136, "learning_rate": 0.00018728169462331993, "loss": 11.6715, "step": 23284 }, { "epoch": 0.4874194088587457, "grad_norm": 4.051458835601807, "learning_rate": 0.0001872806245416678, "loss": 11.6333, "step": 23285 }, { "epoch": 0.4874403416227079, "grad_norm": 0.28001904487609863, "learning_rate": 0.00018727955441805811, "loss": 11.6795, "step": 23286 }, { "epoch": 0.48746127438667003, "grad_norm": 0.3028181195259094, "learning_rate": 0.0001872784842524914, "loss": 11.6662, "step": 23287 }, { "epoch": 0.4874822071506322, "grad_norm": 0.3044764995574951, "learning_rate": 0.00018727741404496822, "loss": 11.6706, "step": 23288 }, { "epoch": 0.4875031399145943, "grad_norm": 0.32451871037483215, "learning_rate": 0.000187276343795489, "loss": 11.6777, "step": 23289 }, { "epoch": 0.48752407267855646, "grad_norm": 0.33395135402679443, "learning_rate": 0.0001872752735040543, "loss": 11.6752, "step": 23290 }, { "epoch": 0.4875450054425186, "grad_norm": 0.3013751804828644, "learning_rate": 0.00018727420317066464, "loss": 11.6567, "step": 23291 }, { "epoch": 0.4875659382064808, "grad_norm": 0.31680604815483093, "learning_rate": 0.0001872731327953205, "loss": 11.6752, "step": 23292 }, { "epoch": 0.48758687097044295, "grad_norm": 0.3784140348434448, "learning_rate": 0.00018727206237802242, "loss": 11.6638, "step": 23293 }, { "epoch": 0.4876078037344051, "grad_norm": 0.41915202140808105, "learning_rate": 0.0001872709919187709, "loss": 11.6691, "step": 23294 }, { "epoch": 0.48762873649836724, "grad_norm": 0.3011780083179474, "learning_rate": 0.00018726992141756649, "loss": 11.6833, "step": 23295 }, { "epoch": 0.4876496692623294, "grad_norm": 0.31851714849472046, "learning_rate": 0.00018726885087440968, "loss": 11.6822, "step": 23296 }, { "epoch": 0.48767060202629153, "grad_norm": 0.34193897247314453, "learning_rate": 0.00018726778028930097, "loss": 11.6604, "step": 23297 }, { "epoch": 0.4876915347902537, "grad_norm": 0.393331915140152, "learning_rate": 0.00018726670966224087, "loss": 11.6742, "step": 23298 }, { "epoch": 0.4877124675542159, "grad_norm": 0.2789759635925293, "learning_rate": 0.00018726563899322996, "loss": 11.6679, "step": 23299 }, { "epoch": 0.487733400318178, "grad_norm": 0.35648390650749207, "learning_rate": 0.00018726456828226868, "loss": 11.6732, "step": 23300 }, { "epoch": 0.48775433308214017, "grad_norm": 0.29031917452812195, "learning_rate": 0.0001872634975293576, "loss": 11.6878, "step": 23301 }, { "epoch": 0.4877752658461023, "grad_norm": 0.35130545496940613, "learning_rate": 0.00018726242673449716, "loss": 11.688, "step": 23302 }, { "epoch": 0.48779619861006446, "grad_norm": 0.36617010831832886, "learning_rate": 0.00018726135589768793, "loss": 11.6718, "step": 23303 }, { "epoch": 0.4878171313740266, "grad_norm": 0.27452149987220764, "learning_rate": 0.00018726028501893046, "loss": 11.669, "step": 23304 }, { "epoch": 0.4878380641379888, "grad_norm": 0.3234921395778656, "learning_rate": 0.0001872592140982252, "loss": 11.6762, "step": 23305 }, { "epoch": 0.48785899690195095, "grad_norm": 0.3083646893501282, "learning_rate": 0.0001872581431355727, "loss": 11.6816, "step": 23306 }, { "epoch": 0.4878799296659131, "grad_norm": 0.3922097384929657, "learning_rate": 0.00018725707213097342, "loss": 11.6651, "step": 23307 }, { "epoch": 0.48790086242987524, "grad_norm": 0.30140241980552673, "learning_rate": 0.00018725600108442797, "loss": 11.6749, "step": 23308 }, { "epoch": 0.4879217951938374, "grad_norm": 0.24199697375297546, "learning_rate": 0.00018725492999593677, "loss": 11.6647, "step": 23309 }, { "epoch": 0.4879427279577995, "grad_norm": 0.323764830827713, "learning_rate": 0.00018725385886550038, "loss": 11.666, "step": 23310 }, { "epoch": 0.4879636607217617, "grad_norm": 0.40699872374534607, "learning_rate": 0.00018725278769311934, "loss": 11.6616, "step": 23311 }, { "epoch": 0.48798459348572387, "grad_norm": 0.2710140347480774, "learning_rate": 0.0001872517164787941, "loss": 11.6586, "step": 23312 }, { "epoch": 0.488005526249686, "grad_norm": 0.2655166685581207, "learning_rate": 0.00018725064522252525, "loss": 11.6669, "step": 23313 }, { "epoch": 0.48802645901364816, "grad_norm": 0.3457701504230499, "learning_rate": 0.00018724957392431325, "loss": 11.6625, "step": 23314 }, { "epoch": 0.4880473917776103, "grad_norm": 0.3689563274383545, "learning_rate": 0.00018724850258415864, "loss": 11.6643, "step": 23315 }, { "epoch": 0.48806832454157245, "grad_norm": 0.2997340261936188, "learning_rate": 0.0001872474312020619, "loss": 11.6618, "step": 23316 }, { "epoch": 0.4880892573055346, "grad_norm": 0.293994277715683, "learning_rate": 0.00018724635977802358, "loss": 11.6896, "step": 23317 }, { "epoch": 0.4881101900694968, "grad_norm": 0.3028200566768646, "learning_rate": 0.0001872452883120442, "loss": 11.6619, "step": 23318 }, { "epoch": 0.48813112283345894, "grad_norm": 0.259277880191803, "learning_rate": 0.00018724421680412428, "loss": 11.6711, "step": 23319 }, { "epoch": 0.4881520555974211, "grad_norm": 0.29381057620048523, "learning_rate": 0.00018724314525426426, "loss": 11.6602, "step": 23320 }, { "epoch": 0.48817298836138323, "grad_norm": 0.30024585127830505, "learning_rate": 0.00018724207366246475, "loss": 11.6721, "step": 23321 }, { "epoch": 0.4881939211253454, "grad_norm": 0.324647456407547, "learning_rate": 0.00018724100202872622, "loss": 11.6715, "step": 23322 }, { "epoch": 0.4882148538893075, "grad_norm": 0.2586575150489807, "learning_rate": 0.0001872399303530492, "loss": 11.6771, "step": 23323 }, { "epoch": 0.4882357866532697, "grad_norm": 0.2759873569011688, "learning_rate": 0.00018723885863543418, "loss": 11.6649, "step": 23324 }, { "epoch": 0.48825671941723187, "grad_norm": 0.3205707371234894, "learning_rate": 0.0001872377868758817, "loss": 11.6588, "step": 23325 }, { "epoch": 0.488277652181194, "grad_norm": 0.2798773944377899, "learning_rate": 0.00018723671507439227, "loss": 11.6773, "step": 23326 }, { "epoch": 0.48829858494515616, "grad_norm": 0.2592243552207947, "learning_rate": 0.0001872356432309664, "loss": 11.6644, "step": 23327 }, { "epoch": 0.4883195177091183, "grad_norm": 0.29856956005096436, "learning_rate": 0.0001872345713456046, "loss": 11.6705, "step": 23328 }, { "epoch": 0.48834045047308045, "grad_norm": 0.24850286543369293, "learning_rate": 0.0001872334994183074, "loss": 11.656, "step": 23329 }, { "epoch": 0.48836138323704265, "grad_norm": 0.3514445722103119, "learning_rate": 0.0001872324274490753, "loss": 11.6709, "step": 23330 }, { "epoch": 0.4883823160010048, "grad_norm": 0.2930707633495331, "learning_rate": 0.00018723135543790887, "loss": 11.671, "step": 23331 }, { "epoch": 0.48840324876496694, "grad_norm": 0.3327282965183258, "learning_rate": 0.00018723028338480852, "loss": 11.6538, "step": 23332 }, { "epoch": 0.4884241815289291, "grad_norm": 0.3610997200012207, "learning_rate": 0.00018722921128977487, "loss": 11.6813, "step": 23333 }, { "epoch": 0.4884451142928912, "grad_norm": 0.3366255760192871, "learning_rate": 0.00018722813915280837, "loss": 11.6591, "step": 23334 }, { "epoch": 0.48846604705685337, "grad_norm": 0.2504618465900421, "learning_rate": 0.00018722706697390956, "loss": 11.6592, "step": 23335 }, { "epoch": 0.4884869798208155, "grad_norm": 0.24943196773529053, "learning_rate": 0.00018722599475307894, "loss": 11.6736, "step": 23336 }, { "epoch": 0.4885079125847777, "grad_norm": 0.2864341735839844, "learning_rate": 0.00018722492249031704, "loss": 11.6611, "step": 23337 }, { "epoch": 0.48852884534873986, "grad_norm": 0.4624338448047638, "learning_rate": 0.00018722385018562435, "loss": 11.6733, "step": 23338 }, { "epoch": 0.488549778112702, "grad_norm": 0.4192090630531311, "learning_rate": 0.00018722277783900145, "loss": 11.6809, "step": 23339 }, { "epoch": 0.48857071087666415, "grad_norm": 0.3312797248363495, "learning_rate": 0.00018722170545044882, "loss": 11.6583, "step": 23340 }, { "epoch": 0.4885916436406263, "grad_norm": 0.38011637330055237, "learning_rate": 0.00018722063301996692, "loss": 11.6662, "step": 23341 }, { "epoch": 0.48861257640458844, "grad_norm": 0.2918890714645386, "learning_rate": 0.00018721956054755634, "loss": 11.6686, "step": 23342 }, { "epoch": 0.48863350916855064, "grad_norm": 0.3221488296985626, "learning_rate": 0.00018721848803321755, "loss": 11.6813, "step": 23343 }, { "epoch": 0.4886544419325128, "grad_norm": 0.2891024947166443, "learning_rate": 0.00018721741547695112, "loss": 11.6795, "step": 23344 }, { "epoch": 0.48867537469647493, "grad_norm": 0.3068104386329651, "learning_rate": 0.0001872163428787575, "loss": 11.6774, "step": 23345 }, { "epoch": 0.4886963074604371, "grad_norm": 0.28707024455070496, "learning_rate": 0.00018721527023863726, "loss": 11.6644, "step": 23346 }, { "epoch": 0.4887172402243992, "grad_norm": 0.2429676651954651, "learning_rate": 0.00018721419755659085, "loss": 11.6701, "step": 23347 }, { "epoch": 0.48873817298836136, "grad_norm": 0.26206445693969727, "learning_rate": 0.0001872131248326189, "loss": 11.6691, "step": 23348 }, { "epoch": 0.48875910575232356, "grad_norm": 0.23204314708709717, "learning_rate": 0.00018721205206672178, "loss": 11.6677, "step": 23349 }, { "epoch": 0.4887800385162857, "grad_norm": 0.2616806924343109, "learning_rate": 0.00018721097925890012, "loss": 11.6663, "step": 23350 }, { "epoch": 0.48880097128024785, "grad_norm": 0.32234126329421997, "learning_rate": 0.00018720990640915436, "loss": 11.6808, "step": 23351 }, { "epoch": 0.48882190404421, "grad_norm": 0.3870351016521454, "learning_rate": 0.00018720883351748508, "loss": 11.6761, "step": 23352 }, { "epoch": 0.48884283680817214, "grad_norm": 0.2515566647052765, "learning_rate": 0.00018720776058389277, "loss": 11.6777, "step": 23353 }, { "epoch": 0.4888637695721343, "grad_norm": 0.3137221932411194, "learning_rate": 0.0001872066876083779, "loss": 11.6561, "step": 23354 }, { "epoch": 0.48888470233609643, "grad_norm": 0.3549638092517853, "learning_rate": 0.0001872056145909411, "loss": 11.6782, "step": 23355 }, { "epoch": 0.48890563510005863, "grad_norm": 0.32594043016433716, "learning_rate": 0.00018720454153158277, "loss": 11.6864, "step": 23356 }, { "epoch": 0.4889265678640208, "grad_norm": 0.297368586063385, "learning_rate": 0.00018720346843030346, "loss": 11.6814, "step": 23357 }, { "epoch": 0.4889475006279829, "grad_norm": 0.288544237613678, "learning_rate": 0.00018720239528710372, "loss": 11.6707, "step": 23358 }, { "epoch": 0.48896843339194507, "grad_norm": 0.2552376985549927, "learning_rate": 0.000187201322101984, "loss": 11.6683, "step": 23359 }, { "epoch": 0.4889893661559072, "grad_norm": 0.30943092703819275, "learning_rate": 0.00018720024887494493, "loss": 11.684, "step": 23360 }, { "epoch": 0.48901029891986936, "grad_norm": 0.30448803305625916, "learning_rate": 0.0001871991756059869, "loss": 11.6801, "step": 23361 }, { "epoch": 0.48903123168383156, "grad_norm": 0.27464932203292847, "learning_rate": 0.00018719810229511044, "loss": 11.672, "step": 23362 }, { "epoch": 0.4890521644477937, "grad_norm": 0.2478938102722168, "learning_rate": 0.00018719702894231616, "loss": 11.6753, "step": 23363 }, { "epoch": 0.48907309721175585, "grad_norm": 0.2670045495033264, "learning_rate": 0.00018719595554760453, "loss": 11.683, "step": 23364 }, { "epoch": 0.489094029975718, "grad_norm": 0.36942949891090393, "learning_rate": 0.00018719488211097604, "loss": 11.6869, "step": 23365 }, { "epoch": 0.48911496273968014, "grad_norm": 0.2636379897594452, "learning_rate": 0.00018719380863243125, "loss": 11.6725, "step": 23366 }, { "epoch": 0.4891358955036423, "grad_norm": 0.29833388328552246, "learning_rate": 0.0001871927351119706, "loss": 11.66, "step": 23367 }, { "epoch": 0.4891568282676045, "grad_norm": 0.24673554301261902, "learning_rate": 0.00018719166154959466, "loss": 11.6642, "step": 23368 }, { "epoch": 0.4891777610315666, "grad_norm": 0.3382198214530945, "learning_rate": 0.00018719058794530396, "loss": 11.6742, "step": 23369 }, { "epoch": 0.48919869379552877, "grad_norm": 0.2798818349838257, "learning_rate": 0.00018718951429909898, "loss": 11.6798, "step": 23370 }, { "epoch": 0.4892196265594909, "grad_norm": 0.305501252412796, "learning_rate": 0.0001871884406109803, "loss": 11.6788, "step": 23371 }, { "epoch": 0.48924055932345306, "grad_norm": 0.33965057134628296, "learning_rate": 0.00018718736688094835, "loss": 11.6885, "step": 23372 }, { "epoch": 0.4892614920874152, "grad_norm": 0.28104647994041443, "learning_rate": 0.0001871862931090037, "loss": 11.6748, "step": 23373 }, { "epoch": 0.48928242485137735, "grad_norm": 1.6047528982162476, "learning_rate": 0.00018718521929514682, "loss": 11.6847, "step": 23374 }, { "epoch": 0.48930335761533955, "grad_norm": 0.2831198573112488, "learning_rate": 0.00018718414543937828, "loss": 11.6783, "step": 23375 }, { "epoch": 0.4893242903793017, "grad_norm": 0.3210552930831909, "learning_rate": 0.0001871830715416986, "loss": 11.6613, "step": 23376 }, { "epoch": 0.48934522314326384, "grad_norm": 0.3366507887840271, "learning_rate": 0.0001871819976021082, "loss": 11.6518, "step": 23377 }, { "epoch": 0.489366155907226, "grad_norm": 0.2673571705818176, "learning_rate": 0.00018718092362060774, "loss": 11.6591, "step": 23378 }, { "epoch": 0.48938708867118813, "grad_norm": 0.2794061601161957, "learning_rate": 0.0001871798495971976, "loss": 11.6644, "step": 23379 }, { "epoch": 0.4894080214351503, "grad_norm": 0.3007816672325134, "learning_rate": 0.0001871787755318784, "loss": 11.6611, "step": 23380 }, { "epoch": 0.4894289541991125, "grad_norm": 0.3080216646194458, "learning_rate": 0.0001871777014246506, "loss": 11.6859, "step": 23381 }, { "epoch": 0.4894498869630746, "grad_norm": 0.24616838991641998, "learning_rate": 0.00018717662727551476, "loss": 11.6761, "step": 23382 }, { "epoch": 0.48947081972703677, "grad_norm": 0.32873204350471497, "learning_rate": 0.00018717555308447135, "loss": 11.693, "step": 23383 }, { "epoch": 0.4894917524909989, "grad_norm": 0.40309181809425354, "learning_rate": 0.0001871744788515209, "loss": 11.6552, "step": 23384 }, { "epoch": 0.48951268525496106, "grad_norm": 0.23361746966838837, "learning_rate": 0.00018717340457666395, "loss": 11.6665, "step": 23385 }, { "epoch": 0.4895336180189232, "grad_norm": 0.3357579708099365, "learning_rate": 0.00018717233025990099, "loss": 11.6602, "step": 23386 }, { "epoch": 0.48955455078288534, "grad_norm": 0.3514578342437744, "learning_rate": 0.00018717125590123252, "loss": 11.6732, "step": 23387 }, { "epoch": 0.48957548354684755, "grad_norm": 0.35423213243484497, "learning_rate": 0.0001871701815006591, "loss": 11.6619, "step": 23388 }, { "epoch": 0.4895964163108097, "grad_norm": 0.2753787040710449, "learning_rate": 0.00018716910705818122, "loss": 11.6786, "step": 23389 }, { "epoch": 0.48961734907477183, "grad_norm": 0.2667047679424286, "learning_rate": 0.0001871680325737994, "loss": 11.6717, "step": 23390 }, { "epoch": 0.489638281838734, "grad_norm": 0.3254260718822479, "learning_rate": 0.00018716695804751415, "loss": 11.674, "step": 23391 }, { "epoch": 0.4896592146026961, "grad_norm": 0.27038082480430603, "learning_rate": 0.00018716588347932603, "loss": 11.6757, "step": 23392 }, { "epoch": 0.48968014736665827, "grad_norm": 0.2667781412601471, "learning_rate": 0.0001871648088692355, "loss": 11.6637, "step": 23393 }, { "epoch": 0.48970108013062047, "grad_norm": 0.30954670906066895, "learning_rate": 0.0001871637342172431, "loss": 11.6537, "step": 23394 }, { "epoch": 0.4897220128945826, "grad_norm": 0.2998979985713959, "learning_rate": 0.00018716265952334937, "loss": 11.6682, "step": 23395 }, { "epoch": 0.48974294565854476, "grad_norm": 0.27337846159935, "learning_rate": 0.00018716158478755479, "loss": 11.6652, "step": 23396 }, { "epoch": 0.4897638784225069, "grad_norm": 0.2978035807609558, "learning_rate": 0.00018716051000985988, "loss": 11.6849, "step": 23397 }, { "epoch": 0.48978481118646905, "grad_norm": 0.27766871452331543, "learning_rate": 0.00018715943519026517, "loss": 11.657, "step": 23398 }, { "epoch": 0.4898057439504312, "grad_norm": 0.26045289635658264, "learning_rate": 0.00018715836032877117, "loss": 11.6887, "step": 23399 }, { "epoch": 0.4898266767143934, "grad_norm": 0.29395192861557007, "learning_rate": 0.0001871572854253784, "loss": 11.6822, "step": 23400 }, { "epoch": 0.48984760947835554, "grad_norm": 0.31200501322746277, "learning_rate": 0.0001871562104800874, "loss": 11.6638, "step": 23401 }, { "epoch": 0.4898685422423177, "grad_norm": 0.31962189078330994, "learning_rate": 0.00018715513549289863, "loss": 11.6836, "step": 23402 }, { "epoch": 0.48988947500627983, "grad_norm": 0.3046413064002991, "learning_rate": 0.00018715406046381262, "loss": 11.6854, "step": 23403 }, { "epoch": 0.489910407770242, "grad_norm": 0.3012676537036896, "learning_rate": 0.00018715298539282994, "loss": 11.6748, "step": 23404 }, { "epoch": 0.4899313405342041, "grad_norm": 0.2595590353012085, "learning_rate": 0.0001871519102799511, "loss": 11.6568, "step": 23405 }, { "epoch": 0.48995227329816626, "grad_norm": 0.2706078290939331, "learning_rate": 0.00018715083512517655, "loss": 11.6591, "step": 23406 }, { "epoch": 0.48997320606212846, "grad_norm": 0.3036159873008728, "learning_rate": 0.00018714975992850685, "loss": 11.663, "step": 23407 }, { "epoch": 0.4899941388260906, "grad_norm": 0.36744722723960876, "learning_rate": 0.00018714868468994254, "loss": 11.6846, "step": 23408 }, { "epoch": 0.49001507159005275, "grad_norm": 0.2818576991558075, "learning_rate": 0.0001871476094094841, "loss": 11.6598, "step": 23409 }, { "epoch": 0.4900360043540149, "grad_norm": 0.34995657205581665, "learning_rate": 0.00018714653408713204, "loss": 11.6816, "step": 23410 }, { "epoch": 0.49005693711797704, "grad_norm": 0.3267149329185486, "learning_rate": 0.0001871454587228869, "loss": 11.6643, "step": 23411 }, { "epoch": 0.4900778698819392, "grad_norm": 0.30718716979026794, "learning_rate": 0.00018714438331674923, "loss": 11.6682, "step": 23412 }, { "epoch": 0.4900988026459014, "grad_norm": 0.3132551610469818, "learning_rate": 0.00018714330786871944, "loss": 11.6745, "step": 23413 }, { "epoch": 0.49011973540986353, "grad_norm": 0.2537873387336731, "learning_rate": 0.00018714223237879815, "loss": 11.687, "step": 23414 }, { "epoch": 0.4901406681738257, "grad_norm": 0.33306780457496643, "learning_rate": 0.00018714115684698586, "loss": 11.6553, "step": 23415 }, { "epoch": 0.4901616009377878, "grad_norm": 0.3157587945461273, "learning_rate": 0.00018714008127328305, "loss": 11.684, "step": 23416 }, { "epoch": 0.49018253370174997, "grad_norm": 0.2823496162891388, "learning_rate": 0.00018713900565769026, "loss": 11.6785, "step": 23417 }, { "epoch": 0.4902034664657121, "grad_norm": 0.29296132922172546, "learning_rate": 0.00018713793000020798, "loss": 11.6771, "step": 23418 }, { "epoch": 0.4902243992296743, "grad_norm": 0.23841281235218048, "learning_rate": 0.0001871368543008368, "loss": 11.6672, "step": 23419 }, { "epoch": 0.49024533199363646, "grad_norm": 0.31934183835983276, "learning_rate": 0.00018713577855957716, "loss": 11.659, "step": 23420 }, { "epoch": 0.4902662647575986, "grad_norm": 0.25051286816596985, "learning_rate": 0.0001871347027764296, "loss": 11.6805, "step": 23421 }, { "epoch": 0.49028719752156075, "grad_norm": 0.26059848070144653, "learning_rate": 0.00018713362695139466, "loss": 11.6835, "step": 23422 }, { "epoch": 0.4903081302855229, "grad_norm": 0.4100603461265564, "learning_rate": 0.0001871325510844728, "loss": 11.6664, "step": 23423 }, { "epoch": 0.49032906304948504, "grad_norm": 0.25045809149742126, "learning_rate": 0.00018713147517566463, "loss": 11.6672, "step": 23424 }, { "epoch": 0.4903499958134472, "grad_norm": 0.29600411653518677, "learning_rate": 0.00018713039922497058, "loss": 11.6748, "step": 23425 }, { "epoch": 0.4903709285774094, "grad_norm": 0.392471045255661, "learning_rate": 0.00018712932323239123, "loss": 11.678, "step": 23426 }, { "epoch": 0.4903918613413715, "grad_norm": 0.29117757081985474, "learning_rate": 0.00018712824719792703, "loss": 11.6715, "step": 23427 }, { "epoch": 0.49041279410533367, "grad_norm": 0.6088626980781555, "learning_rate": 0.00018712717112157856, "loss": 11.6827, "step": 23428 }, { "epoch": 0.4904337268692958, "grad_norm": 0.29023414850234985, "learning_rate": 0.0001871260950033463, "loss": 11.6722, "step": 23429 }, { "epoch": 0.49045465963325796, "grad_norm": 0.2670396566390991, "learning_rate": 0.0001871250188432308, "loss": 11.6648, "step": 23430 }, { "epoch": 0.4904755923972201, "grad_norm": 0.3232380151748657, "learning_rate": 0.00018712394264123252, "loss": 11.6715, "step": 23431 }, { "epoch": 0.4904965251611823, "grad_norm": 0.3869735300540924, "learning_rate": 0.00018712286639735207, "loss": 11.6784, "step": 23432 }, { "epoch": 0.49051745792514445, "grad_norm": 0.3276405334472656, "learning_rate": 0.00018712179011158987, "loss": 11.6805, "step": 23433 }, { "epoch": 0.4905383906891066, "grad_norm": 0.2394779920578003, "learning_rate": 0.00018712071378394648, "loss": 11.6743, "step": 23434 }, { "epoch": 0.49055932345306874, "grad_norm": 0.3352411091327667, "learning_rate": 0.00018711963741442245, "loss": 11.6786, "step": 23435 }, { "epoch": 0.4905802562170309, "grad_norm": 0.3155217170715332, "learning_rate": 0.00018711856100301822, "loss": 11.6656, "step": 23436 }, { "epoch": 0.49060118898099303, "grad_norm": 0.3286657929420471, "learning_rate": 0.00018711748454973437, "loss": 11.6658, "step": 23437 }, { "epoch": 0.49062212174495523, "grad_norm": 0.2571055293083191, "learning_rate": 0.0001871164080545714, "loss": 11.6609, "step": 23438 }, { "epoch": 0.4906430545089174, "grad_norm": 0.3777375817298889, "learning_rate": 0.00018711533151752986, "loss": 11.6788, "step": 23439 }, { "epoch": 0.4906639872728795, "grad_norm": 0.252396821975708, "learning_rate": 0.00018711425493861018, "loss": 11.6572, "step": 23440 }, { "epoch": 0.49068492003684167, "grad_norm": 0.23991450667381287, "learning_rate": 0.00018711317831781297, "loss": 11.6525, "step": 23441 }, { "epoch": 0.4907058528008038, "grad_norm": 0.2737891376018524, "learning_rate": 0.00018711210165513867, "loss": 11.6786, "step": 23442 }, { "epoch": 0.49072678556476595, "grad_norm": 0.37554997205734253, "learning_rate": 0.00018711102495058788, "loss": 11.6788, "step": 23443 }, { "epoch": 0.4907477183287281, "grad_norm": 0.30712637305259705, "learning_rate": 0.00018710994820416105, "loss": 11.6867, "step": 23444 }, { "epoch": 0.4907686510926903, "grad_norm": 0.25516045093536377, "learning_rate": 0.00018710887141585873, "loss": 11.6745, "step": 23445 }, { "epoch": 0.49078958385665244, "grad_norm": 0.32941675186157227, "learning_rate": 0.00018710779458568143, "loss": 11.6783, "step": 23446 }, { "epoch": 0.4908105166206146, "grad_norm": 0.31749579310417175, "learning_rate": 0.00018710671771362966, "loss": 11.6857, "step": 23447 }, { "epoch": 0.49083144938457673, "grad_norm": 0.3044080138206482, "learning_rate": 0.00018710564079970395, "loss": 11.66, "step": 23448 }, { "epoch": 0.4908523821485389, "grad_norm": 0.264752060174942, "learning_rate": 0.0001871045638439048, "loss": 11.6709, "step": 23449 }, { "epoch": 0.490873314912501, "grad_norm": 0.3114056885242462, "learning_rate": 0.00018710348684623278, "loss": 11.6795, "step": 23450 }, { "epoch": 0.4908942476764632, "grad_norm": 0.30949971079826355, "learning_rate": 0.00018710240980668834, "loss": 11.6688, "step": 23451 }, { "epoch": 0.49091518044042537, "grad_norm": 0.25502389669418335, "learning_rate": 0.000187101332725272, "loss": 11.6627, "step": 23452 }, { "epoch": 0.4909361132043875, "grad_norm": 0.24525579810142517, "learning_rate": 0.00018710025560198437, "loss": 11.6657, "step": 23453 }, { "epoch": 0.49095704596834966, "grad_norm": 0.28011858463287354, "learning_rate": 0.00018709917843682584, "loss": 11.6639, "step": 23454 }, { "epoch": 0.4909779787323118, "grad_norm": 0.27074506878852844, "learning_rate": 0.00018709810122979702, "loss": 11.6736, "step": 23455 }, { "epoch": 0.49099891149627395, "grad_norm": 0.31782886385917664, "learning_rate": 0.0001870970239808984, "loss": 11.6824, "step": 23456 }, { "epoch": 0.49101984426023615, "grad_norm": 0.37593874335289, "learning_rate": 0.00018709594669013047, "loss": 11.6752, "step": 23457 }, { "epoch": 0.4910407770241983, "grad_norm": 0.33763471245765686, "learning_rate": 0.0001870948693574938, "loss": 11.6704, "step": 23458 }, { "epoch": 0.49106170978816044, "grad_norm": 0.29157865047454834, "learning_rate": 0.0001870937919829889, "loss": 11.6555, "step": 23459 }, { "epoch": 0.4910826425521226, "grad_norm": 0.23571404814720154, "learning_rate": 0.00018709271456661622, "loss": 11.6605, "step": 23460 }, { "epoch": 0.49110357531608473, "grad_norm": 0.302571564912796, "learning_rate": 0.00018709163710837635, "loss": 11.6873, "step": 23461 }, { "epoch": 0.4911245080800469, "grad_norm": 0.32548004388809204, "learning_rate": 0.00018709055960826977, "loss": 11.6868, "step": 23462 }, { "epoch": 0.491145440844009, "grad_norm": 0.2591686546802521, "learning_rate": 0.00018708948206629705, "loss": 11.6798, "step": 23463 }, { "epoch": 0.4911663736079712, "grad_norm": 0.31342262029647827, "learning_rate": 0.00018708840448245862, "loss": 11.6767, "step": 23464 }, { "epoch": 0.49118730637193336, "grad_norm": 0.3299318552017212, "learning_rate": 0.00018708732685675507, "loss": 11.6821, "step": 23465 }, { "epoch": 0.4912082391358955, "grad_norm": 0.2496441751718521, "learning_rate": 0.00018708624918918688, "loss": 11.6915, "step": 23466 }, { "epoch": 0.49122917189985765, "grad_norm": 0.2611815333366394, "learning_rate": 0.0001870851714797546, "loss": 11.6604, "step": 23467 }, { "epoch": 0.4912501046638198, "grad_norm": 0.2912070155143738, "learning_rate": 0.00018708409372845876, "loss": 11.6729, "step": 23468 }, { "epoch": 0.49127103742778194, "grad_norm": 0.26933911442756653, "learning_rate": 0.00018708301593529982, "loss": 11.6663, "step": 23469 }, { "epoch": 0.49129197019174414, "grad_norm": 0.3183831572532654, "learning_rate": 0.00018708193810027836, "loss": 11.6693, "step": 23470 }, { "epoch": 0.4913129029557063, "grad_norm": 0.2470243275165558, "learning_rate": 0.00018708086022339483, "loss": 11.677, "step": 23471 }, { "epoch": 0.49133383571966843, "grad_norm": 0.26600316166877747, "learning_rate": 0.00018707978230464981, "loss": 11.681, "step": 23472 }, { "epoch": 0.4913547684836306, "grad_norm": 0.2888883948326111, "learning_rate": 0.00018707870434404378, "loss": 11.6809, "step": 23473 }, { "epoch": 0.4913757012475927, "grad_norm": 0.30380213260650635, "learning_rate": 0.00018707762634157725, "loss": 11.6834, "step": 23474 }, { "epoch": 0.49139663401155487, "grad_norm": 0.2456180602312088, "learning_rate": 0.0001870765482972508, "loss": 11.6871, "step": 23475 }, { "epoch": 0.491417566775517, "grad_norm": 0.29239794611930847, "learning_rate": 0.00018707547021106492, "loss": 11.6665, "step": 23476 }, { "epoch": 0.4914384995394792, "grad_norm": 0.34485912322998047, "learning_rate": 0.00018707439208302007, "loss": 11.679, "step": 23477 }, { "epoch": 0.49145943230344136, "grad_norm": 0.28224921226501465, "learning_rate": 0.00018707331391311683, "loss": 11.6532, "step": 23478 }, { "epoch": 0.4914803650674035, "grad_norm": 0.32976236939430237, "learning_rate": 0.00018707223570135572, "loss": 11.6816, "step": 23479 }, { "epoch": 0.49150129783136565, "grad_norm": 0.35429567098617554, "learning_rate": 0.00018707115744773722, "loss": 11.6635, "step": 23480 }, { "epoch": 0.4915222305953278, "grad_norm": 0.3170906603336334, "learning_rate": 0.0001870700791522619, "loss": 11.6708, "step": 23481 }, { "epoch": 0.49154316335928994, "grad_norm": 0.25530144572257996, "learning_rate": 0.0001870690008149302, "loss": 11.667, "step": 23482 }, { "epoch": 0.49156409612325214, "grad_norm": 0.29283809661865234, "learning_rate": 0.00018706792243574274, "loss": 11.6875, "step": 23483 }, { "epoch": 0.4915850288872143, "grad_norm": 0.26199010014533997, "learning_rate": 0.00018706684401469993, "loss": 11.6847, "step": 23484 }, { "epoch": 0.4916059616511764, "grad_norm": 0.2740969955921173, "learning_rate": 0.00018706576555180238, "loss": 11.6621, "step": 23485 }, { "epoch": 0.49162689441513857, "grad_norm": 0.2735912501811981, "learning_rate": 0.00018706468704705057, "loss": 11.6883, "step": 23486 }, { "epoch": 0.4916478271791007, "grad_norm": 0.22733110189437866, "learning_rate": 0.000187063608500445, "loss": 11.6712, "step": 23487 }, { "epoch": 0.49166875994306286, "grad_norm": 0.26974448561668396, "learning_rate": 0.00018706252991198622, "loss": 11.6759, "step": 23488 }, { "epoch": 0.49168969270702506, "grad_norm": 0.25623348355293274, "learning_rate": 0.00018706145128167477, "loss": 11.6435, "step": 23489 }, { "epoch": 0.4917106254709872, "grad_norm": 0.2659267783164978, "learning_rate": 0.00018706037260951108, "loss": 11.6721, "step": 23490 }, { "epoch": 0.49173155823494935, "grad_norm": 0.2613118886947632, "learning_rate": 0.00018705929389549575, "loss": 11.6734, "step": 23491 }, { "epoch": 0.4917524909989115, "grad_norm": 0.27229341864585876, "learning_rate": 0.00018705821513962927, "loss": 11.6636, "step": 23492 }, { "epoch": 0.49177342376287364, "grad_norm": 0.2855057418346405, "learning_rate": 0.00018705713634191215, "loss": 11.655, "step": 23493 }, { "epoch": 0.4917943565268358, "grad_norm": 0.2976084351539612, "learning_rate": 0.00018705605750234496, "loss": 11.6697, "step": 23494 }, { "epoch": 0.49181528929079793, "grad_norm": 0.29538220167160034, "learning_rate": 0.00018705497862092814, "loss": 11.6819, "step": 23495 }, { "epoch": 0.49183622205476013, "grad_norm": 0.4134826064109802, "learning_rate": 0.00018705389969766225, "loss": 11.6835, "step": 23496 }, { "epoch": 0.4918571548187223, "grad_norm": 0.2532827854156494, "learning_rate": 0.0001870528207325478, "loss": 11.6666, "step": 23497 }, { "epoch": 0.4918780875826844, "grad_norm": 0.2663775682449341, "learning_rate": 0.00018705174172558534, "loss": 11.6679, "step": 23498 }, { "epoch": 0.49189902034664656, "grad_norm": 0.31952014565467834, "learning_rate": 0.00018705066267677534, "loss": 11.6811, "step": 23499 }, { "epoch": 0.4919199531106087, "grad_norm": 0.31301429867744446, "learning_rate": 0.00018704958358611837, "loss": 11.6859, "step": 23500 }, { "epoch": 0.49194088587457085, "grad_norm": 0.30149826407432556, "learning_rate": 0.0001870485044536149, "loss": 11.6607, "step": 23501 }, { "epoch": 0.49196181863853305, "grad_norm": 0.26614221930503845, "learning_rate": 0.00018704742527926547, "loss": 11.6687, "step": 23502 }, { "epoch": 0.4919827514024952, "grad_norm": 0.28712213039398193, "learning_rate": 0.0001870463460630706, "loss": 11.6696, "step": 23503 }, { "epoch": 0.49200368416645734, "grad_norm": 0.2964511215686798, "learning_rate": 0.00018704526680503084, "loss": 11.6729, "step": 23504 }, { "epoch": 0.4920246169304195, "grad_norm": 0.27602678537368774, "learning_rate": 0.00018704418750514662, "loss": 11.6851, "step": 23505 }, { "epoch": 0.49204554969438163, "grad_norm": 0.2805696427822113, "learning_rate": 0.00018704310816341853, "loss": 11.654, "step": 23506 }, { "epoch": 0.4920664824583438, "grad_norm": 0.2975347340106964, "learning_rate": 0.0001870420287798471, "loss": 11.6718, "step": 23507 }, { "epoch": 0.492087415222306, "grad_norm": 0.2594528794288635, "learning_rate": 0.00018704094935443282, "loss": 11.6756, "step": 23508 }, { "epoch": 0.4921083479862681, "grad_norm": 0.38819193840026855, "learning_rate": 0.00018703986988717616, "loss": 11.6684, "step": 23509 }, { "epoch": 0.49212928075023027, "grad_norm": 0.2836638391017914, "learning_rate": 0.00018703879037807773, "loss": 11.6702, "step": 23510 }, { "epoch": 0.4921502135141924, "grad_norm": 0.36068007349967957, "learning_rate": 0.00018703771082713804, "loss": 11.6523, "step": 23511 }, { "epoch": 0.49217114627815456, "grad_norm": 0.29329797625541687, "learning_rate": 0.00018703663123435753, "loss": 11.682, "step": 23512 }, { "epoch": 0.4921920790421167, "grad_norm": 0.28364357352256775, "learning_rate": 0.00018703555159973678, "loss": 11.6632, "step": 23513 }, { "epoch": 0.49221301180607885, "grad_norm": 0.31167343258857727, "learning_rate": 0.0001870344719232763, "loss": 11.6582, "step": 23514 }, { "epoch": 0.49223394457004105, "grad_norm": 0.36310869455337524, "learning_rate": 0.0001870333922049766, "loss": 11.6657, "step": 23515 }, { "epoch": 0.4922548773340032, "grad_norm": 0.37989550828933716, "learning_rate": 0.00018703231244483818, "loss": 11.6749, "step": 23516 }, { "epoch": 0.49227581009796534, "grad_norm": 0.3127429187297821, "learning_rate": 0.00018703123264286165, "loss": 11.6688, "step": 23517 }, { "epoch": 0.4922967428619275, "grad_norm": 0.2949877977371216, "learning_rate": 0.0001870301527990474, "loss": 11.6685, "step": 23518 }, { "epoch": 0.49231767562588963, "grad_norm": 0.3194049000740051, "learning_rate": 0.00018702907291339604, "loss": 11.6779, "step": 23519 }, { "epoch": 0.4923386083898518, "grad_norm": 0.23096118867397308, "learning_rate": 0.00018702799298590805, "loss": 11.6725, "step": 23520 }, { "epoch": 0.492359541153814, "grad_norm": 0.2700280547142029, "learning_rate": 0.00018702691301658396, "loss": 11.665, "step": 23521 }, { "epoch": 0.4923804739177761, "grad_norm": 0.3017231822013855, "learning_rate": 0.0001870258330054243, "loss": 11.6808, "step": 23522 }, { "epoch": 0.49240140668173826, "grad_norm": 0.31323134899139404, "learning_rate": 0.00018702475295242956, "loss": 11.6875, "step": 23523 }, { "epoch": 0.4924223394457004, "grad_norm": 0.29125499725341797, "learning_rate": 0.00018702367285760029, "loss": 11.6795, "step": 23524 }, { "epoch": 0.49244327220966255, "grad_norm": 0.39158540964126587, "learning_rate": 0.00018702259272093701, "loss": 11.6672, "step": 23525 }, { "epoch": 0.4924642049736247, "grad_norm": 0.3505640923976898, "learning_rate": 0.0001870215125424402, "loss": 11.6933, "step": 23526 }, { "epoch": 0.4924851377375869, "grad_norm": 0.432477205991745, "learning_rate": 0.00018702043232211044, "loss": 11.6443, "step": 23527 }, { "epoch": 0.49250607050154904, "grad_norm": 0.4891683757305145, "learning_rate": 0.0001870193520599482, "loss": 11.6879, "step": 23528 }, { "epoch": 0.4925270032655112, "grad_norm": 0.37512385845184326, "learning_rate": 0.00018701827175595398, "loss": 11.6936, "step": 23529 }, { "epoch": 0.49254793602947333, "grad_norm": 0.2787603437900543, "learning_rate": 0.00018701719141012838, "loss": 11.6666, "step": 23530 }, { "epoch": 0.4925688687934355, "grad_norm": 0.3146773874759674, "learning_rate": 0.00018701611102247184, "loss": 11.6728, "step": 23531 }, { "epoch": 0.4925898015573976, "grad_norm": 0.3591965436935425, "learning_rate": 0.00018701503059298492, "loss": 11.6765, "step": 23532 }, { "epoch": 0.49261073432135977, "grad_norm": 0.29093796014785767, "learning_rate": 0.00018701395012166812, "loss": 11.6577, "step": 23533 }, { "epoch": 0.49263166708532197, "grad_norm": 0.28677043318748474, "learning_rate": 0.00018701286960852196, "loss": 11.6724, "step": 23534 }, { "epoch": 0.4926525998492841, "grad_norm": 0.312305212020874, "learning_rate": 0.000187011789053547, "loss": 11.6689, "step": 23535 }, { "epoch": 0.49267353261324626, "grad_norm": 0.2687377333641052, "learning_rate": 0.00018701070845674372, "loss": 11.6754, "step": 23536 }, { "epoch": 0.4926944653772084, "grad_norm": 0.2767626941204071, "learning_rate": 0.00018700962781811265, "loss": 11.6633, "step": 23537 }, { "epoch": 0.49271539814117055, "grad_norm": 0.2877099812030792, "learning_rate": 0.0001870085471376543, "loss": 11.6679, "step": 23538 }, { "epoch": 0.4927363309051327, "grad_norm": 0.2525883913040161, "learning_rate": 0.0001870074664153692, "loss": 11.6808, "step": 23539 }, { "epoch": 0.4927572636690949, "grad_norm": 0.23576720058918, "learning_rate": 0.00018700638565125787, "loss": 11.6954, "step": 23540 }, { "epoch": 0.49277819643305704, "grad_norm": 0.3306567072868347, "learning_rate": 0.00018700530484532085, "loss": 11.6534, "step": 23541 }, { "epoch": 0.4927991291970192, "grad_norm": 0.32638272643089294, "learning_rate": 0.00018700422399755862, "loss": 11.6601, "step": 23542 }, { "epoch": 0.4928200619609813, "grad_norm": 0.2833121418952942, "learning_rate": 0.00018700314310797172, "loss": 11.6773, "step": 23543 }, { "epoch": 0.49284099472494347, "grad_norm": 0.25063785910606384, "learning_rate": 0.00018700206217656064, "loss": 11.6853, "step": 23544 }, { "epoch": 0.4928619274889056, "grad_norm": 0.325728178024292, "learning_rate": 0.00018700098120332594, "loss": 11.6734, "step": 23545 }, { "epoch": 0.4928828602528678, "grad_norm": 0.2856954336166382, "learning_rate": 0.00018699990018826811, "loss": 11.6788, "step": 23546 }, { "epoch": 0.49290379301682996, "grad_norm": 0.3189569115638733, "learning_rate": 0.00018699881913138773, "loss": 11.68, "step": 23547 }, { "epoch": 0.4929247257807921, "grad_norm": 0.2410125434398651, "learning_rate": 0.00018699773803268523, "loss": 11.6644, "step": 23548 }, { "epoch": 0.49294565854475425, "grad_norm": 0.25726521015167236, "learning_rate": 0.0001869966568921612, "loss": 11.6736, "step": 23549 }, { "epoch": 0.4929665913087164, "grad_norm": 0.2514820992946625, "learning_rate": 0.0001869955757098161, "loss": 11.6715, "step": 23550 }, { "epoch": 0.49298752407267854, "grad_norm": 0.29846909642219543, "learning_rate": 0.00018699449448565054, "loss": 11.6804, "step": 23551 }, { "epoch": 0.4930084568366407, "grad_norm": 0.3368166387081146, "learning_rate": 0.00018699341321966494, "loss": 11.6728, "step": 23552 }, { "epoch": 0.4930293896006029, "grad_norm": 0.2920069694519043, "learning_rate": 0.00018699233191185987, "loss": 11.6716, "step": 23553 }, { "epoch": 0.49305032236456503, "grad_norm": 0.2743227183818817, "learning_rate": 0.00018699125056223585, "loss": 11.6712, "step": 23554 }, { "epoch": 0.4930712551285272, "grad_norm": 0.3675948679447174, "learning_rate": 0.00018699016917079336, "loss": 11.6845, "step": 23555 }, { "epoch": 0.4930921878924893, "grad_norm": 0.21544773876667023, "learning_rate": 0.000186989087737533, "loss": 11.6768, "step": 23556 }, { "epoch": 0.49311312065645146, "grad_norm": 0.2839919924736023, "learning_rate": 0.00018698800626245523, "loss": 11.6727, "step": 23557 }, { "epoch": 0.4931340534204136, "grad_norm": 0.3463877737522125, "learning_rate": 0.00018698692474556058, "loss": 11.6768, "step": 23558 }, { "epoch": 0.4931549861843758, "grad_norm": 0.32732275128364563, "learning_rate": 0.00018698584318684955, "loss": 11.6745, "step": 23559 }, { "epoch": 0.49317591894833795, "grad_norm": 0.23636038601398468, "learning_rate": 0.00018698476158632272, "loss": 11.6549, "step": 23560 }, { "epoch": 0.4931968517123001, "grad_norm": 0.24868004024028778, "learning_rate": 0.00018698367994398054, "loss": 11.69, "step": 23561 }, { "epoch": 0.49321778447626224, "grad_norm": 0.28779205679893494, "learning_rate": 0.00018698259825982358, "loss": 11.6668, "step": 23562 }, { "epoch": 0.4932387172402244, "grad_norm": 0.25728633999824524, "learning_rate": 0.00018698151653385233, "loss": 11.6723, "step": 23563 }, { "epoch": 0.49325965000418653, "grad_norm": 0.2827022671699524, "learning_rate": 0.00018698043476606733, "loss": 11.6815, "step": 23564 }, { "epoch": 0.4932805827681487, "grad_norm": 0.2327425181865692, "learning_rate": 0.00018697935295646908, "loss": 11.6768, "step": 23565 }, { "epoch": 0.4933015155321109, "grad_norm": 0.2783275246620178, "learning_rate": 0.00018697827110505813, "loss": 11.6723, "step": 23566 }, { "epoch": 0.493322448296073, "grad_norm": 0.27643999457359314, "learning_rate": 0.00018697718921183496, "loss": 11.6712, "step": 23567 }, { "epoch": 0.49334338106003517, "grad_norm": 0.34304484724998474, "learning_rate": 0.00018697610727680012, "loss": 11.6827, "step": 23568 }, { "epoch": 0.4933643138239973, "grad_norm": 0.29131561517715454, "learning_rate": 0.0001869750252999541, "loss": 11.6733, "step": 23569 }, { "epoch": 0.49338524658795946, "grad_norm": 0.36211735010147095, "learning_rate": 0.0001869739432812975, "loss": 11.6849, "step": 23570 }, { "epoch": 0.4934061793519216, "grad_norm": 0.3218458592891693, "learning_rate": 0.00018697286122083075, "loss": 11.6697, "step": 23571 }, { "epoch": 0.4934271121158838, "grad_norm": 0.25044506788253784, "learning_rate": 0.0001869717791185544, "loss": 11.6628, "step": 23572 }, { "epoch": 0.49344804487984595, "grad_norm": 0.30381807684898376, "learning_rate": 0.00018697069697446898, "loss": 11.6841, "step": 23573 }, { "epoch": 0.4934689776438081, "grad_norm": 0.2709368169307709, "learning_rate": 0.000186969614788575, "loss": 11.6761, "step": 23574 }, { "epoch": 0.49348991040777024, "grad_norm": 0.25518369674682617, "learning_rate": 0.00018696853256087298, "loss": 11.6599, "step": 23575 }, { "epoch": 0.4935108431717324, "grad_norm": 0.24973097443580627, "learning_rate": 0.00018696745029136346, "loss": 11.6598, "step": 23576 }, { "epoch": 0.4935317759356945, "grad_norm": 0.3025936186313629, "learning_rate": 0.00018696636798004693, "loss": 11.6747, "step": 23577 }, { "epoch": 0.49355270869965673, "grad_norm": 0.3147640526294708, "learning_rate": 0.0001869652856269239, "loss": 11.6783, "step": 23578 }, { "epoch": 0.4935736414636189, "grad_norm": 0.2613352835178375, "learning_rate": 0.00018696420323199495, "loss": 11.6882, "step": 23579 }, { "epoch": 0.493594574227581, "grad_norm": 0.28103020787239075, "learning_rate": 0.00018696312079526055, "loss": 11.6794, "step": 23580 }, { "epoch": 0.49361550699154316, "grad_norm": 0.35617780685424805, "learning_rate": 0.00018696203831672126, "loss": 11.6738, "step": 23581 }, { "epoch": 0.4936364397555053, "grad_norm": 0.2559763789176941, "learning_rate": 0.00018696095579637752, "loss": 11.6745, "step": 23582 }, { "epoch": 0.49365737251946745, "grad_norm": 0.34439006447792053, "learning_rate": 0.00018695987323422996, "loss": 11.6862, "step": 23583 }, { "epoch": 0.4936783052834296, "grad_norm": 0.2831331491470337, "learning_rate": 0.00018695879063027903, "loss": 11.6524, "step": 23584 }, { "epoch": 0.4936992380473918, "grad_norm": 0.3034929633140564, "learning_rate": 0.00018695770798452525, "loss": 11.6646, "step": 23585 }, { "epoch": 0.49372017081135394, "grad_norm": 0.2695329189300537, "learning_rate": 0.00018695662529696918, "loss": 11.6664, "step": 23586 }, { "epoch": 0.4937411035753161, "grad_norm": 0.2526942491531372, "learning_rate": 0.0001869555425676113, "loss": 11.6638, "step": 23587 }, { "epoch": 0.49376203633927823, "grad_norm": 0.2830437421798706, "learning_rate": 0.00018695445979645216, "loss": 11.6624, "step": 23588 }, { "epoch": 0.4937829691032404, "grad_norm": 0.2912691533565521, "learning_rate": 0.00018695337698349226, "loss": 11.6793, "step": 23589 }, { "epoch": 0.4938039018672025, "grad_norm": 0.25351861119270325, "learning_rate": 0.00018695229412873213, "loss": 11.6738, "step": 23590 }, { "epoch": 0.4938248346311647, "grad_norm": 0.2561516761779785, "learning_rate": 0.0001869512112321723, "loss": 11.6612, "step": 23591 }, { "epoch": 0.49384576739512687, "grad_norm": 0.2633558511734009, "learning_rate": 0.00018695012829381325, "loss": 11.6676, "step": 23592 }, { "epoch": 0.493866700159089, "grad_norm": 0.27786046266555786, "learning_rate": 0.00018694904531365558, "loss": 11.6606, "step": 23593 }, { "epoch": 0.49388763292305116, "grad_norm": 0.4303894340991974, "learning_rate": 0.00018694796229169972, "loss": 11.6718, "step": 23594 }, { "epoch": 0.4939085656870133, "grad_norm": 0.29454612731933594, "learning_rate": 0.00018694687922794624, "loss": 11.6754, "step": 23595 }, { "epoch": 0.49392949845097545, "grad_norm": 0.3372698724269867, "learning_rate": 0.00018694579612239568, "loss": 11.6779, "step": 23596 }, { "epoch": 0.49395043121493765, "grad_norm": 0.3888452351093292, "learning_rate": 0.00018694471297504853, "loss": 11.6709, "step": 23597 }, { "epoch": 0.4939713639788998, "grad_norm": 0.3190344572067261, "learning_rate": 0.00018694362978590527, "loss": 11.6607, "step": 23598 }, { "epoch": 0.49399229674286194, "grad_norm": 0.2849462330341339, "learning_rate": 0.00018694254655496651, "loss": 11.6756, "step": 23599 }, { "epoch": 0.4940132295068241, "grad_norm": 0.30375510454177856, "learning_rate": 0.0001869414632822327, "loss": 11.6685, "step": 23600 }, { "epoch": 0.4940341622707862, "grad_norm": 0.23533490300178528, "learning_rate": 0.0001869403799677044, "loss": 11.6673, "step": 23601 }, { "epoch": 0.49405509503474837, "grad_norm": 0.2678551971912384, "learning_rate": 0.0001869392966113821, "loss": 11.683, "step": 23602 }, { "epoch": 0.4940760277987105, "grad_norm": 0.2871485650539398, "learning_rate": 0.00018693821321326634, "loss": 11.6862, "step": 23603 }, { "epoch": 0.4940969605626727, "grad_norm": 0.3011714220046997, "learning_rate": 0.00018693712977335767, "loss": 11.6869, "step": 23604 }, { "epoch": 0.49411789332663486, "grad_norm": 0.32343870401382446, "learning_rate": 0.00018693604629165656, "loss": 11.6845, "step": 23605 }, { "epoch": 0.494138826090597, "grad_norm": 0.2757101356983185, "learning_rate": 0.00018693496276816354, "loss": 11.6693, "step": 23606 }, { "epoch": 0.49415975885455915, "grad_norm": 0.36682823300361633, "learning_rate": 0.00018693387920287914, "loss": 11.6781, "step": 23607 }, { "epoch": 0.4941806916185213, "grad_norm": 0.27378660440444946, "learning_rate": 0.00018693279559580387, "loss": 11.6796, "step": 23608 }, { "epoch": 0.49420162438248344, "grad_norm": 0.3881116509437561, "learning_rate": 0.0001869317119469383, "loss": 11.6821, "step": 23609 }, { "epoch": 0.49422255714644564, "grad_norm": 0.29298609495162964, "learning_rate": 0.0001869306282562829, "loss": 11.6624, "step": 23610 }, { "epoch": 0.4942434899104078, "grad_norm": 0.28959351778030396, "learning_rate": 0.0001869295445238382, "loss": 11.6671, "step": 23611 }, { "epoch": 0.49426442267436993, "grad_norm": 0.3279481530189514, "learning_rate": 0.00018692846074960473, "loss": 11.6796, "step": 23612 }, { "epoch": 0.4942853554383321, "grad_norm": 0.27925947308540344, "learning_rate": 0.000186927376933583, "loss": 11.648, "step": 23613 }, { "epoch": 0.4943062882022942, "grad_norm": 0.26855018734931946, "learning_rate": 0.00018692629307577355, "loss": 11.6672, "step": 23614 }, { "epoch": 0.49432722096625636, "grad_norm": 0.2968158423900604, "learning_rate": 0.0001869252091761769, "loss": 11.6867, "step": 23615 }, { "epoch": 0.49434815373021856, "grad_norm": 0.39468297362327576, "learning_rate": 0.00018692412523479352, "loss": 11.6719, "step": 23616 }, { "epoch": 0.4943690864941807, "grad_norm": 0.2996227741241455, "learning_rate": 0.000186923041251624, "loss": 11.6569, "step": 23617 }, { "epoch": 0.49439001925814285, "grad_norm": 0.3344773054122925, "learning_rate": 0.0001869219572266688, "loss": 11.6819, "step": 23618 }, { "epoch": 0.494410952022105, "grad_norm": 0.2831951975822449, "learning_rate": 0.00018692087315992854, "loss": 11.6759, "step": 23619 }, { "epoch": 0.49443188478606714, "grad_norm": 0.34492000937461853, "learning_rate": 0.0001869197890514036, "loss": 11.666, "step": 23620 }, { "epoch": 0.4944528175500293, "grad_norm": 0.2646227777004242, "learning_rate": 0.0001869187049010946, "loss": 11.669, "step": 23621 }, { "epoch": 0.49447375031399143, "grad_norm": 0.29501017928123474, "learning_rate": 0.00018691762070900206, "loss": 11.6664, "step": 23622 }, { "epoch": 0.49449468307795363, "grad_norm": 0.5413010716438293, "learning_rate": 0.00018691653647512645, "loss": 11.6736, "step": 23623 }, { "epoch": 0.4945156158419158, "grad_norm": 0.28076639771461487, "learning_rate": 0.00018691545219946836, "loss": 11.6763, "step": 23624 }, { "epoch": 0.4945365486058779, "grad_norm": 0.35568767786026, "learning_rate": 0.00018691436788202822, "loss": 11.6704, "step": 23625 }, { "epoch": 0.49455748136984007, "grad_norm": 3.1247494220733643, "learning_rate": 0.00018691328352280664, "loss": 11.6549, "step": 23626 }, { "epoch": 0.4945784141338022, "grad_norm": 0.350689560174942, "learning_rate": 0.00018691219912180408, "loss": 11.6737, "step": 23627 }, { "epoch": 0.49459934689776436, "grad_norm": 0.23139028251171112, "learning_rate": 0.00018691111467902108, "loss": 11.6761, "step": 23628 }, { "epoch": 0.49462027966172656, "grad_norm": 0.31246888637542725, "learning_rate": 0.0001869100301944582, "loss": 11.6712, "step": 23629 }, { "epoch": 0.4946412124256887, "grad_norm": 0.37735846638679504, "learning_rate": 0.0001869089456681159, "loss": 11.6698, "step": 23630 }, { "epoch": 0.49466214518965085, "grad_norm": 0.31204289197921753, "learning_rate": 0.0001869078610999947, "loss": 11.6747, "step": 23631 }, { "epoch": 0.494683077953613, "grad_norm": 0.2811667025089264, "learning_rate": 0.0001869067764900952, "loss": 11.6711, "step": 23632 }, { "epoch": 0.49470401071757514, "grad_norm": 0.39170464873313904, "learning_rate": 0.00018690569183841787, "loss": 11.6712, "step": 23633 }, { "epoch": 0.4947249434815373, "grad_norm": 0.30774208903312683, "learning_rate": 0.0001869046071449632, "loss": 11.6594, "step": 23634 }, { "epoch": 0.4947458762454995, "grad_norm": 0.30159124732017517, "learning_rate": 0.00018690352240973175, "loss": 11.6544, "step": 23635 }, { "epoch": 0.49476680900946163, "grad_norm": 0.2764141857624054, "learning_rate": 0.00018690243763272405, "loss": 11.6892, "step": 23636 }, { "epoch": 0.4947877417734238, "grad_norm": 0.31252336502075195, "learning_rate": 0.00018690135281394058, "loss": 11.6686, "step": 23637 }, { "epoch": 0.4948086745373859, "grad_norm": 0.26301202178001404, "learning_rate": 0.0001869002679533819, "loss": 11.6684, "step": 23638 }, { "epoch": 0.49482960730134806, "grad_norm": 0.29038140177726746, "learning_rate": 0.00018689918305104858, "loss": 11.6835, "step": 23639 }, { "epoch": 0.4948505400653102, "grad_norm": 0.27517858147621155, "learning_rate": 0.00018689809810694099, "loss": 11.6614, "step": 23640 }, { "epoch": 0.49487147282927235, "grad_norm": 0.26742851734161377, "learning_rate": 0.00018689701312105982, "loss": 11.6712, "step": 23641 }, { "epoch": 0.49489240559323455, "grad_norm": 0.260777086019516, "learning_rate": 0.00018689592809340545, "loss": 11.6551, "step": 23642 }, { "epoch": 0.4949133383571967, "grad_norm": 0.3127102851867676, "learning_rate": 0.00018689484302397848, "loss": 11.6696, "step": 23643 }, { "epoch": 0.49493427112115884, "grad_norm": 0.3701152205467224, "learning_rate": 0.00018689375791277945, "loss": 11.6791, "step": 23644 }, { "epoch": 0.494955203885121, "grad_norm": 0.35295724868774414, "learning_rate": 0.00018689267275980883, "loss": 11.6835, "step": 23645 }, { "epoch": 0.49497613664908313, "grad_norm": 0.28032931685447693, "learning_rate": 0.00018689158756506714, "loss": 11.6599, "step": 23646 }, { "epoch": 0.4949970694130453, "grad_norm": 0.2972026467323303, "learning_rate": 0.00018689050232855494, "loss": 11.6854, "step": 23647 }, { "epoch": 0.4950180021770075, "grad_norm": 0.2937118113040924, "learning_rate": 0.00018688941705027277, "loss": 11.6758, "step": 23648 }, { "epoch": 0.4950389349409696, "grad_norm": 0.3426876962184906, "learning_rate": 0.00018688833173022106, "loss": 11.67, "step": 23649 }, { "epoch": 0.49505986770493177, "grad_norm": 0.29720190167427063, "learning_rate": 0.0001868872463684004, "loss": 11.6662, "step": 23650 }, { "epoch": 0.4950808004688939, "grad_norm": 0.2890154719352722, "learning_rate": 0.00018688616096481132, "loss": 11.6835, "step": 23651 }, { "epoch": 0.49510173323285606, "grad_norm": 0.26693472266197205, "learning_rate": 0.00018688507551945432, "loss": 11.6767, "step": 23652 }, { "epoch": 0.4951226659968182, "grad_norm": 0.26763245463371277, "learning_rate": 0.0001868839900323299, "loss": 11.6861, "step": 23653 }, { "epoch": 0.49514359876078035, "grad_norm": 0.28304216265678406, "learning_rate": 0.00018688290450343862, "loss": 11.672, "step": 23654 }, { "epoch": 0.49516453152474255, "grad_norm": 0.3095199763774872, "learning_rate": 0.00018688181893278098, "loss": 11.6793, "step": 23655 }, { "epoch": 0.4951854642887047, "grad_norm": 0.36420804262161255, "learning_rate": 0.0001868807333203575, "loss": 11.6857, "step": 23656 }, { "epoch": 0.49520639705266684, "grad_norm": 0.35479170083999634, "learning_rate": 0.00018687964766616875, "loss": 11.691, "step": 23657 }, { "epoch": 0.495227329816629, "grad_norm": 0.3184519410133362, "learning_rate": 0.00018687856197021518, "loss": 11.6645, "step": 23658 }, { "epoch": 0.4952482625805911, "grad_norm": 0.2693178355693817, "learning_rate": 0.00018687747623249737, "loss": 11.6571, "step": 23659 }, { "epoch": 0.49526919534455327, "grad_norm": 0.3256723880767822, "learning_rate": 0.00018687639045301578, "loss": 11.6721, "step": 23660 }, { "epoch": 0.49529012810851547, "grad_norm": 0.34552639722824097, "learning_rate": 0.00018687530463177096, "loss": 11.6733, "step": 23661 }, { "epoch": 0.4953110608724776, "grad_norm": 0.3537062108516693, "learning_rate": 0.00018687421876876348, "loss": 11.6726, "step": 23662 }, { "epoch": 0.49533199363643976, "grad_norm": 0.28115981817245483, "learning_rate": 0.00018687313286399378, "loss": 11.6703, "step": 23663 }, { "epoch": 0.4953529264004019, "grad_norm": 0.26674678921699524, "learning_rate": 0.00018687204691746246, "loss": 11.678, "step": 23664 }, { "epoch": 0.49537385916436405, "grad_norm": 0.3079618513584137, "learning_rate": 0.00018687096092917003, "loss": 11.6992, "step": 23665 }, { "epoch": 0.4953947919283262, "grad_norm": 0.22235548496246338, "learning_rate": 0.00018686987489911693, "loss": 11.6667, "step": 23666 }, { "epoch": 0.4954157246922884, "grad_norm": 0.3419492840766907, "learning_rate": 0.0001868687888273038, "loss": 11.6808, "step": 23667 }, { "epoch": 0.49543665745625054, "grad_norm": 0.2584693729877472, "learning_rate": 0.00018686770271373105, "loss": 11.6663, "step": 23668 }, { "epoch": 0.4954575902202127, "grad_norm": 0.2541491985321045, "learning_rate": 0.00018686661655839925, "loss": 11.6725, "step": 23669 }, { "epoch": 0.49547852298417483, "grad_norm": 0.35566115379333496, "learning_rate": 0.00018686553036130897, "loss": 11.67, "step": 23670 }, { "epoch": 0.495499455748137, "grad_norm": 0.284092515707016, "learning_rate": 0.00018686444412246064, "loss": 11.672, "step": 23671 }, { "epoch": 0.4955203885120991, "grad_norm": 0.2787257432937622, "learning_rate": 0.00018686335784185487, "loss": 11.6593, "step": 23672 }, { "epoch": 0.49554132127606126, "grad_norm": 0.27821600437164307, "learning_rate": 0.00018686227151949213, "loss": 11.6642, "step": 23673 }, { "epoch": 0.49556225404002346, "grad_norm": 0.30786940455436707, "learning_rate": 0.00018686118515537297, "loss": 11.6707, "step": 23674 }, { "epoch": 0.4955831868039856, "grad_norm": 0.3284154236316681, "learning_rate": 0.00018686009874949787, "loss": 11.6773, "step": 23675 }, { "epoch": 0.49560411956794775, "grad_norm": 0.25356417894363403, "learning_rate": 0.0001868590123018674, "loss": 11.6532, "step": 23676 }, { "epoch": 0.4956250523319099, "grad_norm": 0.25899478793144226, "learning_rate": 0.00018685792581248206, "loss": 11.6764, "step": 23677 }, { "epoch": 0.49564598509587204, "grad_norm": 0.258705735206604, "learning_rate": 0.00018685683928134237, "loss": 11.6809, "step": 23678 }, { "epoch": 0.4956669178598342, "grad_norm": 0.298054575920105, "learning_rate": 0.00018685575270844886, "loss": 11.6663, "step": 23679 }, { "epoch": 0.4956878506237964, "grad_norm": 0.34755855798721313, "learning_rate": 0.00018685466609380205, "loss": 11.6599, "step": 23680 }, { "epoch": 0.49570878338775853, "grad_norm": 0.3501175343990326, "learning_rate": 0.00018685357943740245, "loss": 11.6692, "step": 23681 }, { "epoch": 0.4957297161517207, "grad_norm": 0.28804120421409607, "learning_rate": 0.00018685249273925057, "loss": 11.6588, "step": 23682 }, { "epoch": 0.4957506489156828, "grad_norm": 0.3237011432647705, "learning_rate": 0.00018685140599934702, "loss": 11.677, "step": 23683 }, { "epoch": 0.49577158167964497, "grad_norm": 0.31657910346984863, "learning_rate": 0.0001868503192176922, "loss": 11.6824, "step": 23684 }, { "epoch": 0.4957925144436071, "grad_norm": 0.2563496530056, "learning_rate": 0.00018684923239428673, "loss": 11.6756, "step": 23685 }, { "epoch": 0.4958134472075693, "grad_norm": 0.2786017954349518, "learning_rate": 0.0001868481455291311, "loss": 11.6772, "step": 23686 }, { "epoch": 0.49583437997153146, "grad_norm": 0.3170328736305237, "learning_rate": 0.00018684705862222578, "loss": 11.682, "step": 23687 }, { "epoch": 0.4958553127354936, "grad_norm": 0.24276559054851532, "learning_rate": 0.00018684597167357137, "loss": 11.6624, "step": 23688 }, { "epoch": 0.49587624549945575, "grad_norm": 0.31763994693756104, "learning_rate": 0.00018684488468316836, "loss": 11.6807, "step": 23689 }, { "epoch": 0.4958971782634179, "grad_norm": 0.3228933811187744, "learning_rate": 0.00018684379765101727, "loss": 11.6553, "step": 23690 }, { "epoch": 0.49591811102738004, "grad_norm": 0.30989933013916016, "learning_rate": 0.00018684271057711863, "loss": 11.6678, "step": 23691 }, { "epoch": 0.4959390437913422, "grad_norm": 0.2741200625896454, "learning_rate": 0.00018684162346147297, "loss": 11.6644, "step": 23692 }, { "epoch": 0.4959599765553044, "grad_norm": 0.29224148392677307, "learning_rate": 0.00018684053630408078, "loss": 11.6694, "step": 23693 }, { "epoch": 0.4959809093192665, "grad_norm": 0.28001564741134644, "learning_rate": 0.0001868394491049426, "loss": 11.6721, "step": 23694 }, { "epoch": 0.4960018420832287, "grad_norm": 0.39218172430992126, "learning_rate": 0.00018683836186405897, "loss": 11.6675, "step": 23695 }, { "epoch": 0.4960227748471908, "grad_norm": 0.29404014348983765, "learning_rate": 0.0001868372745814304, "loss": 11.6807, "step": 23696 }, { "epoch": 0.49604370761115296, "grad_norm": 0.28394412994384766, "learning_rate": 0.0001868361872570574, "loss": 11.6547, "step": 23697 }, { "epoch": 0.4960646403751151, "grad_norm": 0.2590252757072449, "learning_rate": 0.0001868350998909405, "loss": 11.6755, "step": 23698 }, { "epoch": 0.4960855731390773, "grad_norm": 0.2782380282878876, "learning_rate": 0.00018683401248308025, "loss": 11.6749, "step": 23699 }, { "epoch": 0.49610650590303945, "grad_norm": 0.31813985109329224, "learning_rate": 0.00018683292503347715, "loss": 11.672, "step": 23700 }, { "epoch": 0.4961274386670016, "grad_norm": 0.4046872854232788, "learning_rate": 0.00018683183754213172, "loss": 11.6635, "step": 23701 }, { "epoch": 0.49614837143096374, "grad_norm": 0.28536731004714966, "learning_rate": 0.00018683075000904445, "loss": 11.6782, "step": 23702 }, { "epoch": 0.4961693041949259, "grad_norm": 0.3763951063156128, "learning_rate": 0.00018682966243421594, "loss": 11.6818, "step": 23703 }, { "epoch": 0.49619023695888803, "grad_norm": 0.2894130349159241, "learning_rate": 0.00018682857481764663, "loss": 11.6504, "step": 23704 }, { "epoch": 0.49621116972285023, "grad_norm": 0.37883421778678894, "learning_rate": 0.00018682748715933713, "loss": 11.6689, "step": 23705 }, { "epoch": 0.4962321024868124, "grad_norm": 0.372477263212204, "learning_rate": 0.0001868263994592879, "loss": 11.6779, "step": 23706 }, { "epoch": 0.4962530352507745, "grad_norm": 0.27337411046028137, "learning_rate": 0.0001868253117174995, "loss": 11.6725, "step": 23707 }, { "epoch": 0.49627396801473667, "grad_norm": 0.3186345100402832, "learning_rate": 0.0001868242239339724, "loss": 11.6663, "step": 23708 }, { "epoch": 0.4962949007786988, "grad_norm": 0.28445032238960266, "learning_rate": 0.00018682313610870717, "loss": 11.6673, "step": 23709 }, { "epoch": 0.49631583354266096, "grad_norm": 0.2750989496707916, "learning_rate": 0.00018682204824170429, "loss": 11.6475, "step": 23710 }, { "epoch": 0.4963367663066231, "grad_norm": 0.30225545167922974, "learning_rate": 0.00018682096033296436, "loss": 11.6641, "step": 23711 }, { "epoch": 0.4963576990705853, "grad_norm": 0.28029879927635193, "learning_rate": 0.00018681987238248782, "loss": 11.6587, "step": 23712 }, { "epoch": 0.49637863183454745, "grad_norm": 0.412106454372406, "learning_rate": 0.00018681878439027525, "loss": 11.6726, "step": 23713 }, { "epoch": 0.4963995645985096, "grad_norm": 0.2779310941696167, "learning_rate": 0.0001868176963563271, "loss": 11.6779, "step": 23714 }, { "epoch": 0.49642049736247174, "grad_norm": 0.2864581048488617, "learning_rate": 0.000186816608280644, "loss": 11.6607, "step": 23715 }, { "epoch": 0.4964414301264339, "grad_norm": 0.32531529664993286, "learning_rate": 0.0001868155201632264, "loss": 11.6742, "step": 23716 }, { "epoch": 0.496462362890396, "grad_norm": 0.2845856547355652, "learning_rate": 0.00018681443200407484, "loss": 11.6863, "step": 23717 }, { "epoch": 0.4964832956543582, "grad_norm": 0.2965494394302368, "learning_rate": 0.00018681334380318983, "loss": 11.6761, "step": 23718 }, { "epoch": 0.49650422841832037, "grad_norm": 0.27052050828933716, "learning_rate": 0.00018681225556057193, "loss": 11.6743, "step": 23719 }, { "epoch": 0.4965251611822825, "grad_norm": 0.28110602498054504, "learning_rate": 0.0001868111672762216, "loss": 11.6773, "step": 23720 }, { "epoch": 0.49654609394624466, "grad_norm": 0.36318737268447876, "learning_rate": 0.00018681007895013946, "loss": 11.6813, "step": 23721 }, { "epoch": 0.4965670267102068, "grad_norm": 0.34530285000801086, "learning_rate": 0.00018680899058232591, "loss": 11.676, "step": 23722 }, { "epoch": 0.49658795947416895, "grad_norm": 0.31707704067230225, "learning_rate": 0.0001868079021727816, "loss": 11.6802, "step": 23723 }, { "epoch": 0.49660889223813115, "grad_norm": 0.3172800540924072, "learning_rate": 0.00018680681372150698, "loss": 11.6737, "step": 23724 }, { "epoch": 0.4966298250020933, "grad_norm": 0.2779821455478668, "learning_rate": 0.00018680572522850256, "loss": 11.6649, "step": 23725 }, { "epoch": 0.49665075776605544, "grad_norm": 0.2760618031024933, "learning_rate": 0.00018680463669376888, "loss": 11.6702, "step": 23726 }, { "epoch": 0.4966716905300176, "grad_norm": 0.3081235885620117, "learning_rate": 0.0001868035481173065, "loss": 11.6653, "step": 23727 }, { "epoch": 0.49669262329397973, "grad_norm": 0.30860450863838196, "learning_rate": 0.00018680245949911589, "loss": 11.6679, "step": 23728 }, { "epoch": 0.4967135560579419, "grad_norm": 0.32278385758399963, "learning_rate": 0.00018680137083919762, "loss": 11.6817, "step": 23729 }, { "epoch": 0.496734488821904, "grad_norm": 0.25511300563812256, "learning_rate": 0.00018680028213755216, "loss": 11.6746, "step": 23730 }, { "epoch": 0.4967554215858662, "grad_norm": 0.363066703081131, "learning_rate": 0.00018679919339418012, "loss": 11.6793, "step": 23731 }, { "epoch": 0.49677635434982836, "grad_norm": 0.2952563166618347, "learning_rate": 0.0001867981046090819, "loss": 11.6589, "step": 23732 }, { "epoch": 0.4967972871137905, "grad_norm": 0.2840036451816559, "learning_rate": 0.00018679701578225814, "loss": 11.6834, "step": 23733 }, { "epoch": 0.49681821987775265, "grad_norm": 0.40534549951553345, "learning_rate": 0.0001867959269137093, "loss": 11.6515, "step": 23734 }, { "epoch": 0.4968391526417148, "grad_norm": 0.2371659278869629, "learning_rate": 0.00018679483800343593, "loss": 11.6644, "step": 23735 }, { "epoch": 0.49686008540567694, "grad_norm": 0.2819996178150177, "learning_rate": 0.00018679374905143854, "loss": 11.6721, "step": 23736 }, { "epoch": 0.49688101816963914, "grad_norm": 0.2678135633468628, "learning_rate": 0.00018679266005771765, "loss": 11.6585, "step": 23737 }, { "epoch": 0.4969019509336013, "grad_norm": 0.30693668127059937, "learning_rate": 0.00018679157102227377, "loss": 11.6668, "step": 23738 }, { "epoch": 0.49692288369756343, "grad_norm": 0.3227177858352661, "learning_rate": 0.00018679048194510749, "loss": 11.668, "step": 23739 }, { "epoch": 0.4969438164615256, "grad_norm": 0.2833749055862427, "learning_rate": 0.00018678939282621923, "loss": 11.6869, "step": 23740 }, { "epoch": 0.4969647492254877, "grad_norm": 0.2974195182323456, "learning_rate": 0.0001867883036656096, "loss": 11.664, "step": 23741 }, { "epoch": 0.49698568198944987, "grad_norm": 0.3068598508834839, "learning_rate": 0.00018678721446327908, "loss": 11.682, "step": 23742 }, { "epoch": 0.49700661475341207, "grad_norm": 0.37315258383750916, "learning_rate": 0.00018678612521922823, "loss": 11.6725, "step": 23743 }, { "epoch": 0.4970275475173742, "grad_norm": 0.29603540897369385, "learning_rate": 0.00018678503593345754, "loss": 11.6697, "step": 23744 }, { "epoch": 0.49704848028133636, "grad_norm": 0.2724837362766266, "learning_rate": 0.00018678394660596754, "loss": 11.6521, "step": 23745 }, { "epoch": 0.4970694130452985, "grad_norm": 0.37157824635505676, "learning_rate": 0.00018678285723675874, "loss": 11.68, "step": 23746 }, { "epoch": 0.49709034580926065, "grad_norm": 0.24590128660202026, "learning_rate": 0.0001867817678258317, "loss": 11.6663, "step": 23747 }, { "epoch": 0.4971112785732228, "grad_norm": 0.3515614867210388, "learning_rate": 0.00018678067837318694, "loss": 11.6764, "step": 23748 }, { "epoch": 0.49713221133718494, "grad_norm": 0.2607114017009735, "learning_rate": 0.00018677958887882494, "loss": 11.6447, "step": 23749 }, { "epoch": 0.49715314410114714, "grad_norm": 0.34851258993148804, "learning_rate": 0.00018677849934274626, "loss": 11.6878, "step": 23750 }, { "epoch": 0.4971740768651093, "grad_norm": 0.3277146816253662, "learning_rate": 0.00018677740976495144, "loss": 11.6598, "step": 23751 }, { "epoch": 0.4971950096290714, "grad_norm": 0.2973450720310211, "learning_rate": 0.00018677632014544096, "loss": 11.672, "step": 23752 }, { "epoch": 0.49721594239303357, "grad_norm": 0.2876127362251282, "learning_rate": 0.00018677523048421537, "loss": 11.6728, "step": 23753 }, { "epoch": 0.4972368751569957, "grad_norm": 0.3231827914714813, "learning_rate": 0.00018677414078127518, "loss": 11.6725, "step": 23754 }, { "epoch": 0.49725780792095786, "grad_norm": 0.35362452268600464, "learning_rate": 0.00018677305103662092, "loss": 11.66, "step": 23755 }, { "epoch": 0.49727874068492006, "grad_norm": 0.33585771918296814, "learning_rate": 0.0001867719612502531, "loss": 11.6707, "step": 23756 }, { "epoch": 0.4972996734488822, "grad_norm": 0.412925124168396, "learning_rate": 0.0001867708714221723, "loss": 11.6866, "step": 23757 }, { "epoch": 0.49732060621284435, "grad_norm": 0.2345437854528427, "learning_rate": 0.00018676978155237898, "loss": 11.6553, "step": 23758 }, { "epoch": 0.4973415389768065, "grad_norm": 0.32113510370254517, "learning_rate": 0.0001867686916408737, "loss": 11.6651, "step": 23759 }, { "epoch": 0.49736247174076864, "grad_norm": 0.3505760133266449, "learning_rate": 0.00018676760168765696, "loss": 11.6547, "step": 23760 }, { "epoch": 0.4973834045047308, "grad_norm": 0.22733497619628906, "learning_rate": 0.0001867665116927293, "loss": 11.6769, "step": 23761 }, { "epoch": 0.49740433726869293, "grad_norm": 0.26105809211730957, "learning_rate": 0.00018676542165609123, "loss": 11.6749, "step": 23762 }, { "epoch": 0.49742527003265513, "grad_norm": 0.24437180161476135, "learning_rate": 0.00018676433157774328, "loss": 11.6723, "step": 23763 }, { "epoch": 0.4974462027966173, "grad_norm": 0.26396119594573975, "learning_rate": 0.000186763241457686, "loss": 11.6605, "step": 23764 }, { "epoch": 0.4974671355605794, "grad_norm": 0.2945651113986969, "learning_rate": 0.00018676215129591984, "loss": 11.67, "step": 23765 }, { "epoch": 0.49748806832454157, "grad_norm": 0.37443235516548157, "learning_rate": 0.00018676106109244544, "loss": 11.6852, "step": 23766 }, { "epoch": 0.4975090010885037, "grad_norm": 0.2899082899093628, "learning_rate": 0.00018675997084726325, "loss": 11.6599, "step": 23767 }, { "epoch": 0.49752993385246586, "grad_norm": 0.23615893721580505, "learning_rate": 0.00018675888056037375, "loss": 11.6568, "step": 23768 }, { "epoch": 0.49755086661642806, "grad_norm": 0.30172863602638245, "learning_rate": 0.00018675779023177757, "loss": 11.6632, "step": 23769 }, { "epoch": 0.4975717993803902, "grad_norm": 0.29242143034935, "learning_rate": 0.00018675669986147514, "loss": 11.6617, "step": 23770 }, { "epoch": 0.49759273214435235, "grad_norm": 0.28723645210266113, "learning_rate": 0.00018675560944946709, "loss": 11.6725, "step": 23771 }, { "epoch": 0.4976136649083145, "grad_norm": 0.2575417459011078, "learning_rate": 0.0001867545189957538, "loss": 11.6644, "step": 23772 }, { "epoch": 0.49763459767227664, "grad_norm": 0.9107338786125183, "learning_rate": 0.00018675342850033591, "loss": 11.6912, "step": 23773 }, { "epoch": 0.4976555304362388, "grad_norm": 0.2477767914533615, "learning_rate": 0.00018675233796321394, "loss": 11.6774, "step": 23774 }, { "epoch": 0.497676463200201, "grad_norm": 0.33239662647247314, "learning_rate": 0.00018675124738438834, "loss": 11.66, "step": 23775 }, { "epoch": 0.4976973959641631, "grad_norm": 0.26848816871643066, "learning_rate": 0.0001867501567638597, "loss": 11.6676, "step": 23776 }, { "epoch": 0.49771832872812527, "grad_norm": 0.2554457485675812, "learning_rate": 0.0001867490661016285, "loss": 11.6609, "step": 23777 }, { "epoch": 0.4977392614920874, "grad_norm": 0.3208219110965729, "learning_rate": 0.0001867479753976953, "loss": 11.6732, "step": 23778 }, { "epoch": 0.49776019425604956, "grad_norm": 0.25171273946762085, "learning_rate": 0.00018674688465206062, "loss": 11.6709, "step": 23779 }, { "epoch": 0.4977811270200117, "grad_norm": 0.22459676861763, "learning_rate": 0.00018674579386472495, "loss": 11.6738, "step": 23780 }, { "epoch": 0.49780205978397385, "grad_norm": 0.27058130502700806, "learning_rate": 0.00018674470303568886, "loss": 11.6719, "step": 23781 }, { "epoch": 0.49782299254793605, "grad_norm": 0.2224670648574829, "learning_rate": 0.00018674361216495285, "loss": 11.6572, "step": 23782 }, { "epoch": 0.4978439253118982, "grad_norm": 0.28681719303131104, "learning_rate": 0.00018674252125251744, "loss": 11.6691, "step": 23783 }, { "epoch": 0.49786485807586034, "grad_norm": 0.28624168038368225, "learning_rate": 0.00018674143029838317, "loss": 11.682, "step": 23784 }, { "epoch": 0.4978857908398225, "grad_norm": 0.3350231647491455, "learning_rate": 0.00018674033930255054, "loss": 11.6837, "step": 23785 }, { "epoch": 0.49790672360378463, "grad_norm": 0.25091278553009033, "learning_rate": 0.0001867392482650201, "loss": 11.6834, "step": 23786 }, { "epoch": 0.4979276563677468, "grad_norm": 0.26169586181640625, "learning_rate": 0.00018673815718579234, "loss": 11.6635, "step": 23787 }, { "epoch": 0.497948589131709, "grad_norm": 0.39812934398651123, "learning_rate": 0.00018673706606486782, "loss": 11.6986, "step": 23788 }, { "epoch": 0.4979695218956711, "grad_norm": 0.2546596825122833, "learning_rate": 0.00018673597490224709, "loss": 11.6582, "step": 23789 }, { "epoch": 0.49799045465963326, "grad_norm": 0.3012012243270874, "learning_rate": 0.00018673488369793062, "loss": 11.6759, "step": 23790 }, { "epoch": 0.4980113874235954, "grad_norm": 0.24353647232055664, "learning_rate": 0.00018673379245191893, "loss": 11.6678, "step": 23791 }, { "epoch": 0.49803232018755755, "grad_norm": 0.27156513929367065, "learning_rate": 0.0001867327011642126, "loss": 11.6773, "step": 23792 }, { "epoch": 0.4980532529515197, "grad_norm": 0.27186253666877747, "learning_rate": 0.0001867316098348121, "loss": 11.6861, "step": 23793 }, { "epoch": 0.4980741857154819, "grad_norm": 0.2791178226470947, "learning_rate": 0.00018673051846371797, "loss": 11.674, "step": 23794 }, { "epoch": 0.49809511847944404, "grad_norm": 0.274067223072052, "learning_rate": 0.00018672942705093079, "loss": 11.6916, "step": 23795 }, { "epoch": 0.4981160512434062, "grad_norm": 1.9901976585388184, "learning_rate": 0.000186728335596451, "loss": 11.6767, "step": 23796 }, { "epoch": 0.49813698400736833, "grad_norm": 0.3373129069805145, "learning_rate": 0.00018672724410027916, "loss": 11.6485, "step": 23797 }, { "epoch": 0.4981579167713305, "grad_norm": 0.3156545162200928, "learning_rate": 0.00018672615256241579, "loss": 11.6772, "step": 23798 }, { "epoch": 0.4981788495352926, "grad_norm": 0.26022905111312866, "learning_rate": 0.00018672506098286142, "loss": 11.6683, "step": 23799 }, { "epoch": 0.49819978229925477, "grad_norm": 0.2993791401386261, "learning_rate": 0.0001867239693616166, "loss": 11.6744, "step": 23800 }, { "epoch": 0.49822071506321697, "grad_norm": 0.32235053181648254, "learning_rate": 0.00018672287769868183, "loss": 11.6887, "step": 23801 }, { "epoch": 0.4982416478271791, "grad_norm": 0.2582190930843353, "learning_rate": 0.00018672178599405762, "loss": 11.6679, "step": 23802 }, { "epoch": 0.49826258059114126, "grad_norm": 0.37351664900779724, "learning_rate": 0.0001867206942477445, "loss": 11.6614, "step": 23803 }, { "epoch": 0.4982835133551034, "grad_norm": 0.30210089683532715, "learning_rate": 0.00018671960245974302, "loss": 11.6692, "step": 23804 }, { "epoch": 0.49830444611906555, "grad_norm": 0.27734699845314026, "learning_rate": 0.00018671851063005367, "loss": 11.6763, "step": 23805 }, { "epoch": 0.4983253788830277, "grad_norm": 0.3259824812412262, "learning_rate": 0.00018671741875867702, "loss": 11.6808, "step": 23806 }, { "epoch": 0.4983463116469899, "grad_norm": 0.2707946002483368, "learning_rate": 0.00018671632684561358, "loss": 11.6751, "step": 23807 }, { "epoch": 0.49836724441095204, "grad_norm": 0.2861563265323639, "learning_rate": 0.00018671523489086382, "loss": 11.6791, "step": 23808 }, { "epoch": 0.4983881771749142, "grad_norm": 0.3073066174983978, "learning_rate": 0.00018671414289442832, "loss": 11.6653, "step": 23809 }, { "epoch": 0.4984091099388763, "grad_norm": 0.29729026556015015, "learning_rate": 0.0001867130508563076, "loss": 11.6685, "step": 23810 }, { "epoch": 0.49843004270283847, "grad_norm": 0.3596053421497345, "learning_rate": 0.00018671195877650217, "loss": 11.6788, "step": 23811 }, { "epoch": 0.4984509754668006, "grad_norm": 0.35667720437049866, "learning_rate": 0.0001867108666550126, "loss": 11.6839, "step": 23812 }, { "epoch": 0.4984719082307628, "grad_norm": 0.41603171825408936, "learning_rate": 0.00018670977449183936, "loss": 11.6606, "step": 23813 }, { "epoch": 0.49849284099472496, "grad_norm": 0.31182411313056946, "learning_rate": 0.00018670868228698296, "loss": 11.6625, "step": 23814 }, { "epoch": 0.4985137737586871, "grad_norm": 0.2681145966053009, "learning_rate": 0.00018670759004044403, "loss": 11.6798, "step": 23815 }, { "epoch": 0.49853470652264925, "grad_norm": 0.3321824371814728, "learning_rate": 0.00018670649775222295, "loss": 11.682, "step": 23816 }, { "epoch": 0.4985556392866114, "grad_norm": 0.28514352440834045, "learning_rate": 0.00018670540542232037, "loss": 11.6683, "step": 23817 }, { "epoch": 0.49857657205057354, "grad_norm": 0.37986165285110474, "learning_rate": 0.00018670431305073675, "loss": 11.6723, "step": 23818 }, { "epoch": 0.4985975048145357, "grad_norm": 0.2768282890319824, "learning_rate": 0.0001867032206374726, "loss": 11.6908, "step": 23819 }, { "epoch": 0.4986184375784979, "grad_norm": 0.2500190734863281, "learning_rate": 0.0001867021281825285, "loss": 11.6697, "step": 23820 }, { "epoch": 0.49863937034246003, "grad_norm": 0.2851495146751404, "learning_rate": 0.00018670103568590497, "loss": 11.6681, "step": 23821 }, { "epoch": 0.4986603031064222, "grad_norm": 0.2643430531024933, "learning_rate": 0.00018669994314760247, "loss": 11.6712, "step": 23822 }, { "epoch": 0.4986812358703843, "grad_norm": 0.29018092155456543, "learning_rate": 0.0001866988505676216, "loss": 11.6683, "step": 23823 }, { "epoch": 0.49870216863434647, "grad_norm": 0.25126147270202637, "learning_rate": 0.00018669775794596285, "loss": 11.6857, "step": 23824 }, { "epoch": 0.4987231013983086, "grad_norm": 0.44808799028396606, "learning_rate": 0.00018669666528262676, "loss": 11.679, "step": 23825 }, { "epoch": 0.4987440341622708, "grad_norm": 0.37021028995513916, "learning_rate": 0.0001866955725776138, "loss": 11.6633, "step": 23826 }, { "epoch": 0.49876496692623296, "grad_norm": 0.3321158289909363, "learning_rate": 0.00018669447983092458, "loss": 11.6778, "step": 23827 }, { "epoch": 0.4987858996901951, "grad_norm": 0.30868491530418396, "learning_rate": 0.00018669338704255958, "loss": 11.6683, "step": 23828 }, { "epoch": 0.49880683245415725, "grad_norm": 0.40262627601623535, "learning_rate": 0.00018669229421251933, "loss": 11.6802, "step": 23829 }, { "epoch": 0.4988277652181194, "grad_norm": 0.2784006893634796, "learning_rate": 0.00018669120134080436, "loss": 11.6707, "step": 23830 }, { "epoch": 0.49884869798208153, "grad_norm": 0.4107256233692169, "learning_rate": 0.0001866901084274152, "loss": 11.6749, "step": 23831 }, { "epoch": 0.49886963074604374, "grad_norm": 0.2849448621273041, "learning_rate": 0.00018668901547235235, "loss": 11.6774, "step": 23832 }, { "epoch": 0.4988905635100059, "grad_norm": 0.26304948329925537, "learning_rate": 0.00018668792247561634, "loss": 11.6782, "step": 23833 }, { "epoch": 0.498911496273968, "grad_norm": 0.28178149461746216, "learning_rate": 0.00018668682943720775, "loss": 11.6917, "step": 23834 }, { "epoch": 0.49893242903793017, "grad_norm": 0.305580198764801, "learning_rate": 0.00018668573635712704, "loss": 11.6651, "step": 23835 }, { "epoch": 0.4989533618018923, "grad_norm": 0.3767175078392029, "learning_rate": 0.00018668464323537474, "loss": 11.6662, "step": 23836 }, { "epoch": 0.49897429456585446, "grad_norm": 0.3262845277786255, "learning_rate": 0.00018668355007195144, "loss": 11.6669, "step": 23837 }, { "epoch": 0.4989952273298166, "grad_norm": 0.3492332994937897, "learning_rate": 0.0001866824568668576, "loss": 11.6527, "step": 23838 }, { "epoch": 0.4990161600937788, "grad_norm": 0.3304491937160492, "learning_rate": 0.00018668136362009376, "loss": 11.6769, "step": 23839 }, { "epoch": 0.49903709285774095, "grad_norm": 0.29341909289360046, "learning_rate": 0.00018668027033166045, "loss": 11.6764, "step": 23840 }, { "epoch": 0.4990580256217031, "grad_norm": 0.3052137792110443, "learning_rate": 0.00018667917700155822, "loss": 11.6894, "step": 23841 }, { "epoch": 0.49907895838566524, "grad_norm": 0.2727705240249634, "learning_rate": 0.00018667808362978755, "loss": 11.6743, "step": 23842 }, { "epoch": 0.4990998911496274, "grad_norm": 0.2420414388179779, "learning_rate": 0.00018667699021634898, "loss": 11.6639, "step": 23843 }, { "epoch": 0.49912082391358953, "grad_norm": 0.27353665232658386, "learning_rate": 0.00018667589676124306, "loss": 11.6776, "step": 23844 }, { "epoch": 0.49914175667755173, "grad_norm": 0.30115994811058044, "learning_rate": 0.00018667480326447027, "loss": 11.6758, "step": 23845 }, { "epoch": 0.4991626894415139, "grad_norm": 0.33237341046333313, "learning_rate": 0.00018667370972603122, "loss": 11.6674, "step": 23846 }, { "epoch": 0.499183622205476, "grad_norm": 0.25844746828079224, "learning_rate": 0.00018667261614592636, "loss": 11.6768, "step": 23847 }, { "epoch": 0.49920455496943816, "grad_norm": 0.23754173517227173, "learning_rate": 0.0001866715225241562, "loss": 11.6653, "step": 23848 }, { "epoch": 0.4992254877334003, "grad_norm": 0.21730688214302063, "learning_rate": 0.00018667042886072135, "loss": 11.6567, "step": 23849 }, { "epoch": 0.49924642049736245, "grad_norm": 0.2611188590526581, "learning_rate": 0.00018666933515562226, "loss": 11.6666, "step": 23850 }, { "epoch": 0.4992673532613246, "grad_norm": 0.31365829706192017, "learning_rate": 0.0001866682414088595, "loss": 11.6812, "step": 23851 }, { "epoch": 0.4992882860252868, "grad_norm": 0.2847188413143158, "learning_rate": 0.00018666714762043357, "loss": 11.6728, "step": 23852 }, { "epoch": 0.49930921878924894, "grad_norm": 0.2960735261440277, "learning_rate": 0.000186666053790345, "loss": 11.6745, "step": 23853 }, { "epoch": 0.4993301515532111, "grad_norm": 0.3343794643878937, "learning_rate": 0.00018666495991859433, "loss": 11.6731, "step": 23854 }, { "epoch": 0.49935108431717323, "grad_norm": 0.3483351767063141, "learning_rate": 0.0001866638660051821, "loss": 11.6649, "step": 23855 }, { "epoch": 0.4993720170811354, "grad_norm": 0.4928547143936157, "learning_rate": 0.00018666277205010875, "loss": 11.6638, "step": 23856 }, { "epoch": 0.4993929498450975, "grad_norm": 0.31385159492492676, "learning_rate": 0.00018666167805337492, "loss": 11.6625, "step": 23857 }, { "epoch": 0.4994138826090597, "grad_norm": 0.2784520089626312, "learning_rate": 0.00018666058401498105, "loss": 11.6744, "step": 23858 }, { "epoch": 0.49943481537302187, "grad_norm": 0.33926713466644287, "learning_rate": 0.00018665948993492774, "loss": 11.6694, "step": 23859 }, { "epoch": 0.499455748136984, "grad_norm": 0.28045520186424255, "learning_rate": 0.00018665839581321546, "loss": 11.667, "step": 23860 }, { "epoch": 0.49947668090094616, "grad_norm": 0.29339268803596497, "learning_rate": 0.00018665730164984474, "loss": 11.6817, "step": 23861 }, { "epoch": 0.4994976136649083, "grad_norm": 0.26797589659690857, "learning_rate": 0.00018665620744481612, "loss": 11.6831, "step": 23862 }, { "epoch": 0.49951854642887045, "grad_norm": 0.24873444437980652, "learning_rate": 0.00018665511319813013, "loss": 11.6805, "step": 23863 }, { "epoch": 0.49953947919283265, "grad_norm": 0.3479495942592621, "learning_rate": 0.0001866540189097873, "loss": 11.6805, "step": 23864 }, { "epoch": 0.4995604119567948, "grad_norm": 0.29944857954978943, "learning_rate": 0.00018665292457978815, "loss": 11.6477, "step": 23865 }, { "epoch": 0.49958134472075694, "grad_norm": 0.31119611859321594, "learning_rate": 0.0001866518302081332, "loss": 11.6752, "step": 23866 }, { "epoch": 0.4996022774847191, "grad_norm": 0.27322930097579956, "learning_rate": 0.00018665073579482298, "loss": 11.6584, "step": 23867 }, { "epoch": 0.4996232102486812, "grad_norm": 0.2622908651828766, "learning_rate": 0.000186649641339858, "loss": 11.6753, "step": 23868 }, { "epoch": 0.49964414301264337, "grad_norm": 0.3732289671897888, "learning_rate": 0.0001866485468432388, "loss": 11.6709, "step": 23869 }, { "epoch": 0.4996650757766055, "grad_norm": 0.28333204984664917, "learning_rate": 0.0001866474523049659, "loss": 11.6724, "step": 23870 }, { "epoch": 0.4996860085405677, "grad_norm": 0.27122053503990173, "learning_rate": 0.00018664635772503986, "loss": 11.6578, "step": 23871 }, { "epoch": 0.49970694130452986, "grad_norm": 0.2898266315460205, "learning_rate": 0.00018664526310346115, "loss": 11.6692, "step": 23872 }, { "epoch": 0.499727874068492, "grad_norm": 0.24040983617305756, "learning_rate": 0.00018664416844023035, "loss": 11.6821, "step": 23873 }, { "epoch": 0.49974880683245415, "grad_norm": 0.2901564836502075, "learning_rate": 0.00018664307373534794, "loss": 11.6644, "step": 23874 }, { "epoch": 0.4997697395964163, "grad_norm": 0.251079261302948, "learning_rate": 0.0001866419789888145, "loss": 11.6717, "step": 23875 }, { "epoch": 0.49979067236037844, "grad_norm": 0.33587580919265747, "learning_rate": 0.0001866408842006305, "loss": 11.6746, "step": 23876 }, { "epoch": 0.49981160512434064, "grad_norm": 0.2925872504711151, "learning_rate": 0.0001866397893707965, "loss": 11.6657, "step": 23877 }, { "epoch": 0.4998325378883028, "grad_norm": 0.33233025670051575, "learning_rate": 0.00018663869449931305, "loss": 11.6791, "step": 23878 }, { "epoch": 0.49985347065226493, "grad_norm": 0.28134843707084656, "learning_rate": 0.00018663759958618056, "loss": 11.654, "step": 23879 }, { "epoch": 0.4998744034162271, "grad_norm": 0.3033144474029541, "learning_rate": 0.00018663650463139972, "loss": 11.6648, "step": 23880 }, { "epoch": 0.4998953361801892, "grad_norm": 0.24647197127342224, "learning_rate": 0.00018663540963497095, "loss": 11.6593, "step": 23881 }, { "epoch": 0.49991626894415137, "grad_norm": 0.28225812315940857, "learning_rate": 0.0001866343145968948, "loss": 11.6632, "step": 23882 }, { "epoch": 0.49993720170811357, "grad_norm": 0.4151958227157593, "learning_rate": 0.0001866332195171718, "loss": 11.6969, "step": 23883 }, { "epoch": 0.4999581344720757, "grad_norm": 0.32260289788246155, "learning_rate": 0.00018663212439580244, "loss": 11.6608, "step": 23884 }, { "epoch": 0.49997906723603786, "grad_norm": 0.28907790780067444, "learning_rate": 0.00018663102923278732, "loss": 11.6723, "step": 23885 }, { "epoch": 0.5, "grad_norm": 0.2694440186023712, "learning_rate": 0.00018662993402812693, "loss": 11.6787, "step": 23886 }, { "epoch": 0.5000209327639622, "grad_norm": 0.247007355093956, "learning_rate": 0.00018662883878182177, "loss": 11.6801, "step": 23887 }, { "epoch": 0.5000418655279243, "grad_norm": 0.3473820090293884, "learning_rate": 0.0001866277434938724, "loss": 11.6698, "step": 23888 }, { "epoch": 0.5000627982918865, "grad_norm": 0.2625029981136322, "learning_rate": 0.00018662664816427934, "loss": 11.6613, "step": 23889 }, { "epoch": 0.5000837310558486, "grad_norm": 0.27039220929145813, "learning_rate": 0.00018662555279304313, "loss": 11.6754, "step": 23890 }, { "epoch": 0.5001046638198108, "grad_norm": 0.26213666796684265, "learning_rate": 0.00018662445738016427, "loss": 11.6697, "step": 23891 }, { "epoch": 0.5001255965837729, "grad_norm": 0.2932184338569641, "learning_rate": 0.00018662336192564328, "loss": 11.6563, "step": 23892 }, { "epoch": 0.5001465293477351, "grad_norm": 0.31734973192214966, "learning_rate": 0.00018662226642948072, "loss": 11.655, "step": 23893 }, { "epoch": 0.5001674621116973, "grad_norm": 0.27439266443252563, "learning_rate": 0.0001866211708916771, "loss": 11.6885, "step": 23894 }, { "epoch": 0.5001883948756594, "grad_norm": 0.32319679856300354, "learning_rate": 0.00018662007531223296, "loss": 11.6605, "step": 23895 }, { "epoch": 0.5002093276396216, "grad_norm": 0.33976998925209045, "learning_rate": 0.00018661897969114878, "loss": 11.6764, "step": 23896 }, { "epoch": 0.5002302604035836, "grad_norm": 0.30069541931152344, "learning_rate": 0.00018661788402842518, "loss": 11.6644, "step": 23897 }, { "epoch": 0.5002511931675458, "grad_norm": 0.26220911741256714, "learning_rate": 0.00018661678832406254, "loss": 11.6685, "step": 23898 }, { "epoch": 0.500272125931508, "grad_norm": 0.26852232217788696, "learning_rate": 0.00018661569257806155, "loss": 11.6757, "step": 23899 }, { "epoch": 0.5002930586954701, "grad_norm": 0.3658619523048401, "learning_rate": 0.00018661459679042265, "loss": 11.6712, "step": 23900 }, { "epoch": 0.5003139914594323, "grad_norm": 0.28966641426086426, "learning_rate": 0.00018661350096114634, "loss": 11.6706, "step": 23901 }, { "epoch": 0.5003349242233944, "grad_norm": 0.2716633975505829, "learning_rate": 0.0001866124050902332, "loss": 11.6982, "step": 23902 }, { "epoch": 0.5003558569873566, "grad_norm": 0.25833818316459656, "learning_rate": 0.00018661130917768377, "loss": 11.6861, "step": 23903 }, { "epoch": 0.5003767897513187, "grad_norm": 0.27187469601631165, "learning_rate": 0.00018661021322349849, "loss": 11.6788, "step": 23904 }, { "epoch": 0.5003977225152809, "grad_norm": 0.2647784650325775, "learning_rate": 0.000186609117227678, "loss": 11.6868, "step": 23905 }, { "epoch": 0.5004186552792431, "grad_norm": 0.2908550798892975, "learning_rate": 0.00018660802119022275, "loss": 11.6848, "step": 23906 }, { "epoch": 0.5004395880432052, "grad_norm": 0.32832208275794983, "learning_rate": 0.00018660692511113328, "loss": 11.6838, "step": 23907 }, { "epoch": 0.5004605208071674, "grad_norm": 0.2792036235332489, "learning_rate": 0.00018660582899041015, "loss": 11.6684, "step": 23908 }, { "epoch": 0.5004814535711295, "grad_norm": 0.27823519706726074, "learning_rate": 0.00018660473282805385, "loss": 11.6638, "step": 23909 }, { "epoch": 0.5005023863350917, "grad_norm": 0.3294794261455536, "learning_rate": 0.00018660363662406494, "loss": 11.6681, "step": 23910 }, { "epoch": 0.5005233190990538, "grad_norm": 0.3193044662475586, "learning_rate": 0.00018660254037844388, "loss": 11.6836, "step": 23911 }, { "epoch": 0.500544251863016, "grad_norm": 0.34254032373428345, "learning_rate": 0.00018660144409119128, "loss": 11.6843, "step": 23912 }, { "epoch": 0.5005651846269782, "grad_norm": 0.322490394115448, "learning_rate": 0.0001866003477623076, "loss": 11.6756, "step": 23913 }, { "epoch": 0.5005861173909403, "grad_norm": 0.3794979155063629, "learning_rate": 0.00018659925139179342, "loss": 11.673, "step": 23914 }, { "epoch": 0.5006070501549025, "grad_norm": 0.308023601770401, "learning_rate": 0.00018659815497964924, "loss": 11.6767, "step": 23915 }, { "epoch": 0.5006279829188646, "grad_norm": 0.27198874950408936, "learning_rate": 0.00018659705852587557, "loss": 11.6706, "step": 23916 }, { "epoch": 0.5006489156828268, "grad_norm": 0.22531934082508087, "learning_rate": 0.000186595962030473, "loss": 11.6569, "step": 23917 }, { "epoch": 0.500669848446789, "grad_norm": 0.31319257616996765, "learning_rate": 0.000186594865493442, "loss": 11.6628, "step": 23918 }, { "epoch": 0.5006907812107511, "grad_norm": 0.2291078120470047, "learning_rate": 0.0001865937689147831, "loss": 11.6555, "step": 23919 }, { "epoch": 0.5007117139747133, "grad_norm": 0.3247157037258148, "learning_rate": 0.00018659267229449684, "loss": 11.6616, "step": 23920 }, { "epoch": 0.5007326467386753, "grad_norm": 0.29118838906288147, "learning_rate": 0.00018659157563258373, "loss": 11.6661, "step": 23921 }, { "epoch": 0.5007535795026375, "grad_norm": 0.3596302568912506, "learning_rate": 0.0001865904789290444, "loss": 11.684, "step": 23922 }, { "epoch": 0.5007745122665996, "grad_norm": 0.2870292365550995, "learning_rate": 0.00018658938218387921, "loss": 11.6678, "step": 23923 }, { "epoch": 0.5007954450305618, "grad_norm": 0.26578256487846375, "learning_rate": 0.00018658828539708876, "loss": 11.667, "step": 23924 }, { "epoch": 0.500816377794524, "grad_norm": 0.3008946478366852, "learning_rate": 0.00018658718856867364, "loss": 11.6675, "step": 23925 }, { "epoch": 0.5008373105584861, "grad_norm": 0.28331536054611206, "learning_rate": 0.0001865860916986343, "loss": 11.6628, "step": 23926 }, { "epoch": 0.5008582433224483, "grad_norm": 0.27703380584716797, "learning_rate": 0.00018658499478697128, "loss": 11.6638, "step": 23927 }, { "epoch": 0.5008791760864104, "grad_norm": 0.2314298301935196, "learning_rate": 0.00018658389783368512, "loss": 11.6565, "step": 23928 }, { "epoch": 0.5009001088503726, "grad_norm": 0.24098710715770721, "learning_rate": 0.00018658280083877634, "loss": 11.6755, "step": 23929 }, { "epoch": 0.5009210416143347, "grad_norm": 0.27480024099349976, "learning_rate": 0.0001865817038022455, "loss": 11.6839, "step": 23930 }, { "epoch": 0.5009419743782969, "grad_norm": 0.2503158152103424, "learning_rate": 0.00018658060672409308, "loss": 11.6882, "step": 23931 }, { "epoch": 0.5009629071422591, "grad_norm": 0.2803579866886139, "learning_rate": 0.00018657950960431963, "loss": 11.6597, "step": 23932 }, { "epoch": 0.5009838399062212, "grad_norm": 0.26520612835884094, "learning_rate": 0.0001865784124429257, "loss": 11.6486, "step": 23933 }, { "epoch": 0.5010047726701834, "grad_norm": 0.22280153632164001, "learning_rate": 0.00018657731523991173, "loss": 11.6648, "step": 23934 }, { "epoch": 0.5010257054341455, "grad_norm": 0.37399330735206604, "learning_rate": 0.00018657621799527836, "loss": 11.6775, "step": 23935 }, { "epoch": 0.5010466381981077, "grad_norm": 0.3272150158882141, "learning_rate": 0.00018657512070902605, "loss": 11.6795, "step": 23936 }, { "epoch": 0.5010675709620699, "grad_norm": 0.315792441368103, "learning_rate": 0.00018657402338115536, "loss": 11.6657, "step": 23937 }, { "epoch": 0.501088503726032, "grad_norm": 0.3471900224685669, "learning_rate": 0.0001865729260116668, "loss": 11.6642, "step": 23938 }, { "epoch": 0.5011094364899942, "grad_norm": 0.3202623426914215, "learning_rate": 0.00018657182860056087, "loss": 11.6836, "step": 23939 }, { "epoch": 0.5011303692539563, "grad_norm": 0.3359992802143097, "learning_rate": 0.00018657073114783815, "loss": 11.6501, "step": 23940 }, { "epoch": 0.5011513020179185, "grad_norm": 0.25089946389198303, "learning_rate": 0.00018656963365349912, "loss": 11.6608, "step": 23941 }, { "epoch": 0.5011722347818806, "grad_norm": 0.26432687044143677, "learning_rate": 0.00018656853611754437, "loss": 11.6782, "step": 23942 }, { "epoch": 0.5011931675458428, "grad_norm": 0.2804611623287201, "learning_rate": 0.00018656743853997436, "loss": 11.684, "step": 23943 }, { "epoch": 0.501214100309805, "grad_norm": 0.2423340380191803, "learning_rate": 0.00018656634092078966, "loss": 11.6638, "step": 23944 }, { "epoch": 0.501235033073767, "grad_norm": 0.24797704815864563, "learning_rate": 0.0001865652432599908, "loss": 11.6734, "step": 23945 }, { "epoch": 0.5012559658377292, "grad_norm": 0.3086041808128357, "learning_rate": 0.0001865641455575783, "loss": 11.668, "step": 23946 }, { "epoch": 0.5012768986016913, "grad_norm": 0.2716391980648041, "learning_rate": 0.00018656304781355262, "loss": 11.655, "step": 23947 }, { "epoch": 0.5012978313656535, "grad_norm": 0.28184181451797485, "learning_rate": 0.0001865619500279144, "loss": 11.6648, "step": 23948 }, { "epoch": 0.5013187641296156, "grad_norm": 0.25215980410575867, "learning_rate": 0.0001865608522006641, "loss": 11.6591, "step": 23949 }, { "epoch": 0.5013396968935778, "grad_norm": 0.2781209945678711, "learning_rate": 0.00018655975433180228, "loss": 11.6598, "step": 23950 }, { "epoch": 0.50136062965754, "grad_norm": 0.3173521161079407, "learning_rate": 0.00018655865642132943, "loss": 11.6716, "step": 23951 }, { "epoch": 0.5013815624215021, "grad_norm": 0.37265118956565857, "learning_rate": 0.00018655755846924612, "loss": 11.6767, "step": 23952 }, { "epoch": 0.5014024951854643, "grad_norm": 0.25894495844841003, "learning_rate": 0.00018655646047555284, "loss": 11.6733, "step": 23953 }, { "epoch": 0.5014234279494264, "grad_norm": 0.34844970703125, "learning_rate": 0.00018655536244025015, "loss": 11.6607, "step": 23954 }, { "epoch": 0.5014443607133886, "grad_norm": 0.25459951162338257, "learning_rate": 0.00018655426436333853, "loss": 11.6426, "step": 23955 }, { "epoch": 0.5014652934773508, "grad_norm": 0.3487848937511444, "learning_rate": 0.00018655316624481858, "loss": 11.6834, "step": 23956 }, { "epoch": 0.5014862262413129, "grad_norm": 0.23565347492694855, "learning_rate": 0.00018655206808469076, "loss": 11.6633, "step": 23957 }, { "epoch": 0.5015071590052751, "grad_norm": 0.26659247279167175, "learning_rate": 0.00018655096988295564, "loss": 11.6726, "step": 23958 }, { "epoch": 0.5015280917692372, "grad_norm": 0.2452850341796875, "learning_rate": 0.00018654987163961372, "loss": 11.6593, "step": 23959 }, { "epoch": 0.5015490245331994, "grad_norm": 0.3026258945465088, "learning_rate": 0.00018654877335466557, "loss": 11.6633, "step": 23960 }, { "epoch": 0.5015699572971615, "grad_norm": 0.26413077116012573, "learning_rate": 0.00018654767502811168, "loss": 11.6703, "step": 23961 }, { "epoch": 0.5015908900611237, "grad_norm": 0.2711525559425354, "learning_rate": 0.0001865465766599526, "loss": 11.6558, "step": 23962 }, { "epoch": 0.5016118228250859, "grad_norm": 0.2706824243068695, "learning_rate": 0.0001865454782501888, "loss": 11.6604, "step": 23963 }, { "epoch": 0.501632755589048, "grad_norm": 0.3085215091705322, "learning_rate": 0.0001865443797988209, "loss": 11.6753, "step": 23964 }, { "epoch": 0.5016536883530102, "grad_norm": 0.2720864713191986, "learning_rate": 0.00018654328130584936, "loss": 11.6686, "step": 23965 }, { "epoch": 0.5016746211169723, "grad_norm": 0.45500972867012024, "learning_rate": 0.00018654218277127472, "loss": 11.6636, "step": 23966 }, { "epoch": 0.5016955538809345, "grad_norm": 0.2723172903060913, "learning_rate": 0.00018654108419509754, "loss": 11.6751, "step": 23967 }, { "epoch": 0.5017164866448965, "grad_norm": 0.3266681134700775, "learning_rate": 0.00018653998557731832, "loss": 11.667, "step": 23968 }, { "epoch": 0.5017374194088587, "grad_norm": 0.2797006666660309, "learning_rate": 0.0001865388869179376, "loss": 11.6641, "step": 23969 }, { "epoch": 0.5017583521728209, "grad_norm": 0.2345125675201416, "learning_rate": 0.0001865377882169559, "loss": 11.67, "step": 23970 }, { "epoch": 0.501779284936783, "grad_norm": 0.29924026131629944, "learning_rate": 0.00018653668947437376, "loss": 11.6715, "step": 23971 }, { "epoch": 0.5018002177007452, "grad_norm": 0.2638205885887146, "learning_rate": 0.00018653559069019168, "loss": 11.6873, "step": 23972 }, { "epoch": 0.5018211504647073, "grad_norm": 0.3080059587955475, "learning_rate": 0.0001865344918644102, "loss": 11.663, "step": 23973 }, { "epoch": 0.5018420832286695, "grad_norm": 0.37692365050315857, "learning_rate": 0.00018653339299702987, "loss": 11.6702, "step": 23974 }, { "epoch": 0.5018630159926317, "grad_norm": 0.2846261262893677, "learning_rate": 0.00018653229408805122, "loss": 11.6669, "step": 23975 }, { "epoch": 0.5018839487565938, "grad_norm": 0.352760910987854, "learning_rate": 0.00018653119513747476, "loss": 11.6595, "step": 23976 }, { "epoch": 0.501904881520556, "grad_norm": 0.3130464553833008, "learning_rate": 0.000186530096145301, "loss": 11.6804, "step": 23977 }, { "epoch": 0.5019258142845181, "grad_norm": 0.3016044497489929, "learning_rate": 0.00018652899711153048, "loss": 11.6607, "step": 23978 }, { "epoch": 0.5019467470484803, "grad_norm": 0.4013789892196655, "learning_rate": 0.00018652789803616377, "loss": 11.6914, "step": 23979 }, { "epoch": 0.5019676798124424, "grad_norm": 0.32604023814201355, "learning_rate": 0.00018652679891920134, "loss": 11.6767, "step": 23980 }, { "epoch": 0.5019886125764046, "grad_norm": 0.30347540974617004, "learning_rate": 0.00018652569976064376, "loss": 11.6708, "step": 23981 }, { "epoch": 0.5020095453403668, "grad_norm": 0.3106118440628052, "learning_rate": 0.00018652460056049153, "loss": 11.6787, "step": 23982 }, { "epoch": 0.5020304781043289, "grad_norm": 0.3560168147087097, "learning_rate": 0.00018652350131874519, "loss": 11.6743, "step": 23983 }, { "epoch": 0.5020514108682911, "grad_norm": 0.28626805543899536, "learning_rate": 0.00018652240203540527, "loss": 11.6731, "step": 23984 }, { "epoch": 0.5020723436322532, "grad_norm": 0.3310592472553253, "learning_rate": 0.00018652130271047231, "loss": 11.6653, "step": 23985 }, { "epoch": 0.5020932763962154, "grad_norm": 0.261033833026886, "learning_rate": 0.00018652020334394682, "loss": 11.6852, "step": 23986 }, { "epoch": 0.5021142091601775, "grad_norm": 0.3130321204662323, "learning_rate": 0.00018651910393582935, "loss": 11.6613, "step": 23987 }, { "epoch": 0.5021351419241397, "grad_norm": 0.3326365053653717, "learning_rate": 0.00018651800448612037, "loss": 11.6602, "step": 23988 }, { "epoch": 0.5021560746881019, "grad_norm": 0.31676363945007324, "learning_rate": 0.00018651690499482047, "loss": 11.6801, "step": 23989 }, { "epoch": 0.502177007452064, "grad_norm": 0.27330541610717773, "learning_rate": 0.00018651580546193017, "loss": 11.678, "step": 23990 }, { "epoch": 0.5021979402160262, "grad_norm": 0.27362045645713806, "learning_rate": 0.00018651470588744998, "loss": 11.6718, "step": 23991 }, { "epoch": 0.5022188729799882, "grad_norm": 0.34331896901130676, "learning_rate": 0.00018651360627138045, "loss": 11.6829, "step": 23992 }, { "epoch": 0.5022398057439504, "grad_norm": 0.42142254114151, "learning_rate": 0.00018651250661372212, "loss": 11.6683, "step": 23993 }, { "epoch": 0.5022607385079125, "grad_norm": 0.2994145154953003, "learning_rate": 0.00018651140691447544, "loss": 11.6742, "step": 23994 }, { "epoch": 0.5022816712718747, "grad_norm": 0.34162116050720215, "learning_rate": 0.00018651030717364102, "loss": 11.6783, "step": 23995 }, { "epoch": 0.5023026040358369, "grad_norm": 0.2526692748069763, "learning_rate": 0.00018650920739121936, "loss": 11.6556, "step": 23996 }, { "epoch": 0.502323536799799, "grad_norm": 0.26037463545799255, "learning_rate": 0.00018650810756721097, "loss": 11.6662, "step": 23997 }, { "epoch": 0.5023444695637612, "grad_norm": 0.2920823097229004, "learning_rate": 0.00018650700770161643, "loss": 11.6645, "step": 23998 }, { "epoch": 0.5023654023277233, "grad_norm": 0.2801370322704315, "learning_rate": 0.00018650590779443622, "loss": 11.6512, "step": 23999 }, { "epoch": 0.5023863350916855, "grad_norm": 0.27230438590049744, "learning_rate": 0.00018650480784567088, "loss": 11.6607, "step": 24000 }, { "epoch": 0.5023863350916855, "eval_loss": 11.672552108764648, "eval_runtime": 34.3434, "eval_samples_per_second": 27.982, "eval_steps_per_second": 7.017, "step": 24000 }, { "epoch": 0.5024072678556477, "grad_norm": 0.31868356466293335, "learning_rate": 0.00018650370785532093, "loss": 11.6774, "step": 24001 }, { "epoch": 0.5024282006196098, "grad_norm": 0.3683360517024994, "learning_rate": 0.00018650260782338696, "loss": 11.6989, "step": 24002 }, { "epoch": 0.502449133383572, "grad_norm": 0.28720590472221375, "learning_rate": 0.00018650150774986943, "loss": 11.6778, "step": 24003 }, { "epoch": 0.5024700661475341, "grad_norm": 0.2772645950317383, "learning_rate": 0.0001865004076347689, "loss": 11.6717, "step": 24004 }, { "epoch": 0.5024909989114963, "grad_norm": 0.3158283233642578, "learning_rate": 0.00018649930747808587, "loss": 11.6725, "step": 24005 }, { "epoch": 0.5025119316754584, "grad_norm": 0.3061738908290863, "learning_rate": 0.00018649820727982091, "loss": 11.6674, "step": 24006 }, { "epoch": 0.5025328644394206, "grad_norm": 0.32684487104415894, "learning_rate": 0.00018649710703997453, "loss": 11.6606, "step": 24007 }, { "epoch": 0.5025537972033828, "grad_norm": 0.26300233602523804, "learning_rate": 0.00018649600675854722, "loss": 11.6714, "step": 24008 }, { "epoch": 0.5025747299673449, "grad_norm": 0.2533523142337799, "learning_rate": 0.0001864949064355396, "loss": 11.6701, "step": 24009 }, { "epoch": 0.5025956627313071, "grad_norm": 0.28070709109306335, "learning_rate": 0.00018649380607095212, "loss": 11.6618, "step": 24010 }, { "epoch": 0.5026165954952692, "grad_norm": 0.2148820161819458, "learning_rate": 0.00018649270566478533, "loss": 11.6572, "step": 24011 }, { "epoch": 0.5026375282592314, "grad_norm": 0.29850706458091736, "learning_rate": 0.0001864916052170398, "loss": 11.6838, "step": 24012 }, { "epoch": 0.5026584610231934, "grad_norm": 0.3135850429534912, "learning_rate": 0.00018649050472771596, "loss": 11.6834, "step": 24013 }, { "epoch": 0.5026793937871556, "grad_norm": 0.2614666521549225, "learning_rate": 0.00018648940419681444, "loss": 11.6798, "step": 24014 }, { "epoch": 0.5027003265511178, "grad_norm": 0.24927040934562683, "learning_rate": 0.00018648830362433572, "loss": 11.6613, "step": 24015 }, { "epoch": 0.5027212593150799, "grad_norm": 0.2783498764038086, "learning_rate": 0.00018648720301028034, "loss": 11.6825, "step": 24016 }, { "epoch": 0.5027421920790421, "grad_norm": 0.26307836174964905, "learning_rate": 0.00018648610235464882, "loss": 11.6665, "step": 24017 }, { "epoch": 0.5027631248430042, "grad_norm": 0.2935860753059387, "learning_rate": 0.00018648500165744172, "loss": 11.6732, "step": 24018 }, { "epoch": 0.5027840576069664, "grad_norm": 0.24758930504322052, "learning_rate": 0.00018648390091865954, "loss": 11.6613, "step": 24019 }, { "epoch": 0.5028049903709286, "grad_norm": 0.28477078676223755, "learning_rate": 0.0001864828001383028, "loss": 11.6786, "step": 24020 }, { "epoch": 0.5028259231348907, "grad_norm": 0.276938259601593, "learning_rate": 0.00018648169931637204, "loss": 11.6633, "step": 24021 }, { "epoch": 0.5028468558988529, "grad_norm": 0.2876996397972107, "learning_rate": 0.00018648059845286782, "loss": 11.6819, "step": 24022 }, { "epoch": 0.502867788662815, "grad_norm": 0.31307247281074524, "learning_rate": 0.00018647949754779062, "loss": 11.6728, "step": 24023 }, { "epoch": 0.5028887214267772, "grad_norm": 0.33339619636535645, "learning_rate": 0.00018647839660114102, "loss": 11.6754, "step": 24024 }, { "epoch": 0.5029096541907393, "grad_norm": 0.28365036845207214, "learning_rate": 0.0001864772956129195, "loss": 11.6709, "step": 24025 }, { "epoch": 0.5029305869547015, "grad_norm": 0.3033452033996582, "learning_rate": 0.00018647619458312658, "loss": 11.6706, "step": 24026 }, { "epoch": 0.5029515197186637, "grad_norm": 0.29046431183815, "learning_rate": 0.00018647509351176286, "loss": 11.6789, "step": 24027 }, { "epoch": 0.5029724524826258, "grad_norm": 0.2953575849533081, "learning_rate": 0.00018647399239882885, "loss": 11.6766, "step": 24028 }, { "epoch": 0.502993385246588, "grad_norm": 0.2514517307281494, "learning_rate": 0.00018647289124432503, "loss": 11.6518, "step": 24029 }, { "epoch": 0.5030143180105501, "grad_norm": 0.2635557949542999, "learning_rate": 0.00018647179004825196, "loss": 11.6829, "step": 24030 }, { "epoch": 0.5030352507745123, "grad_norm": 0.9905170798301697, "learning_rate": 0.00018647068881061016, "loss": 11.7173, "step": 24031 }, { "epoch": 0.5030561835384744, "grad_norm": 0.2836706042289734, "learning_rate": 0.0001864695875314002, "loss": 11.6707, "step": 24032 }, { "epoch": 0.5030771163024366, "grad_norm": 0.3101300299167633, "learning_rate": 0.00018646848621062254, "loss": 11.6644, "step": 24033 }, { "epoch": 0.5030980490663988, "grad_norm": 0.27501389384269714, "learning_rate": 0.00018646738484827776, "loss": 11.6622, "step": 24034 }, { "epoch": 0.5031189818303609, "grad_norm": 0.2469138354063034, "learning_rate": 0.00018646628344436635, "loss": 11.6882, "step": 24035 }, { "epoch": 0.5031399145943231, "grad_norm": 0.33997440338134766, "learning_rate": 0.0001864651819988889, "loss": 11.6906, "step": 24036 }, { "epoch": 0.5031608473582851, "grad_norm": 0.29891806840896606, "learning_rate": 0.00018646408051184592, "loss": 11.6669, "step": 24037 }, { "epoch": 0.5031817801222473, "grad_norm": 0.2738554775714874, "learning_rate": 0.0001864629789832379, "loss": 11.6584, "step": 24038 }, { "epoch": 0.5032027128862095, "grad_norm": 0.30267125368118286, "learning_rate": 0.00018646187741306534, "loss": 11.6691, "step": 24039 }, { "epoch": 0.5032236456501716, "grad_norm": 0.31545382738113403, "learning_rate": 0.0001864607758013289, "loss": 11.6821, "step": 24040 }, { "epoch": 0.5032445784141338, "grad_norm": 0.2693868577480316, "learning_rate": 0.000186459674148029, "loss": 11.66, "step": 24041 }, { "epoch": 0.5032655111780959, "grad_norm": 0.3057744801044464, "learning_rate": 0.00018645857245316622, "loss": 11.6777, "step": 24042 }, { "epoch": 0.5032864439420581, "grad_norm": 0.33538487553596497, "learning_rate": 0.00018645747071674106, "loss": 11.6857, "step": 24043 }, { "epoch": 0.5033073767060202, "grad_norm": 0.2743520736694336, "learning_rate": 0.00018645636893875405, "loss": 11.6843, "step": 24044 }, { "epoch": 0.5033283094699824, "grad_norm": 0.27281156182289124, "learning_rate": 0.00018645526711920575, "loss": 11.679, "step": 24045 }, { "epoch": 0.5033492422339446, "grad_norm": 0.31658878922462463, "learning_rate": 0.00018645416525809666, "loss": 11.662, "step": 24046 }, { "epoch": 0.5033701749979067, "grad_norm": 0.27589836716651917, "learning_rate": 0.00018645306335542735, "loss": 11.6754, "step": 24047 }, { "epoch": 0.5033911077618689, "grad_norm": 0.3001779615879059, "learning_rate": 0.00018645196141119826, "loss": 11.6672, "step": 24048 }, { "epoch": 0.503412040525831, "grad_norm": 0.2749515771865845, "learning_rate": 0.00018645085942541, "loss": 11.6558, "step": 24049 }, { "epoch": 0.5034329732897932, "grad_norm": 0.3498680293560028, "learning_rate": 0.00018644975739806312, "loss": 11.6654, "step": 24050 }, { "epoch": 0.5034539060537553, "grad_norm": 0.2963066101074219, "learning_rate": 0.00018644865532915805, "loss": 11.6844, "step": 24051 }, { "epoch": 0.5034748388177175, "grad_norm": 0.2785559296607971, "learning_rate": 0.00018644755321869542, "loss": 11.6597, "step": 24052 }, { "epoch": 0.5034957715816797, "grad_norm": 0.3764236271381378, "learning_rate": 0.0001864464510666757, "loss": 11.6783, "step": 24053 }, { "epoch": 0.5035167043456418, "grad_norm": 0.30880430340766907, "learning_rate": 0.00018644534887309944, "loss": 11.6759, "step": 24054 }, { "epoch": 0.503537637109604, "grad_norm": 0.32250717282295227, "learning_rate": 0.00018644424663796718, "loss": 11.6678, "step": 24055 }, { "epoch": 0.5035585698735661, "grad_norm": 0.36570656299591064, "learning_rate": 0.00018644314436127944, "loss": 11.6726, "step": 24056 }, { "epoch": 0.5035795026375283, "grad_norm": 0.29058322310447693, "learning_rate": 0.00018644204204303674, "loss": 11.6737, "step": 24057 }, { "epoch": 0.5036004354014905, "grad_norm": 0.28298965096473694, "learning_rate": 0.00018644093968323962, "loss": 11.6736, "step": 24058 }, { "epoch": 0.5036213681654526, "grad_norm": 0.2964763641357422, "learning_rate": 0.00018643983728188862, "loss": 11.6658, "step": 24059 }, { "epoch": 0.5036423009294148, "grad_norm": 0.3526518642902374, "learning_rate": 0.00018643873483898424, "loss": 11.6855, "step": 24060 }, { "epoch": 0.5036632336933768, "grad_norm": 0.26706621050834656, "learning_rate": 0.00018643763235452702, "loss": 11.6715, "step": 24061 }, { "epoch": 0.503684166457339, "grad_norm": 0.23907530307769775, "learning_rate": 0.00018643652982851751, "loss": 11.6827, "step": 24062 }, { "epoch": 0.5037050992213011, "grad_norm": 0.28098902106285095, "learning_rate": 0.00018643542726095623, "loss": 11.6651, "step": 24063 }, { "epoch": 0.5037260319852633, "grad_norm": 0.24457059800624847, "learning_rate": 0.0001864343246518437, "loss": 11.671, "step": 24064 }, { "epoch": 0.5037469647492255, "grad_norm": 0.28216949105262756, "learning_rate": 0.0001864332220011805, "loss": 11.6718, "step": 24065 }, { "epoch": 0.5037678975131876, "grad_norm": 0.392976313829422, "learning_rate": 0.00018643211930896705, "loss": 11.6667, "step": 24066 }, { "epoch": 0.5037888302771498, "grad_norm": 0.2639720141887665, "learning_rate": 0.000186431016575204, "loss": 11.6751, "step": 24067 }, { "epoch": 0.5038097630411119, "grad_norm": 0.31694960594177246, "learning_rate": 0.00018642991379989182, "loss": 11.6727, "step": 24068 }, { "epoch": 0.5038306958050741, "grad_norm": 0.2934596538543701, "learning_rate": 0.000186428810983031, "loss": 11.6688, "step": 24069 }, { "epoch": 0.5038516285690362, "grad_norm": 0.3364655077457428, "learning_rate": 0.00018642770812462217, "loss": 11.6802, "step": 24070 }, { "epoch": 0.5038725613329984, "grad_norm": 0.268971711397171, "learning_rate": 0.0001864266052246658, "loss": 11.6645, "step": 24071 }, { "epoch": 0.5038934940969606, "grad_norm": 0.3038487434387207, "learning_rate": 0.00018642550228316243, "loss": 11.6807, "step": 24072 }, { "epoch": 0.5039144268609227, "grad_norm": 0.4071944057941437, "learning_rate": 0.00018642439930011256, "loss": 11.6766, "step": 24073 }, { "epoch": 0.5039353596248849, "grad_norm": 0.3884202539920807, "learning_rate": 0.00018642329627551678, "loss": 11.6677, "step": 24074 }, { "epoch": 0.503956292388847, "grad_norm": 0.2538006901741028, "learning_rate": 0.00018642219320937558, "loss": 11.6708, "step": 24075 }, { "epoch": 0.5039772251528092, "grad_norm": 0.28227779269218445, "learning_rate": 0.00018642109010168952, "loss": 11.6814, "step": 24076 }, { "epoch": 0.5039981579167714, "grad_norm": 0.28088921308517456, "learning_rate": 0.00018641998695245908, "loss": 11.6659, "step": 24077 }, { "epoch": 0.5040190906807335, "grad_norm": 0.28439828753471375, "learning_rate": 0.00018641888376168484, "loss": 11.6629, "step": 24078 }, { "epoch": 0.5040400234446957, "grad_norm": 0.30799633264541626, "learning_rate": 0.0001864177805293673, "loss": 11.6733, "step": 24079 }, { "epoch": 0.5040609562086578, "grad_norm": 0.26303738355636597, "learning_rate": 0.00018641667725550698, "loss": 11.667, "step": 24080 }, { "epoch": 0.50408188897262, "grad_norm": 0.30139055848121643, "learning_rate": 0.00018641557394010448, "loss": 11.6587, "step": 24081 }, { "epoch": 0.504102821736582, "grad_norm": 0.251272976398468, "learning_rate": 0.00018641447058316026, "loss": 11.6605, "step": 24082 }, { "epoch": 0.5041237545005443, "grad_norm": 0.25338706374168396, "learning_rate": 0.00018641336718467486, "loss": 11.6656, "step": 24083 }, { "epoch": 0.5041446872645065, "grad_norm": 0.28467556834220886, "learning_rate": 0.00018641226374464882, "loss": 11.6783, "step": 24084 }, { "epoch": 0.5041656200284685, "grad_norm": 0.31419703364372253, "learning_rate": 0.00018641116026308269, "loss": 11.6811, "step": 24085 }, { "epoch": 0.5041865527924307, "grad_norm": 0.2675066590309143, "learning_rate": 0.00018641005673997697, "loss": 11.6819, "step": 24086 }, { "epoch": 0.5042074855563928, "grad_norm": 0.33687031269073486, "learning_rate": 0.00018640895317533222, "loss": 11.67, "step": 24087 }, { "epoch": 0.504228418320355, "grad_norm": 0.25961124897003174, "learning_rate": 0.00018640784956914895, "loss": 11.6882, "step": 24088 }, { "epoch": 0.5042493510843171, "grad_norm": 0.28480714559555054, "learning_rate": 0.0001864067459214277, "loss": 11.672, "step": 24089 }, { "epoch": 0.5042702838482793, "grad_norm": 0.356742799282074, "learning_rate": 0.00018640564223216898, "loss": 11.6702, "step": 24090 }, { "epoch": 0.5042912166122415, "grad_norm": 0.26313814520835876, "learning_rate": 0.00018640453850137333, "loss": 11.687, "step": 24091 }, { "epoch": 0.5043121493762036, "grad_norm": 0.2414529174566269, "learning_rate": 0.0001864034347290413, "loss": 11.6638, "step": 24092 }, { "epoch": 0.5043330821401658, "grad_norm": 0.2566578686237335, "learning_rate": 0.00018640233091517342, "loss": 11.6696, "step": 24093 }, { "epoch": 0.5043540149041279, "grad_norm": 0.3177100718021393, "learning_rate": 0.00018640122705977018, "loss": 11.6762, "step": 24094 }, { "epoch": 0.5043749476680901, "grad_norm": 0.25302788615226746, "learning_rate": 0.00018640012316283218, "loss": 11.6921, "step": 24095 }, { "epoch": 0.5043958804320523, "grad_norm": 0.2744767665863037, "learning_rate": 0.00018639901922435989, "loss": 11.6647, "step": 24096 }, { "epoch": 0.5044168131960144, "grad_norm": 0.25997066497802734, "learning_rate": 0.00018639791524435384, "loss": 11.6687, "step": 24097 }, { "epoch": 0.5044377459599766, "grad_norm": 0.35397064685821533, "learning_rate": 0.00018639681122281459, "loss": 11.6802, "step": 24098 }, { "epoch": 0.5044586787239387, "grad_norm": 0.389262318611145, "learning_rate": 0.00018639570715974266, "loss": 11.6602, "step": 24099 }, { "epoch": 0.5044796114879009, "grad_norm": 0.37176617980003357, "learning_rate": 0.00018639460305513859, "loss": 11.6778, "step": 24100 }, { "epoch": 0.504500544251863, "grad_norm": 0.2523857355117798, "learning_rate": 0.0001863934989090029, "loss": 11.6787, "step": 24101 }, { "epoch": 0.5045214770158252, "grad_norm": 0.2532516419887543, "learning_rate": 0.00018639239472133614, "loss": 11.6615, "step": 24102 }, { "epoch": 0.5045424097797874, "grad_norm": 0.30521366000175476, "learning_rate": 0.0001863912904921388, "loss": 11.6667, "step": 24103 }, { "epoch": 0.5045633425437495, "grad_norm": 0.2562562823295593, "learning_rate": 0.00018639018622141146, "loss": 11.6701, "step": 24104 }, { "epoch": 0.5045842753077117, "grad_norm": 0.3342237174510956, "learning_rate": 0.00018638908190915458, "loss": 11.6772, "step": 24105 }, { "epoch": 0.5046052080716737, "grad_norm": 0.27811098098754883, "learning_rate": 0.00018638797755536878, "loss": 11.6823, "step": 24106 }, { "epoch": 0.504626140835636, "grad_norm": 0.26580336689949036, "learning_rate": 0.00018638687316005454, "loss": 11.6515, "step": 24107 }, { "epoch": 0.504647073599598, "grad_norm": 0.2917356491088867, "learning_rate": 0.0001863857687232124, "loss": 11.6729, "step": 24108 }, { "epoch": 0.5046680063635602, "grad_norm": 0.3322189152240753, "learning_rate": 0.00018638466424484288, "loss": 11.6715, "step": 24109 }, { "epoch": 0.5046889391275224, "grad_norm": 0.3404349982738495, "learning_rate": 0.00018638355972494652, "loss": 11.6743, "step": 24110 }, { "epoch": 0.5047098718914845, "grad_norm": 0.23194798827171326, "learning_rate": 0.00018638245516352386, "loss": 11.6801, "step": 24111 }, { "epoch": 0.5047308046554467, "grad_norm": 0.3225213289260864, "learning_rate": 0.00018638135056057542, "loss": 11.6596, "step": 24112 }, { "epoch": 0.5047517374194088, "grad_norm": 0.24567143619060516, "learning_rate": 0.00018638024591610173, "loss": 11.6678, "step": 24113 }, { "epoch": 0.504772670183371, "grad_norm": 0.2637612521648407, "learning_rate": 0.00018637914123010335, "loss": 11.6717, "step": 24114 }, { "epoch": 0.5047936029473332, "grad_norm": 0.28550636768341064, "learning_rate": 0.00018637803650258075, "loss": 11.6747, "step": 24115 }, { "epoch": 0.5048145357112953, "grad_norm": 0.2757738530635834, "learning_rate": 0.0001863769317335345, "loss": 11.6731, "step": 24116 }, { "epoch": 0.5048354684752575, "grad_norm": 0.2769223153591156, "learning_rate": 0.00018637582692296512, "loss": 11.6701, "step": 24117 }, { "epoch": 0.5048564012392196, "grad_norm": 0.35863015055656433, "learning_rate": 0.00018637472207087317, "loss": 11.6918, "step": 24118 }, { "epoch": 0.5048773340031818, "grad_norm": 0.31781628727912903, "learning_rate": 0.00018637361717725915, "loss": 11.6555, "step": 24119 }, { "epoch": 0.5048982667671439, "grad_norm": 0.2717283368110657, "learning_rate": 0.00018637251224212363, "loss": 11.6483, "step": 24120 }, { "epoch": 0.5049191995311061, "grad_norm": 0.30916300415992737, "learning_rate": 0.0001863714072654671, "loss": 11.6573, "step": 24121 }, { "epoch": 0.5049401322950683, "grad_norm": 0.2590961456298828, "learning_rate": 0.0001863703022472901, "loss": 11.6754, "step": 24122 }, { "epoch": 0.5049610650590304, "grad_norm": 0.32557669281959534, "learning_rate": 0.00018636919718759316, "loss": 11.6735, "step": 24123 }, { "epoch": 0.5049819978229926, "grad_norm": 0.3212871253490448, "learning_rate": 0.0001863680920863768, "loss": 11.6747, "step": 24124 }, { "epoch": 0.5050029305869547, "grad_norm": 0.3146530091762543, "learning_rate": 0.0001863669869436416, "loss": 11.6484, "step": 24125 }, { "epoch": 0.5050238633509169, "grad_norm": 0.23365984857082367, "learning_rate": 0.00018636588175938802, "loss": 11.6794, "step": 24126 }, { "epoch": 0.505044796114879, "grad_norm": 0.3042006492614746, "learning_rate": 0.00018636477653361666, "loss": 11.6884, "step": 24127 }, { "epoch": 0.5050657288788412, "grad_norm": 0.3158515393733978, "learning_rate": 0.00018636367126632802, "loss": 11.6705, "step": 24128 }, { "epoch": 0.5050866616428034, "grad_norm": 0.23179344832897186, "learning_rate": 0.00018636256595752263, "loss": 11.6774, "step": 24129 }, { "epoch": 0.5051075944067654, "grad_norm": 0.27309098839759827, "learning_rate": 0.00018636146060720102, "loss": 11.6736, "step": 24130 }, { "epoch": 0.5051285271707276, "grad_norm": 0.304658979177475, "learning_rate": 0.0001863603552153637, "loss": 11.6702, "step": 24131 }, { "epoch": 0.5051494599346897, "grad_norm": 0.3151629865169525, "learning_rate": 0.00018635924978201125, "loss": 11.677, "step": 24132 }, { "epoch": 0.5051703926986519, "grad_norm": 0.3455689549446106, "learning_rate": 0.0001863581443071442, "loss": 11.6795, "step": 24133 }, { "epoch": 0.5051913254626141, "grad_norm": 0.25651755928993225, "learning_rate": 0.00018635703879076303, "loss": 11.6744, "step": 24134 }, { "epoch": 0.5052122582265762, "grad_norm": 0.23754100501537323, "learning_rate": 0.00018635593323286832, "loss": 11.6561, "step": 24135 }, { "epoch": 0.5052331909905384, "grad_norm": 0.3171818256378174, "learning_rate": 0.00018635482763346058, "loss": 11.6487, "step": 24136 }, { "epoch": 0.5052541237545005, "grad_norm": 0.25367388129234314, "learning_rate": 0.00018635372199254032, "loss": 11.6711, "step": 24137 }, { "epoch": 0.5052750565184627, "grad_norm": 0.38767316937446594, "learning_rate": 0.0001863526163101081, "loss": 11.6764, "step": 24138 }, { "epoch": 0.5052959892824248, "grad_norm": 0.3701709508895874, "learning_rate": 0.00018635151058616446, "loss": 11.6895, "step": 24139 }, { "epoch": 0.505316922046387, "grad_norm": 0.26883363723754883, "learning_rate": 0.00018635040482070992, "loss": 11.6741, "step": 24140 }, { "epoch": 0.5053378548103492, "grad_norm": 0.28171056509017944, "learning_rate": 0.00018634929901374499, "loss": 11.6732, "step": 24141 }, { "epoch": 0.5053587875743113, "grad_norm": 0.3960644006729126, "learning_rate": 0.00018634819316527024, "loss": 11.6724, "step": 24142 }, { "epoch": 0.5053797203382735, "grad_norm": 0.24860531091690063, "learning_rate": 0.00018634708727528618, "loss": 11.6701, "step": 24143 }, { "epoch": 0.5054006531022356, "grad_norm": 0.26804012060165405, "learning_rate": 0.00018634598134379334, "loss": 11.6758, "step": 24144 }, { "epoch": 0.5054215858661978, "grad_norm": 0.38100558519363403, "learning_rate": 0.00018634487537079228, "loss": 11.6808, "step": 24145 }, { "epoch": 0.5054425186301599, "grad_norm": 0.283499151468277, "learning_rate": 0.0001863437693562835, "loss": 11.6508, "step": 24146 }, { "epoch": 0.5054634513941221, "grad_norm": 0.2532185912132263, "learning_rate": 0.0001863426633002675, "loss": 11.6603, "step": 24147 }, { "epoch": 0.5054843841580843, "grad_norm": 0.35983744263648987, "learning_rate": 0.00018634155720274488, "loss": 11.6914, "step": 24148 }, { "epoch": 0.5055053169220464, "grad_norm": 0.37170836329460144, "learning_rate": 0.00018634045106371617, "loss": 11.6812, "step": 24149 }, { "epoch": 0.5055262496860086, "grad_norm": 0.28798240423202515, "learning_rate": 0.00018633934488318182, "loss": 11.6906, "step": 24150 }, { "epoch": 0.5055471824499707, "grad_norm": 0.31375518441200256, "learning_rate": 0.00018633823866114245, "loss": 11.6608, "step": 24151 }, { "epoch": 0.5055681152139329, "grad_norm": 0.31617966294288635, "learning_rate": 0.00018633713239759857, "loss": 11.6751, "step": 24152 }, { "epoch": 0.5055890479778951, "grad_norm": 0.32483288645744324, "learning_rate": 0.00018633602609255067, "loss": 11.6778, "step": 24153 }, { "epoch": 0.5056099807418571, "grad_norm": 0.2717873752117157, "learning_rate": 0.00018633491974599933, "loss": 11.6723, "step": 24154 }, { "epoch": 0.5056309135058193, "grad_norm": 0.26736846566200256, "learning_rate": 0.00018633381335794505, "loss": 11.6704, "step": 24155 }, { "epoch": 0.5056518462697814, "grad_norm": 0.2969472110271454, "learning_rate": 0.0001863327069283884, "loss": 11.6548, "step": 24156 }, { "epoch": 0.5056727790337436, "grad_norm": 0.269117534160614, "learning_rate": 0.00018633160045732986, "loss": 11.665, "step": 24157 }, { "epoch": 0.5056937117977057, "grad_norm": 0.3023666739463806, "learning_rate": 0.00018633049394477, "loss": 11.6567, "step": 24158 }, { "epoch": 0.5057146445616679, "grad_norm": 0.24845662713050842, "learning_rate": 0.00018632938739070935, "loss": 11.6715, "step": 24159 }, { "epoch": 0.5057355773256301, "grad_norm": 0.30229175090789795, "learning_rate": 0.00018632828079514843, "loss": 11.676, "step": 24160 }, { "epoch": 0.5057565100895922, "grad_norm": 0.30454474687576294, "learning_rate": 0.00018632717415808777, "loss": 11.6676, "step": 24161 }, { "epoch": 0.5057774428535544, "grad_norm": 0.2681390345096588, "learning_rate": 0.0001863260674795279, "loss": 11.6732, "step": 24162 }, { "epoch": 0.5057983756175165, "grad_norm": 0.3904440701007843, "learning_rate": 0.00018632496075946938, "loss": 11.6631, "step": 24163 }, { "epoch": 0.5058193083814787, "grad_norm": 0.4023531973361969, "learning_rate": 0.0001863238539979127, "loss": 11.6785, "step": 24164 }, { "epoch": 0.5058402411454408, "grad_norm": 0.2666904926300049, "learning_rate": 0.00018632274719485842, "loss": 11.6697, "step": 24165 }, { "epoch": 0.505861173909403, "grad_norm": 0.3979554772377014, "learning_rate": 0.00018632164035030707, "loss": 11.6632, "step": 24166 }, { "epoch": 0.5058821066733652, "grad_norm": 0.35718223452568054, "learning_rate": 0.00018632053346425917, "loss": 11.6699, "step": 24167 }, { "epoch": 0.5059030394373273, "grad_norm": 0.2694149613380432, "learning_rate": 0.0001863194265367153, "loss": 11.6554, "step": 24168 }, { "epoch": 0.5059239722012895, "grad_norm": 0.29271262884140015, "learning_rate": 0.0001863183195676759, "loss": 11.6929, "step": 24169 }, { "epoch": 0.5059449049652516, "grad_norm": 0.29487764835357666, "learning_rate": 0.00018631721255714156, "loss": 11.6889, "step": 24170 }, { "epoch": 0.5059658377292138, "grad_norm": 0.2581471800804138, "learning_rate": 0.00018631610550511282, "loss": 11.6697, "step": 24171 }, { "epoch": 0.5059867704931759, "grad_norm": 0.27238160371780396, "learning_rate": 0.00018631499841159022, "loss": 11.6513, "step": 24172 }, { "epoch": 0.5060077032571381, "grad_norm": 0.2585516571998596, "learning_rate": 0.00018631389127657424, "loss": 11.657, "step": 24173 }, { "epoch": 0.5060286360211003, "grad_norm": 0.3081446588039398, "learning_rate": 0.00018631278410006545, "loss": 11.658, "step": 24174 }, { "epoch": 0.5060495687850624, "grad_norm": 0.24603420495986938, "learning_rate": 0.00018631167688206435, "loss": 11.6565, "step": 24175 }, { "epoch": 0.5060705015490246, "grad_norm": 0.2751294672489166, "learning_rate": 0.00018631056962257155, "loss": 11.6808, "step": 24176 }, { "epoch": 0.5060914343129866, "grad_norm": 0.2563478350639343, "learning_rate": 0.0001863094623215875, "loss": 11.6636, "step": 24177 }, { "epoch": 0.5061123670769488, "grad_norm": 0.34512603282928467, "learning_rate": 0.00018630835497911275, "loss": 11.6775, "step": 24178 }, { "epoch": 0.506133299840911, "grad_norm": 0.293876975774765, "learning_rate": 0.00018630724759514787, "loss": 11.6919, "step": 24179 }, { "epoch": 0.5061542326048731, "grad_norm": 0.2500721216201782, "learning_rate": 0.00018630614016969336, "loss": 11.6662, "step": 24180 }, { "epoch": 0.5061751653688353, "grad_norm": 0.2804871201515198, "learning_rate": 0.00018630503270274974, "loss": 11.6694, "step": 24181 }, { "epoch": 0.5061960981327974, "grad_norm": 0.27107176184654236, "learning_rate": 0.00018630392519431758, "loss": 11.661, "step": 24182 }, { "epoch": 0.5062170308967596, "grad_norm": 0.32733070850372314, "learning_rate": 0.0001863028176443974, "loss": 11.6714, "step": 24183 }, { "epoch": 0.5062379636607217, "grad_norm": 0.23951128125190735, "learning_rate": 0.0001863017100529897, "loss": 11.6761, "step": 24184 }, { "epoch": 0.5062588964246839, "grad_norm": 0.29198023676872253, "learning_rate": 0.00018630060242009507, "loss": 11.6842, "step": 24185 }, { "epoch": 0.5062798291886461, "grad_norm": 0.2856110632419586, "learning_rate": 0.00018629949474571397, "loss": 11.6623, "step": 24186 }, { "epoch": 0.5063007619526082, "grad_norm": 0.27242621779441833, "learning_rate": 0.000186298387029847, "loss": 11.6623, "step": 24187 }, { "epoch": 0.5063216947165704, "grad_norm": 0.34484851360321045, "learning_rate": 0.00018629727927249468, "loss": 11.6754, "step": 24188 }, { "epoch": 0.5063426274805325, "grad_norm": 0.2804872393608093, "learning_rate": 0.0001862961714736575, "loss": 11.6741, "step": 24189 }, { "epoch": 0.5063635602444947, "grad_norm": 0.2745266854763031, "learning_rate": 0.00018629506363333605, "loss": 11.6732, "step": 24190 }, { "epoch": 0.5063844930084568, "grad_norm": 0.2985158860683441, "learning_rate": 0.0001862939557515308, "loss": 11.6748, "step": 24191 }, { "epoch": 0.506405425772419, "grad_norm": 0.29005008935928345, "learning_rate": 0.00018629284782824233, "loss": 11.6565, "step": 24192 }, { "epoch": 0.5064263585363812, "grad_norm": 0.24445728957653046, "learning_rate": 0.00018629173986347116, "loss": 11.6649, "step": 24193 }, { "epoch": 0.5064472913003433, "grad_norm": 0.3532005250453949, "learning_rate": 0.0001862906318572178, "loss": 11.6795, "step": 24194 }, { "epoch": 0.5064682240643055, "grad_norm": 0.391132652759552, "learning_rate": 0.00018628952380948284, "loss": 11.6624, "step": 24195 }, { "epoch": 0.5064891568282676, "grad_norm": 0.2781004309654236, "learning_rate": 0.00018628841572026675, "loss": 11.6594, "step": 24196 }, { "epoch": 0.5065100895922298, "grad_norm": 0.3129752278327942, "learning_rate": 0.00018628730758957012, "loss": 11.6697, "step": 24197 }, { "epoch": 0.506531022356192, "grad_norm": 0.2751520276069641, "learning_rate": 0.00018628619941739346, "loss": 11.6685, "step": 24198 }, { "epoch": 0.506551955120154, "grad_norm": 0.35211771726608276, "learning_rate": 0.00018628509120373723, "loss": 11.691, "step": 24199 }, { "epoch": 0.5065728878841163, "grad_norm": 0.3162313401699066, "learning_rate": 0.00018628398294860208, "loss": 11.6577, "step": 24200 }, { "epoch": 0.5065938206480783, "grad_norm": 0.31416013836860657, "learning_rate": 0.00018628287465198847, "loss": 11.6612, "step": 24201 }, { "epoch": 0.5066147534120405, "grad_norm": 0.3895791172981262, "learning_rate": 0.00018628176631389694, "loss": 11.6661, "step": 24202 }, { "epoch": 0.5066356861760026, "grad_norm": 0.3284706473350525, "learning_rate": 0.00018628065793432807, "loss": 11.6738, "step": 24203 }, { "epoch": 0.5066566189399648, "grad_norm": 0.3763929009437561, "learning_rate": 0.00018627954951328234, "loss": 11.694, "step": 24204 }, { "epoch": 0.506677551703927, "grad_norm": 0.2853902280330658, "learning_rate": 0.00018627844105076032, "loss": 11.6628, "step": 24205 }, { "epoch": 0.5066984844678891, "grad_norm": 0.38212859630584717, "learning_rate": 0.00018627733254676253, "loss": 11.68, "step": 24206 }, { "epoch": 0.5067194172318513, "grad_norm": 0.3600395619869232, "learning_rate": 0.00018627622400128944, "loss": 11.6735, "step": 24207 }, { "epoch": 0.5067403499958134, "grad_norm": 0.23721368610858917, "learning_rate": 0.00018627511541434167, "loss": 11.6733, "step": 24208 }, { "epoch": 0.5067612827597756, "grad_norm": 0.24639631807804108, "learning_rate": 0.00018627400678591975, "loss": 11.6731, "step": 24209 }, { "epoch": 0.5067822155237377, "grad_norm": 0.3296845257282257, "learning_rate": 0.00018627289811602417, "loss": 11.6728, "step": 24210 }, { "epoch": 0.5068031482876999, "grad_norm": 0.26317891478538513, "learning_rate": 0.00018627178940465547, "loss": 11.6558, "step": 24211 }, { "epoch": 0.5068240810516621, "grad_norm": 0.28544196486473083, "learning_rate": 0.0001862706806518142, "loss": 11.6658, "step": 24212 }, { "epoch": 0.5068450138156242, "grad_norm": 0.33429932594299316, "learning_rate": 0.0001862695718575009, "loss": 11.6757, "step": 24213 }, { "epoch": 0.5068659465795864, "grad_norm": 0.24218030273914337, "learning_rate": 0.00018626846302171606, "loss": 11.6816, "step": 24214 }, { "epoch": 0.5068868793435485, "grad_norm": 0.3071689307689667, "learning_rate": 0.00018626735414446023, "loss": 11.6643, "step": 24215 }, { "epoch": 0.5069078121075107, "grad_norm": 0.3214409053325653, "learning_rate": 0.000186266245225734, "loss": 11.6712, "step": 24216 }, { "epoch": 0.5069287448714729, "grad_norm": 0.30981478095054626, "learning_rate": 0.0001862651362655378, "loss": 11.6698, "step": 24217 }, { "epoch": 0.506949677635435, "grad_norm": 0.2376214563846588, "learning_rate": 0.00018626402726387226, "loss": 11.656, "step": 24218 }, { "epoch": 0.5069706103993972, "grad_norm": 0.24443872272968292, "learning_rate": 0.00018626291822073786, "loss": 11.686, "step": 24219 }, { "epoch": 0.5069915431633593, "grad_norm": 0.3823156952857971, "learning_rate": 0.00018626180913613517, "loss": 11.6986, "step": 24220 }, { "epoch": 0.5070124759273215, "grad_norm": 0.34293776750564575, "learning_rate": 0.0001862607000100647, "loss": 11.6629, "step": 24221 }, { "epoch": 0.5070334086912835, "grad_norm": 0.2603286802768707, "learning_rate": 0.00018625959084252696, "loss": 11.6665, "step": 24222 }, { "epoch": 0.5070543414552457, "grad_norm": 0.25534501671791077, "learning_rate": 0.0001862584816335225, "loss": 11.6607, "step": 24223 }, { "epoch": 0.507075274219208, "grad_norm": 0.2977423071861267, "learning_rate": 0.00018625737238305186, "loss": 11.6717, "step": 24224 }, { "epoch": 0.50709620698317, "grad_norm": 0.23613083362579346, "learning_rate": 0.00018625626309111558, "loss": 11.6815, "step": 24225 }, { "epoch": 0.5071171397471322, "grad_norm": 0.2870434522628784, "learning_rate": 0.0001862551537577142, "loss": 11.6656, "step": 24226 }, { "epoch": 0.5071380725110943, "grad_norm": 0.5714238286018372, "learning_rate": 0.0001862540443828482, "loss": 11.6002, "step": 24227 }, { "epoch": 0.5071590052750565, "grad_norm": 0.2992486357688904, "learning_rate": 0.0001862529349665182, "loss": 11.6738, "step": 24228 }, { "epoch": 0.5071799380390186, "grad_norm": 0.30984926223754883, "learning_rate": 0.00018625182550872465, "loss": 11.6693, "step": 24229 }, { "epoch": 0.5072008708029808, "grad_norm": 0.31719067692756653, "learning_rate": 0.00018625071600946816, "loss": 11.6508, "step": 24230 }, { "epoch": 0.507221803566943, "grad_norm": 0.2859439551830292, "learning_rate": 0.00018624960646874915, "loss": 11.6781, "step": 24231 }, { "epoch": 0.5072427363309051, "grad_norm": 0.3214016258716583, "learning_rate": 0.00018624849688656832, "loss": 11.6848, "step": 24232 }, { "epoch": 0.5072636690948673, "grad_norm": 0.3307335078716278, "learning_rate": 0.00018624738726292605, "loss": 11.6769, "step": 24233 }, { "epoch": 0.5072846018588294, "grad_norm": 0.2823212444782257, "learning_rate": 0.00018624627759782295, "loss": 11.6604, "step": 24234 }, { "epoch": 0.5073055346227916, "grad_norm": 0.2799881398677826, "learning_rate": 0.00018624516789125952, "loss": 11.6772, "step": 24235 }, { "epoch": 0.5073264673867538, "grad_norm": 0.2581787407398224, "learning_rate": 0.0001862440581432363, "loss": 11.6727, "step": 24236 }, { "epoch": 0.5073474001507159, "grad_norm": 0.33366072177886963, "learning_rate": 0.00018624294835375385, "loss": 11.6705, "step": 24237 }, { "epoch": 0.5073683329146781, "grad_norm": 0.31161606311798096, "learning_rate": 0.0001862418385228127, "loss": 11.6753, "step": 24238 }, { "epoch": 0.5073892656786402, "grad_norm": 0.341534286737442, "learning_rate": 0.00018624072865041334, "loss": 11.6682, "step": 24239 }, { "epoch": 0.5074101984426024, "grad_norm": 0.32470908761024475, "learning_rate": 0.00018623961873655637, "loss": 11.6744, "step": 24240 }, { "epoch": 0.5074311312065645, "grad_norm": 0.26743409037590027, "learning_rate": 0.00018623850878124226, "loss": 11.6878, "step": 24241 }, { "epoch": 0.5074520639705267, "grad_norm": 0.2834528088569641, "learning_rate": 0.00018623739878447158, "loss": 11.6663, "step": 24242 }, { "epoch": 0.5074729967344889, "grad_norm": 0.3178737461566925, "learning_rate": 0.00018623628874624485, "loss": 11.6758, "step": 24243 }, { "epoch": 0.507493929498451, "grad_norm": 0.290103942155838, "learning_rate": 0.00018623517866656262, "loss": 11.671, "step": 24244 }, { "epoch": 0.5075148622624132, "grad_norm": 0.21968984603881836, "learning_rate": 0.0001862340685454254, "loss": 11.6742, "step": 24245 }, { "epoch": 0.5075357950263752, "grad_norm": 0.31864991784095764, "learning_rate": 0.00018623295838283375, "loss": 11.6604, "step": 24246 }, { "epoch": 0.5075567277903374, "grad_norm": 0.32239723205566406, "learning_rate": 0.00018623184817878816, "loss": 11.6618, "step": 24247 }, { "epoch": 0.5075776605542995, "grad_norm": 0.2482214868068695, "learning_rate": 0.00018623073793328922, "loss": 11.6741, "step": 24248 }, { "epoch": 0.5075985933182617, "grad_norm": 0.36495742201805115, "learning_rate": 0.00018622962764633743, "loss": 11.6658, "step": 24249 }, { "epoch": 0.5076195260822239, "grad_norm": 0.3260812759399414, "learning_rate": 0.00018622851731793332, "loss": 11.6792, "step": 24250 }, { "epoch": 0.507640458846186, "grad_norm": 0.30048662424087524, "learning_rate": 0.00018622740694807744, "loss": 11.6809, "step": 24251 }, { "epoch": 0.5076613916101482, "grad_norm": 0.3252544403076172, "learning_rate": 0.0001862262965367703, "loss": 11.6667, "step": 24252 }, { "epoch": 0.5076823243741103, "grad_norm": 0.3335164785385132, "learning_rate": 0.00018622518608401247, "loss": 11.6662, "step": 24253 }, { "epoch": 0.5077032571380725, "grad_norm": 0.28982430696487427, "learning_rate": 0.00018622407558980446, "loss": 11.653, "step": 24254 }, { "epoch": 0.5077241899020347, "grad_norm": 0.37848150730133057, "learning_rate": 0.0001862229650541468, "loss": 11.6568, "step": 24255 }, { "epoch": 0.5077451226659968, "grad_norm": 0.3382631242275238, "learning_rate": 0.00018622185447704005, "loss": 11.6749, "step": 24256 }, { "epoch": 0.507766055429959, "grad_norm": 0.3073948323726654, "learning_rate": 0.00018622074385848473, "loss": 11.6544, "step": 24257 }, { "epoch": 0.5077869881939211, "grad_norm": 0.3434002101421356, "learning_rate": 0.00018621963319848136, "loss": 11.6553, "step": 24258 }, { "epoch": 0.5078079209578833, "grad_norm": 0.3088913857936859, "learning_rate": 0.0001862185224970305, "loss": 11.6699, "step": 24259 }, { "epoch": 0.5078288537218454, "grad_norm": 0.2936629056930542, "learning_rate": 0.00018621741175413266, "loss": 11.6911, "step": 24260 }, { "epoch": 0.5078497864858076, "grad_norm": 0.26456862688064575, "learning_rate": 0.00018621630096978838, "loss": 11.6672, "step": 24261 }, { "epoch": 0.5078707192497698, "grad_norm": 0.27672940492630005, "learning_rate": 0.00018621519014399817, "loss": 11.6544, "step": 24262 }, { "epoch": 0.5078916520137319, "grad_norm": 0.34888988733291626, "learning_rate": 0.00018621407927676262, "loss": 11.6793, "step": 24263 }, { "epoch": 0.5079125847776941, "grad_norm": 0.2793334722518921, "learning_rate": 0.00018621296836808223, "loss": 11.664, "step": 24264 }, { "epoch": 0.5079335175416562, "grad_norm": 0.34453463554382324, "learning_rate": 0.00018621185741795753, "loss": 11.6836, "step": 24265 }, { "epoch": 0.5079544503056184, "grad_norm": 0.3451882302761078, "learning_rate": 0.00018621074642638906, "loss": 11.6757, "step": 24266 }, { "epoch": 0.5079753830695805, "grad_norm": 0.23847468197345734, "learning_rate": 0.00018620963539337738, "loss": 11.6667, "step": 24267 }, { "epoch": 0.5079963158335427, "grad_norm": 0.36081138253211975, "learning_rate": 0.00018620852431892299, "loss": 11.6768, "step": 24268 }, { "epoch": 0.5080172485975049, "grad_norm": 0.2495613843202591, "learning_rate": 0.0001862074132030264, "loss": 11.6654, "step": 24269 }, { "epoch": 0.508038181361467, "grad_norm": 0.3623257577419281, "learning_rate": 0.0001862063020456882, "loss": 11.6612, "step": 24270 }, { "epoch": 0.5080591141254291, "grad_norm": 0.23670177161693573, "learning_rate": 0.00018620519084690894, "loss": 11.6929, "step": 24271 }, { "epoch": 0.5080800468893912, "grad_norm": 0.3606029450893402, "learning_rate": 0.0001862040796066891, "loss": 11.6747, "step": 24272 }, { "epoch": 0.5081009796533534, "grad_norm": 0.27220073342323303, "learning_rate": 0.00018620296832502921, "loss": 11.6505, "step": 24273 }, { "epoch": 0.5081219124173156, "grad_norm": 0.2771942913532257, "learning_rate": 0.00018620185700192986, "loss": 11.6732, "step": 24274 }, { "epoch": 0.5081428451812777, "grad_norm": 0.29206424951553345, "learning_rate": 0.00018620074563739148, "loss": 11.6676, "step": 24275 }, { "epoch": 0.5081637779452399, "grad_norm": 0.27341338992118835, "learning_rate": 0.00018619963423141474, "loss": 11.6725, "step": 24276 }, { "epoch": 0.508184710709202, "grad_norm": 0.34541749954223633, "learning_rate": 0.00018619852278400009, "loss": 11.6622, "step": 24277 }, { "epoch": 0.5082056434731642, "grad_norm": 0.32590481638908386, "learning_rate": 0.00018619741129514807, "loss": 11.6675, "step": 24278 }, { "epoch": 0.5082265762371263, "grad_norm": 0.2681094706058502, "learning_rate": 0.00018619629976485925, "loss": 11.6772, "step": 24279 }, { "epoch": 0.5082475090010885, "grad_norm": 0.32695841789245605, "learning_rate": 0.00018619518819313412, "loss": 11.6562, "step": 24280 }, { "epoch": 0.5082684417650507, "grad_norm": 0.328807532787323, "learning_rate": 0.00018619407657997322, "loss": 11.6627, "step": 24281 }, { "epoch": 0.5082893745290128, "grad_norm": 0.2564593553543091, "learning_rate": 0.00018619296492537716, "loss": 11.6612, "step": 24282 }, { "epoch": 0.508310307292975, "grad_norm": 0.29184865951538086, "learning_rate": 0.00018619185322934638, "loss": 11.6758, "step": 24283 }, { "epoch": 0.5083312400569371, "grad_norm": 0.28750649094581604, "learning_rate": 0.00018619074149188144, "loss": 11.667, "step": 24284 }, { "epoch": 0.5083521728208993, "grad_norm": 0.30888795852661133, "learning_rate": 0.0001861896297129829, "loss": 11.6815, "step": 24285 }, { "epoch": 0.5083731055848614, "grad_norm": 0.3362368047237396, "learning_rate": 0.00018618851789265126, "loss": 11.6774, "step": 24286 }, { "epoch": 0.5083940383488236, "grad_norm": 0.29833632707595825, "learning_rate": 0.00018618740603088707, "loss": 11.6773, "step": 24287 }, { "epoch": 0.5084149711127858, "grad_norm": 0.25058677792549133, "learning_rate": 0.0001861862941276909, "loss": 11.6645, "step": 24288 }, { "epoch": 0.5084359038767479, "grad_norm": 0.3725999891757965, "learning_rate": 0.0001861851821830632, "loss": 11.6838, "step": 24289 }, { "epoch": 0.5084568366407101, "grad_norm": 0.27661558985710144, "learning_rate": 0.0001861840701970046, "loss": 11.6643, "step": 24290 }, { "epoch": 0.5084777694046722, "grad_norm": 0.39964133501052856, "learning_rate": 0.0001861829581695156, "loss": 11.6807, "step": 24291 }, { "epoch": 0.5084987021686344, "grad_norm": 0.32952651381492615, "learning_rate": 0.00018618184610059667, "loss": 11.6831, "step": 24292 }, { "epoch": 0.5085196349325966, "grad_norm": 0.32269182801246643, "learning_rate": 0.00018618073399024842, "loss": 11.6754, "step": 24293 }, { "epoch": 0.5085405676965586, "grad_norm": 0.3186171352863312, "learning_rate": 0.0001861796218384714, "loss": 11.7007, "step": 24294 }, { "epoch": 0.5085615004605208, "grad_norm": 0.313909113407135, "learning_rate": 0.0001861785096452661, "loss": 11.6672, "step": 24295 }, { "epoch": 0.5085824332244829, "grad_norm": 0.24973028898239136, "learning_rate": 0.00018617739741063304, "loss": 11.6589, "step": 24296 }, { "epoch": 0.5086033659884451, "grad_norm": 0.25864726305007935, "learning_rate": 0.00018617628513457275, "loss": 11.6709, "step": 24297 }, { "epoch": 0.5086242987524072, "grad_norm": 0.3292394280433655, "learning_rate": 0.00018617517281708582, "loss": 11.6897, "step": 24298 }, { "epoch": 0.5086452315163694, "grad_norm": 0.2889265716075897, "learning_rate": 0.00018617406045817278, "loss": 11.6794, "step": 24299 }, { "epoch": 0.5086661642803316, "grad_norm": 0.25238457322120667, "learning_rate": 0.0001861729480578341, "loss": 11.6597, "step": 24300 }, { "epoch": 0.5086870970442937, "grad_norm": 0.30268293619155884, "learning_rate": 0.00018617183561607043, "loss": 11.6683, "step": 24301 }, { "epoch": 0.5087080298082559, "grad_norm": 0.2293754667043686, "learning_rate": 0.00018617072313288215, "loss": 11.66, "step": 24302 }, { "epoch": 0.508728962572218, "grad_norm": 0.2671995460987091, "learning_rate": 0.00018616961060826995, "loss": 11.6823, "step": 24303 }, { "epoch": 0.5087498953361802, "grad_norm": 0.28291624784469604, "learning_rate": 0.00018616849804223423, "loss": 11.6785, "step": 24304 }, { "epoch": 0.5087708281001423, "grad_norm": 0.23941247165203094, "learning_rate": 0.00018616738543477562, "loss": 11.6659, "step": 24305 }, { "epoch": 0.5087917608641045, "grad_norm": 0.2415405660867691, "learning_rate": 0.0001861662727858946, "loss": 11.6773, "step": 24306 }, { "epoch": 0.5088126936280667, "grad_norm": 0.3775622844696045, "learning_rate": 0.00018616516009559172, "loss": 11.6758, "step": 24307 }, { "epoch": 0.5088336263920288, "grad_norm": 0.2735580801963806, "learning_rate": 0.00018616404736386755, "loss": 11.6866, "step": 24308 }, { "epoch": 0.508854559155991, "grad_norm": 0.3462550640106201, "learning_rate": 0.00018616293459072257, "loss": 11.6682, "step": 24309 }, { "epoch": 0.5088754919199531, "grad_norm": 0.2717359960079193, "learning_rate": 0.00018616182177615739, "loss": 11.6657, "step": 24310 }, { "epoch": 0.5088964246839153, "grad_norm": 0.2897818088531494, "learning_rate": 0.00018616070892017245, "loss": 11.6729, "step": 24311 }, { "epoch": 0.5089173574478775, "grad_norm": 0.24535685777664185, "learning_rate": 0.00018615959602276834, "loss": 11.6663, "step": 24312 }, { "epoch": 0.5089382902118396, "grad_norm": 0.30930468440055847, "learning_rate": 0.00018615848308394556, "loss": 11.6566, "step": 24313 }, { "epoch": 0.5089592229758018, "grad_norm": 0.2807294428348541, "learning_rate": 0.0001861573701037047, "loss": 11.6881, "step": 24314 }, { "epoch": 0.5089801557397639, "grad_norm": 0.2850606143474579, "learning_rate": 0.00018615625708204628, "loss": 11.6717, "step": 24315 }, { "epoch": 0.509001088503726, "grad_norm": 0.2854311168193817, "learning_rate": 0.0001861551440189708, "loss": 11.6888, "step": 24316 }, { "epoch": 0.5090220212676881, "grad_norm": 0.40868157148361206, "learning_rate": 0.0001861540309144788, "loss": 11.6866, "step": 24317 }, { "epoch": 0.5090429540316503, "grad_norm": 0.3706612288951874, "learning_rate": 0.00018615291776857083, "loss": 11.6774, "step": 24318 }, { "epoch": 0.5090638867956125, "grad_norm": 0.26621687412261963, "learning_rate": 0.00018615180458124745, "loss": 11.6682, "step": 24319 }, { "epoch": 0.5090848195595746, "grad_norm": 0.29315128922462463, "learning_rate": 0.0001861506913525092, "loss": 11.6907, "step": 24320 }, { "epoch": 0.5091057523235368, "grad_norm": 0.32326069474220276, "learning_rate": 0.00018614957808235654, "loss": 11.6698, "step": 24321 }, { "epoch": 0.5091266850874989, "grad_norm": 0.294674813747406, "learning_rate": 0.00018614846477079007, "loss": 11.6767, "step": 24322 }, { "epoch": 0.5091476178514611, "grad_norm": 0.2608940303325653, "learning_rate": 0.0001861473514178103, "loss": 11.6656, "step": 24323 }, { "epoch": 0.5091685506154232, "grad_norm": 0.3858638405799866, "learning_rate": 0.00018614623802341775, "loss": 11.684, "step": 24324 }, { "epoch": 0.5091894833793854, "grad_norm": 0.34515175223350525, "learning_rate": 0.00018614512458761299, "loss": 11.6833, "step": 24325 }, { "epoch": 0.5092104161433476, "grad_norm": 0.2935153543949127, "learning_rate": 0.00018614401111039658, "loss": 11.6711, "step": 24326 }, { "epoch": 0.5092313489073097, "grad_norm": 0.2570236325263977, "learning_rate": 0.00018614289759176899, "loss": 11.6723, "step": 24327 }, { "epoch": 0.5092522816712719, "grad_norm": 0.2634693682193756, "learning_rate": 0.00018614178403173078, "loss": 11.6554, "step": 24328 }, { "epoch": 0.509273214435234, "grad_norm": 0.27556735277175903, "learning_rate": 0.00018614067043028247, "loss": 11.678, "step": 24329 }, { "epoch": 0.5092941471991962, "grad_norm": 0.29925575852394104, "learning_rate": 0.00018613955678742464, "loss": 11.6823, "step": 24330 }, { "epoch": 0.5093150799631584, "grad_norm": 0.3103765845298767, "learning_rate": 0.0001861384431031578, "loss": 11.6696, "step": 24331 }, { "epoch": 0.5093360127271205, "grad_norm": 0.2843870520591736, "learning_rate": 0.00018613732937748244, "loss": 11.6678, "step": 24332 }, { "epoch": 0.5093569454910827, "grad_norm": 0.3108723759651184, "learning_rate": 0.00018613621561039919, "loss": 11.6812, "step": 24333 }, { "epoch": 0.5093778782550448, "grad_norm": 0.2747965455055237, "learning_rate": 0.00018613510180190854, "loss": 11.6583, "step": 24334 }, { "epoch": 0.509398811019007, "grad_norm": 0.29320722818374634, "learning_rate": 0.00018613398795201097, "loss": 11.6709, "step": 24335 }, { "epoch": 0.5094197437829691, "grad_norm": 0.7127349376678467, "learning_rate": 0.00018613287406070713, "loss": 11.6185, "step": 24336 }, { "epoch": 0.5094406765469313, "grad_norm": 0.2883351147174835, "learning_rate": 0.00018613176012799744, "loss": 11.6598, "step": 24337 }, { "epoch": 0.5094616093108935, "grad_norm": 0.25658079981803894, "learning_rate": 0.0001861306461538825, "loss": 11.6649, "step": 24338 }, { "epoch": 0.5094825420748555, "grad_norm": 0.40457525849342346, "learning_rate": 0.00018612953213836283, "loss": 11.6594, "step": 24339 }, { "epoch": 0.5095034748388177, "grad_norm": 0.2976987361907959, "learning_rate": 0.00018612841808143897, "loss": 11.6697, "step": 24340 }, { "epoch": 0.5095244076027798, "grad_norm": 0.2657245695590973, "learning_rate": 0.00018612730398311146, "loss": 11.685, "step": 24341 }, { "epoch": 0.509545340366742, "grad_norm": 0.329675555229187, "learning_rate": 0.00018612618984338082, "loss": 11.6749, "step": 24342 }, { "epoch": 0.5095662731307041, "grad_norm": 0.32392796874046326, "learning_rate": 0.00018612507566224758, "loss": 11.6877, "step": 24343 }, { "epoch": 0.5095872058946663, "grad_norm": 0.3319079279899597, "learning_rate": 0.00018612396143971233, "loss": 11.6692, "step": 24344 }, { "epoch": 0.5096081386586285, "grad_norm": 0.29198920726776123, "learning_rate": 0.00018612284717577557, "loss": 11.656, "step": 24345 }, { "epoch": 0.5096290714225906, "grad_norm": 0.26761922240257263, "learning_rate": 0.0001861217328704378, "loss": 11.6588, "step": 24346 }, { "epoch": 0.5096500041865528, "grad_norm": 0.2998369038105011, "learning_rate": 0.00018612061852369958, "loss": 11.6624, "step": 24347 }, { "epoch": 0.5096709369505149, "grad_norm": 0.3778313100337982, "learning_rate": 0.00018611950413556147, "loss": 11.6917, "step": 24348 }, { "epoch": 0.5096918697144771, "grad_norm": 0.34716692566871643, "learning_rate": 0.000186118389706024, "loss": 11.6751, "step": 24349 }, { "epoch": 0.5097128024784393, "grad_norm": 0.3097989857196808, "learning_rate": 0.00018611727523508767, "loss": 11.6826, "step": 24350 }, { "epoch": 0.5097337352424014, "grad_norm": 0.33954060077667236, "learning_rate": 0.00018611616072275306, "loss": 11.6707, "step": 24351 }, { "epoch": 0.5097546680063636, "grad_norm": 0.33716434240341187, "learning_rate": 0.00018611504616902066, "loss": 11.6715, "step": 24352 }, { "epoch": 0.5097756007703257, "grad_norm": 0.31689217686653137, "learning_rate": 0.00018611393157389106, "loss": 11.6881, "step": 24353 }, { "epoch": 0.5097965335342879, "grad_norm": 0.2906174063682556, "learning_rate": 0.00018611281693736476, "loss": 11.6721, "step": 24354 }, { "epoch": 0.50981746629825, "grad_norm": 0.29593244194984436, "learning_rate": 0.0001861117022594423, "loss": 11.6664, "step": 24355 }, { "epoch": 0.5098383990622122, "grad_norm": 0.2414626181125641, "learning_rate": 0.0001861105875401242, "loss": 11.6751, "step": 24356 }, { "epoch": 0.5098593318261744, "grad_norm": 0.22204628586769104, "learning_rate": 0.00018610947277941106, "loss": 11.6721, "step": 24357 }, { "epoch": 0.5098802645901365, "grad_norm": 0.35039594769477844, "learning_rate": 0.00018610835797730333, "loss": 11.6829, "step": 24358 }, { "epoch": 0.5099011973540987, "grad_norm": 0.2893334925174713, "learning_rate": 0.00018610724313380158, "loss": 11.6769, "step": 24359 }, { "epoch": 0.5099221301180608, "grad_norm": 0.2839162349700928, "learning_rate": 0.00018610612824890638, "loss": 11.6778, "step": 24360 }, { "epoch": 0.509943062882023, "grad_norm": 0.5478155016899109, "learning_rate": 0.00018610501332261823, "loss": 11.6805, "step": 24361 }, { "epoch": 0.509963995645985, "grad_norm": 0.30642133951187134, "learning_rate": 0.00018610389835493768, "loss": 11.6755, "step": 24362 }, { "epoch": 0.5099849284099472, "grad_norm": 0.3363451063632965, "learning_rate": 0.00018610278334586525, "loss": 11.6707, "step": 24363 }, { "epoch": 0.5100058611739094, "grad_norm": 0.34729036688804626, "learning_rate": 0.0001861016682954015, "loss": 11.6806, "step": 24364 }, { "epoch": 0.5100267939378715, "grad_norm": 0.28952422738075256, "learning_rate": 0.00018610055320354696, "loss": 11.6673, "step": 24365 }, { "epoch": 0.5100477267018337, "grad_norm": 0.24777942895889282, "learning_rate": 0.00018609943807030216, "loss": 11.6704, "step": 24366 }, { "epoch": 0.5100686594657958, "grad_norm": 0.2921192944049835, "learning_rate": 0.0001860983228956676, "loss": 11.6785, "step": 24367 }, { "epoch": 0.510089592229758, "grad_norm": 0.3052879571914673, "learning_rate": 0.00018609720767964386, "loss": 11.6758, "step": 24368 }, { "epoch": 0.5101105249937201, "grad_norm": 0.3332850933074951, "learning_rate": 0.00018609609242223148, "loss": 11.679, "step": 24369 }, { "epoch": 0.5101314577576823, "grad_norm": 0.30736786127090454, "learning_rate": 0.00018609497712343095, "loss": 11.6721, "step": 24370 }, { "epoch": 0.5101523905216445, "grad_norm": 0.37116432189941406, "learning_rate": 0.00018609386178324288, "loss": 11.6789, "step": 24371 }, { "epoch": 0.5101733232856066, "grad_norm": 0.3576483130455017, "learning_rate": 0.00018609274640166777, "loss": 11.6829, "step": 24372 }, { "epoch": 0.5101942560495688, "grad_norm": 0.3046398460865021, "learning_rate": 0.00018609163097870615, "loss": 11.6714, "step": 24373 }, { "epoch": 0.5102151888135309, "grad_norm": 0.2743600010871887, "learning_rate": 0.00018609051551435853, "loss": 11.6742, "step": 24374 }, { "epoch": 0.5102361215774931, "grad_norm": 0.48445725440979004, "learning_rate": 0.00018608940000862547, "loss": 11.6735, "step": 24375 }, { "epoch": 0.5102570543414553, "grad_norm": 0.2622132897377014, "learning_rate": 0.00018608828446150754, "loss": 11.6787, "step": 24376 }, { "epoch": 0.5102779871054174, "grad_norm": 0.31388169527053833, "learning_rate": 0.00018608716887300522, "loss": 11.6665, "step": 24377 }, { "epoch": 0.5102989198693796, "grad_norm": 0.4997813105583191, "learning_rate": 0.0001860860532431191, "loss": 11.6821, "step": 24378 }, { "epoch": 0.5103198526333417, "grad_norm": 0.2890802323818207, "learning_rate": 0.00018608493757184965, "loss": 11.6754, "step": 24379 }, { "epoch": 0.5103407853973039, "grad_norm": 0.3617077171802521, "learning_rate": 0.00018608382185919748, "loss": 11.6783, "step": 24380 }, { "epoch": 0.510361718161266, "grad_norm": 0.29716822504997253, "learning_rate": 0.0001860827061051631, "loss": 11.6785, "step": 24381 }, { "epoch": 0.5103826509252282, "grad_norm": 0.28068938851356506, "learning_rate": 0.00018608159030974702, "loss": 11.6595, "step": 24382 }, { "epoch": 0.5104035836891904, "grad_norm": 0.29682523012161255, "learning_rate": 0.0001860804744729498, "loss": 11.6668, "step": 24383 }, { "epoch": 0.5104245164531525, "grad_norm": 0.2870456278324127, "learning_rate": 0.00018607935859477195, "loss": 11.6873, "step": 24384 }, { "epoch": 0.5104454492171147, "grad_norm": 0.2538028359413147, "learning_rate": 0.00018607824267521402, "loss": 11.6797, "step": 24385 }, { "epoch": 0.5104663819810767, "grad_norm": 0.3090779483318329, "learning_rate": 0.0001860771267142766, "loss": 11.6899, "step": 24386 }, { "epoch": 0.510487314745039, "grad_norm": 0.25881028175354004, "learning_rate": 0.00018607601071196015, "loss": 11.6786, "step": 24387 }, { "epoch": 0.510508247509001, "grad_norm": 0.27573278546333313, "learning_rate": 0.00018607489466826525, "loss": 11.6724, "step": 24388 }, { "epoch": 0.5105291802729632, "grad_norm": 0.2920741140842438, "learning_rate": 0.00018607377858319242, "loss": 11.6501, "step": 24389 }, { "epoch": 0.5105501130369254, "grad_norm": 0.3679821789264679, "learning_rate": 0.0001860726624567422, "loss": 11.6736, "step": 24390 }, { "epoch": 0.5105710458008875, "grad_norm": 0.2828618586063385, "learning_rate": 0.0001860715462889151, "loss": 11.6868, "step": 24391 }, { "epoch": 0.5105919785648497, "grad_norm": 0.2729986608028412, "learning_rate": 0.0001860704300797117, "loss": 11.6729, "step": 24392 }, { "epoch": 0.5106129113288118, "grad_norm": 0.40520790219306946, "learning_rate": 0.00018606931382913255, "loss": 11.6813, "step": 24393 }, { "epoch": 0.510633844092774, "grad_norm": 0.256438672542572, "learning_rate": 0.0001860681975371781, "loss": 11.6704, "step": 24394 }, { "epoch": 0.5106547768567362, "grad_norm": 0.2173333466053009, "learning_rate": 0.000186067081203849, "loss": 11.6706, "step": 24395 }, { "epoch": 0.5106757096206983, "grad_norm": 0.28930792212486267, "learning_rate": 0.00018606596482914568, "loss": 11.6763, "step": 24396 }, { "epoch": 0.5106966423846605, "grad_norm": 0.24007371068000793, "learning_rate": 0.00018606484841306875, "loss": 11.6497, "step": 24397 }, { "epoch": 0.5107175751486226, "grad_norm": 0.3381393551826477, "learning_rate": 0.0001860637319556187, "loss": 11.687, "step": 24398 }, { "epoch": 0.5107385079125848, "grad_norm": 0.3010404109954834, "learning_rate": 0.00018606261545679614, "loss": 11.6593, "step": 24399 }, { "epoch": 0.5107594406765469, "grad_norm": 0.4304925501346588, "learning_rate": 0.00018606149891660153, "loss": 11.6741, "step": 24400 }, { "epoch": 0.5107803734405091, "grad_norm": 0.2933507561683655, "learning_rate": 0.00018606038233503542, "loss": 11.6797, "step": 24401 }, { "epoch": 0.5108013062044713, "grad_norm": 0.26629701256752014, "learning_rate": 0.00018605926571209838, "loss": 11.6562, "step": 24402 }, { "epoch": 0.5108222389684334, "grad_norm": 0.3584580421447754, "learning_rate": 0.00018605814904779091, "loss": 11.6726, "step": 24403 }, { "epoch": 0.5108431717323956, "grad_norm": 0.30540749430656433, "learning_rate": 0.00018605703234211358, "loss": 11.6675, "step": 24404 }, { "epoch": 0.5108641044963577, "grad_norm": 0.3543631434440613, "learning_rate": 0.00018605591559506688, "loss": 11.656, "step": 24405 }, { "epoch": 0.5108850372603199, "grad_norm": 0.3065645396709442, "learning_rate": 0.0001860547988066514, "loss": 11.6614, "step": 24406 }, { "epoch": 0.510905970024282, "grad_norm": 0.2744854688644409, "learning_rate": 0.00018605368197686763, "loss": 11.684, "step": 24407 }, { "epoch": 0.5109269027882442, "grad_norm": 0.33045125007629395, "learning_rate": 0.00018605256510571616, "loss": 11.6773, "step": 24408 }, { "epoch": 0.5109478355522064, "grad_norm": 0.355488657951355, "learning_rate": 0.0001860514481931975, "loss": 11.6731, "step": 24409 }, { "epoch": 0.5109687683161684, "grad_norm": 0.34804293513298035, "learning_rate": 0.00018605033123931218, "loss": 11.6768, "step": 24410 }, { "epoch": 0.5109897010801306, "grad_norm": 0.3203977942466736, "learning_rate": 0.00018604921424406073, "loss": 11.6915, "step": 24411 }, { "epoch": 0.5110106338440927, "grad_norm": 0.2593971788883209, "learning_rate": 0.0001860480972074437, "loss": 11.6731, "step": 24412 }, { "epoch": 0.5110315666080549, "grad_norm": 0.34940358996391296, "learning_rate": 0.0001860469801294616, "loss": 11.6766, "step": 24413 }, { "epoch": 0.5110524993720171, "grad_norm": 0.3214153051376343, "learning_rate": 0.00018604586301011503, "loss": 11.6724, "step": 24414 }, { "epoch": 0.5110734321359792, "grad_norm": 0.28229427337646484, "learning_rate": 0.0001860447458494045, "loss": 11.6679, "step": 24415 }, { "epoch": 0.5110943648999414, "grad_norm": 0.24883943796157837, "learning_rate": 0.00018604362864733047, "loss": 11.672, "step": 24416 }, { "epoch": 0.5111152976639035, "grad_norm": 0.26105841994285583, "learning_rate": 0.0001860425114038936, "loss": 11.6835, "step": 24417 }, { "epoch": 0.5111362304278657, "grad_norm": 0.3126432001590729, "learning_rate": 0.00018604139411909436, "loss": 11.6627, "step": 24418 }, { "epoch": 0.5111571631918278, "grad_norm": 0.44499266147613525, "learning_rate": 0.00018604027679293327, "loss": 11.6936, "step": 24419 }, { "epoch": 0.51117809595579, "grad_norm": 0.37143993377685547, "learning_rate": 0.00018603915942541095, "loss": 11.6914, "step": 24420 }, { "epoch": 0.5111990287197522, "grad_norm": 0.3378466069698334, "learning_rate": 0.00018603804201652784, "loss": 11.6724, "step": 24421 }, { "epoch": 0.5112199614837143, "grad_norm": 0.2929637134075165, "learning_rate": 0.00018603692456628453, "loss": 11.674, "step": 24422 }, { "epoch": 0.5112408942476765, "grad_norm": 0.328504353761673, "learning_rate": 0.00018603580707468156, "loss": 11.674, "step": 24423 }, { "epoch": 0.5112618270116386, "grad_norm": 0.23698656260967255, "learning_rate": 0.00018603468954171942, "loss": 11.662, "step": 24424 }, { "epoch": 0.5112827597756008, "grad_norm": 0.46670201420783997, "learning_rate": 0.00018603357196739874, "loss": 11.6673, "step": 24425 }, { "epoch": 0.5113036925395629, "grad_norm": 0.2543080747127533, "learning_rate": 0.0001860324543517199, "loss": 11.6679, "step": 24426 }, { "epoch": 0.5113246253035251, "grad_norm": 0.299978107213974, "learning_rate": 0.00018603133669468363, "loss": 11.6737, "step": 24427 }, { "epoch": 0.5113455580674873, "grad_norm": 0.3263123631477356, "learning_rate": 0.00018603021899629032, "loss": 11.6821, "step": 24428 }, { "epoch": 0.5113664908314494, "grad_norm": 0.27565720677375793, "learning_rate": 0.00018602910125654058, "loss": 11.6655, "step": 24429 }, { "epoch": 0.5113874235954116, "grad_norm": 0.272757351398468, "learning_rate": 0.00018602798347543492, "loss": 11.6746, "step": 24430 }, { "epoch": 0.5114083563593737, "grad_norm": 0.24026189744472504, "learning_rate": 0.0001860268656529739, "loss": 11.6541, "step": 24431 }, { "epoch": 0.5114292891233359, "grad_norm": 0.3472360074520111, "learning_rate": 0.000186025747789158, "loss": 11.6883, "step": 24432 }, { "epoch": 0.511450221887298, "grad_norm": 0.3474890887737274, "learning_rate": 0.00018602462988398784, "loss": 11.6767, "step": 24433 }, { "epoch": 0.5114711546512601, "grad_norm": 0.34595787525177, "learning_rate": 0.00018602351193746391, "loss": 11.6843, "step": 24434 }, { "epoch": 0.5114920874152223, "grad_norm": 0.3954421579837799, "learning_rate": 0.00018602239394958673, "loss": 11.6654, "step": 24435 }, { "epoch": 0.5115130201791844, "grad_norm": 0.24423864483833313, "learning_rate": 0.0001860212759203569, "loss": 11.6772, "step": 24436 }, { "epoch": 0.5115339529431466, "grad_norm": 0.4147911071777344, "learning_rate": 0.0001860201578497749, "loss": 11.6742, "step": 24437 }, { "epoch": 0.5115548857071087, "grad_norm": 0.26070645451545715, "learning_rate": 0.00018601903973784127, "loss": 11.6727, "step": 24438 }, { "epoch": 0.5115758184710709, "grad_norm": 0.3225042521953583, "learning_rate": 0.00018601792158455657, "loss": 11.6718, "step": 24439 }, { "epoch": 0.5115967512350331, "grad_norm": 0.34628868103027344, "learning_rate": 0.00018601680338992135, "loss": 11.6634, "step": 24440 }, { "epoch": 0.5116176839989952, "grad_norm": 0.28866344690322876, "learning_rate": 0.00018601568515393612, "loss": 11.6734, "step": 24441 }, { "epoch": 0.5116386167629574, "grad_norm": 0.2795243263244629, "learning_rate": 0.00018601456687660143, "loss": 11.6705, "step": 24442 }, { "epoch": 0.5116595495269195, "grad_norm": 0.3039000630378723, "learning_rate": 0.00018601344855791782, "loss": 11.662, "step": 24443 }, { "epoch": 0.5116804822908817, "grad_norm": 0.2657582759857178, "learning_rate": 0.0001860123301978858, "loss": 11.6679, "step": 24444 }, { "epoch": 0.5117014150548438, "grad_norm": 0.325103759765625, "learning_rate": 0.0001860112117965059, "loss": 11.668, "step": 24445 }, { "epoch": 0.511722347818806, "grad_norm": 0.3043707609176636, "learning_rate": 0.00018601009335377877, "loss": 11.6675, "step": 24446 }, { "epoch": 0.5117432805827682, "grad_norm": 0.24941526353359222, "learning_rate": 0.00018600897486970478, "loss": 11.673, "step": 24447 }, { "epoch": 0.5117642133467303, "grad_norm": 0.3420521914958954, "learning_rate": 0.00018600785634428462, "loss": 11.6784, "step": 24448 }, { "epoch": 0.5117851461106925, "grad_norm": 0.2795875668525696, "learning_rate": 0.00018600673777751872, "loss": 11.6548, "step": 24449 }, { "epoch": 0.5118060788746546, "grad_norm": 0.3004978895187378, "learning_rate": 0.00018600561916940769, "loss": 11.6614, "step": 24450 }, { "epoch": 0.5118270116386168, "grad_norm": 0.2816479802131653, "learning_rate": 0.00018600450051995199, "loss": 11.6681, "step": 24451 }, { "epoch": 0.511847944402579, "grad_norm": 0.35503995418548584, "learning_rate": 0.00018600338182915221, "loss": 11.7079, "step": 24452 }, { "epoch": 0.5118688771665411, "grad_norm": 0.31510788202285767, "learning_rate": 0.00018600226309700894, "loss": 11.6854, "step": 24453 }, { "epoch": 0.5118898099305033, "grad_norm": 0.3354147672653198, "learning_rate": 0.0001860011443235226, "loss": 11.6651, "step": 24454 }, { "epoch": 0.5119107426944653, "grad_norm": 0.30116036534309387, "learning_rate": 0.0001860000255086938, "loss": 11.6835, "step": 24455 }, { "epoch": 0.5119316754584275, "grad_norm": 0.25272053480148315, "learning_rate": 0.0001859989066525231, "loss": 11.6668, "step": 24456 }, { "epoch": 0.5119526082223896, "grad_norm": 0.2703174948692322, "learning_rate": 0.00018599778775501094, "loss": 11.675, "step": 24457 }, { "epoch": 0.5119735409863518, "grad_norm": 0.3065054714679718, "learning_rate": 0.00018599666881615796, "loss": 11.6611, "step": 24458 }, { "epoch": 0.511994473750314, "grad_norm": 0.28338414430618286, "learning_rate": 0.00018599554983596462, "loss": 11.674, "step": 24459 }, { "epoch": 0.5120154065142761, "grad_norm": 0.31358736753463745, "learning_rate": 0.00018599443081443156, "loss": 11.6697, "step": 24460 }, { "epoch": 0.5120363392782383, "grad_norm": 0.342977911233902, "learning_rate": 0.0001859933117515592, "loss": 11.6712, "step": 24461 }, { "epoch": 0.5120572720422004, "grad_norm": 0.22413265705108643, "learning_rate": 0.00018599219264734816, "loss": 11.6749, "step": 24462 }, { "epoch": 0.5120782048061626, "grad_norm": 0.30154213309288025, "learning_rate": 0.00018599107350179893, "loss": 11.6882, "step": 24463 }, { "epoch": 0.5120991375701247, "grad_norm": 0.2500486671924591, "learning_rate": 0.0001859899543149121, "loss": 11.6743, "step": 24464 }, { "epoch": 0.5121200703340869, "grad_norm": 0.40193620324134827, "learning_rate": 0.0001859888350866881, "loss": 11.665, "step": 24465 }, { "epoch": 0.5121410030980491, "grad_norm": 0.407230943441391, "learning_rate": 0.0001859877158171276, "loss": 11.6797, "step": 24466 }, { "epoch": 0.5121619358620112, "grad_norm": 0.2548336386680603, "learning_rate": 0.00018598659650623112, "loss": 11.6709, "step": 24467 }, { "epoch": 0.5121828686259734, "grad_norm": 0.3135295808315277, "learning_rate": 0.0001859854771539991, "loss": 11.6596, "step": 24468 }, { "epoch": 0.5122038013899355, "grad_norm": 0.2788623571395874, "learning_rate": 0.00018598435776043216, "loss": 11.6619, "step": 24469 }, { "epoch": 0.5122247341538977, "grad_norm": 0.29235708713531494, "learning_rate": 0.0001859832383255308, "loss": 11.6721, "step": 24470 }, { "epoch": 0.5122456669178599, "grad_norm": 0.37928780913352966, "learning_rate": 0.0001859821188492956, "loss": 11.6731, "step": 24471 }, { "epoch": 0.512266599681822, "grad_norm": 0.29750844836235046, "learning_rate": 0.00018598099933172705, "loss": 11.659, "step": 24472 }, { "epoch": 0.5122875324457842, "grad_norm": 0.28664928674697876, "learning_rate": 0.0001859798797728257, "loss": 11.6653, "step": 24473 }, { "epoch": 0.5123084652097463, "grad_norm": 0.2684072256088257, "learning_rate": 0.00018597876017259214, "loss": 11.6738, "step": 24474 }, { "epoch": 0.5123293979737085, "grad_norm": 0.3149125874042511, "learning_rate": 0.00018597764053102683, "loss": 11.6644, "step": 24475 }, { "epoch": 0.5123503307376706, "grad_norm": 0.29877737164497375, "learning_rate": 0.00018597652084813038, "loss": 11.6742, "step": 24476 }, { "epoch": 0.5123712635016328, "grad_norm": 0.27370578050613403, "learning_rate": 0.00018597540112390325, "loss": 11.6615, "step": 24477 }, { "epoch": 0.512392196265595, "grad_norm": 0.2728239595890045, "learning_rate": 0.00018597428135834604, "loss": 11.6762, "step": 24478 }, { "epoch": 0.512413129029557, "grad_norm": 0.2638149857521057, "learning_rate": 0.0001859731615514593, "loss": 11.6693, "step": 24479 }, { "epoch": 0.5124340617935192, "grad_norm": 0.3303004801273346, "learning_rate": 0.00018597204170324352, "loss": 11.6622, "step": 24480 }, { "epoch": 0.5124549945574813, "grad_norm": 0.2604235112667084, "learning_rate": 0.00018597092181369925, "loss": 11.6867, "step": 24481 }, { "epoch": 0.5124759273214435, "grad_norm": 0.342055082321167, "learning_rate": 0.00018596980188282703, "loss": 11.6777, "step": 24482 }, { "epoch": 0.5124968600854056, "grad_norm": 0.32645803689956665, "learning_rate": 0.0001859686819106274, "loss": 11.6723, "step": 24483 }, { "epoch": 0.5125177928493678, "grad_norm": 0.2803669273853302, "learning_rate": 0.0001859675618971009, "loss": 11.6686, "step": 24484 }, { "epoch": 0.51253872561333, "grad_norm": 0.258500337600708, "learning_rate": 0.0001859664418422481, "loss": 11.6787, "step": 24485 }, { "epoch": 0.5125596583772921, "grad_norm": 0.29859060049057007, "learning_rate": 0.00018596532174606947, "loss": 11.6892, "step": 24486 }, { "epoch": 0.5125805911412543, "grad_norm": 0.2704405188560486, "learning_rate": 0.00018596420160856561, "loss": 11.6581, "step": 24487 }, { "epoch": 0.5126015239052164, "grad_norm": 0.2877379059791565, "learning_rate": 0.00018596308142973704, "loss": 11.6654, "step": 24488 }, { "epoch": 0.5126224566691786, "grad_norm": 0.2731151580810547, "learning_rate": 0.0001859619612095843, "loss": 11.6743, "step": 24489 }, { "epoch": 0.5126433894331408, "grad_norm": 0.30082398653030396, "learning_rate": 0.0001859608409481079, "loss": 11.6667, "step": 24490 }, { "epoch": 0.5126643221971029, "grad_norm": 0.26200070977211, "learning_rate": 0.0001859597206453084, "loss": 11.661, "step": 24491 }, { "epoch": 0.5126852549610651, "grad_norm": 0.31837013363838196, "learning_rate": 0.00018595860030118635, "loss": 11.6868, "step": 24492 }, { "epoch": 0.5127061877250272, "grad_norm": 0.2792750298976898, "learning_rate": 0.0001859574799157423, "loss": 11.6699, "step": 24493 }, { "epoch": 0.5127271204889894, "grad_norm": 0.2775370478630066, "learning_rate": 0.0001859563594889767, "loss": 11.6768, "step": 24494 }, { "epoch": 0.5127480532529515, "grad_norm": 0.29177913069725037, "learning_rate": 0.0001859552390208902, "loss": 11.6693, "step": 24495 }, { "epoch": 0.5127689860169137, "grad_norm": 0.3746417760848999, "learning_rate": 0.0001859541185114833, "loss": 11.6672, "step": 24496 }, { "epoch": 0.5127899187808759, "grad_norm": 0.2930791974067688, "learning_rate": 0.00018595299796075654, "loss": 11.6966, "step": 24497 }, { "epoch": 0.512810851544838, "grad_norm": 0.31149089336395264, "learning_rate": 0.00018595187736871043, "loss": 11.683, "step": 24498 }, { "epoch": 0.5128317843088002, "grad_norm": 0.3346281945705414, "learning_rate": 0.00018595075673534555, "loss": 11.6761, "step": 24499 }, { "epoch": 0.5128527170727623, "grad_norm": 0.3073984384536743, "learning_rate": 0.00018594963606066237, "loss": 11.6634, "step": 24500 }, { "epoch": 0.5128736498367245, "grad_norm": 0.3590964674949646, "learning_rate": 0.00018594851534466155, "loss": 11.663, "step": 24501 }, { "epoch": 0.5128945826006865, "grad_norm": 0.318541556596756, "learning_rate": 0.0001859473945873435, "loss": 11.6838, "step": 24502 }, { "epoch": 0.5129155153646487, "grad_norm": 0.30429717898368835, "learning_rate": 0.00018594627378870887, "loss": 11.6702, "step": 24503 }, { "epoch": 0.512936448128611, "grad_norm": 0.2659126818180084, "learning_rate": 0.0001859451529487581, "loss": 11.6693, "step": 24504 }, { "epoch": 0.512957380892573, "grad_norm": 0.3184399902820587, "learning_rate": 0.00018594403206749176, "loss": 11.673, "step": 24505 }, { "epoch": 0.5129783136565352, "grad_norm": 0.28206831216812134, "learning_rate": 0.00018594291114491042, "loss": 11.6737, "step": 24506 }, { "epoch": 0.5129992464204973, "grad_norm": 0.2666526138782501, "learning_rate": 0.0001859417901810146, "loss": 11.666, "step": 24507 }, { "epoch": 0.5130201791844595, "grad_norm": 0.2632391154766083, "learning_rate": 0.00018594066917580485, "loss": 11.6606, "step": 24508 }, { "epoch": 0.5130411119484217, "grad_norm": 0.2630631625652313, "learning_rate": 0.0001859395481292817, "loss": 11.6509, "step": 24509 }, { "epoch": 0.5130620447123838, "grad_norm": 0.36721286177635193, "learning_rate": 0.00018593842704144566, "loss": 11.673, "step": 24510 }, { "epoch": 0.513082977476346, "grad_norm": 0.3174552917480469, "learning_rate": 0.00018593730591229732, "loss": 11.6741, "step": 24511 }, { "epoch": 0.5131039102403081, "grad_norm": 0.270222544670105, "learning_rate": 0.0001859361847418372, "loss": 11.6659, "step": 24512 }, { "epoch": 0.5131248430042703, "grad_norm": 0.31093016266822815, "learning_rate": 0.0001859350635300658, "loss": 11.6837, "step": 24513 }, { "epoch": 0.5131457757682324, "grad_norm": 0.23310396075248718, "learning_rate": 0.00018593394227698373, "loss": 11.674, "step": 24514 }, { "epoch": 0.5131667085321946, "grad_norm": 0.32978716492652893, "learning_rate": 0.00018593282098259146, "loss": 11.6892, "step": 24515 }, { "epoch": 0.5131876412961568, "grad_norm": 0.27238693833351135, "learning_rate": 0.00018593169964688957, "loss": 11.6805, "step": 24516 }, { "epoch": 0.5132085740601189, "grad_norm": 0.2886843979358673, "learning_rate": 0.0001859305782698786, "loss": 11.6749, "step": 24517 }, { "epoch": 0.5132295068240811, "grad_norm": 0.2824171185493469, "learning_rate": 0.0001859294568515591, "loss": 11.6576, "step": 24518 }, { "epoch": 0.5132504395880432, "grad_norm": 0.24149173498153687, "learning_rate": 0.00018592833539193154, "loss": 11.6745, "step": 24519 }, { "epoch": 0.5132713723520054, "grad_norm": 0.2760944664478302, "learning_rate": 0.00018592721389099653, "loss": 11.6769, "step": 24520 }, { "epoch": 0.5132923051159675, "grad_norm": 0.427910715341568, "learning_rate": 0.0001859260923487546, "loss": 11.6832, "step": 24521 }, { "epoch": 0.5133132378799297, "grad_norm": 0.29312843084335327, "learning_rate": 0.00018592497076520625, "loss": 11.6736, "step": 24522 }, { "epoch": 0.5133341706438919, "grad_norm": 0.23622435331344604, "learning_rate": 0.00018592384914035206, "loss": 11.6808, "step": 24523 }, { "epoch": 0.513355103407854, "grad_norm": 0.360117644071579, "learning_rate": 0.00018592272747419255, "loss": 11.6631, "step": 24524 }, { "epoch": 0.5133760361718162, "grad_norm": 0.30352237820625305, "learning_rate": 0.00018592160576672828, "loss": 11.6626, "step": 24525 }, { "epoch": 0.5133969689357782, "grad_norm": 0.2791300415992737, "learning_rate": 0.00018592048401795975, "loss": 11.6771, "step": 24526 }, { "epoch": 0.5134179016997404, "grad_norm": 0.2906719148159027, "learning_rate": 0.00018591936222788752, "loss": 11.6882, "step": 24527 }, { "epoch": 0.5134388344637026, "grad_norm": 0.2553313076496124, "learning_rate": 0.00018591824039651216, "loss": 11.6647, "step": 24528 }, { "epoch": 0.5134597672276647, "grad_norm": 0.40518468618392944, "learning_rate": 0.00018591711852383416, "loss": 11.6768, "step": 24529 }, { "epoch": 0.5134806999916269, "grad_norm": 0.3543936312198639, "learning_rate": 0.0001859159966098541, "loss": 11.6744, "step": 24530 }, { "epoch": 0.513501632755589, "grad_norm": 0.3372220993041992, "learning_rate": 0.00018591487465457245, "loss": 11.6773, "step": 24531 }, { "epoch": 0.5135225655195512, "grad_norm": 0.24674545228481293, "learning_rate": 0.00018591375265798981, "loss": 11.6761, "step": 24532 }, { "epoch": 0.5135434982835133, "grad_norm": 0.4053111970424652, "learning_rate": 0.00018591263062010672, "loss": 11.6688, "step": 24533 }, { "epoch": 0.5135644310474755, "grad_norm": 0.2279430627822876, "learning_rate": 0.00018591150854092373, "loss": 11.6577, "step": 24534 }, { "epoch": 0.5135853638114377, "grad_norm": 0.31042221188545227, "learning_rate": 0.0001859103864204413, "loss": 11.6755, "step": 24535 }, { "epoch": 0.5136062965753998, "grad_norm": 0.3506835997104645, "learning_rate": 0.0001859092642586601, "loss": 11.6759, "step": 24536 }, { "epoch": 0.513627229339362, "grad_norm": 0.27991726994514465, "learning_rate": 0.00018590814205558052, "loss": 11.6646, "step": 24537 }, { "epoch": 0.5136481621033241, "grad_norm": 0.24641619622707367, "learning_rate": 0.0001859070198112032, "loss": 11.6757, "step": 24538 }, { "epoch": 0.5136690948672863, "grad_norm": 0.3175157606601715, "learning_rate": 0.0001859058975255287, "loss": 11.6702, "step": 24539 }, { "epoch": 0.5136900276312484, "grad_norm": 0.26136648654937744, "learning_rate": 0.00018590477519855747, "loss": 11.6525, "step": 24540 }, { "epoch": 0.5137109603952106, "grad_norm": 0.25943130254745483, "learning_rate": 0.0001859036528302901, "loss": 11.6692, "step": 24541 }, { "epoch": 0.5137318931591728, "grad_norm": 0.2799910604953766, "learning_rate": 0.0001859025304207271, "loss": 11.6622, "step": 24542 }, { "epoch": 0.5137528259231349, "grad_norm": 0.27710649371147156, "learning_rate": 0.00018590140796986908, "loss": 11.654, "step": 24543 }, { "epoch": 0.5137737586870971, "grad_norm": 0.33852171897888184, "learning_rate": 0.0001859002854777165, "loss": 11.6715, "step": 24544 }, { "epoch": 0.5137946914510592, "grad_norm": 0.30313390493392944, "learning_rate": 0.00018589916294426995, "loss": 11.6733, "step": 24545 }, { "epoch": 0.5138156242150214, "grad_norm": 0.32747533917427063, "learning_rate": 0.00018589804036952995, "loss": 11.6681, "step": 24546 }, { "epoch": 0.5138365569789834, "grad_norm": 0.2434694617986679, "learning_rate": 0.00018589691775349702, "loss": 11.6805, "step": 24547 }, { "epoch": 0.5138574897429457, "grad_norm": 0.3068690896034241, "learning_rate": 0.00018589579509617173, "loss": 11.6802, "step": 24548 }, { "epoch": 0.5138784225069079, "grad_norm": 0.24375353753566742, "learning_rate": 0.0001858946723975546, "loss": 11.6496, "step": 24549 }, { "epoch": 0.5138993552708699, "grad_norm": 0.25443747639656067, "learning_rate": 0.0001858935496576462, "loss": 11.6737, "step": 24550 }, { "epoch": 0.5139202880348321, "grad_norm": 0.25071239471435547, "learning_rate": 0.00018589242687644703, "loss": 11.6591, "step": 24551 }, { "epoch": 0.5139412207987942, "grad_norm": 0.41923728585243225, "learning_rate": 0.00018589130405395764, "loss": 11.6826, "step": 24552 }, { "epoch": 0.5139621535627564, "grad_norm": 0.2831789255142212, "learning_rate": 0.00018589018119017864, "loss": 11.6492, "step": 24553 }, { "epoch": 0.5139830863267186, "grad_norm": 0.2793174684047699, "learning_rate": 0.00018588905828511044, "loss": 11.6729, "step": 24554 }, { "epoch": 0.5140040190906807, "grad_norm": 0.31926780939102173, "learning_rate": 0.00018588793533875366, "loss": 11.6723, "step": 24555 }, { "epoch": 0.5140249518546429, "grad_norm": 0.3191491365432739, "learning_rate": 0.00018588681235110884, "loss": 11.65, "step": 24556 }, { "epoch": 0.514045884618605, "grad_norm": 0.26411306858062744, "learning_rate": 0.00018588568932217654, "loss": 11.6673, "step": 24557 }, { "epoch": 0.5140668173825672, "grad_norm": 0.3039344847202301, "learning_rate": 0.00018588456625195723, "loss": 11.6707, "step": 24558 }, { "epoch": 0.5140877501465293, "grad_norm": 0.30846190452575684, "learning_rate": 0.0001858834431404515, "loss": 11.6833, "step": 24559 }, { "epoch": 0.5141086829104915, "grad_norm": 0.26897528767585754, "learning_rate": 0.00018588231998765986, "loss": 11.6826, "step": 24560 }, { "epoch": 0.5141296156744537, "grad_norm": 0.28176382184028625, "learning_rate": 0.00018588119679358286, "loss": 11.682, "step": 24561 }, { "epoch": 0.5141505484384158, "grad_norm": 0.3217357099056244, "learning_rate": 0.00018588007355822107, "loss": 11.6613, "step": 24562 }, { "epoch": 0.514171481202378, "grad_norm": 0.5373253226280212, "learning_rate": 0.00018587895028157503, "loss": 11.6858, "step": 24563 }, { "epoch": 0.5141924139663401, "grad_norm": 0.28491324186325073, "learning_rate": 0.00018587782696364525, "loss": 11.6773, "step": 24564 }, { "epoch": 0.5142133467303023, "grad_norm": 0.36014577746391296, "learning_rate": 0.00018587670360443225, "loss": 11.6809, "step": 24565 }, { "epoch": 0.5142342794942644, "grad_norm": 0.24314160645008087, "learning_rate": 0.0001858755802039366, "loss": 11.6652, "step": 24566 }, { "epoch": 0.5142552122582266, "grad_norm": 0.22273719310760498, "learning_rate": 0.00018587445676215887, "loss": 11.6435, "step": 24567 }, { "epoch": 0.5142761450221888, "grad_norm": 0.32048743963241577, "learning_rate": 0.00018587333327909952, "loss": 11.6686, "step": 24568 }, { "epoch": 0.5142970777861509, "grad_norm": 0.3664577305316925, "learning_rate": 0.00018587220975475917, "loss": 11.6856, "step": 24569 }, { "epoch": 0.5143180105501131, "grad_norm": 0.27656227350234985, "learning_rate": 0.00018587108618913832, "loss": 11.6662, "step": 24570 }, { "epoch": 0.5143389433140751, "grad_norm": 0.318845272064209, "learning_rate": 0.0001858699625822375, "loss": 11.6791, "step": 24571 }, { "epoch": 0.5143598760780373, "grad_norm": 0.29990285634994507, "learning_rate": 0.00018586883893405732, "loss": 11.6662, "step": 24572 }, { "epoch": 0.5143808088419995, "grad_norm": 0.2904617488384247, "learning_rate": 0.00018586771524459824, "loss": 11.6833, "step": 24573 }, { "epoch": 0.5144017416059616, "grad_norm": 0.3103528320789337, "learning_rate": 0.00018586659151386079, "loss": 11.6773, "step": 24574 }, { "epoch": 0.5144226743699238, "grad_norm": 0.3009227216243744, "learning_rate": 0.0001858654677418456, "loss": 11.6774, "step": 24575 }, { "epoch": 0.5144436071338859, "grad_norm": 0.27795517444610596, "learning_rate": 0.00018586434392855314, "loss": 11.659, "step": 24576 }, { "epoch": 0.5144645398978481, "grad_norm": 1.4126344919204712, "learning_rate": 0.00018586322007398394, "loss": 11.6847, "step": 24577 }, { "epoch": 0.5144854726618102, "grad_norm": 0.28523844480514526, "learning_rate": 0.00018586209617813863, "loss": 11.6715, "step": 24578 }, { "epoch": 0.5145064054257724, "grad_norm": 0.2912248969078064, "learning_rate": 0.00018586097224101766, "loss": 11.6844, "step": 24579 }, { "epoch": 0.5145273381897346, "grad_norm": 0.2722471356391907, "learning_rate": 0.0001858598482626216, "loss": 11.6746, "step": 24580 }, { "epoch": 0.5145482709536967, "grad_norm": 0.338583379983902, "learning_rate": 0.000185858724242951, "loss": 11.6887, "step": 24581 }, { "epoch": 0.5145692037176589, "grad_norm": 0.27861082553863525, "learning_rate": 0.00018585760018200635, "loss": 11.6744, "step": 24582 }, { "epoch": 0.514590136481621, "grad_norm": 0.3972698450088501, "learning_rate": 0.00018585647607978825, "loss": 11.6797, "step": 24583 }, { "epoch": 0.5146110692455832, "grad_norm": 0.36561319231987, "learning_rate": 0.00018585535193629723, "loss": 11.6774, "step": 24584 }, { "epoch": 0.5146320020095453, "grad_norm": 0.362413614988327, "learning_rate": 0.00018585422775153386, "loss": 11.6809, "step": 24585 }, { "epoch": 0.5146529347735075, "grad_norm": 0.27073967456817627, "learning_rate": 0.0001858531035254986, "loss": 11.6637, "step": 24586 }, { "epoch": 0.5146738675374697, "grad_norm": 0.28809139132499695, "learning_rate": 0.00018585197925819204, "loss": 11.6836, "step": 24587 }, { "epoch": 0.5146948003014318, "grad_norm": 0.2689027488231659, "learning_rate": 0.0001858508549496147, "loss": 11.6703, "step": 24588 }, { "epoch": 0.514715733065394, "grad_norm": 0.26354312896728516, "learning_rate": 0.00018584973059976716, "loss": 11.678, "step": 24589 }, { "epoch": 0.5147366658293561, "grad_norm": 0.43563613295555115, "learning_rate": 0.0001858486062086499, "loss": 11.6814, "step": 24590 }, { "epoch": 0.5147575985933183, "grad_norm": 0.3571473956108093, "learning_rate": 0.00018584748177626352, "loss": 11.6768, "step": 24591 }, { "epoch": 0.5147785313572805, "grad_norm": 0.35280218720436096, "learning_rate": 0.00018584635730260854, "loss": 11.6839, "step": 24592 }, { "epoch": 0.5147994641212426, "grad_norm": 0.27306416630744934, "learning_rate": 0.0001858452327876855, "loss": 11.6669, "step": 24593 }, { "epoch": 0.5148203968852048, "grad_norm": 0.2835562825202942, "learning_rate": 0.0001858441082314949, "loss": 11.6648, "step": 24594 }, { "epoch": 0.5148413296491668, "grad_norm": 0.2922162115573883, "learning_rate": 0.0001858429836340373, "loss": 11.6651, "step": 24595 }, { "epoch": 0.514862262413129, "grad_norm": 0.2938275635242462, "learning_rate": 0.00018584185899531331, "loss": 11.6604, "step": 24596 }, { "epoch": 0.5148831951770911, "grad_norm": 0.27936264872550964, "learning_rate": 0.00018584073431532341, "loss": 11.6648, "step": 24597 }, { "epoch": 0.5149041279410533, "grad_norm": 0.3127889335155487, "learning_rate": 0.00018583960959406816, "loss": 11.6623, "step": 24598 }, { "epoch": 0.5149250607050155, "grad_norm": 0.30772292613983154, "learning_rate": 0.00018583848483154806, "loss": 11.6726, "step": 24599 }, { "epoch": 0.5149459934689776, "grad_norm": 0.26283228397369385, "learning_rate": 0.00018583736002776368, "loss": 11.6669, "step": 24600 }, { "epoch": 0.5149669262329398, "grad_norm": 0.3567832410335541, "learning_rate": 0.0001858362351827156, "loss": 11.6698, "step": 24601 }, { "epoch": 0.5149878589969019, "grad_norm": 0.3010929226875305, "learning_rate": 0.00018583511029640428, "loss": 11.6744, "step": 24602 }, { "epoch": 0.5150087917608641, "grad_norm": 0.24580074846744537, "learning_rate": 0.00018583398536883033, "loss": 11.6746, "step": 24603 }, { "epoch": 0.5150297245248262, "grad_norm": 0.24819587171077728, "learning_rate": 0.00018583286039999423, "loss": 11.6737, "step": 24604 }, { "epoch": 0.5150506572887884, "grad_norm": 0.26600587368011475, "learning_rate": 0.00018583173538989656, "loss": 11.6688, "step": 24605 }, { "epoch": 0.5150715900527506, "grad_norm": 0.2444545328617096, "learning_rate": 0.00018583061033853788, "loss": 11.6534, "step": 24606 }, { "epoch": 0.5150925228167127, "grad_norm": 0.35979023575782776, "learning_rate": 0.0001858294852459187, "loss": 11.6685, "step": 24607 }, { "epoch": 0.5151134555806749, "grad_norm": 0.31468990445137024, "learning_rate": 0.00018582836011203955, "loss": 11.6929, "step": 24608 }, { "epoch": 0.515134388344637, "grad_norm": 0.35529735684394836, "learning_rate": 0.00018582723493690102, "loss": 11.6777, "step": 24609 }, { "epoch": 0.5151553211085992, "grad_norm": 0.3156871199607849, "learning_rate": 0.0001858261097205036, "loss": 11.6926, "step": 24610 }, { "epoch": 0.5151762538725614, "grad_norm": 0.2982557415962219, "learning_rate": 0.00018582498446284782, "loss": 11.6848, "step": 24611 }, { "epoch": 0.5151971866365235, "grad_norm": 0.307982474565506, "learning_rate": 0.00018582385916393428, "loss": 11.6772, "step": 24612 }, { "epoch": 0.5152181194004857, "grad_norm": 0.27883920073509216, "learning_rate": 0.00018582273382376348, "loss": 11.6729, "step": 24613 }, { "epoch": 0.5152390521644478, "grad_norm": 0.3057405948638916, "learning_rate": 0.00018582160844233598, "loss": 11.6737, "step": 24614 }, { "epoch": 0.51525998492841, "grad_norm": 0.2955420911312103, "learning_rate": 0.0001858204830196523, "loss": 11.679, "step": 24615 }, { "epoch": 0.515280917692372, "grad_norm": 0.2670604884624481, "learning_rate": 0.000185819357555713, "loss": 11.6686, "step": 24616 }, { "epoch": 0.5153018504563343, "grad_norm": 0.2936534881591797, "learning_rate": 0.00018581823205051861, "loss": 11.6827, "step": 24617 }, { "epoch": 0.5153227832202965, "grad_norm": 0.2937570810317993, "learning_rate": 0.0001858171065040697, "loss": 11.6608, "step": 24618 }, { "epoch": 0.5153437159842585, "grad_norm": 0.35780465602874756, "learning_rate": 0.00018581598091636678, "loss": 11.66, "step": 24619 }, { "epoch": 0.5153646487482207, "grad_norm": 0.317432165145874, "learning_rate": 0.0001858148552874104, "loss": 11.6941, "step": 24620 }, { "epoch": 0.5153855815121828, "grad_norm": 0.3130049705505371, "learning_rate": 0.00018581372961720108, "loss": 11.6839, "step": 24621 }, { "epoch": 0.515406514276145, "grad_norm": 0.3780691623687744, "learning_rate": 0.00018581260390573937, "loss": 11.6687, "step": 24622 }, { "epoch": 0.5154274470401071, "grad_norm": 0.23210032284259796, "learning_rate": 0.00018581147815302585, "loss": 11.6681, "step": 24623 }, { "epoch": 0.5154483798040693, "grad_norm": 0.2521463930606842, "learning_rate": 0.00018581035235906105, "loss": 11.6671, "step": 24624 }, { "epoch": 0.5154693125680315, "grad_norm": 0.34196439385414124, "learning_rate": 0.00018580922652384544, "loss": 11.6706, "step": 24625 }, { "epoch": 0.5154902453319936, "grad_norm": 0.2678622603416443, "learning_rate": 0.00018580810064737965, "loss": 11.6776, "step": 24626 }, { "epoch": 0.5155111780959558, "grad_norm": 0.37403494119644165, "learning_rate": 0.0001858069747296642, "loss": 11.6819, "step": 24627 }, { "epoch": 0.5155321108599179, "grad_norm": 0.2793707847595215, "learning_rate": 0.0001858058487706996, "loss": 11.6591, "step": 24628 }, { "epoch": 0.5155530436238801, "grad_norm": 0.30193498730659485, "learning_rate": 0.0001858047227704864, "loss": 11.6788, "step": 24629 }, { "epoch": 0.5155739763878423, "grad_norm": 0.2594303786754608, "learning_rate": 0.00018580359672902517, "loss": 11.6664, "step": 24630 }, { "epoch": 0.5155949091518044, "grad_norm": 0.3133418560028076, "learning_rate": 0.0001858024706463164, "loss": 11.6685, "step": 24631 }, { "epoch": 0.5156158419157666, "grad_norm": 0.28559914231300354, "learning_rate": 0.00018580134452236068, "loss": 11.688, "step": 24632 }, { "epoch": 0.5156367746797287, "grad_norm": 0.2614648938179016, "learning_rate": 0.00018580021835715854, "loss": 11.6824, "step": 24633 }, { "epoch": 0.5156577074436909, "grad_norm": 0.31663402915000916, "learning_rate": 0.00018579909215071048, "loss": 11.6946, "step": 24634 }, { "epoch": 0.515678640207653, "grad_norm": 0.33249789476394653, "learning_rate": 0.00018579796590301713, "loss": 11.6688, "step": 24635 }, { "epoch": 0.5156995729716152, "grad_norm": 0.31214141845703125, "learning_rate": 0.00018579683961407895, "loss": 11.6668, "step": 24636 }, { "epoch": 0.5157205057355774, "grad_norm": 0.2736806869506836, "learning_rate": 0.00018579571328389654, "loss": 11.6927, "step": 24637 }, { "epoch": 0.5157414384995395, "grad_norm": 0.2931872308254242, "learning_rate": 0.00018579458691247037, "loss": 11.6606, "step": 24638 }, { "epoch": 0.5157623712635017, "grad_norm": 0.2908887267112732, "learning_rate": 0.00018579346049980107, "loss": 11.6614, "step": 24639 }, { "epoch": 0.5157833040274638, "grad_norm": 0.3144568204879761, "learning_rate": 0.0001857923340458891, "loss": 11.6958, "step": 24640 }, { "epoch": 0.515804236791426, "grad_norm": 0.3071335256099701, "learning_rate": 0.00018579120755073502, "loss": 11.6837, "step": 24641 }, { "epoch": 0.515825169555388, "grad_norm": 0.3677096664905548, "learning_rate": 0.0001857900810143394, "loss": 11.6627, "step": 24642 }, { "epoch": 0.5158461023193502, "grad_norm": 0.32763051986694336, "learning_rate": 0.00018578895443670278, "loss": 11.686, "step": 24643 }, { "epoch": 0.5158670350833124, "grad_norm": 0.2981213331222534, "learning_rate": 0.00018578782781782568, "loss": 11.6682, "step": 24644 }, { "epoch": 0.5158879678472745, "grad_norm": 0.3108613193035126, "learning_rate": 0.00018578670115770868, "loss": 11.6771, "step": 24645 }, { "epoch": 0.5159089006112367, "grad_norm": 0.2688920199871063, "learning_rate": 0.00018578557445635228, "loss": 11.6703, "step": 24646 }, { "epoch": 0.5159298333751988, "grad_norm": 0.3588615655899048, "learning_rate": 0.000185784447713757, "loss": 11.6772, "step": 24647 }, { "epoch": 0.515950766139161, "grad_norm": 0.30319470167160034, "learning_rate": 0.00018578332092992344, "loss": 11.6704, "step": 24648 }, { "epoch": 0.5159716989031232, "grad_norm": 0.3321373462677002, "learning_rate": 0.00018578219410485212, "loss": 11.6692, "step": 24649 }, { "epoch": 0.5159926316670853, "grad_norm": 0.3615454435348511, "learning_rate": 0.0001857810672385436, "loss": 11.6639, "step": 24650 }, { "epoch": 0.5160135644310475, "grad_norm": 0.4657588005065918, "learning_rate": 0.00018577994033099838, "loss": 11.6827, "step": 24651 }, { "epoch": 0.5160344971950096, "grad_norm": 0.25204262137413025, "learning_rate": 0.00018577881338221704, "loss": 11.6651, "step": 24652 }, { "epoch": 0.5160554299589718, "grad_norm": 0.35615742206573486, "learning_rate": 0.0001857776863922001, "loss": 11.6799, "step": 24653 }, { "epoch": 0.5160763627229339, "grad_norm": 0.32445645332336426, "learning_rate": 0.0001857765593609481, "loss": 11.6668, "step": 24654 }, { "epoch": 0.5160972954868961, "grad_norm": 0.3197058439254761, "learning_rate": 0.0001857754322884616, "loss": 11.6726, "step": 24655 }, { "epoch": 0.5161182282508583, "grad_norm": 0.3077624440193176, "learning_rate": 0.0001857743051747411, "loss": 11.6837, "step": 24656 }, { "epoch": 0.5161391610148204, "grad_norm": 0.24251657724380493, "learning_rate": 0.0001857731780197872, "loss": 11.6694, "step": 24657 }, { "epoch": 0.5161600937787826, "grad_norm": 0.24981848895549774, "learning_rate": 0.0001857720508236004, "loss": 11.6652, "step": 24658 }, { "epoch": 0.5161810265427447, "grad_norm": 0.3118831217288971, "learning_rate": 0.00018577092358618125, "loss": 11.6724, "step": 24659 }, { "epoch": 0.5162019593067069, "grad_norm": 0.28097954392433167, "learning_rate": 0.00018576979630753032, "loss": 11.668, "step": 24660 }, { "epoch": 0.516222892070669, "grad_norm": 0.2938891053199768, "learning_rate": 0.00018576866898764813, "loss": 11.6775, "step": 24661 }, { "epoch": 0.5162438248346312, "grad_norm": 0.3820593059062958, "learning_rate": 0.0001857675416265352, "loss": 11.6738, "step": 24662 }, { "epoch": 0.5162647575985934, "grad_norm": 0.31881004571914673, "learning_rate": 0.0001857664142241921, "loss": 11.674, "step": 24663 }, { "epoch": 0.5162856903625554, "grad_norm": 0.40844371914863586, "learning_rate": 0.0001857652867806194, "loss": 11.6776, "step": 24664 }, { "epoch": 0.5163066231265176, "grad_norm": 0.2578347325325012, "learning_rate": 0.00018576415929581758, "loss": 11.6707, "step": 24665 }, { "epoch": 0.5163275558904797, "grad_norm": 0.306574285030365, "learning_rate": 0.0001857630317697872, "loss": 11.6804, "step": 24666 }, { "epoch": 0.5163484886544419, "grad_norm": 0.41156890988349915, "learning_rate": 0.00018576190420252885, "loss": 11.6576, "step": 24667 }, { "epoch": 0.5163694214184041, "grad_norm": 0.2947622835636139, "learning_rate": 0.00018576077659404302, "loss": 11.6679, "step": 24668 }, { "epoch": 0.5163903541823662, "grad_norm": 0.34131354093551636, "learning_rate": 0.00018575964894433023, "loss": 11.6767, "step": 24669 }, { "epoch": 0.5164112869463284, "grad_norm": 0.30313003063201904, "learning_rate": 0.00018575852125339107, "loss": 11.6444, "step": 24670 }, { "epoch": 0.5164322197102905, "grad_norm": 0.2891370952129364, "learning_rate": 0.0001857573935212261, "loss": 11.6499, "step": 24671 }, { "epoch": 0.5164531524742527, "grad_norm": 0.3093215823173523, "learning_rate": 0.0001857562657478358, "loss": 11.6791, "step": 24672 }, { "epoch": 0.5164740852382148, "grad_norm": 0.2797206938266754, "learning_rate": 0.00018575513793322074, "loss": 11.6679, "step": 24673 }, { "epoch": 0.516495018002177, "grad_norm": 0.2569984197616577, "learning_rate": 0.00018575401007738153, "loss": 11.6878, "step": 24674 }, { "epoch": 0.5165159507661392, "grad_norm": 0.326112300157547, "learning_rate": 0.0001857528821803186, "loss": 11.674, "step": 24675 }, { "epoch": 0.5165368835301013, "grad_norm": 0.28733786940574646, "learning_rate": 0.00018575175424203252, "loss": 11.6418, "step": 24676 }, { "epoch": 0.5165578162940635, "grad_norm": 0.25945913791656494, "learning_rate": 0.0001857506262625239, "loss": 11.6623, "step": 24677 }, { "epoch": 0.5165787490580256, "grad_norm": 0.2809988856315613, "learning_rate": 0.0001857494982417932, "loss": 11.6535, "step": 24678 }, { "epoch": 0.5165996818219878, "grad_norm": 0.3171333968639374, "learning_rate": 0.000185748370179841, "loss": 11.6684, "step": 24679 }, { "epoch": 0.5166206145859499, "grad_norm": 0.39383557438850403, "learning_rate": 0.00018574724207666784, "loss": 11.6707, "step": 24680 }, { "epoch": 0.5166415473499121, "grad_norm": 0.28300416469573975, "learning_rate": 0.0001857461139322743, "loss": 11.6795, "step": 24681 }, { "epoch": 0.5166624801138743, "grad_norm": 0.33729204535484314, "learning_rate": 0.00018574498574666084, "loss": 11.6753, "step": 24682 }, { "epoch": 0.5166834128778364, "grad_norm": 0.35981833934783936, "learning_rate": 0.00018574385751982808, "loss": 11.6675, "step": 24683 }, { "epoch": 0.5167043456417986, "grad_norm": 0.25136861205101013, "learning_rate": 0.0001857427292517765, "loss": 11.6683, "step": 24684 }, { "epoch": 0.5167252784057607, "grad_norm": 0.23532433807849884, "learning_rate": 0.0001857416009425067, "loss": 11.6568, "step": 24685 }, { "epoch": 0.5167462111697229, "grad_norm": 0.2679457366466522, "learning_rate": 0.0001857404725920192, "loss": 11.6765, "step": 24686 }, { "epoch": 0.5167671439336851, "grad_norm": 0.40775880217552185, "learning_rate": 0.0001857393442003145, "loss": 11.6764, "step": 24687 }, { "epoch": 0.5167880766976471, "grad_norm": 0.28007686138153076, "learning_rate": 0.0001857382157673932, "loss": 11.675, "step": 24688 }, { "epoch": 0.5168090094616093, "grad_norm": 0.3033091127872467, "learning_rate": 0.00018573708729325582, "loss": 11.6771, "step": 24689 }, { "epoch": 0.5168299422255714, "grad_norm": 0.3028789460659027, "learning_rate": 0.00018573595877790288, "loss": 11.6695, "step": 24690 }, { "epoch": 0.5168508749895336, "grad_norm": 0.36002862453460693, "learning_rate": 0.000185734830221335, "loss": 11.6761, "step": 24691 }, { "epoch": 0.5168718077534957, "grad_norm": 0.25910359621047974, "learning_rate": 0.00018573370162355262, "loss": 11.6609, "step": 24692 }, { "epoch": 0.5168927405174579, "grad_norm": 0.27201250195503235, "learning_rate": 0.00018573257298455634, "loss": 11.6781, "step": 24693 }, { "epoch": 0.5169136732814201, "grad_norm": 0.27081891894340515, "learning_rate": 0.0001857314443043467, "loss": 11.6642, "step": 24694 }, { "epoch": 0.5169346060453822, "grad_norm": 0.2629927098751068, "learning_rate": 0.00018573031558292424, "loss": 11.6714, "step": 24695 }, { "epoch": 0.5169555388093444, "grad_norm": 0.29447439312934875, "learning_rate": 0.0001857291868202895, "loss": 11.6712, "step": 24696 }, { "epoch": 0.5169764715733065, "grad_norm": 0.3340691328048706, "learning_rate": 0.00018572805801644302, "loss": 11.6825, "step": 24697 }, { "epoch": 0.5169974043372687, "grad_norm": 0.3040533661842346, "learning_rate": 0.00018572692917138536, "loss": 11.6727, "step": 24698 }, { "epoch": 0.5170183371012308, "grad_norm": 0.26305055618286133, "learning_rate": 0.00018572580028511704, "loss": 11.6698, "step": 24699 }, { "epoch": 0.517039269865193, "grad_norm": 0.26760202646255493, "learning_rate": 0.0001857246713576386, "loss": 11.6713, "step": 24700 }, { "epoch": 0.5170602026291552, "grad_norm": 0.345048189163208, "learning_rate": 0.0001857235423889506, "loss": 11.6725, "step": 24701 }, { "epoch": 0.5170811353931173, "grad_norm": 0.3687301576137543, "learning_rate": 0.00018572241337905357, "loss": 11.6857, "step": 24702 }, { "epoch": 0.5171020681570795, "grad_norm": 0.28539639711380005, "learning_rate": 0.00018572128432794803, "loss": 11.6815, "step": 24703 }, { "epoch": 0.5171230009210416, "grad_norm": 0.2466806322336197, "learning_rate": 0.0001857201552356346, "loss": 11.6746, "step": 24704 }, { "epoch": 0.5171439336850038, "grad_norm": 0.379592627286911, "learning_rate": 0.00018571902610211373, "loss": 11.7019, "step": 24705 }, { "epoch": 0.517164866448966, "grad_norm": 0.2995651364326477, "learning_rate": 0.00018571789692738605, "loss": 11.658, "step": 24706 }, { "epoch": 0.5171857992129281, "grad_norm": 0.26377344131469727, "learning_rate": 0.00018571676771145202, "loss": 11.6637, "step": 24707 }, { "epoch": 0.5172067319768903, "grad_norm": 0.3267853260040283, "learning_rate": 0.00018571563845431227, "loss": 11.6708, "step": 24708 }, { "epoch": 0.5172276647408524, "grad_norm": 0.28573915362358093, "learning_rate": 0.00018571450915596724, "loss": 11.6739, "step": 24709 }, { "epoch": 0.5172485975048146, "grad_norm": 0.26234403252601624, "learning_rate": 0.00018571337981641755, "loss": 11.6714, "step": 24710 }, { "epoch": 0.5172695302687766, "grad_norm": 0.33994409441947937, "learning_rate": 0.00018571225043566373, "loss": 11.6821, "step": 24711 }, { "epoch": 0.5172904630327388, "grad_norm": 0.2592317759990692, "learning_rate": 0.00018571112101370632, "loss": 11.6798, "step": 24712 }, { "epoch": 0.517311395796701, "grad_norm": 0.24636901915073395, "learning_rate": 0.00018570999155054582, "loss": 11.6576, "step": 24713 }, { "epoch": 0.5173323285606631, "grad_norm": 0.32058510184288025, "learning_rate": 0.00018570886204618284, "loss": 11.6618, "step": 24714 }, { "epoch": 0.5173532613246253, "grad_norm": 0.2518067955970764, "learning_rate": 0.00018570773250061788, "loss": 11.6801, "step": 24715 }, { "epoch": 0.5173741940885874, "grad_norm": 0.30591797828674316, "learning_rate": 0.0001857066029138515, "loss": 11.6802, "step": 24716 }, { "epoch": 0.5173951268525496, "grad_norm": 0.40946051478385925, "learning_rate": 0.00018570547328588424, "loss": 11.6741, "step": 24717 }, { "epoch": 0.5174160596165117, "grad_norm": 0.256954163312912, "learning_rate": 0.00018570434361671665, "loss": 11.6774, "step": 24718 }, { "epoch": 0.5174369923804739, "grad_norm": 0.39324331283569336, "learning_rate": 0.00018570321390634927, "loss": 11.6607, "step": 24719 }, { "epoch": 0.5174579251444361, "grad_norm": 0.25728312134742737, "learning_rate": 0.0001857020841547826, "loss": 11.6624, "step": 24720 }, { "epoch": 0.5174788579083982, "grad_norm": 0.22906708717346191, "learning_rate": 0.00018570095436201724, "loss": 11.6699, "step": 24721 }, { "epoch": 0.5174997906723604, "grad_norm": 0.27835673093795776, "learning_rate": 0.00018569982452805372, "loss": 11.6699, "step": 24722 }, { "epoch": 0.5175207234363225, "grad_norm": 0.2635423243045807, "learning_rate": 0.00018569869465289256, "loss": 11.6591, "step": 24723 }, { "epoch": 0.5175416562002847, "grad_norm": 0.3020521402359009, "learning_rate": 0.00018569756473653436, "loss": 11.6679, "step": 24724 }, { "epoch": 0.5175625889642469, "grad_norm": 0.2557886242866516, "learning_rate": 0.0001856964347789796, "loss": 11.6779, "step": 24725 }, { "epoch": 0.517583521728209, "grad_norm": 0.23894748091697693, "learning_rate": 0.00018569530478022882, "loss": 11.6711, "step": 24726 }, { "epoch": 0.5176044544921712, "grad_norm": 0.31155282258987427, "learning_rate": 0.0001856941747402826, "loss": 11.6905, "step": 24727 }, { "epoch": 0.5176253872561333, "grad_norm": 0.28908783197402954, "learning_rate": 0.0001856930446591415, "loss": 11.6818, "step": 24728 }, { "epoch": 0.5176463200200955, "grad_norm": 0.27013373374938965, "learning_rate": 0.00018569191453680602, "loss": 11.6707, "step": 24729 }, { "epoch": 0.5176672527840576, "grad_norm": 0.24455662071704865, "learning_rate": 0.00018569078437327672, "loss": 11.6719, "step": 24730 }, { "epoch": 0.5176881855480198, "grad_norm": 0.3875555694103241, "learning_rate": 0.00018568965416855413, "loss": 11.6715, "step": 24731 }, { "epoch": 0.517709118311982, "grad_norm": 0.2997145652770996, "learning_rate": 0.00018568852392263882, "loss": 11.6693, "step": 24732 }, { "epoch": 0.517730051075944, "grad_norm": 0.2725476622581482, "learning_rate": 0.00018568739363553134, "loss": 11.6859, "step": 24733 }, { "epoch": 0.5177509838399063, "grad_norm": 0.2810751795768738, "learning_rate": 0.00018568626330723218, "loss": 11.6622, "step": 24734 }, { "epoch": 0.5177719166038683, "grad_norm": 0.28499799966812134, "learning_rate": 0.00018568513293774195, "loss": 11.6663, "step": 24735 }, { "epoch": 0.5177928493678305, "grad_norm": 0.29605093598365784, "learning_rate": 0.0001856840025270611, "loss": 11.6578, "step": 24736 }, { "epoch": 0.5178137821317926, "grad_norm": 0.34110891819000244, "learning_rate": 0.00018568287207519028, "loss": 11.6625, "step": 24737 }, { "epoch": 0.5178347148957548, "grad_norm": 0.31199926137924194, "learning_rate": 0.00018568174158213, "loss": 11.6733, "step": 24738 }, { "epoch": 0.517855647659717, "grad_norm": 0.27992764115333557, "learning_rate": 0.00018568061104788075, "loss": 11.6686, "step": 24739 }, { "epoch": 0.5178765804236791, "grad_norm": 0.2553330957889557, "learning_rate": 0.00018567948047244314, "loss": 11.6679, "step": 24740 }, { "epoch": 0.5178975131876413, "grad_norm": 0.3242493271827698, "learning_rate": 0.00018567834985581768, "loss": 11.6633, "step": 24741 }, { "epoch": 0.5179184459516034, "grad_norm": 0.31123214960098267, "learning_rate": 0.0001856772191980049, "loss": 11.6748, "step": 24742 }, { "epoch": 0.5179393787155656, "grad_norm": 0.2725571095943451, "learning_rate": 0.00018567608849900537, "loss": 11.6765, "step": 24743 }, { "epoch": 0.5179603114795277, "grad_norm": 0.2423381507396698, "learning_rate": 0.00018567495775881964, "loss": 11.6783, "step": 24744 }, { "epoch": 0.5179812442434899, "grad_norm": 0.25636908411979675, "learning_rate": 0.00018567382697744824, "loss": 11.6699, "step": 24745 }, { "epoch": 0.5180021770074521, "grad_norm": 0.27954134345054626, "learning_rate": 0.00018567269615489172, "loss": 11.6756, "step": 24746 }, { "epoch": 0.5180231097714142, "grad_norm": 0.32319188117980957, "learning_rate": 0.0001856715652911506, "loss": 11.6641, "step": 24747 }, { "epoch": 0.5180440425353764, "grad_norm": 0.3182915151119232, "learning_rate": 0.00018567043438622545, "loss": 11.669, "step": 24748 }, { "epoch": 0.5180649752993385, "grad_norm": 0.2934662401676178, "learning_rate": 0.00018566930344011682, "loss": 11.6707, "step": 24749 }, { "epoch": 0.5180859080633007, "grad_norm": 0.2987233102321625, "learning_rate": 0.0001856681724528252, "loss": 11.6723, "step": 24750 }, { "epoch": 0.5181068408272629, "grad_norm": 0.24131077527999878, "learning_rate": 0.00018566704142435124, "loss": 11.6669, "step": 24751 }, { "epoch": 0.518127773591225, "grad_norm": 0.44020897150039673, "learning_rate": 0.00018566591035469536, "loss": 11.6743, "step": 24752 }, { "epoch": 0.5181487063551872, "grad_norm": 0.3050176203250885, "learning_rate": 0.00018566477924385818, "loss": 11.6734, "step": 24753 }, { "epoch": 0.5181696391191493, "grad_norm": 0.3478107750415802, "learning_rate": 0.0001856636480918402, "loss": 11.6734, "step": 24754 }, { "epoch": 0.5181905718831115, "grad_norm": 0.24988007545471191, "learning_rate": 0.00018566251689864204, "loss": 11.6574, "step": 24755 }, { "epoch": 0.5182115046470736, "grad_norm": 0.2674984633922577, "learning_rate": 0.00018566138566426414, "loss": 11.6703, "step": 24756 }, { "epoch": 0.5182324374110358, "grad_norm": 0.3922794759273529, "learning_rate": 0.0001856602543887071, "loss": 11.6662, "step": 24757 }, { "epoch": 0.518253370174998, "grad_norm": 0.32644277811050415, "learning_rate": 0.0001856591230719715, "loss": 11.6672, "step": 24758 }, { "epoch": 0.51827430293896, "grad_norm": 0.305381715297699, "learning_rate": 0.0001856579917140578, "loss": 11.662, "step": 24759 }, { "epoch": 0.5182952357029222, "grad_norm": 0.34177595376968384, "learning_rate": 0.00018565686031496662, "loss": 11.6681, "step": 24760 }, { "epoch": 0.5183161684668843, "grad_norm": 0.3189709186553955, "learning_rate": 0.00018565572887469845, "loss": 11.6714, "step": 24761 }, { "epoch": 0.5183371012308465, "grad_norm": 0.3288973271846771, "learning_rate": 0.00018565459739325384, "loss": 11.6764, "step": 24762 }, { "epoch": 0.5183580339948086, "grad_norm": 0.33919963240623474, "learning_rate": 0.0001856534658706334, "loss": 11.6657, "step": 24763 }, { "epoch": 0.5183789667587708, "grad_norm": 0.2787717282772064, "learning_rate": 0.00018565233430683757, "loss": 11.6759, "step": 24764 }, { "epoch": 0.518399899522733, "grad_norm": 0.24752764403820038, "learning_rate": 0.00018565120270186696, "loss": 11.6831, "step": 24765 }, { "epoch": 0.5184208322866951, "grad_norm": 0.33195677399635315, "learning_rate": 0.00018565007105572208, "loss": 11.6654, "step": 24766 }, { "epoch": 0.5184417650506573, "grad_norm": 0.2739275395870209, "learning_rate": 0.00018564893936840355, "loss": 11.6747, "step": 24767 }, { "epoch": 0.5184626978146194, "grad_norm": 0.28204625844955444, "learning_rate": 0.00018564780763991184, "loss": 11.676, "step": 24768 }, { "epoch": 0.5184836305785816, "grad_norm": 0.3429538607597351, "learning_rate": 0.0001856466758702475, "loss": 11.6763, "step": 24769 }, { "epoch": 0.5185045633425438, "grad_norm": 0.26959916949272156, "learning_rate": 0.00018564554405941106, "loss": 11.6659, "step": 24770 }, { "epoch": 0.5185254961065059, "grad_norm": 0.2793342173099518, "learning_rate": 0.0001856444122074031, "loss": 11.661, "step": 24771 }, { "epoch": 0.5185464288704681, "grad_norm": 0.3044016361236572, "learning_rate": 0.0001856432803142242, "loss": 11.6768, "step": 24772 }, { "epoch": 0.5185673616344302, "grad_norm": 0.2851972281932831, "learning_rate": 0.00018564214837987484, "loss": 11.6692, "step": 24773 }, { "epoch": 0.5185882943983924, "grad_norm": 0.39519479870796204, "learning_rate": 0.00018564101640435556, "loss": 11.691, "step": 24774 }, { "epoch": 0.5186092271623545, "grad_norm": 0.3175075352191925, "learning_rate": 0.00018563988438766694, "loss": 11.6809, "step": 24775 }, { "epoch": 0.5186301599263167, "grad_norm": 0.2960866391658783, "learning_rate": 0.00018563875232980952, "loss": 11.6561, "step": 24776 }, { "epoch": 0.5186510926902789, "grad_norm": 0.2758048176765442, "learning_rate": 0.00018563762023078383, "loss": 11.6653, "step": 24777 }, { "epoch": 0.518672025454241, "grad_norm": 0.31518489122390747, "learning_rate": 0.00018563648809059042, "loss": 11.6879, "step": 24778 }, { "epoch": 0.5186929582182032, "grad_norm": 0.24598674476146698, "learning_rate": 0.00018563535590922982, "loss": 11.6632, "step": 24779 }, { "epoch": 0.5187138909821652, "grad_norm": 0.2924952805042267, "learning_rate": 0.00018563422368670263, "loss": 11.6705, "step": 24780 }, { "epoch": 0.5187348237461274, "grad_norm": 0.2891600430011749, "learning_rate": 0.00018563309142300932, "loss": 11.6579, "step": 24781 }, { "epoch": 0.5187557565100895, "grad_norm": 0.3352532386779785, "learning_rate": 0.00018563195911815045, "loss": 11.6814, "step": 24782 }, { "epoch": 0.5187766892740517, "grad_norm": 0.23438183963298798, "learning_rate": 0.0001856308267721266, "loss": 11.6522, "step": 24783 }, { "epoch": 0.5187976220380139, "grad_norm": 0.2771718502044678, "learning_rate": 0.0001856296943849383, "loss": 11.6686, "step": 24784 }, { "epoch": 0.518818554801976, "grad_norm": 0.2982091009616852, "learning_rate": 0.0001856285619565861, "loss": 11.6363, "step": 24785 }, { "epoch": 0.5188394875659382, "grad_norm": 0.30120477080345154, "learning_rate": 0.0001856274294870705, "loss": 11.6664, "step": 24786 }, { "epoch": 0.5188604203299003, "grad_norm": 0.2831156253814697, "learning_rate": 0.00018562629697639211, "loss": 11.6715, "step": 24787 }, { "epoch": 0.5188813530938625, "grad_norm": 0.3086487650871277, "learning_rate": 0.00018562516442455142, "loss": 11.6693, "step": 24788 }, { "epoch": 0.5189022858578247, "grad_norm": 0.28112778067588806, "learning_rate": 0.00018562403183154902, "loss": 11.6838, "step": 24789 }, { "epoch": 0.5189232186217868, "grad_norm": 0.24101108312606812, "learning_rate": 0.00018562289919738542, "loss": 11.6704, "step": 24790 }, { "epoch": 0.518944151385749, "grad_norm": 0.326424241065979, "learning_rate": 0.00018562176652206117, "loss": 11.6806, "step": 24791 }, { "epoch": 0.5189650841497111, "grad_norm": 0.3605666756629944, "learning_rate": 0.00018562063380557684, "loss": 11.6651, "step": 24792 }, { "epoch": 0.5189860169136733, "grad_norm": 0.30126893520355225, "learning_rate": 0.00018561950104793295, "loss": 11.6729, "step": 24793 }, { "epoch": 0.5190069496776354, "grad_norm": 0.33747488260269165, "learning_rate": 0.00018561836824913003, "loss": 11.6671, "step": 24794 }, { "epoch": 0.5190278824415976, "grad_norm": 0.3145968019962311, "learning_rate": 0.00018561723540916868, "loss": 11.6724, "step": 24795 }, { "epoch": 0.5190488152055598, "grad_norm": 0.2965422570705414, "learning_rate": 0.00018561610252804938, "loss": 11.6677, "step": 24796 }, { "epoch": 0.5190697479695219, "grad_norm": 0.23960185050964355, "learning_rate": 0.0001856149696057727, "loss": 11.6695, "step": 24797 }, { "epoch": 0.5190906807334841, "grad_norm": 0.2866741418838501, "learning_rate": 0.00018561383664233923, "loss": 11.6644, "step": 24798 }, { "epoch": 0.5191116134974462, "grad_norm": 0.29024335741996765, "learning_rate": 0.00018561270363774942, "loss": 11.6745, "step": 24799 }, { "epoch": 0.5191325462614084, "grad_norm": 0.33856257796287537, "learning_rate": 0.0001856115705920039, "loss": 11.6922, "step": 24800 }, { "epoch": 0.5191534790253705, "grad_norm": 0.3020155429840088, "learning_rate": 0.0001856104375051032, "loss": 11.6657, "step": 24801 }, { "epoch": 0.5191744117893327, "grad_norm": 0.31508684158325195, "learning_rate": 0.0001856093043770478, "loss": 11.6759, "step": 24802 }, { "epoch": 0.5191953445532949, "grad_norm": 0.2508367896080017, "learning_rate": 0.0001856081712078383, "loss": 11.6656, "step": 24803 }, { "epoch": 0.519216277317257, "grad_norm": 0.35232624411582947, "learning_rate": 0.00018560703799747524, "loss": 11.6792, "step": 24804 }, { "epoch": 0.5192372100812191, "grad_norm": 0.30632588267326355, "learning_rate": 0.00018560590474595918, "loss": 11.6694, "step": 24805 }, { "epoch": 0.5192581428451812, "grad_norm": 0.36277973651885986, "learning_rate": 0.00018560477145329065, "loss": 11.6696, "step": 24806 }, { "epoch": 0.5192790756091434, "grad_norm": 0.3227638602256775, "learning_rate": 0.00018560363811947017, "loss": 11.6475, "step": 24807 }, { "epoch": 0.5193000083731056, "grad_norm": 0.2620013356208801, "learning_rate": 0.0001856025047444983, "loss": 11.6714, "step": 24808 }, { "epoch": 0.5193209411370677, "grad_norm": 0.26862409710884094, "learning_rate": 0.0001856013713283756, "loss": 11.6571, "step": 24809 }, { "epoch": 0.5193418739010299, "grad_norm": 0.29158639907836914, "learning_rate": 0.00018560023787110263, "loss": 11.6782, "step": 24810 }, { "epoch": 0.519362806664992, "grad_norm": 0.33009353280067444, "learning_rate": 0.00018559910437267987, "loss": 11.6734, "step": 24811 }, { "epoch": 0.5193837394289542, "grad_norm": 0.3464362919330597, "learning_rate": 0.00018559797083310793, "loss": 11.6804, "step": 24812 }, { "epoch": 0.5194046721929163, "grad_norm": 0.2728349566459656, "learning_rate": 0.00018559683725238732, "loss": 11.6671, "step": 24813 }, { "epoch": 0.5194256049568785, "grad_norm": 0.30127161741256714, "learning_rate": 0.0001855957036305186, "loss": 11.6708, "step": 24814 }, { "epoch": 0.5194465377208407, "grad_norm": 0.2502875328063965, "learning_rate": 0.0001855945699675023, "loss": 11.6801, "step": 24815 }, { "epoch": 0.5194674704848028, "grad_norm": 0.4223630428314209, "learning_rate": 0.000185593436263339, "loss": 11.6598, "step": 24816 }, { "epoch": 0.519488403248765, "grad_norm": 0.2810986340045929, "learning_rate": 0.0001855923025180292, "loss": 11.6687, "step": 24817 }, { "epoch": 0.5195093360127271, "grad_norm": 0.363736629486084, "learning_rate": 0.00018559116873157345, "loss": 11.6822, "step": 24818 }, { "epoch": 0.5195302687766893, "grad_norm": 0.32660409808158875, "learning_rate": 0.00018559003490397236, "loss": 11.6767, "step": 24819 }, { "epoch": 0.5195512015406514, "grad_norm": 0.2898305654525757, "learning_rate": 0.00018558890103522637, "loss": 11.6808, "step": 24820 }, { "epoch": 0.5195721343046136, "grad_norm": 0.32103410363197327, "learning_rate": 0.0001855877671253361, "loss": 11.6857, "step": 24821 }, { "epoch": 0.5195930670685758, "grad_norm": 0.29576390981674194, "learning_rate": 0.0001855866331743021, "loss": 11.6838, "step": 24822 }, { "epoch": 0.5196139998325379, "grad_norm": 0.3472156822681427, "learning_rate": 0.00018558549918212485, "loss": 11.6628, "step": 24823 }, { "epoch": 0.5196349325965001, "grad_norm": 0.38825953006744385, "learning_rate": 0.00018558436514880494, "loss": 11.6849, "step": 24824 }, { "epoch": 0.5196558653604622, "grad_norm": 0.2656724154949188, "learning_rate": 0.00018558323107434294, "loss": 11.6721, "step": 24825 }, { "epoch": 0.5196767981244244, "grad_norm": 0.2808937728404999, "learning_rate": 0.00018558209695873935, "loss": 11.6832, "step": 24826 }, { "epoch": 0.5196977308883866, "grad_norm": 0.26174384355545044, "learning_rate": 0.00018558096280199476, "loss": 11.6709, "step": 24827 }, { "epoch": 0.5197186636523486, "grad_norm": 0.2709806263446808, "learning_rate": 0.00018557982860410962, "loss": 11.6711, "step": 24828 }, { "epoch": 0.5197395964163108, "grad_norm": 0.326749324798584, "learning_rate": 0.00018557869436508457, "loss": 11.6628, "step": 24829 }, { "epoch": 0.5197605291802729, "grad_norm": 0.2642001509666443, "learning_rate": 0.00018557756008492015, "loss": 11.6721, "step": 24830 }, { "epoch": 0.5197814619442351, "grad_norm": 0.27167031168937683, "learning_rate": 0.00018557642576361687, "loss": 11.6631, "step": 24831 }, { "epoch": 0.5198023947081972, "grad_norm": 0.30350062251091003, "learning_rate": 0.0001855752914011753, "loss": 11.6784, "step": 24832 }, { "epoch": 0.5198233274721594, "grad_norm": 0.3170982599258423, "learning_rate": 0.00018557415699759595, "loss": 11.6912, "step": 24833 }, { "epoch": 0.5198442602361216, "grad_norm": 0.47388526797294617, "learning_rate": 0.00018557302255287937, "loss": 11.6753, "step": 24834 }, { "epoch": 0.5198651930000837, "grad_norm": 0.2634238600730896, "learning_rate": 0.00018557188806702616, "loss": 11.6855, "step": 24835 }, { "epoch": 0.5198861257640459, "grad_norm": 0.3000809848308563, "learning_rate": 0.00018557075354003685, "loss": 11.6748, "step": 24836 }, { "epoch": 0.519907058528008, "grad_norm": 0.3405015468597412, "learning_rate": 0.0001855696189719119, "loss": 11.671, "step": 24837 }, { "epoch": 0.5199279912919702, "grad_norm": 0.23927685618400574, "learning_rate": 0.00018556848436265195, "loss": 11.6661, "step": 24838 }, { "epoch": 0.5199489240559323, "grad_norm": 0.2687573730945587, "learning_rate": 0.0001855673497122575, "loss": 11.6748, "step": 24839 }, { "epoch": 0.5199698568198945, "grad_norm": 0.30166271328926086, "learning_rate": 0.00018556621502072913, "loss": 11.6689, "step": 24840 }, { "epoch": 0.5199907895838567, "grad_norm": 0.3227342963218689, "learning_rate": 0.00018556508028806735, "loss": 11.6693, "step": 24841 }, { "epoch": 0.5200117223478188, "grad_norm": 0.3063833713531494, "learning_rate": 0.00018556394551427275, "loss": 11.6531, "step": 24842 }, { "epoch": 0.520032655111781, "grad_norm": 0.24926412105560303, "learning_rate": 0.00018556281069934583, "loss": 11.6605, "step": 24843 }, { "epoch": 0.5200535878757431, "grad_norm": 0.28077951073646545, "learning_rate": 0.00018556167584328717, "loss": 11.6829, "step": 24844 }, { "epoch": 0.5200745206397053, "grad_norm": 0.24785102903842926, "learning_rate": 0.00018556054094609725, "loss": 11.6726, "step": 24845 }, { "epoch": 0.5200954534036675, "grad_norm": 0.3294357359409332, "learning_rate": 0.00018555940600777668, "loss": 11.6658, "step": 24846 }, { "epoch": 0.5201163861676296, "grad_norm": 0.35029447078704834, "learning_rate": 0.000185558271028326, "loss": 11.6518, "step": 24847 }, { "epoch": 0.5201373189315918, "grad_norm": 0.29958081245422363, "learning_rate": 0.00018555713600774577, "loss": 11.6767, "step": 24848 }, { "epoch": 0.5201582516955539, "grad_norm": 0.38104864954948425, "learning_rate": 0.00018555600094603646, "loss": 11.6747, "step": 24849 }, { "epoch": 0.520179184459516, "grad_norm": 0.2609448730945587, "learning_rate": 0.0001855548658431987, "loss": 11.6791, "step": 24850 }, { "epoch": 0.5202001172234781, "grad_norm": 0.27168023586273193, "learning_rate": 0.00018555373069923298, "loss": 11.6616, "step": 24851 }, { "epoch": 0.5202210499874403, "grad_norm": 0.2687093913555145, "learning_rate": 0.00018555259551413991, "loss": 11.6651, "step": 24852 }, { "epoch": 0.5202419827514025, "grad_norm": 0.40445375442504883, "learning_rate": 0.00018555146028791995, "loss": 11.6737, "step": 24853 }, { "epoch": 0.5202629155153646, "grad_norm": 0.3042813539505005, "learning_rate": 0.0001855503250205737, "loss": 11.6895, "step": 24854 }, { "epoch": 0.5202838482793268, "grad_norm": 0.2744516134262085, "learning_rate": 0.00018554918971210168, "loss": 11.6844, "step": 24855 }, { "epoch": 0.5203047810432889, "grad_norm": 0.4424421787261963, "learning_rate": 0.00018554805436250447, "loss": 11.6955, "step": 24856 }, { "epoch": 0.5203257138072511, "grad_norm": 0.28224238753318787, "learning_rate": 0.00018554691897178257, "loss": 11.6882, "step": 24857 }, { "epoch": 0.5203466465712132, "grad_norm": 0.27835166454315186, "learning_rate": 0.00018554578353993661, "loss": 11.6665, "step": 24858 }, { "epoch": 0.5203675793351754, "grad_norm": 0.33938586711883545, "learning_rate": 0.00018554464806696703, "loss": 11.6682, "step": 24859 }, { "epoch": 0.5203885120991376, "grad_norm": 0.27447032928466797, "learning_rate": 0.00018554351255287442, "loss": 11.6841, "step": 24860 }, { "epoch": 0.5204094448630997, "grad_norm": 0.36138302087783813, "learning_rate": 0.00018554237699765935, "loss": 11.6816, "step": 24861 }, { "epoch": 0.5204303776270619, "grad_norm": 0.2776716649532318, "learning_rate": 0.00018554124140132233, "loss": 11.6608, "step": 24862 }, { "epoch": 0.520451310391024, "grad_norm": 0.3029835820198059, "learning_rate": 0.00018554010576386394, "loss": 11.6752, "step": 24863 }, { "epoch": 0.5204722431549862, "grad_norm": 0.33736947178840637, "learning_rate": 0.00018553897008528468, "loss": 11.6806, "step": 24864 }, { "epoch": 0.5204931759189484, "grad_norm": 0.2565745413303375, "learning_rate": 0.00018553783436558514, "loss": 11.6664, "step": 24865 }, { "epoch": 0.5205141086829105, "grad_norm": 0.35087719559669495, "learning_rate": 0.00018553669860476587, "loss": 11.6834, "step": 24866 }, { "epoch": 0.5205350414468727, "grad_norm": 0.3474346697330475, "learning_rate": 0.0001855355628028274, "loss": 11.6873, "step": 24867 }, { "epoch": 0.5205559742108348, "grad_norm": 0.2964091897010803, "learning_rate": 0.00018553442695977022, "loss": 11.6782, "step": 24868 }, { "epoch": 0.520576906974797, "grad_norm": 0.27868351340293884, "learning_rate": 0.00018553329107559493, "loss": 11.6693, "step": 24869 }, { "epoch": 0.5205978397387591, "grad_norm": 0.25007110834121704, "learning_rate": 0.0001855321551503021, "loss": 11.6634, "step": 24870 }, { "epoch": 0.5206187725027213, "grad_norm": 0.291332483291626, "learning_rate": 0.00018553101918389225, "loss": 11.6897, "step": 24871 }, { "epoch": 0.5206397052666835, "grad_norm": 0.2828511595726013, "learning_rate": 0.0001855298831763659, "loss": 11.6791, "step": 24872 }, { "epoch": 0.5206606380306456, "grad_norm": 0.24703077971935272, "learning_rate": 0.00018552874712772364, "loss": 11.6873, "step": 24873 }, { "epoch": 0.5206815707946078, "grad_norm": 0.2809448540210724, "learning_rate": 0.00018552761103796602, "loss": 11.6611, "step": 24874 }, { "epoch": 0.5207025035585698, "grad_norm": 0.26808297634124756, "learning_rate": 0.0001855264749070935, "loss": 11.6764, "step": 24875 }, { "epoch": 0.520723436322532, "grad_norm": 0.25345566868782043, "learning_rate": 0.00018552533873510675, "loss": 11.6801, "step": 24876 }, { "epoch": 0.5207443690864941, "grad_norm": 0.30689895153045654, "learning_rate": 0.00018552420252200623, "loss": 11.6725, "step": 24877 }, { "epoch": 0.5207653018504563, "grad_norm": 0.3145095705986023, "learning_rate": 0.00018552306626779252, "loss": 11.674, "step": 24878 }, { "epoch": 0.5207862346144185, "grad_norm": 0.34218600392341614, "learning_rate": 0.00018552192997246616, "loss": 11.6775, "step": 24879 }, { "epoch": 0.5208071673783806, "grad_norm": 0.25771665573120117, "learning_rate": 0.00018552079363602766, "loss": 11.6559, "step": 24880 }, { "epoch": 0.5208281001423428, "grad_norm": 0.3809744119644165, "learning_rate": 0.00018551965725847763, "loss": 11.7044, "step": 24881 }, { "epoch": 0.5208490329063049, "grad_norm": 0.3433781862258911, "learning_rate": 0.0001855185208398166, "loss": 11.6571, "step": 24882 }, { "epoch": 0.5208699656702671, "grad_norm": 0.242706760764122, "learning_rate": 0.00018551738438004508, "loss": 11.675, "step": 24883 }, { "epoch": 0.5208908984342293, "grad_norm": 0.28894323110580444, "learning_rate": 0.00018551624787916366, "loss": 11.6729, "step": 24884 }, { "epoch": 0.5209118311981914, "grad_norm": 0.2655738592147827, "learning_rate": 0.00018551511133717283, "loss": 11.6708, "step": 24885 }, { "epoch": 0.5209327639621536, "grad_norm": 0.30785131454467773, "learning_rate": 0.00018551397475407322, "loss": 11.6766, "step": 24886 }, { "epoch": 0.5209536967261157, "grad_norm": 0.317160427570343, "learning_rate": 0.0001855128381298653, "loss": 11.6619, "step": 24887 }, { "epoch": 0.5209746294900779, "grad_norm": 0.34753915667533875, "learning_rate": 0.00018551170146454963, "loss": 11.6689, "step": 24888 }, { "epoch": 0.52099556225404, "grad_norm": 0.34560051560401917, "learning_rate": 0.0001855105647581268, "loss": 11.6725, "step": 24889 }, { "epoch": 0.5210164950180022, "grad_norm": 0.25303176045417786, "learning_rate": 0.00018550942801059733, "loss": 11.6768, "step": 24890 }, { "epoch": 0.5210374277819644, "grad_norm": 0.3525209128856659, "learning_rate": 0.00018550829122196175, "loss": 11.6827, "step": 24891 }, { "epoch": 0.5210583605459265, "grad_norm": 0.2942901849746704, "learning_rate": 0.00018550715439222062, "loss": 11.6671, "step": 24892 }, { "epoch": 0.5210792933098887, "grad_norm": 0.27496352791786194, "learning_rate": 0.00018550601752137448, "loss": 11.688, "step": 24893 }, { "epoch": 0.5211002260738508, "grad_norm": 0.2899557948112488, "learning_rate": 0.00018550488060942391, "loss": 11.6628, "step": 24894 }, { "epoch": 0.521121158837813, "grad_norm": 0.25971755385398865, "learning_rate": 0.0001855037436563694, "loss": 11.6666, "step": 24895 }, { "epoch": 0.521142091601775, "grad_norm": 0.3247682452201843, "learning_rate": 0.00018550260666221156, "loss": 11.6823, "step": 24896 }, { "epoch": 0.5211630243657372, "grad_norm": 0.3324195146560669, "learning_rate": 0.0001855014696269509, "loss": 11.677, "step": 24897 }, { "epoch": 0.5211839571296994, "grad_norm": 0.27625375986099243, "learning_rate": 0.00018550033255058793, "loss": 11.6734, "step": 24898 }, { "epoch": 0.5212048898936615, "grad_norm": 0.29202908277511597, "learning_rate": 0.0001854991954331233, "loss": 11.6535, "step": 24899 }, { "epoch": 0.5212258226576237, "grad_norm": 0.3361302614212036, "learning_rate": 0.00018549805827455744, "loss": 11.6703, "step": 24900 }, { "epoch": 0.5212467554215858, "grad_norm": 0.3635241985321045, "learning_rate": 0.00018549692107489097, "loss": 11.6604, "step": 24901 }, { "epoch": 0.521267688185548, "grad_norm": 0.24338437616825104, "learning_rate": 0.0001854957838341244, "loss": 11.6734, "step": 24902 }, { "epoch": 0.5212886209495102, "grad_norm": 0.28022539615631104, "learning_rate": 0.00018549464655225832, "loss": 11.6889, "step": 24903 }, { "epoch": 0.5213095537134723, "grad_norm": 0.2670005261898041, "learning_rate": 0.00018549350922929324, "loss": 11.6681, "step": 24904 }, { "epoch": 0.5213304864774345, "grad_norm": 0.2597884237766266, "learning_rate": 0.0001854923718652297, "loss": 11.6667, "step": 24905 }, { "epoch": 0.5213514192413966, "grad_norm": 0.3830835521221161, "learning_rate": 0.00018549123446006825, "loss": 11.6878, "step": 24906 }, { "epoch": 0.5213723520053588, "grad_norm": 0.29310327768325806, "learning_rate": 0.00018549009701380952, "loss": 11.6724, "step": 24907 }, { "epoch": 0.5213932847693209, "grad_norm": 0.30915701389312744, "learning_rate": 0.0001854889595264539, "loss": 11.6606, "step": 24908 }, { "epoch": 0.5214142175332831, "grad_norm": 0.2441643923521042, "learning_rate": 0.00018548782199800208, "loss": 11.6697, "step": 24909 }, { "epoch": 0.5214351502972453, "grad_norm": 0.3660658597946167, "learning_rate": 0.00018548668442845454, "loss": 11.6641, "step": 24910 }, { "epoch": 0.5214560830612074, "grad_norm": 0.30845776200294495, "learning_rate": 0.00018548554681781183, "loss": 11.6877, "step": 24911 }, { "epoch": 0.5214770158251696, "grad_norm": 0.2693924009799957, "learning_rate": 0.00018548440916607453, "loss": 11.6704, "step": 24912 }, { "epoch": 0.5214979485891317, "grad_norm": 0.28359729051589966, "learning_rate": 0.00018548327147324315, "loss": 11.6735, "step": 24913 }, { "epoch": 0.5215188813530939, "grad_norm": 0.27861714363098145, "learning_rate": 0.00018548213373931823, "loss": 11.686, "step": 24914 }, { "epoch": 0.521539814117056, "grad_norm": 0.31580686569213867, "learning_rate": 0.00018548099596430034, "loss": 11.6727, "step": 24915 }, { "epoch": 0.5215607468810182, "grad_norm": 0.2765774726867676, "learning_rate": 0.00018547985814819002, "loss": 11.6932, "step": 24916 }, { "epoch": 0.5215816796449804, "grad_norm": 0.4233987033367157, "learning_rate": 0.00018547872029098784, "loss": 11.6846, "step": 24917 }, { "epoch": 0.5216026124089425, "grad_norm": 0.23484794795513153, "learning_rate": 0.0001854775823926943, "loss": 11.6609, "step": 24918 }, { "epoch": 0.5216235451729047, "grad_norm": 0.2794415354728699, "learning_rate": 0.00018547644445331, "loss": 11.6792, "step": 24919 }, { "epoch": 0.5216444779368667, "grad_norm": 0.32360100746154785, "learning_rate": 0.00018547530647283544, "loss": 11.6724, "step": 24920 }, { "epoch": 0.521665410700829, "grad_norm": 0.31168434023857117, "learning_rate": 0.00018547416845127122, "loss": 11.6822, "step": 24921 }, { "epoch": 0.521686343464791, "grad_norm": 0.2337883561849594, "learning_rate": 0.0001854730303886178, "loss": 11.6783, "step": 24922 }, { "epoch": 0.5217072762287532, "grad_norm": 0.30649471282958984, "learning_rate": 0.0001854718922848758, "loss": 11.6572, "step": 24923 }, { "epoch": 0.5217282089927154, "grad_norm": 0.36356788873672485, "learning_rate": 0.00018547075414004576, "loss": 11.6755, "step": 24924 }, { "epoch": 0.5217491417566775, "grad_norm": 0.29769366979599, "learning_rate": 0.0001854696159541282, "loss": 11.6853, "step": 24925 }, { "epoch": 0.5217700745206397, "grad_norm": 0.3288249969482422, "learning_rate": 0.00018546847772712372, "loss": 11.6732, "step": 24926 }, { "epoch": 0.5217910072846018, "grad_norm": 0.28754496574401855, "learning_rate": 0.0001854673394590328, "loss": 11.6514, "step": 24927 }, { "epoch": 0.521811940048564, "grad_norm": 0.2765468657016754, "learning_rate": 0.000185466201149856, "loss": 11.6632, "step": 24928 }, { "epoch": 0.5218328728125262, "grad_norm": 0.31913140416145325, "learning_rate": 0.00018546506279959392, "loss": 11.6666, "step": 24929 }, { "epoch": 0.5218538055764883, "grad_norm": 0.24416373670101166, "learning_rate": 0.0001854639244082471, "loss": 11.6729, "step": 24930 }, { "epoch": 0.5218747383404505, "grad_norm": 0.34205037355422974, "learning_rate": 0.00018546278597581601, "loss": 11.6625, "step": 24931 }, { "epoch": 0.5218956711044126, "grad_norm": 0.28683939576148987, "learning_rate": 0.00018546164750230123, "loss": 11.6682, "step": 24932 }, { "epoch": 0.5219166038683748, "grad_norm": 0.38513973355293274, "learning_rate": 0.00018546050898770336, "loss": 11.6638, "step": 24933 }, { "epoch": 0.5219375366323369, "grad_norm": 0.33808469772338867, "learning_rate": 0.00018545937043202288, "loss": 11.6706, "step": 24934 }, { "epoch": 0.5219584693962991, "grad_norm": 0.26187458634376526, "learning_rate": 0.0001854582318352604, "loss": 11.6729, "step": 24935 }, { "epoch": 0.5219794021602613, "grad_norm": 0.2495511770248413, "learning_rate": 0.00018545709319741643, "loss": 11.6717, "step": 24936 }, { "epoch": 0.5220003349242234, "grad_norm": 0.2788141369819641, "learning_rate": 0.00018545595451849152, "loss": 11.6664, "step": 24937 }, { "epoch": 0.5220212676881856, "grad_norm": 0.3930385410785675, "learning_rate": 0.0001854548157984862, "loss": 11.6628, "step": 24938 }, { "epoch": 0.5220422004521477, "grad_norm": 0.2844681441783905, "learning_rate": 0.00018545367703740105, "loss": 11.6774, "step": 24939 }, { "epoch": 0.5220631332161099, "grad_norm": 0.24853038787841797, "learning_rate": 0.0001854525382352366, "loss": 11.6752, "step": 24940 }, { "epoch": 0.522084065980072, "grad_norm": 0.28595975041389465, "learning_rate": 0.00018545139939199343, "loss": 11.6796, "step": 24941 }, { "epoch": 0.5221049987440342, "grad_norm": 0.24813224375247955, "learning_rate": 0.00018545026050767203, "loss": 11.6709, "step": 24942 }, { "epoch": 0.5221259315079964, "grad_norm": 0.3157559633255005, "learning_rate": 0.000185449121582273, "loss": 11.6601, "step": 24943 }, { "epoch": 0.5221468642719584, "grad_norm": 0.2744913697242737, "learning_rate": 0.00018544798261579682, "loss": 11.6707, "step": 24944 }, { "epoch": 0.5221677970359206, "grad_norm": 0.2767868936061859, "learning_rate": 0.0001854468436082441, "loss": 11.6494, "step": 24945 }, { "epoch": 0.5221887297998827, "grad_norm": 0.2969096601009369, "learning_rate": 0.0001854457045596154, "loss": 11.6672, "step": 24946 }, { "epoch": 0.5222096625638449, "grad_norm": 0.2797577679157257, "learning_rate": 0.00018544456546991123, "loss": 11.686, "step": 24947 }, { "epoch": 0.5222305953278071, "grad_norm": 0.27909839153289795, "learning_rate": 0.00018544342633913214, "loss": 11.6641, "step": 24948 }, { "epoch": 0.5222515280917692, "grad_norm": 0.30848410725593567, "learning_rate": 0.0001854422871672787, "loss": 11.6762, "step": 24949 }, { "epoch": 0.5222724608557314, "grad_norm": 0.22354203462600708, "learning_rate": 0.00018544114795435138, "loss": 11.6778, "step": 24950 }, { "epoch": 0.5222933936196935, "grad_norm": 0.24688775837421417, "learning_rate": 0.00018544000870035086, "loss": 11.6677, "step": 24951 }, { "epoch": 0.5223143263836557, "grad_norm": 0.33348411321640015, "learning_rate": 0.00018543886940527757, "loss": 11.6623, "step": 24952 }, { "epoch": 0.5223352591476178, "grad_norm": 0.3257388174533844, "learning_rate": 0.00018543773006913212, "loss": 11.6625, "step": 24953 }, { "epoch": 0.52235619191158, "grad_norm": 0.2894153892993927, "learning_rate": 0.00018543659069191504, "loss": 11.6757, "step": 24954 }, { "epoch": 0.5223771246755422, "grad_norm": 0.3788207769393921, "learning_rate": 0.00018543545127362685, "loss": 11.6705, "step": 24955 }, { "epoch": 0.5223980574395043, "grad_norm": 0.2665237486362457, "learning_rate": 0.00018543431181426815, "loss": 11.6692, "step": 24956 }, { "epoch": 0.5224189902034665, "grad_norm": 0.28518038988113403, "learning_rate": 0.00018543317231383948, "loss": 11.6608, "step": 24957 }, { "epoch": 0.5224399229674286, "grad_norm": 0.27625399827957153, "learning_rate": 0.00018543203277234137, "loss": 11.6592, "step": 24958 }, { "epoch": 0.5224608557313908, "grad_norm": 0.32668548822402954, "learning_rate": 0.00018543089318977436, "loss": 11.669, "step": 24959 }, { "epoch": 0.5224817884953529, "grad_norm": 0.3035869896411896, "learning_rate": 0.000185429753566139, "loss": 11.6526, "step": 24960 }, { "epoch": 0.5225027212593151, "grad_norm": 0.2800673842430115, "learning_rate": 0.00018542861390143583, "loss": 11.6682, "step": 24961 }, { "epoch": 0.5225236540232773, "grad_norm": 0.2484518140554428, "learning_rate": 0.00018542747419566545, "loss": 11.6685, "step": 24962 }, { "epoch": 0.5225445867872394, "grad_norm": 0.31098634004592896, "learning_rate": 0.00018542633444882834, "loss": 11.6683, "step": 24963 }, { "epoch": 0.5225655195512016, "grad_norm": 0.4165585935115814, "learning_rate": 0.00018542519466092512, "loss": 11.6765, "step": 24964 }, { "epoch": 0.5225864523151637, "grad_norm": 0.328922837972641, "learning_rate": 0.00018542405483195626, "loss": 11.6622, "step": 24965 }, { "epoch": 0.5226073850791259, "grad_norm": 0.2669278085231781, "learning_rate": 0.00018542291496192234, "loss": 11.6762, "step": 24966 }, { "epoch": 0.522628317843088, "grad_norm": 0.28857186436653137, "learning_rate": 0.00018542177505082393, "loss": 11.6672, "step": 24967 }, { "epoch": 0.5226492506070501, "grad_norm": 0.32671990990638733, "learning_rate": 0.0001854206350986616, "loss": 11.6697, "step": 24968 }, { "epoch": 0.5226701833710123, "grad_norm": 0.2774309813976288, "learning_rate": 0.0001854194951054358, "loss": 11.6781, "step": 24969 }, { "epoch": 0.5226911161349744, "grad_norm": 0.27342793345451355, "learning_rate": 0.00018541835507114715, "loss": 11.6757, "step": 24970 }, { "epoch": 0.5227120488989366, "grad_norm": 0.3143010139465332, "learning_rate": 0.00018541721499579618, "loss": 11.6835, "step": 24971 }, { "epoch": 0.5227329816628987, "grad_norm": 0.29435300827026367, "learning_rate": 0.00018541607487938347, "loss": 11.6667, "step": 24972 }, { "epoch": 0.5227539144268609, "grad_norm": 0.2814478576183319, "learning_rate": 0.0001854149347219095, "loss": 11.675, "step": 24973 }, { "epoch": 0.5227748471908231, "grad_norm": 0.25325989723205566, "learning_rate": 0.00018541379452337488, "loss": 11.6604, "step": 24974 }, { "epoch": 0.5227957799547852, "grad_norm": 0.345074325799942, "learning_rate": 0.00018541265428378016, "loss": 11.6676, "step": 24975 }, { "epoch": 0.5228167127187474, "grad_norm": 0.3269427418708801, "learning_rate": 0.00018541151400312583, "loss": 11.6745, "step": 24976 }, { "epoch": 0.5228376454827095, "grad_norm": 0.31150224804878235, "learning_rate": 0.00018541037368141253, "loss": 11.6739, "step": 24977 }, { "epoch": 0.5228585782466717, "grad_norm": 0.30539920926094055, "learning_rate": 0.00018540923331864068, "loss": 11.6813, "step": 24978 }, { "epoch": 0.5228795110106338, "grad_norm": 0.36335861682891846, "learning_rate": 0.00018540809291481093, "loss": 11.6745, "step": 24979 }, { "epoch": 0.522900443774596, "grad_norm": 0.3159710168838501, "learning_rate": 0.0001854069524699238, "loss": 11.6784, "step": 24980 }, { "epoch": 0.5229213765385582, "grad_norm": 0.2882581949234009, "learning_rate": 0.00018540581198397984, "loss": 11.6566, "step": 24981 }, { "epoch": 0.5229423093025203, "grad_norm": 0.3325275778770447, "learning_rate": 0.0001854046714569796, "loss": 11.671, "step": 24982 }, { "epoch": 0.5229632420664825, "grad_norm": 0.3361167907714844, "learning_rate": 0.00018540353088892362, "loss": 11.678, "step": 24983 }, { "epoch": 0.5229841748304446, "grad_norm": 0.2890858054161072, "learning_rate": 0.00018540239027981244, "loss": 11.6713, "step": 24984 }, { "epoch": 0.5230051075944068, "grad_norm": 0.29876548051834106, "learning_rate": 0.00018540124962964662, "loss": 11.6621, "step": 24985 }, { "epoch": 0.523026040358369, "grad_norm": 0.2663707435131073, "learning_rate": 0.0001854001089384267, "loss": 11.6812, "step": 24986 }, { "epoch": 0.5230469731223311, "grad_norm": 0.3006446361541748, "learning_rate": 0.00018539896820615325, "loss": 11.6514, "step": 24987 }, { "epoch": 0.5230679058862933, "grad_norm": 0.29912468791007996, "learning_rate": 0.0001853978274328268, "loss": 11.65, "step": 24988 }, { "epoch": 0.5230888386502553, "grad_norm": 0.25444239377975464, "learning_rate": 0.00018539668661844791, "loss": 11.6754, "step": 24989 }, { "epoch": 0.5231097714142176, "grad_norm": 0.3689842224121094, "learning_rate": 0.00018539554576301712, "loss": 11.6671, "step": 24990 }, { "epoch": 0.5231307041781796, "grad_norm": 0.3551328480243683, "learning_rate": 0.00018539440486653499, "loss": 11.6484, "step": 24991 }, { "epoch": 0.5231516369421418, "grad_norm": 0.3345010280609131, "learning_rate": 0.00018539326392900206, "loss": 11.6811, "step": 24992 }, { "epoch": 0.523172569706104, "grad_norm": 0.2627125382423401, "learning_rate": 0.00018539212295041882, "loss": 11.6687, "step": 24993 }, { "epoch": 0.5231935024700661, "grad_norm": 0.27657416462898254, "learning_rate": 0.00018539098193078593, "loss": 11.6627, "step": 24994 }, { "epoch": 0.5232144352340283, "grad_norm": 0.35112902522087097, "learning_rate": 0.00018538984087010388, "loss": 11.6744, "step": 24995 }, { "epoch": 0.5232353679979904, "grad_norm": 0.3114953935146332, "learning_rate": 0.00018538869976837322, "loss": 11.6814, "step": 24996 }, { "epoch": 0.5232563007619526, "grad_norm": 0.3459441661834717, "learning_rate": 0.00018538755862559448, "loss": 11.681, "step": 24997 }, { "epoch": 0.5232772335259147, "grad_norm": 0.2857668101787567, "learning_rate": 0.00018538641744176822, "loss": 11.6592, "step": 24998 }, { "epoch": 0.5232981662898769, "grad_norm": 0.31993404030799866, "learning_rate": 0.00018538527621689505, "loss": 11.6721, "step": 24999 }, { "epoch": 0.5233190990538391, "grad_norm": 0.25239476561546326, "learning_rate": 0.00018538413495097544, "loss": 11.6626, "step": 25000 }, { "epoch": 0.5233190990538391, "eval_loss": 11.672061920166016, "eval_runtime": 34.3969, "eval_samples_per_second": 27.939, "eval_steps_per_second": 7.006, "step": 25000 }, { "epoch": 0.5233400318178012, "grad_norm": 0.2855614721775055, "learning_rate": 0.00018538299364400995, "loss": 11.6911, "step": 25001 }, { "epoch": 0.5233609645817634, "grad_norm": 0.27917060256004333, "learning_rate": 0.00018538185229599916, "loss": 11.664, "step": 25002 }, { "epoch": 0.5233818973457255, "grad_norm": 0.23256853222846985, "learning_rate": 0.0001853807109069436, "loss": 11.68, "step": 25003 }, { "epoch": 0.5234028301096877, "grad_norm": 0.2532498836517334, "learning_rate": 0.00018537956947684383, "loss": 11.6908, "step": 25004 }, { "epoch": 0.5234237628736499, "grad_norm": 0.2758391797542572, "learning_rate": 0.00018537842800570037, "loss": 11.6646, "step": 25005 }, { "epoch": 0.523444695637612, "grad_norm": 0.2713735103607178, "learning_rate": 0.0001853772864935138, "loss": 11.6681, "step": 25006 }, { "epoch": 0.5234656284015742, "grad_norm": 0.276030033826828, "learning_rate": 0.00018537614494028466, "loss": 11.6679, "step": 25007 }, { "epoch": 0.5234865611655363, "grad_norm": 0.3075878322124481, "learning_rate": 0.0001853750033460135, "loss": 11.6658, "step": 25008 }, { "epoch": 0.5235074939294985, "grad_norm": 0.30280154943466187, "learning_rate": 0.00018537386171070086, "loss": 11.674, "step": 25009 }, { "epoch": 0.5235284266934606, "grad_norm": 0.260958731174469, "learning_rate": 0.00018537272003434728, "loss": 11.6767, "step": 25010 }, { "epoch": 0.5235493594574228, "grad_norm": 0.29015669226646423, "learning_rate": 0.00018537157831695336, "loss": 11.6776, "step": 25011 }, { "epoch": 0.523570292221385, "grad_norm": 0.32657694816589355, "learning_rate": 0.00018537043655851955, "loss": 11.686, "step": 25012 }, { "epoch": 0.523591224985347, "grad_norm": 0.290249764919281, "learning_rate": 0.0001853692947590465, "loss": 11.6659, "step": 25013 }, { "epoch": 0.5236121577493092, "grad_norm": 0.30653563141822815, "learning_rate": 0.00018536815291853473, "loss": 11.6726, "step": 25014 }, { "epoch": 0.5236330905132713, "grad_norm": 0.29993948340415955, "learning_rate": 0.00018536701103698478, "loss": 11.6679, "step": 25015 }, { "epoch": 0.5236540232772335, "grad_norm": 0.3507070541381836, "learning_rate": 0.00018536586911439718, "loss": 11.6831, "step": 25016 }, { "epoch": 0.5236749560411956, "grad_norm": 0.2907455265522003, "learning_rate": 0.0001853647271507725, "loss": 11.668, "step": 25017 }, { "epoch": 0.5236958888051578, "grad_norm": 0.29820311069488525, "learning_rate": 0.00018536358514611125, "loss": 11.661, "step": 25018 }, { "epoch": 0.52371682156912, "grad_norm": 0.2761460542678833, "learning_rate": 0.00018536244310041403, "loss": 11.6652, "step": 25019 }, { "epoch": 0.5237377543330821, "grad_norm": 0.2629474699497223, "learning_rate": 0.00018536130101368142, "loss": 11.6685, "step": 25020 }, { "epoch": 0.5237586870970443, "grad_norm": 0.32075977325439453, "learning_rate": 0.00018536015888591387, "loss": 11.6707, "step": 25021 }, { "epoch": 0.5237796198610064, "grad_norm": 0.33517026901245117, "learning_rate": 0.00018535901671711203, "loss": 11.6637, "step": 25022 }, { "epoch": 0.5238005526249686, "grad_norm": 0.2184358537197113, "learning_rate": 0.00018535787450727636, "loss": 11.6567, "step": 25023 }, { "epoch": 0.5238214853889308, "grad_norm": 0.48212897777557373, "learning_rate": 0.00018535673225640747, "loss": 11.677, "step": 25024 }, { "epoch": 0.5238424181528929, "grad_norm": 0.3252826929092407, "learning_rate": 0.00018535558996450585, "loss": 11.6654, "step": 25025 }, { "epoch": 0.5238633509168551, "grad_norm": 0.3414154052734375, "learning_rate": 0.00018535444763157212, "loss": 11.6777, "step": 25026 }, { "epoch": 0.5238842836808172, "grad_norm": 0.31996625661849976, "learning_rate": 0.0001853533052576068, "loss": 11.6698, "step": 25027 }, { "epoch": 0.5239052164447794, "grad_norm": 0.32023394107818604, "learning_rate": 0.00018535216284261045, "loss": 11.6651, "step": 25028 }, { "epoch": 0.5239261492087415, "grad_norm": 0.311810702085495, "learning_rate": 0.00018535102038658356, "loss": 11.6707, "step": 25029 }, { "epoch": 0.5239470819727037, "grad_norm": 0.27486106753349304, "learning_rate": 0.00018534987788952677, "loss": 11.6797, "step": 25030 }, { "epoch": 0.5239680147366659, "grad_norm": 0.3873961865901947, "learning_rate": 0.00018534873535144052, "loss": 11.6767, "step": 25031 }, { "epoch": 0.523988947500628, "grad_norm": 0.2525266408920288, "learning_rate": 0.0001853475927723255, "loss": 11.6736, "step": 25032 }, { "epoch": 0.5240098802645902, "grad_norm": 0.2978987991809845, "learning_rate": 0.00018534645015218213, "loss": 11.6728, "step": 25033 }, { "epoch": 0.5240308130285523, "grad_norm": 0.2707253396511078, "learning_rate": 0.00018534530749101101, "loss": 11.6598, "step": 25034 }, { "epoch": 0.5240517457925145, "grad_norm": 0.27936384081840515, "learning_rate": 0.00018534416478881272, "loss": 11.6834, "step": 25035 }, { "epoch": 0.5240726785564765, "grad_norm": 0.26521819829940796, "learning_rate": 0.00018534302204558774, "loss": 11.6769, "step": 25036 }, { "epoch": 0.5240936113204387, "grad_norm": 0.3364182412624359, "learning_rate": 0.0001853418792613367, "loss": 11.6691, "step": 25037 }, { "epoch": 0.524114544084401, "grad_norm": 0.2706453204154968, "learning_rate": 0.0001853407364360601, "loss": 11.6614, "step": 25038 }, { "epoch": 0.524135476848363, "grad_norm": 0.3366577625274658, "learning_rate": 0.00018533959356975847, "loss": 11.6946, "step": 25039 }, { "epoch": 0.5241564096123252, "grad_norm": 0.25419384241104126, "learning_rate": 0.0001853384506624324, "loss": 11.6769, "step": 25040 }, { "epoch": 0.5241773423762873, "grad_norm": 0.2622605562210083, "learning_rate": 0.00018533730771408245, "loss": 11.6812, "step": 25041 }, { "epoch": 0.5241982751402495, "grad_norm": 0.28715330362319946, "learning_rate": 0.00018533616472470914, "loss": 11.6751, "step": 25042 }, { "epoch": 0.5242192079042117, "grad_norm": 0.7134261727333069, "learning_rate": 0.000185335021694313, "loss": 11.5991, "step": 25043 }, { "epoch": 0.5242401406681738, "grad_norm": 0.3613578677177429, "learning_rate": 0.00018533387862289464, "loss": 11.6794, "step": 25044 }, { "epoch": 0.524261073432136, "grad_norm": 0.3109874725341797, "learning_rate": 0.00018533273551045456, "loss": 11.6791, "step": 25045 }, { "epoch": 0.5242820061960981, "grad_norm": 0.273612916469574, "learning_rate": 0.0001853315923569933, "loss": 11.6841, "step": 25046 }, { "epoch": 0.5243029389600603, "grad_norm": 0.2617167830467224, "learning_rate": 0.00018533044916251145, "loss": 11.6663, "step": 25047 }, { "epoch": 0.5243238717240224, "grad_norm": 0.25672441720962524, "learning_rate": 0.00018532930592700956, "loss": 11.6566, "step": 25048 }, { "epoch": 0.5243448044879846, "grad_norm": 0.3687274158000946, "learning_rate": 0.00018532816265048814, "loss": 11.6839, "step": 25049 }, { "epoch": 0.5243657372519468, "grad_norm": 0.32625100016593933, "learning_rate": 0.00018532701933294776, "loss": 11.6746, "step": 25050 }, { "epoch": 0.5243866700159089, "grad_norm": 0.33122915029525757, "learning_rate": 0.000185325875974389, "loss": 11.6684, "step": 25051 }, { "epoch": 0.5244076027798711, "grad_norm": 0.30368131399154663, "learning_rate": 0.00018532473257481237, "loss": 11.6746, "step": 25052 }, { "epoch": 0.5244285355438332, "grad_norm": 0.39590245485305786, "learning_rate": 0.00018532358913421842, "loss": 11.6601, "step": 25053 }, { "epoch": 0.5244494683077954, "grad_norm": 0.2741750180721283, "learning_rate": 0.0001853224456526077, "loss": 11.6623, "step": 25054 }, { "epoch": 0.5244704010717575, "grad_norm": 0.29973337054252625, "learning_rate": 0.0001853213021299808, "loss": 11.6721, "step": 25055 }, { "epoch": 0.5244913338357197, "grad_norm": 0.2787244915962219, "learning_rate": 0.00018532015856633822, "loss": 11.6665, "step": 25056 }, { "epoch": 0.5245122665996819, "grad_norm": 0.2927730679512024, "learning_rate": 0.00018531901496168053, "loss": 11.6617, "step": 25057 }, { "epoch": 0.524533199363644, "grad_norm": 0.3048531413078308, "learning_rate": 0.00018531787131600828, "loss": 11.6795, "step": 25058 }, { "epoch": 0.5245541321276062, "grad_norm": 0.23085831105709076, "learning_rate": 0.00018531672762932204, "loss": 11.6547, "step": 25059 }, { "epoch": 0.5245750648915682, "grad_norm": 0.28279411792755127, "learning_rate": 0.0001853155839016223, "loss": 11.6782, "step": 25060 }, { "epoch": 0.5245959976555304, "grad_norm": 0.28350046277046204, "learning_rate": 0.0001853144401329097, "loss": 11.6843, "step": 25061 }, { "epoch": 0.5246169304194926, "grad_norm": 0.23481139540672302, "learning_rate": 0.0001853132963231847, "loss": 11.6632, "step": 25062 }, { "epoch": 0.5246378631834547, "grad_norm": 0.3384103775024414, "learning_rate": 0.00018531215247244792, "loss": 11.6678, "step": 25063 }, { "epoch": 0.5246587959474169, "grad_norm": 0.2753070592880249, "learning_rate": 0.00018531100858069986, "loss": 11.6596, "step": 25064 }, { "epoch": 0.524679728711379, "grad_norm": 0.314285010099411, "learning_rate": 0.00018530986464794107, "loss": 11.6701, "step": 25065 }, { "epoch": 0.5247006614753412, "grad_norm": 0.39455240964889526, "learning_rate": 0.00018530872067417216, "loss": 11.6668, "step": 25066 }, { "epoch": 0.5247215942393033, "grad_norm": 0.2737303674221039, "learning_rate": 0.0001853075766593936, "loss": 11.6764, "step": 25067 }, { "epoch": 0.5247425270032655, "grad_norm": 0.2884746789932251, "learning_rate": 0.00018530643260360598, "loss": 11.6584, "step": 25068 }, { "epoch": 0.5247634597672277, "grad_norm": 0.29335498809814453, "learning_rate": 0.0001853052885068099, "loss": 11.6676, "step": 25069 }, { "epoch": 0.5247843925311898, "grad_norm": 0.2643716335296631, "learning_rate": 0.00018530414436900578, "loss": 11.6784, "step": 25070 }, { "epoch": 0.524805325295152, "grad_norm": 0.3013484477996826, "learning_rate": 0.00018530300019019432, "loss": 11.6757, "step": 25071 }, { "epoch": 0.5248262580591141, "grad_norm": 0.26970192790031433, "learning_rate": 0.00018530185597037595, "loss": 11.6607, "step": 25072 }, { "epoch": 0.5248471908230763, "grad_norm": 0.3621427118778229, "learning_rate": 0.00018530071170955127, "loss": 11.6701, "step": 25073 }, { "epoch": 0.5248681235870384, "grad_norm": 0.276859313249588, "learning_rate": 0.00018529956740772084, "loss": 11.6763, "step": 25074 }, { "epoch": 0.5248890563510006, "grad_norm": 0.298828125, "learning_rate": 0.00018529842306488524, "loss": 11.6532, "step": 25075 }, { "epoch": 0.5249099891149628, "grad_norm": 0.3203795254230499, "learning_rate": 0.0001852972786810449, "loss": 11.6798, "step": 25076 }, { "epoch": 0.5249309218789249, "grad_norm": 0.4092138707637787, "learning_rate": 0.00018529613425620053, "loss": 11.6833, "step": 25077 }, { "epoch": 0.5249518546428871, "grad_norm": 0.26049119234085083, "learning_rate": 0.00018529498979035254, "loss": 11.6807, "step": 25078 }, { "epoch": 0.5249727874068492, "grad_norm": 0.31569403409957886, "learning_rate": 0.00018529384528350154, "loss": 11.675, "step": 25079 }, { "epoch": 0.5249937201708114, "grad_norm": 0.28433167934417725, "learning_rate": 0.0001852927007356481, "loss": 11.6712, "step": 25080 }, { "epoch": 0.5250146529347736, "grad_norm": 0.27258995175361633, "learning_rate": 0.00018529155614679276, "loss": 11.6777, "step": 25081 }, { "epoch": 0.5250355856987357, "grad_norm": 0.2667207419872284, "learning_rate": 0.00018529041151693604, "loss": 11.6593, "step": 25082 }, { "epoch": 0.5250565184626979, "grad_norm": 0.32616254687309265, "learning_rate": 0.0001852892668460785, "loss": 11.6829, "step": 25083 }, { "epoch": 0.5250774512266599, "grad_norm": 0.3226431608200073, "learning_rate": 0.00018528812213422074, "loss": 11.6729, "step": 25084 }, { "epoch": 0.5250983839906221, "grad_norm": 0.24863623082637787, "learning_rate": 0.00018528697738136324, "loss": 11.6611, "step": 25085 }, { "epoch": 0.5251193167545842, "grad_norm": 0.3281785845756531, "learning_rate": 0.00018528583258750657, "loss": 11.6777, "step": 25086 }, { "epoch": 0.5251402495185464, "grad_norm": 0.28580379486083984, "learning_rate": 0.00018528468775265132, "loss": 11.6892, "step": 25087 }, { "epoch": 0.5251611822825086, "grad_norm": 0.24013999104499817, "learning_rate": 0.00018528354287679802, "loss": 11.6733, "step": 25088 }, { "epoch": 0.5251821150464707, "grad_norm": 0.38676515221595764, "learning_rate": 0.00018528239795994716, "loss": 11.6773, "step": 25089 }, { "epoch": 0.5252030478104329, "grad_norm": 0.2682967185974121, "learning_rate": 0.0001852812530020994, "loss": 11.6577, "step": 25090 }, { "epoch": 0.525223980574395, "grad_norm": 0.36111369729042053, "learning_rate": 0.0001852801080032552, "loss": 11.6837, "step": 25091 }, { "epoch": 0.5252449133383572, "grad_norm": 0.3052932918071747, "learning_rate": 0.00018527896296341515, "loss": 11.6861, "step": 25092 }, { "epoch": 0.5252658461023193, "grad_norm": 0.2817213535308838, "learning_rate": 0.00018527781788257977, "loss": 11.6826, "step": 25093 }, { "epoch": 0.5252867788662815, "grad_norm": 0.4560966193675995, "learning_rate": 0.00018527667276074968, "loss": 11.672, "step": 25094 }, { "epoch": 0.5253077116302437, "grad_norm": 0.3139951527118683, "learning_rate": 0.00018527552759792535, "loss": 11.6735, "step": 25095 }, { "epoch": 0.5253286443942058, "grad_norm": 0.26506873965263367, "learning_rate": 0.0001852743823941074, "loss": 11.6672, "step": 25096 }, { "epoch": 0.525349577158168, "grad_norm": 0.3194584846496582, "learning_rate": 0.0001852732371492963, "loss": 11.6773, "step": 25097 }, { "epoch": 0.5253705099221301, "grad_norm": 0.2956572473049164, "learning_rate": 0.00018527209186349267, "loss": 11.6629, "step": 25098 }, { "epoch": 0.5253914426860923, "grad_norm": 0.31620991230010986, "learning_rate": 0.00018527094653669702, "loss": 11.6714, "step": 25099 }, { "epoch": 0.5254123754500544, "grad_norm": 0.30930662155151367, "learning_rate": 0.00018526980116890992, "loss": 11.682, "step": 25100 }, { "epoch": 0.5254333082140166, "grad_norm": 0.2568380534648895, "learning_rate": 0.00018526865576013195, "loss": 11.6754, "step": 25101 }, { "epoch": 0.5254542409779788, "grad_norm": 0.37855300307273865, "learning_rate": 0.0001852675103103636, "loss": 11.6675, "step": 25102 }, { "epoch": 0.5254751737419409, "grad_norm": 0.3058393895626068, "learning_rate": 0.00018526636481960547, "loss": 11.6703, "step": 25103 }, { "epoch": 0.5254961065059031, "grad_norm": 0.3386891186237335, "learning_rate": 0.00018526521928785808, "loss": 11.6759, "step": 25104 }, { "epoch": 0.5255170392698651, "grad_norm": 0.2992737591266632, "learning_rate": 0.00018526407371512199, "loss": 11.6714, "step": 25105 }, { "epoch": 0.5255379720338273, "grad_norm": 0.36043116450309753, "learning_rate": 0.00018526292810139774, "loss": 11.6698, "step": 25106 }, { "epoch": 0.5255589047977896, "grad_norm": 0.272945374250412, "learning_rate": 0.00018526178244668588, "loss": 11.6705, "step": 25107 }, { "epoch": 0.5255798375617516, "grad_norm": 0.36170610785484314, "learning_rate": 0.000185260636750987, "loss": 11.6773, "step": 25108 }, { "epoch": 0.5256007703257138, "grad_norm": 0.2672683298587799, "learning_rate": 0.00018525949101430163, "loss": 11.6798, "step": 25109 }, { "epoch": 0.5256217030896759, "grad_norm": 0.3621950149536133, "learning_rate": 0.0001852583452366303, "loss": 11.6756, "step": 25110 }, { "epoch": 0.5256426358536381, "grad_norm": 0.24878886342048645, "learning_rate": 0.00018525719941797357, "loss": 11.6762, "step": 25111 }, { "epoch": 0.5256635686176002, "grad_norm": 0.2964055836200714, "learning_rate": 0.000185256053558332, "loss": 11.6802, "step": 25112 }, { "epoch": 0.5256845013815624, "grad_norm": 0.3076244294643402, "learning_rate": 0.00018525490765770614, "loss": 11.6878, "step": 25113 }, { "epoch": 0.5257054341455246, "grad_norm": 0.239050954580307, "learning_rate": 0.00018525376171609655, "loss": 11.6654, "step": 25114 }, { "epoch": 0.5257263669094867, "grad_norm": 0.26675739884376526, "learning_rate": 0.00018525261573350372, "loss": 11.6755, "step": 25115 }, { "epoch": 0.5257472996734489, "grad_norm": 0.37523189187049866, "learning_rate": 0.0001852514697099283, "loss": 11.656, "step": 25116 }, { "epoch": 0.525768232437411, "grad_norm": 0.27810755372047424, "learning_rate": 0.00018525032364537076, "loss": 11.655, "step": 25117 }, { "epoch": 0.5257891652013732, "grad_norm": 0.32037538290023804, "learning_rate": 0.00018524917753983167, "loss": 11.6499, "step": 25118 }, { "epoch": 0.5258100979653353, "grad_norm": 0.2890433371067047, "learning_rate": 0.00018524803139331164, "loss": 11.6663, "step": 25119 }, { "epoch": 0.5258310307292975, "grad_norm": 0.36294421553611755, "learning_rate": 0.00018524688520581115, "loss": 11.6594, "step": 25120 }, { "epoch": 0.5258519634932597, "grad_norm": 0.2142629474401474, "learning_rate": 0.00018524573897733078, "loss": 11.6735, "step": 25121 }, { "epoch": 0.5258728962572218, "grad_norm": 0.24390560388565063, "learning_rate": 0.00018524459270787105, "loss": 11.6842, "step": 25122 }, { "epoch": 0.525893829021184, "grad_norm": 0.2743381857872009, "learning_rate": 0.00018524344639743253, "loss": 11.6642, "step": 25123 }, { "epoch": 0.5259147617851461, "grad_norm": 0.269011914730072, "learning_rate": 0.00018524230004601582, "loss": 11.6617, "step": 25124 }, { "epoch": 0.5259356945491083, "grad_norm": 0.30839455127716064, "learning_rate": 0.0001852411536536214, "loss": 11.6802, "step": 25125 }, { "epoch": 0.5259566273130705, "grad_norm": 0.32697927951812744, "learning_rate": 0.00018524000722024985, "loss": 11.6678, "step": 25126 }, { "epoch": 0.5259775600770326, "grad_norm": 0.3292895555496216, "learning_rate": 0.00018523886074590172, "loss": 11.69, "step": 25127 }, { "epoch": 0.5259984928409948, "grad_norm": 0.2787438631057739, "learning_rate": 0.00018523771423057758, "loss": 11.657, "step": 25128 }, { "epoch": 0.5260194256049568, "grad_norm": 0.3118499219417572, "learning_rate": 0.00018523656767427795, "loss": 11.6719, "step": 25129 }, { "epoch": 0.526040358368919, "grad_norm": 0.2801777720451355, "learning_rate": 0.0001852354210770034, "loss": 11.6889, "step": 25130 }, { "epoch": 0.5260612911328811, "grad_norm": 0.3784027397632599, "learning_rate": 0.00018523427443875449, "loss": 11.6733, "step": 25131 }, { "epoch": 0.5260822238968433, "grad_norm": 0.33051392436027527, "learning_rate": 0.00018523312775953172, "loss": 11.6559, "step": 25132 }, { "epoch": 0.5261031566608055, "grad_norm": 0.3864138424396515, "learning_rate": 0.0001852319810393357, "loss": 11.6646, "step": 25133 }, { "epoch": 0.5261240894247676, "grad_norm": 0.23009398579597473, "learning_rate": 0.00018523083427816694, "loss": 11.6698, "step": 25134 }, { "epoch": 0.5261450221887298, "grad_norm": 0.3807370066642761, "learning_rate": 0.00018522968747602603, "loss": 11.6581, "step": 25135 }, { "epoch": 0.5261659549526919, "grad_norm": 0.3258199393749237, "learning_rate": 0.00018522854063291347, "loss": 11.6902, "step": 25136 }, { "epoch": 0.5261868877166541, "grad_norm": 0.2356688231229782, "learning_rate": 0.0001852273937488299, "loss": 11.6619, "step": 25137 }, { "epoch": 0.5262078204806162, "grad_norm": 0.25474682450294495, "learning_rate": 0.00018522624682377575, "loss": 11.6661, "step": 25138 }, { "epoch": 0.5262287532445784, "grad_norm": 0.22266453504562378, "learning_rate": 0.00018522509985775167, "loss": 11.6709, "step": 25139 }, { "epoch": 0.5262496860085406, "grad_norm": 0.30930790305137634, "learning_rate": 0.0001852239528507582, "loss": 11.6777, "step": 25140 }, { "epoch": 0.5262706187725027, "grad_norm": 0.2942969799041748, "learning_rate": 0.00018522280580279582, "loss": 11.6754, "step": 25141 }, { "epoch": 0.5262915515364649, "grad_norm": 0.4040067493915558, "learning_rate": 0.00018522165871386515, "loss": 11.6858, "step": 25142 }, { "epoch": 0.526312484300427, "grad_norm": 0.31285929679870605, "learning_rate": 0.0001852205115839667, "loss": 11.6799, "step": 25143 }, { "epoch": 0.5263334170643892, "grad_norm": 0.2867584526538849, "learning_rate": 0.00018521936441310107, "loss": 11.6756, "step": 25144 }, { "epoch": 0.5263543498283514, "grad_norm": 0.3428281545639038, "learning_rate": 0.00018521821720126879, "loss": 11.6488, "step": 25145 }, { "epoch": 0.5263752825923135, "grad_norm": 0.22817203402519226, "learning_rate": 0.00018521706994847035, "loss": 11.674, "step": 25146 }, { "epoch": 0.5263962153562757, "grad_norm": 0.2599793076515198, "learning_rate": 0.00018521592265470643, "loss": 11.6878, "step": 25147 }, { "epoch": 0.5264171481202378, "grad_norm": 0.23918555676937103, "learning_rate": 0.00018521477531997747, "loss": 11.6712, "step": 25148 }, { "epoch": 0.5264380808842, "grad_norm": 0.306619793176651, "learning_rate": 0.00018521362794428404, "loss": 11.6581, "step": 25149 }, { "epoch": 0.526459013648162, "grad_norm": 0.2691994607448578, "learning_rate": 0.00018521248052762675, "loss": 11.6718, "step": 25150 }, { "epoch": 0.5264799464121243, "grad_norm": 0.3066459000110626, "learning_rate": 0.00018521133307000608, "loss": 11.6654, "step": 25151 }, { "epoch": 0.5265008791760865, "grad_norm": 0.2610339820384979, "learning_rate": 0.00018521018557142265, "loss": 11.6615, "step": 25152 }, { "epoch": 0.5265218119400485, "grad_norm": 0.3324381411075592, "learning_rate": 0.00018520903803187695, "loss": 11.6925, "step": 25153 }, { "epoch": 0.5265427447040107, "grad_norm": 0.3429379165172577, "learning_rate": 0.00018520789045136957, "loss": 11.6974, "step": 25154 }, { "epoch": 0.5265636774679728, "grad_norm": 0.3338073790073395, "learning_rate": 0.00018520674282990104, "loss": 11.6709, "step": 25155 }, { "epoch": 0.526584610231935, "grad_norm": 0.28008922934532166, "learning_rate": 0.00018520559516747195, "loss": 11.6698, "step": 25156 }, { "epoch": 0.5266055429958971, "grad_norm": 0.30479249358177185, "learning_rate": 0.00018520444746408278, "loss": 11.6585, "step": 25157 }, { "epoch": 0.5266264757598593, "grad_norm": 0.24204440414905548, "learning_rate": 0.00018520329971973416, "loss": 11.6813, "step": 25158 }, { "epoch": 0.5266474085238215, "grad_norm": 0.28271132707595825, "learning_rate": 0.0001852021519344266, "loss": 11.6578, "step": 25159 }, { "epoch": 0.5266683412877836, "grad_norm": 0.31479769945144653, "learning_rate": 0.00018520100410816063, "loss": 11.6702, "step": 25160 }, { "epoch": 0.5266892740517458, "grad_norm": 0.3027876317501068, "learning_rate": 0.00018519985624093686, "loss": 11.6719, "step": 25161 }, { "epoch": 0.5267102068157079, "grad_norm": 0.3299536108970642, "learning_rate": 0.0001851987083327558, "loss": 11.6858, "step": 25162 }, { "epoch": 0.5267311395796701, "grad_norm": 0.33195844292640686, "learning_rate": 0.00018519756038361802, "loss": 11.6538, "step": 25163 }, { "epoch": 0.5267520723436323, "grad_norm": 0.27997297048568726, "learning_rate": 0.00018519641239352405, "loss": 11.6868, "step": 25164 }, { "epoch": 0.5267730051075944, "grad_norm": 0.3701121211051941, "learning_rate": 0.00018519526436247446, "loss": 11.6722, "step": 25165 }, { "epoch": 0.5267939378715566, "grad_norm": 0.3138028383255005, "learning_rate": 0.00018519411629046984, "loss": 11.6553, "step": 25166 }, { "epoch": 0.5268148706355187, "grad_norm": 0.28907379508018494, "learning_rate": 0.00018519296817751064, "loss": 11.674, "step": 25167 }, { "epoch": 0.5268358033994809, "grad_norm": 0.3038310110569, "learning_rate": 0.0001851918200235975, "loss": 11.6772, "step": 25168 }, { "epoch": 0.526856736163443, "grad_norm": 0.3364998996257782, "learning_rate": 0.00018519067182873095, "loss": 11.693, "step": 25169 }, { "epoch": 0.5268776689274052, "grad_norm": 0.2878468930721283, "learning_rate": 0.00018518952359291153, "loss": 11.666, "step": 25170 }, { "epoch": 0.5268986016913674, "grad_norm": 0.2088334560394287, "learning_rate": 0.00018518837531613982, "loss": 11.6743, "step": 25171 }, { "epoch": 0.5269195344553295, "grad_norm": 0.26067668199539185, "learning_rate": 0.00018518722699841633, "loss": 11.6714, "step": 25172 }, { "epoch": 0.5269404672192917, "grad_norm": 0.35165536403656006, "learning_rate": 0.00018518607863974164, "loss": 11.6732, "step": 25173 }, { "epoch": 0.5269613999832538, "grad_norm": 0.3155931234359741, "learning_rate": 0.00018518493024011627, "loss": 11.6761, "step": 25174 }, { "epoch": 0.526982332747216, "grad_norm": 0.29251667857170105, "learning_rate": 0.0001851837817995408, "loss": 11.6653, "step": 25175 }, { "epoch": 0.527003265511178, "grad_norm": 0.26858213543891907, "learning_rate": 0.00018518263331801583, "loss": 11.6596, "step": 25176 }, { "epoch": 0.5270241982751402, "grad_norm": 0.32138508558273315, "learning_rate": 0.0001851814847955418, "loss": 11.6797, "step": 25177 }, { "epoch": 0.5270451310391024, "grad_norm": 0.3141149878501892, "learning_rate": 0.00018518033623211933, "loss": 11.6595, "step": 25178 }, { "epoch": 0.5270660638030645, "grad_norm": 0.2895668148994446, "learning_rate": 0.00018517918762774903, "loss": 11.6643, "step": 25179 }, { "epoch": 0.5270869965670267, "grad_norm": 0.2595476806163788, "learning_rate": 0.0001851780389824313, "loss": 11.672, "step": 25180 }, { "epoch": 0.5271079293309888, "grad_norm": 0.2823988199234009, "learning_rate": 0.00018517689029616684, "loss": 11.6688, "step": 25181 }, { "epoch": 0.527128862094951, "grad_norm": 0.3363237679004669, "learning_rate": 0.0001851757415689561, "loss": 11.6692, "step": 25182 }, { "epoch": 0.5271497948589132, "grad_norm": 0.22039289772510529, "learning_rate": 0.00018517459280079974, "loss": 11.6864, "step": 25183 }, { "epoch": 0.5271707276228753, "grad_norm": 0.26647698879241943, "learning_rate": 0.0001851734439916982, "loss": 11.6633, "step": 25184 }, { "epoch": 0.5271916603868375, "grad_norm": 0.24914437532424927, "learning_rate": 0.00018517229514165208, "loss": 11.6806, "step": 25185 }, { "epoch": 0.5272125931507996, "grad_norm": 0.3182057738304138, "learning_rate": 0.00018517114625066192, "loss": 11.6873, "step": 25186 }, { "epoch": 0.5272335259147618, "grad_norm": 0.3077962100505829, "learning_rate": 0.0001851699973187283, "loss": 11.6652, "step": 25187 }, { "epoch": 0.5272544586787239, "grad_norm": 0.2851429581642151, "learning_rate": 0.00018516884834585175, "loss": 11.6713, "step": 25188 }, { "epoch": 0.5272753914426861, "grad_norm": 0.25777024030685425, "learning_rate": 0.00018516769933203285, "loss": 11.6669, "step": 25189 }, { "epoch": 0.5272963242066483, "grad_norm": 0.28615498542785645, "learning_rate": 0.0001851665502772721, "loss": 11.6935, "step": 25190 }, { "epoch": 0.5273172569706104, "grad_norm": 0.2940327525138855, "learning_rate": 0.00018516540118157008, "loss": 11.6582, "step": 25191 }, { "epoch": 0.5273381897345726, "grad_norm": 0.32088378071784973, "learning_rate": 0.00018516425204492736, "loss": 11.668, "step": 25192 }, { "epoch": 0.5273591224985347, "grad_norm": 0.3031323552131653, "learning_rate": 0.0001851631028673445, "loss": 11.691, "step": 25193 }, { "epoch": 0.5273800552624969, "grad_norm": 0.307546466588974, "learning_rate": 0.00018516195364882198, "loss": 11.6574, "step": 25194 }, { "epoch": 0.527400988026459, "grad_norm": 0.2628062069416046, "learning_rate": 0.00018516080438936043, "loss": 11.6715, "step": 25195 }, { "epoch": 0.5274219207904212, "grad_norm": 0.3639072775840759, "learning_rate": 0.00018515965508896035, "loss": 11.6802, "step": 25196 }, { "epoch": 0.5274428535543834, "grad_norm": 0.2986586093902588, "learning_rate": 0.00018515850574762235, "loss": 11.6768, "step": 25197 }, { "epoch": 0.5274637863183455, "grad_norm": 0.2649889588356018, "learning_rate": 0.00018515735636534696, "loss": 11.6687, "step": 25198 }, { "epoch": 0.5274847190823077, "grad_norm": 0.32841986417770386, "learning_rate": 0.0001851562069421347, "loss": 11.6783, "step": 25199 }, { "epoch": 0.5275056518462697, "grad_norm": 0.23211769759655, "learning_rate": 0.00018515505747798613, "loss": 11.6718, "step": 25200 }, { "epoch": 0.5275265846102319, "grad_norm": 0.3103688657283783, "learning_rate": 0.00018515390797290183, "loss": 11.6724, "step": 25201 }, { "epoch": 0.5275475173741941, "grad_norm": 0.3315921425819397, "learning_rate": 0.00018515275842688236, "loss": 11.6814, "step": 25202 }, { "epoch": 0.5275684501381562, "grad_norm": 0.2931179106235504, "learning_rate": 0.00018515160883992822, "loss": 11.6608, "step": 25203 }, { "epoch": 0.5275893829021184, "grad_norm": 0.30227065086364746, "learning_rate": 0.00018515045921204, "loss": 11.673, "step": 25204 }, { "epoch": 0.5276103156660805, "grad_norm": 0.30619677901268005, "learning_rate": 0.00018514930954321827, "loss": 11.6507, "step": 25205 }, { "epoch": 0.5276312484300427, "grad_norm": 0.32094502449035645, "learning_rate": 0.00018514815983346355, "loss": 11.6715, "step": 25206 }, { "epoch": 0.5276521811940048, "grad_norm": 0.29651254415512085, "learning_rate": 0.0001851470100827764, "loss": 11.6717, "step": 25207 }, { "epoch": 0.527673113957967, "grad_norm": 0.2391565442085266, "learning_rate": 0.00018514586029115738, "loss": 11.6727, "step": 25208 }, { "epoch": 0.5276940467219292, "grad_norm": 0.3398352861404419, "learning_rate": 0.00018514471045860705, "loss": 11.6714, "step": 25209 }, { "epoch": 0.5277149794858913, "grad_norm": 0.3472246527671814, "learning_rate": 0.00018514356058512593, "loss": 11.6639, "step": 25210 }, { "epoch": 0.5277359122498535, "grad_norm": 0.27420899271965027, "learning_rate": 0.0001851424106707146, "loss": 11.6781, "step": 25211 }, { "epoch": 0.5277568450138156, "grad_norm": 0.325289249420166, "learning_rate": 0.0001851412607153736, "loss": 11.6517, "step": 25212 }, { "epoch": 0.5277777777777778, "grad_norm": 0.2293168157339096, "learning_rate": 0.0001851401107191035, "loss": 11.6715, "step": 25213 }, { "epoch": 0.5277987105417399, "grad_norm": 0.3202555775642395, "learning_rate": 0.00018513896068190485, "loss": 11.6764, "step": 25214 }, { "epoch": 0.5278196433057021, "grad_norm": 0.32017701864242554, "learning_rate": 0.00018513781060377818, "loss": 11.6579, "step": 25215 }, { "epoch": 0.5278405760696643, "grad_norm": 0.32156670093536377, "learning_rate": 0.00018513666048472407, "loss": 11.6967, "step": 25216 }, { "epoch": 0.5278615088336264, "grad_norm": 0.2822495102882385, "learning_rate": 0.00018513551032474306, "loss": 11.6779, "step": 25217 }, { "epoch": 0.5278824415975886, "grad_norm": 0.46423083543777466, "learning_rate": 0.0001851343601238357, "loss": 11.6809, "step": 25218 }, { "epoch": 0.5279033743615507, "grad_norm": 0.25166046619415283, "learning_rate": 0.00018513320988200254, "loss": 11.661, "step": 25219 }, { "epoch": 0.5279243071255129, "grad_norm": 0.2752382159233093, "learning_rate": 0.00018513205959924415, "loss": 11.6908, "step": 25220 }, { "epoch": 0.5279452398894751, "grad_norm": 0.2729727625846863, "learning_rate": 0.00018513090927556105, "loss": 11.659, "step": 25221 }, { "epoch": 0.5279661726534371, "grad_norm": 0.35065534710884094, "learning_rate": 0.00018512975891095385, "loss": 11.6609, "step": 25222 }, { "epoch": 0.5279871054173993, "grad_norm": 0.31270694732666016, "learning_rate": 0.00018512860850542303, "loss": 11.6857, "step": 25223 }, { "epoch": 0.5280080381813614, "grad_norm": 0.26617786288261414, "learning_rate": 0.00018512745805896922, "loss": 11.6847, "step": 25224 }, { "epoch": 0.5280289709453236, "grad_norm": 0.3152164816856384, "learning_rate": 0.00018512630757159295, "loss": 11.6579, "step": 25225 }, { "epoch": 0.5280499037092857, "grad_norm": 0.37751033902168274, "learning_rate": 0.00018512515704329472, "loss": 11.6847, "step": 25226 }, { "epoch": 0.5280708364732479, "grad_norm": 0.33108821511268616, "learning_rate": 0.00018512400647407513, "loss": 11.668, "step": 25227 }, { "epoch": 0.5280917692372101, "grad_norm": 0.3346864581108093, "learning_rate": 0.00018512285586393473, "loss": 11.6795, "step": 25228 }, { "epoch": 0.5281127020011722, "grad_norm": 0.31989941000938416, "learning_rate": 0.00018512170521287405, "loss": 11.6682, "step": 25229 }, { "epoch": 0.5281336347651344, "grad_norm": 0.3276727497577667, "learning_rate": 0.00018512055452089368, "loss": 11.6686, "step": 25230 }, { "epoch": 0.5281545675290965, "grad_norm": 0.27600398659706116, "learning_rate": 0.00018511940378799414, "loss": 11.6759, "step": 25231 }, { "epoch": 0.5281755002930587, "grad_norm": 0.31641390919685364, "learning_rate": 0.000185118253014176, "loss": 11.6704, "step": 25232 }, { "epoch": 0.5281964330570208, "grad_norm": 0.29215896129608154, "learning_rate": 0.0001851171021994398, "loss": 11.6715, "step": 25233 }, { "epoch": 0.528217365820983, "grad_norm": 0.3196645975112915, "learning_rate": 0.00018511595134378613, "loss": 11.6775, "step": 25234 }, { "epoch": 0.5282382985849452, "grad_norm": 0.3136625587940216, "learning_rate": 0.00018511480044721548, "loss": 11.6792, "step": 25235 }, { "epoch": 0.5282592313489073, "grad_norm": 0.24116769433021545, "learning_rate": 0.00018511364950972849, "loss": 11.6905, "step": 25236 }, { "epoch": 0.5282801641128695, "grad_norm": 0.26362529397010803, "learning_rate": 0.00018511249853132565, "loss": 11.6756, "step": 25237 }, { "epoch": 0.5283010968768316, "grad_norm": 0.30401626229286194, "learning_rate": 0.0001851113475120075, "loss": 11.6629, "step": 25238 }, { "epoch": 0.5283220296407938, "grad_norm": 0.2952011227607727, "learning_rate": 0.0001851101964517746, "loss": 11.6832, "step": 25239 }, { "epoch": 0.528342962404756, "grad_norm": 0.3270290195941925, "learning_rate": 0.00018510904535062758, "loss": 11.6796, "step": 25240 }, { "epoch": 0.5283638951687181, "grad_norm": 0.2763961851596832, "learning_rate": 0.0001851078942085669, "loss": 11.6597, "step": 25241 }, { "epoch": 0.5283848279326803, "grad_norm": 0.272300660610199, "learning_rate": 0.00018510674302559316, "loss": 11.676, "step": 25242 }, { "epoch": 0.5284057606966424, "grad_norm": 0.3583611845970154, "learning_rate": 0.0001851055918017069, "loss": 11.6917, "step": 25243 }, { "epoch": 0.5284266934606046, "grad_norm": 0.32618239521980286, "learning_rate": 0.00018510444053690866, "loss": 11.6687, "step": 25244 }, { "epoch": 0.5284476262245666, "grad_norm": 0.3202078342437744, "learning_rate": 0.00018510328923119907, "loss": 11.6688, "step": 25245 }, { "epoch": 0.5284685589885288, "grad_norm": 0.26399099826812744, "learning_rate": 0.00018510213788457857, "loss": 11.6507, "step": 25246 }, { "epoch": 0.528489491752491, "grad_norm": 0.3025050461292267, "learning_rate": 0.00018510098649704776, "loss": 11.6633, "step": 25247 }, { "epoch": 0.5285104245164531, "grad_norm": 0.2810087203979492, "learning_rate": 0.00018509983506860724, "loss": 11.6736, "step": 25248 }, { "epoch": 0.5285313572804153, "grad_norm": 0.22679661214351654, "learning_rate": 0.00018509868359925752, "loss": 11.6761, "step": 25249 }, { "epoch": 0.5285522900443774, "grad_norm": 0.3482584059238434, "learning_rate": 0.00018509753208899912, "loss": 11.6723, "step": 25250 }, { "epoch": 0.5285732228083396, "grad_norm": 0.34080538153648376, "learning_rate": 0.00018509638053783265, "loss": 11.6661, "step": 25251 }, { "epoch": 0.5285941555723017, "grad_norm": 0.32433149218559265, "learning_rate": 0.00018509522894575865, "loss": 11.6878, "step": 25252 }, { "epoch": 0.5286150883362639, "grad_norm": 0.2855614423751831, "learning_rate": 0.00018509407731277767, "loss": 11.6629, "step": 25253 }, { "epoch": 0.5286360211002261, "grad_norm": 0.3729962706565857, "learning_rate": 0.00018509292563889025, "loss": 11.6533, "step": 25254 }, { "epoch": 0.5286569538641882, "grad_norm": 0.2578258216381073, "learning_rate": 0.00018509177392409695, "loss": 11.6642, "step": 25255 }, { "epoch": 0.5286778866281504, "grad_norm": 0.30024030804634094, "learning_rate": 0.00018509062216839835, "loss": 11.6648, "step": 25256 }, { "epoch": 0.5286988193921125, "grad_norm": 0.2702416181564331, "learning_rate": 0.00018508947037179497, "loss": 11.6679, "step": 25257 }, { "epoch": 0.5287197521560747, "grad_norm": 0.30399468541145325, "learning_rate": 0.00018508831853428735, "loss": 11.673, "step": 25258 }, { "epoch": 0.5287406849200369, "grad_norm": 0.2702106833457947, "learning_rate": 0.00018508716665587612, "loss": 11.6617, "step": 25259 }, { "epoch": 0.528761617683999, "grad_norm": 0.28458839654922485, "learning_rate": 0.00018508601473656177, "loss": 11.6741, "step": 25260 }, { "epoch": 0.5287825504479612, "grad_norm": 0.27534598112106323, "learning_rate": 0.00018508486277634488, "loss": 11.6782, "step": 25261 }, { "epoch": 0.5288034832119233, "grad_norm": 0.312054306268692, "learning_rate": 0.00018508371077522595, "loss": 11.6821, "step": 25262 }, { "epoch": 0.5288244159758855, "grad_norm": 0.2834876477718353, "learning_rate": 0.0001850825587332056, "loss": 11.6684, "step": 25263 }, { "epoch": 0.5288453487398476, "grad_norm": 0.35468554496765137, "learning_rate": 0.00018508140665028435, "loss": 11.6712, "step": 25264 }, { "epoch": 0.5288662815038098, "grad_norm": 0.2845246195793152, "learning_rate": 0.00018508025452646277, "loss": 11.6921, "step": 25265 }, { "epoch": 0.528887214267772, "grad_norm": 0.2865241765975952, "learning_rate": 0.00018507910236174142, "loss": 11.6588, "step": 25266 }, { "epoch": 0.528908147031734, "grad_norm": 0.24927015602588654, "learning_rate": 0.00018507795015612085, "loss": 11.6778, "step": 25267 }, { "epoch": 0.5289290797956963, "grad_norm": 0.3971802890300751, "learning_rate": 0.00018507679790960154, "loss": 11.7013, "step": 25268 }, { "epoch": 0.5289500125596583, "grad_norm": 0.31859028339385986, "learning_rate": 0.00018507564562218415, "loss": 11.6672, "step": 25269 }, { "epoch": 0.5289709453236205, "grad_norm": 0.3352377116680145, "learning_rate": 0.00018507449329386918, "loss": 11.6616, "step": 25270 }, { "epoch": 0.5289918780875826, "grad_norm": 0.3076331615447998, "learning_rate": 0.00018507334092465722, "loss": 11.6827, "step": 25271 }, { "epoch": 0.5290128108515448, "grad_norm": 0.2630150616168976, "learning_rate": 0.00018507218851454883, "loss": 11.6916, "step": 25272 }, { "epoch": 0.529033743615507, "grad_norm": 0.2543516457080841, "learning_rate": 0.00018507103606354446, "loss": 11.6626, "step": 25273 }, { "epoch": 0.5290546763794691, "grad_norm": 0.26900026202201843, "learning_rate": 0.00018506988357164478, "loss": 11.6605, "step": 25274 }, { "epoch": 0.5290756091434313, "grad_norm": 0.27525684237480164, "learning_rate": 0.00018506873103885026, "loss": 11.6589, "step": 25275 }, { "epoch": 0.5290965419073934, "grad_norm": 0.334718257188797, "learning_rate": 0.00018506757846516156, "loss": 11.6685, "step": 25276 }, { "epoch": 0.5291174746713556, "grad_norm": 0.24947407841682434, "learning_rate": 0.00018506642585057912, "loss": 11.6754, "step": 25277 }, { "epoch": 0.5291384074353178, "grad_norm": 0.2420148253440857, "learning_rate": 0.00018506527319510357, "loss": 11.6758, "step": 25278 }, { "epoch": 0.5291593401992799, "grad_norm": 0.30778053402900696, "learning_rate": 0.00018506412049873542, "loss": 11.6923, "step": 25279 }, { "epoch": 0.5291802729632421, "grad_norm": 0.24010828137397766, "learning_rate": 0.00018506296776147526, "loss": 11.6808, "step": 25280 }, { "epoch": 0.5292012057272042, "grad_norm": 0.37304016947746277, "learning_rate": 0.00018506181498332362, "loss": 11.669, "step": 25281 }, { "epoch": 0.5292221384911664, "grad_norm": 0.29043591022491455, "learning_rate": 0.00018506066216428106, "loss": 11.655, "step": 25282 }, { "epoch": 0.5292430712551285, "grad_norm": 0.3016786575317383, "learning_rate": 0.00018505950930434816, "loss": 11.6556, "step": 25283 }, { "epoch": 0.5292640040190907, "grad_norm": 0.25416678190231323, "learning_rate": 0.0001850583564035254, "loss": 11.651, "step": 25284 }, { "epoch": 0.5292849367830529, "grad_norm": 0.2484370768070221, "learning_rate": 0.00018505720346181342, "loss": 11.6598, "step": 25285 }, { "epoch": 0.529305869547015, "grad_norm": 0.30727484822273254, "learning_rate": 0.00018505605047921273, "loss": 11.6769, "step": 25286 }, { "epoch": 0.5293268023109772, "grad_norm": 0.26425299048423767, "learning_rate": 0.0001850548974557239, "loss": 11.6819, "step": 25287 }, { "epoch": 0.5293477350749393, "grad_norm": 0.28950035572052, "learning_rate": 0.00018505374439134746, "loss": 11.6649, "step": 25288 }, { "epoch": 0.5293686678389015, "grad_norm": 0.29242607951164246, "learning_rate": 0.00018505259128608395, "loss": 11.6705, "step": 25289 }, { "epoch": 0.5293896006028636, "grad_norm": 0.2864341139793396, "learning_rate": 0.00018505143813993402, "loss": 11.669, "step": 25290 }, { "epoch": 0.5294105333668258, "grad_norm": 0.25266793370246887, "learning_rate": 0.0001850502849528981, "loss": 11.6693, "step": 25291 }, { "epoch": 0.529431466130788, "grad_norm": 0.28153562545776367, "learning_rate": 0.00018504913172497686, "loss": 11.6749, "step": 25292 }, { "epoch": 0.52945239889475, "grad_norm": 0.37702876329421997, "learning_rate": 0.00018504797845617077, "loss": 11.6707, "step": 25293 }, { "epoch": 0.5294733316587122, "grad_norm": 0.259319931268692, "learning_rate": 0.00018504682514648043, "loss": 11.6824, "step": 25294 }, { "epoch": 0.5294942644226743, "grad_norm": 0.27435189485549927, "learning_rate": 0.00018504567179590634, "loss": 11.6963, "step": 25295 }, { "epoch": 0.5295151971866365, "grad_norm": 0.2788035273551941, "learning_rate": 0.00018504451840444913, "loss": 11.6543, "step": 25296 }, { "epoch": 0.5295361299505986, "grad_norm": 0.27931392192840576, "learning_rate": 0.00018504336497210928, "loss": 11.6854, "step": 25297 }, { "epoch": 0.5295570627145608, "grad_norm": 0.3005811274051666, "learning_rate": 0.0001850422114988874, "loss": 11.6839, "step": 25298 }, { "epoch": 0.529577995478523, "grad_norm": 0.2717345058917999, "learning_rate": 0.000185041057984784, "loss": 11.6585, "step": 25299 }, { "epoch": 0.5295989282424851, "grad_norm": 0.27872809767723083, "learning_rate": 0.0001850399044297997, "loss": 11.6613, "step": 25300 }, { "epoch": 0.5296198610064473, "grad_norm": 0.37154263257980347, "learning_rate": 0.00018503875083393498, "loss": 11.6586, "step": 25301 }, { "epoch": 0.5296407937704094, "grad_norm": 0.2976931035518646, "learning_rate": 0.00018503759719719045, "loss": 11.6674, "step": 25302 }, { "epoch": 0.5296617265343716, "grad_norm": 0.3195124566555023, "learning_rate": 0.00018503644351956666, "loss": 11.6742, "step": 25303 }, { "epoch": 0.5296826592983338, "grad_norm": 0.33875322341918945, "learning_rate": 0.0001850352898010641, "loss": 11.6776, "step": 25304 }, { "epoch": 0.5297035920622959, "grad_norm": 0.25749948620796204, "learning_rate": 0.00018503413604168342, "loss": 11.666, "step": 25305 }, { "epoch": 0.5297245248262581, "grad_norm": 0.30552271008491516, "learning_rate": 0.0001850329822414251, "loss": 11.6654, "step": 25306 }, { "epoch": 0.5297454575902202, "grad_norm": 0.3013252913951874, "learning_rate": 0.0001850318284002897, "loss": 11.6767, "step": 25307 }, { "epoch": 0.5297663903541824, "grad_norm": 0.35189759731292725, "learning_rate": 0.00018503067451827786, "loss": 11.6583, "step": 25308 }, { "epoch": 0.5297873231181445, "grad_norm": 0.41069477796554565, "learning_rate": 0.00018502952059539, "loss": 11.6891, "step": 25309 }, { "epoch": 0.5298082558821067, "grad_norm": 0.35879722237586975, "learning_rate": 0.00018502836663162678, "loss": 11.6768, "step": 25310 }, { "epoch": 0.5298291886460689, "grad_norm": 0.2894008755683899, "learning_rate": 0.00018502721262698873, "loss": 11.6684, "step": 25311 }, { "epoch": 0.529850121410031, "grad_norm": 0.2882014811038971, "learning_rate": 0.00018502605858147637, "loss": 11.6807, "step": 25312 }, { "epoch": 0.5298710541739932, "grad_norm": 0.3100396692752838, "learning_rate": 0.00018502490449509029, "loss": 11.6689, "step": 25313 }, { "epoch": 0.5298919869379553, "grad_norm": 0.33533474802970886, "learning_rate": 0.00018502375036783103, "loss": 11.6455, "step": 25314 }, { "epoch": 0.5299129197019175, "grad_norm": 0.23155954480171204, "learning_rate": 0.00018502259619969914, "loss": 11.6644, "step": 25315 }, { "epoch": 0.5299338524658795, "grad_norm": 0.28053781390190125, "learning_rate": 0.0001850214419906952, "loss": 11.6713, "step": 25316 }, { "epoch": 0.5299547852298417, "grad_norm": 0.2734779417514801, "learning_rate": 0.00018502028774081976, "loss": 11.6899, "step": 25317 }, { "epoch": 0.5299757179938039, "grad_norm": 0.2828959822654724, "learning_rate": 0.00018501913345007336, "loss": 11.6598, "step": 25318 }, { "epoch": 0.529996650757766, "grad_norm": 0.27779170870780945, "learning_rate": 0.00018501797911845655, "loss": 11.6591, "step": 25319 }, { "epoch": 0.5300175835217282, "grad_norm": 0.31847110390663147, "learning_rate": 0.0001850168247459699, "loss": 11.6663, "step": 25320 }, { "epoch": 0.5300385162856903, "grad_norm": 0.3209281861782074, "learning_rate": 0.00018501567033261392, "loss": 11.6624, "step": 25321 }, { "epoch": 0.5300594490496525, "grad_norm": 0.22683101892471313, "learning_rate": 0.00018501451587838927, "loss": 11.6596, "step": 25322 }, { "epoch": 0.5300803818136147, "grad_norm": 0.2852950692176819, "learning_rate": 0.0001850133613832964, "loss": 11.6753, "step": 25323 }, { "epoch": 0.5301013145775768, "grad_norm": 0.2812007665634155, "learning_rate": 0.0001850122068473359, "loss": 11.684, "step": 25324 }, { "epoch": 0.530122247341539, "grad_norm": 0.29492878913879395, "learning_rate": 0.00018501105227050835, "loss": 11.67, "step": 25325 }, { "epoch": 0.5301431801055011, "grad_norm": 0.3129745423793793, "learning_rate": 0.00018500989765281425, "loss": 11.6587, "step": 25326 }, { "epoch": 0.5301641128694633, "grad_norm": 0.2979305684566498, "learning_rate": 0.00018500874299425424, "loss": 11.6821, "step": 25327 }, { "epoch": 0.5301850456334254, "grad_norm": 0.28063228726387024, "learning_rate": 0.00018500758829482879, "loss": 11.6713, "step": 25328 }, { "epoch": 0.5302059783973876, "grad_norm": 0.3328954875469208, "learning_rate": 0.00018500643355453847, "loss": 11.6829, "step": 25329 }, { "epoch": 0.5302269111613498, "grad_norm": 0.2941615879535675, "learning_rate": 0.00018500527877338387, "loss": 11.6704, "step": 25330 }, { "epoch": 0.5302478439253119, "grad_norm": 0.31288427114486694, "learning_rate": 0.00018500412395136554, "loss": 11.675, "step": 25331 }, { "epoch": 0.5302687766892741, "grad_norm": 0.3068457841873169, "learning_rate": 0.00018500296908848402, "loss": 11.6561, "step": 25332 }, { "epoch": 0.5302897094532362, "grad_norm": 0.29672330617904663, "learning_rate": 0.00018500181418473988, "loss": 11.6547, "step": 25333 }, { "epoch": 0.5303106422171984, "grad_norm": 0.2716021239757538, "learning_rate": 0.00018500065924013364, "loss": 11.664, "step": 25334 }, { "epoch": 0.5303315749811605, "grad_norm": 0.3127221465110779, "learning_rate": 0.0001849995042546659, "loss": 11.6893, "step": 25335 }, { "epoch": 0.5303525077451227, "grad_norm": 0.2714041769504547, "learning_rate": 0.0001849983492283372, "loss": 11.6634, "step": 25336 }, { "epoch": 0.5303734405090849, "grad_norm": 0.2506059408187866, "learning_rate": 0.00018499719416114805, "loss": 11.6817, "step": 25337 }, { "epoch": 0.530394373273047, "grad_norm": 0.2870250642299652, "learning_rate": 0.00018499603905309907, "loss": 11.6751, "step": 25338 }, { "epoch": 0.5304153060370091, "grad_norm": 0.25068506598472595, "learning_rate": 0.0001849948839041908, "loss": 11.6639, "step": 25339 }, { "epoch": 0.5304362388009712, "grad_norm": 0.27854153513908386, "learning_rate": 0.0001849937287144238, "loss": 11.6671, "step": 25340 }, { "epoch": 0.5304571715649334, "grad_norm": 0.31661564111709595, "learning_rate": 0.00018499257348379854, "loss": 11.6814, "step": 25341 }, { "epoch": 0.5304781043288956, "grad_norm": 0.3301750123500824, "learning_rate": 0.0001849914182123157, "loss": 11.6816, "step": 25342 }, { "epoch": 0.5304990370928577, "grad_norm": 0.27353647351264954, "learning_rate": 0.0001849902628999758, "loss": 11.648, "step": 25343 }, { "epoch": 0.5305199698568199, "grad_norm": 0.3817189633846283, "learning_rate": 0.00018498910754677936, "loss": 11.66, "step": 25344 }, { "epoch": 0.530540902620782, "grad_norm": 0.269089937210083, "learning_rate": 0.00018498795215272695, "loss": 11.6676, "step": 25345 }, { "epoch": 0.5305618353847442, "grad_norm": 0.30610567331314087, "learning_rate": 0.0001849867967178191, "loss": 11.6758, "step": 25346 }, { "epoch": 0.5305827681487063, "grad_norm": 0.2688305675983429, "learning_rate": 0.00018498564124205645, "loss": 11.6699, "step": 25347 }, { "epoch": 0.5306037009126685, "grad_norm": 0.30846068263053894, "learning_rate": 0.00018498448572543946, "loss": 11.6702, "step": 25348 }, { "epoch": 0.5306246336766307, "grad_norm": 0.3737180233001709, "learning_rate": 0.00018498333016796873, "loss": 11.6902, "step": 25349 }, { "epoch": 0.5306455664405928, "grad_norm": 0.189498633146286, "learning_rate": 0.00018498217456964483, "loss": 11.6647, "step": 25350 }, { "epoch": 0.530666499204555, "grad_norm": 0.31673502922058105, "learning_rate": 0.00018498101893046827, "loss": 11.6719, "step": 25351 }, { "epoch": 0.5306874319685171, "grad_norm": 0.29758042097091675, "learning_rate": 0.00018497986325043963, "loss": 11.6636, "step": 25352 }, { "epoch": 0.5307083647324793, "grad_norm": 0.3180878162384033, "learning_rate": 0.0001849787075295595, "loss": 11.6692, "step": 25353 }, { "epoch": 0.5307292974964414, "grad_norm": 0.2653726637363434, "learning_rate": 0.00018497755176782835, "loss": 11.6637, "step": 25354 }, { "epoch": 0.5307502302604036, "grad_norm": 0.29673099517822266, "learning_rate": 0.00018497639596524686, "loss": 11.6615, "step": 25355 }, { "epoch": 0.5307711630243658, "grad_norm": 0.27860456705093384, "learning_rate": 0.00018497524012181545, "loss": 11.6625, "step": 25356 }, { "epoch": 0.5307920957883279, "grad_norm": 0.31458306312561035, "learning_rate": 0.00018497408423753475, "loss": 11.664, "step": 25357 }, { "epoch": 0.5308130285522901, "grad_norm": 0.27285289764404297, "learning_rate": 0.00018497292831240534, "loss": 11.6751, "step": 25358 }, { "epoch": 0.5308339613162522, "grad_norm": 0.26154986023902893, "learning_rate": 0.0001849717723464277, "loss": 11.6974, "step": 25359 }, { "epoch": 0.5308548940802144, "grad_norm": 0.3064851760864258, "learning_rate": 0.00018497061633960247, "loss": 11.664, "step": 25360 }, { "epoch": 0.5308758268441766, "grad_norm": 0.24241113662719727, "learning_rate": 0.0001849694602919301, "loss": 11.6719, "step": 25361 }, { "epoch": 0.5308967596081386, "grad_norm": 0.31050774455070496, "learning_rate": 0.00018496830420341125, "loss": 11.6794, "step": 25362 }, { "epoch": 0.5309176923721008, "grad_norm": 0.2717771530151367, "learning_rate": 0.00018496714807404643, "loss": 11.6759, "step": 25363 }, { "epoch": 0.5309386251360629, "grad_norm": 0.2657880187034607, "learning_rate": 0.00018496599190383618, "loss": 11.6626, "step": 25364 }, { "epoch": 0.5309595579000251, "grad_norm": 0.2890256643295288, "learning_rate": 0.00018496483569278112, "loss": 11.6733, "step": 25365 }, { "epoch": 0.5309804906639872, "grad_norm": 0.3781026005744934, "learning_rate": 0.00018496367944088174, "loss": 11.6762, "step": 25366 }, { "epoch": 0.5310014234279494, "grad_norm": 0.2742856740951538, "learning_rate": 0.00018496252314813857, "loss": 11.673, "step": 25367 }, { "epoch": 0.5310223561919116, "grad_norm": 0.33463725447654724, "learning_rate": 0.00018496136681455226, "loss": 11.6788, "step": 25368 }, { "epoch": 0.5310432889558737, "grad_norm": 0.266650527715683, "learning_rate": 0.0001849602104401233, "loss": 11.6536, "step": 25369 }, { "epoch": 0.5310642217198359, "grad_norm": 0.23480211198329926, "learning_rate": 0.00018495905402485228, "loss": 11.6723, "step": 25370 }, { "epoch": 0.531085154483798, "grad_norm": 0.3143581748008728, "learning_rate": 0.00018495789756873974, "loss": 11.6828, "step": 25371 }, { "epoch": 0.5311060872477602, "grad_norm": 0.28094783425331116, "learning_rate": 0.00018495674107178622, "loss": 11.6718, "step": 25372 }, { "epoch": 0.5311270200117223, "grad_norm": 0.2691795527935028, "learning_rate": 0.0001849555845339923, "loss": 11.6585, "step": 25373 }, { "epoch": 0.5311479527756845, "grad_norm": 0.33609697222709656, "learning_rate": 0.00018495442795535852, "loss": 11.6755, "step": 25374 }, { "epoch": 0.5311688855396467, "grad_norm": 0.29596978425979614, "learning_rate": 0.00018495327133588547, "loss": 11.6757, "step": 25375 }, { "epoch": 0.5311898183036088, "grad_norm": 0.3203963339328766, "learning_rate": 0.00018495211467557364, "loss": 11.6822, "step": 25376 }, { "epoch": 0.531210751067571, "grad_norm": 0.2786114811897278, "learning_rate": 0.00018495095797442366, "loss": 11.6757, "step": 25377 }, { "epoch": 0.5312316838315331, "grad_norm": 0.2445453703403473, "learning_rate": 0.00018494980123243604, "loss": 11.6708, "step": 25378 }, { "epoch": 0.5312526165954953, "grad_norm": 0.342926025390625, "learning_rate": 0.00018494864444961134, "loss": 11.6636, "step": 25379 }, { "epoch": 0.5312735493594575, "grad_norm": 0.26493507623672485, "learning_rate": 0.00018494748762595012, "loss": 11.6677, "step": 25380 }, { "epoch": 0.5312944821234196, "grad_norm": 0.28487253189086914, "learning_rate": 0.00018494633076145296, "loss": 11.6717, "step": 25381 }, { "epoch": 0.5313154148873818, "grad_norm": 0.2916209399700165, "learning_rate": 0.0001849451738561204, "loss": 11.6759, "step": 25382 }, { "epoch": 0.5313363476513439, "grad_norm": 0.3024384677410126, "learning_rate": 0.00018494401690995297, "loss": 11.672, "step": 25383 }, { "epoch": 0.531357280415306, "grad_norm": 0.3100051283836365, "learning_rate": 0.00018494285992295127, "loss": 11.6356, "step": 25384 }, { "epoch": 0.5313782131792681, "grad_norm": 0.35109037160873413, "learning_rate": 0.00018494170289511583, "loss": 11.6517, "step": 25385 }, { "epoch": 0.5313991459432303, "grad_norm": 0.33883121609687805, "learning_rate": 0.0001849405458264472, "loss": 11.6774, "step": 25386 }, { "epoch": 0.5314200787071925, "grad_norm": 0.2937224209308624, "learning_rate": 0.00018493938871694593, "loss": 11.6628, "step": 25387 }, { "epoch": 0.5314410114711546, "grad_norm": 0.22336480021476746, "learning_rate": 0.00018493823156661264, "loss": 11.6714, "step": 25388 }, { "epoch": 0.5314619442351168, "grad_norm": 0.2565644681453705, "learning_rate": 0.0001849370743754478, "loss": 11.6637, "step": 25389 }, { "epoch": 0.5314828769990789, "grad_norm": 0.35847344994544983, "learning_rate": 0.00018493591714345204, "loss": 11.657, "step": 25390 }, { "epoch": 0.5315038097630411, "grad_norm": 0.262784868478775, "learning_rate": 0.00018493475987062588, "loss": 11.6628, "step": 25391 }, { "epoch": 0.5315247425270032, "grad_norm": 0.3296913504600525, "learning_rate": 0.00018493360255696984, "loss": 11.6729, "step": 25392 }, { "epoch": 0.5315456752909654, "grad_norm": 0.2486155927181244, "learning_rate": 0.00018493244520248454, "loss": 11.6686, "step": 25393 }, { "epoch": 0.5315666080549276, "grad_norm": 0.2741219997406006, "learning_rate": 0.00018493128780717053, "loss": 11.6726, "step": 25394 }, { "epoch": 0.5315875408188897, "grad_norm": 0.2557632327079773, "learning_rate": 0.00018493013037102832, "loss": 11.6663, "step": 25395 }, { "epoch": 0.5316084735828519, "grad_norm": 0.35039255023002625, "learning_rate": 0.0001849289728940585, "loss": 11.6667, "step": 25396 }, { "epoch": 0.531629406346814, "grad_norm": 0.25422653555870056, "learning_rate": 0.00018492781537626164, "loss": 11.6812, "step": 25397 }, { "epoch": 0.5316503391107762, "grad_norm": 0.3324717581272125, "learning_rate": 0.00018492665781763825, "loss": 11.6611, "step": 25398 }, { "epoch": 0.5316712718747384, "grad_norm": 0.3367762267589569, "learning_rate": 0.00018492550021818893, "loss": 11.6624, "step": 25399 }, { "epoch": 0.5316922046387005, "grad_norm": 0.30440008640289307, "learning_rate": 0.00018492434257791422, "loss": 11.672, "step": 25400 }, { "epoch": 0.5317131374026627, "grad_norm": 0.3435264229774475, "learning_rate": 0.00018492318489681466, "loss": 11.643, "step": 25401 }, { "epoch": 0.5317340701666248, "grad_norm": 0.3384091258049011, "learning_rate": 0.00018492202717489085, "loss": 11.6793, "step": 25402 }, { "epoch": 0.531755002930587, "grad_norm": 0.29384082555770874, "learning_rate": 0.00018492086941214328, "loss": 11.6691, "step": 25403 }, { "epoch": 0.5317759356945491, "grad_norm": 0.3195677399635315, "learning_rate": 0.00018491971160857257, "loss": 11.6813, "step": 25404 }, { "epoch": 0.5317968684585113, "grad_norm": 0.24232542514801025, "learning_rate": 0.00018491855376417927, "loss": 11.658, "step": 25405 }, { "epoch": 0.5318178012224735, "grad_norm": 0.33900412917137146, "learning_rate": 0.0001849173958789639, "loss": 11.6656, "step": 25406 }, { "epoch": 0.5318387339864356, "grad_norm": 0.2930201292037964, "learning_rate": 0.00018491623795292706, "loss": 11.6632, "step": 25407 }, { "epoch": 0.5318596667503978, "grad_norm": 0.2816670536994934, "learning_rate": 0.00018491507998606924, "loss": 11.6609, "step": 25408 }, { "epoch": 0.5318805995143598, "grad_norm": 0.28055471181869507, "learning_rate": 0.00018491392197839108, "loss": 11.6632, "step": 25409 }, { "epoch": 0.531901532278322, "grad_norm": 0.24264898896217346, "learning_rate": 0.00018491276392989305, "loss": 11.6622, "step": 25410 }, { "epoch": 0.5319224650422841, "grad_norm": 0.33112984895706177, "learning_rate": 0.0001849116058405758, "loss": 11.6686, "step": 25411 }, { "epoch": 0.5319433978062463, "grad_norm": 0.2994925379753113, "learning_rate": 0.0001849104477104398, "loss": 11.654, "step": 25412 }, { "epoch": 0.5319643305702085, "grad_norm": 0.3094179630279541, "learning_rate": 0.00018490928953948568, "loss": 11.6721, "step": 25413 }, { "epoch": 0.5319852633341706, "grad_norm": 0.2858622670173645, "learning_rate": 0.00018490813132771393, "loss": 11.6733, "step": 25414 }, { "epoch": 0.5320061960981328, "grad_norm": 0.3222942352294922, "learning_rate": 0.00018490697307512518, "loss": 11.6708, "step": 25415 }, { "epoch": 0.5320271288620949, "grad_norm": 0.28062841296195984, "learning_rate": 0.00018490581478171992, "loss": 11.6709, "step": 25416 }, { "epoch": 0.5320480616260571, "grad_norm": 0.33078432083129883, "learning_rate": 0.0001849046564474987, "loss": 11.6832, "step": 25417 }, { "epoch": 0.5320689943900193, "grad_norm": 0.2696184813976288, "learning_rate": 0.0001849034980724622, "loss": 11.6595, "step": 25418 }, { "epoch": 0.5320899271539814, "grad_norm": 0.31654655933380127, "learning_rate": 0.00018490233965661083, "loss": 11.6809, "step": 25419 }, { "epoch": 0.5321108599179436, "grad_norm": 0.32812434434890747, "learning_rate": 0.00018490118119994522, "loss": 11.6729, "step": 25420 }, { "epoch": 0.5321317926819057, "grad_norm": 0.2663797438144684, "learning_rate": 0.00018490002270246588, "loss": 11.6637, "step": 25421 }, { "epoch": 0.5321527254458679, "grad_norm": 0.2544792890548706, "learning_rate": 0.00018489886416417342, "loss": 11.6543, "step": 25422 }, { "epoch": 0.53217365820983, "grad_norm": 0.28336799144744873, "learning_rate": 0.00018489770558506837, "loss": 11.6595, "step": 25423 }, { "epoch": 0.5321945909737922, "grad_norm": 0.30214110016822815, "learning_rate": 0.0001848965469651513, "loss": 11.6828, "step": 25424 }, { "epoch": 0.5322155237377544, "grad_norm": 0.25701409578323364, "learning_rate": 0.00018489538830442274, "loss": 11.6724, "step": 25425 }, { "epoch": 0.5322364565017165, "grad_norm": 0.36558231711387634, "learning_rate": 0.00018489422960288326, "loss": 11.6619, "step": 25426 }, { "epoch": 0.5322573892656787, "grad_norm": 0.2533605992794037, "learning_rate": 0.00018489307086053347, "loss": 11.6644, "step": 25427 }, { "epoch": 0.5322783220296408, "grad_norm": 0.35683557391166687, "learning_rate": 0.00018489191207737385, "loss": 11.6663, "step": 25428 }, { "epoch": 0.532299254793603, "grad_norm": 0.271523654460907, "learning_rate": 0.000184890753253405, "loss": 11.6741, "step": 25429 }, { "epoch": 0.532320187557565, "grad_norm": 0.31509584188461304, "learning_rate": 0.00018488959438862744, "loss": 11.6552, "step": 25430 }, { "epoch": 0.5323411203215273, "grad_norm": 0.4451298713684082, "learning_rate": 0.00018488843548304177, "loss": 11.6753, "step": 25431 }, { "epoch": 0.5323620530854895, "grad_norm": 0.3176884353160858, "learning_rate": 0.0001848872765366485, "loss": 11.6713, "step": 25432 }, { "epoch": 0.5323829858494515, "grad_norm": 0.2941893935203552, "learning_rate": 0.00018488611754944822, "loss": 11.6835, "step": 25433 }, { "epoch": 0.5324039186134137, "grad_norm": 0.31343942880630493, "learning_rate": 0.0001848849585214415, "loss": 11.6948, "step": 25434 }, { "epoch": 0.5324248513773758, "grad_norm": 0.3040351867675781, "learning_rate": 0.00018488379945262888, "loss": 11.6899, "step": 25435 }, { "epoch": 0.532445784141338, "grad_norm": 0.2955239713191986, "learning_rate": 0.00018488264034301094, "loss": 11.6396, "step": 25436 }, { "epoch": 0.5324667169053002, "grad_norm": 0.30475810170173645, "learning_rate": 0.00018488148119258816, "loss": 11.6585, "step": 25437 }, { "epoch": 0.5324876496692623, "grad_norm": 0.29970020055770874, "learning_rate": 0.00018488032200136115, "loss": 11.6762, "step": 25438 }, { "epoch": 0.5325085824332245, "grad_norm": 0.23835355043411255, "learning_rate": 0.00018487916276933053, "loss": 11.6782, "step": 25439 }, { "epoch": 0.5325295151971866, "grad_norm": 0.31151434779167175, "learning_rate": 0.00018487800349649677, "loss": 11.6744, "step": 25440 }, { "epoch": 0.5325504479611488, "grad_norm": 0.37718749046325684, "learning_rate": 0.0001848768441828604, "loss": 11.6737, "step": 25441 }, { "epoch": 0.5325713807251109, "grad_norm": 0.31865766644477844, "learning_rate": 0.00018487568482842211, "loss": 11.662, "step": 25442 }, { "epoch": 0.5325923134890731, "grad_norm": 0.34199854731559753, "learning_rate": 0.00018487452543318233, "loss": 11.6991, "step": 25443 }, { "epoch": 0.5326132462530353, "grad_norm": 0.3098473846912384, "learning_rate": 0.00018487336599714167, "loss": 11.6679, "step": 25444 }, { "epoch": 0.5326341790169974, "grad_norm": 0.23054887354373932, "learning_rate": 0.00018487220652030069, "loss": 11.6804, "step": 25445 }, { "epoch": 0.5326551117809596, "grad_norm": 0.29725730419158936, "learning_rate": 0.00018487104700265993, "loss": 11.6706, "step": 25446 }, { "epoch": 0.5326760445449217, "grad_norm": 0.38209521770477295, "learning_rate": 0.00018486988744422, "loss": 11.6724, "step": 25447 }, { "epoch": 0.5326969773088839, "grad_norm": 0.31016406416893005, "learning_rate": 0.00018486872784498134, "loss": 11.6691, "step": 25448 }, { "epoch": 0.532717910072846, "grad_norm": 0.2660161256790161, "learning_rate": 0.00018486756820494465, "loss": 11.6472, "step": 25449 }, { "epoch": 0.5327388428368082, "grad_norm": 0.30975109338760376, "learning_rate": 0.00018486640852411035, "loss": 11.6782, "step": 25450 }, { "epoch": 0.5327597756007704, "grad_norm": 0.3490379750728607, "learning_rate": 0.00018486524880247912, "loss": 11.6741, "step": 25451 }, { "epoch": 0.5327807083647325, "grad_norm": 0.265787810087204, "learning_rate": 0.00018486408904005143, "loss": 11.6577, "step": 25452 }, { "epoch": 0.5328016411286947, "grad_norm": 0.2806941866874695, "learning_rate": 0.0001848629292368279, "loss": 11.6675, "step": 25453 }, { "epoch": 0.5328225738926567, "grad_norm": 0.3428210914134979, "learning_rate": 0.00018486176939280907, "loss": 11.6633, "step": 25454 }, { "epoch": 0.532843506656619, "grad_norm": 0.2617189586162567, "learning_rate": 0.00018486060950799544, "loss": 11.6383, "step": 25455 }, { "epoch": 0.5328644394205811, "grad_norm": 0.3476267457008362, "learning_rate": 0.00018485944958238765, "loss": 11.6828, "step": 25456 }, { "epoch": 0.5328853721845432, "grad_norm": 0.23587214946746826, "learning_rate": 0.0001848582896159862, "loss": 11.6718, "step": 25457 }, { "epoch": 0.5329063049485054, "grad_norm": 0.28033655881881714, "learning_rate": 0.0001848571296087917, "loss": 11.6752, "step": 25458 }, { "epoch": 0.5329272377124675, "grad_norm": 0.26943936944007874, "learning_rate": 0.00018485596956080465, "loss": 11.6802, "step": 25459 }, { "epoch": 0.5329481704764297, "grad_norm": 0.28939327597618103, "learning_rate": 0.00018485480947202568, "loss": 11.6639, "step": 25460 }, { "epoch": 0.5329691032403918, "grad_norm": 0.3647167384624481, "learning_rate": 0.00018485364934245525, "loss": 11.6814, "step": 25461 }, { "epoch": 0.532990036004354, "grad_norm": 0.28267067670822144, "learning_rate": 0.000184852489172094, "loss": 11.6815, "step": 25462 }, { "epoch": 0.5330109687683162, "grad_norm": 0.2902608811855316, "learning_rate": 0.00018485132896094243, "loss": 11.6571, "step": 25463 }, { "epoch": 0.5330319015322783, "grad_norm": 0.27697309851646423, "learning_rate": 0.00018485016870900114, "loss": 11.6561, "step": 25464 }, { "epoch": 0.5330528342962405, "grad_norm": 0.2581876218318939, "learning_rate": 0.00018484900841627067, "loss": 11.6836, "step": 25465 }, { "epoch": 0.5330737670602026, "grad_norm": 0.2647774815559387, "learning_rate": 0.0001848478480827516, "loss": 11.6761, "step": 25466 }, { "epoch": 0.5330946998241648, "grad_norm": 0.3485738933086395, "learning_rate": 0.00018484668770844446, "loss": 11.6717, "step": 25467 }, { "epoch": 0.5331156325881269, "grad_norm": 0.3554549515247345, "learning_rate": 0.0001848455272933498, "loss": 11.6737, "step": 25468 }, { "epoch": 0.5331365653520891, "grad_norm": 0.2499980479478836, "learning_rate": 0.0001848443668374682, "loss": 11.6761, "step": 25469 }, { "epoch": 0.5331574981160513, "grad_norm": 0.335387259721756, "learning_rate": 0.00018484320634080023, "loss": 11.6711, "step": 25470 }, { "epoch": 0.5331784308800134, "grad_norm": 0.2797281742095947, "learning_rate": 0.00018484204580334642, "loss": 11.6586, "step": 25471 }, { "epoch": 0.5331993636439756, "grad_norm": 0.2857663035392761, "learning_rate": 0.00018484088522510735, "loss": 11.6566, "step": 25472 }, { "epoch": 0.5332202964079377, "grad_norm": 0.27993258833885193, "learning_rate": 0.00018483972460608353, "loss": 11.6599, "step": 25473 }, { "epoch": 0.5332412291718999, "grad_norm": 0.29375556111335754, "learning_rate": 0.00018483856394627558, "loss": 11.6687, "step": 25474 }, { "epoch": 0.533262161935862, "grad_norm": 0.3220943510532379, "learning_rate": 0.00018483740324568406, "loss": 11.6731, "step": 25475 }, { "epoch": 0.5332830946998242, "grad_norm": 0.2883123755455017, "learning_rate": 0.00018483624250430943, "loss": 11.6821, "step": 25476 }, { "epoch": 0.5333040274637864, "grad_norm": 0.25369682908058167, "learning_rate": 0.00018483508172215238, "loss": 11.6514, "step": 25477 }, { "epoch": 0.5333249602277484, "grad_norm": 0.35167914628982544, "learning_rate": 0.00018483392089921338, "loss": 11.6917, "step": 25478 }, { "epoch": 0.5333458929917106, "grad_norm": 0.33965468406677246, "learning_rate": 0.00018483276003549302, "loss": 11.6806, "step": 25479 }, { "epoch": 0.5333668257556727, "grad_norm": 0.32249072194099426, "learning_rate": 0.00018483159913099185, "loss": 11.6851, "step": 25480 }, { "epoch": 0.5333877585196349, "grad_norm": 0.3579252064228058, "learning_rate": 0.00018483043818571043, "loss": 11.6657, "step": 25481 }, { "epoch": 0.5334086912835971, "grad_norm": 0.2386145442724228, "learning_rate": 0.00018482927719964932, "loss": 11.6756, "step": 25482 }, { "epoch": 0.5334296240475592, "grad_norm": 0.3535143733024597, "learning_rate": 0.00018482811617280908, "loss": 11.6701, "step": 25483 }, { "epoch": 0.5334505568115214, "grad_norm": 0.264914333820343, "learning_rate": 0.00018482695510519027, "loss": 11.6716, "step": 25484 }, { "epoch": 0.5334714895754835, "grad_norm": 0.2984674274921417, "learning_rate": 0.00018482579399679342, "loss": 11.6759, "step": 25485 }, { "epoch": 0.5334924223394457, "grad_norm": 0.22566168010234833, "learning_rate": 0.00018482463284761913, "loss": 11.6748, "step": 25486 }, { "epoch": 0.5335133551034078, "grad_norm": 0.2817055881023407, "learning_rate": 0.00018482347165766795, "loss": 11.6622, "step": 25487 }, { "epoch": 0.53353428786737, "grad_norm": 0.2577061355113983, "learning_rate": 0.0001848223104269404, "loss": 11.6703, "step": 25488 }, { "epoch": 0.5335552206313322, "grad_norm": 0.3068964183330536, "learning_rate": 0.00018482114915543708, "loss": 11.6775, "step": 25489 }, { "epoch": 0.5335761533952943, "grad_norm": 0.36889752745628357, "learning_rate": 0.00018481998784315853, "loss": 11.689, "step": 25490 }, { "epoch": 0.5335970861592565, "grad_norm": 0.2705203890800476, "learning_rate": 0.0001848188264901053, "loss": 11.6723, "step": 25491 }, { "epoch": 0.5336180189232186, "grad_norm": 0.2919521629810333, "learning_rate": 0.00018481766509627798, "loss": 11.687, "step": 25492 }, { "epoch": 0.5336389516871808, "grad_norm": 0.26916226744651794, "learning_rate": 0.0001848165036616771, "loss": 11.6466, "step": 25493 }, { "epoch": 0.5336598844511429, "grad_norm": 0.27334004640579224, "learning_rate": 0.00018481534218630323, "loss": 11.6701, "step": 25494 }, { "epoch": 0.5336808172151051, "grad_norm": 0.27197229862213135, "learning_rate": 0.00018481418067015692, "loss": 11.6852, "step": 25495 }, { "epoch": 0.5337017499790673, "grad_norm": 0.372640460729599, "learning_rate": 0.00018481301911323875, "loss": 11.6902, "step": 25496 }, { "epoch": 0.5337226827430294, "grad_norm": 0.31470146775245667, "learning_rate": 0.00018481185751554927, "loss": 11.6649, "step": 25497 }, { "epoch": 0.5337436155069916, "grad_norm": 0.31117793917655945, "learning_rate": 0.00018481069587708899, "loss": 11.6727, "step": 25498 }, { "epoch": 0.5337645482709537, "grad_norm": 0.24783210456371307, "learning_rate": 0.00018480953419785853, "loss": 11.6631, "step": 25499 }, { "epoch": 0.5337854810349159, "grad_norm": 0.22996893525123596, "learning_rate": 0.00018480837247785844, "loss": 11.6683, "step": 25500 }, { "epoch": 0.533806413798878, "grad_norm": 0.32087332010269165, "learning_rate": 0.00018480721071708925, "loss": 11.68, "step": 25501 }, { "epoch": 0.5338273465628401, "grad_norm": 0.42090585827827454, "learning_rate": 0.00018480604891555152, "loss": 11.6742, "step": 25502 }, { "epoch": 0.5338482793268023, "grad_norm": 0.2530011534690857, "learning_rate": 0.00018480488707324583, "loss": 11.6725, "step": 25503 }, { "epoch": 0.5338692120907644, "grad_norm": 0.37928712368011475, "learning_rate": 0.00018480372519017274, "loss": 11.6818, "step": 25504 }, { "epoch": 0.5338901448547266, "grad_norm": 0.33625829219818115, "learning_rate": 0.0001848025632663328, "loss": 11.6849, "step": 25505 }, { "epoch": 0.5339110776186887, "grad_norm": 0.26615869998931885, "learning_rate": 0.00018480140130172656, "loss": 11.6753, "step": 25506 }, { "epoch": 0.5339320103826509, "grad_norm": 0.2807794511318207, "learning_rate": 0.0001848002392963546, "loss": 11.6759, "step": 25507 }, { "epoch": 0.5339529431466131, "grad_norm": 0.2673929035663605, "learning_rate": 0.00018479907725021746, "loss": 11.656, "step": 25508 }, { "epoch": 0.5339738759105752, "grad_norm": 0.2741680443286896, "learning_rate": 0.00018479791516331573, "loss": 11.6654, "step": 25509 }, { "epoch": 0.5339948086745374, "grad_norm": 0.3400307595729828, "learning_rate": 0.0001847967530356499, "loss": 11.6833, "step": 25510 }, { "epoch": 0.5340157414384995, "grad_norm": 0.2637183666229248, "learning_rate": 0.00018479559086722057, "loss": 11.6651, "step": 25511 }, { "epoch": 0.5340366742024617, "grad_norm": 0.2910568416118622, "learning_rate": 0.00018479442865802835, "loss": 11.6794, "step": 25512 }, { "epoch": 0.5340576069664238, "grad_norm": 0.32370370626449585, "learning_rate": 0.0001847932664080737, "loss": 11.6566, "step": 25513 }, { "epoch": 0.534078539730386, "grad_norm": 0.29905277490615845, "learning_rate": 0.00018479210411735726, "loss": 11.671, "step": 25514 }, { "epoch": 0.5340994724943482, "grad_norm": 0.38173285126686096, "learning_rate": 0.00018479094178587953, "loss": 11.641, "step": 25515 }, { "epoch": 0.5341204052583103, "grad_norm": 0.3882670998573303, "learning_rate": 0.0001847897794136411, "loss": 11.6767, "step": 25516 }, { "epoch": 0.5341413380222725, "grad_norm": 0.28662270307540894, "learning_rate": 0.00018478861700064255, "loss": 11.6651, "step": 25517 }, { "epoch": 0.5341622707862346, "grad_norm": 0.22806116938591003, "learning_rate": 0.0001847874545468844, "loss": 11.6594, "step": 25518 }, { "epoch": 0.5341832035501968, "grad_norm": 0.2812788188457489, "learning_rate": 0.00018478629205236722, "loss": 11.668, "step": 25519 }, { "epoch": 0.534204136314159, "grad_norm": 0.33423295617103577, "learning_rate": 0.00018478512951709155, "loss": 11.665, "step": 25520 }, { "epoch": 0.5342250690781211, "grad_norm": 0.34817904233932495, "learning_rate": 0.00018478396694105798, "loss": 11.67, "step": 25521 }, { "epoch": 0.5342460018420833, "grad_norm": 0.3153226971626282, "learning_rate": 0.00018478280432426705, "loss": 11.6658, "step": 25522 }, { "epoch": 0.5342669346060454, "grad_norm": 0.4318573772907257, "learning_rate": 0.00018478164166671937, "loss": 11.6669, "step": 25523 }, { "epoch": 0.5342878673700076, "grad_norm": 0.24409988522529602, "learning_rate": 0.00018478047896841542, "loss": 11.6726, "step": 25524 }, { "epoch": 0.5343088001339696, "grad_norm": 0.2565176784992218, "learning_rate": 0.00018477931622935579, "loss": 11.6753, "step": 25525 }, { "epoch": 0.5343297328979318, "grad_norm": 0.2795717716217041, "learning_rate": 0.00018477815344954105, "loss": 11.6631, "step": 25526 }, { "epoch": 0.534350665661894, "grad_norm": 0.3770208954811096, "learning_rate": 0.00018477699062897175, "loss": 11.6598, "step": 25527 }, { "epoch": 0.5343715984258561, "grad_norm": 0.3581334352493286, "learning_rate": 0.00018477582776764846, "loss": 11.6796, "step": 25528 }, { "epoch": 0.5343925311898183, "grad_norm": 0.2871840000152588, "learning_rate": 0.00018477466486557174, "loss": 11.6792, "step": 25529 }, { "epoch": 0.5344134639537804, "grad_norm": 0.2603724002838135, "learning_rate": 0.0001847735019227421, "loss": 11.6712, "step": 25530 }, { "epoch": 0.5344343967177426, "grad_norm": 0.28872302174568176, "learning_rate": 0.0001847723389391602, "loss": 11.6714, "step": 25531 }, { "epoch": 0.5344553294817047, "grad_norm": 0.3306794762611389, "learning_rate": 0.00018477117591482648, "loss": 11.6637, "step": 25532 }, { "epoch": 0.5344762622456669, "grad_norm": 0.2784012258052826, "learning_rate": 0.0001847700128497416, "loss": 11.6743, "step": 25533 }, { "epoch": 0.5344971950096291, "grad_norm": 0.26236796379089355, "learning_rate": 0.00018476884974390605, "loss": 11.6694, "step": 25534 }, { "epoch": 0.5345181277735912, "grad_norm": 0.3803805410861969, "learning_rate": 0.0001847676865973204, "loss": 11.6652, "step": 25535 }, { "epoch": 0.5345390605375534, "grad_norm": 0.30166494846343994, "learning_rate": 0.00018476652340998525, "loss": 11.6748, "step": 25536 }, { "epoch": 0.5345599933015155, "grad_norm": 0.2888396680355072, "learning_rate": 0.00018476536018190113, "loss": 11.6805, "step": 25537 }, { "epoch": 0.5345809260654777, "grad_norm": 0.25232812762260437, "learning_rate": 0.00018476419691306864, "loss": 11.6767, "step": 25538 }, { "epoch": 0.5346018588294399, "grad_norm": 0.3164413571357727, "learning_rate": 0.00018476303360348824, "loss": 11.6873, "step": 25539 }, { "epoch": 0.534622791593402, "grad_norm": 0.35820379853248596, "learning_rate": 0.00018476187025316058, "loss": 11.6782, "step": 25540 }, { "epoch": 0.5346437243573642, "grad_norm": 0.2541124224662781, "learning_rate": 0.00018476070686208618, "loss": 11.6539, "step": 25541 }, { "epoch": 0.5346646571213263, "grad_norm": 0.35007014870643616, "learning_rate": 0.00018475954343026563, "loss": 11.6784, "step": 25542 }, { "epoch": 0.5346855898852885, "grad_norm": 0.37696266174316406, "learning_rate": 0.00018475837995769945, "loss": 11.6646, "step": 25543 }, { "epoch": 0.5347065226492506, "grad_norm": 0.31939369440078735, "learning_rate": 0.0001847572164443882, "loss": 11.6583, "step": 25544 }, { "epoch": 0.5347274554132128, "grad_norm": 0.3257778286933899, "learning_rate": 0.0001847560528903325, "loss": 11.658, "step": 25545 }, { "epoch": 0.534748388177175, "grad_norm": 0.32867974042892456, "learning_rate": 0.00018475488929553286, "loss": 11.6792, "step": 25546 }, { "epoch": 0.534769320941137, "grad_norm": 0.38166800141334534, "learning_rate": 0.00018475372565998982, "loss": 11.6835, "step": 25547 }, { "epoch": 0.5347902537050992, "grad_norm": 0.28399795293807983, "learning_rate": 0.00018475256198370395, "loss": 11.6794, "step": 25548 }, { "epoch": 0.5348111864690613, "grad_norm": 0.3262753486633301, "learning_rate": 0.00018475139826667588, "loss": 11.6905, "step": 25549 }, { "epoch": 0.5348321192330235, "grad_norm": 0.30125322937965393, "learning_rate": 0.0001847502345089061, "loss": 11.6873, "step": 25550 }, { "epoch": 0.5348530519969856, "grad_norm": 0.30492720007896423, "learning_rate": 0.00018474907071039514, "loss": 11.6862, "step": 25551 }, { "epoch": 0.5348739847609478, "grad_norm": 0.2834666669368744, "learning_rate": 0.00018474790687114366, "loss": 11.6691, "step": 25552 }, { "epoch": 0.53489491752491, "grad_norm": 0.3154173195362091, "learning_rate": 0.00018474674299115215, "loss": 11.6885, "step": 25553 }, { "epoch": 0.5349158502888721, "grad_norm": 0.23719406127929688, "learning_rate": 0.00018474557907042117, "loss": 11.6806, "step": 25554 }, { "epoch": 0.5349367830528343, "grad_norm": 0.29625168442726135, "learning_rate": 0.0001847444151089513, "loss": 11.677, "step": 25555 }, { "epoch": 0.5349577158167964, "grad_norm": 0.31333088874816895, "learning_rate": 0.00018474325110674305, "loss": 11.6737, "step": 25556 }, { "epoch": 0.5349786485807586, "grad_norm": 0.2795151472091675, "learning_rate": 0.00018474208706379706, "loss": 11.6753, "step": 25557 }, { "epoch": 0.5349995813447208, "grad_norm": 0.42197608947753906, "learning_rate": 0.00018474092298011386, "loss": 11.6627, "step": 25558 }, { "epoch": 0.5350205141086829, "grad_norm": 0.31700584292411804, "learning_rate": 0.00018473975885569398, "loss": 11.682, "step": 25559 }, { "epoch": 0.5350414468726451, "grad_norm": 0.3526296615600586, "learning_rate": 0.000184738594690538, "loss": 11.675, "step": 25560 }, { "epoch": 0.5350623796366072, "grad_norm": 0.3471931219100952, "learning_rate": 0.0001847374304846465, "loss": 11.6713, "step": 25561 }, { "epoch": 0.5350833124005694, "grad_norm": 0.2810949981212616, "learning_rate": 0.00018473626623802, "loss": 11.6656, "step": 25562 }, { "epoch": 0.5351042451645315, "grad_norm": 0.29236671328544617, "learning_rate": 0.0001847351019506591, "loss": 11.6688, "step": 25563 }, { "epoch": 0.5351251779284937, "grad_norm": 0.2608206272125244, "learning_rate": 0.0001847339376225643, "loss": 11.6736, "step": 25564 }, { "epoch": 0.5351461106924559, "grad_norm": 0.33496159315109253, "learning_rate": 0.00018473277325373622, "loss": 11.6623, "step": 25565 }, { "epoch": 0.535167043456418, "grad_norm": 0.2318350076675415, "learning_rate": 0.00018473160884417537, "loss": 11.6687, "step": 25566 }, { "epoch": 0.5351879762203802, "grad_norm": 0.26365336775779724, "learning_rate": 0.00018473044439388237, "loss": 11.6723, "step": 25567 }, { "epoch": 0.5352089089843423, "grad_norm": 0.29966989159584045, "learning_rate": 0.00018472927990285777, "loss": 11.6802, "step": 25568 }, { "epoch": 0.5352298417483045, "grad_norm": 0.32167792320251465, "learning_rate": 0.00018472811537110207, "loss": 11.6859, "step": 25569 }, { "epoch": 0.5352507745122665, "grad_norm": 0.37701600790023804, "learning_rate": 0.00018472695079861589, "loss": 11.6855, "step": 25570 }, { "epoch": 0.5352717072762287, "grad_norm": 0.32923439145088196, "learning_rate": 0.00018472578618539974, "loss": 11.6637, "step": 25571 }, { "epoch": 0.535292640040191, "grad_norm": 0.28411081433296204, "learning_rate": 0.0001847246215314542, "loss": 11.6607, "step": 25572 }, { "epoch": 0.535313572804153, "grad_norm": 0.2707291543483734, "learning_rate": 0.00018472345683677986, "loss": 11.6756, "step": 25573 }, { "epoch": 0.5353345055681152, "grad_norm": 0.28296616673469543, "learning_rate": 0.00018472229210137725, "loss": 11.6758, "step": 25574 }, { "epoch": 0.5353554383320773, "grad_norm": 0.30400511622428894, "learning_rate": 0.00018472112732524694, "loss": 11.6536, "step": 25575 }, { "epoch": 0.5353763710960395, "grad_norm": 0.33304059505462646, "learning_rate": 0.0001847199625083895, "loss": 11.6805, "step": 25576 }, { "epoch": 0.5353973038600017, "grad_norm": 0.2932228446006775, "learning_rate": 0.00018471879765080543, "loss": 11.6743, "step": 25577 }, { "epoch": 0.5354182366239638, "grad_norm": 0.3692340552806854, "learning_rate": 0.0001847176327524954, "loss": 11.6794, "step": 25578 }, { "epoch": 0.535439169387926, "grad_norm": 0.28605982661247253, "learning_rate": 0.00018471646781345987, "loss": 11.662, "step": 25579 }, { "epoch": 0.5354601021518881, "grad_norm": 0.3195282518863678, "learning_rate": 0.00018471530283369942, "loss": 11.6785, "step": 25580 }, { "epoch": 0.5354810349158503, "grad_norm": 0.2799101173877716, "learning_rate": 0.00018471413781321466, "loss": 11.6599, "step": 25581 }, { "epoch": 0.5355019676798124, "grad_norm": 0.49481871724128723, "learning_rate": 0.0001847129727520061, "loss": 11.5938, "step": 25582 }, { "epoch": 0.5355229004437746, "grad_norm": 0.4311964809894562, "learning_rate": 0.00018471180765007432, "loss": 11.6782, "step": 25583 }, { "epoch": 0.5355438332077368, "grad_norm": 0.29985201358795166, "learning_rate": 0.00018471064250741987, "loss": 11.6657, "step": 25584 }, { "epoch": 0.5355647659716989, "grad_norm": 0.26560264825820923, "learning_rate": 0.00018470947732404332, "loss": 11.6761, "step": 25585 }, { "epoch": 0.5355856987356611, "grad_norm": 0.25301557779312134, "learning_rate": 0.00018470831209994523, "loss": 11.6668, "step": 25586 }, { "epoch": 0.5356066314996232, "grad_norm": 0.36063075065612793, "learning_rate": 0.00018470714683512616, "loss": 11.6727, "step": 25587 }, { "epoch": 0.5356275642635854, "grad_norm": 0.2650620639324188, "learning_rate": 0.00018470598152958668, "loss": 11.6428, "step": 25588 }, { "epoch": 0.5356484970275475, "grad_norm": 0.31404614448547363, "learning_rate": 0.0001847048161833273, "loss": 11.6538, "step": 25589 }, { "epoch": 0.5356694297915097, "grad_norm": 0.34837570786476135, "learning_rate": 0.00018470365079634862, "loss": 11.6707, "step": 25590 }, { "epoch": 0.5356903625554719, "grad_norm": 0.31513991951942444, "learning_rate": 0.00018470248536865124, "loss": 11.6937, "step": 25591 }, { "epoch": 0.535711295319434, "grad_norm": 0.30455225706100464, "learning_rate": 0.00018470131990023565, "loss": 11.6623, "step": 25592 }, { "epoch": 0.5357322280833962, "grad_norm": 0.330485999584198, "learning_rate": 0.00018470015439110243, "loss": 11.6575, "step": 25593 }, { "epoch": 0.5357531608473582, "grad_norm": 0.263377845287323, "learning_rate": 0.00018469898884125217, "loss": 11.6683, "step": 25594 }, { "epoch": 0.5357740936113204, "grad_norm": 0.3432132303714752, "learning_rate": 0.0001846978232506854, "loss": 11.6713, "step": 25595 }, { "epoch": 0.5357950263752826, "grad_norm": 0.2686833441257477, "learning_rate": 0.00018469665761940268, "loss": 11.6456, "step": 25596 }, { "epoch": 0.5358159591392447, "grad_norm": 0.24811694025993347, "learning_rate": 0.00018469549194740457, "loss": 11.6557, "step": 25597 }, { "epoch": 0.5358368919032069, "grad_norm": 0.2624465525150299, "learning_rate": 0.00018469432623469166, "loss": 11.6487, "step": 25598 }, { "epoch": 0.535857824667169, "grad_norm": 0.32885220646858215, "learning_rate": 0.00018469316048126447, "loss": 11.6718, "step": 25599 }, { "epoch": 0.5358787574311312, "grad_norm": 0.27568474411964417, "learning_rate": 0.00018469199468712359, "loss": 11.6803, "step": 25600 }, { "epoch": 0.5358996901950933, "grad_norm": 0.2666566073894501, "learning_rate": 0.0001846908288522696, "loss": 11.6677, "step": 25601 }, { "epoch": 0.5359206229590555, "grad_norm": 0.30689316987991333, "learning_rate": 0.000184689662976703, "loss": 11.6827, "step": 25602 }, { "epoch": 0.5359415557230177, "grad_norm": 0.30695465207099915, "learning_rate": 0.0001846884970604244, "loss": 11.679, "step": 25603 }, { "epoch": 0.5359624884869798, "grad_norm": 0.35515615344047546, "learning_rate": 0.00018468733110343433, "loss": 11.6912, "step": 25604 }, { "epoch": 0.535983421250942, "grad_norm": 0.2746037244796753, "learning_rate": 0.00018468616510573335, "loss": 11.6757, "step": 25605 }, { "epoch": 0.5360043540149041, "grad_norm": 0.28896060585975647, "learning_rate": 0.00018468499906732207, "loss": 11.6774, "step": 25606 }, { "epoch": 0.5360252867788663, "grad_norm": 0.31814754009246826, "learning_rate": 0.00018468383298820098, "loss": 11.6814, "step": 25607 }, { "epoch": 0.5360462195428284, "grad_norm": 0.2818993330001831, "learning_rate": 0.00018468266686837067, "loss": 11.6577, "step": 25608 }, { "epoch": 0.5360671523067906, "grad_norm": 0.45803242921829224, "learning_rate": 0.0001846815007078317, "loss": 11.649, "step": 25609 }, { "epoch": 0.5360880850707528, "grad_norm": 0.2628225088119507, "learning_rate": 0.00018468033450658468, "loss": 11.6693, "step": 25610 }, { "epoch": 0.5361090178347149, "grad_norm": 0.31041398644447327, "learning_rate": 0.0001846791682646301, "loss": 11.671, "step": 25611 }, { "epoch": 0.5361299505986771, "grad_norm": 0.3562757670879364, "learning_rate": 0.00018467800198196857, "loss": 11.6699, "step": 25612 }, { "epoch": 0.5361508833626392, "grad_norm": 0.2941368818283081, "learning_rate": 0.0001846768356586006, "loss": 11.6644, "step": 25613 }, { "epoch": 0.5361718161266014, "grad_norm": 0.29413530230522156, "learning_rate": 0.00018467566929452679, "loss": 11.6721, "step": 25614 }, { "epoch": 0.5361927488905636, "grad_norm": 0.3139927387237549, "learning_rate": 0.00018467450288974768, "loss": 11.6866, "step": 25615 }, { "epoch": 0.5362136816545257, "grad_norm": 0.22844022512435913, "learning_rate": 0.00018467333644426383, "loss": 11.6744, "step": 25616 }, { "epoch": 0.5362346144184879, "grad_norm": 0.27767258882522583, "learning_rate": 0.00018467216995807583, "loss": 11.6746, "step": 25617 }, { "epoch": 0.5362555471824499, "grad_norm": 0.3191639184951782, "learning_rate": 0.00018467100343118423, "loss": 11.6933, "step": 25618 }, { "epoch": 0.5362764799464121, "grad_norm": 0.25294962525367737, "learning_rate": 0.00018466983686358957, "loss": 11.6678, "step": 25619 }, { "epoch": 0.5362974127103742, "grad_norm": 0.274313360452652, "learning_rate": 0.0001846686702552924, "loss": 11.6557, "step": 25620 }, { "epoch": 0.5363183454743364, "grad_norm": 0.28367483615875244, "learning_rate": 0.00018466750360629332, "loss": 11.669, "step": 25621 }, { "epoch": 0.5363392782382986, "grad_norm": 0.24513040482997894, "learning_rate": 0.00018466633691659287, "loss": 11.6613, "step": 25622 }, { "epoch": 0.5363602110022607, "grad_norm": 0.2845200002193451, "learning_rate": 0.00018466517018619162, "loss": 11.6717, "step": 25623 }, { "epoch": 0.5363811437662229, "grad_norm": 0.30214497447013855, "learning_rate": 0.00018466400341509014, "loss": 11.6799, "step": 25624 }, { "epoch": 0.536402076530185, "grad_norm": 0.2724658250808716, "learning_rate": 0.00018466283660328897, "loss": 11.6713, "step": 25625 }, { "epoch": 0.5364230092941472, "grad_norm": 0.2575971484184265, "learning_rate": 0.00018466166975078866, "loss": 11.6595, "step": 25626 }, { "epoch": 0.5364439420581093, "grad_norm": 0.28463032841682434, "learning_rate": 0.0001846605028575898, "loss": 11.6549, "step": 25627 }, { "epoch": 0.5364648748220715, "grad_norm": 0.25495725870132446, "learning_rate": 0.00018465933592369296, "loss": 11.677, "step": 25628 }, { "epoch": 0.5364858075860337, "grad_norm": 0.24398453533649445, "learning_rate": 0.00018465816894909867, "loss": 11.6672, "step": 25629 }, { "epoch": 0.5365067403499958, "grad_norm": 0.2715839445590973, "learning_rate": 0.00018465700193380746, "loss": 11.6667, "step": 25630 }, { "epoch": 0.536527673113958, "grad_norm": 0.3101653754711151, "learning_rate": 0.00018465583487781997, "loss": 11.6618, "step": 25631 }, { "epoch": 0.5365486058779201, "grad_norm": 0.23054181039333344, "learning_rate": 0.00018465466778113673, "loss": 11.6626, "step": 25632 }, { "epoch": 0.5365695386418823, "grad_norm": 0.302016943693161, "learning_rate": 0.00018465350064375828, "loss": 11.6898, "step": 25633 }, { "epoch": 0.5365904714058445, "grad_norm": 0.2776485085487366, "learning_rate": 0.0001846523334656852, "loss": 11.6608, "step": 25634 }, { "epoch": 0.5366114041698066, "grad_norm": 0.31255683302879333, "learning_rate": 0.00018465116624691804, "loss": 11.6726, "step": 25635 }, { "epoch": 0.5366323369337688, "grad_norm": 0.2908692955970764, "learning_rate": 0.00018464999898745737, "loss": 11.6707, "step": 25636 }, { "epoch": 0.5366532696977309, "grad_norm": 0.30856215953826904, "learning_rate": 0.00018464883168730375, "loss": 11.6635, "step": 25637 }, { "epoch": 0.5366742024616931, "grad_norm": 0.34180977940559387, "learning_rate": 0.00018464766434645778, "loss": 11.6707, "step": 25638 }, { "epoch": 0.5366951352256552, "grad_norm": 0.2816932797431946, "learning_rate": 0.00018464649696491993, "loss": 11.6713, "step": 25639 }, { "epoch": 0.5367160679896174, "grad_norm": 0.2515699565410614, "learning_rate": 0.00018464532954269082, "loss": 11.6804, "step": 25640 }, { "epoch": 0.5367370007535796, "grad_norm": 0.2645651698112488, "learning_rate": 0.00018464416207977102, "loss": 11.6518, "step": 25641 }, { "epoch": 0.5367579335175416, "grad_norm": 0.306297242641449, "learning_rate": 0.00018464299457616106, "loss": 11.6673, "step": 25642 }, { "epoch": 0.5367788662815038, "grad_norm": 0.35208889842033386, "learning_rate": 0.00018464182703186152, "loss": 11.6548, "step": 25643 }, { "epoch": 0.5367997990454659, "grad_norm": 0.3656630516052246, "learning_rate": 0.00018464065944687297, "loss": 11.68, "step": 25644 }, { "epoch": 0.5368207318094281, "grad_norm": 0.2847119867801666, "learning_rate": 0.00018463949182119594, "loss": 11.6774, "step": 25645 }, { "epoch": 0.5368416645733902, "grad_norm": 0.27218106389045715, "learning_rate": 0.00018463832415483103, "loss": 11.6649, "step": 25646 }, { "epoch": 0.5368625973373524, "grad_norm": 0.2850038409233093, "learning_rate": 0.00018463715644777878, "loss": 11.6427, "step": 25647 }, { "epoch": 0.5368835301013146, "grad_norm": 0.3374965786933899, "learning_rate": 0.00018463598870003972, "loss": 11.6742, "step": 25648 }, { "epoch": 0.5369044628652767, "grad_norm": 0.2794475257396698, "learning_rate": 0.0001846348209116145, "loss": 11.6659, "step": 25649 }, { "epoch": 0.5369253956292389, "grad_norm": 0.3198646008968353, "learning_rate": 0.00018463365308250354, "loss": 11.6633, "step": 25650 }, { "epoch": 0.536946328393201, "grad_norm": 0.2878093719482422, "learning_rate": 0.00018463248521270758, "loss": 11.6741, "step": 25651 }, { "epoch": 0.5369672611571632, "grad_norm": 0.31227609515190125, "learning_rate": 0.00018463131730222702, "loss": 11.6881, "step": 25652 }, { "epoch": 0.5369881939211254, "grad_norm": 0.40213778614997864, "learning_rate": 0.00018463014935106252, "loss": 11.6677, "step": 25653 }, { "epoch": 0.5370091266850875, "grad_norm": 0.3193695545196533, "learning_rate": 0.00018462898135921462, "loss": 11.6514, "step": 25654 }, { "epoch": 0.5370300594490497, "grad_norm": 0.37782570719718933, "learning_rate": 0.00018462781332668387, "loss": 11.6964, "step": 25655 }, { "epoch": 0.5370509922130118, "grad_norm": 0.2980680763721466, "learning_rate": 0.00018462664525347082, "loss": 11.6663, "step": 25656 }, { "epoch": 0.537071924976974, "grad_norm": 0.34170931577682495, "learning_rate": 0.00018462547713957606, "loss": 11.6919, "step": 25657 }, { "epoch": 0.5370928577409361, "grad_norm": 0.4717049300670624, "learning_rate": 0.00018462430898500012, "loss": 11.676, "step": 25658 }, { "epoch": 0.5371137905048983, "grad_norm": 0.305545449256897, "learning_rate": 0.00018462314078974358, "loss": 11.6692, "step": 25659 }, { "epoch": 0.5371347232688605, "grad_norm": 0.31146880984306335, "learning_rate": 0.000184621972553807, "loss": 11.6635, "step": 25660 }, { "epoch": 0.5371556560328226, "grad_norm": 0.3076046407222748, "learning_rate": 0.00018462080427719094, "loss": 11.6702, "step": 25661 }, { "epoch": 0.5371765887967848, "grad_norm": 0.32040929794311523, "learning_rate": 0.000184619635959896, "loss": 11.6657, "step": 25662 }, { "epoch": 0.5371975215607468, "grad_norm": 0.28323277831077576, "learning_rate": 0.00018461846760192268, "loss": 11.6831, "step": 25663 }, { "epoch": 0.537218454324709, "grad_norm": 0.3102104365825653, "learning_rate": 0.00018461729920327155, "loss": 11.6584, "step": 25664 }, { "epoch": 0.5372393870886711, "grad_norm": 0.2513549327850342, "learning_rate": 0.0001846161307639432, "loss": 11.6584, "step": 25665 }, { "epoch": 0.5372603198526333, "grad_norm": 0.3118746876716614, "learning_rate": 0.0001846149622839382, "loss": 11.6933, "step": 25666 }, { "epoch": 0.5372812526165955, "grad_norm": 0.30502551794052124, "learning_rate": 0.00018461379376325706, "loss": 11.678, "step": 25667 }, { "epoch": 0.5373021853805576, "grad_norm": 0.36751553416252136, "learning_rate": 0.00018461262520190038, "loss": 11.6799, "step": 25668 }, { "epoch": 0.5373231181445198, "grad_norm": 0.267617791891098, "learning_rate": 0.00018461145659986875, "loss": 11.6658, "step": 25669 }, { "epoch": 0.5373440509084819, "grad_norm": 0.37928465008735657, "learning_rate": 0.00018461028795716268, "loss": 11.6773, "step": 25670 }, { "epoch": 0.5373649836724441, "grad_norm": 0.32779303193092346, "learning_rate": 0.00018460911927378274, "loss": 11.6799, "step": 25671 }, { "epoch": 0.5373859164364062, "grad_norm": 0.3217572569847107, "learning_rate": 0.0001846079505497295, "loss": 11.6503, "step": 25672 }, { "epoch": 0.5374068492003684, "grad_norm": 0.26090195775032043, "learning_rate": 0.00018460678178500352, "loss": 11.6522, "step": 25673 }, { "epoch": 0.5374277819643306, "grad_norm": 0.2838946580886841, "learning_rate": 0.00018460561297960537, "loss": 11.6864, "step": 25674 }, { "epoch": 0.5374487147282927, "grad_norm": 0.29277747869491577, "learning_rate": 0.00018460444413353558, "loss": 11.6626, "step": 25675 }, { "epoch": 0.5374696474922549, "grad_norm": 0.241801455616951, "learning_rate": 0.00018460327524679477, "loss": 11.6855, "step": 25676 }, { "epoch": 0.537490580256217, "grad_norm": 0.25917649269104004, "learning_rate": 0.0001846021063193835, "loss": 11.673, "step": 25677 }, { "epoch": 0.5375115130201792, "grad_norm": 0.3077061176300049, "learning_rate": 0.00018460093735130225, "loss": 11.6729, "step": 25678 }, { "epoch": 0.5375324457841414, "grad_norm": 0.262112557888031, "learning_rate": 0.00018459976834255164, "loss": 11.6753, "step": 25679 }, { "epoch": 0.5375533785481035, "grad_norm": 0.2872893214225769, "learning_rate": 0.00018459859929313225, "loss": 11.6639, "step": 25680 }, { "epoch": 0.5375743113120657, "grad_norm": 0.2963988780975342, "learning_rate": 0.0001845974302030446, "loss": 11.6673, "step": 25681 }, { "epoch": 0.5375952440760278, "grad_norm": 0.2614923417568207, "learning_rate": 0.00018459626107228925, "loss": 11.6883, "step": 25682 }, { "epoch": 0.53761617683999, "grad_norm": 0.24275514483451843, "learning_rate": 0.00018459509190086682, "loss": 11.6808, "step": 25683 }, { "epoch": 0.5376371096039521, "grad_norm": 0.2651824951171875, "learning_rate": 0.00018459392268877785, "loss": 11.6639, "step": 25684 }, { "epoch": 0.5376580423679143, "grad_norm": 0.26193562150001526, "learning_rate": 0.00018459275343602282, "loss": 11.6639, "step": 25685 }, { "epoch": 0.5376789751318765, "grad_norm": 0.3239848017692566, "learning_rate": 0.00018459158414260243, "loss": 11.6885, "step": 25686 }, { "epoch": 0.5376999078958385, "grad_norm": 0.3185444474220276, "learning_rate": 0.00018459041480851712, "loss": 11.6599, "step": 25687 }, { "epoch": 0.5377208406598007, "grad_norm": 0.33605486154556274, "learning_rate": 0.00018458924543376752, "loss": 11.6902, "step": 25688 }, { "epoch": 0.5377417734237628, "grad_norm": 0.32588663697242737, "learning_rate": 0.00018458807601835417, "loss": 11.6897, "step": 25689 }, { "epoch": 0.537762706187725, "grad_norm": 0.34806326031684875, "learning_rate": 0.00018458690656227762, "loss": 11.6744, "step": 25690 }, { "epoch": 0.5377836389516871, "grad_norm": 0.28002890944480896, "learning_rate": 0.00018458573706553848, "loss": 11.6708, "step": 25691 }, { "epoch": 0.5378045717156493, "grad_norm": 0.25486069917678833, "learning_rate": 0.00018458456752813726, "loss": 11.6699, "step": 25692 }, { "epoch": 0.5378255044796115, "grad_norm": 0.2938906252384186, "learning_rate": 0.00018458339795007455, "loss": 11.65, "step": 25693 }, { "epoch": 0.5378464372435736, "grad_norm": 0.22621877491474152, "learning_rate": 0.00018458222833135093, "loss": 11.6895, "step": 25694 }, { "epoch": 0.5378673700075358, "grad_norm": 0.2613784372806549, "learning_rate": 0.00018458105867196688, "loss": 11.6818, "step": 25695 }, { "epoch": 0.5378883027714979, "grad_norm": 0.3518698513507843, "learning_rate": 0.00018457988897192307, "loss": 11.6741, "step": 25696 }, { "epoch": 0.5379092355354601, "grad_norm": 0.3315061330795288, "learning_rate": 0.00018457871923122, "loss": 11.6722, "step": 25697 }, { "epoch": 0.5379301682994223, "grad_norm": 0.2666986882686615, "learning_rate": 0.00018457754944985823, "loss": 11.6576, "step": 25698 }, { "epoch": 0.5379511010633844, "grad_norm": 0.3155615031719208, "learning_rate": 0.00018457637962783835, "loss": 11.688, "step": 25699 }, { "epoch": 0.5379720338273466, "grad_norm": 0.342808336019516, "learning_rate": 0.00018457520976516092, "loss": 11.6836, "step": 25700 }, { "epoch": 0.5379929665913087, "grad_norm": 0.2809375524520874, "learning_rate": 0.0001845740398618265, "loss": 11.6685, "step": 25701 }, { "epoch": 0.5380138993552709, "grad_norm": 0.27876344323158264, "learning_rate": 0.0001845728699178356, "loss": 11.68, "step": 25702 }, { "epoch": 0.538034832119233, "grad_norm": 0.338576078414917, "learning_rate": 0.00018457169993318885, "loss": 11.6676, "step": 25703 }, { "epoch": 0.5380557648831952, "grad_norm": 0.3413301408290863, "learning_rate": 0.00018457052990788678, "loss": 11.6752, "step": 25704 }, { "epoch": 0.5380766976471574, "grad_norm": 0.2704785168170929, "learning_rate": 0.00018456935984192998, "loss": 11.6816, "step": 25705 }, { "epoch": 0.5380976304111195, "grad_norm": 0.22791622579097748, "learning_rate": 0.000184568189735319, "loss": 11.6708, "step": 25706 }, { "epoch": 0.5381185631750817, "grad_norm": 0.5282593369483948, "learning_rate": 0.00018456701958805436, "loss": 11.6585, "step": 25707 }, { "epoch": 0.5381394959390438, "grad_norm": 0.30917900800704956, "learning_rate": 0.00018456584940013669, "loss": 11.6692, "step": 25708 }, { "epoch": 0.538160428703006, "grad_norm": 0.36308756470680237, "learning_rate": 0.0001845646791715665, "loss": 11.6716, "step": 25709 }, { "epoch": 0.538181361466968, "grad_norm": 0.28066977858543396, "learning_rate": 0.0001845635089023444, "loss": 11.6702, "step": 25710 }, { "epoch": 0.5382022942309302, "grad_norm": 0.30438894033432007, "learning_rate": 0.0001845623385924709, "loss": 11.6525, "step": 25711 }, { "epoch": 0.5382232269948924, "grad_norm": 0.2674538791179657, "learning_rate": 0.00018456116824194663, "loss": 11.6719, "step": 25712 }, { "epoch": 0.5382441597588545, "grad_norm": 0.3075686991214752, "learning_rate": 0.00018455999785077208, "loss": 11.673, "step": 25713 }, { "epoch": 0.5382650925228167, "grad_norm": 0.26003214716911316, "learning_rate": 0.00018455882741894784, "loss": 11.6556, "step": 25714 }, { "epoch": 0.5382860252867788, "grad_norm": 0.3187806308269501, "learning_rate": 0.00018455765694647448, "loss": 11.6825, "step": 25715 }, { "epoch": 0.538306958050741, "grad_norm": 0.24703660607337952, "learning_rate": 0.00018455648643335258, "loss": 11.6623, "step": 25716 }, { "epoch": 0.5383278908147032, "grad_norm": 0.28548678755760193, "learning_rate": 0.00018455531587958265, "loss": 11.6766, "step": 25717 }, { "epoch": 0.5383488235786653, "grad_norm": 0.2971290946006775, "learning_rate": 0.0001845541452851653, "loss": 11.6673, "step": 25718 }, { "epoch": 0.5383697563426275, "grad_norm": 0.3660101890563965, "learning_rate": 0.0001845529746501011, "loss": 11.679, "step": 25719 }, { "epoch": 0.5383906891065896, "grad_norm": 0.29800117015838623, "learning_rate": 0.00018455180397439054, "loss": 11.6794, "step": 25720 }, { "epoch": 0.5384116218705518, "grad_norm": 0.3516535758972168, "learning_rate": 0.00018455063325803427, "loss": 11.6801, "step": 25721 }, { "epoch": 0.5384325546345139, "grad_norm": 0.31010696291923523, "learning_rate": 0.0001845494625010328, "loss": 11.6641, "step": 25722 }, { "epoch": 0.5384534873984761, "grad_norm": 0.2951154410839081, "learning_rate": 0.00018454829170338672, "loss": 11.6689, "step": 25723 }, { "epoch": 0.5384744201624383, "grad_norm": 0.2548774480819702, "learning_rate": 0.00018454712086509657, "loss": 11.6869, "step": 25724 }, { "epoch": 0.5384953529264004, "grad_norm": 0.24154648184776306, "learning_rate": 0.00018454594998616294, "loss": 11.6741, "step": 25725 }, { "epoch": 0.5385162856903626, "grad_norm": 0.28718382120132446, "learning_rate": 0.00018454477906658642, "loss": 11.681, "step": 25726 }, { "epoch": 0.5385372184543247, "grad_norm": 0.26381444931030273, "learning_rate": 0.00018454360810636744, "loss": 11.6749, "step": 25727 }, { "epoch": 0.5385581512182869, "grad_norm": 0.34130987524986267, "learning_rate": 0.0001845424371055067, "loss": 11.6674, "step": 25728 }, { "epoch": 0.538579083982249, "grad_norm": 0.3448322117328644, "learning_rate": 0.00018454126606400474, "loss": 11.678, "step": 25729 }, { "epoch": 0.5386000167462112, "grad_norm": 0.36248788237571716, "learning_rate": 0.00018454009498186205, "loss": 11.6775, "step": 25730 }, { "epoch": 0.5386209495101734, "grad_norm": 0.3238758146762848, "learning_rate": 0.00018453892385907926, "loss": 11.6669, "step": 25731 }, { "epoch": 0.5386418822741355, "grad_norm": 0.2988376319408417, "learning_rate": 0.0001845377526956569, "loss": 11.676, "step": 25732 }, { "epoch": 0.5386628150380977, "grad_norm": 0.31053173542022705, "learning_rate": 0.00018453658149159558, "loss": 11.6723, "step": 25733 }, { "epoch": 0.5386837478020597, "grad_norm": 0.3149406313896179, "learning_rate": 0.00018453541024689583, "loss": 11.6735, "step": 25734 }, { "epoch": 0.5387046805660219, "grad_norm": 0.27639445662498474, "learning_rate": 0.0001845342389615582, "loss": 11.6773, "step": 25735 }, { "epoch": 0.5387256133299841, "grad_norm": 0.3198855221271515, "learning_rate": 0.0001845330676355833, "loss": 11.6736, "step": 25736 }, { "epoch": 0.5387465460939462, "grad_norm": 0.3516344726085663, "learning_rate": 0.0001845318962689716, "loss": 11.6727, "step": 25737 }, { "epoch": 0.5387674788579084, "grad_norm": 0.28078556060791016, "learning_rate": 0.00018453072486172376, "loss": 11.6758, "step": 25738 }, { "epoch": 0.5387884116218705, "grad_norm": 0.30831000208854675, "learning_rate": 0.00018452955341384032, "loss": 11.6631, "step": 25739 }, { "epoch": 0.5388093443858327, "grad_norm": 0.35266998410224915, "learning_rate": 0.0001845283819253218, "loss": 11.6795, "step": 25740 }, { "epoch": 0.5388302771497948, "grad_norm": 0.2996232807636261, "learning_rate": 0.00018452721039616878, "loss": 11.6509, "step": 25741 }, { "epoch": 0.538851209913757, "grad_norm": 0.29715871810913086, "learning_rate": 0.00018452603882638188, "loss": 11.6691, "step": 25742 }, { "epoch": 0.5388721426777192, "grad_norm": 0.323494017124176, "learning_rate": 0.0001845248672159616, "loss": 11.6509, "step": 25743 }, { "epoch": 0.5388930754416813, "grad_norm": 0.2784999907016754, "learning_rate": 0.0001845236955649085, "loss": 11.6827, "step": 25744 }, { "epoch": 0.5389140082056435, "grad_norm": 0.46867382526397705, "learning_rate": 0.0001845225238732232, "loss": 11.6775, "step": 25745 }, { "epoch": 0.5389349409696056, "grad_norm": 0.2516041100025177, "learning_rate": 0.0001845213521409062, "loss": 11.6657, "step": 25746 }, { "epoch": 0.5389558737335678, "grad_norm": 0.26509013772010803, "learning_rate": 0.00018452018036795812, "loss": 11.66, "step": 25747 }, { "epoch": 0.5389768064975299, "grad_norm": 0.28565600514411926, "learning_rate": 0.0001845190085543795, "loss": 11.6696, "step": 25748 }, { "epoch": 0.5389977392614921, "grad_norm": 0.28956395387649536, "learning_rate": 0.00018451783670017085, "loss": 11.6668, "step": 25749 }, { "epoch": 0.5390186720254543, "grad_norm": 0.27516716718673706, "learning_rate": 0.00018451666480533282, "loss": 11.6507, "step": 25750 }, { "epoch": 0.5390396047894164, "grad_norm": 0.30747562646865845, "learning_rate": 0.00018451549286986594, "loss": 11.6662, "step": 25751 }, { "epoch": 0.5390605375533786, "grad_norm": 0.2625509798526764, "learning_rate": 0.00018451432089377075, "loss": 11.6775, "step": 25752 }, { "epoch": 0.5390814703173407, "grad_norm": 0.3155654966831207, "learning_rate": 0.00018451314887704784, "loss": 11.6636, "step": 25753 }, { "epoch": 0.5391024030813029, "grad_norm": 0.3674990236759186, "learning_rate": 0.00018451197681969777, "loss": 11.6591, "step": 25754 }, { "epoch": 0.5391233358452651, "grad_norm": 0.25867366790771484, "learning_rate": 0.0001845108047217211, "loss": 11.6716, "step": 25755 }, { "epoch": 0.5391442686092272, "grad_norm": 0.308709979057312, "learning_rate": 0.00018450963258311837, "loss": 11.6738, "step": 25756 }, { "epoch": 0.5391652013731894, "grad_norm": 0.28250017762184143, "learning_rate": 0.0001845084604038902, "loss": 11.6931, "step": 25757 }, { "epoch": 0.5391861341371514, "grad_norm": 0.3074246346950531, "learning_rate": 0.0001845072881840371, "loss": 11.6875, "step": 25758 }, { "epoch": 0.5392070669011136, "grad_norm": 0.3922328054904938, "learning_rate": 0.00018450611592355968, "loss": 11.68, "step": 25759 }, { "epoch": 0.5392279996650757, "grad_norm": 0.29673850536346436, "learning_rate": 0.00018450494362245846, "loss": 11.6818, "step": 25760 }, { "epoch": 0.5392489324290379, "grad_norm": 0.2469429075717926, "learning_rate": 0.000184503771280734, "loss": 11.6681, "step": 25761 }, { "epoch": 0.5392698651930001, "grad_norm": 0.25038155913352966, "learning_rate": 0.00018450259889838687, "loss": 11.6588, "step": 25762 }, { "epoch": 0.5392907979569622, "grad_norm": 0.29981744289398193, "learning_rate": 0.00018450142647541767, "loss": 11.6603, "step": 25763 }, { "epoch": 0.5393117307209244, "grad_norm": 0.3248699903488159, "learning_rate": 0.00018450025401182697, "loss": 11.6596, "step": 25764 }, { "epoch": 0.5393326634848865, "grad_norm": 0.27644822001457214, "learning_rate": 0.00018449908150761527, "loss": 11.667, "step": 25765 }, { "epoch": 0.5393535962488487, "grad_norm": 0.28321000933647156, "learning_rate": 0.00018449790896278318, "loss": 11.6746, "step": 25766 }, { "epoch": 0.5393745290128108, "grad_norm": 0.2724992334842682, "learning_rate": 0.00018449673637733124, "loss": 11.6714, "step": 25767 }, { "epoch": 0.539395461776773, "grad_norm": 0.30603501200675964, "learning_rate": 0.00018449556375126002, "loss": 11.674, "step": 25768 }, { "epoch": 0.5394163945407352, "grad_norm": 0.2734210789203644, "learning_rate": 0.00018449439108457014, "loss": 11.6698, "step": 25769 }, { "epoch": 0.5394373273046973, "grad_norm": 0.4044936001300812, "learning_rate": 0.00018449321837726208, "loss": 11.6632, "step": 25770 }, { "epoch": 0.5394582600686595, "grad_norm": 0.25863003730773926, "learning_rate": 0.00018449204562933643, "loss": 11.6708, "step": 25771 }, { "epoch": 0.5394791928326216, "grad_norm": 0.27389055490493774, "learning_rate": 0.00018449087284079375, "loss": 11.6753, "step": 25772 }, { "epoch": 0.5395001255965838, "grad_norm": 0.38143521547317505, "learning_rate": 0.00018448970001163462, "loss": 11.6827, "step": 25773 }, { "epoch": 0.539521058360546, "grad_norm": 0.32784345746040344, "learning_rate": 0.00018448852714185964, "loss": 11.6728, "step": 25774 }, { "epoch": 0.5395419911245081, "grad_norm": 0.2722649872303009, "learning_rate": 0.00018448735423146927, "loss": 11.6435, "step": 25775 }, { "epoch": 0.5395629238884703, "grad_norm": 0.32428696751594543, "learning_rate": 0.00018448618128046417, "loss": 11.6648, "step": 25776 }, { "epoch": 0.5395838566524324, "grad_norm": 0.29586929082870483, "learning_rate": 0.00018448500828884486, "loss": 11.6702, "step": 25777 }, { "epoch": 0.5396047894163946, "grad_norm": 0.31626516580581665, "learning_rate": 0.00018448383525661194, "loss": 11.6743, "step": 25778 }, { "epoch": 0.5396257221803566, "grad_norm": 0.27724358439445496, "learning_rate": 0.0001844826621837659, "loss": 11.6718, "step": 25779 }, { "epoch": 0.5396466549443188, "grad_norm": 0.36012235283851624, "learning_rate": 0.00018448148907030738, "loss": 11.6774, "step": 25780 }, { "epoch": 0.539667587708281, "grad_norm": 0.29111021757125854, "learning_rate": 0.00018448031591623688, "loss": 11.6721, "step": 25781 }, { "epoch": 0.5396885204722431, "grad_norm": 0.3577266335487366, "learning_rate": 0.00018447914272155504, "loss": 11.6755, "step": 25782 }, { "epoch": 0.5397094532362053, "grad_norm": 0.2527194619178772, "learning_rate": 0.0001844779694862624, "loss": 11.6589, "step": 25783 }, { "epoch": 0.5397303860001674, "grad_norm": 0.3329451382160187, "learning_rate": 0.00018447679621035946, "loss": 11.6745, "step": 25784 }, { "epoch": 0.5397513187641296, "grad_norm": 0.23006005585193634, "learning_rate": 0.00018447562289384685, "loss": 11.6639, "step": 25785 }, { "epoch": 0.5397722515280917, "grad_norm": 0.26387661695480347, "learning_rate": 0.00018447444953672513, "loss": 11.6762, "step": 25786 }, { "epoch": 0.5397931842920539, "grad_norm": 0.31645500659942627, "learning_rate": 0.00018447327613899483, "loss": 11.6632, "step": 25787 }, { "epoch": 0.5398141170560161, "grad_norm": 0.2586257755756378, "learning_rate": 0.00018447210270065652, "loss": 11.6654, "step": 25788 }, { "epoch": 0.5398350498199782, "grad_norm": 0.261018842458725, "learning_rate": 0.0001844709292217108, "loss": 11.6617, "step": 25789 }, { "epoch": 0.5398559825839404, "grad_norm": 0.251221626996994, "learning_rate": 0.00018446975570215822, "loss": 11.6595, "step": 25790 }, { "epoch": 0.5398769153479025, "grad_norm": 0.2591214179992676, "learning_rate": 0.00018446858214199932, "loss": 11.6835, "step": 25791 }, { "epoch": 0.5398978481118647, "grad_norm": 0.3370347321033478, "learning_rate": 0.0001844674085412347, "loss": 11.6729, "step": 25792 }, { "epoch": 0.5399187808758269, "grad_norm": 0.28841906785964966, "learning_rate": 0.00018446623489986488, "loss": 11.6763, "step": 25793 }, { "epoch": 0.539939713639789, "grad_norm": 0.39652276039123535, "learning_rate": 0.00018446506121789043, "loss": 11.675, "step": 25794 }, { "epoch": 0.5399606464037512, "grad_norm": 0.2706964313983917, "learning_rate": 0.00018446388749531197, "loss": 11.6599, "step": 25795 }, { "epoch": 0.5399815791677133, "grad_norm": 0.3193054795265198, "learning_rate": 0.00018446271373213002, "loss": 11.6616, "step": 25796 }, { "epoch": 0.5400025119316755, "grad_norm": 0.26979926228523254, "learning_rate": 0.00018446153992834514, "loss": 11.6657, "step": 25797 }, { "epoch": 0.5400234446956376, "grad_norm": 0.31808850169181824, "learning_rate": 0.0001844603660839579, "loss": 11.6979, "step": 25798 }, { "epoch": 0.5400443774595998, "grad_norm": 0.28782519698143005, "learning_rate": 0.0001844591921989689, "loss": 11.6751, "step": 25799 }, { "epoch": 0.540065310223562, "grad_norm": 0.25230053067207336, "learning_rate": 0.00018445801827337866, "loss": 11.6661, "step": 25800 }, { "epoch": 0.5400862429875241, "grad_norm": 0.2886456549167633, "learning_rate": 0.00018445684430718772, "loss": 11.6659, "step": 25801 }, { "epoch": 0.5401071757514863, "grad_norm": 0.3121275007724762, "learning_rate": 0.00018445567030039674, "loss": 11.6651, "step": 25802 }, { "epoch": 0.5401281085154483, "grad_norm": 0.25466829538345337, "learning_rate": 0.00018445449625300618, "loss": 11.6447, "step": 25803 }, { "epoch": 0.5401490412794105, "grad_norm": 0.32264864444732666, "learning_rate": 0.0001844533221650167, "loss": 11.6783, "step": 25804 }, { "epoch": 0.5401699740433726, "grad_norm": 0.43658891320228577, "learning_rate": 0.00018445214803642875, "loss": 11.6671, "step": 25805 }, { "epoch": 0.5401909068073348, "grad_norm": 0.29435867071151733, "learning_rate": 0.00018445097386724302, "loss": 11.6825, "step": 25806 }, { "epoch": 0.540211839571297, "grad_norm": 0.3463539481163025, "learning_rate": 0.00018444979965745998, "loss": 11.681, "step": 25807 }, { "epoch": 0.5402327723352591, "grad_norm": 0.38136735558509827, "learning_rate": 0.00018444862540708024, "loss": 11.6782, "step": 25808 }, { "epoch": 0.5402537050992213, "grad_norm": 0.3326592445373535, "learning_rate": 0.00018444745111610437, "loss": 11.6774, "step": 25809 }, { "epoch": 0.5402746378631834, "grad_norm": 0.2729112505912781, "learning_rate": 0.00018444627678453288, "loss": 11.6818, "step": 25810 }, { "epoch": 0.5402955706271456, "grad_norm": 0.3036350905895233, "learning_rate": 0.00018444510241236642, "loss": 11.6826, "step": 25811 }, { "epoch": 0.5403165033911078, "grad_norm": 0.26238152384757996, "learning_rate": 0.00018444392799960547, "loss": 11.6938, "step": 25812 }, { "epoch": 0.5403374361550699, "grad_norm": 0.29187339544296265, "learning_rate": 0.00018444275354625065, "loss": 11.6791, "step": 25813 }, { "epoch": 0.5403583689190321, "grad_norm": 0.28303825855255127, "learning_rate": 0.0001844415790523025, "loss": 11.6607, "step": 25814 }, { "epoch": 0.5403793016829942, "grad_norm": 0.2862120568752289, "learning_rate": 0.0001844404045177616, "loss": 11.6645, "step": 25815 }, { "epoch": 0.5404002344469564, "grad_norm": 0.25424671173095703, "learning_rate": 0.00018443922994262847, "loss": 11.6619, "step": 25816 }, { "epoch": 0.5404211672109185, "grad_norm": 0.35316193103790283, "learning_rate": 0.00018443805532690374, "loss": 11.6772, "step": 25817 }, { "epoch": 0.5404420999748807, "grad_norm": 0.2461153268814087, "learning_rate": 0.00018443688067058792, "loss": 11.6521, "step": 25818 }, { "epoch": 0.5404630327388429, "grad_norm": 0.24643035233020782, "learning_rate": 0.00018443570597368166, "loss": 11.6728, "step": 25819 }, { "epoch": 0.540483965502805, "grad_norm": 0.30376043915748596, "learning_rate": 0.0001844345312361854, "loss": 11.669, "step": 25820 }, { "epoch": 0.5405048982667672, "grad_norm": 0.3441794812679291, "learning_rate": 0.0001844333564580998, "loss": 11.6782, "step": 25821 }, { "epoch": 0.5405258310307293, "grad_norm": 0.3081671893596649, "learning_rate": 0.0001844321816394254, "loss": 11.6545, "step": 25822 }, { "epoch": 0.5405467637946915, "grad_norm": 0.30638977885246277, "learning_rate": 0.00018443100678016273, "loss": 11.6735, "step": 25823 }, { "epoch": 0.5405676965586536, "grad_norm": 0.26028311252593994, "learning_rate": 0.0001844298318803124, "loss": 11.6726, "step": 25824 }, { "epoch": 0.5405886293226158, "grad_norm": 0.3271696865558624, "learning_rate": 0.00018442865693987494, "loss": 11.6347, "step": 25825 }, { "epoch": 0.540609562086578, "grad_norm": 0.3281325697898865, "learning_rate": 0.00018442748195885095, "loss": 11.6712, "step": 25826 }, { "epoch": 0.54063049485054, "grad_norm": 0.2755133807659149, "learning_rate": 0.00018442630693724096, "loss": 11.6669, "step": 25827 }, { "epoch": 0.5406514276145022, "grad_norm": 0.2698047161102295, "learning_rate": 0.00018442513187504558, "loss": 11.6702, "step": 25828 }, { "epoch": 0.5406723603784643, "grad_norm": 0.23754295706748962, "learning_rate": 0.00018442395677226533, "loss": 11.6578, "step": 25829 }, { "epoch": 0.5406932931424265, "grad_norm": 0.33013716340065, "learning_rate": 0.00018442278162890082, "loss": 11.6853, "step": 25830 }, { "epoch": 0.5407142259063887, "grad_norm": 0.3148740828037262, "learning_rate": 0.0001844216064449525, "loss": 11.6697, "step": 25831 }, { "epoch": 0.5407351586703508, "grad_norm": 0.34921300411224365, "learning_rate": 0.00018442043122042113, "loss": 11.682, "step": 25832 }, { "epoch": 0.540756091434313, "grad_norm": 0.25334104895591736, "learning_rate": 0.00018441925595530712, "loss": 11.6647, "step": 25833 }, { "epoch": 0.5407770241982751, "grad_norm": 0.2697570323944092, "learning_rate": 0.0001844180806496111, "loss": 11.6715, "step": 25834 }, { "epoch": 0.5407979569622373, "grad_norm": 0.29892098903656006, "learning_rate": 0.00018441690530333356, "loss": 11.6814, "step": 25835 }, { "epoch": 0.5408188897261994, "grad_norm": 0.30586186051368713, "learning_rate": 0.00018441572991647516, "loss": 11.6837, "step": 25836 }, { "epoch": 0.5408398224901616, "grad_norm": 0.8717705011367798, "learning_rate": 0.00018441455448903646, "loss": 11.6734, "step": 25837 }, { "epoch": 0.5408607552541238, "grad_norm": 0.37146979570388794, "learning_rate": 0.00018441337902101794, "loss": 11.6839, "step": 25838 }, { "epoch": 0.5408816880180859, "grad_norm": 0.3263317942619324, "learning_rate": 0.00018441220351242022, "loss": 11.6724, "step": 25839 }, { "epoch": 0.5409026207820481, "grad_norm": 0.259446382522583, "learning_rate": 0.0001844110279632439, "loss": 11.6804, "step": 25840 }, { "epoch": 0.5409235535460102, "grad_norm": 0.3105660676956177, "learning_rate": 0.0001844098523734895, "loss": 11.679, "step": 25841 }, { "epoch": 0.5409444863099724, "grad_norm": 0.28832679986953735, "learning_rate": 0.00018440867674315755, "loss": 11.6681, "step": 25842 }, { "epoch": 0.5409654190739345, "grad_norm": 0.24369923770427704, "learning_rate": 0.0001844075010722487, "loss": 11.6756, "step": 25843 }, { "epoch": 0.5409863518378967, "grad_norm": 0.2845815420150757, "learning_rate": 0.00018440632536076347, "loss": 11.6698, "step": 25844 }, { "epoch": 0.5410072846018589, "grad_norm": 0.22498998045921326, "learning_rate": 0.00018440514960870244, "loss": 11.6749, "step": 25845 }, { "epoch": 0.541028217365821, "grad_norm": 0.32603171467781067, "learning_rate": 0.00018440397381606612, "loss": 11.6717, "step": 25846 }, { "epoch": 0.5410491501297832, "grad_norm": 0.2738935649394989, "learning_rate": 0.00018440279798285513, "loss": 11.6602, "step": 25847 }, { "epoch": 0.5410700828937453, "grad_norm": 0.28389227390289307, "learning_rate": 0.00018440162210907004, "loss": 11.672, "step": 25848 }, { "epoch": 0.5410910156577075, "grad_norm": 0.2968733608722687, "learning_rate": 0.0001844004461947114, "loss": 11.6741, "step": 25849 }, { "epoch": 0.5411119484216695, "grad_norm": 0.25375452637672424, "learning_rate": 0.00018439927023977974, "loss": 11.6616, "step": 25850 }, { "epoch": 0.5411328811856317, "grad_norm": 0.26862016320228577, "learning_rate": 0.0001843980942442757, "loss": 11.6622, "step": 25851 }, { "epoch": 0.5411538139495939, "grad_norm": 0.32652610540390015, "learning_rate": 0.0001843969182081998, "loss": 11.6724, "step": 25852 }, { "epoch": 0.541174746713556, "grad_norm": 0.2825455963611603, "learning_rate": 0.0001843957421315526, "loss": 11.6775, "step": 25853 }, { "epoch": 0.5411956794775182, "grad_norm": 0.2963312268257141, "learning_rate": 0.00018439456601433466, "loss": 11.6747, "step": 25854 }, { "epoch": 0.5412166122414803, "grad_norm": 0.2523096203804016, "learning_rate": 0.0001843933898565466, "loss": 11.6843, "step": 25855 }, { "epoch": 0.5412375450054425, "grad_norm": 0.25611335039138794, "learning_rate": 0.0001843922136581889, "loss": 11.6628, "step": 25856 }, { "epoch": 0.5412584777694047, "grad_norm": 0.2951684296131134, "learning_rate": 0.0001843910374192622, "loss": 11.6758, "step": 25857 }, { "epoch": 0.5412794105333668, "grad_norm": 0.30924925208091736, "learning_rate": 0.00018438986113976705, "loss": 11.6713, "step": 25858 }, { "epoch": 0.541300343297329, "grad_norm": 0.3116002380847931, "learning_rate": 0.00018438868481970397, "loss": 11.6657, "step": 25859 }, { "epoch": 0.5413212760612911, "grad_norm": 0.2983955144882202, "learning_rate": 0.0001843875084590736, "loss": 11.6572, "step": 25860 }, { "epoch": 0.5413422088252533, "grad_norm": 0.23935478925704956, "learning_rate": 0.0001843863320578764, "loss": 11.6635, "step": 25861 }, { "epoch": 0.5413631415892154, "grad_norm": 0.28455376625061035, "learning_rate": 0.00018438515561611306, "loss": 11.6614, "step": 25862 }, { "epoch": 0.5413840743531776, "grad_norm": 0.3233641982078552, "learning_rate": 0.00018438397913378402, "loss": 11.6711, "step": 25863 }, { "epoch": 0.5414050071171398, "grad_norm": 0.30509257316589355, "learning_rate": 0.00018438280261089, "loss": 11.6851, "step": 25864 }, { "epoch": 0.5414259398811019, "grad_norm": 0.24999721348285675, "learning_rate": 0.0001843816260474314, "loss": 11.6518, "step": 25865 }, { "epoch": 0.5414468726450641, "grad_norm": 0.32737836241722107, "learning_rate": 0.0001843804494434089, "loss": 11.6755, "step": 25866 }, { "epoch": 0.5414678054090262, "grad_norm": 0.2957056164741516, "learning_rate": 0.00018437927279882298, "loss": 11.6787, "step": 25867 }, { "epoch": 0.5414887381729884, "grad_norm": 0.28996607661247253, "learning_rate": 0.0001843780961136743, "loss": 11.6891, "step": 25868 }, { "epoch": 0.5415096709369505, "grad_norm": 0.23740838468074799, "learning_rate": 0.00018437691938796337, "loss": 11.6758, "step": 25869 }, { "epoch": 0.5415306037009127, "grad_norm": 0.323479562997818, "learning_rate": 0.00018437574262169075, "loss": 11.6529, "step": 25870 }, { "epoch": 0.5415515364648749, "grad_norm": 0.2530403137207031, "learning_rate": 0.00018437456581485702, "loss": 11.6789, "step": 25871 }, { "epoch": 0.541572469228837, "grad_norm": 0.28302979469299316, "learning_rate": 0.00018437338896746278, "loss": 11.6834, "step": 25872 }, { "epoch": 0.5415934019927992, "grad_norm": 0.35748201608657837, "learning_rate": 0.00018437221207950853, "loss": 11.6723, "step": 25873 }, { "epoch": 0.5416143347567612, "grad_norm": 0.27758607268333435, "learning_rate": 0.00018437103515099484, "loss": 11.6606, "step": 25874 }, { "epoch": 0.5416352675207234, "grad_norm": 0.26585009694099426, "learning_rate": 0.00018436985818192234, "loss": 11.6586, "step": 25875 }, { "epoch": 0.5416562002846856, "grad_norm": 0.2681289315223694, "learning_rate": 0.00018436868117229155, "loss": 11.6947, "step": 25876 }, { "epoch": 0.5416771330486477, "grad_norm": 0.2683393359184265, "learning_rate": 0.00018436750412210306, "loss": 11.6797, "step": 25877 }, { "epoch": 0.5416980658126099, "grad_norm": 0.4321465790271759, "learning_rate": 0.00018436632703135741, "loss": 11.6767, "step": 25878 }, { "epoch": 0.541718998576572, "grad_norm": 0.30870968103408813, "learning_rate": 0.00018436514990005515, "loss": 11.6681, "step": 25879 }, { "epoch": 0.5417399313405342, "grad_norm": 0.28432080149650574, "learning_rate": 0.0001843639727281969, "loss": 11.6655, "step": 25880 }, { "epoch": 0.5417608641044963, "grad_norm": 0.284943163394928, "learning_rate": 0.00018436279551578322, "loss": 11.6857, "step": 25881 }, { "epoch": 0.5417817968684585, "grad_norm": 0.3882642984390259, "learning_rate": 0.0001843616182628146, "loss": 11.6832, "step": 25882 }, { "epoch": 0.5418027296324207, "grad_norm": 0.3426520824432373, "learning_rate": 0.00018436044096929168, "loss": 11.6713, "step": 25883 }, { "epoch": 0.5418236623963828, "grad_norm": 0.28149282932281494, "learning_rate": 0.00018435926363521502, "loss": 11.6509, "step": 25884 }, { "epoch": 0.541844595160345, "grad_norm": 0.29822149872779846, "learning_rate": 0.00018435808626058515, "loss": 11.672, "step": 25885 }, { "epoch": 0.5418655279243071, "grad_norm": 0.24866069853305817, "learning_rate": 0.00018435690884540269, "loss": 11.6589, "step": 25886 }, { "epoch": 0.5418864606882693, "grad_norm": 0.2888043522834778, "learning_rate": 0.00018435573138966813, "loss": 11.6378, "step": 25887 }, { "epoch": 0.5419073934522314, "grad_norm": 0.2687506079673767, "learning_rate": 0.0001843545538933821, "loss": 11.6587, "step": 25888 }, { "epoch": 0.5419283262161936, "grad_norm": 0.29413434863090515, "learning_rate": 0.00018435337635654516, "loss": 11.6748, "step": 25889 }, { "epoch": 0.5419492589801558, "grad_norm": 0.2996118664741516, "learning_rate": 0.00018435219877915783, "loss": 11.6716, "step": 25890 }, { "epoch": 0.5419701917441179, "grad_norm": 0.3209231495857239, "learning_rate": 0.00018435102116122075, "loss": 11.6854, "step": 25891 }, { "epoch": 0.5419911245080801, "grad_norm": 0.3374370038509369, "learning_rate": 0.00018434984350273442, "loss": 11.6667, "step": 25892 }, { "epoch": 0.5420120572720422, "grad_norm": 0.32781562209129333, "learning_rate": 0.00018434866580369942, "loss": 11.6692, "step": 25893 }, { "epoch": 0.5420329900360044, "grad_norm": 0.3025377690792084, "learning_rate": 0.00018434748806411632, "loss": 11.6795, "step": 25894 }, { "epoch": 0.5420539227999666, "grad_norm": 0.32814109325408936, "learning_rate": 0.00018434631028398573, "loss": 11.6868, "step": 25895 }, { "epoch": 0.5420748555639286, "grad_norm": 0.3016091585159302, "learning_rate": 0.00018434513246330813, "loss": 11.6639, "step": 25896 }, { "epoch": 0.5420957883278908, "grad_norm": 0.2794775664806366, "learning_rate": 0.00018434395460208418, "loss": 11.6806, "step": 25897 }, { "epoch": 0.5421167210918529, "grad_norm": 0.23507721722126007, "learning_rate": 0.00018434277670031438, "loss": 11.6781, "step": 25898 }, { "epoch": 0.5421376538558151, "grad_norm": 0.3703329563140869, "learning_rate": 0.00018434159875799932, "loss": 11.6795, "step": 25899 }, { "epoch": 0.5421585866197772, "grad_norm": 0.2557564079761505, "learning_rate": 0.00018434042077513954, "loss": 11.6762, "step": 25900 }, { "epoch": 0.5421795193837394, "grad_norm": 0.2861482799053192, "learning_rate": 0.00018433924275173568, "loss": 11.6839, "step": 25901 }, { "epoch": 0.5422004521477016, "grad_norm": 0.29484620690345764, "learning_rate": 0.00018433806468778823, "loss": 11.6595, "step": 25902 }, { "epoch": 0.5422213849116637, "grad_norm": 0.35580283403396606, "learning_rate": 0.00018433688658329778, "loss": 11.6783, "step": 25903 }, { "epoch": 0.5422423176756259, "grad_norm": 0.3520984351634979, "learning_rate": 0.00018433570843826488, "loss": 11.674, "step": 25904 }, { "epoch": 0.542263250439588, "grad_norm": 0.3334648907184601, "learning_rate": 0.00018433453025269012, "loss": 11.6772, "step": 25905 }, { "epoch": 0.5422841832035502, "grad_norm": 0.3366181254386902, "learning_rate": 0.00018433335202657408, "loss": 11.6781, "step": 25906 }, { "epoch": 0.5423051159675123, "grad_norm": 0.2759036421775818, "learning_rate": 0.00018433217375991729, "loss": 11.6656, "step": 25907 }, { "epoch": 0.5423260487314745, "grad_norm": 0.31100142002105713, "learning_rate": 0.00018433099545272035, "loss": 11.6735, "step": 25908 }, { "epoch": 0.5423469814954367, "grad_norm": 0.26636847853660583, "learning_rate": 0.00018432981710498383, "loss": 11.6682, "step": 25909 }, { "epoch": 0.5423679142593988, "grad_norm": 0.29625949263572693, "learning_rate": 0.00018432863871670825, "loss": 11.6794, "step": 25910 }, { "epoch": 0.542388847023361, "grad_norm": 0.3024981617927551, "learning_rate": 0.0001843274602878942, "loss": 11.6718, "step": 25911 }, { "epoch": 0.5424097797873231, "grad_norm": 0.23977187275886536, "learning_rate": 0.00018432628181854223, "loss": 11.6546, "step": 25912 }, { "epoch": 0.5424307125512853, "grad_norm": 0.22924409806728363, "learning_rate": 0.00018432510330865297, "loss": 11.6764, "step": 25913 }, { "epoch": 0.5424516453152475, "grad_norm": 0.3952126204967499, "learning_rate": 0.00018432392475822693, "loss": 11.6868, "step": 25914 }, { "epoch": 0.5424725780792096, "grad_norm": 0.2600565254688263, "learning_rate": 0.00018432274616726467, "loss": 11.6704, "step": 25915 }, { "epoch": 0.5424935108431718, "grad_norm": 0.2950589954853058, "learning_rate": 0.0001843215675357668, "loss": 11.6523, "step": 25916 }, { "epoch": 0.5425144436071339, "grad_norm": 0.31612229347229004, "learning_rate": 0.00018432038886373385, "loss": 11.6859, "step": 25917 }, { "epoch": 0.5425353763710961, "grad_norm": 0.27357426285743713, "learning_rate": 0.00018431921015116642, "loss": 11.6579, "step": 25918 }, { "epoch": 0.5425563091350581, "grad_norm": 0.3584679365158081, "learning_rate": 0.00018431803139806502, "loss": 11.6698, "step": 25919 }, { "epoch": 0.5425772418990203, "grad_norm": 0.3067648410797119, "learning_rate": 0.00018431685260443027, "loss": 11.6704, "step": 25920 }, { "epoch": 0.5425981746629825, "grad_norm": 0.39841070771217346, "learning_rate": 0.00018431567377026273, "loss": 11.6696, "step": 25921 }, { "epoch": 0.5426191074269446, "grad_norm": 0.3365519344806671, "learning_rate": 0.00018431449489556294, "loss": 11.6804, "step": 25922 }, { "epoch": 0.5426400401909068, "grad_norm": 0.32505741715431213, "learning_rate": 0.0001843133159803315, "loss": 11.6859, "step": 25923 }, { "epoch": 0.5426609729548689, "grad_norm": 0.3120305836200714, "learning_rate": 0.00018431213702456894, "loss": 11.6656, "step": 25924 }, { "epoch": 0.5426819057188311, "grad_norm": 0.2786639630794525, "learning_rate": 0.00018431095802827585, "loss": 11.6695, "step": 25925 }, { "epoch": 0.5427028384827932, "grad_norm": 0.25548452138900757, "learning_rate": 0.00018430977899145282, "loss": 11.6607, "step": 25926 }, { "epoch": 0.5427237712467554, "grad_norm": 0.22011321783065796, "learning_rate": 0.00018430859991410036, "loss": 11.664, "step": 25927 }, { "epoch": 0.5427447040107176, "grad_norm": 0.29148268699645996, "learning_rate": 0.00018430742079621909, "loss": 11.674, "step": 25928 }, { "epoch": 0.5427656367746797, "grad_norm": 0.2979965806007385, "learning_rate": 0.00018430624163780953, "loss": 11.6647, "step": 25929 }, { "epoch": 0.5427865695386419, "grad_norm": 0.2698937654495239, "learning_rate": 0.00018430506243887225, "loss": 11.6767, "step": 25930 }, { "epoch": 0.542807502302604, "grad_norm": 0.27792149782180786, "learning_rate": 0.00018430388319940787, "loss": 11.6751, "step": 25931 }, { "epoch": 0.5428284350665662, "grad_norm": 0.29210028052330017, "learning_rate": 0.0001843027039194169, "loss": 11.6654, "step": 25932 }, { "epoch": 0.5428493678305284, "grad_norm": 0.29921698570251465, "learning_rate": 0.0001843015245989, "loss": 11.6679, "step": 25933 }, { "epoch": 0.5428703005944905, "grad_norm": 0.3082283139228821, "learning_rate": 0.0001843003452378576, "loss": 11.6727, "step": 25934 }, { "epoch": 0.5428912333584527, "grad_norm": 0.33889052271842957, "learning_rate": 0.00018429916583629036, "loss": 11.6642, "step": 25935 }, { "epoch": 0.5429121661224148, "grad_norm": 0.2875783443450928, "learning_rate": 0.00018429798639419878, "loss": 11.6727, "step": 25936 }, { "epoch": 0.542933098886377, "grad_norm": 0.33440670371055603, "learning_rate": 0.0001842968069115835, "loss": 11.681, "step": 25937 }, { "epoch": 0.5429540316503391, "grad_norm": 0.2905103266239166, "learning_rate": 0.00018429562738844505, "loss": 11.6856, "step": 25938 }, { "epoch": 0.5429749644143013, "grad_norm": 0.29320642352104187, "learning_rate": 0.00018429444782478403, "loss": 11.6918, "step": 25939 }, { "epoch": 0.5429958971782635, "grad_norm": 0.3508433699607849, "learning_rate": 0.00018429326822060095, "loss": 11.668, "step": 25940 }, { "epoch": 0.5430168299422256, "grad_norm": 0.37137678265571594, "learning_rate": 0.00018429208857589642, "loss": 11.6773, "step": 25941 }, { "epoch": 0.5430377627061878, "grad_norm": 0.30387258529663086, "learning_rate": 0.000184290908890671, "loss": 11.6746, "step": 25942 }, { "epoch": 0.5430586954701498, "grad_norm": 0.3099752962589264, "learning_rate": 0.00018428972916492521, "loss": 11.6615, "step": 25943 }, { "epoch": 0.543079628234112, "grad_norm": 0.2612558901309967, "learning_rate": 0.00018428854939865969, "loss": 11.6623, "step": 25944 }, { "epoch": 0.5431005609980741, "grad_norm": 0.39859917759895325, "learning_rate": 0.00018428736959187497, "loss": 11.6697, "step": 25945 }, { "epoch": 0.5431214937620363, "grad_norm": 0.3006862998008728, "learning_rate": 0.00018428618974457165, "loss": 11.6754, "step": 25946 }, { "epoch": 0.5431424265259985, "grad_norm": 0.25799596309661865, "learning_rate": 0.00018428500985675022, "loss": 11.6782, "step": 25947 }, { "epoch": 0.5431633592899606, "grad_norm": 0.22505562007427216, "learning_rate": 0.00018428382992841135, "loss": 11.6649, "step": 25948 }, { "epoch": 0.5431842920539228, "grad_norm": 0.29073572158813477, "learning_rate": 0.00018428264995955552, "loss": 11.6617, "step": 25949 }, { "epoch": 0.5432052248178849, "grad_norm": 0.3035567104816437, "learning_rate": 0.00018428146995018332, "loss": 11.6743, "step": 25950 }, { "epoch": 0.5432261575818471, "grad_norm": 0.26532554626464844, "learning_rate": 0.00018428028990029536, "loss": 11.6669, "step": 25951 }, { "epoch": 0.5432470903458093, "grad_norm": 0.25145623087882996, "learning_rate": 0.00018427910980989217, "loss": 11.6847, "step": 25952 }, { "epoch": 0.5432680231097714, "grad_norm": 0.313301146030426, "learning_rate": 0.0001842779296789743, "loss": 11.6705, "step": 25953 }, { "epoch": 0.5432889558737336, "grad_norm": 0.2575717270374298, "learning_rate": 0.00018427674950754236, "loss": 11.6747, "step": 25954 }, { "epoch": 0.5433098886376957, "grad_norm": 0.3148384094238281, "learning_rate": 0.00018427556929559688, "loss": 11.6696, "step": 25955 }, { "epoch": 0.5433308214016579, "grad_norm": 0.32681766152381897, "learning_rate": 0.00018427438904313848, "loss": 11.6512, "step": 25956 }, { "epoch": 0.54335175416562, "grad_norm": 0.30693280696868896, "learning_rate": 0.00018427320875016764, "loss": 11.6599, "step": 25957 }, { "epoch": 0.5433726869295822, "grad_norm": 0.325449675321579, "learning_rate": 0.000184272028416685, "loss": 11.6677, "step": 25958 }, { "epoch": 0.5433936196935444, "grad_norm": 0.28898438811302185, "learning_rate": 0.00018427084804269113, "loss": 11.6578, "step": 25959 }, { "epoch": 0.5434145524575065, "grad_norm": 0.2627980709075928, "learning_rate": 0.00018426966762818655, "loss": 11.6578, "step": 25960 }, { "epoch": 0.5434354852214687, "grad_norm": 0.27545252442359924, "learning_rate": 0.00018426848717317186, "loss": 11.6594, "step": 25961 }, { "epoch": 0.5434564179854308, "grad_norm": 0.23757871985435486, "learning_rate": 0.00018426730667764764, "loss": 11.6671, "step": 25962 }, { "epoch": 0.543477350749393, "grad_norm": 0.31189024448394775, "learning_rate": 0.0001842661261416144, "loss": 11.6637, "step": 25963 }, { "epoch": 0.543498283513355, "grad_norm": 0.2703566551208496, "learning_rate": 0.00018426494556507276, "loss": 11.6685, "step": 25964 }, { "epoch": 0.5435192162773173, "grad_norm": 0.3367827534675598, "learning_rate": 0.0001842637649480233, "loss": 11.6655, "step": 25965 }, { "epoch": 0.5435401490412795, "grad_norm": 0.31762149930000305, "learning_rate": 0.00018426258429046652, "loss": 11.6804, "step": 25966 }, { "epoch": 0.5435610818052415, "grad_norm": 0.6451979875564575, "learning_rate": 0.00018426140359240306, "loss": 11.7124, "step": 25967 }, { "epoch": 0.5435820145692037, "grad_norm": 0.3630269765853882, "learning_rate": 0.00018426022285383342, "loss": 11.655, "step": 25968 }, { "epoch": 0.5436029473331658, "grad_norm": 0.26817819476127625, "learning_rate": 0.00018425904207475821, "loss": 11.6674, "step": 25969 }, { "epoch": 0.543623880097128, "grad_norm": 0.3234717845916748, "learning_rate": 0.00018425786125517802, "loss": 11.6723, "step": 25970 }, { "epoch": 0.5436448128610902, "grad_norm": 0.2826860547065735, "learning_rate": 0.00018425668039509337, "loss": 11.6658, "step": 25971 }, { "epoch": 0.5436657456250523, "grad_norm": 0.27260822057724, "learning_rate": 0.00018425549949450482, "loss": 11.6725, "step": 25972 }, { "epoch": 0.5436866783890145, "grad_norm": 0.3101502060890198, "learning_rate": 0.000184254318553413, "loss": 11.663, "step": 25973 }, { "epoch": 0.5437076111529766, "grad_norm": 0.2708800733089447, "learning_rate": 0.0001842531375718184, "loss": 11.6595, "step": 25974 }, { "epoch": 0.5437285439169388, "grad_norm": 0.32391759753227234, "learning_rate": 0.00018425195654972167, "loss": 11.668, "step": 25975 }, { "epoch": 0.5437494766809009, "grad_norm": 0.25591152906417847, "learning_rate": 0.0001842507754871233, "loss": 11.6571, "step": 25976 }, { "epoch": 0.5437704094448631, "grad_norm": 0.2886851131916046, "learning_rate": 0.00018424959438402395, "loss": 11.6787, "step": 25977 }, { "epoch": 0.5437913422088253, "grad_norm": 0.34284961223602295, "learning_rate": 0.00018424841324042409, "loss": 11.6866, "step": 25978 }, { "epoch": 0.5438122749727874, "grad_norm": 0.30108052492141724, "learning_rate": 0.0001842472320563243, "loss": 11.6794, "step": 25979 }, { "epoch": 0.5438332077367496, "grad_norm": 0.34152284264564514, "learning_rate": 0.00018424605083172525, "loss": 11.6802, "step": 25980 }, { "epoch": 0.5438541405007117, "grad_norm": 0.2573799192905426, "learning_rate": 0.00018424486956662736, "loss": 11.6767, "step": 25981 }, { "epoch": 0.5438750732646739, "grad_norm": 0.2838132083415985, "learning_rate": 0.00018424368826103132, "loss": 11.6462, "step": 25982 }, { "epoch": 0.543896006028636, "grad_norm": 0.26633960008621216, "learning_rate": 0.00018424250691493764, "loss": 11.6675, "step": 25983 }, { "epoch": 0.5439169387925982, "grad_norm": 0.300449937582016, "learning_rate": 0.0001842413255283469, "loss": 11.6673, "step": 25984 }, { "epoch": 0.5439378715565604, "grad_norm": 0.3733481764793396, "learning_rate": 0.00018424014410125964, "loss": 11.6691, "step": 25985 }, { "epoch": 0.5439588043205225, "grad_norm": 0.2729385495185852, "learning_rate": 0.0001842389626336765, "loss": 11.6809, "step": 25986 }, { "epoch": 0.5439797370844847, "grad_norm": 0.26882117986679077, "learning_rate": 0.00018423778112559798, "loss": 11.6861, "step": 25987 }, { "epoch": 0.5440006698484467, "grad_norm": 0.28051406145095825, "learning_rate": 0.00018423659957702467, "loss": 11.6635, "step": 25988 }, { "epoch": 0.544021602612409, "grad_norm": 0.2825401723384857, "learning_rate": 0.00018423541798795717, "loss": 11.6602, "step": 25989 }, { "epoch": 0.5440425353763712, "grad_norm": 0.3001134991645813, "learning_rate": 0.00018423423635839596, "loss": 11.6949, "step": 25990 }, { "epoch": 0.5440634681403332, "grad_norm": 0.25415709614753723, "learning_rate": 0.0001842330546883417, "loss": 11.663, "step": 25991 }, { "epoch": 0.5440844009042954, "grad_norm": 0.26015040278434753, "learning_rate": 0.00018423187297779493, "loss": 11.6707, "step": 25992 }, { "epoch": 0.5441053336682575, "grad_norm": 0.2821017801761627, "learning_rate": 0.0001842306912267562, "loss": 11.6696, "step": 25993 }, { "epoch": 0.5441262664322197, "grad_norm": 0.24920162558555603, "learning_rate": 0.00018422950943522605, "loss": 11.6681, "step": 25994 }, { "epoch": 0.5441471991961818, "grad_norm": 0.29159364104270935, "learning_rate": 0.00018422832760320513, "loss": 11.6689, "step": 25995 }, { "epoch": 0.544168131960144, "grad_norm": 0.3364122807979584, "learning_rate": 0.00018422714573069395, "loss": 11.6586, "step": 25996 }, { "epoch": 0.5441890647241062, "grad_norm": 0.3020472526550293, "learning_rate": 0.0001842259638176931, "loss": 11.6838, "step": 25997 }, { "epoch": 0.5442099974880683, "grad_norm": 0.25331446528434753, "learning_rate": 0.00018422478186420316, "loss": 11.6799, "step": 25998 }, { "epoch": 0.5442309302520305, "grad_norm": 0.23433835804462433, "learning_rate": 0.00018422359987022463, "loss": 11.6677, "step": 25999 }, { "epoch": 0.5442518630159926, "grad_norm": 0.34316423535346985, "learning_rate": 0.00018422241783575815, "loss": 11.6755, "step": 26000 }, { "epoch": 0.5442518630159926, "eval_loss": 11.671845436096191, "eval_runtime": 34.3088, "eval_samples_per_second": 28.01, "eval_steps_per_second": 7.024, "step": 26000 }, { "epoch": 0.5442727957799548, "grad_norm": 0.2934809625148773, "learning_rate": 0.00018422123576080428, "loss": 11.6617, "step": 26001 }, { "epoch": 0.5442937285439169, "grad_norm": 0.3045943081378937, "learning_rate": 0.0001842200536453636, "loss": 11.668, "step": 26002 }, { "epoch": 0.5443146613078791, "grad_norm": 0.2743947505950928, "learning_rate": 0.0001842188714894366, "loss": 11.6679, "step": 26003 }, { "epoch": 0.5443355940718413, "grad_norm": 0.3802218735218048, "learning_rate": 0.00018421768929302392, "loss": 11.6867, "step": 26004 }, { "epoch": 0.5443565268358034, "grad_norm": 0.2857484519481659, "learning_rate": 0.0001842165070561261, "loss": 11.6679, "step": 26005 }, { "epoch": 0.5443774595997656, "grad_norm": 0.3020669221878052, "learning_rate": 0.00018421532477874375, "loss": 11.6916, "step": 26006 }, { "epoch": 0.5443983923637277, "grad_norm": 0.28700900077819824, "learning_rate": 0.00018421414246087739, "loss": 11.6728, "step": 26007 }, { "epoch": 0.5444193251276899, "grad_norm": 0.3396211266517639, "learning_rate": 0.00018421296010252757, "loss": 11.6674, "step": 26008 }, { "epoch": 0.5444402578916521, "grad_norm": 0.35100826621055603, "learning_rate": 0.00018421177770369492, "loss": 11.6695, "step": 26009 }, { "epoch": 0.5444611906556142, "grad_norm": 0.3006744980812073, "learning_rate": 0.00018421059526437997, "loss": 11.6474, "step": 26010 }, { "epoch": 0.5444821234195764, "grad_norm": 0.23311300575733185, "learning_rate": 0.0001842094127845833, "loss": 11.6643, "step": 26011 }, { "epoch": 0.5445030561835384, "grad_norm": 0.23497943580150604, "learning_rate": 0.0001842082302643055, "loss": 11.6636, "step": 26012 }, { "epoch": 0.5445239889475006, "grad_norm": 0.26519104838371277, "learning_rate": 0.00018420704770354712, "loss": 11.6854, "step": 26013 }, { "epoch": 0.5445449217114627, "grad_norm": 0.3501355051994324, "learning_rate": 0.0001842058651023087, "loss": 11.6704, "step": 26014 }, { "epoch": 0.5445658544754249, "grad_norm": 0.3157718777656555, "learning_rate": 0.00018420468246059085, "loss": 11.6604, "step": 26015 }, { "epoch": 0.5445867872393871, "grad_norm": 0.2015811800956726, "learning_rate": 0.0001842034997783941, "loss": 11.6749, "step": 26016 }, { "epoch": 0.5446077200033492, "grad_norm": 0.3553204834461212, "learning_rate": 0.00018420231705571904, "loss": 11.6718, "step": 26017 }, { "epoch": 0.5446286527673114, "grad_norm": 0.25783827900886536, "learning_rate": 0.00018420113429256624, "loss": 11.6708, "step": 26018 }, { "epoch": 0.5446495855312735, "grad_norm": 0.3088877201080322, "learning_rate": 0.00018419995148893632, "loss": 11.6624, "step": 26019 }, { "epoch": 0.5446705182952357, "grad_norm": 0.30803006887435913, "learning_rate": 0.00018419876864482975, "loss": 11.6745, "step": 26020 }, { "epoch": 0.5446914510591978, "grad_norm": 0.2584839463233948, "learning_rate": 0.00018419758576024712, "loss": 11.6699, "step": 26021 }, { "epoch": 0.54471238382316, "grad_norm": 0.28805744647979736, "learning_rate": 0.00018419640283518906, "loss": 11.6833, "step": 26022 }, { "epoch": 0.5447333165871222, "grad_norm": 0.23263880610466003, "learning_rate": 0.0001841952198696561, "loss": 11.6715, "step": 26023 }, { "epoch": 0.5447542493510843, "grad_norm": 0.280094712972641, "learning_rate": 0.0001841940368636488, "loss": 11.6831, "step": 26024 }, { "epoch": 0.5447751821150465, "grad_norm": 0.26162901520729065, "learning_rate": 0.00018419285381716773, "loss": 11.6767, "step": 26025 }, { "epoch": 0.5447961148790086, "grad_norm": 0.32460635900497437, "learning_rate": 0.0001841916707302135, "loss": 11.6711, "step": 26026 }, { "epoch": 0.5448170476429708, "grad_norm": 0.27018702030181885, "learning_rate": 0.00018419048760278664, "loss": 11.6733, "step": 26027 }, { "epoch": 0.544837980406933, "grad_norm": 0.24954698979854584, "learning_rate": 0.0001841893044348877, "loss": 11.6728, "step": 26028 }, { "epoch": 0.5448589131708951, "grad_norm": 0.30996865034103394, "learning_rate": 0.0001841881212265173, "loss": 11.6778, "step": 26029 }, { "epoch": 0.5448798459348573, "grad_norm": 0.3103797137737274, "learning_rate": 0.00018418693797767595, "loss": 11.6681, "step": 26030 }, { "epoch": 0.5449007786988194, "grad_norm": 0.20504513382911682, "learning_rate": 0.0001841857546883643, "loss": 11.67, "step": 26031 }, { "epoch": 0.5449217114627816, "grad_norm": 0.2752222716808319, "learning_rate": 0.0001841845713585828, "loss": 11.6595, "step": 26032 }, { "epoch": 0.5449426442267437, "grad_norm": 0.29933634400367737, "learning_rate": 0.00018418338798833217, "loss": 11.6791, "step": 26033 }, { "epoch": 0.5449635769907059, "grad_norm": 0.2995665669441223, "learning_rate": 0.00018418220457761285, "loss": 11.6622, "step": 26034 }, { "epoch": 0.5449845097546681, "grad_norm": 0.2510753870010376, "learning_rate": 0.00018418102112642548, "loss": 11.6639, "step": 26035 }, { "epoch": 0.5450054425186301, "grad_norm": 0.25886332988739014, "learning_rate": 0.00018417983763477057, "loss": 11.6675, "step": 26036 }, { "epoch": 0.5450263752825923, "grad_norm": 0.4391723871231079, "learning_rate": 0.00018417865410264876, "loss": 11.6811, "step": 26037 }, { "epoch": 0.5450473080465544, "grad_norm": 0.38037440180778503, "learning_rate": 0.00018417747053006057, "loss": 11.6628, "step": 26038 }, { "epoch": 0.5450682408105166, "grad_norm": 0.3729800283908844, "learning_rate": 0.0001841762869170066, "loss": 11.6787, "step": 26039 }, { "epoch": 0.5450891735744787, "grad_norm": 0.28997141122817993, "learning_rate": 0.00018417510326348738, "loss": 11.6794, "step": 26040 }, { "epoch": 0.5451101063384409, "grad_norm": 0.3150600492954254, "learning_rate": 0.00018417391956950348, "loss": 11.6678, "step": 26041 }, { "epoch": 0.5451310391024031, "grad_norm": 0.3393687605857849, "learning_rate": 0.00018417273583505554, "loss": 11.6674, "step": 26042 }, { "epoch": 0.5451519718663652, "grad_norm": 0.26444101333618164, "learning_rate": 0.00018417155206014408, "loss": 11.6612, "step": 26043 }, { "epoch": 0.5451729046303274, "grad_norm": 0.2967131435871124, "learning_rate": 0.00018417036824476963, "loss": 11.6774, "step": 26044 }, { "epoch": 0.5451938373942895, "grad_norm": 0.27077898383140564, "learning_rate": 0.00018416918438893284, "loss": 11.6665, "step": 26045 }, { "epoch": 0.5452147701582517, "grad_norm": 0.32093796133995056, "learning_rate": 0.0001841680004926342, "loss": 11.6758, "step": 26046 }, { "epoch": 0.5452357029222138, "grad_norm": 0.3114672303199768, "learning_rate": 0.0001841668165558743, "loss": 11.6939, "step": 26047 }, { "epoch": 0.545256635686176, "grad_norm": 0.2871645987033844, "learning_rate": 0.00018416563257865376, "loss": 11.676, "step": 26048 }, { "epoch": 0.5452775684501382, "grad_norm": 0.3473276197910309, "learning_rate": 0.0001841644485609731, "loss": 11.6744, "step": 26049 }, { "epoch": 0.5452985012141003, "grad_norm": 0.34063631296157837, "learning_rate": 0.00018416326450283293, "loss": 11.6876, "step": 26050 }, { "epoch": 0.5453194339780625, "grad_norm": 0.2686416506767273, "learning_rate": 0.00018416208040423376, "loss": 11.6812, "step": 26051 }, { "epoch": 0.5453403667420246, "grad_norm": 0.27591273188591003, "learning_rate": 0.0001841608962651762, "loss": 11.6517, "step": 26052 }, { "epoch": 0.5453612995059868, "grad_norm": 0.29576268792152405, "learning_rate": 0.00018415971208566082, "loss": 11.6565, "step": 26053 }, { "epoch": 0.545382232269949, "grad_norm": 0.25845038890838623, "learning_rate": 0.00018415852786568814, "loss": 11.6765, "step": 26054 }, { "epoch": 0.5454031650339111, "grad_norm": 0.26568302512168884, "learning_rate": 0.00018415734360525883, "loss": 11.6789, "step": 26055 }, { "epoch": 0.5454240977978733, "grad_norm": 0.30774742364883423, "learning_rate": 0.00018415615930437337, "loss": 11.663, "step": 26056 }, { "epoch": 0.5454450305618354, "grad_norm": 0.30024656653404236, "learning_rate": 0.00018415497496303233, "loss": 11.6724, "step": 26057 }, { "epoch": 0.5454659633257976, "grad_norm": 0.36432498693466187, "learning_rate": 0.00018415379058123635, "loss": 11.6608, "step": 26058 }, { "epoch": 0.5454868960897596, "grad_norm": 0.35965701937675476, "learning_rate": 0.00018415260615898591, "loss": 11.679, "step": 26059 }, { "epoch": 0.5455078288537218, "grad_norm": 0.2728275954723358, "learning_rate": 0.00018415142169628167, "loss": 11.6623, "step": 26060 }, { "epoch": 0.545528761617684, "grad_norm": 0.35668376088142395, "learning_rate": 0.00018415023719312414, "loss": 11.6644, "step": 26061 }, { "epoch": 0.5455496943816461, "grad_norm": 0.26003891229629517, "learning_rate": 0.0001841490526495139, "loss": 11.6659, "step": 26062 }, { "epoch": 0.5455706271456083, "grad_norm": 0.25324350595474243, "learning_rate": 0.00018414786806545154, "loss": 11.6836, "step": 26063 }, { "epoch": 0.5455915599095704, "grad_norm": 0.2576283812522888, "learning_rate": 0.0001841466834409376, "loss": 11.684, "step": 26064 }, { "epoch": 0.5456124926735326, "grad_norm": 0.2744978666305542, "learning_rate": 0.00018414549877597268, "loss": 11.6856, "step": 26065 }, { "epoch": 0.5456334254374947, "grad_norm": 0.2689286172389984, "learning_rate": 0.0001841443140705573, "loss": 11.6812, "step": 26066 }, { "epoch": 0.5456543582014569, "grad_norm": 0.23986075818538666, "learning_rate": 0.00018414312932469207, "loss": 11.6746, "step": 26067 }, { "epoch": 0.5456752909654191, "grad_norm": 0.3081878423690796, "learning_rate": 0.00018414194453837758, "loss": 11.6792, "step": 26068 }, { "epoch": 0.5456962237293812, "grad_norm": 0.23758216202259064, "learning_rate": 0.00018414075971161435, "loss": 11.6799, "step": 26069 }, { "epoch": 0.5457171564933434, "grad_norm": 0.39817750453948975, "learning_rate": 0.00018413957484440297, "loss": 11.6801, "step": 26070 }, { "epoch": 0.5457380892573055, "grad_norm": 0.26509031653404236, "learning_rate": 0.000184138389936744, "loss": 11.6745, "step": 26071 }, { "epoch": 0.5457590220212677, "grad_norm": 0.3517608940601349, "learning_rate": 0.00018413720498863803, "loss": 11.6834, "step": 26072 }, { "epoch": 0.5457799547852299, "grad_norm": 0.26465803384780884, "learning_rate": 0.00018413602000008563, "loss": 11.6643, "step": 26073 }, { "epoch": 0.545800887549192, "grad_norm": 0.2934763729572296, "learning_rate": 0.00018413483497108736, "loss": 11.6693, "step": 26074 }, { "epoch": 0.5458218203131542, "grad_norm": 0.26159021258354187, "learning_rate": 0.00018413364990164377, "loss": 11.6742, "step": 26075 }, { "epoch": 0.5458427530771163, "grad_norm": 0.26497766375541687, "learning_rate": 0.00018413246479175545, "loss": 11.6627, "step": 26076 }, { "epoch": 0.5458636858410785, "grad_norm": 0.2784833312034607, "learning_rate": 0.00018413127964142298, "loss": 11.6527, "step": 26077 }, { "epoch": 0.5458846186050406, "grad_norm": 0.2930874824523926, "learning_rate": 0.00018413009445064692, "loss": 11.6697, "step": 26078 }, { "epoch": 0.5459055513690028, "grad_norm": 0.28983327746391296, "learning_rate": 0.00018412890921942784, "loss": 11.672, "step": 26079 }, { "epoch": 0.545926484132965, "grad_norm": 0.2762857675552368, "learning_rate": 0.00018412772394776628, "loss": 11.6701, "step": 26080 }, { "epoch": 0.545947416896927, "grad_norm": 0.34898918867111206, "learning_rate": 0.00018412653863566285, "loss": 11.6838, "step": 26081 }, { "epoch": 0.5459683496608893, "grad_norm": 0.2651333212852478, "learning_rate": 0.00018412535328311814, "loss": 11.6762, "step": 26082 }, { "epoch": 0.5459892824248513, "grad_norm": 0.27721309661865234, "learning_rate": 0.00018412416789013265, "loss": 11.6779, "step": 26083 }, { "epoch": 0.5460102151888135, "grad_norm": 0.24608436226844788, "learning_rate": 0.000184122982456707, "loss": 11.6693, "step": 26084 }, { "epoch": 0.5460311479527756, "grad_norm": 0.3754827380180359, "learning_rate": 0.00018412179698284176, "loss": 11.6788, "step": 26085 }, { "epoch": 0.5460520807167378, "grad_norm": 0.3449872136116028, "learning_rate": 0.00018412061146853742, "loss": 11.6583, "step": 26086 }, { "epoch": 0.5460730134807, "grad_norm": 0.33219093084335327, "learning_rate": 0.0001841194259137947, "loss": 11.6755, "step": 26087 }, { "epoch": 0.5460939462446621, "grad_norm": 0.3151235282421112, "learning_rate": 0.00018411824031861404, "loss": 11.6762, "step": 26088 }, { "epoch": 0.5461148790086243, "grad_norm": 0.24432478845119476, "learning_rate": 0.00018411705468299606, "loss": 11.666, "step": 26089 }, { "epoch": 0.5461358117725864, "grad_norm": 0.3442799746990204, "learning_rate": 0.00018411586900694133, "loss": 11.673, "step": 26090 }, { "epoch": 0.5461567445365486, "grad_norm": 0.3610014319419861, "learning_rate": 0.00018411468329045043, "loss": 11.6678, "step": 26091 }, { "epoch": 0.5461776773005108, "grad_norm": 0.2857697010040283, "learning_rate": 0.00018411349753352392, "loss": 11.6728, "step": 26092 }, { "epoch": 0.5461986100644729, "grad_norm": 0.2612634599208832, "learning_rate": 0.00018411231173616236, "loss": 11.6596, "step": 26093 }, { "epoch": 0.5462195428284351, "grad_norm": 0.2995182275772095, "learning_rate": 0.0001841111258983663, "loss": 11.662, "step": 26094 }, { "epoch": 0.5462404755923972, "grad_norm": 0.239580899477005, "learning_rate": 0.00018410994002013634, "loss": 11.6792, "step": 26095 }, { "epoch": 0.5462614083563594, "grad_norm": 0.24588067829608917, "learning_rate": 0.00018410875410147307, "loss": 11.6676, "step": 26096 }, { "epoch": 0.5462823411203215, "grad_norm": 0.2457045316696167, "learning_rate": 0.00018410756814237703, "loss": 11.6696, "step": 26097 }, { "epoch": 0.5463032738842837, "grad_norm": 0.3657357692718506, "learning_rate": 0.00018410638214284882, "loss": 11.6753, "step": 26098 }, { "epoch": 0.5463242066482459, "grad_norm": 0.3859509527683258, "learning_rate": 0.00018410519610288895, "loss": 11.6813, "step": 26099 }, { "epoch": 0.546345139412208, "grad_norm": 0.2898116707801819, "learning_rate": 0.00018410401002249803, "loss": 11.6679, "step": 26100 }, { "epoch": 0.5463660721761702, "grad_norm": 0.3198479115962982, "learning_rate": 0.00018410282390167663, "loss": 11.6811, "step": 26101 }, { "epoch": 0.5463870049401323, "grad_norm": 0.2655990421772003, "learning_rate": 0.00018410163774042532, "loss": 11.6647, "step": 26102 }, { "epoch": 0.5464079377040945, "grad_norm": 0.34934884309768677, "learning_rate": 0.00018410045153874466, "loss": 11.6748, "step": 26103 }, { "epoch": 0.5464288704680565, "grad_norm": 0.28276675939559937, "learning_rate": 0.00018409926529663526, "loss": 11.682, "step": 26104 }, { "epoch": 0.5464498032320187, "grad_norm": 0.2585603892803192, "learning_rate": 0.00018409807901409764, "loss": 11.6559, "step": 26105 }, { "epoch": 0.546470735995981, "grad_norm": 0.24001289904117584, "learning_rate": 0.00018409689269113235, "loss": 11.6526, "step": 26106 }, { "epoch": 0.546491668759943, "grad_norm": 0.3147658407688141, "learning_rate": 0.00018409570632774004, "loss": 11.6765, "step": 26107 }, { "epoch": 0.5465126015239052, "grad_norm": 0.26684805750846863, "learning_rate": 0.00018409451992392123, "loss": 11.69, "step": 26108 }, { "epoch": 0.5465335342878673, "grad_norm": 0.44824814796447754, "learning_rate": 0.0001840933334796765, "loss": 11.6814, "step": 26109 }, { "epoch": 0.5465544670518295, "grad_norm": 0.26898422837257385, "learning_rate": 0.0001840921469950064, "loss": 11.6791, "step": 26110 }, { "epoch": 0.5465753998157917, "grad_norm": 0.2796024680137634, "learning_rate": 0.00018409096046991157, "loss": 11.6816, "step": 26111 }, { "epoch": 0.5465963325797538, "grad_norm": 0.2330855131149292, "learning_rate": 0.0001840897739043925, "loss": 11.6578, "step": 26112 }, { "epoch": 0.546617265343716, "grad_norm": 0.2470100224018097, "learning_rate": 0.00018408858729844978, "loss": 11.6528, "step": 26113 }, { "epoch": 0.5466381981076781, "grad_norm": 0.31349024176597595, "learning_rate": 0.00018408740065208398, "loss": 11.6708, "step": 26114 }, { "epoch": 0.5466591308716403, "grad_norm": 0.3123510777950287, "learning_rate": 0.0001840862139652957, "loss": 11.6783, "step": 26115 }, { "epoch": 0.5466800636356024, "grad_norm": 0.3586021363735199, "learning_rate": 0.0001840850272380855, "loss": 11.6836, "step": 26116 }, { "epoch": 0.5467009963995646, "grad_norm": 0.33406367897987366, "learning_rate": 0.00018408384047045393, "loss": 11.6773, "step": 26117 }, { "epoch": 0.5467219291635268, "grad_norm": 0.2469012588262558, "learning_rate": 0.0001840826536624016, "loss": 11.6634, "step": 26118 }, { "epoch": 0.5467428619274889, "grad_norm": 0.2585996091365814, "learning_rate": 0.000184081466813929, "loss": 11.662, "step": 26119 }, { "epoch": 0.5467637946914511, "grad_norm": 0.30309900641441345, "learning_rate": 0.00018408027992503678, "loss": 11.6568, "step": 26120 }, { "epoch": 0.5467847274554132, "grad_norm": 0.2925240397453308, "learning_rate": 0.0001840790929957255, "loss": 11.6618, "step": 26121 }, { "epoch": 0.5468056602193754, "grad_norm": 0.2935863733291626, "learning_rate": 0.0001840779060259957, "loss": 11.6691, "step": 26122 }, { "epoch": 0.5468265929833375, "grad_norm": 0.2413579374551773, "learning_rate": 0.00018407671901584798, "loss": 11.6714, "step": 26123 }, { "epoch": 0.5468475257472997, "grad_norm": 0.2556232213973999, "learning_rate": 0.00018407553196528287, "loss": 11.6714, "step": 26124 }, { "epoch": 0.5468684585112619, "grad_norm": 0.35646891593933105, "learning_rate": 0.00018407434487430096, "loss": 11.6724, "step": 26125 }, { "epoch": 0.546889391275224, "grad_norm": 0.31821897625923157, "learning_rate": 0.00018407315774290287, "loss": 11.6713, "step": 26126 }, { "epoch": 0.5469103240391862, "grad_norm": 0.21525177359580994, "learning_rate": 0.0001840719705710891, "loss": 11.6483, "step": 26127 }, { "epoch": 0.5469312568031482, "grad_norm": 0.25752177834510803, "learning_rate": 0.00018407078335886027, "loss": 11.6614, "step": 26128 }, { "epoch": 0.5469521895671104, "grad_norm": 0.3658374845981598, "learning_rate": 0.00018406959610621693, "loss": 11.6805, "step": 26129 }, { "epoch": 0.5469731223310726, "grad_norm": 0.2851191759109497, "learning_rate": 0.00018406840881315964, "loss": 11.6777, "step": 26130 }, { "epoch": 0.5469940550950347, "grad_norm": 0.2903883159160614, "learning_rate": 0.00018406722147968897, "loss": 11.657, "step": 26131 }, { "epoch": 0.5470149878589969, "grad_norm": 0.2868196666240692, "learning_rate": 0.00018406603410580548, "loss": 11.6807, "step": 26132 }, { "epoch": 0.547035920622959, "grad_norm": 0.29603511095046997, "learning_rate": 0.0001840648466915098, "loss": 11.6708, "step": 26133 }, { "epoch": 0.5470568533869212, "grad_norm": 0.2723204493522644, "learning_rate": 0.00018406365923680248, "loss": 11.6636, "step": 26134 }, { "epoch": 0.5470777861508833, "grad_norm": 0.31432485580444336, "learning_rate": 0.00018406247174168405, "loss": 11.6851, "step": 26135 }, { "epoch": 0.5470987189148455, "grad_norm": 0.3266178071498871, "learning_rate": 0.0001840612842061551, "loss": 11.6753, "step": 26136 }, { "epoch": 0.5471196516788077, "grad_norm": 0.39235004782676697, "learning_rate": 0.00018406009663021622, "loss": 11.6722, "step": 26137 }, { "epoch": 0.5471405844427698, "grad_norm": 0.3774566948413849, "learning_rate": 0.00018405890901386794, "loss": 11.6845, "step": 26138 }, { "epoch": 0.547161517206732, "grad_norm": 0.26797881722450256, "learning_rate": 0.0001840577213571109, "loss": 11.6806, "step": 26139 }, { "epoch": 0.5471824499706941, "grad_norm": 0.3440778851509094, "learning_rate": 0.0001840565336599456, "loss": 11.6807, "step": 26140 }, { "epoch": 0.5472033827346563, "grad_norm": 0.3289637565612793, "learning_rate": 0.00018405534592237264, "loss": 11.686, "step": 26141 }, { "epoch": 0.5472243154986184, "grad_norm": 0.38769346475601196, "learning_rate": 0.00018405415814439258, "loss": 11.6742, "step": 26142 }, { "epoch": 0.5472452482625806, "grad_norm": 0.23521257936954498, "learning_rate": 0.00018405297032600605, "loss": 11.6708, "step": 26143 }, { "epoch": 0.5472661810265428, "grad_norm": 0.25385820865631104, "learning_rate": 0.00018405178246721352, "loss": 11.6571, "step": 26144 }, { "epoch": 0.5472871137905049, "grad_norm": 0.33033451437950134, "learning_rate": 0.00018405059456801565, "loss": 11.6801, "step": 26145 }, { "epoch": 0.5473080465544671, "grad_norm": 0.31600791215896606, "learning_rate": 0.00018404940662841295, "loss": 11.6749, "step": 26146 }, { "epoch": 0.5473289793184292, "grad_norm": 0.2967366576194763, "learning_rate": 0.00018404821864840602, "loss": 11.6705, "step": 26147 }, { "epoch": 0.5473499120823914, "grad_norm": 0.283474326133728, "learning_rate": 0.00018404703062799543, "loss": 11.692, "step": 26148 }, { "epoch": 0.5473708448463536, "grad_norm": 0.2713222801685333, "learning_rate": 0.00018404584256718175, "loss": 11.6645, "step": 26149 }, { "epoch": 0.5473917776103157, "grad_norm": 0.3190922737121582, "learning_rate": 0.00018404465446596554, "loss": 11.6913, "step": 26150 }, { "epoch": 0.5474127103742779, "grad_norm": 0.40494224429130554, "learning_rate": 0.0001840434663243474, "loss": 11.6513, "step": 26151 }, { "epoch": 0.54743364313824, "grad_norm": 0.2878423035144806, "learning_rate": 0.0001840422781423279, "loss": 11.6582, "step": 26152 }, { "epoch": 0.5474545759022021, "grad_norm": 0.2931733727455139, "learning_rate": 0.00018404108991990753, "loss": 11.6738, "step": 26153 }, { "epoch": 0.5474755086661642, "grad_norm": 0.38155844807624817, "learning_rate": 0.00018403990165708698, "loss": 11.6725, "step": 26154 }, { "epoch": 0.5474964414301264, "grad_norm": 0.29098862409591675, "learning_rate": 0.00018403871335386673, "loss": 11.6904, "step": 26155 }, { "epoch": 0.5475173741940886, "grad_norm": 0.24511897563934326, "learning_rate": 0.00018403752501024737, "loss": 11.6776, "step": 26156 }, { "epoch": 0.5475383069580507, "grad_norm": 0.3617683947086334, "learning_rate": 0.00018403633662622955, "loss": 11.6909, "step": 26157 }, { "epoch": 0.5475592397220129, "grad_norm": 0.32095101475715637, "learning_rate": 0.00018403514820181373, "loss": 11.6719, "step": 26158 }, { "epoch": 0.547580172485975, "grad_norm": 0.3132837116718292, "learning_rate": 0.00018403395973700054, "loss": 11.6674, "step": 26159 }, { "epoch": 0.5476011052499372, "grad_norm": 0.29279735684394836, "learning_rate": 0.00018403277123179055, "loss": 11.6436, "step": 26160 }, { "epoch": 0.5476220380138993, "grad_norm": 0.22959865629673004, "learning_rate": 0.00018403158268618435, "loss": 11.6815, "step": 26161 }, { "epoch": 0.5476429707778615, "grad_norm": 0.23345987498760223, "learning_rate": 0.00018403039410018243, "loss": 11.6669, "step": 26162 }, { "epoch": 0.5476639035418237, "grad_norm": 0.33999699354171753, "learning_rate": 0.00018402920547378544, "loss": 11.679, "step": 26163 }, { "epoch": 0.5476848363057858, "grad_norm": 0.3281401991844177, "learning_rate": 0.00018402801680699393, "loss": 11.6647, "step": 26164 }, { "epoch": 0.547705769069748, "grad_norm": 0.33053290843963623, "learning_rate": 0.00018402682809980845, "loss": 11.6784, "step": 26165 }, { "epoch": 0.5477267018337101, "grad_norm": 0.28592947125434875, "learning_rate": 0.00018402563935222962, "loss": 11.6698, "step": 26166 }, { "epoch": 0.5477476345976723, "grad_norm": 0.35951340198516846, "learning_rate": 0.00018402445056425797, "loss": 11.6631, "step": 26167 }, { "epoch": 0.5477685673616345, "grad_norm": 0.22587035596370697, "learning_rate": 0.0001840232617358941, "loss": 11.6722, "step": 26168 }, { "epoch": 0.5477895001255966, "grad_norm": 0.3059273064136505, "learning_rate": 0.0001840220728671385, "loss": 11.6854, "step": 26169 }, { "epoch": 0.5478104328895588, "grad_norm": 0.25853607058525085, "learning_rate": 0.00018402088395799186, "loss": 11.678, "step": 26170 }, { "epoch": 0.5478313656535209, "grad_norm": 0.37381216883659363, "learning_rate": 0.00018401969500845468, "loss": 11.6571, "step": 26171 }, { "epoch": 0.5478522984174831, "grad_norm": 0.3151167035102844, "learning_rate": 0.00018401850601852757, "loss": 11.6664, "step": 26172 }, { "epoch": 0.5478732311814452, "grad_norm": 0.33565595746040344, "learning_rate": 0.00018401731698821107, "loss": 11.6902, "step": 26173 }, { "epoch": 0.5478941639454074, "grad_norm": 0.295904278755188, "learning_rate": 0.00018401612791750575, "loss": 11.6697, "step": 26174 }, { "epoch": 0.5479150967093696, "grad_norm": 0.3355366289615631, "learning_rate": 0.0001840149388064122, "loss": 11.683, "step": 26175 }, { "epoch": 0.5479360294733316, "grad_norm": 0.3200601637363434, "learning_rate": 0.00018401374965493097, "loss": 11.6734, "step": 26176 }, { "epoch": 0.5479569622372938, "grad_norm": 0.32527074217796326, "learning_rate": 0.00018401256046306268, "loss": 11.6722, "step": 26177 }, { "epoch": 0.5479778950012559, "grad_norm": 0.24721811711788177, "learning_rate": 0.00018401137123080785, "loss": 11.6854, "step": 26178 }, { "epoch": 0.5479988277652181, "grad_norm": 0.25347498059272766, "learning_rate": 0.0001840101819581671, "loss": 11.6604, "step": 26179 }, { "epoch": 0.5480197605291802, "grad_norm": 0.2809109091758728, "learning_rate": 0.0001840089926451409, "loss": 11.6495, "step": 26180 }, { "epoch": 0.5480406932931424, "grad_norm": 0.3486112356185913, "learning_rate": 0.00018400780329172996, "loss": 11.682, "step": 26181 }, { "epoch": 0.5480616260571046, "grad_norm": 0.36164140701293945, "learning_rate": 0.00018400661389793475, "loss": 11.671, "step": 26182 }, { "epoch": 0.5480825588210667, "grad_norm": 0.2845306992530823, "learning_rate": 0.00018400542446375592, "loss": 11.6487, "step": 26183 }, { "epoch": 0.5481034915850289, "grad_norm": 0.24246448278427124, "learning_rate": 0.00018400423498919394, "loss": 11.6804, "step": 26184 }, { "epoch": 0.548124424348991, "grad_norm": 0.340618759393692, "learning_rate": 0.00018400304547424947, "loss": 11.6676, "step": 26185 }, { "epoch": 0.5481453571129532, "grad_norm": 0.29304391145706177, "learning_rate": 0.00018400185591892305, "loss": 11.6677, "step": 26186 }, { "epoch": 0.5481662898769154, "grad_norm": 0.29267024993896484, "learning_rate": 0.00018400066632321527, "loss": 11.6701, "step": 26187 }, { "epoch": 0.5481872226408775, "grad_norm": 0.2605409324169159, "learning_rate": 0.00018399947668712666, "loss": 11.6759, "step": 26188 }, { "epoch": 0.5482081554048397, "grad_norm": 0.3507464528083801, "learning_rate": 0.00018399828701065783, "loss": 11.683, "step": 26189 }, { "epoch": 0.5482290881688018, "grad_norm": 0.28281092643737793, "learning_rate": 0.00018399709729380933, "loss": 11.6517, "step": 26190 }, { "epoch": 0.548250020932764, "grad_norm": 0.2999822199344635, "learning_rate": 0.00018399590753658175, "loss": 11.6496, "step": 26191 }, { "epoch": 0.5482709536967261, "grad_norm": 0.3108527362346649, "learning_rate": 0.00018399471773897566, "loss": 11.6683, "step": 26192 }, { "epoch": 0.5482918864606883, "grad_norm": 0.2712324559688568, "learning_rate": 0.00018399352790099164, "loss": 11.6794, "step": 26193 }, { "epoch": 0.5483128192246505, "grad_norm": 0.2693663239479065, "learning_rate": 0.00018399233802263024, "loss": 11.6748, "step": 26194 }, { "epoch": 0.5483337519886126, "grad_norm": 0.25508275628089905, "learning_rate": 0.00018399114810389202, "loss": 11.6798, "step": 26195 }, { "epoch": 0.5483546847525748, "grad_norm": 0.31917887926101685, "learning_rate": 0.00018398995814477757, "loss": 11.6698, "step": 26196 }, { "epoch": 0.5483756175165369, "grad_norm": 0.3193477690219879, "learning_rate": 0.00018398876814528748, "loss": 11.6737, "step": 26197 }, { "epoch": 0.548396550280499, "grad_norm": 0.26517626643180847, "learning_rate": 0.0001839875781054223, "loss": 11.6624, "step": 26198 }, { "epoch": 0.5484174830444611, "grad_norm": 0.2824546694755554, "learning_rate": 0.00018398638802518262, "loss": 11.6518, "step": 26199 }, { "epoch": 0.5484384158084233, "grad_norm": 0.3353699743747711, "learning_rate": 0.000183985197904569, "loss": 11.6759, "step": 26200 }, { "epoch": 0.5484593485723855, "grad_norm": 0.2798606753349304, "learning_rate": 0.000183984007743582, "loss": 11.6711, "step": 26201 }, { "epoch": 0.5484802813363476, "grad_norm": 0.2596595585346222, "learning_rate": 0.0001839828175422222, "loss": 11.6651, "step": 26202 }, { "epoch": 0.5485012141003098, "grad_norm": 0.3354424834251404, "learning_rate": 0.00018398162730049017, "loss": 11.6734, "step": 26203 }, { "epoch": 0.5485221468642719, "grad_norm": 0.2294979840517044, "learning_rate": 0.00018398043701838652, "loss": 11.6637, "step": 26204 }, { "epoch": 0.5485430796282341, "grad_norm": 0.24421095848083496, "learning_rate": 0.00018397924669591177, "loss": 11.6673, "step": 26205 }, { "epoch": 0.5485640123921963, "grad_norm": 0.2705443203449249, "learning_rate": 0.0001839780563330665, "loss": 11.6783, "step": 26206 }, { "epoch": 0.5485849451561584, "grad_norm": 0.3789379596710205, "learning_rate": 0.00018397686592985132, "loss": 11.6618, "step": 26207 }, { "epoch": 0.5486058779201206, "grad_norm": 0.32401514053344727, "learning_rate": 0.0001839756754862668, "loss": 11.6711, "step": 26208 }, { "epoch": 0.5486268106840827, "grad_norm": 0.32710838317871094, "learning_rate": 0.00018397448500231345, "loss": 11.6799, "step": 26209 }, { "epoch": 0.5486477434480449, "grad_norm": 0.29738089442253113, "learning_rate": 0.00018397329447799189, "loss": 11.6678, "step": 26210 }, { "epoch": 0.548668676212007, "grad_norm": 0.2794443964958191, "learning_rate": 0.00018397210391330268, "loss": 11.6615, "step": 26211 }, { "epoch": 0.5486896089759692, "grad_norm": 0.2876773476600647, "learning_rate": 0.0001839709133082464, "loss": 11.6517, "step": 26212 }, { "epoch": 0.5487105417399314, "grad_norm": 0.28391170501708984, "learning_rate": 0.00018396972266282365, "loss": 11.6816, "step": 26213 }, { "epoch": 0.5487314745038935, "grad_norm": 0.2600516676902771, "learning_rate": 0.00018396853197703492, "loss": 11.6685, "step": 26214 }, { "epoch": 0.5487524072678557, "grad_norm": 0.3652957081794739, "learning_rate": 0.00018396734125088085, "loss": 11.6693, "step": 26215 }, { "epoch": 0.5487733400318178, "grad_norm": 0.25294625759124756, "learning_rate": 0.000183966150484362, "loss": 11.6804, "step": 26216 }, { "epoch": 0.54879427279578, "grad_norm": 0.31890371441841125, "learning_rate": 0.00018396495967747895, "loss": 11.6763, "step": 26217 }, { "epoch": 0.5488152055597421, "grad_norm": 0.34969526529312134, "learning_rate": 0.00018396376883023224, "loss": 11.6885, "step": 26218 }, { "epoch": 0.5488361383237043, "grad_norm": 0.2571519911289215, "learning_rate": 0.0001839625779426225, "loss": 11.6663, "step": 26219 }, { "epoch": 0.5488570710876665, "grad_norm": 0.2551591694355011, "learning_rate": 0.00018396138701465024, "loss": 11.6734, "step": 26220 }, { "epoch": 0.5488780038516285, "grad_norm": 0.2591935694217682, "learning_rate": 0.00018396019604631606, "loss": 11.6774, "step": 26221 }, { "epoch": 0.5488989366155907, "grad_norm": 0.2954100966453552, "learning_rate": 0.00018395900503762053, "loss": 11.676, "step": 26222 }, { "epoch": 0.5489198693795528, "grad_norm": 0.1924244463443756, "learning_rate": 0.00018395781398856422, "loss": 11.6732, "step": 26223 }, { "epoch": 0.548940802143515, "grad_norm": 0.26971596479415894, "learning_rate": 0.00018395662289914772, "loss": 11.6765, "step": 26224 }, { "epoch": 0.5489617349074771, "grad_norm": 0.2877468168735504, "learning_rate": 0.0001839554317693716, "loss": 11.6888, "step": 26225 }, { "epoch": 0.5489826676714393, "grad_norm": 0.3013884723186493, "learning_rate": 0.0001839542405992364, "loss": 11.6795, "step": 26226 }, { "epoch": 0.5490036004354015, "grad_norm": 0.2791503369808197, "learning_rate": 0.0001839530493887427, "loss": 11.6687, "step": 26227 }, { "epoch": 0.5490245331993636, "grad_norm": 0.3253696858882904, "learning_rate": 0.00018395185813789114, "loss": 11.6919, "step": 26228 }, { "epoch": 0.5490454659633258, "grad_norm": 0.28502023220062256, "learning_rate": 0.00018395066684668217, "loss": 11.6731, "step": 26229 }, { "epoch": 0.5490663987272879, "grad_norm": 0.3836531341075897, "learning_rate": 0.00018394947551511647, "loss": 11.6808, "step": 26230 }, { "epoch": 0.5490873314912501, "grad_norm": 0.4371167719364166, "learning_rate": 0.0001839482841431946, "loss": 11.6698, "step": 26231 }, { "epoch": 0.5491082642552123, "grad_norm": 0.30213335156440735, "learning_rate": 0.00018394709273091705, "loss": 11.6865, "step": 26232 }, { "epoch": 0.5491291970191744, "grad_norm": 0.27143603563308716, "learning_rate": 0.00018394590127828448, "loss": 11.67, "step": 26233 }, { "epoch": 0.5491501297831366, "grad_norm": 0.26089128851890564, "learning_rate": 0.00018394470978529744, "loss": 11.6696, "step": 26234 }, { "epoch": 0.5491710625470987, "grad_norm": 0.2767771780490875, "learning_rate": 0.00018394351825195648, "loss": 11.681, "step": 26235 }, { "epoch": 0.5491919953110609, "grad_norm": 0.2827813923358917, "learning_rate": 0.0001839423266782622, "loss": 11.6633, "step": 26236 }, { "epoch": 0.549212928075023, "grad_norm": 0.2635987102985382, "learning_rate": 0.00018394113506421515, "loss": 11.6586, "step": 26237 }, { "epoch": 0.5492338608389852, "grad_norm": 0.3132483959197998, "learning_rate": 0.00018393994340981594, "loss": 11.6679, "step": 26238 }, { "epoch": 0.5492547936029474, "grad_norm": 0.26728641986846924, "learning_rate": 0.00018393875171506509, "loss": 11.6704, "step": 26239 }, { "epoch": 0.5492757263669095, "grad_norm": 0.30891209840774536, "learning_rate": 0.00018393755997996324, "loss": 11.6649, "step": 26240 }, { "epoch": 0.5492966591308717, "grad_norm": 0.2713163197040558, "learning_rate": 0.00018393636820451085, "loss": 11.6699, "step": 26241 }, { "epoch": 0.5493175918948338, "grad_norm": 0.27580395340919495, "learning_rate": 0.00018393517638870863, "loss": 11.6664, "step": 26242 }, { "epoch": 0.549338524658796, "grad_norm": 0.33961230516433716, "learning_rate": 0.00018393398453255704, "loss": 11.6783, "step": 26243 }, { "epoch": 0.549359457422758, "grad_norm": 0.37796708941459656, "learning_rate": 0.00018393279263605674, "loss": 11.6747, "step": 26244 }, { "epoch": 0.5493803901867202, "grad_norm": 0.2492530643939972, "learning_rate": 0.00018393160069920826, "loss": 11.6741, "step": 26245 }, { "epoch": 0.5494013229506824, "grad_norm": 0.3001112937927246, "learning_rate": 0.00018393040872201216, "loss": 11.6771, "step": 26246 }, { "epoch": 0.5494222557146445, "grad_norm": 0.2570003271102905, "learning_rate": 0.00018392921670446906, "loss": 11.6671, "step": 26247 }, { "epoch": 0.5494431884786067, "grad_norm": 0.3414897322654724, "learning_rate": 0.00018392802464657946, "loss": 11.6556, "step": 26248 }, { "epoch": 0.5494641212425688, "grad_norm": 0.29793721437454224, "learning_rate": 0.000183926832548344, "loss": 11.6739, "step": 26249 }, { "epoch": 0.549485054006531, "grad_norm": 0.26474228501319885, "learning_rate": 0.00018392564040976326, "loss": 11.6717, "step": 26250 }, { "epoch": 0.5495059867704932, "grad_norm": 0.2772918939590454, "learning_rate": 0.00018392444823083772, "loss": 11.671, "step": 26251 }, { "epoch": 0.5495269195344553, "grad_norm": 0.30088791251182556, "learning_rate": 0.00018392325601156805, "loss": 11.6683, "step": 26252 }, { "epoch": 0.5495478522984175, "grad_norm": 0.29027190804481506, "learning_rate": 0.0001839220637519548, "loss": 11.6774, "step": 26253 }, { "epoch": 0.5495687850623796, "grad_norm": 0.24411365389823914, "learning_rate": 0.00018392087145199852, "loss": 11.6767, "step": 26254 }, { "epoch": 0.5495897178263418, "grad_norm": 0.323352187871933, "learning_rate": 0.00018391967911169982, "loss": 11.6706, "step": 26255 }, { "epoch": 0.5496106505903039, "grad_norm": 0.2992793023586273, "learning_rate": 0.00018391848673105917, "loss": 11.6787, "step": 26256 }, { "epoch": 0.5496315833542661, "grad_norm": 0.3480239510536194, "learning_rate": 0.00018391729431007728, "loss": 11.6844, "step": 26257 }, { "epoch": 0.5496525161182283, "grad_norm": 0.2750287652015686, "learning_rate": 0.00018391610184875467, "loss": 11.6681, "step": 26258 }, { "epoch": 0.5496734488821904, "grad_norm": 0.3087490200996399, "learning_rate": 0.0001839149093470919, "loss": 11.6711, "step": 26259 }, { "epoch": 0.5496943816461526, "grad_norm": 0.30448588728904724, "learning_rate": 0.00018391371680508955, "loss": 11.6828, "step": 26260 }, { "epoch": 0.5497153144101147, "grad_norm": 0.2733542025089264, "learning_rate": 0.00018391252422274822, "loss": 11.6587, "step": 26261 }, { "epoch": 0.5497362471740769, "grad_norm": 0.3454653322696686, "learning_rate": 0.0001839113316000684, "loss": 11.6871, "step": 26262 }, { "epoch": 0.549757179938039, "grad_norm": 0.3466168940067291, "learning_rate": 0.00018391013893705077, "loss": 11.6816, "step": 26263 }, { "epoch": 0.5497781127020012, "grad_norm": 0.2954496741294861, "learning_rate": 0.00018390894623369582, "loss": 11.6801, "step": 26264 }, { "epoch": 0.5497990454659634, "grad_norm": 0.2551589012145996, "learning_rate": 0.00018390775349000417, "loss": 11.6404, "step": 26265 }, { "epoch": 0.5498199782299255, "grad_norm": 0.24617734551429749, "learning_rate": 0.0001839065607059764, "loss": 11.6531, "step": 26266 }, { "epoch": 0.5498409109938877, "grad_norm": 0.3977920114994049, "learning_rate": 0.00018390536788161308, "loss": 11.6818, "step": 26267 }, { "epoch": 0.5498618437578497, "grad_norm": 0.23848064243793488, "learning_rate": 0.00018390417501691473, "loss": 11.6862, "step": 26268 }, { "epoch": 0.549882776521812, "grad_norm": 0.33281609416007996, "learning_rate": 0.00018390298211188197, "loss": 11.6816, "step": 26269 }, { "epoch": 0.5499037092857741, "grad_norm": 0.30908170342445374, "learning_rate": 0.00018390178916651535, "loss": 11.6778, "step": 26270 }, { "epoch": 0.5499246420497362, "grad_norm": 0.2529332935810089, "learning_rate": 0.0001839005961808155, "loss": 11.6774, "step": 26271 }, { "epoch": 0.5499455748136984, "grad_norm": 0.3342757821083069, "learning_rate": 0.00018389940315478292, "loss": 11.6803, "step": 26272 }, { "epoch": 0.5499665075776605, "grad_norm": 0.29658767580986023, "learning_rate": 0.0001838982100884182, "loss": 11.6565, "step": 26273 }, { "epoch": 0.5499874403416227, "grad_norm": 0.28184348344802856, "learning_rate": 0.00018389701698172197, "loss": 11.6628, "step": 26274 }, { "epoch": 0.5500083731055848, "grad_norm": 0.2742518484592438, "learning_rate": 0.00018389582383469475, "loss": 11.6758, "step": 26275 }, { "epoch": 0.550029305869547, "grad_norm": 0.3236338198184967, "learning_rate": 0.00018389463064733713, "loss": 11.6592, "step": 26276 }, { "epoch": 0.5500502386335092, "grad_norm": 0.2619255483150482, "learning_rate": 0.00018389343741964965, "loss": 11.6614, "step": 26277 }, { "epoch": 0.5500711713974713, "grad_norm": 0.2605412006378174, "learning_rate": 0.00018389224415163295, "loss": 11.6702, "step": 26278 }, { "epoch": 0.5500921041614335, "grad_norm": 0.3375876843929291, "learning_rate": 0.00018389105084328753, "loss": 11.6754, "step": 26279 }, { "epoch": 0.5501130369253956, "grad_norm": 0.26799800992012024, "learning_rate": 0.00018388985749461404, "loss": 11.6695, "step": 26280 }, { "epoch": 0.5501339696893578, "grad_norm": 0.29863202571868896, "learning_rate": 0.000183888664105613, "loss": 11.6617, "step": 26281 }, { "epoch": 0.5501549024533199, "grad_norm": 0.29588523507118225, "learning_rate": 0.00018388747067628498, "loss": 11.6891, "step": 26282 }, { "epoch": 0.5501758352172821, "grad_norm": 0.263995498418808, "learning_rate": 0.00018388627720663058, "loss": 11.6789, "step": 26283 }, { "epoch": 0.5501967679812443, "grad_norm": 0.36111921072006226, "learning_rate": 0.0001838850836966504, "loss": 11.6863, "step": 26284 }, { "epoch": 0.5502177007452064, "grad_norm": 0.28430163860321045, "learning_rate": 0.00018388389014634493, "loss": 11.6663, "step": 26285 }, { "epoch": 0.5502386335091686, "grad_norm": 0.35786202549934387, "learning_rate": 0.0001838826965557148, "loss": 11.6864, "step": 26286 }, { "epoch": 0.5502595662731307, "grad_norm": 0.28699466586112976, "learning_rate": 0.00018388150292476062, "loss": 11.6562, "step": 26287 }, { "epoch": 0.5502804990370929, "grad_norm": 0.38712742924690247, "learning_rate": 0.00018388030925348288, "loss": 11.6812, "step": 26288 }, { "epoch": 0.5503014318010551, "grad_norm": 0.4400807321071625, "learning_rate": 0.0001838791155418822, "loss": 11.6674, "step": 26289 }, { "epoch": 0.5503223645650172, "grad_norm": 0.30163145065307617, "learning_rate": 0.00018387792178995916, "loss": 11.667, "step": 26290 }, { "epoch": 0.5503432973289794, "grad_norm": 0.27078476548194885, "learning_rate": 0.0001838767279977143, "loss": 11.671, "step": 26291 }, { "epoch": 0.5503642300929414, "grad_norm": 0.29976531863212585, "learning_rate": 0.00018387553416514824, "loss": 11.6756, "step": 26292 }, { "epoch": 0.5503851628569036, "grad_norm": 0.31880882382392883, "learning_rate": 0.00018387434029226154, "loss": 11.679, "step": 26293 }, { "epoch": 0.5504060956208657, "grad_norm": 0.290351003408432, "learning_rate": 0.00018387314637905472, "loss": 11.6621, "step": 26294 }, { "epoch": 0.5504270283848279, "grad_norm": 0.27441340684890747, "learning_rate": 0.00018387195242552845, "loss": 11.6644, "step": 26295 }, { "epoch": 0.5504479611487901, "grad_norm": 0.3079245686531067, "learning_rate": 0.0001838707584316832, "loss": 11.6666, "step": 26296 }, { "epoch": 0.5504688939127522, "grad_norm": 0.2588837444782257, "learning_rate": 0.00018386956439751963, "loss": 11.6578, "step": 26297 }, { "epoch": 0.5504898266767144, "grad_norm": 0.2561776041984558, "learning_rate": 0.00018386837032303825, "loss": 11.669, "step": 26298 }, { "epoch": 0.5505107594406765, "grad_norm": 0.2383691817522049, "learning_rate": 0.0001838671762082397, "loss": 11.6669, "step": 26299 }, { "epoch": 0.5505316922046387, "grad_norm": 0.30760324001312256, "learning_rate": 0.00018386598205312452, "loss": 11.6723, "step": 26300 }, { "epoch": 0.5505526249686008, "grad_norm": 0.2953013479709625, "learning_rate": 0.00018386478785769323, "loss": 11.6688, "step": 26301 }, { "epoch": 0.550573557732563, "grad_norm": 0.2930692732334137, "learning_rate": 0.0001838635936219465, "loss": 11.6865, "step": 26302 }, { "epoch": 0.5505944904965252, "grad_norm": 0.26081666350364685, "learning_rate": 0.00018386239934588486, "loss": 11.6597, "step": 26303 }, { "epoch": 0.5506154232604873, "grad_norm": 0.3268873691558838, "learning_rate": 0.00018386120502950887, "loss": 11.6888, "step": 26304 }, { "epoch": 0.5506363560244495, "grad_norm": 0.30930280685424805, "learning_rate": 0.0001838600106728191, "loss": 11.6538, "step": 26305 }, { "epoch": 0.5506572887884116, "grad_norm": 0.26182791590690613, "learning_rate": 0.0001838588162758162, "loss": 11.6591, "step": 26306 }, { "epoch": 0.5506782215523738, "grad_norm": 0.23175056278705597, "learning_rate": 0.0001838576218385006, "loss": 11.66, "step": 26307 }, { "epoch": 0.550699154316336, "grad_norm": 0.2827959954738617, "learning_rate": 0.00018385642736087302, "loss": 11.6683, "step": 26308 }, { "epoch": 0.5507200870802981, "grad_norm": 0.3293205201625824, "learning_rate": 0.00018385523284293397, "loss": 11.6733, "step": 26309 }, { "epoch": 0.5507410198442603, "grad_norm": 0.35252687335014343, "learning_rate": 0.00018385403828468402, "loss": 11.6784, "step": 26310 }, { "epoch": 0.5507619526082224, "grad_norm": 0.319998562335968, "learning_rate": 0.00018385284368612376, "loss": 11.6673, "step": 26311 }, { "epoch": 0.5507828853721846, "grad_norm": 0.3471006155014038, "learning_rate": 0.00018385164904725375, "loss": 11.6834, "step": 26312 }, { "epoch": 0.5508038181361467, "grad_norm": 0.2695513665676117, "learning_rate": 0.00018385045436807457, "loss": 11.6755, "step": 26313 }, { "epoch": 0.5508247509001089, "grad_norm": 0.27532196044921875, "learning_rate": 0.00018384925964858682, "loss": 11.6778, "step": 26314 }, { "epoch": 0.550845683664071, "grad_norm": 0.26212891936302185, "learning_rate": 0.00018384806488879103, "loss": 11.6616, "step": 26315 }, { "epoch": 0.5508666164280331, "grad_norm": 0.35278260707855225, "learning_rate": 0.00018384687008868783, "loss": 11.6849, "step": 26316 }, { "epoch": 0.5508875491919953, "grad_norm": 0.3413480520248413, "learning_rate": 0.0001838456752482777, "loss": 11.6733, "step": 26317 }, { "epoch": 0.5509084819559574, "grad_norm": 0.2643485367298126, "learning_rate": 0.0001838444803675613, "loss": 11.6752, "step": 26318 }, { "epoch": 0.5509294147199196, "grad_norm": 0.3486539423465729, "learning_rate": 0.00018384328544653918, "loss": 11.6895, "step": 26319 }, { "epoch": 0.5509503474838817, "grad_norm": 0.3182222545146942, "learning_rate": 0.0001838420904852119, "loss": 11.6792, "step": 26320 }, { "epoch": 0.5509712802478439, "grad_norm": 0.33325329422950745, "learning_rate": 0.00018384089548358006, "loss": 11.6848, "step": 26321 }, { "epoch": 0.5509922130118061, "grad_norm": 0.4567922353744507, "learning_rate": 0.00018383970044164423, "loss": 11.6968, "step": 26322 }, { "epoch": 0.5510131457757682, "grad_norm": 0.3440978527069092, "learning_rate": 0.00018383850535940496, "loss": 11.6684, "step": 26323 }, { "epoch": 0.5510340785397304, "grad_norm": 0.2783597409725189, "learning_rate": 0.00018383731023686285, "loss": 11.6701, "step": 26324 }, { "epoch": 0.5510550113036925, "grad_norm": 0.3037658929824829, "learning_rate": 0.00018383611507401845, "loss": 11.6739, "step": 26325 }, { "epoch": 0.5510759440676547, "grad_norm": 0.40165433287620544, "learning_rate": 0.0001838349198708724, "loss": 11.6763, "step": 26326 }, { "epoch": 0.5510968768316169, "grad_norm": 0.28379762172698975, "learning_rate": 0.00018383372462742515, "loss": 11.6707, "step": 26327 }, { "epoch": 0.551117809595579, "grad_norm": 0.2778840959072113, "learning_rate": 0.00018383252934367738, "loss": 11.6716, "step": 26328 }, { "epoch": 0.5511387423595412, "grad_norm": 0.338005006313324, "learning_rate": 0.00018383133401962963, "loss": 11.6855, "step": 26329 }, { "epoch": 0.5511596751235033, "grad_norm": 0.30637362599372864, "learning_rate": 0.0001838301386552825, "loss": 11.6813, "step": 26330 }, { "epoch": 0.5511806078874655, "grad_norm": 0.3062214255332947, "learning_rate": 0.0001838289432506365, "loss": 11.6701, "step": 26331 }, { "epoch": 0.5512015406514276, "grad_norm": 0.31485801935195923, "learning_rate": 0.0001838277478056923, "loss": 11.6731, "step": 26332 }, { "epoch": 0.5512224734153898, "grad_norm": 0.23241639137268066, "learning_rate": 0.0001838265523204504, "loss": 11.6753, "step": 26333 }, { "epoch": 0.551243406179352, "grad_norm": 0.35640963912010193, "learning_rate": 0.00018382535679491136, "loss": 11.6644, "step": 26334 }, { "epoch": 0.5512643389433141, "grad_norm": 0.3200720548629761, "learning_rate": 0.00018382416122907585, "loss": 11.6732, "step": 26335 }, { "epoch": 0.5512852717072763, "grad_norm": 0.37397775053977966, "learning_rate": 0.00018382296562294433, "loss": 11.599, "step": 26336 }, { "epoch": 0.5513062044712383, "grad_norm": 0.27291321754455566, "learning_rate": 0.00018382176997651747, "loss": 11.6578, "step": 26337 }, { "epoch": 0.5513271372352005, "grad_norm": 0.2996731698513031, "learning_rate": 0.0001838205742897958, "loss": 11.6597, "step": 26338 }, { "epoch": 0.5513480699991626, "grad_norm": 0.4262864291667938, "learning_rate": 0.0001838193785627799, "loss": 11.6707, "step": 26339 }, { "epoch": 0.5513690027631248, "grad_norm": 0.48560312390327454, "learning_rate": 0.0001838181827954703, "loss": 11.6706, "step": 26340 }, { "epoch": 0.551389935527087, "grad_norm": 0.2706966996192932, "learning_rate": 0.00018381698698786769, "loss": 11.6746, "step": 26341 }, { "epoch": 0.5514108682910491, "grad_norm": 0.30623432993888855, "learning_rate": 0.00018381579113997253, "loss": 11.6797, "step": 26342 }, { "epoch": 0.5514318010550113, "grad_norm": 0.2904510498046875, "learning_rate": 0.00018381459525178547, "loss": 11.6588, "step": 26343 }, { "epoch": 0.5514527338189734, "grad_norm": 0.28259119391441345, "learning_rate": 0.00018381339932330704, "loss": 11.6659, "step": 26344 }, { "epoch": 0.5514736665829356, "grad_norm": 0.2686302363872528, "learning_rate": 0.00018381220335453782, "loss": 11.6702, "step": 26345 }, { "epoch": 0.5514945993468978, "grad_norm": 0.33986005187034607, "learning_rate": 0.0001838110073454784, "loss": 11.6739, "step": 26346 }, { "epoch": 0.5515155321108599, "grad_norm": 0.27320411801338196, "learning_rate": 0.00018380981129612935, "loss": 11.6648, "step": 26347 }, { "epoch": 0.5515364648748221, "grad_norm": 0.3324659466743469, "learning_rate": 0.00018380861520649123, "loss": 11.6795, "step": 26348 }, { "epoch": 0.5515573976387842, "grad_norm": 0.3212417960166931, "learning_rate": 0.00018380741907656468, "loss": 11.6675, "step": 26349 }, { "epoch": 0.5515783304027464, "grad_norm": 0.3348633646965027, "learning_rate": 0.00018380622290635018, "loss": 11.689, "step": 26350 }, { "epoch": 0.5515992631667085, "grad_norm": 0.3359590768814087, "learning_rate": 0.00018380502669584832, "loss": 11.6802, "step": 26351 }, { "epoch": 0.5516201959306707, "grad_norm": 0.32877489924430847, "learning_rate": 0.00018380383044505977, "loss": 11.6569, "step": 26352 }, { "epoch": 0.5516411286946329, "grad_norm": 0.260009765625, "learning_rate": 0.000183802634153985, "loss": 11.6725, "step": 26353 }, { "epoch": 0.551662061458595, "grad_norm": 0.3327569365501404, "learning_rate": 0.00018380143782262463, "loss": 11.6762, "step": 26354 }, { "epoch": 0.5516829942225572, "grad_norm": 0.34101206064224243, "learning_rate": 0.00018380024145097921, "loss": 11.6893, "step": 26355 }, { "epoch": 0.5517039269865193, "grad_norm": 0.25862953066825867, "learning_rate": 0.0001837990450390494, "loss": 11.6729, "step": 26356 }, { "epoch": 0.5517248597504815, "grad_norm": 0.3439638316631317, "learning_rate": 0.00018379784858683565, "loss": 11.671, "step": 26357 }, { "epoch": 0.5517457925144436, "grad_norm": 0.25541016459465027, "learning_rate": 0.0001837966520943386, "loss": 11.6644, "step": 26358 }, { "epoch": 0.5517667252784058, "grad_norm": 0.30643779039382935, "learning_rate": 0.00018379545556155885, "loss": 11.6664, "step": 26359 }, { "epoch": 0.551787658042368, "grad_norm": 0.4084221422672272, "learning_rate": 0.00018379425898849694, "loss": 11.6689, "step": 26360 }, { "epoch": 0.55180859080633, "grad_norm": 0.3787566125392914, "learning_rate": 0.0001837930623751534, "loss": 11.6607, "step": 26361 }, { "epoch": 0.5518295235702922, "grad_norm": 0.2991580069065094, "learning_rate": 0.0001837918657215289, "loss": 11.6748, "step": 26362 }, { "epoch": 0.5518504563342543, "grad_norm": 0.3129775822162628, "learning_rate": 0.000183790669027624, "loss": 11.6597, "step": 26363 }, { "epoch": 0.5518713890982165, "grad_norm": 0.2999313175678253, "learning_rate": 0.00018378947229343918, "loss": 11.6566, "step": 26364 }, { "epoch": 0.5518923218621787, "grad_norm": 0.27566829323768616, "learning_rate": 0.00018378827551897512, "loss": 11.6656, "step": 26365 }, { "epoch": 0.5519132546261408, "grad_norm": 0.33120065927505493, "learning_rate": 0.00018378707870423235, "loss": 11.6733, "step": 26366 }, { "epoch": 0.551934187390103, "grad_norm": 0.28550729155540466, "learning_rate": 0.00018378588184921147, "loss": 11.666, "step": 26367 }, { "epoch": 0.5519551201540651, "grad_norm": 0.307587206363678, "learning_rate": 0.00018378468495391302, "loss": 11.6861, "step": 26368 }, { "epoch": 0.5519760529180273, "grad_norm": 0.27985647320747375, "learning_rate": 0.00018378348801833757, "loss": 11.6568, "step": 26369 }, { "epoch": 0.5519969856819894, "grad_norm": 0.2692604959011078, "learning_rate": 0.00018378229104248575, "loss": 11.6524, "step": 26370 }, { "epoch": 0.5520179184459516, "grad_norm": 0.2555232048034668, "learning_rate": 0.0001837810940263581, "loss": 11.6662, "step": 26371 }, { "epoch": 0.5520388512099138, "grad_norm": 0.3005102872848511, "learning_rate": 0.00018377989696995522, "loss": 11.682, "step": 26372 }, { "epoch": 0.5520597839738759, "grad_norm": 0.4194588363170624, "learning_rate": 0.0001837786998732776, "loss": 11.6754, "step": 26373 }, { "epoch": 0.5520807167378381, "grad_norm": 0.30373096466064453, "learning_rate": 0.00018377750273632592, "loss": 11.6669, "step": 26374 }, { "epoch": 0.5521016495018002, "grad_norm": 0.3574836254119873, "learning_rate": 0.00018377630555910072, "loss": 11.6621, "step": 26375 }, { "epoch": 0.5521225822657624, "grad_norm": 0.4553014934062958, "learning_rate": 0.00018377510834160257, "loss": 11.6938, "step": 26376 }, { "epoch": 0.5521435150297245, "grad_norm": 0.29522591829299927, "learning_rate": 0.00018377391108383205, "loss": 11.6686, "step": 26377 }, { "epoch": 0.5521644477936867, "grad_norm": 0.26854926347732544, "learning_rate": 0.00018377271378578974, "loss": 11.6674, "step": 26378 }, { "epoch": 0.5521853805576489, "grad_norm": 0.3416091799736023, "learning_rate": 0.00018377151644747617, "loss": 11.6861, "step": 26379 }, { "epoch": 0.552206313321611, "grad_norm": 0.29640907049179077, "learning_rate": 0.00018377031906889198, "loss": 11.6791, "step": 26380 }, { "epoch": 0.5522272460855732, "grad_norm": 1.8497436046600342, "learning_rate": 0.0001837691216500377, "loss": 11.712, "step": 26381 }, { "epoch": 0.5522481788495353, "grad_norm": 0.28071117401123047, "learning_rate": 0.00018376792419091392, "loss": 11.6699, "step": 26382 }, { "epoch": 0.5522691116134975, "grad_norm": 0.2991984784603119, "learning_rate": 0.00018376672669152125, "loss": 11.6868, "step": 26383 }, { "epoch": 0.5522900443774597, "grad_norm": 0.30417051911354065, "learning_rate": 0.00018376552915186023, "loss": 11.6884, "step": 26384 }, { "epoch": 0.5523109771414217, "grad_norm": 0.34713494777679443, "learning_rate": 0.00018376433157193142, "loss": 11.6677, "step": 26385 }, { "epoch": 0.5523319099053839, "grad_norm": 0.2648528814315796, "learning_rate": 0.00018376313395173543, "loss": 11.6824, "step": 26386 }, { "epoch": 0.552352842669346, "grad_norm": 0.26215222477912903, "learning_rate": 0.00018376193629127284, "loss": 11.6682, "step": 26387 }, { "epoch": 0.5523737754333082, "grad_norm": 0.2840183675289154, "learning_rate": 0.00018376073859054418, "loss": 11.6575, "step": 26388 }, { "epoch": 0.5523947081972703, "grad_norm": 0.30029061436653137, "learning_rate": 0.0001837595408495501, "loss": 11.6678, "step": 26389 }, { "epoch": 0.5524156409612325, "grad_norm": 0.3047635853290558, "learning_rate": 0.00018375834306829107, "loss": 11.6614, "step": 26390 }, { "epoch": 0.5524365737251947, "grad_norm": 0.33703577518463135, "learning_rate": 0.00018375714524676776, "loss": 11.6854, "step": 26391 }, { "epoch": 0.5524575064891568, "grad_norm": 0.2579785883426666, "learning_rate": 0.00018375594738498073, "loss": 11.6637, "step": 26392 }, { "epoch": 0.552478439253119, "grad_norm": 0.34920570254325867, "learning_rate": 0.0001837547494829305, "loss": 11.6608, "step": 26393 }, { "epoch": 0.5524993720170811, "grad_norm": 0.2949638068675995, "learning_rate": 0.0001837535515406177, "loss": 11.6725, "step": 26394 }, { "epoch": 0.5525203047810433, "grad_norm": 0.34542280435562134, "learning_rate": 0.00018375235355804287, "loss": 11.6774, "step": 26395 }, { "epoch": 0.5525412375450054, "grad_norm": 0.38290566205978394, "learning_rate": 0.00018375115553520663, "loss": 11.6572, "step": 26396 }, { "epoch": 0.5525621703089676, "grad_norm": 0.30039969086647034, "learning_rate": 0.0001837499574721095, "loss": 11.6889, "step": 26397 }, { "epoch": 0.5525831030729298, "grad_norm": 0.2626807689666748, "learning_rate": 0.00018374875936875214, "loss": 11.6861, "step": 26398 }, { "epoch": 0.5526040358368919, "grad_norm": 0.2659547030925751, "learning_rate": 0.00018374756122513502, "loss": 11.6826, "step": 26399 }, { "epoch": 0.5526249686008541, "grad_norm": 0.2333395779132843, "learning_rate": 0.0001837463630412588, "loss": 11.6582, "step": 26400 }, { "epoch": 0.5526459013648162, "grad_norm": 0.2882542908191681, "learning_rate": 0.000183745164817124, "loss": 11.6689, "step": 26401 }, { "epoch": 0.5526668341287784, "grad_norm": 0.27821046113967896, "learning_rate": 0.00018374396655273126, "loss": 11.6849, "step": 26402 }, { "epoch": 0.5526877668927406, "grad_norm": 0.24490413069725037, "learning_rate": 0.0001837427682480811, "loss": 11.6605, "step": 26403 }, { "epoch": 0.5527086996567027, "grad_norm": 0.34625062346458435, "learning_rate": 0.0001837415699031741, "loss": 11.6788, "step": 26404 }, { "epoch": 0.5527296324206649, "grad_norm": 0.3311783969402313, "learning_rate": 0.00018374037151801085, "loss": 11.6695, "step": 26405 }, { "epoch": 0.552750565184627, "grad_norm": 0.4147745966911316, "learning_rate": 0.00018373917309259192, "loss": 11.68, "step": 26406 }, { "epoch": 0.5527714979485892, "grad_norm": 0.2838604748249054, "learning_rate": 0.00018373797462691794, "loss": 11.6838, "step": 26407 }, { "epoch": 0.5527924307125512, "grad_norm": 0.3347684442996979, "learning_rate": 0.00018373677612098938, "loss": 11.6918, "step": 26408 }, { "epoch": 0.5528133634765134, "grad_norm": 0.32199111580848694, "learning_rate": 0.0001837355775748069, "loss": 11.6764, "step": 26409 }, { "epoch": 0.5528342962404756, "grad_norm": 0.33711129426956177, "learning_rate": 0.00018373437898837103, "loss": 11.6701, "step": 26410 }, { "epoch": 0.5528552290044377, "grad_norm": 0.39471694827079773, "learning_rate": 0.00018373318036168237, "loss": 11.6667, "step": 26411 }, { "epoch": 0.5528761617683999, "grad_norm": 0.265730619430542, "learning_rate": 0.0001837319816947415, "loss": 11.6687, "step": 26412 }, { "epoch": 0.552897094532362, "grad_norm": 0.2482498586177826, "learning_rate": 0.00018373078298754902, "loss": 11.6628, "step": 26413 }, { "epoch": 0.5529180272963242, "grad_norm": 0.30172449350357056, "learning_rate": 0.00018372958424010545, "loss": 11.6828, "step": 26414 }, { "epoch": 0.5529389600602863, "grad_norm": 0.2919650375843048, "learning_rate": 0.00018372838545241136, "loss": 11.678, "step": 26415 }, { "epoch": 0.5529598928242485, "grad_norm": 0.24081642925739288, "learning_rate": 0.00018372718662446738, "loss": 11.6573, "step": 26416 }, { "epoch": 0.5529808255882107, "grad_norm": 0.33112916350364685, "learning_rate": 0.00018372598775627408, "loss": 11.6685, "step": 26417 }, { "epoch": 0.5530017583521728, "grad_norm": 0.2857033908367157, "learning_rate": 0.000183724788847832, "loss": 11.6677, "step": 26418 }, { "epoch": 0.553022691116135, "grad_norm": 0.29176077246665955, "learning_rate": 0.00018372358989914174, "loss": 11.6669, "step": 26419 }, { "epoch": 0.5530436238800971, "grad_norm": 0.29338493943214417, "learning_rate": 0.00018372239091020388, "loss": 11.6555, "step": 26420 }, { "epoch": 0.5530645566440593, "grad_norm": 0.270416796207428, "learning_rate": 0.00018372119188101897, "loss": 11.667, "step": 26421 }, { "epoch": 0.5530854894080214, "grad_norm": 0.3536175787448883, "learning_rate": 0.00018371999281158763, "loss": 11.6781, "step": 26422 }, { "epoch": 0.5531064221719836, "grad_norm": 0.2809737026691437, "learning_rate": 0.0001837187937019104, "loss": 11.6674, "step": 26423 }, { "epoch": 0.5531273549359458, "grad_norm": 0.31363174319267273, "learning_rate": 0.00018371759455198787, "loss": 11.6858, "step": 26424 }, { "epoch": 0.5531482876999079, "grad_norm": 0.3178246319293976, "learning_rate": 0.00018371639536182062, "loss": 11.6722, "step": 26425 }, { "epoch": 0.5531692204638701, "grad_norm": 0.3005903661251068, "learning_rate": 0.00018371519613140922, "loss": 11.6809, "step": 26426 }, { "epoch": 0.5531901532278322, "grad_norm": 0.3056287467479706, "learning_rate": 0.00018371399686075424, "loss": 11.6656, "step": 26427 }, { "epoch": 0.5532110859917944, "grad_norm": 0.28560858964920044, "learning_rate": 0.00018371279754985625, "loss": 11.6763, "step": 26428 }, { "epoch": 0.5532320187557566, "grad_norm": 0.4255467355251312, "learning_rate": 0.00018371159819871585, "loss": 11.6467, "step": 26429 }, { "epoch": 0.5532529515197186, "grad_norm": 0.2628473937511444, "learning_rate": 0.00018371039880733363, "loss": 11.6707, "step": 26430 }, { "epoch": 0.5532738842836809, "grad_norm": 0.26736313104629517, "learning_rate": 0.00018370919937571011, "loss": 11.6764, "step": 26431 }, { "epoch": 0.5532948170476429, "grad_norm": 0.3282061517238617, "learning_rate": 0.00018370799990384594, "loss": 11.6741, "step": 26432 }, { "epoch": 0.5533157498116051, "grad_norm": 0.33051949739456177, "learning_rate": 0.0001837068003917416, "loss": 11.67, "step": 26433 }, { "epoch": 0.5533366825755672, "grad_norm": 0.28229111433029175, "learning_rate": 0.00018370560083939778, "loss": 11.6649, "step": 26434 }, { "epoch": 0.5533576153395294, "grad_norm": 0.2905218005180359, "learning_rate": 0.000183704401246815, "loss": 11.6686, "step": 26435 }, { "epoch": 0.5533785481034916, "grad_norm": 0.3332630693912506, "learning_rate": 0.00018370320161399382, "loss": 11.681, "step": 26436 }, { "epoch": 0.5533994808674537, "grad_norm": 0.2890865206718445, "learning_rate": 0.0001837020019409348, "loss": 11.6618, "step": 26437 }, { "epoch": 0.5534204136314159, "grad_norm": 0.3047213554382324, "learning_rate": 0.0001837008022276386, "loss": 11.6608, "step": 26438 }, { "epoch": 0.553441346395378, "grad_norm": 0.2889788746833801, "learning_rate": 0.0001836996024741057, "loss": 11.6714, "step": 26439 }, { "epoch": 0.5534622791593402, "grad_norm": 0.27935221791267395, "learning_rate": 0.0001836984026803368, "loss": 11.6903, "step": 26440 }, { "epoch": 0.5534832119233023, "grad_norm": 0.24201811850070953, "learning_rate": 0.00018369720284633231, "loss": 11.6727, "step": 26441 }, { "epoch": 0.5535041446872645, "grad_norm": 0.2938306927680969, "learning_rate": 0.00018369600297209296, "loss": 11.6749, "step": 26442 }, { "epoch": 0.5535250774512267, "grad_norm": 0.41487306356430054, "learning_rate": 0.00018369480305761924, "loss": 11.6694, "step": 26443 }, { "epoch": 0.5535460102151888, "grad_norm": 0.27052849531173706, "learning_rate": 0.00018369360310291173, "loss": 11.6756, "step": 26444 }, { "epoch": 0.553566942979151, "grad_norm": 0.24486403167247772, "learning_rate": 0.00018369240310797108, "loss": 11.6674, "step": 26445 }, { "epoch": 0.5535878757431131, "grad_norm": 0.35170623660087585, "learning_rate": 0.00018369120307279777, "loss": 11.689, "step": 26446 }, { "epoch": 0.5536088085070753, "grad_norm": 0.2838863432407379, "learning_rate": 0.00018369000299739243, "loss": 11.6555, "step": 26447 }, { "epoch": 0.5536297412710375, "grad_norm": 0.2776113450527191, "learning_rate": 0.00018368880288175565, "loss": 11.6588, "step": 26448 }, { "epoch": 0.5536506740349996, "grad_norm": 0.2462194859981537, "learning_rate": 0.00018368760272588795, "loss": 11.6683, "step": 26449 }, { "epoch": 0.5536716067989618, "grad_norm": 0.2905462980270386, "learning_rate": 0.00018368640252978998, "loss": 11.663, "step": 26450 }, { "epoch": 0.5536925395629239, "grad_norm": 0.421589732170105, "learning_rate": 0.00018368520229346226, "loss": 11.6944, "step": 26451 }, { "epoch": 0.5537134723268861, "grad_norm": 0.2636543810367584, "learning_rate": 0.00018368400201690537, "loss": 11.6746, "step": 26452 }, { "epoch": 0.5537344050908481, "grad_norm": 0.24919797480106354, "learning_rate": 0.00018368280170011993, "loss": 11.6654, "step": 26453 }, { "epoch": 0.5537553378548103, "grad_norm": 0.3203204274177551, "learning_rate": 0.0001836816013431065, "loss": 11.6853, "step": 26454 }, { "epoch": 0.5537762706187725, "grad_norm": 0.35715460777282715, "learning_rate": 0.0001836804009458656, "loss": 11.6511, "step": 26455 }, { "epoch": 0.5537972033827346, "grad_norm": 0.2566088140010834, "learning_rate": 0.0001836792005083979, "loss": 11.6711, "step": 26456 }, { "epoch": 0.5538181361466968, "grad_norm": 0.29810261726379395, "learning_rate": 0.0001836780000307039, "loss": 11.693, "step": 26457 }, { "epoch": 0.5538390689106589, "grad_norm": 0.3427686095237732, "learning_rate": 0.00018367679951278426, "loss": 11.6732, "step": 26458 }, { "epoch": 0.5538600016746211, "grad_norm": 0.3404804766178131, "learning_rate": 0.00018367559895463945, "loss": 11.6625, "step": 26459 }, { "epoch": 0.5538809344385832, "grad_norm": 0.2592225670814514, "learning_rate": 0.0001836743983562701, "loss": 11.6701, "step": 26460 }, { "epoch": 0.5539018672025454, "grad_norm": 0.2681106626987457, "learning_rate": 0.0001836731977176768, "loss": 11.6688, "step": 26461 }, { "epoch": 0.5539227999665076, "grad_norm": 0.32030653953552246, "learning_rate": 0.00018367199703886016, "loss": 11.6597, "step": 26462 }, { "epoch": 0.5539437327304697, "grad_norm": 0.3060043454170227, "learning_rate": 0.00018367079631982067, "loss": 11.6759, "step": 26463 }, { "epoch": 0.5539646654944319, "grad_norm": 0.2911792993545532, "learning_rate": 0.00018366959556055895, "loss": 11.6788, "step": 26464 }, { "epoch": 0.553985598258394, "grad_norm": 0.3428395390510559, "learning_rate": 0.0001836683947610756, "loss": 11.6782, "step": 26465 }, { "epoch": 0.5540065310223562, "grad_norm": 0.29087552428245544, "learning_rate": 0.00018366719392137116, "loss": 11.6754, "step": 26466 }, { "epoch": 0.5540274637863184, "grad_norm": 0.2540202736854553, "learning_rate": 0.00018366599304144626, "loss": 11.6762, "step": 26467 }, { "epoch": 0.5540483965502805, "grad_norm": 0.34223416447639465, "learning_rate": 0.0001836647921213014, "loss": 11.6812, "step": 26468 }, { "epoch": 0.5540693293142427, "grad_norm": 0.2624627649784088, "learning_rate": 0.0001836635911609372, "loss": 11.6475, "step": 26469 }, { "epoch": 0.5540902620782048, "grad_norm": 0.30736038088798523, "learning_rate": 0.00018366239016035424, "loss": 11.6734, "step": 26470 }, { "epoch": 0.554111194842167, "grad_norm": 0.3320339620113373, "learning_rate": 0.0001836611891195531, "loss": 11.6837, "step": 26471 }, { "epoch": 0.5541321276061291, "grad_norm": 0.3105826675891876, "learning_rate": 0.00018365998803853434, "loss": 11.675, "step": 26472 }, { "epoch": 0.5541530603700913, "grad_norm": 0.275512158870697, "learning_rate": 0.00018365878691729856, "loss": 11.6872, "step": 26473 }, { "epoch": 0.5541739931340535, "grad_norm": 0.22605812549591064, "learning_rate": 0.00018365758575584633, "loss": 11.6556, "step": 26474 }, { "epoch": 0.5541949258980156, "grad_norm": 0.30466410517692566, "learning_rate": 0.0001836563845541782, "loss": 11.6686, "step": 26475 }, { "epoch": 0.5542158586619778, "grad_norm": 0.32067665457725525, "learning_rate": 0.00018365518331229478, "loss": 11.6691, "step": 26476 }, { "epoch": 0.5542367914259398, "grad_norm": 0.36522406339645386, "learning_rate": 0.00018365398203019665, "loss": 11.6917, "step": 26477 }, { "epoch": 0.554257724189902, "grad_norm": 0.2847227156162262, "learning_rate": 0.00018365278070788438, "loss": 11.6741, "step": 26478 }, { "epoch": 0.5542786569538641, "grad_norm": 0.2829383909702301, "learning_rate": 0.0001836515793453585, "loss": 11.6665, "step": 26479 }, { "epoch": 0.5542995897178263, "grad_norm": 0.2862131595611572, "learning_rate": 0.00018365037794261966, "loss": 11.6584, "step": 26480 }, { "epoch": 0.5543205224817885, "grad_norm": 0.3334703743457794, "learning_rate": 0.0001836491764996684, "loss": 11.6661, "step": 26481 }, { "epoch": 0.5543414552457506, "grad_norm": 0.27424466609954834, "learning_rate": 0.0001836479750165053, "loss": 11.6576, "step": 26482 }, { "epoch": 0.5543623880097128, "grad_norm": 0.3089035153388977, "learning_rate": 0.00018364677349313097, "loss": 11.6651, "step": 26483 }, { "epoch": 0.5543833207736749, "grad_norm": 0.35138654708862305, "learning_rate": 0.0001836455719295459, "loss": 11.6603, "step": 26484 }, { "epoch": 0.5544042535376371, "grad_norm": 0.27865079045295715, "learning_rate": 0.00018364437032575078, "loss": 11.6598, "step": 26485 }, { "epoch": 0.5544251863015993, "grad_norm": 0.24626269936561584, "learning_rate": 0.00018364316868174608, "loss": 11.6781, "step": 26486 }, { "epoch": 0.5544461190655614, "grad_norm": 0.30642181634902954, "learning_rate": 0.0001836419669975325, "loss": 11.6754, "step": 26487 }, { "epoch": 0.5544670518295236, "grad_norm": 0.27062317728996277, "learning_rate": 0.0001836407652731105, "loss": 11.6762, "step": 26488 }, { "epoch": 0.5544879845934857, "grad_norm": 0.34158068895339966, "learning_rate": 0.00018363956350848074, "loss": 11.6719, "step": 26489 }, { "epoch": 0.5545089173574479, "grad_norm": 0.23859895765781403, "learning_rate": 0.00018363836170364378, "loss": 11.6858, "step": 26490 }, { "epoch": 0.55452985012141, "grad_norm": 0.30900564789772034, "learning_rate": 0.00018363715985860016, "loss": 11.6936, "step": 26491 }, { "epoch": 0.5545507828853722, "grad_norm": 0.2728114724159241, "learning_rate": 0.00018363595797335047, "loss": 11.6737, "step": 26492 }, { "epoch": 0.5545717156493344, "grad_norm": 0.24739103019237518, "learning_rate": 0.0001836347560478953, "loss": 11.6655, "step": 26493 }, { "epoch": 0.5545926484132965, "grad_norm": 0.3129456341266632, "learning_rate": 0.00018363355408223525, "loss": 11.6692, "step": 26494 }, { "epoch": 0.5546135811772587, "grad_norm": 0.290159672498703, "learning_rate": 0.00018363235207637083, "loss": 11.6664, "step": 26495 }, { "epoch": 0.5546345139412208, "grad_norm": 0.3386872112751007, "learning_rate": 0.00018363115003030268, "loss": 11.6579, "step": 26496 }, { "epoch": 0.554655446705183, "grad_norm": 0.2642979919910431, "learning_rate": 0.0001836299479440314, "loss": 11.6781, "step": 26497 }, { "epoch": 0.554676379469145, "grad_norm": 0.3180530369281769, "learning_rate": 0.00018362874581755748, "loss": 11.6658, "step": 26498 }, { "epoch": 0.5546973122331073, "grad_norm": 0.38118240237236023, "learning_rate": 0.0001836275436508816, "loss": 11.7019, "step": 26499 }, { "epoch": 0.5547182449970695, "grad_norm": 0.29161036014556885, "learning_rate": 0.00018362634144400425, "loss": 11.6902, "step": 26500 }, { "epoch": 0.5547391777610315, "grad_norm": 0.3048059642314911, "learning_rate": 0.00018362513919692602, "loss": 11.69, "step": 26501 }, { "epoch": 0.5547601105249937, "grad_norm": 0.28550902009010315, "learning_rate": 0.00018362393690964752, "loss": 11.669, "step": 26502 }, { "epoch": 0.5547810432889558, "grad_norm": 0.26281532645225525, "learning_rate": 0.00018362273458216936, "loss": 11.6665, "step": 26503 }, { "epoch": 0.554801976052918, "grad_norm": 0.3341246545314789, "learning_rate": 0.00018362153221449204, "loss": 11.6832, "step": 26504 }, { "epoch": 0.5548229088168802, "grad_norm": 0.29365596175193787, "learning_rate": 0.00018362032980661616, "loss": 11.6797, "step": 26505 }, { "epoch": 0.5548438415808423, "grad_norm": 0.34970659017562866, "learning_rate": 0.00018361912735854232, "loss": 11.6827, "step": 26506 }, { "epoch": 0.5548647743448045, "grad_norm": 0.34166523814201355, "learning_rate": 0.00018361792487027112, "loss": 11.6656, "step": 26507 }, { "epoch": 0.5548857071087666, "grad_norm": 0.2740515470504761, "learning_rate": 0.0001836167223418031, "loss": 11.6676, "step": 26508 }, { "epoch": 0.5549066398727288, "grad_norm": 0.3071437478065491, "learning_rate": 0.0001836155197731388, "loss": 11.6779, "step": 26509 }, { "epoch": 0.5549275726366909, "grad_norm": 0.3418755829334259, "learning_rate": 0.00018361431716427888, "loss": 11.6873, "step": 26510 }, { "epoch": 0.5549485054006531, "grad_norm": 0.4277481436729431, "learning_rate": 0.0001836131145152239, "loss": 11.6889, "step": 26511 }, { "epoch": 0.5549694381646153, "grad_norm": 0.2794877290725708, "learning_rate": 0.0001836119118259744, "loss": 11.654, "step": 26512 }, { "epoch": 0.5549903709285774, "grad_norm": 0.3290039598941803, "learning_rate": 0.00018361070909653096, "loss": 11.6888, "step": 26513 }, { "epoch": 0.5550113036925396, "grad_norm": 0.3806384801864624, "learning_rate": 0.0001836095063268942, "loss": 11.6964, "step": 26514 }, { "epoch": 0.5550322364565017, "grad_norm": 0.32252153754234314, "learning_rate": 0.00018360830351706468, "loss": 11.6585, "step": 26515 }, { "epoch": 0.5550531692204639, "grad_norm": 0.32273897528648376, "learning_rate": 0.00018360710066704297, "loss": 11.6756, "step": 26516 }, { "epoch": 0.555074101984426, "grad_norm": 0.27466124296188354, "learning_rate": 0.00018360589777682962, "loss": 11.675, "step": 26517 }, { "epoch": 0.5550950347483882, "grad_norm": 0.24241265654563904, "learning_rate": 0.00018360469484642527, "loss": 11.6638, "step": 26518 }, { "epoch": 0.5551159675123504, "grad_norm": 0.34332433342933655, "learning_rate": 0.00018360349187583047, "loss": 11.6734, "step": 26519 }, { "epoch": 0.5551369002763125, "grad_norm": 0.32730549573898315, "learning_rate": 0.00018360228886504576, "loss": 11.6755, "step": 26520 }, { "epoch": 0.5551578330402747, "grad_norm": 0.3656512498855591, "learning_rate": 0.0001836010858140718, "loss": 11.677, "step": 26521 }, { "epoch": 0.5551787658042368, "grad_norm": 0.3125095069408417, "learning_rate": 0.00018359988272290912, "loss": 11.6768, "step": 26522 }, { "epoch": 0.555199698568199, "grad_norm": 0.2534540891647339, "learning_rate": 0.00018359867959155825, "loss": 11.6798, "step": 26523 }, { "epoch": 0.5552206313321612, "grad_norm": 0.36572128534317017, "learning_rate": 0.00018359747642001986, "loss": 11.6877, "step": 26524 }, { "epoch": 0.5552415640961232, "grad_norm": 0.32419317960739136, "learning_rate": 0.00018359627320829448, "loss": 11.6721, "step": 26525 }, { "epoch": 0.5552624968600854, "grad_norm": 0.27341753244400024, "learning_rate": 0.0001835950699563827, "loss": 11.669, "step": 26526 }, { "epoch": 0.5552834296240475, "grad_norm": 0.28063565492630005, "learning_rate": 0.00018359386666428509, "loss": 11.6811, "step": 26527 }, { "epoch": 0.5553043623880097, "grad_norm": 0.2770455777645111, "learning_rate": 0.00018359266333200226, "loss": 11.6614, "step": 26528 }, { "epoch": 0.5553252951519718, "grad_norm": 0.2525237798690796, "learning_rate": 0.00018359145995953472, "loss": 11.6794, "step": 26529 }, { "epoch": 0.555346227915934, "grad_norm": 0.3433707654476166, "learning_rate": 0.0001835902565468831, "loss": 11.6921, "step": 26530 }, { "epoch": 0.5553671606798962, "grad_norm": 0.4183734655380249, "learning_rate": 0.00018358905309404796, "loss": 11.6762, "step": 26531 }, { "epoch": 0.5553880934438583, "grad_norm": 0.29700538516044617, "learning_rate": 0.00018358784960102994, "loss": 11.6777, "step": 26532 }, { "epoch": 0.5554090262078205, "grad_norm": 0.26213210821151733, "learning_rate": 0.0001835866460678295, "loss": 11.6716, "step": 26533 }, { "epoch": 0.5554299589717826, "grad_norm": 0.37271806597709656, "learning_rate": 0.00018358544249444733, "loss": 11.6743, "step": 26534 }, { "epoch": 0.5554508917357448, "grad_norm": 0.32942891120910645, "learning_rate": 0.00018358423888088394, "loss": 11.6613, "step": 26535 }, { "epoch": 0.5554718244997069, "grad_norm": 0.331631064414978, "learning_rate": 0.00018358303522713994, "loss": 11.6734, "step": 26536 }, { "epoch": 0.5554927572636691, "grad_norm": 0.29957544803619385, "learning_rate": 0.0001835818315332159, "loss": 11.6793, "step": 26537 }, { "epoch": 0.5555136900276313, "grad_norm": 0.33376142382621765, "learning_rate": 0.0001835806277991124, "loss": 11.6793, "step": 26538 }, { "epoch": 0.5555346227915934, "grad_norm": 0.31908509135246277, "learning_rate": 0.00018357942402483, "loss": 11.6574, "step": 26539 }, { "epoch": 0.5555555555555556, "grad_norm": 0.2346012145280838, "learning_rate": 0.0001835782202103693, "loss": 11.656, "step": 26540 }, { "epoch": 0.5555764883195177, "grad_norm": 0.2611676752567291, "learning_rate": 0.00018357701635573088, "loss": 11.68, "step": 26541 }, { "epoch": 0.5555974210834799, "grad_norm": 0.23743218183517456, "learning_rate": 0.0001835758124609153, "loss": 11.6663, "step": 26542 }, { "epoch": 0.5556183538474421, "grad_norm": 0.26182839274406433, "learning_rate": 0.00018357460852592322, "loss": 11.6608, "step": 26543 }, { "epoch": 0.5556392866114042, "grad_norm": 0.24642696976661682, "learning_rate": 0.00018357340455075506, "loss": 11.6659, "step": 26544 }, { "epoch": 0.5556602193753664, "grad_norm": 0.23388439416885376, "learning_rate": 0.00018357220053541156, "loss": 11.6786, "step": 26545 }, { "epoch": 0.5556811521393284, "grad_norm": 0.2969839870929718, "learning_rate": 0.00018357099647989318, "loss": 11.6738, "step": 26546 }, { "epoch": 0.5557020849032906, "grad_norm": 0.30886679887771606, "learning_rate": 0.00018356979238420056, "loss": 11.6702, "step": 26547 }, { "epoch": 0.5557230176672527, "grad_norm": 0.4062870740890503, "learning_rate": 0.00018356858824833427, "loss": 11.6866, "step": 26548 }, { "epoch": 0.5557439504312149, "grad_norm": 0.27721092104911804, "learning_rate": 0.0001835673840722949, "loss": 11.6615, "step": 26549 }, { "epoch": 0.5557648831951771, "grad_norm": 0.27437564730644226, "learning_rate": 0.00018356617985608299, "loss": 11.672, "step": 26550 }, { "epoch": 0.5557858159591392, "grad_norm": 0.30043455958366394, "learning_rate": 0.00018356497559969914, "loss": 11.6824, "step": 26551 }, { "epoch": 0.5558067487231014, "grad_norm": 0.28773579001426697, "learning_rate": 0.00018356377130314394, "loss": 11.6728, "step": 26552 }, { "epoch": 0.5558276814870635, "grad_norm": 0.2803948223590851, "learning_rate": 0.00018356256696641795, "loss": 11.668, "step": 26553 }, { "epoch": 0.5558486142510257, "grad_norm": 0.256664514541626, "learning_rate": 0.0001835613625895218, "loss": 11.6654, "step": 26554 }, { "epoch": 0.5558695470149878, "grad_norm": 0.2299094945192337, "learning_rate": 0.000183560158172456, "loss": 11.6673, "step": 26555 }, { "epoch": 0.55589047977895, "grad_norm": 0.24982506036758423, "learning_rate": 0.00018355895371522117, "loss": 11.6852, "step": 26556 }, { "epoch": 0.5559114125429122, "grad_norm": 0.3013937473297119, "learning_rate": 0.00018355774921781785, "loss": 11.685, "step": 26557 }, { "epoch": 0.5559323453068743, "grad_norm": 0.22469723224639893, "learning_rate": 0.00018355654468024665, "loss": 11.6753, "step": 26558 }, { "epoch": 0.5559532780708365, "grad_norm": 0.29486721754074097, "learning_rate": 0.00018355534010250815, "loss": 11.6613, "step": 26559 }, { "epoch": 0.5559742108347986, "grad_norm": 0.3203105628490448, "learning_rate": 0.00018355413548460295, "loss": 11.6606, "step": 26560 }, { "epoch": 0.5559951435987608, "grad_norm": 0.31199881434440613, "learning_rate": 0.00018355293082653157, "loss": 11.6741, "step": 26561 }, { "epoch": 0.556016076362723, "grad_norm": 0.2960914671421051, "learning_rate": 0.00018355172612829462, "loss": 11.658, "step": 26562 }, { "epoch": 0.5560370091266851, "grad_norm": 0.28342878818511963, "learning_rate": 0.0001835505213898927, "loss": 11.6665, "step": 26563 }, { "epoch": 0.5560579418906473, "grad_norm": 0.3267561197280884, "learning_rate": 0.00018354931661132638, "loss": 11.6829, "step": 26564 }, { "epoch": 0.5560788746546094, "grad_norm": 0.3065564036369324, "learning_rate": 0.00018354811179259622, "loss": 11.6754, "step": 26565 }, { "epoch": 0.5560998074185716, "grad_norm": 0.3192979395389557, "learning_rate": 0.0001835469069337028, "loss": 11.6998, "step": 26566 }, { "epoch": 0.5561207401825337, "grad_norm": 0.3183833062648773, "learning_rate": 0.0001835457020346467, "loss": 11.6541, "step": 26567 }, { "epoch": 0.5561416729464959, "grad_norm": 0.27543196082115173, "learning_rate": 0.00018354449709542852, "loss": 11.6606, "step": 26568 }, { "epoch": 0.5561626057104581, "grad_norm": 0.3712911307811737, "learning_rate": 0.0001835432921160488, "loss": 11.6753, "step": 26569 }, { "epoch": 0.5561835384744201, "grad_norm": 0.34586066007614136, "learning_rate": 0.00018354208709650817, "loss": 11.664, "step": 26570 }, { "epoch": 0.5562044712383823, "grad_norm": 0.2550022006034851, "learning_rate": 0.0001835408820368072, "loss": 11.6433, "step": 26571 }, { "epoch": 0.5562254040023444, "grad_norm": 0.3624061644077301, "learning_rate": 0.00018353967693694642, "loss": 11.6899, "step": 26572 }, { "epoch": 0.5562463367663066, "grad_norm": 0.26036590337753296, "learning_rate": 0.00018353847179692647, "loss": 11.6374, "step": 26573 }, { "epoch": 0.5562672695302687, "grad_norm": 0.2825138568878174, "learning_rate": 0.00018353726661674789, "loss": 11.6695, "step": 26574 }, { "epoch": 0.5562882022942309, "grad_norm": 0.30205008387565613, "learning_rate": 0.00018353606139641127, "loss": 11.6701, "step": 26575 }, { "epoch": 0.5563091350581931, "grad_norm": 0.29602643847465515, "learning_rate": 0.0001835348561359172, "loss": 11.6555, "step": 26576 }, { "epoch": 0.5563300678221552, "grad_norm": 0.2930777072906494, "learning_rate": 0.00018353365083526623, "loss": 11.6836, "step": 26577 }, { "epoch": 0.5563510005861174, "grad_norm": 0.26178890466690063, "learning_rate": 0.00018353244549445898, "loss": 11.6558, "step": 26578 }, { "epoch": 0.5563719333500795, "grad_norm": 0.2533363699913025, "learning_rate": 0.000183531240113496, "loss": 11.6634, "step": 26579 }, { "epoch": 0.5563928661140417, "grad_norm": 0.2553003430366516, "learning_rate": 0.00018353003469237788, "loss": 11.6602, "step": 26580 }, { "epoch": 0.5564137988780039, "grad_norm": 0.3790224492549896, "learning_rate": 0.00018352882923110522, "loss": 11.6607, "step": 26581 }, { "epoch": 0.556434731641966, "grad_norm": 0.3245776295661926, "learning_rate": 0.00018352762372967854, "loss": 11.687, "step": 26582 }, { "epoch": 0.5564556644059282, "grad_norm": 0.47797808051109314, "learning_rate": 0.00018352641818809848, "loss": 11.6833, "step": 26583 }, { "epoch": 0.5564765971698903, "grad_norm": 0.298298180103302, "learning_rate": 0.0001835252126063656, "loss": 11.6719, "step": 26584 }, { "epoch": 0.5564975299338525, "grad_norm": 0.270412802696228, "learning_rate": 0.00018352400698448047, "loss": 11.6814, "step": 26585 }, { "epoch": 0.5565184626978146, "grad_norm": 0.34467649459838867, "learning_rate": 0.00018352280132244366, "loss": 11.6835, "step": 26586 }, { "epoch": 0.5565393954617768, "grad_norm": 0.3090495765209198, "learning_rate": 0.0001835215956202558, "loss": 11.6751, "step": 26587 }, { "epoch": 0.556560328225739, "grad_norm": 0.3809677064418793, "learning_rate": 0.00018352038987791742, "loss": 11.6808, "step": 26588 }, { "epoch": 0.5565812609897011, "grad_norm": 0.26187267899513245, "learning_rate": 0.0001835191840954291, "loss": 11.6649, "step": 26589 }, { "epoch": 0.5566021937536633, "grad_norm": 0.27640846371650696, "learning_rate": 0.00018351797827279145, "loss": 11.6525, "step": 26590 }, { "epoch": 0.5566231265176254, "grad_norm": 0.33051857352256775, "learning_rate": 0.000183516772410005, "loss": 11.6759, "step": 26591 }, { "epoch": 0.5566440592815876, "grad_norm": 0.27226507663726807, "learning_rate": 0.00018351556650707043, "loss": 11.6795, "step": 26592 }, { "epoch": 0.5566649920455496, "grad_norm": 0.36165928840637207, "learning_rate": 0.0001835143605639882, "loss": 11.6714, "step": 26593 }, { "epoch": 0.5566859248095118, "grad_norm": 0.27278727293014526, "learning_rate": 0.00018351315458075895, "loss": 11.6676, "step": 26594 }, { "epoch": 0.556706857573474, "grad_norm": 0.3119978606700897, "learning_rate": 0.00018351194855738327, "loss": 11.6696, "step": 26595 }, { "epoch": 0.5567277903374361, "grad_norm": 0.27446630597114563, "learning_rate": 0.0001835107424938617, "loss": 11.6857, "step": 26596 }, { "epoch": 0.5567487231013983, "grad_norm": 0.2960313856601715, "learning_rate": 0.00018350953639019487, "loss": 11.6706, "step": 26597 }, { "epoch": 0.5567696558653604, "grad_norm": 0.2778671383857727, "learning_rate": 0.00018350833024638333, "loss": 11.6831, "step": 26598 }, { "epoch": 0.5567905886293226, "grad_norm": 0.29696816205978394, "learning_rate": 0.00018350712406242764, "loss": 11.6696, "step": 26599 }, { "epoch": 0.5568115213932847, "grad_norm": 0.280041366815567, "learning_rate": 0.00018350591783832844, "loss": 11.6691, "step": 26600 }, { "epoch": 0.5568324541572469, "grad_norm": 0.25790777802467346, "learning_rate": 0.00018350471157408624, "loss": 11.6756, "step": 26601 }, { "epoch": 0.5568533869212091, "grad_norm": 0.3225986063480377, "learning_rate": 0.00018350350526970166, "loss": 11.6845, "step": 26602 }, { "epoch": 0.5568743196851712, "grad_norm": 0.26861289143562317, "learning_rate": 0.00018350229892517524, "loss": 11.6646, "step": 26603 }, { "epoch": 0.5568952524491334, "grad_norm": 0.26312169432640076, "learning_rate": 0.00018350109254050765, "loss": 11.6845, "step": 26604 }, { "epoch": 0.5569161852130955, "grad_norm": 0.3851149380207062, "learning_rate": 0.0001834998861156994, "loss": 11.6722, "step": 26605 }, { "epoch": 0.5569371179770577, "grad_norm": 0.33382129669189453, "learning_rate": 0.00018349867965075105, "loss": 11.6818, "step": 26606 }, { "epoch": 0.5569580507410199, "grad_norm": 0.3077622950077057, "learning_rate": 0.00018349747314566321, "loss": 11.6472, "step": 26607 }, { "epoch": 0.556978983504982, "grad_norm": 0.27305662631988525, "learning_rate": 0.00018349626660043648, "loss": 11.6641, "step": 26608 }, { "epoch": 0.5569999162689442, "grad_norm": 0.240178182721138, "learning_rate": 0.0001834950600150714, "loss": 11.6618, "step": 26609 }, { "epoch": 0.5570208490329063, "grad_norm": 0.3354268968105316, "learning_rate": 0.0001834938533895686, "loss": 11.6875, "step": 26610 }, { "epoch": 0.5570417817968685, "grad_norm": 0.261174738407135, "learning_rate": 0.00018349264672392864, "loss": 11.6653, "step": 26611 }, { "epoch": 0.5570627145608306, "grad_norm": 0.35327091813087463, "learning_rate": 0.00018349144001815208, "loss": 11.663, "step": 26612 }, { "epoch": 0.5570836473247928, "grad_norm": 0.2582540810108185, "learning_rate": 0.00018349023327223952, "loss": 11.6585, "step": 26613 }, { "epoch": 0.557104580088755, "grad_norm": 0.31932055950164795, "learning_rate": 0.00018348902648619147, "loss": 11.699, "step": 26614 }, { "epoch": 0.557125512852717, "grad_norm": 0.25952157378196716, "learning_rate": 0.00018348781966000862, "loss": 11.6681, "step": 26615 }, { "epoch": 0.5571464456166793, "grad_norm": 0.3646329939365387, "learning_rate": 0.00018348661279369153, "loss": 11.6763, "step": 26616 }, { "epoch": 0.5571673783806413, "grad_norm": 0.273063987493515, "learning_rate": 0.0001834854058872407, "loss": 11.6779, "step": 26617 }, { "epoch": 0.5571883111446035, "grad_norm": 0.27956458926200867, "learning_rate": 0.0001834841989406568, "loss": 11.667, "step": 26618 }, { "epoch": 0.5572092439085656, "grad_norm": 0.32670602202415466, "learning_rate": 0.00018348299195394035, "loss": 11.6766, "step": 26619 }, { "epoch": 0.5572301766725278, "grad_norm": 0.2804541289806366, "learning_rate": 0.00018348178492709195, "loss": 11.665, "step": 26620 }, { "epoch": 0.55725110943649, "grad_norm": 0.28958845138549805, "learning_rate": 0.00018348057786011222, "loss": 11.6867, "step": 26621 }, { "epoch": 0.5572720422004521, "grad_norm": 0.2838723659515381, "learning_rate": 0.00018347937075300166, "loss": 11.6721, "step": 26622 }, { "epoch": 0.5572929749644143, "grad_norm": 0.32277819514274597, "learning_rate": 0.0001834781636057609, "loss": 11.6674, "step": 26623 }, { "epoch": 0.5573139077283764, "grad_norm": 0.2677711546421051, "learning_rate": 0.00018347695641839052, "loss": 11.6802, "step": 26624 }, { "epoch": 0.5573348404923386, "grad_norm": 0.309079110622406, "learning_rate": 0.00018347574919089113, "loss": 11.6673, "step": 26625 }, { "epoch": 0.5573557732563008, "grad_norm": 0.3530970513820648, "learning_rate": 0.00018347454192326322, "loss": 11.6468, "step": 26626 }, { "epoch": 0.5573767060202629, "grad_norm": 0.28668490052223206, "learning_rate": 0.00018347333461550745, "loss": 11.6604, "step": 26627 }, { "epoch": 0.5573976387842251, "grad_norm": 0.2539058327674866, "learning_rate": 0.00018347212726762437, "loss": 11.6664, "step": 26628 }, { "epoch": 0.5574185715481872, "grad_norm": 0.2741902470588684, "learning_rate": 0.00018347091987961457, "loss": 11.6806, "step": 26629 }, { "epoch": 0.5574395043121494, "grad_norm": 0.35259678959846497, "learning_rate": 0.0001834697124514786, "loss": 11.6876, "step": 26630 }, { "epoch": 0.5574604370761115, "grad_norm": 0.33934617042541504, "learning_rate": 0.0001834685049832171, "loss": 11.6877, "step": 26631 }, { "epoch": 0.5574813698400737, "grad_norm": 0.31536436080932617, "learning_rate": 0.0001834672974748306, "loss": 11.6801, "step": 26632 }, { "epoch": 0.5575023026040359, "grad_norm": 0.37183019518852234, "learning_rate": 0.00018346608992631972, "loss": 11.6673, "step": 26633 }, { "epoch": 0.557523235367998, "grad_norm": 0.27634894847869873, "learning_rate": 0.000183464882337685, "loss": 11.6701, "step": 26634 }, { "epoch": 0.5575441681319602, "grad_norm": 0.31605637073516846, "learning_rate": 0.00018346367470892702, "loss": 11.6827, "step": 26635 }, { "epoch": 0.5575651008959223, "grad_norm": 0.35228431224823, "learning_rate": 0.0001834624670400464, "loss": 11.6766, "step": 26636 }, { "epoch": 0.5575860336598845, "grad_norm": 0.30550462007522583, "learning_rate": 0.00018346125933104372, "loss": 11.6659, "step": 26637 }, { "epoch": 0.5576069664238466, "grad_norm": 0.44066330790519714, "learning_rate": 0.00018346005158191952, "loss": 11.683, "step": 26638 }, { "epoch": 0.5576278991878088, "grad_norm": 0.22861361503601074, "learning_rate": 0.0001834588437926744, "loss": 11.6711, "step": 26639 }, { "epoch": 0.557648831951771, "grad_norm": 0.273827463388443, "learning_rate": 0.00018345763596330894, "loss": 11.6686, "step": 26640 }, { "epoch": 0.557669764715733, "grad_norm": 0.28829482197761536, "learning_rate": 0.0001834564280938237, "loss": 11.6566, "step": 26641 }, { "epoch": 0.5576906974796952, "grad_norm": 0.29369786381721497, "learning_rate": 0.0001834552201842193, "loss": 11.669, "step": 26642 }, { "epoch": 0.5577116302436573, "grad_norm": 0.35822683572769165, "learning_rate": 0.00018345401223449632, "loss": 11.6759, "step": 26643 }, { "epoch": 0.5577325630076195, "grad_norm": 0.2695872187614441, "learning_rate": 0.00018345280424465532, "loss": 11.6557, "step": 26644 }, { "epoch": 0.5577534957715817, "grad_norm": 0.31274789571762085, "learning_rate": 0.0001834515962146969, "loss": 11.6773, "step": 26645 }, { "epoch": 0.5577744285355438, "grad_norm": 0.2794669270515442, "learning_rate": 0.0001834503881446216, "loss": 11.6685, "step": 26646 }, { "epoch": 0.557795361299506, "grad_norm": 0.2824711501598358, "learning_rate": 0.00018344918003443002, "loss": 11.6843, "step": 26647 }, { "epoch": 0.5578162940634681, "grad_norm": 0.5318763256072998, "learning_rate": 0.00018344797188412276, "loss": 11.6644, "step": 26648 }, { "epoch": 0.5578372268274303, "grad_norm": 0.47212639451026917, "learning_rate": 0.00018344676369370038, "loss": 11.6684, "step": 26649 }, { "epoch": 0.5578581595913924, "grad_norm": 0.2789762616157532, "learning_rate": 0.00018344555546316346, "loss": 11.6754, "step": 26650 }, { "epoch": 0.5578790923553546, "grad_norm": 0.27616414427757263, "learning_rate": 0.0001834443471925126, "loss": 11.6597, "step": 26651 }, { "epoch": 0.5579000251193168, "grad_norm": 0.2596268653869629, "learning_rate": 0.00018344313888174834, "loss": 11.6533, "step": 26652 }, { "epoch": 0.5579209578832789, "grad_norm": 0.3029174506664276, "learning_rate": 0.00018344193053087136, "loss": 11.6814, "step": 26653 }, { "epoch": 0.5579418906472411, "grad_norm": 0.2652965784072876, "learning_rate": 0.00018344072213988212, "loss": 11.662, "step": 26654 }, { "epoch": 0.5579628234112032, "grad_norm": 0.26549798250198364, "learning_rate": 0.00018343951370878127, "loss": 11.6553, "step": 26655 }, { "epoch": 0.5579837561751654, "grad_norm": 0.303477942943573, "learning_rate": 0.00018343830523756937, "loss": 11.6887, "step": 26656 }, { "epoch": 0.5580046889391275, "grad_norm": 0.35491588711738586, "learning_rate": 0.000183437096726247, "loss": 11.6734, "step": 26657 }, { "epoch": 0.5580256217030897, "grad_norm": 0.2357502281665802, "learning_rate": 0.00018343588817481475, "loss": 11.6714, "step": 26658 }, { "epoch": 0.5580465544670519, "grad_norm": 0.27343136072158813, "learning_rate": 0.0001834346795832732, "loss": 11.6583, "step": 26659 }, { "epoch": 0.558067487231014, "grad_norm": 0.2617110013961792, "learning_rate": 0.0001834334709516229, "loss": 11.6743, "step": 26660 }, { "epoch": 0.5580884199949762, "grad_norm": 0.23564782738685608, "learning_rate": 0.0001834322622798645, "loss": 11.6647, "step": 26661 }, { "epoch": 0.5581093527589382, "grad_norm": 0.4579271674156189, "learning_rate": 0.00018343105356799852, "loss": 11.6819, "step": 26662 }, { "epoch": 0.5581302855229004, "grad_norm": 0.3045884668827057, "learning_rate": 0.00018342984481602558, "loss": 11.6643, "step": 26663 }, { "epoch": 0.5581512182868626, "grad_norm": 0.28006669878959656, "learning_rate": 0.0001834286360239462, "loss": 11.6683, "step": 26664 }, { "epoch": 0.5581721510508247, "grad_norm": 0.2296644151210785, "learning_rate": 0.00018342742719176105, "loss": 11.6718, "step": 26665 }, { "epoch": 0.5581930838147869, "grad_norm": 0.3358127772808075, "learning_rate": 0.00018342621831947064, "loss": 11.6899, "step": 26666 }, { "epoch": 0.558214016578749, "grad_norm": 0.3730223774909973, "learning_rate": 0.00018342500940707557, "loss": 11.6868, "step": 26667 }, { "epoch": 0.5582349493427112, "grad_norm": 0.27828511595726013, "learning_rate": 0.00018342380045457642, "loss": 11.6655, "step": 26668 }, { "epoch": 0.5582558821066733, "grad_norm": 0.32705697417259216, "learning_rate": 0.00018342259146197378, "loss": 11.6445, "step": 26669 }, { "epoch": 0.5582768148706355, "grad_norm": 0.26312026381492615, "learning_rate": 0.00018342138242926826, "loss": 11.6663, "step": 26670 }, { "epoch": 0.5582977476345977, "grad_norm": 0.32124871015548706, "learning_rate": 0.00018342017335646037, "loss": 11.667, "step": 26671 }, { "epoch": 0.5583186803985598, "grad_norm": 0.26691970229148865, "learning_rate": 0.00018341896424355075, "loss": 11.6759, "step": 26672 }, { "epoch": 0.558339613162522, "grad_norm": 0.4311709702014923, "learning_rate": 0.00018341775509053996, "loss": 11.6685, "step": 26673 }, { "epoch": 0.5583605459264841, "grad_norm": 0.26117658615112305, "learning_rate": 0.00018341654589742857, "loss": 11.6734, "step": 26674 }, { "epoch": 0.5583814786904463, "grad_norm": 0.2713930904865265, "learning_rate": 0.0001834153366642172, "loss": 11.6657, "step": 26675 }, { "epoch": 0.5584024114544084, "grad_norm": 0.3212866187095642, "learning_rate": 0.0001834141273909064, "loss": 11.6654, "step": 26676 }, { "epoch": 0.5584233442183706, "grad_norm": 0.3229697644710541, "learning_rate": 0.00018341291807749674, "loss": 11.6819, "step": 26677 }, { "epoch": 0.5584442769823328, "grad_norm": 0.3426276743412018, "learning_rate": 0.00018341170872398883, "loss": 11.6651, "step": 26678 }, { "epoch": 0.5584652097462949, "grad_norm": 0.3386111855506897, "learning_rate": 0.00018341049933038326, "loss": 11.6707, "step": 26679 }, { "epoch": 0.5584861425102571, "grad_norm": 0.32354503870010376, "learning_rate": 0.00018340928989668055, "loss": 11.665, "step": 26680 }, { "epoch": 0.5585070752742192, "grad_norm": 0.34619224071502686, "learning_rate": 0.00018340808042288136, "loss": 11.6825, "step": 26681 }, { "epoch": 0.5585280080381814, "grad_norm": 0.38449087738990784, "learning_rate": 0.00018340687090898623, "loss": 11.6634, "step": 26682 }, { "epoch": 0.5585489408021436, "grad_norm": 0.3289814293384552, "learning_rate": 0.0001834056613549957, "loss": 11.6786, "step": 26683 }, { "epoch": 0.5585698735661057, "grad_norm": 0.2695520222187042, "learning_rate": 0.00018340445176091047, "loss": 11.6751, "step": 26684 }, { "epoch": 0.5585908063300679, "grad_norm": 0.2761997580528259, "learning_rate": 0.000183403242126731, "loss": 11.6674, "step": 26685 }, { "epoch": 0.55861173909403, "grad_norm": 0.2774219512939453, "learning_rate": 0.0001834020324524579, "loss": 11.6749, "step": 26686 }, { "epoch": 0.5586326718579921, "grad_norm": 0.3248678743839264, "learning_rate": 0.0001834008227380918, "loss": 11.6756, "step": 26687 }, { "epoch": 0.5586536046219542, "grad_norm": 0.27045318484306335, "learning_rate": 0.00018339961298363328, "loss": 11.6779, "step": 26688 }, { "epoch": 0.5586745373859164, "grad_norm": 0.2716779112815857, "learning_rate": 0.00018339840318908288, "loss": 11.6635, "step": 26689 }, { "epoch": 0.5586954701498786, "grad_norm": 0.31422221660614014, "learning_rate": 0.00018339719335444117, "loss": 11.6535, "step": 26690 }, { "epoch": 0.5587164029138407, "grad_norm": 0.3914913535118103, "learning_rate": 0.00018339598347970877, "loss": 11.6818, "step": 26691 }, { "epoch": 0.5587373356778029, "grad_norm": 0.3305319845676422, "learning_rate": 0.00018339477356488625, "loss": 11.6559, "step": 26692 }, { "epoch": 0.558758268441765, "grad_norm": 0.23256918787956238, "learning_rate": 0.00018339356360997419, "loss": 11.6679, "step": 26693 }, { "epoch": 0.5587792012057272, "grad_norm": 0.345088392496109, "learning_rate": 0.00018339235361497318, "loss": 11.6767, "step": 26694 }, { "epoch": 0.5588001339696893, "grad_norm": 0.27063244581222534, "learning_rate": 0.0001833911435798838, "loss": 11.6737, "step": 26695 }, { "epoch": 0.5588210667336515, "grad_norm": 0.2517387270927429, "learning_rate": 0.00018338993350470662, "loss": 11.6734, "step": 26696 }, { "epoch": 0.5588419994976137, "grad_norm": 0.28747037053108215, "learning_rate": 0.00018338872338944222, "loss": 11.6743, "step": 26697 }, { "epoch": 0.5588629322615758, "grad_norm": 0.3303360641002655, "learning_rate": 0.0001833875132340912, "loss": 11.6632, "step": 26698 }, { "epoch": 0.558883865025538, "grad_norm": 0.38878485560417175, "learning_rate": 0.00018338630303865413, "loss": 11.6992, "step": 26699 }, { "epoch": 0.5589047977895001, "grad_norm": 0.2600547671318054, "learning_rate": 0.00018338509280313157, "loss": 11.6729, "step": 26700 }, { "epoch": 0.5589257305534623, "grad_norm": 0.24625307321548462, "learning_rate": 0.00018338388252752414, "loss": 11.6693, "step": 26701 }, { "epoch": 0.5589466633174245, "grad_norm": 0.30959659814834595, "learning_rate": 0.0001833826722118324, "loss": 11.6619, "step": 26702 }, { "epoch": 0.5589675960813866, "grad_norm": 0.3419455587863922, "learning_rate": 0.000183381461856057, "loss": 11.6647, "step": 26703 }, { "epoch": 0.5589885288453488, "grad_norm": 0.31169402599334717, "learning_rate": 0.00018338025146019836, "loss": 11.6681, "step": 26704 }, { "epoch": 0.5590094616093109, "grad_norm": 0.28753983974456787, "learning_rate": 0.00018337904102425721, "loss": 11.6758, "step": 26705 }, { "epoch": 0.5590303943732731, "grad_norm": 0.25815561413764954, "learning_rate": 0.00018337783054823409, "loss": 11.6665, "step": 26706 }, { "epoch": 0.5590513271372352, "grad_norm": 0.30883118510246277, "learning_rate": 0.00018337662003212958, "loss": 11.6848, "step": 26707 }, { "epoch": 0.5590722599011974, "grad_norm": 0.37720298767089844, "learning_rate": 0.00018337540947594425, "loss": 11.6835, "step": 26708 }, { "epoch": 0.5590931926651596, "grad_norm": 0.36365121603012085, "learning_rate": 0.0001833741988796787, "loss": 11.6694, "step": 26709 }, { "epoch": 0.5591141254291216, "grad_norm": 0.4676515460014343, "learning_rate": 0.0001833729882433335, "loss": 11.6687, "step": 26710 }, { "epoch": 0.5591350581930838, "grad_norm": 0.29104211926460266, "learning_rate": 0.00018337177756690923, "loss": 11.6621, "step": 26711 }, { "epoch": 0.5591559909570459, "grad_norm": 0.23756329715251923, "learning_rate": 0.00018337056685040643, "loss": 11.6503, "step": 26712 }, { "epoch": 0.5591769237210081, "grad_norm": 0.28016796708106995, "learning_rate": 0.0001833693560938258, "loss": 11.655, "step": 26713 }, { "epoch": 0.5591978564849702, "grad_norm": 0.41594257950782776, "learning_rate": 0.0001833681452971678, "loss": 11.6943, "step": 26714 }, { "epoch": 0.5592187892489324, "grad_norm": 0.2922303378582001, "learning_rate": 0.0001833669344604331, "loss": 11.6706, "step": 26715 }, { "epoch": 0.5592397220128946, "grad_norm": 0.36125415563583374, "learning_rate": 0.0001833657235836222, "loss": 11.6871, "step": 26716 }, { "epoch": 0.5592606547768567, "grad_norm": 0.39065930247306824, "learning_rate": 0.00018336451266673577, "loss": 11.6808, "step": 26717 }, { "epoch": 0.5592815875408189, "grad_norm": 0.28074508905410767, "learning_rate": 0.00018336330170977432, "loss": 11.6641, "step": 26718 }, { "epoch": 0.559302520304781, "grad_norm": 0.2677180767059326, "learning_rate": 0.00018336209071273847, "loss": 11.6776, "step": 26719 }, { "epoch": 0.5593234530687432, "grad_norm": 0.3722795248031616, "learning_rate": 0.0001833608796756288, "loss": 11.6767, "step": 26720 }, { "epoch": 0.5593443858327054, "grad_norm": 0.3446236252784729, "learning_rate": 0.00018335966859844586, "loss": 11.6816, "step": 26721 }, { "epoch": 0.5593653185966675, "grad_norm": 0.3555833101272583, "learning_rate": 0.00018335845748119027, "loss": 11.6791, "step": 26722 }, { "epoch": 0.5593862513606297, "grad_norm": 0.25933602452278137, "learning_rate": 0.0001833572463238626, "loss": 11.6629, "step": 26723 }, { "epoch": 0.5594071841245918, "grad_norm": 0.3716373145580292, "learning_rate": 0.00018335603512646342, "loss": 11.6787, "step": 26724 }, { "epoch": 0.559428116888554, "grad_norm": 0.2704611122608185, "learning_rate": 0.00018335482388899334, "loss": 11.6574, "step": 26725 }, { "epoch": 0.5594490496525161, "grad_norm": 0.30417779088020325, "learning_rate": 0.00018335361261145292, "loss": 11.6892, "step": 26726 }, { "epoch": 0.5594699824164783, "grad_norm": 0.27406805753707886, "learning_rate": 0.00018335240129384274, "loss": 11.6681, "step": 26727 }, { "epoch": 0.5594909151804405, "grad_norm": 0.2838963270187378, "learning_rate": 0.00018335118993616342, "loss": 11.6707, "step": 26728 }, { "epoch": 0.5595118479444026, "grad_norm": 0.2858043909072876, "learning_rate": 0.0001833499785384155, "loss": 11.6681, "step": 26729 }, { "epoch": 0.5595327807083648, "grad_norm": 0.2657710611820221, "learning_rate": 0.00018334876710059957, "loss": 11.6422, "step": 26730 }, { "epoch": 0.5595537134723269, "grad_norm": 0.274098664522171, "learning_rate": 0.0001833475556227162, "loss": 11.6717, "step": 26731 }, { "epoch": 0.559574646236289, "grad_norm": 0.2919064164161682, "learning_rate": 0.00018334634410476598, "loss": 11.6728, "step": 26732 }, { "epoch": 0.5595955790002511, "grad_norm": 0.3495990037918091, "learning_rate": 0.00018334513254674955, "loss": 11.685, "step": 26733 }, { "epoch": 0.5596165117642133, "grad_norm": 0.30462646484375, "learning_rate": 0.0001833439209486674, "loss": 11.6667, "step": 26734 }, { "epoch": 0.5596374445281755, "grad_norm": 0.26841145753860474, "learning_rate": 0.00018334270931052017, "loss": 11.6737, "step": 26735 }, { "epoch": 0.5596583772921376, "grad_norm": 0.3697745203971863, "learning_rate": 0.00018334149763230847, "loss": 11.6795, "step": 26736 }, { "epoch": 0.5596793100560998, "grad_norm": 0.3398296535015106, "learning_rate": 0.00018334028591403278, "loss": 11.6463, "step": 26737 }, { "epoch": 0.5597002428200619, "grad_norm": 0.2796342670917511, "learning_rate": 0.00018333907415569377, "loss": 11.6684, "step": 26738 }, { "epoch": 0.5597211755840241, "grad_norm": 0.30923184752464294, "learning_rate": 0.00018333786235729202, "loss": 11.6649, "step": 26739 }, { "epoch": 0.5597421083479863, "grad_norm": 0.3060779869556427, "learning_rate": 0.00018333665051882808, "loss": 11.6681, "step": 26740 }, { "epoch": 0.5597630411119484, "grad_norm": 0.3919994831085205, "learning_rate": 0.0001833354386403025, "loss": 11.6708, "step": 26741 }, { "epoch": 0.5597839738759106, "grad_norm": 0.28270047903060913, "learning_rate": 0.00018333422672171598, "loss": 11.6675, "step": 26742 }, { "epoch": 0.5598049066398727, "grad_norm": 0.29039013385772705, "learning_rate": 0.00018333301476306896, "loss": 11.6627, "step": 26743 }, { "epoch": 0.5598258394038349, "grad_norm": 0.28979575634002686, "learning_rate": 0.0001833318027643621, "loss": 11.6726, "step": 26744 }, { "epoch": 0.559846772167797, "grad_norm": 0.393810510635376, "learning_rate": 0.00018333059072559598, "loss": 11.6818, "step": 26745 }, { "epoch": 0.5598677049317592, "grad_norm": 0.28766778111457825, "learning_rate": 0.0001833293786467712, "loss": 11.6845, "step": 26746 }, { "epoch": 0.5598886376957214, "grad_norm": 0.28066694736480713, "learning_rate": 0.00018332816652788828, "loss": 11.6691, "step": 26747 }, { "epoch": 0.5599095704596835, "grad_norm": 0.34717273712158203, "learning_rate": 0.0001833269543689479, "loss": 11.6667, "step": 26748 }, { "epoch": 0.5599305032236457, "grad_norm": 0.3198026120662689, "learning_rate": 0.00018332574216995052, "loss": 11.6689, "step": 26749 }, { "epoch": 0.5599514359876078, "grad_norm": 0.21751853823661804, "learning_rate": 0.00018332452993089678, "loss": 11.6672, "step": 26750 }, { "epoch": 0.55997236875157, "grad_norm": 0.2855963706970215, "learning_rate": 0.00018332331765178733, "loss": 11.6637, "step": 26751 }, { "epoch": 0.5599933015155321, "grad_norm": 0.2611228823661804, "learning_rate": 0.00018332210533262265, "loss": 11.6791, "step": 26752 }, { "epoch": 0.5600142342794943, "grad_norm": 0.30066734552383423, "learning_rate": 0.0001833208929734034, "loss": 11.6616, "step": 26753 }, { "epoch": 0.5600351670434565, "grad_norm": 0.2837773263454437, "learning_rate": 0.00018331968057413008, "loss": 11.6643, "step": 26754 }, { "epoch": 0.5600560998074186, "grad_norm": 0.23352383077144623, "learning_rate": 0.00018331846813480332, "loss": 11.6673, "step": 26755 }, { "epoch": 0.5600770325713808, "grad_norm": 0.26591458916664124, "learning_rate": 0.00018331725565542373, "loss": 11.6676, "step": 26756 }, { "epoch": 0.5600979653353428, "grad_norm": 0.23706772923469543, "learning_rate": 0.00018331604313599184, "loss": 11.669, "step": 26757 }, { "epoch": 0.560118898099305, "grad_norm": 0.37656188011169434, "learning_rate": 0.0001833148305765083, "loss": 11.6709, "step": 26758 }, { "epoch": 0.5601398308632672, "grad_norm": 0.2366706281900406, "learning_rate": 0.00018331361797697363, "loss": 11.6667, "step": 26759 }, { "epoch": 0.5601607636272293, "grad_norm": 0.2790692150592804, "learning_rate": 0.00018331240533738842, "loss": 11.6929, "step": 26760 }, { "epoch": 0.5601816963911915, "grad_norm": 0.3115941882133484, "learning_rate": 0.00018331119265775327, "loss": 11.6722, "step": 26761 }, { "epoch": 0.5602026291551536, "grad_norm": 0.27749866247177124, "learning_rate": 0.00018330997993806878, "loss": 11.6704, "step": 26762 }, { "epoch": 0.5602235619191158, "grad_norm": 0.33946382999420166, "learning_rate": 0.00018330876717833548, "loss": 11.6807, "step": 26763 }, { "epoch": 0.5602444946830779, "grad_norm": 0.28066930174827576, "learning_rate": 0.00018330755437855403, "loss": 11.6658, "step": 26764 }, { "epoch": 0.5602654274470401, "grad_norm": 0.3361624479293823, "learning_rate": 0.00018330634153872492, "loss": 11.6782, "step": 26765 }, { "epoch": 0.5602863602110023, "grad_norm": 0.32215192914009094, "learning_rate": 0.00018330512865884883, "loss": 11.6887, "step": 26766 }, { "epoch": 0.5603072929749644, "grad_norm": 0.3299337923526764, "learning_rate": 0.00018330391573892623, "loss": 11.6599, "step": 26767 }, { "epoch": 0.5603282257389266, "grad_norm": 0.3386465311050415, "learning_rate": 0.00018330270277895785, "loss": 11.6719, "step": 26768 }, { "epoch": 0.5603491585028887, "grad_norm": 0.2966797947883606, "learning_rate": 0.00018330148977894415, "loss": 11.6792, "step": 26769 }, { "epoch": 0.5603700912668509, "grad_norm": 0.2930002212524414, "learning_rate": 0.00018330027673888575, "loss": 11.6754, "step": 26770 }, { "epoch": 0.560391024030813, "grad_norm": 0.2781468331813812, "learning_rate": 0.00018329906365878324, "loss": 11.6643, "step": 26771 }, { "epoch": 0.5604119567947752, "grad_norm": 0.32798677682876587, "learning_rate": 0.0001832978505386372, "loss": 11.6899, "step": 26772 }, { "epoch": 0.5604328895587374, "grad_norm": 0.263217568397522, "learning_rate": 0.00018329663737844825, "loss": 11.6603, "step": 26773 }, { "epoch": 0.5604538223226995, "grad_norm": 0.27669480443000793, "learning_rate": 0.0001832954241782169, "loss": 11.675, "step": 26774 }, { "epoch": 0.5604747550866617, "grad_norm": 0.2406459003686905, "learning_rate": 0.00018329421093794376, "loss": 11.669, "step": 26775 }, { "epoch": 0.5604956878506238, "grad_norm": 0.24234512448310852, "learning_rate": 0.00018329299765762946, "loss": 11.6689, "step": 26776 }, { "epoch": 0.560516620614586, "grad_norm": 0.2901834547519684, "learning_rate": 0.00018329178433727452, "loss": 11.6788, "step": 26777 }, { "epoch": 0.5605375533785482, "grad_norm": 0.26813384890556335, "learning_rate": 0.00018329057097687956, "loss": 11.6657, "step": 26778 }, { "epoch": 0.5605584861425102, "grad_norm": 0.29990553855895996, "learning_rate": 0.00018328935757644515, "loss": 11.6681, "step": 26779 }, { "epoch": 0.5605794189064724, "grad_norm": 0.24561360478401184, "learning_rate": 0.00018328814413597186, "loss": 11.67, "step": 26780 }, { "epoch": 0.5606003516704345, "grad_norm": 0.3357110619544983, "learning_rate": 0.0001832869306554603, "loss": 11.6588, "step": 26781 }, { "epoch": 0.5606212844343967, "grad_norm": 0.3124781548976898, "learning_rate": 0.00018328571713491107, "loss": 11.6647, "step": 26782 }, { "epoch": 0.5606422171983588, "grad_norm": 0.27046895027160645, "learning_rate": 0.00018328450357432468, "loss": 11.6639, "step": 26783 }, { "epoch": 0.560663149962321, "grad_norm": 0.2876293957233429, "learning_rate": 0.0001832832899737018, "loss": 11.676, "step": 26784 }, { "epoch": 0.5606840827262832, "grad_norm": 0.2974662482738495, "learning_rate": 0.00018328207633304297, "loss": 11.6858, "step": 26785 }, { "epoch": 0.5607050154902453, "grad_norm": 0.31497621536254883, "learning_rate": 0.00018328086265234877, "loss": 11.6681, "step": 26786 }, { "epoch": 0.5607259482542075, "grad_norm": 0.29887276887893677, "learning_rate": 0.0001832796489316198, "loss": 11.6824, "step": 26787 }, { "epoch": 0.5607468810181696, "grad_norm": 0.28667932748794556, "learning_rate": 0.00018327843517085661, "loss": 11.6772, "step": 26788 }, { "epoch": 0.5607678137821318, "grad_norm": 0.2845708429813385, "learning_rate": 0.00018327722137005982, "loss": 11.6875, "step": 26789 }, { "epoch": 0.5607887465460939, "grad_norm": 0.3305532932281494, "learning_rate": 0.00018327600752923, "loss": 11.6723, "step": 26790 }, { "epoch": 0.5608096793100561, "grad_norm": 0.3427824079990387, "learning_rate": 0.00018327479364836774, "loss": 11.6774, "step": 26791 }, { "epoch": 0.5608306120740183, "grad_norm": 0.2499939650297165, "learning_rate": 0.00018327357972747362, "loss": 11.6684, "step": 26792 }, { "epoch": 0.5608515448379804, "grad_norm": 0.3024851381778717, "learning_rate": 0.00018327236576654824, "loss": 11.6838, "step": 26793 }, { "epoch": 0.5608724776019426, "grad_norm": 0.27250364422798157, "learning_rate": 0.0001832711517655921, "loss": 11.6644, "step": 26794 }, { "epoch": 0.5608934103659047, "grad_norm": 0.3180132210254669, "learning_rate": 0.0001832699377246059, "loss": 11.6685, "step": 26795 }, { "epoch": 0.5609143431298669, "grad_norm": 0.29929307103157043, "learning_rate": 0.00018326872364359017, "loss": 11.6744, "step": 26796 }, { "epoch": 0.560935275893829, "grad_norm": 0.28066882491111755, "learning_rate": 0.00018326750952254547, "loss": 11.6743, "step": 26797 }, { "epoch": 0.5609562086577912, "grad_norm": 0.2853412330150604, "learning_rate": 0.0001832662953614724, "loss": 11.6672, "step": 26798 }, { "epoch": 0.5609771414217534, "grad_norm": 0.24287523329257965, "learning_rate": 0.0001832650811603716, "loss": 11.6668, "step": 26799 }, { "epoch": 0.5609980741857155, "grad_norm": 0.2967338562011719, "learning_rate": 0.0001832638669192436, "loss": 11.6778, "step": 26800 }, { "epoch": 0.5610190069496777, "grad_norm": 0.3496701717376709, "learning_rate": 0.00018326265263808895, "loss": 11.6797, "step": 26801 }, { "epoch": 0.5610399397136397, "grad_norm": 0.30160287022590637, "learning_rate": 0.00018326143831690831, "loss": 11.6677, "step": 26802 }, { "epoch": 0.561060872477602, "grad_norm": 0.3211774230003357, "learning_rate": 0.00018326022395570224, "loss": 11.6736, "step": 26803 }, { "epoch": 0.5610818052415641, "grad_norm": 0.23638281226158142, "learning_rate": 0.0001832590095544713, "loss": 11.6628, "step": 26804 }, { "epoch": 0.5611027380055262, "grad_norm": 0.2913302183151245, "learning_rate": 0.00018325779511321603, "loss": 11.6534, "step": 26805 }, { "epoch": 0.5611236707694884, "grad_norm": 0.310946524143219, "learning_rate": 0.00018325658063193714, "loss": 11.6632, "step": 26806 }, { "epoch": 0.5611446035334505, "grad_norm": 0.26602667570114136, "learning_rate": 0.0001832553661106351, "loss": 11.68, "step": 26807 }, { "epoch": 0.5611655362974127, "grad_norm": 0.2944968044757843, "learning_rate": 0.00018325415154931055, "loss": 11.6737, "step": 26808 }, { "epoch": 0.5611864690613748, "grad_norm": 0.26600465178489685, "learning_rate": 0.00018325293694796408, "loss": 11.6592, "step": 26809 }, { "epoch": 0.561207401825337, "grad_norm": 0.2853779196739197, "learning_rate": 0.00018325172230659623, "loss": 11.6596, "step": 26810 }, { "epoch": 0.5612283345892992, "grad_norm": 0.25905001163482666, "learning_rate": 0.0001832505076252076, "loss": 11.6754, "step": 26811 }, { "epoch": 0.5612492673532613, "grad_norm": 0.28360503911972046, "learning_rate": 0.0001832492929037988, "loss": 11.6749, "step": 26812 }, { "epoch": 0.5612702001172235, "grad_norm": 0.2561936378479004, "learning_rate": 0.00018324807814237043, "loss": 11.6652, "step": 26813 }, { "epoch": 0.5612911328811856, "grad_norm": 0.3289823830127716, "learning_rate": 0.000183246863340923, "loss": 11.6725, "step": 26814 }, { "epoch": 0.5613120656451478, "grad_norm": 0.25559940934181213, "learning_rate": 0.00018324564849945716, "loss": 11.672, "step": 26815 }, { "epoch": 0.5613329984091099, "grad_norm": 0.3482634127140045, "learning_rate": 0.00018324443361797345, "loss": 11.6582, "step": 26816 }, { "epoch": 0.5613539311730721, "grad_norm": 0.27389857172966003, "learning_rate": 0.00018324321869647245, "loss": 11.6649, "step": 26817 }, { "epoch": 0.5613748639370343, "grad_norm": 0.34760722517967224, "learning_rate": 0.0001832420037349548, "loss": 11.6591, "step": 26818 }, { "epoch": 0.5613957967009964, "grad_norm": 0.2729954719543457, "learning_rate": 0.00018324078873342106, "loss": 11.6476, "step": 26819 }, { "epoch": 0.5614167294649586, "grad_norm": 0.3280494213104248, "learning_rate": 0.00018323957369187176, "loss": 11.6774, "step": 26820 }, { "epoch": 0.5614376622289207, "grad_norm": 0.4362199008464813, "learning_rate": 0.00018323835861030758, "loss": 11.6855, "step": 26821 }, { "epoch": 0.5614585949928829, "grad_norm": 0.2984910309314728, "learning_rate": 0.00018323714348872903, "loss": 11.6674, "step": 26822 }, { "epoch": 0.5614795277568451, "grad_norm": 0.3478507995605469, "learning_rate": 0.00018323592832713672, "loss": 11.6638, "step": 26823 }, { "epoch": 0.5615004605208072, "grad_norm": 0.31332895159721375, "learning_rate": 0.0001832347131255312, "loss": 11.677, "step": 26824 }, { "epoch": 0.5615213932847694, "grad_norm": 3.1526377201080322, "learning_rate": 0.00018323349788391312, "loss": 11.6202, "step": 26825 }, { "epoch": 0.5615423260487314, "grad_norm": 0.279591828584671, "learning_rate": 0.00018323228260228302, "loss": 11.6721, "step": 26826 }, { "epoch": 0.5615632588126936, "grad_norm": 0.44697150588035583, "learning_rate": 0.0001832310672806415, "loss": 11.6224, "step": 26827 }, { "epoch": 0.5615841915766557, "grad_norm": 0.306424617767334, "learning_rate": 0.00018322985191898913, "loss": 11.6618, "step": 26828 }, { "epoch": 0.5616051243406179, "grad_norm": 0.2684761583805084, "learning_rate": 0.00018322863651732652, "loss": 11.6705, "step": 26829 }, { "epoch": 0.5616260571045801, "grad_norm": 0.3280617892742157, "learning_rate": 0.00018322742107565422, "loss": 11.6821, "step": 26830 }, { "epoch": 0.5616469898685422, "grad_norm": 0.29740333557128906, "learning_rate": 0.00018322620559397282, "loss": 11.6802, "step": 26831 }, { "epoch": 0.5616679226325044, "grad_norm": 0.2414243072271347, "learning_rate": 0.00018322499007228293, "loss": 11.6714, "step": 26832 }, { "epoch": 0.5616888553964665, "grad_norm": 0.3862609565258026, "learning_rate": 0.00018322377451058513, "loss": 11.6701, "step": 26833 }, { "epoch": 0.5617097881604287, "grad_norm": 0.4140419363975525, "learning_rate": 0.00018322255890887998, "loss": 11.6691, "step": 26834 }, { "epoch": 0.5617307209243908, "grad_norm": 0.28907227516174316, "learning_rate": 0.00018322134326716807, "loss": 11.6929, "step": 26835 }, { "epoch": 0.561751653688353, "grad_norm": 0.3339565098285675, "learning_rate": 0.00018322012758545, "loss": 11.6673, "step": 26836 }, { "epoch": 0.5617725864523152, "grad_norm": 0.24820129573345184, "learning_rate": 0.00018321891186372634, "loss": 11.6692, "step": 26837 }, { "epoch": 0.5617935192162773, "grad_norm": 2.0521767139434814, "learning_rate": 0.00018321769610199772, "loss": 11.6521, "step": 26838 }, { "epoch": 0.5618144519802395, "grad_norm": 0.2613641321659088, "learning_rate": 0.00018321648030026464, "loss": 11.6724, "step": 26839 }, { "epoch": 0.5618353847442016, "grad_norm": 0.34762635827064514, "learning_rate": 0.00018321526445852775, "loss": 11.6869, "step": 26840 }, { "epoch": 0.5618563175081638, "grad_norm": 0.31586870551109314, "learning_rate": 0.00018321404857678762, "loss": 11.6832, "step": 26841 }, { "epoch": 0.561877250272126, "grad_norm": 0.2864675223827362, "learning_rate": 0.0001832128326550448, "loss": 11.6842, "step": 26842 }, { "epoch": 0.5618981830360881, "grad_norm": 0.30820953845977783, "learning_rate": 0.0001832116166932999, "loss": 11.6653, "step": 26843 }, { "epoch": 0.5619191158000503, "grad_norm": 0.23388522863388062, "learning_rate": 0.00018321040069155356, "loss": 11.6736, "step": 26844 }, { "epoch": 0.5619400485640124, "grad_norm": 0.3074970245361328, "learning_rate": 0.00018320918464980628, "loss": 11.686, "step": 26845 }, { "epoch": 0.5619609813279746, "grad_norm": 0.357418417930603, "learning_rate": 0.0001832079685680587, "loss": 11.6625, "step": 26846 }, { "epoch": 0.5619819140919367, "grad_norm": 0.28490927815437317, "learning_rate": 0.00018320675244631133, "loss": 11.6817, "step": 26847 }, { "epoch": 0.5620028468558989, "grad_norm": 0.34244000911712646, "learning_rate": 0.00018320553628456488, "loss": 11.6779, "step": 26848 }, { "epoch": 0.562023779619861, "grad_norm": 0.333939790725708, "learning_rate": 0.0001832043200828198, "loss": 11.6705, "step": 26849 }, { "epoch": 0.5620447123838231, "grad_norm": 0.31035447120666504, "learning_rate": 0.00018320310384107674, "loss": 11.6957, "step": 26850 }, { "epoch": 0.5620656451477853, "grad_norm": 0.26882442831993103, "learning_rate": 0.00018320188755933633, "loss": 11.6688, "step": 26851 }, { "epoch": 0.5620865779117474, "grad_norm": 0.2736690938472748, "learning_rate": 0.00018320067123759904, "loss": 11.6645, "step": 26852 }, { "epoch": 0.5621075106757096, "grad_norm": 0.275280624628067, "learning_rate": 0.0001831994548758656, "loss": 11.6757, "step": 26853 }, { "epoch": 0.5621284434396717, "grad_norm": 0.24307729303836823, "learning_rate": 0.00018319823847413644, "loss": 11.6723, "step": 26854 }, { "epoch": 0.5621493762036339, "grad_norm": 0.31696945428848267, "learning_rate": 0.00018319702203241226, "loss": 11.6742, "step": 26855 }, { "epoch": 0.5621703089675961, "grad_norm": 0.33310726284980774, "learning_rate": 0.00018319580555069357, "loss": 11.657, "step": 26856 }, { "epoch": 0.5621912417315582, "grad_norm": 0.31183573603630066, "learning_rate": 0.00018319458902898102, "loss": 11.6689, "step": 26857 }, { "epoch": 0.5622121744955204, "grad_norm": 0.36466529965400696, "learning_rate": 0.00018319337246727515, "loss": 11.6867, "step": 26858 }, { "epoch": 0.5622331072594825, "grad_norm": 0.2994043231010437, "learning_rate": 0.00018319215586557657, "loss": 11.6458, "step": 26859 }, { "epoch": 0.5622540400234447, "grad_norm": 0.2631434202194214, "learning_rate": 0.00018319093922388583, "loss": 11.6467, "step": 26860 }, { "epoch": 0.5622749727874069, "grad_norm": 0.2709212303161621, "learning_rate": 0.00018318972254220356, "loss": 11.6551, "step": 26861 }, { "epoch": 0.562295905551369, "grad_norm": 0.2663023769855499, "learning_rate": 0.00018318850582053033, "loss": 11.6656, "step": 26862 }, { "epoch": 0.5623168383153312, "grad_norm": 0.2752736806869507, "learning_rate": 0.0001831872890588667, "loss": 11.6645, "step": 26863 }, { "epoch": 0.5623377710792933, "grad_norm": 0.32734090089797974, "learning_rate": 0.00018318607225721326, "loss": 11.6622, "step": 26864 }, { "epoch": 0.5623587038432555, "grad_norm": 0.3166532814502716, "learning_rate": 0.00018318485541557068, "loss": 11.6794, "step": 26865 }, { "epoch": 0.5623796366072176, "grad_norm": 0.2986757457256317, "learning_rate": 0.00018318363853393938, "loss": 11.6682, "step": 26866 }, { "epoch": 0.5624005693711798, "grad_norm": 0.2888811230659485, "learning_rate": 0.00018318242161232008, "loss": 11.6575, "step": 26867 }, { "epoch": 0.562421502135142, "grad_norm": 0.32731297612190247, "learning_rate": 0.00018318120465071332, "loss": 11.6857, "step": 26868 }, { "epoch": 0.5624424348991041, "grad_norm": 0.2307337373495102, "learning_rate": 0.0001831799876491197, "loss": 11.6725, "step": 26869 }, { "epoch": 0.5624633676630663, "grad_norm": 0.2783939838409424, "learning_rate": 0.0001831787706075398, "loss": 11.6713, "step": 26870 }, { "epoch": 0.5624843004270283, "grad_norm": 0.3527560234069824, "learning_rate": 0.00018317755352597413, "loss": 11.6722, "step": 26871 }, { "epoch": 0.5625052331909906, "grad_norm": 0.29956597089767456, "learning_rate": 0.00018317633640442342, "loss": 11.6708, "step": 26872 }, { "epoch": 0.5625261659549526, "grad_norm": 0.32088595628738403, "learning_rate": 0.00018317511924288815, "loss": 11.6879, "step": 26873 }, { "epoch": 0.5625470987189148, "grad_norm": 0.4085802137851715, "learning_rate": 0.00018317390204136892, "loss": 11.6723, "step": 26874 }, { "epoch": 0.562568031482877, "grad_norm": 0.31395190954208374, "learning_rate": 0.00018317268479986636, "loss": 11.6785, "step": 26875 }, { "epoch": 0.5625889642468391, "grad_norm": 0.2951743006706238, "learning_rate": 0.000183171467518381, "loss": 11.6734, "step": 26876 }, { "epoch": 0.5626098970108013, "grad_norm": 0.2986879348754883, "learning_rate": 0.00018317025019691348, "loss": 11.6739, "step": 26877 }, { "epoch": 0.5626308297747634, "grad_norm": 0.38915857672691345, "learning_rate": 0.0001831690328354643, "loss": 11.6777, "step": 26878 }, { "epoch": 0.5626517625387256, "grad_norm": 0.27084141969680786, "learning_rate": 0.00018316781543403416, "loss": 11.6839, "step": 26879 }, { "epoch": 0.5626726953026878, "grad_norm": 0.24147094786167145, "learning_rate": 0.00018316659799262353, "loss": 11.6735, "step": 26880 }, { "epoch": 0.5626936280666499, "grad_norm": 0.2578001618385315, "learning_rate": 0.0001831653805112331, "loss": 11.6708, "step": 26881 }, { "epoch": 0.5627145608306121, "grad_norm": 0.3556217551231384, "learning_rate": 0.00018316416298986337, "loss": 11.663, "step": 26882 }, { "epoch": 0.5627354935945742, "grad_norm": 0.2808833718299866, "learning_rate": 0.00018316294542851494, "loss": 11.6734, "step": 26883 }, { "epoch": 0.5627564263585364, "grad_norm": 0.3148949444293976, "learning_rate": 0.00018316172782718848, "loss": 11.6738, "step": 26884 }, { "epoch": 0.5627773591224985, "grad_norm": 0.2368740439414978, "learning_rate": 0.00018316051018588448, "loss": 11.6641, "step": 26885 }, { "epoch": 0.5627982918864607, "grad_norm": 0.3101483881473541, "learning_rate": 0.00018315929250460353, "loss": 11.7025, "step": 26886 }, { "epoch": 0.5628192246504229, "grad_norm": 0.25066760182380676, "learning_rate": 0.00018315807478334626, "loss": 11.6598, "step": 26887 }, { "epoch": 0.562840157414385, "grad_norm": 0.2664527893066406, "learning_rate": 0.00018315685702211324, "loss": 11.6819, "step": 26888 }, { "epoch": 0.5628610901783472, "grad_norm": 0.3524322211742401, "learning_rate": 0.00018315563922090505, "loss": 11.6932, "step": 26889 }, { "epoch": 0.5628820229423093, "grad_norm": 0.2929895222187042, "learning_rate": 0.00018315442137972228, "loss": 11.6778, "step": 26890 }, { "epoch": 0.5629029557062715, "grad_norm": 0.2879458963871002, "learning_rate": 0.00018315320349856552, "loss": 11.6687, "step": 26891 }, { "epoch": 0.5629238884702336, "grad_norm": 0.3100212514400482, "learning_rate": 0.00018315198557743533, "loss": 11.6722, "step": 26892 }, { "epoch": 0.5629448212341958, "grad_norm": 0.29559633135795593, "learning_rate": 0.00018315076761633235, "loss": 11.6856, "step": 26893 }, { "epoch": 0.562965753998158, "grad_norm": 0.24172458052635193, "learning_rate": 0.0001831495496152571, "loss": 11.6642, "step": 26894 }, { "epoch": 0.56298668676212, "grad_norm": 0.31947916746139526, "learning_rate": 0.00018314833157421018, "loss": 11.6612, "step": 26895 }, { "epoch": 0.5630076195260822, "grad_norm": 0.2558593153953552, "learning_rate": 0.00018314711349319223, "loss": 11.6796, "step": 26896 }, { "epoch": 0.5630285522900443, "grad_norm": 0.3468697965145111, "learning_rate": 0.00018314589537220377, "loss": 11.6759, "step": 26897 }, { "epoch": 0.5630494850540065, "grad_norm": 0.31214675307273865, "learning_rate": 0.0001831446772112454, "loss": 11.6818, "step": 26898 }, { "epoch": 0.5630704178179687, "grad_norm": 0.3113204836845398, "learning_rate": 0.00018314345901031774, "loss": 11.6777, "step": 26899 }, { "epoch": 0.5630913505819308, "grad_norm": 0.24436426162719727, "learning_rate": 0.00018314224076942134, "loss": 11.6558, "step": 26900 }, { "epoch": 0.563112283345893, "grad_norm": 0.35957661271095276, "learning_rate": 0.00018314102248855683, "loss": 11.6902, "step": 26901 }, { "epoch": 0.5631332161098551, "grad_norm": 0.27688080072402954, "learning_rate": 0.00018313980416772473, "loss": 11.6906, "step": 26902 }, { "epoch": 0.5631541488738173, "grad_norm": 0.3501812815666199, "learning_rate": 0.00018313858580692568, "loss": 11.6731, "step": 26903 }, { "epoch": 0.5631750816377794, "grad_norm": 0.31859758496284485, "learning_rate": 0.00018313736740616023, "loss": 11.6823, "step": 26904 }, { "epoch": 0.5631960144017416, "grad_norm": 0.33921539783477783, "learning_rate": 0.000183136148965429, "loss": 11.6722, "step": 26905 }, { "epoch": 0.5632169471657038, "grad_norm": 0.2518663704395294, "learning_rate": 0.00018313493048473256, "loss": 11.6823, "step": 26906 }, { "epoch": 0.5632378799296659, "grad_norm": 0.31790825724601746, "learning_rate": 0.00018313371196407148, "loss": 11.6581, "step": 26907 }, { "epoch": 0.5632588126936281, "grad_norm": 0.2752958834171295, "learning_rate": 0.00018313249340344635, "loss": 11.6788, "step": 26908 }, { "epoch": 0.5632797454575902, "grad_norm": 0.2487100064754486, "learning_rate": 0.00018313127480285778, "loss": 11.6457, "step": 26909 }, { "epoch": 0.5633006782215524, "grad_norm": 0.28211861848831177, "learning_rate": 0.00018313005616230632, "loss": 11.6473, "step": 26910 }, { "epoch": 0.5633216109855145, "grad_norm": 0.26329174637794495, "learning_rate": 0.00018312883748179259, "loss": 11.673, "step": 26911 }, { "epoch": 0.5633425437494767, "grad_norm": 0.2813258171081543, "learning_rate": 0.00018312761876131716, "loss": 11.6706, "step": 26912 }, { "epoch": 0.5633634765134389, "grad_norm": 0.28451526165008545, "learning_rate": 0.00018312640000088063, "loss": 11.673, "step": 26913 }, { "epoch": 0.563384409277401, "grad_norm": 0.2474767416715622, "learning_rate": 0.00018312518120048354, "loss": 11.6771, "step": 26914 }, { "epoch": 0.5634053420413632, "grad_norm": 0.4315846264362335, "learning_rate": 0.00018312396236012655, "loss": 11.669, "step": 26915 }, { "epoch": 0.5634262748053253, "grad_norm": 0.30546584725379944, "learning_rate": 0.0001831227434798102, "loss": 11.6564, "step": 26916 }, { "epoch": 0.5634472075692875, "grad_norm": 0.36377260088920593, "learning_rate": 0.00018312152455953503, "loss": 11.6649, "step": 26917 }, { "epoch": 0.5634681403332497, "grad_norm": 0.27739930152893066, "learning_rate": 0.00018312030559930175, "loss": 11.6771, "step": 26918 }, { "epoch": 0.5634890730972117, "grad_norm": 0.25237005949020386, "learning_rate": 0.0001831190865991108, "loss": 11.6739, "step": 26919 }, { "epoch": 0.563510005861174, "grad_norm": 0.2690574526786804, "learning_rate": 0.00018311786755896293, "loss": 11.6704, "step": 26920 }, { "epoch": 0.563530938625136, "grad_norm": 0.27059051394462585, "learning_rate": 0.00018311664847885854, "loss": 11.6574, "step": 26921 }, { "epoch": 0.5635518713890982, "grad_norm": 0.29520100355148315, "learning_rate": 0.00018311542935879835, "loss": 11.6622, "step": 26922 }, { "epoch": 0.5635728041530603, "grad_norm": 0.3061034083366394, "learning_rate": 0.00018311421019878292, "loss": 11.6755, "step": 26923 }, { "epoch": 0.5635937369170225, "grad_norm": 0.25797590613365173, "learning_rate": 0.00018311299099881283, "loss": 11.6739, "step": 26924 }, { "epoch": 0.5636146696809847, "grad_norm": 0.2674703598022461, "learning_rate": 0.00018311177175888866, "loss": 11.6591, "step": 26925 }, { "epoch": 0.5636356024449468, "grad_norm": 0.30369481444358826, "learning_rate": 0.00018311055247901097, "loss": 11.6701, "step": 26926 }, { "epoch": 0.563656535208909, "grad_norm": 0.26077568531036377, "learning_rate": 0.00018310933315918038, "loss": 11.6569, "step": 26927 }, { "epoch": 0.5636774679728711, "grad_norm": 0.2528057098388672, "learning_rate": 0.00018310811379939747, "loss": 11.6691, "step": 26928 }, { "epoch": 0.5636984007368333, "grad_norm": 0.31900089979171753, "learning_rate": 0.00018310689439966283, "loss": 11.6723, "step": 26929 }, { "epoch": 0.5637193335007954, "grad_norm": 0.325920432806015, "learning_rate": 0.00018310567495997705, "loss": 11.6809, "step": 26930 }, { "epoch": 0.5637402662647576, "grad_norm": 0.28834232687950134, "learning_rate": 0.0001831044554803407, "loss": 11.6779, "step": 26931 }, { "epoch": 0.5637611990287198, "grad_norm": 0.3009284734725952, "learning_rate": 0.00018310323596075432, "loss": 11.6878, "step": 26932 }, { "epoch": 0.5637821317926819, "grad_norm": 0.31410345435142517, "learning_rate": 0.00018310201640121864, "loss": 11.6672, "step": 26933 }, { "epoch": 0.5638030645566441, "grad_norm": 0.24758636951446533, "learning_rate": 0.00018310079680173408, "loss": 11.6768, "step": 26934 }, { "epoch": 0.5638239973206062, "grad_norm": 0.29445555806159973, "learning_rate": 0.00018309957716230134, "loss": 11.6652, "step": 26935 }, { "epoch": 0.5638449300845684, "grad_norm": 0.32063964009284973, "learning_rate": 0.00018309835748292095, "loss": 11.6698, "step": 26936 }, { "epoch": 0.5638658628485306, "grad_norm": 0.260711133480072, "learning_rate": 0.00018309713776359354, "loss": 11.6585, "step": 26937 }, { "epoch": 0.5638867956124927, "grad_norm": 0.34028369188308716, "learning_rate": 0.00018309591800431965, "loss": 11.6665, "step": 26938 }, { "epoch": 0.5639077283764549, "grad_norm": 0.4071595072746277, "learning_rate": 0.00018309469820509988, "loss": 11.6728, "step": 26939 }, { "epoch": 0.563928661140417, "grad_norm": 0.2586926221847534, "learning_rate": 0.00018309347836593484, "loss": 11.6716, "step": 26940 }, { "epoch": 0.5639495939043792, "grad_norm": 0.3264012038707733, "learning_rate": 0.00018309225848682509, "loss": 11.6733, "step": 26941 }, { "epoch": 0.5639705266683412, "grad_norm": 0.22651441395282745, "learning_rate": 0.00018309103856777124, "loss": 11.6756, "step": 26942 }, { "epoch": 0.5639914594323034, "grad_norm": 0.32697781920433044, "learning_rate": 0.00018308981860877386, "loss": 11.6988, "step": 26943 }, { "epoch": 0.5640123921962656, "grad_norm": 0.4103279411792755, "learning_rate": 0.00018308859860983353, "loss": 11.6781, "step": 26944 }, { "epoch": 0.5640333249602277, "grad_norm": 0.2832069396972656, "learning_rate": 0.00018308737857095084, "loss": 11.6678, "step": 26945 }, { "epoch": 0.5640542577241899, "grad_norm": 0.2542726397514343, "learning_rate": 0.0001830861584921264, "loss": 11.6667, "step": 26946 }, { "epoch": 0.564075190488152, "grad_norm": 0.3439258635044098, "learning_rate": 0.00018308493837336076, "loss": 11.6782, "step": 26947 }, { "epoch": 0.5640961232521142, "grad_norm": 0.4031863212585449, "learning_rate": 0.00018308371821465453, "loss": 11.6786, "step": 26948 }, { "epoch": 0.5641170560160763, "grad_norm": 0.38759589195251465, "learning_rate": 0.0001830824980160083, "loss": 11.663, "step": 26949 }, { "epoch": 0.5641379887800385, "grad_norm": 0.2669273316860199, "learning_rate": 0.00018308127777742262, "loss": 11.6676, "step": 26950 }, { "epoch": 0.5641589215440007, "grad_norm": 0.34857815504074097, "learning_rate": 0.00018308005749889813, "loss": 11.6784, "step": 26951 }, { "epoch": 0.5641798543079628, "grad_norm": 0.3009101450443268, "learning_rate": 0.00018307883718043542, "loss": 11.6703, "step": 26952 }, { "epoch": 0.564200787071925, "grad_norm": 0.34615322947502136, "learning_rate": 0.00018307761682203499, "loss": 11.6769, "step": 26953 }, { "epoch": 0.5642217198358871, "grad_norm": 0.27952802181243896, "learning_rate": 0.00018307639642369753, "loss": 11.6653, "step": 26954 }, { "epoch": 0.5642426525998493, "grad_norm": 0.23595291376113892, "learning_rate": 0.00018307517598542357, "loss": 11.6659, "step": 26955 }, { "epoch": 0.5642635853638115, "grad_norm": 0.2975529432296753, "learning_rate": 0.0001830739555072137, "loss": 11.6462, "step": 26956 }, { "epoch": 0.5642845181277736, "grad_norm": 0.3633573353290558, "learning_rate": 0.0001830727349890685, "loss": 11.671, "step": 26957 }, { "epoch": 0.5643054508917358, "grad_norm": 0.26281675696372986, "learning_rate": 0.0001830715144309886, "loss": 11.6806, "step": 26958 }, { "epoch": 0.5643263836556979, "grad_norm": 0.43550094962120056, "learning_rate": 0.00018307029383297455, "loss": 11.6726, "step": 26959 }, { "epoch": 0.5643473164196601, "grad_norm": 0.3523581326007843, "learning_rate": 0.00018306907319502692, "loss": 11.6572, "step": 26960 }, { "epoch": 0.5643682491836222, "grad_norm": 0.29194554686546326, "learning_rate": 0.00018306785251714634, "loss": 11.6643, "step": 26961 }, { "epoch": 0.5643891819475844, "grad_norm": 0.32715412974357605, "learning_rate": 0.00018306663179933338, "loss": 11.6699, "step": 26962 }, { "epoch": 0.5644101147115466, "grad_norm": 0.3108181953430176, "learning_rate": 0.00018306541104158865, "loss": 11.6737, "step": 26963 }, { "epoch": 0.5644310474755087, "grad_norm": 0.26742812991142273, "learning_rate": 0.00018306419024391267, "loss": 11.6613, "step": 26964 }, { "epoch": 0.5644519802394709, "grad_norm": 0.31338122487068176, "learning_rate": 0.0001830629694063061, "loss": 11.6764, "step": 26965 }, { "epoch": 0.5644729130034329, "grad_norm": 0.389352947473526, "learning_rate": 0.00018306174852876947, "loss": 11.6622, "step": 26966 }, { "epoch": 0.5644938457673951, "grad_norm": 0.2863944172859192, "learning_rate": 0.00018306052761130341, "loss": 11.6753, "step": 26967 }, { "epoch": 0.5645147785313572, "grad_norm": 0.3194271922111511, "learning_rate": 0.0001830593066539085, "loss": 11.666, "step": 26968 }, { "epoch": 0.5645357112953194, "grad_norm": 0.2631944417953491, "learning_rate": 0.0001830580856565853, "loss": 11.6608, "step": 26969 }, { "epoch": 0.5645566440592816, "grad_norm": 0.28041842579841614, "learning_rate": 0.00018305686461933443, "loss": 11.6729, "step": 26970 }, { "epoch": 0.5645775768232437, "grad_norm": 0.31237876415252686, "learning_rate": 0.00018305564354215644, "loss": 11.6794, "step": 26971 }, { "epoch": 0.5645985095872059, "grad_norm": 0.28744953870773315, "learning_rate": 0.00018305442242505196, "loss": 11.6644, "step": 26972 }, { "epoch": 0.564619442351168, "grad_norm": 0.3322811722755432, "learning_rate": 0.00018305320126802153, "loss": 11.6671, "step": 26973 }, { "epoch": 0.5646403751151302, "grad_norm": 0.2896900177001953, "learning_rate": 0.00018305198007106577, "loss": 11.6857, "step": 26974 }, { "epoch": 0.5646613078790923, "grad_norm": 0.28204187750816345, "learning_rate": 0.00018305075883418524, "loss": 11.6652, "step": 26975 }, { "epoch": 0.5646822406430545, "grad_norm": 0.6123389601707458, "learning_rate": 0.0001830495375573806, "loss": 11.6855, "step": 26976 }, { "epoch": 0.5647031734070167, "grad_norm": 0.3214140236377716, "learning_rate": 0.00018304831624065234, "loss": 11.6734, "step": 26977 }, { "epoch": 0.5647241061709788, "grad_norm": 0.3581993579864502, "learning_rate": 0.0001830470948840011, "loss": 11.6723, "step": 26978 }, { "epoch": 0.564745038934941, "grad_norm": 0.3054097890853882, "learning_rate": 0.00018304587348742744, "loss": 11.6722, "step": 26979 }, { "epoch": 0.5647659716989031, "grad_norm": 0.28611719608306885, "learning_rate": 0.00018304465205093198, "loss": 11.6627, "step": 26980 }, { "epoch": 0.5647869044628653, "grad_norm": 0.31730732321739197, "learning_rate": 0.0001830434305745153, "loss": 11.6677, "step": 26981 }, { "epoch": 0.5648078372268275, "grad_norm": 0.3012314438819885, "learning_rate": 0.00018304220905817798, "loss": 11.6634, "step": 26982 }, { "epoch": 0.5648287699907896, "grad_norm": 0.31760069727897644, "learning_rate": 0.0001830409875019206, "loss": 11.6576, "step": 26983 }, { "epoch": 0.5648497027547518, "grad_norm": 0.34552937746047974, "learning_rate": 0.00018303976590574377, "loss": 11.6813, "step": 26984 }, { "epoch": 0.5648706355187139, "grad_norm": 0.22954654693603516, "learning_rate": 0.00018303854426964802, "loss": 11.6655, "step": 26985 }, { "epoch": 0.5648915682826761, "grad_norm": 0.33707866072654724, "learning_rate": 0.00018303732259363403, "loss": 11.6828, "step": 26986 }, { "epoch": 0.5649125010466381, "grad_norm": 0.2896766662597656, "learning_rate": 0.00018303610087770227, "loss": 11.6536, "step": 26987 }, { "epoch": 0.5649334338106003, "grad_norm": 0.45040109753608704, "learning_rate": 0.0001830348791218534, "loss": 11.6761, "step": 26988 }, { "epoch": 0.5649543665745625, "grad_norm": 0.2605482339859009, "learning_rate": 0.00018303365732608807, "loss": 11.6747, "step": 26989 }, { "epoch": 0.5649752993385246, "grad_norm": 0.2666122317314148, "learning_rate": 0.00018303243549040675, "loss": 11.6606, "step": 26990 }, { "epoch": 0.5649962321024868, "grad_norm": 0.30852359533309937, "learning_rate": 0.00018303121361481006, "loss": 11.6586, "step": 26991 }, { "epoch": 0.5650171648664489, "grad_norm": 0.28709089756011963, "learning_rate": 0.0001830299916992986, "loss": 11.6734, "step": 26992 }, { "epoch": 0.5650380976304111, "grad_norm": 0.34455606341362, "learning_rate": 0.000183028769743873, "loss": 11.6785, "step": 26993 }, { "epoch": 0.5650590303943732, "grad_norm": 0.2674504816532135, "learning_rate": 0.00018302754774853378, "loss": 11.6754, "step": 26994 }, { "epoch": 0.5650799631583354, "grad_norm": 0.27205905318260193, "learning_rate": 0.00018302632571328155, "loss": 11.6678, "step": 26995 }, { "epoch": 0.5651008959222976, "grad_norm": 0.2969456911087036, "learning_rate": 0.00018302510363811693, "loss": 11.667, "step": 26996 }, { "epoch": 0.5651218286862597, "grad_norm": 0.2726893424987793, "learning_rate": 0.00018302388152304048, "loss": 11.6476, "step": 26997 }, { "epoch": 0.5651427614502219, "grad_norm": 0.3046307861804962, "learning_rate": 0.00018302265936805272, "loss": 11.6572, "step": 26998 }, { "epoch": 0.565163694214184, "grad_norm": 0.3033808469772339, "learning_rate": 0.00018302143717315438, "loss": 11.6719, "step": 26999 }, { "epoch": 0.5651846269781462, "grad_norm": 0.29750046133995056, "learning_rate": 0.00018302021493834592, "loss": 11.6708, "step": 27000 }, { "epoch": 0.5651846269781462, "eval_loss": 11.671675682067871, "eval_runtime": 34.3718, "eval_samples_per_second": 27.959, "eval_steps_per_second": 7.012, "step": 27000 }, { "epoch": 0.5652055597421084, "grad_norm": 0.27166512608528137, "learning_rate": 0.00018301899266362802, "loss": 11.6715, "step": 27001 }, { "epoch": 0.5652264925060705, "grad_norm": 0.25968149304389954, "learning_rate": 0.0001830177703490012, "loss": 11.6792, "step": 27002 }, { "epoch": 0.5652474252700327, "grad_norm": 0.3071907162666321, "learning_rate": 0.00018301654799446606, "loss": 11.6612, "step": 27003 }, { "epoch": 0.5652683580339948, "grad_norm": 0.28633537888526917, "learning_rate": 0.00018301532560002323, "loss": 11.6782, "step": 27004 }, { "epoch": 0.565289290797957, "grad_norm": 0.2667236924171448, "learning_rate": 0.00018301410316567328, "loss": 11.6627, "step": 27005 }, { "epoch": 0.5653102235619191, "grad_norm": 0.2731746435165405, "learning_rate": 0.00018301288069141674, "loss": 11.668, "step": 27006 }, { "epoch": 0.5653311563258813, "grad_norm": 0.30449384450912476, "learning_rate": 0.00018301165817725428, "loss": 11.6915, "step": 27007 }, { "epoch": 0.5653520890898435, "grad_norm": 0.2830284535884857, "learning_rate": 0.00018301043562318645, "loss": 11.6635, "step": 27008 }, { "epoch": 0.5653730218538056, "grad_norm": 0.25316622853279114, "learning_rate": 0.0001830092130292138, "loss": 11.6732, "step": 27009 }, { "epoch": 0.5653939546177678, "grad_norm": 0.2930888235569, "learning_rate": 0.000183007990395337, "loss": 11.6656, "step": 27010 }, { "epoch": 0.5654148873817298, "grad_norm": 0.36807239055633545, "learning_rate": 0.00018300676772155662, "loss": 11.6772, "step": 27011 }, { "epoch": 0.565435820145692, "grad_norm": 0.22075484693050385, "learning_rate": 0.0001830055450078732, "loss": 11.6728, "step": 27012 }, { "epoch": 0.5654567529096541, "grad_norm": 0.3116110563278198, "learning_rate": 0.0001830043222542873, "loss": 11.6642, "step": 27013 }, { "epoch": 0.5654776856736163, "grad_norm": 0.30531933903694153, "learning_rate": 0.00018300309946079962, "loss": 11.6644, "step": 27014 }, { "epoch": 0.5654986184375785, "grad_norm": 0.3094314932823181, "learning_rate": 0.00018300187662741065, "loss": 11.671, "step": 27015 }, { "epoch": 0.5655195512015406, "grad_norm": 0.3753586411476135, "learning_rate": 0.00018300065375412105, "loss": 11.6527, "step": 27016 }, { "epoch": 0.5655404839655028, "grad_norm": 0.363622784614563, "learning_rate": 0.00018299943084093133, "loss": 11.6837, "step": 27017 }, { "epoch": 0.5655614167294649, "grad_norm": 0.3171383738517761, "learning_rate": 0.00018299820788784217, "loss": 11.6552, "step": 27018 }, { "epoch": 0.5655823494934271, "grad_norm": 0.24310177564620972, "learning_rate": 0.00018299698489485406, "loss": 11.6735, "step": 27019 }, { "epoch": 0.5656032822573893, "grad_norm": 0.29070422053337097, "learning_rate": 0.00018299576186196767, "loss": 11.6705, "step": 27020 }, { "epoch": 0.5656242150213514, "grad_norm": 0.3732777535915375, "learning_rate": 0.00018299453878918356, "loss": 11.6916, "step": 27021 }, { "epoch": 0.5656451477853136, "grad_norm": 0.2689577043056488, "learning_rate": 0.00018299331567650227, "loss": 11.6825, "step": 27022 }, { "epoch": 0.5656660805492757, "grad_norm": 0.5129175186157227, "learning_rate": 0.0001829920925239245, "loss": 11.6928, "step": 27023 }, { "epoch": 0.5656870133132379, "grad_norm": 0.28324341773986816, "learning_rate": 0.0001829908693314507, "loss": 11.6491, "step": 27024 }, { "epoch": 0.5657079460772, "grad_norm": 0.24625079333782196, "learning_rate": 0.00018298964609908154, "loss": 11.6718, "step": 27025 }, { "epoch": 0.5657288788411622, "grad_norm": 0.3808429539203644, "learning_rate": 0.0001829884228268176, "loss": 11.684, "step": 27026 }, { "epoch": 0.5657498116051244, "grad_norm": 0.3371348977088928, "learning_rate": 0.00018298719951465947, "loss": 11.6978, "step": 27027 }, { "epoch": 0.5657707443690865, "grad_norm": 0.2641359269618988, "learning_rate": 0.00018298597616260773, "loss": 11.6737, "step": 27028 }, { "epoch": 0.5657916771330487, "grad_norm": 0.6284084320068359, "learning_rate": 0.00018298475277066296, "loss": 11.6862, "step": 27029 }, { "epoch": 0.5658126098970108, "grad_norm": 0.37866273522377014, "learning_rate": 0.00018298352933882574, "loss": 11.6735, "step": 27030 }, { "epoch": 0.565833542660973, "grad_norm": 0.42717650532722473, "learning_rate": 0.0001829823058670967, "loss": 11.6998, "step": 27031 }, { "epoch": 0.565854475424935, "grad_norm": 0.32400837540626526, "learning_rate": 0.00018298108235547642, "loss": 11.6768, "step": 27032 }, { "epoch": 0.5658754081888973, "grad_norm": 0.38395386934280396, "learning_rate": 0.0001829798588039654, "loss": 11.6625, "step": 27033 }, { "epoch": 0.5658963409528595, "grad_norm": 0.37078168988227844, "learning_rate": 0.0001829786352125644, "loss": 11.6889, "step": 27034 }, { "epoch": 0.5659172737168215, "grad_norm": 0.3429009020328522, "learning_rate": 0.00018297741158127383, "loss": 11.6416, "step": 27035 }, { "epoch": 0.5659382064807837, "grad_norm": 0.3059546947479248, "learning_rate": 0.00018297618791009436, "loss": 11.6476, "step": 27036 }, { "epoch": 0.5659591392447458, "grad_norm": 0.2597695291042328, "learning_rate": 0.0001829749641990266, "loss": 11.6691, "step": 27037 }, { "epoch": 0.565980072008708, "grad_norm": 0.285678893327713, "learning_rate": 0.00018297374044807111, "loss": 11.6774, "step": 27038 }, { "epoch": 0.5660010047726702, "grad_norm": 0.49360257387161255, "learning_rate": 0.00018297251665722847, "loss": 11.6712, "step": 27039 }, { "epoch": 0.5660219375366323, "grad_norm": 0.3549621105194092, "learning_rate": 0.0001829712928264993, "loss": 11.6854, "step": 27040 }, { "epoch": 0.5660428703005945, "grad_norm": 0.34361493587493896, "learning_rate": 0.00018297006895588413, "loss": 11.6873, "step": 27041 }, { "epoch": 0.5660638030645566, "grad_norm": 0.38184821605682373, "learning_rate": 0.00018296884504538362, "loss": 11.6894, "step": 27042 }, { "epoch": 0.5660847358285188, "grad_norm": 0.25477609038352966, "learning_rate": 0.0001829676210949983, "loss": 11.6569, "step": 27043 }, { "epoch": 0.5661056685924809, "grad_norm": 0.2411794513463974, "learning_rate": 0.0001829663971047288, "loss": 11.6665, "step": 27044 }, { "epoch": 0.5661266013564431, "grad_norm": 0.28872206807136536, "learning_rate": 0.00018296517307457566, "loss": 11.6604, "step": 27045 }, { "epoch": 0.5661475341204053, "grad_norm": 0.2819065749645233, "learning_rate": 0.00018296394900453952, "loss": 11.678, "step": 27046 }, { "epoch": 0.5661684668843674, "grad_norm": 0.32349905371665955, "learning_rate": 0.00018296272489462095, "loss": 11.6813, "step": 27047 }, { "epoch": 0.5661893996483296, "grad_norm": 0.2718047797679901, "learning_rate": 0.0001829615007448205, "loss": 11.67, "step": 27048 }, { "epoch": 0.5662103324122917, "grad_norm": 0.31067731976509094, "learning_rate": 0.00018296027655513887, "loss": 11.6636, "step": 27049 }, { "epoch": 0.5662312651762539, "grad_norm": 0.2275213897228241, "learning_rate": 0.0001829590523255765, "loss": 11.6689, "step": 27050 }, { "epoch": 0.566252197940216, "grad_norm": 0.42885610461235046, "learning_rate": 0.0001829578280561341, "loss": 11.678, "step": 27051 }, { "epoch": 0.5662731307041782, "grad_norm": 0.37228402495384216, "learning_rate": 0.0001829566037468122, "loss": 11.6774, "step": 27052 }, { "epoch": 0.5662940634681404, "grad_norm": 0.31558194756507874, "learning_rate": 0.00018295537939761136, "loss": 11.6625, "step": 27053 }, { "epoch": 0.5663149962321025, "grad_norm": 0.29747307300567627, "learning_rate": 0.00018295415500853225, "loss": 11.6825, "step": 27054 }, { "epoch": 0.5663359289960647, "grad_norm": 0.3109980821609497, "learning_rate": 0.0001829529305795754, "loss": 11.6668, "step": 27055 }, { "epoch": 0.5663568617600268, "grad_norm": 0.3636752963066101, "learning_rate": 0.0001829517061107414, "loss": 11.6739, "step": 27056 }, { "epoch": 0.566377794523989, "grad_norm": 0.38074633479118347, "learning_rate": 0.00018295048160203088, "loss": 11.6844, "step": 27057 }, { "epoch": 0.5663987272879512, "grad_norm": 0.2930734157562256, "learning_rate": 0.00018294925705344438, "loss": 11.6864, "step": 27058 }, { "epoch": 0.5664196600519132, "grad_norm": 0.30970874428749084, "learning_rate": 0.00018294803246498252, "loss": 11.671, "step": 27059 }, { "epoch": 0.5664405928158754, "grad_norm": 0.36843574047088623, "learning_rate": 0.00018294680783664585, "loss": 11.6843, "step": 27060 }, { "epoch": 0.5664615255798375, "grad_norm": 0.2699136435985565, "learning_rate": 0.00018294558316843501, "loss": 11.6687, "step": 27061 }, { "epoch": 0.5664824583437997, "grad_norm": 0.2708636522293091, "learning_rate": 0.00018294435846035058, "loss": 11.6855, "step": 27062 }, { "epoch": 0.5665033911077618, "grad_norm": 0.34500816464424133, "learning_rate": 0.00018294313371239313, "loss": 11.6733, "step": 27063 }, { "epoch": 0.566524323871724, "grad_norm": 0.24260570108890533, "learning_rate": 0.00018294190892456324, "loss": 11.6607, "step": 27064 }, { "epoch": 0.5665452566356862, "grad_norm": 0.3083244860172272, "learning_rate": 0.00018294068409686155, "loss": 11.672, "step": 27065 }, { "epoch": 0.5665661893996483, "grad_norm": 0.391401082277298, "learning_rate": 0.00018293945922928857, "loss": 11.69, "step": 27066 }, { "epoch": 0.5665871221636105, "grad_norm": 0.2641046345233917, "learning_rate": 0.00018293823432184492, "loss": 11.6769, "step": 27067 }, { "epoch": 0.5666080549275726, "grad_norm": 0.32655084133148193, "learning_rate": 0.00018293700937453122, "loss": 11.6668, "step": 27068 }, { "epoch": 0.5666289876915348, "grad_norm": 0.28755730390548706, "learning_rate": 0.00018293578438734805, "loss": 11.6981, "step": 27069 }, { "epoch": 0.5666499204554969, "grad_norm": 0.2860123813152313, "learning_rate": 0.00018293455936029595, "loss": 11.6791, "step": 27070 }, { "epoch": 0.5666708532194591, "grad_norm": 0.29612618684768677, "learning_rate": 0.0001829333342933756, "loss": 11.6854, "step": 27071 }, { "epoch": 0.5666917859834213, "grad_norm": 0.2566371262073517, "learning_rate": 0.00018293210918658747, "loss": 11.6738, "step": 27072 }, { "epoch": 0.5667127187473834, "grad_norm": 0.31499916315078735, "learning_rate": 0.00018293088403993223, "loss": 11.6597, "step": 27073 }, { "epoch": 0.5667336515113456, "grad_norm": 0.33172136545181274, "learning_rate": 0.0001829296588534105, "loss": 11.6624, "step": 27074 }, { "epoch": 0.5667545842753077, "grad_norm": 0.2984693646430969, "learning_rate": 0.00018292843362702278, "loss": 11.6518, "step": 27075 }, { "epoch": 0.5667755170392699, "grad_norm": 0.28382739424705505, "learning_rate": 0.00018292720836076968, "loss": 11.6749, "step": 27076 }, { "epoch": 0.5667964498032321, "grad_norm": 0.35774657130241394, "learning_rate": 0.00018292598305465186, "loss": 11.6643, "step": 27077 }, { "epoch": 0.5668173825671942, "grad_norm": 0.38408079743385315, "learning_rate": 0.00018292475770866984, "loss": 11.6536, "step": 27078 }, { "epoch": 0.5668383153311564, "grad_norm": 0.2638006806373596, "learning_rate": 0.00018292353232282423, "loss": 11.6671, "step": 27079 }, { "epoch": 0.5668592480951185, "grad_norm": 0.24413920938968658, "learning_rate": 0.00018292230689711557, "loss": 11.6756, "step": 27080 }, { "epoch": 0.5668801808590807, "grad_norm": 0.2826879620552063, "learning_rate": 0.00018292108143154456, "loss": 11.6807, "step": 27081 }, { "epoch": 0.5669011136230427, "grad_norm": 0.2649017572402954, "learning_rate": 0.0001829198559261117, "loss": 11.6798, "step": 27082 }, { "epoch": 0.5669220463870049, "grad_norm": 0.2893262505531311, "learning_rate": 0.0001829186303808176, "loss": 11.6705, "step": 27083 }, { "epoch": 0.5669429791509671, "grad_norm": 0.28867143392562866, "learning_rate": 0.00018291740479566283, "loss": 11.6961, "step": 27084 }, { "epoch": 0.5669639119149292, "grad_norm": 0.27955570816993713, "learning_rate": 0.00018291617917064803, "loss": 11.6717, "step": 27085 }, { "epoch": 0.5669848446788914, "grad_norm": 0.3721996247768402, "learning_rate": 0.00018291495350577374, "loss": 11.6681, "step": 27086 }, { "epoch": 0.5670057774428535, "grad_norm": 0.297943115234375, "learning_rate": 0.00018291372780104061, "loss": 11.6666, "step": 27087 }, { "epoch": 0.5670267102068157, "grad_norm": 0.26111528277397156, "learning_rate": 0.00018291250205644918, "loss": 11.6543, "step": 27088 }, { "epoch": 0.5670476429707778, "grad_norm": 0.3831314742565155, "learning_rate": 0.00018291127627200003, "loss": 11.6746, "step": 27089 }, { "epoch": 0.56706857573474, "grad_norm": 0.27188533544540405, "learning_rate": 0.00018291005044769376, "loss": 11.6747, "step": 27090 }, { "epoch": 0.5670895084987022, "grad_norm": 0.2908543646335602, "learning_rate": 0.00018290882458353098, "loss": 11.6782, "step": 27091 }, { "epoch": 0.5671104412626643, "grad_norm": 0.30483561754226685, "learning_rate": 0.00018290759867951227, "loss": 11.6647, "step": 27092 }, { "epoch": 0.5671313740266265, "grad_norm": 0.280291885137558, "learning_rate": 0.00018290637273563819, "loss": 11.6706, "step": 27093 }, { "epoch": 0.5671523067905886, "grad_norm": 0.4018317461013794, "learning_rate": 0.0001829051467519094, "loss": 11.6654, "step": 27094 }, { "epoch": 0.5671732395545508, "grad_norm": 0.2982470691204071, "learning_rate": 0.00018290392072832642, "loss": 11.6824, "step": 27095 }, { "epoch": 0.567194172318513, "grad_norm": 0.31327736377716064, "learning_rate": 0.00018290269466488988, "loss": 11.6668, "step": 27096 }, { "epoch": 0.5672151050824751, "grad_norm": 0.32593780755996704, "learning_rate": 0.00018290146856160032, "loss": 11.6644, "step": 27097 }, { "epoch": 0.5672360378464373, "grad_norm": 0.3059694468975067, "learning_rate": 0.00018290024241845838, "loss": 11.6596, "step": 27098 }, { "epoch": 0.5672569706103994, "grad_norm": 0.30378738045692444, "learning_rate": 0.00018289901623546464, "loss": 11.6631, "step": 27099 }, { "epoch": 0.5672779033743616, "grad_norm": 0.2481483668088913, "learning_rate": 0.00018289779001261967, "loss": 11.6598, "step": 27100 }, { "epoch": 0.5672988361383237, "grad_norm": 0.4082746207714081, "learning_rate": 0.00018289656374992407, "loss": 11.6666, "step": 27101 }, { "epoch": 0.5673197689022859, "grad_norm": 0.25054463744163513, "learning_rate": 0.00018289533744737845, "loss": 11.6607, "step": 27102 }, { "epoch": 0.5673407016662481, "grad_norm": 0.29457345604896545, "learning_rate": 0.00018289411110498336, "loss": 11.6804, "step": 27103 }, { "epoch": 0.5673616344302101, "grad_norm": 0.3629920482635498, "learning_rate": 0.00018289288472273942, "loss": 11.6638, "step": 27104 }, { "epoch": 0.5673825671941723, "grad_norm": 0.2964077293872833, "learning_rate": 0.0001828916583006472, "loss": 11.6777, "step": 27105 }, { "epoch": 0.5674034999581344, "grad_norm": 0.3205130100250244, "learning_rate": 0.00018289043183870732, "loss": 11.6707, "step": 27106 }, { "epoch": 0.5674244327220966, "grad_norm": 0.3674072325229645, "learning_rate": 0.0001828892053369203, "loss": 11.6751, "step": 27107 }, { "epoch": 0.5674453654860587, "grad_norm": 0.2549579441547394, "learning_rate": 0.00018288797879528683, "loss": 11.6674, "step": 27108 }, { "epoch": 0.5674662982500209, "grad_norm": 0.3087828457355499, "learning_rate": 0.00018288675221380743, "loss": 11.6603, "step": 27109 }, { "epoch": 0.5674872310139831, "grad_norm": 0.3236812949180603, "learning_rate": 0.0001828855255924827, "loss": 11.6724, "step": 27110 }, { "epoch": 0.5675081637779452, "grad_norm": 0.2770233154296875, "learning_rate": 0.00018288429893131323, "loss": 11.6811, "step": 27111 }, { "epoch": 0.5675290965419074, "grad_norm": 0.3440367877483368, "learning_rate": 0.00018288307223029965, "loss": 11.6796, "step": 27112 }, { "epoch": 0.5675500293058695, "grad_norm": 0.29116690158843994, "learning_rate": 0.00018288184548944248, "loss": 11.6639, "step": 27113 }, { "epoch": 0.5675709620698317, "grad_norm": 0.3223990201950073, "learning_rate": 0.00018288061870874234, "loss": 11.6543, "step": 27114 }, { "epoch": 0.5675918948337939, "grad_norm": 0.2700318694114685, "learning_rate": 0.00018287939188819986, "loss": 11.6631, "step": 27115 }, { "epoch": 0.567612827597756, "grad_norm": 0.3010399043560028, "learning_rate": 0.0001828781650278156, "loss": 11.6766, "step": 27116 }, { "epoch": 0.5676337603617182, "grad_norm": 0.27348190546035767, "learning_rate": 0.00018287693812759013, "loss": 11.6647, "step": 27117 }, { "epoch": 0.5676546931256803, "grad_norm": 0.23673152923583984, "learning_rate": 0.00018287571118752403, "loss": 11.6724, "step": 27118 }, { "epoch": 0.5676756258896425, "grad_norm": 0.3796854019165039, "learning_rate": 0.00018287448420761796, "loss": 11.6734, "step": 27119 }, { "epoch": 0.5676965586536046, "grad_norm": 0.28860196471214294, "learning_rate": 0.00018287325718787243, "loss": 11.6749, "step": 27120 }, { "epoch": 0.5677174914175668, "grad_norm": 0.3270087242126465, "learning_rate": 0.00018287203012828807, "loss": 11.6753, "step": 27121 }, { "epoch": 0.567738424181529, "grad_norm": 0.28693515062332153, "learning_rate": 0.00018287080302886548, "loss": 11.6648, "step": 27122 }, { "epoch": 0.5677593569454911, "grad_norm": 0.30840882658958435, "learning_rate": 0.0001828695758896052, "loss": 11.6707, "step": 27123 }, { "epoch": 0.5677802897094533, "grad_norm": 0.33160391449928284, "learning_rate": 0.00018286834871050788, "loss": 11.6761, "step": 27124 }, { "epoch": 0.5678012224734154, "grad_norm": 0.3233579099178314, "learning_rate": 0.0001828671214915741, "loss": 11.6948, "step": 27125 }, { "epoch": 0.5678221552373776, "grad_norm": 0.2908211946487427, "learning_rate": 0.00018286589423280441, "loss": 11.6572, "step": 27126 }, { "epoch": 0.5678430880013396, "grad_norm": 0.2640835642814636, "learning_rate": 0.00018286466693419944, "loss": 11.6657, "step": 27127 }, { "epoch": 0.5678640207653018, "grad_norm": 0.4286004602909088, "learning_rate": 0.00018286343959575976, "loss": 11.6826, "step": 27128 }, { "epoch": 0.567884953529264, "grad_norm": 0.30379700660705566, "learning_rate": 0.00018286221221748593, "loss": 11.6865, "step": 27129 }, { "epoch": 0.5679058862932261, "grad_norm": 0.2589411735534668, "learning_rate": 0.00018286098479937862, "loss": 11.6742, "step": 27130 }, { "epoch": 0.5679268190571883, "grad_norm": 0.3045375645160675, "learning_rate": 0.00018285975734143835, "loss": 11.6783, "step": 27131 }, { "epoch": 0.5679477518211504, "grad_norm": 0.28235912322998047, "learning_rate": 0.00018285852984366576, "loss": 11.6668, "step": 27132 }, { "epoch": 0.5679686845851126, "grad_norm": 0.297191858291626, "learning_rate": 0.0001828573023060614, "loss": 11.6896, "step": 27133 }, { "epoch": 0.5679896173490748, "grad_norm": 0.2844504117965698, "learning_rate": 0.00018285607472862586, "loss": 11.6671, "step": 27134 }, { "epoch": 0.5680105501130369, "grad_norm": 0.29075419902801514, "learning_rate": 0.00018285484711135975, "loss": 11.6674, "step": 27135 }, { "epoch": 0.5680314828769991, "grad_norm": 0.27562451362609863, "learning_rate": 0.00018285361945426365, "loss": 11.6587, "step": 27136 }, { "epoch": 0.5680524156409612, "grad_norm": 0.29150933027267456, "learning_rate": 0.00018285239175733816, "loss": 11.6812, "step": 27137 }, { "epoch": 0.5680733484049234, "grad_norm": 0.30893781781196594, "learning_rate": 0.00018285116402058386, "loss": 11.6648, "step": 27138 }, { "epoch": 0.5680942811688855, "grad_norm": 0.2956928014755249, "learning_rate": 0.00018284993624400136, "loss": 11.6756, "step": 27139 }, { "epoch": 0.5681152139328477, "grad_norm": 0.23606878519058228, "learning_rate": 0.00018284870842759122, "loss": 11.6545, "step": 27140 }, { "epoch": 0.5681361466968099, "grad_norm": 0.3060154318809509, "learning_rate": 0.00018284748057135407, "loss": 11.6631, "step": 27141 }, { "epoch": 0.568157079460772, "grad_norm": 0.3432328402996063, "learning_rate": 0.00018284625267529046, "loss": 11.6501, "step": 27142 }, { "epoch": 0.5681780122247342, "grad_norm": 0.3300718069076538, "learning_rate": 0.000182845024739401, "loss": 11.6796, "step": 27143 }, { "epoch": 0.5681989449886963, "grad_norm": 0.30289560556411743, "learning_rate": 0.00018284379676368625, "loss": 11.6689, "step": 27144 }, { "epoch": 0.5682198777526585, "grad_norm": 0.34105443954467773, "learning_rate": 0.00018284256874814684, "loss": 11.6838, "step": 27145 }, { "epoch": 0.5682408105166206, "grad_norm": 0.2613772749900818, "learning_rate": 0.00018284134069278336, "loss": 11.6575, "step": 27146 }, { "epoch": 0.5682617432805828, "grad_norm": 0.27625688910484314, "learning_rate": 0.0001828401125975964, "loss": 11.6858, "step": 27147 }, { "epoch": 0.568282676044545, "grad_norm": 0.27132007479667664, "learning_rate": 0.00018283888446258653, "loss": 11.6737, "step": 27148 }, { "epoch": 0.568303608808507, "grad_norm": 0.30785590410232544, "learning_rate": 0.00018283765628775432, "loss": 11.6635, "step": 27149 }, { "epoch": 0.5683245415724693, "grad_norm": 0.26244017481803894, "learning_rate": 0.0001828364280731004, "loss": 11.6617, "step": 27150 }, { "epoch": 0.5683454743364313, "grad_norm": 0.2794267237186432, "learning_rate": 0.00018283519981862536, "loss": 11.6831, "step": 27151 }, { "epoch": 0.5683664071003935, "grad_norm": 0.29848745465278625, "learning_rate": 0.00018283397152432976, "loss": 11.659, "step": 27152 }, { "epoch": 0.5683873398643556, "grad_norm": 0.3024519681930542, "learning_rate": 0.00018283274319021424, "loss": 11.6744, "step": 27153 }, { "epoch": 0.5684082726283178, "grad_norm": 0.338234007358551, "learning_rate": 0.00018283151481627937, "loss": 11.6832, "step": 27154 }, { "epoch": 0.56842920539228, "grad_norm": 0.340944766998291, "learning_rate": 0.00018283028640252567, "loss": 11.6558, "step": 27155 }, { "epoch": 0.5684501381562421, "grad_norm": 0.29899170994758606, "learning_rate": 0.00018282905794895384, "loss": 11.6699, "step": 27156 }, { "epoch": 0.5684710709202043, "grad_norm": 0.26107916235923767, "learning_rate": 0.00018282782945556442, "loss": 11.6719, "step": 27157 }, { "epoch": 0.5684920036841664, "grad_norm": 0.2761681377887726, "learning_rate": 0.000182826600922358, "loss": 11.6849, "step": 27158 }, { "epoch": 0.5685129364481286, "grad_norm": 0.34049180150032043, "learning_rate": 0.00018282537234933516, "loss": 11.6558, "step": 27159 }, { "epoch": 0.5685338692120908, "grad_norm": 0.32042741775512695, "learning_rate": 0.0001828241437364965, "loss": 11.6758, "step": 27160 }, { "epoch": 0.5685548019760529, "grad_norm": 0.3116675913333893, "learning_rate": 0.00018282291508384262, "loss": 11.6716, "step": 27161 }, { "epoch": 0.5685757347400151, "grad_norm": 0.2559640109539032, "learning_rate": 0.0001828216863913741, "loss": 11.6769, "step": 27162 }, { "epoch": 0.5685966675039772, "grad_norm": 0.2669698894023895, "learning_rate": 0.00018282045765909155, "loss": 11.6718, "step": 27163 }, { "epoch": 0.5686176002679394, "grad_norm": 0.28798094391822815, "learning_rate": 0.00018281922888699554, "loss": 11.6836, "step": 27164 }, { "epoch": 0.5686385330319015, "grad_norm": 0.32571113109588623, "learning_rate": 0.00018281800007508668, "loss": 11.678, "step": 27165 }, { "epoch": 0.5686594657958637, "grad_norm": 0.2714621424674988, "learning_rate": 0.00018281677122336553, "loss": 11.6761, "step": 27166 }, { "epoch": 0.5686803985598259, "grad_norm": 0.30717185139656067, "learning_rate": 0.0001828155423318327, "loss": 11.6624, "step": 27167 }, { "epoch": 0.568701331323788, "grad_norm": 0.3114778697490692, "learning_rate": 0.00018281431340048878, "loss": 11.6609, "step": 27168 }, { "epoch": 0.5687222640877502, "grad_norm": 0.31138134002685547, "learning_rate": 0.00018281308442933436, "loss": 11.6589, "step": 27169 }, { "epoch": 0.5687431968517123, "grad_norm": 0.31505468487739563, "learning_rate": 0.00018281185541837002, "loss": 11.6688, "step": 27170 }, { "epoch": 0.5687641296156745, "grad_norm": 0.3282493054866791, "learning_rate": 0.00018281062636759637, "loss": 11.6822, "step": 27171 }, { "epoch": 0.5687850623796366, "grad_norm": 0.3646708130836487, "learning_rate": 0.000182809397277014, "loss": 11.6822, "step": 27172 }, { "epoch": 0.5688059951435988, "grad_norm": 0.2441297322511673, "learning_rate": 0.0001828081681466235, "loss": 11.6717, "step": 27173 }, { "epoch": 0.568826927907561, "grad_norm": 0.32028651237487793, "learning_rate": 0.00018280693897642543, "loss": 11.6648, "step": 27174 }, { "epoch": 0.568847860671523, "grad_norm": 0.2614140510559082, "learning_rate": 0.00018280570976642044, "loss": 11.6765, "step": 27175 }, { "epoch": 0.5688687934354852, "grad_norm": 0.23357704281806946, "learning_rate": 0.00018280448051660907, "loss": 11.6603, "step": 27176 }, { "epoch": 0.5688897261994473, "grad_norm": 0.25722047686576843, "learning_rate": 0.00018280325122699195, "loss": 11.6683, "step": 27177 }, { "epoch": 0.5689106589634095, "grad_norm": 0.3397938907146454, "learning_rate": 0.0001828020218975696, "loss": 11.6639, "step": 27178 }, { "epoch": 0.5689315917273717, "grad_norm": 0.28965452313423157, "learning_rate": 0.00018280079252834267, "loss": 11.6736, "step": 27179 }, { "epoch": 0.5689525244913338, "grad_norm": 0.33357009291648865, "learning_rate": 0.00018279956311931174, "loss": 11.6773, "step": 27180 }, { "epoch": 0.568973457255296, "grad_norm": 0.27618205547332764, "learning_rate": 0.00018279833367047742, "loss": 11.6836, "step": 27181 }, { "epoch": 0.5689943900192581, "grad_norm": 0.29945075511932373, "learning_rate": 0.00018279710418184027, "loss": 11.6829, "step": 27182 }, { "epoch": 0.5690153227832203, "grad_norm": 0.30604109168052673, "learning_rate": 0.0001827958746534009, "loss": 11.6791, "step": 27183 }, { "epoch": 0.5690362555471824, "grad_norm": 0.29874879121780396, "learning_rate": 0.00018279464508515986, "loss": 11.6642, "step": 27184 }, { "epoch": 0.5690571883111446, "grad_norm": 0.3122974634170532, "learning_rate": 0.00018279341547711782, "loss": 11.6562, "step": 27185 }, { "epoch": 0.5690781210751068, "grad_norm": 0.4708201587200165, "learning_rate": 0.00018279218582927532, "loss": 11.6672, "step": 27186 }, { "epoch": 0.5690990538390689, "grad_norm": 0.32011595368385315, "learning_rate": 0.00018279095614163294, "loss": 11.6682, "step": 27187 }, { "epoch": 0.5691199866030311, "grad_norm": 0.31026384234428406, "learning_rate": 0.00018278972641419126, "loss": 11.6694, "step": 27188 }, { "epoch": 0.5691409193669932, "grad_norm": 0.28742483258247375, "learning_rate": 0.00018278849664695094, "loss": 11.6591, "step": 27189 }, { "epoch": 0.5691618521309554, "grad_norm": 0.2842082381248474, "learning_rate": 0.00018278726683991254, "loss": 11.6719, "step": 27190 }, { "epoch": 0.5691827848949175, "grad_norm": 0.3087911009788513, "learning_rate": 0.00018278603699307665, "loss": 11.6721, "step": 27191 }, { "epoch": 0.5692037176588797, "grad_norm": 0.3527737855911255, "learning_rate": 0.0001827848071064438, "loss": 11.6674, "step": 27192 }, { "epoch": 0.5692246504228419, "grad_norm": 0.2910769581794739, "learning_rate": 0.00018278357718001467, "loss": 11.6725, "step": 27193 }, { "epoch": 0.569245583186804, "grad_norm": 0.41605260968208313, "learning_rate": 0.00018278234721378982, "loss": 11.6913, "step": 27194 }, { "epoch": 0.5692665159507662, "grad_norm": 0.39375266432762146, "learning_rate": 0.0001827811172077698, "loss": 11.6782, "step": 27195 }, { "epoch": 0.5692874487147283, "grad_norm": 0.3630075454711914, "learning_rate": 0.00018277988716195527, "loss": 11.6808, "step": 27196 }, { "epoch": 0.5693083814786905, "grad_norm": 0.3569071590900421, "learning_rate": 0.0001827786570763468, "loss": 11.6648, "step": 27197 }, { "epoch": 0.5693293142426527, "grad_norm": 0.2902127206325531, "learning_rate": 0.00018277742695094494, "loss": 11.6705, "step": 27198 }, { "epoch": 0.5693502470066147, "grad_norm": 0.4621342718601227, "learning_rate": 0.00018277619678575032, "loss": 11.6699, "step": 27199 }, { "epoch": 0.5693711797705769, "grad_norm": 0.23290440440177917, "learning_rate": 0.00018277496658076353, "loss": 11.6698, "step": 27200 }, { "epoch": 0.569392112534539, "grad_norm": 0.32647427916526794, "learning_rate": 0.00018277373633598514, "loss": 11.6656, "step": 27201 }, { "epoch": 0.5694130452985012, "grad_norm": 0.2762063443660736, "learning_rate": 0.00018277250605141577, "loss": 11.6684, "step": 27202 }, { "epoch": 0.5694339780624633, "grad_norm": 0.33703941106796265, "learning_rate": 0.00018277127572705602, "loss": 11.6637, "step": 27203 }, { "epoch": 0.5694549108264255, "grad_norm": 0.26813244819641113, "learning_rate": 0.00018277004536290642, "loss": 11.6733, "step": 27204 }, { "epoch": 0.5694758435903877, "grad_norm": 0.292892724275589, "learning_rate": 0.00018276881495896762, "loss": 11.6674, "step": 27205 }, { "epoch": 0.5694967763543498, "grad_norm": 0.3385033905506134, "learning_rate": 0.00018276758451524016, "loss": 11.6744, "step": 27206 }, { "epoch": 0.569517709118312, "grad_norm": 0.3327348232269287, "learning_rate": 0.0001827663540317247, "loss": 11.6739, "step": 27207 }, { "epoch": 0.5695386418822741, "grad_norm": 0.40578874945640564, "learning_rate": 0.0001827651235084218, "loss": 11.6819, "step": 27208 }, { "epoch": 0.5695595746462363, "grad_norm": 0.33881238102912903, "learning_rate": 0.000182763892945332, "loss": 11.6755, "step": 27209 }, { "epoch": 0.5695805074101984, "grad_norm": 0.303443968296051, "learning_rate": 0.000182762662342456, "loss": 11.6887, "step": 27210 }, { "epoch": 0.5696014401741606, "grad_norm": 0.2935771048069, "learning_rate": 0.0001827614316997943, "loss": 11.7009, "step": 27211 }, { "epoch": 0.5696223729381228, "grad_norm": 0.27966222167015076, "learning_rate": 0.0001827602010173475, "loss": 11.6839, "step": 27212 }, { "epoch": 0.5696433057020849, "grad_norm": 0.27044954895973206, "learning_rate": 0.00018275897029511624, "loss": 11.6762, "step": 27213 }, { "epoch": 0.5696642384660471, "grad_norm": 0.3060407042503357, "learning_rate": 0.0001827577395331011, "loss": 11.6766, "step": 27214 }, { "epoch": 0.5696851712300092, "grad_norm": 0.25816160440444946, "learning_rate": 0.00018275650873130262, "loss": 11.694, "step": 27215 }, { "epoch": 0.5697061039939714, "grad_norm": 0.39010292291641235, "learning_rate": 0.00018275527788972145, "loss": 11.6749, "step": 27216 }, { "epoch": 0.5697270367579336, "grad_norm": 0.22796140611171722, "learning_rate": 0.00018275404700835815, "loss": 11.6799, "step": 27217 }, { "epoch": 0.5697479695218957, "grad_norm": 0.2330029308795929, "learning_rate": 0.0001827528160872133, "loss": 11.6833, "step": 27218 }, { "epoch": 0.5697689022858579, "grad_norm": 0.26345330476760864, "learning_rate": 0.00018275158512628755, "loss": 11.6632, "step": 27219 }, { "epoch": 0.56978983504982, "grad_norm": 0.325886994600296, "learning_rate": 0.00018275035412558145, "loss": 11.664, "step": 27220 }, { "epoch": 0.5698107678137821, "grad_norm": 0.2788163423538208, "learning_rate": 0.00018274912308509556, "loss": 11.6683, "step": 27221 }, { "epoch": 0.5698317005777442, "grad_norm": 0.2773265540599823, "learning_rate": 0.00018274789200483054, "loss": 11.6573, "step": 27222 }, { "epoch": 0.5698526333417064, "grad_norm": 0.2517431080341339, "learning_rate": 0.00018274666088478696, "loss": 11.6754, "step": 27223 }, { "epoch": 0.5698735661056686, "grad_norm": 0.2649029493331909, "learning_rate": 0.00018274542972496538, "loss": 11.678, "step": 27224 }, { "epoch": 0.5698944988696307, "grad_norm": 0.31903207302093506, "learning_rate": 0.00018274419852536643, "loss": 11.6674, "step": 27225 }, { "epoch": 0.5699154316335929, "grad_norm": 0.28705692291259766, "learning_rate": 0.00018274296728599068, "loss": 11.6823, "step": 27226 }, { "epoch": 0.569936364397555, "grad_norm": 0.3876210153102875, "learning_rate": 0.00018274173600683872, "loss": 11.6845, "step": 27227 }, { "epoch": 0.5699572971615172, "grad_norm": 0.30503568053245544, "learning_rate": 0.00018274050468791115, "loss": 11.6573, "step": 27228 }, { "epoch": 0.5699782299254793, "grad_norm": 0.36782658100128174, "learning_rate": 0.00018273927332920854, "loss": 11.6648, "step": 27229 }, { "epoch": 0.5699991626894415, "grad_norm": 0.28694814443588257, "learning_rate": 0.00018273804193073154, "loss": 11.6826, "step": 27230 }, { "epoch": 0.5700200954534037, "grad_norm": 0.2826714515686035, "learning_rate": 0.0001827368104924807, "loss": 11.6596, "step": 27231 }, { "epoch": 0.5700410282173658, "grad_norm": 0.3267925977706909, "learning_rate": 0.0001827355790144566, "loss": 11.6794, "step": 27232 }, { "epoch": 0.570061960981328, "grad_norm": 0.30396369099617004, "learning_rate": 0.00018273434749665988, "loss": 11.6823, "step": 27233 }, { "epoch": 0.5700828937452901, "grad_norm": 0.2526428699493408, "learning_rate": 0.00018273311593909106, "loss": 11.6798, "step": 27234 }, { "epoch": 0.5701038265092523, "grad_norm": 0.2729675769805908, "learning_rate": 0.00018273188434175078, "loss": 11.6751, "step": 27235 }, { "epoch": 0.5701247592732145, "grad_norm": 0.36018383502960205, "learning_rate": 0.00018273065270463964, "loss": 11.6904, "step": 27236 }, { "epoch": 0.5701456920371766, "grad_norm": 0.26414039731025696, "learning_rate": 0.00018272942102775822, "loss": 11.6624, "step": 27237 }, { "epoch": 0.5701666248011388, "grad_norm": 0.26927506923675537, "learning_rate": 0.0001827281893111071, "loss": 11.6701, "step": 27238 }, { "epoch": 0.5701875575651009, "grad_norm": 0.24846695363521576, "learning_rate": 0.0001827269575546869, "loss": 11.6798, "step": 27239 }, { "epoch": 0.5702084903290631, "grad_norm": 0.3201894462108612, "learning_rate": 0.00018272572575849818, "loss": 11.6746, "step": 27240 }, { "epoch": 0.5702294230930252, "grad_norm": 0.28783687949180603, "learning_rate": 0.00018272449392254156, "loss": 11.6605, "step": 27241 }, { "epoch": 0.5702503558569874, "grad_norm": 0.34068453311920166, "learning_rate": 0.0001827232620468176, "loss": 11.6901, "step": 27242 }, { "epoch": 0.5702712886209496, "grad_norm": 0.2771151065826416, "learning_rate": 0.00018272203013132692, "loss": 11.6712, "step": 27243 }, { "epoch": 0.5702922213849116, "grad_norm": 0.25740113854408264, "learning_rate": 0.0001827207981760701, "loss": 11.6608, "step": 27244 }, { "epoch": 0.5703131541488738, "grad_norm": 0.2756057679653168, "learning_rate": 0.00018271956618104773, "loss": 11.675, "step": 27245 }, { "epoch": 0.5703340869128359, "grad_norm": 0.2630668580532074, "learning_rate": 0.0001827183341462604, "loss": 11.6518, "step": 27246 }, { "epoch": 0.5703550196767981, "grad_norm": 0.41476643085479736, "learning_rate": 0.00018271710207170872, "loss": 11.6773, "step": 27247 }, { "epoch": 0.5703759524407602, "grad_norm": 0.27995824813842773, "learning_rate": 0.00018271586995739325, "loss": 11.6603, "step": 27248 }, { "epoch": 0.5703968852047224, "grad_norm": 0.3330216109752655, "learning_rate": 0.00018271463780331464, "loss": 11.6871, "step": 27249 }, { "epoch": 0.5704178179686846, "grad_norm": 0.3289214074611664, "learning_rate": 0.00018271340560947343, "loss": 11.6701, "step": 27250 }, { "epoch": 0.5704387507326467, "grad_norm": 0.4486353397369385, "learning_rate": 0.00018271217337587021, "loss": 11.6769, "step": 27251 }, { "epoch": 0.5704596834966089, "grad_norm": 0.39280062913894653, "learning_rate": 0.0001827109411025056, "loss": 11.667, "step": 27252 }, { "epoch": 0.570480616260571, "grad_norm": 0.34372639656066895, "learning_rate": 0.0001827097087893802, "loss": 11.6727, "step": 27253 }, { "epoch": 0.5705015490245332, "grad_norm": 0.290200799703598, "learning_rate": 0.00018270847643649457, "loss": 11.684, "step": 27254 }, { "epoch": 0.5705224817884954, "grad_norm": 0.38296300172805786, "learning_rate": 0.0001827072440438493, "loss": 11.6819, "step": 27255 }, { "epoch": 0.5705434145524575, "grad_norm": 0.3332783579826355, "learning_rate": 0.00018270601161144505, "loss": 11.6877, "step": 27256 }, { "epoch": 0.5705643473164197, "grad_norm": 0.3407622277736664, "learning_rate": 0.0001827047791392823, "loss": 11.6817, "step": 27257 }, { "epoch": 0.5705852800803818, "grad_norm": 0.22811543941497803, "learning_rate": 0.00018270354662736177, "loss": 11.6777, "step": 27258 }, { "epoch": 0.570606212844344, "grad_norm": 0.3666125237941742, "learning_rate": 0.00018270231407568397, "loss": 11.6758, "step": 27259 }, { "epoch": 0.5706271456083061, "grad_norm": 0.27584755420684814, "learning_rate": 0.00018270108148424948, "loss": 11.6693, "step": 27260 }, { "epoch": 0.5706480783722683, "grad_norm": 0.2667175233364105, "learning_rate": 0.00018269984885305895, "loss": 11.6782, "step": 27261 }, { "epoch": 0.5706690111362305, "grad_norm": 0.33381593227386475, "learning_rate": 0.00018269861618211295, "loss": 11.6805, "step": 27262 }, { "epoch": 0.5706899439001926, "grad_norm": 0.28284648060798645, "learning_rate": 0.00018269738347141204, "loss": 11.6748, "step": 27263 }, { "epoch": 0.5707108766641548, "grad_norm": 0.31433725357055664, "learning_rate": 0.00018269615072095686, "loss": 11.6924, "step": 27264 }, { "epoch": 0.5707318094281169, "grad_norm": 0.36318472027778625, "learning_rate": 0.000182694917930748, "loss": 11.6793, "step": 27265 }, { "epoch": 0.570752742192079, "grad_norm": 0.2543313205242157, "learning_rate": 0.00018269368510078602, "loss": 11.6744, "step": 27266 }, { "epoch": 0.5707736749560411, "grad_norm": 0.25249722599983215, "learning_rate": 0.00018269245223107152, "loss": 11.6685, "step": 27267 }, { "epoch": 0.5707946077200033, "grad_norm": 0.3781279921531677, "learning_rate": 0.00018269121932160508, "loss": 11.687, "step": 27268 }, { "epoch": 0.5708155404839655, "grad_norm": 0.2593267261981964, "learning_rate": 0.00018268998637238734, "loss": 11.6713, "step": 27269 }, { "epoch": 0.5708364732479276, "grad_norm": 0.27654337882995605, "learning_rate": 0.00018268875338341885, "loss": 11.6609, "step": 27270 }, { "epoch": 0.5708574060118898, "grad_norm": 0.3432201147079468, "learning_rate": 0.00018268752035470028, "loss": 11.6712, "step": 27271 }, { "epoch": 0.5708783387758519, "grad_norm": 0.34427157044410706, "learning_rate": 0.0001826862872862321, "loss": 11.6771, "step": 27272 }, { "epoch": 0.5708992715398141, "grad_norm": 0.3293149173259735, "learning_rate": 0.00018268505417801497, "loss": 11.6893, "step": 27273 }, { "epoch": 0.5709202043037763, "grad_norm": 0.3265637159347534, "learning_rate": 0.00018268382103004947, "loss": 11.6702, "step": 27274 }, { "epoch": 0.5709411370677384, "grad_norm": 0.2991233170032501, "learning_rate": 0.00018268258784233626, "loss": 11.6869, "step": 27275 }, { "epoch": 0.5709620698317006, "grad_norm": 0.2776295840740204, "learning_rate": 0.00018268135461487582, "loss": 11.6901, "step": 27276 }, { "epoch": 0.5709830025956627, "grad_norm": 0.29357603192329407, "learning_rate": 0.0001826801213476688, "loss": 11.6798, "step": 27277 }, { "epoch": 0.5710039353596249, "grad_norm": 0.2509055435657501, "learning_rate": 0.0001826788880407158, "loss": 11.6663, "step": 27278 }, { "epoch": 0.571024868123587, "grad_norm": 0.33000272512435913, "learning_rate": 0.00018267765469401738, "loss": 11.6797, "step": 27279 }, { "epoch": 0.5710458008875492, "grad_norm": 0.2698265612125397, "learning_rate": 0.0001826764213075742, "loss": 11.6676, "step": 27280 }, { "epoch": 0.5710667336515114, "grad_norm": 0.30527985095977783, "learning_rate": 0.00018267518788138675, "loss": 11.7021, "step": 27281 }, { "epoch": 0.5710876664154735, "grad_norm": 0.24513649940490723, "learning_rate": 0.00018267395441545572, "loss": 11.6648, "step": 27282 }, { "epoch": 0.5711085991794357, "grad_norm": 0.28201258182525635, "learning_rate": 0.00018267272090978165, "loss": 11.6752, "step": 27283 }, { "epoch": 0.5711295319433978, "grad_norm": 2.5392098426818848, "learning_rate": 0.00018267148736436515, "loss": 11.6799, "step": 27284 }, { "epoch": 0.57115046470736, "grad_norm": 0.3117824196815491, "learning_rate": 0.00018267025377920678, "loss": 11.6854, "step": 27285 }, { "epoch": 0.5711713974713221, "grad_norm": 0.30592405796051025, "learning_rate": 0.00018266902015430723, "loss": 11.6805, "step": 27286 }, { "epoch": 0.5711923302352843, "grad_norm": 0.2996290624141693, "learning_rate": 0.00018266778648966695, "loss": 11.6795, "step": 27287 }, { "epoch": 0.5712132629992465, "grad_norm": 0.2714877128601074, "learning_rate": 0.00018266655278528665, "loss": 11.6661, "step": 27288 }, { "epoch": 0.5712341957632086, "grad_norm": 0.37831833958625793, "learning_rate": 0.0001826653190411669, "loss": 11.6873, "step": 27289 }, { "epoch": 0.5712551285271708, "grad_norm": 0.32634544372558594, "learning_rate": 0.00018266408525730824, "loss": 11.6659, "step": 27290 }, { "epoch": 0.5712760612911328, "grad_norm": 0.33224624395370483, "learning_rate": 0.00018266285143371132, "loss": 11.6646, "step": 27291 }, { "epoch": 0.571296994055095, "grad_norm": 0.2535926103591919, "learning_rate": 0.00018266161757037668, "loss": 11.6656, "step": 27292 }, { "epoch": 0.5713179268190572, "grad_norm": 0.32142356038093567, "learning_rate": 0.00018266038366730495, "loss": 11.6637, "step": 27293 }, { "epoch": 0.5713388595830193, "grad_norm": 0.2642917335033417, "learning_rate": 0.00018265914972449677, "loss": 11.6726, "step": 27294 }, { "epoch": 0.5713597923469815, "grad_norm": 0.27000072598457336, "learning_rate": 0.00018265791574195261, "loss": 11.6669, "step": 27295 }, { "epoch": 0.5713807251109436, "grad_norm": 0.3005458116531372, "learning_rate": 0.00018265668171967318, "loss": 11.669, "step": 27296 }, { "epoch": 0.5714016578749058, "grad_norm": 0.45515310764312744, "learning_rate": 0.000182655447657659, "loss": 11.6686, "step": 27297 }, { "epoch": 0.5714225906388679, "grad_norm": 0.29861748218536377, "learning_rate": 0.0001826542135559107, "loss": 11.6591, "step": 27298 }, { "epoch": 0.5714435234028301, "grad_norm": 0.29401475191116333, "learning_rate": 0.00018265297941442884, "loss": 11.6438, "step": 27299 }, { "epoch": 0.5714644561667923, "grad_norm": 0.24949973821640015, "learning_rate": 0.0001826517452332141, "loss": 11.6666, "step": 27300 }, { "epoch": 0.5714853889307544, "grad_norm": 0.3221558928489685, "learning_rate": 0.00018265051101226695, "loss": 11.6764, "step": 27301 }, { "epoch": 0.5715063216947166, "grad_norm": 0.4068310558795929, "learning_rate": 0.00018264927675158807, "loss": 11.6861, "step": 27302 }, { "epoch": 0.5715272544586787, "grad_norm": 0.3366698622703552, "learning_rate": 0.000182648042451178, "loss": 11.6443, "step": 27303 }, { "epoch": 0.5715481872226409, "grad_norm": 0.37115442752838135, "learning_rate": 0.00018264680811103738, "loss": 11.6765, "step": 27304 }, { "epoch": 0.571569119986603, "grad_norm": 0.2997486889362335, "learning_rate": 0.0001826455737311668, "loss": 11.6753, "step": 27305 }, { "epoch": 0.5715900527505652, "grad_norm": 0.261949360370636, "learning_rate": 0.00018264433931156678, "loss": 11.6556, "step": 27306 }, { "epoch": 0.5716109855145274, "grad_norm": 0.2668510377407074, "learning_rate": 0.00018264310485223802, "loss": 11.6667, "step": 27307 }, { "epoch": 0.5716319182784895, "grad_norm": 0.31084632873535156, "learning_rate": 0.00018264187035318106, "loss": 11.6678, "step": 27308 }, { "epoch": 0.5716528510424517, "grad_norm": 0.293711394071579, "learning_rate": 0.0001826406358143965, "loss": 11.6677, "step": 27309 }, { "epoch": 0.5716737838064138, "grad_norm": 0.35172930359840393, "learning_rate": 0.00018263940123588491, "loss": 11.6661, "step": 27310 }, { "epoch": 0.571694716570376, "grad_norm": 0.2108893096446991, "learning_rate": 0.00018263816661764692, "loss": 11.6748, "step": 27311 }, { "epoch": 0.5717156493343382, "grad_norm": 0.2746937572956085, "learning_rate": 0.0001826369319596831, "loss": 11.6727, "step": 27312 }, { "epoch": 0.5717365820983003, "grad_norm": 0.22874483466148376, "learning_rate": 0.00018263569726199402, "loss": 11.65, "step": 27313 }, { "epoch": 0.5717575148622625, "grad_norm": 0.4040306806564331, "learning_rate": 0.00018263446252458034, "loss": 11.6669, "step": 27314 }, { "epoch": 0.5717784476262245, "grad_norm": 0.31313690543174744, "learning_rate": 0.00018263322774744262, "loss": 11.6721, "step": 27315 }, { "epoch": 0.5717993803901867, "grad_norm": 0.3441351652145386, "learning_rate": 0.00018263199293058145, "loss": 11.6848, "step": 27316 }, { "epoch": 0.5718203131541488, "grad_norm": 0.3299550414085388, "learning_rate": 0.0001826307580739974, "loss": 11.6691, "step": 27317 }, { "epoch": 0.571841245918111, "grad_norm": 0.29160165786743164, "learning_rate": 0.0001826295231776911, "loss": 11.6706, "step": 27318 }, { "epoch": 0.5718621786820732, "grad_norm": 0.24410313367843628, "learning_rate": 0.00018262828824166315, "loss": 11.6773, "step": 27319 }, { "epoch": 0.5718831114460353, "grad_norm": 0.3310747742652893, "learning_rate": 0.00018262705326591415, "loss": 11.6784, "step": 27320 }, { "epoch": 0.5719040442099975, "grad_norm": 0.28296834230422974, "learning_rate": 0.0001826258182504446, "loss": 11.6702, "step": 27321 }, { "epoch": 0.5719249769739596, "grad_norm": 0.2826070785522461, "learning_rate": 0.00018262458319525518, "loss": 11.6743, "step": 27322 }, { "epoch": 0.5719459097379218, "grad_norm": 0.37007153034210205, "learning_rate": 0.0001826233481003465, "loss": 11.6828, "step": 27323 }, { "epoch": 0.5719668425018839, "grad_norm": 0.2565317153930664, "learning_rate": 0.00018262211296571912, "loss": 11.6852, "step": 27324 }, { "epoch": 0.5719877752658461, "grad_norm": 0.29626765847206116, "learning_rate": 0.0001826208777913736, "loss": 11.6775, "step": 27325 }, { "epoch": 0.5720087080298083, "grad_norm": 0.3047480285167694, "learning_rate": 0.0001826196425773106, "loss": 11.6675, "step": 27326 }, { "epoch": 0.5720296407937704, "grad_norm": 0.3310287594795227, "learning_rate": 0.00018261840732353068, "loss": 11.6554, "step": 27327 }, { "epoch": 0.5720505735577326, "grad_norm": 0.3043119013309479, "learning_rate": 0.00018261717203003444, "loss": 11.653, "step": 27328 }, { "epoch": 0.5720715063216947, "grad_norm": 0.2986758351325989, "learning_rate": 0.00018261593669682244, "loss": 11.6729, "step": 27329 }, { "epoch": 0.5720924390856569, "grad_norm": 0.3379265367984772, "learning_rate": 0.0001826147013238953, "loss": 11.6778, "step": 27330 }, { "epoch": 0.5721133718496191, "grad_norm": 0.2846061885356903, "learning_rate": 0.00018261346591125365, "loss": 11.6793, "step": 27331 }, { "epoch": 0.5721343046135812, "grad_norm": 0.24358804523944855, "learning_rate": 0.00018261223045889802, "loss": 11.6728, "step": 27332 }, { "epoch": 0.5721552373775434, "grad_norm": 0.2838694453239441, "learning_rate": 0.00018261099496682908, "loss": 11.6696, "step": 27333 }, { "epoch": 0.5721761701415055, "grad_norm": 0.34739190340042114, "learning_rate": 0.00018260975943504735, "loss": 11.6484, "step": 27334 }, { "epoch": 0.5721971029054677, "grad_norm": 0.43455490469932556, "learning_rate": 0.00018260852386355346, "loss": 11.6709, "step": 27335 }, { "epoch": 0.5722180356694297, "grad_norm": 0.3109072148799896, "learning_rate": 0.00018260728825234798, "loss": 11.6747, "step": 27336 }, { "epoch": 0.572238968433392, "grad_norm": 0.2911912500858307, "learning_rate": 0.00018260605260143154, "loss": 11.6639, "step": 27337 }, { "epoch": 0.5722599011973541, "grad_norm": 0.391338050365448, "learning_rate": 0.00018260481691080468, "loss": 11.6828, "step": 27338 }, { "epoch": 0.5722808339613162, "grad_norm": 0.2826836407184601, "learning_rate": 0.0001826035811804681, "loss": 11.6815, "step": 27339 }, { "epoch": 0.5723017667252784, "grad_norm": 0.43028226494789124, "learning_rate": 0.00018260234541042227, "loss": 11.6664, "step": 27340 }, { "epoch": 0.5723226994892405, "grad_norm": 0.27361926436424255, "learning_rate": 0.00018260110960066785, "loss": 11.6762, "step": 27341 }, { "epoch": 0.5723436322532027, "grad_norm": 0.2876233160495758, "learning_rate": 0.00018259987375120543, "loss": 11.6668, "step": 27342 }, { "epoch": 0.5723645650171648, "grad_norm": 0.26919060945510864, "learning_rate": 0.00018259863786203556, "loss": 11.6709, "step": 27343 }, { "epoch": 0.572385497781127, "grad_norm": 0.3256211280822754, "learning_rate": 0.00018259740193315887, "loss": 11.6936, "step": 27344 }, { "epoch": 0.5724064305450892, "grad_norm": 0.3333265483379364, "learning_rate": 0.000182596165964576, "loss": 11.6794, "step": 27345 }, { "epoch": 0.5724273633090513, "grad_norm": 0.320464164018631, "learning_rate": 0.0001825949299562875, "loss": 11.6698, "step": 27346 }, { "epoch": 0.5724482960730135, "grad_norm": 0.31581902503967285, "learning_rate": 0.00018259369390829393, "loss": 11.681, "step": 27347 }, { "epoch": 0.5724692288369756, "grad_norm": 0.40487271547317505, "learning_rate": 0.00018259245782059592, "loss": 11.6811, "step": 27348 }, { "epoch": 0.5724901616009378, "grad_norm": 0.33878591656684875, "learning_rate": 0.00018259122169319406, "loss": 11.6647, "step": 27349 }, { "epoch": 0.5725110943648999, "grad_norm": 0.30318009853363037, "learning_rate": 0.00018258998552608894, "loss": 11.6787, "step": 27350 }, { "epoch": 0.5725320271288621, "grad_norm": 0.2774432301521301, "learning_rate": 0.00018258874931928118, "loss": 11.6792, "step": 27351 }, { "epoch": 0.5725529598928243, "grad_norm": 0.297038197517395, "learning_rate": 0.00018258751307277137, "loss": 11.6751, "step": 27352 }, { "epoch": 0.5725738926567864, "grad_norm": 0.25996291637420654, "learning_rate": 0.00018258627678656005, "loss": 11.6502, "step": 27353 }, { "epoch": 0.5725948254207486, "grad_norm": 0.24334779381752014, "learning_rate": 0.00018258504046064783, "loss": 11.6756, "step": 27354 }, { "epoch": 0.5726157581847107, "grad_norm": 0.324606329202652, "learning_rate": 0.00018258380409503534, "loss": 11.6889, "step": 27355 }, { "epoch": 0.5726366909486729, "grad_norm": 0.31136441230773926, "learning_rate": 0.00018258256768972323, "loss": 11.6957, "step": 27356 }, { "epoch": 0.5726576237126351, "grad_norm": 0.2760227918624878, "learning_rate": 0.00018258133124471196, "loss": 11.6683, "step": 27357 }, { "epoch": 0.5726785564765972, "grad_norm": 0.24555717408657074, "learning_rate": 0.00018258009476000218, "loss": 11.6586, "step": 27358 }, { "epoch": 0.5726994892405594, "grad_norm": 0.3017663359642029, "learning_rate": 0.00018257885823559452, "loss": 11.6857, "step": 27359 }, { "epoch": 0.5727204220045214, "grad_norm": 0.27037569880485535, "learning_rate": 0.00018257762167148953, "loss": 11.6833, "step": 27360 }, { "epoch": 0.5727413547684836, "grad_norm": 0.4334064722061157, "learning_rate": 0.00018257638506768783, "loss": 11.6722, "step": 27361 }, { "epoch": 0.5727622875324457, "grad_norm": 0.29541340470314026, "learning_rate": 0.00018257514842419005, "loss": 11.6795, "step": 27362 }, { "epoch": 0.5727832202964079, "grad_norm": 0.34945255517959595, "learning_rate": 0.00018257391174099667, "loss": 11.6621, "step": 27363 }, { "epoch": 0.5728041530603701, "grad_norm": 0.30424928665161133, "learning_rate": 0.0001825726750181084, "loss": 11.6615, "step": 27364 }, { "epoch": 0.5728250858243322, "grad_norm": 0.3030778467655182, "learning_rate": 0.00018257143825552575, "loss": 11.6615, "step": 27365 }, { "epoch": 0.5728460185882944, "grad_norm": 0.4014205038547516, "learning_rate": 0.0001825702014532494, "loss": 11.6832, "step": 27366 }, { "epoch": 0.5728669513522565, "grad_norm": 0.2783225178718567, "learning_rate": 0.00018256896461127988, "loss": 11.6626, "step": 27367 }, { "epoch": 0.5728878841162187, "grad_norm": 0.2635060250759125, "learning_rate": 0.00018256772772961782, "loss": 11.6812, "step": 27368 }, { "epoch": 0.5729088168801808, "grad_norm": 0.2974593937397003, "learning_rate": 0.00018256649080826377, "loss": 11.6629, "step": 27369 }, { "epoch": 0.572929749644143, "grad_norm": 0.25505995750427246, "learning_rate": 0.00018256525384721838, "loss": 11.6777, "step": 27370 }, { "epoch": 0.5729506824081052, "grad_norm": 0.27562564611434937, "learning_rate": 0.0001825640168464822, "loss": 11.6625, "step": 27371 }, { "epoch": 0.5729716151720673, "grad_norm": 0.358092725276947, "learning_rate": 0.00018256277980605585, "loss": 11.6707, "step": 27372 }, { "epoch": 0.5729925479360295, "grad_norm": 0.32712358236312866, "learning_rate": 0.0001825615427259399, "loss": 11.6781, "step": 27373 }, { "epoch": 0.5730134806999916, "grad_norm": 0.2566351592540741, "learning_rate": 0.000182560305606135, "loss": 11.6649, "step": 27374 }, { "epoch": 0.5730344134639538, "grad_norm": 0.30423885583877563, "learning_rate": 0.0001825590684466417, "loss": 11.6756, "step": 27375 }, { "epoch": 0.573055346227916, "grad_norm": 0.2253679782152176, "learning_rate": 0.00018255783124746056, "loss": 11.6569, "step": 27376 }, { "epoch": 0.5730762789918781, "grad_norm": 0.28000715374946594, "learning_rate": 0.00018255659400859224, "loss": 11.6773, "step": 27377 }, { "epoch": 0.5730972117558403, "grad_norm": 0.32933902740478516, "learning_rate": 0.00018255535673003732, "loss": 11.6523, "step": 27378 }, { "epoch": 0.5731181445198024, "grad_norm": 0.29410961270332336, "learning_rate": 0.0001825541194117964, "loss": 11.6889, "step": 27379 }, { "epoch": 0.5731390772837646, "grad_norm": 0.3253449499607086, "learning_rate": 0.00018255288205387002, "loss": 11.6684, "step": 27380 }, { "epoch": 0.5731600100477267, "grad_norm": 0.3513961434364319, "learning_rate": 0.00018255164465625883, "loss": 11.6624, "step": 27381 }, { "epoch": 0.5731809428116889, "grad_norm": 0.2669981122016907, "learning_rate": 0.0001825504072189634, "loss": 11.6665, "step": 27382 }, { "epoch": 0.573201875575651, "grad_norm": 0.33733516931533813, "learning_rate": 0.00018254916974198436, "loss": 11.6039, "step": 27383 }, { "epoch": 0.5732228083396131, "grad_norm": 0.26073646545410156, "learning_rate": 0.00018254793222532227, "loss": 11.6861, "step": 27384 }, { "epoch": 0.5732437411035753, "grad_norm": 0.26363301277160645, "learning_rate": 0.00018254669466897773, "loss": 11.6609, "step": 27385 }, { "epoch": 0.5732646738675374, "grad_norm": 0.33497315645217896, "learning_rate": 0.00018254545707295137, "loss": 11.677, "step": 27386 }, { "epoch": 0.5732856066314996, "grad_norm": 0.33288970589637756, "learning_rate": 0.00018254421943724372, "loss": 11.6775, "step": 27387 }, { "epoch": 0.5733065393954617, "grad_norm": 0.2685002088546753, "learning_rate": 0.0001825429817618554, "loss": 11.6687, "step": 27388 }, { "epoch": 0.5733274721594239, "grad_norm": 0.2866933047771454, "learning_rate": 0.00018254174404678703, "loss": 11.6764, "step": 27389 }, { "epoch": 0.5733484049233861, "grad_norm": 1.4778586626052856, "learning_rate": 0.00018254050629203921, "loss": 11.622, "step": 27390 }, { "epoch": 0.5733693376873482, "grad_norm": 0.30886250734329224, "learning_rate": 0.00018253926849761247, "loss": 11.673, "step": 27391 }, { "epoch": 0.5733902704513104, "grad_norm": 0.30401134490966797, "learning_rate": 0.00018253803066350748, "loss": 11.6858, "step": 27392 }, { "epoch": 0.5734112032152725, "grad_norm": 0.31033533811569214, "learning_rate": 0.0001825367927897248, "loss": 11.6763, "step": 27393 }, { "epoch": 0.5734321359792347, "grad_norm": 0.3166799545288086, "learning_rate": 0.00018253555487626503, "loss": 11.6782, "step": 27394 }, { "epoch": 0.5734530687431969, "grad_norm": 0.2520582377910614, "learning_rate": 0.00018253431692312876, "loss": 11.6663, "step": 27395 }, { "epoch": 0.573474001507159, "grad_norm": 0.3475303649902344, "learning_rate": 0.00018253307893031662, "loss": 11.6994, "step": 27396 }, { "epoch": 0.5734949342711212, "grad_norm": 0.3481840491294861, "learning_rate": 0.00018253184089782914, "loss": 11.6548, "step": 27397 }, { "epoch": 0.5735158670350833, "grad_norm": 0.2877439260482788, "learning_rate": 0.00018253060282566694, "loss": 11.6645, "step": 27398 }, { "epoch": 0.5735367997990455, "grad_norm": 0.2602189779281616, "learning_rate": 0.00018252936471383063, "loss": 11.6644, "step": 27399 }, { "epoch": 0.5735577325630076, "grad_norm": 0.22438684105873108, "learning_rate": 0.00018252812656232084, "loss": 11.6856, "step": 27400 }, { "epoch": 0.5735786653269698, "grad_norm": 0.2476629912853241, "learning_rate": 0.0001825268883711381, "loss": 11.6632, "step": 27401 }, { "epoch": 0.573599598090932, "grad_norm": 0.2695133090019226, "learning_rate": 0.00018252565014028303, "loss": 11.676, "step": 27402 }, { "epoch": 0.5736205308548941, "grad_norm": 0.3001933693885803, "learning_rate": 0.00018252441186975623, "loss": 11.6654, "step": 27403 }, { "epoch": 0.5736414636188563, "grad_norm": 0.27231642603874207, "learning_rate": 0.00018252317355955827, "loss": 11.6539, "step": 27404 }, { "epoch": 0.5736623963828184, "grad_norm": 0.24836601316928864, "learning_rate": 0.0001825219352096898, "loss": 11.6663, "step": 27405 }, { "epoch": 0.5736833291467806, "grad_norm": 0.2587937116622925, "learning_rate": 0.00018252069682015137, "loss": 11.6644, "step": 27406 }, { "epoch": 0.5737042619107426, "grad_norm": 0.31766656041145325, "learning_rate": 0.00018251945839094358, "loss": 11.679, "step": 27407 }, { "epoch": 0.5737251946747048, "grad_norm": 0.2966437041759491, "learning_rate": 0.00018251821992206702, "loss": 11.6976, "step": 27408 }, { "epoch": 0.573746127438667, "grad_norm": 0.3902952969074249, "learning_rate": 0.00018251698141352233, "loss": 11.6955, "step": 27409 }, { "epoch": 0.5737670602026291, "grad_norm": 0.3126826286315918, "learning_rate": 0.00018251574286531005, "loss": 11.6669, "step": 27410 }, { "epoch": 0.5737879929665913, "grad_norm": 0.29440000653266907, "learning_rate": 0.00018251450427743082, "loss": 11.6666, "step": 27411 }, { "epoch": 0.5738089257305534, "grad_norm": 0.31798505783081055, "learning_rate": 0.0001825132656498852, "loss": 11.6781, "step": 27412 }, { "epoch": 0.5738298584945156, "grad_norm": 0.29943209886550903, "learning_rate": 0.0001825120269826738, "loss": 11.6634, "step": 27413 }, { "epoch": 0.5738507912584778, "grad_norm": 0.2484196424484253, "learning_rate": 0.00018251078827579722, "loss": 11.6715, "step": 27414 }, { "epoch": 0.5738717240224399, "grad_norm": 0.3233254551887512, "learning_rate": 0.00018250954952925605, "loss": 11.6701, "step": 27415 }, { "epoch": 0.5738926567864021, "grad_norm": 0.3168084919452667, "learning_rate": 0.00018250831074305087, "loss": 11.6558, "step": 27416 }, { "epoch": 0.5739135895503642, "grad_norm": 0.2650771141052246, "learning_rate": 0.00018250707191718233, "loss": 11.6773, "step": 27417 }, { "epoch": 0.5739345223143264, "grad_norm": 0.32231688499450684, "learning_rate": 0.00018250583305165098, "loss": 11.6767, "step": 27418 }, { "epoch": 0.5739554550782885, "grad_norm": 0.3478272557258606, "learning_rate": 0.00018250459414645737, "loss": 11.6707, "step": 27419 }, { "epoch": 0.5739763878422507, "grad_norm": 0.37195295095443726, "learning_rate": 0.00018250335520160216, "loss": 11.6731, "step": 27420 }, { "epoch": 0.5739973206062129, "grad_norm": 0.27930399775505066, "learning_rate": 0.000182502116217086, "loss": 11.6706, "step": 27421 }, { "epoch": 0.574018253370175, "grad_norm": 0.2586321234703064, "learning_rate": 0.00018250087719290937, "loss": 11.695, "step": 27422 }, { "epoch": 0.5740391861341372, "grad_norm": 0.2741335332393646, "learning_rate": 0.0001824996381290729, "loss": 11.6646, "step": 27423 }, { "epoch": 0.5740601188980993, "grad_norm": 0.3313945531845093, "learning_rate": 0.00018249839902557721, "loss": 11.6782, "step": 27424 }, { "epoch": 0.5740810516620615, "grad_norm": 0.2646573781967163, "learning_rate": 0.00018249715988242292, "loss": 11.6629, "step": 27425 }, { "epoch": 0.5741019844260236, "grad_norm": 0.2830740213394165, "learning_rate": 0.00018249592069961057, "loss": 11.6614, "step": 27426 }, { "epoch": 0.5741229171899858, "grad_norm": 0.3101978003978729, "learning_rate": 0.00018249468147714078, "loss": 11.6744, "step": 27427 }, { "epoch": 0.574143849953948, "grad_norm": 0.3274134397506714, "learning_rate": 0.00018249344221501413, "loss": 11.6634, "step": 27428 }, { "epoch": 0.57416478271791, "grad_norm": 0.31912630796432495, "learning_rate": 0.00018249220291323125, "loss": 11.6693, "step": 27429 }, { "epoch": 0.5741857154818722, "grad_norm": 0.40195029973983765, "learning_rate": 0.0001824909635717927, "loss": 11.6749, "step": 27430 }, { "epoch": 0.5742066482458343, "grad_norm": 0.280094176530838, "learning_rate": 0.0001824897241906991, "loss": 11.662, "step": 27431 }, { "epoch": 0.5742275810097965, "grad_norm": 0.27827098965644836, "learning_rate": 0.000182488484769951, "loss": 11.6977, "step": 27432 }, { "epoch": 0.5742485137737587, "grad_norm": 0.2548745274543762, "learning_rate": 0.00018248724530954907, "loss": 11.6724, "step": 27433 }, { "epoch": 0.5742694465377208, "grad_norm": 0.23878133296966553, "learning_rate": 0.00018248600580949388, "loss": 11.6588, "step": 27434 }, { "epoch": 0.574290379301683, "grad_norm": 0.2879457473754883, "learning_rate": 0.000182484766269786, "loss": 11.6705, "step": 27435 }, { "epoch": 0.5743113120656451, "grad_norm": 0.3086448013782501, "learning_rate": 0.00018248352669042602, "loss": 11.6749, "step": 27436 }, { "epoch": 0.5743322448296073, "grad_norm": 0.25659486651420593, "learning_rate": 0.0001824822870714146, "loss": 11.6673, "step": 27437 }, { "epoch": 0.5743531775935694, "grad_norm": 0.27154093980789185, "learning_rate": 0.00018248104741275224, "loss": 11.6683, "step": 27438 }, { "epoch": 0.5743741103575316, "grad_norm": 0.31387853622436523, "learning_rate": 0.00018247980771443962, "loss": 11.6592, "step": 27439 }, { "epoch": 0.5743950431214938, "grad_norm": 0.22881008684635162, "learning_rate": 0.0001824785679764773, "loss": 11.66, "step": 27440 }, { "epoch": 0.5744159758854559, "grad_norm": 0.33015817403793335, "learning_rate": 0.00018247732819886588, "loss": 11.6746, "step": 27441 }, { "epoch": 0.5744369086494181, "grad_norm": 0.25814154744148254, "learning_rate": 0.00018247608838160593, "loss": 11.6783, "step": 27442 }, { "epoch": 0.5744578414133802, "grad_norm": 0.32547393441200256, "learning_rate": 0.00018247484852469808, "loss": 11.6618, "step": 27443 }, { "epoch": 0.5744787741773424, "grad_norm": 0.2622019350528717, "learning_rate": 0.00018247360862814295, "loss": 11.6692, "step": 27444 }, { "epoch": 0.5744997069413045, "grad_norm": 0.3209567666053772, "learning_rate": 0.00018247236869194108, "loss": 11.6835, "step": 27445 }, { "epoch": 0.5745206397052667, "grad_norm": 0.20886074006557465, "learning_rate": 0.0001824711287160931, "loss": 11.6671, "step": 27446 }, { "epoch": 0.5745415724692289, "grad_norm": 0.32263991236686707, "learning_rate": 0.00018246988870059956, "loss": 11.6834, "step": 27447 }, { "epoch": 0.574562505233191, "grad_norm": 0.33878427743911743, "learning_rate": 0.0001824686486454611, "loss": 11.6772, "step": 27448 }, { "epoch": 0.5745834379971532, "grad_norm": 0.2695525288581848, "learning_rate": 0.00018246740855067835, "loss": 11.6629, "step": 27449 }, { "epoch": 0.5746043707611153, "grad_norm": 0.30940309166908264, "learning_rate": 0.00018246616841625186, "loss": 11.6812, "step": 27450 }, { "epoch": 0.5746253035250775, "grad_norm": 0.2549210786819458, "learning_rate": 0.0001824649282421822, "loss": 11.6552, "step": 27451 }, { "epoch": 0.5746462362890397, "grad_norm": 0.3179546296596527, "learning_rate": 0.00018246368802847, "loss": 11.655, "step": 27452 }, { "epoch": 0.5746671690530017, "grad_norm": 0.3550732135772705, "learning_rate": 0.00018246244777511588, "loss": 11.6818, "step": 27453 }, { "epoch": 0.574688101816964, "grad_norm": 0.2861289083957672, "learning_rate": 0.00018246120748212038, "loss": 11.6621, "step": 27454 }, { "epoch": 0.574709034580926, "grad_norm": 0.33947286009788513, "learning_rate": 0.0001824599671494841, "loss": 11.671, "step": 27455 }, { "epoch": 0.5747299673448882, "grad_norm": 0.2913483679294586, "learning_rate": 0.00018245872677720773, "loss": 11.6805, "step": 27456 }, { "epoch": 0.5747509001088503, "grad_norm": 0.3106124997138977, "learning_rate": 0.00018245748636529173, "loss": 11.672, "step": 27457 }, { "epoch": 0.5747718328728125, "grad_norm": 0.37540826201438904, "learning_rate": 0.00018245624591373683, "loss": 11.6757, "step": 27458 }, { "epoch": 0.5747927656367747, "grad_norm": 0.23772673308849335, "learning_rate": 0.00018245500542254353, "loss": 11.6688, "step": 27459 }, { "epoch": 0.5748136984007368, "grad_norm": 0.3522665500640869, "learning_rate": 0.00018245376489171245, "loss": 11.6687, "step": 27460 }, { "epoch": 0.574834631164699, "grad_norm": 0.2752476632595062, "learning_rate": 0.00018245252432124418, "loss": 11.6712, "step": 27461 }, { "epoch": 0.5748555639286611, "grad_norm": 0.3145469129085541, "learning_rate": 0.00018245128371113936, "loss": 11.6732, "step": 27462 }, { "epoch": 0.5748764966926233, "grad_norm": 0.2747898995876312, "learning_rate": 0.00018245004306139854, "loss": 11.6765, "step": 27463 }, { "epoch": 0.5748974294565854, "grad_norm": 0.3102450370788574, "learning_rate": 0.00018244880237202232, "loss": 11.6828, "step": 27464 }, { "epoch": 0.5749183622205476, "grad_norm": 0.2984280586242676, "learning_rate": 0.00018244756164301134, "loss": 11.6755, "step": 27465 }, { "epoch": 0.5749392949845098, "grad_norm": 0.2860012948513031, "learning_rate": 0.00018244632087436612, "loss": 11.6749, "step": 27466 }, { "epoch": 0.5749602277484719, "grad_norm": 0.2896403968334198, "learning_rate": 0.00018244508006608732, "loss": 11.6822, "step": 27467 }, { "epoch": 0.5749811605124341, "grad_norm": 0.30290886759757996, "learning_rate": 0.00018244383921817554, "loss": 11.6601, "step": 27468 }, { "epoch": 0.5750020932763962, "grad_norm": 0.35382378101348877, "learning_rate": 0.00018244259833063135, "loss": 11.679, "step": 27469 }, { "epoch": 0.5750230260403584, "grad_norm": 0.2741459906101227, "learning_rate": 0.0001824413574034553, "loss": 11.6652, "step": 27470 }, { "epoch": 0.5750439588043206, "grad_norm": 0.305349200963974, "learning_rate": 0.00018244011643664808, "loss": 11.6716, "step": 27471 }, { "epoch": 0.5750648915682827, "grad_norm": 0.27785006165504456, "learning_rate": 0.00018243887543021025, "loss": 11.6819, "step": 27472 }, { "epoch": 0.5750858243322449, "grad_norm": 0.2759369909763336, "learning_rate": 0.00018243763438414241, "loss": 11.6601, "step": 27473 }, { "epoch": 0.575106757096207, "grad_norm": 0.25361087918281555, "learning_rate": 0.00018243639329844512, "loss": 11.675, "step": 27474 }, { "epoch": 0.5751276898601692, "grad_norm": 0.2536759674549103, "learning_rate": 0.00018243515217311902, "loss": 11.6633, "step": 27475 }, { "epoch": 0.5751486226241312, "grad_norm": 0.41072970628738403, "learning_rate": 0.00018243391100816468, "loss": 11.691, "step": 27476 }, { "epoch": 0.5751695553880934, "grad_norm": 0.3364260494709015, "learning_rate": 0.00018243266980358272, "loss": 11.655, "step": 27477 }, { "epoch": 0.5751904881520556, "grad_norm": 0.2689165472984314, "learning_rate": 0.0001824314285593737, "loss": 11.6671, "step": 27478 }, { "epoch": 0.5752114209160177, "grad_norm": 0.263187438249588, "learning_rate": 0.00018243018727553826, "loss": 11.6474, "step": 27479 }, { "epoch": 0.5752323536799799, "grad_norm": 0.3534189462661743, "learning_rate": 0.00018242894595207697, "loss": 11.6804, "step": 27480 }, { "epoch": 0.575253286443942, "grad_norm": 0.24256141483783722, "learning_rate": 0.00018242770458899043, "loss": 11.6668, "step": 27481 }, { "epoch": 0.5752742192079042, "grad_norm": 0.2597559690475464, "learning_rate": 0.00018242646318627925, "loss": 11.666, "step": 27482 }, { "epoch": 0.5752951519718663, "grad_norm": 0.30553919076919556, "learning_rate": 0.000182425221743944, "loss": 11.686, "step": 27483 }, { "epoch": 0.5753160847358285, "grad_norm": 0.242038756608963, "learning_rate": 0.00018242398026198532, "loss": 11.6974, "step": 27484 }, { "epoch": 0.5753370174997907, "grad_norm": 0.27200478315353394, "learning_rate": 0.00018242273874040377, "loss": 11.6787, "step": 27485 }, { "epoch": 0.5753579502637528, "grad_norm": 0.2813733220100403, "learning_rate": 0.00018242149717919993, "loss": 11.672, "step": 27486 }, { "epoch": 0.575378883027715, "grad_norm": 0.32409265637397766, "learning_rate": 0.00018242025557837447, "loss": 11.6738, "step": 27487 }, { "epoch": 0.5753998157916771, "grad_norm": 0.37365081906318665, "learning_rate": 0.0001824190139379279, "loss": 11.6792, "step": 27488 }, { "epoch": 0.5754207485556393, "grad_norm": 0.3665812611579895, "learning_rate": 0.0001824177722578609, "loss": 11.678, "step": 27489 }, { "epoch": 0.5754416813196015, "grad_norm": 0.4435643255710602, "learning_rate": 0.000182416530538174, "loss": 11.6788, "step": 27490 }, { "epoch": 0.5754626140835636, "grad_norm": 0.24348004162311554, "learning_rate": 0.00018241528877886785, "loss": 11.6714, "step": 27491 }, { "epoch": 0.5754835468475258, "grad_norm": 0.30093544721603394, "learning_rate": 0.00018241404697994299, "loss": 11.6807, "step": 27492 }, { "epoch": 0.5755044796114879, "grad_norm": 0.2917857766151428, "learning_rate": 0.00018241280514140002, "loss": 11.6785, "step": 27493 }, { "epoch": 0.5755254123754501, "grad_norm": 0.33610063791275024, "learning_rate": 0.00018241156326323962, "loss": 11.6646, "step": 27494 }, { "epoch": 0.5755463451394122, "grad_norm": 0.25366082787513733, "learning_rate": 0.0001824103213454623, "loss": 11.6608, "step": 27495 }, { "epoch": 0.5755672779033744, "grad_norm": 0.3299292027950287, "learning_rate": 0.0001824090793880687, "loss": 11.6754, "step": 27496 }, { "epoch": 0.5755882106673366, "grad_norm": 0.33608484268188477, "learning_rate": 0.00018240783739105937, "loss": 11.6613, "step": 27497 }, { "epoch": 0.5756091434312987, "grad_norm": 0.28985822200775146, "learning_rate": 0.000182406595354435, "loss": 11.6884, "step": 27498 }, { "epoch": 0.5756300761952609, "grad_norm": 0.24651536345481873, "learning_rate": 0.00018240535327819607, "loss": 11.6817, "step": 27499 }, { "epoch": 0.5756510089592229, "grad_norm": 0.34623536467552185, "learning_rate": 0.00018240411116234327, "loss": 11.6693, "step": 27500 }, { "epoch": 0.5756719417231851, "grad_norm": 0.3219868838787079, "learning_rate": 0.00018240286900687717, "loss": 11.6734, "step": 27501 }, { "epoch": 0.5756928744871472, "grad_norm": 0.2898762822151184, "learning_rate": 0.00018240162681179832, "loss": 11.667, "step": 27502 }, { "epoch": 0.5757138072511094, "grad_norm": 0.27045202255249023, "learning_rate": 0.00018240038457710737, "loss": 11.6662, "step": 27503 }, { "epoch": 0.5757347400150716, "grad_norm": 0.2922394573688507, "learning_rate": 0.00018239914230280493, "loss": 11.6588, "step": 27504 }, { "epoch": 0.5757556727790337, "grad_norm": 0.22491945326328278, "learning_rate": 0.00018239789998889157, "loss": 11.669, "step": 27505 }, { "epoch": 0.5757766055429959, "grad_norm": 0.2827606201171875, "learning_rate": 0.00018239665763536785, "loss": 11.6613, "step": 27506 }, { "epoch": 0.575797538306958, "grad_norm": 0.25577792525291443, "learning_rate": 0.00018239541524223441, "loss": 11.6597, "step": 27507 }, { "epoch": 0.5758184710709202, "grad_norm": 0.29677560925483704, "learning_rate": 0.00018239417280949187, "loss": 11.6845, "step": 27508 }, { "epoch": 0.5758394038348824, "grad_norm": 0.2966731786727905, "learning_rate": 0.0001823929303371408, "loss": 11.6676, "step": 27509 }, { "epoch": 0.5758603365988445, "grad_norm": 0.24632875621318817, "learning_rate": 0.00018239168782518178, "loss": 11.6729, "step": 27510 }, { "epoch": 0.5758812693628067, "grad_norm": 0.26148727536201477, "learning_rate": 0.00018239044527361544, "loss": 11.6573, "step": 27511 }, { "epoch": 0.5759022021267688, "grad_norm": 0.32299840450286865, "learning_rate": 0.00018238920268244234, "loss": 11.6741, "step": 27512 }, { "epoch": 0.575923134890731, "grad_norm": 0.2510351240634918, "learning_rate": 0.00018238796005166311, "loss": 11.6766, "step": 27513 }, { "epoch": 0.5759440676546931, "grad_norm": 0.20804911851882935, "learning_rate": 0.00018238671738127836, "loss": 11.6576, "step": 27514 }, { "epoch": 0.5759650004186553, "grad_norm": 0.31491169333457947, "learning_rate": 0.00018238547467128863, "loss": 11.6714, "step": 27515 }, { "epoch": 0.5759859331826175, "grad_norm": 0.31351664662361145, "learning_rate": 0.00018238423192169458, "loss": 11.6679, "step": 27516 }, { "epoch": 0.5760068659465796, "grad_norm": 0.2697964310646057, "learning_rate": 0.00018238298913249675, "loss": 11.6776, "step": 27517 }, { "epoch": 0.5760277987105418, "grad_norm": 0.2972743511199951, "learning_rate": 0.00018238174630369578, "loss": 11.6752, "step": 27518 }, { "epoch": 0.5760487314745039, "grad_norm": 0.25021788477897644, "learning_rate": 0.00018238050343529226, "loss": 11.6684, "step": 27519 }, { "epoch": 0.5760696642384661, "grad_norm": 0.27158480882644653, "learning_rate": 0.00018237926052728677, "loss": 11.6719, "step": 27520 }, { "epoch": 0.5760905970024282, "grad_norm": 0.3694915771484375, "learning_rate": 0.00018237801757967992, "loss": 11.6632, "step": 27521 }, { "epoch": 0.5761115297663904, "grad_norm": 0.2760047912597656, "learning_rate": 0.00018237677459247232, "loss": 11.6618, "step": 27522 }, { "epoch": 0.5761324625303526, "grad_norm": 0.393169105052948, "learning_rate": 0.0001823755315656645, "loss": 11.6807, "step": 27523 }, { "epoch": 0.5761533952943146, "grad_norm": 0.30853271484375, "learning_rate": 0.00018237428849925718, "loss": 11.6919, "step": 27524 }, { "epoch": 0.5761743280582768, "grad_norm": 0.2635746896266937, "learning_rate": 0.00018237304539325087, "loss": 11.6683, "step": 27525 }, { "epoch": 0.5761952608222389, "grad_norm": 0.3434589207172394, "learning_rate": 0.00018237180224764616, "loss": 11.6541, "step": 27526 }, { "epoch": 0.5762161935862011, "grad_norm": 0.3017265796661377, "learning_rate": 0.0001823705590624437, "loss": 11.6772, "step": 27527 }, { "epoch": 0.5762371263501632, "grad_norm": 0.338238924741745, "learning_rate": 0.00018236931583764406, "loss": 11.6877, "step": 27528 }, { "epoch": 0.5762580591141254, "grad_norm": 0.30670106410980225, "learning_rate": 0.0001823680725732478, "loss": 11.6646, "step": 27529 }, { "epoch": 0.5762789918780876, "grad_norm": 0.25799423456192017, "learning_rate": 0.0001823668292692556, "loss": 11.6883, "step": 27530 }, { "epoch": 0.5762999246420497, "grad_norm": 0.24590054154396057, "learning_rate": 0.00018236558592566802, "loss": 11.6674, "step": 27531 }, { "epoch": 0.5763208574060119, "grad_norm": 0.28993910551071167, "learning_rate": 0.00018236434254248563, "loss": 11.6704, "step": 27532 }, { "epoch": 0.576341790169974, "grad_norm": 0.32858458161354065, "learning_rate": 0.00018236309911970908, "loss": 11.6806, "step": 27533 }, { "epoch": 0.5763627229339362, "grad_norm": 0.25748634338378906, "learning_rate": 0.0001823618556573389, "loss": 11.665, "step": 27534 }, { "epoch": 0.5763836556978984, "grad_norm": 0.3120995759963989, "learning_rate": 0.00018236061215537574, "loss": 11.6734, "step": 27535 }, { "epoch": 0.5764045884618605, "grad_norm": 0.30641305446624756, "learning_rate": 0.00018235936861382018, "loss": 11.6774, "step": 27536 }, { "epoch": 0.5764255212258227, "grad_norm": 0.285841166973114, "learning_rate": 0.00018235812503267283, "loss": 11.6625, "step": 27537 }, { "epoch": 0.5764464539897848, "grad_norm": 0.2770083248615265, "learning_rate": 0.00018235688141193427, "loss": 11.6558, "step": 27538 }, { "epoch": 0.576467386753747, "grad_norm": 0.30531275272369385, "learning_rate": 0.00018235563775160512, "loss": 11.6658, "step": 27539 }, { "epoch": 0.5764883195177091, "grad_norm": 0.32416820526123047, "learning_rate": 0.00018235439405168594, "loss": 11.67, "step": 27540 }, { "epoch": 0.5765092522816713, "grad_norm": 0.33904966711997986, "learning_rate": 0.00018235315031217737, "loss": 11.6772, "step": 27541 }, { "epoch": 0.5765301850456335, "grad_norm": 0.3462941646575928, "learning_rate": 0.00018235190653308, "loss": 11.6744, "step": 27542 }, { "epoch": 0.5765511178095956, "grad_norm": 0.27727195620536804, "learning_rate": 0.00018235066271439442, "loss": 11.6741, "step": 27543 }, { "epoch": 0.5765720505735578, "grad_norm": 0.283579021692276, "learning_rate": 0.00018234941885612117, "loss": 11.678, "step": 27544 }, { "epoch": 0.5765929833375198, "grad_norm": 0.27094975113868713, "learning_rate": 0.00018234817495826094, "loss": 11.6758, "step": 27545 }, { "epoch": 0.576613916101482, "grad_norm": 0.2714061439037323, "learning_rate": 0.0001823469310208143, "loss": 11.6608, "step": 27546 }, { "epoch": 0.5766348488654441, "grad_norm": 0.436747670173645, "learning_rate": 0.00018234568704378185, "loss": 11.6935, "step": 27547 }, { "epoch": 0.5766557816294063, "grad_norm": 0.2888889014720917, "learning_rate": 0.0001823444430271642, "loss": 11.6807, "step": 27548 }, { "epoch": 0.5766767143933685, "grad_norm": 0.2438967227935791, "learning_rate": 0.00018234319897096187, "loss": 11.6669, "step": 27549 }, { "epoch": 0.5766976471573306, "grad_norm": 0.3053746819496155, "learning_rate": 0.00018234195487517552, "loss": 11.6702, "step": 27550 }, { "epoch": 0.5767185799212928, "grad_norm": 0.35955676436424255, "learning_rate": 0.00018234071073980576, "loss": 11.6775, "step": 27551 }, { "epoch": 0.5767395126852549, "grad_norm": 0.3101015090942383, "learning_rate": 0.00018233946656485316, "loss": 11.6682, "step": 27552 }, { "epoch": 0.5767604454492171, "grad_norm": 0.3627457618713379, "learning_rate": 0.00018233822235031835, "loss": 11.6662, "step": 27553 }, { "epoch": 0.5767813782131793, "grad_norm": 0.29242295026779175, "learning_rate": 0.00018233697809620186, "loss": 11.6472, "step": 27554 }, { "epoch": 0.5768023109771414, "grad_norm": 0.3137867748737335, "learning_rate": 0.0001823357338025044, "loss": 11.6538, "step": 27555 }, { "epoch": 0.5768232437411036, "grad_norm": 0.27532345056533813, "learning_rate": 0.00018233448946922644, "loss": 11.656, "step": 27556 }, { "epoch": 0.5768441765050657, "grad_norm": 0.27883028984069824, "learning_rate": 0.0001823332450963687, "loss": 11.6715, "step": 27557 }, { "epoch": 0.5768651092690279, "grad_norm": 0.3055613040924072, "learning_rate": 0.00018233200068393165, "loss": 11.6644, "step": 27558 }, { "epoch": 0.57688604203299, "grad_norm": 0.28236493468284607, "learning_rate": 0.00018233075623191603, "loss": 11.6704, "step": 27559 }, { "epoch": 0.5769069747969522, "grad_norm": 0.25485533475875854, "learning_rate": 0.0001823295117403223, "loss": 11.6707, "step": 27560 }, { "epoch": 0.5769279075609144, "grad_norm": 0.22559626400470734, "learning_rate": 0.00018232826720915117, "loss": 11.6684, "step": 27561 }, { "epoch": 0.5769488403248765, "grad_norm": 0.30361682176589966, "learning_rate": 0.0001823270226384032, "loss": 11.6817, "step": 27562 }, { "epoch": 0.5769697730888387, "grad_norm": 0.40500080585479736, "learning_rate": 0.00018232577802807892, "loss": 11.6982, "step": 27563 }, { "epoch": 0.5769907058528008, "grad_norm": 0.3417951464653015, "learning_rate": 0.00018232453337817906, "loss": 11.6812, "step": 27564 }, { "epoch": 0.577011638616763, "grad_norm": 0.27249425649642944, "learning_rate": 0.00018232328868870412, "loss": 11.6816, "step": 27565 }, { "epoch": 0.5770325713807251, "grad_norm": 0.27363720536231995, "learning_rate": 0.00018232204395965472, "loss": 11.665, "step": 27566 }, { "epoch": 0.5770535041446873, "grad_norm": 0.3106917440891266, "learning_rate": 0.00018232079919103144, "loss": 11.6553, "step": 27567 }, { "epoch": 0.5770744369086495, "grad_norm": 0.3391728103160858, "learning_rate": 0.00018231955438283493, "loss": 11.6781, "step": 27568 }, { "epoch": 0.5770953696726115, "grad_norm": 0.30542683601379395, "learning_rate": 0.00018231830953506574, "loss": 11.6566, "step": 27569 }, { "epoch": 0.5771163024365737, "grad_norm": 0.2835465371608734, "learning_rate": 0.00018231706464772452, "loss": 11.6588, "step": 27570 }, { "epoch": 0.5771372352005358, "grad_norm": 0.2865981459617615, "learning_rate": 0.00018231581972081182, "loss": 11.671, "step": 27571 }, { "epoch": 0.577158167964498, "grad_norm": 0.2100825309753418, "learning_rate": 0.00018231457475432827, "loss": 11.6667, "step": 27572 }, { "epoch": 0.5771791007284602, "grad_norm": 0.26031726598739624, "learning_rate": 0.00018231332974827445, "loss": 11.6766, "step": 27573 }, { "epoch": 0.5772000334924223, "grad_norm": 0.2614850401878357, "learning_rate": 0.00018231208470265094, "loss": 11.6605, "step": 27574 }, { "epoch": 0.5772209662563845, "grad_norm": 0.32449230551719666, "learning_rate": 0.00018231083961745838, "loss": 11.681, "step": 27575 }, { "epoch": 0.5772418990203466, "grad_norm": 0.3448921740055084, "learning_rate": 0.00018230959449269736, "loss": 11.6708, "step": 27576 }, { "epoch": 0.5772628317843088, "grad_norm": 0.2666368782520294, "learning_rate": 0.00018230834932836845, "loss": 11.6746, "step": 27577 }, { "epoch": 0.5772837645482709, "grad_norm": 0.380193829536438, "learning_rate": 0.00018230710412447224, "loss": 11.6848, "step": 27578 }, { "epoch": 0.5773046973122331, "grad_norm": 0.37076765298843384, "learning_rate": 0.00018230585888100943, "loss": 11.6751, "step": 27579 }, { "epoch": 0.5773256300761953, "grad_norm": 0.2364802211523056, "learning_rate": 0.00018230461359798047, "loss": 11.6861, "step": 27580 }, { "epoch": 0.5773465628401574, "grad_norm": 0.2646690011024475, "learning_rate": 0.00018230336827538608, "loss": 11.6767, "step": 27581 }, { "epoch": 0.5773674956041196, "grad_norm": 0.3557753562927246, "learning_rate": 0.0001823021229132268, "loss": 11.6475, "step": 27582 }, { "epoch": 0.5773884283680817, "grad_norm": 0.4472312331199646, "learning_rate": 0.00018230087751150323, "loss": 11.688, "step": 27583 }, { "epoch": 0.5774093611320439, "grad_norm": 0.29137682914733887, "learning_rate": 0.000182299632070216, "loss": 11.6598, "step": 27584 }, { "epoch": 0.577430293896006, "grad_norm": 0.2504224479198456, "learning_rate": 0.00018229838658936564, "loss": 11.6861, "step": 27585 }, { "epoch": 0.5774512266599682, "grad_norm": 0.23314429819583893, "learning_rate": 0.00018229714106895284, "loss": 11.6745, "step": 27586 }, { "epoch": 0.5774721594239304, "grad_norm": 0.30723854899406433, "learning_rate": 0.00018229589550897813, "loss": 11.6724, "step": 27587 }, { "epoch": 0.5774930921878925, "grad_norm": 0.276788592338562, "learning_rate": 0.00018229464990944215, "loss": 11.6869, "step": 27588 }, { "epoch": 0.5775140249518547, "grad_norm": 0.2932869493961334, "learning_rate": 0.00018229340427034548, "loss": 11.6654, "step": 27589 }, { "epoch": 0.5775349577158168, "grad_norm": 0.2831887900829315, "learning_rate": 0.00018229215859168871, "loss": 11.6716, "step": 27590 }, { "epoch": 0.577555890479779, "grad_norm": 0.2757306396961212, "learning_rate": 0.0001822909128734725, "loss": 11.6636, "step": 27591 }, { "epoch": 0.5775768232437412, "grad_norm": 0.3166661560535431, "learning_rate": 0.00018228966711569732, "loss": 11.6699, "step": 27592 }, { "epoch": 0.5775977560077032, "grad_norm": 0.33432537317276, "learning_rate": 0.0001822884213183639, "loss": 11.6768, "step": 27593 }, { "epoch": 0.5776186887716654, "grad_norm": 0.32603123784065247, "learning_rate": 0.00018228717548147278, "loss": 11.6851, "step": 27594 }, { "epoch": 0.5776396215356275, "grad_norm": 0.29940733313560486, "learning_rate": 0.00018228592960502454, "loss": 11.657, "step": 27595 }, { "epoch": 0.5776605542995897, "grad_norm": 0.3469271659851074, "learning_rate": 0.00018228468368901985, "loss": 11.6723, "step": 27596 }, { "epoch": 0.5776814870635518, "grad_norm": 0.28424862027168274, "learning_rate": 0.00018228343773345925, "loss": 11.6739, "step": 27597 }, { "epoch": 0.577702419827514, "grad_norm": 0.3187806308269501, "learning_rate": 0.00018228219173834333, "loss": 11.6748, "step": 27598 }, { "epoch": 0.5777233525914762, "grad_norm": 0.24113953113555908, "learning_rate": 0.0001822809457036727, "loss": 11.6913, "step": 27599 }, { "epoch": 0.5777442853554383, "grad_norm": 0.3380309045314789, "learning_rate": 0.00018227969962944802, "loss": 11.6635, "step": 27600 }, { "epoch": 0.5777652181194005, "grad_norm": 0.2967013120651245, "learning_rate": 0.0001822784535156698, "loss": 11.6808, "step": 27601 }, { "epoch": 0.5777861508833626, "grad_norm": 0.3175044357776642, "learning_rate": 0.0001822772073623387, "loss": 11.6716, "step": 27602 }, { "epoch": 0.5778070836473248, "grad_norm": 0.38612493872642517, "learning_rate": 0.00018227596116945528, "loss": 11.6774, "step": 27603 }, { "epoch": 0.5778280164112869, "grad_norm": 0.28383177518844604, "learning_rate": 0.00018227471493702019, "loss": 11.6643, "step": 27604 }, { "epoch": 0.5778489491752491, "grad_norm": 0.3061300814151764, "learning_rate": 0.00018227346866503397, "loss": 11.6623, "step": 27605 }, { "epoch": 0.5778698819392113, "grad_norm": 0.4086669683456421, "learning_rate": 0.00018227222235349725, "loss": 11.6913, "step": 27606 }, { "epoch": 0.5778908147031734, "grad_norm": 0.31068265438079834, "learning_rate": 0.00018227097600241062, "loss": 11.6775, "step": 27607 }, { "epoch": 0.5779117474671356, "grad_norm": 0.26374080777168274, "learning_rate": 0.00018226972961177466, "loss": 11.6766, "step": 27608 }, { "epoch": 0.5779326802310977, "grad_norm": 0.275721937417984, "learning_rate": 0.00018226848318159003, "loss": 11.6736, "step": 27609 }, { "epoch": 0.5779536129950599, "grad_norm": 0.2999230921268463, "learning_rate": 0.0001822672367118573, "loss": 11.6688, "step": 27610 }, { "epoch": 0.5779745457590221, "grad_norm": 0.3483997583389282, "learning_rate": 0.000182265990202577, "loss": 11.6769, "step": 27611 }, { "epoch": 0.5779954785229842, "grad_norm": 0.3232870101928711, "learning_rate": 0.00018226474365374984, "loss": 11.6877, "step": 27612 }, { "epoch": 0.5780164112869464, "grad_norm": 0.24821604788303375, "learning_rate": 0.00018226349706537635, "loss": 11.6582, "step": 27613 }, { "epoch": 0.5780373440509085, "grad_norm": 0.3025732636451721, "learning_rate": 0.00018226225043745716, "loss": 11.6712, "step": 27614 }, { "epoch": 0.5780582768148707, "grad_norm": 0.3032485544681549, "learning_rate": 0.00018226100376999286, "loss": 11.6659, "step": 27615 }, { "epoch": 0.5780792095788327, "grad_norm": 0.40445733070373535, "learning_rate": 0.00018225975706298404, "loss": 11.6721, "step": 27616 }, { "epoch": 0.5781001423427949, "grad_norm": 0.3130471706390381, "learning_rate": 0.00018225851031643129, "loss": 11.6757, "step": 27617 }, { "epoch": 0.5781210751067571, "grad_norm": 0.34115472435951233, "learning_rate": 0.00018225726353033524, "loss": 11.6758, "step": 27618 }, { "epoch": 0.5781420078707192, "grad_norm": 0.3591328561306, "learning_rate": 0.0001822560167046965, "loss": 11.6636, "step": 27619 }, { "epoch": 0.5781629406346814, "grad_norm": 0.3176257014274597, "learning_rate": 0.00018225476983951562, "loss": 11.6708, "step": 27620 }, { "epoch": 0.5781838733986435, "grad_norm": 0.34600892663002014, "learning_rate": 0.0001822535229347932, "loss": 11.6653, "step": 27621 }, { "epoch": 0.5782048061626057, "grad_norm": 0.2510911524295807, "learning_rate": 0.0001822522759905299, "loss": 11.689, "step": 27622 }, { "epoch": 0.5782257389265678, "grad_norm": 0.27404263615608215, "learning_rate": 0.00018225102900672625, "loss": 11.6561, "step": 27623 }, { "epoch": 0.57824667169053, "grad_norm": 0.2736339569091797, "learning_rate": 0.0001822497819833829, "loss": 11.6721, "step": 27624 }, { "epoch": 0.5782676044544922, "grad_norm": 0.330899715423584, "learning_rate": 0.00018224853492050045, "loss": 11.6742, "step": 27625 }, { "epoch": 0.5782885372184543, "grad_norm": 0.27970531582832336, "learning_rate": 0.00018224728781807947, "loss": 11.6747, "step": 27626 }, { "epoch": 0.5783094699824165, "grad_norm": 0.38199567794799805, "learning_rate": 0.00018224604067612057, "loss": 11.664, "step": 27627 }, { "epoch": 0.5783304027463786, "grad_norm": 0.37368959188461304, "learning_rate": 0.00018224479349462436, "loss": 11.6769, "step": 27628 }, { "epoch": 0.5783513355103408, "grad_norm": 0.2888247072696686, "learning_rate": 0.0001822435462735914, "loss": 11.6825, "step": 27629 }, { "epoch": 0.578372268274303, "grad_norm": 0.27775952219963074, "learning_rate": 0.00018224229901302232, "loss": 11.6759, "step": 27630 }, { "epoch": 0.5783932010382651, "grad_norm": 0.28365856409072876, "learning_rate": 0.00018224105171291775, "loss": 11.6602, "step": 27631 }, { "epoch": 0.5784141338022273, "grad_norm": 0.28454816341400146, "learning_rate": 0.00018223980437327825, "loss": 11.6753, "step": 27632 }, { "epoch": 0.5784350665661894, "grad_norm": 0.2872334122657776, "learning_rate": 0.0001822385569941044, "loss": 11.6657, "step": 27633 }, { "epoch": 0.5784559993301516, "grad_norm": 0.2710713744163513, "learning_rate": 0.00018223730957539686, "loss": 11.6438, "step": 27634 }, { "epoch": 0.5784769320941137, "grad_norm": 0.4105442464351654, "learning_rate": 0.00018223606211715615, "loss": 11.6645, "step": 27635 }, { "epoch": 0.5784978648580759, "grad_norm": 0.43364375829696655, "learning_rate": 0.00018223481461938297, "loss": 11.6675, "step": 27636 }, { "epoch": 0.5785187976220381, "grad_norm": 0.28873851895332336, "learning_rate": 0.00018223356708207784, "loss": 11.653, "step": 27637 }, { "epoch": 0.5785397303860002, "grad_norm": 0.24850603938102722, "learning_rate": 0.0001822323195052414, "loss": 11.6652, "step": 27638 }, { "epoch": 0.5785606631499624, "grad_norm": 0.24316421151161194, "learning_rate": 0.00018223107188887423, "loss": 11.6622, "step": 27639 }, { "epoch": 0.5785815959139244, "grad_norm": 0.29030466079711914, "learning_rate": 0.00018222982423297693, "loss": 11.67, "step": 27640 }, { "epoch": 0.5786025286778866, "grad_norm": 0.34932103753089905, "learning_rate": 0.0001822285765375501, "loss": 11.6743, "step": 27641 }, { "epoch": 0.5786234614418487, "grad_norm": 0.26991891860961914, "learning_rate": 0.00018222732880259437, "loss": 11.6741, "step": 27642 }, { "epoch": 0.5786443942058109, "grad_norm": 0.32617029547691345, "learning_rate": 0.00018222608102811031, "loss": 11.6727, "step": 27643 }, { "epoch": 0.5786653269697731, "grad_norm": 0.29616841673851013, "learning_rate": 0.0001822248332140985, "loss": 11.6719, "step": 27644 }, { "epoch": 0.5786862597337352, "grad_norm": 0.3265448808670044, "learning_rate": 0.00018222358536055958, "loss": 11.6528, "step": 27645 }, { "epoch": 0.5787071924976974, "grad_norm": 0.2846533954143524, "learning_rate": 0.00018222233746749417, "loss": 11.6556, "step": 27646 }, { "epoch": 0.5787281252616595, "grad_norm": 0.2806417644023895, "learning_rate": 0.0001822210895349028, "loss": 11.6889, "step": 27647 }, { "epoch": 0.5787490580256217, "grad_norm": 0.2291044145822525, "learning_rate": 0.00018221984156278608, "loss": 11.6701, "step": 27648 }, { "epoch": 0.5787699907895839, "grad_norm": 0.26233959197998047, "learning_rate": 0.00018221859355114467, "loss": 11.6557, "step": 27649 }, { "epoch": 0.578790923553546, "grad_norm": 0.2625754475593567, "learning_rate": 0.0001822173454999791, "loss": 11.6762, "step": 27650 }, { "epoch": 0.5788118563175082, "grad_norm": 0.2559957802295685, "learning_rate": 0.00018221609740929004, "loss": 11.6457, "step": 27651 }, { "epoch": 0.5788327890814703, "grad_norm": 0.3164687752723694, "learning_rate": 0.00018221484927907807, "loss": 11.6683, "step": 27652 }, { "epoch": 0.5788537218454325, "grad_norm": 0.28332793712615967, "learning_rate": 0.00018221360110934372, "loss": 11.675, "step": 27653 }, { "epoch": 0.5788746546093946, "grad_norm": 0.3219311535358429, "learning_rate": 0.0001822123529000877, "loss": 11.6407, "step": 27654 }, { "epoch": 0.5788955873733568, "grad_norm": 0.24820947647094727, "learning_rate": 0.0001822111046513105, "loss": 11.6753, "step": 27655 }, { "epoch": 0.578916520137319, "grad_norm": 0.4045164883136749, "learning_rate": 0.0001822098563630128, "loss": 11.6629, "step": 27656 }, { "epoch": 0.5789374529012811, "grad_norm": 0.30805784463882446, "learning_rate": 0.0001822086080351952, "loss": 11.6848, "step": 27657 }, { "epoch": 0.5789583856652433, "grad_norm": 0.343737930059433, "learning_rate": 0.00018220735966785823, "loss": 11.6668, "step": 27658 }, { "epoch": 0.5789793184292054, "grad_norm": 0.2945660948753357, "learning_rate": 0.00018220611126100255, "loss": 11.6767, "step": 27659 }, { "epoch": 0.5790002511931676, "grad_norm": 0.35100626945495605, "learning_rate": 0.00018220486281462876, "loss": 11.6829, "step": 27660 }, { "epoch": 0.5790211839571296, "grad_norm": 0.25542041659355164, "learning_rate": 0.00018220361432873743, "loss": 11.6531, "step": 27661 }, { "epoch": 0.5790421167210918, "grad_norm": 0.2717718482017517, "learning_rate": 0.00018220236580332916, "loss": 11.673, "step": 27662 }, { "epoch": 0.579063049485054, "grad_norm": 1.512622594833374, "learning_rate": 0.0001822011172384046, "loss": 11.7222, "step": 27663 }, { "epoch": 0.5790839822490161, "grad_norm": 0.2866726517677307, "learning_rate": 0.0001821998686339643, "loss": 11.6755, "step": 27664 }, { "epoch": 0.5791049150129783, "grad_norm": 0.28855839371681213, "learning_rate": 0.00018219861999000888, "loss": 11.6885, "step": 27665 }, { "epoch": 0.5791258477769404, "grad_norm": 0.2451035976409912, "learning_rate": 0.00018219737130653895, "loss": 11.6631, "step": 27666 }, { "epoch": 0.5791467805409026, "grad_norm": 0.24425610899925232, "learning_rate": 0.00018219612258355507, "loss": 11.6615, "step": 27667 }, { "epoch": 0.5791677133048648, "grad_norm": 0.21310840547084808, "learning_rate": 0.00018219487382105785, "loss": 11.6596, "step": 27668 }, { "epoch": 0.5791886460688269, "grad_norm": 0.28247320652008057, "learning_rate": 0.00018219362501904795, "loss": 11.6796, "step": 27669 }, { "epoch": 0.5792095788327891, "grad_norm": 0.2722015082836151, "learning_rate": 0.00018219237617752592, "loss": 11.6711, "step": 27670 }, { "epoch": 0.5792305115967512, "grad_norm": 0.2508992552757263, "learning_rate": 0.00018219112729649233, "loss": 11.6764, "step": 27671 }, { "epoch": 0.5792514443607134, "grad_norm": 0.27397117018699646, "learning_rate": 0.00018218987837594788, "loss": 11.6655, "step": 27672 }, { "epoch": 0.5792723771246755, "grad_norm": 0.27426713705062866, "learning_rate": 0.00018218862941589306, "loss": 11.6675, "step": 27673 }, { "epoch": 0.5792933098886377, "grad_norm": 0.3118176758289337, "learning_rate": 0.00018218738041632853, "loss": 11.6869, "step": 27674 }, { "epoch": 0.5793142426525999, "grad_norm": 0.2875238358974457, "learning_rate": 0.0001821861313772549, "loss": 11.6727, "step": 27675 }, { "epoch": 0.579335175416562, "grad_norm": 0.3222558796405792, "learning_rate": 0.0001821848822986727, "loss": 11.6903, "step": 27676 }, { "epoch": 0.5793561081805242, "grad_norm": 0.24778231978416443, "learning_rate": 0.00018218363318058263, "loss": 11.6684, "step": 27677 }, { "epoch": 0.5793770409444863, "grad_norm": 0.27148741483688354, "learning_rate": 0.00018218238402298523, "loss": 11.6777, "step": 27678 }, { "epoch": 0.5793979737084485, "grad_norm": 0.24109333753585815, "learning_rate": 0.00018218113482588107, "loss": 11.668, "step": 27679 }, { "epoch": 0.5794189064724106, "grad_norm": 0.2985626757144928, "learning_rate": 0.00018217988558927087, "loss": 11.6726, "step": 27680 }, { "epoch": 0.5794398392363728, "grad_norm": 0.29144614934921265, "learning_rate": 0.00018217863631315507, "loss": 11.6672, "step": 27681 }, { "epoch": 0.579460772000335, "grad_norm": 0.3395988643169403, "learning_rate": 0.00018217738699753442, "loss": 11.6784, "step": 27682 }, { "epoch": 0.5794817047642971, "grad_norm": 0.2597357928752899, "learning_rate": 0.00018217613764240942, "loss": 11.6578, "step": 27683 }, { "epoch": 0.5795026375282593, "grad_norm": 0.2329334169626236, "learning_rate": 0.00018217488824778067, "loss": 11.6766, "step": 27684 }, { "epoch": 0.5795235702922213, "grad_norm": 0.2647288739681244, "learning_rate": 0.00018217363881364886, "loss": 11.6769, "step": 27685 }, { "epoch": 0.5795445030561835, "grad_norm": 0.2662985324859619, "learning_rate": 0.0001821723893400145, "loss": 11.6789, "step": 27686 }, { "epoch": 0.5795654358201457, "grad_norm": 0.29483088850975037, "learning_rate": 0.00018217113982687825, "loss": 11.6716, "step": 27687 }, { "epoch": 0.5795863685841078, "grad_norm": 0.2877139747142792, "learning_rate": 0.00018216989027424068, "loss": 11.675, "step": 27688 }, { "epoch": 0.57960730134807, "grad_norm": 0.2753487527370453, "learning_rate": 0.0001821686406821024, "loss": 11.6753, "step": 27689 }, { "epoch": 0.5796282341120321, "grad_norm": 0.30307736992836, "learning_rate": 0.000182167391050464, "loss": 11.6644, "step": 27690 }, { "epoch": 0.5796491668759943, "grad_norm": 0.3001088500022888, "learning_rate": 0.00018216614137932607, "loss": 11.6674, "step": 27691 }, { "epoch": 0.5796700996399564, "grad_norm": 0.28632235527038574, "learning_rate": 0.00018216489166868928, "loss": 11.6765, "step": 27692 }, { "epoch": 0.5796910324039186, "grad_norm": 0.2696956694126129, "learning_rate": 0.00018216364191855412, "loss": 11.665, "step": 27693 }, { "epoch": 0.5797119651678808, "grad_norm": 0.2409307211637497, "learning_rate": 0.0001821623921289213, "loss": 11.6736, "step": 27694 }, { "epoch": 0.5797328979318429, "grad_norm": 0.2906191349029541, "learning_rate": 0.00018216114229979136, "loss": 11.6708, "step": 27695 }, { "epoch": 0.5797538306958051, "grad_norm": 0.2463759034872055, "learning_rate": 0.0001821598924311649, "loss": 11.6757, "step": 27696 }, { "epoch": 0.5797747634597672, "grad_norm": 0.3099254369735718, "learning_rate": 0.00018215864252304253, "loss": 11.6699, "step": 27697 }, { "epoch": 0.5797956962237294, "grad_norm": 0.32646504044532776, "learning_rate": 0.00018215739257542487, "loss": 11.6665, "step": 27698 }, { "epoch": 0.5798166289876915, "grad_norm": 0.2605992257595062, "learning_rate": 0.0001821561425883125, "loss": 11.6652, "step": 27699 }, { "epoch": 0.5798375617516537, "grad_norm": 0.3011864721775055, "learning_rate": 0.00018215489256170601, "loss": 11.6727, "step": 27700 }, { "epoch": 0.5798584945156159, "grad_norm": 0.2937052249908447, "learning_rate": 0.00018215364249560604, "loss": 11.6768, "step": 27701 }, { "epoch": 0.579879427279578, "grad_norm": 0.2648950517177582, "learning_rate": 0.00018215239239001316, "loss": 11.6591, "step": 27702 }, { "epoch": 0.5799003600435402, "grad_norm": 0.28767770528793335, "learning_rate": 0.00018215114224492797, "loss": 11.6839, "step": 27703 }, { "epoch": 0.5799212928075023, "grad_norm": 0.28547412157058716, "learning_rate": 0.0001821498920603511, "loss": 11.6587, "step": 27704 }, { "epoch": 0.5799422255714645, "grad_norm": 0.22338932752609253, "learning_rate": 0.0001821486418362831, "loss": 11.6756, "step": 27705 }, { "epoch": 0.5799631583354267, "grad_norm": 0.2748715877532959, "learning_rate": 0.00018214739157272462, "loss": 11.6667, "step": 27706 }, { "epoch": 0.5799840910993888, "grad_norm": 0.31339961290359497, "learning_rate": 0.00018214614126967624, "loss": 11.6637, "step": 27707 }, { "epoch": 0.580005023863351, "grad_norm": 0.2692343592643738, "learning_rate": 0.00018214489092713856, "loss": 11.6642, "step": 27708 }, { "epoch": 0.580025956627313, "grad_norm": 0.2760797142982483, "learning_rate": 0.00018214364054511218, "loss": 11.6743, "step": 27709 }, { "epoch": 0.5800468893912752, "grad_norm": 0.290928453207016, "learning_rate": 0.00018214239012359773, "loss": 11.6801, "step": 27710 }, { "epoch": 0.5800678221552373, "grad_norm": 0.25617489218711853, "learning_rate": 0.00018214113966259575, "loss": 11.6753, "step": 27711 }, { "epoch": 0.5800887549191995, "grad_norm": 0.27768561244010925, "learning_rate": 0.00018213988916210692, "loss": 11.6817, "step": 27712 }, { "epoch": 0.5801096876831617, "grad_norm": 0.30765965580940247, "learning_rate": 0.00018213863862213178, "loss": 11.6595, "step": 27713 }, { "epoch": 0.5801306204471238, "grad_norm": 0.36722832918167114, "learning_rate": 0.00018213738804267093, "loss": 11.6748, "step": 27714 }, { "epoch": 0.580151553211086, "grad_norm": 0.26975658535957336, "learning_rate": 0.00018213613742372502, "loss": 11.6697, "step": 27715 }, { "epoch": 0.5801724859750481, "grad_norm": 0.3048554062843323, "learning_rate": 0.00018213488676529463, "loss": 11.6645, "step": 27716 }, { "epoch": 0.5801934187390103, "grad_norm": 0.511005699634552, "learning_rate": 0.0001821336360673803, "loss": 11.668, "step": 27717 }, { "epoch": 0.5802143515029724, "grad_norm": 0.32578203082084656, "learning_rate": 0.00018213238532998274, "loss": 11.6586, "step": 27718 }, { "epoch": 0.5802352842669346, "grad_norm": 0.24735726416110992, "learning_rate": 0.0001821311345531025, "loss": 11.6689, "step": 27719 }, { "epoch": 0.5802562170308968, "grad_norm": 0.3145446181297302, "learning_rate": 0.00018212988373674015, "loss": 11.6792, "step": 27720 }, { "epoch": 0.5802771497948589, "grad_norm": 0.3057039976119995, "learning_rate": 0.00018212863288089633, "loss": 11.6722, "step": 27721 }, { "epoch": 0.5802980825588211, "grad_norm": 0.32981395721435547, "learning_rate": 0.0001821273819855716, "loss": 11.6727, "step": 27722 }, { "epoch": 0.5803190153227832, "grad_norm": 0.2843599021434784, "learning_rate": 0.0001821261310507666, "loss": 11.6651, "step": 27723 }, { "epoch": 0.5803399480867454, "grad_norm": 0.276662141084671, "learning_rate": 0.00018212488007648197, "loss": 11.6705, "step": 27724 }, { "epoch": 0.5803608808507075, "grad_norm": 0.31895607709884644, "learning_rate": 0.00018212362906271822, "loss": 11.6791, "step": 27725 }, { "epoch": 0.5803818136146697, "grad_norm": 0.3830774426460266, "learning_rate": 0.00018212237800947602, "loss": 11.6769, "step": 27726 }, { "epoch": 0.5804027463786319, "grad_norm": 0.26004835963249207, "learning_rate": 0.00018212112691675593, "loss": 11.6677, "step": 27727 }, { "epoch": 0.580423679142594, "grad_norm": 0.3362673223018646, "learning_rate": 0.00018211987578455857, "loss": 11.6726, "step": 27728 }, { "epoch": 0.5804446119065562, "grad_norm": 0.28721070289611816, "learning_rate": 0.00018211862461288455, "loss": 11.6662, "step": 27729 }, { "epoch": 0.5804655446705183, "grad_norm": 0.28628644347190857, "learning_rate": 0.00018211737340173448, "loss": 11.6731, "step": 27730 }, { "epoch": 0.5804864774344805, "grad_norm": 0.26850181818008423, "learning_rate": 0.0001821161221511089, "loss": 11.6654, "step": 27731 }, { "epoch": 0.5805074101984427, "grad_norm": 0.25986984372138977, "learning_rate": 0.00018211487086100848, "loss": 11.6727, "step": 27732 }, { "epoch": 0.5805283429624047, "grad_norm": 0.32419925928115845, "learning_rate": 0.00018211361953143377, "loss": 11.6595, "step": 27733 }, { "epoch": 0.5805492757263669, "grad_norm": 0.2824001908302307, "learning_rate": 0.00018211236816238543, "loss": 11.6716, "step": 27734 }, { "epoch": 0.580570208490329, "grad_norm": 0.3091554045677185, "learning_rate": 0.00018211111675386403, "loss": 11.6656, "step": 27735 }, { "epoch": 0.5805911412542912, "grad_norm": 0.2554214894771576, "learning_rate": 0.00018210986530587016, "loss": 11.6573, "step": 27736 }, { "epoch": 0.5806120740182533, "grad_norm": 0.2641741931438446, "learning_rate": 0.00018210861381840441, "loss": 11.6637, "step": 27737 }, { "epoch": 0.5806330067822155, "grad_norm": 0.2868072986602783, "learning_rate": 0.00018210736229146743, "loss": 11.6799, "step": 27738 }, { "epoch": 0.5806539395461777, "grad_norm": 0.2980976998806, "learning_rate": 0.0001821061107250598, "loss": 11.6596, "step": 27739 }, { "epoch": 0.5806748723101398, "grad_norm": 0.29479050636291504, "learning_rate": 0.00018210485911918212, "loss": 11.6794, "step": 27740 }, { "epoch": 0.580695805074102, "grad_norm": 0.26657745242118835, "learning_rate": 0.00018210360747383496, "loss": 11.6686, "step": 27741 }, { "epoch": 0.5807167378380641, "grad_norm": 0.28661200404167175, "learning_rate": 0.00018210235578901897, "loss": 11.6635, "step": 27742 }, { "epoch": 0.5807376706020263, "grad_norm": 0.28188392519950867, "learning_rate": 0.00018210110406473472, "loss": 11.6623, "step": 27743 }, { "epoch": 0.5807586033659884, "grad_norm": 0.2747105658054352, "learning_rate": 0.00018209985230098282, "loss": 11.6609, "step": 27744 }, { "epoch": 0.5807795361299506, "grad_norm": 0.2761373221874237, "learning_rate": 0.0001820986004977639, "loss": 11.6796, "step": 27745 }, { "epoch": 0.5808004688939128, "grad_norm": 0.4172232151031494, "learning_rate": 0.00018209734865507854, "loss": 11.6723, "step": 27746 }, { "epoch": 0.5808214016578749, "grad_norm": 0.45021986961364746, "learning_rate": 0.00018209609677292732, "loss": 11.6901, "step": 27747 }, { "epoch": 0.5808423344218371, "grad_norm": 0.3220672607421875, "learning_rate": 0.00018209484485131084, "loss": 11.6813, "step": 27748 }, { "epoch": 0.5808632671857992, "grad_norm": 0.29100626707077026, "learning_rate": 0.00018209359289022978, "loss": 11.6556, "step": 27749 }, { "epoch": 0.5808841999497614, "grad_norm": 0.2769322097301483, "learning_rate": 0.00018209234088968466, "loss": 11.6766, "step": 27750 }, { "epoch": 0.5809051327137236, "grad_norm": 0.30745449662208557, "learning_rate": 0.00018209108884967607, "loss": 11.6758, "step": 27751 }, { "epoch": 0.5809260654776857, "grad_norm": 0.3822386860847473, "learning_rate": 0.0001820898367702047, "loss": 11.6789, "step": 27752 }, { "epoch": 0.5809469982416479, "grad_norm": 0.4115974009037018, "learning_rate": 0.0001820885846512711, "loss": 11.676, "step": 27753 }, { "epoch": 0.58096793100561, "grad_norm": 0.3341735005378723, "learning_rate": 0.00018208733249287584, "loss": 11.6789, "step": 27754 }, { "epoch": 0.5809888637695722, "grad_norm": 0.38693153858184814, "learning_rate": 0.00018208608029501955, "loss": 11.6713, "step": 27755 }, { "epoch": 0.5810097965335342, "grad_norm": 0.2465818077325821, "learning_rate": 0.00018208482805770286, "loss": 11.674, "step": 27756 }, { "epoch": 0.5810307292974964, "grad_norm": 0.31626811623573303, "learning_rate": 0.00018208357578092634, "loss": 11.6726, "step": 27757 }, { "epoch": 0.5810516620614586, "grad_norm": 0.31087130308151245, "learning_rate": 0.00018208232346469058, "loss": 11.6831, "step": 27758 }, { "epoch": 0.5810725948254207, "grad_norm": 0.31888726353645325, "learning_rate": 0.00018208107110899625, "loss": 11.6697, "step": 27759 }, { "epoch": 0.5810935275893829, "grad_norm": 0.33019155263900757, "learning_rate": 0.00018207981871384385, "loss": 11.6729, "step": 27760 }, { "epoch": 0.581114460353345, "grad_norm": 0.34045183658599854, "learning_rate": 0.00018207856627923408, "loss": 11.6808, "step": 27761 }, { "epoch": 0.5811353931173072, "grad_norm": 0.269255667924881, "learning_rate": 0.00018207731380516745, "loss": 11.6501, "step": 27762 }, { "epoch": 0.5811563258812693, "grad_norm": 0.282934308052063, "learning_rate": 0.00018207606129164468, "loss": 11.6638, "step": 27763 }, { "epoch": 0.5811772586452315, "grad_norm": 0.29127901792526245, "learning_rate": 0.00018207480873866624, "loss": 11.675, "step": 27764 }, { "epoch": 0.5811981914091937, "grad_norm": 0.2570262849330902, "learning_rate": 0.00018207355614623282, "loss": 11.6664, "step": 27765 }, { "epoch": 0.5812191241731558, "grad_norm": 0.38276219367980957, "learning_rate": 0.000182072303514345, "loss": 11.6708, "step": 27766 }, { "epoch": 0.581240056937118, "grad_norm": 0.23277603089809418, "learning_rate": 0.00018207105084300338, "loss": 11.6669, "step": 27767 }, { "epoch": 0.5812609897010801, "grad_norm": 0.256534218788147, "learning_rate": 0.00018206979813220855, "loss": 11.6509, "step": 27768 }, { "epoch": 0.5812819224650423, "grad_norm": 0.2998118996620178, "learning_rate": 0.00018206854538196113, "loss": 11.6708, "step": 27769 }, { "epoch": 0.5813028552290045, "grad_norm": 0.2734062373638153, "learning_rate": 0.00018206729259226173, "loss": 11.6706, "step": 27770 }, { "epoch": 0.5813237879929666, "grad_norm": 0.27429670095443726, "learning_rate": 0.00018206603976311092, "loss": 11.6773, "step": 27771 }, { "epoch": 0.5813447207569288, "grad_norm": 0.25078079104423523, "learning_rate": 0.0001820647868945093, "loss": 11.6519, "step": 27772 }, { "epoch": 0.5813656535208909, "grad_norm": 0.31353235244750977, "learning_rate": 0.00018206353398645753, "loss": 11.6749, "step": 27773 }, { "epoch": 0.5813865862848531, "grad_norm": 0.2854679524898529, "learning_rate": 0.00018206228103895614, "loss": 11.6659, "step": 27774 }, { "epoch": 0.5814075190488152, "grad_norm": 0.33728906512260437, "learning_rate": 0.0001820610280520058, "loss": 11.6699, "step": 27775 }, { "epoch": 0.5814284518127774, "grad_norm": 0.2615618407726288, "learning_rate": 0.00018205977502560705, "loss": 11.685, "step": 27776 }, { "epoch": 0.5814493845767396, "grad_norm": 0.31582435965538025, "learning_rate": 0.00018205852195976054, "loss": 11.6832, "step": 27777 }, { "epoch": 0.5814703173407016, "grad_norm": 0.2553967535495758, "learning_rate": 0.00018205726885446686, "loss": 11.6661, "step": 27778 }, { "epoch": 0.5814912501046638, "grad_norm": 0.3295961320400238, "learning_rate": 0.0001820560157097266, "loss": 11.6575, "step": 27779 }, { "epoch": 0.5815121828686259, "grad_norm": 0.33042240142822266, "learning_rate": 0.00018205476252554037, "loss": 11.6797, "step": 27780 }, { "epoch": 0.5815331156325881, "grad_norm": 0.37081432342529297, "learning_rate": 0.00018205350930190875, "loss": 11.678, "step": 27781 }, { "epoch": 0.5815540483965502, "grad_norm": 0.3884563446044922, "learning_rate": 0.00018205225603883237, "loss": 11.6595, "step": 27782 }, { "epoch": 0.5815749811605124, "grad_norm": 0.26389583945274353, "learning_rate": 0.00018205100273631185, "loss": 11.6602, "step": 27783 }, { "epoch": 0.5815959139244746, "grad_norm": 0.28365135192871094, "learning_rate": 0.00018204974939434776, "loss": 11.6483, "step": 27784 }, { "epoch": 0.5816168466884367, "grad_norm": 0.29655539989471436, "learning_rate": 0.0001820484960129407, "loss": 11.6677, "step": 27785 }, { "epoch": 0.5816377794523989, "grad_norm": 0.4090522825717926, "learning_rate": 0.00018204724259209129, "loss": 11.664, "step": 27786 }, { "epoch": 0.581658712216361, "grad_norm": 0.2824411988258362, "learning_rate": 0.00018204598913180013, "loss": 11.6716, "step": 27787 }, { "epoch": 0.5816796449803232, "grad_norm": 0.3222423791885376, "learning_rate": 0.00018204473563206781, "loss": 11.6404, "step": 27788 }, { "epoch": 0.5817005777442854, "grad_norm": 0.27163591980934143, "learning_rate": 0.00018204348209289493, "loss": 11.6672, "step": 27789 }, { "epoch": 0.5817215105082475, "grad_norm": 0.29908454418182373, "learning_rate": 0.00018204222851428217, "loss": 11.654, "step": 27790 }, { "epoch": 0.5817424432722097, "grad_norm": 0.3147836923599243, "learning_rate": 0.00018204097489623, "loss": 11.6658, "step": 27791 }, { "epoch": 0.5817633760361718, "grad_norm": 0.31974294781684875, "learning_rate": 0.0001820397212387391, "loss": 11.6654, "step": 27792 }, { "epoch": 0.581784308800134, "grad_norm": 0.24100421369075775, "learning_rate": 0.00018203846754181007, "loss": 11.6707, "step": 27793 }, { "epoch": 0.5818052415640961, "grad_norm": 0.26921144127845764, "learning_rate": 0.0001820372138054435, "loss": 11.6767, "step": 27794 }, { "epoch": 0.5818261743280583, "grad_norm": 0.27737632393836975, "learning_rate": 0.00018203596002964, "loss": 11.6514, "step": 27795 }, { "epoch": 0.5818471070920205, "grad_norm": 0.28948861360549927, "learning_rate": 0.00018203470621440016, "loss": 11.6717, "step": 27796 }, { "epoch": 0.5818680398559826, "grad_norm": 0.29400214552879333, "learning_rate": 0.0001820334523597246, "loss": 11.6727, "step": 27797 }, { "epoch": 0.5818889726199448, "grad_norm": 0.2982891798019409, "learning_rate": 0.00018203219846561395, "loss": 11.6613, "step": 27798 }, { "epoch": 0.5819099053839069, "grad_norm": 0.2541244924068451, "learning_rate": 0.00018203094453206876, "loss": 11.67, "step": 27799 }, { "epoch": 0.5819308381478691, "grad_norm": 0.31746840476989746, "learning_rate": 0.0001820296905590896, "loss": 11.6625, "step": 27800 }, { "epoch": 0.5819517709118311, "grad_norm": 0.2623380720615387, "learning_rate": 0.00018202843654667716, "loss": 11.6686, "step": 27801 }, { "epoch": 0.5819727036757933, "grad_norm": 0.3564954996109009, "learning_rate": 0.00018202718249483204, "loss": 11.6913, "step": 27802 }, { "epoch": 0.5819936364397555, "grad_norm": 0.3075905740261078, "learning_rate": 0.00018202592840355476, "loss": 11.6707, "step": 27803 }, { "epoch": 0.5820145692037176, "grad_norm": 0.294128954410553, "learning_rate": 0.00018202467427284598, "loss": 11.6624, "step": 27804 }, { "epoch": 0.5820355019676798, "grad_norm": 0.31744885444641113, "learning_rate": 0.00018202342010270633, "loss": 11.6586, "step": 27805 }, { "epoch": 0.5820564347316419, "grad_norm": 0.3318650424480438, "learning_rate": 0.00018202216589313633, "loss": 11.6669, "step": 27806 }, { "epoch": 0.5820773674956041, "grad_norm": 0.3497645854949951, "learning_rate": 0.00018202091164413667, "loss": 11.6706, "step": 27807 }, { "epoch": 0.5820983002595663, "grad_norm": 0.27034586668014526, "learning_rate": 0.0001820196573557079, "loss": 11.6752, "step": 27808 }, { "epoch": 0.5821192330235284, "grad_norm": 0.3426910936832428, "learning_rate": 0.0001820184030278506, "loss": 11.6722, "step": 27809 }, { "epoch": 0.5821401657874906, "grad_norm": 0.29474663734436035, "learning_rate": 0.00018201714866056547, "loss": 11.659, "step": 27810 }, { "epoch": 0.5821610985514527, "grad_norm": 0.33566001057624817, "learning_rate": 0.00018201589425385304, "loss": 11.6708, "step": 27811 }, { "epoch": 0.5821820313154149, "grad_norm": 0.4169776141643524, "learning_rate": 0.00018201463980771393, "loss": 11.6818, "step": 27812 }, { "epoch": 0.582202964079377, "grad_norm": 0.2886258661746979, "learning_rate": 0.00018201338532214872, "loss": 11.68, "step": 27813 }, { "epoch": 0.5822238968433392, "grad_norm": 0.2559254467487335, "learning_rate": 0.00018201213079715802, "loss": 11.6625, "step": 27814 }, { "epoch": 0.5822448296073014, "grad_norm": 0.3872929513454437, "learning_rate": 0.00018201087623274248, "loss": 11.6709, "step": 27815 }, { "epoch": 0.5822657623712635, "grad_norm": 0.2958776354789734, "learning_rate": 0.00018200962162890263, "loss": 11.6855, "step": 27816 }, { "epoch": 0.5822866951352257, "grad_norm": 0.27623263001441956, "learning_rate": 0.0001820083669856391, "loss": 11.6604, "step": 27817 }, { "epoch": 0.5823076278991878, "grad_norm": 0.3005729019641876, "learning_rate": 0.0001820071123029526, "loss": 11.6797, "step": 27818 }, { "epoch": 0.58232856066315, "grad_norm": 0.2855406701564789, "learning_rate": 0.00018200585758084355, "loss": 11.6779, "step": 27819 }, { "epoch": 0.5823494934271121, "grad_norm": 0.3243376314640045, "learning_rate": 0.00018200460281931266, "loss": 11.6765, "step": 27820 }, { "epoch": 0.5823704261910743, "grad_norm": 0.28081414103507996, "learning_rate": 0.00018200334801836052, "loss": 11.6673, "step": 27821 }, { "epoch": 0.5823913589550365, "grad_norm": 0.3883203864097595, "learning_rate": 0.00018200209317798774, "loss": 11.678, "step": 27822 }, { "epoch": 0.5824122917189986, "grad_norm": 0.3113524317741394, "learning_rate": 0.00018200083829819486, "loss": 11.673, "step": 27823 }, { "epoch": 0.5824332244829608, "grad_norm": 0.3070286810398102, "learning_rate": 0.00018199958337898256, "loss": 11.6793, "step": 27824 }, { "epoch": 0.5824541572469228, "grad_norm": 0.23828139901161194, "learning_rate": 0.00018199832842035146, "loss": 11.6578, "step": 27825 }, { "epoch": 0.582475090010885, "grad_norm": 0.30233892798423767, "learning_rate": 0.00018199707342230208, "loss": 11.6728, "step": 27826 }, { "epoch": 0.5824960227748472, "grad_norm": 0.2696617841720581, "learning_rate": 0.00018199581838483506, "loss": 11.6668, "step": 27827 }, { "epoch": 0.5825169555388093, "grad_norm": 0.2837257385253906, "learning_rate": 0.00018199456330795103, "loss": 11.6735, "step": 27828 }, { "epoch": 0.5825378883027715, "grad_norm": 0.30039337277412415, "learning_rate": 0.00018199330819165053, "loss": 11.6665, "step": 27829 }, { "epoch": 0.5825588210667336, "grad_norm": 0.3676753342151642, "learning_rate": 0.00018199205303593425, "loss": 11.6035, "step": 27830 }, { "epoch": 0.5825797538306958, "grad_norm": 0.3188718855381012, "learning_rate": 0.00018199079784080274, "loss": 11.6746, "step": 27831 }, { "epoch": 0.5826006865946579, "grad_norm": 0.36220407485961914, "learning_rate": 0.0001819895426062566, "loss": 11.6665, "step": 27832 }, { "epoch": 0.5826216193586201, "grad_norm": 0.28599128127098083, "learning_rate": 0.00018198828733229644, "loss": 11.6815, "step": 27833 }, { "epoch": 0.5826425521225823, "grad_norm": 0.2691728174686432, "learning_rate": 0.00018198703201892288, "loss": 11.6782, "step": 27834 }, { "epoch": 0.5826634848865444, "grad_norm": 0.30843585729599, "learning_rate": 0.0001819857766661365, "loss": 11.6485, "step": 27835 }, { "epoch": 0.5826844176505066, "grad_norm": 0.29355937242507935, "learning_rate": 0.0001819845212739379, "loss": 11.6617, "step": 27836 }, { "epoch": 0.5827053504144687, "grad_norm": 0.23274315893650055, "learning_rate": 0.00018198326584232773, "loss": 11.6799, "step": 27837 }, { "epoch": 0.5827262831784309, "grad_norm": 0.279416024684906, "learning_rate": 0.00018198201037130654, "loss": 11.6769, "step": 27838 }, { "epoch": 0.582747215942393, "grad_norm": 0.3458790183067322, "learning_rate": 0.00018198075486087497, "loss": 11.6599, "step": 27839 }, { "epoch": 0.5827681487063552, "grad_norm": 0.26694223284721375, "learning_rate": 0.00018197949931103357, "loss": 11.6726, "step": 27840 }, { "epoch": 0.5827890814703174, "grad_norm": 0.3282987177371979, "learning_rate": 0.000181978243721783, "loss": 11.6695, "step": 27841 }, { "epoch": 0.5828100142342795, "grad_norm": 0.2582072913646698, "learning_rate": 0.00018197698809312388, "loss": 11.6625, "step": 27842 }, { "epoch": 0.5828309469982417, "grad_norm": 0.3408346176147461, "learning_rate": 0.00018197573242505674, "loss": 11.6741, "step": 27843 }, { "epoch": 0.5828518797622038, "grad_norm": 0.29334813356399536, "learning_rate": 0.00018197447671758221, "loss": 11.6767, "step": 27844 }, { "epoch": 0.582872812526166, "grad_norm": 0.23467206954956055, "learning_rate": 0.00018197322097070095, "loss": 11.6806, "step": 27845 }, { "epoch": 0.5828937452901282, "grad_norm": 0.3032263219356537, "learning_rate": 0.0001819719651844135, "loss": 11.6586, "step": 27846 }, { "epoch": 0.5829146780540903, "grad_norm": 0.392148494720459, "learning_rate": 0.0001819707093587205, "loss": 11.6788, "step": 27847 }, { "epoch": 0.5829356108180525, "grad_norm": 0.28351572155952454, "learning_rate": 0.0001819694534936225, "loss": 11.6741, "step": 27848 }, { "epoch": 0.5829565435820145, "grad_norm": 0.3125012218952179, "learning_rate": 0.00018196819758912019, "loss": 11.6476, "step": 27849 }, { "epoch": 0.5829774763459767, "grad_norm": 0.24234843254089355, "learning_rate": 0.00018196694164521407, "loss": 11.6745, "step": 27850 }, { "epoch": 0.5829984091099388, "grad_norm": 0.28499823808670044, "learning_rate": 0.00018196568566190483, "loss": 11.6742, "step": 27851 }, { "epoch": 0.583019341873901, "grad_norm": 0.3232104480266571, "learning_rate": 0.00018196442963919305, "loss": 11.6745, "step": 27852 }, { "epoch": 0.5830402746378632, "grad_norm": 0.28751999139785767, "learning_rate": 0.0001819631735770793, "loss": 11.6706, "step": 27853 }, { "epoch": 0.5830612074018253, "grad_norm": 0.3728969991207123, "learning_rate": 0.00018196191747556423, "loss": 11.6583, "step": 27854 }, { "epoch": 0.5830821401657875, "grad_norm": 0.3263428509235382, "learning_rate": 0.00018196066133464842, "loss": 11.6789, "step": 27855 }, { "epoch": 0.5831030729297496, "grad_norm": 0.2880755364894867, "learning_rate": 0.00018195940515433248, "loss": 11.6617, "step": 27856 }, { "epoch": 0.5831240056937118, "grad_norm": 0.3889705240726471, "learning_rate": 0.000181958148934617, "loss": 11.6671, "step": 27857 }, { "epoch": 0.5831449384576739, "grad_norm": 0.2774897515773773, "learning_rate": 0.0001819568926755026, "loss": 11.6721, "step": 27858 }, { "epoch": 0.5831658712216361, "grad_norm": 0.22249151766300201, "learning_rate": 0.00018195563637698987, "loss": 11.66, "step": 27859 }, { "epoch": 0.5831868039855983, "grad_norm": 0.2588498592376709, "learning_rate": 0.00018195438003907946, "loss": 11.6611, "step": 27860 }, { "epoch": 0.5832077367495604, "grad_norm": 0.3104255795478821, "learning_rate": 0.00018195312366177187, "loss": 11.6812, "step": 27861 }, { "epoch": 0.5832286695135226, "grad_norm": 0.2740311324596405, "learning_rate": 0.00018195186724506783, "loss": 11.6545, "step": 27862 }, { "epoch": 0.5832496022774847, "grad_norm": 0.27654704451560974, "learning_rate": 0.00018195061078896786, "loss": 11.6554, "step": 27863 }, { "epoch": 0.5832705350414469, "grad_norm": 0.294798344373703, "learning_rate": 0.00018194935429347261, "loss": 11.6781, "step": 27864 }, { "epoch": 0.5832914678054091, "grad_norm": 0.30259063839912415, "learning_rate": 0.00018194809775858267, "loss": 11.6519, "step": 27865 }, { "epoch": 0.5833124005693712, "grad_norm": 0.3921792507171631, "learning_rate": 0.00018194684118429862, "loss": 11.6595, "step": 27866 }, { "epoch": 0.5833333333333334, "grad_norm": 0.26238465309143066, "learning_rate": 0.00018194558457062106, "loss": 11.6843, "step": 27867 }, { "epoch": 0.5833542660972955, "grad_norm": 0.28388506174087524, "learning_rate": 0.00018194432791755066, "loss": 11.6633, "step": 27868 }, { "epoch": 0.5833751988612577, "grad_norm": 0.2629343271255493, "learning_rate": 0.00018194307122508792, "loss": 11.67, "step": 27869 }, { "epoch": 0.5833961316252197, "grad_norm": 0.3588288724422455, "learning_rate": 0.00018194181449323354, "loss": 11.6792, "step": 27870 }, { "epoch": 0.583417064389182, "grad_norm": 0.27463340759277344, "learning_rate": 0.0001819405577219881, "loss": 11.6636, "step": 27871 }, { "epoch": 0.5834379971531441, "grad_norm": 0.2526980936527252, "learning_rate": 0.00018193930091135217, "loss": 11.6763, "step": 27872 }, { "epoch": 0.5834589299171062, "grad_norm": 0.28312796354293823, "learning_rate": 0.0001819380440613264, "loss": 11.6709, "step": 27873 }, { "epoch": 0.5834798626810684, "grad_norm": 0.3187194764614105, "learning_rate": 0.00018193678717191134, "loss": 11.6709, "step": 27874 }, { "epoch": 0.5835007954450305, "grad_norm": 0.2803286015987396, "learning_rate": 0.00018193553024310763, "loss": 11.6885, "step": 27875 }, { "epoch": 0.5835217282089927, "grad_norm": 0.32469964027404785, "learning_rate": 0.00018193427327491588, "loss": 11.658, "step": 27876 }, { "epoch": 0.5835426609729548, "grad_norm": 0.2979743480682373, "learning_rate": 0.0001819330162673367, "loss": 11.6565, "step": 27877 }, { "epoch": 0.583563593736917, "grad_norm": 0.32188355922698975, "learning_rate": 0.00018193175922037066, "loss": 11.6596, "step": 27878 }, { "epoch": 0.5835845265008792, "grad_norm": 0.3129620850086212, "learning_rate": 0.00018193050213401835, "loss": 11.6695, "step": 27879 }, { "epoch": 0.5836054592648413, "grad_norm": 0.2903169095516205, "learning_rate": 0.00018192924500828043, "loss": 11.6613, "step": 27880 }, { "epoch": 0.5836263920288035, "grad_norm": 0.45752644538879395, "learning_rate": 0.0001819279878431575, "loss": 11.6707, "step": 27881 }, { "epoch": 0.5836473247927656, "grad_norm": 0.26915258169174194, "learning_rate": 0.00018192673063865012, "loss": 11.6753, "step": 27882 }, { "epoch": 0.5836682575567278, "grad_norm": 0.3524145483970642, "learning_rate": 0.0001819254733947589, "loss": 11.6622, "step": 27883 }, { "epoch": 0.58368919032069, "grad_norm": 0.27182403206825256, "learning_rate": 0.0001819242161114845, "loss": 11.6747, "step": 27884 }, { "epoch": 0.5837101230846521, "grad_norm": 0.29942479729652405, "learning_rate": 0.0001819229587888275, "loss": 11.6775, "step": 27885 }, { "epoch": 0.5837310558486143, "grad_norm": 0.3150023818016052, "learning_rate": 0.00018192170142678846, "loss": 11.672, "step": 27886 }, { "epoch": 0.5837519886125764, "grad_norm": 0.29148468375205994, "learning_rate": 0.00018192044402536803, "loss": 11.6763, "step": 27887 }, { "epoch": 0.5837729213765386, "grad_norm": 0.2625338137149811, "learning_rate": 0.00018191918658456676, "loss": 11.6733, "step": 27888 }, { "epoch": 0.5837938541405007, "grad_norm": 0.28496527671813965, "learning_rate": 0.00018191792910438536, "loss": 11.6438, "step": 27889 }, { "epoch": 0.5838147869044629, "grad_norm": 0.3141179382801056, "learning_rate": 0.00018191667158482436, "loss": 11.6724, "step": 27890 }, { "epoch": 0.5838357196684251, "grad_norm": 0.24352416396141052, "learning_rate": 0.00018191541402588435, "loss": 11.666, "step": 27891 }, { "epoch": 0.5838566524323872, "grad_norm": 0.2573966979980469, "learning_rate": 0.00018191415642756592, "loss": 11.6699, "step": 27892 }, { "epoch": 0.5838775851963494, "grad_norm": 0.33655494451522827, "learning_rate": 0.00018191289878986976, "loss": 11.6774, "step": 27893 }, { "epoch": 0.5838985179603114, "grad_norm": 0.25029227137565613, "learning_rate": 0.00018191164111279644, "loss": 11.6733, "step": 27894 }, { "epoch": 0.5839194507242736, "grad_norm": 0.26572921872138977, "learning_rate": 0.00018191038339634654, "loss": 11.6809, "step": 27895 }, { "epoch": 0.5839403834882357, "grad_norm": 0.2801118791103363, "learning_rate": 0.00018190912564052067, "loss": 11.6688, "step": 27896 }, { "epoch": 0.5839613162521979, "grad_norm": 0.3633379340171814, "learning_rate": 0.00018190786784531946, "loss": 11.6805, "step": 27897 }, { "epoch": 0.5839822490161601, "grad_norm": 0.34559160470962524, "learning_rate": 0.00018190661001074346, "loss": 11.6773, "step": 27898 }, { "epoch": 0.5840031817801222, "grad_norm": 0.2492319792509079, "learning_rate": 0.00018190535213679336, "loss": 11.6609, "step": 27899 }, { "epoch": 0.5840241145440844, "grad_norm": 0.30122247338294983, "learning_rate": 0.00018190409422346967, "loss": 11.6845, "step": 27900 }, { "epoch": 0.5840450473080465, "grad_norm": 0.24550789594650269, "learning_rate": 0.0001819028362707731, "loss": 11.6722, "step": 27901 }, { "epoch": 0.5840659800720087, "grad_norm": 0.2512803375720978, "learning_rate": 0.00018190157827870413, "loss": 11.6656, "step": 27902 }, { "epoch": 0.5840869128359708, "grad_norm": 0.38326650857925415, "learning_rate": 0.00018190032024726347, "loss": 11.6755, "step": 27903 }, { "epoch": 0.584107845599933, "grad_norm": 0.23760026693344116, "learning_rate": 0.00018189906217645164, "loss": 11.6668, "step": 27904 }, { "epoch": 0.5841287783638952, "grad_norm": 0.4253268837928772, "learning_rate": 0.00018189780406626933, "loss": 11.6612, "step": 27905 }, { "epoch": 0.5841497111278573, "grad_norm": 0.26339688897132874, "learning_rate": 0.0001818965459167171, "loss": 11.6676, "step": 27906 }, { "epoch": 0.5841706438918195, "grad_norm": 0.2626473009586334, "learning_rate": 0.00018189528772779556, "loss": 11.6694, "step": 27907 }, { "epoch": 0.5841915766557816, "grad_norm": 0.24144411087036133, "learning_rate": 0.00018189402949950529, "loss": 11.6763, "step": 27908 }, { "epoch": 0.5842125094197438, "grad_norm": 0.2711939811706543, "learning_rate": 0.00018189277123184695, "loss": 11.6638, "step": 27909 }, { "epoch": 0.584233442183706, "grad_norm": 0.2975413501262665, "learning_rate": 0.0001818915129248211, "loss": 11.6711, "step": 27910 }, { "epoch": 0.5842543749476681, "grad_norm": 0.3555939495563507, "learning_rate": 0.00018189025457842836, "loss": 11.6647, "step": 27911 }, { "epoch": 0.5842753077116303, "grad_norm": 0.2875732183456421, "learning_rate": 0.00018188899619266935, "loss": 11.6799, "step": 27912 }, { "epoch": 0.5842962404755924, "grad_norm": 0.24667838215827942, "learning_rate": 0.0001818877377675446, "loss": 11.6791, "step": 27913 }, { "epoch": 0.5843171732395546, "grad_norm": 0.354519248008728, "learning_rate": 0.00018188647930305485, "loss": 11.682, "step": 27914 }, { "epoch": 0.5843381060035167, "grad_norm": 0.2984766662120819, "learning_rate": 0.00018188522079920055, "loss": 11.6822, "step": 27915 }, { "epoch": 0.5843590387674789, "grad_norm": 0.32734009623527527, "learning_rate": 0.00018188396225598245, "loss": 11.6763, "step": 27916 }, { "epoch": 0.5843799715314411, "grad_norm": 0.25637367367744446, "learning_rate": 0.0001818827036734011, "loss": 11.6646, "step": 27917 }, { "epoch": 0.5844009042954031, "grad_norm": 0.3179987370967865, "learning_rate": 0.00018188144505145703, "loss": 11.6742, "step": 27918 }, { "epoch": 0.5844218370593653, "grad_norm": 0.26255926489830017, "learning_rate": 0.00018188018639015094, "loss": 11.658, "step": 27919 }, { "epoch": 0.5844427698233274, "grad_norm": 0.2957150340080261, "learning_rate": 0.0001818789276894834, "loss": 11.6724, "step": 27920 }, { "epoch": 0.5844637025872896, "grad_norm": 0.34149789810180664, "learning_rate": 0.000181877668949455, "loss": 11.6818, "step": 27921 }, { "epoch": 0.5844846353512517, "grad_norm": 0.2696169912815094, "learning_rate": 0.00018187641017006637, "loss": 11.6799, "step": 27922 }, { "epoch": 0.5845055681152139, "grad_norm": 0.2605540156364441, "learning_rate": 0.00018187515135131816, "loss": 11.6828, "step": 27923 }, { "epoch": 0.5845265008791761, "grad_norm": 0.2865141034126282, "learning_rate": 0.00018187389249321089, "loss": 11.6737, "step": 27924 }, { "epoch": 0.5845474336431382, "grad_norm": 0.26524633169174194, "learning_rate": 0.00018187263359574515, "loss": 11.6635, "step": 27925 }, { "epoch": 0.5845683664071004, "grad_norm": 0.28782737255096436, "learning_rate": 0.00018187137465892165, "loss": 11.6572, "step": 27926 }, { "epoch": 0.5845892991710625, "grad_norm": 0.24424836039543152, "learning_rate": 0.00018187011568274094, "loss": 11.677, "step": 27927 }, { "epoch": 0.5846102319350247, "grad_norm": 0.3519025146961212, "learning_rate": 0.0001818688566672036, "loss": 11.6937, "step": 27928 }, { "epoch": 0.5846311646989869, "grad_norm": 0.37374797463417053, "learning_rate": 0.0001818675976123103, "loss": 11.6827, "step": 27929 }, { "epoch": 0.584652097462949, "grad_norm": 0.3111783266067505, "learning_rate": 0.00018186633851806157, "loss": 11.6647, "step": 27930 }, { "epoch": 0.5846730302269112, "grad_norm": 0.26357683539390564, "learning_rate": 0.00018186507938445804, "loss": 11.662, "step": 27931 }, { "epoch": 0.5846939629908733, "grad_norm": 0.323176771402359, "learning_rate": 0.00018186382021150035, "loss": 11.6732, "step": 27932 }, { "epoch": 0.5847148957548355, "grad_norm": 0.25220203399658203, "learning_rate": 0.00018186256099918908, "loss": 11.6742, "step": 27933 }, { "epoch": 0.5847358285187976, "grad_norm": 0.49391797184944153, "learning_rate": 0.00018186130174752485, "loss": 11.6739, "step": 27934 }, { "epoch": 0.5847567612827598, "grad_norm": 0.3109073340892792, "learning_rate": 0.00018186004245650822, "loss": 11.6804, "step": 27935 }, { "epoch": 0.584777694046722, "grad_norm": 0.2773533761501312, "learning_rate": 0.00018185878312613983, "loss": 11.6591, "step": 27936 }, { "epoch": 0.5847986268106841, "grad_norm": 0.326078861951828, "learning_rate": 0.0001818575237564203, "loss": 11.6676, "step": 27937 }, { "epoch": 0.5848195595746463, "grad_norm": 0.292192280292511, "learning_rate": 0.0001818562643473502, "loss": 11.6749, "step": 27938 }, { "epoch": 0.5848404923386084, "grad_norm": 0.2662944495677948, "learning_rate": 0.00018185500489893017, "loss": 11.6671, "step": 27939 }, { "epoch": 0.5848614251025706, "grad_norm": 0.30315738916397095, "learning_rate": 0.0001818537454111608, "loss": 11.6667, "step": 27940 }, { "epoch": 0.5848823578665326, "grad_norm": 0.24262221157550812, "learning_rate": 0.0001818524858840427, "loss": 11.6666, "step": 27941 }, { "epoch": 0.5849032906304948, "grad_norm": 0.2361229807138443, "learning_rate": 0.00018185122631757643, "loss": 11.6655, "step": 27942 }, { "epoch": 0.584924223394457, "grad_norm": 0.31797492504119873, "learning_rate": 0.00018184996671176266, "loss": 11.6796, "step": 27943 }, { "epoch": 0.5849451561584191, "grad_norm": 0.34801965951919556, "learning_rate": 0.000181848707066602, "loss": 11.6625, "step": 27944 }, { "epoch": 0.5849660889223813, "grad_norm": 0.2934912145137787, "learning_rate": 0.00018184744738209498, "loss": 11.6841, "step": 27945 }, { "epoch": 0.5849870216863434, "grad_norm": 0.28813862800598145, "learning_rate": 0.0001818461876582423, "loss": 11.6659, "step": 27946 }, { "epoch": 0.5850079544503056, "grad_norm": 0.31671595573425293, "learning_rate": 0.00018184492789504443, "loss": 11.6704, "step": 27947 }, { "epoch": 0.5850288872142678, "grad_norm": 0.3724100887775421, "learning_rate": 0.00018184366809250214, "loss": 11.6791, "step": 27948 }, { "epoch": 0.5850498199782299, "grad_norm": 0.2943747043609619, "learning_rate": 0.00018184240825061597, "loss": 11.6883, "step": 27949 }, { "epoch": 0.5850707527421921, "grad_norm": 0.35681426525115967, "learning_rate": 0.00018184114836938646, "loss": 11.6889, "step": 27950 }, { "epoch": 0.5850916855061542, "grad_norm": 0.39842066168785095, "learning_rate": 0.0001818398884488143, "loss": 11.6723, "step": 27951 }, { "epoch": 0.5851126182701164, "grad_norm": 0.42622944712638855, "learning_rate": 0.00018183862848890006, "loss": 11.6672, "step": 27952 }, { "epoch": 0.5851335510340785, "grad_norm": 0.2967126667499542, "learning_rate": 0.00018183736848964436, "loss": 11.674, "step": 27953 }, { "epoch": 0.5851544837980407, "grad_norm": 0.2879374921321869, "learning_rate": 0.00018183610845104778, "loss": 11.6703, "step": 27954 }, { "epoch": 0.5851754165620029, "grad_norm": 0.3200448453426361, "learning_rate": 0.00018183484837311093, "loss": 11.6721, "step": 27955 }, { "epoch": 0.585196349325965, "grad_norm": 0.6909722089767456, "learning_rate": 0.00018183358825583446, "loss": 11.6699, "step": 27956 }, { "epoch": 0.5852172820899272, "grad_norm": 0.3528944253921509, "learning_rate": 0.00018183232809921893, "loss": 11.6721, "step": 27957 }, { "epoch": 0.5852382148538893, "grad_norm": 0.2982797622680664, "learning_rate": 0.00018183106790326496, "loss": 11.6675, "step": 27958 }, { "epoch": 0.5852591476178515, "grad_norm": 0.2888232469558716, "learning_rate": 0.00018182980766797316, "loss": 11.6501, "step": 27959 }, { "epoch": 0.5852800803818136, "grad_norm": 0.26982805132865906, "learning_rate": 0.00018182854739334415, "loss": 11.6732, "step": 27960 }, { "epoch": 0.5853010131457758, "grad_norm": 0.3323906660079956, "learning_rate": 0.00018182728707937846, "loss": 11.6743, "step": 27961 }, { "epoch": 0.585321945909738, "grad_norm": 0.33362096548080444, "learning_rate": 0.0001818260267260768, "loss": 11.6732, "step": 27962 }, { "epoch": 0.5853428786737, "grad_norm": 0.3486010730266571, "learning_rate": 0.0001818247663334397, "loss": 11.6583, "step": 27963 }, { "epoch": 0.5853638114376623, "grad_norm": 0.271617591381073, "learning_rate": 0.0001818235059014678, "loss": 11.6762, "step": 27964 }, { "epoch": 0.5853847442016243, "grad_norm": 0.28434234857559204, "learning_rate": 0.00018182224543016176, "loss": 11.6707, "step": 27965 }, { "epoch": 0.5854056769655865, "grad_norm": 0.3508417010307312, "learning_rate": 0.0001818209849195221, "loss": 11.6695, "step": 27966 }, { "epoch": 0.5854266097295487, "grad_norm": 0.2714918851852417, "learning_rate": 0.00018181972436954943, "loss": 11.6706, "step": 27967 }, { "epoch": 0.5854475424935108, "grad_norm": 0.2999404966831207, "learning_rate": 0.00018181846378024437, "loss": 11.6706, "step": 27968 }, { "epoch": 0.585468475257473, "grad_norm": 0.2787121534347534, "learning_rate": 0.00018181720315160754, "loss": 11.6499, "step": 27969 }, { "epoch": 0.5854894080214351, "grad_norm": 0.34814780950546265, "learning_rate": 0.00018181594248363955, "loss": 11.6689, "step": 27970 }, { "epoch": 0.5855103407853973, "grad_norm": 0.24801352620124817, "learning_rate": 0.000181814681776341, "loss": 11.6666, "step": 27971 }, { "epoch": 0.5855312735493594, "grad_norm": 0.31380248069763184, "learning_rate": 0.0001818134210297125, "loss": 11.6884, "step": 27972 }, { "epoch": 0.5855522063133216, "grad_norm": 0.27934885025024414, "learning_rate": 0.00018181216024375463, "loss": 11.6717, "step": 27973 }, { "epoch": 0.5855731390772838, "grad_norm": 0.2989882230758667, "learning_rate": 0.000181810899418468, "loss": 11.6707, "step": 27974 }, { "epoch": 0.5855940718412459, "grad_norm": 0.3576064109802246, "learning_rate": 0.00018180963855385327, "loss": 11.6814, "step": 27975 }, { "epoch": 0.5856150046052081, "grad_norm": 0.39510220289230347, "learning_rate": 0.00018180837764991098, "loss": 11.6721, "step": 27976 }, { "epoch": 0.5856359373691702, "grad_norm": 0.2696254253387451, "learning_rate": 0.00018180711670664178, "loss": 11.679, "step": 27977 }, { "epoch": 0.5856568701331324, "grad_norm": 0.3091244101524353, "learning_rate": 0.00018180585572404625, "loss": 11.6631, "step": 27978 }, { "epoch": 0.5856778028970945, "grad_norm": 0.3298517167568207, "learning_rate": 0.000181804594702125, "loss": 11.6775, "step": 27979 }, { "epoch": 0.5856987356610567, "grad_norm": 0.25692158937454224, "learning_rate": 0.00018180333364087867, "loss": 11.6667, "step": 27980 }, { "epoch": 0.5857196684250189, "grad_norm": 0.26994091272354126, "learning_rate": 0.00018180207254030783, "loss": 11.6583, "step": 27981 }, { "epoch": 0.585740601188981, "grad_norm": 0.30661243200302124, "learning_rate": 0.0001818008114004131, "loss": 11.6584, "step": 27982 }, { "epoch": 0.5857615339529432, "grad_norm": 0.3178669810295105, "learning_rate": 0.00018179955022119503, "loss": 11.6551, "step": 27983 }, { "epoch": 0.5857824667169053, "grad_norm": 0.2823962867259979, "learning_rate": 0.00018179828900265432, "loss": 11.6779, "step": 27984 }, { "epoch": 0.5858033994808675, "grad_norm": 0.2360466718673706, "learning_rate": 0.00018179702774479153, "loss": 11.6826, "step": 27985 }, { "epoch": 0.5858243322448297, "grad_norm": 0.3045201301574707, "learning_rate": 0.00018179576644760726, "loss": 11.6699, "step": 27986 }, { "epoch": 0.5858452650087917, "grad_norm": 0.2959233820438385, "learning_rate": 0.00018179450511110213, "loss": 11.6836, "step": 27987 }, { "epoch": 0.585866197772754, "grad_norm": 0.2849060297012329, "learning_rate": 0.00018179324373527675, "loss": 11.6619, "step": 27988 }, { "epoch": 0.585887130536716, "grad_norm": 0.31097736954689026, "learning_rate": 0.0001817919823201317, "loss": 11.6704, "step": 27989 }, { "epoch": 0.5859080633006782, "grad_norm": 0.26416686177253723, "learning_rate": 0.00018179072086566765, "loss": 11.6622, "step": 27990 }, { "epoch": 0.5859289960646403, "grad_norm": 0.25176283717155457, "learning_rate": 0.00018178945937188509, "loss": 11.6682, "step": 27991 }, { "epoch": 0.5859499288286025, "grad_norm": 0.3513520658016205, "learning_rate": 0.00018178819783878474, "loss": 11.6723, "step": 27992 }, { "epoch": 0.5859708615925647, "grad_norm": 0.3050026595592499, "learning_rate": 0.00018178693626636717, "loss": 11.657, "step": 27993 }, { "epoch": 0.5859917943565268, "grad_norm": 0.2769547402858734, "learning_rate": 0.00018178567465463298, "loss": 11.6798, "step": 27994 }, { "epoch": 0.586012727120489, "grad_norm": 0.28595489263534546, "learning_rate": 0.00018178441300358274, "loss": 11.6658, "step": 27995 }, { "epoch": 0.5860336598844511, "grad_norm": 0.2923218905925751, "learning_rate": 0.00018178315131321714, "loss": 11.6534, "step": 27996 }, { "epoch": 0.5860545926484133, "grad_norm": 0.2771896421909332, "learning_rate": 0.00018178188958353672, "loss": 11.6538, "step": 27997 }, { "epoch": 0.5860755254123754, "grad_norm": 0.2735807001590729, "learning_rate": 0.0001817806278145421, "loss": 11.6644, "step": 27998 }, { "epoch": 0.5860964581763376, "grad_norm": 0.2407388985157013, "learning_rate": 0.00018177936600623392, "loss": 11.6779, "step": 27999 }, { "epoch": 0.5861173909402998, "grad_norm": 0.26565247774124146, "learning_rate": 0.00018177810415861273, "loss": 11.6676, "step": 28000 }, { "epoch": 0.5861173909402998, "eval_loss": 11.671714782714844, "eval_runtime": 34.3359, "eval_samples_per_second": 27.988, "eval_steps_per_second": 7.019, "step": 28000 }, { "epoch": 0.5861383237042619, "grad_norm": 0.2844977378845215, "learning_rate": 0.0001817768422716792, "loss": 11.6612, "step": 28001 }, { "epoch": 0.5861592564682241, "grad_norm": 0.2879045307636261, "learning_rate": 0.00018177558034543386, "loss": 11.6566, "step": 28002 }, { "epoch": 0.5861801892321862, "grad_norm": 0.31676381826400757, "learning_rate": 0.00018177431837987736, "loss": 11.6814, "step": 28003 }, { "epoch": 0.5862011219961484, "grad_norm": 0.3033123016357422, "learning_rate": 0.00018177305637501036, "loss": 11.6539, "step": 28004 }, { "epoch": 0.5862220547601106, "grad_norm": 0.28413069248199463, "learning_rate": 0.00018177179433083337, "loss": 11.6818, "step": 28005 }, { "epoch": 0.5862429875240727, "grad_norm": 0.2599765360355377, "learning_rate": 0.00018177053224734707, "loss": 11.6583, "step": 28006 }, { "epoch": 0.5862639202880349, "grad_norm": 0.2949115037918091, "learning_rate": 0.000181769270124552, "loss": 11.6706, "step": 28007 }, { "epoch": 0.586284853051997, "grad_norm": 0.30438441038131714, "learning_rate": 0.00018176800796244883, "loss": 11.6902, "step": 28008 }, { "epoch": 0.5863057858159592, "grad_norm": 0.24391409754753113, "learning_rate": 0.0001817667457610381, "loss": 11.6662, "step": 28009 }, { "epoch": 0.5863267185799212, "grad_norm": 0.28794366121292114, "learning_rate": 0.0001817654835203205, "loss": 11.6778, "step": 28010 }, { "epoch": 0.5863476513438834, "grad_norm": 0.2734566628932953, "learning_rate": 0.00018176422124029657, "loss": 11.6709, "step": 28011 }, { "epoch": 0.5863685841078456, "grad_norm": 0.2601563632488251, "learning_rate": 0.00018176295892096696, "loss": 11.6839, "step": 28012 }, { "epoch": 0.5863895168718077, "grad_norm": 0.3034440577030182, "learning_rate": 0.00018176169656233224, "loss": 11.6863, "step": 28013 }, { "epoch": 0.5864104496357699, "grad_norm": 0.25575393438339233, "learning_rate": 0.00018176043416439304, "loss": 11.6634, "step": 28014 }, { "epoch": 0.586431382399732, "grad_norm": 0.2619331181049347, "learning_rate": 0.00018175917172714998, "loss": 11.675, "step": 28015 }, { "epoch": 0.5864523151636942, "grad_norm": 0.3171784579753876, "learning_rate": 0.00018175790925060363, "loss": 11.6814, "step": 28016 }, { "epoch": 0.5864732479276563, "grad_norm": 0.27320176362991333, "learning_rate": 0.0001817566467347546, "loss": 11.6916, "step": 28017 }, { "epoch": 0.5864941806916185, "grad_norm": 0.3529120087623596, "learning_rate": 0.00018175538417960355, "loss": 11.6765, "step": 28018 }, { "epoch": 0.5865151134555807, "grad_norm": 0.31380119919776917, "learning_rate": 0.000181754121585151, "loss": 11.6571, "step": 28019 }, { "epoch": 0.5865360462195428, "grad_norm": 0.33260586857795715, "learning_rate": 0.00018175285895139764, "loss": 11.6684, "step": 28020 }, { "epoch": 0.586556978983505, "grad_norm": 0.27599653601646423, "learning_rate": 0.00018175159627834404, "loss": 11.6573, "step": 28021 }, { "epoch": 0.5865779117474671, "grad_norm": 0.30899515748023987, "learning_rate": 0.00018175033356599077, "loss": 11.6946, "step": 28022 }, { "epoch": 0.5865988445114293, "grad_norm": 0.34046220779418945, "learning_rate": 0.0001817490708143385, "loss": 11.6518, "step": 28023 }, { "epoch": 0.5866197772753915, "grad_norm": 0.2890124022960663, "learning_rate": 0.00018174780802338784, "loss": 11.6883, "step": 28024 }, { "epoch": 0.5866407100393536, "grad_norm": 0.2455945760011673, "learning_rate": 0.00018174654519313935, "loss": 11.6764, "step": 28025 }, { "epoch": 0.5866616428033158, "grad_norm": 0.26993852853775024, "learning_rate": 0.00018174528232359368, "loss": 11.6758, "step": 28026 }, { "epoch": 0.5866825755672779, "grad_norm": 0.263589084148407, "learning_rate": 0.0001817440194147514, "loss": 11.6779, "step": 28027 }, { "epoch": 0.5867035083312401, "grad_norm": 0.28148260712623596, "learning_rate": 0.00018174275646661313, "loss": 11.6855, "step": 28028 }, { "epoch": 0.5867244410952022, "grad_norm": 0.25145161151885986, "learning_rate": 0.00018174149347917947, "loss": 11.6774, "step": 28029 }, { "epoch": 0.5867453738591644, "grad_norm": 0.3957221210002899, "learning_rate": 0.00018174023045245102, "loss": 11.6667, "step": 28030 }, { "epoch": 0.5867663066231266, "grad_norm": 0.31862184405326843, "learning_rate": 0.00018173896738642845, "loss": 11.6637, "step": 28031 }, { "epoch": 0.5867872393870887, "grad_norm": 0.2676486372947693, "learning_rate": 0.00018173770428111228, "loss": 11.6696, "step": 28032 }, { "epoch": 0.5868081721510509, "grad_norm": 0.26311105489730835, "learning_rate": 0.00018173644113650316, "loss": 11.6595, "step": 28033 }, { "epoch": 0.586829104915013, "grad_norm": 0.2588288486003876, "learning_rate": 0.00018173517795260172, "loss": 11.667, "step": 28034 }, { "epoch": 0.5868500376789751, "grad_norm": 0.3595130145549774, "learning_rate": 0.00018173391472940851, "loss": 11.6621, "step": 28035 }, { "epoch": 0.5868709704429372, "grad_norm": 0.31749969720840454, "learning_rate": 0.0001817326514669242, "loss": 11.6599, "step": 28036 }, { "epoch": 0.5868919032068994, "grad_norm": 0.29885533452033997, "learning_rate": 0.00018173138816514935, "loss": 11.678, "step": 28037 }, { "epoch": 0.5869128359708616, "grad_norm": 0.31010082364082336, "learning_rate": 0.0001817301248240846, "loss": 11.6833, "step": 28038 }, { "epoch": 0.5869337687348237, "grad_norm": 0.35765355825424194, "learning_rate": 0.0001817288614437305, "loss": 11.683, "step": 28039 }, { "epoch": 0.5869547014987859, "grad_norm": 0.27704671025276184, "learning_rate": 0.00018172759802408773, "loss": 11.6497, "step": 28040 }, { "epoch": 0.586975634262748, "grad_norm": 0.28457963466644287, "learning_rate": 0.00018172633456515686, "loss": 11.6697, "step": 28041 }, { "epoch": 0.5869965670267102, "grad_norm": 0.33730632066726685, "learning_rate": 0.00018172507106693853, "loss": 11.6719, "step": 28042 }, { "epoch": 0.5870174997906724, "grad_norm": 0.2884766757488251, "learning_rate": 0.00018172380752943328, "loss": 11.6542, "step": 28043 }, { "epoch": 0.5870384325546345, "grad_norm": 0.28481408953666687, "learning_rate": 0.00018172254395264177, "loss": 11.6645, "step": 28044 }, { "epoch": 0.5870593653185967, "grad_norm": 0.24925613403320312, "learning_rate": 0.00018172128033656462, "loss": 11.6627, "step": 28045 }, { "epoch": 0.5870802980825588, "grad_norm": 0.35826319456100464, "learning_rate": 0.00018172001668120236, "loss": 11.6753, "step": 28046 }, { "epoch": 0.587101230846521, "grad_norm": 0.24371309578418732, "learning_rate": 0.0001817187529865557, "loss": 11.6679, "step": 28047 }, { "epoch": 0.5871221636104831, "grad_norm": 0.3318624496459961, "learning_rate": 0.00018171748925262517, "loss": 11.672, "step": 28048 }, { "epoch": 0.5871430963744453, "grad_norm": 0.355683833360672, "learning_rate": 0.0001817162254794114, "loss": 11.6793, "step": 28049 }, { "epoch": 0.5871640291384075, "grad_norm": 0.3143472969532013, "learning_rate": 0.00018171496166691505, "loss": 11.683, "step": 28050 }, { "epoch": 0.5871849619023696, "grad_norm": 0.24701657891273499, "learning_rate": 0.00018171369781513663, "loss": 11.6592, "step": 28051 }, { "epoch": 0.5872058946663318, "grad_norm": 0.30118560791015625, "learning_rate": 0.00018171243392407682, "loss": 11.6576, "step": 28052 }, { "epoch": 0.5872268274302939, "grad_norm": 0.2891332507133484, "learning_rate": 0.00018171116999373622, "loss": 11.6779, "step": 28053 }, { "epoch": 0.5872477601942561, "grad_norm": 0.2781587541103363, "learning_rate": 0.00018170990602411538, "loss": 11.6789, "step": 28054 }, { "epoch": 0.5872686929582182, "grad_norm": 0.3755672872066498, "learning_rate": 0.000181708642015215, "loss": 11.6762, "step": 28055 }, { "epoch": 0.5872896257221804, "grad_norm": 0.26012104749679565, "learning_rate": 0.0001817073779670356, "loss": 11.6734, "step": 28056 }, { "epoch": 0.5873105584861426, "grad_norm": 0.29598507285118103, "learning_rate": 0.00018170611387957784, "loss": 11.6855, "step": 28057 }, { "epoch": 0.5873314912501046, "grad_norm": 0.36220434308052063, "learning_rate": 0.00018170484975284232, "loss": 11.6935, "step": 28058 }, { "epoch": 0.5873524240140668, "grad_norm": 0.3025674819946289, "learning_rate": 0.00018170358558682961, "loss": 11.6759, "step": 28059 }, { "epoch": 0.5873733567780289, "grad_norm": 0.2702501118183136, "learning_rate": 0.0001817023213815404, "loss": 11.6612, "step": 28060 }, { "epoch": 0.5873942895419911, "grad_norm": 0.2851322591304779, "learning_rate": 0.00018170105713697523, "loss": 11.6528, "step": 28061 }, { "epoch": 0.5874152223059533, "grad_norm": 0.2969639301300049, "learning_rate": 0.0001816997928531347, "loss": 11.6747, "step": 28062 }, { "epoch": 0.5874361550699154, "grad_norm": 0.3352283537387848, "learning_rate": 0.00018169852853001944, "loss": 11.6777, "step": 28063 }, { "epoch": 0.5874570878338776, "grad_norm": 0.2519354522228241, "learning_rate": 0.00018169726416763008, "loss": 11.6644, "step": 28064 }, { "epoch": 0.5874780205978397, "grad_norm": 0.3099772036075592, "learning_rate": 0.0001816959997659672, "loss": 11.6829, "step": 28065 }, { "epoch": 0.5874989533618019, "grad_norm": 0.3931761384010315, "learning_rate": 0.00018169473532503143, "loss": 11.6664, "step": 28066 }, { "epoch": 0.587519886125764, "grad_norm": 0.3192233145236969, "learning_rate": 0.00018169347084482339, "loss": 11.6488, "step": 28067 }, { "epoch": 0.5875408188897262, "grad_norm": 0.31784072518348694, "learning_rate": 0.00018169220632534362, "loss": 11.6581, "step": 28068 }, { "epoch": 0.5875617516536884, "grad_norm": 0.24064455926418304, "learning_rate": 0.00018169094176659275, "loss": 11.6729, "step": 28069 }, { "epoch": 0.5875826844176505, "grad_norm": 0.26124846935272217, "learning_rate": 0.00018168967716857145, "loss": 11.6849, "step": 28070 }, { "epoch": 0.5876036171816127, "grad_norm": 0.30558013916015625, "learning_rate": 0.00018168841253128027, "loss": 11.6887, "step": 28071 }, { "epoch": 0.5876245499455748, "grad_norm": 0.3380304276943207, "learning_rate": 0.00018168714785471982, "loss": 11.662, "step": 28072 }, { "epoch": 0.587645482709537, "grad_norm": 0.3101462721824646, "learning_rate": 0.00018168588313889075, "loss": 11.6609, "step": 28073 }, { "epoch": 0.5876664154734991, "grad_norm": 0.35330304503440857, "learning_rate": 0.0001816846183837936, "loss": 11.6765, "step": 28074 }, { "epoch": 0.5876873482374613, "grad_norm": 0.3337247967720032, "learning_rate": 0.00018168335358942903, "loss": 11.6699, "step": 28075 }, { "epoch": 0.5877082810014235, "grad_norm": 0.21244965493679047, "learning_rate": 0.00018168208875579765, "loss": 11.6655, "step": 28076 }, { "epoch": 0.5877292137653856, "grad_norm": 0.2783333957195282, "learning_rate": 0.00018168082388290006, "loss": 11.6748, "step": 28077 }, { "epoch": 0.5877501465293478, "grad_norm": 0.2731766998767853, "learning_rate": 0.00018167955897073682, "loss": 11.6688, "step": 28078 }, { "epoch": 0.5877710792933099, "grad_norm": 0.24504832923412323, "learning_rate": 0.00018167829401930863, "loss": 11.679, "step": 28079 }, { "epoch": 0.587792012057272, "grad_norm": 0.3205123841762543, "learning_rate": 0.000181677029028616, "loss": 11.6699, "step": 28080 }, { "epoch": 0.5878129448212343, "grad_norm": 0.28158533573150635, "learning_rate": 0.00018167576399865962, "loss": 11.6835, "step": 28081 }, { "epoch": 0.5878338775851963, "grad_norm": 0.26579543948173523, "learning_rate": 0.00018167449892944005, "loss": 11.6582, "step": 28082 }, { "epoch": 0.5878548103491585, "grad_norm": 0.29699864983558655, "learning_rate": 0.0001816732338209579, "loss": 11.6748, "step": 28083 }, { "epoch": 0.5878757431131206, "grad_norm": 0.35424789786338806, "learning_rate": 0.00018167196867321383, "loss": 11.6706, "step": 28084 }, { "epoch": 0.5878966758770828, "grad_norm": 0.31637245416641235, "learning_rate": 0.00018167070348620835, "loss": 11.6687, "step": 28085 }, { "epoch": 0.5879176086410449, "grad_norm": 0.3213335871696472, "learning_rate": 0.00018166943825994217, "loss": 11.6737, "step": 28086 }, { "epoch": 0.5879385414050071, "grad_norm": 0.2943051755428314, "learning_rate": 0.00018166817299441584, "loss": 11.6791, "step": 28087 }, { "epoch": 0.5879594741689693, "grad_norm": 0.2658640146255493, "learning_rate": 0.00018166690768963, "loss": 11.6604, "step": 28088 }, { "epoch": 0.5879804069329314, "grad_norm": 0.3250765800476074, "learning_rate": 0.00018166564234558524, "loss": 11.6685, "step": 28089 }, { "epoch": 0.5880013396968936, "grad_norm": 0.2352399230003357, "learning_rate": 0.00018166437696228214, "loss": 11.6591, "step": 28090 }, { "epoch": 0.5880222724608557, "grad_norm": 0.3328976035118103, "learning_rate": 0.00018166311153972135, "loss": 11.6629, "step": 28091 }, { "epoch": 0.5880432052248179, "grad_norm": 0.29950472712516785, "learning_rate": 0.00018166184607790347, "loss": 11.6615, "step": 28092 }, { "epoch": 0.58806413798878, "grad_norm": 0.2953917682170868, "learning_rate": 0.0001816605805768291, "loss": 11.6642, "step": 28093 }, { "epoch": 0.5880850707527422, "grad_norm": 0.29454341530799866, "learning_rate": 0.00018165931503649887, "loss": 11.6687, "step": 28094 }, { "epoch": 0.5881060035167044, "grad_norm": 0.33817416429519653, "learning_rate": 0.00018165804945691332, "loss": 11.6666, "step": 28095 }, { "epoch": 0.5881269362806665, "grad_norm": 0.314676970243454, "learning_rate": 0.00018165678383807315, "loss": 11.6768, "step": 28096 }, { "epoch": 0.5881478690446287, "grad_norm": 0.3503023684024811, "learning_rate": 0.00018165551817997893, "loss": 11.6933, "step": 28097 }, { "epoch": 0.5881688018085908, "grad_norm": 0.48505735397338867, "learning_rate": 0.00018165425248263127, "loss": 11.6721, "step": 28098 }, { "epoch": 0.588189734572553, "grad_norm": 0.30089932680130005, "learning_rate": 0.00018165298674603075, "loss": 11.6718, "step": 28099 }, { "epoch": 0.5882106673365151, "grad_norm": 0.3427795469760895, "learning_rate": 0.00018165172097017802, "loss": 11.6819, "step": 28100 }, { "epoch": 0.5882316001004773, "grad_norm": 0.2861281633377075, "learning_rate": 0.00018165045515507368, "loss": 11.673, "step": 28101 }, { "epoch": 0.5882525328644395, "grad_norm": 0.31793031096458435, "learning_rate": 0.0001816491893007183, "loss": 11.6598, "step": 28102 }, { "epoch": 0.5882734656284015, "grad_norm": 0.29887011647224426, "learning_rate": 0.00018164792340711255, "loss": 11.6701, "step": 28103 }, { "epoch": 0.5882943983923637, "grad_norm": 0.33644846081733704, "learning_rate": 0.00018164665747425697, "loss": 11.6716, "step": 28104 }, { "epoch": 0.5883153311563258, "grad_norm": 0.3472782373428345, "learning_rate": 0.00018164539150215223, "loss": 11.6793, "step": 28105 }, { "epoch": 0.588336263920288, "grad_norm": 0.42033180594444275, "learning_rate": 0.00018164412549079892, "loss": 11.6643, "step": 28106 }, { "epoch": 0.5883571966842502, "grad_norm": 0.30581092834472656, "learning_rate": 0.00018164285944019765, "loss": 11.6652, "step": 28107 }, { "epoch": 0.5883781294482123, "grad_norm": 0.2713635563850403, "learning_rate": 0.000181641593350349, "loss": 11.6671, "step": 28108 }, { "epoch": 0.5883990622121745, "grad_norm": 0.37839174270629883, "learning_rate": 0.0001816403272212536, "loss": 11.6827, "step": 28109 }, { "epoch": 0.5884199949761366, "grad_norm": 0.28908824920654297, "learning_rate": 0.00018163906105291206, "loss": 11.6722, "step": 28110 }, { "epoch": 0.5884409277400988, "grad_norm": 0.23889823257923126, "learning_rate": 0.00018163779484532498, "loss": 11.6757, "step": 28111 }, { "epoch": 0.5884618605040609, "grad_norm": 0.4009840190410614, "learning_rate": 0.000181636528598493, "loss": 11.6839, "step": 28112 }, { "epoch": 0.5884827932680231, "grad_norm": 0.2790420353412628, "learning_rate": 0.00018163526231241667, "loss": 11.6791, "step": 28113 }, { "epoch": 0.5885037260319853, "grad_norm": 0.2801447808742523, "learning_rate": 0.00018163399598709667, "loss": 11.6868, "step": 28114 }, { "epoch": 0.5885246587959474, "grad_norm": 0.26276275515556335, "learning_rate": 0.00018163272962253357, "loss": 11.6776, "step": 28115 }, { "epoch": 0.5885455915599096, "grad_norm": 0.28658753633499146, "learning_rate": 0.00018163146321872797, "loss": 11.672, "step": 28116 }, { "epoch": 0.5885665243238717, "grad_norm": 0.28526657819747925, "learning_rate": 0.00018163019677568047, "loss": 11.6607, "step": 28117 }, { "epoch": 0.5885874570878339, "grad_norm": 0.32680147886276245, "learning_rate": 0.00018162893029339172, "loss": 11.6643, "step": 28118 }, { "epoch": 0.588608389851796, "grad_norm": 0.3414723873138428, "learning_rate": 0.0001816276637718623, "loss": 11.663, "step": 28119 }, { "epoch": 0.5886293226157582, "grad_norm": 0.32541364431381226, "learning_rate": 0.00018162639721109285, "loss": 11.6823, "step": 28120 }, { "epoch": 0.5886502553797204, "grad_norm": 0.2595129907131195, "learning_rate": 0.00018162513061108392, "loss": 11.665, "step": 28121 }, { "epoch": 0.5886711881436825, "grad_norm": 0.30032843351364136, "learning_rate": 0.00018162386397183616, "loss": 11.6718, "step": 28122 }, { "epoch": 0.5886921209076447, "grad_norm": 0.26431581377983093, "learning_rate": 0.0001816225972933502, "loss": 11.6709, "step": 28123 }, { "epoch": 0.5887130536716068, "grad_norm": 0.2731528580188751, "learning_rate": 0.0001816213305756266, "loss": 11.6685, "step": 28124 }, { "epoch": 0.588733986435569, "grad_norm": 0.391674280166626, "learning_rate": 0.000181620063818666, "loss": 11.6693, "step": 28125 }, { "epoch": 0.5887549191995312, "grad_norm": 0.3327976167201996, "learning_rate": 0.00018161879702246897, "loss": 11.6619, "step": 28126 }, { "epoch": 0.5887758519634932, "grad_norm": 0.26822665333747864, "learning_rate": 0.0001816175301870362, "loss": 11.6473, "step": 28127 }, { "epoch": 0.5887967847274554, "grad_norm": 0.37059348821640015, "learning_rate": 0.0001816162633123682, "loss": 11.6624, "step": 28128 }, { "epoch": 0.5888177174914175, "grad_norm": 0.2995116114616394, "learning_rate": 0.00018161499639846563, "loss": 11.6569, "step": 28129 }, { "epoch": 0.5888386502553797, "grad_norm": 0.30180633068084717, "learning_rate": 0.00018161372944532916, "loss": 11.671, "step": 28130 }, { "epoch": 0.5888595830193418, "grad_norm": 0.2986830472946167, "learning_rate": 0.00018161246245295928, "loss": 11.671, "step": 28131 }, { "epoch": 0.588880515783304, "grad_norm": 0.2679503560066223, "learning_rate": 0.00018161119542135665, "loss": 11.6678, "step": 28132 }, { "epoch": 0.5889014485472662, "grad_norm": 0.334938645362854, "learning_rate": 0.0001816099283505219, "loss": 11.6661, "step": 28133 }, { "epoch": 0.5889223813112283, "grad_norm": 0.2622833549976349, "learning_rate": 0.00018160866124045558, "loss": 11.6739, "step": 28134 }, { "epoch": 0.5889433140751905, "grad_norm": 0.36648598313331604, "learning_rate": 0.0001816073940911584, "loss": 11.6731, "step": 28135 }, { "epoch": 0.5889642468391526, "grad_norm": 0.35758379101753235, "learning_rate": 0.00018160612690263088, "loss": 11.6815, "step": 28136 }, { "epoch": 0.5889851796031148, "grad_norm": 0.28187295794487, "learning_rate": 0.00018160485967487366, "loss": 11.674, "step": 28137 }, { "epoch": 0.5890061123670769, "grad_norm": 0.25634971261024475, "learning_rate": 0.00018160359240788738, "loss": 11.6584, "step": 28138 }, { "epoch": 0.5890270451310391, "grad_norm": 0.25723543763160706, "learning_rate": 0.00018160232510167255, "loss": 11.6776, "step": 28139 }, { "epoch": 0.5890479778950013, "grad_norm": 0.24842952191829681, "learning_rate": 0.00018160105775622993, "loss": 11.6655, "step": 28140 }, { "epoch": 0.5890689106589634, "grad_norm": 0.27740946412086487, "learning_rate": 0.00018159979037156, "loss": 11.6838, "step": 28141 }, { "epoch": 0.5890898434229256, "grad_norm": 0.26766666769981384, "learning_rate": 0.0001815985229476634, "loss": 11.6806, "step": 28142 }, { "epoch": 0.5891107761868877, "grad_norm": 0.2870532274246216, "learning_rate": 0.00018159725548454078, "loss": 11.6713, "step": 28143 }, { "epoch": 0.5891317089508499, "grad_norm": 0.3051164448261261, "learning_rate": 0.00018159598798219272, "loss": 11.6853, "step": 28144 }, { "epoch": 0.5891526417148121, "grad_norm": 0.29101651906967163, "learning_rate": 0.00018159472044061985, "loss": 11.6807, "step": 28145 }, { "epoch": 0.5891735744787742, "grad_norm": 0.3148728609085083, "learning_rate": 0.00018159345285982274, "loss": 11.6722, "step": 28146 }, { "epoch": 0.5891945072427364, "grad_norm": 0.31422460079193115, "learning_rate": 0.00018159218523980205, "loss": 11.6601, "step": 28147 }, { "epoch": 0.5892154400066985, "grad_norm": 0.311686247587204, "learning_rate": 0.00018159091758055833, "loss": 11.6783, "step": 28148 }, { "epoch": 0.5892363727706607, "grad_norm": 0.29807254672050476, "learning_rate": 0.00018158964988209225, "loss": 11.6715, "step": 28149 }, { "epoch": 0.5892573055346227, "grad_norm": 0.2749881148338318, "learning_rate": 0.00018158838214440433, "loss": 11.6368, "step": 28150 }, { "epoch": 0.5892782382985849, "grad_norm": 0.25301530957221985, "learning_rate": 0.0001815871143674953, "loss": 11.6709, "step": 28151 }, { "epoch": 0.5892991710625471, "grad_norm": 0.41011735796928406, "learning_rate": 0.0001815858465513657, "loss": 11.6666, "step": 28152 }, { "epoch": 0.5893201038265092, "grad_norm": 0.25505033135414124, "learning_rate": 0.00018158457869601612, "loss": 11.6675, "step": 28153 }, { "epoch": 0.5893410365904714, "grad_norm": 0.24567848443984985, "learning_rate": 0.00018158331080144721, "loss": 11.6628, "step": 28154 }, { "epoch": 0.5893619693544335, "grad_norm": 0.23880314826965332, "learning_rate": 0.00018158204286765957, "loss": 11.677, "step": 28155 }, { "epoch": 0.5893829021183957, "grad_norm": 0.3077523708343506, "learning_rate": 0.00018158077489465382, "loss": 11.6707, "step": 28156 }, { "epoch": 0.5894038348823578, "grad_norm": 0.30727627873420715, "learning_rate": 0.00018157950688243055, "loss": 11.6732, "step": 28157 }, { "epoch": 0.58942476764632, "grad_norm": 0.35831865668296814, "learning_rate": 0.0001815782388309904, "loss": 11.6888, "step": 28158 }, { "epoch": 0.5894457004102822, "grad_norm": 0.279845267534256, "learning_rate": 0.0001815769707403339, "loss": 11.6676, "step": 28159 }, { "epoch": 0.5894666331742443, "grad_norm": 0.33176833391189575, "learning_rate": 0.00018157570261046176, "loss": 11.6743, "step": 28160 }, { "epoch": 0.5894875659382065, "grad_norm": 0.2866867482662201, "learning_rate": 0.00018157443444137452, "loss": 11.6672, "step": 28161 }, { "epoch": 0.5895084987021686, "grad_norm": 0.3680519759654999, "learning_rate": 0.0001815731662330728, "loss": 11.6762, "step": 28162 }, { "epoch": 0.5895294314661308, "grad_norm": 0.3280978500843048, "learning_rate": 0.00018157189798555728, "loss": 11.6663, "step": 28163 }, { "epoch": 0.589550364230093, "grad_norm": 0.3145679235458374, "learning_rate": 0.00018157062969882847, "loss": 11.6479, "step": 28164 }, { "epoch": 0.5895712969940551, "grad_norm": 0.2522890865802765, "learning_rate": 0.00018156936137288704, "loss": 11.6658, "step": 28165 }, { "epoch": 0.5895922297580173, "grad_norm": 0.30564960837364197, "learning_rate": 0.00018156809300773357, "loss": 11.6803, "step": 28166 }, { "epoch": 0.5896131625219794, "grad_norm": 0.2934028208255768, "learning_rate": 0.0001815668246033687, "loss": 11.6795, "step": 28167 }, { "epoch": 0.5896340952859416, "grad_norm": 0.39176779985427856, "learning_rate": 0.000181565556159793, "loss": 11.6768, "step": 28168 }, { "epoch": 0.5896550280499037, "grad_norm": 0.32609832286834717, "learning_rate": 0.00018156428767700713, "loss": 11.6671, "step": 28169 }, { "epoch": 0.5896759608138659, "grad_norm": 0.3576316833496094, "learning_rate": 0.00018156301915501165, "loss": 11.6616, "step": 28170 }, { "epoch": 0.5896968935778281, "grad_norm": 0.29046428203582764, "learning_rate": 0.00018156175059380722, "loss": 11.6748, "step": 28171 }, { "epoch": 0.5897178263417902, "grad_norm": 0.3912825584411621, "learning_rate": 0.0001815604819933944, "loss": 11.6811, "step": 28172 }, { "epoch": 0.5897387591057524, "grad_norm": 0.2692563831806183, "learning_rate": 0.00018155921335377383, "loss": 11.6759, "step": 28173 }, { "epoch": 0.5897596918697144, "grad_norm": 0.285743772983551, "learning_rate": 0.00018155794467494612, "loss": 11.6817, "step": 28174 }, { "epoch": 0.5897806246336766, "grad_norm": 0.35031986236572266, "learning_rate": 0.00018155667595691184, "loss": 11.69, "step": 28175 }, { "epoch": 0.5898015573976387, "grad_norm": 0.30054712295532227, "learning_rate": 0.00018155540719967165, "loss": 11.6735, "step": 28176 }, { "epoch": 0.5898224901616009, "grad_norm": 0.3092392086982727, "learning_rate": 0.00018155413840322614, "loss": 11.6803, "step": 28177 }, { "epoch": 0.5898434229255631, "grad_norm": 0.25242549180984497, "learning_rate": 0.00018155286956757593, "loss": 11.6819, "step": 28178 }, { "epoch": 0.5898643556895252, "grad_norm": 0.4128246307373047, "learning_rate": 0.00018155160069272162, "loss": 11.6795, "step": 28179 }, { "epoch": 0.5898852884534874, "grad_norm": 0.3137870132923126, "learning_rate": 0.0001815503317786638, "loss": 11.6812, "step": 28180 }, { "epoch": 0.5899062212174495, "grad_norm": 0.34952399134635925, "learning_rate": 0.00018154906282540312, "loss": 11.6494, "step": 28181 }, { "epoch": 0.5899271539814117, "grad_norm": 0.28964030742645264, "learning_rate": 0.0001815477938329402, "loss": 11.6741, "step": 28182 }, { "epoch": 0.5899480867453739, "grad_norm": 0.2576598823070526, "learning_rate": 0.00018154652480127556, "loss": 11.6776, "step": 28183 }, { "epoch": 0.589969019509336, "grad_norm": 0.2407297044992447, "learning_rate": 0.0001815452557304099, "loss": 11.6789, "step": 28184 }, { "epoch": 0.5899899522732982, "grad_norm": 0.24739763140678406, "learning_rate": 0.0001815439866203438, "loss": 11.6711, "step": 28185 }, { "epoch": 0.5900108850372603, "grad_norm": 0.27523159980773926, "learning_rate": 0.0001815427174710779, "loss": 11.664, "step": 28186 }, { "epoch": 0.5900318178012225, "grad_norm": 0.2869575321674347, "learning_rate": 0.00018154144828261273, "loss": 11.6721, "step": 28187 }, { "epoch": 0.5900527505651846, "grad_norm": 0.24362385272979736, "learning_rate": 0.000181540179054949, "loss": 11.6692, "step": 28188 }, { "epoch": 0.5900736833291468, "grad_norm": 0.39316561818122864, "learning_rate": 0.00018153890978808723, "loss": 11.6707, "step": 28189 }, { "epoch": 0.590094616093109, "grad_norm": 0.2861667275428772, "learning_rate": 0.0001815376404820281, "loss": 11.6749, "step": 28190 }, { "epoch": 0.5901155488570711, "grad_norm": 0.38782861828804016, "learning_rate": 0.00018153637113677218, "loss": 11.649, "step": 28191 }, { "epoch": 0.5901364816210333, "grad_norm": 0.257026731967926, "learning_rate": 0.0001815351017523201, "loss": 11.6738, "step": 28192 }, { "epoch": 0.5901574143849954, "grad_norm": 0.30159178376197815, "learning_rate": 0.00018153383232867245, "loss": 11.6641, "step": 28193 }, { "epoch": 0.5901783471489576, "grad_norm": 0.2988432049751282, "learning_rate": 0.00018153256286582986, "loss": 11.6781, "step": 28194 }, { "epoch": 0.5901992799129196, "grad_norm": 0.2815583348274231, "learning_rate": 0.00018153129336379294, "loss": 11.6615, "step": 28195 }, { "epoch": 0.5902202126768819, "grad_norm": 0.34126901626586914, "learning_rate": 0.00018153002382256228, "loss": 11.6832, "step": 28196 }, { "epoch": 0.590241145440844, "grad_norm": 0.27585139870643616, "learning_rate": 0.0001815287542421385, "loss": 11.6628, "step": 28197 }, { "epoch": 0.5902620782048061, "grad_norm": 0.24415116012096405, "learning_rate": 0.00018152748462252225, "loss": 11.6742, "step": 28198 }, { "epoch": 0.5902830109687683, "grad_norm": 0.25997626781463623, "learning_rate": 0.00018152621496371406, "loss": 11.6675, "step": 28199 }, { "epoch": 0.5903039437327304, "grad_norm": 0.2691202759742737, "learning_rate": 0.00018152494526571463, "loss": 11.6749, "step": 28200 }, { "epoch": 0.5903248764966926, "grad_norm": 0.2938084304332733, "learning_rate": 0.00018152367552852448, "loss": 11.6634, "step": 28201 }, { "epoch": 0.5903458092606548, "grad_norm": 0.3013341724872589, "learning_rate": 0.0001815224057521443, "loss": 11.6761, "step": 28202 }, { "epoch": 0.5903667420246169, "grad_norm": 0.27888140082359314, "learning_rate": 0.00018152113593657463, "loss": 11.6571, "step": 28203 }, { "epoch": 0.5903876747885791, "grad_norm": 0.255908340215683, "learning_rate": 0.00018151986608181612, "loss": 11.6643, "step": 28204 }, { "epoch": 0.5904086075525412, "grad_norm": 0.3279978334903717, "learning_rate": 0.00018151859618786942, "loss": 11.6818, "step": 28205 }, { "epoch": 0.5904295403165034, "grad_norm": 0.3018944263458252, "learning_rate": 0.00018151732625473508, "loss": 11.6815, "step": 28206 }, { "epoch": 0.5904504730804655, "grad_norm": 0.3462802469730377, "learning_rate": 0.0001815160562824137, "loss": 11.6616, "step": 28207 }, { "epoch": 0.5904714058444277, "grad_norm": 0.33892735838890076, "learning_rate": 0.00018151478627090594, "loss": 11.6834, "step": 28208 }, { "epoch": 0.5904923386083899, "grad_norm": 0.2936159074306488, "learning_rate": 0.0001815135162202124, "loss": 11.6688, "step": 28209 }, { "epoch": 0.590513271372352, "grad_norm": 0.2646109163761139, "learning_rate": 0.00018151224613033366, "loss": 11.6782, "step": 28210 }, { "epoch": 0.5905342041363142, "grad_norm": 0.2806214988231659, "learning_rate": 0.00018151097600127035, "loss": 11.6709, "step": 28211 }, { "epoch": 0.5905551369002763, "grad_norm": 0.2751355767250061, "learning_rate": 0.00018150970583302308, "loss": 11.6731, "step": 28212 }, { "epoch": 0.5905760696642385, "grad_norm": 0.2698003649711609, "learning_rate": 0.00018150843562559247, "loss": 11.6647, "step": 28213 }, { "epoch": 0.5905970024282006, "grad_norm": 0.3444160521030426, "learning_rate": 0.0001815071653789791, "loss": 11.6499, "step": 28214 }, { "epoch": 0.5906179351921628, "grad_norm": 0.2634965479373932, "learning_rate": 0.00018150589509318362, "loss": 11.6801, "step": 28215 }, { "epoch": 0.590638867956125, "grad_norm": 0.3066153824329376, "learning_rate": 0.00018150462476820663, "loss": 11.6708, "step": 28216 }, { "epoch": 0.5906598007200871, "grad_norm": 0.3822728991508484, "learning_rate": 0.0001815033544040487, "loss": 11.69, "step": 28217 }, { "epoch": 0.5906807334840493, "grad_norm": 0.3492628335952759, "learning_rate": 0.00018150208400071055, "loss": 11.6868, "step": 28218 }, { "epoch": 0.5907016662480113, "grad_norm": 0.30395346879959106, "learning_rate": 0.00018150081355819265, "loss": 11.6723, "step": 28219 }, { "epoch": 0.5907225990119735, "grad_norm": 0.3301815092563629, "learning_rate": 0.00018149954307649567, "loss": 11.6929, "step": 28220 }, { "epoch": 0.5907435317759357, "grad_norm": 0.289682537317276, "learning_rate": 0.00018149827255562023, "loss": 11.6597, "step": 28221 }, { "epoch": 0.5907644645398978, "grad_norm": 0.2290433645248413, "learning_rate": 0.000181497001995567, "loss": 11.6848, "step": 28222 }, { "epoch": 0.59078539730386, "grad_norm": 0.3200140595436096, "learning_rate": 0.00018149573139633645, "loss": 11.68, "step": 28223 }, { "epoch": 0.5908063300678221, "grad_norm": 0.2945154309272766, "learning_rate": 0.0001814944607579293, "loss": 11.6764, "step": 28224 }, { "epoch": 0.5908272628317843, "grad_norm": 0.31889843940734863, "learning_rate": 0.00018149319008034612, "loss": 11.6526, "step": 28225 }, { "epoch": 0.5908481955957464, "grad_norm": 0.28541526198387146, "learning_rate": 0.00018149191936358755, "loss": 11.6718, "step": 28226 }, { "epoch": 0.5908691283597086, "grad_norm": 0.2636885941028595, "learning_rate": 0.00018149064860765418, "loss": 11.6834, "step": 28227 }, { "epoch": 0.5908900611236708, "grad_norm": 0.2807482182979584, "learning_rate": 0.0001814893778125466, "loss": 11.6569, "step": 28228 }, { "epoch": 0.5909109938876329, "grad_norm": 0.2511969208717346, "learning_rate": 0.00018148810697826546, "loss": 11.6657, "step": 28229 }, { "epoch": 0.5909319266515951, "grad_norm": 0.29200753569602966, "learning_rate": 0.00018148683610481135, "loss": 11.6824, "step": 28230 }, { "epoch": 0.5909528594155572, "grad_norm": 0.34724554419517517, "learning_rate": 0.00018148556519218488, "loss": 11.6748, "step": 28231 }, { "epoch": 0.5909737921795194, "grad_norm": 0.30854031443595886, "learning_rate": 0.00018148429424038668, "loss": 11.6797, "step": 28232 }, { "epoch": 0.5909947249434815, "grad_norm": 0.2513480484485626, "learning_rate": 0.00018148302324941736, "loss": 11.6704, "step": 28233 }, { "epoch": 0.5910156577074437, "grad_norm": 0.2566414177417755, "learning_rate": 0.00018148175221927747, "loss": 11.6596, "step": 28234 }, { "epoch": 0.5910365904714059, "grad_norm": 0.38567906618118286, "learning_rate": 0.0001814804811499677, "loss": 11.681, "step": 28235 }, { "epoch": 0.591057523235368, "grad_norm": 0.2789197862148285, "learning_rate": 0.0001814792100414886, "loss": 11.6764, "step": 28236 }, { "epoch": 0.5910784559993302, "grad_norm": 0.2790331542491913, "learning_rate": 0.00018147793889384086, "loss": 11.6817, "step": 28237 }, { "epoch": 0.5910993887632923, "grad_norm": 0.33248332142829895, "learning_rate": 0.00018147666770702503, "loss": 11.6954, "step": 28238 }, { "epoch": 0.5911203215272545, "grad_norm": 0.25853413343429565, "learning_rate": 0.00018147539648104172, "loss": 11.6583, "step": 28239 }, { "epoch": 0.5911412542912167, "grad_norm": 0.2526535093784332, "learning_rate": 0.00018147412521589152, "loss": 11.6594, "step": 28240 }, { "epoch": 0.5911621870551788, "grad_norm": 0.33822962641716003, "learning_rate": 0.00018147285391157514, "loss": 11.6742, "step": 28241 }, { "epoch": 0.591183119819141, "grad_norm": 0.2797664701938629, "learning_rate": 0.0001814715825680931, "loss": 11.6659, "step": 28242 }, { "epoch": 0.591204052583103, "grad_norm": 0.2762809693813324, "learning_rate": 0.00018147031118544602, "loss": 11.6829, "step": 28243 }, { "epoch": 0.5912249853470652, "grad_norm": 0.3297036588191986, "learning_rate": 0.00018146903976363454, "loss": 11.6697, "step": 28244 }, { "epoch": 0.5912459181110273, "grad_norm": 0.27507683634757996, "learning_rate": 0.00018146776830265925, "loss": 11.6728, "step": 28245 }, { "epoch": 0.5912668508749895, "grad_norm": 0.33727407455444336, "learning_rate": 0.00018146649680252078, "loss": 11.6852, "step": 28246 }, { "epoch": 0.5912877836389517, "grad_norm": 0.3373625874519348, "learning_rate": 0.00018146522526321976, "loss": 11.6724, "step": 28247 }, { "epoch": 0.5913087164029138, "grad_norm": 0.28106334805488586, "learning_rate": 0.00018146395368475674, "loss": 11.668, "step": 28248 }, { "epoch": 0.591329649166876, "grad_norm": 0.26503995060920715, "learning_rate": 0.00018146268206713237, "loss": 11.6444, "step": 28249 }, { "epoch": 0.5913505819308381, "grad_norm": 0.2416047751903534, "learning_rate": 0.00018146141041034728, "loss": 11.6624, "step": 28250 }, { "epoch": 0.5913715146948003, "grad_norm": 0.42868149280548096, "learning_rate": 0.000181460138714402, "loss": 11.6849, "step": 28251 }, { "epoch": 0.5913924474587624, "grad_norm": 0.23559771478176117, "learning_rate": 0.00018145886697929726, "loss": 11.663, "step": 28252 }, { "epoch": 0.5914133802227246, "grad_norm": 0.23787681758403778, "learning_rate": 0.00018145759520503358, "loss": 11.673, "step": 28253 }, { "epoch": 0.5914343129866868, "grad_norm": 0.2577640414237976, "learning_rate": 0.00018145632339161162, "loss": 11.6668, "step": 28254 }, { "epoch": 0.5914552457506489, "grad_norm": 0.30173447728157043, "learning_rate": 0.00018145505153903196, "loss": 11.6487, "step": 28255 }, { "epoch": 0.5914761785146111, "grad_norm": 0.3621421456336975, "learning_rate": 0.00018145377964729522, "loss": 11.6732, "step": 28256 }, { "epoch": 0.5914971112785732, "grad_norm": 0.32876071333885193, "learning_rate": 0.00018145250771640204, "loss": 11.6714, "step": 28257 }, { "epoch": 0.5915180440425354, "grad_norm": 0.3257555663585663, "learning_rate": 0.00018145123574635298, "loss": 11.6608, "step": 28258 }, { "epoch": 0.5915389768064976, "grad_norm": 0.28743118047714233, "learning_rate": 0.00018144996373714868, "loss": 11.6686, "step": 28259 }, { "epoch": 0.5915599095704597, "grad_norm": 0.2793492078781128, "learning_rate": 0.00018144869168878976, "loss": 11.6848, "step": 28260 }, { "epoch": 0.5915808423344219, "grad_norm": 0.3185974061489105, "learning_rate": 0.0001814474196012768, "loss": 11.6748, "step": 28261 }, { "epoch": 0.591601775098384, "grad_norm": 0.33397728204727173, "learning_rate": 0.00018144614747461047, "loss": 11.6695, "step": 28262 }, { "epoch": 0.5916227078623462, "grad_norm": 0.35302025079727173, "learning_rate": 0.0001814448753087913, "loss": 11.6732, "step": 28263 }, { "epoch": 0.5916436406263083, "grad_norm": 0.40047886967658997, "learning_rate": 0.00018144360310381997, "loss": 11.6863, "step": 28264 }, { "epoch": 0.5916645733902705, "grad_norm": 0.30513039231300354, "learning_rate": 0.0001814423308596971, "loss": 11.6786, "step": 28265 }, { "epoch": 0.5916855061542327, "grad_norm": 0.3325779438018799, "learning_rate": 0.00018144105857642324, "loss": 11.6582, "step": 28266 }, { "epoch": 0.5917064389181947, "grad_norm": 0.29842251539230347, "learning_rate": 0.000181439786253999, "loss": 11.6733, "step": 28267 }, { "epoch": 0.5917273716821569, "grad_norm": 0.3037995398044586, "learning_rate": 0.00018143851389242508, "loss": 11.6596, "step": 28268 }, { "epoch": 0.591748304446119, "grad_norm": 0.2816685140132904, "learning_rate": 0.000181437241491702, "loss": 11.663, "step": 28269 }, { "epoch": 0.5917692372100812, "grad_norm": 0.23999878764152527, "learning_rate": 0.00018143596905183042, "loss": 11.674, "step": 28270 }, { "epoch": 0.5917901699740433, "grad_norm": 0.32722008228302, "learning_rate": 0.00018143469657281092, "loss": 11.685, "step": 28271 }, { "epoch": 0.5918111027380055, "grad_norm": 0.2605511546134949, "learning_rate": 0.00018143342405464415, "loss": 11.671, "step": 28272 }, { "epoch": 0.5918320355019677, "grad_norm": 0.30185288190841675, "learning_rate": 0.0001814321514973307, "loss": 11.6848, "step": 28273 }, { "epoch": 0.5918529682659298, "grad_norm": 0.2801993787288666, "learning_rate": 0.0001814308789008712, "loss": 11.6476, "step": 28274 }, { "epoch": 0.591873901029892, "grad_norm": 0.3382042348384857, "learning_rate": 0.0001814296062652662, "loss": 11.6871, "step": 28275 }, { "epoch": 0.5918948337938541, "grad_norm": 0.2482789158821106, "learning_rate": 0.00018142833359051637, "loss": 11.6673, "step": 28276 }, { "epoch": 0.5919157665578163, "grad_norm": 0.3146200180053711, "learning_rate": 0.0001814270608766223, "loss": 11.6759, "step": 28277 }, { "epoch": 0.5919366993217784, "grad_norm": 0.31561774015426636, "learning_rate": 0.00018142578812358463, "loss": 11.6629, "step": 28278 }, { "epoch": 0.5919576320857406, "grad_norm": 0.25585559010505676, "learning_rate": 0.00018142451533140395, "loss": 11.6683, "step": 28279 }, { "epoch": 0.5919785648497028, "grad_norm": 0.265537291765213, "learning_rate": 0.00018142324250008086, "loss": 11.6809, "step": 28280 }, { "epoch": 0.5919994976136649, "grad_norm": 0.2983202636241913, "learning_rate": 0.000181421969629616, "loss": 11.6809, "step": 28281 }, { "epoch": 0.5920204303776271, "grad_norm": 0.2768155336380005, "learning_rate": 0.00018142069672000995, "loss": 11.6706, "step": 28282 }, { "epoch": 0.5920413631415892, "grad_norm": 0.3012121617794037, "learning_rate": 0.00018141942377126338, "loss": 11.6524, "step": 28283 }, { "epoch": 0.5920622959055514, "grad_norm": 0.3550570011138916, "learning_rate": 0.00018141815078337684, "loss": 11.6681, "step": 28284 }, { "epoch": 0.5920832286695136, "grad_norm": 0.3298608660697937, "learning_rate": 0.00018141687775635095, "loss": 11.6743, "step": 28285 }, { "epoch": 0.5921041614334757, "grad_norm": 0.4340456426143646, "learning_rate": 0.00018141560469018633, "loss": 11.6724, "step": 28286 }, { "epoch": 0.5921250941974379, "grad_norm": 0.30648988485336304, "learning_rate": 0.00018141433158488362, "loss": 11.6768, "step": 28287 }, { "epoch": 0.5921460269614, "grad_norm": 0.3666117787361145, "learning_rate": 0.00018141305844044338, "loss": 11.6736, "step": 28288 }, { "epoch": 0.5921669597253622, "grad_norm": 0.3090612590312958, "learning_rate": 0.0001814117852568663, "loss": 11.6683, "step": 28289 }, { "epoch": 0.5921878924893242, "grad_norm": 0.2688119113445282, "learning_rate": 0.00018141051203415288, "loss": 11.6678, "step": 28290 }, { "epoch": 0.5922088252532864, "grad_norm": 0.3627067506313324, "learning_rate": 0.00018140923877230385, "loss": 11.6855, "step": 28291 }, { "epoch": 0.5922297580172486, "grad_norm": 0.28910210728645325, "learning_rate": 0.00018140796547131975, "loss": 11.6738, "step": 28292 }, { "epoch": 0.5922506907812107, "grad_norm": 0.25928136706352234, "learning_rate": 0.0001814066921312012, "loss": 11.6766, "step": 28293 }, { "epoch": 0.5922716235451729, "grad_norm": 0.33136358857154846, "learning_rate": 0.00018140541875194882, "loss": 11.686, "step": 28294 }, { "epoch": 0.592292556309135, "grad_norm": 0.35687944293022156, "learning_rate": 0.00018140414533356322, "loss": 11.6766, "step": 28295 }, { "epoch": 0.5923134890730972, "grad_norm": 0.402465283870697, "learning_rate": 0.00018140287187604504, "loss": 11.6671, "step": 28296 }, { "epoch": 0.5923344218370593, "grad_norm": 0.42925986647605896, "learning_rate": 0.00018140159837939484, "loss": 11.6718, "step": 28297 }, { "epoch": 0.5923553546010215, "grad_norm": 0.42170432209968567, "learning_rate": 0.0001814003248436133, "loss": 11.6702, "step": 28298 }, { "epoch": 0.5923762873649837, "grad_norm": 0.5101824998855591, "learning_rate": 0.00018139905126870096, "loss": 11.6783, "step": 28299 }, { "epoch": 0.5923972201289458, "grad_norm": 0.3222818970680237, "learning_rate": 0.0001813977776546585, "loss": 11.6738, "step": 28300 }, { "epoch": 0.592418152892908, "grad_norm": 0.31722456216812134, "learning_rate": 0.00018139650400148644, "loss": 11.6668, "step": 28301 }, { "epoch": 0.5924390856568701, "grad_norm": 0.345945805311203, "learning_rate": 0.00018139523030918548, "loss": 11.6837, "step": 28302 }, { "epoch": 0.5924600184208323, "grad_norm": 0.3412506878376007, "learning_rate": 0.00018139395657775623, "loss": 11.6781, "step": 28303 }, { "epoch": 0.5924809511847945, "grad_norm": 0.2983958125114441, "learning_rate": 0.00018139268280719924, "loss": 11.6711, "step": 28304 }, { "epoch": 0.5925018839487566, "grad_norm": 0.6596164107322693, "learning_rate": 0.00018139140899751515, "loss": 11.6992, "step": 28305 }, { "epoch": 0.5925228167127188, "grad_norm": 0.3130573034286499, "learning_rate": 0.0001813901351487046, "loss": 11.6658, "step": 28306 }, { "epoch": 0.5925437494766809, "grad_norm": 0.2922334372997284, "learning_rate": 0.00018138886126076815, "loss": 11.6722, "step": 28307 }, { "epoch": 0.5925646822406431, "grad_norm": 0.3120070993900299, "learning_rate": 0.0001813875873337065, "loss": 11.6726, "step": 28308 }, { "epoch": 0.5925856150046052, "grad_norm": 0.25219517946243286, "learning_rate": 0.00018138631336752013, "loss": 11.6658, "step": 28309 }, { "epoch": 0.5926065477685674, "grad_norm": 0.3323718309402466, "learning_rate": 0.00018138503936220975, "loss": 11.6807, "step": 28310 }, { "epoch": 0.5926274805325296, "grad_norm": 0.29085320234298706, "learning_rate": 0.000181383765317776, "loss": 11.6785, "step": 28311 }, { "epoch": 0.5926484132964916, "grad_norm": 0.30986887216567993, "learning_rate": 0.00018138249123421943, "loss": 11.6734, "step": 28312 }, { "epoch": 0.5926693460604538, "grad_norm": 0.2798599302768707, "learning_rate": 0.00018138121711154062, "loss": 11.6616, "step": 28313 }, { "epoch": 0.5926902788244159, "grad_norm": 0.31379497051239014, "learning_rate": 0.00018137994294974025, "loss": 11.6724, "step": 28314 }, { "epoch": 0.5927112115883781, "grad_norm": 0.3663831353187561, "learning_rate": 0.00018137866874881893, "loss": 11.6616, "step": 28315 }, { "epoch": 0.5927321443523402, "grad_norm": 0.31850185990333557, "learning_rate": 0.00018137739450877724, "loss": 11.6808, "step": 28316 }, { "epoch": 0.5927530771163024, "grad_norm": 0.30614498257637024, "learning_rate": 0.0001813761202296158, "loss": 11.6866, "step": 28317 }, { "epoch": 0.5927740098802646, "grad_norm": 0.2788892090320587, "learning_rate": 0.00018137484591133522, "loss": 11.6811, "step": 28318 }, { "epoch": 0.5927949426442267, "grad_norm": 0.31850552558898926, "learning_rate": 0.00018137357155393615, "loss": 11.6692, "step": 28319 }, { "epoch": 0.5928158754081889, "grad_norm": 0.3161437511444092, "learning_rate": 0.00018137229715741918, "loss": 11.6707, "step": 28320 }, { "epoch": 0.592836808172151, "grad_norm": 0.26530852913856506, "learning_rate": 0.00018137102272178484, "loss": 11.672, "step": 28321 }, { "epoch": 0.5928577409361132, "grad_norm": 0.3571948707103729, "learning_rate": 0.0001813697482470339, "loss": 11.6621, "step": 28322 }, { "epoch": 0.5928786737000754, "grad_norm": 0.24579192698001862, "learning_rate": 0.00018136847373316685, "loss": 11.6793, "step": 28323 }, { "epoch": 0.5928996064640375, "grad_norm": 0.3002362847328186, "learning_rate": 0.00018136719918018438, "loss": 11.6804, "step": 28324 }, { "epoch": 0.5929205392279997, "grad_norm": 0.30033165216445923, "learning_rate": 0.00018136592458808704, "loss": 11.6617, "step": 28325 }, { "epoch": 0.5929414719919618, "grad_norm": 0.3203794062137604, "learning_rate": 0.00018136464995687548, "loss": 11.6689, "step": 28326 }, { "epoch": 0.592962404755924, "grad_norm": 0.2533135414123535, "learning_rate": 0.00018136337528655027, "loss": 11.6508, "step": 28327 }, { "epoch": 0.5929833375198861, "grad_norm": 0.2617821991443634, "learning_rate": 0.00018136210057711208, "loss": 11.6759, "step": 28328 }, { "epoch": 0.5930042702838483, "grad_norm": 0.30596745014190674, "learning_rate": 0.0001813608258285615, "loss": 11.6704, "step": 28329 }, { "epoch": 0.5930252030478105, "grad_norm": 0.4219071567058563, "learning_rate": 0.00018135955104089913, "loss": 11.6876, "step": 28330 }, { "epoch": 0.5930461358117726, "grad_norm": 0.3128439784049988, "learning_rate": 0.00018135827621412564, "loss": 11.6701, "step": 28331 }, { "epoch": 0.5930670685757348, "grad_norm": 0.2778838276863098, "learning_rate": 0.00018135700134824155, "loss": 11.6706, "step": 28332 }, { "epoch": 0.5930880013396969, "grad_norm": 0.2849788963794708, "learning_rate": 0.00018135572644324753, "loss": 11.6672, "step": 28333 }, { "epoch": 0.5931089341036591, "grad_norm": 0.30970120429992676, "learning_rate": 0.0001813544514991442, "loss": 11.6795, "step": 28334 }, { "epoch": 0.5931298668676211, "grad_norm": 0.31660282611846924, "learning_rate": 0.00018135317651593212, "loss": 11.6535, "step": 28335 }, { "epoch": 0.5931507996315833, "grad_norm": 0.414573609828949, "learning_rate": 0.00018135190149361194, "loss": 11.6792, "step": 28336 }, { "epoch": 0.5931717323955455, "grad_norm": 0.3140973746776581, "learning_rate": 0.00018135062643218427, "loss": 11.6772, "step": 28337 }, { "epoch": 0.5931926651595076, "grad_norm": 0.28045737743377686, "learning_rate": 0.00018134935133164975, "loss": 11.6574, "step": 28338 }, { "epoch": 0.5932135979234698, "grad_norm": 0.3084077537059784, "learning_rate": 0.00018134807619200896, "loss": 11.6463, "step": 28339 }, { "epoch": 0.5932345306874319, "grad_norm": 0.2733977735042572, "learning_rate": 0.00018134680101326248, "loss": 11.6828, "step": 28340 }, { "epoch": 0.5932554634513941, "grad_norm": 0.30239611864089966, "learning_rate": 0.00018134552579541103, "loss": 11.6761, "step": 28341 }, { "epoch": 0.5932763962153563, "grad_norm": 0.33045363426208496, "learning_rate": 0.00018134425053845508, "loss": 11.6792, "step": 28342 }, { "epoch": 0.5932973289793184, "grad_norm": 0.28660279512405396, "learning_rate": 0.00018134297524239538, "loss": 11.6719, "step": 28343 }, { "epoch": 0.5933182617432806, "grad_norm": 0.3490206301212311, "learning_rate": 0.00018134169990723247, "loss": 11.6686, "step": 28344 }, { "epoch": 0.5933391945072427, "grad_norm": 0.2989095449447632, "learning_rate": 0.00018134042453296696, "loss": 11.65, "step": 28345 }, { "epoch": 0.5933601272712049, "grad_norm": 0.3420912027359009, "learning_rate": 0.0001813391491195995, "loss": 11.6804, "step": 28346 }, { "epoch": 0.593381060035167, "grad_norm": 0.4173714518547058, "learning_rate": 0.00018133787366713064, "loss": 11.6738, "step": 28347 }, { "epoch": 0.5934019927991292, "grad_norm": 0.3023518919944763, "learning_rate": 0.00018133659817556107, "loss": 11.6617, "step": 28348 }, { "epoch": 0.5934229255630914, "grad_norm": 0.29332733154296875, "learning_rate": 0.00018133532264489135, "loss": 11.6586, "step": 28349 }, { "epoch": 0.5934438583270535, "grad_norm": 0.30670833587646484, "learning_rate": 0.0001813340470751221, "loss": 11.6476, "step": 28350 }, { "epoch": 0.5934647910910157, "grad_norm": 0.2868114709854126, "learning_rate": 0.00018133277146625398, "loss": 11.676, "step": 28351 }, { "epoch": 0.5934857238549778, "grad_norm": 0.31528645753860474, "learning_rate": 0.00018133149581828754, "loss": 11.6825, "step": 28352 }, { "epoch": 0.59350665661894, "grad_norm": 0.308285117149353, "learning_rate": 0.00018133022013122342, "loss": 11.6901, "step": 28353 }, { "epoch": 0.5935275893829021, "grad_norm": 0.32290157675743103, "learning_rate": 0.00018132894440506223, "loss": 11.6675, "step": 28354 }, { "epoch": 0.5935485221468643, "grad_norm": 0.3389681279659271, "learning_rate": 0.0001813276686398046, "loss": 11.6911, "step": 28355 }, { "epoch": 0.5935694549108265, "grad_norm": 0.3945484161376953, "learning_rate": 0.00018132639283545114, "loss": 11.672, "step": 28356 }, { "epoch": 0.5935903876747886, "grad_norm": 0.3521304130554199, "learning_rate": 0.00018132511699200246, "loss": 11.6626, "step": 28357 }, { "epoch": 0.5936113204387508, "grad_norm": 0.3309985101222992, "learning_rate": 0.00018132384110945913, "loss": 11.6831, "step": 28358 }, { "epoch": 0.5936322532027128, "grad_norm": 0.2877942621707916, "learning_rate": 0.0001813225651878218, "loss": 11.6762, "step": 28359 }, { "epoch": 0.593653185966675, "grad_norm": 0.3022991120815277, "learning_rate": 0.0001813212892270911, "loss": 11.6775, "step": 28360 }, { "epoch": 0.5936741187306372, "grad_norm": 0.24451550841331482, "learning_rate": 0.00018132001322726765, "loss": 11.6726, "step": 28361 }, { "epoch": 0.5936950514945993, "grad_norm": 0.3191116154193878, "learning_rate": 0.000181318737188352, "loss": 11.6705, "step": 28362 }, { "epoch": 0.5937159842585615, "grad_norm": 0.4093761742115021, "learning_rate": 0.00018131746111034483, "loss": 11.6656, "step": 28363 }, { "epoch": 0.5937369170225236, "grad_norm": 0.2968229651451111, "learning_rate": 0.00018131618499324668, "loss": 11.6725, "step": 28364 }, { "epoch": 0.5937578497864858, "grad_norm": 0.5466075539588928, "learning_rate": 0.00018131490883705826, "loss": 11.5945, "step": 28365 }, { "epoch": 0.5937787825504479, "grad_norm": 0.3846698999404907, "learning_rate": 0.00018131363264178014, "loss": 11.6886, "step": 28366 }, { "epoch": 0.5937997153144101, "grad_norm": 0.2865208685398102, "learning_rate": 0.00018131235640741287, "loss": 11.6658, "step": 28367 }, { "epoch": 0.5938206480783723, "grad_norm": 0.3618716299533844, "learning_rate": 0.00018131108013395717, "loss": 11.6726, "step": 28368 }, { "epoch": 0.5938415808423344, "grad_norm": 0.3359701931476593, "learning_rate": 0.00018130980382141361, "loss": 11.6971, "step": 28369 }, { "epoch": 0.5938625136062966, "grad_norm": 0.37832897901535034, "learning_rate": 0.00018130852746978278, "loss": 11.6819, "step": 28370 }, { "epoch": 0.5938834463702587, "grad_norm": 0.31415826082229614, "learning_rate": 0.0001813072510790653, "loss": 11.6639, "step": 28371 }, { "epoch": 0.5939043791342209, "grad_norm": 0.3250925540924072, "learning_rate": 0.0001813059746492618, "loss": 11.6538, "step": 28372 }, { "epoch": 0.593925311898183, "grad_norm": 0.2612557113170624, "learning_rate": 0.0001813046981803729, "loss": 11.6704, "step": 28373 }, { "epoch": 0.5939462446621452, "grad_norm": 0.3456374406814575, "learning_rate": 0.0001813034216723992, "loss": 11.6813, "step": 28374 }, { "epoch": 0.5939671774261074, "grad_norm": 0.30885276198387146, "learning_rate": 0.0001813021451253413, "loss": 11.666, "step": 28375 }, { "epoch": 0.5939881101900695, "grad_norm": 0.3195856213569641, "learning_rate": 0.00018130086853919986, "loss": 11.6676, "step": 28376 }, { "epoch": 0.5940090429540317, "grad_norm": 0.2891554534435272, "learning_rate": 0.00018129959191397546, "loss": 11.6776, "step": 28377 }, { "epoch": 0.5940299757179938, "grad_norm": 0.3439587652683258, "learning_rate": 0.0001812983152496687, "loss": 11.6852, "step": 28378 }, { "epoch": 0.594050908481956, "grad_norm": 0.3180152475833893, "learning_rate": 0.0001812970385462802, "loss": 11.6775, "step": 28379 }, { "epoch": 0.5940718412459182, "grad_norm": 0.25181829929351807, "learning_rate": 0.00018129576180381062, "loss": 11.6762, "step": 28380 }, { "epoch": 0.5940927740098803, "grad_norm": 0.2325437217950821, "learning_rate": 0.00018129448502226048, "loss": 11.6811, "step": 28381 }, { "epoch": 0.5941137067738425, "grad_norm": 0.29502466320991516, "learning_rate": 0.00018129320820163048, "loss": 11.6838, "step": 28382 }, { "epoch": 0.5941346395378045, "grad_norm": 0.356294184923172, "learning_rate": 0.00018129193134192124, "loss": 11.671, "step": 28383 }, { "epoch": 0.5941555723017667, "grad_norm": 0.2924717366695404, "learning_rate": 0.00018129065444313328, "loss": 11.6614, "step": 28384 }, { "epoch": 0.5941765050657288, "grad_norm": 0.4539567530155182, "learning_rate": 0.00018128937750526734, "loss": 11.6786, "step": 28385 }, { "epoch": 0.594197437829691, "grad_norm": 0.3814094662666321, "learning_rate": 0.00018128810052832394, "loss": 11.661, "step": 28386 }, { "epoch": 0.5942183705936532, "grad_norm": 0.3358568251132965, "learning_rate": 0.00018128682351230372, "loss": 11.6765, "step": 28387 }, { "epoch": 0.5942393033576153, "grad_norm": 0.32147496938705444, "learning_rate": 0.00018128554645720725, "loss": 11.6724, "step": 28388 }, { "epoch": 0.5942602361215775, "grad_norm": 0.27966997027397156, "learning_rate": 0.00018128426936303525, "loss": 11.667, "step": 28389 }, { "epoch": 0.5942811688855396, "grad_norm": 0.28654244542121887, "learning_rate": 0.00018128299222978824, "loss": 11.6691, "step": 28390 }, { "epoch": 0.5943021016495018, "grad_norm": 0.2724568843841553, "learning_rate": 0.00018128171505746687, "loss": 11.685, "step": 28391 }, { "epoch": 0.5943230344134639, "grad_norm": 0.3916632831096649, "learning_rate": 0.00018128043784607176, "loss": 11.6796, "step": 28392 }, { "epoch": 0.5943439671774261, "grad_norm": 0.275359570980072, "learning_rate": 0.0001812791605956035, "loss": 11.683, "step": 28393 }, { "epoch": 0.5943648999413883, "grad_norm": 0.33678293228149414, "learning_rate": 0.00018127788330606274, "loss": 11.6751, "step": 28394 }, { "epoch": 0.5943858327053504, "grad_norm": 0.27068519592285156, "learning_rate": 0.0001812766059774501, "loss": 11.6907, "step": 28395 }, { "epoch": 0.5944067654693126, "grad_norm": 0.5014940500259399, "learning_rate": 0.0001812753286097661, "loss": 11.6976, "step": 28396 }, { "epoch": 0.5944276982332747, "grad_norm": 0.2715507745742798, "learning_rate": 0.00018127405120301146, "loss": 11.6666, "step": 28397 }, { "epoch": 0.5944486309972369, "grad_norm": 0.29442694783210754, "learning_rate": 0.00018127277375718675, "loss": 11.6696, "step": 28398 }, { "epoch": 0.5944695637611991, "grad_norm": 0.26169028878211975, "learning_rate": 0.00018127149627229257, "loss": 11.6754, "step": 28399 }, { "epoch": 0.5944904965251612, "grad_norm": 0.27022960782051086, "learning_rate": 0.00018127021874832957, "loss": 11.6836, "step": 28400 }, { "epoch": 0.5945114292891234, "grad_norm": 0.32215002179145813, "learning_rate": 0.00018126894118529834, "loss": 11.6849, "step": 28401 }, { "epoch": 0.5945323620530855, "grad_norm": 0.3198868930339813, "learning_rate": 0.00018126766358319952, "loss": 11.6649, "step": 28402 }, { "epoch": 0.5945532948170477, "grad_norm": 0.38769662380218506, "learning_rate": 0.0001812663859420337, "loss": 11.6826, "step": 28403 }, { "epoch": 0.5945742275810098, "grad_norm": 0.2795373201370239, "learning_rate": 0.00018126510826180147, "loss": 11.6758, "step": 28404 }, { "epoch": 0.594595160344972, "grad_norm": 0.2596575617790222, "learning_rate": 0.00018126383054250348, "loss": 11.6745, "step": 28405 }, { "epoch": 0.5946160931089342, "grad_norm": 0.2473028153181076, "learning_rate": 0.00018126255278414036, "loss": 11.6685, "step": 28406 }, { "epoch": 0.5946370258728962, "grad_norm": 0.2825738191604614, "learning_rate": 0.00018126127498671273, "loss": 11.6507, "step": 28407 }, { "epoch": 0.5946579586368584, "grad_norm": 0.27604007720947266, "learning_rate": 0.00018125999715022114, "loss": 11.6672, "step": 28408 }, { "epoch": 0.5946788914008205, "grad_norm": 0.2886159121990204, "learning_rate": 0.00018125871927466623, "loss": 11.6837, "step": 28409 }, { "epoch": 0.5946998241647827, "grad_norm": 0.2794337570667267, "learning_rate": 0.00018125744136004865, "loss": 11.6636, "step": 28410 }, { "epoch": 0.5947207569287448, "grad_norm": 0.26427531242370605, "learning_rate": 0.00018125616340636898, "loss": 11.6723, "step": 28411 }, { "epoch": 0.594741689692707, "grad_norm": 0.245612233877182, "learning_rate": 0.00018125488541362783, "loss": 11.6584, "step": 28412 }, { "epoch": 0.5947626224566692, "grad_norm": 0.2697252035140991, "learning_rate": 0.00018125360738182583, "loss": 11.6783, "step": 28413 }, { "epoch": 0.5947835552206313, "grad_norm": 0.44282180070877075, "learning_rate": 0.0001812523293109636, "loss": 11.6734, "step": 28414 }, { "epoch": 0.5948044879845935, "grad_norm": 0.3287797272205353, "learning_rate": 0.00018125105120104175, "loss": 11.6726, "step": 28415 }, { "epoch": 0.5948254207485556, "grad_norm": 0.260521799325943, "learning_rate": 0.0001812497730520609, "loss": 11.6666, "step": 28416 }, { "epoch": 0.5948463535125178, "grad_norm": 0.31699174642562866, "learning_rate": 0.00018124849486402165, "loss": 11.6714, "step": 28417 }, { "epoch": 0.59486728627648, "grad_norm": 0.2581762373447418, "learning_rate": 0.00018124721663692462, "loss": 11.6561, "step": 28418 }, { "epoch": 0.5948882190404421, "grad_norm": 0.27615681290626526, "learning_rate": 0.00018124593837077043, "loss": 11.6613, "step": 28419 }, { "epoch": 0.5949091518044043, "grad_norm": 0.2971736490726471, "learning_rate": 0.00018124466006555967, "loss": 11.6812, "step": 28420 }, { "epoch": 0.5949300845683664, "grad_norm": 0.3352317810058594, "learning_rate": 0.000181243381721293, "loss": 11.6821, "step": 28421 }, { "epoch": 0.5949510173323286, "grad_norm": 0.2646719217300415, "learning_rate": 0.000181242103337971, "loss": 11.6909, "step": 28422 }, { "epoch": 0.5949719500962907, "grad_norm": 0.3358882963657379, "learning_rate": 0.00018124082491559427, "loss": 11.6831, "step": 28423 }, { "epoch": 0.5949928828602529, "grad_norm": 0.3548429012298584, "learning_rate": 0.00018123954645416344, "loss": 11.6743, "step": 28424 }, { "epoch": 0.5950138156242151, "grad_norm": 0.25078749656677246, "learning_rate": 0.00018123826795367917, "loss": 11.6553, "step": 28425 }, { "epoch": 0.5950347483881772, "grad_norm": 0.27984005212783813, "learning_rate": 0.00018123698941414202, "loss": 11.6589, "step": 28426 }, { "epoch": 0.5950556811521394, "grad_norm": 0.2673463821411133, "learning_rate": 0.00018123571083555263, "loss": 11.6635, "step": 28427 }, { "epoch": 0.5950766139161014, "grad_norm": 0.31274014711380005, "learning_rate": 0.0001812344322179116, "loss": 11.6825, "step": 28428 }, { "epoch": 0.5950975466800636, "grad_norm": 0.27697011828422546, "learning_rate": 0.00018123315356121956, "loss": 11.6698, "step": 28429 }, { "epoch": 0.5951184794440257, "grad_norm": 0.3004685044288635, "learning_rate": 0.00018123187486547711, "loss": 11.666, "step": 28430 }, { "epoch": 0.5951394122079879, "grad_norm": 0.2690775394439697, "learning_rate": 0.00018123059613068485, "loss": 11.6636, "step": 28431 }, { "epoch": 0.5951603449719501, "grad_norm": 0.3206275999546051, "learning_rate": 0.00018122931735684342, "loss": 11.6766, "step": 28432 }, { "epoch": 0.5951812777359122, "grad_norm": 0.39922311902046204, "learning_rate": 0.00018122803854395345, "loss": 11.673, "step": 28433 }, { "epoch": 0.5952022104998744, "grad_norm": 0.2973993718624115, "learning_rate": 0.0001812267596920155, "loss": 11.6657, "step": 28434 }, { "epoch": 0.5952231432638365, "grad_norm": 0.42569512128829956, "learning_rate": 0.00018122548080103026, "loss": 11.6888, "step": 28435 }, { "epoch": 0.5952440760277987, "grad_norm": 0.3781878650188446, "learning_rate": 0.00018122420187099828, "loss": 11.6758, "step": 28436 }, { "epoch": 0.5952650087917609, "grad_norm": 0.3210834562778473, "learning_rate": 0.0001812229229019202, "loss": 11.6685, "step": 28437 }, { "epoch": 0.595285941555723, "grad_norm": 0.320067435503006, "learning_rate": 0.00018122164389379664, "loss": 11.6963, "step": 28438 }, { "epoch": 0.5953068743196852, "grad_norm": 0.4029524326324463, "learning_rate": 0.0001812203648466282, "loss": 11.6689, "step": 28439 }, { "epoch": 0.5953278070836473, "grad_norm": 0.3479402959346771, "learning_rate": 0.00018121908576041552, "loss": 11.6877, "step": 28440 }, { "epoch": 0.5953487398476095, "grad_norm": 0.316614031791687, "learning_rate": 0.00018121780663515918, "loss": 11.6508, "step": 28441 }, { "epoch": 0.5953696726115716, "grad_norm": 0.2943216562271118, "learning_rate": 0.00018121652747085982, "loss": 11.6674, "step": 28442 }, { "epoch": 0.5953906053755338, "grad_norm": 0.23582398891448975, "learning_rate": 0.00018121524826751807, "loss": 11.6774, "step": 28443 }, { "epoch": 0.595411538139496, "grad_norm": 0.2901753783226013, "learning_rate": 0.00018121396902513447, "loss": 11.6677, "step": 28444 }, { "epoch": 0.5954324709034581, "grad_norm": 0.3212186396121979, "learning_rate": 0.00018121268974370972, "loss": 11.6568, "step": 28445 }, { "epoch": 0.5954534036674203, "grad_norm": 0.33736899495124817, "learning_rate": 0.00018121141042324438, "loss": 11.661, "step": 28446 }, { "epoch": 0.5954743364313824, "grad_norm": 0.30431607365608215, "learning_rate": 0.0001812101310637391, "loss": 11.675, "step": 28447 }, { "epoch": 0.5954952691953446, "grad_norm": 0.30236580967903137, "learning_rate": 0.0001812088516651945, "loss": 11.6496, "step": 28448 }, { "epoch": 0.5955162019593067, "grad_norm": 0.3224257528781891, "learning_rate": 0.00018120757222761118, "loss": 11.6575, "step": 28449 }, { "epoch": 0.5955371347232689, "grad_norm": 0.274264931678772, "learning_rate": 0.00018120629275098974, "loss": 11.6665, "step": 28450 }, { "epoch": 0.5955580674872311, "grad_norm": 0.24068038165569305, "learning_rate": 0.0001812050132353308, "loss": 11.6746, "step": 28451 }, { "epoch": 0.5955790002511931, "grad_norm": 0.296448677778244, "learning_rate": 0.000181203733680635, "loss": 11.6604, "step": 28452 }, { "epoch": 0.5955999330151553, "grad_norm": 0.3178585171699524, "learning_rate": 0.00018120245408690292, "loss": 11.6846, "step": 28453 }, { "epoch": 0.5956208657791174, "grad_norm": 0.23020176589488983, "learning_rate": 0.0001812011744541352, "loss": 11.6723, "step": 28454 }, { "epoch": 0.5956417985430796, "grad_norm": 0.3444289267063141, "learning_rate": 0.00018119989478233243, "loss": 11.6639, "step": 28455 }, { "epoch": 0.5956627313070418, "grad_norm": 0.3024255633354187, "learning_rate": 0.00018119861507149526, "loss": 11.6853, "step": 28456 }, { "epoch": 0.5956836640710039, "grad_norm": 0.3601229786872864, "learning_rate": 0.0001811973353216243, "loss": 11.6428, "step": 28457 }, { "epoch": 0.5957045968349661, "grad_norm": 0.3095749020576477, "learning_rate": 0.00018119605553272016, "loss": 11.674, "step": 28458 }, { "epoch": 0.5957255295989282, "grad_norm": 0.299468070268631, "learning_rate": 0.00018119477570478342, "loss": 11.6792, "step": 28459 }, { "epoch": 0.5957464623628904, "grad_norm": 0.2488546073436737, "learning_rate": 0.00018119349583781475, "loss": 11.6715, "step": 28460 }, { "epoch": 0.5957673951268525, "grad_norm": 0.26131758093833923, "learning_rate": 0.00018119221593181472, "loss": 11.6637, "step": 28461 }, { "epoch": 0.5957883278908147, "grad_norm": 0.34891852736473083, "learning_rate": 0.00018119093598678397, "loss": 11.6732, "step": 28462 }, { "epoch": 0.5958092606547769, "grad_norm": 0.2709062993526459, "learning_rate": 0.00018118965600272312, "loss": 11.6629, "step": 28463 }, { "epoch": 0.595830193418739, "grad_norm": 0.2888569235801697, "learning_rate": 0.00018118837597963275, "loss": 11.6634, "step": 28464 }, { "epoch": 0.5958511261827012, "grad_norm": 0.27676549553871155, "learning_rate": 0.00018118709591751354, "loss": 11.6668, "step": 28465 }, { "epoch": 0.5958720589466633, "grad_norm": 0.22005964815616608, "learning_rate": 0.00018118581581636603, "loss": 11.6596, "step": 28466 }, { "epoch": 0.5958929917106255, "grad_norm": 0.38349640369415283, "learning_rate": 0.00018118453567619088, "loss": 11.6812, "step": 28467 }, { "epoch": 0.5959139244745876, "grad_norm": 0.2628914415836334, "learning_rate": 0.00018118325549698868, "loss": 11.6468, "step": 28468 }, { "epoch": 0.5959348572385498, "grad_norm": 0.24581360816955566, "learning_rate": 0.0001811819752787601, "loss": 11.6657, "step": 28469 }, { "epoch": 0.595955790002512, "grad_norm": 0.34380707144737244, "learning_rate": 0.0001811806950215057, "loss": 11.6657, "step": 28470 }, { "epoch": 0.5959767227664741, "grad_norm": 0.22545337677001953, "learning_rate": 0.0001811794147252261, "loss": 11.6792, "step": 28471 }, { "epoch": 0.5959976555304363, "grad_norm": 0.34585532546043396, "learning_rate": 0.00018117813438992198, "loss": 11.6758, "step": 28472 }, { "epoch": 0.5960185882943984, "grad_norm": 0.23699074983596802, "learning_rate": 0.00018117685401559385, "loss": 11.685, "step": 28473 }, { "epoch": 0.5960395210583606, "grad_norm": 0.4892164170742035, "learning_rate": 0.00018117557360224236, "loss": 11.6797, "step": 28474 }, { "epoch": 0.5960604538223226, "grad_norm": 0.30091744661331177, "learning_rate": 0.00018117429314986821, "loss": 11.6825, "step": 28475 }, { "epoch": 0.5960813865862848, "grad_norm": 0.3742505609989166, "learning_rate": 0.0001811730126584719, "loss": 11.6945, "step": 28476 }, { "epoch": 0.596102319350247, "grad_norm": 0.30359214544296265, "learning_rate": 0.00018117173212805414, "loss": 11.674, "step": 28477 }, { "epoch": 0.5961232521142091, "grad_norm": 0.28002384305000305, "learning_rate": 0.00018117045155861548, "loss": 11.66, "step": 28478 }, { "epoch": 0.5961441848781713, "grad_norm": 0.2784847915172577, "learning_rate": 0.00018116917095015655, "loss": 11.669, "step": 28479 }, { "epoch": 0.5961651176421334, "grad_norm": 0.34350648522377014, "learning_rate": 0.00018116789030267797, "loss": 11.672, "step": 28480 }, { "epoch": 0.5961860504060956, "grad_norm": 0.2656499743461609, "learning_rate": 0.00018116660961618036, "loss": 11.6906, "step": 28481 }, { "epoch": 0.5962069831700578, "grad_norm": 0.2844569683074951, "learning_rate": 0.00018116532889066435, "loss": 11.6518, "step": 28482 }, { "epoch": 0.5962279159340199, "grad_norm": 0.2845041751861572, "learning_rate": 0.00018116404812613054, "loss": 11.667, "step": 28483 }, { "epoch": 0.5962488486979821, "grad_norm": 0.3405221700668335, "learning_rate": 0.00018116276732257954, "loss": 11.6805, "step": 28484 }, { "epoch": 0.5962697814619442, "grad_norm": 0.29422885179519653, "learning_rate": 0.00018116148648001198, "loss": 11.6588, "step": 28485 }, { "epoch": 0.5962907142259064, "grad_norm": 0.2567659914493561, "learning_rate": 0.00018116020559842843, "loss": 11.6617, "step": 28486 }, { "epoch": 0.5963116469898685, "grad_norm": 0.3492385745048523, "learning_rate": 0.0001811589246778296, "loss": 11.6685, "step": 28487 }, { "epoch": 0.5963325797538307, "grad_norm": 0.30239489674568176, "learning_rate": 0.000181157643718216, "loss": 11.681, "step": 28488 }, { "epoch": 0.5963535125177929, "grad_norm": 0.3718649744987488, "learning_rate": 0.0001811563627195883, "loss": 11.6678, "step": 28489 }, { "epoch": 0.596374445281755, "grad_norm": 0.36821362376213074, "learning_rate": 0.00018115508168194713, "loss": 11.6918, "step": 28490 }, { "epoch": 0.5963953780457172, "grad_norm": 0.27128899097442627, "learning_rate": 0.00018115380060529308, "loss": 11.6707, "step": 28491 }, { "epoch": 0.5964163108096793, "grad_norm": 0.2728317081928253, "learning_rate": 0.00018115251948962676, "loss": 11.6761, "step": 28492 }, { "epoch": 0.5964372435736415, "grad_norm": 0.28528741002082825, "learning_rate": 0.0001811512383349488, "loss": 11.646, "step": 28493 }, { "epoch": 0.5964581763376036, "grad_norm": 0.2995970845222473, "learning_rate": 0.00018114995714125983, "loss": 11.6718, "step": 28494 }, { "epoch": 0.5964791091015658, "grad_norm": 0.35290059447288513, "learning_rate": 0.0001811486759085604, "loss": 11.6498, "step": 28495 }, { "epoch": 0.596500041865528, "grad_norm": 0.43182817101478577, "learning_rate": 0.00018114739463685124, "loss": 11.6697, "step": 28496 }, { "epoch": 0.59652097462949, "grad_norm": 0.37045860290527344, "learning_rate": 0.00018114611332613288, "loss": 11.6645, "step": 28497 }, { "epoch": 0.5965419073934523, "grad_norm": 0.3918289244174957, "learning_rate": 0.00018114483197640595, "loss": 11.6532, "step": 28498 }, { "epoch": 0.5965628401574143, "grad_norm": 0.43349140882492065, "learning_rate": 0.00018114355058767106, "loss": 11.6676, "step": 28499 }, { "epoch": 0.5965837729213765, "grad_norm": 0.37333160638809204, "learning_rate": 0.00018114226915992888, "loss": 11.6852, "step": 28500 }, { "epoch": 0.5966047056853387, "grad_norm": 0.2953121066093445, "learning_rate": 0.00018114098769317995, "loss": 11.6685, "step": 28501 }, { "epoch": 0.5966256384493008, "grad_norm": 0.2668667733669281, "learning_rate": 0.00018113970618742492, "loss": 11.6791, "step": 28502 }, { "epoch": 0.596646571213263, "grad_norm": 0.34853026270866394, "learning_rate": 0.00018113842464266444, "loss": 11.6554, "step": 28503 }, { "epoch": 0.5966675039772251, "grad_norm": 0.258393794298172, "learning_rate": 0.00018113714305889906, "loss": 11.6742, "step": 28504 }, { "epoch": 0.5966884367411873, "grad_norm": 0.2639857530593872, "learning_rate": 0.00018113586143612943, "loss": 11.6716, "step": 28505 }, { "epoch": 0.5967093695051494, "grad_norm": 0.2593008875846863, "learning_rate": 0.00018113457977435616, "loss": 11.6662, "step": 28506 }, { "epoch": 0.5967303022691116, "grad_norm": 0.3087504506111145, "learning_rate": 0.0001811332980735799, "loss": 11.6687, "step": 28507 }, { "epoch": 0.5967512350330738, "grad_norm": 0.3178575336933136, "learning_rate": 0.00018113201633380123, "loss": 11.6563, "step": 28508 }, { "epoch": 0.5967721677970359, "grad_norm": 0.2735927104949951, "learning_rate": 0.00018113073455502078, "loss": 11.6595, "step": 28509 }, { "epoch": 0.5967931005609981, "grad_norm": 0.288274347782135, "learning_rate": 0.00018112945273723914, "loss": 11.6755, "step": 28510 }, { "epoch": 0.5968140333249602, "grad_norm": 0.2864026427268982, "learning_rate": 0.00018112817088045694, "loss": 11.68, "step": 28511 }, { "epoch": 0.5968349660889224, "grad_norm": 0.3926582634449005, "learning_rate": 0.00018112688898467484, "loss": 11.6691, "step": 28512 }, { "epoch": 0.5968558988528845, "grad_norm": 0.29202908277511597, "learning_rate": 0.0001811256070498934, "loss": 11.673, "step": 28513 }, { "epoch": 0.5968768316168467, "grad_norm": 0.31796926259994507, "learning_rate": 0.00018112432507611323, "loss": 11.6783, "step": 28514 }, { "epoch": 0.5968977643808089, "grad_norm": 0.26291024684906006, "learning_rate": 0.000181123043063335, "loss": 11.6881, "step": 28515 }, { "epoch": 0.596918697144771, "grad_norm": 0.3274053633213043, "learning_rate": 0.0001811217610115593, "loss": 11.6807, "step": 28516 }, { "epoch": 0.5969396299087332, "grad_norm": 0.23987795412540436, "learning_rate": 0.0001811204789207867, "loss": 11.6691, "step": 28517 }, { "epoch": 0.5969605626726953, "grad_norm": 0.3043544888496399, "learning_rate": 0.00018111919679101793, "loss": 11.6617, "step": 28518 }, { "epoch": 0.5969814954366575, "grad_norm": 0.2830887734889984, "learning_rate": 0.00018111791462225348, "loss": 11.6641, "step": 28519 }, { "epoch": 0.5970024282006197, "grad_norm": 0.2522647976875305, "learning_rate": 0.00018111663241449403, "loss": 11.6685, "step": 28520 }, { "epoch": 0.5970233609645818, "grad_norm": 0.3278486132621765, "learning_rate": 0.0001811153501677402, "loss": 11.6761, "step": 28521 }, { "epoch": 0.597044293728544, "grad_norm": 0.30160078406333923, "learning_rate": 0.0001811140678819926, "loss": 11.6705, "step": 28522 }, { "epoch": 0.597065226492506, "grad_norm": 0.2699834108352661, "learning_rate": 0.00018111278555725185, "loss": 11.6587, "step": 28523 }, { "epoch": 0.5970861592564682, "grad_norm": 0.2850193679332733, "learning_rate": 0.00018111150319351856, "loss": 11.6741, "step": 28524 }, { "epoch": 0.5971070920204303, "grad_norm": 0.3032439947128296, "learning_rate": 0.00018111022079079331, "loss": 11.6699, "step": 28525 }, { "epoch": 0.5971280247843925, "grad_norm": 0.29241013526916504, "learning_rate": 0.0001811089383490768, "loss": 11.666, "step": 28526 }, { "epoch": 0.5971489575483547, "grad_norm": 0.21691174805164337, "learning_rate": 0.00018110765586836958, "loss": 11.6692, "step": 28527 }, { "epoch": 0.5971698903123168, "grad_norm": 0.2783583104610443, "learning_rate": 0.00018110637334867226, "loss": 11.681, "step": 28528 }, { "epoch": 0.597190823076279, "grad_norm": 0.3008658289909363, "learning_rate": 0.0001811050907899855, "loss": 11.6553, "step": 28529 }, { "epoch": 0.5972117558402411, "grad_norm": 0.24472366273403168, "learning_rate": 0.0001811038081923099, "loss": 11.6768, "step": 28530 }, { "epoch": 0.5972326886042033, "grad_norm": 0.302916556596756, "learning_rate": 0.00018110252555564606, "loss": 11.6629, "step": 28531 }, { "epoch": 0.5972536213681654, "grad_norm": 0.34580758213996887, "learning_rate": 0.00018110124287999463, "loss": 11.682, "step": 28532 }, { "epoch": 0.5972745541321276, "grad_norm": 0.35492825508117676, "learning_rate": 0.0001810999601653562, "loss": 11.6662, "step": 28533 }, { "epoch": 0.5972954868960898, "grad_norm": 0.3025776147842407, "learning_rate": 0.00018109867741173137, "loss": 11.6701, "step": 28534 }, { "epoch": 0.5973164196600519, "grad_norm": 0.3230195939540863, "learning_rate": 0.00018109739461912082, "loss": 11.6721, "step": 28535 }, { "epoch": 0.5973373524240141, "grad_norm": 0.2728004455566406, "learning_rate": 0.00018109611178752507, "loss": 11.6734, "step": 28536 }, { "epoch": 0.5973582851879762, "grad_norm": 0.3148307204246521, "learning_rate": 0.00018109482891694487, "loss": 11.675, "step": 28537 }, { "epoch": 0.5973792179519384, "grad_norm": 0.2648703157901764, "learning_rate": 0.0001810935460073807, "loss": 11.6706, "step": 28538 }, { "epoch": 0.5974001507159006, "grad_norm": 0.2652181088924408, "learning_rate": 0.00018109226305883328, "loss": 11.6756, "step": 28539 }, { "epoch": 0.5974210834798627, "grad_norm": 0.25558117032051086, "learning_rate": 0.00018109098007130316, "loss": 11.6656, "step": 28540 }, { "epoch": 0.5974420162438249, "grad_norm": 0.33262330293655396, "learning_rate": 0.00018108969704479097, "loss": 11.6798, "step": 28541 }, { "epoch": 0.597462949007787, "grad_norm": 0.3150307536125183, "learning_rate": 0.00018108841397929736, "loss": 11.6683, "step": 28542 }, { "epoch": 0.5974838817717492, "grad_norm": 0.35744521021842957, "learning_rate": 0.0001810871308748229, "loss": 11.697, "step": 28543 }, { "epoch": 0.5975048145357112, "grad_norm": 0.34025970101356506, "learning_rate": 0.00018108584773136828, "loss": 11.6776, "step": 28544 }, { "epoch": 0.5975257472996734, "grad_norm": 0.3091234266757965, "learning_rate": 0.00018108456454893403, "loss": 11.7002, "step": 28545 }, { "epoch": 0.5975466800636356, "grad_norm": 0.239154651761055, "learning_rate": 0.0001810832813275208, "loss": 11.6734, "step": 28546 }, { "epoch": 0.5975676128275977, "grad_norm": 0.32533636689186096, "learning_rate": 0.00018108199806712922, "loss": 11.6726, "step": 28547 }, { "epoch": 0.5975885455915599, "grad_norm": 0.27318301796913147, "learning_rate": 0.00018108071476775992, "loss": 11.6725, "step": 28548 }, { "epoch": 0.597609478355522, "grad_norm": 0.25034695863723755, "learning_rate": 0.0001810794314294135, "loss": 11.6759, "step": 28549 }, { "epoch": 0.5976304111194842, "grad_norm": 0.31427833437919617, "learning_rate": 0.00018107814805209054, "loss": 11.6756, "step": 28550 }, { "epoch": 0.5976513438834463, "grad_norm": 0.34713348746299744, "learning_rate": 0.0001810768646357917, "loss": 11.6639, "step": 28551 }, { "epoch": 0.5976722766474085, "grad_norm": 0.293366938829422, "learning_rate": 0.00018107558118051755, "loss": 11.6722, "step": 28552 }, { "epoch": 0.5976932094113707, "grad_norm": 0.24760963022708893, "learning_rate": 0.0001810742976862688, "loss": 11.6847, "step": 28553 }, { "epoch": 0.5977141421753328, "grad_norm": 0.30630043148994446, "learning_rate": 0.000181073014153046, "loss": 11.6653, "step": 28554 }, { "epoch": 0.597735074939295, "grad_norm": 0.31547340750694275, "learning_rate": 0.00018107173058084974, "loss": 11.6712, "step": 28555 }, { "epoch": 0.5977560077032571, "grad_norm": 0.2848300039768219, "learning_rate": 0.00018107044696968072, "loss": 11.673, "step": 28556 }, { "epoch": 0.5977769404672193, "grad_norm": 0.34467214345932007, "learning_rate": 0.00018106916331953946, "loss": 11.6772, "step": 28557 }, { "epoch": 0.5977978732311815, "grad_norm": 0.276402086019516, "learning_rate": 0.0001810678796304267, "loss": 11.6724, "step": 28558 }, { "epoch": 0.5978188059951436, "grad_norm": 0.40653377771377563, "learning_rate": 0.00018106659590234295, "loss": 11.6759, "step": 28559 }, { "epoch": 0.5978397387591058, "grad_norm": 0.25586366653442383, "learning_rate": 0.00018106531213528884, "loss": 11.6793, "step": 28560 }, { "epoch": 0.5978606715230679, "grad_norm": 0.2523016035556793, "learning_rate": 0.000181064028329265, "loss": 11.6736, "step": 28561 }, { "epoch": 0.5978816042870301, "grad_norm": 0.26516059041023254, "learning_rate": 0.00018106274448427212, "loss": 11.6789, "step": 28562 }, { "epoch": 0.5979025370509922, "grad_norm": 0.3333738446235657, "learning_rate": 0.0001810614606003107, "loss": 11.6632, "step": 28563 }, { "epoch": 0.5979234698149544, "grad_norm": 0.2608366012573242, "learning_rate": 0.00018106017667738144, "loss": 11.6551, "step": 28564 }, { "epoch": 0.5979444025789166, "grad_norm": 0.31562569737434387, "learning_rate": 0.00018105889271548492, "loss": 11.6699, "step": 28565 }, { "epoch": 0.5979653353428787, "grad_norm": 0.340621680021286, "learning_rate": 0.00018105760871462177, "loss": 11.6741, "step": 28566 }, { "epoch": 0.5979862681068409, "grad_norm": 0.30433976650238037, "learning_rate": 0.00018105632467479258, "loss": 11.676, "step": 28567 }, { "epoch": 0.598007200870803, "grad_norm": 0.29628002643585205, "learning_rate": 0.00018105504059599803, "loss": 11.6644, "step": 28568 }, { "epoch": 0.5980281336347651, "grad_norm": 0.3623642921447754, "learning_rate": 0.00018105375647823867, "loss": 11.6869, "step": 28569 }, { "epoch": 0.5980490663987272, "grad_norm": 0.2697262167930603, "learning_rate": 0.00018105247232151517, "loss": 11.6794, "step": 28570 }, { "epoch": 0.5980699991626894, "grad_norm": 0.30749744176864624, "learning_rate": 0.00018105118812582812, "loss": 11.6519, "step": 28571 }, { "epoch": 0.5980909319266516, "grad_norm": 0.2671501040458679, "learning_rate": 0.0001810499038911781, "loss": 11.6599, "step": 28572 }, { "epoch": 0.5981118646906137, "grad_norm": 0.2993541657924652, "learning_rate": 0.00018104861961756578, "loss": 11.685, "step": 28573 }, { "epoch": 0.5981327974545759, "grad_norm": 0.3188382089138031, "learning_rate": 0.0001810473353049918, "loss": 11.6707, "step": 28574 }, { "epoch": 0.598153730218538, "grad_norm": 0.3085058033466339, "learning_rate": 0.0001810460509534567, "loss": 11.6692, "step": 28575 }, { "epoch": 0.5981746629825002, "grad_norm": 0.29315078258514404, "learning_rate": 0.00018104476656296113, "loss": 11.6671, "step": 28576 }, { "epoch": 0.5981955957464624, "grad_norm": 0.3071928918361664, "learning_rate": 0.00018104348213350576, "loss": 11.6742, "step": 28577 }, { "epoch": 0.5982165285104245, "grad_norm": 0.2554638683795929, "learning_rate": 0.00018104219766509116, "loss": 11.6751, "step": 28578 }, { "epoch": 0.5982374612743867, "grad_norm": 0.33701959252357483, "learning_rate": 0.00018104091315771794, "loss": 11.667, "step": 28579 }, { "epoch": 0.5982583940383488, "grad_norm": 0.27371907234191895, "learning_rate": 0.0001810396286113867, "loss": 11.6743, "step": 28580 }, { "epoch": 0.598279326802311, "grad_norm": 0.31834161281585693, "learning_rate": 0.00018103834402609815, "loss": 11.6843, "step": 28581 }, { "epoch": 0.5983002595662731, "grad_norm": 0.33772698044776917, "learning_rate": 0.00018103705940185282, "loss": 11.6703, "step": 28582 }, { "epoch": 0.5983211923302353, "grad_norm": 0.3074139356613159, "learning_rate": 0.0001810357747386513, "loss": 11.6783, "step": 28583 }, { "epoch": 0.5983421250941975, "grad_norm": 0.26760825514793396, "learning_rate": 0.00018103449003649432, "loss": 11.6754, "step": 28584 }, { "epoch": 0.5983630578581596, "grad_norm": 0.2556067705154419, "learning_rate": 0.00018103320529538238, "loss": 11.6711, "step": 28585 }, { "epoch": 0.5983839906221218, "grad_norm": 0.2747167646884918, "learning_rate": 0.00018103192051531622, "loss": 11.6578, "step": 28586 }, { "epoch": 0.5984049233860839, "grad_norm": 0.25830596685409546, "learning_rate": 0.00018103063569629635, "loss": 11.6764, "step": 28587 }, { "epoch": 0.5984258561500461, "grad_norm": 0.32653310894966125, "learning_rate": 0.00018102935083832343, "loss": 11.6771, "step": 28588 }, { "epoch": 0.5984467889140082, "grad_norm": 0.31675493717193604, "learning_rate": 0.00018102806594139808, "loss": 11.662, "step": 28589 }, { "epoch": 0.5984677216779704, "grad_norm": 0.31788498163223267, "learning_rate": 0.00018102678100552093, "loss": 11.6653, "step": 28590 }, { "epoch": 0.5984886544419326, "grad_norm": 0.24844130873680115, "learning_rate": 0.0001810254960306926, "loss": 11.6629, "step": 28591 }, { "epoch": 0.5985095872058946, "grad_norm": 0.2970934808254242, "learning_rate": 0.00018102421101691364, "loss": 11.6495, "step": 28592 }, { "epoch": 0.5985305199698568, "grad_norm": 0.41842585802078247, "learning_rate": 0.00018102292596418475, "loss": 11.6636, "step": 28593 }, { "epoch": 0.5985514527338189, "grad_norm": 0.2854267358779907, "learning_rate": 0.0001810216408725065, "loss": 11.6813, "step": 28594 }, { "epoch": 0.5985723854977811, "grad_norm": 0.3062773644924164, "learning_rate": 0.00018102035574187956, "loss": 11.6822, "step": 28595 }, { "epoch": 0.5985933182617433, "grad_norm": 0.32776978611946106, "learning_rate": 0.00018101907057230447, "loss": 11.6574, "step": 28596 }, { "epoch": 0.5986142510257054, "grad_norm": 0.25204598903656006, "learning_rate": 0.0001810177853637819, "loss": 11.6506, "step": 28597 }, { "epoch": 0.5986351837896676, "grad_norm": 0.28690120577812195, "learning_rate": 0.00018101650011631247, "loss": 11.671, "step": 28598 }, { "epoch": 0.5986561165536297, "grad_norm": 0.2957799434661865, "learning_rate": 0.0001810152148298968, "loss": 11.6657, "step": 28599 }, { "epoch": 0.5986770493175919, "grad_norm": 0.3991831839084625, "learning_rate": 0.00018101392950453543, "loss": 11.6758, "step": 28600 }, { "epoch": 0.598697982081554, "grad_norm": 0.3678241968154907, "learning_rate": 0.00018101264414022907, "loss": 11.6833, "step": 28601 }, { "epoch": 0.5987189148455162, "grad_norm": 0.2997478246688843, "learning_rate": 0.00018101135873697835, "loss": 11.6785, "step": 28602 }, { "epoch": 0.5987398476094784, "grad_norm": 0.26627615094184875, "learning_rate": 0.0001810100732947838, "loss": 11.6639, "step": 28603 }, { "epoch": 0.5987607803734405, "grad_norm": 0.2725170850753784, "learning_rate": 0.0001810087878136461, "loss": 11.6748, "step": 28604 }, { "epoch": 0.5987817131374027, "grad_norm": 0.383075088262558, "learning_rate": 0.00018100750229356585, "loss": 11.6777, "step": 28605 }, { "epoch": 0.5988026459013648, "grad_norm": 0.25366130471229553, "learning_rate": 0.00018100621673454366, "loss": 11.6745, "step": 28606 }, { "epoch": 0.598823578665327, "grad_norm": 0.29614341259002686, "learning_rate": 0.00018100493113658016, "loss": 11.6494, "step": 28607 }, { "epoch": 0.5988445114292891, "grad_norm": 0.28850194811820984, "learning_rate": 0.00018100364549967596, "loss": 11.6706, "step": 28608 }, { "epoch": 0.5988654441932513, "grad_norm": 0.3003547191619873, "learning_rate": 0.00018100235982383174, "loss": 11.6861, "step": 28609 }, { "epoch": 0.5988863769572135, "grad_norm": 0.30712997913360596, "learning_rate": 0.000181001074109048, "loss": 11.6823, "step": 28610 }, { "epoch": 0.5989073097211756, "grad_norm": 0.3081623911857605, "learning_rate": 0.00018099978835532543, "loss": 11.6753, "step": 28611 }, { "epoch": 0.5989282424851378, "grad_norm": 0.28267714381217957, "learning_rate": 0.00018099850256266466, "loss": 11.683, "step": 28612 }, { "epoch": 0.5989491752490999, "grad_norm": 0.2617967426776886, "learning_rate": 0.0001809972167310663, "loss": 11.6585, "step": 28613 }, { "epoch": 0.598970108013062, "grad_norm": 0.6611509323120117, "learning_rate": 0.0001809959308605309, "loss": 11.6908, "step": 28614 }, { "epoch": 0.5989910407770243, "grad_norm": 0.26752087473869324, "learning_rate": 0.0001809946449510592, "loss": 11.6574, "step": 28615 }, { "epoch": 0.5990119735409863, "grad_norm": 0.27205830812454224, "learning_rate": 0.0001809933590026517, "loss": 11.6724, "step": 28616 }, { "epoch": 0.5990329063049485, "grad_norm": 0.3299117684364319, "learning_rate": 0.00018099207301530907, "loss": 11.6842, "step": 28617 }, { "epoch": 0.5990538390689106, "grad_norm": 0.2866983115673065, "learning_rate": 0.00018099078698903193, "loss": 11.6727, "step": 28618 }, { "epoch": 0.5990747718328728, "grad_norm": 0.2850727438926697, "learning_rate": 0.00018098950092382092, "loss": 11.6755, "step": 28619 }, { "epoch": 0.5990957045968349, "grad_norm": 0.2987068295478821, "learning_rate": 0.00018098821481967662, "loss": 11.6749, "step": 28620 }, { "epoch": 0.5991166373607971, "grad_norm": 0.3769795000553131, "learning_rate": 0.00018098692867659968, "loss": 11.6679, "step": 28621 }, { "epoch": 0.5991375701247593, "grad_norm": 0.27346161007881165, "learning_rate": 0.00018098564249459067, "loss": 11.6772, "step": 28622 }, { "epoch": 0.5991585028887214, "grad_norm": 0.2728933095932007, "learning_rate": 0.00018098435627365026, "loss": 11.661, "step": 28623 }, { "epoch": 0.5991794356526836, "grad_norm": 0.25833240151405334, "learning_rate": 0.00018098307001377902, "loss": 11.6721, "step": 28624 }, { "epoch": 0.5992003684166457, "grad_norm": 0.2762477993965149, "learning_rate": 0.00018098178371497763, "loss": 11.6501, "step": 28625 }, { "epoch": 0.5992213011806079, "grad_norm": 0.29182109236717224, "learning_rate": 0.00018098049737724664, "loss": 11.6586, "step": 28626 }, { "epoch": 0.59924223394457, "grad_norm": 0.2836506962776184, "learning_rate": 0.00018097921100058674, "loss": 11.6914, "step": 28627 }, { "epoch": 0.5992631667085322, "grad_norm": 0.2702750265598297, "learning_rate": 0.00018097792458499846, "loss": 11.663, "step": 28628 }, { "epoch": 0.5992840994724944, "grad_norm": 0.30959826707839966, "learning_rate": 0.00018097663813048255, "loss": 11.6735, "step": 28629 }, { "epoch": 0.5993050322364565, "grad_norm": 0.32061678171157837, "learning_rate": 0.00018097535163703947, "loss": 11.6765, "step": 28630 }, { "epoch": 0.5993259650004187, "grad_norm": 0.28763750195503235, "learning_rate": 0.00018097406510466995, "loss": 11.6651, "step": 28631 }, { "epoch": 0.5993468977643808, "grad_norm": 0.3144524395465851, "learning_rate": 0.00018097277853337454, "loss": 11.6715, "step": 28632 }, { "epoch": 0.599367830528343, "grad_norm": 0.28551891446113586, "learning_rate": 0.00018097149192315394, "loss": 11.6856, "step": 28633 }, { "epoch": 0.5993887632923052, "grad_norm": 0.28346720337867737, "learning_rate": 0.0001809702052740087, "loss": 11.6632, "step": 28634 }, { "epoch": 0.5994096960562673, "grad_norm": 0.29656296968460083, "learning_rate": 0.00018096891858593946, "loss": 11.6772, "step": 28635 }, { "epoch": 0.5994306288202295, "grad_norm": 0.2726476192474365, "learning_rate": 0.00018096763185894684, "loss": 11.6665, "step": 28636 }, { "epoch": 0.5994515615841916, "grad_norm": 0.2543213963508606, "learning_rate": 0.00018096634509303145, "loss": 11.6446, "step": 28637 }, { "epoch": 0.5994724943481538, "grad_norm": 0.3693718910217285, "learning_rate": 0.00018096505828819393, "loss": 11.6729, "step": 28638 }, { "epoch": 0.5994934271121158, "grad_norm": 0.26129862666130066, "learning_rate": 0.00018096377144443486, "loss": 11.6762, "step": 28639 }, { "epoch": 0.599514359876078, "grad_norm": 0.335549533367157, "learning_rate": 0.00018096248456175488, "loss": 11.6757, "step": 28640 }, { "epoch": 0.5995352926400402, "grad_norm": 0.2590661942958832, "learning_rate": 0.00018096119764015464, "loss": 11.6759, "step": 28641 }, { "epoch": 0.5995562254040023, "grad_norm": 0.29462090134620667, "learning_rate": 0.00018095991067963474, "loss": 11.6785, "step": 28642 }, { "epoch": 0.5995771581679645, "grad_norm": 0.2768872380256653, "learning_rate": 0.00018095862368019576, "loss": 11.6761, "step": 28643 }, { "epoch": 0.5995980909319266, "grad_norm": 0.3970828354358673, "learning_rate": 0.00018095733664183833, "loss": 11.6707, "step": 28644 }, { "epoch": 0.5996190236958888, "grad_norm": 0.323216050863266, "learning_rate": 0.0001809560495645631, "loss": 11.6643, "step": 28645 }, { "epoch": 0.5996399564598509, "grad_norm": 0.2696605920791626, "learning_rate": 0.0001809547624483707, "loss": 11.6633, "step": 28646 }, { "epoch": 0.5996608892238131, "grad_norm": 0.3177036941051483, "learning_rate": 0.00018095347529326173, "loss": 11.6664, "step": 28647 }, { "epoch": 0.5996818219877753, "grad_norm": 0.3222753405570984, "learning_rate": 0.00018095218809923677, "loss": 11.677, "step": 28648 }, { "epoch": 0.5997027547517374, "grad_norm": 0.31216365098953247, "learning_rate": 0.0001809509008662965, "loss": 11.6818, "step": 28649 }, { "epoch": 0.5997236875156996, "grad_norm": 0.37812885642051697, "learning_rate": 0.00018094961359444147, "loss": 11.6771, "step": 28650 }, { "epoch": 0.5997446202796617, "grad_norm": 0.262482225894928, "learning_rate": 0.00018094832628367237, "loss": 11.6633, "step": 28651 }, { "epoch": 0.5997655530436239, "grad_norm": 0.32047179341316223, "learning_rate": 0.00018094703893398975, "loss": 11.6741, "step": 28652 }, { "epoch": 0.599786485807586, "grad_norm": 0.29924580454826355, "learning_rate": 0.00018094575154539431, "loss": 11.6808, "step": 28653 }, { "epoch": 0.5998074185715482, "grad_norm": 0.28369084000587463, "learning_rate": 0.00018094446411788661, "loss": 11.6682, "step": 28654 }, { "epoch": 0.5998283513355104, "grad_norm": 0.3068488538265228, "learning_rate": 0.00018094317665146728, "loss": 11.6589, "step": 28655 }, { "epoch": 0.5998492840994725, "grad_norm": 0.38679319620132446, "learning_rate": 0.00018094188914613693, "loss": 11.674, "step": 28656 }, { "epoch": 0.5998702168634347, "grad_norm": 0.2847708761692047, "learning_rate": 0.00018094060160189621, "loss": 11.6518, "step": 28657 }, { "epoch": 0.5998911496273968, "grad_norm": 0.30811256170272827, "learning_rate": 0.0001809393140187457, "loss": 11.6868, "step": 28658 }, { "epoch": 0.599912082391359, "grad_norm": 0.24125716090202332, "learning_rate": 0.00018093802639668606, "loss": 11.6656, "step": 28659 }, { "epoch": 0.5999330151553212, "grad_norm": 0.26097166538238525, "learning_rate": 0.0001809367387357179, "loss": 11.6656, "step": 28660 }, { "epoch": 0.5999539479192832, "grad_norm": 0.25005248188972473, "learning_rate": 0.00018093545103584183, "loss": 11.6733, "step": 28661 }, { "epoch": 0.5999748806832454, "grad_norm": 0.2944975793361664, "learning_rate": 0.00018093416329705844, "loss": 11.6723, "step": 28662 }, { "epoch": 0.5999958134472075, "grad_norm": 0.2972489297389984, "learning_rate": 0.00018093287551936837, "loss": 11.6912, "step": 28663 }, { "epoch": 0.6000167462111697, "grad_norm": 0.3257598280906677, "learning_rate": 0.00018093158770277225, "loss": 11.6822, "step": 28664 }, { "epoch": 0.6000376789751318, "grad_norm": 0.27193352580070496, "learning_rate": 0.0001809302998472707, "loss": 11.6716, "step": 28665 }, { "epoch": 0.600058611739094, "grad_norm": 0.2817525863647461, "learning_rate": 0.00018092901195286435, "loss": 11.6636, "step": 28666 }, { "epoch": 0.6000795445030562, "grad_norm": 0.29336968064308167, "learning_rate": 0.00018092772401955378, "loss": 11.6742, "step": 28667 }, { "epoch": 0.6001004772670183, "grad_norm": 0.23310501873493195, "learning_rate": 0.00018092643604733963, "loss": 11.6642, "step": 28668 }, { "epoch": 0.6001214100309805, "grad_norm": 0.32926931977272034, "learning_rate": 0.00018092514803622254, "loss": 11.6751, "step": 28669 }, { "epoch": 0.6001423427949426, "grad_norm": 0.43284285068511963, "learning_rate": 0.0001809238599862031, "loss": 11.678, "step": 28670 }, { "epoch": 0.6001632755589048, "grad_norm": 0.32426342368125916, "learning_rate": 0.00018092257189728195, "loss": 11.6788, "step": 28671 }, { "epoch": 0.6001842083228669, "grad_norm": 0.2769318222999573, "learning_rate": 0.00018092128376945968, "loss": 11.6678, "step": 28672 }, { "epoch": 0.6002051410868291, "grad_norm": 0.2555549144744873, "learning_rate": 0.00018091999560273692, "loss": 11.6738, "step": 28673 }, { "epoch": 0.6002260738507913, "grad_norm": 0.22196374833583832, "learning_rate": 0.00018091870739711432, "loss": 11.6725, "step": 28674 }, { "epoch": 0.6002470066147534, "grad_norm": 0.3385506272315979, "learning_rate": 0.00018091741915259247, "loss": 11.6731, "step": 28675 }, { "epoch": 0.6002679393787156, "grad_norm": 0.2956482470035553, "learning_rate": 0.00018091613086917199, "loss": 11.6836, "step": 28676 }, { "epoch": 0.6002888721426777, "grad_norm": 0.24127262830734253, "learning_rate": 0.0001809148425468535, "loss": 11.6775, "step": 28677 }, { "epoch": 0.6003098049066399, "grad_norm": 0.29278337955474854, "learning_rate": 0.00018091355418563764, "loss": 11.6641, "step": 28678 }, { "epoch": 0.6003307376706021, "grad_norm": 0.2676667273044586, "learning_rate": 0.000180912265785525, "loss": 11.6731, "step": 28679 }, { "epoch": 0.6003516704345642, "grad_norm": 0.3129982650279999, "learning_rate": 0.00018091097734651622, "loss": 11.669, "step": 28680 }, { "epoch": 0.6003726031985264, "grad_norm": 0.308757483959198, "learning_rate": 0.0001809096888686119, "loss": 11.6696, "step": 28681 }, { "epoch": 0.6003935359624885, "grad_norm": 0.24090170860290527, "learning_rate": 0.00018090840035181268, "loss": 11.6729, "step": 28682 }, { "epoch": 0.6004144687264507, "grad_norm": 0.2856186628341675, "learning_rate": 0.00018090711179611916, "loss": 11.6669, "step": 28683 }, { "epoch": 0.6004354014904127, "grad_norm": 0.2585993707180023, "learning_rate": 0.00018090582320153198, "loss": 11.6663, "step": 28684 }, { "epoch": 0.600456334254375, "grad_norm": 0.34733834862709045, "learning_rate": 0.00018090453456805173, "loss": 11.6685, "step": 28685 }, { "epoch": 0.6004772670183371, "grad_norm": 0.2999517619609833, "learning_rate": 0.0001809032458956791, "loss": 11.6598, "step": 28686 }, { "epoch": 0.6004981997822992, "grad_norm": 0.274389386177063, "learning_rate": 0.0001809019571844146, "loss": 11.672, "step": 28687 }, { "epoch": 0.6005191325462614, "grad_norm": 0.26065951585769653, "learning_rate": 0.00018090066843425896, "loss": 11.6747, "step": 28688 }, { "epoch": 0.6005400653102235, "grad_norm": 0.3160654902458191, "learning_rate": 0.00018089937964521272, "loss": 11.663, "step": 28689 }, { "epoch": 0.6005609980741857, "grad_norm": 0.30142030119895935, "learning_rate": 0.0001808980908172765, "loss": 11.688, "step": 28690 }, { "epoch": 0.6005819308381478, "grad_norm": 0.27866876125335693, "learning_rate": 0.00018089680195045098, "loss": 11.6774, "step": 28691 }, { "epoch": 0.60060286360211, "grad_norm": 0.2701760530471802, "learning_rate": 0.00018089551304473672, "loss": 11.6627, "step": 28692 }, { "epoch": 0.6006237963660722, "grad_norm": 0.25602132081985474, "learning_rate": 0.0001808942241001344, "loss": 11.6957, "step": 28693 }, { "epoch": 0.6006447291300343, "grad_norm": 0.27260833978652954, "learning_rate": 0.0001808929351166446, "loss": 11.6636, "step": 28694 }, { "epoch": 0.6006656618939965, "grad_norm": 0.29591864347457886, "learning_rate": 0.00018089164609426793, "loss": 11.67, "step": 28695 }, { "epoch": 0.6006865946579586, "grad_norm": 0.3062649369239807, "learning_rate": 0.00018089035703300503, "loss": 11.6722, "step": 28696 }, { "epoch": 0.6007075274219208, "grad_norm": 0.27973970770835876, "learning_rate": 0.00018088906793285652, "loss": 11.672, "step": 28697 }, { "epoch": 0.600728460185883, "grad_norm": 0.29105648398399353, "learning_rate": 0.00018088777879382301, "loss": 11.6725, "step": 28698 }, { "epoch": 0.6007493929498451, "grad_norm": 0.2470497488975525, "learning_rate": 0.0001808864896159051, "loss": 11.6918, "step": 28699 }, { "epoch": 0.6007703257138073, "grad_norm": 0.31726640462875366, "learning_rate": 0.00018088520039910347, "loss": 11.6581, "step": 28700 }, { "epoch": 0.6007912584777694, "grad_norm": 0.28169822692871094, "learning_rate": 0.0001808839111434187, "loss": 11.6647, "step": 28701 }, { "epoch": 0.6008121912417316, "grad_norm": 0.26013538241386414, "learning_rate": 0.00018088262184885138, "loss": 11.6722, "step": 28702 }, { "epoch": 0.6008331240056937, "grad_norm": 0.2798881530761719, "learning_rate": 0.0001808813325154022, "loss": 11.6492, "step": 28703 }, { "epoch": 0.6008540567696559, "grad_norm": 0.29557836055755615, "learning_rate": 0.00018088004314307173, "loss": 11.6607, "step": 28704 }, { "epoch": 0.6008749895336181, "grad_norm": 0.23293951153755188, "learning_rate": 0.00018087875373186057, "loss": 11.6708, "step": 28705 }, { "epoch": 0.6008959222975802, "grad_norm": 0.3149014413356781, "learning_rate": 0.00018087746428176944, "loss": 11.6846, "step": 28706 }, { "epoch": 0.6009168550615424, "grad_norm": 0.3409964144229889, "learning_rate": 0.00018087617479279884, "loss": 11.6736, "step": 28707 }, { "epoch": 0.6009377878255044, "grad_norm": 0.3153379261493683, "learning_rate": 0.00018087488526494946, "loss": 11.6517, "step": 28708 }, { "epoch": 0.6009587205894666, "grad_norm": 0.2941094636917114, "learning_rate": 0.00018087359569822188, "loss": 11.6704, "step": 28709 }, { "epoch": 0.6009796533534287, "grad_norm": 0.2788729667663574, "learning_rate": 0.00018087230609261677, "loss": 11.68, "step": 28710 }, { "epoch": 0.6010005861173909, "grad_norm": 0.29480743408203125, "learning_rate": 0.00018087101644813472, "loss": 11.6741, "step": 28711 }, { "epoch": 0.6010215188813531, "grad_norm": 0.30166688561439514, "learning_rate": 0.00018086972676477632, "loss": 11.6856, "step": 28712 }, { "epoch": 0.6010424516453152, "grad_norm": 0.30304956436157227, "learning_rate": 0.00018086843704254226, "loss": 11.6685, "step": 28713 }, { "epoch": 0.6010633844092774, "grad_norm": 0.3243545591831207, "learning_rate": 0.0001808671472814331, "loss": 11.6871, "step": 28714 }, { "epoch": 0.6010843171732395, "grad_norm": 0.33226141333580017, "learning_rate": 0.0001808658574814495, "loss": 11.6767, "step": 28715 }, { "epoch": 0.6011052499372017, "grad_norm": 0.2583386301994324, "learning_rate": 0.00018086456764259206, "loss": 11.6571, "step": 28716 }, { "epoch": 0.6011261827011639, "grad_norm": 0.33128467202186584, "learning_rate": 0.00018086327776486137, "loss": 11.6585, "step": 28717 }, { "epoch": 0.601147115465126, "grad_norm": 0.27266833186149597, "learning_rate": 0.00018086198784825812, "loss": 11.656, "step": 28718 }, { "epoch": 0.6011680482290882, "grad_norm": 0.29743748903274536, "learning_rate": 0.00018086069789278287, "loss": 11.6745, "step": 28719 }, { "epoch": 0.6011889809930503, "grad_norm": 0.25268521904945374, "learning_rate": 0.00018085940789843626, "loss": 11.6857, "step": 28720 }, { "epoch": 0.6012099137570125, "grad_norm": 0.40934428572654724, "learning_rate": 0.0001808581178652189, "loss": 11.6896, "step": 28721 }, { "epoch": 0.6012308465209746, "grad_norm": 0.2590870261192322, "learning_rate": 0.00018085682779313148, "loss": 11.6601, "step": 28722 }, { "epoch": 0.6012517792849368, "grad_norm": 0.3172471225261688, "learning_rate": 0.0001808555376821745, "loss": 11.6786, "step": 28723 }, { "epoch": 0.601272712048899, "grad_norm": 0.30866801738739014, "learning_rate": 0.00018085424753234866, "loss": 11.6765, "step": 28724 }, { "epoch": 0.6012936448128611, "grad_norm": 0.38576221466064453, "learning_rate": 0.00018085295734365458, "loss": 11.6848, "step": 28725 }, { "epoch": 0.6013145775768233, "grad_norm": 0.289008766412735, "learning_rate": 0.00018085166711609286, "loss": 11.6665, "step": 28726 }, { "epoch": 0.6013355103407854, "grad_norm": 0.25869110226631165, "learning_rate": 0.0001808503768496641, "loss": 11.6624, "step": 28727 }, { "epoch": 0.6013564431047476, "grad_norm": 0.2772769033908844, "learning_rate": 0.00018084908654436896, "loss": 11.6686, "step": 28728 }, { "epoch": 0.6013773758687097, "grad_norm": 0.28627675771713257, "learning_rate": 0.00018084779620020807, "loss": 11.6888, "step": 28729 }, { "epoch": 0.6013983086326719, "grad_norm": 0.2635940909385681, "learning_rate": 0.000180846505817182, "loss": 11.6705, "step": 28730 }, { "epoch": 0.601419241396634, "grad_norm": 0.26662465929985046, "learning_rate": 0.00018084521539529135, "loss": 11.6827, "step": 28731 }, { "epoch": 0.6014401741605961, "grad_norm": 0.28314411640167236, "learning_rate": 0.00018084392493453684, "loss": 11.6546, "step": 28732 }, { "epoch": 0.6014611069245583, "grad_norm": 0.29477962851524353, "learning_rate": 0.00018084263443491903, "loss": 11.6716, "step": 28733 }, { "epoch": 0.6014820396885204, "grad_norm": 0.33845946192741394, "learning_rate": 0.00018084134389643853, "loss": 11.6675, "step": 28734 }, { "epoch": 0.6015029724524826, "grad_norm": 0.24560120701789856, "learning_rate": 0.000180840053319096, "loss": 11.6659, "step": 28735 }, { "epoch": 0.6015239052164448, "grad_norm": 0.31284114718437195, "learning_rate": 0.000180838762702892, "loss": 11.6635, "step": 28736 }, { "epoch": 0.6015448379804069, "grad_norm": 0.2801932394504547, "learning_rate": 0.00018083747204782722, "loss": 11.6789, "step": 28737 }, { "epoch": 0.6015657707443691, "grad_norm": 0.2642767131328583, "learning_rate": 0.00018083618135390222, "loss": 11.6775, "step": 28738 }, { "epoch": 0.6015867035083312, "grad_norm": 0.35765397548675537, "learning_rate": 0.00018083489062111768, "loss": 11.6747, "step": 28739 }, { "epoch": 0.6016076362722934, "grad_norm": 0.2802759110927582, "learning_rate": 0.00018083359984947416, "loss": 11.6809, "step": 28740 }, { "epoch": 0.6016285690362555, "grad_norm": 0.30041682720184326, "learning_rate": 0.00018083230903897233, "loss": 11.6787, "step": 28741 }, { "epoch": 0.6016495018002177, "grad_norm": 0.3320569097995758, "learning_rate": 0.00018083101818961277, "loss": 11.669, "step": 28742 }, { "epoch": 0.6016704345641799, "grad_norm": 0.30657386779785156, "learning_rate": 0.00018082972730139614, "loss": 11.6747, "step": 28743 }, { "epoch": 0.601691367328142, "grad_norm": 0.3381114602088928, "learning_rate": 0.000180828436374323, "loss": 11.6838, "step": 28744 }, { "epoch": 0.6017123000921042, "grad_norm": 0.37329742312431335, "learning_rate": 0.00018082714540839406, "loss": 11.6607, "step": 28745 }, { "epoch": 0.6017332328560663, "grad_norm": 0.2818860113620758, "learning_rate": 0.00018082585440360986, "loss": 11.6696, "step": 28746 }, { "epoch": 0.6017541656200285, "grad_norm": 0.2917971611022949, "learning_rate": 0.00018082456335997106, "loss": 11.6641, "step": 28747 }, { "epoch": 0.6017750983839906, "grad_norm": 0.3018803298473358, "learning_rate": 0.00018082327227747827, "loss": 11.6719, "step": 28748 }, { "epoch": 0.6017960311479528, "grad_norm": 0.3374617397785187, "learning_rate": 0.00018082198115613213, "loss": 11.6764, "step": 28749 }, { "epoch": 0.601816963911915, "grad_norm": 0.26794326305389404, "learning_rate": 0.0001808206899959332, "loss": 11.6639, "step": 28750 }, { "epoch": 0.6018378966758771, "grad_norm": 0.26363325119018555, "learning_rate": 0.0001808193987968822, "loss": 11.663, "step": 28751 }, { "epoch": 0.6018588294398393, "grad_norm": 0.37005314230918884, "learning_rate": 0.00018081810755897967, "loss": 11.6884, "step": 28752 }, { "epoch": 0.6018797622038013, "grad_norm": 0.30470672249794006, "learning_rate": 0.00018081681628222626, "loss": 11.6678, "step": 28753 }, { "epoch": 0.6019006949677635, "grad_norm": 0.3836398124694824, "learning_rate": 0.00018081552496662258, "loss": 11.6678, "step": 28754 }, { "epoch": 0.6019216277317258, "grad_norm": 0.41398781538009644, "learning_rate": 0.00018081423361216925, "loss": 11.6831, "step": 28755 }, { "epoch": 0.6019425604956878, "grad_norm": 0.3260626494884491, "learning_rate": 0.00018081294221886692, "loss": 11.6667, "step": 28756 }, { "epoch": 0.60196349325965, "grad_norm": 0.32338592410087585, "learning_rate": 0.00018081165078671617, "loss": 11.6622, "step": 28757 }, { "epoch": 0.6019844260236121, "grad_norm": 0.29734593629837036, "learning_rate": 0.0001808103593157177, "loss": 11.6662, "step": 28758 }, { "epoch": 0.6020053587875743, "grad_norm": 0.26835179328918457, "learning_rate": 0.00018080906780587198, "loss": 11.6694, "step": 28759 }, { "epoch": 0.6020262915515364, "grad_norm": 0.2617490589618683, "learning_rate": 0.00018080777625717977, "loss": 11.6613, "step": 28760 }, { "epoch": 0.6020472243154986, "grad_norm": 0.3103146553039551, "learning_rate": 0.00018080648466964162, "loss": 11.6695, "step": 28761 }, { "epoch": 0.6020681570794608, "grad_norm": 0.23926950991153717, "learning_rate": 0.0001808051930432582, "loss": 11.6692, "step": 28762 }, { "epoch": 0.6020890898434229, "grad_norm": 0.2599998414516449, "learning_rate": 0.0001808039013780301, "loss": 11.6633, "step": 28763 }, { "epoch": 0.6021100226073851, "grad_norm": 0.25973665714263916, "learning_rate": 0.00018080260967395795, "loss": 11.6798, "step": 28764 }, { "epoch": 0.6021309553713472, "grad_norm": 0.2862318456172943, "learning_rate": 0.00018080131793104233, "loss": 11.6699, "step": 28765 }, { "epoch": 0.6021518881353094, "grad_norm": 0.3185212314128876, "learning_rate": 0.00018080002614928394, "loss": 11.6718, "step": 28766 }, { "epoch": 0.6021728208992715, "grad_norm": 0.33677563071250916, "learning_rate": 0.0001807987343286833, "loss": 11.6839, "step": 28767 }, { "epoch": 0.6021937536632337, "grad_norm": 0.3069603443145752, "learning_rate": 0.00018079744246924116, "loss": 11.6666, "step": 28768 }, { "epoch": 0.6022146864271959, "grad_norm": 0.2490096092224121, "learning_rate": 0.000180796150570958, "loss": 11.6617, "step": 28769 }, { "epoch": 0.602235619191158, "grad_norm": 0.24725259840488434, "learning_rate": 0.0001807948586338346, "loss": 11.6657, "step": 28770 }, { "epoch": 0.6022565519551202, "grad_norm": 0.324988454580307, "learning_rate": 0.00018079356665787143, "loss": 11.6633, "step": 28771 }, { "epoch": 0.6022774847190823, "grad_norm": 0.3205520510673523, "learning_rate": 0.00018079227464306915, "loss": 11.6703, "step": 28772 }, { "epoch": 0.6022984174830445, "grad_norm": 0.2865367829799652, "learning_rate": 0.00018079098258942845, "loss": 11.6862, "step": 28773 }, { "epoch": 0.6023193502470067, "grad_norm": 0.23648934066295624, "learning_rate": 0.00018078969049694992, "loss": 11.6655, "step": 28774 }, { "epoch": 0.6023402830109688, "grad_norm": 0.34637853503227234, "learning_rate": 0.00018078839836563413, "loss": 11.6756, "step": 28775 }, { "epoch": 0.602361215774931, "grad_norm": 0.31816741824150085, "learning_rate": 0.00018078710619548173, "loss": 11.6813, "step": 28776 }, { "epoch": 0.602382148538893, "grad_norm": 0.2936307489871979, "learning_rate": 0.0001807858139864934, "loss": 11.6744, "step": 28777 }, { "epoch": 0.6024030813028552, "grad_norm": 0.3358798027038574, "learning_rate": 0.00018078452173866968, "loss": 11.6556, "step": 28778 }, { "epoch": 0.6024240140668173, "grad_norm": 0.29804813861846924, "learning_rate": 0.00018078322945201122, "loss": 11.6892, "step": 28779 }, { "epoch": 0.6024449468307795, "grad_norm": 0.28878524899482727, "learning_rate": 0.00018078193712651864, "loss": 11.6666, "step": 28780 }, { "epoch": 0.6024658795947417, "grad_norm": 0.2892857491970062, "learning_rate": 0.00018078064476219257, "loss": 11.6614, "step": 28781 }, { "epoch": 0.6024868123587038, "grad_norm": 0.2546103298664093, "learning_rate": 0.00018077935235903363, "loss": 11.6741, "step": 28782 }, { "epoch": 0.602507745122666, "grad_norm": 0.2991131842136383, "learning_rate": 0.00018077805991704242, "loss": 11.6442, "step": 28783 }, { "epoch": 0.6025286778866281, "grad_norm": 0.38347312808036804, "learning_rate": 0.00018077676743621959, "loss": 11.6647, "step": 28784 }, { "epoch": 0.6025496106505903, "grad_norm": 0.27999797463417053, "learning_rate": 0.00018077547491656573, "loss": 11.6622, "step": 28785 }, { "epoch": 0.6025705434145524, "grad_norm": 0.3712936341762543, "learning_rate": 0.0001807741823580815, "loss": 11.6662, "step": 28786 }, { "epoch": 0.6025914761785146, "grad_norm": 0.2791077196598053, "learning_rate": 0.0001807728897607675, "loss": 11.6623, "step": 28787 }, { "epoch": 0.6026124089424768, "grad_norm": 0.33562466502189636, "learning_rate": 0.00018077159712462436, "loss": 11.6796, "step": 28788 }, { "epoch": 0.6026333417064389, "grad_norm": 0.3091421127319336, "learning_rate": 0.00018077030444965268, "loss": 11.659, "step": 28789 }, { "epoch": 0.6026542744704011, "grad_norm": 0.373317152261734, "learning_rate": 0.0001807690117358531, "loss": 11.6777, "step": 28790 }, { "epoch": 0.6026752072343632, "grad_norm": 0.32587966322898865, "learning_rate": 0.00018076771898322625, "loss": 11.6558, "step": 28791 }, { "epoch": 0.6026961399983254, "grad_norm": 0.37039440870285034, "learning_rate": 0.00018076642619177272, "loss": 11.6504, "step": 28792 }, { "epoch": 0.6027170727622876, "grad_norm": 0.28550201654434204, "learning_rate": 0.00018076513336149318, "loss": 11.6553, "step": 28793 }, { "epoch": 0.6027380055262497, "grad_norm": 0.3505791127681732, "learning_rate": 0.0001807638404923882, "loss": 11.6667, "step": 28794 }, { "epoch": 0.6027589382902119, "grad_norm": 0.3045068681240082, "learning_rate": 0.0001807625475844584, "loss": 11.6533, "step": 28795 }, { "epoch": 0.602779871054174, "grad_norm": 0.28593137860298157, "learning_rate": 0.00018076125463770446, "loss": 11.6856, "step": 28796 }, { "epoch": 0.6028008038181362, "grad_norm": 0.2797274589538574, "learning_rate": 0.00018075996165212694, "loss": 11.6707, "step": 28797 }, { "epoch": 0.6028217365820983, "grad_norm": 0.4072878956794739, "learning_rate": 0.0001807586686277265, "loss": 11.6594, "step": 28798 }, { "epoch": 0.6028426693460605, "grad_norm": 0.3515978157520294, "learning_rate": 0.0001807573755645038, "loss": 11.6843, "step": 28799 }, { "epoch": 0.6028636021100227, "grad_norm": 0.25055813789367676, "learning_rate": 0.00018075608246245932, "loss": 11.675, "step": 28800 }, { "epoch": 0.6028845348739847, "grad_norm": 0.3290003836154938, "learning_rate": 0.00018075478932159383, "loss": 11.6653, "step": 28801 }, { "epoch": 0.602905467637947, "grad_norm": 0.2721690535545349, "learning_rate": 0.00018075349614190788, "loss": 11.6868, "step": 28802 }, { "epoch": 0.602926400401909, "grad_norm": 0.33635449409484863, "learning_rate": 0.0001807522029234021, "loss": 11.6686, "step": 28803 }, { "epoch": 0.6029473331658712, "grad_norm": 0.3564605712890625, "learning_rate": 0.00018075090966607714, "loss": 11.6777, "step": 28804 }, { "epoch": 0.6029682659298333, "grad_norm": 0.2806156575679779, "learning_rate": 0.00018074961636993357, "loss": 11.6642, "step": 28805 }, { "epoch": 0.6029891986937955, "grad_norm": 0.2709449529647827, "learning_rate": 0.00018074832303497206, "loss": 11.6804, "step": 28806 }, { "epoch": 0.6030101314577577, "grad_norm": 0.30907493829727173, "learning_rate": 0.00018074702966119322, "loss": 11.6684, "step": 28807 }, { "epoch": 0.6030310642217198, "grad_norm": 0.25147324800491333, "learning_rate": 0.00018074573624859763, "loss": 11.6701, "step": 28808 }, { "epoch": 0.603051996985682, "grad_norm": 0.2250930517911911, "learning_rate": 0.00018074444279718595, "loss": 11.6799, "step": 28809 }, { "epoch": 0.6030729297496441, "grad_norm": 0.2650895118713379, "learning_rate": 0.00018074314930695886, "loss": 11.6699, "step": 28810 }, { "epoch": 0.6030938625136063, "grad_norm": 0.3319353461265564, "learning_rate": 0.00018074185577791687, "loss": 11.6731, "step": 28811 }, { "epoch": 0.6031147952775685, "grad_norm": 0.2866012454032898, "learning_rate": 0.00018074056221006066, "loss": 11.6687, "step": 28812 }, { "epoch": 0.6031357280415306, "grad_norm": 0.3655252158641815, "learning_rate": 0.00018073926860339085, "loss": 11.6797, "step": 28813 }, { "epoch": 0.6031566608054928, "grad_norm": 0.2784065008163452, "learning_rate": 0.00018073797495790804, "loss": 11.6605, "step": 28814 }, { "epoch": 0.6031775935694549, "grad_norm": 0.33089032769203186, "learning_rate": 0.00018073668127361288, "loss": 11.6486, "step": 28815 }, { "epoch": 0.6031985263334171, "grad_norm": 0.3277282118797302, "learning_rate": 0.00018073538755050597, "loss": 11.6713, "step": 28816 }, { "epoch": 0.6032194590973792, "grad_norm": 0.2560780942440033, "learning_rate": 0.00018073409378858797, "loss": 11.6792, "step": 28817 }, { "epoch": 0.6032403918613414, "grad_norm": 0.25466400384902954, "learning_rate": 0.00018073279998785945, "loss": 11.6664, "step": 28818 }, { "epoch": 0.6032613246253036, "grad_norm": 0.26654157042503357, "learning_rate": 0.00018073150614832104, "loss": 11.6584, "step": 28819 }, { "epoch": 0.6032822573892657, "grad_norm": 0.32416775822639465, "learning_rate": 0.0001807302122699734, "loss": 11.6573, "step": 28820 }, { "epoch": 0.6033031901532279, "grad_norm": 0.31306859850883484, "learning_rate": 0.0001807289183528171, "loss": 11.6675, "step": 28821 }, { "epoch": 0.60332412291719, "grad_norm": 0.2581245005130768, "learning_rate": 0.00018072762439685282, "loss": 11.6677, "step": 28822 }, { "epoch": 0.6033450556811522, "grad_norm": 0.32596448063850403, "learning_rate": 0.00018072633040208116, "loss": 11.6653, "step": 28823 }, { "epoch": 0.6033659884451142, "grad_norm": 0.24672871828079224, "learning_rate": 0.0001807250363685027, "loss": 11.6662, "step": 28824 }, { "epoch": 0.6033869212090764, "grad_norm": 0.32996469736099243, "learning_rate": 0.00018072374229611814, "loss": 11.6613, "step": 28825 }, { "epoch": 0.6034078539730386, "grad_norm": 0.2897647023200989, "learning_rate": 0.00018072244818492802, "loss": 11.6594, "step": 28826 }, { "epoch": 0.6034287867370007, "grad_norm": 0.291037380695343, "learning_rate": 0.000180721154034933, "loss": 11.6684, "step": 28827 }, { "epoch": 0.6034497195009629, "grad_norm": 0.3573436737060547, "learning_rate": 0.00018071985984613375, "loss": 11.6761, "step": 28828 }, { "epoch": 0.603470652264925, "grad_norm": 0.26847365498542786, "learning_rate": 0.0001807185656185308, "loss": 11.6565, "step": 28829 }, { "epoch": 0.6034915850288872, "grad_norm": 0.3598124086856842, "learning_rate": 0.00018071727135212485, "loss": 11.6807, "step": 28830 }, { "epoch": 0.6035125177928494, "grad_norm": 0.2778489291667938, "learning_rate": 0.00018071597704691648, "loss": 11.6958, "step": 28831 }, { "epoch": 0.6035334505568115, "grad_norm": 0.25535857677459717, "learning_rate": 0.0001807146827029063, "loss": 11.659, "step": 28832 }, { "epoch": 0.6035543833207737, "grad_norm": 0.28608840703964233, "learning_rate": 0.00018071338832009498, "loss": 11.6726, "step": 28833 }, { "epoch": 0.6035753160847358, "grad_norm": 0.2479052096605301, "learning_rate": 0.00018071209389848312, "loss": 11.6659, "step": 28834 }, { "epoch": 0.603596248848698, "grad_norm": 0.29817289113998413, "learning_rate": 0.00018071079943807132, "loss": 11.6763, "step": 28835 }, { "epoch": 0.6036171816126601, "grad_norm": 0.2771068215370178, "learning_rate": 0.00018070950493886024, "loss": 11.6924, "step": 28836 }, { "epoch": 0.6036381143766223, "grad_norm": 0.2780880928039551, "learning_rate": 0.00018070821040085046, "loss": 11.6677, "step": 28837 }, { "epoch": 0.6036590471405845, "grad_norm": 0.31006765365600586, "learning_rate": 0.00018070691582404265, "loss": 11.6581, "step": 28838 }, { "epoch": 0.6036799799045466, "grad_norm": 0.29084694385528564, "learning_rate": 0.0001807056212084374, "loss": 11.6942, "step": 28839 }, { "epoch": 0.6037009126685088, "grad_norm": 0.26935112476348877, "learning_rate": 0.00018070432655403534, "loss": 11.6847, "step": 28840 }, { "epoch": 0.6037218454324709, "grad_norm": 0.329459011554718, "learning_rate": 0.00018070303186083707, "loss": 11.6563, "step": 28841 }, { "epoch": 0.6037427781964331, "grad_norm": 0.29748794436454773, "learning_rate": 0.00018070173712884325, "loss": 11.6575, "step": 28842 }, { "epoch": 0.6037637109603952, "grad_norm": 0.2589670419692993, "learning_rate": 0.0001807004423580545, "loss": 11.6634, "step": 28843 }, { "epoch": 0.6037846437243574, "grad_norm": 0.28599488735198975, "learning_rate": 0.00018069914754847145, "loss": 11.6463, "step": 28844 }, { "epoch": 0.6038055764883196, "grad_norm": 0.3035409450531006, "learning_rate": 0.00018069785270009465, "loss": 11.6798, "step": 28845 }, { "epoch": 0.6038265092522817, "grad_norm": 0.31523531675338745, "learning_rate": 0.0001806965578129248, "loss": 11.6737, "step": 28846 }, { "epoch": 0.6038474420162439, "grad_norm": 0.3024097979068756, "learning_rate": 0.00018069526288696253, "loss": 11.6681, "step": 28847 }, { "epoch": 0.6038683747802059, "grad_norm": 0.3178102970123291, "learning_rate": 0.0001806939679222084, "loss": 11.6753, "step": 28848 }, { "epoch": 0.6038893075441681, "grad_norm": 0.3215993642807007, "learning_rate": 0.00018069267291866305, "loss": 11.6631, "step": 28849 }, { "epoch": 0.6039102403081302, "grad_norm": 0.2983032763004303, "learning_rate": 0.0001806913778763271, "loss": 11.6739, "step": 28850 }, { "epoch": 0.6039311730720924, "grad_norm": 0.3707745671272278, "learning_rate": 0.00018069008279520123, "loss": 11.671, "step": 28851 }, { "epoch": 0.6039521058360546, "grad_norm": 0.28286197781562805, "learning_rate": 0.000180688787675286, "loss": 11.6569, "step": 28852 }, { "epoch": 0.6039730386000167, "grad_norm": 0.26646238565444946, "learning_rate": 0.00018068749251658206, "loss": 11.6605, "step": 28853 }, { "epoch": 0.6039939713639789, "grad_norm": 0.34212952852249146, "learning_rate": 0.00018068619731909002, "loss": 11.6777, "step": 28854 }, { "epoch": 0.604014904127941, "grad_norm": 0.3047997057437897, "learning_rate": 0.00018068490208281052, "loss": 11.6705, "step": 28855 }, { "epoch": 0.6040358368919032, "grad_norm": 0.26542600989341736, "learning_rate": 0.00018068360680774417, "loss": 11.6685, "step": 28856 }, { "epoch": 0.6040567696558654, "grad_norm": 0.2862440049648285, "learning_rate": 0.0001806823114938916, "loss": 11.6797, "step": 28857 }, { "epoch": 0.6040777024198275, "grad_norm": 0.34517207741737366, "learning_rate": 0.00018068101614125337, "loss": 11.6838, "step": 28858 }, { "epoch": 0.6040986351837897, "grad_norm": 0.23814494907855988, "learning_rate": 0.00018067972074983023, "loss": 11.6673, "step": 28859 }, { "epoch": 0.6041195679477518, "grad_norm": 0.3427906930446625, "learning_rate": 0.00018067842531962268, "loss": 11.6662, "step": 28860 }, { "epoch": 0.604140500711714, "grad_norm": 0.2724073827266693, "learning_rate": 0.0001806771298506314, "loss": 11.677, "step": 28861 }, { "epoch": 0.6041614334756761, "grad_norm": 0.3498265743255615, "learning_rate": 0.00018067583434285705, "loss": 11.6683, "step": 28862 }, { "epoch": 0.6041823662396383, "grad_norm": 0.3460313081741333, "learning_rate": 0.00018067453879630016, "loss": 11.6649, "step": 28863 }, { "epoch": 0.6042032990036005, "grad_norm": 0.28243210911750793, "learning_rate": 0.00018067324321096142, "loss": 11.671, "step": 28864 }, { "epoch": 0.6042242317675626, "grad_norm": 0.35365724563598633, "learning_rate": 0.00018067194758684144, "loss": 11.6881, "step": 28865 }, { "epoch": 0.6042451645315248, "grad_norm": 0.30409151315689087, "learning_rate": 0.00018067065192394085, "loss": 11.6723, "step": 28866 }, { "epoch": 0.6042660972954869, "grad_norm": 0.31815487146377563, "learning_rate": 0.00018066935622226024, "loss": 11.6648, "step": 28867 }, { "epoch": 0.6042870300594491, "grad_norm": 0.29301631450653076, "learning_rate": 0.00018066806048180027, "loss": 11.6764, "step": 28868 }, { "epoch": 0.6043079628234111, "grad_norm": 0.2937249541282654, "learning_rate": 0.00018066676470256154, "loss": 11.6572, "step": 28869 }, { "epoch": 0.6043288955873733, "grad_norm": 0.2649131119251251, "learning_rate": 0.00018066546888454465, "loss": 11.6879, "step": 28870 }, { "epoch": 0.6043498283513355, "grad_norm": 0.2953384518623352, "learning_rate": 0.0001806641730277503, "loss": 11.6745, "step": 28871 }, { "epoch": 0.6043707611152976, "grad_norm": 0.34091347455978394, "learning_rate": 0.00018066287713217904, "loss": 11.678, "step": 28872 }, { "epoch": 0.6043916938792598, "grad_norm": 0.22419171035289764, "learning_rate": 0.00018066158119783152, "loss": 11.6739, "step": 28873 }, { "epoch": 0.6044126266432219, "grad_norm": 0.2896282970905304, "learning_rate": 0.0001806602852247084, "loss": 11.6793, "step": 28874 }, { "epoch": 0.6044335594071841, "grad_norm": 0.27189597487449646, "learning_rate": 0.00018065898921281018, "loss": 11.6776, "step": 28875 }, { "epoch": 0.6044544921711463, "grad_norm": 0.276541531085968, "learning_rate": 0.00018065769316213763, "loss": 11.6631, "step": 28876 }, { "epoch": 0.6044754249351084, "grad_norm": 0.2731451094150543, "learning_rate": 0.0001806563970726913, "loss": 11.6659, "step": 28877 }, { "epoch": 0.6044963576990706, "grad_norm": 0.2680985629558563, "learning_rate": 0.00018065510094447183, "loss": 11.6624, "step": 28878 }, { "epoch": 0.6045172904630327, "grad_norm": 0.2923409044742584, "learning_rate": 0.00018065380477747984, "loss": 11.6573, "step": 28879 }, { "epoch": 0.6045382232269949, "grad_norm": 0.26282671093940735, "learning_rate": 0.00018065250857171594, "loss": 11.674, "step": 28880 }, { "epoch": 0.604559155990957, "grad_norm": 0.452962189912796, "learning_rate": 0.00018065121232718076, "loss": 11.6624, "step": 28881 }, { "epoch": 0.6045800887549192, "grad_norm": 0.24352990090847015, "learning_rate": 0.0001806499160438749, "loss": 11.6575, "step": 28882 }, { "epoch": 0.6046010215188814, "grad_norm": 0.32165929675102234, "learning_rate": 0.00018064861972179905, "loss": 11.6651, "step": 28883 }, { "epoch": 0.6046219542828435, "grad_norm": 0.30036500096321106, "learning_rate": 0.00018064732336095377, "loss": 11.6567, "step": 28884 }, { "epoch": 0.6046428870468057, "grad_norm": 0.3780929744243622, "learning_rate": 0.00018064602696133975, "loss": 11.6684, "step": 28885 }, { "epoch": 0.6046638198107678, "grad_norm": 0.307384729385376, "learning_rate": 0.0001806447305229575, "loss": 11.6654, "step": 28886 }, { "epoch": 0.60468475257473, "grad_norm": 0.30104848742485046, "learning_rate": 0.00018064343404580776, "loss": 11.6815, "step": 28887 }, { "epoch": 0.6047056853386921, "grad_norm": 0.29069796204566956, "learning_rate": 0.0001806421375298911, "loss": 11.663, "step": 28888 }, { "epoch": 0.6047266181026543, "grad_norm": 0.36759403347969055, "learning_rate": 0.00018064084097520812, "loss": 11.6674, "step": 28889 }, { "epoch": 0.6047475508666165, "grad_norm": 0.300968199968338, "learning_rate": 0.0001806395443817595, "loss": 11.671, "step": 28890 }, { "epoch": 0.6047684836305786, "grad_norm": 0.31421226263046265, "learning_rate": 0.00018063824774954581, "loss": 11.6584, "step": 28891 }, { "epoch": 0.6047894163945408, "grad_norm": 0.31048697233200073, "learning_rate": 0.0001806369510785677, "loss": 11.6782, "step": 28892 }, { "epoch": 0.6048103491585028, "grad_norm": 0.2570275664329529, "learning_rate": 0.00018063565436882582, "loss": 11.6633, "step": 28893 }, { "epoch": 0.604831281922465, "grad_norm": 0.42299437522888184, "learning_rate": 0.00018063435762032075, "loss": 11.6796, "step": 28894 }, { "epoch": 0.6048522146864272, "grad_norm": 0.3012947142124176, "learning_rate": 0.00018063306083305314, "loss": 11.6822, "step": 28895 }, { "epoch": 0.6048731474503893, "grad_norm": 0.27201321721076965, "learning_rate": 0.0001806317640070236, "loss": 11.6688, "step": 28896 }, { "epoch": 0.6048940802143515, "grad_norm": 0.3646863102912903, "learning_rate": 0.00018063046714223273, "loss": 11.6692, "step": 28897 }, { "epoch": 0.6049150129783136, "grad_norm": 0.31622475385665894, "learning_rate": 0.00018062917023868117, "loss": 11.6805, "step": 28898 }, { "epoch": 0.6049359457422758, "grad_norm": 0.2366877794265747, "learning_rate": 0.0001806278732963696, "loss": 11.6668, "step": 28899 }, { "epoch": 0.6049568785062379, "grad_norm": 1.4559752941131592, "learning_rate": 0.00018062657631529855, "loss": 11.6543, "step": 28900 }, { "epoch": 0.6049778112702001, "grad_norm": 0.2977190613746643, "learning_rate": 0.0001806252792954687, "loss": 11.6634, "step": 28901 }, { "epoch": 0.6049987440341623, "grad_norm": 0.3359912037849426, "learning_rate": 0.0001806239822368807, "loss": 11.683, "step": 28902 }, { "epoch": 0.6050196767981244, "grad_norm": 0.3213925063610077, "learning_rate": 0.0001806226851395351, "loss": 11.6595, "step": 28903 }, { "epoch": 0.6050406095620866, "grad_norm": 0.4077763557434082, "learning_rate": 0.00018062138800343258, "loss": 11.6721, "step": 28904 }, { "epoch": 0.6050615423260487, "grad_norm": 0.30673569440841675, "learning_rate": 0.0001806200908285737, "loss": 11.6743, "step": 28905 }, { "epoch": 0.6050824750900109, "grad_norm": 0.3029080331325531, "learning_rate": 0.00018061879361495916, "loss": 11.6818, "step": 28906 }, { "epoch": 0.605103407853973, "grad_norm": 0.2865736186504364, "learning_rate": 0.00018061749636258956, "loss": 11.6703, "step": 28907 }, { "epoch": 0.6051243406179352, "grad_norm": 0.40764105319976807, "learning_rate": 0.0001806161990714655, "loss": 11.6757, "step": 28908 }, { "epoch": 0.6051452733818974, "grad_norm": 0.3202146291732788, "learning_rate": 0.00018061490174158763, "loss": 11.6726, "step": 28909 }, { "epoch": 0.6051662061458595, "grad_norm": 0.29398059844970703, "learning_rate": 0.00018061360437295656, "loss": 11.6689, "step": 28910 }, { "epoch": 0.6051871389098217, "grad_norm": 0.2870294749736786, "learning_rate": 0.00018061230696557289, "loss": 11.6697, "step": 28911 }, { "epoch": 0.6052080716737838, "grad_norm": 0.24430984258651733, "learning_rate": 0.00018061100951943728, "loss": 11.6808, "step": 28912 }, { "epoch": 0.605229004437746, "grad_norm": 0.32828083634376526, "learning_rate": 0.00018060971203455036, "loss": 11.6542, "step": 28913 }, { "epoch": 0.6052499372017082, "grad_norm": 0.2800746262073517, "learning_rate": 0.00018060841451091273, "loss": 11.6832, "step": 28914 }, { "epoch": 0.6052708699656703, "grad_norm": 0.29229995608329773, "learning_rate": 0.000180607116948525, "loss": 11.6556, "step": 28915 }, { "epoch": 0.6052918027296325, "grad_norm": 0.2728481888771057, "learning_rate": 0.00018060581934738784, "loss": 11.6818, "step": 28916 }, { "epoch": 0.6053127354935945, "grad_norm": 0.25212395191192627, "learning_rate": 0.0001806045217075018, "loss": 11.6582, "step": 28917 }, { "epoch": 0.6053336682575567, "grad_norm": 0.32837459444999695, "learning_rate": 0.00018060322402886763, "loss": 11.6542, "step": 28918 }, { "epoch": 0.6053546010215188, "grad_norm": 0.367829829454422, "learning_rate": 0.0001806019263114858, "loss": 11.6709, "step": 28919 }, { "epoch": 0.605375533785481, "grad_norm": 0.3197570741176605, "learning_rate": 0.00018060062855535704, "loss": 11.6809, "step": 28920 }, { "epoch": 0.6053964665494432, "grad_norm": 0.35260602831840515, "learning_rate": 0.00018059933076048195, "loss": 11.6888, "step": 28921 }, { "epoch": 0.6054173993134053, "grad_norm": 0.27553561329841614, "learning_rate": 0.00018059803292686113, "loss": 11.6764, "step": 28922 }, { "epoch": 0.6054383320773675, "grad_norm": 0.28918033838272095, "learning_rate": 0.0001805967350544952, "loss": 11.6699, "step": 28923 }, { "epoch": 0.6054592648413296, "grad_norm": 0.2524549961090088, "learning_rate": 0.00018059543714338486, "loss": 11.6632, "step": 28924 }, { "epoch": 0.6054801976052918, "grad_norm": 0.297780305147171, "learning_rate": 0.00018059413919353063, "loss": 11.6686, "step": 28925 }, { "epoch": 0.6055011303692539, "grad_norm": 0.35187816619873047, "learning_rate": 0.00018059284120493322, "loss": 11.6784, "step": 28926 }, { "epoch": 0.6055220631332161, "grad_norm": 0.23894725739955902, "learning_rate": 0.0001805915431775932, "loss": 11.6968, "step": 28927 }, { "epoch": 0.6055429958971783, "grad_norm": 0.33393222093582153, "learning_rate": 0.00018059024511151122, "loss": 11.6686, "step": 28928 }, { "epoch": 0.6055639286611404, "grad_norm": 0.26384907960891724, "learning_rate": 0.00018058894700668786, "loss": 11.6741, "step": 28929 }, { "epoch": 0.6055848614251026, "grad_norm": 0.26179739832878113, "learning_rate": 0.0001805876488631238, "loss": 11.6743, "step": 28930 }, { "epoch": 0.6056057941890647, "grad_norm": 0.35968780517578125, "learning_rate": 0.00018058635068081964, "loss": 11.6714, "step": 28931 }, { "epoch": 0.6056267269530269, "grad_norm": 0.29452866315841675, "learning_rate": 0.000180585052459776, "loss": 11.6649, "step": 28932 }, { "epoch": 0.6056476597169891, "grad_norm": 0.3080606758594513, "learning_rate": 0.00018058375419999353, "loss": 11.6573, "step": 28933 }, { "epoch": 0.6056685924809512, "grad_norm": 0.34961050748825073, "learning_rate": 0.00018058245590147284, "loss": 11.588, "step": 28934 }, { "epoch": 0.6056895252449134, "grad_norm": 0.3233341574668884, "learning_rate": 0.00018058115756421452, "loss": 11.667, "step": 28935 }, { "epoch": 0.6057104580088755, "grad_norm": 0.23299837112426758, "learning_rate": 0.00018057985918821924, "loss": 11.6775, "step": 28936 }, { "epoch": 0.6057313907728377, "grad_norm": 0.31833764910697937, "learning_rate": 0.0001805785607734876, "loss": 11.6523, "step": 28937 }, { "epoch": 0.6057523235367998, "grad_norm": 0.38301610946655273, "learning_rate": 0.00018057726232002024, "loss": 11.6736, "step": 28938 }, { "epoch": 0.605773256300762, "grad_norm": 0.3369286358356476, "learning_rate": 0.00018057596382781775, "loss": 11.6913, "step": 28939 }, { "epoch": 0.6057941890647242, "grad_norm": 0.2893311083316803, "learning_rate": 0.00018057466529688081, "loss": 11.6583, "step": 28940 }, { "epoch": 0.6058151218286862, "grad_norm": 0.3370422422885895, "learning_rate": 0.00018057336672721, "loss": 11.6819, "step": 28941 }, { "epoch": 0.6058360545926484, "grad_norm": 0.3126998543739319, "learning_rate": 0.00018057206811880594, "loss": 11.6685, "step": 28942 }, { "epoch": 0.6058569873566105, "grad_norm": 0.24794191122055054, "learning_rate": 0.0001805707694716693, "loss": 11.6641, "step": 28943 }, { "epoch": 0.6058779201205727, "grad_norm": 0.33862173557281494, "learning_rate": 0.00018056947078580067, "loss": 11.6612, "step": 28944 }, { "epoch": 0.6058988528845348, "grad_norm": 0.3184214234352112, "learning_rate": 0.0001805681720612007, "loss": 11.6795, "step": 28945 }, { "epoch": 0.605919785648497, "grad_norm": 0.2940603792667389, "learning_rate": 0.00018056687329786995, "loss": 11.6651, "step": 28946 }, { "epoch": 0.6059407184124592, "grad_norm": 0.24116192758083344, "learning_rate": 0.0001805655744958091, "loss": 11.6931, "step": 28947 }, { "epoch": 0.6059616511764213, "grad_norm": 0.38534170389175415, "learning_rate": 0.00018056427565501878, "loss": 11.6969, "step": 28948 }, { "epoch": 0.6059825839403835, "grad_norm": 0.28306692838668823, "learning_rate": 0.0001805629767754996, "loss": 11.6878, "step": 28949 }, { "epoch": 0.6060035167043456, "grad_norm": 0.2350887656211853, "learning_rate": 0.0001805616778572522, "loss": 11.6626, "step": 28950 }, { "epoch": 0.6060244494683078, "grad_norm": 0.3904291093349457, "learning_rate": 0.00018056037890027714, "loss": 11.6618, "step": 28951 }, { "epoch": 0.60604538223227, "grad_norm": 0.2533101439476013, "learning_rate": 0.0001805590799045751, "loss": 11.6624, "step": 28952 }, { "epoch": 0.6060663149962321, "grad_norm": 0.3612068295478821, "learning_rate": 0.00018055778087014673, "loss": 11.6797, "step": 28953 }, { "epoch": 0.6060872477601943, "grad_norm": 0.3633798062801361, "learning_rate": 0.0001805564817969926, "loss": 11.683, "step": 28954 }, { "epoch": 0.6061081805241564, "grad_norm": 0.2837614417076111, "learning_rate": 0.00018055518268511334, "loss": 11.676, "step": 28955 }, { "epoch": 0.6061291132881186, "grad_norm": 0.2591998279094696, "learning_rate": 0.00018055388353450962, "loss": 11.6724, "step": 28956 }, { "epoch": 0.6061500460520807, "grad_norm": 0.3264812231063843, "learning_rate": 0.00018055258434518203, "loss": 11.6799, "step": 28957 }, { "epoch": 0.6061709788160429, "grad_norm": 0.32338640093803406, "learning_rate": 0.00018055128511713117, "loss": 11.6715, "step": 28958 }, { "epoch": 0.6061919115800051, "grad_norm": 0.3907114565372467, "learning_rate": 0.0001805499858503577, "loss": 11.6686, "step": 28959 }, { "epoch": 0.6062128443439672, "grad_norm": 0.31573954224586487, "learning_rate": 0.00018054868654486227, "loss": 11.672, "step": 28960 }, { "epoch": 0.6062337771079294, "grad_norm": 0.2579463720321655, "learning_rate": 0.00018054738720064547, "loss": 11.6681, "step": 28961 }, { "epoch": 0.6062547098718915, "grad_norm": 0.2862057685852051, "learning_rate": 0.00018054608781770787, "loss": 11.6676, "step": 28962 }, { "epoch": 0.6062756426358537, "grad_norm": 0.2723329961299896, "learning_rate": 0.0001805447883960502, "loss": 11.6807, "step": 28963 }, { "epoch": 0.6062965753998157, "grad_norm": 0.25938352942466736, "learning_rate": 0.00018054348893567303, "loss": 11.655, "step": 28964 }, { "epoch": 0.6063175081637779, "grad_norm": 0.26298901438713074, "learning_rate": 0.000180542189436577, "loss": 11.6748, "step": 28965 }, { "epoch": 0.6063384409277401, "grad_norm": 0.3340655565261841, "learning_rate": 0.0001805408898987627, "loss": 11.6724, "step": 28966 }, { "epoch": 0.6063593736917022, "grad_norm": 0.3062632977962494, "learning_rate": 0.0001805395903222308, "loss": 11.6813, "step": 28967 }, { "epoch": 0.6063803064556644, "grad_norm": 0.3201155960559845, "learning_rate": 0.00018053829070698187, "loss": 11.6751, "step": 28968 }, { "epoch": 0.6064012392196265, "grad_norm": 0.32494908571243286, "learning_rate": 0.00018053699105301664, "loss": 11.6775, "step": 28969 }, { "epoch": 0.6064221719835887, "grad_norm": 0.2714140713214874, "learning_rate": 0.0001805356913603356, "loss": 11.6588, "step": 28970 }, { "epoch": 0.6064431047475509, "grad_norm": 0.3572394847869873, "learning_rate": 0.00018053439162893945, "loss": 11.6765, "step": 28971 }, { "epoch": 0.606464037511513, "grad_norm": 0.24347326159477234, "learning_rate": 0.00018053309185882884, "loss": 11.6796, "step": 28972 }, { "epoch": 0.6064849702754752, "grad_norm": 0.3414818346500397, "learning_rate": 0.0001805317920500043, "loss": 11.6869, "step": 28973 }, { "epoch": 0.6065059030394373, "grad_norm": 0.3044593930244446, "learning_rate": 0.00018053049220246656, "loss": 11.6698, "step": 28974 }, { "epoch": 0.6065268358033995, "grad_norm": 0.3101104199886322, "learning_rate": 0.0001805291923162162, "loss": 11.6712, "step": 28975 }, { "epoch": 0.6065477685673616, "grad_norm": 0.3141631484031677, "learning_rate": 0.00018052789239125382, "loss": 11.6679, "step": 28976 }, { "epoch": 0.6065687013313238, "grad_norm": 0.24234861135482788, "learning_rate": 0.00018052659242758008, "loss": 11.6599, "step": 28977 }, { "epoch": 0.606589634095286, "grad_norm": 0.24574504792690277, "learning_rate": 0.0001805252924251956, "loss": 11.6757, "step": 28978 }, { "epoch": 0.6066105668592481, "grad_norm": 0.2809376120567322, "learning_rate": 0.00018052399238410095, "loss": 11.6619, "step": 28979 }, { "epoch": 0.6066314996232103, "grad_norm": 0.3140377104282379, "learning_rate": 0.00018052269230429686, "loss": 11.6767, "step": 28980 }, { "epoch": 0.6066524323871724, "grad_norm": 0.269197940826416, "learning_rate": 0.00018052139218578388, "loss": 11.6758, "step": 28981 }, { "epoch": 0.6066733651511346, "grad_norm": 0.28886741399765015, "learning_rate": 0.00018052009202856263, "loss": 11.6649, "step": 28982 }, { "epoch": 0.6066942979150967, "grad_norm": 0.3105522692203522, "learning_rate": 0.00018051879183263379, "loss": 11.669, "step": 28983 }, { "epoch": 0.6067152306790589, "grad_norm": 0.3307565152645111, "learning_rate": 0.00018051749159799795, "loss": 11.6702, "step": 28984 }, { "epoch": 0.6067361634430211, "grad_norm": 0.3095945715904236, "learning_rate": 0.00018051619132465572, "loss": 11.6658, "step": 28985 }, { "epoch": 0.6067570962069831, "grad_norm": 0.34695225954055786, "learning_rate": 0.00018051489101260777, "loss": 11.6773, "step": 28986 }, { "epoch": 0.6067780289709453, "grad_norm": 0.2995108664035797, "learning_rate": 0.00018051359066185467, "loss": 11.651, "step": 28987 }, { "epoch": 0.6067989617349074, "grad_norm": 0.38005465269088745, "learning_rate": 0.0001805122902723971, "loss": 11.6639, "step": 28988 }, { "epoch": 0.6068198944988696, "grad_norm": 0.27952903509140015, "learning_rate": 0.00018051098984423565, "loss": 11.6533, "step": 28989 }, { "epoch": 0.6068408272628318, "grad_norm": 0.27899715304374695, "learning_rate": 0.00018050968937737095, "loss": 11.6569, "step": 28990 }, { "epoch": 0.6068617600267939, "grad_norm": 0.31491610407829285, "learning_rate": 0.00018050838887180361, "loss": 11.6699, "step": 28991 }, { "epoch": 0.6068826927907561, "grad_norm": 0.2572330832481384, "learning_rate": 0.0001805070883275343, "loss": 11.659, "step": 28992 }, { "epoch": 0.6069036255547182, "grad_norm": 0.3205922245979309, "learning_rate": 0.00018050578774456358, "loss": 11.6656, "step": 28993 }, { "epoch": 0.6069245583186804, "grad_norm": 0.279154896736145, "learning_rate": 0.00018050448712289216, "loss": 11.6725, "step": 28994 }, { "epoch": 0.6069454910826425, "grad_norm": 0.31774863600730896, "learning_rate": 0.00018050318646252062, "loss": 11.6831, "step": 28995 }, { "epoch": 0.6069664238466047, "grad_norm": 0.26968392729759216, "learning_rate": 0.00018050188576344955, "loss": 11.6592, "step": 28996 }, { "epoch": 0.6069873566105669, "grad_norm": 0.3859344720840454, "learning_rate": 0.00018050058502567967, "loss": 11.6678, "step": 28997 }, { "epoch": 0.607008289374529, "grad_norm": 0.28254234790802, "learning_rate": 0.00018049928424921147, "loss": 11.6739, "step": 28998 }, { "epoch": 0.6070292221384912, "grad_norm": 0.25337544083595276, "learning_rate": 0.0001804979834340457, "loss": 11.6686, "step": 28999 }, { "epoch": 0.6070501549024533, "grad_norm": 0.29731178283691406, "learning_rate": 0.00018049668258018293, "loss": 11.6607, "step": 29000 }, { "epoch": 0.6070501549024533, "eval_loss": 11.671394348144531, "eval_runtime": 34.2708, "eval_samples_per_second": 28.041, "eval_steps_per_second": 7.032, "step": 29000 }, { "epoch": 0.6070710876664155, "grad_norm": 0.2830931842327118, "learning_rate": 0.00018049538168762375, "loss": 11.6737, "step": 29001 }, { "epoch": 0.6070920204303776, "grad_norm": 0.2711764872074127, "learning_rate": 0.0001804940807563689, "loss": 11.673, "step": 29002 }, { "epoch": 0.6071129531943398, "grad_norm": 0.37934428453445435, "learning_rate": 0.00018049277978641887, "loss": 11.6655, "step": 29003 }, { "epoch": 0.607133885958302, "grad_norm": 0.3188604712486267, "learning_rate": 0.0001804914787777744, "loss": 11.673, "step": 29004 }, { "epoch": 0.6071548187222641, "grad_norm": 0.26852884888648987, "learning_rate": 0.00018049017773043603, "loss": 11.6635, "step": 29005 }, { "epoch": 0.6071757514862263, "grad_norm": 0.3336018919944763, "learning_rate": 0.00018048887664440442, "loss": 11.6793, "step": 29006 }, { "epoch": 0.6071966842501884, "grad_norm": 0.28657349944114685, "learning_rate": 0.00018048757551968018, "loss": 11.6708, "step": 29007 }, { "epoch": 0.6072176170141506, "grad_norm": 0.30886030197143555, "learning_rate": 0.00018048627435626396, "loss": 11.6749, "step": 29008 }, { "epoch": 0.6072385497781128, "grad_norm": 0.26655593514442444, "learning_rate": 0.0001804849731541564, "loss": 11.6636, "step": 29009 }, { "epoch": 0.6072594825420748, "grad_norm": 0.33812835812568665, "learning_rate": 0.00018048367191335807, "loss": 11.6722, "step": 29010 }, { "epoch": 0.607280415306037, "grad_norm": 0.2753235101699829, "learning_rate": 0.00018048237063386963, "loss": 11.6624, "step": 29011 }, { "epoch": 0.6073013480699991, "grad_norm": 0.36416855454444885, "learning_rate": 0.00018048106931569172, "loss": 11.6868, "step": 29012 }, { "epoch": 0.6073222808339613, "grad_norm": 0.3957310914993286, "learning_rate": 0.00018047976795882494, "loss": 11.6718, "step": 29013 }, { "epoch": 0.6073432135979234, "grad_norm": 0.2747720181941986, "learning_rate": 0.00018047846656326993, "loss": 11.6745, "step": 29014 }, { "epoch": 0.6073641463618856, "grad_norm": 0.2753203809261322, "learning_rate": 0.0001804771651290273, "loss": 11.6859, "step": 29015 }, { "epoch": 0.6073850791258478, "grad_norm": 0.258024662733078, "learning_rate": 0.00018047586365609768, "loss": 11.6555, "step": 29016 }, { "epoch": 0.6074060118898099, "grad_norm": 0.24792933464050293, "learning_rate": 0.0001804745621444817, "loss": 11.6568, "step": 29017 }, { "epoch": 0.6074269446537721, "grad_norm": 0.3605121076107025, "learning_rate": 0.00018047326059418, "loss": 11.661, "step": 29018 }, { "epoch": 0.6074478774177342, "grad_norm": 0.26026439666748047, "learning_rate": 0.00018047195900519316, "loss": 11.6737, "step": 29019 }, { "epoch": 0.6074688101816964, "grad_norm": 0.25628581643104553, "learning_rate": 0.00018047065737752184, "loss": 11.6666, "step": 29020 }, { "epoch": 0.6074897429456585, "grad_norm": 0.2666756808757782, "learning_rate": 0.0001804693557111667, "loss": 11.6714, "step": 29021 }, { "epoch": 0.6075106757096207, "grad_norm": 0.33508849143981934, "learning_rate": 0.0001804680540061283, "loss": 11.6542, "step": 29022 }, { "epoch": 0.6075316084735829, "grad_norm": 0.27225053310394287, "learning_rate": 0.0001804667522624073, "loss": 11.6735, "step": 29023 }, { "epoch": 0.607552541237545, "grad_norm": 0.3265136778354645, "learning_rate": 0.0001804654504800043, "loss": 11.6718, "step": 29024 }, { "epoch": 0.6075734740015072, "grad_norm": 0.27604246139526367, "learning_rate": 0.00018046414865892, "loss": 11.662, "step": 29025 }, { "epoch": 0.6075944067654693, "grad_norm": 0.3406979739665985, "learning_rate": 0.0001804628467991549, "loss": 11.6781, "step": 29026 }, { "epoch": 0.6076153395294315, "grad_norm": 0.39205360412597656, "learning_rate": 0.00018046154490070974, "loss": 11.6793, "step": 29027 }, { "epoch": 0.6076362722933936, "grad_norm": 0.254489004611969, "learning_rate": 0.0001804602429635851, "loss": 11.6726, "step": 29028 }, { "epoch": 0.6076572050573558, "grad_norm": 0.35720863938331604, "learning_rate": 0.0001804589409877816, "loss": 11.6888, "step": 29029 }, { "epoch": 0.607678137821318, "grad_norm": 0.313211590051651, "learning_rate": 0.0001804576389732999, "loss": 11.6823, "step": 29030 }, { "epoch": 0.60769907058528, "grad_norm": 0.2576882541179657, "learning_rate": 0.00018045633692014056, "loss": 11.6715, "step": 29031 }, { "epoch": 0.6077200033492423, "grad_norm": 0.2747231423854828, "learning_rate": 0.00018045503482830427, "loss": 11.6628, "step": 29032 }, { "epoch": 0.6077409361132043, "grad_norm": 0.252449631690979, "learning_rate": 0.00018045373269779161, "loss": 11.6611, "step": 29033 }, { "epoch": 0.6077618688771665, "grad_norm": 0.28556984663009644, "learning_rate": 0.00018045243052860325, "loss": 11.6397, "step": 29034 }, { "epoch": 0.6077828016411287, "grad_norm": 0.23959983885288239, "learning_rate": 0.0001804511283207398, "loss": 11.6667, "step": 29035 }, { "epoch": 0.6078037344050908, "grad_norm": 0.22181673347949982, "learning_rate": 0.00018044982607420186, "loss": 11.671, "step": 29036 }, { "epoch": 0.607824667169053, "grad_norm": 0.30714669823646545, "learning_rate": 0.0001804485237889901, "loss": 11.6641, "step": 29037 }, { "epoch": 0.6078455999330151, "grad_norm": 0.3383381962776184, "learning_rate": 0.0001804472214651051, "loss": 11.6731, "step": 29038 }, { "epoch": 0.6078665326969773, "grad_norm": 0.25562959909439087, "learning_rate": 0.0001804459191025475, "loss": 11.672, "step": 29039 }, { "epoch": 0.6078874654609394, "grad_norm": 0.2875928580760956, "learning_rate": 0.00018044461670131798, "loss": 11.6893, "step": 29040 }, { "epoch": 0.6079083982249016, "grad_norm": 0.29655930399894714, "learning_rate": 0.00018044331426141707, "loss": 11.6731, "step": 29041 }, { "epoch": 0.6079293309888638, "grad_norm": 0.32055073976516724, "learning_rate": 0.00018044201178284548, "loss": 11.6643, "step": 29042 }, { "epoch": 0.6079502637528259, "grad_norm": 0.40271124243736267, "learning_rate": 0.0001804407092656038, "loss": 11.6777, "step": 29043 }, { "epoch": 0.6079711965167881, "grad_norm": 0.24871784448623657, "learning_rate": 0.00018043940670969266, "loss": 11.6585, "step": 29044 }, { "epoch": 0.6079921292807502, "grad_norm": 0.29603347182273865, "learning_rate": 0.00018043810411511266, "loss": 11.6825, "step": 29045 }, { "epoch": 0.6080130620447124, "grad_norm": 0.3340916931629181, "learning_rate": 0.00018043680148186448, "loss": 11.6701, "step": 29046 }, { "epoch": 0.6080339948086745, "grad_norm": 0.24201065301895142, "learning_rate": 0.00018043549880994867, "loss": 11.6728, "step": 29047 }, { "epoch": 0.6080549275726367, "grad_norm": 0.31766703724861145, "learning_rate": 0.00018043419609936595, "loss": 11.6772, "step": 29048 }, { "epoch": 0.6080758603365989, "grad_norm": 0.41382187604904175, "learning_rate": 0.0001804328933501169, "loss": 11.6506, "step": 29049 }, { "epoch": 0.608096793100561, "grad_norm": 0.3019620478153229, "learning_rate": 0.00018043159056220214, "loss": 11.6624, "step": 29050 }, { "epoch": 0.6081177258645232, "grad_norm": 0.2819548547267914, "learning_rate": 0.00018043028773562228, "loss": 11.6489, "step": 29051 }, { "epoch": 0.6081386586284853, "grad_norm": 0.24789611995220184, "learning_rate": 0.00018042898487037802, "loss": 11.6603, "step": 29052 }, { "epoch": 0.6081595913924475, "grad_norm": 0.36459171772003174, "learning_rate": 0.00018042768196646988, "loss": 11.6698, "step": 29053 }, { "epoch": 0.6081805241564097, "grad_norm": 0.3068912625312805, "learning_rate": 0.00018042637902389855, "loss": 11.6771, "step": 29054 }, { "epoch": 0.6082014569203718, "grad_norm": 0.34637874364852905, "learning_rate": 0.00018042507604266469, "loss": 11.6687, "step": 29055 }, { "epoch": 0.608222389684334, "grad_norm": 0.2670249938964844, "learning_rate": 0.00018042377302276882, "loss": 11.6731, "step": 29056 }, { "epoch": 0.608243322448296, "grad_norm": 0.33220431208610535, "learning_rate": 0.00018042246996421169, "loss": 11.6772, "step": 29057 }, { "epoch": 0.6082642552122582, "grad_norm": 0.27740123867988586, "learning_rate": 0.00018042116686699383, "loss": 11.67, "step": 29058 }, { "epoch": 0.6082851879762203, "grad_norm": 0.3202895522117615, "learning_rate": 0.00018041986373111592, "loss": 11.6715, "step": 29059 }, { "epoch": 0.6083061207401825, "grad_norm": 0.2760983109474182, "learning_rate": 0.00018041856055657856, "loss": 11.6667, "step": 29060 }, { "epoch": 0.6083270535041447, "grad_norm": 0.303122878074646, "learning_rate": 0.0001804172573433824, "loss": 11.6567, "step": 29061 }, { "epoch": 0.6083479862681068, "grad_norm": 0.2848863899707794, "learning_rate": 0.00018041595409152805, "loss": 11.6808, "step": 29062 }, { "epoch": 0.608368919032069, "grad_norm": 0.35120469331741333, "learning_rate": 0.00018041465080101613, "loss": 11.6623, "step": 29063 }, { "epoch": 0.6083898517960311, "grad_norm": 0.29252099990844727, "learning_rate": 0.00018041334747184728, "loss": 11.6694, "step": 29064 }, { "epoch": 0.6084107845599933, "grad_norm": 0.2573765516281128, "learning_rate": 0.00018041204410402213, "loss": 11.6646, "step": 29065 }, { "epoch": 0.6084317173239554, "grad_norm": 0.30867576599121094, "learning_rate": 0.0001804107406975413, "loss": 11.6648, "step": 29066 }, { "epoch": 0.6084526500879176, "grad_norm": 0.25907984375953674, "learning_rate": 0.0001804094372524054, "loss": 11.6642, "step": 29067 }, { "epoch": 0.6084735828518798, "grad_norm": 0.2833942174911499, "learning_rate": 0.00018040813376861507, "loss": 11.6685, "step": 29068 }, { "epoch": 0.6084945156158419, "grad_norm": 0.25812312960624695, "learning_rate": 0.00018040683024617095, "loss": 11.6662, "step": 29069 }, { "epoch": 0.6085154483798041, "grad_norm": 0.30428481101989746, "learning_rate": 0.00018040552668507366, "loss": 11.6616, "step": 29070 }, { "epoch": 0.6085363811437662, "grad_norm": 0.2661360204219818, "learning_rate": 0.00018040422308532384, "loss": 11.679, "step": 29071 }, { "epoch": 0.6085573139077284, "grad_norm": 0.3103254735469818, "learning_rate": 0.00018040291944692205, "loss": 11.6703, "step": 29072 }, { "epoch": 0.6085782466716906, "grad_norm": 0.36238938570022583, "learning_rate": 0.00018040161576986897, "loss": 11.6733, "step": 29073 }, { "epoch": 0.6085991794356527, "grad_norm": 0.2685077488422394, "learning_rate": 0.00018040031205416524, "loss": 11.6811, "step": 29074 }, { "epoch": 0.6086201121996149, "grad_norm": 0.24126115441322327, "learning_rate": 0.0001803990082998115, "loss": 11.6786, "step": 29075 }, { "epoch": 0.608641044963577, "grad_norm": 0.2966304421424866, "learning_rate": 0.0001803977045068083, "loss": 11.6827, "step": 29076 }, { "epoch": 0.6086619777275392, "grad_norm": 0.3274761438369751, "learning_rate": 0.00018039640067515634, "loss": 11.6573, "step": 29077 }, { "epoch": 0.6086829104915013, "grad_norm": 0.2855737805366516, "learning_rate": 0.0001803950968048562, "loss": 11.6847, "step": 29078 }, { "epoch": 0.6087038432554635, "grad_norm": 0.27630579471588135, "learning_rate": 0.0001803937928959085, "loss": 11.6768, "step": 29079 }, { "epoch": 0.6087247760194257, "grad_norm": 0.26510316133499146, "learning_rate": 0.00018039248894831394, "loss": 11.6607, "step": 29080 }, { "epoch": 0.6087457087833877, "grad_norm": 0.34526118636131287, "learning_rate": 0.0001803911849620731, "loss": 11.68, "step": 29081 }, { "epoch": 0.6087666415473499, "grad_norm": 0.2857648730278015, "learning_rate": 0.00018038988093718657, "loss": 11.6535, "step": 29082 }, { "epoch": 0.608787574311312, "grad_norm": 0.3995233476161957, "learning_rate": 0.00018038857687365507, "loss": 11.6797, "step": 29083 }, { "epoch": 0.6088085070752742, "grad_norm": 0.2611304223537445, "learning_rate": 0.00018038727277147913, "loss": 11.6704, "step": 29084 }, { "epoch": 0.6088294398392363, "grad_norm": 0.3239997327327728, "learning_rate": 0.0001803859686306594, "loss": 11.6682, "step": 29085 }, { "epoch": 0.6088503726031985, "grad_norm": 0.2736451029777527, "learning_rate": 0.00018038466445119658, "loss": 11.6664, "step": 29086 }, { "epoch": 0.6088713053671607, "grad_norm": 0.25406745076179504, "learning_rate": 0.00018038336023309118, "loss": 11.665, "step": 29087 }, { "epoch": 0.6088922381311228, "grad_norm": 0.26079049706459045, "learning_rate": 0.00018038205597634393, "loss": 11.6776, "step": 29088 }, { "epoch": 0.608913170895085, "grad_norm": 0.2278718799352646, "learning_rate": 0.00018038075168095539, "loss": 11.6549, "step": 29089 }, { "epoch": 0.6089341036590471, "grad_norm": 0.3115677833557129, "learning_rate": 0.00018037944734692623, "loss": 11.6819, "step": 29090 }, { "epoch": 0.6089550364230093, "grad_norm": 0.2830953598022461, "learning_rate": 0.00018037814297425703, "loss": 11.6856, "step": 29091 }, { "epoch": 0.6089759691869715, "grad_norm": 0.22717082500457764, "learning_rate": 0.00018037683856294846, "loss": 11.6772, "step": 29092 }, { "epoch": 0.6089969019509336, "grad_norm": 0.28643175959587097, "learning_rate": 0.00018037553411300118, "loss": 11.6622, "step": 29093 }, { "epoch": 0.6090178347148958, "grad_norm": 0.3033691644668579, "learning_rate": 0.0001803742296244157, "loss": 11.6526, "step": 29094 }, { "epoch": 0.6090387674788579, "grad_norm": 0.3304215967655182, "learning_rate": 0.00018037292509719274, "loss": 11.6811, "step": 29095 }, { "epoch": 0.6090597002428201, "grad_norm": 0.28270116448402405, "learning_rate": 0.0001803716205313329, "loss": 11.6828, "step": 29096 }, { "epoch": 0.6090806330067822, "grad_norm": 0.2587914764881134, "learning_rate": 0.00018037031592683684, "loss": 11.6651, "step": 29097 }, { "epoch": 0.6091015657707444, "grad_norm": 0.37425878643989563, "learning_rate": 0.00018036901128370514, "loss": 11.6765, "step": 29098 }, { "epoch": 0.6091224985347066, "grad_norm": 0.26952019333839417, "learning_rate": 0.00018036770660193844, "loss": 11.6678, "step": 29099 }, { "epoch": 0.6091434312986687, "grad_norm": 0.21827274560928345, "learning_rate": 0.00018036640188153738, "loss": 11.6675, "step": 29100 }, { "epoch": 0.6091643640626309, "grad_norm": 0.31188878417015076, "learning_rate": 0.00018036509712250257, "loss": 11.6732, "step": 29101 }, { "epoch": 0.609185296826593, "grad_norm": 0.35556066036224365, "learning_rate": 0.00018036379232483466, "loss": 11.6884, "step": 29102 }, { "epoch": 0.6092062295905551, "grad_norm": 0.3437131941318512, "learning_rate": 0.00018036248748853426, "loss": 11.6583, "step": 29103 }, { "epoch": 0.6092271623545172, "grad_norm": 0.2686083912849426, "learning_rate": 0.00018036118261360203, "loss": 11.6774, "step": 29104 }, { "epoch": 0.6092480951184794, "grad_norm": 0.3770143389701843, "learning_rate": 0.00018035987770003852, "loss": 11.6686, "step": 29105 }, { "epoch": 0.6092690278824416, "grad_norm": 0.28339752554893494, "learning_rate": 0.00018035857274784441, "loss": 11.6633, "step": 29106 }, { "epoch": 0.6092899606464037, "grad_norm": 0.2358640879392624, "learning_rate": 0.00018035726775702035, "loss": 11.6904, "step": 29107 }, { "epoch": 0.6093108934103659, "grad_norm": 0.3069658577442169, "learning_rate": 0.00018035596272756693, "loss": 11.6667, "step": 29108 }, { "epoch": 0.609331826174328, "grad_norm": 0.30254387855529785, "learning_rate": 0.00018035465765948481, "loss": 11.6743, "step": 29109 }, { "epoch": 0.6093527589382902, "grad_norm": 0.2835174798965454, "learning_rate": 0.00018035335255277456, "loss": 11.6622, "step": 29110 }, { "epoch": 0.6093736917022524, "grad_norm": 0.25397270917892456, "learning_rate": 0.00018035204740743686, "loss": 11.6683, "step": 29111 }, { "epoch": 0.6093946244662145, "grad_norm": 0.3880603313446045, "learning_rate": 0.00018035074222347232, "loss": 11.6744, "step": 29112 }, { "epoch": 0.6094155572301767, "grad_norm": 0.24501115083694458, "learning_rate": 0.00018034943700088158, "loss": 11.6776, "step": 29113 }, { "epoch": 0.6094364899941388, "grad_norm": 0.2920706272125244, "learning_rate": 0.00018034813173966523, "loss": 11.6694, "step": 29114 }, { "epoch": 0.609457422758101, "grad_norm": 0.30792689323425293, "learning_rate": 0.00018034682643982395, "loss": 11.6697, "step": 29115 }, { "epoch": 0.6094783555220631, "grad_norm": 0.35814371705055237, "learning_rate": 0.00018034552110135832, "loss": 11.6912, "step": 29116 }, { "epoch": 0.6094992882860253, "grad_norm": 0.2888129949569702, "learning_rate": 0.000180344215724269, "loss": 11.6543, "step": 29117 }, { "epoch": 0.6095202210499875, "grad_norm": 0.5071660876274109, "learning_rate": 0.0001803429103085566, "loss": 11.6789, "step": 29118 }, { "epoch": 0.6095411538139496, "grad_norm": 0.2660254240036011, "learning_rate": 0.00018034160485422174, "loss": 11.6793, "step": 29119 }, { "epoch": 0.6095620865779118, "grad_norm": 0.24979671835899353, "learning_rate": 0.00018034029936126508, "loss": 11.663, "step": 29120 }, { "epoch": 0.6095830193418739, "grad_norm": 0.2874416410923004, "learning_rate": 0.0001803389938296872, "loss": 11.6733, "step": 29121 }, { "epoch": 0.6096039521058361, "grad_norm": 0.30782783031463623, "learning_rate": 0.0001803376882594888, "loss": 11.677, "step": 29122 }, { "epoch": 0.6096248848697982, "grad_norm": 0.2890620231628418, "learning_rate": 0.00018033638265067044, "loss": 11.6757, "step": 29123 }, { "epoch": 0.6096458176337604, "grad_norm": 0.26591572165489197, "learning_rate": 0.00018033507700323276, "loss": 11.6801, "step": 29124 }, { "epoch": 0.6096667503977226, "grad_norm": 0.2777033746242523, "learning_rate": 0.0001803337713171764, "loss": 11.6728, "step": 29125 }, { "epoch": 0.6096876831616846, "grad_norm": 0.2772868573665619, "learning_rate": 0.000180332465592502, "loss": 11.6834, "step": 29126 }, { "epoch": 0.6097086159256468, "grad_norm": 0.2460399866104126, "learning_rate": 0.0001803311598292102, "loss": 11.663, "step": 29127 }, { "epoch": 0.6097295486896089, "grad_norm": 0.3612476587295532, "learning_rate": 0.00018032985402730156, "loss": 11.6724, "step": 29128 }, { "epoch": 0.6097504814535711, "grad_norm": 0.2661297023296356, "learning_rate": 0.00018032854818677673, "loss": 11.6657, "step": 29129 }, { "epoch": 0.6097714142175333, "grad_norm": 0.2795746624469757, "learning_rate": 0.0001803272423076364, "loss": 11.6843, "step": 29130 }, { "epoch": 0.6097923469814954, "grad_norm": 0.2786979079246521, "learning_rate": 0.00018032593638988114, "loss": 11.6755, "step": 29131 }, { "epoch": 0.6098132797454576, "grad_norm": 0.2871825695037842, "learning_rate": 0.0001803246304335116, "loss": 11.6679, "step": 29132 }, { "epoch": 0.6098342125094197, "grad_norm": 0.253283828496933, "learning_rate": 0.0001803233244385284, "loss": 11.6553, "step": 29133 }, { "epoch": 0.6098551452733819, "grad_norm": 0.26593777537345886, "learning_rate": 0.00018032201840493216, "loss": 11.6691, "step": 29134 }, { "epoch": 0.609876078037344, "grad_norm": 0.2769433259963989, "learning_rate": 0.00018032071233272352, "loss": 11.6581, "step": 29135 }, { "epoch": 0.6098970108013062, "grad_norm": 0.31205686926841736, "learning_rate": 0.0001803194062219031, "loss": 11.6768, "step": 29136 }, { "epoch": 0.6099179435652684, "grad_norm": 0.3253454267978668, "learning_rate": 0.00018031810007247155, "loss": 11.6696, "step": 29137 }, { "epoch": 0.6099388763292305, "grad_norm": 0.3437407314777374, "learning_rate": 0.00018031679388442947, "loss": 11.6741, "step": 29138 }, { "epoch": 0.6099598090931927, "grad_norm": 0.2508736550807953, "learning_rate": 0.00018031548765777748, "loss": 11.6723, "step": 29139 }, { "epoch": 0.6099807418571548, "grad_norm": 0.2968374490737915, "learning_rate": 0.00018031418139251624, "loss": 11.6655, "step": 29140 }, { "epoch": 0.610001674621117, "grad_norm": 0.3948540985584259, "learning_rate": 0.00018031287508864637, "loss": 11.6601, "step": 29141 }, { "epoch": 0.6100226073850791, "grad_norm": 0.3389834463596344, "learning_rate": 0.0001803115687461685, "loss": 11.6776, "step": 29142 }, { "epoch": 0.6100435401490413, "grad_norm": 0.2758890390396118, "learning_rate": 0.0001803102623650832, "loss": 11.6646, "step": 29143 }, { "epoch": 0.6100644729130035, "grad_norm": 0.28888770937919617, "learning_rate": 0.0001803089559453912, "loss": 11.6666, "step": 29144 }, { "epoch": 0.6100854056769656, "grad_norm": 0.2873833179473877, "learning_rate": 0.00018030764948709303, "loss": 11.6809, "step": 29145 }, { "epoch": 0.6101063384409278, "grad_norm": 0.3342306911945343, "learning_rate": 0.0001803063429901894, "loss": 11.6573, "step": 29146 }, { "epoch": 0.6101272712048899, "grad_norm": 0.32571858167648315, "learning_rate": 0.0001803050364546809, "loss": 11.6898, "step": 29147 }, { "epoch": 0.610148203968852, "grad_norm": 0.2990415096282959, "learning_rate": 0.00018030372988056813, "loss": 11.6626, "step": 29148 }, { "epoch": 0.6101691367328143, "grad_norm": 0.2932584583759308, "learning_rate": 0.00018030242326785175, "loss": 11.6766, "step": 29149 }, { "epoch": 0.6101900694967763, "grad_norm": 0.2986612021923065, "learning_rate": 0.00018030111661653244, "loss": 11.6584, "step": 29150 }, { "epoch": 0.6102110022607385, "grad_norm": 0.31258347630500793, "learning_rate": 0.00018029980992661073, "loss": 11.6805, "step": 29151 }, { "epoch": 0.6102319350247006, "grad_norm": 0.30617237091064453, "learning_rate": 0.0001802985031980873, "loss": 11.6717, "step": 29152 }, { "epoch": 0.6102528677886628, "grad_norm": 0.2665625512599945, "learning_rate": 0.00018029719643096278, "loss": 11.6654, "step": 29153 }, { "epoch": 0.6102738005526249, "grad_norm": 0.27276700735092163, "learning_rate": 0.00018029588962523778, "loss": 11.6791, "step": 29154 }, { "epoch": 0.6102947333165871, "grad_norm": 0.2705787122249603, "learning_rate": 0.00018029458278091292, "loss": 11.6717, "step": 29155 }, { "epoch": 0.6103156660805493, "grad_norm": 0.316630095243454, "learning_rate": 0.00018029327589798885, "loss": 11.6694, "step": 29156 }, { "epoch": 0.6103365988445114, "grad_norm": 0.32036465406417847, "learning_rate": 0.00018029196897646622, "loss": 11.6558, "step": 29157 }, { "epoch": 0.6103575316084736, "grad_norm": 0.31642964482307434, "learning_rate": 0.0001802906620163456, "loss": 11.6813, "step": 29158 }, { "epoch": 0.6103784643724357, "grad_norm": 0.2508378326892853, "learning_rate": 0.00018028935501762766, "loss": 11.6726, "step": 29159 }, { "epoch": 0.6103993971363979, "grad_norm": 0.21864403784275055, "learning_rate": 0.00018028804798031302, "loss": 11.6753, "step": 29160 }, { "epoch": 0.61042032990036, "grad_norm": 0.2689926326274872, "learning_rate": 0.00018028674090440233, "loss": 11.6525, "step": 29161 }, { "epoch": 0.6104412626643222, "grad_norm": 0.31533196568489075, "learning_rate": 0.00018028543378989617, "loss": 11.6685, "step": 29162 }, { "epoch": 0.6104621954282844, "grad_norm": 0.33918771147727966, "learning_rate": 0.00018028412663679522, "loss": 11.676, "step": 29163 }, { "epoch": 0.6104831281922465, "grad_norm": 0.3557019829750061, "learning_rate": 0.00018028281944510002, "loss": 11.6812, "step": 29164 }, { "epoch": 0.6105040609562087, "grad_norm": 0.2976568639278412, "learning_rate": 0.00018028151221481133, "loss": 11.6928, "step": 29165 }, { "epoch": 0.6105249937201708, "grad_norm": 0.28251636028289795, "learning_rate": 0.00018028020494592964, "loss": 11.6631, "step": 29166 }, { "epoch": 0.610545926484133, "grad_norm": 0.2851802110671997, "learning_rate": 0.0001802788976384557, "loss": 11.6573, "step": 29167 }, { "epoch": 0.6105668592480952, "grad_norm": 0.27844494581222534, "learning_rate": 0.00018027759029239006, "loss": 11.6512, "step": 29168 }, { "epoch": 0.6105877920120573, "grad_norm": 0.3112965226173401, "learning_rate": 0.0001802762829077334, "loss": 11.682, "step": 29169 }, { "epoch": 0.6106087247760195, "grad_norm": 0.2904647886753082, "learning_rate": 0.0001802749754844863, "loss": 11.6726, "step": 29170 }, { "epoch": 0.6106296575399816, "grad_norm": 0.3018099367618561, "learning_rate": 0.00018027366802264943, "loss": 11.6638, "step": 29171 }, { "epoch": 0.6106505903039438, "grad_norm": 0.33694392442703247, "learning_rate": 0.00018027236052222336, "loss": 11.6547, "step": 29172 }, { "epoch": 0.6106715230679058, "grad_norm": 0.29408547282218933, "learning_rate": 0.0001802710529832088, "loss": 11.6666, "step": 29173 }, { "epoch": 0.610692455831868, "grad_norm": 0.2828902304172516, "learning_rate": 0.00018026974540560632, "loss": 11.6751, "step": 29174 }, { "epoch": 0.6107133885958302, "grad_norm": 0.26273757219314575, "learning_rate": 0.00018026843778941656, "loss": 11.6725, "step": 29175 }, { "epoch": 0.6107343213597923, "grad_norm": 0.25064000487327576, "learning_rate": 0.00018026713013464014, "loss": 11.6693, "step": 29176 }, { "epoch": 0.6107552541237545, "grad_norm": 0.3470951020717621, "learning_rate": 0.00018026582244127772, "loss": 11.6715, "step": 29177 }, { "epoch": 0.6107761868877166, "grad_norm": 0.23525762557983398, "learning_rate": 0.0001802645147093299, "loss": 11.6527, "step": 29178 }, { "epoch": 0.6107971196516788, "grad_norm": 0.2853413224220276, "learning_rate": 0.00018026320693879732, "loss": 11.6803, "step": 29179 }, { "epoch": 0.6108180524156409, "grad_norm": 0.29162538051605225, "learning_rate": 0.00018026189912968062, "loss": 11.6765, "step": 29180 }, { "epoch": 0.6108389851796031, "grad_norm": 0.31223735213279724, "learning_rate": 0.0001802605912819804, "loss": 11.6859, "step": 29181 }, { "epoch": 0.6108599179435653, "grad_norm": 0.27455687522888184, "learning_rate": 0.00018025928339569734, "loss": 11.6758, "step": 29182 }, { "epoch": 0.6108808507075274, "grad_norm": 0.2687690556049347, "learning_rate": 0.000180257975470832, "loss": 11.6687, "step": 29183 }, { "epoch": 0.6109017834714896, "grad_norm": 0.24253128468990326, "learning_rate": 0.00018025666750738506, "loss": 11.6703, "step": 29184 }, { "epoch": 0.6109227162354517, "grad_norm": 0.35864928364753723, "learning_rate": 0.0001802553595053571, "loss": 11.6665, "step": 29185 }, { "epoch": 0.6109436489994139, "grad_norm": 0.3517068326473236, "learning_rate": 0.00018025405146474883, "loss": 11.6924, "step": 29186 }, { "epoch": 0.6109645817633761, "grad_norm": 0.3331557810306549, "learning_rate": 0.00018025274338556079, "loss": 11.6737, "step": 29187 }, { "epoch": 0.6109855145273382, "grad_norm": 0.3328295052051544, "learning_rate": 0.00018025143526779366, "loss": 11.68, "step": 29188 }, { "epoch": 0.6110064472913004, "grad_norm": 0.31588801741600037, "learning_rate": 0.00018025012711144807, "loss": 11.6679, "step": 29189 }, { "epoch": 0.6110273800552625, "grad_norm": 0.5324674248695374, "learning_rate": 0.00018024881891652464, "loss": 11.5801, "step": 29190 }, { "epoch": 0.6110483128192247, "grad_norm": 0.3640165328979492, "learning_rate": 0.00018024751068302397, "loss": 11.6609, "step": 29191 }, { "epoch": 0.6110692455831868, "grad_norm": 0.3445865213871002, "learning_rate": 0.0001802462024109467, "loss": 11.67, "step": 29192 }, { "epoch": 0.611090178347149, "grad_norm": 0.3223375082015991, "learning_rate": 0.00018024489410029354, "loss": 11.6625, "step": 29193 }, { "epoch": 0.6111111111111112, "grad_norm": 0.25122392177581787, "learning_rate": 0.000180243585751065, "loss": 11.6733, "step": 29194 }, { "epoch": 0.6111320438750732, "grad_norm": 0.32420065999031067, "learning_rate": 0.00018024227736326175, "loss": 11.6857, "step": 29195 }, { "epoch": 0.6111529766390355, "grad_norm": 0.34700900316238403, "learning_rate": 0.00018024096893688447, "loss": 11.6762, "step": 29196 }, { "epoch": 0.6111739094029975, "grad_norm": 0.34822139143943787, "learning_rate": 0.00018023966047193372, "loss": 11.677, "step": 29197 }, { "epoch": 0.6111948421669597, "grad_norm": 0.2858024835586548, "learning_rate": 0.00018023835196841014, "loss": 11.6604, "step": 29198 }, { "epoch": 0.6112157749309218, "grad_norm": 0.30084460973739624, "learning_rate": 0.0001802370434263144, "loss": 11.6787, "step": 29199 }, { "epoch": 0.611236707694884, "grad_norm": 0.29839006066322327, "learning_rate": 0.0001802357348456471, "loss": 11.6562, "step": 29200 }, { "epoch": 0.6112576404588462, "grad_norm": 0.2789502441883087, "learning_rate": 0.00018023442622640889, "loss": 11.6595, "step": 29201 }, { "epoch": 0.6112785732228083, "grad_norm": 0.26130273938179016, "learning_rate": 0.00018023311756860038, "loss": 11.6689, "step": 29202 }, { "epoch": 0.6112995059867705, "grad_norm": 0.22762396931648254, "learning_rate": 0.00018023180887222218, "loss": 11.6724, "step": 29203 }, { "epoch": 0.6113204387507326, "grad_norm": 0.3027462363243103, "learning_rate": 0.00018023050013727497, "loss": 11.6771, "step": 29204 }, { "epoch": 0.6113413715146948, "grad_norm": 0.3395785689353943, "learning_rate": 0.0001802291913637593, "loss": 11.6808, "step": 29205 }, { "epoch": 0.6113623042786569, "grad_norm": 0.36778730154037476, "learning_rate": 0.0001802278825516759, "loss": 11.6656, "step": 29206 }, { "epoch": 0.6113832370426191, "grad_norm": 0.27574607729911804, "learning_rate": 0.00018022657370102534, "loss": 11.6723, "step": 29207 }, { "epoch": 0.6114041698065813, "grad_norm": 0.27802422642707825, "learning_rate": 0.00018022526481180825, "loss": 11.6812, "step": 29208 }, { "epoch": 0.6114251025705434, "grad_norm": 0.25980517268180847, "learning_rate": 0.00018022395588402528, "loss": 11.6543, "step": 29209 }, { "epoch": 0.6114460353345056, "grad_norm": 0.2960191071033478, "learning_rate": 0.00018022264691767702, "loss": 11.6684, "step": 29210 }, { "epoch": 0.6114669680984677, "grad_norm": 0.2761476933956146, "learning_rate": 0.00018022133791276414, "loss": 11.6715, "step": 29211 }, { "epoch": 0.6114879008624299, "grad_norm": 0.3462374806404114, "learning_rate": 0.00018022002886928728, "loss": 11.672, "step": 29212 }, { "epoch": 0.6115088336263921, "grad_norm": 0.23013976216316223, "learning_rate": 0.000180218719787247, "loss": 11.6785, "step": 29213 }, { "epoch": 0.6115297663903542, "grad_norm": 0.27704644203186035, "learning_rate": 0.00018021741066664401, "loss": 11.6526, "step": 29214 }, { "epoch": 0.6115506991543164, "grad_norm": 0.2667319178581238, "learning_rate": 0.0001802161015074789, "loss": 11.6886, "step": 29215 }, { "epoch": 0.6115716319182785, "grad_norm": 0.34881821274757385, "learning_rate": 0.00018021479230975228, "loss": 11.6583, "step": 29216 }, { "epoch": 0.6115925646822407, "grad_norm": 0.304817259311676, "learning_rate": 0.0001802134830734648, "loss": 11.6766, "step": 29217 }, { "epoch": 0.6116134974462027, "grad_norm": 0.2873540222644806, "learning_rate": 0.00018021217379861711, "loss": 11.6462, "step": 29218 }, { "epoch": 0.611634430210165, "grad_norm": 0.3016832172870636, "learning_rate": 0.00018021086448520983, "loss": 11.6856, "step": 29219 }, { "epoch": 0.6116553629741271, "grad_norm": 0.34768494963645935, "learning_rate": 0.00018020955513324357, "loss": 11.6811, "step": 29220 }, { "epoch": 0.6116762957380892, "grad_norm": 0.3361659646034241, "learning_rate": 0.00018020824574271896, "loss": 11.6538, "step": 29221 }, { "epoch": 0.6116972285020514, "grad_norm": 0.28009822964668274, "learning_rate": 0.00018020693631363662, "loss": 11.6726, "step": 29222 }, { "epoch": 0.6117181612660135, "grad_norm": 0.34315168857574463, "learning_rate": 0.00018020562684599724, "loss": 11.6816, "step": 29223 }, { "epoch": 0.6117390940299757, "grad_norm": 0.2938397228717804, "learning_rate": 0.0001802043173398014, "loss": 11.673, "step": 29224 }, { "epoch": 0.6117600267939378, "grad_norm": 0.3379738926887512, "learning_rate": 0.00018020300779504972, "loss": 11.6707, "step": 29225 }, { "epoch": 0.6117809595579, "grad_norm": 0.2835243046283722, "learning_rate": 0.00018020169821174285, "loss": 11.676, "step": 29226 }, { "epoch": 0.6118018923218622, "grad_norm": 0.405822217464447, "learning_rate": 0.00018020038858988143, "loss": 11.6656, "step": 29227 }, { "epoch": 0.6118228250858243, "grad_norm": 0.3115258514881134, "learning_rate": 0.00018019907892946605, "loss": 11.6511, "step": 29228 }, { "epoch": 0.6118437578497865, "grad_norm": 0.26236093044281006, "learning_rate": 0.00018019776923049738, "loss": 11.6533, "step": 29229 }, { "epoch": 0.6118646906137486, "grad_norm": 0.3013412058353424, "learning_rate": 0.00018019645949297606, "loss": 11.6754, "step": 29230 }, { "epoch": 0.6118856233777108, "grad_norm": 0.3117363154888153, "learning_rate": 0.00018019514971690269, "loss": 11.6655, "step": 29231 }, { "epoch": 0.611906556141673, "grad_norm": 0.29661497473716736, "learning_rate": 0.00018019383990227787, "loss": 11.6798, "step": 29232 }, { "epoch": 0.6119274889056351, "grad_norm": 0.2553299367427826, "learning_rate": 0.00018019253004910226, "loss": 11.6583, "step": 29233 }, { "epoch": 0.6119484216695973, "grad_norm": 0.26452872157096863, "learning_rate": 0.00018019122015737652, "loss": 11.6927, "step": 29234 }, { "epoch": 0.6119693544335594, "grad_norm": 0.308649480342865, "learning_rate": 0.00018018991022710124, "loss": 11.6609, "step": 29235 }, { "epoch": 0.6119902871975216, "grad_norm": 0.28156381845474243, "learning_rate": 0.00018018860025827707, "loss": 11.6795, "step": 29236 }, { "epoch": 0.6120112199614837, "grad_norm": 0.2568727135658264, "learning_rate": 0.00018018729025090464, "loss": 11.6866, "step": 29237 }, { "epoch": 0.6120321527254459, "grad_norm": 0.25404155254364014, "learning_rate": 0.00018018598020498456, "loss": 11.6783, "step": 29238 }, { "epoch": 0.6120530854894081, "grad_norm": 0.28525590896606445, "learning_rate": 0.0001801846701205175, "loss": 11.6599, "step": 29239 }, { "epoch": 0.6120740182533702, "grad_norm": 0.2850922644138336, "learning_rate": 0.00018018335999750402, "loss": 11.6533, "step": 29240 }, { "epoch": 0.6120949510173324, "grad_norm": 0.33560335636138916, "learning_rate": 0.0001801820498359448, "loss": 11.6791, "step": 29241 }, { "epoch": 0.6121158837812944, "grad_norm": 0.27574971318244934, "learning_rate": 0.0001801807396358405, "loss": 11.6666, "step": 29242 }, { "epoch": 0.6121368165452566, "grad_norm": 0.30935609340667725, "learning_rate": 0.00018017942939719167, "loss": 11.6574, "step": 29243 }, { "epoch": 0.6121577493092187, "grad_norm": 0.25695154070854187, "learning_rate": 0.00018017811911999902, "loss": 11.6626, "step": 29244 }, { "epoch": 0.6121786820731809, "grad_norm": 0.33696043491363525, "learning_rate": 0.0001801768088042631, "loss": 11.6697, "step": 29245 }, { "epoch": 0.6121996148371431, "grad_norm": 0.35685181617736816, "learning_rate": 0.0001801754984499846, "loss": 11.6849, "step": 29246 }, { "epoch": 0.6122205476011052, "grad_norm": 0.2545253038406372, "learning_rate": 0.00018017418805716415, "loss": 11.6867, "step": 29247 }, { "epoch": 0.6122414803650674, "grad_norm": 0.3084602653980255, "learning_rate": 0.00018017287762580236, "loss": 11.6665, "step": 29248 }, { "epoch": 0.6122624131290295, "grad_norm": 0.27641695737838745, "learning_rate": 0.00018017156715589982, "loss": 11.6734, "step": 29249 }, { "epoch": 0.6122833458929917, "grad_norm": 0.34151777625083923, "learning_rate": 0.00018017025664745722, "loss": 11.6804, "step": 29250 }, { "epoch": 0.6123042786569539, "grad_norm": 0.261951208114624, "learning_rate": 0.0001801689461004752, "loss": 11.6824, "step": 29251 }, { "epoch": 0.612325211420916, "grad_norm": 0.36452463269233704, "learning_rate": 0.0001801676355149543, "loss": 11.6494, "step": 29252 }, { "epoch": 0.6123461441848782, "grad_norm": 0.25336724519729614, "learning_rate": 0.00018016632489089527, "loss": 11.674, "step": 29253 }, { "epoch": 0.6123670769488403, "grad_norm": 0.2749536335468292, "learning_rate": 0.00018016501422829867, "loss": 11.6553, "step": 29254 }, { "epoch": 0.6123880097128025, "grad_norm": 0.31972771883010864, "learning_rate": 0.00018016370352716513, "loss": 11.6818, "step": 29255 }, { "epoch": 0.6124089424767646, "grad_norm": 0.31090444326400757, "learning_rate": 0.00018016239278749528, "loss": 11.6614, "step": 29256 }, { "epoch": 0.6124298752407268, "grad_norm": 0.3230038583278656, "learning_rate": 0.0001801610820092898, "loss": 11.6672, "step": 29257 }, { "epoch": 0.612450808004689, "grad_norm": 0.325947642326355, "learning_rate": 0.0001801597711925492, "loss": 11.683, "step": 29258 }, { "epoch": 0.6124717407686511, "grad_norm": 0.25645288825035095, "learning_rate": 0.00018015846033727427, "loss": 11.6741, "step": 29259 }, { "epoch": 0.6124926735326133, "grad_norm": 0.2864416539669037, "learning_rate": 0.00018015714944346554, "loss": 11.6683, "step": 29260 }, { "epoch": 0.6125136062965754, "grad_norm": 0.27118000388145447, "learning_rate": 0.00018015583851112367, "loss": 11.6636, "step": 29261 }, { "epoch": 0.6125345390605376, "grad_norm": 0.28130123019218445, "learning_rate": 0.00018015452754024925, "loss": 11.6715, "step": 29262 }, { "epoch": 0.6125554718244997, "grad_norm": 0.2906540036201477, "learning_rate": 0.00018015321653084296, "loss": 11.6717, "step": 29263 }, { "epoch": 0.6125764045884619, "grad_norm": 0.3262532353401184, "learning_rate": 0.0001801519054829054, "loss": 11.6753, "step": 29264 }, { "epoch": 0.612597337352424, "grad_norm": 0.30199748277664185, "learning_rate": 0.00018015059439643728, "loss": 11.6724, "step": 29265 }, { "epoch": 0.6126182701163861, "grad_norm": 0.3977579176425934, "learning_rate": 0.0001801492832714391, "loss": 11.6557, "step": 29266 }, { "epoch": 0.6126392028803483, "grad_norm": 0.3537325859069824, "learning_rate": 0.00018014797210791155, "loss": 11.6665, "step": 29267 }, { "epoch": 0.6126601356443104, "grad_norm": 0.28613170981407166, "learning_rate": 0.00018014666090585528, "loss": 11.6846, "step": 29268 }, { "epoch": 0.6126810684082726, "grad_norm": 0.3112333118915558, "learning_rate": 0.0001801453496652709, "loss": 11.6662, "step": 29269 }, { "epoch": 0.6127020011722348, "grad_norm": 0.30547621846199036, "learning_rate": 0.00018014403838615905, "loss": 11.6631, "step": 29270 }, { "epoch": 0.6127229339361969, "grad_norm": 0.27467891573905945, "learning_rate": 0.00018014272706852035, "loss": 11.665, "step": 29271 }, { "epoch": 0.6127438667001591, "grad_norm": 0.24493885040283203, "learning_rate": 0.00018014141571235544, "loss": 11.6767, "step": 29272 }, { "epoch": 0.6127647994641212, "grad_norm": 0.25619786977767944, "learning_rate": 0.00018014010431766493, "loss": 11.677, "step": 29273 }, { "epoch": 0.6127857322280834, "grad_norm": 0.2991085350513458, "learning_rate": 0.0001801387928844495, "loss": 11.6702, "step": 29274 }, { "epoch": 0.6128066649920455, "grad_norm": 0.3435728847980499, "learning_rate": 0.0001801374814127097, "loss": 11.6752, "step": 29275 }, { "epoch": 0.6128275977560077, "grad_norm": 0.23346100747585297, "learning_rate": 0.00018013616990244624, "loss": 11.6817, "step": 29276 }, { "epoch": 0.6128485305199699, "grad_norm": 0.28887662291526794, "learning_rate": 0.0001801348583536597, "loss": 11.6642, "step": 29277 }, { "epoch": 0.612869463283932, "grad_norm": 0.37256813049316406, "learning_rate": 0.00018013354676635075, "loss": 11.6831, "step": 29278 }, { "epoch": 0.6128903960478942, "grad_norm": 0.2800058424472809, "learning_rate": 0.00018013223514051999, "loss": 11.6768, "step": 29279 }, { "epoch": 0.6129113288118563, "grad_norm": 0.2708491086959839, "learning_rate": 0.00018013092347616804, "loss": 11.6696, "step": 29280 }, { "epoch": 0.6129322615758185, "grad_norm": 1.2198102474212646, "learning_rate": 0.00018012961177329558, "loss": 11.7003, "step": 29281 }, { "epoch": 0.6129531943397806, "grad_norm": 0.31339919567108154, "learning_rate": 0.00018012830003190318, "loss": 11.6765, "step": 29282 }, { "epoch": 0.6129741271037428, "grad_norm": 0.2752748131752014, "learning_rate": 0.00018012698825199151, "loss": 11.6659, "step": 29283 }, { "epoch": 0.612995059867705, "grad_norm": 0.21574726700782776, "learning_rate": 0.00018012567643356118, "loss": 11.6617, "step": 29284 }, { "epoch": 0.6130159926316671, "grad_norm": 0.3249354660511017, "learning_rate": 0.00018012436457661286, "loss": 11.6696, "step": 29285 }, { "epoch": 0.6130369253956293, "grad_norm": 0.32075896859169006, "learning_rate": 0.00018012305268114712, "loss": 11.6722, "step": 29286 }, { "epoch": 0.6130578581595914, "grad_norm": 0.2563842236995697, "learning_rate": 0.00018012174074716466, "loss": 11.6889, "step": 29287 }, { "epoch": 0.6130787909235536, "grad_norm": 0.3011834919452667, "learning_rate": 0.00018012042877466603, "loss": 11.6698, "step": 29288 }, { "epoch": 0.6130997236875158, "grad_norm": 0.34197747707366943, "learning_rate": 0.00018011911676365193, "loss": 11.6603, "step": 29289 }, { "epoch": 0.6131206564514778, "grad_norm": 0.26119464635849, "learning_rate": 0.00018011780471412298, "loss": 11.6716, "step": 29290 }, { "epoch": 0.61314158921544, "grad_norm": 0.255863219499588, "learning_rate": 0.00018011649262607976, "loss": 11.6601, "step": 29291 }, { "epoch": 0.6131625219794021, "grad_norm": 0.28262966871261597, "learning_rate": 0.00018011518049952297, "loss": 11.6712, "step": 29292 }, { "epoch": 0.6131834547433643, "grad_norm": 0.35917818546295166, "learning_rate": 0.00018011386833445317, "loss": 11.6683, "step": 29293 }, { "epoch": 0.6132043875073264, "grad_norm": 0.2647744417190552, "learning_rate": 0.00018011255613087105, "loss": 11.6753, "step": 29294 }, { "epoch": 0.6132253202712886, "grad_norm": 0.3222469687461853, "learning_rate": 0.00018011124388877723, "loss": 11.6635, "step": 29295 }, { "epoch": 0.6132462530352508, "grad_norm": 0.25831249356269836, "learning_rate": 0.00018010993160817233, "loss": 11.6734, "step": 29296 }, { "epoch": 0.6132671857992129, "grad_norm": 0.3039326071739197, "learning_rate": 0.00018010861928905697, "loss": 11.6912, "step": 29297 }, { "epoch": 0.6132881185631751, "grad_norm": 0.2781144678592682, "learning_rate": 0.00018010730693143176, "loss": 11.6578, "step": 29298 }, { "epoch": 0.6133090513271372, "grad_norm": 0.33388248085975647, "learning_rate": 0.00018010599453529742, "loss": 11.685, "step": 29299 }, { "epoch": 0.6133299840910994, "grad_norm": 0.3614389896392822, "learning_rate": 0.0001801046821006545, "loss": 11.6803, "step": 29300 }, { "epoch": 0.6133509168550615, "grad_norm": 0.2347000390291214, "learning_rate": 0.00018010336962750363, "loss": 11.6706, "step": 29301 }, { "epoch": 0.6133718496190237, "grad_norm": 0.2675008475780487, "learning_rate": 0.0001801020571158455, "loss": 11.6823, "step": 29302 }, { "epoch": 0.6133927823829859, "grad_norm": 0.26841336488723755, "learning_rate": 0.00018010074456568066, "loss": 11.6644, "step": 29303 }, { "epoch": 0.613413715146948, "grad_norm": 0.2690148651599884, "learning_rate": 0.00018009943197700982, "loss": 11.6623, "step": 29304 }, { "epoch": 0.6134346479109102, "grad_norm": 0.2922370433807373, "learning_rate": 0.00018009811934983357, "loss": 11.6842, "step": 29305 }, { "epoch": 0.6134555806748723, "grad_norm": 0.33120259642601013, "learning_rate": 0.00018009680668415256, "loss": 11.6838, "step": 29306 }, { "epoch": 0.6134765134388345, "grad_norm": 0.5010079741477966, "learning_rate": 0.0001800954939799674, "loss": 11.6818, "step": 29307 }, { "epoch": 0.6134974462027967, "grad_norm": 0.26902633905410767, "learning_rate": 0.00018009418123727874, "loss": 11.6612, "step": 29308 }, { "epoch": 0.6135183789667588, "grad_norm": 0.3631366193294525, "learning_rate": 0.0001800928684560872, "loss": 11.665, "step": 29309 }, { "epoch": 0.613539311730721, "grad_norm": 0.2910965085029602, "learning_rate": 0.0001800915556363934, "loss": 11.6699, "step": 29310 }, { "epoch": 0.613560244494683, "grad_norm": 0.34081918001174927, "learning_rate": 0.00018009024277819798, "loss": 11.704, "step": 29311 }, { "epoch": 0.6135811772586452, "grad_norm": 0.3322720527648926, "learning_rate": 0.00018008892988150162, "loss": 11.6621, "step": 29312 }, { "epoch": 0.6136021100226073, "grad_norm": 0.3101505935192108, "learning_rate": 0.00018008761694630486, "loss": 11.6861, "step": 29313 }, { "epoch": 0.6136230427865695, "grad_norm": 0.33830156922340393, "learning_rate": 0.0001800863039726084, "loss": 11.6811, "step": 29314 }, { "epoch": 0.6136439755505317, "grad_norm": 0.3311016261577606, "learning_rate": 0.00018008499096041285, "loss": 11.6662, "step": 29315 }, { "epoch": 0.6136649083144938, "grad_norm": 0.2630840837955475, "learning_rate": 0.0001800836779097188, "loss": 11.6907, "step": 29316 }, { "epoch": 0.613685841078456, "grad_norm": 0.26607948541641235, "learning_rate": 0.00018008236482052698, "loss": 11.6587, "step": 29317 }, { "epoch": 0.6137067738424181, "grad_norm": 0.2700503468513489, "learning_rate": 0.00018008105169283793, "loss": 11.6724, "step": 29318 }, { "epoch": 0.6137277066063803, "grad_norm": 0.28945037722587585, "learning_rate": 0.00018007973852665233, "loss": 11.6837, "step": 29319 }, { "epoch": 0.6137486393703424, "grad_norm": 0.39088964462280273, "learning_rate": 0.00018007842532197078, "loss": 11.6664, "step": 29320 }, { "epoch": 0.6137695721343046, "grad_norm": 0.2661033272743225, "learning_rate": 0.00018007711207879392, "loss": 11.6832, "step": 29321 }, { "epoch": 0.6137905048982668, "grad_norm": 0.2683276832103729, "learning_rate": 0.0001800757987971224, "loss": 11.6863, "step": 29322 }, { "epoch": 0.6138114376622289, "grad_norm": 0.33613985776901245, "learning_rate": 0.00018007448547695685, "loss": 11.6658, "step": 29323 }, { "epoch": 0.6138323704261911, "grad_norm": 0.38670989871025085, "learning_rate": 0.00018007317211829785, "loss": 11.6745, "step": 29324 }, { "epoch": 0.6138533031901532, "grad_norm": 0.31581854820251465, "learning_rate": 0.0001800718587211461, "loss": 11.6913, "step": 29325 }, { "epoch": 0.6138742359541154, "grad_norm": 0.2695096731185913, "learning_rate": 0.0001800705452855022, "loss": 11.6562, "step": 29326 }, { "epoch": 0.6138951687180776, "grad_norm": 0.33168500661849976, "learning_rate": 0.0001800692318113668, "loss": 11.6527, "step": 29327 }, { "epoch": 0.6139161014820397, "grad_norm": 0.3290274441242218, "learning_rate": 0.0001800679182987405, "loss": 11.6596, "step": 29328 }, { "epoch": 0.6139370342460019, "grad_norm": 0.25328266620635986, "learning_rate": 0.00018006660474762393, "loss": 11.669, "step": 29329 }, { "epoch": 0.613957967009964, "grad_norm": 0.31237444281578064, "learning_rate": 0.00018006529115801776, "loss": 11.6819, "step": 29330 }, { "epoch": 0.6139788997739262, "grad_norm": 0.2773094177246094, "learning_rate": 0.0001800639775299226, "loss": 11.6722, "step": 29331 }, { "epoch": 0.6139998325378883, "grad_norm": 0.3006284236907959, "learning_rate": 0.00018006266386333907, "loss": 11.6736, "step": 29332 }, { "epoch": 0.6140207653018505, "grad_norm": 0.3151930570602417, "learning_rate": 0.00018006135015826783, "loss": 11.6646, "step": 29333 }, { "epoch": 0.6140416980658127, "grad_norm": 0.3445778787136078, "learning_rate": 0.00018006003641470947, "loss": 11.6589, "step": 29334 }, { "epoch": 0.6140626308297747, "grad_norm": 0.2992752194404602, "learning_rate": 0.00018005872263266465, "loss": 11.6528, "step": 29335 }, { "epoch": 0.614083563593737, "grad_norm": 0.28253135085105896, "learning_rate": 0.000180057408812134, "loss": 11.661, "step": 29336 }, { "epoch": 0.614104496357699, "grad_norm": 0.2751953601837158, "learning_rate": 0.00018005609495311817, "loss": 11.6705, "step": 29337 }, { "epoch": 0.6141254291216612, "grad_norm": 0.24816127121448517, "learning_rate": 0.00018005478105561777, "loss": 11.6532, "step": 29338 }, { "epoch": 0.6141463618856233, "grad_norm": 0.321437269449234, "learning_rate": 0.00018005346711963343, "loss": 11.672, "step": 29339 }, { "epoch": 0.6141672946495855, "grad_norm": 0.2805461585521698, "learning_rate": 0.00018005215314516578, "loss": 11.6713, "step": 29340 }, { "epoch": 0.6141882274135477, "grad_norm": 0.33149343729019165, "learning_rate": 0.00018005083913221544, "loss": 11.6778, "step": 29341 }, { "epoch": 0.6142091601775098, "grad_norm": 0.3051908612251282, "learning_rate": 0.00018004952508078308, "loss": 11.659, "step": 29342 }, { "epoch": 0.614230092941472, "grad_norm": 0.28266602754592896, "learning_rate": 0.0001800482109908693, "loss": 11.6574, "step": 29343 }, { "epoch": 0.6142510257054341, "grad_norm": 0.38039734959602356, "learning_rate": 0.00018004689686247473, "loss": 11.6665, "step": 29344 }, { "epoch": 0.6142719584693963, "grad_norm": 0.23233060538768768, "learning_rate": 0.0001800455826956, "loss": 11.6749, "step": 29345 }, { "epoch": 0.6142928912333585, "grad_norm": 0.36526545882225037, "learning_rate": 0.00018004426849024577, "loss": 11.6586, "step": 29346 }, { "epoch": 0.6143138239973206, "grad_norm": 0.24255770444869995, "learning_rate": 0.00018004295424641268, "loss": 11.6488, "step": 29347 }, { "epoch": 0.6143347567612828, "grad_norm": 0.32147419452667236, "learning_rate": 0.0001800416399641013, "loss": 11.6564, "step": 29348 }, { "epoch": 0.6143556895252449, "grad_norm": 0.32109108567237854, "learning_rate": 0.00018004032564331233, "loss": 11.6601, "step": 29349 }, { "epoch": 0.6143766222892071, "grad_norm": 0.3129954934120178, "learning_rate": 0.00018003901128404634, "loss": 11.6699, "step": 29350 }, { "epoch": 0.6143975550531692, "grad_norm": 0.37343212962150574, "learning_rate": 0.00018003769688630403, "loss": 11.6818, "step": 29351 }, { "epoch": 0.6144184878171314, "grad_norm": 0.2850602865219116, "learning_rate": 0.00018003638245008596, "loss": 11.6853, "step": 29352 }, { "epoch": 0.6144394205810936, "grad_norm": 0.3174290060997009, "learning_rate": 0.00018003506797539283, "loss": 11.6851, "step": 29353 }, { "epoch": 0.6144603533450557, "grad_norm": 0.3713419735431671, "learning_rate": 0.00018003375346222522, "loss": 11.6742, "step": 29354 }, { "epoch": 0.6144812861090179, "grad_norm": 0.31023481488227844, "learning_rate": 0.0001800324389105838, "loss": 11.6766, "step": 29355 }, { "epoch": 0.61450221887298, "grad_norm": 0.2935236394405365, "learning_rate": 0.00018003112432046916, "loss": 11.6752, "step": 29356 }, { "epoch": 0.6145231516369422, "grad_norm": 0.2803960144519806, "learning_rate": 0.00018002980969188196, "loss": 11.7001, "step": 29357 }, { "epoch": 0.6145440844009042, "grad_norm": 0.29447248578071594, "learning_rate": 0.00018002849502482283, "loss": 11.6693, "step": 29358 }, { "epoch": 0.6145650171648664, "grad_norm": 0.3369574248790741, "learning_rate": 0.0001800271803192924, "loss": 11.6735, "step": 29359 }, { "epoch": 0.6145859499288286, "grad_norm": 0.3086584806442261, "learning_rate": 0.0001800258655752913, "loss": 11.676, "step": 29360 }, { "epoch": 0.6146068826927907, "grad_norm": 0.3253638446331024, "learning_rate": 0.00018002455079282016, "loss": 11.6724, "step": 29361 }, { "epoch": 0.6146278154567529, "grad_norm": 0.2734711766242981, "learning_rate": 0.0001800232359718796, "loss": 11.6828, "step": 29362 }, { "epoch": 0.614648748220715, "grad_norm": 0.3324320912361145, "learning_rate": 0.0001800219211124703, "loss": 11.678, "step": 29363 }, { "epoch": 0.6146696809846772, "grad_norm": 0.36255380511283875, "learning_rate": 0.00018002060621459282, "loss": 11.6682, "step": 29364 }, { "epoch": 0.6146906137486394, "grad_norm": 0.3125409483909607, "learning_rate": 0.00018001929127824787, "loss": 11.685, "step": 29365 }, { "epoch": 0.6147115465126015, "grad_norm": 0.33962637186050415, "learning_rate": 0.00018001797630343603, "loss": 11.6722, "step": 29366 }, { "epoch": 0.6147324792765637, "grad_norm": 0.38216716051101685, "learning_rate": 0.00018001666129015793, "loss": 11.677, "step": 29367 }, { "epoch": 0.6147534120405258, "grad_norm": 0.2772352397441864, "learning_rate": 0.00018001534623841422, "loss": 11.6635, "step": 29368 }, { "epoch": 0.614774344804488, "grad_norm": 0.2931053638458252, "learning_rate": 0.00018001403114820556, "loss": 11.6699, "step": 29369 }, { "epoch": 0.6147952775684501, "grad_norm": 0.32317256927490234, "learning_rate": 0.00018001271601953252, "loss": 11.6771, "step": 29370 }, { "epoch": 0.6148162103324123, "grad_norm": 0.3307324945926666, "learning_rate": 0.00018001140085239579, "loss": 11.6735, "step": 29371 }, { "epoch": 0.6148371430963745, "grad_norm": 0.3659643232822418, "learning_rate": 0.00018001008564679592, "loss": 11.7, "step": 29372 }, { "epoch": 0.6148580758603366, "grad_norm": 0.2975345551967621, "learning_rate": 0.00018000877040273366, "loss": 11.6747, "step": 29373 }, { "epoch": 0.6148790086242988, "grad_norm": 0.2883296608924866, "learning_rate": 0.00018000745512020954, "loss": 11.6804, "step": 29374 }, { "epoch": 0.6148999413882609, "grad_norm": 0.29726824164390564, "learning_rate": 0.00018000613979922428, "loss": 11.6708, "step": 29375 }, { "epoch": 0.6149208741522231, "grad_norm": 0.42082318663597107, "learning_rate": 0.00018000482443977843, "loss": 11.6897, "step": 29376 }, { "epoch": 0.6149418069161852, "grad_norm": 0.37081244587898254, "learning_rate": 0.00018000350904187268, "loss": 11.6813, "step": 29377 }, { "epoch": 0.6149627396801474, "grad_norm": 0.272030770778656, "learning_rate": 0.00018000219360550762, "loss": 11.6728, "step": 29378 }, { "epoch": 0.6149836724441096, "grad_norm": 0.3001905381679535, "learning_rate": 0.0001800008781306839, "loss": 11.6779, "step": 29379 }, { "epoch": 0.6150046052080717, "grad_norm": 0.27226126194000244, "learning_rate": 0.00017999956261740215, "loss": 11.6685, "step": 29380 }, { "epoch": 0.6150255379720339, "grad_norm": 0.3255622982978821, "learning_rate": 0.00017999824706566302, "loss": 11.6616, "step": 29381 }, { "epoch": 0.6150464707359959, "grad_norm": 0.2440641075372696, "learning_rate": 0.00017999693147546713, "loss": 11.6667, "step": 29382 }, { "epoch": 0.6150674034999581, "grad_norm": 0.2722042202949524, "learning_rate": 0.0001799956158468151, "loss": 11.6655, "step": 29383 }, { "epoch": 0.6150883362639203, "grad_norm": 0.33686086535453796, "learning_rate": 0.00017999430017970759, "loss": 11.6805, "step": 29384 }, { "epoch": 0.6151092690278824, "grad_norm": 0.2815910279750824, "learning_rate": 0.0001799929844741452, "loss": 11.6511, "step": 29385 }, { "epoch": 0.6151302017918446, "grad_norm": 0.3000098168849945, "learning_rate": 0.00017999166873012857, "loss": 11.6759, "step": 29386 }, { "epoch": 0.6151511345558067, "grad_norm": 0.26553821563720703, "learning_rate": 0.0001799903529476584, "loss": 11.6752, "step": 29387 }, { "epoch": 0.6151720673197689, "grad_norm": 0.29737532138824463, "learning_rate": 0.0001799890371267352, "loss": 11.6712, "step": 29388 }, { "epoch": 0.615193000083731, "grad_norm": 1.1385631561279297, "learning_rate": 0.0001799877212673597, "loss": 11.5894, "step": 29389 }, { "epoch": 0.6152139328476932, "grad_norm": 0.32294440269470215, "learning_rate": 0.00017998640536953252, "loss": 11.6739, "step": 29390 }, { "epoch": 0.6152348656116554, "grad_norm": 0.35078170895576477, "learning_rate": 0.0001799850894332542, "loss": 11.661, "step": 29391 }, { "epoch": 0.6152557983756175, "grad_norm": 0.2690177261829376, "learning_rate": 0.0001799837734585255, "loss": 11.6532, "step": 29392 }, { "epoch": 0.6152767311395797, "grad_norm": 0.25952595472335815, "learning_rate": 0.00017998245744534698, "loss": 11.6723, "step": 29393 }, { "epoch": 0.6152976639035418, "grad_norm": 0.3156585991382599, "learning_rate": 0.0001799811413937193, "loss": 11.6589, "step": 29394 }, { "epoch": 0.615318596667504, "grad_norm": 0.3722859025001526, "learning_rate": 0.0001799798253036431, "loss": 11.6833, "step": 29395 }, { "epoch": 0.6153395294314661, "grad_norm": 0.27525028586387634, "learning_rate": 0.00017997850917511897, "loss": 11.6545, "step": 29396 }, { "epoch": 0.6153604621954283, "grad_norm": 0.2523983120918274, "learning_rate": 0.00017997719300814758, "loss": 11.6684, "step": 29397 }, { "epoch": 0.6153813949593905, "grad_norm": 0.27385783195495605, "learning_rate": 0.0001799758768027295, "loss": 11.6728, "step": 29398 }, { "epoch": 0.6154023277233526, "grad_norm": 0.21529364585876465, "learning_rate": 0.00017997456055886547, "loss": 11.6784, "step": 29399 }, { "epoch": 0.6154232604873148, "grad_norm": 0.26286959648132324, "learning_rate": 0.00017997324427655603, "loss": 11.6647, "step": 29400 }, { "epoch": 0.6154441932512769, "grad_norm": 0.5185980796813965, "learning_rate": 0.00017997192795580192, "loss": 11.6491, "step": 29401 }, { "epoch": 0.6154651260152391, "grad_norm": 0.2802901566028595, "learning_rate": 0.00017997061159660363, "loss": 11.6792, "step": 29402 }, { "epoch": 0.6154860587792012, "grad_norm": 0.30199187994003296, "learning_rate": 0.00017996929519896188, "loss": 11.6766, "step": 29403 }, { "epoch": 0.6155069915431634, "grad_norm": 0.2713487446308136, "learning_rate": 0.00017996797876287729, "loss": 11.6813, "step": 29404 }, { "epoch": 0.6155279243071256, "grad_norm": 0.3314683437347412, "learning_rate": 0.0001799666622883505, "loss": 11.6797, "step": 29405 }, { "epoch": 0.6155488570710876, "grad_norm": 0.3370800018310547, "learning_rate": 0.00017996534577538212, "loss": 11.6805, "step": 29406 }, { "epoch": 0.6155697898350498, "grad_norm": 0.3328307271003723, "learning_rate": 0.00017996402922397278, "loss": 11.6825, "step": 29407 }, { "epoch": 0.6155907225990119, "grad_norm": 0.3367229104042053, "learning_rate": 0.0001799627126341232, "loss": 11.6792, "step": 29408 }, { "epoch": 0.6156116553629741, "grad_norm": 0.24724340438842773, "learning_rate": 0.00017996139600583386, "loss": 11.6689, "step": 29409 }, { "epoch": 0.6156325881269363, "grad_norm": 0.2580682933330536, "learning_rate": 0.0001799600793391055, "loss": 11.6784, "step": 29410 }, { "epoch": 0.6156535208908984, "grad_norm": 0.2913835942745209, "learning_rate": 0.00017995876263393875, "loss": 11.6572, "step": 29411 }, { "epoch": 0.6156744536548606, "grad_norm": 0.4099090099334717, "learning_rate": 0.0001799574458903342, "loss": 11.677, "step": 29412 }, { "epoch": 0.6156953864188227, "grad_norm": 0.2969985008239746, "learning_rate": 0.00017995612910829252, "loss": 11.6846, "step": 29413 }, { "epoch": 0.6157163191827849, "grad_norm": 0.33360037207603455, "learning_rate": 0.0001799548122878143, "loss": 11.6756, "step": 29414 }, { "epoch": 0.615737251946747, "grad_norm": 0.34868380427360535, "learning_rate": 0.00017995349542890022, "loss": 11.6788, "step": 29415 }, { "epoch": 0.6157581847107092, "grad_norm": 0.40787357091903687, "learning_rate": 0.00017995217853155086, "loss": 11.6908, "step": 29416 }, { "epoch": 0.6157791174746714, "grad_norm": 0.31172025203704834, "learning_rate": 0.00017995086159576694, "loss": 11.6689, "step": 29417 }, { "epoch": 0.6158000502386335, "grad_norm": 0.2758369743824005, "learning_rate": 0.00017994954462154897, "loss": 11.6658, "step": 29418 }, { "epoch": 0.6158209830025957, "grad_norm": 0.4166945517063141, "learning_rate": 0.00017994822760889772, "loss": 11.6462, "step": 29419 }, { "epoch": 0.6158419157665578, "grad_norm": 0.2595134675502777, "learning_rate": 0.00017994691055781372, "loss": 11.6949, "step": 29420 }, { "epoch": 0.61586284853052, "grad_norm": 0.28912869095802307, "learning_rate": 0.00017994559346829766, "loss": 11.6455, "step": 29421 }, { "epoch": 0.6158837812944821, "grad_norm": 0.28960752487182617, "learning_rate": 0.00017994427634035015, "loss": 11.6599, "step": 29422 }, { "epoch": 0.6159047140584443, "grad_norm": 0.26420286297798157, "learning_rate": 0.0001799429591739718, "loss": 11.6698, "step": 29423 }, { "epoch": 0.6159256468224065, "grad_norm": 0.2492697834968567, "learning_rate": 0.00017994164196916324, "loss": 11.6862, "step": 29424 }, { "epoch": 0.6159465795863686, "grad_norm": 0.27125802636146545, "learning_rate": 0.0001799403247259252, "loss": 11.6698, "step": 29425 }, { "epoch": 0.6159675123503308, "grad_norm": 0.25742337107658386, "learning_rate": 0.0001799390074442582, "loss": 11.6728, "step": 29426 }, { "epoch": 0.6159884451142928, "grad_norm": 0.2559897303581238, "learning_rate": 0.00017993769012416292, "loss": 11.6667, "step": 29427 }, { "epoch": 0.616009377878255, "grad_norm": 0.3251083195209503, "learning_rate": 0.00017993637276564, "loss": 11.6843, "step": 29428 }, { "epoch": 0.6160303106422172, "grad_norm": 0.3720646798610687, "learning_rate": 0.00017993505536869007, "loss": 11.6763, "step": 29429 }, { "epoch": 0.6160512434061793, "grad_norm": 0.2715996205806732, "learning_rate": 0.00017993373793331375, "loss": 11.6569, "step": 29430 }, { "epoch": 0.6160721761701415, "grad_norm": 0.259546160697937, "learning_rate": 0.0001799324204595117, "loss": 11.6649, "step": 29431 }, { "epoch": 0.6160931089341036, "grad_norm": 0.2982170283794403, "learning_rate": 0.0001799311029472845, "loss": 11.6584, "step": 29432 }, { "epoch": 0.6161140416980658, "grad_norm": 0.317522794008255, "learning_rate": 0.0001799297853966328, "loss": 11.6759, "step": 29433 }, { "epoch": 0.6161349744620279, "grad_norm": 0.24689607322216034, "learning_rate": 0.00017992846780755733, "loss": 11.6603, "step": 29434 }, { "epoch": 0.6161559072259901, "grad_norm": 0.2968995273113251, "learning_rate": 0.00017992715018005857, "loss": 11.6552, "step": 29435 }, { "epoch": 0.6161768399899523, "grad_norm": 0.27574214339256287, "learning_rate": 0.00017992583251413727, "loss": 11.6758, "step": 29436 }, { "epoch": 0.6161977727539144, "grad_norm": 0.2682158052921295, "learning_rate": 0.00017992451480979398, "loss": 11.6533, "step": 29437 }, { "epoch": 0.6162187055178766, "grad_norm": 0.28395846486091614, "learning_rate": 0.0001799231970670294, "loss": 11.6532, "step": 29438 }, { "epoch": 0.6162396382818387, "grad_norm": 0.2990707755088806, "learning_rate": 0.00017992187928584416, "loss": 11.663, "step": 29439 }, { "epoch": 0.6162605710458009, "grad_norm": 0.2932867705821991, "learning_rate": 0.00017992056146623882, "loss": 11.671, "step": 29440 }, { "epoch": 0.616281503809763, "grad_norm": 0.36859893798828125, "learning_rate": 0.0001799192436082141, "loss": 11.6695, "step": 29441 }, { "epoch": 0.6163024365737252, "grad_norm": 0.2942042052745819, "learning_rate": 0.00017991792571177056, "loss": 11.6719, "step": 29442 }, { "epoch": 0.6163233693376874, "grad_norm": 0.26979729533195496, "learning_rate": 0.0001799166077769089, "loss": 11.6734, "step": 29443 }, { "epoch": 0.6163443021016495, "grad_norm": 0.29537463188171387, "learning_rate": 0.00017991528980362972, "loss": 11.6655, "step": 29444 }, { "epoch": 0.6163652348656117, "grad_norm": 0.33088991045951843, "learning_rate": 0.00017991397179193368, "loss": 11.6758, "step": 29445 }, { "epoch": 0.6163861676295738, "grad_norm": 0.24509172141551971, "learning_rate": 0.00017991265374182137, "loss": 11.6588, "step": 29446 }, { "epoch": 0.616407100393536, "grad_norm": 0.29934582114219666, "learning_rate": 0.00017991133565329345, "loss": 11.6686, "step": 29447 }, { "epoch": 0.6164280331574982, "grad_norm": 0.2757454514503479, "learning_rate": 0.00017991001752635055, "loss": 11.655, "step": 29448 }, { "epoch": 0.6164489659214603, "grad_norm": 0.26454097032546997, "learning_rate": 0.0001799086993609933, "loss": 11.67, "step": 29449 }, { "epoch": 0.6164698986854225, "grad_norm": 0.29781386256217957, "learning_rate": 0.00017990738115722236, "loss": 11.6747, "step": 29450 }, { "epoch": 0.6164908314493845, "grad_norm": 0.2349957823753357, "learning_rate": 0.0001799060629150383, "loss": 11.6696, "step": 29451 }, { "epoch": 0.6165117642133467, "grad_norm": 0.27502360939979553, "learning_rate": 0.00017990474463444183, "loss": 11.6721, "step": 29452 }, { "epoch": 0.6165326969773088, "grad_norm": 0.2676716148853302, "learning_rate": 0.0001799034263154335, "loss": 11.6804, "step": 29453 }, { "epoch": 0.616553629741271, "grad_norm": 0.34586048126220703, "learning_rate": 0.00017990210795801404, "loss": 11.6658, "step": 29454 }, { "epoch": 0.6165745625052332, "grad_norm": 0.25205376744270325, "learning_rate": 0.00017990078956218403, "loss": 11.6592, "step": 29455 }, { "epoch": 0.6165954952691953, "grad_norm": 0.24366487562656403, "learning_rate": 0.0001798994711279441, "loss": 11.6727, "step": 29456 }, { "epoch": 0.6166164280331575, "grad_norm": 0.4000478982925415, "learning_rate": 0.00017989815265529488, "loss": 11.6789, "step": 29457 }, { "epoch": 0.6166373607971196, "grad_norm": 0.3117336928844452, "learning_rate": 0.00017989683414423703, "loss": 11.6629, "step": 29458 }, { "epoch": 0.6166582935610818, "grad_norm": 0.2539895474910736, "learning_rate": 0.00017989551559477118, "loss": 11.659, "step": 29459 }, { "epoch": 0.6166792263250439, "grad_norm": 0.3500377833843231, "learning_rate": 0.00017989419700689796, "loss": 11.6877, "step": 29460 }, { "epoch": 0.6167001590890061, "grad_norm": 0.3215782344341278, "learning_rate": 0.00017989287838061796, "loss": 11.6796, "step": 29461 }, { "epoch": 0.6167210918529683, "grad_norm": 0.2783052623271942, "learning_rate": 0.0001798915597159319, "loss": 11.6675, "step": 29462 }, { "epoch": 0.6167420246169304, "grad_norm": 0.3054755926132202, "learning_rate": 0.00017989024101284034, "loss": 11.6753, "step": 29463 }, { "epoch": 0.6167629573808926, "grad_norm": 0.29153379797935486, "learning_rate": 0.00017988892227134394, "loss": 11.6537, "step": 29464 }, { "epoch": 0.6167838901448547, "grad_norm": 0.2725820243358612, "learning_rate": 0.0001798876034914433, "loss": 11.6718, "step": 29465 }, { "epoch": 0.6168048229088169, "grad_norm": 0.27448660135269165, "learning_rate": 0.00017988628467313915, "loss": 11.6604, "step": 29466 }, { "epoch": 0.6168257556727791, "grad_norm": 0.2649393677711487, "learning_rate": 0.000179884965816432, "loss": 11.675, "step": 29467 }, { "epoch": 0.6168466884367412, "grad_norm": 0.2753555476665497, "learning_rate": 0.0001798836469213226, "loss": 11.6644, "step": 29468 }, { "epoch": 0.6168676212007034, "grad_norm": 0.26434364914894104, "learning_rate": 0.0001798823279878115, "loss": 11.6619, "step": 29469 }, { "epoch": 0.6168885539646655, "grad_norm": 0.3105882704257965, "learning_rate": 0.0001798810090158994, "loss": 11.6547, "step": 29470 }, { "epoch": 0.6169094867286277, "grad_norm": 0.22834019362926483, "learning_rate": 0.00017987969000558688, "loss": 11.6557, "step": 29471 }, { "epoch": 0.6169304194925898, "grad_norm": 0.3438900113105774, "learning_rate": 0.00017987837095687458, "loss": 11.6524, "step": 29472 }, { "epoch": 0.616951352256552, "grad_norm": 0.262254923582077, "learning_rate": 0.00017987705186976312, "loss": 11.6599, "step": 29473 }, { "epoch": 0.6169722850205142, "grad_norm": 0.32455360889434814, "learning_rate": 0.00017987573274425323, "loss": 11.6953, "step": 29474 }, { "epoch": 0.6169932177844762, "grad_norm": 0.3185560405254364, "learning_rate": 0.00017987441358034543, "loss": 11.6867, "step": 29475 }, { "epoch": 0.6170141505484384, "grad_norm": 0.3532637357711792, "learning_rate": 0.00017987309437804042, "loss": 11.6921, "step": 29476 }, { "epoch": 0.6170350833124005, "grad_norm": 0.30260026454925537, "learning_rate": 0.0001798717751373388, "loss": 11.6814, "step": 29477 }, { "epoch": 0.6170560160763627, "grad_norm": 0.2686854302883148, "learning_rate": 0.00017987045585824118, "loss": 11.6664, "step": 29478 }, { "epoch": 0.6170769488403248, "grad_norm": 0.3239211142063141, "learning_rate": 0.00017986913654074828, "loss": 11.6752, "step": 29479 }, { "epoch": 0.617097881604287, "grad_norm": 0.3391907513141632, "learning_rate": 0.00017986781718486066, "loss": 11.6853, "step": 29480 }, { "epoch": 0.6171188143682492, "grad_norm": 0.2212284356355667, "learning_rate": 0.00017986649779057904, "loss": 11.6699, "step": 29481 }, { "epoch": 0.6171397471322113, "grad_norm": 0.22741037607192993, "learning_rate": 0.0001798651783579039, "loss": 11.6613, "step": 29482 }, { "epoch": 0.6171606798961735, "grad_norm": 0.25439131259918213, "learning_rate": 0.00017986385888683602, "loss": 11.6667, "step": 29483 }, { "epoch": 0.6171816126601356, "grad_norm": 0.2844972610473633, "learning_rate": 0.000179862539377376, "loss": 11.6746, "step": 29484 }, { "epoch": 0.6172025454240978, "grad_norm": 0.2749800980091095, "learning_rate": 0.00017986121982952442, "loss": 11.6744, "step": 29485 }, { "epoch": 0.61722347818806, "grad_norm": 0.3061045706272125, "learning_rate": 0.00017985990024328195, "loss": 11.6845, "step": 29486 }, { "epoch": 0.6172444109520221, "grad_norm": 0.30137693881988525, "learning_rate": 0.00017985858061864927, "loss": 11.6611, "step": 29487 }, { "epoch": 0.6172653437159843, "grad_norm": 0.25049853324890137, "learning_rate": 0.0001798572609556269, "loss": 11.6669, "step": 29488 }, { "epoch": 0.6172862764799464, "grad_norm": 0.24551673233509064, "learning_rate": 0.0001798559412542156, "loss": 11.6565, "step": 29489 }, { "epoch": 0.6173072092439086, "grad_norm": 0.2675829231739044, "learning_rate": 0.00017985462151441593, "loss": 11.6747, "step": 29490 }, { "epoch": 0.6173281420078707, "grad_norm": 0.3264503479003906, "learning_rate": 0.00017985330173622856, "loss": 11.6843, "step": 29491 }, { "epoch": 0.6173490747718329, "grad_norm": 0.3818815350532532, "learning_rate": 0.00017985198191965412, "loss": 11.6766, "step": 29492 }, { "epoch": 0.6173700075357951, "grad_norm": 0.25763803720474243, "learning_rate": 0.0001798506620646932, "loss": 11.6631, "step": 29493 }, { "epoch": 0.6173909402997572, "grad_norm": 0.2645987570285797, "learning_rate": 0.00017984934217134645, "loss": 11.6774, "step": 29494 }, { "epoch": 0.6174118730637194, "grad_norm": 0.35758572816848755, "learning_rate": 0.00017984802223961453, "loss": 11.6587, "step": 29495 }, { "epoch": 0.6174328058276815, "grad_norm": 0.2836185097694397, "learning_rate": 0.0001798467022694981, "loss": 11.6561, "step": 29496 }, { "epoch": 0.6174537385916437, "grad_norm": 0.28063875436782837, "learning_rate": 0.00017984538226099774, "loss": 11.6622, "step": 29497 }, { "epoch": 0.6174746713556057, "grad_norm": 0.33626875281333923, "learning_rate": 0.00017984406221411412, "loss": 11.6836, "step": 29498 }, { "epoch": 0.6174956041195679, "grad_norm": 0.21678271889686584, "learning_rate": 0.0001798427421288478, "loss": 11.6774, "step": 29499 }, { "epoch": 0.6175165368835301, "grad_norm": 0.3647654056549072, "learning_rate": 0.00017984142200519956, "loss": 11.6688, "step": 29500 }, { "epoch": 0.6175374696474922, "grad_norm": 0.28134605288505554, "learning_rate": 0.0001798401018431699, "loss": 11.6685, "step": 29501 }, { "epoch": 0.6175584024114544, "grad_norm": 0.26252323389053345, "learning_rate": 0.00017983878164275948, "loss": 11.6654, "step": 29502 }, { "epoch": 0.6175793351754165, "grad_norm": 0.2781907021999359, "learning_rate": 0.000179837461403969, "loss": 11.6763, "step": 29503 }, { "epoch": 0.6176002679393787, "grad_norm": 0.34387239813804626, "learning_rate": 0.00017983614112679903, "loss": 11.6678, "step": 29504 }, { "epoch": 0.6176212007033409, "grad_norm": 0.34053343534469604, "learning_rate": 0.00017983482081125024, "loss": 11.6761, "step": 29505 }, { "epoch": 0.617642133467303, "grad_norm": 0.3031081259250641, "learning_rate": 0.00017983350045732324, "loss": 11.6766, "step": 29506 }, { "epoch": 0.6176630662312652, "grad_norm": 0.33466124534606934, "learning_rate": 0.0001798321800650187, "loss": 11.6706, "step": 29507 }, { "epoch": 0.6176839989952273, "grad_norm": 0.3203984200954437, "learning_rate": 0.0001798308596343372, "loss": 11.662, "step": 29508 }, { "epoch": 0.6177049317591895, "grad_norm": 0.3479143977165222, "learning_rate": 0.00017982953916527943, "loss": 11.6836, "step": 29509 }, { "epoch": 0.6177258645231516, "grad_norm": 0.287815123796463, "learning_rate": 0.000179828218657846, "loss": 11.6684, "step": 29510 }, { "epoch": 0.6177467972871138, "grad_norm": 0.26055729389190674, "learning_rate": 0.00017982689811203755, "loss": 11.6745, "step": 29511 }, { "epoch": 0.617767730051076, "grad_norm": 0.22941188514232635, "learning_rate": 0.00017982557752785466, "loss": 11.6631, "step": 29512 }, { "epoch": 0.6177886628150381, "grad_norm": 0.3024601340293884, "learning_rate": 0.00017982425690529804, "loss": 11.6686, "step": 29513 }, { "epoch": 0.6178095955790003, "grad_norm": 0.23189006745815277, "learning_rate": 0.00017982293624436833, "loss": 11.661, "step": 29514 }, { "epoch": 0.6178305283429624, "grad_norm": 0.2745477855205536, "learning_rate": 0.00017982161554506613, "loss": 11.6531, "step": 29515 }, { "epoch": 0.6178514611069246, "grad_norm": 0.3102647364139557, "learning_rate": 0.00017982029480739206, "loss": 11.6818, "step": 29516 }, { "epoch": 0.6178723938708867, "grad_norm": 0.3676474392414093, "learning_rate": 0.00017981897403134678, "loss": 11.6771, "step": 29517 }, { "epoch": 0.6178933266348489, "grad_norm": 0.2837774157524109, "learning_rate": 0.00017981765321693093, "loss": 11.6856, "step": 29518 }, { "epoch": 0.6179142593988111, "grad_norm": 0.3240910470485687, "learning_rate": 0.00017981633236414512, "loss": 11.6695, "step": 29519 }, { "epoch": 0.6179351921627732, "grad_norm": 0.33147427439689636, "learning_rate": 0.00017981501147299, "loss": 11.6674, "step": 29520 }, { "epoch": 0.6179561249267354, "grad_norm": 0.2521652579307556, "learning_rate": 0.0001798136905434662, "loss": 11.6796, "step": 29521 }, { "epoch": 0.6179770576906974, "grad_norm": 0.3055928647518158, "learning_rate": 0.0001798123695755744, "loss": 11.6759, "step": 29522 }, { "epoch": 0.6179979904546596, "grad_norm": 0.30807092785835266, "learning_rate": 0.00017981104856931514, "loss": 11.6608, "step": 29523 }, { "epoch": 0.6180189232186218, "grad_norm": 0.29623353481292725, "learning_rate": 0.00017980972752468913, "loss": 11.6767, "step": 29524 }, { "epoch": 0.6180398559825839, "grad_norm": 0.25586551427841187, "learning_rate": 0.00017980840644169697, "loss": 11.6783, "step": 29525 }, { "epoch": 0.6180607887465461, "grad_norm": 0.2897299528121948, "learning_rate": 0.00017980708532033934, "loss": 11.6758, "step": 29526 }, { "epoch": 0.6180817215105082, "grad_norm": 0.25579962134361267, "learning_rate": 0.00017980576416061684, "loss": 11.6709, "step": 29527 }, { "epoch": 0.6181026542744704, "grad_norm": 0.2816146910190582, "learning_rate": 0.00017980444296253012, "loss": 11.6872, "step": 29528 }, { "epoch": 0.6181235870384325, "grad_norm": 0.2980095446109772, "learning_rate": 0.0001798031217260798, "loss": 11.6604, "step": 29529 }, { "epoch": 0.6181445198023947, "grad_norm": 0.29815128445625305, "learning_rate": 0.0001798018004512665, "loss": 11.6785, "step": 29530 }, { "epoch": 0.6181654525663569, "grad_norm": 0.3505372107028961, "learning_rate": 0.00017980047913809086, "loss": 11.6722, "step": 29531 }, { "epoch": 0.618186385330319, "grad_norm": 0.30292144417762756, "learning_rate": 0.00017979915778655356, "loss": 11.6701, "step": 29532 }, { "epoch": 0.6182073180942812, "grad_norm": 0.32950928807258606, "learning_rate": 0.00017979783639665518, "loss": 11.665, "step": 29533 }, { "epoch": 0.6182282508582433, "grad_norm": 0.24150294065475464, "learning_rate": 0.0001797965149683964, "loss": 11.6693, "step": 29534 }, { "epoch": 0.6182491836222055, "grad_norm": 0.29198548197746277, "learning_rate": 0.00017979519350177784, "loss": 11.6689, "step": 29535 }, { "epoch": 0.6182701163861676, "grad_norm": 0.2932041883468628, "learning_rate": 0.00017979387199680013, "loss": 11.6835, "step": 29536 }, { "epoch": 0.6182910491501298, "grad_norm": 0.3209056556224823, "learning_rate": 0.0001797925504534639, "loss": 11.6474, "step": 29537 }, { "epoch": 0.618311981914092, "grad_norm": 0.3034818470478058, "learning_rate": 0.0001797912288717698, "loss": 11.6583, "step": 29538 }, { "epoch": 0.6183329146780541, "grad_norm": 0.3123171627521515, "learning_rate": 0.00017978990725171844, "loss": 11.6862, "step": 29539 }, { "epoch": 0.6183538474420163, "grad_norm": 0.31858548521995544, "learning_rate": 0.00017978858559331048, "loss": 11.6861, "step": 29540 }, { "epoch": 0.6183747802059784, "grad_norm": 0.3715762794017792, "learning_rate": 0.0001797872638965466, "loss": 11.6709, "step": 29541 }, { "epoch": 0.6183957129699406, "grad_norm": 0.313052237033844, "learning_rate": 0.0001797859421614273, "loss": 11.6769, "step": 29542 }, { "epoch": 0.6184166457339028, "grad_norm": 0.380650132894516, "learning_rate": 0.00017978462038795336, "loss": 11.6626, "step": 29543 }, { "epoch": 0.6184375784978648, "grad_norm": 0.2690153419971466, "learning_rate": 0.00017978329857612531, "loss": 11.6601, "step": 29544 }, { "epoch": 0.618458511261827, "grad_norm": 0.3085050880908966, "learning_rate": 0.00017978197672594386, "loss": 11.6756, "step": 29545 }, { "epoch": 0.6184794440257891, "grad_norm": 0.25795111060142517, "learning_rate": 0.0001797806548374096, "loss": 11.687, "step": 29546 }, { "epoch": 0.6185003767897513, "grad_norm": 0.3156881034374237, "learning_rate": 0.00017977933291052318, "loss": 11.6612, "step": 29547 }, { "epoch": 0.6185213095537134, "grad_norm": 0.3914923369884491, "learning_rate": 0.00017977801094528525, "loss": 11.6811, "step": 29548 }, { "epoch": 0.6185422423176756, "grad_norm": 0.25484758615493774, "learning_rate": 0.00017977668894169642, "loss": 11.6765, "step": 29549 }, { "epoch": 0.6185631750816378, "grad_norm": 0.41366687417030334, "learning_rate": 0.0001797753668997573, "loss": 11.6807, "step": 29550 }, { "epoch": 0.6185841078455999, "grad_norm": 0.290973037481308, "learning_rate": 0.00017977404481946864, "loss": 11.6659, "step": 29551 }, { "epoch": 0.6186050406095621, "grad_norm": 0.4667288064956665, "learning_rate": 0.00017977272270083096, "loss": 11.6798, "step": 29552 }, { "epoch": 0.6186259733735242, "grad_norm": 0.3871920108795166, "learning_rate": 0.00017977140054384493, "loss": 11.6527, "step": 29553 }, { "epoch": 0.6186469061374864, "grad_norm": 0.2612472176551819, "learning_rate": 0.00017977007834851119, "loss": 11.6664, "step": 29554 }, { "epoch": 0.6186678389014485, "grad_norm": 0.2653219997882843, "learning_rate": 0.00017976875611483038, "loss": 11.6747, "step": 29555 }, { "epoch": 0.6186887716654107, "grad_norm": 0.30326902866363525, "learning_rate": 0.00017976743384280313, "loss": 11.6622, "step": 29556 }, { "epoch": 0.6187097044293729, "grad_norm": 0.2904819846153259, "learning_rate": 0.00017976611153243006, "loss": 11.6835, "step": 29557 }, { "epoch": 0.618730637193335, "grad_norm": 0.27914804220199585, "learning_rate": 0.00017976478918371185, "loss": 11.6654, "step": 29558 }, { "epoch": 0.6187515699572972, "grad_norm": 0.3254980444908142, "learning_rate": 0.00017976346679664907, "loss": 11.6801, "step": 29559 }, { "epoch": 0.6187725027212593, "grad_norm": 0.3282555937767029, "learning_rate": 0.00017976214437124242, "loss": 11.678, "step": 29560 }, { "epoch": 0.6187934354852215, "grad_norm": 0.22900664806365967, "learning_rate": 0.00017976082190749248, "loss": 11.6679, "step": 29561 }, { "epoch": 0.6188143682491837, "grad_norm": 0.36667823791503906, "learning_rate": 0.00017975949940539997, "loss": 11.6828, "step": 29562 }, { "epoch": 0.6188353010131458, "grad_norm": 0.3114107549190521, "learning_rate": 0.00017975817686496543, "loss": 11.6681, "step": 29563 }, { "epoch": 0.618856233777108, "grad_norm": 0.24850796163082123, "learning_rate": 0.00017975685428618952, "loss": 11.667, "step": 29564 }, { "epoch": 0.6188771665410701, "grad_norm": 0.26891282200813293, "learning_rate": 0.00017975553166907292, "loss": 11.6674, "step": 29565 }, { "epoch": 0.6188980993050323, "grad_norm": 0.2797681391239166, "learning_rate": 0.00017975420901361627, "loss": 11.664, "step": 29566 }, { "epoch": 0.6189190320689943, "grad_norm": 0.32720157504081726, "learning_rate": 0.00017975288631982012, "loss": 11.6949, "step": 29567 }, { "epoch": 0.6189399648329565, "grad_norm": 0.32136887311935425, "learning_rate": 0.00017975156358768517, "loss": 11.6788, "step": 29568 }, { "epoch": 0.6189608975969187, "grad_norm": 0.35344740748405457, "learning_rate": 0.00017975024081721204, "loss": 11.6564, "step": 29569 }, { "epoch": 0.6189818303608808, "grad_norm": 0.27451291680336, "learning_rate": 0.0001797489180084014, "loss": 11.6709, "step": 29570 }, { "epoch": 0.619002763124843, "grad_norm": 0.28395184874534607, "learning_rate": 0.00017974759516125384, "loss": 11.6739, "step": 29571 }, { "epoch": 0.6190236958888051, "grad_norm": 0.29758748412132263, "learning_rate": 0.00017974627227577, "loss": 11.6777, "step": 29572 }, { "epoch": 0.6190446286527673, "grad_norm": 0.26356327533721924, "learning_rate": 0.00017974494935195053, "loss": 11.6674, "step": 29573 }, { "epoch": 0.6190655614167294, "grad_norm": 0.280857652425766, "learning_rate": 0.00017974362638979607, "loss": 11.6778, "step": 29574 }, { "epoch": 0.6190864941806916, "grad_norm": 0.3214563727378845, "learning_rate": 0.00017974230338930726, "loss": 11.6729, "step": 29575 }, { "epoch": 0.6191074269446538, "grad_norm": 0.37097951769828796, "learning_rate": 0.00017974098035048472, "loss": 11.6744, "step": 29576 }, { "epoch": 0.6191283597086159, "grad_norm": 0.2976357042789459, "learning_rate": 0.0001797396572733291, "loss": 11.6568, "step": 29577 }, { "epoch": 0.6191492924725781, "grad_norm": 0.273406445980072, "learning_rate": 0.00017973833415784103, "loss": 11.6615, "step": 29578 }, { "epoch": 0.6191702252365402, "grad_norm": 0.2532460689544678, "learning_rate": 0.00017973701100402114, "loss": 11.6892, "step": 29579 }, { "epoch": 0.6191911580005024, "grad_norm": 0.2680540978908539, "learning_rate": 0.00017973568781187008, "loss": 11.6632, "step": 29580 }, { "epoch": 0.6192120907644645, "grad_norm": 0.25665515661239624, "learning_rate": 0.00017973436458138846, "loss": 11.6645, "step": 29581 }, { "epoch": 0.6192330235284267, "grad_norm": 0.3047459125518799, "learning_rate": 0.00017973304131257693, "loss": 11.6823, "step": 29582 }, { "epoch": 0.6192539562923889, "grad_norm": 0.3547075390815735, "learning_rate": 0.00017973171800543614, "loss": 11.6695, "step": 29583 }, { "epoch": 0.619274889056351, "grad_norm": 0.2673560380935669, "learning_rate": 0.0001797303946599667, "loss": 11.6722, "step": 29584 }, { "epoch": 0.6192958218203132, "grad_norm": 0.2951141893863678, "learning_rate": 0.0001797290712761693, "loss": 11.6663, "step": 29585 }, { "epoch": 0.6193167545842753, "grad_norm": 0.35519108176231384, "learning_rate": 0.0001797277478540445, "loss": 11.6678, "step": 29586 }, { "epoch": 0.6193376873482375, "grad_norm": 0.3226676881313324, "learning_rate": 0.000179726424393593, "loss": 11.6809, "step": 29587 }, { "epoch": 0.6193586201121997, "grad_norm": 0.2805316150188446, "learning_rate": 0.0001797251008948154, "loss": 11.6678, "step": 29588 }, { "epoch": 0.6193795528761618, "grad_norm": 0.32106485962867737, "learning_rate": 0.00017972377735771234, "loss": 11.6579, "step": 29589 }, { "epoch": 0.619400485640124, "grad_norm": 0.28411194682121277, "learning_rate": 0.00017972245378228445, "loss": 11.6579, "step": 29590 }, { "epoch": 0.619421418404086, "grad_norm": 0.2309139370918274, "learning_rate": 0.0001797211301685324, "loss": 11.6841, "step": 29591 }, { "epoch": 0.6194423511680482, "grad_norm": 0.2676298916339874, "learning_rate": 0.00017971980651645681, "loss": 11.6689, "step": 29592 }, { "epoch": 0.6194632839320103, "grad_norm": 0.35009604692459106, "learning_rate": 0.0001797184828260583, "loss": 11.664, "step": 29593 }, { "epoch": 0.6194842166959725, "grad_norm": 0.24090194702148438, "learning_rate": 0.00017971715909733752, "loss": 11.6832, "step": 29594 }, { "epoch": 0.6195051494599347, "grad_norm": 0.33819180727005005, "learning_rate": 0.0001797158353302951, "loss": 11.6774, "step": 29595 }, { "epoch": 0.6195260822238968, "grad_norm": 0.32653695344924927, "learning_rate": 0.00017971451152493169, "loss": 11.6573, "step": 29596 }, { "epoch": 0.619547014987859, "grad_norm": 0.3149721026420593, "learning_rate": 0.0001797131876812479, "loss": 11.6838, "step": 29597 }, { "epoch": 0.6195679477518211, "grad_norm": 0.30930453538894653, "learning_rate": 0.00017971186379924441, "loss": 11.6574, "step": 29598 }, { "epoch": 0.6195888805157833, "grad_norm": 0.44134461879730225, "learning_rate": 0.00017971053987892179, "loss": 11.6915, "step": 29599 }, { "epoch": 0.6196098132797454, "grad_norm": 0.4806870222091675, "learning_rate": 0.00017970921592028077, "loss": 11.6696, "step": 29600 }, { "epoch": 0.6196307460437076, "grad_norm": 0.352478951215744, "learning_rate": 0.00017970789192332188, "loss": 11.6749, "step": 29601 }, { "epoch": 0.6196516788076698, "grad_norm": 0.3039339780807495, "learning_rate": 0.00017970656788804585, "loss": 11.662, "step": 29602 }, { "epoch": 0.6196726115716319, "grad_norm": 0.36689338088035583, "learning_rate": 0.00017970524381445327, "loss": 11.676, "step": 29603 }, { "epoch": 0.6196935443355941, "grad_norm": 0.2967691719532013, "learning_rate": 0.00017970391970254475, "loss": 11.6782, "step": 29604 }, { "epoch": 0.6197144770995562, "grad_norm": 0.38004904985427856, "learning_rate": 0.00017970259555232095, "loss": 11.6745, "step": 29605 }, { "epoch": 0.6197354098635184, "grad_norm": 0.275923490524292, "learning_rate": 0.00017970127136378257, "loss": 11.678, "step": 29606 }, { "epoch": 0.6197563426274806, "grad_norm": 0.3261500597000122, "learning_rate": 0.00017969994713693015, "loss": 11.6798, "step": 29607 }, { "epoch": 0.6197772753914427, "grad_norm": 0.35256731510162354, "learning_rate": 0.0001796986228717644, "loss": 11.6781, "step": 29608 }, { "epoch": 0.6197982081554049, "grad_norm": 0.2606052756309509, "learning_rate": 0.0001796972985682859, "loss": 11.6642, "step": 29609 }, { "epoch": 0.619819140919367, "grad_norm": 0.28484854102134705, "learning_rate": 0.0001796959742264953, "loss": 11.6719, "step": 29610 }, { "epoch": 0.6198400736833292, "grad_norm": 0.2786092758178711, "learning_rate": 0.00017969464984639327, "loss": 11.6776, "step": 29611 }, { "epoch": 0.6198610064472913, "grad_norm": 0.41908660531044006, "learning_rate": 0.00017969332542798044, "loss": 11.6668, "step": 29612 }, { "epoch": 0.6198819392112535, "grad_norm": 0.25413933396339417, "learning_rate": 0.0001796920009712574, "loss": 11.658, "step": 29613 }, { "epoch": 0.6199028719752157, "grad_norm": 0.3177729845046997, "learning_rate": 0.00017969067647622482, "loss": 11.6661, "step": 29614 }, { "epoch": 0.6199238047391777, "grad_norm": 0.2910434305667877, "learning_rate": 0.00017968935194288336, "loss": 11.6506, "step": 29615 }, { "epoch": 0.6199447375031399, "grad_norm": 0.4259875416755676, "learning_rate": 0.0001796880273712336, "loss": 11.6795, "step": 29616 }, { "epoch": 0.619965670267102, "grad_norm": 0.3272497355937958, "learning_rate": 0.00017968670276127623, "loss": 11.6783, "step": 29617 }, { "epoch": 0.6199866030310642, "grad_norm": 0.33035045862197876, "learning_rate": 0.00017968537811301186, "loss": 11.6798, "step": 29618 }, { "epoch": 0.6200075357950263, "grad_norm": 0.30803078413009644, "learning_rate": 0.00017968405342644114, "loss": 11.6686, "step": 29619 }, { "epoch": 0.6200284685589885, "grad_norm": 0.28684064745903015, "learning_rate": 0.00017968272870156468, "loss": 11.6724, "step": 29620 }, { "epoch": 0.6200494013229507, "grad_norm": 0.3129212260246277, "learning_rate": 0.00017968140393838315, "loss": 11.6572, "step": 29621 }, { "epoch": 0.6200703340869128, "grad_norm": 0.29386070370674133, "learning_rate": 0.00017968007913689716, "loss": 11.6682, "step": 29622 }, { "epoch": 0.620091266850875, "grad_norm": 0.24023331701755524, "learning_rate": 0.00017967875429710737, "loss": 11.6674, "step": 29623 }, { "epoch": 0.6201121996148371, "grad_norm": 0.3170374929904938, "learning_rate": 0.0001796774294190144, "loss": 11.6852, "step": 29624 }, { "epoch": 0.6201331323787993, "grad_norm": 0.2842238247394562, "learning_rate": 0.0001796761045026189, "loss": 11.6559, "step": 29625 }, { "epoch": 0.6201540651427615, "grad_norm": 0.25974419713020325, "learning_rate": 0.0001796747795479215, "loss": 11.6752, "step": 29626 }, { "epoch": 0.6201749979067236, "grad_norm": 0.47122836112976074, "learning_rate": 0.00017967345455492285, "loss": 11.6943, "step": 29627 }, { "epoch": 0.6201959306706858, "grad_norm": 0.268285870552063, "learning_rate": 0.00017967212952362354, "loss": 11.6669, "step": 29628 }, { "epoch": 0.6202168634346479, "grad_norm": 0.33391666412353516, "learning_rate": 0.00017967080445402425, "loss": 11.672, "step": 29629 }, { "epoch": 0.6202377961986101, "grad_norm": 0.29240989685058594, "learning_rate": 0.00017966947934612562, "loss": 11.6704, "step": 29630 }, { "epoch": 0.6202587289625722, "grad_norm": 0.295865923166275, "learning_rate": 0.00017966815419992829, "loss": 11.6684, "step": 29631 }, { "epoch": 0.6202796617265344, "grad_norm": 0.28618746995925903, "learning_rate": 0.00017966682901543284, "loss": 11.6723, "step": 29632 }, { "epoch": 0.6203005944904966, "grad_norm": 0.28270214796066284, "learning_rate": 0.00017966550379263998, "loss": 11.6509, "step": 29633 }, { "epoch": 0.6203215272544587, "grad_norm": 0.2693975567817688, "learning_rate": 0.0001796641785315503, "loss": 11.6691, "step": 29634 }, { "epoch": 0.6203424600184209, "grad_norm": 0.3872125446796417, "learning_rate": 0.00017966285323216445, "loss": 11.66, "step": 29635 }, { "epoch": 0.620363392782383, "grad_norm": 0.2692614793777466, "learning_rate": 0.0001796615278944831, "loss": 11.6718, "step": 29636 }, { "epoch": 0.6203843255463452, "grad_norm": 0.3242636024951935, "learning_rate": 0.00017966020251850684, "loss": 11.6674, "step": 29637 }, { "epoch": 0.6204052583103072, "grad_norm": 0.29915425181388855, "learning_rate": 0.0001796588771042363, "loss": 11.6601, "step": 29638 }, { "epoch": 0.6204261910742694, "grad_norm": 0.2734934687614441, "learning_rate": 0.00017965755165167218, "loss": 11.6749, "step": 29639 }, { "epoch": 0.6204471238382316, "grad_norm": 0.2647427022457123, "learning_rate": 0.00017965622616081505, "loss": 11.68, "step": 29640 }, { "epoch": 0.6204680566021937, "grad_norm": 0.2992410659790039, "learning_rate": 0.00017965490063166558, "loss": 11.6741, "step": 29641 }, { "epoch": 0.6204889893661559, "grad_norm": 0.2799336314201355, "learning_rate": 0.0001796535750642244, "loss": 11.6788, "step": 29642 }, { "epoch": 0.620509922130118, "grad_norm": 0.3233441710472107, "learning_rate": 0.00017965224945849215, "loss": 11.6661, "step": 29643 }, { "epoch": 0.6205308548940802, "grad_norm": 0.31204649806022644, "learning_rate": 0.00017965092381446948, "loss": 11.674, "step": 29644 }, { "epoch": 0.6205517876580424, "grad_norm": 0.3491978645324707, "learning_rate": 0.00017964959813215702, "loss": 11.6706, "step": 29645 }, { "epoch": 0.6205727204220045, "grad_norm": 0.22765953838825226, "learning_rate": 0.0001796482724115554, "loss": 11.6717, "step": 29646 }, { "epoch": 0.6205936531859667, "grad_norm": 0.25094690918922424, "learning_rate": 0.00017964694665266527, "loss": 11.675, "step": 29647 }, { "epoch": 0.6206145859499288, "grad_norm": 0.36399441957473755, "learning_rate": 0.00017964562085548723, "loss": 11.6704, "step": 29648 }, { "epoch": 0.620635518713891, "grad_norm": 0.31073957681655884, "learning_rate": 0.00017964429502002194, "loss": 11.6847, "step": 29649 }, { "epoch": 0.6206564514778531, "grad_norm": 0.2643294930458069, "learning_rate": 0.00017964296914627003, "loss": 11.6931, "step": 29650 }, { "epoch": 0.6206773842418153, "grad_norm": 0.2900603711605072, "learning_rate": 0.00017964164323423218, "loss": 11.6807, "step": 29651 }, { "epoch": 0.6206983170057775, "grad_norm": 0.2619140148162842, "learning_rate": 0.000179640317283909, "loss": 11.6835, "step": 29652 }, { "epoch": 0.6207192497697396, "grad_norm": 0.2617202699184418, "learning_rate": 0.0001796389912953011, "loss": 11.6883, "step": 29653 }, { "epoch": 0.6207401825337018, "grad_norm": 0.3162459135055542, "learning_rate": 0.00017963766526840917, "loss": 11.6792, "step": 29654 }, { "epoch": 0.6207611152976639, "grad_norm": 0.3466918170452118, "learning_rate": 0.0001796363392032338, "loss": 11.6717, "step": 29655 }, { "epoch": 0.6207820480616261, "grad_norm": 0.30860793590545654, "learning_rate": 0.00017963501309977564, "loss": 11.6626, "step": 29656 }, { "epoch": 0.6208029808255882, "grad_norm": 0.29237350821495056, "learning_rate": 0.00017963368695803533, "loss": 11.6685, "step": 29657 }, { "epoch": 0.6208239135895504, "grad_norm": 0.29216134548187256, "learning_rate": 0.00017963236077801352, "loss": 11.6814, "step": 29658 }, { "epoch": 0.6208448463535126, "grad_norm": 0.3226383626461029, "learning_rate": 0.00017963103455971083, "loss": 11.6481, "step": 29659 }, { "epoch": 0.6208657791174746, "grad_norm": 0.556431233882904, "learning_rate": 0.00017962970830312793, "loss": 11.6604, "step": 29660 }, { "epoch": 0.6208867118814368, "grad_norm": 0.3176119029521942, "learning_rate": 0.00017962838200826542, "loss": 11.6554, "step": 29661 }, { "epoch": 0.6209076446453989, "grad_norm": 0.295741468667984, "learning_rate": 0.00017962705567512396, "loss": 11.6713, "step": 29662 }, { "epoch": 0.6209285774093611, "grad_norm": 0.3484887480735779, "learning_rate": 0.00017962572930370417, "loss": 11.6824, "step": 29663 }, { "epoch": 0.6209495101733233, "grad_norm": 0.2750081717967987, "learning_rate": 0.0001796244028940067, "loss": 11.6673, "step": 29664 }, { "epoch": 0.6209704429372854, "grad_norm": 0.28999844193458557, "learning_rate": 0.0001796230764460322, "loss": 11.6806, "step": 29665 }, { "epoch": 0.6209913757012476, "grad_norm": 0.2579754889011383, "learning_rate": 0.00017962174995978124, "loss": 11.683, "step": 29666 }, { "epoch": 0.6210123084652097, "grad_norm": 0.35186272859573364, "learning_rate": 0.00017962042343525454, "loss": 11.6775, "step": 29667 }, { "epoch": 0.6210332412291719, "grad_norm": 0.3137663006782532, "learning_rate": 0.00017961909687245272, "loss": 11.6626, "step": 29668 }, { "epoch": 0.621054173993134, "grad_norm": 0.2873401343822479, "learning_rate": 0.00017961777027137638, "loss": 11.6706, "step": 29669 }, { "epoch": 0.6210751067570962, "grad_norm": 0.3396719694137573, "learning_rate": 0.0001796164436320262, "loss": 11.6734, "step": 29670 }, { "epoch": 0.6210960395210584, "grad_norm": 0.36568185687065125, "learning_rate": 0.00017961511695440278, "loss": 11.6694, "step": 29671 }, { "epoch": 0.6211169722850205, "grad_norm": 0.2973211705684662, "learning_rate": 0.0001796137902385068, "loss": 11.6765, "step": 29672 }, { "epoch": 0.6211379050489827, "grad_norm": 0.34397947788238525, "learning_rate": 0.00017961246348433885, "loss": 11.6791, "step": 29673 }, { "epoch": 0.6211588378129448, "grad_norm": 0.24995361268520355, "learning_rate": 0.00017961113669189962, "loss": 11.664, "step": 29674 }, { "epoch": 0.621179770576907, "grad_norm": 0.3093528747558594, "learning_rate": 0.00017960980986118973, "loss": 11.6731, "step": 29675 }, { "epoch": 0.6212007033408691, "grad_norm": 0.22936825454235077, "learning_rate": 0.00017960848299220975, "loss": 11.663, "step": 29676 }, { "epoch": 0.6212216361048313, "grad_norm": 0.2797609269618988, "learning_rate": 0.00017960715608496042, "loss": 11.6535, "step": 29677 }, { "epoch": 0.6212425688687935, "grad_norm": 0.2469123750925064, "learning_rate": 0.00017960582913944232, "loss": 11.6717, "step": 29678 }, { "epoch": 0.6212635016327556, "grad_norm": 0.28654178977012634, "learning_rate": 0.00017960450215565615, "loss": 11.6528, "step": 29679 }, { "epoch": 0.6212844343967178, "grad_norm": 0.2840113937854767, "learning_rate": 0.00017960317513360244, "loss": 11.6745, "step": 29680 }, { "epoch": 0.6213053671606799, "grad_norm": 0.2820536494255066, "learning_rate": 0.00017960184807328192, "loss": 11.67, "step": 29681 }, { "epoch": 0.6213262999246421, "grad_norm": 0.27183225750923157, "learning_rate": 0.0001796005209746952, "loss": 11.663, "step": 29682 }, { "epoch": 0.6213472326886043, "grad_norm": 0.3864206075668335, "learning_rate": 0.0001795991938378429, "loss": 11.6691, "step": 29683 }, { "epoch": 0.6213681654525663, "grad_norm": 0.3394959270954132, "learning_rate": 0.0001795978666627257, "loss": 11.6765, "step": 29684 }, { "epoch": 0.6213890982165285, "grad_norm": 0.26207271218299866, "learning_rate": 0.00017959653944934416, "loss": 11.6776, "step": 29685 }, { "epoch": 0.6214100309804906, "grad_norm": 0.32115161418914795, "learning_rate": 0.000179595212197699, "loss": 11.6643, "step": 29686 }, { "epoch": 0.6214309637444528, "grad_norm": 0.3222060799598694, "learning_rate": 0.00017959388490779082, "loss": 11.6577, "step": 29687 }, { "epoch": 0.6214518965084149, "grad_norm": 0.32840362191200256, "learning_rate": 0.00017959255757962026, "loss": 11.6828, "step": 29688 }, { "epoch": 0.6214728292723771, "grad_norm": 0.31879177689552307, "learning_rate": 0.00017959123021318795, "loss": 11.6649, "step": 29689 }, { "epoch": 0.6214937620363393, "grad_norm": 0.29285362362861633, "learning_rate": 0.0001795899028084946, "loss": 11.6643, "step": 29690 }, { "epoch": 0.6215146948003014, "grad_norm": 0.3146156072616577, "learning_rate": 0.00017958857536554072, "loss": 11.6551, "step": 29691 }, { "epoch": 0.6215356275642636, "grad_norm": 0.32754549384117126, "learning_rate": 0.00017958724788432707, "loss": 11.6882, "step": 29692 }, { "epoch": 0.6215565603282257, "grad_norm": 0.2911645174026489, "learning_rate": 0.0001795859203648542, "loss": 11.6756, "step": 29693 }, { "epoch": 0.6215774930921879, "grad_norm": 0.28055691719055176, "learning_rate": 0.00017958459280712277, "loss": 11.6647, "step": 29694 }, { "epoch": 0.62159842585615, "grad_norm": 0.35546767711639404, "learning_rate": 0.00017958326521113347, "loss": 11.6747, "step": 29695 }, { "epoch": 0.6216193586201122, "grad_norm": 0.34594935178756714, "learning_rate": 0.00017958193757688687, "loss": 11.6712, "step": 29696 }, { "epoch": 0.6216402913840744, "grad_norm": 0.24627280235290527, "learning_rate": 0.00017958060990438366, "loss": 11.6721, "step": 29697 }, { "epoch": 0.6216612241480365, "grad_norm": 0.30080363154411316, "learning_rate": 0.00017957928219362445, "loss": 11.6663, "step": 29698 }, { "epoch": 0.6216821569119987, "grad_norm": 0.3031729459762573, "learning_rate": 0.00017957795444460987, "loss": 11.6697, "step": 29699 }, { "epoch": 0.6217030896759608, "grad_norm": 0.3898416757583618, "learning_rate": 0.00017957662665734058, "loss": 11.6528, "step": 29700 }, { "epoch": 0.621724022439923, "grad_norm": 0.26199010014533997, "learning_rate": 0.00017957529883181722, "loss": 11.671, "step": 29701 }, { "epoch": 0.6217449552038852, "grad_norm": 0.24828605353832245, "learning_rate": 0.0001795739709680404, "loss": 11.6699, "step": 29702 }, { "epoch": 0.6217658879678473, "grad_norm": 0.30231863260269165, "learning_rate": 0.0001795726430660108, "loss": 11.6678, "step": 29703 }, { "epoch": 0.6217868207318095, "grad_norm": 0.27684298157691956, "learning_rate": 0.00017957131512572904, "loss": 11.6606, "step": 29704 }, { "epoch": 0.6218077534957716, "grad_norm": 0.29899686574935913, "learning_rate": 0.00017956998714719572, "loss": 11.6701, "step": 29705 }, { "epoch": 0.6218286862597338, "grad_norm": 0.3186441957950592, "learning_rate": 0.00017956865913041153, "loss": 11.6551, "step": 29706 }, { "epoch": 0.6218496190236958, "grad_norm": 0.26938387751579285, "learning_rate": 0.0001795673310753771, "loss": 11.6794, "step": 29707 }, { "epoch": 0.621870551787658, "grad_norm": 0.37608593702316284, "learning_rate": 0.00017956600298209307, "loss": 11.6891, "step": 29708 }, { "epoch": 0.6218914845516202, "grad_norm": 0.3031828701496124, "learning_rate": 0.00017956467485056007, "loss": 11.6726, "step": 29709 }, { "epoch": 0.6219124173155823, "grad_norm": 0.305746853351593, "learning_rate": 0.0001795633466807787, "loss": 11.6751, "step": 29710 }, { "epoch": 0.6219333500795445, "grad_norm": 0.30794093012809753, "learning_rate": 0.00017956201847274964, "loss": 11.6814, "step": 29711 }, { "epoch": 0.6219542828435066, "grad_norm": 0.34062647819519043, "learning_rate": 0.00017956069022647355, "loss": 11.6716, "step": 29712 }, { "epoch": 0.6219752156074688, "grad_norm": 0.4727938771247864, "learning_rate": 0.00017955936194195103, "loss": 11.6694, "step": 29713 }, { "epoch": 0.6219961483714309, "grad_norm": 0.24707452952861786, "learning_rate": 0.0001795580336191827, "loss": 11.6793, "step": 29714 }, { "epoch": 0.6220170811353931, "grad_norm": 0.3940694034099579, "learning_rate": 0.0001795567052581693, "loss": 11.6775, "step": 29715 }, { "epoch": 0.6220380138993553, "grad_norm": 0.28077322244644165, "learning_rate": 0.00017955537685891132, "loss": 11.6729, "step": 29716 }, { "epoch": 0.6220589466633174, "grad_norm": 0.27372661232948303, "learning_rate": 0.00017955404842140952, "loss": 11.6778, "step": 29717 }, { "epoch": 0.6220798794272796, "grad_norm": 0.30248987674713135, "learning_rate": 0.0001795527199456645, "loss": 11.6669, "step": 29718 }, { "epoch": 0.6221008121912417, "grad_norm": 0.2462613880634308, "learning_rate": 0.00017955139143167688, "loss": 11.6531, "step": 29719 }, { "epoch": 0.6221217449552039, "grad_norm": 0.27232101559638977, "learning_rate": 0.0001795500628794473, "loss": 11.6575, "step": 29720 }, { "epoch": 0.6221426777191661, "grad_norm": 0.2504088580608368, "learning_rate": 0.00017954873428897643, "loss": 11.6693, "step": 29721 }, { "epoch": 0.6221636104831282, "grad_norm": 0.2561774253845215, "learning_rate": 0.00017954740566026488, "loss": 11.6774, "step": 29722 }, { "epoch": 0.6221845432470904, "grad_norm": 0.30405399203300476, "learning_rate": 0.0001795460769933133, "loss": 11.656, "step": 29723 }, { "epoch": 0.6222054760110525, "grad_norm": 0.3794451057910919, "learning_rate": 0.00017954474828812234, "loss": 11.6735, "step": 29724 }, { "epoch": 0.6222264087750147, "grad_norm": 0.2901257276535034, "learning_rate": 0.0001795434195446926, "loss": 11.6684, "step": 29725 }, { "epoch": 0.6222473415389768, "grad_norm": 0.3279964029788971, "learning_rate": 0.00017954209076302474, "loss": 11.6713, "step": 29726 }, { "epoch": 0.622268274302939, "grad_norm": 0.356622576713562, "learning_rate": 0.00017954076194311942, "loss": 11.686, "step": 29727 }, { "epoch": 0.6222892070669012, "grad_norm": 0.2925379276275635, "learning_rate": 0.00017953943308497724, "loss": 11.6825, "step": 29728 }, { "epoch": 0.6223101398308633, "grad_norm": 0.33025887608528137, "learning_rate": 0.00017953810418859888, "loss": 11.6636, "step": 29729 }, { "epoch": 0.6223310725948255, "grad_norm": 0.24422988295555115, "learning_rate": 0.00017953677525398497, "loss": 11.6564, "step": 29730 }, { "epoch": 0.6223520053587875, "grad_norm": 0.3275231420993805, "learning_rate": 0.00017953544628113613, "loss": 11.6818, "step": 29731 }, { "epoch": 0.6223729381227497, "grad_norm": 0.3091239929199219, "learning_rate": 0.000179534117270053, "loss": 11.6672, "step": 29732 }, { "epoch": 0.6223938708867118, "grad_norm": 0.2953665554523468, "learning_rate": 0.00017953278822073623, "loss": 11.6553, "step": 29733 }, { "epoch": 0.622414803650674, "grad_norm": 0.28532910346984863, "learning_rate": 0.00017953145913318647, "loss": 11.6572, "step": 29734 }, { "epoch": 0.6224357364146362, "grad_norm": 0.28473612666130066, "learning_rate": 0.00017953013000740432, "loss": 11.6681, "step": 29735 }, { "epoch": 0.6224566691785983, "grad_norm": 0.3210391104221344, "learning_rate": 0.00017952880084339044, "loss": 11.6543, "step": 29736 }, { "epoch": 0.6224776019425605, "grad_norm": 0.27057215571403503, "learning_rate": 0.00017952747164114548, "loss": 11.6503, "step": 29737 }, { "epoch": 0.6224985347065226, "grad_norm": 0.2880380153656006, "learning_rate": 0.0001795261424006701, "loss": 11.6711, "step": 29738 }, { "epoch": 0.6225194674704848, "grad_norm": 0.26979687809944153, "learning_rate": 0.00017952481312196489, "loss": 11.6612, "step": 29739 }, { "epoch": 0.622540400234447, "grad_norm": 0.29534050822257996, "learning_rate": 0.0001795234838050305, "loss": 11.6689, "step": 29740 }, { "epoch": 0.6225613329984091, "grad_norm": 0.27157676219940186, "learning_rate": 0.00017952215444986758, "loss": 11.6606, "step": 29741 }, { "epoch": 0.6225822657623713, "grad_norm": 0.3139137029647827, "learning_rate": 0.00017952082505647674, "loss": 11.6685, "step": 29742 }, { "epoch": 0.6226031985263334, "grad_norm": 0.29768896102905273, "learning_rate": 0.00017951949562485867, "loss": 11.6849, "step": 29743 }, { "epoch": 0.6226241312902956, "grad_norm": 0.37790346145629883, "learning_rate": 0.000179518166155014, "loss": 11.6777, "step": 29744 }, { "epoch": 0.6226450640542577, "grad_norm": 0.280574768781662, "learning_rate": 0.00017951683664694336, "loss": 11.6618, "step": 29745 }, { "epoch": 0.6226659968182199, "grad_norm": 0.26678335666656494, "learning_rate": 0.00017951550710064735, "loss": 11.6734, "step": 29746 }, { "epoch": 0.6226869295821821, "grad_norm": 0.25228115916252136, "learning_rate": 0.00017951417751612664, "loss": 11.6634, "step": 29747 }, { "epoch": 0.6227078623461442, "grad_norm": 0.30710646510124207, "learning_rate": 0.0001795128478933819, "loss": 11.6721, "step": 29748 }, { "epoch": 0.6227287951101064, "grad_norm": 0.2919509708881378, "learning_rate": 0.00017951151823241372, "loss": 11.679, "step": 29749 }, { "epoch": 0.6227497278740685, "grad_norm": 0.2802737057209015, "learning_rate": 0.0001795101885332228, "loss": 11.6744, "step": 29750 }, { "epoch": 0.6227706606380307, "grad_norm": 0.44611892104148865, "learning_rate": 0.00017950885879580968, "loss": 11.6582, "step": 29751 }, { "epoch": 0.6227915934019927, "grad_norm": 0.2899106442928314, "learning_rate": 0.00017950752902017508, "loss": 11.671, "step": 29752 }, { "epoch": 0.622812526165955, "grad_norm": 0.2876477539539337, "learning_rate": 0.00017950619920631964, "loss": 11.6617, "step": 29753 }, { "epoch": 0.6228334589299171, "grad_norm": 0.2708449065685272, "learning_rate": 0.00017950486935424396, "loss": 11.6824, "step": 29754 }, { "epoch": 0.6228543916938792, "grad_norm": 0.35904812812805176, "learning_rate": 0.00017950353946394868, "loss": 11.6674, "step": 29755 }, { "epoch": 0.6228753244578414, "grad_norm": 0.2854834496974945, "learning_rate": 0.00017950220953543447, "loss": 11.668, "step": 29756 }, { "epoch": 0.6228962572218035, "grad_norm": 0.3204426169395447, "learning_rate": 0.00017950087956870197, "loss": 11.6642, "step": 29757 }, { "epoch": 0.6229171899857657, "grad_norm": 0.3516095280647278, "learning_rate": 0.0001794995495637518, "loss": 11.6798, "step": 29758 }, { "epoch": 0.6229381227497279, "grad_norm": 0.24269863963127136, "learning_rate": 0.0001794982195205846, "loss": 11.6834, "step": 29759 }, { "epoch": 0.62295905551369, "grad_norm": 0.3424219489097595, "learning_rate": 0.00017949688943920096, "loss": 11.6732, "step": 29760 }, { "epoch": 0.6229799882776522, "grad_norm": 0.2869020402431488, "learning_rate": 0.00017949555931960162, "loss": 11.6773, "step": 29761 }, { "epoch": 0.6230009210416143, "grad_norm": 0.2614997923374176, "learning_rate": 0.0001794942291617872, "loss": 11.6762, "step": 29762 }, { "epoch": 0.6230218538055765, "grad_norm": 0.3314109742641449, "learning_rate": 0.00017949289896575826, "loss": 11.6599, "step": 29763 }, { "epoch": 0.6230427865695386, "grad_norm": 0.25913819670677185, "learning_rate": 0.00017949156873151552, "loss": 11.6456, "step": 29764 }, { "epoch": 0.6230637193335008, "grad_norm": 0.2697785794734955, "learning_rate": 0.00017949023845905958, "loss": 11.6737, "step": 29765 }, { "epoch": 0.623084652097463, "grad_norm": 0.32533901929855347, "learning_rate": 0.00017948890814839107, "loss": 11.6497, "step": 29766 }, { "epoch": 0.6231055848614251, "grad_norm": 0.3237265646457672, "learning_rate": 0.0001794875777995107, "loss": 11.6699, "step": 29767 }, { "epoch": 0.6231265176253873, "grad_norm": 0.3568626046180725, "learning_rate": 0.00017948624741241903, "loss": 11.6644, "step": 29768 }, { "epoch": 0.6231474503893494, "grad_norm": 0.3483275771141052, "learning_rate": 0.0001794849169871167, "loss": 11.6572, "step": 29769 }, { "epoch": 0.6231683831533116, "grad_norm": 0.28966382145881653, "learning_rate": 0.00017948358652360443, "loss": 11.6534, "step": 29770 }, { "epoch": 0.6231893159172737, "grad_norm": 0.34673869609832764, "learning_rate": 0.00017948225602188274, "loss": 11.676, "step": 29771 }, { "epoch": 0.6232102486812359, "grad_norm": 0.39655137062072754, "learning_rate": 0.00017948092548195242, "loss": 11.6731, "step": 29772 }, { "epoch": 0.6232311814451981, "grad_norm": 0.38165873289108276, "learning_rate": 0.00017947959490381398, "loss": 11.6837, "step": 29773 }, { "epoch": 0.6232521142091602, "grad_norm": 0.24594946205615997, "learning_rate": 0.00017947826428746808, "loss": 11.661, "step": 29774 }, { "epoch": 0.6232730469731224, "grad_norm": 0.20179340243339539, "learning_rate": 0.00017947693363291542, "loss": 11.6724, "step": 29775 }, { "epoch": 0.6232939797370844, "grad_norm": 0.3347221612930298, "learning_rate": 0.0001794756029401566, "loss": 11.6675, "step": 29776 }, { "epoch": 0.6233149125010466, "grad_norm": 0.2887929379940033, "learning_rate": 0.00017947427220919225, "loss": 11.6633, "step": 29777 }, { "epoch": 0.6233358452650087, "grad_norm": 0.3734436333179474, "learning_rate": 0.00017947294144002304, "loss": 11.651, "step": 29778 }, { "epoch": 0.6233567780289709, "grad_norm": 0.2768386900424957, "learning_rate": 0.00017947161063264962, "loss": 11.6626, "step": 29779 }, { "epoch": 0.6233777107929331, "grad_norm": 0.34321919083595276, "learning_rate": 0.00017947027978707257, "loss": 11.6677, "step": 29780 }, { "epoch": 0.6233986435568952, "grad_norm": 0.25074663758277893, "learning_rate": 0.00017946894890329257, "loss": 11.6687, "step": 29781 }, { "epoch": 0.6234195763208574, "grad_norm": 0.29569172859191895, "learning_rate": 0.00017946761798131024, "loss": 11.6769, "step": 29782 }, { "epoch": 0.6234405090848195, "grad_norm": 0.26367613673210144, "learning_rate": 0.00017946628702112625, "loss": 11.667, "step": 29783 }, { "epoch": 0.6234614418487817, "grad_norm": 0.2571670114994049, "learning_rate": 0.00017946495602274123, "loss": 11.6717, "step": 29784 }, { "epoch": 0.6234823746127439, "grad_norm": 0.43363893032073975, "learning_rate": 0.0001794636249861558, "loss": 11.6776, "step": 29785 }, { "epoch": 0.623503307376706, "grad_norm": 0.3349389135837555, "learning_rate": 0.0001794622939113706, "loss": 11.6876, "step": 29786 }, { "epoch": 0.6235242401406682, "grad_norm": 0.3266852796077728, "learning_rate": 0.0001794609627983863, "loss": 11.6741, "step": 29787 }, { "epoch": 0.6235451729046303, "grad_norm": 0.3744262754917145, "learning_rate": 0.00017945963164720354, "loss": 11.6744, "step": 29788 }, { "epoch": 0.6235661056685925, "grad_norm": 0.36226770281791687, "learning_rate": 0.00017945830045782292, "loss": 11.6776, "step": 29789 }, { "epoch": 0.6235870384325546, "grad_norm": 0.31928372383117676, "learning_rate": 0.00017945696923024512, "loss": 11.669, "step": 29790 }, { "epoch": 0.6236079711965168, "grad_norm": 0.3181864619255066, "learning_rate": 0.00017945563796447077, "loss": 11.6596, "step": 29791 }, { "epoch": 0.623628903960479, "grad_norm": 0.2930699586868286, "learning_rate": 0.00017945430666050047, "loss": 11.6824, "step": 29792 }, { "epoch": 0.6236498367244411, "grad_norm": 0.3530562222003937, "learning_rate": 0.00017945297531833489, "loss": 11.674, "step": 29793 }, { "epoch": 0.6236707694884033, "grad_norm": 0.259373277425766, "learning_rate": 0.0001794516439379747, "loss": 11.6736, "step": 29794 }, { "epoch": 0.6236917022523654, "grad_norm": 0.37231504917144775, "learning_rate": 0.0001794503125194205, "loss": 11.67, "step": 29795 }, { "epoch": 0.6237126350163276, "grad_norm": 0.31748586893081665, "learning_rate": 0.00017944898106267293, "loss": 11.6887, "step": 29796 }, { "epoch": 0.6237335677802897, "grad_norm": 0.349567711353302, "learning_rate": 0.00017944764956773267, "loss": 11.6752, "step": 29797 }, { "epoch": 0.6237545005442519, "grad_norm": 0.2416185736656189, "learning_rate": 0.00017944631803460033, "loss": 11.6547, "step": 29798 }, { "epoch": 0.6237754333082141, "grad_norm": 0.26490703225135803, "learning_rate": 0.00017944498646327653, "loss": 11.6749, "step": 29799 }, { "epoch": 0.6237963660721761, "grad_norm": 0.3746127784252167, "learning_rate": 0.00017944365485376195, "loss": 11.6779, "step": 29800 }, { "epoch": 0.6238172988361383, "grad_norm": 0.32835566997528076, "learning_rate": 0.0001794423232060572, "loss": 11.6722, "step": 29801 }, { "epoch": 0.6238382316001004, "grad_norm": 0.4536885619163513, "learning_rate": 0.00017944099152016296, "loss": 11.6555, "step": 29802 }, { "epoch": 0.6238591643640626, "grad_norm": 0.29336056113243103, "learning_rate": 0.00017943965979607982, "loss": 11.6828, "step": 29803 }, { "epoch": 0.6238800971280248, "grad_norm": 0.31019675731658936, "learning_rate": 0.00017943832803380845, "loss": 11.6792, "step": 29804 }, { "epoch": 0.6239010298919869, "grad_norm": 0.2924445569515228, "learning_rate": 0.0001794369962333495, "loss": 11.6677, "step": 29805 }, { "epoch": 0.6239219626559491, "grad_norm": 0.2497403621673584, "learning_rate": 0.00017943566439470354, "loss": 11.6715, "step": 29806 }, { "epoch": 0.6239428954199112, "grad_norm": 0.2856306731700897, "learning_rate": 0.0001794343325178713, "loss": 11.6604, "step": 29807 }, { "epoch": 0.6239638281838734, "grad_norm": 0.29870525002479553, "learning_rate": 0.00017943300060285338, "loss": 11.6508, "step": 29808 }, { "epoch": 0.6239847609478355, "grad_norm": 0.2711591124534607, "learning_rate": 0.00017943166864965043, "loss": 11.6734, "step": 29809 }, { "epoch": 0.6240056937117977, "grad_norm": 0.46763351559638977, "learning_rate": 0.0001794303366582631, "loss": 11.6644, "step": 29810 }, { "epoch": 0.6240266264757599, "grad_norm": 0.3154652416706085, "learning_rate": 0.00017942900462869198, "loss": 11.656, "step": 29811 }, { "epoch": 0.624047559239722, "grad_norm": 0.34579482674598694, "learning_rate": 0.0001794276725609378, "loss": 11.67, "step": 29812 }, { "epoch": 0.6240684920036842, "grad_norm": 0.3008478581905365, "learning_rate": 0.00017942634045500108, "loss": 11.6717, "step": 29813 }, { "epoch": 0.6240894247676463, "grad_norm": 0.2831225097179413, "learning_rate": 0.00017942500831088256, "loss": 11.6636, "step": 29814 }, { "epoch": 0.6241103575316085, "grad_norm": 0.24793735146522522, "learning_rate": 0.00017942367612858284, "loss": 11.6605, "step": 29815 }, { "epoch": 0.6241312902955706, "grad_norm": 0.4130379259586334, "learning_rate": 0.0001794223439081026, "loss": 11.6788, "step": 29816 }, { "epoch": 0.6241522230595328, "grad_norm": 0.2940758168697357, "learning_rate": 0.0001794210116494424, "loss": 11.6779, "step": 29817 }, { "epoch": 0.624173155823495, "grad_norm": 0.30842211842536926, "learning_rate": 0.00017941967935260295, "loss": 11.6743, "step": 29818 }, { "epoch": 0.6241940885874571, "grad_norm": 0.2843928337097168, "learning_rate": 0.0001794183470175849, "loss": 11.6614, "step": 29819 }, { "epoch": 0.6242150213514193, "grad_norm": 0.3220154643058777, "learning_rate": 0.0001794170146443888, "loss": 11.6738, "step": 29820 }, { "epoch": 0.6242359541153814, "grad_norm": 0.45577511191368103, "learning_rate": 0.00017941568223301538, "loss": 11.6762, "step": 29821 }, { "epoch": 0.6242568868793436, "grad_norm": 0.22831714153289795, "learning_rate": 0.00017941434978346524, "loss": 11.6711, "step": 29822 }, { "epoch": 0.6242778196433058, "grad_norm": 0.28951525688171387, "learning_rate": 0.00017941301729573902, "loss": 11.6671, "step": 29823 }, { "epoch": 0.6242987524072678, "grad_norm": 0.3519068956375122, "learning_rate": 0.0001794116847698374, "loss": 11.6738, "step": 29824 }, { "epoch": 0.62431968517123, "grad_norm": 1.3523045778274536, "learning_rate": 0.000179410352205761, "loss": 11.6756, "step": 29825 }, { "epoch": 0.6243406179351921, "grad_norm": 0.31455761194229126, "learning_rate": 0.0001794090196035104, "loss": 11.6602, "step": 29826 }, { "epoch": 0.6243615506991543, "grad_norm": 0.3005329966545105, "learning_rate": 0.00017940768696308635, "loss": 11.6754, "step": 29827 }, { "epoch": 0.6243824834631164, "grad_norm": 0.26168909668922424, "learning_rate": 0.0001794063542844894, "loss": 11.6783, "step": 29828 }, { "epoch": 0.6244034162270786, "grad_norm": 0.24889183044433594, "learning_rate": 0.00017940502156772023, "loss": 11.6643, "step": 29829 }, { "epoch": 0.6244243489910408, "grad_norm": 0.27518439292907715, "learning_rate": 0.00017940368881277948, "loss": 11.6637, "step": 29830 }, { "epoch": 0.6244452817550029, "grad_norm": 0.32752618193626404, "learning_rate": 0.00017940235601966777, "loss": 11.6701, "step": 29831 }, { "epoch": 0.6244662145189651, "grad_norm": 0.3233861029148102, "learning_rate": 0.00017940102318838577, "loss": 11.6759, "step": 29832 }, { "epoch": 0.6244871472829272, "grad_norm": 0.2970040440559387, "learning_rate": 0.0001793996903189341, "loss": 11.6746, "step": 29833 }, { "epoch": 0.6245080800468894, "grad_norm": 0.27538326382637024, "learning_rate": 0.0001793983574113134, "loss": 11.6691, "step": 29834 }, { "epoch": 0.6245290128108515, "grad_norm": 0.26425281167030334, "learning_rate": 0.00017939702446552436, "loss": 11.6507, "step": 29835 }, { "epoch": 0.6245499455748137, "grad_norm": 0.33502328395843506, "learning_rate": 0.00017939569148156753, "loss": 11.666, "step": 29836 }, { "epoch": 0.6245708783387759, "grad_norm": 0.35615411400794983, "learning_rate": 0.00017939435845944362, "loss": 11.6785, "step": 29837 }, { "epoch": 0.624591811102738, "grad_norm": 0.26092395186424255, "learning_rate": 0.00017939302539915326, "loss": 11.6725, "step": 29838 }, { "epoch": 0.6246127438667002, "grad_norm": 0.3067100942134857, "learning_rate": 0.00017939169230069707, "loss": 11.6594, "step": 29839 }, { "epoch": 0.6246336766306623, "grad_norm": 0.2313891500234604, "learning_rate": 0.0001793903591640757, "loss": 11.6542, "step": 29840 }, { "epoch": 0.6246546093946245, "grad_norm": 0.28639769554138184, "learning_rate": 0.00017938902598928978, "loss": 11.6598, "step": 29841 }, { "epoch": 0.6246755421585867, "grad_norm": 0.2931835949420929, "learning_rate": 0.00017938769277634, "loss": 11.6878, "step": 29842 }, { "epoch": 0.6246964749225488, "grad_norm": 0.2889483869075775, "learning_rate": 0.00017938635952522694, "loss": 11.6688, "step": 29843 }, { "epoch": 0.624717407686511, "grad_norm": 0.23964449763298035, "learning_rate": 0.00017938502623595127, "loss": 11.6676, "step": 29844 }, { "epoch": 0.624738340450473, "grad_norm": 0.26287853717803955, "learning_rate": 0.00017938369290851363, "loss": 11.6844, "step": 29845 }, { "epoch": 0.6247592732144353, "grad_norm": 0.32986536622047424, "learning_rate": 0.00017938235954291466, "loss": 11.6729, "step": 29846 }, { "epoch": 0.6247802059783973, "grad_norm": 0.268432080745697, "learning_rate": 0.000179381026139155, "loss": 11.6672, "step": 29847 }, { "epoch": 0.6248011387423595, "grad_norm": 0.2824622690677643, "learning_rate": 0.00017937969269723528, "loss": 11.6759, "step": 29848 }, { "epoch": 0.6248220715063217, "grad_norm": 0.3208536207675934, "learning_rate": 0.00017937835921715619, "loss": 11.6831, "step": 29849 }, { "epoch": 0.6248430042702838, "grad_norm": 0.3319944143295288, "learning_rate": 0.00017937702569891827, "loss": 11.6701, "step": 29850 }, { "epoch": 0.624863937034246, "grad_norm": 0.30375543236732483, "learning_rate": 0.00017937569214252226, "loss": 11.6664, "step": 29851 }, { "epoch": 0.6248848697982081, "grad_norm": 0.2730337679386139, "learning_rate": 0.00017937435854796876, "loss": 11.6679, "step": 29852 }, { "epoch": 0.6249058025621703, "grad_norm": 0.24417905509471893, "learning_rate": 0.0001793730249152584, "loss": 11.6678, "step": 29853 }, { "epoch": 0.6249267353261324, "grad_norm": 0.3553176522254944, "learning_rate": 0.00017937169124439186, "loss": 11.6673, "step": 29854 }, { "epoch": 0.6249476680900946, "grad_norm": 0.3013066053390503, "learning_rate": 0.00017937035753536976, "loss": 11.669, "step": 29855 }, { "epoch": 0.6249686008540568, "grad_norm": 0.2510950565338135, "learning_rate": 0.0001793690237881927, "loss": 11.6723, "step": 29856 }, { "epoch": 0.6249895336180189, "grad_norm": 0.22932368516921997, "learning_rate": 0.0001793676900028614, "loss": 11.6699, "step": 29857 }, { "epoch": 0.6250104663819811, "grad_norm": 0.3339305818080902, "learning_rate": 0.00017936635617937644, "loss": 11.6604, "step": 29858 }, { "epoch": 0.6250313991459432, "grad_norm": 0.3377559781074524, "learning_rate": 0.00017936502231773852, "loss": 11.6614, "step": 29859 }, { "epoch": 0.6250523319099054, "grad_norm": 0.2343587428331375, "learning_rate": 0.0001793636884179482, "loss": 11.6744, "step": 29860 }, { "epoch": 0.6250732646738676, "grad_norm": 0.2935955822467804, "learning_rate": 0.00017936235448000616, "loss": 11.6684, "step": 29861 }, { "epoch": 0.6250941974378297, "grad_norm": 0.266020804643631, "learning_rate": 0.00017936102050391305, "loss": 11.6626, "step": 29862 }, { "epoch": 0.6251151302017919, "grad_norm": 0.3077511191368103, "learning_rate": 0.00017935968648966953, "loss": 11.6446, "step": 29863 }, { "epoch": 0.625136062965754, "grad_norm": 0.5489628911018372, "learning_rate": 0.0001793583524372762, "loss": 11.6756, "step": 29864 }, { "epoch": 0.6251569957297162, "grad_norm": 0.2746722996234894, "learning_rate": 0.00017935701834673372, "loss": 11.665, "step": 29865 }, { "epoch": 0.6251779284936783, "grad_norm": 0.44987261295318604, "learning_rate": 0.00017935568421804273, "loss": 11.6866, "step": 29866 }, { "epoch": 0.6251988612576405, "grad_norm": 0.28182128071784973, "learning_rate": 0.0001793543500512039, "loss": 11.6692, "step": 29867 }, { "epoch": 0.6252197940216027, "grad_norm": 0.3211958706378937, "learning_rate": 0.00017935301584621783, "loss": 11.6584, "step": 29868 }, { "epoch": 0.6252407267855647, "grad_norm": 0.31174686551094055, "learning_rate": 0.00017935168160308516, "loss": 11.6862, "step": 29869 }, { "epoch": 0.625261659549527, "grad_norm": 0.2588547468185425, "learning_rate": 0.00017935034732180656, "loss": 11.6652, "step": 29870 }, { "epoch": 0.625282592313489, "grad_norm": 0.32057949900627136, "learning_rate": 0.00017934901300238262, "loss": 11.6717, "step": 29871 }, { "epoch": 0.6253035250774512, "grad_norm": 0.3467957079410553, "learning_rate": 0.00017934767864481407, "loss": 11.6659, "step": 29872 }, { "epoch": 0.6253244578414133, "grad_norm": 0.271273672580719, "learning_rate": 0.0001793463442491015, "loss": 11.6792, "step": 29873 }, { "epoch": 0.6253453906053755, "grad_norm": 0.267535924911499, "learning_rate": 0.0001793450098152455, "loss": 11.6724, "step": 29874 }, { "epoch": 0.6253663233693377, "grad_norm": 0.3293817639350891, "learning_rate": 0.00017934367534324681, "loss": 11.6697, "step": 29875 }, { "epoch": 0.6253872561332998, "grad_norm": 0.30011695623397827, "learning_rate": 0.00017934234083310602, "loss": 11.6597, "step": 29876 }, { "epoch": 0.625408188897262, "grad_norm": 0.32123807072639465, "learning_rate": 0.00017934100628482375, "loss": 11.6708, "step": 29877 }, { "epoch": 0.6254291216612241, "grad_norm": 0.2943032383918762, "learning_rate": 0.00017933967169840072, "loss": 11.6782, "step": 29878 }, { "epoch": 0.6254500544251863, "grad_norm": 0.29194375872612, "learning_rate": 0.00017933833707383748, "loss": 11.6703, "step": 29879 }, { "epoch": 0.6254709871891485, "grad_norm": 0.3204490542411804, "learning_rate": 0.00017933700241113472, "loss": 11.6681, "step": 29880 }, { "epoch": 0.6254919199531106, "grad_norm": 0.3256644010543823, "learning_rate": 0.00017933566771029307, "loss": 11.6599, "step": 29881 }, { "epoch": 0.6255128527170728, "grad_norm": 0.3174302279949188, "learning_rate": 0.00017933433297131316, "loss": 11.6718, "step": 29882 }, { "epoch": 0.6255337854810349, "grad_norm": 0.30679982900619507, "learning_rate": 0.00017933299819419566, "loss": 11.6628, "step": 29883 }, { "epoch": 0.6255547182449971, "grad_norm": 0.368892639875412, "learning_rate": 0.0001793316633789412, "loss": 11.6628, "step": 29884 }, { "epoch": 0.6255756510089592, "grad_norm": 0.2789679765701294, "learning_rate": 0.00017933032852555044, "loss": 11.6578, "step": 29885 }, { "epoch": 0.6255965837729214, "grad_norm": 0.2305910438299179, "learning_rate": 0.00017932899363402397, "loss": 11.6657, "step": 29886 }, { "epoch": 0.6256175165368836, "grad_norm": 0.3485860526561737, "learning_rate": 0.00017932765870436247, "loss": 11.6847, "step": 29887 }, { "epoch": 0.6256384493008457, "grad_norm": 0.28097471594810486, "learning_rate": 0.0001793263237365666, "loss": 11.6802, "step": 29888 }, { "epoch": 0.6256593820648079, "grad_norm": 0.2722046971321106, "learning_rate": 0.00017932498873063693, "loss": 11.6686, "step": 29889 }, { "epoch": 0.62568031482877, "grad_norm": 0.25344353914260864, "learning_rate": 0.0001793236536865742, "loss": 11.6718, "step": 29890 }, { "epoch": 0.6257012475927322, "grad_norm": 0.3408137261867523, "learning_rate": 0.00017932231860437895, "loss": 11.6572, "step": 29891 }, { "epoch": 0.6257221803566942, "grad_norm": 0.2802744209766388, "learning_rate": 0.0001793209834840519, "loss": 11.6647, "step": 29892 }, { "epoch": 0.6257431131206564, "grad_norm": 0.2812131643295288, "learning_rate": 0.00017931964832559367, "loss": 11.6902, "step": 29893 }, { "epoch": 0.6257640458846186, "grad_norm": 0.3157021403312683, "learning_rate": 0.00017931831312900488, "loss": 11.6885, "step": 29894 }, { "epoch": 0.6257849786485807, "grad_norm": 0.2949987053871155, "learning_rate": 0.00017931697789428622, "loss": 11.6642, "step": 29895 }, { "epoch": 0.6258059114125429, "grad_norm": 0.3057534098625183, "learning_rate": 0.00017931564262143825, "loss": 11.6761, "step": 29896 }, { "epoch": 0.625826844176505, "grad_norm": 0.2728673815727234, "learning_rate": 0.0001793143073104617, "loss": 11.6665, "step": 29897 }, { "epoch": 0.6258477769404672, "grad_norm": 0.32609662413597107, "learning_rate": 0.00017931297196135715, "loss": 11.6712, "step": 29898 }, { "epoch": 0.6258687097044294, "grad_norm": 0.44006243348121643, "learning_rate": 0.00017931163657412526, "loss": 11.6776, "step": 29899 }, { "epoch": 0.6258896424683915, "grad_norm": 0.2722073197364807, "learning_rate": 0.0001793103011487667, "loss": 11.6674, "step": 29900 }, { "epoch": 0.6259105752323537, "grad_norm": 0.32052257657051086, "learning_rate": 0.00017930896568528205, "loss": 11.6837, "step": 29901 }, { "epoch": 0.6259315079963158, "grad_norm": 0.2918773293495178, "learning_rate": 0.00017930763018367203, "loss": 11.6582, "step": 29902 }, { "epoch": 0.625952440760278, "grad_norm": 0.30318474769592285, "learning_rate": 0.0001793062946439372, "loss": 11.6841, "step": 29903 }, { "epoch": 0.6259733735242401, "grad_norm": 0.2851279675960541, "learning_rate": 0.0001793049590660783, "loss": 11.675, "step": 29904 }, { "epoch": 0.6259943062882023, "grad_norm": 0.3288935422897339, "learning_rate": 0.0001793036234500959, "loss": 11.6932, "step": 29905 }, { "epoch": 0.6260152390521645, "grad_norm": 0.3514275550842285, "learning_rate": 0.00017930228779599065, "loss": 11.6746, "step": 29906 }, { "epoch": 0.6260361718161266, "grad_norm": 0.24177154898643494, "learning_rate": 0.00017930095210376317, "loss": 11.6572, "step": 29907 }, { "epoch": 0.6260571045800888, "grad_norm": 0.2682468593120575, "learning_rate": 0.0001792996163734142, "loss": 11.6604, "step": 29908 }, { "epoch": 0.6260780373440509, "grad_norm": 0.2845839262008667, "learning_rate": 0.00017929828060494427, "loss": 11.6677, "step": 29909 }, { "epoch": 0.6260989701080131, "grad_norm": 0.24887648224830627, "learning_rate": 0.0001792969447983541, "loss": 11.6614, "step": 29910 }, { "epoch": 0.6261199028719752, "grad_norm": 0.3246925175189972, "learning_rate": 0.00017929560895364426, "loss": 11.671, "step": 29911 }, { "epoch": 0.6261408356359374, "grad_norm": 0.30254480242729187, "learning_rate": 0.00017929427307081545, "loss": 11.6614, "step": 29912 }, { "epoch": 0.6261617683998996, "grad_norm": 0.30852892994880676, "learning_rate": 0.0001792929371498683, "loss": 11.6708, "step": 29913 }, { "epoch": 0.6261827011638617, "grad_norm": 0.3324581980705261, "learning_rate": 0.00017929160119080343, "loss": 11.6768, "step": 29914 }, { "epoch": 0.6262036339278239, "grad_norm": 0.3870827555656433, "learning_rate": 0.0001792902651936215, "loss": 11.6674, "step": 29915 }, { "epoch": 0.6262245666917859, "grad_norm": 0.3308551013469696, "learning_rate": 0.00017928892915832317, "loss": 11.6564, "step": 29916 }, { "epoch": 0.6262454994557481, "grad_norm": 0.26873767375946045, "learning_rate": 0.00017928759308490904, "loss": 11.6658, "step": 29917 }, { "epoch": 0.6262664322197103, "grad_norm": 0.3254203796386719, "learning_rate": 0.0001792862569733798, "loss": 11.6822, "step": 29918 }, { "epoch": 0.6262873649836724, "grad_norm": 0.33486542105674744, "learning_rate": 0.00017928492082373601, "loss": 11.6733, "step": 29919 }, { "epoch": 0.6263082977476346, "grad_norm": 0.28631800413131714, "learning_rate": 0.00017928358463597843, "loss": 11.6638, "step": 29920 }, { "epoch": 0.6263292305115967, "grad_norm": 0.29314878582954407, "learning_rate": 0.00017928224841010762, "loss": 11.6763, "step": 29921 }, { "epoch": 0.6263501632755589, "grad_norm": 0.3133774995803833, "learning_rate": 0.00017928091214612426, "loss": 11.6673, "step": 29922 }, { "epoch": 0.626371096039521, "grad_norm": 0.2803906798362732, "learning_rate": 0.00017927957584402897, "loss": 11.6665, "step": 29923 }, { "epoch": 0.6263920288034832, "grad_norm": 0.29843199253082275, "learning_rate": 0.00017927823950382237, "loss": 11.6569, "step": 29924 }, { "epoch": 0.6264129615674454, "grad_norm": 0.27541807293891907, "learning_rate": 0.00017927690312550518, "loss": 11.6664, "step": 29925 }, { "epoch": 0.6264338943314075, "grad_norm": 0.2623406648635864, "learning_rate": 0.00017927556670907797, "loss": 11.6529, "step": 29926 }, { "epoch": 0.6264548270953697, "grad_norm": 0.29502978920936584, "learning_rate": 0.00017927423025454142, "loss": 11.6622, "step": 29927 }, { "epoch": 0.6264757598593318, "grad_norm": 0.294411301612854, "learning_rate": 0.00017927289376189613, "loss": 11.6703, "step": 29928 }, { "epoch": 0.626496692623294, "grad_norm": 0.3318248689174652, "learning_rate": 0.0001792715572311428, "loss": 11.6706, "step": 29929 }, { "epoch": 0.6265176253872561, "grad_norm": 0.2667846083641052, "learning_rate": 0.000179270220662282, "loss": 11.6519, "step": 29930 }, { "epoch": 0.6265385581512183, "grad_norm": 0.25818535685539246, "learning_rate": 0.00017926888405531445, "loss": 11.6721, "step": 29931 }, { "epoch": 0.6265594909151805, "grad_norm": 0.2764107286930084, "learning_rate": 0.00017926754741024077, "loss": 11.6612, "step": 29932 }, { "epoch": 0.6265804236791426, "grad_norm": 0.2226840704679489, "learning_rate": 0.0001792662107270616, "loss": 11.6764, "step": 29933 }, { "epoch": 0.6266013564431048, "grad_norm": 0.27684709429740906, "learning_rate": 0.00017926487400577755, "loss": 11.6618, "step": 29934 }, { "epoch": 0.6266222892070669, "grad_norm": 0.24754996597766876, "learning_rate": 0.00017926353724638926, "loss": 11.6622, "step": 29935 }, { "epoch": 0.6266432219710291, "grad_norm": 0.2540898621082306, "learning_rate": 0.00017926220044889744, "loss": 11.6877, "step": 29936 }, { "epoch": 0.6266641547349913, "grad_norm": 0.3025526702404022, "learning_rate": 0.00017926086361330268, "loss": 11.6812, "step": 29937 }, { "epoch": 0.6266850874989534, "grad_norm": 0.5554381012916565, "learning_rate": 0.00017925952673960563, "loss": 11.6638, "step": 29938 }, { "epoch": 0.6267060202629156, "grad_norm": 0.2858373820781708, "learning_rate": 0.00017925818982780694, "loss": 11.669, "step": 29939 }, { "epoch": 0.6267269530268776, "grad_norm": 0.3095996081829071, "learning_rate": 0.00017925685287790722, "loss": 11.6691, "step": 29940 }, { "epoch": 0.6267478857908398, "grad_norm": 0.28123095631599426, "learning_rate": 0.0001792555158899072, "loss": 11.6708, "step": 29941 }, { "epoch": 0.6267688185548019, "grad_norm": 0.3264431059360504, "learning_rate": 0.00017925417886380745, "loss": 11.6727, "step": 29942 }, { "epoch": 0.6267897513187641, "grad_norm": 0.274247944355011, "learning_rate": 0.00017925284179960861, "loss": 11.6675, "step": 29943 }, { "epoch": 0.6268106840827263, "grad_norm": 0.3330507278442383, "learning_rate": 0.00017925150469731132, "loss": 11.6681, "step": 29944 }, { "epoch": 0.6268316168466884, "grad_norm": 0.30775514245033264, "learning_rate": 0.00017925016755691628, "loss": 11.674, "step": 29945 }, { "epoch": 0.6268525496106506, "grad_norm": 0.3284737467765808, "learning_rate": 0.0001792488303784241, "loss": 11.6676, "step": 29946 }, { "epoch": 0.6268734823746127, "grad_norm": 0.26748159527778625, "learning_rate": 0.0001792474931618354, "loss": 11.6822, "step": 29947 }, { "epoch": 0.6268944151385749, "grad_norm": 0.2744792699813843, "learning_rate": 0.00017924615590715086, "loss": 11.6627, "step": 29948 }, { "epoch": 0.626915347902537, "grad_norm": 0.3201247453689575, "learning_rate": 0.00017924481861437107, "loss": 11.6751, "step": 29949 }, { "epoch": 0.6269362806664992, "grad_norm": 0.24156944453716278, "learning_rate": 0.00017924348128349674, "loss": 11.6522, "step": 29950 }, { "epoch": 0.6269572134304614, "grad_norm": 0.2793412208557129, "learning_rate": 0.00017924214391452846, "loss": 11.6704, "step": 29951 }, { "epoch": 0.6269781461944235, "grad_norm": 0.25897154211997986, "learning_rate": 0.0001792408065074669, "loss": 11.6754, "step": 29952 }, { "epoch": 0.6269990789583857, "grad_norm": 0.326364666223526, "learning_rate": 0.0001792394690623127, "loss": 11.6678, "step": 29953 }, { "epoch": 0.6270200117223478, "grad_norm": 0.23565267026424408, "learning_rate": 0.0001792381315790665, "loss": 11.6656, "step": 29954 }, { "epoch": 0.62704094448631, "grad_norm": 0.3041119873523712, "learning_rate": 0.00017923679405772896, "loss": 11.6683, "step": 29955 }, { "epoch": 0.6270618772502721, "grad_norm": 0.2877614200115204, "learning_rate": 0.00017923545649830067, "loss": 11.6553, "step": 29956 }, { "epoch": 0.6270828100142343, "grad_norm": 0.2757978141307831, "learning_rate": 0.00017923411890078233, "loss": 11.665, "step": 29957 }, { "epoch": 0.6271037427781965, "grad_norm": 0.2633165419101715, "learning_rate": 0.00017923278126517455, "loss": 11.664, "step": 29958 }, { "epoch": 0.6271246755421586, "grad_norm": 0.25199225544929504, "learning_rate": 0.000179231443591478, "loss": 11.6626, "step": 29959 }, { "epoch": 0.6271456083061208, "grad_norm": 0.2718733847141266, "learning_rate": 0.00017923010587969328, "loss": 11.6638, "step": 29960 }, { "epoch": 0.6271665410700829, "grad_norm": 0.3671477735042572, "learning_rate": 0.0001792287681298211, "loss": 11.6928, "step": 29961 }, { "epoch": 0.627187473834045, "grad_norm": 0.34215009212493896, "learning_rate": 0.00017922743034186204, "loss": 11.6735, "step": 29962 }, { "epoch": 0.6272084065980073, "grad_norm": 0.2407868206501007, "learning_rate": 0.00017922609251581678, "loss": 11.6621, "step": 29963 }, { "epoch": 0.6272293393619693, "grad_norm": 0.2981863021850586, "learning_rate": 0.0001792247546516859, "loss": 11.6827, "step": 29964 }, { "epoch": 0.6272502721259315, "grad_norm": 0.2739393711090088, "learning_rate": 0.00017922341674947014, "loss": 11.6516, "step": 29965 }, { "epoch": 0.6272712048898936, "grad_norm": 0.3115633726119995, "learning_rate": 0.00017922207880917009, "loss": 11.6641, "step": 29966 }, { "epoch": 0.6272921376538558, "grad_norm": 0.2837740182876587, "learning_rate": 0.0001792207408307864, "loss": 11.6651, "step": 29967 }, { "epoch": 0.6273130704178179, "grad_norm": 0.33350446820259094, "learning_rate": 0.0001792194028143197, "loss": 11.671, "step": 29968 }, { "epoch": 0.6273340031817801, "grad_norm": 0.4019286632537842, "learning_rate": 0.00017921806475977063, "loss": 11.6881, "step": 29969 }, { "epoch": 0.6273549359457423, "grad_norm": 0.3038388788700104, "learning_rate": 0.0001792167266671399, "loss": 11.6666, "step": 29970 }, { "epoch": 0.6273758687097044, "grad_norm": 0.2837909460067749, "learning_rate": 0.00017921538853642806, "loss": 11.6643, "step": 29971 }, { "epoch": 0.6273968014736666, "grad_norm": 0.2466668039560318, "learning_rate": 0.0001792140503676358, "loss": 11.6647, "step": 29972 }, { "epoch": 0.6274177342376287, "grad_norm": 0.2594112455844879, "learning_rate": 0.00017921271216076376, "loss": 11.6737, "step": 29973 }, { "epoch": 0.6274386670015909, "grad_norm": 0.2871147692203522, "learning_rate": 0.0001792113739158126, "loss": 11.6552, "step": 29974 }, { "epoch": 0.627459599765553, "grad_norm": 0.2902548909187317, "learning_rate": 0.00017921003563278293, "loss": 11.6691, "step": 29975 }, { "epoch": 0.6274805325295152, "grad_norm": 0.2970399260520935, "learning_rate": 0.00017920869731167542, "loss": 11.669, "step": 29976 }, { "epoch": 0.6275014652934774, "grad_norm": 0.3162129819393158, "learning_rate": 0.0001792073589524907, "loss": 11.6651, "step": 29977 }, { "epoch": 0.6275223980574395, "grad_norm": 0.4637293815612793, "learning_rate": 0.0001792060205552294, "loss": 11.6636, "step": 29978 }, { "epoch": 0.6275433308214017, "grad_norm": 0.29652225971221924, "learning_rate": 0.00017920468211989217, "loss": 11.6545, "step": 29979 }, { "epoch": 0.6275642635853638, "grad_norm": 0.25461527705192566, "learning_rate": 0.0001792033436464797, "loss": 11.6755, "step": 29980 }, { "epoch": 0.627585196349326, "grad_norm": 0.3579164147377014, "learning_rate": 0.00017920200513499258, "loss": 11.6834, "step": 29981 }, { "epoch": 0.6276061291132882, "grad_norm": 0.2867586016654968, "learning_rate": 0.00017920066658543146, "loss": 11.683, "step": 29982 }, { "epoch": 0.6276270618772503, "grad_norm": 0.3864445686340332, "learning_rate": 0.000179199327997797, "loss": 11.6586, "step": 29983 }, { "epoch": 0.6276479946412125, "grad_norm": 0.26854366064071655, "learning_rate": 0.00017919798937208981, "loss": 11.653, "step": 29984 }, { "epoch": 0.6276689274051745, "grad_norm": 0.28283095359802246, "learning_rate": 0.0001791966507083106, "loss": 11.6791, "step": 29985 }, { "epoch": 0.6276898601691367, "grad_norm": 0.38912254571914673, "learning_rate": 0.00017919531200645997, "loss": 11.6811, "step": 29986 }, { "epoch": 0.6277107929330988, "grad_norm": 0.29964688420295715, "learning_rate": 0.00017919397326653853, "loss": 11.6732, "step": 29987 }, { "epoch": 0.627731725697061, "grad_norm": 0.2596484422683716, "learning_rate": 0.00017919263448854696, "loss": 11.6725, "step": 29988 }, { "epoch": 0.6277526584610232, "grad_norm": 0.39226919412612915, "learning_rate": 0.00017919129567248594, "loss": 11.6729, "step": 29989 }, { "epoch": 0.6277735912249853, "grad_norm": 0.3513154089450836, "learning_rate": 0.00017918995681835605, "loss": 11.6698, "step": 29990 }, { "epoch": 0.6277945239889475, "grad_norm": 0.3026919960975647, "learning_rate": 0.000179188617926158, "loss": 11.6777, "step": 29991 }, { "epoch": 0.6278154567529096, "grad_norm": 0.2656731605529785, "learning_rate": 0.00017918727899589237, "loss": 11.6612, "step": 29992 }, { "epoch": 0.6278363895168718, "grad_norm": 0.2946096360683441, "learning_rate": 0.0001791859400275598, "loss": 11.6658, "step": 29993 }, { "epoch": 0.6278573222808339, "grad_norm": 0.35339775681495667, "learning_rate": 0.000179184601021161, "loss": 11.6817, "step": 29994 }, { "epoch": 0.6278782550447961, "grad_norm": 0.23608222603797913, "learning_rate": 0.00017918326197669657, "loss": 11.6546, "step": 29995 }, { "epoch": 0.6278991878087583, "grad_norm": 0.2532248795032501, "learning_rate": 0.00017918192289416714, "loss": 11.6614, "step": 29996 }, { "epoch": 0.6279201205727204, "grad_norm": 0.26981326937675476, "learning_rate": 0.00017918058377357337, "loss": 11.6673, "step": 29997 }, { "epoch": 0.6279410533366826, "grad_norm": 0.4274589717388153, "learning_rate": 0.00017917924461491593, "loss": 11.6692, "step": 29998 }, { "epoch": 0.6279619861006447, "grad_norm": 0.35996362566947937, "learning_rate": 0.00017917790541819542, "loss": 11.6785, "step": 29999 }, { "epoch": 0.6279829188646069, "grad_norm": 0.3185480833053589, "learning_rate": 0.0001791765661834125, "loss": 11.6677, "step": 30000 }, { "epoch": 0.6279829188646069, "eval_loss": 11.671140670776367, "eval_runtime": 34.3047, "eval_samples_per_second": 28.014, "eval_steps_per_second": 7.025, "step": 30000 }, { "epoch": 0.6280038516285691, "grad_norm": 0.3463376760482788, "learning_rate": 0.00017917522691056784, "loss": 11.6732, "step": 30001 }, { "epoch": 0.6280247843925312, "grad_norm": 0.2935136556625366, "learning_rate": 0.00017917388759966206, "loss": 11.6667, "step": 30002 }, { "epoch": 0.6280457171564934, "grad_norm": 0.34068503975868225, "learning_rate": 0.0001791725482506958, "loss": 11.6745, "step": 30003 }, { "epoch": 0.6280666499204555, "grad_norm": 0.3572841286659241, "learning_rate": 0.0001791712088636697, "loss": 11.6851, "step": 30004 }, { "epoch": 0.6280875826844177, "grad_norm": 0.2749839723110199, "learning_rate": 0.00017916986943858446, "loss": 11.6691, "step": 30005 }, { "epoch": 0.6281085154483798, "grad_norm": 0.2845611274242401, "learning_rate": 0.00017916852997544062, "loss": 11.6704, "step": 30006 }, { "epoch": 0.628129448212342, "grad_norm": 0.27857503294944763, "learning_rate": 0.0001791671904742389, "loss": 11.6671, "step": 30007 }, { "epoch": 0.6281503809763042, "grad_norm": 0.2650163471698761, "learning_rate": 0.00017916585093497993, "loss": 11.6567, "step": 30008 }, { "epoch": 0.6281713137402662, "grad_norm": 0.31059530377388, "learning_rate": 0.00017916451135766438, "loss": 11.6738, "step": 30009 }, { "epoch": 0.6281922465042284, "grad_norm": 0.3067871928215027, "learning_rate": 0.0001791631717422928, "loss": 11.668, "step": 30010 }, { "epoch": 0.6282131792681905, "grad_norm": 0.28577929735183716, "learning_rate": 0.0001791618320888659, "loss": 11.657, "step": 30011 }, { "epoch": 0.6282341120321527, "grad_norm": 0.335453063249588, "learning_rate": 0.00017916049239738438, "loss": 11.6754, "step": 30012 }, { "epoch": 0.6282550447961148, "grad_norm": 0.37170958518981934, "learning_rate": 0.0001791591526678488, "loss": 11.6642, "step": 30013 }, { "epoch": 0.628275977560077, "grad_norm": 0.324056476354599, "learning_rate": 0.0001791578129002598, "loss": 11.6675, "step": 30014 }, { "epoch": 0.6282969103240392, "grad_norm": 0.40223947167396545, "learning_rate": 0.00017915647309461806, "loss": 11.6645, "step": 30015 }, { "epoch": 0.6283178430880013, "grad_norm": 0.2763909101486206, "learning_rate": 0.00017915513325092425, "loss": 11.6743, "step": 30016 }, { "epoch": 0.6283387758519635, "grad_norm": 0.2921261787414551, "learning_rate": 0.00017915379336917896, "loss": 11.6889, "step": 30017 }, { "epoch": 0.6283597086159256, "grad_norm": 0.2910439968109131, "learning_rate": 0.00017915245344938286, "loss": 11.6718, "step": 30018 }, { "epoch": 0.6283806413798878, "grad_norm": 0.3097369968891144, "learning_rate": 0.00017915111349153657, "loss": 11.6863, "step": 30019 }, { "epoch": 0.62840157414385, "grad_norm": 0.38033512234687805, "learning_rate": 0.00017914977349564078, "loss": 11.6662, "step": 30020 }, { "epoch": 0.6284225069078121, "grad_norm": 0.37204891443252563, "learning_rate": 0.0001791484334616961, "loss": 11.6703, "step": 30021 }, { "epoch": 0.6284434396717743, "grad_norm": 0.3347494602203369, "learning_rate": 0.00017914709338970317, "loss": 11.6855, "step": 30022 }, { "epoch": 0.6284643724357364, "grad_norm": 0.26689356565475464, "learning_rate": 0.00017914575327966265, "loss": 11.6698, "step": 30023 }, { "epoch": 0.6284853051996986, "grad_norm": 2.5024588108062744, "learning_rate": 0.0001791444131315752, "loss": 11.6632, "step": 30024 }, { "epoch": 0.6285062379636607, "grad_norm": 0.29097121953964233, "learning_rate": 0.00017914307294544142, "loss": 11.6773, "step": 30025 }, { "epoch": 0.6285271707276229, "grad_norm": 0.37404748797416687, "learning_rate": 0.00017914173272126199, "loss": 11.6749, "step": 30026 }, { "epoch": 0.6285481034915851, "grad_norm": 0.30570662021636963, "learning_rate": 0.00017914039245903754, "loss": 11.6664, "step": 30027 }, { "epoch": 0.6285690362555472, "grad_norm": 0.24292618036270142, "learning_rate": 0.0001791390521587687, "loss": 11.6628, "step": 30028 }, { "epoch": 0.6285899690195094, "grad_norm": 0.3540750741958618, "learning_rate": 0.00017913771182045615, "loss": 11.6863, "step": 30029 }, { "epoch": 0.6286109017834715, "grad_norm": 0.35516637563705444, "learning_rate": 0.00017913637144410054, "loss": 11.6746, "step": 30030 }, { "epoch": 0.6286318345474337, "grad_norm": 0.3658512532711029, "learning_rate": 0.00017913503102970246, "loss": 11.6636, "step": 30031 }, { "epoch": 0.6286527673113957, "grad_norm": 0.4017857015132904, "learning_rate": 0.0001791336905772626, "loss": 11.6769, "step": 30032 }, { "epoch": 0.6286737000753579, "grad_norm": 0.35105380415916443, "learning_rate": 0.00017913235008678156, "loss": 11.6699, "step": 30033 }, { "epoch": 0.6286946328393201, "grad_norm": 0.34459149837493896, "learning_rate": 0.00017913100955826004, "loss": 11.6549, "step": 30034 }, { "epoch": 0.6287155656032822, "grad_norm": 0.3048292100429535, "learning_rate": 0.00017912966899169864, "loss": 11.6683, "step": 30035 }, { "epoch": 0.6287364983672444, "grad_norm": 0.25626975297927856, "learning_rate": 0.00017912832838709806, "loss": 11.6757, "step": 30036 }, { "epoch": 0.6287574311312065, "grad_norm": 0.3304918706417084, "learning_rate": 0.0001791269877444589, "loss": 11.6617, "step": 30037 }, { "epoch": 0.6287783638951687, "grad_norm": 0.36660444736480713, "learning_rate": 0.00017912564706378176, "loss": 11.6621, "step": 30038 }, { "epoch": 0.6287992966591309, "grad_norm": 0.25137656927108765, "learning_rate": 0.00017912430634506738, "loss": 11.6757, "step": 30039 }, { "epoch": 0.628820229423093, "grad_norm": 0.26651087403297424, "learning_rate": 0.00017912296558831635, "loss": 11.6706, "step": 30040 }, { "epoch": 0.6288411621870552, "grad_norm": 0.25626784563064575, "learning_rate": 0.00017912162479352933, "loss": 11.6806, "step": 30041 }, { "epoch": 0.6288620949510173, "grad_norm": 0.33674389123916626, "learning_rate": 0.00017912028396070695, "loss": 11.6638, "step": 30042 }, { "epoch": 0.6288830277149795, "grad_norm": 0.3129004240036011, "learning_rate": 0.00017911894308984984, "loss": 11.6758, "step": 30043 }, { "epoch": 0.6289039604789416, "grad_norm": 0.28316956758499146, "learning_rate": 0.00017911760218095873, "loss": 11.6836, "step": 30044 }, { "epoch": 0.6289248932429038, "grad_norm": 0.28396376967430115, "learning_rate": 0.00017911626123403416, "loss": 11.6561, "step": 30045 }, { "epoch": 0.628945826006866, "grad_norm": 0.22593526542186737, "learning_rate": 0.0001791149202490768, "loss": 11.6609, "step": 30046 }, { "epoch": 0.6289667587708281, "grad_norm": 0.23308265209197998, "learning_rate": 0.00017911357922608735, "loss": 11.6496, "step": 30047 }, { "epoch": 0.6289876915347903, "grad_norm": 0.2744545042514801, "learning_rate": 0.0001791122381650664, "loss": 11.6733, "step": 30048 }, { "epoch": 0.6290086242987524, "grad_norm": 0.3060261905193329, "learning_rate": 0.00017911089706601464, "loss": 11.659, "step": 30049 }, { "epoch": 0.6290295570627146, "grad_norm": 0.2724211812019348, "learning_rate": 0.00017910955592893263, "loss": 11.6589, "step": 30050 }, { "epoch": 0.6290504898266767, "grad_norm": 0.39081764221191406, "learning_rate": 0.00017910821475382112, "loss": 11.6597, "step": 30051 }, { "epoch": 0.6290714225906389, "grad_norm": 0.2302497774362564, "learning_rate": 0.00017910687354068068, "loss": 11.6709, "step": 30052 }, { "epoch": 0.6290923553546011, "grad_norm": 0.2429274469614029, "learning_rate": 0.00017910553228951201, "loss": 11.6622, "step": 30053 }, { "epoch": 0.6291132881185632, "grad_norm": 0.3824833929538727, "learning_rate": 0.0001791041910003157, "loss": 11.6708, "step": 30054 }, { "epoch": 0.6291342208825254, "grad_norm": 0.29674622416496277, "learning_rate": 0.00017910284967309243, "loss": 11.6677, "step": 30055 }, { "epoch": 0.6291551536464874, "grad_norm": 0.29454100131988525, "learning_rate": 0.00017910150830784282, "loss": 11.6603, "step": 30056 }, { "epoch": 0.6291760864104496, "grad_norm": 0.4219525456428528, "learning_rate": 0.00017910016690456755, "loss": 11.6684, "step": 30057 }, { "epoch": 0.6291970191744118, "grad_norm": 0.3255976736545563, "learning_rate": 0.00017909882546326725, "loss": 11.6813, "step": 30058 }, { "epoch": 0.6292179519383739, "grad_norm": 0.282853901386261, "learning_rate": 0.0001790974839839425, "loss": 11.6868, "step": 30059 }, { "epoch": 0.6292388847023361, "grad_norm": 0.33512336015701294, "learning_rate": 0.00017909614246659409, "loss": 11.658, "step": 30060 }, { "epoch": 0.6292598174662982, "grad_norm": 0.23685911297798157, "learning_rate": 0.0001790948009112225, "loss": 11.6731, "step": 30061 }, { "epoch": 0.6292807502302604, "grad_norm": 0.2749038636684418, "learning_rate": 0.00017909345931782852, "loss": 11.6843, "step": 30062 }, { "epoch": 0.6293016829942225, "grad_norm": 0.3581767678260803, "learning_rate": 0.0001790921176864127, "loss": 11.6744, "step": 30063 }, { "epoch": 0.6293226157581847, "grad_norm": 0.23167411983013153, "learning_rate": 0.00017909077601697572, "loss": 11.6813, "step": 30064 }, { "epoch": 0.6293435485221469, "grad_norm": 0.3191029131412506, "learning_rate": 0.0001790894343095182, "loss": 11.6977, "step": 30065 }, { "epoch": 0.629364481286109, "grad_norm": 0.3721354901790619, "learning_rate": 0.00017908809256404081, "loss": 11.666, "step": 30066 }, { "epoch": 0.6293854140500712, "grad_norm": 0.2650735676288605, "learning_rate": 0.00017908675078054422, "loss": 11.6553, "step": 30067 }, { "epoch": 0.6294063468140333, "grad_norm": 0.24095115065574646, "learning_rate": 0.00017908540895902902, "loss": 11.6675, "step": 30068 }, { "epoch": 0.6294272795779955, "grad_norm": 0.40662166476249695, "learning_rate": 0.0001790840670994959, "loss": 11.6874, "step": 30069 }, { "epoch": 0.6294482123419576, "grad_norm": 0.3038357198238373, "learning_rate": 0.00017908272520194546, "loss": 11.6523, "step": 30070 }, { "epoch": 0.6294691451059198, "grad_norm": 0.3092239797115326, "learning_rate": 0.00017908138326637837, "loss": 11.6625, "step": 30071 }, { "epoch": 0.629490077869882, "grad_norm": 0.32515883445739746, "learning_rate": 0.00017908004129279525, "loss": 11.6721, "step": 30072 }, { "epoch": 0.6295110106338441, "grad_norm": 0.30599749088287354, "learning_rate": 0.0001790786992811968, "loss": 11.6571, "step": 30073 }, { "epoch": 0.6295319433978063, "grad_norm": 0.3015272617340088, "learning_rate": 0.00017907735723158362, "loss": 11.67, "step": 30074 }, { "epoch": 0.6295528761617684, "grad_norm": 0.32229748368263245, "learning_rate": 0.0001790760151439564, "loss": 11.6752, "step": 30075 }, { "epoch": 0.6295738089257306, "grad_norm": 0.26988133788108826, "learning_rate": 0.00017907467301831573, "loss": 11.6688, "step": 30076 }, { "epoch": 0.6295947416896928, "grad_norm": 0.35805240273475647, "learning_rate": 0.0001790733308546623, "loss": 11.6774, "step": 30077 }, { "epoch": 0.6296156744536548, "grad_norm": 0.2527003884315491, "learning_rate": 0.0001790719886529967, "loss": 11.6755, "step": 30078 }, { "epoch": 0.629636607217617, "grad_norm": 0.23408541083335876, "learning_rate": 0.00017907064641331966, "loss": 11.6642, "step": 30079 }, { "epoch": 0.6296575399815791, "grad_norm": 0.24000079929828644, "learning_rate": 0.00017906930413563173, "loss": 11.662, "step": 30080 }, { "epoch": 0.6296784727455413, "grad_norm": 0.30876749753952026, "learning_rate": 0.0001790679618199336, "loss": 11.6646, "step": 30081 }, { "epoch": 0.6296994055095034, "grad_norm": 0.2541652023792267, "learning_rate": 0.00017906661946622595, "loss": 11.6674, "step": 30082 }, { "epoch": 0.6297203382734656, "grad_norm": 0.4628293812274933, "learning_rate": 0.00017906527707450937, "loss": 11.6786, "step": 30083 }, { "epoch": 0.6297412710374278, "grad_norm": 0.30665943026542664, "learning_rate": 0.00017906393464478453, "loss": 11.6756, "step": 30084 }, { "epoch": 0.6297622038013899, "grad_norm": 0.2970742881298065, "learning_rate": 0.0001790625921770521, "loss": 11.6592, "step": 30085 }, { "epoch": 0.6297831365653521, "grad_norm": 0.30704471468925476, "learning_rate": 0.00017906124967131265, "loss": 11.6907, "step": 30086 }, { "epoch": 0.6298040693293142, "grad_norm": 0.29213833808898926, "learning_rate": 0.0001790599071275669, "loss": 11.6666, "step": 30087 }, { "epoch": 0.6298250020932764, "grad_norm": 0.26034992933273315, "learning_rate": 0.00017905856454581546, "loss": 11.6741, "step": 30088 }, { "epoch": 0.6298459348572385, "grad_norm": 0.2259421944618225, "learning_rate": 0.00017905722192605898, "loss": 11.6826, "step": 30089 }, { "epoch": 0.6298668676212007, "grad_norm": 0.30197837948799133, "learning_rate": 0.00017905587926829813, "loss": 11.6669, "step": 30090 }, { "epoch": 0.6298878003851629, "grad_norm": 0.24314913153648376, "learning_rate": 0.00017905453657253353, "loss": 11.6651, "step": 30091 }, { "epoch": 0.629908733149125, "grad_norm": 0.300273060798645, "learning_rate": 0.0001790531938387658, "loss": 11.6785, "step": 30092 }, { "epoch": 0.6299296659130872, "grad_norm": 0.29484623670578003, "learning_rate": 0.00017905185106699564, "loss": 11.6666, "step": 30093 }, { "epoch": 0.6299505986770493, "grad_norm": 0.40766608715057373, "learning_rate": 0.00017905050825722368, "loss": 11.6818, "step": 30094 }, { "epoch": 0.6299715314410115, "grad_norm": 0.26004675030708313, "learning_rate": 0.00017904916540945054, "loss": 11.6839, "step": 30095 }, { "epoch": 0.6299924642049737, "grad_norm": 0.2698060870170593, "learning_rate": 0.00017904782252367688, "loss": 11.6639, "step": 30096 }, { "epoch": 0.6300133969689358, "grad_norm": 0.36247384548187256, "learning_rate": 0.00017904647959990334, "loss": 11.6574, "step": 30097 }, { "epoch": 0.630034329732898, "grad_norm": 0.283397376537323, "learning_rate": 0.0001790451366381306, "loss": 11.671, "step": 30098 }, { "epoch": 0.6300552624968601, "grad_norm": 0.39768677949905396, "learning_rate": 0.00017904379363835928, "loss": 11.6699, "step": 30099 }, { "epoch": 0.6300761952608223, "grad_norm": 0.30691343545913696, "learning_rate": 0.00017904245060059, "loss": 11.6682, "step": 30100 }, { "epoch": 0.6300971280247843, "grad_norm": 0.2955879271030426, "learning_rate": 0.00017904110752482345, "loss": 11.6833, "step": 30101 }, { "epoch": 0.6301180607887465, "grad_norm": 0.38030630350112915, "learning_rate": 0.00017903976441106023, "loss": 11.6781, "step": 30102 }, { "epoch": 0.6301389935527087, "grad_norm": 0.39051514863967896, "learning_rate": 0.00017903842125930103, "loss": 11.6604, "step": 30103 }, { "epoch": 0.6301599263166708, "grad_norm": 0.28528356552124023, "learning_rate": 0.0001790370780695465, "loss": 11.6862, "step": 30104 }, { "epoch": 0.630180859080633, "grad_norm": 0.30558693408966064, "learning_rate": 0.00017903573484179722, "loss": 11.6717, "step": 30105 }, { "epoch": 0.6302017918445951, "grad_norm": 0.3764124810695648, "learning_rate": 0.00017903439157605387, "loss": 11.668, "step": 30106 }, { "epoch": 0.6302227246085573, "grad_norm": 0.28800517320632935, "learning_rate": 0.00017903304827231712, "loss": 11.6763, "step": 30107 }, { "epoch": 0.6302436573725194, "grad_norm": 0.2724599540233612, "learning_rate": 0.00017903170493058763, "loss": 11.6594, "step": 30108 }, { "epoch": 0.6302645901364816, "grad_norm": 0.32654792070388794, "learning_rate": 0.00017903036155086599, "loss": 11.6754, "step": 30109 }, { "epoch": 0.6302855229004438, "grad_norm": 0.298636794090271, "learning_rate": 0.00017902901813315287, "loss": 11.6524, "step": 30110 }, { "epoch": 0.6303064556644059, "grad_norm": 0.31358346343040466, "learning_rate": 0.00017902767467744895, "loss": 11.6851, "step": 30111 }, { "epoch": 0.6303273884283681, "grad_norm": 0.22006593644618988, "learning_rate": 0.0001790263311837548, "loss": 11.6788, "step": 30112 }, { "epoch": 0.6303483211923302, "grad_norm": 0.3176001310348511, "learning_rate": 0.00017902498765207112, "loss": 11.6503, "step": 30113 }, { "epoch": 0.6303692539562924, "grad_norm": 0.30293184518814087, "learning_rate": 0.00017902364408239855, "loss": 11.671, "step": 30114 }, { "epoch": 0.6303901867202546, "grad_norm": 0.30041640996932983, "learning_rate": 0.00017902230047473774, "loss": 11.6707, "step": 30115 }, { "epoch": 0.6304111194842167, "grad_norm": 0.2936348617076874, "learning_rate": 0.0001790209568290893, "loss": 11.6807, "step": 30116 }, { "epoch": 0.6304320522481789, "grad_norm": 0.981682538986206, "learning_rate": 0.0001790196131454539, "loss": 11.664, "step": 30117 }, { "epoch": 0.630452985012141, "grad_norm": 0.26630309224128723, "learning_rate": 0.00017901826942383223, "loss": 11.676, "step": 30118 }, { "epoch": 0.6304739177761032, "grad_norm": 0.24141302704811096, "learning_rate": 0.00017901692566422487, "loss": 11.662, "step": 30119 }, { "epoch": 0.6304948505400653, "grad_norm": 0.25911009311676025, "learning_rate": 0.0001790155818666325, "loss": 11.6571, "step": 30120 }, { "epoch": 0.6305157833040275, "grad_norm": 0.29789605736732483, "learning_rate": 0.00017901423803105574, "loss": 11.6453, "step": 30121 }, { "epoch": 0.6305367160679897, "grad_norm": 0.2863941788673401, "learning_rate": 0.00017901289415749527, "loss": 11.6676, "step": 30122 }, { "epoch": 0.6305576488319518, "grad_norm": 0.2858503460884094, "learning_rate": 0.00017901155024595172, "loss": 11.6698, "step": 30123 }, { "epoch": 0.630578581595914, "grad_norm": 0.28893324732780457, "learning_rate": 0.0001790102062964257, "loss": 11.6797, "step": 30124 }, { "epoch": 0.630599514359876, "grad_norm": 0.2286280244588852, "learning_rate": 0.00017900886230891793, "loss": 11.6795, "step": 30125 }, { "epoch": 0.6306204471238382, "grad_norm": 0.3225095272064209, "learning_rate": 0.000179007518283429, "loss": 11.6786, "step": 30126 }, { "epoch": 0.6306413798878003, "grad_norm": 0.26908454298973083, "learning_rate": 0.00017900617421995959, "loss": 11.6779, "step": 30127 }, { "epoch": 0.6306623126517625, "grad_norm": 0.2582165002822876, "learning_rate": 0.00017900483011851028, "loss": 11.6614, "step": 30128 }, { "epoch": 0.6306832454157247, "grad_norm": 0.38536643981933594, "learning_rate": 0.0001790034859790818, "loss": 11.6777, "step": 30129 }, { "epoch": 0.6307041781796868, "grad_norm": 0.35226061940193176, "learning_rate": 0.00017900214180167475, "loss": 11.6766, "step": 30130 }, { "epoch": 0.630725110943649, "grad_norm": 0.32422447204589844, "learning_rate": 0.00017900079758628983, "loss": 11.6689, "step": 30131 }, { "epoch": 0.6307460437076111, "grad_norm": 0.2761017680168152, "learning_rate": 0.0001789994533329276, "loss": 11.6679, "step": 30132 }, { "epoch": 0.6307669764715733, "grad_norm": 0.2784746289253235, "learning_rate": 0.00017899810904158873, "loss": 11.6702, "step": 30133 }, { "epoch": 0.6307879092355355, "grad_norm": 0.24353083968162537, "learning_rate": 0.00017899676471227392, "loss": 11.6601, "step": 30134 }, { "epoch": 0.6308088419994976, "grad_norm": 0.28901445865631104, "learning_rate": 0.00017899542034498377, "loss": 11.6681, "step": 30135 }, { "epoch": 0.6308297747634598, "grad_norm": 0.3237321078777313, "learning_rate": 0.00017899407593971898, "loss": 11.6827, "step": 30136 }, { "epoch": 0.6308507075274219, "grad_norm": 0.2873650789260864, "learning_rate": 0.0001789927314964801, "loss": 11.6782, "step": 30137 }, { "epoch": 0.6308716402913841, "grad_norm": 0.2577317953109741, "learning_rate": 0.00017899138701526782, "loss": 11.667, "step": 30138 }, { "epoch": 0.6308925730553462, "grad_norm": 0.2816837430000305, "learning_rate": 0.00017899004249608287, "loss": 11.683, "step": 30139 }, { "epoch": 0.6309135058193084, "grad_norm": 0.3250289857387543, "learning_rate": 0.00017898869793892577, "loss": 11.6587, "step": 30140 }, { "epoch": 0.6309344385832706, "grad_norm": 0.28618738055229187, "learning_rate": 0.00017898735334379723, "loss": 11.6756, "step": 30141 }, { "epoch": 0.6309553713472327, "grad_norm": 0.23657946288585663, "learning_rate": 0.0001789860087106979, "loss": 11.676, "step": 30142 }, { "epoch": 0.6309763041111949, "grad_norm": 0.30937957763671875, "learning_rate": 0.00017898466403962838, "loss": 11.6602, "step": 30143 }, { "epoch": 0.630997236875157, "grad_norm": 0.31409406661987305, "learning_rate": 0.00017898331933058936, "loss": 11.6608, "step": 30144 }, { "epoch": 0.6310181696391192, "grad_norm": 0.2858739495277405, "learning_rate": 0.0001789819745835815, "loss": 11.6602, "step": 30145 }, { "epoch": 0.6310391024030813, "grad_norm": 0.2744941711425781, "learning_rate": 0.0001789806297986054, "loss": 11.6581, "step": 30146 }, { "epoch": 0.6310600351670435, "grad_norm": 0.31379711627960205, "learning_rate": 0.00017897928497566173, "loss": 11.675, "step": 30147 }, { "epoch": 0.6310809679310057, "grad_norm": 0.2649420201778412, "learning_rate": 0.00017897794011475116, "loss": 11.6639, "step": 30148 }, { "epoch": 0.6311019006949677, "grad_norm": 0.3242940604686737, "learning_rate": 0.00017897659521587427, "loss": 11.6484, "step": 30149 }, { "epoch": 0.6311228334589299, "grad_norm": 0.25395092368125916, "learning_rate": 0.00017897525027903177, "loss": 11.677, "step": 30150 }, { "epoch": 0.631143766222892, "grad_norm": 0.3232915997505188, "learning_rate": 0.0001789739053042243, "loss": 11.6718, "step": 30151 }, { "epoch": 0.6311646989868542, "grad_norm": 0.27947139739990234, "learning_rate": 0.00017897256029145246, "loss": 11.659, "step": 30152 }, { "epoch": 0.6311856317508163, "grad_norm": 0.26242175698280334, "learning_rate": 0.00017897121524071695, "loss": 11.686, "step": 30153 }, { "epoch": 0.6312065645147785, "grad_norm": 0.3304775655269623, "learning_rate": 0.00017896987015201838, "loss": 11.6941, "step": 30154 }, { "epoch": 0.6312274972787407, "grad_norm": 0.37613558769226074, "learning_rate": 0.00017896852502535742, "loss": 11.6872, "step": 30155 }, { "epoch": 0.6312484300427028, "grad_norm": 0.30930963158607483, "learning_rate": 0.00017896717986073471, "loss": 11.6858, "step": 30156 }, { "epoch": 0.631269362806665, "grad_norm": 0.3423203229904175, "learning_rate": 0.00017896583465815087, "loss": 11.6836, "step": 30157 }, { "epoch": 0.6312902955706271, "grad_norm": 0.3814186751842499, "learning_rate": 0.00017896448941760658, "loss": 11.6787, "step": 30158 }, { "epoch": 0.6313112283345893, "grad_norm": 0.339225709438324, "learning_rate": 0.0001789631441391025, "loss": 11.6725, "step": 30159 }, { "epoch": 0.6313321610985515, "grad_norm": 0.2695915997028351, "learning_rate": 0.00017896179882263927, "loss": 11.6821, "step": 30160 }, { "epoch": 0.6313530938625136, "grad_norm": 0.31115248799324036, "learning_rate": 0.0001789604534682175, "loss": 11.6953, "step": 30161 }, { "epoch": 0.6313740266264758, "grad_norm": 0.39986538887023926, "learning_rate": 0.00017895910807583782, "loss": 11.6576, "step": 30162 }, { "epoch": 0.6313949593904379, "grad_norm": 0.2945253252983093, "learning_rate": 0.00017895776264550094, "loss": 11.6754, "step": 30163 }, { "epoch": 0.6314158921544001, "grad_norm": 0.2547285556793213, "learning_rate": 0.0001789564171772075, "loss": 11.6746, "step": 30164 }, { "epoch": 0.6314368249183622, "grad_norm": 0.36474165320396423, "learning_rate": 0.0001789550716709581, "loss": 11.6656, "step": 30165 }, { "epoch": 0.6314577576823244, "grad_norm": 0.3871796727180481, "learning_rate": 0.00017895372612675346, "loss": 11.6805, "step": 30166 }, { "epoch": 0.6314786904462866, "grad_norm": 0.3163794279098511, "learning_rate": 0.00017895238054459412, "loss": 11.672, "step": 30167 }, { "epoch": 0.6314996232102487, "grad_norm": 0.31730785965919495, "learning_rate": 0.00017895103492448082, "loss": 11.6865, "step": 30168 }, { "epoch": 0.6315205559742109, "grad_norm": 0.24823763966560364, "learning_rate": 0.00017894968926641418, "loss": 11.6684, "step": 30169 }, { "epoch": 0.631541488738173, "grad_norm": 0.2551994323730469, "learning_rate": 0.00017894834357039483, "loss": 11.666, "step": 30170 }, { "epoch": 0.6315624215021352, "grad_norm": 0.23515015840530396, "learning_rate": 0.00017894699783642344, "loss": 11.6599, "step": 30171 }, { "epoch": 0.6315833542660972, "grad_norm": 0.3105703890323639, "learning_rate": 0.00017894565206450064, "loss": 11.6587, "step": 30172 }, { "epoch": 0.6316042870300594, "grad_norm": 0.3047531247138977, "learning_rate": 0.00017894430625462707, "loss": 11.6712, "step": 30173 }, { "epoch": 0.6316252197940216, "grad_norm": 0.31098830699920654, "learning_rate": 0.0001789429604068034, "loss": 11.6679, "step": 30174 }, { "epoch": 0.6316461525579837, "grad_norm": 0.27711665630340576, "learning_rate": 0.0001789416145210303, "loss": 11.6712, "step": 30175 }, { "epoch": 0.6316670853219459, "grad_norm": 0.2697298526763916, "learning_rate": 0.0001789402685973083, "loss": 11.6684, "step": 30176 }, { "epoch": 0.631688018085908, "grad_norm": 0.27882257103919983, "learning_rate": 0.00017893892263563823, "loss": 11.6637, "step": 30177 }, { "epoch": 0.6317089508498702, "grad_norm": 0.4613106846809387, "learning_rate": 0.0001789375766360206, "loss": 11.6534, "step": 30178 }, { "epoch": 0.6317298836138324, "grad_norm": 0.2749074101448059, "learning_rate": 0.00017893623059845608, "loss": 11.6734, "step": 30179 }, { "epoch": 0.6317508163777945, "grad_norm": 0.26952657103538513, "learning_rate": 0.00017893488452294534, "loss": 11.6672, "step": 30180 }, { "epoch": 0.6317717491417567, "grad_norm": 0.24941721558570862, "learning_rate": 0.00017893353840948902, "loss": 11.658, "step": 30181 }, { "epoch": 0.6317926819057188, "grad_norm": 0.3308674097061157, "learning_rate": 0.00017893219225808776, "loss": 11.6656, "step": 30182 }, { "epoch": 0.631813614669681, "grad_norm": 0.2608550786972046, "learning_rate": 0.00017893084606874222, "loss": 11.6653, "step": 30183 }, { "epoch": 0.6318345474336431, "grad_norm": 0.30008435249328613, "learning_rate": 0.00017892949984145304, "loss": 11.6811, "step": 30184 }, { "epoch": 0.6318554801976053, "grad_norm": 0.37033358216285706, "learning_rate": 0.00017892815357622088, "loss": 11.6794, "step": 30185 }, { "epoch": 0.6318764129615675, "grad_norm": 0.350084513425827, "learning_rate": 0.00017892680727304633, "loss": 11.6762, "step": 30186 }, { "epoch": 0.6318973457255296, "grad_norm": 0.37266215682029724, "learning_rate": 0.00017892546093193016, "loss": 11.6776, "step": 30187 }, { "epoch": 0.6319182784894918, "grad_norm": 0.24313224852085114, "learning_rate": 0.00017892411455287287, "loss": 11.6555, "step": 30188 }, { "epoch": 0.6319392112534539, "grad_norm": 0.2821029722690582, "learning_rate": 0.0001789227681358752, "loss": 11.6755, "step": 30189 }, { "epoch": 0.6319601440174161, "grad_norm": 0.22979848086833954, "learning_rate": 0.00017892142168093777, "loss": 11.6592, "step": 30190 }, { "epoch": 0.6319810767813782, "grad_norm": 0.32873180508613586, "learning_rate": 0.00017892007518806124, "loss": 11.6543, "step": 30191 }, { "epoch": 0.6320020095453404, "grad_norm": 0.31558993458747864, "learning_rate": 0.00017891872865724625, "loss": 11.6732, "step": 30192 }, { "epoch": 0.6320229423093026, "grad_norm": 0.29128268361091614, "learning_rate": 0.00017891738208849342, "loss": 11.6615, "step": 30193 }, { "epoch": 0.6320438750732646, "grad_norm": 0.2549659013748169, "learning_rate": 0.00017891603548180344, "loss": 11.6615, "step": 30194 }, { "epoch": 0.6320648078372268, "grad_norm": 0.3252289295196533, "learning_rate": 0.00017891468883717696, "loss": 11.6753, "step": 30195 }, { "epoch": 0.6320857406011889, "grad_norm": 0.2824709415435791, "learning_rate": 0.00017891334215461456, "loss": 11.6521, "step": 30196 }, { "epoch": 0.6321066733651511, "grad_norm": 0.3197028338909149, "learning_rate": 0.000178911995434117, "loss": 11.6676, "step": 30197 }, { "epoch": 0.6321276061291133, "grad_norm": 0.308896541595459, "learning_rate": 0.00017891064867568481, "loss": 11.6593, "step": 30198 }, { "epoch": 0.6321485388930754, "grad_norm": 0.3569437265396118, "learning_rate": 0.00017890930187931868, "loss": 11.6703, "step": 30199 }, { "epoch": 0.6321694716570376, "grad_norm": 0.24104554951190948, "learning_rate": 0.00017890795504501932, "loss": 11.6597, "step": 30200 }, { "epoch": 0.6321904044209997, "grad_norm": 0.2893776595592499, "learning_rate": 0.0001789066081727873, "loss": 11.6853, "step": 30201 }, { "epoch": 0.6322113371849619, "grad_norm": 0.29890507459640503, "learning_rate": 0.00017890526126262328, "loss": 11.6427, "step": 30202 }, { "epoch": 0.632232269948924, "grad_norm": 0.3178847134113312, "learning_rate": 0.00017890391431452793, "loss": 11.6672, "step": 30203 }, { "epoch": 0.6322532027128862, "grad_norm": 0.29420948028564453, "learning_rate": 0.0001789025673285019, "loss": 11.6784, "step": 30204 }, { "epoch": 0.6322741354768484, "grad_norm": 0.2550302743911743, "learning_rate": 0.0001789012203045458, "loss": 11.6809, "step": 30205 }, { "epoch": 0.6322950682408105, "grad_norm": 0.4158923327922821, "learning_rate": 0.00017889987324266033, "loss": 11.672, "step": 30206 }, { "epoch": 0.6323160010047727, "grad_norm": 0.3222203850746155, "learning_rate": 0.0001788985261428461, "loss": 11.6636, "step": 30207 }, { "epoch": 0.6323369337687348, "grad_norm": 0.2589039206504822, "learning_rate": 0.00017889717900510375, "loss": 11.6634, "step": 30208 }, { "epoch": 0.632357866532697, "grad_norm": 0.2666516900062561, "learning_rate": 0.00017889583182943393, "loss": 11.6766, "step": 30209 }, { "epoch": 0.6323787992966591, "grad_norm": 0.2994283437728882, "learning_rate": 0.00017889448461583737, "loss": 11.68, "step": 30210 }, { "epoch": 0.6323997320606213, "grad_norm": 0.2938084304332733, "learning_rate": 0.0001788931373643146, "loss": 11.6704, "step": 30211 }, { "epoch": 0.6324206648245835, "grad_norm": 0.30917051434516907, "learning_rate": 0.00017889179007486632, "loss": 11.6579, "step": 30212 }, { "epoch": 0.6324415975885456, "grad_norm": 0.2897105813026428, "learning_rate": 0.00017889044274749317, "loss": 11.6732, "step": 30213 }, { "epoch": 0.6324625303525078, "grad_norm": 0.3291490375995636, "learning_rate": 0.00017888909538219582, "loss": 11.6862, "step": 30214 }, { "epoch": 0.6324834631164699, "grad_norm": 0.31311362981796265, "learning_rate": 0.0001788877479789749, "loss": 11.6671, "step": 30215 }, { "epoch": 0.6325043958804321, "grad_norm": 0.34026503562927246, "learning_rate": 0.00017888640053783105, "loss": 11.6588, "step": 30216 }, { "epoch": 0.6325253286443943, "grad_norm": 0.2516763210296631, "learning_rate": 0.00017888505305876492, "loss": 11.6671, "step": 30217 }, { "epoch": 0.6325462614083563, "grad_norm": 0.26549002528190613, "learning_rate": 0.00017888370554177717, "loss": 11.669, "step": 30218 }, { "epoch": 0.6325671941723185, "grad_norm": 0.24365393817424774, "learning_rate": 0.00017888235798686845, "loss": 11.6727, "step": 30219 }, { "epoch": 0.6325881269362806, "grad_norm": 0.3005647361278534, "learning_rate": 0.0001788810103940394, "loss": 11.6694, "step": 30220 }, { "epoch": 0.6326090597002428, "grad_norm": 0.6864814758300781, "learning_rate": 0.00017887966276329068, "loss": 11.6552, "step": 30221 }, { "epoch": 0.6326299924642049, "grad_norm": 0.31166666746139526, "learning_rate": 0.00017887831509462288, "loss": 11.6913, "step": 30222 }, { "epoch": 0.6326509252281671, "grad_norm": 0.287792444229126, "learning_rate": 0.00017887696738803674, "loss": 11.682, "step": 30223 }, { "epoch": 0.6326718579921293, "grad_norm": 0.31915760040283203, "learning_rate": 0.00017887561964353285, "loss": 11.6648, "step": 30224 }, { "epoch": 0.6326927907560914, "grad_norm": 0.27749350666999817, "learning_rate": 0.00017887427186111186, "loss": 11.6847, "step": 30225 }, { "epoch": 0.6327137235200536, "grad_norm": 0.2571912705898285, "learning_rate": 0.00017887292404077446, "loss": 11.6728, "step": 30226 }, { "epoch": 0.6327346562840157, "grad_norm": 0.2938046455383301, "learning_rate": 0.00017887157618252123, "loss": 11.6686, "step": 30227 }, { "epoch": 0.6327555890479779, "grad_norm": 0.2904704213142395, "learning_rate": 0.00017887022828635285, "loss": 11.6607, "step": 30228 }, { "epoch": 0.63277652181194, "grad_norm": 0.28563961386680603, "learning_rate": 0.00017886888035226997, "loss": 11.6621, "step": 30229 }, { "epoch": 0.6327974545759022, "grad_norm": 0.3257046043872833, "learning_rate": 0.00017886753238027323, "loss": 11.6699, "step": 30230 }, { "epoch": 0.6328183873398644, "grad_norm": 0.3224197328090668, "learning_rate": 0.00017886618437036331, "loss": 11.6602, "step": 30231 }, { "epoch": 0.6328393201038265, "grad_norm": 0.3194067180156708, "learning_rate": 0.00017886483632254083, "loss": 11.6683, "step": 30232 }, { "epoch": 0.6328602528677887, "grad_norm": 0.26848188042640686, "learning_rate": 0.00017886348823680645, "loss": 11.6844, "step": 30233 }, { "epoch": 0.6328811856317508, "grad_norm": 0.3058741092681885, "learning_rate": 0.0001788621401131608, "loss": 11.6784, "step": 30234 }, { "epoch": 0.632902118395713, "grad_norm": 0.37687963247299194, "learning_rate": 0.00017886079195160453, "loss": 11.6674, "step": 30235 }, { "epoch": 0.6329230511596752, "grad_norm": 0.3411179780960083, "learning_rate": 0.0001788594437521383, "loss": 11.6843, "step": 30236 }, { "epoch": 0.6329439839236373, "grad_norm": 0.3810100257396698, "learning_rate": 0.00017885809551476277, "loss": 11.6637, "step": 30237 }, { "epoch": 0.6329649166875995, "grad_norm": 0.3438955247402191, "learning_rate": 0.00017885674723947857, "loss": 11.6641, "step": 30238 }, { "epoch": 0.6329858494515616, "grad_norm": 0.33531293272972107, "learning_rate": 0.00017885539892628635, "loss": 11.6563, "step": 30239 }, { "epoch": 0.6330067822155238, "grad_norm": 0.29281455278396606, "learning_rate": 0.00017885405057518674, "loss": 11.6855, "step": 30240 }, { "epoch": 0.6330277149794858, "grad_norm": 0.32019633054733276, "learning_rate": 0.00017885270218618042, "loss": 11.6831, "step": 30241 }, { "epoch": 0.633048647743448, "grad_norm": 0.29482319951057434, "learning_rate": 0.00017885135375926803, "loss": 11.6718, "step": 30242 }, { "epoch": 0.6330695805074102, "grad_norm": 0.28307363390922546, "learning_rate": 0.00017885000529445022, "loss": 11.6644, "step": 30243 }, { "epoch": 0.6330905132713723, "grad_norm": 0.2981387674808502, "learning_rate": 0.00017884865679172762, "loss": 11.6759, "step": 30244 }, { "epoch": 0.6331114460353345, "grad_norm": 0.3379822075366974, "learning_rate": 0.0001788473082511009, "loss": 11.6601, "step": 30245 }, { "epoch": 0.6331323787992966, "grad_norm": 0.2703050971031189, "learning_rate": 0.00017884595967257073, "loss": 11.6618, "step": 30246 }, { "epoch": 0.6331533115632588, "grad_norm": 0.37557104229927063, "learning_rate": 0.00017884461105613765, "loss": 11.6634, "step": 30247 }, { "epoch": 0.6331742443272209, "grad_norm": 0.3803606927394867, "learning_rate": 0.00017884326240180242, "loss": 11.6573, "step": 30248 }, { "epoch": 0.6331951770911831, "grad_norm": 0.26299241185188293, "learning_rate": 0.0001788419137095657, "loss": 11.6623, "step": 30249 }, { "epoch": 0.6332161098551453, "grad_norm": 0.3636467754840851, "learning_rate": 0.00017884056497942803, "loss": 11.6606, "step": 30250 }, { "epoch": 0.6332370426191074, "grad_norm": 0.3108605146408081, "learning_rate": 0.00017883921621139015, "loss": 11.6705, "step": 30251 }, { "epoch": 0.6332579753830696, "grad_norm": 0.39047202467918396, "learning_rate": 0.00017883786740545266, "loss": 11.6706, "step": 30252 }, { "epoch": 0.6332789081470317, "grad_norm": 0.3478145897388458, "learning_rate": 0.00017883651856161626, "loss": 11.6856, "step": 30253 }, { "epoch": 0.6332998409109939, "grad_norm": 0.32907140254974365, "learning_rate": 0.00017883516967988155, "loss": 11.6698, "step": 30254 }, { "epoch": 0.6333207736749561, "grad_norm": 0.2802967429161072, "learning_rate": 0.00017883382076024917, "loss": 11.66, "step": 30255 }, { "epoch": 0.6333417064389182, "grad_norm": 0.3361024856567383, "learning_rate": 0.00017883247180271983, "loss": 11.7037, "step": 30256 }, { "epoch": 0.6333626392028804, "grad_norm": 0.27168357372283936, "learning_rate": 0.00017883112280729413, "loss": 11.6739, "step": 30257 }, { "epoch": 0.6333835719668425, "grad_norm": 0.3009738028049469, "learning_rate": 0.0001788297737739727, "loss": 11.6668, "step": 30258 }, { "epoch": 0.6334045047308047, "grad_norm": 0.32468053698539734, "learning_rate": 0.00017882842470275624, "loss": 11.6719, "step": 30259 }, { "epoch": 0.6334254374947668, "grad_norm": 0.2750977575778961, "learning_rate": 0.00017882707559364538, "loss": 11.6762, "step": 30260 }, { "epoch": 0.633446370258729, "grad_norm": 0.2686449885368347, "learning_rate": 0.00017882572644664077, "loss": 11.6763, "step": 30261 }, { "epoch": 0.6334673030226912, "grad_norm": 0.3108680546283722, "learning_rate": 0.00017882437726174304, "loss": 11.6746, "step": 30262 }, { "epoch": 0.6334882357866533, "grad_norm": 0.3005024194717407, "learning_rate": 0.00017882302803895285, "loss": 11.6832, "step": 30263 }, { "epoch": 0.6335091685506155, "grad_norm": 0.2853153347969055, "learning_rate": 0.00017882167877827088, "loss": 11.6593, "step": 30264 }, { "epoch": 0.6335301013145775, "grad_norm": 0.3181198239326477, "learning_rate": 0.0001788203294796977, "loss": 11.6738, "step": 30265 }, { "epoch": 0.6335510340785397, "grad_norm": 0.2907603085041046, "learning_rate": 0.00017881898014323407, "loss": 11.6709, "step": 30266 }, { "epoch": 0.6335719668425018, "grad_norm": 0.346211701631546, "learning_rate": 0.00017881763076888052, "loss": 11.6717, "step": 30267 }, { "epoch": 0.633592899606464, "grad_norm": 0.272417813539505, "learning_rate": 0.00017881628135663777, "loss": 11.672, "step": 30268 }, { "epoch": 0.6336138323704262, "grad_norm": 0.25717735290527344, "learning_rate": 0.00017881493190650646, "loss": 11.6792, "step": 30269 }, { "epoch": 0.6336347651343883, "grad_norm": 0.2631489336490631, "learning_rate": 0.00017881358241848724, "loss": 11.6715, "step": 30270 }, { "epoch": 0.6336556978983505, "grad_norm": 0.2746496796607971, "learning_rate": 0.00017881223289258074, "loss": 11.6659, "step": 30271 }, { "epoch": 0.6336766306623126, "grad_norm": 0.29389119148254395, "learning_rate": 0.00017881088332878764, "loss": 11.6762, "step": 30272 }, { "epoch": 0.6336975634262748, "grad_norm": 0.3257131576538086, "learning_rate": 0.00017880953372710856, "loss": 11.6785, "step": 30273 }, { "epoch": 0.633718496190237, "grad_norm": 0.24889786541461945, "learning_rate": 0.00017880818408754414, "loss": 11.6689, "step": 30274 }, { "epoch": 0.6337394289541991, "grad_norm": 0.2839185297489166, "learning_rate": 0.00017880683441009508, "loss": 11.6762, "step": 30275 }, { "epoch": 0.6337603617181613, "grad_norm": 0.32472163438796997, "learning_rate": 0.00017880548469476195, "loss": 11.6717, "step": 30276 }, { "epoch": 0.6337812944821234, "grad_norm": 0.33738669753074646, "learning_rate": 0.00017880413494154548, "loss": 11.6648, "step": 30277 }, { "epoch": 0.6338022272460856, "grad_norm": 0.2859296500682831, "learning_rate": 0.00017880278515044627, "loss": 11.6726, "step": 30278 }, { "epoch": 0.6338231600100477, "grad_norm": 0.24138998985290527, "learning_rate": 0.00017880143532146502, "loss": 11.6689, "step": 30279 }, { "epoch": 0.6338440927740099, "grad_norm": 0.31455084681510925, "learning_rate": 0.00017880008545460228, "loss": 11.6691, "step": 30280 }, { "epoch": 0.6338650255379721, "grad_norm": 0.28830137848854065, "learning_rate": 0.00017879873554985876, "loss": 11.6669, "step": 30281 }, { "epoch": 0.6338859583019342, "grad_norm": 0.31849998235702515, "learning_rate": 0.00017879738560723515, "loss": 11.6532, "step": 30282 }, { "epoch": 0.6339068910658964, "grad_norm": 0.2858337461948395, "learning_rate": 0.00017879603562673206, "loss": 11.6542, "step": 30283 }, { "epoch": 0.6339278238298585, "grad_norm": 0.33227261900901794, "learning_rate": 0.00017879468560835012, "loss": 11.6679, "step": 30284 }, { "epoch": 0.6339487565938207, "grad_norm": 0.277911901473999, "learning_rate": 0.00017879333555208997, "loss": 11.6783, "step": 30285 }, { "epoch": 0.6339696893577828, "grad_norm": 0.33773308992385864, "learning_rate": 0.00017879198545795233, "loss": 11.6598, "step": 30286 }, { "epoch": 0.633990622121745, "grad_norm": 0.2678835093975067, "learning_rate": 0.0001787906353259378, "loss": 11.6736, "step": 30287 }, { "epoch": 0.6340115548857072, "grad_norm": 0.2321336418390274, "learning_rate": 0.000178789285156047, "loss": 11.6639, "step": 30288 }, { "epoch": 0.6340324876496692, "grad_norm": 0.21406792104244232, "learning_rate": 0.0001787879349482806, "loss": 11.6856, "step": 30289 }, { "epoch": 0.6340534204136314, "grad_norm": 0.3486991226673126, "learning_rate": 0.0001787865847026393, "loss": 11.6656, "step": 30290 }, { "epoch": 0.6340743531775935, "grad_norm": 0.290535032749176, "learning_rate": 0.0001787852344191237, "loss": 11.6753, "step": 30291 }, { "epoch": 0.6340952859415557, "grad_norm": 0.3097044825553894, "learning_rate": 0.00017878388409773446, "loss": 11.6797, "step": 30292 }, { "epoch": 0.6341162187055179, "grad_norm": 0.28822338581085205, "learning_rate": 0.00017878253373847223, "loss": 11.6748, "step": 30293 }, { "epoch": 0.63413715146948, "grad_norm": 0.31561848521232605, "learning_rate": 0.00017878118334133764, "loss": 11.6669, "step": 30294 }, { "epoch": 0.6341580842334422, "grad_norm": 0.30972617864608765, "learning_rate": 0.00017877983290633137, "loss": 11.684, "step": 30295 }, { "epoch": 0.6341790169974043, "grad_norm": 0.32891204953193665, "learning_rate": 0.00017877848243345404, "loss": 11.6501, "step": 30296 }, { "epoch": 0.6341999497613665, "grad_norm": 0.48302432894706726, "learning_rate": 0.00017877713192270632, "loss": 11.6714, "step": 30297 }, { "epoch": 0.6342208825253286, "grad_norm": 0.3035624325275421, "learning_rate": 0.00017877578137408888, "loss": 11.6642, "step": 30298 }, { "epoch": 0.6342418152892908, "grad_norm": 0.23745733499526978, "learning_rate": 0.00017877443078760232, "loss": 11.6652, "step": 30299 }, { "epoch": 0.634262748053253, "grad_norm": 0.25790268182754517, "learning_rate": 0.00017877308016324731, "loss": 11.6785, "step": 30300 }, { "epoch": 0.6342836808172151, "grad_norm": 0.28723716735839844, "learning_rate": 0.00017877172950102452, "loss": 11.6565, "step": 30301 }, { "epoch": 0.6343046135811773, "grad_norm": 0.31066837906837463, "learning_rate": 0.00017877037880093457, "loss": 11.6718, "step": 30302 }, { "epoch": 0.6343255463451394, "grad_norm": 0.2956508994102478, "learning_rate": 0.00017876902806297814, "loss": 11.6768, "step": 30303 }, { "epoch": 0.6343464791091016, "grad_norm": 0.240316703915596, "learning_rate": 0.00017876767728715583, "loss": 11.6602, "step": 30304 }, { "epoch": 0.6343674118730637, "grad_norm": 0.31430721282958984, "learning_rate": 0.0001787663264734683, "loss": 11.6779, "step": 30305 }, { "epoch": 0.6343883446370259, "grad_norm": 0.3085464835166931, "learning_rate": 0.00017876497562191625, "loss": 11.6836, "step": 30306 }, { "epoch": 0.6344092774009881, "grad_norm": 0.3784586489200592, "learning_rate": 0.00017876362473250032, "loss": 11.6718, "step": 30307 }, { "epoch": 0.6344302101649502, "grad_norm": 0.2909673750400543, "learning_rate": 0.0001787622738052211, "loss": 11.6655, "step": 30308 }, { "epoch": 0.6344511429289124, "grad_norm": 0.30222320556640625, "learning_rate": 0.00017876092284007929, "loss": 11.684, "step": 30309 }, { "epoch": 0.6344720756928744, "grad_norm": 0.38809964060783386, "learning_rate": 0.00017875957183707554, "loss": 11.6754, "step": 30310 }, { "epoch": 0.6344930084568366, "grad_norm": 0.26787447929382324, "learning_rate": 0.00017875822079621044, "loss": 11.6707, "step": 30311 }, { "epoch": 0.6345139412207988, "grad_norm": 0.2873101830482483, "learning_rate": 0.00017875686971748474, "loss": 11.6822, "step": 30312 }, { "epoch": 0.6345348739847609, "grad_norm": 0.26359960436820984, "learning_rate": 0.000178755518600899, "loss": 11.6675, "step": 30313 }, { "epoch": 0.6345558067487231, "grad_norm": 0.28135576844215393, "learning_rate": 0.0001787541674464539, "loss": 11.6529, "step": 30314 }, { "epoch": 0.6345767395126852, "grad_norm": 0.2772078514099121, "learning_rate": 0.0001787528162541501, "loss": 11.6616, "step": 30315 }, { "epoch": 0.6345976722766474, "grad_norm": 0.3240903913974762, "learning_rate": 0.00017875146502398828, "loss": 11.6747, "step": 30316 }, { "epoch": 0.6346186050406095, "grad_norm": 0.41338616609573364, "learning_rate": 0.000178750113755969, "loss": 11.6696, "step": 30317 }, { "epoch": 0.6346395378045717, "grad_norm": 0.3003843426704407, "learning_rate": 0.000178748762450093, "loss": 11.673, "step": 30318 }, { "epoch": 0.6346604705685339, "grad_norm": 0.280086874961853, "learning_rate": 0.00017874741110636086, "loss": 11.6708, "step": 30319 }, { "epoch": 0.634681403332496, "grad_norm": 0.27008965611457825, "learning_rate": 0.0001787460597247733, "loss": 11.6696, "step": 30320 }, { "epoch": 0.6347023360964582, "grad_norm": 0.2975243330001831, "learning_rate": 0.00017874470830533089, "loss": 11.6662, "step": 30321 }, { "epoch": 0.6347232688604203, "grad_norm": 0.3410438001155853, "learning_rate": 0.00017874335684803434, "loss": 11.6673, "step": 30322 }, { "epoch": 0.6347442016243825, "grad_norm": 0.23662684857845306, "learning_rate": 0.00017874200535288428, "loss": 11.6487, "step": 30323 }, { "epoch": 0.6347651343883446, "grad_norm": 0.34612300992012024, "learning_rate": 0.00017874065381988136, "loss": 11.6522, "step": 30324 }, { "epoch": 0.6347860671523068, "grad_norm": 0.25874796509742737, "learning_rate": 0.00017873930224902623, "loss": 11.6683, "step": 30325 }, { "epoch": 0.634806999916269, "grad_norm": 0.37940847873687744, "learning_rate": 0.00017873795064031955, "loss": 11.6633, "step": 30326 }, { "epoch": 0.6348279326802311, "grad_norm": 0.2675076723098755, "learning_rate": 0.00017873659899376193, "loss": 11.6737, "step": 30327 }, { "epoch": 0.6348488654441933, "grad_norm": 0.3468616008758545, "learning_rate": 0.00017873524730935408, "loss": 11.6673, "step": 30328 }, { "epoch": 0.6348697982081554, "grad_norm": 0.32487332820892334, "learning_rate": 0.0001787338955870966, "loss": 11.6872, "step": 30329 }, { "epoch": 0.6348907309721176, "grad_norm": 0.32906097173690796, "learning_rate": 0.0001787325438269902, "loss": 11.6757, "step": 30330 }, { "epoch": 0.6349116637360797, "grad_norm": 0.3219188153743744, "learning_rate": 0.00017873119202903542, "loss": 11.6623, "step": 30331 }, { "epoch": 0.6349325965000419, "grad_norm": 0.27747300267219543, "learning_rate": 0.000178729840193233, "loss": 11.6782, "step": 30332 }, { "epoch": 0.6349535292640041, "grad_norm": 0.32368117570877075, "learning_rate": 0.0001787284883195836, "loss": 11.6862, "step": 30333 }, { "epoch": 0.6349744620279661, "grad_norm": 0.2840877175331116, "learning_rate": 0.0001787271364080878, "loss": 11.6685, "step": 30334 }, { "epoch": 0.6349953947919283, "grad_norm": 0.29388314485549927, "learning_rate": 0.0001787257844587463, "loss": 11.6644, "step": 30335 }, { "epoch": 0.6350163275558904, "grad_norm": 0.3485599458217621, "learning_rate": 0.00017872443247155974, "loss": 11.6755, "step": 30336 }, { "epoch": 0.6350372603198526, "grad_norm": 0.25095903873443604, "learning_rate": 0.0001787230804465288, "loss": 11.669, "step": 30337 }, { "epoch": 0.6350581930838148, "grad_norm": 0.2914293706417084, "learning_rate": 0.00017872172838365408, "loss": 11.6711, "step": 30338 }, { "epoch": 0.6350791258477769, "grad_norm": 0.2614245116710663, "learning_rate": 0.0001787203762829362, "loss": 11.6845, "step": 30339 }, { "epoch": 0.6351000586117391, "grad_norm": 0.37842461466789246, "learning_rate": 0.0001787190241443759, "loss": 11.6712, "step": 30340 }, { "epoch": 0.6351209913757012, "grad_norm": 0.25343838334083557, "learning_rate": 0.00017871767196797378, "loss": 11.663, "step": 30341 }, { "epoch": 0.6351419241396634, "grad_norm": 0.27568215131759644, "learning_rate": 0.00017871631975373048, "loss": 11.6749, "step": 30342 }, { "epoch": 0.6351628569036255, "grad_norm": 0.2977619171142578, "learning_rate": 0.0001787149675016467, "loss": 11.6609, "step": 30343 }, { "epoch": 0.6351837896675877, "grad_norm": 0.38170942664146423, "learning_rate": 0.00017871361521172306, "loss": 11.6783, "step": 30344 }, { "epoch": 0.6352047224315499, "grad_norm": 0.2533371150493622, "learning_rate": 0.00017871226288396018, "loss": 11.6612, "step": 30345 }, { "epoch": 0.635225655195512, "grad_norm": 0.2956191599369049, "learning_rate": 0.00017871091051835874, "loss": 11.6592, "step": 30346 }, { "epoch": 0.6352465879594742, "grad_norm": 0.2465869039297104, "learning_rate": 0.00017870955811491942, "loss": 11.6574, "step": 30347 }, { "epoch": 0.6352675207234363, "grad_norm": 0.30517151951789856, "learning_rate": 0.0001787082056736428, "loss": 11.6799, "step": 30348 }, { "epoch": 0.6352884534873985, "grad_norm": 0.28952285647392273, "learning_rate": 0.00017870685319452959, "loss": 11.673, "step": 30349 }, { "epoch": 0.6353093862513606, "grad_norm": 0.23857276141643524, "learning_rate": 0.0001787055006775804, "loss": 11.6736, "step": 30350 }, { "epoch": 0.6353303190153228, "grad_norm": 0.2921445071697235, "learning_rate": 0.00017870414812279592, "loss": 11.6645, "step": 30351 }, { "epoch": 0.635351251779285, "grad_norm": 0.3080637753009796, "learning_rate": 0.00017870279553017677, "loss": 11.6682, "step": 30352 }, { "epoch": 0.6353721845432471, "grad_norm": 0.26837897300720215, "learning_rate": 0.00017870144289972362, "loss": 11.6582, "step": 30353 }, { "epoch": 0.6353931173072093, "grad_norm": 0.2709956467151642, "learning_rate": 0.00017870009023143708, "loss": 11.6619, "step": 30354 }, { "epoch": 0.6354140500711714, "grad_norm": 0.2530505061149597, "learning_rate": 0.00017869873752531784, "loss": 11.6703, "step": 30355 }, { "epoch": 0.6354349828351336, "grad_norm": 0.35637104511260986, "learning_rate": 0.00017869738478136656, "loss": 11.6848, "step": 30356 }, { "epoch": 0.6354559155990958, "grad_norm": 0.241669163107872, "learning_rate": 0.00017869603199958383, "loss": 11.6575, "step": 30357 }, { "epoch": 0.6354768483630578, "grad_norm": 0.29612699151039124, "learning_rate": 0.00017869467917997037, "loss": 11.6762, "step": 30358 }, { "epoch": 0.63549778112702, "grad_norm": 0.26987412571907043, "learning_rate": 0.0001786933263225268, "loss": 11.6663, "step": 30359 }, { "epoch": 0.6355187138909821, "grad_norm": 0.29221147298812866, "learning_rate": 0.00017869197342725376, "loss": 11.6666, "step": 30360 }, { "epoch": 0.6355396466549443, "grad_norm": 0.36669081449508667, "learning_rate": 0.00017869062049415195, "loss": 11.6656, "step": 30361 }, { "epoch": 0.6355605794189064, "grad_norm": 0.3287941515445709, "learning_rate": 0.00017868926752322194, "loss": 11.6587, "step": 30362 }, { "epoch": 0.6355815121828686, "grad_norm": 0.29670676589012146, "learning_rate": 0.00017868791451446442, "loss": 11.6712, "step": 30363 }, { "epoch": 0.6356024449468308, "grad_norm": 0.2564869821071625, "learning_rate": 0.00017868656146788006, "loss": 11.6636, "step": 30364 }, { "epoch": 0.6356233777107929, "grad_norm": 0.2897332012653351, "learning_rate": 0.00017868520838346948, "loss": 11.6612, "step": 30365 }, { "epoch": 0.6356443104747551, "grad_norm": 0.41256827116012573, "learning_rate": 0.00017868385526123337, "loss": 11.6948, "step": 30366 }, { "epoch": 0.6356652432387172, "grad_norm": 0.257657527923584, "learning_rate": 0.00017868250210117235, "loss": 11.673, "step": 30367 }, { "epoch": 0.6356861760026794, "grad_norm": 0.368446409702301, "learning_rate": 0.00017868114890328703, "loss": 11.6655, "step": 30368 }, { "epoch": 0.6357071087666415, "grad_norm": 0.33113330602645874, "learning_rate": 0.00017867979566757813, "loss": 11.6796, "step": 30369 }, { "epoch": 0.6357280415306037, "grad_norm": 0.24452489614486694, "learning_rate": 0.0001786784423940463, "loss": 11.6706, "step": 30370 }, { "epoch": 0.6357489742945659, "grad_norm": 0.38288846611976624, "learning_rate": 0.00017867708908269215, "loss": 11.6793, "step": 30371 }, { "epoch": 0.635769907058528, "grad_norm": 0.33598455786705017, "learning_rate": 0.00017867573573351634, "loss": 11.6705, "step": 30372 }, { "epoch": 0.6357908398224902, "grad_norm": 0.3020789921283722, "learning_rate": 0.00017867438234651952, "loss": 11.6767, "step": 30373 }, { "epoch": 0.6358117725864523, "grad_norm": 0.29136642813682556, "learning_rate": 0.00017867302892170237, "loss": 11.6738, "step": 30374 }, { "epoch": 0.6358327053504145, "grad_norm": 0.31126946210861206, "learning_rate": 0.0001786716754590655, "loss": 11.6795, "step": 30375 }, { "epoch": 0.6358536381143767, "grad_norm": 0.290794312953949, "learning_rate": 0.0001786703219586096, "loss": 11.6791, "step": 30376 }, { "epoch": 0.6358745708783388, "grad_norm": 0.4516896903514862, "learning_rate": 0.00017866896842033526, "loss": 11.6826, "step": 30377 }, { "epoch": 0.635895503642301, "grad_norm": 0.2917923033237457, "learning_rate": 0.0001786676148442432, "loss": 11.6782, "step": 30378 }, { "epoch": 0.635916436406263, "grad_norm": 0.33693304657936096, "learning_rate": 0.00017866626123033404, "loss": 11.6623, "step": 30379 }, { "epoch": 0.6359373691702253, "grad_norm": 0.23412548005580902, "learning_rate": 0.00017866490757860844, "loss": 11.6676, "step": 30380 }, { "epoch": 0.6359583019341873, "grad_norm": 0.32765528559684753, "learning_rate": 0.00017866355388906704, "loss": 11.6775, "step": 30381 }, { "epoch": 0.6359792346981495, "grad_norm": 0.30870771408081055, "learning_rate": 0.00017866220016171048, "loss": 11.6828, "step": 30382 }, { "epoch": 0.6360001674621117, "grad_norm": 0.2741895616054535, "learning_rate": 0.00017866084639653942, "loss": 11.6683, "step": 30383 }, { "epoch": 0.6360211002260738, "grad_norm": 0.3123990297317505, "learning_rate": 0.00017865949259355453, "loss": 11.657, "step": 30384 }, { "epoch": 0.636042032990036, "grad_norm": 0.27420660853385925, "learning_rate": 0.00017865813875275643, "loss": 11.665, "step": 30385 }, { "epoch": 0.6360629657539981, "grad_norm": 0.37721166014671326, "learning_rate": 0.00017865678487414578, "loss": 11.6779, "step": 30386 }, { "epoch": 0.6360838985179603, "grad_norm": 0.26941996812820435, "learning_rate": 0.00017865543095772326, "loss": 11.68, "step": 30387 }, { "epoch": 0.6361048312819224, "grad_norm": 0.32974764704704285, "learning_rate": 0.0001786540770034895, "loss": 11.6643, "step": 30388 }, { "epoch": 0.6361257640458846, "grad_norm": 0.30740028619766235, "learning_rate": 0.00017865272301144513, "loss": 11.6618, "step": 30389 }, { "epoch": 0.6361466968098468, "grad_norm": 0.3023098111152649, "learning_rate": 0.00017865136898159083, "loss": 11.6677, "step": 30390 }, { "epoch": 0.6361676295738089, "grad_norm": 0.25447386503219604, "learning_rate": 0.00017865001491392722, "loss": 11.6627, "step": 30391 }, { "epoch": 0.6361885623377711, "grad_norm": 0.31012117862701416, "learning_rate": 0.00017864866080845502, "loss": 11.6717, "step": 30392 }, { "epoch": 0.6362094951017332, "grad_norm": 0.2654649615287781, "learning_rate": 0.0001786473066651748, "loss": 11.6643, "step": 30393 }, { "epoch": 0.6362304278656954, "grad_norm": 0.3164108395576477, "learning_rate": 0.00017864595248408724, "loss": 11.6802, "step": 30394 }, { "epoch": 0.6362513606296576, "grad_norm": 0.3549097180366516, "learning_rate": 0.00017864459826519302, "loss": 11.694, "step": 30395 }, { "epoch": 0.6362722933936197, "grad_norm": 0.2905605435371399, "learning_rate": 0.00017864324400849272, "loss": 11.6686, "step": 30396 }, { "epoch": 0.6362932261575819, "grad_norm": 0.303192675113678, "learning_rate": 0.00017864188971398708, "loss": 11.6652, "step": 30397 }, { "epoch": 0.636314158921544, "grad_norm": 0.3186674416065216, "learning_rate": 0.0001786405353816767, "loss": 11.6864, "step": 30398 }, { "epoch": 0.6363350916855062, "grad_norm": 0.2855924069881439, "learning_rate": 0.00017863918101156223, "loss": 11.6587, "step": 30399 }, { "epoch": 0.6363560244494683, "grad_norm": 0.29542097449302673, "learning_rate": 0.00017863782660364433, "loss": 11.6759, "step": 30400 }, { "epoch": 0.6363769572134305, "grad_norm": 0.27203133702278137, "learning_rate": 0.00017863647215792363, "loss": 11.6772, "step": 30401 }, { "epoch": 0.6363978899773927, "grad_norm": 0.42687729001045227, "learning_rate": 0.00017863511767440084, "loss": 11.6785, "step": 30402 }, { "epoch": 0.6364188227413548, "grad_norm": 0.33596256375312805, "learning_rate": 0.00017863376315307655, "loss": 11.6811, "step": 30403 }, { "epoch": 0.636439755505317, "grad_norm": 0.26205846667289734, "learning_rate": 0.00017863240859395145, "loss": 11.6771, "step": 30404 }, { "epoch": 0.636460688269279, "grad_norm": 0.3185872733592987, "learning_rate": 0.00017863105399702615, "loss": 11.6709, "step": 30405 }, { "epoch": 0.6364816210332412, "grad_norm": 0.27550989389419556, "learning_rate": 0.00017862969936230133, "loss": 11.671, "step": 30406 }, { "epoch": 0.6365025537972033, "grad_norm": 0.3125341832637787, "learning_rate": 0.00017862834468977768, "loss": 11.6696, "step": 30407 }, { "epoch": 0.6365234865611655, "grad_norm": 0.26409411430358887, "learning_rate": 0.00017862698997945576, "loss": 11.6778, "step": 30408 }, { "epoch": 0.6365444193251277, "grad_norm": 0.30916011333465576, "learning_rate": 0.0001786256352313363, "loss": 11.6848, "step": 30409 }, { "epoch": 0.6365653520890898, "grad_norm": 0.3205608129501343, "learning_rate": 0.0001786242804454199, "loss": 11.6623, "step": 30410 }, { "epoch": 0.636586284853052, "grad_norm": 0.3011475205421448, "learning_rate": 0.00017862292562170726, "loss": 11.6764, "step": 30411 }, { "epoch": 0.6366072176170141, "grad_norm": 0.3785236179828644, "learning_rate": 0.000178621570760199, "loss": 11.6756, "step": 30412 }, { "epoch": 0.6366281503809763, "grad_norm": 0.2713037431240082, "learning_rate": 0.00017862021586089575, "loss": 11.6785, "step": 30413 }, { "epoch": 0.6366490831449385, "grad_norm": 0.33741089701652527, "learning_rate": 0.0001786188609237982, "loss": 11.6602, "step": 30414 }, { "epoch": 0.6366700159089006, "grad_norm": 0.2701079249382019, "learning_rate": 0.00017861750594890698, "loss": 11.6633, "step": 30415 }, { "epoch": 0.6366909486728628, "grad_norm": 0.23897689580917358, "learning_rate": 0.00017861615093622276, "loss": 11.6572, "step": 30416 }, { "epoch": 0.6367118814368249, "grad_norm": 0.33098289370536804, "learning_rate": 0.00017861479588574618, "loss": 11.675, "step": 30417 }, { "epoch": 0.6367328142007871, "grad_norm": 0.2805064022541046, "learning_rate": 0.00017861344079747788, "loss": 11.6686, "step": 30418 }, { "epoch": 0.6367537469647492, "grad_norm": 0.3194083273410797, "learning_rate": 0.00017861208567141853, "loss": 11.6699, "step": 30419 }, { "epoch": 0.6367746797287114, "grad_norm": 0.32678908109664917, "learning_rate": 0.00017861073050756877, "loss": 11.6795, "step": 30420 }, { "epoch": 0.6367956124926736, "grad_norm": 0.2885774075984955, "learning_rate": 0.0001786093753059293, "loss": 11.6825, "step": 30421 }, { "epoch": 0.6368165452566357, "grad_norm": 0.3475600779056549, "learning_rate": 0.00017860802006650067, "loss": 11.6614, "step": 30422 }, { "epoch": 0.6368374780205979, "grad_norm": 0.28448590636253357, "learning_rate": 0.00017860666478928362, "loss": 11.6674, "step": 30423 }, { "epoch": 0.63685841078456, "grad_norm": 0.25684693455696106, "learning_rate": 0.00017860530947427875, "loss": 11.662, "step": 30424 }, { "epoch": 0.6368793435485222, "grad_norm": 0.2515416443347931, "learning_rate": 0.00017860395412148673, "loss": 11.6638, "step": 30425 }, { "epoch": 0.6369002763124842, "grad_norm": 0.3322314918041229, "learning_rate": 0.00017860259873090826, "loss": 11.6821, "step": 30426 }, { "epoch": 0.6369212090764464, "grad_norm": 0.2707024812698364, "learning_rate": 0.0001786012433025439, "loss": 11.6459, "step": 30427 }, { "epoch": 0.6369421418404086, "grad_norm": 0.4107488691806793, "learning_rate": 0.00017859988783639437, "loss": 11.6799, "step": 30428 }, { "epoch": 0.6369630746043707, "grad_norm": 0.26533612608909607, "learning_rate": 0.00017859853233246028, "loss": 11.663, "step": 30429 }, { "epoch": 0.6369840073683329, "grad_norm": 0.29302576184272766, "learning_rate": 0.00017859717679074232, "loss": 11.6658, "step": 30430 }, { "epoch": 0.637004940132295, "grad_norm": 0.24600021541118622, "learning_rate": 0.00017859582121124113, "loss": 11.662, "step": 30431 }, { "epoch": 0.6370258728962572, "grad_norm": 0.2389756292104721, "learning_rate": 0.00017859446559395731, "loss": 11.6713, "step": 30432 }, { "epoch": 0.6370468056602194, "grad_norm": 0.388382226228714, "learning_rate": 0.0001785931099388916, "loss": 11.6809, "step": 30433 }, { "epoch": 0.6370677384241815, "grad_norm": 0.2705170512199402, "learning_rate": 0.0001785917542460446, "loss": 11.6762, "step": 30434 }, { "epoch": 0.6370886711881437, "grad_norm": 0.26376578211784363, "learning_rate": 0.00017859039851541694, "loss": 11.679, "step": 30435 }, { "epoch": 0.6371096039521058, "grad_norm": 0.337907999753952, "learning_rate": 0.00017858904274700932, "loss": 11.6739, "step": 30436 }, { "epoch": 0.637130536716068, "grad_norm": 0.4010290801525116, "learning_rate": 0.00017858768694082238, "loss": 11.6698, "step": 30437 }, { "epoch": 0.6371514694800301, "grad_norm": 0.29745006561279297, "learning_rate": 0.00017858633109685675, "loss": 11.6706, "step": 30438 }, { "epoch": 0.6371724022439923, "grad_norm": 0.3821803033351898, "learning_rate": 0.00017858497521511311, "loss": 11.683, "step": 30439 }, { "epoch": 0.6371933350079545, "grad_norm": 0.2953168749809265, "learning_rate": 0.00017858361929559208, "loss": 11.6628, "step": 30440 }, { "epoch": 0.6372142677719166, "grad_norm": 0.3122234046459198, "learning_rate": 0.00017858226333829433, "loss": 11.6944, "step": 30441 }, { "epoch": 0.6372352005358788, "grad_norm": 0.3963381052017212, "learning_rate": 0.00017858090734322054, "loss": 11.6693, "step": 30442 }, { "epoch": 0.6372561332998409, "grad_norm": 0.27333885431289673, "learning_rate": 0.0001785795513103713, "loss": 11.6688, "step": 30443 }, { "epoch": 0.6372770660638031, "grad_norm": 0.2771787941455841, "learning_rate": 0.0001785781952397473, "loss": 11.6759, "step": 30444 }, { "epoch": 0.6372979988277652, "grad_norm": 0.41716548800468445, "learning_rate": 0.00017857683913134918, "loss": 11.6654, "step": 30445 }, { "epoch": 0.6373189315917274, "grad_norm": 0.3304440975189209, "learning_rate": 0.0001785754829851776, "loss": 11.6682, "step": 30446 }, { "epoch": 0.6373398643556896, "grad_norm": 0.31116852164268494, "learning_rate": 0.00017857412680123325, "loss": 11.6854, "step": 30447 }, { "epoch": 0.6373607971196517, "grad_norm": 0.28444886207580566, "learning_rate": 0.00017857277057951672, "loss": 11.6662, "step": 30448 }, { "epoch": 0.6373817298836139, "grad_norm": 0.3319627046585083, "learning_rate": 0.00017857141432002868, "loss": 11.6699, "step": 30449 }, { "epoch": 0.637402662647576, "grad_norm": 0.2850823402404785, "learning_rate": 0.00017857005802276977, "loss": 11.6704, "step": 30450 }, { "epoch": 0.6374235954115381, "grad_norm": 0.36402180790901184, "learning_rate": 0.0001785687016877407, "loss": 11.6742, "step": 30451 }, { "epoch": 0.6374445281755003, "grad_norm": 0.33024606108665466, "learning_rate": 0.00017856734531494203, "loss": 11.6841, "step": 30452 }, { "epoch": 0.6374654609394624, "grad_norm": 0.37961456179618835, "learning_rate": 0.00017856598890437448, "loss": 11.6704, "step": 30453 }, { "epoch": 0.6374863937034246, "grad_norm": 0.3611135482788086, "learning_rate": 0.00017856463245603872, "loss": 11.6926, "step": 30454 }, { "epoch": 0.6375073264673867, "grad_norm": 0.37004122138023376, "learning_rate": 0.00017856327596993534, "loss": 11.6764, "step": 30455 }, { "epoch": 0.6375282592313489, "grad_norm": 0.297982394695282, "learning_rate": 0.000178561919446065, "loss": 11.6706, "step": 30456 }, { "epoch": 0.637549191995311, "grad_norm": 0.3446245491504669, "learning_rate": 0.00017856056288442838, "loss": 11.6819, "step": 30457 }, { "epoch": 0.6375701247592732, "grad_norm": 0.2961677610874176, "learning_rate": 0.00017855920628502613, "loss": 11.6685, "step": 30458 }, { "epoch": 0.6375910575232354, "grad_norm": 0.4039023220539093, "learning_rate": 0.0001785578496478589, "loss": 11.6713, "step": 30459 }, { "epoch": 0.6376119902871975, "grad_norm": 0.37438464164733887, "learning_rate": 0.00017855649297292736, "loss": 11.6766, "step": 30460 }, { "epoch": 0.6376329230511597, "grad_norm": 0.3490779399871826, "learning_rate": 0.0001785551362602321, "loss": 11.6605, "step": 30461 }, { "epoch": 0.6376538558151218, "grad_norm": 0.2846640944480896, "learning_rate": 0.00017855377950977384, "loss": 11.686, "step": 30462 }, { "epoch": 0.637674788579084, "grad_norm": 0.29869380593299866, "learning_rate": 0.0001785524227215532, "loss": 11.6761, "step": 30463 }, { "epoch": 0.6376957213430461, "grad_norm": 0.28566792607307434, "learning_rate": 0.0001785510658955708, "loss": 11.6842, "step": 30464 }, { "epoch": 0.6377166541070083, "grad_norm": 0.3004682660102844, "learning_rate": 0.00017854970903182738, "loss": 11.6877, "step": 30465 }, { "epoch": 0.6377375868709705, "grad_norm": 0.26488274335861206, "learning_rate": 0.00017854835213032352, "loss": 11.6668, "step": 30466 }, { "epoch": 0.6377585196349326, "grad_norm": 0.3297966718673706, "learning_rate": 0.00017854699519105987, "loss": 11.6873, "step": 30467 }, { "epoch": 0.6377794523988948, "grad_norm": 0.2682267129421234, "learning_rate": 0.00017854563821403713, "loss": 11.657, "step": 30468 }, { "epoch": 0.6378003851628569, "grad_norm": 0.2541987895965576, "learning_rate": 0.00017854428119925593, "loss": 11.6618, "step": 30469 }, { "epoch": 0.6378213179268191, "grad_norm": 0.3134470283985138, "learning_rate": 0.0001785429241467169, "loss": 11.6603, "step": 30470 }, { "epoch": 0.6378422506907813, "grad_norm": 0.2825184762477875, "learning_rate": 0.00017854156705642072, "loss": 11.6642, "step": 30471 }, { "epoch": 0.6378631834547434, "grad_norm": 0.2463863492012024, "learning_rate": 0.00017854020992836805, "loss": 11.6698, "step": 30472 }, { "epoch": 0.6378841162187056, "grad_norm": 0.2689919173717499, "learning_rate": 0.00017853885276255952, "loss": 11.67, "step": 30473 }, { "epoch": 0.6379050489826676, "grad_norm": 0.37592417001724243, "learning_rate": 0.00017853749555899574, "loss": 11.6861, "step": 30474 }, { "epoch": 0.6379259817466298, "grad_norm": 0.34153592586517334, "learning_rate": 0.0001785361383176775, "loss": 11.6649, "step": 30475 }, { "epoch": 0.6379469145105919, "grad_norm": 0.3312877416610718, "learning_rate": 0.00017853478103860531, "loss": 11.6629, "step": 30476 }, { "epoch": 0.6379678472745541, "grad_norm": 0.2440464347600937, "learning_rate": 0.0001785334237217799, "loss": 11.6735, "step": 30477 }, { "epoch": 0.6379887800385163, "grad_norm": 0.3231053948402405, "learning_rate": 0.00017853206636720187, "loss": 11.6855, "step": 30478 }, { "epoch": 0.6380097128024784, "grad_norm": 0.40589639544487, "learning_rate": 0.0001785307089748719, "loss": 11.6673, "step": 30479 }, { "epoch": 0.6380306455664406, "grad_norm": 0.2905891239643097, "learning_rate": 0.0001785293515447907, "loss": 11.6782, "step": 30480 }, { "epoch": 0.6380515783304027, "grad_norm": 0.3568917214870453, "learning_rate": 0.00017852799407695882, "loss": 11.6601, "step": 30481 }, { "epoch": 0.6380725110943649, "grad_norm": 0.3478669226169586, "learning_rate": 0.00017852663657137697, "loss": 11.6818, "step": 30482 }, { "epoch": 0.638093443858327, "grad_norm": 0.24683676660060883, "learning_rate": 0.0001785252790280458, "loss": 11.6806, "step": 30483 }, { "epoch": 0.6381143766222892, "grad_norm": 0.2485920637845993, "learning_rate": 0.00017852392144696591, "loss": 11.6846, "step": 30484 }, { "epoch": 0.6381353093862514, "grad_norm": 0.27307450771331787, "learning_rate": 0.00017852256382813806, "loss": 11.6609, "step": 30485 }, { "epoch": 0.6381562421502135, "grad_norm": 0.3181014955043793, "learning_rate": 0.00017852120617156282, "loss": 11.6723, "step": 30486 }, { "epoch": 0.6381771749141757, "grad_norm": 0.27428385615348816, "learning_rate": 0.00017851984847724084, "loss": 11.6918, "step": 30487 }, { "epoch": 0.6381981076781378, "grad_norm": 0.3145984411239624, "learning_rate": 0.0001785184907451728, "loss": 11.6799, "step": 30488 }, { "epoch": 0.6382190404421, "grad_norm": 0.4244460165500641, "learning_rate": 0.0001785171329753594, "loss": 11.6706, "step": 30489 }, { "epoch": 0.6382399732060622, "grad_norm": 0.2869596779346466, "learning_rate": 0.00017851577516780117, "loss": 11.6709, "step": 30490 }, { "epoch": 0.6382609059700243, "grad_norm": 0.30387958884239197, "learning_rate": 0.00017851441732249888, "loss": 11.6736, "step": 30491 }, { "epoch": 0.6382818387339865, "grad_norm": 0.3263513147830963, "learning_rate": 0.0001785130594394531, "loss": 11.669, "step": 30492 }, { "epoch": 0.6383027714979486, "grad_norm": 0.3618675172328949, "learning_rate": 0.00017851170151866457, "loss": 11.6669, "step": 30493 }, { "epoch": 0.6383237042619108, "grad_norm": 0.35575297474861145, "learning_rate": 0.00017851034356013384, "loss": 11.6701, "step": 30494 }, { "epoch": 0.6383446370258729, "grad_norm": 0.3626849055290222, "learning_rate": 0.00017850898556386163, "loss": 11.6657, "step": 30495 }, { "epoch": 0.638365569789835, "grad_norm": 0.3873758912086487, "learning_rate": 0.0001785076275298486, "loss": 11.6859, "step": 30496 }, { "epoch": 0.6383865025537973, "grad_norm": 0.26326385140419006, "learning_rate": 0.00017850626945809536, "loss": 11.6604, "step": 30497 }, { "epoch": 0.6384074353177593, "grad_norm": 0.2995161712169647, "learning_rate": 0.00017850491134860258, "loss": 11.656, "step": 30498 }, { "epoch": 0.6384283680817215, "grad_norm": 0.2944856286048889, "learning_rate": 0.00017850355320137094, "loss": 11.6739, "step": 30499 }, { "epoch": 0.6384493008456836, "grad_norm": 0.29188603162765503, "learning_rate": 0.00017850219501640104, "loss": 11.6722, "step": 30500 }, { "epoch": 0.6384702336096458, "grad_norm": 0.35026815533638, "learning_rate": 0.00017850083679369356, "loss": 11.6676, "step": 30501 }, { "epoch": 0.6384911663736079, "grad_norm": 0.26798027753829956, "learning_rate": 0.00017849947853324917, "loss": 11.6701, "step": 30502 }, { "epoch": 0.6385120991375701, "grad_norm": 0.27656540274620056, "learning_rate": 0.00017849812023506854, "loss": 11.6737, "step": 30503 }, { "epoch": 0.6385330319015323, "grad_norm": 0.3407018184661865, "learning_rate": 0.00017849676189915224, "loss": 11.6611, "step": 30504 }, { "epoch": 0.6385539646654944, "grad_norm": 0.3914979100227356, "learning_rate": 0.00017849540352550097, "loss": 11.6507, "step": 30505 }, { "epoch": 0.6385748974294566, "grad_norm": 0.3014179468154907, "learning_rate": 0.0001784940451141154, "loss": 11.6853, "step": 30506 }, { "epoch": 0.6385958301934187, "grad_norm": 0.3921845853328705, "learning_rate": 0.0001784926866649962, "loss": 11.6675, "step": 30507 }, { "epoch": 0.6386167629573809, "grad_norm": 0.2505171298980713, "learning_rate": 0.00017849132817814394, "loss": 11.6685, "step": 30508 }, { "epoch": 0.6386376957213431, "grad_norm": 0.29082706570625305, "learning_rate": 0.00017848996965355937, "loss": 11.6634, "step": 30509 }, { "epoch": 0.6386586284853052, "grad_norm": 0.2673444151878357, "learning_rate": 0.0001784886110912431, "loss": 11.6832, "step": 30510 }, { "epoch": 0.6386795612492674, "grad_norm": 0.31192857027053833, "learning_rate": 0.00017848725249119574, "loss": 11.6752, "step": 30511 }, { "epoch": 0.6387004940132295, "grad_norm": 0.24914680421352386, "learning_rate": 0.000178485893853418, "loss": 11.6762, "step": 30512 }, { "epoch": 0.6387214267771917, "grad_norm": 0.3279973566532135, "learning_rate": 0.00017848453517791053, "loss": 11.6718, "step": 30513 }, { "epoch": 0.6387423595411538, "grad_norm": 0.3947006165981293, "learning_rate": 0.000178483176464674, "loss": 11.671, "step": 30514 }, { "epoch": 0.638763292305116, "grad_norm": 0.25651484727859497, "learning_rate": 0.00017848181771370897, "loss": 11.6724, "step": 30515 }, { "epoch": 0.6387842250690782, "grad_norm": 0.29741084575653076, "learning_rate": 0.0001784804589250162, "loss": 11.6466, "step": 30516 }, { "epoch": 0.6388051578330403, "grad_norm": 0.3762473464012146, "learning_rate": 0.00017847910009859626, "loss": 11.666, "step": 30517 }, { "epoch": 0.6388260905970025, "grad_norm": 0.31165826320648193, "learning_rate": 0.00017847774123444992, "loss": 11.6766, "step": 30518 }, { "epoch": 0.6388470233609645, "grad_norm": 0.23870079219341278, "learning_rate": 0.00017847638233257768, "loss": 11.6738, "step": 30519 }, { "epoch": 0.6388679561249268, "grad_norm": 0.3360280394554138, "learning_rate": 0.0001784750233929803, "loss": 11.6806, "step": 30520 }, { "epoch": 0.6388888888888888, "grad_norm": 0.26912030577659607, "learning_rate": 0.0001784736644156584, "loss": 11.6624, "step": 30521 }, { "epoch": 0.638909821652851, "grad_norm": 0.29241231083869934, "learning_rate": 0.00017847230540061264, "loss": 11.6767, "step": 30522 }, { "epoch": 0.6389307544168132, "grad_norm": 0.3563622832298279, "learning_rate": 0.0001784709463478437, "loss": 11.6746, "step": 30523 }, { "epoch": 0.6389516871807753, "grad_norm": 0.26043426990509033, "learning_rate": 0.00017846958725735218, "loss": 11.6739, "step": 30524 }, { "epoch": 0.6389726199447375, "grad_norm": 0.26344963908195496, "learning_rate": 0.00017846822812913872, "loss": 11.6668, "step": 30525 }, { "epoch": 0.6389935527086996, "grad_norm": 0.31210190057754517, "learning_rate": 0.00017846686896320405, "loss": 11.6823, "step": 30526 }, { "epoch": 0.6390144854726618, "grad_norm": 0.2895759642124176, "learning_rate": 0.00017846550975954877, "loss": 11.6832, "step": 30527 }, { "epoch": 0.6390354182366239, "grad_norm": 0.29618313908576965, "learning_rate": 0.00017846415051817356, "loss": 11.6763, "step": 30528 }, { "epoch": 0.6390563510005861, "grad_norm": 0.23145641386508942, "learning_rate": 0.00017846279123907906, "loss": 11.6886, "step": 30529 }, { "epoch": 0.6390772837645483, "grad_norm": 0.24168269336223602, "learning_rate": 0.0001784614319222659, "loss": 11.6753, "step": 30530 }, { "epoch": 0.6390982165285104, "grad_norm": 0.31342023611068726, "learning_rate": 0.00017846007256773478, "loss": 11.6532, "step": 30531 }, { "epoch": 0.6391191492924726, "grad_norm": 0.287337064743042, "learning_rate": 0.00017845871317548632, "loss": 11.6713, "step": 30532 }, { "epoch": 0.6391400820564347, "grad_norm": 0.24767224490642548, "learning_rate": 0.00017845735374552119, "loss": 11.6929, "step": 30533 }, { "epoch": 0.6391610148203969, "grad_norm": 0.27979373931884766, "learning_rate": 0.00017845599427784004, "loss": 11.6854, "step": 30534 }, { "epoch": 0.6391819475843591, "grad_norm": 0.3172830641269684, "learning_rate": 0.00017845463477244352, "loss": 11.6681, "step": 30535 }, { "epoch": 0.6392028803483212, "grad_norm": 0.2562122941017151, "learning_rate": 0.0001784532752293323, "loss": 11.6642, "step": 30536 }, { "epoch": 0.6392238131122834, "grad_norm": 0.308124840259552, "learning_rate": 0.00017845191564850698, "loss": 11.6713, "step": 30537 }, { "epoch": 0.6392447458762455, "grad_norm": 0.270749568939209, "learning_rate": 0.00017845055602996827, "loss": 11.6603, "step": 30538 }, { "epoch": 0.6392656786402077, "grad_norm": 0.26834776997566223, "learning_rate": 0.0001784491963737168, "loss": 11.6719, "step": 30539 }, { "epoch": 0.6392866114041698, "grad_norm": 0.24302814900875092, "learning_rate": 0.00017844783667975324, "loss": 11.6631, "step": 30540 }, { "epoch": 0.639307544168132, "grad_norm": 0.28756770491600037, "learning_rate": 0.00017844647694807825, "loss": 11.6757, "step": 30541 }, { "epoch": 0.6393284769320942, "grad_norm": 0.2736825942993164, "learning_rate": 0.00017844511717869243, "loss": 11.6565, "step": 30542 }, { "epoch": 0.6393494096960562, "grad_norm": 0.3135918378829956, "learning_rate": 0.0001784437573715965, "loss": 11.6723, "step": 30543 }, { "epoch": 0.6393703424600184, "grad_norm": 0.2720597982406616, "learning_rate": 0.00017844239752679106, "loss": 11.6614, "step": 30544 }, { "epoch": 0.6393912752239805, "grad_norm": 0.2563006579875946, "learning_rate": 0.0001784410376442768, "loss": 11.6664, "step": 30545 }, { "epoch": 0.6394122079879427, "grad_norm": 0.32546454668045044, "learning_rate": 0.00017843967772405436, "loss": 11.6538, "step": 30546 }, { "epoch": 0.6394331407519048, "grad_norm": 0.2582498788833618, "learning_rate": 0.0001784383177661244, "loss": 11.6826, "step": 30547 }, { "epoch": 0.639454073515867, "grad_norm": 0.31573575735092163, "learning_rate": 0.00017843695777048754, "loss": 11.6769, "step": 30548 }, { "epoch": 0.6394750062798292, "grad_norm": 0.28535932302474976, "learning_rate": 0.00017843559773714448, "loss": 11.6772, "step": 30549 }, { "epoch": 0.6394959390437913, "grad_norm": 0.2997879087924957, "learning_rate": 0.0001784342376660959, "loss": 11.6801, "step": 30550 }, { "epoch": 0.6395168718077535, "grad_norm": 0.3916095793247223, "learning_rate": 0.00017843287755734237, "loss": 11.6725, "step": 30551 }, { "epoch": 0.6395378045717156, "grad_norm": 0.3726719319820404, "learning_rate": 0.00017843151741088456, "loss": 11.6815, "step": 30552 }, { "epoch": 0.6395587373356778, "grad_norm": 0.3331322968006134, "learning_rate": 0.0001784301572267232, "loss": 11.6776, "step": 30553 }, { "epoch": 0.63957967009964, "grad_norm": 0.3137166202068329, "learning_rate": 0.00017842879700485887, "loss": 11.6649, "step": 30554 }, { "epoch": 0.6396006028636021, "grad_norm": 0.25925832986831665, "learning_rate": 0.00017842743674529224, "loss": 11.6707, "step": 30555 }, { "epoch": 0.6396215356275643, "grad_norm": 0.2978900372982025, "learning_rate": 0.00017842607644802394, "loss": 11.6715, "step": 30556 }, { "epoch": 0.6396424683915264, "grad_norm": 0.2351309210062027, "learning_rate": 0.0001784247161130547, "loss": 11.6788, "step": 30557 }, { "epoch": 0.6396634011554886, "grad_norm": 0.3495209813117981, "learning_rate": 0.0001784233557403851, "loss": 11.6856, "step": 30558 }, { "epoch": 0.6396843339194507, "grad_norm": 0.34794679284095764, "learning_rate": 0.00017842199533001584, "loss": 11.6596, "step": 30559 }, { "epoch": 0.6397052666834129, "grad_norm": 0.28556933999061584, "learning_rate": 0.00017842063488194754, "loss": 11.6718, "step": 30560 }, { "epoch": 0.6397261994473751, "grad_norm": 0.31519508361816406, "learning_rate": 0.00017841927439618088, "loss": 11.6937, "step": 30561 }, { "epoch": 0.6397471322113372, "grad_norm": 0.29157838225364685, "learning_rate": 0.00017841791387271648, "loss": 11.6557, "step": 30562 }, { "epoch": 0.6397680649752994, "grad_norm": 0.32256558537483215, "learning_rate": 0.00017841655331155506, "loss": 11.6744, "step": 30563 }, { "epoch": 0.6397889977392615, "grad_norm": 0.34750983119010925, "learning_rate": 0.00017841519271269722, "loss": 11.6643, "step": 30564 }, { "epoch": 0.6398099305032237, "grad_norm": 0.33142754435539246, "learning_rate": 0.00017841383207614362, "loss": 11.6513, "step": 30565 }, { "epoch": 0.6398308632671857, "grad_norm": 0.33387210965156555, "learning_rate": 0.0001784124714018949, "loss": 11.6673, "step": 30566 }, { "epoch": 0.639851796031148, "grad_norm": 0.330609530210495, "learning_rate": 0.00017841111068995176, "loss": 11.6605, "step": 30567 }, { "epoch": 0.6398727287951101, "grad_norm": 0.3597663640975952, "learning_rate": 0.0001784097499403148, "loss": 11.6597, "step": 30568 }, { "epoch": 0.6398936615590722, "grad_norm": 0.3122708797454834, "learning_rate": 0.00017840838915298473, "loss": 11.6787, "step": 30569 }, { "epoch": 0.6399145943230344, "grad_norm": 0.25857335329055786, "learning_rate": 0.00017840702832796216, "loss": 11.6821, "step": 30570 }, { "epoch": 0.6399355270869965, "grad_norm": 0.29277679324150085, "learning_rate": 0.00017840566746524776, "loss": 11.681, "step": 30571 }, { "epoch": 0.6399564598509587, "grad_norm": 0.2988468110561371, "learning_rate": 0.00017840430656484218, "loss": 11.6636, "step": 30572 }, { "epoch": 0.6399773926149209, "grad_norm": 0.28230154514312744, "learning_rate": 0.0001784029456267461, "loss": 11.6605, "step": 30573 }, { "epoch": 0.639998325378883, "grad_norm": 0.2658420503139496, "learning_rate": 0.00017840158465096012, "loss": 11.6788, "step": 30574 }, { "epoch": 0.6400192581428452, "grad_norm": 0.27314063906669617, "learning_rate": 0.00017840022363748497, "loss": 11.6727, "step": 30575 }, { "epoch": 0.6400401909068073, "grad_norm": 0.3348868787288666, "learning_rate": 0.00017839886258632121, "loss": 11.6767, "step": 30576 }, { "epoch": 0.6400611236707695, "grad_norm": 0.2947385609149933, "learning_rate": 0.00017839750149746956, "loss": 11.6639, "step": 30577 }, { "epoch": 0.6400820564347316, "grad_norm": 0.2953009009361267, "learning_rate": 0.0001783961403709307, "loss": 11.6836, "step": 30578 }, { "epoch": 0.6401029891986938, "grad_norm": 0.2956239879131317, "learning_rate": 0.0001783947792067052, "loss": 11.6587, "step": 30579 }, { "epoch": 0.640123921962656, "grad_norm": 0.3036971986293793, "learning_rate": 0.00017839341800479378, "loss": 11.6738, "step": 30580 }, { "epoch": 0.6401448547266181, "grad_norm": 0.27978792786598206, "learning_rate": 0.00017839205676519704, "loss": 11.6601, "step": 30581 }, { "epoch": 0.6401657874905803, "grad_norm": 0.3167535662651062, "learning_rate": 0.00017839069548791572, "loss": 11.6866, "step": 30582 }, { "epoch": 0.6401867202545424, "grad_norm": 0.28909310698509216, "learning_rate": 0.0001783893341729504, "loss": 11.6802, "step": 30583 }, { "epoch": 0.6402076530185046, "grad_norm": 0.2922666668891907, "learning_rate": 0.00017838797282030176, "loss": 11.6664, "step": 30584 }, { "epoch": 0.6402285857824667, "grad_norm": 0.4362353980541229, "learning_rate": 0.00017838661142997045, "loss": 11.669, "step": 30585 }, { "epoch": 0.6402495185464289, "grad_norm": 0.3427128195762634, "learning_rate": 0.0001783852500019571, "loss": 11.6748, "step": 30586 }, { "epoch": 0.6402704513103911, "grad_norm": 0.33269158005714417, "learning_rate": 0.00017838388853626242, "loss": 11.6732, "step": 30587 }, { "epoch": 0.6402913840743532, "grad_norm": 0.5174521803855896, "learning_rate": 0.00017838252703288702, "loss": 11.6664, "step": 30588 }, { "epoch": 0.6403123168383154, "grad_norm": 0.3079071640968323, "learning_rate": 0.00017838116549183158, "loss": 11.6677, "step": 30589 }, { "epoch": 0.6403332496022774, "grad_norm": 0.3680688738822937, "learning_rate": 0.00017837980391309673, "loss": 11.6674, "step": 30590 }, { "epoch": 0.6403541823662396, "grad_norm": 0.33266595005989075, "learning_rate": 0.00017837844229668312, "loss": 11.6625, "step": 30591 }, { "epoch": 0.6403751151302018, "grad_norm": 0.2643727660179138, "learning_rate": 0.00017837708064259147, "loss": 11.6647, "step": 30592 }, { "epoch": 0.6403960478941639, "grad_norm": 0.28918638825416565, "learning_rate": 0.00017837571895082234, "loss": 11.6758, "step": 30593 }, { "epoch": 0.6404169806581261, "grad_norm": 0.3126065135002136, "learning_rate": 0.00017837435722137647, "loss": 11.6803, "step": 30594 }, { "epoch": 0.6404379134220882, "grad_norm": 0.3599139153957367, "learning_rate": 0.00017837299545425445, "loss": 11.6644, "step": 30595 }, { "epoch": 0.6404588461860504, "grad_norm": 0.3891555070877075, "learning_rate": 0.00017837163364945696, "loss": 11.6841, "step": 30596 }, { "epoch": 0.6404797789500125, "grad_norm": 0.28655683994293213, "learning_rate": 0.00017837027180698468, "loss": 11.6732, "step": 30597 }, { "epoch": 0.6405007117139747, "grad_norm": 0.3772486746311188, "learning_rate": 0.0001783689099268382, "loss": 11.6729, "step": 30598 }, { "epoch": 0.6405216444779369, "grad_norm": 0.3137133717536926, "learning_rate": 0.00017836754800901826, "loss": 11.675, "step": 30599 }, { "epoch": 0.640542577241899, "grad_norm": 0.26760411262512207, "learning_rate": 0.00017836618605352543, "loss": 11.6896, "step": 30600 }, { "epoch": 0.6405635100058612, "grad_norm": 0.23989388346672058, "learning_rate": 0.00017836482406036042, "loss": 11.6699, "step": 30601 }, { "epoch": 0.6405844427698233, "grad_norm": 0.25748223066329956, "learning_rate": 0.00017836346202952387, "loss": 11.6744, "step": 30602 }, { "epoch": 0.6406053755337855, "grad_norm": 0.35247382521629333, "learning_rate": 0.00017836209996101646, "loss": 11.6542, "step": 30603 }, { "epoch": 0.6406263082977476, "grad_norm": 1.806365966796875, "learning_rate": 0.00017836073785483877, "loss": 11.652, "step": 30604 }, { "epoch": 0.6406472410617098, "grad_norm": 0.26670747995376587, "learning_rate": 0.0001783593757109915, "loss": 11.6807, "step": 30605 }, { "epoch": 0.640668173825672, "grad_norm": 0.24197453260421753, "learning_rate": 0.00017835801352947535, "loss": 11.6767, "step": 30606 }, { "epoch": 0.6406891065896341, "grad_norm": 0.2806321680545807, "learning_rate": 0.00017835665131029092, "loss": 11.6743, "step": 30607 }, { "epoch": 0.6407100393535963, "grad_norm": 0.3129093050956726, "learning_rate": 0.00017835528905343887, "loss": 11.6659, "step": 30608 }, { "epoch": 0.6407309721175584, "grad_norm": 0.28608238697052, "learning_rate": 0.00017835392675891987, "loss": 11.6769, "step": 30609 }, { "epoch": 0.6407519048815206, "grad_norm": 0.28110718727111816, "learning_rate": 0.00017835256442673455, "loss": 11.6698, "step": 30610 }, { "epoch": 0.6407728376454828, "grad_norm": 0.43124496936798096, "learning_rate": 0.0001783512020568836, "loss": 11.6649, "step": 30611 }, { "epoch": 0.6407937704094449, "grad_norm": 0.265648752450943, "learning_rate": 0.00017834983964936765, "loss": 11.6751, "step": 30612 }, { "epoch": 0.640814703173407, "grad_norm": 0.280758798122406, "learning_rate": 0.00017834847720418735, "loss": 11.6751, "step": 30613 }, { "epoch": 0.6408356359373691, "grad_norm": 0.3438417315483093, "learning_rate": 0.00017834711472134338, "loss": 11.6902, "step": 30614 }, { "epoch": 0.6408565687013313, "grad_norm": 0.3010128438472748, "learning_rate": 0.00017834575220083637, "loss": 11.6608, "step": 30615 }, { "epoch": 0.6408775014652934, "grad_norm": 0.35493314266204834, "learning_rate": 0.000178344389642667, "loss": 11.6755, "step": 30616 }, { "epoch": 0.6408984342292556, "grad_norm": 0.3489103615283966, "learning_rate": 0.00017834302704683593, "loss": 11.6815, "step": 30617 }, { "epoch": 0.6409193669932178, "grad_norm": 0.3074370324611664, "learning_rate": 0.00017834166441334378, "loss": 11.6549, "step": 30618 }, { "epoch": 0.6409402997571799, "grad_norm": 0.3865382671356201, "learning_rate": 0.0001783403017421912, "loss": 11.6836, "step": 30619 }, { "epoch": 0.6409612325211421, "grad_norm": 0.2653575837612152, "learning_rate": 0.00017833893903337889, "loss": 11.6665, "step": 30620 }, { "epoch": 0.6409821652851042, "grad_norm": 0.2867690920829773, "learning_rate": 0.00017833757628690745, "loss": 11.6575, "step": 30621 }, { "epoch": 0.6410030980490664, "grad_norm": 0.26868677139282227, "learning_rate": 0.0001783362135027776, "loss": 11.6713, "step": 30622 }, { "epoch": 0.6410240308130285, "grad_norm": 0.30194464325904846, "learning_rate": 0.00017833485068098997, "loss": 11.6767, "step": 30623 }, { "epoch": 0.6410449635769907, "grad_norm": 0.25930675864219666, "learning_rate": 0.0001783334878215452, "loss": 11.6564, "step": 30624 }, { "epoch": 0.6410658963409529, "grad_norm": 0.3300466239452362, "learning_rate": 0.00017833212492444393, "loss": 11.6712, "step": 30625 }, { "epoch": 0.641086829104915, "grad_norm": 0.27546724677085876, "learning_rate": 0.00017833076198968688, "loss": 11.6627, "step": 30626 }, { "epoch": 0.6411077618688772, "grad_norm": 0.2902037799358368, "learning_rate": 0.00017832939901727462, "loss": 11.6824, "step": 30627 }, { "epoch": 0.6411286946328393, "grad_norm": 5.288132190704346, "learning_rate": 0.00017832803600720788, "loss": 11.6713, "step": 30628 }, { "epoch": 0.6411496273968015, "grad_norm": 0.31122061610221863, "learning_rate": 0.00017832667295948723, "loss": 11.6636, "step": 30629 }, { "epoch": 0.6411705601607637, "grad_norm": 0.3024231791496277, "learning_rate": 0.00017832530987411344, "loss": 11.6878, "step": 30630 }, { "epoch": 0.6411914929247258, "grad_norm": 0.2526000440120697, "learning_rate": 0.0001783239467510871, "loss": 11.6487, "step": 30631 }, { "epoch": 0.641212425688688, "grad_norm": 0.32797786593437195, "learning_rate": 0.00017832258359040885, "loss": 11.6617, "step": 30632 }, { "epoch": 0.6412333584526501, "grad_norm": 0.31927305459976196, "learning_rate": 0.00017832122039207934, "loss": 11.6719, "step": 30633 }, { "epoch": 0.6412542912166123, "grad_norm": 0.34756648540496826, "learning_rate": 0.0001783198571560993, "loss": 11.6583, "step": 30634 }, { "epoch": 0.6412752239805743, "grad_norm": 0.3596835136413574, "learning_rate": 0.0001783184938824693, "loss": 11.6838, "step": 30635 }, { "epoch": 0.6412961567445365, "grad_norm": 0.2950953245162964, "learning_rate": 0.00017831713057119, "loss": 11.6826, "step": 30636 }, { "epoch": 0.6413170895084987, "grad_norm": 0.2597472369670868, "learning_rate": 0.00017831576722226216, "loss": 11.6599, "step": 30637 }, { "epoch": 0.6413380222724608, "grad_norm": 0.4336691200733185, "learning_rate": 0.00017831440383568633, "loss": 11.6874, "step": 30638 }, { "epoch": 0.641358955036423, "grad_norm": 0.3540958762168884, "learning_rate": 0.0001783130404114632, "loss": 11.6631, "step": 30639 }, { "epoch": 0.6413798878003851, "grad_norm": 0.34482070803642273, "learning_rate": 0.00017831167694959342, "loss": 11.6797, "step": 30640 }, { "epoch": 0.6414008205643473, "grad_norm": 0.31824320554733276, "learning_rate": 0.00017831031345007765, "loss": 11.6791, "step": 30641 }, { "epoch": 0.6414217533283094, "grad_norm": 0.26388588547706604, "learning_rate": 0.00017830894991291652, "loss": 11.6723, "step": 30642 }, { "epoch": 0.6414426860922716, "grad_norm": 0.3499777019023895, "learning_rate": 0.00017830758633811074, "loss": 11.6728, "step": 30643 }, { "epoch": 0.6414636188562338, "grad_norm": 0.2827419936656952, "learning_rate": 0.0001783062227256609, "loss": 11.6644, "step": 30644 }, { "epoch": 0.6414845516201959, "grad_norm": 0.31161263585090637, "learning_rate": 0.00017830485907556771, "loss": 11.6656, "step": 30645 }, { "epoch": 0.6415054843841581, "grad_norm": 0.30383026599884033, "learning_rate": 0.0001783034953878318, "loss": 11.6901, "step": 30646 }, { "epoch": 0.6415264171481202, "grad_norm": 0.2670406699180603, "learning_rate": 0.00017830213166245384, "loss": 11.6554, "step": 30647 }, { "epoch": 0.6415473499120824, "grad_norm": 0.36594921350479126, "learning_rate": 0.00017830076789943447, "loss": 11.6724, "step": 30648 }, { "epoch": 0.6415682826760446, "grad_norm": 0.30112773180007935, "learning_rate": 0.00017829940409877433, "loss": 11.6718, "step": 30649 }, { "epoch": 0.6415892154400067, "grad_norm": 0.2862561047077179, "learning_rate": 0.00017829804026047415, "loss": 11.6593, "step": 30650 }, { "epoch": 0.6416101482039689, "grad_norm": 0.23271159827709198, "learning_rate": 0.0001782966763845345, "loss": 11.673, "step": 30651 }, { "epoch": 0.641631080967931, "grad_norm": 0.3220294117927551, "learning_rate": 0.00017829531247095606, "loss": 11.6713, "step": 30652 }, { "epoch": 0.6416520137318932, "grad_norm": 0.3429969549179077, "learning_rate": 0.0001782939485197395, "loss": 11.6625, "step": 30653 }, { "epoch": 0.6416729464958553, "grad_norm": 0.2883537709712982, "learning_rate": 0.00017829258453088545, "loss": 11.6795, "step": 30654 }, { "epoch": 0.6416938792598175, "grad_norm": 0.29038968682289124, "learning_rate": 0.00017829122050439463, "loss": 11.6803, "step": 30655 }, { "epoch": 0.6417148120237797, "grad_norm": 0.33550745248794556, "learning_rate": 0.00017828985644026762, "loss": 11.6726, "step": 30656 }, { "epoch": 0.6417357447877418, "grad_norm": 0.2623453140258789, "learning_rate": 0.00017828849233850514, "loss": 11.6554, "step": 30657 }, { "epoch": 0.641756677551704, "grad_norm": 0.3238471746444702, "learning_rate": 0.00017828712819910778, "loss": 11.6649, "step": 30658 }, { "epoch": 0.641777610315666, "grad_norm": 0.28813669085502625, "learning_rate": 0.00017828576402207624, "loss": 11.6723, "step": 30659 }, { "epoch": 0.6417985430796282, "grad_norm": 0.33090901374816895, "learning_rate": 0.00017828439980741115, "loss": 11.6552, "step": 30660 }, { "epoch": 0.6418194758435903, "grad_norm": 0.2871355414390564, "learning_rate": 0.00017828303555511322, "loss": 11.6534, "step": 30661 }, { "epoch": 0.6418404086075525, "grad_norm": 0.24839724600315094, "learning_rate": 0.00017828167126518306, "loss": 11.6736, "step": 30662 }, { "epoch": 0.6418613413715147, "grad_norm": 0.3042275607585907, "learning_rate": 0.00017828030693762127, "loss": 11.6699, "step": 30663 }, { "epoch": 0.6418822741354768, "grad_norm": 0.27102088928222656, "learning_rate": 0.00017827894257242864, "loss": 11.676, "step": 30664 }, { "epoch": 0.641903206899439, "grad_norm": 0.2659740746021271, "learning_rate": 0.0001782775781696057, "loss": 11.6728, "step": 30665 }, { "epoch": 0.6419241396634011, "grad_norm": 0.31987664103507996, "learning_rate": 0.0001782762137291532, "loss": 11.6794, "step": 30666 }, { "epoch": 0.6419450724273633, "grad_norm": 0.37772732973098755, "learning_rate": 0.00017827484925107175, "loss": 11.6677, "step": 30667 }, { "epoch": 0.6419660051913255, "grad_norm": 0.3086623251438141, "learning_rate": 0.000178273484735362, "loss": 11.6824, "step": 30668 }, { "epoch": 0.6419869379552876, "grad_norm": 0.3822469413280487, "learning_rate": 0.0001782721201820246, "loss": 11.6818, "step": 30669 }, { "epoch": 0.6420078707192498, "grad_norm": 0.3562813699245453, "learning_rate": 0.00017827075559106025, "loss": 11.6734, "step": 30670 }, { "epoch": 0.6420288034832119, "grad_norm": 0.2979332208633423, "learning_rate": 0.00017826939096246956, "loss": 11.6756, "step": 30671 }, { "epoch": 0.6420497362471741, "grad_norm": 0.2393454760313034, "learning_rate": 0.00017826802629625321, "loss": 11.6731, "step": 30672 }, { "epoch": 0.6420706690111362, "grad_norm": 0.31251025199890137, "learning_rate": 0.00017826666159241185, "loss": 11.6794, "step": 30673 }, { "epoch": 0.6420916017750984, "grad_norm": 0.26013675332069397, "learning_rate": 0.00017826529685094616, "loss": 11.6661, "step": 30674 }, { "epoch": 0.6421125345390606, "grad_norm": 0.29454779624938965, "learning_rate": 0.00017826393207185675, "loss": 11.6544, "step": 30675 }, { "epoch": 0.6421334673030227, "grad_norm": 0.28687784075737, "learning_rate": 0.0001782625672551443, "loss": 11.6753, "step": 30676 }, { "epoch": 0.6421544000669849, "grad_norm": 0.2669055461883545, "learning_rate": 0.00017826120240080944, "loss": 11.6636, "step": 30677 }, { "epoch": 0.642175332830947, "grad_norm": 0.35967445373535156, "learning_rate": 0.0001782598375088529, "loss": 11.6657, "step": 30678 }, { "epoch": 0.6421962655949092, "grad_norm": 0.27098166942596436, "learning_rate": 0.00017825847257927529, "loss": 11.6711, "step": 30679 }, { "epoch": 0.6422171983588713, "grad_norm": 0.36980366706848145, "learning_rate": 0.00017825710761207723, "loss": 11.6608, "step": 30680 }, { "epoch": 0.6422381311228335, "grad_norm": 0.4961065948009491, "learning_rate": 0.00017825574260725942, "loss": 11.6659, "step": 30681 }, { "epoch": 0.6422590638867957, "grad_norm": 0.308932363986969, "learning_rate": 0.00017825437756482254, "loss": 11.6753, "step": 30682 }, { "epoch": 0.6422799966507577, "grad_norm": 0.34245729446411133, "learning_rate": 0.00017825301248476717, "loss": 11.6524, "step": 30683 }, { "epoch": 0.64230092941472, "grad_norm": 0.28247740864753723, "learning_rate": 0.000178251647367094, "loss": 11.6698, "step": 30684 }, { "epoch": 0.642321862178682, "grad_norm": 0.30995291471481323, "learning_rate": 0.00017825028221180373, "loss": 11.6621, "step": 30685 }, { "epoch": 0.6423427949426442, "grad_norm": 0.27776631712913513, "learning_rate": 0.00017824891701889695, "loss": 11.671, "step": 30686 }, { "epoch": 0.6423637277066064, "grad_norm": 0.24137991666793823, "learning_rate": 0.00017824755178837438, "loss": 11.6792, "step": 30687 }, { "epoch": 0.6423846604705685, "grad_norm": 0.23110677301883698, "learning_rate": 0.00017824618652023663, "loss": 11.6724, "step": 30688 }, { "epoch": 0.6424055932345307, "grad_norm": 0.3116207718849182, "learning_rate": 0.0001782448212144844, "loss": 11.6701, "step": 30689 }, { "epoch": 0.6424265259984928, "grad_norm": 0.31331127882003784, "learning_rate": 0.00017824345587111825, "loss": 11.6787, "step": 30690 }, { "epoch": 0.642447458762455, "grad_norm": 0.30153244733810425, "learning_rate": 0.00017824209049013896, "loss": 11.6536, "step": 30691 }, { "epoch": 0.6424683915264171, "grad_norm": 0.2917231619358063, "learning_rate": 0.0001782407250715471, "loss": 11.6822, "step": 30692 }, { "epoch": 0.6424893242903793, "grad_norm": 0.2958431839942932, "learning_rate": 0.00017823935961534338, "loss": 11.6828, "step": 30693 }, { "epoch": 0.6425102570543415, "grad_norm": 0.34829071164131165, "learning_rate": 0.0001782379941215284, "loss": 11.6907, "step": 30694 }, { "epoch": 0.6425311898183036, "grad_norm": 0.3360961973667145, "learning_rate": 0.00017823662859010287, "loss": 11.6501, "step": 30695 }, { "epoch": 0.6425521225822658, "grad_norm": 0.2860317528247833, "learning_rate": 0.0001782352630210674, "loss": 11.6843, "step": 30696 }, { "epoch": 0.6425730553462279, "grad_norm": 0.2638147175312042, "learning_rate": 0.0001782338974144227, "loss": 11.6803, "step": 30697 }, { "epoch": 0.6425939881101901, "grad_norm": 0.22472964227199554, "learning_rate": 0.00017823253177016939, "loss": 11.6801, "step": 30698 }, { "epoch": 0.6426149208741522, "grad_norm": 0.33808380365371704, "learning_rate": 0.0001782311660883081, "loss": 11.6654, "step": 30699 }, { "epoch": 0.6426358536381144, "grad_norm": 0.2990433871746063, "learning_rate": 0.00017822980036883958, "loss": 11.667, "step": 30700 }, { "epoch": 0.6426567864020766, "grad_norm": 0.309349924325943, "learning_rate": 0.0001782284346117644, "loss": 11.6832, "step": 30701 }, { "epoch": 0.6426777191660387, "grad_norm": 0.275208055973053, "learning_rate": 0.00017822706881708323, "loss": 11.6783, "step": 30702 }, { "epoch": 0.6426986519300009, "grad_norm": 0.2918749749660492, "learning_rate": 0.00017822570298479677, "loss": 11.661, "step": 30703 }, { "epoch": 0.642719584693963, "grad_norm": 0.2414151281118393, "learning_rate": 0.00017822433711490562, "loss": 11.6832, "step": 30704 }, { "epoch": 0.6427405174579252, "grad_norm": 0.3359241485595703, "learning_rate": 0.00017822297120741046, "loss": 11.6742, "step": 30705 }, { "epoch": 0.6427614502218872, "grad_norm": 0.2748960256576538, "learning_rate": 0.00017822160526231198, "loss": 11.6751, "step": 30706 }, { "epoch": 0.6427823829858494, "grad_norm": 0.2965478301048279, "learning_rate": 0.0001782202392796108, "loss": 11.6955, "step": 30707 }, { "epoch": 0.6428033157498116, "grad_norm": 0.29381856322288513, "learning_rate": 0.00017821887325930756, "loss": 11.6697, "step": 30708 }, { "epoch": 0.6428242485137737, "grad_norm": 0.35244283080101013, "learning_rate": 0.00017821750720140297, "loss": 11.6673, "step": 30709 }, { "epoch": 0.6428451812777359, "grad_norm": 0.32005202770233154, "learning_rate": 0.0001782161411058976, "loss": 11.6811, "step": 30710 }, { "epoch": 0.642866114041698, "grad_norm": 0.2707224488258362, "learning_rate": 0.00017821477497279224, "loss": 11.6801, "step": 30711 }, { "epoch": 0.6428870468056602, "grad_norm": 0.29277366399765015, "learning_rate": 0.00017821340880208743, "loss": 11.6595, "step": 30712 }, { "epoch": 0.6429079795696224, "grad_norm": 0.3913690149784088, "learning_rate": 0.00017821204259378388, "loss": 11.6738, "step": 30713 }, { "epoch": 0.6429289123335845, "grad_norm": 0.2771541178226471, "learning_rate": 0.00017821067634788223, "loss": 11.6761, "step": 30714 }, { "epoch": 0.6429498450975467, "grad_norm": 0.4735073745250702, "learning_rate": 0.00017820931006438313, "loss": 11.6685, "step": 30715 }, { "epoch": 0.6429707778615088, "grad_norm": 0.2748575508594513, "learning_rate": 0.00017820794374328726, "loss": 11.6721, "step": 30716 }, { "epoch": 0.642991710625471, "grad_norm": 0.3251035809516907, "learning_rate": 0.00017820657738459525, "loss": 11.6722, "step": 30717 }, { "epoch": 0.6430126433894331, "grad_norm": 0.2656956613063812, "learning_rate": 0.0001782052109883078, "loss": 11.6761, "step": 30718 }, { "epoch": 0.6430335761533953, "grad_norm": 0.3547532558441162, "learning_rate": 0.0001782038445544255, "loss": 11.6757, "step": 30719 }, { "epoch": 0.6430545089173575, "grad_norm": 0.43089768290519714, "learning_rate": 0.00017820247808294904, "loss": 11.6701, "step": 30720 }, { "epoch": 0.6430754416813196, "grad_norm": 0.32804155349731445, "learning_rate": 0.0001782011115738791, "loss": 11.6582, "step": 30721 }, { "epoch": 0.6430963744452818, "grad_norm": 0.32946667075157166, "learning_rate": 0.00017819974502721633, "loss": 11.6696, "step": 30722 }, { "epoch": 0.6431173072092439, "grad_norm": 0.3846326768398285, "learning_rate": 0.00017819837844296137, "loss": 11.6649, "step": 30723 }, { "epoch": 0.6431382399732061, "grad_norm": 0.31093859672546387, "learning_rate": 0.0001781970118211149, "loss": 11.6681, "step": 30724 }, { "epoch": 0.6431591727371682, "grad_norm": 0.30989760160446167, "learning_rate": 0.00017819564516167752, "loss": 11.6579, "step": 30725 }, { "epoch": 0.6431801055011304, "grad_norm": 0.32331711053848267, "learning_rate": 0.00017819427846464992, "loss": 11.674, "step": 30726 }, { "epoch": 0.6432010382650926, "grad_norm": 0.4023149609565735, "learning_rate": 0.0001781929117300328, "loss": 11.6859, "step": 30727 }, { "epoch": 0.6432219710290547, "grad_norm": 0.286685585975647, "learning_rate": 0.00017819154495782676, "loss": 11.6628, "step": 30728 }, { "epoch": 0.6432429037930169, "grad_norm": 0.29969853162765503, "learning_rate": 0.00017819017814803249, "loss": 11.6644, "step": 30729 }, { "epoch": 0.6432638365569789, "grad_norm": 0.39672935009002686, "learning_rate": 0.00017818881130065063, "loss": 11.6612, "step": 30730 }, { "epoch": 0.6432847693209411, "grad_norm": 0.30306148529052734, "learning_rate": 0.00017818744441568186, "loss": 11.66, "step": 30731 }, { "epoch": 0.6433057020849033, "grad_norm": 0.33830735087394714, "learning_rate": 0.0001781860774931268, "loss": 11.6699, "step": 30732 }, { "epoch": 0.6433266348488654, "grad_norm": 0.3129942715167999, "learning_rate": 0.00017818471053298614, "loss": 11.6662, "step": 30733 }, { "epoch": 0.6433475676128276, "grad_norm": 0.3123827576637268, "learning_rate": 0.00017818334353526049, "loss": 11.6665, "step": 30734 }, { "epoch": 0.6433685003767897, "grad_norm": 0.3724555969238281, "learning_rate": 0.00017818197649995056, "loss": 11.6716, "step": 30735 }, { "epoch": 0.6433894331407519, "grad_norm": 0.28699424862861633, "learning_rate": 0.000178180609427057, "loss": 11.6742, "step": 30736 }, { "epoch": 0.643410365904714, "grad_norm": 0.3458028733730316, "learning_rate": 0.00017817924231658042, "loss": 11.6691, "step": 30737 }, { "epoch": 0.6434312986686762, "grad_norm": 0.37499937415122986, "learning_rate": 0.0001781778751685215, "loss": 11.6571, "step": 30738 }, { "epoch": 0.6434522314326384, "grad_norm": 0.33568334579467773, "learning_rate": 0.00017817650798288096, "loss": 11.6677, "step": 30739 }, { "epoch": 0.6434731641966005, "grad_norm": 0.2968152165412903, "learning_rate": 0.0001781751407596594, "loss": 11.6659, "step": 30740 }, { "epoch": 0.6434940969605627, "grad_norm": 0.26184001564979553, "learning_rate": 0.00017817377349885747, "loss": 11.6854, "step": 30741 }, { "epoch": 0.6435150297245248, "grad_norm": 0.40067508816719055, "learning_rate": 0.00017817240620047583, "loss": 11.661, "step": 30742 }, { "epoch": 0.643535962488487, "grad_norm": 0.3086075484752655, "learning_rate": 0.00017817103886451515, "loss": 11.6749, "step": 30743 }, { "epoch": 0.6435568952524491, "grad_norm": 0.3617599606513977, "learning_rate": 0.0001781696714909761, "loss": 11.6767, "step": 30744 }, { "epoch": 0.6435778280164113, "grad_norm": 0.3000832796096802, "learning_rate": 0.0001781683040798593, "loss": 11.6672, "step": 30745 }, { "epoch": 0.6435987607803735, "grad_norm": 0.25830885767936707, "learning_rate": 0.00017816693663116545, "loss": 11.6626, "step": 30746 }, { "epoch": 0.6436196935443356, "grad_norm": 0.3830622434616089, "learning_rate": 0.0001781655691448952, "loss": 11.6605, "step": 30747 }, { "epoch": 0.6436406263082978, "grad_norm": 0.35252803564071655, "learning_rate": 0.0001781642016210492, "loss": 11.6715, "step": 30748 }, { "epoch": 0.6436615590722599, "grad_norm": 0.2995873689651489, "learning_rate": 0.00017816283405962805, "loss": 11.6625, "step": 30749 }, { "epoch": 0.6436824918362221, "grad_norm": 0.3180781900882721, "learning_rate": 0.0001781614664606325, "loss": 11.6674, "step": 30750 }, { "epoch": 0.6437034246001843, "grad_norm": 0.34516507387161255, "learning_rate": 0.00017816009882406312, "loss": 11.6648, "step": 30751 }, { "epoch": 0.6437243573641463, "grad_norm": 0.3725747764110565, "learning_rate": 0.00017815873114992065, "loss": 11.659, "step": 30752 }, { "epoch": 0.6437452901281085, "grad_norm": 0.2604236900806427, "learning_rate": 0.00017815736343820571, "loss": 11.6667, "step": 30753 }, { "epoch": 0.6437662228920706, "grad_norm": 0.3150973320007324, "learning_rate": 0.00017815599568891897, "loss": 11.6621, "step": 30754 }, { "epoch": 0.6437871556560328, "grad_norm": 0.28087469935417175, "learning_rate": 0.00017815462790206108, "loss": 11.6617, "step": 30755 }, { "epoch": 0.6438080884199949, "grad_norm": 0.283659964799881, "learning_rate": 0.0001781532600776327, "loss": 11.6723, "step": 30756 }, { "epoch": 0.6438290211839571, "grad_norm": 0.27256685495376587, "learning_rate": 0.00017815189221563445, "loss": 11.6729, "step": 30757 }, { "epoch": 0.6438499539479193, "grad_norm": 0.3218190371990204, "learning_rate": 0.000178150524316067, "loss": 11.6775, "step": 30758 }, { "epoch": 0.6438708867118814, "grad_norm": 0.36354976892471313, "learning_rate": 0.00017814915637893108, "loss": 11.6715, "step": 30759 }, { "epoch": 0.6438918194758436, "grad_norm": 0.23788651823997498, "learning_rate": 0.00017814778840422727, "loss": 11.6764, "step": 30760 }, { "epoch": 0.6439127522398057, "grad_norm": 0.2900239825248718, "learning_rate": 0.00017814642039195623, "loss": 11.6908, "step": 30761 }, { "epoch": 0.6439336850037679, "grad_norm": 0.4531209170818329, "learning_rate": 0.0001781450523421187, "loss": 11.6765, "step": 30762 }, { "epoch": 0.64395461776773, "grad_norm": 0.2869716286659241, "learning_rate": 0.00017814368425471522, "loss": 11.6827, "step": 30763 }, { "epoch": 0.6439755505316922, "grad_norm": 0.28656384348869324, "learning_rate": 0.00017814231612974652, "loss": 11.6604, "step": 30764 }, { "epoch": 0.6439964832956544, "grad_norm": 0.28299590945243835, "learning_rate": 0.00017814094796721324, "loss": 11.6633, "step": 30765 }, { "epoch": 0.6440174160596165, "grad_norm": 0.34036365151405334, "learning_rate": 0.00017813957976711605, "loss": 11.6726, "step": 30766 }, { "epoch": 0.6440383488235787, "grad_norm": 0.2636869549751282, "learning_rate": 0.0001781382115294556, "loss": 11.6916, "step": 30767 }, { "epoch": 0.6440592815875408, "grad_norm": 0.34439361095428467, "learning_rate": 0.00017813684325423257, "loss": 11.6801, "step": 30768 }, { "epoch": 0.644080214351503, "grad_norm": 0.25762939453125, "learning_rate": 0.00017813547494144753, "loss": 11.6846, "step": 30769 }, { "epoch": 0.6441011471154652, "grad_norm": 0.29270851612091064, "learning_rate": 0.00017813410659110125, "loss": 11.6652, "step": 30770 }, { "epoch": 0.6441220798794273, "grad_norm": 0.35794827342033386, "learning_rate": 0.00017813273820319433, "loss": 11.6788, "step": 30771 }, { "epoch": 0.6441430126433895, "grad_norm": 0.28946512937545776, "learning_rate": 0.00017813136977772742, "loss": 11.6714, "step": 30772 }, { "epoch": 0.6441639454073516, "grad_norm": 0.2808549702167511, "learning_rate": 0.00017813000131470123, "loss": 11.6654, "step": 30773 }, { "epoch": 0.6441848781713138, "grad_norm": 0.24630801379680634, "learning_rate": 0.00017812863281411635, "loss": 11.6603, "step": 30774 }, { "epoch": 0.6442058109352758, "grad_norm": 0.3486826419830322, "learning_rate": 0.00017812726427597348, "loss": 11.6625, "step": 30775 }, { "epoch": 0.644226743699238, "grad_norm": 0.24899046123027802, "learning_rate": 0.00017812589570027326, "loss": 11.6693, "step": 30776 }, { "epoch": 0.6442476764632002, "grad_norm": 0.26898592710494995, "learning_rate": 0.00017812452708701636, "loss": 11.6644, "step": 30777 }, { "epoch": 0.6442686092271623, "grad_norm": 0.29856228828430176, "learning_rate": 0.00017812315843620345, "loss": 11.6676, "step": 30778 }, { "epoch": 0.6442895419911245, "grad_norm": 0.2639847993850708, "learning_rate": 0.00017812178974783516, "loss": 11.6816, "step": 30779 }, { "epoch": 0.6443104747550866, "grad_norm": 0.28646185994148254, "learning_rate": 0.00017812042102191217, "loss": 11.6788, "step": 30780 }, { "epoch": 0.6443314075190488, "grad_norm": 0.2969784438610077, "learning_rate": 0.0001781190522584351, "loss": 11.6665, "step": 30781 }, { "epoch": 0.6443523402830109, "grad_norm": 0.2727090120315552, "learning_rate": 0.00017811768345740467, "loss": 11.6608, "step": 30782 }, { "epoch": 0.6443732730469731, "grad_norm": 0.24308893084526062, "learning_rate": 0.00017811631461882147, "loss": 11.683, "step": 30783 }, { "epoch": 0.6443942058109353, "grad_norm": 0.2832132577896118, "learning_rate": 0.00017811494574268623, "loss": 11.6666, "step": 30784 }, { "epoch": 0.6444151385748974, "grad_norm": 0.2738959491252899, "learning_rate": 0.00017811357682899956, "loss": 11.6819, "step": 30785 }, { "epoch": 0.6444360713388596, "grad_norm": 0.27315032482147217, "learning_rate": 0.00017811220787776211, "loss": 11.6637, "step": 30786 }, { "epoch": 0.6444570041028217, "grad_norm": 0.26699018478393555, "learning_rate": 0.00017811083888897458, "loss": 11.6684, "step": 30787 }, { "epoch": 0.6444779368667839, "grad_norm": 0.272768497467041, "learning_rate": 0.0001781094698626376, "loss": 11.6684, "step": 30788 }, { "epoch": 0.6444988696307461, "grad_norm": 0.2555731236934662, "learning_rate": 0.00017810810079875182, "loss": 11.6705, "step": 30789 }, { "epoch": 0.6445198023947082, "grad_norm": 0.29453331232070923, "learning_rate": 0.00017810673169731792, "loss": 11.673, "step": 30790 }, { "epoch": 0.6445407351586704, "grad_norm": 0.40028300881385803, "learning_rate": 0.00017810536255833653, "loss": 11.6657, "step": 30791 }, { "epoch": 0.6445616679226325, "grad_norm": 0.30022287368774414, "learning_rate": 0.00017810399338180833, "loss": 11.6692, "step": 30792 }, { "epoch": 0.6445826006865947, "grad_norm": 0.42893901467323303, "learning_rate": 0.000178102624167734, "loss": 11.6569, "step": 30793 }, { "epoch": 0.6446035334505568, "grad_norm": 0.295509934425354, "learning_rate": 0.00017810125491611416, "loss": 11.6722, "step": 30794 }, { "epoch": 0.644624466214519, "grad_norm": 0.298929899930954, "learning_rate": 0.0001780998856269495, "loss": 11.6765, "step": 30795 }, { "epoch": 0.6446453989784812, "grad_norm": 0.3667888045310974, "learning_rate": 0.0001780985163002406, "loss": 11.6507, "step": 30796 }, { "epoch": 0.6446663317424433, "grad_norm": 0.3441784381866455, "learning_rate": 0.00017809714693598822, "loss": 11.6722, "step": 30797 }, { "epoch": 0.6446872645064055, "grad_norm": 0.27630022168159485, "learning_rate": 0.00017809577753419298, "loss": 11.6745, "step": 30798 }, { "epoch": 0.6447081972703675, "grad_norm": 0.22766271233558655, "learning_rate": 0.00017809440809485555, "loss": 11.6697, "step": 30799 }, { "epoch": 0.6447291300343297, "grad_norm": 0.34269529581069946, "learning_rate": 0.00017809303861797654, "loss": 11.6822, "step": 30800 }, { "epoch": 0.6447500627982918, "grad_norm": 0.3696235120296478, "learning_rate": 0.00017809166910355665, "loss": 11.6751, "step": 30801 }, { "epoch": 0.644770995562254, "grad_norm": 0.2597202956676483, "learning_rate": 0.00017809029955159652, "loss": 11.6631, "step": 30802 }, { "epoch": 0.6447919283262162, "grad_norm": 0.30339211225509644, "learning_rate": 0.00017808892996209682, "loss": 11.6665, "step": 30803 }, { "epoch": 0.6448128610901783, "grad_norm": 0.2771484851837158, "learning_rate": 0.00017808756033505825, "loss": 11.6732, "step": 30804 }, { "epoch": 0.6448337938541405, "grad_norm": 0.3502005636692047, "learning_rate": 0.00017808619067048137, "loss": 11.6786, "step": 30805 }, { "epoch": 0.6448547266181026, "grad_norm": 0.2887658178806305, "learning_rate": 0.00017808482096836695, "loss": 11.6701, "step": 30806 }, { "epoch": 0.6448756593820648, "grad_norm": 0.2768402695655823, "learning_rate": 0.00017808345122871555, "loss": 11.6562, "step": 30807 }, { "epoch": 0.644896592146027, "grad_norm": 0.34591883420944214, "learning_rate": 0.00017808208145152788, "loss": 11.6649, "step": 30808 }, { "epoch": 0.6449175249099891, "grad_norm": 0.34825459122657776, "learning_rate": 0.00017808071163680458, "loss": 11.6703, "step": 30809 }, { "epoch": 0.6449384576739513, "grad_norm": 0.2957746088504791, "learning_rate": 0.00017807934178454632, "loss": 11.679, "step": 30810 }, { "epoch": 0.6449593904379134, "grad_norm": 0.32947084307670593, "learning_rate": 0.00017807797189475376, "loss": 11.6686, "step": 30811 }, { "epoch": 0.6449803232018756, "grad_norm": 1.1711812019348145, "learning_rate": 0.00017807660196742756, "loss": 11.6527, "step": 30812 }, { "epoch": 0.6450012559658377, "grad_norm": 0.3015492260456085, "learning_rate": 0.00017807523200256834, "loss": 11.6557, "step": 30813 }, { "epoch": 0.6450221887297999, "grad_norm": 0.261529803276062, "learning_rate": 0.0001780738620001768, "loss": 11.6639, "step": 30814 }, { "epoch": 0.6450431214937621, "grad_norm": 0.32789862155914307, "learning_rate": 0.00017807249196025363, "loss": 11.6722, "step": 30815 }, { "epoch": 0.6450640542577242, "grad_norm": 0.3024449050426483, "learning_rate": 0.00017807112188279945, "loss": 11.6719, "step": 30816 }, { "epoch": 0.6450849870216864, "grad_norm": 0.34567585587501526, "learning_rate": 0.00017806975176781488, "loss": 11.6654, "step": 30817 }, { "epoch": 0.6451059197856485, "grad_norm": 0.26216962933540344, "learning_rate": 0.00017806838161530064, "loss": 11.6735, "step": 30818 }, { "epoch": 0.6451268525496107, "grad_norm": 0.2863697111606598, "learning_rate": 0.00017806701142525735, "loss": 11.6627, "step": 30819 }, { "epoch": 0.6451477853135728, "grad_norm": 0.32610365748405457, "learning_rate": 0.00017806564119768567, "loss": 11.6752, "step": 30820 }, { "epoch": 0.645168718077535, "grad_norm": 1.3536185026168823, "learning_rate": 0.00017806427093258628, "loss": 11.6432, "step": 30821 }, { "epoch": 0.6451896508414972, "grad_norm": 0.2799309492111206, "learning_rate": 0.00017806290062995984, "loss": 11.6909, "step": 30822 }, { "epoch": 0.6452105836054592, "grad_norm": 0.43599969148635864, "learning_rate": 0.00017806153028980702, "loss": 11.6552, "step": 30823 }, { "epoch": 0.6452315163694214, "grad_norm": 0.2602371275424957, "learning_rate": 0.00017806015991212842, "loss": 11.6481, "step": 30824 }, { "epoch": 0.6452524491333835, "grad_norm": 0.2562171220779419, "learning_rate": 0.00017805878949692477, "loss": 11.6652, "step": 30825 }, { "epoch": 0.6452733818973457, "grad_norm": 0.36881548166275024, "learning_rate": 0.0001780574190441967, "loss": 11.6831, "step": 30826 }, { "epoch": 0.6452943146613079, "grad_norm": 0.31146374344825745, "learning_rate": 0.00017805604855394483, "loss": 11.6662, "step": 30827 }, { "epoch": 0.64531524742527, "grad_norm": 0.2988969683647156, "learning_rate": 0.00017805467802616985, "loss": 11.6526, "step": 30828 }, { "epoch": 0.6453361801892322, "grad_norm": 0.3098243474960327, "learning_rate": 0.00017805330746087242, "loss": 11.6577, "step": 30829 }, { "epoch": 0.6453571129531943, "grad_norm": 0.3290763199329376, "learning_rate": 0.00017805193685805325, "loss": 11.6711, "step": 30830 }, { "epoch": 0.6453780457171565, "grad_norm": 0.33266380429267883, "learning_rate": 0.0001780505662177129, "loss": 11.6844, "step": 30831 }, { "epoch": 0.6453989784811186, "grad_norm": 0.3235494792461395, "learning_rate": 0.00017804919553985208, "loss": 11.6681, "step": 30832 }, { "epoch": 0.6454199112450808, "grad_norm": 0.34195390343666077, "learning_rate": 0.0001780478248244715, "loss": 11.6674, "step": 30833 }, { "epoch": 0.645440844009043, "grad_norm": 0.28207141160964966, "learning_rate": 0.00017804645407157172, "loss": 11.6718, "step": 30834 }, { "epoch": 0.6454617767730051, "grad_norm": 0.3535853922367096, "learning_rate": 0.00017804508328115345, "loss": 11.6845, "step": 30835 }, { "epoch": 0.6454827095369673, "grad_norm": 0.3571547865867615, "learning_rate": 0.00017804371245321737, "loss": 11.6871, "step": 30836 }, { "epoch": 0.6455036423009294, "grad_norm": 0.27352672815322876, "learning_rate": 0.00017804234158776407, "loss": 11.6643, "step": 30837 }, { "epoch": 0.6455245750648916, "grad_norm": 0.37632954120635986, "learning_rate": 0.0001780409706847943, "loss": 11.6756, "step": 30838 }, { "epoch": 0.6455455078288537, "grad_norm": 0.3631947338581085, "learning_rate": 0.00017803959974430864, "loss": 11.6873, "step": 30839 }, { "epoch": 0.6455664405928159, "grad_norm": 0.3055292069911957, "learning_rate": 0.00017803822876630778, "loss": 11.6773, "step": 30840 }, { "epoch": 0.6455873733567781, "grad_norm": 0.28935906291007996, "learning_rate": 0.0001780368577507924, "loss": 11.6618, "step": 30841 }, { "epoch": 0.6456083061207402, "grad_norm": 0.268981397151947, "learning_rate": 0.00017803548669776312, "loss": 11.6907, "step": 30842 }, { "epoch": 0.6456292388847024, "grad_norm": 0.2999582588672638, "learning_rate": 0.00017803411560722064, "loss": 11.6687, "step": 30843 }, { "epoch": 0.6456501716486645, "grad_norm": 0.30484604835510254, "learning_rate": 0.00017803274447916558, "loss": 11.67, "step": 30844 }, { "epoch": 0.6456711044126267, "grad_norm": 0.23762425780296326, "learning_rate": 0.0001780313733135986, "loss": 11.6828, "step": 30845 }, { "epoch": 0.6456920371765889, "grad_norm": 0.31901815533638, "learning_rate": 0.0001780300021105204, "loss": 11.6861, "step": 30846 }, { "epoch": 0.6457129699405509, "grad_norm": 0.4421042203903198, "learning_rate": 0.0001780286308699316, "loss": 11.6732, "step": 30847 }, { "epoch": 0.6457339027045131, "grad_norm": 0.32874926924705505, "learning_rate": 0.00017802725959183287, "loss": 11.6785, "step": 30848 }, { "epoch": 0.6457548354684752, "grad_norm": 0.25350087881088257, "learning_rate": 0.0001780258882762249, "loss": 11.6635, "step": 30849 }, { "epoch": 0.6457757682324374, "grad_norm": 0.34276893734931946, "learning_rate": 0.0001780245169231083, "loss": 11.6643, "step": 30850 }, { "epoch": 0.6457967009963995, "grad_norm": 0.3641851246356964, "learning_rate": 0.00017802314553248377, "loss": 11.6685, "step": 30851 }, { "epoch": 0.6458176337603617, "grad_norm": 0.37531355023384094, "learning_rate": 0.00017802177410435196, "loss": 11.6866, "step": 30852 }, { "epoch": 0.6458385665243239, "grad_norm": 0.31548285484313965, "learning_rate": 0.00017802040263871347, "loss": 11.6646, "step": 30853 }, { "epoch": 0.645859499288286, "grad_norm": 0.28439730405807495, "learning_rate": 0.00017801903113556905, "loss": 11.6623, "step": 30854 }, { "epoch": 0.6458804320522482, "grad_norm": 0.3986586928367615, "learning_rate": 0.00017801765959491928, "loss": 11.6757, "step": 30855 }, { "epoch": 0.6459013648162103, "grad_norm": 0.31961604952812195, "learning_rate": 0.0001780162880167649, "loss": 11.6792, "step": 30856 }, { "epoch": 0.6459222975801725, "grad_norm": 0.24170327186584473, "learning_rate": 0.0001780149164011065, "loss": 11.6677, "step": 30857 }, { "epoch": 0.6459432303441346, "grad_norm": 0.2838314473628998, "learning_rate": 0.00017801354474794478, "loss": 11.6742, "step": 30858 }, { "epoch": 0.6459641631080968, "grad_norm": 0.2682648003101349, "learning_rate": 0.00017801217305728038, "loss": 11.6816, "step": 30859 }, { "epoch": 0.645985095872059, "grad_norm": 0.36046895384788513, "learning_rate": 0.00017801080132911397, "loss": 11.6785, "step": 30860 }, { "epoch": 0.6460060286360211, "grad_norm": 0.2605229914188385, "learning_rate": 0.00017800942956344617, "loss": 11.6752, "step": 30861 }, { "epoch": 0.6460269613999833, "grad_norm": 0.2769336402416229, "learning_rate": 0.00017800805776027772, "loss": 11.6634, "step": 30862 }, { "epoch": 0.6460478941639454, "grad_norm": 0.30588722229003906, "learning_rate": 0.0001780066859196092, "loss": 11.671, "step": 30863 }, { "epoch": 0.6460688269279076, "grad_norm": 0.3410080671310425, "learning_rate": 0.00017800531404144133, "loss": 11.6915, "step": 30864 }, { "epoch": 0.6460897596918698, "grad_norm": 0.29592782258987427, "learning_rate": 0.00017800394212577473, "loss": 11.6825, "step": 30865 }, { "epoch": 0.6461106924558319, "grad_norm": 0.25892671942710876, "learning_rate": 0.00017800257017261005, "loss": 11.6605, "step": 30866 }, { "epoch": 0.6461316252197941, "grad_norm": 0.3035237789154053, "learning_rate": 0.000178001198181948, "loss": 11.6894, "step": 30867 }, { "epoch": 0.6461525579837561, "grad_norm": 0.2755982279777527, "learning_rate": 0.00017799982615378923, "loss": 11.6718, "step": 30868 }, { "epoch": 0.6461734907477183, "grad_norm": 0.3329947888851166, "learning_rate": 0.00017799845408813435, "loss": 11.6873, "step": 30869 }, { "epoch": 0.6461944235116804, "grad_norm": 0.3186385929584503, "learning_rate": 0.00017799708198498407, "loss": 11.6812, "step": 30870 }, { "epoch": 0.6462153562756426, "grad_norm": 0.40795955061912537, "learning_rate": 0.000177995709844339, "loss": 11.6868, "step": 30871 }, { "epoch": 0.6462362890396048, "grad_norm": 0.26802900433540344, "learning_rate": 0.00017799433766619983, "loss": 11.6562, "step": 30872 }, { "epoch": 0.6462572218035669, "grad_norm": 0.3051343858242035, "learning_rate": 0.00017799296545056726, "loss": 11.6716, "step": 30873 }, { "epoch": 0.6462781545675291, "grad_norm": 0.4150264263153076, "learning_rate": 0.00017799159319744186, "loss": 11.6969, "step": 30874 }, { "epoch": 0.6462990873314912, "grad_norm": 0.36208662390708923, "learning_rate": 0.00017799022090682435, "loss": 11.6683, "step": 30875 }, { "epoch": 0.6463200200954534, "grad_norm": 0.37571224570274353, "learning_rate": 0.0001779888485787154, "loss": 11.6574, "step": 30876 }, { "epoch": 0.6463409528594155, "grad_norm": 0.2921413481235504, "learning_rate": 0.00017798747621311566, "loss": 11.6609, "step": 30877 }, { "epoch": 0.6463618856233777, "grad_norm": 0.3159659206867218, "learning_rate": 0.00017798610381002575, "loss": 11.6703, "step": 30878 }, { "epoch": 0.6463828183873399, "grad_norm": 0.25898832082748413, "learning_rate": 0.00017798473136944635, "loss": 11.6643, "step": 30879 }, { "epoch": 0.646403751151302, "grad_norm": 0.29922667145729065, "learning_rate": 0.00017798335889137814, "loss": 11.6538, "step": 30880 }, { "epoch": 0.6464246839152642, "grad_norm": 0.34112152457237244, "learning_rate": 0.00017798198637582175, "loss": 11.666, "step": 30881 }, { "epoch": 0.6464456166792263, "grad_norm": 0.2817257344722748, "learning_rate": 0.0001779806138227779, "loss": 11.6769, "step": 30882 }, { "epoch": 0.6464665494431885, "grad_norm": 0.31913214921951294, "learning_rate": 0.00017797924123224713, "loss": 11.6812, "step": 30883 }, { "epoch": 0.6464874822071506, "grad_norm": 0.2868247330188751, "learning_rate": 0.00017797786860423024, "loss": 11.6763, "step": 30884 }, { "epoch": 0.6465084149711128, "grad_norm": 0.26365232467651367, "learning_rate": 0.0001779764959387278, "loss": 11.6657, "step": 30885 }, { "epoch": 0.646529347735075, "grad_norm": 0.2747977077960968, "learning_rate": 0.00017797512323574052, "loss": 11.6763, "step": 30886 }, { "epoch": 0.6465502804990371, "grad_norm": 0.32295385003089905, "learning_rate": 0.000177973750495269, "loss": 11.6659, "step": 30887 }, { "epoch": 0.6465712132629993, "grad_norm": 0.37842699885368347, "learning_rate": 0.00017797237771731395, "loss": 11.6669, "step": 30888 }, { "epoch": 0.6465921460269614, "grad_norm": 0.3050057888031006, "learning_rate": 0.00017797100490187605, "loss": 11.6558, "step": 30889 }, { "epoch": 0.6466130787909236, "grad_norm": 0.3793945014476776, "learning_rate": 0.0001779696320489559, "loss": 11.6945, "step": 30890 }, { "epoch": 0.6466340115548858, "grad_norm": 0.35770630836486816, "learning_rate": 0.00017796825915855418, "loss": 11.6819, "step": 30891 }, { "epoch": 0.6466549443188478, "grad_norm": 0.2914595901966095, "learning_rate": 0.00017796688623067155, "loss": 11.6498, "step": 30892 }, { "epoch": 0.64667587708281, "grad_norm": 0.3168942928314209, "learning_rate": 0.00017796551326530868, "loss": 11.671, "step": 30893 }, { "epoch": 0.6466968098467721, "grad_norm": 0.35726282000541687, "learning_rate": 0.0001779641402624662, "loss": 11.6818, "step": 30894 }, { "epoch": 0.6467177426107343, "grad_norm": 0.2870071232318878, "learning_rate": 0.00017796276722214485, "loss": 11.6844, "step": 30895 }, { "epoch": 0.6467386753746964, "grad_norm": 0.30784496665000916, "learning_rate": 0.0001779613941443452, "loss": 11.6623, "step": 30896 }, { "epoch": 0.6467596081386586, "grad_norm": 0.2399977594614029, "learning_rate": 0.00017796002102906796, "loss": 11.671, "step": 30897 }, { "epoch": 0.6467805409026208, "grad_norm": 0.3250289857387543, "learning_rate": 0.00017795864787631375, "loss": 11.6785, "step": 30898 }, { "epoch": 0.6468014736665829, "grad_norm": 0.24514220654964447, "learning_rate": 0.00017795727468608328, "loss": 11.6722, "step": 30899 }, { "epoch": 0.6468224064305451, "grad_norm": 0.2154081016778946, "learning_rate": 0.0001779559014583772, "loss": 11.6684, "step": 30900 }, { "epoch": 0.6468433391945072, "grad_norm": 0.31398746371269226, "learning_rate": 0.0001779545281931961, "loss": 11.6652, "step": 30901 }, { "epoch": 0.6468642719584694, "grad_norm": 0.28451433777809143, "learning_rate": 0.00017795315489054075, "loss": 11.6584, "step": 30902 }, { "epoch": 0.6468852047224315, "grad_norm": 0.3432101607322693, "learning_rate": 0.00017795178155041174, "loss": 11.6682, "step": 30903 }, { "epoch": 0.6469061374863937, "grad_norm": 0.26640626788139343, "learning_rate": 0.00017795040817280975, "loss": 11.6828, "step": 30904 }, { "epoch": 0.6469270702503559, "grad_norm": 0.2594311833381653, "learning_rate": 0.0001779490347577354, "loss": 11.6806, "step": 30905 }, { "epoch": 0.646948003014318, "grad_norm": 0.43294352293014526, "learning_rate": 0.00017794766130518942, "loss": 11.6835, "step": 30906 }, { "epoch": 0.6469689357782802, "grad_norm": 0.27166423201560974, "learning_rate": 0.00017794628781517245, "loss": 11.6615, "step": 30907 }, { "epoch": 0.6469898685422423, "grad_norm": 0.3566697835922241, "learning_rate": 0.00017794491428768514, "loss": 11.6734, "step": 30908 }, { "epoch": 0.6470108013062045, "grad_norm": 0.3160097897052765, "learning_rate": 0.0001779435407227281, "loss": 11.6766, "step": 30909 }, { "epoch": 0.6470317340701667, "grad_norm": 0.28672507405281067, "learning_rate": 0.0001779421671203021, "loss": 11.671, "step": 30910 }, { "epoch": 0.6470526668341288, "grad_norm": 0.27229559421539307, "learning_rate": 0.00017794079348040768, "loss": 11.6691, "step": 30911 }, { "epoch": 0.647073599598091, "grad_norm": 0.2710639238357544, "learning_rate": 0.0001779394198030456, "loss": 11.6888, "step": 30912 }, { "epoch": 0.647094532362053, "grad_norm": 0.26736369729042053, "learning_rate": 0.00017793804608821646, "loss": 11.6743, "step": 30913 }, { "epoch": 0.6471154651260153, "grad_norm": 0.2629765570163727, "learning_rate": 0.00017793667233592094, "loss": 11.6863, "step": 30914 }, { "epoch": 0.6471363978899773, "grad_norm": 0.26685911417007446, "learning_rate": 0.00017793529854615971, "loss": 11.6665, "step": 30915 }, { "epoch": 0.6471573306539395, "grad_norm": 0.3242040276527405, "learning_rate": 0.00017793392471893342, "loss": 11.68, "step": 30916 }, { "epoch": 0.6471782634179017, "grad_norm": 0.2918412685394287, "learning_rate": 0.0001779325508542427, "loss": 11.6685, "step": 30917 }, { "epoch": 0.6471991961818638, "grad_norm": 0.257971853017807, "learning_rate": 0.00017793117695208828, "loss": 11.6712, "step": 30918 }, { "epoch": 0.647220128945826, "grad_norm": 0.26284605264663696, "learning_rate": 0.00017792980301247076, "loss": 11.6928, "step": 30919 }, { "epoch": 0.6472410617097881, "grad_norm": 0.2874060869216919, "learning_rate": 0.0001779284290353908, "loss": 11.6622, "step": 30920 }, { "epoch": 0.6472619944737503, "grad_norm": 0.39151254296302795, "learning_rate": 0.00017792705502084913, "loss": 11.6821, "step": 30921 }, { "epoch": 0.6472829272377124, "grad_norm": 0.22654159367084503, "learning_rate": 0.00017792568096884636, "loss": 11.675, "step": 30922 }, { "epoch": 0.6473038600016746, "grad_norm": 0.29140135645866394, "learning_rate": 0.00017792430687938314, "loss": 11.6763, "step": 30923 }, { "epoch": 0.6473247927656368, "grad_norm": 0.30705493688583374, "learning_rate": 0.00017792293275246013, "loss": 11.6718, "step": 30924 }, { "epoch": 0.6473457255295989, "grad_norm": 0.26112958788871765, "learning_rate": 0.00017792155858807802, "loss": 11.6885, "step": 30925 }, { "epoch": 0.6473666582935611, "grad_norm": 0.27949684858322144, "learning_rate": 0.00017792018438623743, "loss": 11.6666, "step": 30926 }, { "epoch": 0.6473875910575232, "grad_norm": 0.30413681268692017, "learning_rate": 0.00017791881014693906, "loss": 11.656, "step": 30927 }, { "epoch": 0.6474085238214854, "grad_norm": 0.23822121322155, "learning_rate": 0.00017791743587018357, "loss": 11.675, "step": 30928 }, { "epoch": 0.6474294565854476, "grad_norm": 0.283944696187973, "learning_rate": 0.00017791606155597163, "loss": 11.6473, "step": 30929 }, { "epoch": 0.6474503893494097, "grad_norm": 0.3065791726112366, "learning_rate": 0.00017791468720430382, "loss": 11.6641, "step": 30930 }, { "epoch": 0.6474713221133719, "grad_norm": 0.37814822793006897, "learning_rate": 0.0001779133128151809, "loss": 11.6778, "step": 30931 }, { "epoch": 0.647492254877334, "grad_norm": 0.26094165444374084, "learning_rate": 0.00017791193838860347, "loss": 11.6793, "step": 30932 }, { "epoch": 0.6475131876412962, "grad_norm": 0.33370429277420044, "learning_rate": 0.00017791056392457222, "loss": 11.686, "step": 30933 }, { "epoch": 0.6475341204052583, "grad_norm": 0.2704964876174927, "learning_rate": 0.0001779091894230878, "loss": 11.6644, "step": 30934 }, { "epoch": 0.6475550531692205, "grad_norm": 0.31028881669044495, "learning_rate": 0.00017790781488415087, "loss": 11.6859, "step": 30935 }, { "epoch": 0.6475759859331827, "grad_norm": 0.32181257009506226, "learning_rate": 0.0001779064403077621, "loss": 11.6745, "step": 30936 }, { "epoch": 0.6475969186971448, "grad_norm": 0.39664095640182495, "learning_rate": 0.0001779050656939221, "loss": 11.6929, "step": 30937 }, { "epoch": 0.647617851461107, "grad_norm": 0.29679980874061584, "learning_rate": 0.00017790369104263163, "loss": 11.6846, "step": 30938 }, { "epoch": 0.647638784225069, "grad_norm": 0.30984067916870117, "learning_rate": 0.00017790231635389126, "loss": 11.664, "step": 30939 }, { "epoch": 0.6476597169890312, "grad_norm": 0.2514846920967102, "learning_rate": 0.0001779009416277017, "loss": 11.6562, "step": 30940 }, { "epoch": 0.6476806497529933, "grad_norm": 0.3569797873497009, "learning_rate": 0.00017789956686406358, "loss": 11.6635, "step": 30941 }, { "epoch": 0.6477015825169555, "grad_norm": 0.35624051094055176, "learning_rate": 0.0001778981920629776, "loss": 11.6841, "step": 30942 }, { "epoch": 0.6477225152809177, "grad_norm": 0.24583405256271362, "learning_rate": 0.0001778968172244444, "loss": 11.666, "step": 30943 }, { "epoch": 0.6477434480448798, "grad_norm": 0.4217081665992737, "learning_rate": 0.00017789544234846462, "loss": 11.6725, "step": 30944 }, { "epoch": 0.647764380808842, "grad_norm": 0.2777736485004425, "learning_rate": 0.00017789406743503895, "loss": 11.6742, "step": 30945 }, { "epoch": 0.6477853135728041, "grad_norm": 0.30155596137046814, "learning_rate": 0.00017789269248416801, "loss": 11.6577, "step": 30946 }, { "epoch": 0.6478062463367663, "grad_norm": 0.30889520049095154, "learning_rate": 0.00017789131749585253, "loss": 11.6663, "step": 30947 }, { "epoch": 0.6478271791007285, "grad_norm": 0.29983237385749817, "learning_rate": 0.00017788994247009313, "loss": 11.6754, "step": 30948 }, { "epoch": 0.6478481118646906, "grad_norm": 0.26677578687667847, "learning_rate": 0.00017788856740689045, "loss": 11.6601, "step": 30949 }, { "epoch": 0.6478690446286528, "grad_norm": 0.28902891278266907, "learning_rate": 0.0001778871923062452, "loss": 11.6617, "step": 30950 }, { "epoch": 0.6478899773926149, "grad_norm": 0.30176842212677, "learning_rate": 0.00017788581716815803, "loss": 11.6657, "step": 30951 }, { "epoch": 0.6479109101565771, "grad_norm": 0.23603904247283936, "learning_rate": 0.00017788444199262954, "loss": 11.6878, "step": 30952 }, { "epoch": 0.6479318429205392, "grad_norm": 0.28809601068496704, "learning_rate": 0.00017788306677966043, "loss": 11.6671, "step": 30953 }, { "epoch": 0.6479527756845014, "grad_norm": 0.36044231057167053, "learning_rate": 0.00017788169152925144, "loss": 11.6926, "step": 30954 }, { "epoch": 0.6479737084484636, "grad_norm": 0.283241331577301, "learning_rate": 0.0001778803162414031, "loss": 11.6717, "step": 30955 }, { "epoch": 0.6479946412124257, "grad_norm": 0.24001513421535492, "learning_rate": 0.00017787894091611617, "loss": 11.6658, "step": 30956 }, { "epoch": 0.6480155739763879, "grad_norm": 0.3446314036846161, "learning_rate": 0.00017787756555339123, "loss": 11.6678, "step": 30957 }, { "epoch": 0.64803650674035, "grad_norm": 0.2726333439350128, "learning_rate": 0.000177876190153229, "loss": 11.676, "step": 30958 }, { "epoch": 0.6480574395043122, "grad_norm": 0.2521800994873047, "learning_rate": 0.00017787481471563015, "loss": 11.6724, "step": 30959 }, { "epoch": 0.6480783722682742, "grad_norm": 0.28780597448349, "learning_rate": 0.0001778734392405953, "loss": 11.6631, "step": 30960 }, { "epoch": 0.6480993050322365, "grad_norm": 0.24648509919643402, "learning_rate": 0.00017787206372812513, "loss": 11.6676, "step": 30961 }, { "epoch": 0.6481202377961987, "grad_norm": 0.2677004337310791, "learning_rate": 0.00017787068817822028, "loss": 11.6652, "step": 30962 }, { "epoch": 0.6481411705601607, "grad_norm": 0.2394823282957077, "learning_rate": 0.00017786931259088144, "loss": 11.6679, "step": 30963 }, { "epoch": 0.6481621033241229, "grad_norm": 0.27699726819992065, "learning_rate": 0.00017786793696610928, "loss": 11.6944, "step": 30964 }, { "epoch": 0.648183036088085, "grad_norm": 0.2789270579814911, "learning_rate": 0.00017786656130390442, "loss": 11.679, "step": 30965 }, { "epoch": 0.6482039688520472, "grad_norm": 0.30083712935447693, "learning_rate": 0.00017786518560426756, "loss": 11.6665, "step": 30966 }, { "epoch": 0.6482249016160094, "grad_norm": 0.23332875967025757, "learning_rate": 0.00017786380986719932, "loss": 11.6644, "step": 30967 }, { "epoch": 0.6482458343799715, "grad_norm": 0.28586849570274353, "learning_rate": 0.00017786243409270038, "loss": 11.6703, "step": 30968 }, { "epoch": 0.6482667671439337, "grad_norm": 0.3634556531906128, "learning_rate": 0.00017786105828077145, "loss": 11.6741, "step": 30969 }, { "epoch": 0.6482876999078958, "grad_norm": 0.290163516998291, "learning_rate": 0.0001778596824314131, "loss": 11.6759, "step": 30970 }, { "epoch": 0.648308632671858, "grad_norm": 0.28638678789138794, "learning_rate": 0.00017785830654462609, "loss": 11.6709, "step": 30971 }, { "epoch": 0.6483295654358201, "grad_norm": 0.616459846496582, "learning_rate": 0.00017785693062041103, "loss": 11.6053, "step": 30972 }, { "epoch": 0.6483504981997823, "grad_norm": 0.29316243529319763, "learning_rate": 0.00017785555465876852, "loss": 11.6664, "step": 30973 }, { "epoch": 0.6483714309637445, "grad_norm": 0.2816241979598999, "learning_rate": 0.00017785417865969936, "loss": 11.6679, "step": 30974 }, { "epoch": 0.6483923637277066, "grad_norm": 0.3469323217868805, "learning_rate": 0.00017785280262320408, "loss": 11.6781, "step": 30975 }, { "epoch": 0.6484132964916688, "grad_norm": 0.2708927094936371, "learning_rate": 0.00017785142654928345, "loss": 11.6666, "step": 30976 }, { "epoch": 0.6484342292556309, "grad_norm": 0.3012911081314087, "learning_rate": 0.00017785005043793802, "loss": 11.67, "step": 30977 }, { "epoch": 0.6484551620195931, "grad_norm": 0.3058977723121643, "learning_rate": 0.00017784867428916856, "loss": 11.6704, "step": 30978 }, { "epoch": 0.6484760947835552, "grad_norm": 0.26244044303894043, "learning_rate": 0.00017784729810297565, "loss": 11.6857, "step": 30979 }, { "epoch": 0.6484970275475174, "grad_norm": 0.26108378171920776, "learning_rate": 0.00017784592187936, "loss": 11.6592, "step": 30980 }, { "epoch": 0.6485179603114796, "grad_norm": 0.258591890335083, "learning_rate": 0.00017784454561832225, "loss": 11.6867, "step": 30981 }, { "epoch": 0.6485388930754417, "grad_norm": 0.21926374733448029, "learning_rate": 0.00017784316931986305, "loss": 11.6742, "step": 30982 }, { "epoch": 0.6485598258394039, "grad_norm": 0.26998573541641235, "learning_rate": 0.00017784179298398312, "loss": 11.6715, "step": 30983 }, { "epoch": 0.648580758603366, "grad_norm": 0.2875182032585144, "learning_rate": 0.00017784041661068304, "loss": 11.6718, "step": 30984 }, { "epoch": 0.6486016913673281, "grad_norm": 0.34853649139404297, "learning_rate": 0.00017783904019996353, "loss": 11.6908, "step": 30985 }, { "epoch": 0.6486226241312903, "grad_norm": 0.34821781516075134, "learning_rate": 0.00017783766375182523, "loss": 11.6709, "step": 30986 }, { "epoch": 0.6486435568952524, "grad_norm": 0.31376931071281433, "learning_rate": 0.0001778362872662688, "loss": 11.6597, "step": 30987 }, { "epoch": 0.6486644896592146, "grad_norm": 0.3772653341293335, "learning_rate": 0.0001778349107432949, "loss": 11.6718, "step": 30988 }, { "epoch": 0.6486854224231767, "grad_norm": 0.2732783257961273, "learning_rate": 0.00017783353418290423, "loss": 11.6549, "step": 30989 }, { "epoch": 0.6487063551871389, "grad_norm": 0.27283549308776855, "learning_rate": 0.00017783215758509742, "loss": 11.6704, "step": 30990 }, { "epoch": 0.648727287951101, "grad_norm": 0.26718440651893616, "learning_rate": 0.0001778307809498751, "loss": 11.6635, "step": 30991 }, { "epoch": 0.6487482207150632, "grad_norm": 0.3321826457977295, "learning_rate": 0.00017782940427723797, "loss": 11.6647, "step": 30992 }, { "epoch": 0.6487691534790254, "grad_norm": 0.32557228207588196, "learning_rate": 0.00017782802756718673, "loss": 11.6597, "step": 30993 }, { "epoch": 0.6487900862429875, "grad_norm": 0.9412609934806824, "learning_rate": 0.00017782665081972194, "loss": 11.5889, "step": 30994 }, { "epoch": 0.6488110190069497, "grad_norm": 0.26805293560028076, "learning_rate": 0.00017782527403484432, "loss": 11.6741, "step": 30995 }, { "epoch": 0.6488319517709118, "grad_norm": 0.28426969051361084, "learning_rate": 0.00017782389721255457, "loss": 11.6697, "step": 30996 }, { "epoch": 0.648852884534874, "grad_norm": 0.30970972776412964, "learning_rate": 0.0001778225203528533, "loss": 11.6503, "step": 30997 }, { "epoch": 0.6488738172988361, "grad_norm": 0.3048335313796997, "learning_rate": 0.00017782114345574117, "loss": 11.6766, "step": 30998 }, { "epoch": 0.6488947500627983, "grad_norm": 0.3337072432041168, "learning_rate": 0.00017781976652121885, "loss": 11.6558, "step": 30999 }, { "epoch": 0.6489156828267605, "grad_norm": 0.24222397804260254, "learning_rate": 0.00017781838954928706, "loss": 11.6758, "step": 31000 }, { "epoch": 0.6489156828267605, "eval_loss": 11.671194076538086, "eval_runtime": 34.3549, "eval_samples_per_second": 27.973, "eval_steps_per_second": 7.015, "step": 31000 }, { "epoch": 0.6489366155907226, "grad_norm": 0.5434548258781433, "learning_rate": 0.00017781701253994636, "loss": 11.6792, "step": 31001 }, { "epoch": 0.6489575483546848, "grad_norm": 0.2709614634513855, "learning_rate": 0.00017781563549319748, "loss": 11.6745, "step": 31002 }, { "epoch": 0.6489784811186469, "grad_norm": 0.26218217611312866, "learning_rate": 0.00017781425840904106, "loss": 11.6718, "step": 31003 }, { "epoch": 0.6489994138826091, "grad_norm": 0.29203325510025024, "learning_rate": 0.0001778128812874778, "loss": 11.6711, "step": 31004 }, { "epoch": 0.6490203466465713, "grad_norm": 0.2664991319179535, "learning_rate": 0.00017781150412850828, "loss": 11.6568, "step": 31005 }, { "epoch": 0.6490412794105334, "grad_norm": 0.4248564541339874, "learning_rate": 0.00017781012693213324, "loss": 11.6849, "step": 31006 }, { "epoch": 0.6490622121744956, "grad_norm": 0.2790728509426117, "learning_rate": 0.00017780874969835329, "loss": 11.6588, "step": 31007 }, { "epoch": 0.6490831449384576, "grad_norm": 0.2832108438014984, "learning_rate": 0.00017780737242716911, "loss": 11.6706, "step": 31008 }, { "epoch": 0.6491040777024198, "grad_norm": 0.3064325451850891, "learning_rate": 0.0001778059951185814, "loss": 11.6678, "step": 31009 }, { "epoch": 0.6491250104663819, "grad_norm": 0.3259704113006592, "learning_rate": 0.00017780461777259075, "loss": 11.6641, "step": 31010 }, { "epoch": 0.6491459432303441, "grad_norm": 0.296976774930954, "learning_rate": 0.00017780324038919792, "loss": 11.6764, "step": 31011 }, { "epoch": 0.6491668759943063, "grad_norm": 0.2931980788707733, "learning_rate": 0.00017780186296840347, "loss": 11.6631, "step": 31012 }, { "epoch": 0.6491878087582684, "grad_norm": 0.26435068249702454, "learning_rate": 0.00017780048551020809, "loss": 11.6804, "step": 31013 }, { "epoch": 0.6492087415222306, "grad_norm": 0.4592229127883911, "learning_rate": 0.0001777991080146125, "loss": 11.6879, "step": 31014 }, { "epoch": 0.6492296742861927, "grad_norm": 0.3409941792488098, "learning_rate": 0.00017779773048161727, "loss": 11.6657, "step": 31015 }, { "epoch": 0.6492506070501549, "grad_norm": 0.30719488859176636, "learning_rate": 0.00017779635291122317, "loss": 11.6842, "step": 31016 }, { "epoch": 0.649271539814117, "grad_norm": 0.2599722146987915, "learning_rate": 0.00017779497530343074, "loss": 11.6624, "step": 31017 }, { "epoch": 0.6492924725780792, "grad_norm": 0.24061869084835052, "learning_rate": 0.00017779359765824078, "loss": 11.6618, "step": 31018 }, { "epoch": 0.6493134053420414, "grad_norm": 0.25282129645347595, "learning_rate": 0.00017779221997565382, "loss": 11.6686, "step": 31019 }, { "epoch": 0.6493343381060035, "grad_norm": 0.32150495052337646, "learning_rate": 0.0001777908422556706, "loss": 11.6666, "step": 31020 }, { "epoch": 0.6493552708699657, "grad_norm": 0.2822962999343872, "learning_rate": 0.00017778946449829177, "loss": 11.6653, "step": 31021 }, { "epoch": 0.6493762036339278, "grad_norm": 0.2585905194282532, "learning_rate": 0.000177788086703518, "loss": 11.6537, "step": 31022 }, { "epoch": 0.64939713639789, "grad_norm": 0.26299694180488586, "learning_rate": 0.0001777867088713499, "loss": 11.6827, "step": 31023 }, { "epoch": 0.6494180691618522, "grad_norm": 0.2752123177051544, "learning_rate": 0.00017778533100178817, "loss": 11.6677, "step": 31024 }, { "epoch": 0.6494390019258143, "grad_norm": 0.31926771998405457, "learning_rate": 0.00017778395309483349, "loss": 11.6649, "step": 31025 }, { "epoch": 0.6494599346897765, "grad_norm": 0.29736050963401794, "learning_rate": 0.0001777825751504865, "loss": 11.6571, "step": 31026 }, { "epoch": 0.6494808674537386, "grad_norm": 0.3070638179779053, "learning_rate": 0.0001777811971687479, "loss": 11.6614, "step": 31027 }, { "epoch": 0.6495018002177008, "grad_norm": 0.23675097525119781, "learning_rate": 0.0001777798191496183, "loss": 11.6581, "step": 31028 }, { "epoch": 0.6495227329816629, "grad_norm": 0.4689868986606598, "learning_rate": 0.00017777844109309837, "loss": 11.6635, "step": 31029 }, { "epoch": 0.649543665745625, "grad_norm": 0.26351290941238403, "learning_rate": 0.00017777706299918877, "loss": 11.6581, "step": 31030 }, { "epoch": 0.6495645985095873, "grad_norm": 0.2902490496635437, "learning_rate": 0.0001777756848678902, "loss": 11.6676, "step": 31031 }, { "epoch": 0.6495855312735493, "grad_norm": 0.3872990608215332, "learning_rate": 0.0001777743066992033, "loss": 11.6855, "step": 31032 }, { "epoch": 0.6496064640375115, "grad_norm": 0.25472116470336914, "learning_rate": 0.0001777729284931287, "loss": 11.663, "step": 31033 }, { "epoch": 0.6496273968014736, "grad_norm": 0.3153124749660492, "learning_rate": 0.00017777155024966714, "loss": 11.6782, "step": 31034 }, { "epoch": 0.6496483295654358, "grad_norm": 0.29483935236930847, "learning_rate": 0.00017777017196881923, "loss": 11.6663, "step": 31035 }, { "epoch": 0.6496692623293979, "grad_norm": 0.2630348801612854, "learning_rate": 0.00017776879365058562, "loss": 11.6537, "step": 31036 }, { "epoch": 0.6496901950933601, "grad_norm": 0.2953663170337677, "learning_rate": 0.00017776741529496702, "loss": 11.6633, "step": 31037 }, { "epoch": 0.6497111278573223, "grad_norm": 0.2844347059726715, "learning_rate": 0.00017776603690196403, "loss": 11.6676, "step": 31038 }, { "epoch": 0.6497320606212844, "grad_norm": 0.31307220458984375, "learning_rate": 0.00017776465847157735, "loss": 11.6748, "step": 31039 }, { "epoch": 0.6497529933852466, "grad_norm": 0.27867811918258667, "learning_rate": 0.00017776328000380766, "loss": 11.6677, "step": 31040 }, { "epoch": 0.6497739261492087, "grad_norm": 0.2598021924495697, "learning_rate": 0.0001777619014986556, "loss": 11.6829, "step": 31041 }, { "epoch": 0.6497948589131709, "grad_norm": 0.30195334553718567, "learning_rate": 0.00017776052295612185, "loss": 11.6724, "step": 31042 }, { "epoch": 0.6498157916771331, "grad_norm": 0.2826633155345917, "learning_rate": 0.00017775914437620702, "loss": 11.6566, "step": 31043 }, { "epoch": 0.6498367244410952, "grad_norm": 0.2698644697666168, "learning_rate": 0.00017775776575891183, "loss": 11.6843, "step": 31044 }, { "epoch": 0.6498576572050574, "grad_norm": 0.3039945662021637, "learning_rate": 0.00017775638710423693, "loss": 11.6794, "step": 31045 }, { "epoch": 0.6498785899690195, "grad_norm": 0.25793176889419556, "learning_rate": 0.000177755008412183, "loss": 11.6782, "step": 31046 }, { "epoch": 0.6498995227329817, "grad_norm": 0.2847917377948761, "learning_rate": 0.00017775362968275063, "loss": 11.6549, "step": 31047 }, { "epoch": 0.6499204554969438, "grad_norm": 0.23475803434848785, "learning_rate": 0.00017775225091594057, "loss": 11.6458, "step": 31048 }, { "epoch": 0.649941388260906, "grad_norm": 0.2670447528362274, "learning_rate": 0.0001777508721117534, "loss": 11.6564, "step": 31049 }, { "epoch": 0.6499623210248682, "grad_norm": 0.327203631401062, "learning_rate": 0.00017774949327018985, "loss": 11.684, "step": 31050 }, { "epoch": 0.6499832537888303, "grad_norm": 0.24190151691436768, "learning_rate": 0.00017774811439125057, "loss": 11.6599, "step": 31051 }, { "epoch": 0.6500041865527925, "grad_norm": 0.28003332018852234, "learning_rate": 0.00017774673547493622, "loss": 11.6953, "step": 31052 }, { "epoch": 0.6500251193167546, "grad_norm": 0.22871047258377075, "learning_rate": 0.00017774535652124746, "loss": 11.6744, "step": 31053 }, { "epoch": 0.6500460520807168, "grad_norm": 0.30221766233444214, "learning_rate": 0.00017774397753018493, "loss": 11.6739, "step": 31054 }, { "epoch": 0.6500669848446788, "grad_norm": 0.3005484640598297, "learning_rate": 0.0001777425985017493, "loss": 11.686, "step": 31055 }, { "epoch": 0.650087917608641, "grad_norm": 0.2556488811969757, "learning_rate": 0.00017774121943594126, "loss": 11.6797, "step": 31056 }, { "epoch": 0.6501088503726032, "grad_norm": 0.2869243919849396, "learning_rate": 0.00017773984033276147, "loss": 11.678, "step": 31057 }, { "epoch": 0.6501297831365653, "grad_norm": 0.24907635152339935, "learning_rate": 0.00017773846119221057, "loss": 11.671, "step": 31058 }, { "epoch": 0.6501507159005275, "grad_norm": 0.27435770630836487, "learning_rate": 0.00017773708201428924, "loss": 11.6683, "step": 31059 }, { "epoch": 0.6501716486644896, "grad_norm": 0.357901006937027, "learning_rate": 0.00017773570279899813, "loss": 11.6684, "step": 31060 }, { "epoch": 0.6501925814284518, "grad_norm": 0.29760414361953735, "learning_rate": 0.0001777343235463379, "loss": 11.673, "step": 31061 }, { "epoch": 0.650213514192414, "grad_norm": 0.44512510299682617, "learning_rate": 0.00017773294425630924, "loss": 11.663, "step": 31062 }, { "epoch": 0.6502344469563761, "grad_norm": 0.3115992844104767, "learning_rate": 0.0001777315649289128, "loss": 11.6816, "step": 31063 }, { "epoch": 0.6502553797203383, "grad_norm": 0.268240362405777, "learning_rate": 0.00017773018556414922, "loss": 11.6736, "step": 31064 }, { "epoch": 0.6502763124843004, "grad_norm": 0.31750795245170593, "learning_rate": 0.00017772880616201918, "loss": 11.6745, "step": 31065 }, { "epoch": 0.6502972452482626, "grad_norm": 0.31049615144729614, "learning_rate": 0.00017772742672252337, "loss": 11.676, "step": 31066 }, { "epoch": 0.6503181780122247, "grad_norm": 0.2796696126461029, "learning_rate": 0.00017772604724566242, "loss": 11.6633, "step": 31067 }, { "epoch": 0.6503391107761869, "grad_norm": 0.3086475729942322, "learning_rate": 0.000177724667731437, "loss": 11.6539, "step": 31068 }, { "epoch": 0.6503600435401491, "grad_norm": 0.22939328849315643, "learning_rate": 0.00017772328817984776, "loss": 11.6676, "step": 31069 }, { "epoch": 0.6503809763041112, "grad_norm": 0.2651931941509247, "learning_rate": 0.00017772190859089538, "loss": 11.6666, "step": 31070 }, { "epoch": 0.6504019090680734, "grad_norm": 0.33933576941490173, "learning_rate": 0.00017772052896458052, "loss": 11.6813, "step": 31071 }, { "epoch": 0.6504228418320355, "grad_norm": 0.2869780361652374, "learning_rate": 0.00017771914930090387, "loss": 11.6821, "step": 31072 }, { "epoch": 0.6504437745959977, "grad_norm": 0.30684584379196167, "learning_rate": 0.00017771776959986605, "loss": 11.6718, "step": 31073 }, { "epoch": 0.6504647073599598, "grad_norm": 0.3364090919494629, "learning_rate": 0.00017771638986146773, "loss": 11.6807, "step": 31074 }, { "epoch": 0.650485640123922, "grad_norm": 0.288238525390625, "learning_rate": 0.0001777150100857096, "loss": 11.655, "step": 31075 }, { "epoch": 0.6505065728878842, "grad_norm": 0.31730759143829346, "learning_rate": 0.0001777136302725923, "loss": 11.6731, "step": 31076 }, { "epoch": 0.6505275056518462, "grad_norm": 0.32845890522003174, "learning_rate": 0.0001777122504221165, "loss": 11.667, "step": 31077 }, { "epoch": 0.6505484384158084, "grad_norm": 0.28285810351371765, "learning_rate": 0.00017771087053428287, "loss": 11.668, "step": 31078 }, { "epoch": 0.6505693711797705, "grad_norm": 0.3209276795387268, "learning_rate": 0.00017770949060909205, "loss": 11.6681, "step": 31079 }, { "epoch": 0.6505903039437327, "grad_norm": 0.27893462777137756, "learning_rate": 0.00017770811064654477, "loss": 11.6732, "step": 31080 }, { "epoch": 0.6506112367076948, "grad_norm": 0.2509429156780243, "learning_rate": 0.0001777067306466416, "loss": 11.6597, "step": 31081 }, { "epoch": 0.650632169471657, "grad_norm": 0.30275461077690125, "learning_rate": 0.00017770535060938325, "loss": 11.6564, "step": 31082 }, { "epoch": 0.6506531022356192, "grad_norm": 0.27030110359191895, "learning_rate": 0.0001777039705347704, "loss": 11.6707, "step": 31083 }, { "epoch": 0.6506740349995813, "grad_norm": 0.27133700251579285, "learning_rate": 0.00017770259042280365, "loss": 11.6791, "step": 31084 }, { "epoch": 0.6506949677635435, "grad_norm": 0.2794525623321533, "learning_rate": 0.00017770121027348376, "loss": 11.6787, "step": 31085 }, { "epoch": 0.6507159005275056, "grad_norm": 0.26713359355926514, "learning_rate": 0.0001776998300868113, "loss": 11.6673, "step": 31086 }, { "epoch": 0.6507368332914678, "grad_norm": 0.2798088490962982, "learning_rate": 0.000177698449862787, "loss": 11.6616, "step": 31087 }, { "epoch": 0.65075776605543, "grad_norm": 0.37293314933776855, "learning_rate": 0.0001776970696014115, "loss": 11.6642, "step": 31088 }, { "epoch": 0.6507786988193921, "grad_norm": 0.29008230566978455, "learning_rate": 0.00017769568930268545, "loss": 11.6727, "step": 31089 }, { "epoch": 0.6507996315833543, "grad_norm": 0.2730461359024048, "learning_rate": 0.00017769430896660954, "loss": 11.6735, "step": 31090 }, { "epoch": 0.6508205643473164, "grad_norm": 0.3019357919692993, "learning_rate": 0.0001776929285931844, "loss": 11.6744, "step": 31091 }, { "epoch": 0.6508414971112786, "grad_norm": 0.4155157506465912, "learning_rate": 0.0001776915481824107, "loss": 11.6548, "step": 31092 }, { "epoch": 0.6508624298752407, "grad_norm": 0.3108896017074585, "learning_rate": 0.00017769016773428917, "loss": 11.661, "step": 31093 }, { "epoch": 0.6508833626392029, "grad_norm": 0.2519264817237854, "learning_rate": 0.0001776887872488204, "loss": 11.6753, "step": 31094 }, { "epoch": 0.6509042954031651, "grad_norm": 0.26272109150886536, "learning_rate": 0.00017768740672600503, "loss": 11.6609, "step": 31095 }, { "epoch": 0.6509252281671272, "grad_norm": 0.3295505940914154, "learning_rate": 0.0001776860261658438, "loss": 11.6662, "step": 31096 }, { "epoch": 0.6509461609310894, "grad_norm": 0.32344186305999756, "learning_rate": 0.00017768464556833731, "loss": 11.6705, "step": 31097 }, { "epoch": 0.6509670936950515, "grad_norm": 0.31106317043304443, "learning_rate": 0.0001776832649334863, "loss": 11.6785, "step": 31098 }, { "epoch": 0.6509880264590137, "grad_norm": 0.42423468828201294, "learning_rate": 0.00017768188426129137, "loss": 11.7035, "step": 31099 }, { "epoch": 0.6510089592229757, "grad_norm": 0.26032593846321106, "learning_rate": 0.0001776805035517532, "loss": 11.6649, "step": 31100 }, { "epoch": 0.651029891986938, "grad_norm": 0.3335914611816406, "learning_rate": 0.00017767912280487248, "loss": 11.6737, "step": 31101 }, { "epoch": 0.6510508247509001, "grad_norm": 0.36417192220687866, "learning_rate": 0.00017767774202064979, "loss": 11.6621, "step": 31102 }, { "epoch": 0.6510717575148622, "grad_norm": 0.33360689878463745, "learning_rate": 0.00017767636119908587, "loss": 11.6706, "step": 31103 }, { "epoch": 0.6510926902788244, "grad_norm": 0.33806946873664856, "learning_rate": 0.0001776749803401814, "loss": 11.6725, "step": 31104 }, { "epoch": 0.6511136230427865, "grad_norm": 0.2763884365558624, "learning_rate": 0.000177673599443937, "loss": 11.6744, "step": 31105 }, { "epoch": 0.6511345558067487, "grad_norm": 0.3979654312133789, "learning_rate": 0.00017767221851035334, "loss": 11.6795, "step": 31106 }, { "epoch": 0.6511554885707109, "grad_norm": 0.25432440638542175, "learning_rate": 0.0001776708375394311, "loss": 11.6718, "step": 31107 }, { "epoch": 0.651176421334673, "grad_norm": 0.2726111114025116, "learning_rate": 0.0001776694565311709, "loss": 11.676, "step": 31108 }, { "epoch": 0.6511973540986352, "grad_norm": 0.349348783493042, "learning_rate": 0.00017766807548557343, "loss": 11.6624, "step": 31109 }, { "epoch": 0.6512182868625973, "grad_norm": 0.3348802328109741, "learning_rate": 0.00017766669440263938, "loss": 11.6797, "step": 31110 }, { "epoch": 0.6512392196265595, "grad_norm": 0.24224841594696045, "learning_rate": 0.0001776653132823694, "loss": 11.6606, "step": 31111 }, { "epoch": 0.6512601523905216, "grad_norm": 0.28223082423210144, "learning_rate": 0.00017766393212476414, "loss": 11.6699, "step": 31112 }, { "epoch": 0.6512810851544838, "grad_norm": 0.3092367649078369, "learning_rate": 0.00017766255092982428, "loss": 11.6742, "step": 31113 }, { "epoch": 0.651302017918446, "grad_norm": 0.31706395745277405, "learning_rate": 0.0001776611696975505, "loss": 11.6557, "step": 31114 }, { "epoch": 0.6513229506824081, "grad_norm": 0.30541089177131653, "learning_rate": 0.00017765978842794336, "loss": 11.6609, "step": 31115 }, { "epoch": 0.6513438834463703, "grad_norm": 0.3076734244823456, "learning_rate": 0.00017765840712100368, "loss": 11.6735, "step": 31116 }, { "epoch": 0.6513648162103324, "grad_norm": 0.28145167231559753, "learning_rate": 0.000177657025776732, "loss": 11.6629, "step": 31117 }, { "epoch": 0.6513857489742946, "grad_norm": 0.34061503410339355, "learning_rate": 0.00017765564439512906, "loss": 11.6908, "step": 31118 }, { "epoch": 0.6514066817382567, "grad_norm": 0.28774648904800415, "learning_rate": 0.0001776542629761955, "loss": 11.6735, "step": 31119 }, { "epoch": 0.6514276145022189, "grad_norm": 0.2197943925857544, "learning_rate": 0.00017765288151993196, "loss": 11.674, "step": 31120 }, { "epoch": 0.6514485472661811, "grad_norm": 0.27379941940307617, "learning_rate": 0.00017765150002633912, "loss": 11.6493, "step": 31121 }, { "epoch": 0.6514694800301432, "grad_norm": 0.36502423882484436, "learning_rate": 0.00017765011849541767, "loss": 11.6863, "step": 31122 }, { "epoch": 0.6514904127941054, "grad_norm": 0.29900410771369934, "learning_rate": 0.00017764873692716824, "loss": 11.6778, "step": 31123 }, { "epoch": 0.6515113455580674, "grad_norm": 0.2784956991672516, "learning_rate": 0.0001776473553215915, "loss": 11.6767, "step": 31124 }, { "epoch": 0.6515322783220296, "grad_norm": 0.26417455077171326, "learning_rate": 0.00017764597367868814, "loss": 11.6612, "step": 31125 }, { "epoch": 0.6515532110859918, "grad_norm": 0.2918906509876251, "learning_rate": 0.00017764459199845874, "loss": 11.6682, "step": 31126 }, { "epoch": 0.6515741438499539, "grad_norm": 0.34563127160072327, "learning_rate": 0.0001776432102809041, "loss": 11.661, "step": 31127 }, { "epoch": 0.6515950766139161, "grad_norm": 0.3404632806777954, "learning_rate": 0.0001776418285260248, "loss": 11.6615, "step": 31128 }, { "epoch": 0.6516160093778782, "grad_norm": 0.3179818093776703, "learning_rate": 0.00017764044673382153, "loss": 11.6648, "step": 31129 }, { "epoch": 0.6516369421418404, "grad_norm": 0.2964440882205963, "learning_rate": 0.00017763906490429488, "loss": 11.6538, "step": 31130 }, { "epoch": 0.6516578749058025, "grad_norm": 0.29169508814811707, "learning_rate": 0.00017763768303744563, "loss": 11.6792, "step": 31131 }, { "epoch": 0.6516788076697647, "grad_norm": 0.2847978174686432, "learning_rate": 0.00017763630113327435, "loss": 11.6512, "step": 31132 }, { "epoch": 0.6516997404337269, "grad_norm": 0.2941986322402954, "learning_rate": 0.0001776349191917818, "loss": 11.6757, "step": 31133 }, { "epoch": 0.651720673197689, "grad_norm": 0.5783135294914246, "learning_rate": 0.00017763353721296857, "loss": 11.6957, "step": 31134 }, { "epoch": 0.6517416059616512, "grad_norm": 0.29645946621894836, "learning_rate": 0.0001776321551968353, "loss": 11.6681, "step": 31135 }, { "epoch": 0.6517625387256133, "grad_norm": 0.3489439785480499, "learning_rate": 0.00017763077314338275, "loss": 11.6785, "step": 31136 }, { "epoch": 0.6517834714895755, "grad_norm": 0.2387371063232422, "learning_rate": 0.0001776293910526115, "loss": 11.6596, "step": 31137 }, { "epoch": 0.6518044042535376, "grad_norm": 0.23090940713882446, "learning_rate": 0.00017762800892452227, "loss": 11.6779, "step": 31138 }, { "epoch": 0.6518253370174998, "grad_norm": 0.3037680685520172, "learning_rate": 0.00017762662675911567, "loss": 11.6522, "step": 31139 }, { "epoch": 0.651846269781462, "grad_norm": 0.27261829376220703, "learning_rate": 0.0001776252445563924, "loss": 11.6524, "step": 31140 }, { "epoch": 0.6518672025454241, "grad_norm": 0.28889620304107666, "learning_rate": 0.00017762386231635316, "loss": 11.6647, "step": 31141 }, { "epoch": 0.6518881353093863, "grad_norm": 0.2616947591304779, "learning_rate": 0.00017762248003899852, "loss": 11.6735, "step": 31142 }, { "epoch": 0.6519090680733484, "grad_norm": 0.26634079217910767, "learning_rate": 0.00017762109772432924, "loss": 11.6692, "step": 31143 }, { "epoch": 0.6519300008373106, "grad_norm": 0.33017605543136597, "learning_rate": 0.00017761971537234592, "loss": 11.6644, "step": 31144 }, { "epoch": 0.6519509336012728, "grad_norm": 0.25974541902542114, "learning_rate": 0.00017761833298304925, "loss": 11.6513, "step": 31145 }, { "epoch": 0.6519718663652349, "grad_norm": 0.29076269268989563, "learning_rate": 0.0001776169505564399, "loss": 11.6563, "step": 31146 }, { "epoch": 0.651992799129197, "grad_norm": 0.2691224217414856, "learning_rate": 0.00017761556809251853, "loss": 11.6749, "step": 31147 }, { "epoch": 0.6520137318931591, "grad_norm": 0.31064555048942566, "learning_rate": 0.00017761418559128579, "loss": 11.6613, "step": 31148 }, { "epoch": 0.6520346646571213, "grad_norm": 0.34081217646598816, "learning_rate": 0.00017761280305274234, "loss": 11.6635, "step": 31149 }, { "epoch": 0.6520555974210834, "grad_norm": 0.4241234064102173, "learning_rate": 0.00017761142047688887, "loss": 11.6759, "step": 31150 }, { "epoch": 0.6520765301850456, "grad_norm": 0.3039337396621704, "learning_rate": 0.00017761003786372605, "loss": 11.6773, "step": 31151 }, { "epoch": 0.6520974629490078, "grad_norm": 0.3521549105644226, "learning_rate": 0.00017760865521325453, "loss": 11.6764, "step": 31152 }, { "epoch": 0.6521183957129699, "grad_norm": 0.5059330463409424, "learning_rate": 0.000177607272525475, "loss": 11.6721, "step": 31153 }, { "epoch": 0.6521393284769321, "grad_norm": 0.3883800804615021, "learning_rate": 0.00017760588980038803, "loss": 11.6696, "step": 31154 }, { "epoch": 0.6521602612408942, "grad_norm": 0.3290180265903473, "learning_rate": 0.00017760450703799437, "loss": 11.664, "step": 31155 }, { "epoch": 0.6521811940048564, "grad_norm": 0.368887335062027, "learning_rate": 0.0001776031242382947, "loss": 11.6733, "step": 31156 }, { "epoch": 0.6522021267688185, "grad_norm": 0.3006424903869629, "learning_rate": 0.00017760174140128964, "loss": 11.6638, "step": 31157 }, { "epoch": 0.6522230595327807, "grad_norm": 0.3363898992538452, "learning_rate": 0.00017760035852697987, "loss": 11.6558, "step": 31158 }, { "epoch": 0.6522439922967429, "grad_norm": 0.33167722821235657, "learning_rate": 0.00017759897561536606, "loss": 11.6577, "step": 31159 }, { "epoch": 0.652264925060705, "grad_norm": 0.3137238323688507, "learning_rate": 0.00017759759266644884, "loss": 11.6761, "step": 31160 }, { "epoch": 0.6522858578246672, "grad_norm": 0.41592541337013245, "learning_rate": 0.0001775962096802289, "loss": 11.6642, "step": 31161 }, { "epoch": 0.6523067905886293, "grad_norm": 0.2700451910495758, "learning_rate": 0.00017759482665670695, "loss": 11.6709, "step": 31162 }, { "epoch": 0.6523277233525915, "grad_norm": 0.25811880826950073, "learning_rate": 0.00017759344359588358, "loss": 11.673, "step": 31163 }, { "epoch": 0.6523486561165537, "grad_norm": 0.3631749153137207, "learning_rate": 0.0001775920604977595, "loss": 11.6683, "step": 31164 }, { "epoch": 0.6523695888805158, "grad_norm": 0.2828395962715149, "learning_rate": 0.00017759067736233534, "loss": 11.669, "step": 31165 }, { "epoch": 0.652390521644478, "grad_norm": 0.28336626291275024, "learning_rate": 0.00017758929418961182, "loss": 11.6803, "step": 31166 }, { "epoch": 0.6524114544084401, "grad_norm": 0.2656364440917969, "learning_rate": 0.00017758791097958955, "loss": 11.6632, "step": 31167 }, { "epoch": 0.6524323871724023, "grad_norm": 0.2743349075317383, "learning_rate": 0.00017758652773226924, "loss": 11.6754, "step": 31168 }, { "epoch": 0.6524533199363644, "grad_norm": 0.2379750907421112, "learning_rate": 0.0001775851444476515, "loss": 11.6718, "step": 31169 }, { "epoch": 0.6524742527003266, "grad_norm": 0.3265259563922882, "learning_rate": 0.00017758376112573702, "loss": 11.6721, "step": 31170 }, { "epoch": 0.6524951854642888, "grad_norm": 0.31216803193092346, "learning_rate": 0.0001775823777665265, "loss": 11.6742, "step": 31171 }, { "epoch": 0.6525161182282508, "grad_norm": 0.2746124267578125, "learning_rate": 0.00017758099437002059, "loss": 11.6743, "step": 31172 }, { "epoch": 0.652537050992213, "grad_norm": 0.3228471279144287, "learning_rate": 0.0001775796109362199, "loss": 11.6826, "step": 31173 }, { "epoch": 0.6525579837561751, "grad_norm": 0.2820080518722534, "learning_rate": 0.00017757822746512514, "loss": 11.6696, "step": 31174 }, { "epoch": 0.6525789165201373, "grad_norm": 0.29788073897361755, "learning_rate": 0.000177576843956737, "loss": 11.6822, "step": 31175 }, { "epoch": 0.6525998492840994, "grad_norm": 0.26921749114990234, "learning_rate": 0.0001775754604110561, "loss": 11.6709, "step": 31176 }, { "epoch": 0.6526207820480616, "grad_norm": 0.25408709049224854, "learning_rate": 0.00017757407682808313, "loss": 11.6713, "step": 31177 }, { "epoch": 0.6526417148120238, "grad_norm": 0.3272329568862915, "learning_rate": 0.00017757269320781873, "loss": 11.6554, "step": 31178 }, { "epoch": 0.6526626475759859, "grad_norm": 0.24884550273418427, "learning_rate": 0.00017757130955026358, "loss": 11.6747, "step": 31179 }, { "epoch": 0.6526835803399481, "grad_norm": 0.3202579915523529, "learning_rate": 0.00017756992585541835, "loss": 11.6624, "step": 31180 }, { "epoch": 0.6527045131039102, "grad_norm": 0.335723340511322, "learning_rate": 0.00017756854212328373, "loss": 11.6783, "step": 31181 }, { "epoch": 0.6527254458678724, "grad_norm": 0.26943832635879517, "learning_rate": 0.00017756715835386033, "loss": 11.6892, "step": 31182 }, { "epoch": 0.6527463786318346, "grad_norm": 0.26783856749534607, "learning_rate": 0.00017756577454714886, "loss": 11.6729, "step": 31183 }, { "epoch": 0.6527673113957967, "grad_norm": 0.3679250180721283, "learning_rate": 0.00017756439070314994, "loss": 11.6849, "step": 31184 }, { "epoch": 0.6527882441597589, "grad_norm": 0.32908719778060913, "learning_rate": 0.00017756300682186427, "loss": 11.6738, "step": 31185 }, { "epoch": 0.652809176923721, "grad_norm": 0.29491955041885376, "learning_rate": 0.00017756162290329255, "loss": 11.6524, "step": 31186 }, { "epoch": 0.6528301096876832, "grad_norm": 0.31928834319114685, "learning_rate": 0.00017756023894743535, "loss": 11.6752, "step": 31187 }, { "epoch": 0.6528510424516453, "grad_norm": 0.30566444993019104, "learning_rate": 0.00017755885495429342, "loss": 11.6681, "step": 31188 }, { "epoch": 0.6528719752156075, "grad_norm": 0.3787807822227478, "learning_rate": 0.00017755747092386737, "loss": 11.6825, "step": 31189 }, { "epoch": 0.6528929079795697, "grad_norm": 0.311886727809906, "learning_rate": 0.00017755608685615792, "loss": 11.6752, "step": 31190 }, { "epoch": 0.6529138407435318, "grad_norm": 0.33982691168785095, "learning_rate": 0.0001775547027511657, "loss": 11.6745, "step": 31191 }, { "epoch": 0.652934773507494, "grad_norm": 0.3111784756183624, "learning_rate": 0.00017755331860889136, "loss": 11.6583, "step": 31192 }, { "epoch": 0.652955706271456, "grad_norm": 0.27396464347839355, "learning_rate": 0.00017755193442933558, "loss": 11.6659, "step": 31193 }, { "epoch": 0.6529766390354182, "grad_norm": 0.34854233264923096, "learning_rate": 0.00017755055021249905, "loss": 11.6656, "step": 31194 }, { "epoch": 0.6529975717993803, "grad_norm": 0.3244265019893646, "learning_rate": 0.00017754916595838242, "loss": 11.6612, "step": 31195 }, { "epoch": 0.6530185045633425, "grad_norm": 0.2873934507369995, "learning_rate": 0.00017754778166698633, "loss": 11.6698, "step": 31196 }, { "epoch": 0.6530394373273047, "grad_norm": 0.27442654967308044, "learning_rate": 0.00017754639733831147, "loss": 11.6813, "step": 31197 }, { "epoch": 0.6530603700912668, "grad_norm": 0.27587470412254333, "learning_rate": 0.00017754501297235855, "loss": 11.6619, "step": 31198 }, { "epoch": 0.653081302855229, "grad_norm": 0.23852109909057617, "learning_rate": 0.00017754362856912815, "loss": 11.6672, "step": 31199 }, { "epoch": 0.6531022356191911, "grad_norm": 0.2914687693119049, "learning_rate": 0.00017754224412862098, "loss": 11.6594, "step": 31200 }, { "epoch": 0.6531231683831533, "grad_norm": 0.3392367959022522, "learning_rate": 0.0001775408596508377, "loss": 11.6595, "step": 31201 }, { "epoch": 0.6531441011471155, "grad_norm": 0.2972985804080963, "learning_rate": 0.00017753947513577894, "loss": 11.6646, "step": 31202 }, { "epoch": 0.6531650339110776, "grad_norm": 0.291420578956604, "learning_rate": 0.00017753809058344543, "loss": 11.6718, "step": 31203 }, { "epoch": 0.6531859666750398, "grad_norm": 0.30671748518943787, "learning_rate": 0.00017753670599383783, "loss": 11.6653, "step": 31204 }, { "epoch": 0.6532068994390019, "grad_norm": 0.2959456145763397, "learning_rate": 0.00017753532136695674, "loss": 11.6581, "step": 31205 }, { "epoch": 0.6532278322029641, "grad_norm": 0.2844076156616211, "learning_rate": 0.0001775339367028029, "loss": 11.6642, "step": 31206 }, { "epoch": 0.6532487649669262, "grad_norm": 0.3570568263530731, "learning_rate": 0.00017753255200137692, "loss": 11.6773, "step": 31207 }, { "epoch": 0.6532696977308884, "grad_norm": 0.37293508648872375, "learning_rate": 0.00017753116726267951, "loss": 11.6757, "step": 31208 }, { "epoch": 0.6532906304948506, "grad_norm": 0.29498040676116943, "learning_rate": 0.0001775297824867113, "loss": 11.6624, "step": 31209 }, { "epoch": 0.6533115632588127, "grad_norm": 0.32480940222740173, "learning_rate": 0.00017752839767347297, "loss": 11.6665, "step": 31210 }, { "epoch": 0.6533324960227749, "grad_norm": 0.28475239872932434, "learning_rate": 0.00017752701282296518, "loss": 11.6651, "step": 31211 }, { "epoch": 0.653353428786737, "grad_norm": 0.30259227752685547, "learning_rate": 0.00017752562793518862, "loss": 11.6778, "step": 31212 }, { "epoch": 0.6533743615506992, "grad_norm": 0.29471608996391296, "learning_rate": 0.00017752424301014388, "loss": 11.6716, "step": 31213 }, { "epoch": 0.6533952943146613, "grad_norm": 0.31987932324409485, "learning_rate": 0.0001775228580478318, "loss": 11.6828, "step": 31214 }, { "epoch": 0.6534162270786235, "grad_norm": 0.2831944525241852, "learning_rate": 0.00017752147304825281, "loss": 11.6746, "step": 31215 }, { "epoch": 0.6534371598425857, "grad_norm": 0.281428724527359, "learning_rate": 0.00017752008801140775, "loss": 11.6814, "step": 31216 }, { "epoch": 0.6534580926065477, "grad_norm": 0.36898311972618103, "learning_rate": 0.00017751870293729723, "loss": 11.6597, "step": 31217 }, { "epoch": 0.65347902537051, "grad_norm": 0.3095979690551758, "learning_rate": 0.0001775173178259219, "loss": 11.6432, "step": 31218 }, { "epoch": 0.653499958134472, "grad_norm": 0.3369666039943695, "learning_rate": 0.00017751593267728244, "loss": 11.6716, "step": 31219 }, { "epoch": 0.6535208908984342, "grad_norm": 0.2442927211523056, "learning_rate": 0.00017751454749137952, "loss": 11.6569, "step": 31220 }, { "epoch": 0.6535418236623964, "grad_norm": 0.24146397411823273, "learning_rate": 0.0001775131622682138, "loss": 11.6675, "step": 31221 }, { "epoch": 0.6535627564263585, "grad_norm": 0.3442741930484772, "learning_rate": 0.00017751177700778598, "loss": 11.6833, "step": 31222 }, { "epoch": 0.6535836891903207, "grad_norm": 0.29493486881256104, "learning_rate": 0.00017751039171009668, "loss": 11.6658, "step": 31223 }, { "epoch": 0.6536046219542828, "grad_norm": 0.3105531632900238, "learning_rate": 0.00017750900637514657, "loss": 11.662, "step": 31224 }, { "epoch": 0.653625554718245, "grad_norm": 0.41589847207069397, "learning_rate": 0.0001775076210029363, "loss": 11.6843, "step": 31225 }, { "epoch": 0.6536464874822071, "grad_norm": 0.2991035580635071, "learning_rate": 0.00017750623559346663, "loss": 11.6658, "step": 31226 }, { "epoch": 0.6536674202461693, "grad_norm": 0.2555955648422241, "learning_rate": 0.00017750485014673812, "loss": 11.671, "step": 31227 }, { "epoch": 0.6536883530101315, "grad_norm": 0.2934134304523468, "learning_rate": 0.0001775034646627515, "loss": 11.6533, "step": 31228 }, { "epoch": 0.6537092857740936, "grad_norm": 0.28010568022727966, "learning_rate": 0.00017750207914150738, "loss": 11.6817, "step": 31229 }, { "epoch": 0.6537302185380558, "grad_norm": 0.2485448271036148, "learning_rate": 0.00017750069358300646, "loss": 11.67, "step": 31230 }, { "epoch": 0.6537511513020179, "grad_norm": 0.3408104479312897, "learning_rate": 0.00017749930798724944, "loss": 11.6661, "step": 31231 }, { "epoch": 0.6537720840659801, "grad_norm": 0.26438987255096436, "learning_rate": 0.0001774979223542369, "loss": 11.677, "step": 31232 }, { "epoch": 0.6537930168299422, "grad_norm": 0.26643452048301697, "learning_rate": 0.0001774965366839696, "loss": 11.6652, "step": 31233 }, { "epoch": 0.6538139495939044, "grad_norm": 0.34803506731987, "learning_rate": 0.00017749515097644812, "loss": 11.6716, "step": 31234 }, { "epoch": 0.6538348823578666, "grad_norm": 0.24429987370967865, "learning_rate": 0.00017749376523167317, "loss": 11.6631, "step": 31235 }, { "epoch": 0.6538558151218287, "grad_norm": 0.2598797678947449, "learning_rate": 0.00017749237944964544, "loss": 11.675, "step": 31236 }, { "epoch": 0.6538767478857909, "grad_norm": 0.3115817606449127, "learning_rate": 0.00017749099363036558, "loss": 11.6724, "step": 31237 }, { "epoch": 0.653897680649753, "grad_norm": 0.3623855412006378, "learning_rate": 0.0001774896077738342, "loss": 11.664, "step": 31238 }, { "epoch": 0.6539186134137152, "grad_norm": 0.29312098026275635, "learning_rate": 0.00017748822188005207, "loss": 11.6731, "step": 31239 }, { "epoch": 0.6539395461776774, "grad_norm": 0.3483279049396515, "learning_rate": 0.00017748683594901978, "loss": 11.6729, "step": 31240 }, { "epoch": 0.6539604789416394, "grad_norm": 0.301899790763855, "learning_rate": 0.00017748544998073799, "loss": 11.6802, "step": 31241 }, { "epoch": 0.6539814117056016, "grad_norm": 0.3048730492591858, "learning_rate": 0.0001774840639752074, "loss": 11.6564, "step": 31242 }, { "epoch": 0.6540023444695637, "grad_norm": 0.23822978138923645, "learning_rate": 0.0001774826779324287, "loss": 11.6633, "step": 31243 }, { "epoch": 0.6540232772335259, "grad_norm": 0.26386791467666626, "learning_rate": 0.00017748129185240247, "loss": 11.6813, "step": 31244 }, { "epoch": 0.654044209997488, "grad_norm": 0.343161404132843, "learning_rate": 0.00017747990573512947, "loss": 11.6745, "step": 31245 }, { "epoch": 0.6540651427614502, "grad_norm": 0.30427438020706177, "learning_rate": 0.0001774785195806103, "loss": 11.6739, "step": 31246 }, { "epoch": 0.6540860755254124, "grad_norm": 0.2362121194601059, "learning_rate": 0.00017747713338884567, "loss": 11.6775, "step": 31247 }, { "epoch": 0.6541070082893745, "grad_norm": 0.31043654680252075, "learning_rate": 0.00017747574715983624, "loss": 11.6538, "step": 31248 }, { "epoch": 0.6541279410533367, "grad_norm": 0.2951648533344269, "learning_rate": 0.00017747436089358268, "loss": 11.6714, "step": 31249 }, { "epoch": 0.6541488738172988, "grad_norm": 0.4099326431751251, "learning_rate": 0.0001774729745900856, "loss": 11.6679, "step": 31250 }, { "epoch": 0.654169806581261, "grad_norm": 0.30244889855384827, "learning_rate": 0.0001774715882493457, "loss": 11.6625, "step": 31251 }, { "epoch": 0.6541907393452231, "grad_norm": 0.28720271587371826, "learning_rate": 0.0001774702018713637, "loss": 11.6742, "step": 31252 }, { "epoch": 0.6542116721091853, "grad_norm": 0.30767181515693665, "learning_rate": 0.00017746881545614018, "loss": 11.6722, "step": 31253 }, { "epoch": 0.6542326048731475, "grad_norm": 0.22405271232128143, "learning_rate": 0.00017746742900367587, "loss": 11.6819, "step": 31254 }, { "epoch": 0.6542535376371096, "grad_norm": 0.32505765557289124, "learning_rate": 0.0001774660425139714, "loss": 11.6683, "step": 31255 }, { "epoch": 0.6542744704010718, "grad_norm": 0.25405803322792053, "learning_rate": 0.00017746465598702748, "loss": 11.6671, "step": 31256 }, { "epoch": 0.6542954031650339, "grad_norm": 0.4059585928916931, "learning_rate": 0.00017746326942284473, "loss": 11.6818, "step": 31257 }, { "epoch": 0.6543163359289961, "grad_norm": 0.3068751096725464, "learning_rate": 0.00017746188282142383, "loss": 11.6428, "step": 31258 }, { "epoch": 0.6543372686929582, "grad_norm": 0.21697776019573212, "learning_rate": 0.00017746049618276545, "loss": 11.6675, "step": 31259 }, { "epoch": 0.6543582014569204, "grad_norm": 0.22254543006420135, "learning_rate": 0.00017745910950687027, "loss": 11.676, "step": 31260 }, { "epoch": 0.6543791342208826, "grad_norm": 0.3073175549507141, "learning_rate": 0.00017745772279373893, "loss": 11.6876, "step": 31261 }, { "epoch": 0.6544000669848447, "grad_norm": 0.33787089586257935, "learning_rate": 0.00017745633604337213, "loss": 11.6593, "step": 31262 }, { "epoch": 0.6544209997488069, "grad_norm": 0.27187633514404297, "learning_rate": 0.00017745494925577052, "loss": 11.653, "step": 31263 }, { "epoch": 0.6544419325127689, "grad_norm": 0.3058737516403198, "learning_rate": 0.00017745356243093475, "loss": 11.6771, "step": 31264 }, { "epoch": 0.6544628652767311, "grad_norm": 0.30520734190940857, "learning_rate": 0.0001774521755688655, "loss": 11.6734, "step": 31265 }, { "epoch": 0.6544837980406933, "grad_norm": 0.3097735345363617, "learning_rate": 0.00017745078866956342, "loss": 11.691, "step": 31266 }, { "epoch": 0.6545047308046554, "grad_norm": 0.2748728096485138, "learning_rate": 0.0001774494017330292, "loss": 11.6576, "step": 31267 }, { "epoch": 0.6545256635686176, "grad_norm": 0.3211677670478821, "learning_rate": 0.00017744801475926353, "loss": 11.6739, "step": 31268 }, { "epoch": 0.6545465963325797, "grad_norm": 0.3012228012084961, "learning_rate": 0.00017744662774826702, "loss": 11.6621, "step": 31269 }, { "epoch": 0.6545675290965419, "grad_norm": 0.29276755452156067, "learning_rate": 0.00017744524070004036, "loss": 11.6694, "step": 31270 }, { "epoch": 0.654588461860504, "grad_norm": 0.26509425044059753, "learning_rate": 0.00017744385361458424, "loss": 11.6698, "step": 31271 }, { "epoch": 0.6546093946244662, "grad_norm": 0.27008748054504395, "learning_rate": 0.0001774424664918993, "loss": 11.6703, "step": 31272 }, { "epoch": 0.6546303273884284, "grad_norm": 0.320722371339798, "learning_rate": 0.0001774410793319862, "loss": 11.6623, "step": 31273 }, { "epoch": 0.6546512601523905, "grad_norm": 0.305415540933609, "learning_rate": 0.00017743969213484563, "loss": 11.6761, "step": 31274 }, { "epoch": 0.6546721929163527, "grad_norm": 0.3064540922641754, "learning_rate": 0.00017743830490047827, "loss": 11.6689, "step": 31275 }, { "epoch": 0.6546931256803148, "grad_norm": 0.2784036695957184, "learning_rate": 0.00017743691762888474, "loss": 11.6793, "step": 31276 }, { "epoch": 0.654714058444277, "grad_norm": 0.2972833514213562, "learning_rate": 0.00017743553032006576, "loss": 11.669, "step": 31277 }, { "epoch": 0.6547349912082391, "grad_norm": 0.3103398084640503, "learning_rate": 0.0001774341429740219, "loss": 11.6747, "step": 31278 }, { "epoch": 0.6547559239722013, "grad_norm": 0.2941056191921234, "learning_rate": 0.00017743275559075396, "loss": 11.6645, "step": 31279 }, { "epoch": 0.6547768567361635, "grad_norm": 0.2267722636461258, "learning_rate": 0.00017743136817026254, "loss": 11.6626, "step": 31280 }, { "epoch": 0.6547977895001256, "grad_norm": 0.3098872900009155, "learning_rate": 0.0001774299807125483, "loss": 11.6626, "step": 31281 }, { "epoch": 0.6548187222640878, "grad_norm": 0.324795663356781, "learning_rate": 0.0001774285932176119, "loss": 11.6614, "step": 31282 }, { "epoch": 0.6548396550280499, "grad_norm": 0.3951445519924164, "learning_rate": 0.00017742720568545407, "loss": 11.666, "step": 31283 }, { "epoch": 0.6548605877920121, "grad_norm": 0.37885984778404236, "learning_rate": 0.00017742581811607536, "loss": 11.6785, "step": 31284 }, { "epoch": 0.6548815205559743, "grad_norm": 0.26603588461875916, "learning_rate": 0.00017742443050947654, "loss": 11.6605, "step": 31285 }, { "epoch": 0.6549024533199364, "grad_norm": 0.26229164004325867, "learning_rate": 0.00017742304286565825, "loss": 11.6704, "step": 31286 }, { "epoch": 0.6549233860838986, "grad_norm": 0.32638153433799744, "learning_rate": 0.00017742165518462114, "loss": 11.6474, "step": 31287 }, { "epoch": 0.6549443188478606, "grad_norm": 0.3432716131210327, "learning_rate": 0.00017742026746636591, "loss": 11.6683, "step": 31288 }, { "epoch": 0.6549652516118228, "grad_norm": 0.2555799186229706, "learning_rate": 0.0001774188797108932, "loss": 11.6707, "step": 31289 }, { "epoch": 0.6549861843757849, "grad_norm": 0.2542957663536072, "learning_rate": 0.00017741749191820368, "loss": 11.677, "step": 31290 }, { "epoch": 0.6550071171397471, "grad_norm": 0.2853612005710602, "learning_rate": 0.000177416104088298, "loss": 11.67, "step": 31291 }, { "epoch": 0.6550280499037093, "grad_norm": 0.32543638348579407, "learning_rate": 0.00017741471622117688, "loss": 11.6758, "step": 31292 }, { "epoch": 0.6550489826676714, "grad_norm": 0.3357076048851013, "learning_rate": 0.00017741332831684096, "loss": 11.6829, "step": 31293 }, { "epoch": 0.6550699154316336, "grad_norm": 0.7017821073532104, "learning_rate": 0.0001774119403752909, "loss": 11.6249, "step": 31294 }, { "epoch": 0.6550908481955957, "grad_norm": 0.24296699464321136, "learning_rate": 0.00017741055239652733, "loss": 11.6641, "step": 31295 }, { "epoch": 0.6551117809595579, "grad_norm": 0.26862555742263794, "learning_rate": 0.00017740916438055098, "loss": 11.6804, "step": 31296 }, { "epoch": 0.65513271372352, "grad_norm": 0.3605348467826843, "learning_rate": 0.00017740777632736248, "loss": 11.695, "step": 31297 }, { "epoch": 0.6551536464874822, "grad_norm": 0.302288293838501, "learning_rate": 0.0001774063882369625, "loss": 11.6724, "step": 31298 }, { "epoch": 0.6551745792514444, "grad_norm": 0.25437527894973755, "learning_rate": 0.00017740500010935176, "loss": 11.6608, "step": 31299 }, { "epoch": 0.6551955120154065, "grad_norm": 0.3415709137916565, "learning_rate": 0.00017740361194453085, "loss": 11.6778, "step": 31300 }, { "epoch": 0.6552164447793687, "grad_norm": 0.3645360767841339, "learning_rate": 0.0001774022237425005, "loss": 11.6723, "step": 31301 }, { "epoch": 0.6552373775433308, "grad_norm": 0.27942362427711487, "learning_rate": 0.00017740083550326133, "loss": 11.6827, "step": 31302 }, { "epoch": 0.655258310307293, "grad_norm": 0.36223718523979187, "learning_rate": 0.00017739944722681403, "loss": 11.6624, "step": 31303 }, { "epoch": 0.6552792430712552, "grad_norm": 0.26638635993003845, "learning_rate": 0.0001773980589131593, "loss": 11.6732, "step": 31304 }, { "epoch": 0.6553001758352173, "grad_norm": 0.3426193296909332, "learning_rate": 0.0001773966705622977, "loss": 11.6755, "step": 31305 }, { "epoch": 0.6553211085991795, "grad_norm": 0.28424614667892456, "learning_rate": 0.00017739528217423006, "loss": 11.679, "step": 31306 }, { "epoch": 0.6553420413631416, "grad_norm": 0.25529029965400696, "learning_rate": 0.00017739389374895687, "loss": 11.6739, "step": 31307 }, { "epoch": 0.6553629741271038, "grad_norm": 0.2820260226726532, "learning_rate": 0.00017739250528647893, "loss": 11.6906, "step": 31308 }, { "epoch": 0.6553839068910658, "grad_norm": 0.31318533420562744, "learning_rate": 0.00017739111678679686, "loss": 11.6606, "step": 31309 }, { "epoch": 0.655404839655028, "grad_norm": 0.32574573159217834, "learning_rate": 0.00017738972824991132, "loss": 11.6858, "step": 31310 }, { "epoch": 0.6554257724189902, "grad_norm": 0.2954556941986084, "learning_rate": 0.00017738833967582298, "loss": 11.6702, "step": 31311 }, { "epoch": 0.6554467051829523, "grad_norm": 0.288094162940979, "learning_rate": 0.00017738695106453252, "loss": 11.6804, "step": 31312 }, { "epoch": 0.6554676379469145, "grad_norm": 0.2644132971763611, "learning_rate": 0.00017738556241604064, "loss": 11.6655, "step": 31313 }, { "epoch": 0.6554885707108766, "grad_norm": 0.28469064831733704, "learning_rate": 0.0001773841737303479, "loss": 11.6558, "step": 31314 }, { "epoch": 0.6555095034748388, "grad_norm": 0.2794581651687622, "learning_rate": 0.00017738278500745505, "loss": 11.6682, "step": 31315 }, { "epoch": 0.6555304362388009, "grad_norm": 0.31438031792640686, "learning_rate": 0.00017738139624736278, "loss": 11.6685, "step": 31316 }, { "epoch": 0.6555513690027631, "grad_norm": 0.3336620330810547, "learning_rate": 0.00017738000745007174, "loss": 11.6695, "step": 31317 }, { "epoch": 0.6555723017667253, "grad_norm": 0.28645941615104675, "learning_rate": 0.00017737861861558254, "loss": 11.6578, "step": 31318 }, { "epoch": 0.6555932345306874, "grad_norm": 0.27129197120666504, "learning_rate": 0.00017737722974389588, "loss": 11.6785, "step": 31319 }, { "epoch": 0.6556141672946496, "grad_norm": 0.26466718316078186, "learning_rate": 0.00017737584083501244, "loss": 11.667, "step": 31320 }, { "epoch": 0.6556351000586117, "grad_norm": 0.30388808250427246, "learning_rate": 0.0001773744518889329, "loss": 11.6935, "step": 31321 }, { "epoch": 0.6556560328225739, "grad_norm": 0.2714124917984009, "learning_rate": 0.0001773730629056579, "loss": 11.6688, "step": 31322 }, { "epoch": 0.6556769655865361, "grad_norm": 0.3258518576622009, "learning_rate": 0.00017737167388518814, "loss": 11.6901, "step": 31323 }, { "epoch": 0.6556978983504982, "grad_norm": 0.2886372208595276, "learning_rate": 0.00017737028482752421, "loss": 11.6759, "step": 31324 }, { "epoch": 0.6557188311144604, "grad_norm": 0.30848243832588196, "learning_rate": 0.0001773688957326669, "loss": 11.6685, "step": 31325 }, { "epoch": 0.6557397638784225, "grad_norm": 0.3156946897506714, "learning_rate": 0.00017736750660061676, "loss": 11.6661, "step": 31326 }, { "epoch": 0.6557606966423847, "grad_norm": 0.38280361890792847, "learning_rate": 0.00017736611743137456, "loss": 11.6603, "step": 31327 }, { "epoch": 0.6557816294063468, "grad_norm": 0.2695581018924713, "learning_rate": 0.00017736472822494088, "loss": 11.6864, "step": 31328 }, { "epoch": 0.655802562170309, "grad_norm": 0.3263327479362488, "learning_rate": 0.00017736333898131642, "loss": 11.683, "step": 31329 }, { "epoch": 0.6558234949342712, "grad_norm": 0.35423070192337036, "learning_rate": 0.00017736194970050187, "loss": 11.6837, "step": 31330 }, { "epoch": 0.6558444276982333, "grad_norm": 0.46667173504829407, "learning_rate": 0.0001773605603824979, "loss": 11.6782, "step": 31331 }, { "epoch": 0.6558653604621955, "grad_norm": 0.35717639327049255, "learning_rate": 0.00017735917102730512, "loss": 11.6768, "step": 31332 }, { "epoch": 0.6558862932261575, "grad_norm": 0.2992684543132782, "learning_rate": 0.00017735778163492424, "loss": 11.6807, "step": 31333 }, { "epoch": 0.6559072259901197, "grad_norm": 0.2895239293575287, "learning_rate": 0.0001773563922053559, "loss": 11.6726, "step": 31334 }, { "epoch": 0.6559281587540818, "grad_norm": 0.27747857570648193, "learning_rate": 0.00017735500273860085, "loss": 11.6766, "step": 31335 }, { "epoch": 0.655949091518044, "grad_norm": 0.2988995611667633, "learning_rate": 0.0001773536132346597, "loss": 11.6968, "step": 31336 }, { "epoch": 0.6559700242820062, "grad_norm": 0.27463194727897644, "learning_rate": 0.00017735222369353308, "loss": 11.6677, "step": 31337 }, { "epoch": 0.6559909570459683, "grad_norm": 0.3286292850971222, "learning_rate": 0.00017735083411522172, "loss": 11.6685, "step": 31338 }, { "epoch": 0.6560118898099305, "grad_norm": 0.2600667178630829, "learning_rate": 0.00017734944449972626, "loss": 11.6687, "step": 31339 }, { "epoch": 0.6560328225738926, "grad_norm": 0.2886098325252533, "learning_rate": 0.00017734805484704734, "loss": 11.6625, "step": 31340 }, { "epoch": 0.6560537553378548, "grad_norm": 0.26423200964927673, "learning_rate": 0.0001773466651571857, "loss": 11.6781, "step": 31341 }, { "epoch": 0.656074688101817, "grad_norm": 0.32131680846214294, "learning_rate": 0.00017734527543014194, "loss": 11.6794, "step": 31342 }, { "epoch": 0.6560956208657791, "grad_norm": 0.2883983254432678, "learning_rate": 0.00017734388566591677, "loss": 11.6822, "step": 31343 }, { "epoch": 0.6561165536297413, "grad_norm": 0.27762001752853394, "learning_rate": 0.00017734249586451087, "loss": 11.6737, "step": 31344 }, { "epoch": 0.6561374863937034, "grad_norm": 0.2814871072769165, "learning_rate": 0.00017734110602592482, "loss": 11.6787, "step": 31345 }, { "epoch": 0.6561584191576656, "grad_norm": 0.29994305968284607, "learning_rate": 0.0001773397161501594, "loss": 11.6741, "step": 31346 }, { "epoch": 0.6561793519216277, "grad_norm": 0.22424396872520447, "learning_rate": 0.0001773383262372152, "loss": 11.6661, "step": 31347 }, { "epoch": 0.6562002846855899, "grad_norm": 0.3848758637905121, "learning_rate": 0.00017733693628709297, "loss": 11.6845, "step": 31348 }, { "epoch": 0.6562212174495521, "grad_norm": 0.26525747776031494, "learning_rate": 0.00017733554629979325, "loss": 11.6598, "step": 31349 }, { "epoch": 0.6562421502135142, "grad_norm": 0.28242823481559753, "learning_rate": 0.00017733415627531684, "loss": 11.6908, "step": 31350 }, { "epoch": 0.6562630829774764, "grad_norm": 0.3222692906856537, "learning_rate": 0.00017733276621366434, "loss": 11.6723, "step": 31351 }, { "epoch": 0.6562840157414385, "grad_norm": 0.2997771203517914, "learning_rate": 0.00017733137611483638, "loss": 11.6638, "step": 31352 }, { "epoch": 0.6563049485054007, "grad_norm": 0.34337007999420166, "learning_rate": 0.00017732998597883376, "loss": 11.668, "step": 31353 }, { "epoch": 0.6563258812693628, "grad_norm": 0.3529263734817505, "learning_rate": 0.00017732859580565702, "loss": 11.6756, "step": 31354 }, { "epoch": 0.656346814033325, "grad_norm": 0.28670722246170044, "learning_rate": 0.00017732720559530686, "loss": 11.6758, "step": 31355 }, { "epoch": 0.6563677467972872, "grad_norm": 0.23977822065353394, "learning_rate": 0.00017732581534778399, "loss": 11.6725, "step": 31356 }, { "epoch": 0.6563886795612492, "grad_norm": 0.31774282455444336, "learning_rate": 0.00017732442506308903, "loss": 11.6834, "step": 31357 }, { "epoch": 0.6564096123252114, "grad_norm": 0.29535889625549316, "learning_rate": 0.00017732303474122269, "loss": 11.6714, "step": 31358 }, { "epoch": 0.6564305450891735, "grad_norm": 0.3078828752040863, "learning_rate": 0.00017732164438218558, "loss": 11.6621, "step": 31359 }, { "epoch": 0.6564514778531357, "grad_norm": 0.2449594885110855, "learning_rate": 0.00017732025398597848, "loss": 11.659, "step": 31360 }, { "epoch": 0.6564724106170979, "grad_norm": 0.2725467383861542, "learning_rate": 0.00017731886355260192, "loss": 11.684, "step": 31361 }, { "epoch": 0.65649334338106, "grad_norm": 0.2677183747291565, "learning_rate": 0.00017731747308205665, "loss": 11.6602, "step": 31362 }, { "epoch": 0.6565142761450222, "grad_norm": 0.32630521059036255, "learning_rate": 0.0001773160825743433, "loss": 11.6616, "step": 31363 }, { "epoch": 0.6565352089089843, "grad_norm": 0.29241088032722473, "learning_rate": 0.0001773146920294626, "loss": 11.6666, "step": 31364 }, { "epoch": 0.6565561416729465, "grad_norm": 0.23834727704524994, "learning_rate": 0.0001773133014474152, "loss": 11.6579, "step": 31365 }, { "epoch": 0.6565770744369086, "grad_norm": 0.29359936714172363, "learning_rate": 0.00017731191082820166, "loss": 11.6801, "step": 31366 }, { "epoch": 0.6565980072008708, "grad_norm": 0.2852347493171692, "learning_rate": 0.0001773105201718228, "loss": 11.6771, "step": 31367 }, { "epoch": 0.656618939964833, "grad_norm": 0.33565905690193176, "learning_rate": 0.0001773091294782792, "loss": 11.6677, "step": 31368 }, { "epoch": 0.6566398727287951, "grad_norm": 0.32203400135040283, "learning_rate": 0.00017730773874757155, "loss": 11.6816, "step": 31369 }, { "epoch": 0.6566608054927573, "grad_norm": 0.2516816258430481, "learning_rate": 0.00017730634797970054, "loss": 11.6747, "step": 31370 }, { "epoch": 0.6566817382567194, "grad_norm": 0.2506129741668701, "learning_rate": 0.00017730495717466682, "loss": 11.6769, "step": 31371 }, { "epoch": 0.6567026710206816, "grad_norm": 0.31220847368240356, "learning_rate": 0.00017730356633247104, "loss": 11.6691, "step": 31372 }, { "epoch": 0.6567236037846437, "grad_norm": 0.2745507061481476, "learning_rate": 0.0001773021754531139, "loss": 11.6607, "step": 31373 }, { "epoch": 0.6567445365486059, "grad_norm": 0.3196314871311188, "learning_rate": 0.00017730078453659603, "loss": 11.6781, "step": 31374 }, { "epoch": 0.6567654693125681, "grad_norm": 0.28294894099235535, "learning_rate": 0.00017729939358291814, "loss": 11.6751, "step": 31375 }, { "epoch": 0.6567864020765302, "grad_norm": 0.2555237412452698, "learning_rate": 0.0001772980025920809, "loss": 11.6611, "step": 31376 }, { "epoch": 0.6568073348404924, "grad_norm": 0.33078786730766296, "learning_rate": 0.00017729661156408495, "loss": 11.672, "step": 31377 }, { "epoch": 0.6568282676044545, "grad_norm": 0.2696475088596344, "learning_rate": 0.00017729522049893094, "loss": 11.663, "step": 31378 }, { "epoch": 0.6568492003684167, "grad_norm": 0.31404557824134827, "learning_rate": 0.0001772938293966196, "loss": 11.681, "step": 31379 }, { "epoch": 0.6568701331323789, "grad_norm": 0.3518030643463135, "learning_rate": 0.00017729243825715155, "loss": 11.6792, "step": 31380 }, { "epoch": 0.6568910658963409, "grad_norm": 0.35215747356414795, "learning_rate": 0.0001772910470805275, "loss": 11.6746, "step": 31381 }, { "epoch": 0.6569119986603031, "grad_norm": 0.33434033393859863, "learning_rate": 0.0001772896558667481, "loss": 11.6731, "step": 31382 }, { "epoch": 0.6569329314242652, "grad_norm": 0.2983587980270386, "learning_rate": 0.000177288264615814, "loss": 11.675, "step": 31383 }, { "epoch": 0.6569538641882274, "grad_norm": 0.3261393904685974, "learning_rate": 0.00017728687332772583, "loss": 11.6548, "step": 31384 }, { "epoch": 0.6569747969521895, "grad_norm": 0.376638263463974, "learning_rate": 0.00017728548200248437, "loss": 11.6731, "step": 31385 }, { "epoch": 0.6569957297161517, "grad_norm": 0.3241049647331238, "learning_rate": 0.00017728409064009023, "loss": 11.6845, "step": 31386 }, { "epoch": 0.6570166624801139, "grad_norm": 0.2923661768436432, "learning_rate": 0.0001772826992405441, "loss": 11.6713, "step": 31387 }, { "epoch": 0.657037595244076, "grad_norm": 0.3179696202278137, "learning_rate": 0.00017728130780384656, "loss": 11.6733, "step": 31388 }, { "epoch": 0.6570585280080382, "grad_norm": 0.8186684846878052, "learning_rate": 0.0001772799163299984, "loss": 11.6933, "step": 31389 }, { "epoch": 0.6570794607720003, "grad_norm": 0.2755071520805359, "learning_rate": 0.0001772785248190002, "loss": 11.6603, "step": 31390 }, { "epoch": 0.6571003935359625, "grad_norm": 0.2784750461578369, "learning_rate": 0.0001772771332708527, "loss": 11.6942, "step": 31391 }, { "epoch": 0.6571213262999246, "grad_norm": 0.29868945479393005, "learning_rate": 0.00017727574168555652, "loss": 11.6498, "step": 31392 }, { "epoch": 0.6571422590638868, "grad_norm": 0.25794392824172974, "learning_rate": 0.00017727435006311234, "loss": 11.672, "step": 31393 }, { "epoch": 0.657163191827849, "grad_norm": 0.3607596755027771, "learning_rate": 0.00017727295840352082, "loss": 11.6763, "step": 31394 }, { "epoch": 0.6571841245918111, "grad_norm": 0.3299385607242584, "learning_rate": 0.00017727156670678263, "loss": 11.6718, "step": 31395 }, { "epoch": 0.6572050573557733, "grad_norm": 0.2578217089176178, "learning_rate": 0.0001772701749728985, "loss": 11.6736, "step": 31396 }, { "epoch": 0.6572259901197354, "grad_norm": 0.4198174774646759, "learning_rate": 0.00017726878320186898, "loss": 11.6659, "step": 31397 }, { "epoch": 0.6572469228836976, "grad_norm": 0.4149119257926941, "learning_rate": 0.00017726739139369485, "loss": 11.6684, "step": 31398 }, { "epoch": 0.6572678556476598, "grad_norm": 0.25895529985427856, "learning_rate": 0.00017726599954837676, "loss": 11.6651, "step": 31399 }, { "epoch": 0.6572887884116219, "grad_norm": 0.3232311010360718, "learning_rate": 0.00017726460766591528, "loss": 11.6787, "step": 31400 }, { "epoch": 0.6573097211755841, "grad_norm": 0.2856783866882324, "learning_rate": 0.0001772632157463112, "loss": 11.6738, "step": 31401 }, { "epoch": 0.6573306539395462, "grad_norm": 0.46538886427879333, "learning_rate": 0.00017726182378956516, "loss": 11.6757, "step": 31402 }, { "epoch": 0.6573515867035084, "grad_norm": 0.3423500061035156, "learning_rate": 0.00017726043179567782, "loss": 11.6809, "step": 31403 }, { "epoch": 0.6573725194674704, "grad_norm": 0.3097308874130249, "learning_rate": 0.0001772590397646498, "loss": 11.6917, "step": 31404 }, { "epoch": 0.6573934522314326, "grad_norm": 0.3275197446346283, "learning_rate": 0.00017725764769648184, "loss": 11.6709, "step": 31405 }, { "epoch": 0.6574143849953948, "grad_norm": 0.2683880627155304, "learning_rate": 0.0001772562555911746, "loss": 11.6692, "step": 31406 }, { "epoch": 0.6574353177593569, "grad_norm": 0.24643181264400482, "learning_rate": 0.00017725486344872868, "loss": 11.6619, "step": 31407 }, { "epoch": 0.6574562505233191, "grad_norm": 0.25824716687202454, "learning_rate": 0.0001772534712691448, "loss": 11.6798, "step": 31408 }, { "epoch": 0.6574771832872812, "grad_norm": 0.29568517208099365, "learning_rate": 0.00017725207905242365, "loss": 11.6649, "step": 31409 }, { "epoch": 0.6574981160512434, "grad_norm": 0.29000869393348694, "learning_rate": 0.00017725068679856588, "loss": 11.6757, "step": 31410 }, { "epoch": 0.6575190488152055, "grad_norm": 0.3433207869529724, "learning_rate": 0.00017724929450757215, "loss": 11.6821, "step": 31411 }, { "epoch": 0.6575399815791677, "grad_norm": 0.2867434620857239, "learning_rate": 0.00017724790217944314, "loss": 11.6533, "step": 31412 }, { "epoch": 0.6575609143431299, "grad_norm": 0.27995094656944275, "learning_rate": 0.00017724650981417952, "loss": 11.6777, "step": 31413 }, { "epoch": 0.657581847107092, "grad_norm": 0.2748352587223053, "learning_rate": 0.00017724511741178196, "loss": 11.6739, "step": 31414 }, { "epoch": 0.6576027798710542, "grad_norm": 0.3626973032951355, "learning_rate": 0.0001772437249722511, "loss": 11.679, "step": 31415 }, { "epoch": 0.6576237126350163, "grad_norm": 0.22551016509532928, "learning_rate": 0.00017724233249558763, "loss": 11.6628, "step": 31416 }, { "epoch": 0.6576446453989785, "grad_norm": 0.30871158838272095, "learning_rate": 0.00017724093998179225, "loss": 11.6771, "step": 31417 }, { "epoch": 0.6576655781629407, "grad_norm": 0.3357895612716675, "learning_rate": 0.0001772395474308656, "loss": 11.6673, "step": 31418 }, { "epoch": 0.6576865109269028, "grad_norm": 0.4284578859806061, "learning_rate": 0.00017723815484280833, "loss": 11.6756, "step": 31419 }, { "epoch": 0.657707443690865, "grad_norm": 0.2986655831336975, "learning_rate": 0.00017723676221762115, "loss": 11.6585, "step": 31420 }, { "epoch": 0.6577283764548271, "grad_norm": 0.26250243186950684, "learning_rate": 0.0001772353695553047, "loss": 11.6839, "step": 31421 }, { "epoch": 0.6577493092187893, "grad_norm": 0.34922316670417786, "learning_rate": 0.00017723397685585969, "loss": 11.6589, "step": 31422 }, { "epoch": 0.6577702419827514, "grad_norm": 0.3726200759410858, "learning_rate": 0.00017723258411928673, "loss": 11.6599, "step": 31423 }, { "epoch": 0.6577911747467136, "grad_norm": 0.2881263494491577, "learning_rate": 0.0001772311913455865, "loss": 11.681, "step": 31424 }, { "epoch": 0.6578121075106758, "grad_norm": 0.3522588610649109, "learning_rate": 0.0001772297985347597, "loss": 11.6672, "step": 31425 }, { "epoch": 0.6578330402746378, "grad_norm": 0.30571964383125305, "learning_rate": 0.00017722840568680704, "loss": 11.6624, "step": 31426 }, { "epoch": 0.6578539730386, "grad_norm": 0.39284324645996094, "learning_rate": 0.0001772270128017291, "loss": 11.6725, "step": 31427 }, { "epoch": 0.6578749058025621, "grad_norm": 0.3192913234233856, "learning_rate": 0.00017722561987952657, "loss": 11.671, "step": 31428 }, { "epoch": 0.6578958385665243, "grad_norm": 0.3090856969356537, "learning_rate": 0.00017722422692020016, "loss": 11.6559, "step": 31429 }, { "epoch": 0.6579167713304864, "grad_norm": 0.28123757243156433, "learning_rate": 0.0001772228339237505, "loss": 11.6501, "step": 31430 }, { "epoch": 0.6579377040944486, "grad_norm": 0.3045288324356079, "learning_rate": 0.00017722144089017826, "loss": 11.6657, "step": 31431 }, { "epoch": 0.6579586368584108, "grad_norm": 0.3639974892139435, "learning_rate": 0.00017722004781948417, "loss": 11.6703, "step": 31432 }, { "epoch": 0.6579795696223729, "grad_norm": 0.2775845229625702, "learning_rate": 0.00017721865471166884, "loss": 11.6585, "step": 31433 }, { "epoch": 0.6580005023863351, "grad_norm": 0.3558528423309326, "learning_rate": 0.00017721726156673295, "loss": 11.666, "step": 31434 }, { "epoch": 0.6580214351502972, "grad_norm": 0.29961061477661133, "learning_rate": 0.0001772158683846772, "loss": 11.6805, "step": 31435 }, { "epoch": 0.6580423679142594, "grad_norm": 0.34878015518188477, "learning_rate": 0.0001772144751655022, "loss": 11.6981, "step": 31436 }, { "epoch": 0.6580633006782216, "grad_norm": 0.4055207669734955, "learning_rate": 0.00017721308190920865, "loss": 11.6908, "step": 31437 }, { "epoch": 0.6580842334421837, "grad_norm": 0.3245391547679901, "learning_rate": 0.00017721168861579723, "loss": 11.6505, "step": 31438 }, { "epoch": 0.6581051662061459, "grad_norm": 0.2911953330039978, "learning_rate": 0.00017721029528526861, "loss": 11.6617, "step": 31439 }, { "epoch": 0.658126098970108, "grad_norm": 0.3858727514743805, "learning_rate": 0.00017720890191762347, "loss": 11.6834, "step": 31440 }, { "epoch": 0.6581470317340702, "grad_norm": 0.4616403579711914, "learning_rate": 0.00017720750851286243, "loss": 11.685, "step": 31441 }, { "epoch": 0.6581679644980323, "grad_norm": 0.3152145743370056, "learning_rate": 0.00017720611507098622, "loss": 11.666, "step": 31442 }, { "epoch": 0.6581888972619945, "grad_norm": 0.25431960821151733, "learning_rate": 0.0001772047215919955, "loss": 11.6588, "step": 31443 }, { "epoch": 0.6582098300259567, "grad_norm": 0.35839006304740906, "learning_rate": 0.00017720332807589088, "loss": 11.6682, "step": 31444 }, { "epoch": 0.6582307627899188, "grad_norm": 0.2825266420841217, "learning_rate": 0.0001772019345226731, "loss": 11.6619, "step": 31445 }, { "epoch": 0.658251695553881, "grad_norm": 0.5129444599151611, "learning_rate": 0.0001772005409323428, "loss": 11.6586, "step": 31446 }, { "epoch": 0.6582726283178431, "grad_norm": 0.2663637101650238, "learning_rate": 0.00017719914730490065, "loss": 11.6657, "step": 31447 }, { "epoch": 0.6582935610818053, "grad_norm": 0.3063773512840271, "learning_rate": 0.00017719775364034732, "loss": 11.6718, "step": 31448 }, { "epoch": 0.6583144938457673, "grad_norm": 0.2931673228740692, "learning_rate": 0.00017719635993868348, "loss": 11.6688, "step": 31449 }, { "epoch": 0.6583354266097295, "grad_norm": 0.2609270215034485, "learning_rate": 0.0001771949661999098, "loss": 11.6662, "step": 31450 }, { "epoch": 0.6583563593736917, "grad_norm": 0.2650314271450043, "learning_rate": 0.00017719357242402696, "loss": 11.6742, "step": 31451 }, { "epoch": 0.6583772921376538, "grad_norm": 0.2581329345703125, "learning_rate": 0.00017719217861103564, "loss": 11.6503, "step": 31452 }, { "epoch": 0.658398224901616, "grad_norm": 0.2943252623081207, "learning_rate": 0.00017719078476093647, "loss": 11.6557, "step": 31453 }, { "epoch": 0.6584191576655781, "grad_norm": 0.26751014590263367, "learning_rate": 0.00017718939087373015, "loss": 11.6794, "step": 31454 }, { "epoch": 0.6584400904295403, "grad_norm": 0.2782491445541382, "learning_rate": 0.00017718799694941732, "loss": 11.6603, "step": 31455 }, { "epoch": 0.6584610231935024, "grad_norm": 0.28530290722846985, "learning_rate": 0.00017718660298799872, "loss": 11.6755, "step": 31456 }, { "epoch": 0.6584819559574646, "grad_norm": 0.34276339411735535, "learning_rate": 0.00017718520898947496, "loss": 11.6714, "step": 31457 }, { "epoch": 0.6585028887214268, "grad_norm": 0.24011221528053284, "learning_rate": 0.00017718381495384668, "loss": 11.6747, "step": 31458 }, { "epoch": 0.6585238214853889, "grad_norm": 0.2696937620639801, "learning_rate": 0.00017718242088111464, "loss": 11.6672, "step": 31459 }, { "epoch": 0.6585447542493511, "grad_norm": 0.26432493329048157, "learning_rate": 0.00017718102677127944, "loss": 11.6729, "step": 31460 }, { "epoch": 0.6585656870133132, "grad_norm": 0.30084118247032166, "learning_rate": 0.0001771796326243418, "loss": 11.65, "step": 31461 }, { "epoch": 0.6585866197772754, "grad_norm": 0.28333017230033875, "learning_rate": 0.00017717823844030234, "loss": 11.6618, "step": 31462 }, { "epoch": 0.6586075525412376, "grad_norm": 0.2528274357318878, "learning_rate": 0.00017717684421916175, "loss": 11.6504, "step": 31463 }, { "epoch": 0.6586284853051997, "grad_norm": 0.27541202306747437, "learning_rate": 0.0001771754499609207, "loss": 11.674, "step": 31464 }, { "epoch": 0.6586494180691619, "grad_norm": 0.30869200825691223, "learning_rate": 0.0001771740556655799, "loss": 11.6717, "step": 31465 }, { "epoch": 0.658670350833124, "grad_norm": 0.3407609760761261, "learning_rate": 0.00017717266133313993, "loss": 11.6709, "step": 31466 }, { "epoch": 0.6586912835970862, "grad_norm": 0.24579420685768127, "learning_rate": 0.00017717126696360156, "loss": 11.6588, "step": 31467 }, { "epoch": 0.6587122163610483, "grad_norm": 0.28462108969688416, "learning_rate": 0.0001771698725569654, "loss": 11.6696, "step": 31468 }, { "epoch": 0.6587331491250105, "grad_norm": 0.2761040925979614, "learning_rate": 0.00017716847811323217, "loss": 11.68, "step": 31469 }, { "epoch": 0.6587540818889727, "grad_norm": 0.2678118944168091, "learning_rate": 0.00017716708363240246, "loss": 11.6784, "step": 31470 }, { "epoch": 0.6587750146529348, "grad_norm": 0.3203425407409668, "learning_rate": 0.00017716568911447698, "loss": 11.6726, "step": 31471 }, { "epoch": 0.658795947416897, "grad_norm": 0.31059375405311584, "learning_rate": 0.00017716429455945644, "loss": 11.6642, "step": 31472 }, { "epoch": 0.658816880180859, "grad_norm": 0.30059149861335754, "learning_rate": 0.00017716289996734145, "loss": 11.6652, "step": 31473 }, { "epoch": 0.6588378129448212, "grad_norm": 0.24213968217372894, "learning_rate": 0.00017716150533813273, "loss": 11.6773, "step": 31474 }, { "epoch": 0.6588587457087833, "grad_norm": 0.2474302053451538, "learning_rate": 0.00017716011067183093, "loss": 11.6659, "step": 31475 }, { "epoch": 0.6588796784727455, "grad_norm": 0.2540321946144104, "learning_rate": 0.0001771587159684367, "loss": 11.6524, "step": 31476 }, { "epoch": 0.6589006112367077, "grad_norm": 0.26789405941963196, "learning_rate": 0.00017715732122795076, "loss": 11.6776, "step": 31477 }, { "epoch": 0.6589215440006698, "grad_norm": 0.2538856565952301, "learning_rate": 0.0001771559264503737, "loss": 11.6709, "step": 31478 }, { "epoch": 0.658942476764632, "grad_norm": 0.3069693446159363, "learning_rate": 0.0001771545316357063, "loss": 11.659, "step": 31479 }, { "epoch": 0.6589634095285941, "grad_norm": 0.20970213413238525, "learning_rate": 0.00017715313678394915, "loss": 11.6743, "step": 31480 }, { "epoch": 0.6589843422925563, "grad_norm": 0.23595356941223145, "learning_rate": 0.0001771517418951029, "loss": 11.6702, "step": 31481 }, { "epoch": 0.6590052750565185, "grad_norm": 0.5104469060897827, "learning_rate": 0.0001771503469691683, "loss": 11.675, "step": 31482 }, { "epoch": 0.6590262078204806, "grad_norm": 0.3092263340950012, "learning_rate": 0.00017714895200614597, "loss": 11.6607, "step": 31483 }, { "epoch": 0.6590471405844428, "grad_norm": 0.2998514473438263, "learning_rate": 0.00017714755700603657, "loss": 11.6809, "step": 31484 }, { "epoch": 0.6590680733484049, "grad_norm": 0.27635765075683594, "learning_rate": 0.00017714616196884083, "loss": 11.6757, "step": 31485 }, { "epoch": 0.6590890061123671, "grad_norm": 0.3138565421104431, "learning_rate": 0.0001771447668945594, "loss": 11.6717, "step": 31486 }, { "epoch": 0.6591099388763292, "grad_norm": 0.28861868381500244, "learning_rate": 0.0001771433717831929, "loss": 11.6839, "step": 31487 }, { "epoch": 0.6591308716402914, "grad_norm": 0.2792401909828186, "learning_rate": 0.00017714197663474204, "loss": 11.6899, "step": 31488 }, { "epoch": 0.6591518044042536, "grad_norm": 0.2902248799800873, "learning_rate": 0.0001771405814492075, "loss": 11.6686, "step": 31489 }, { "epoch": 0.6591727371682157, "grad_norm": 0.3041427433490753, "learning_rate": 0.0001771391862265899, "loss": 11.6684, "step": 31490 }, { "epoch": 0.6591936699321779, "grad_norm": 0.22916290163993835, "learning_rate": 0.00017713779096688998, "loss": 11.6627, "step": 31491 }, { "epoch": 0.65921460269614, "grad_norm": 0.28247174620628357, "learning_rate": 0.00017713639567010838, "loss": 11.6761, "step": 31492 }, { "epoch": 0.6592355354601022, "grad_norm": 0.22212420403957367, "learning_rate": 0.00017713500033624574, "loss": 11.6791, "step": 31493 }, { "epoch": 0.6592564682240643, "grad_norm": 0.2583461105823517, "learning_rate": 0.0001771336049653028, "loss": 11.6639, "step": 31494 }, { "epoch": 0.6592774009880265, "grad_norm": 0.30162110924720764, "learning_rate": 0.00017713220955728015, "loss": 11.6785, "step": 31495 }, { "epoch": 0.6592983337519887, "grad_norm": 0.4009194076061249, "learning_rate": 0.00017713081411217853, "loss": 11.6707, "step": 31496 }, { "epoch": 0.6593192665159507, "grad_norm": 0.2919042706489563, "learning_rate": 0.00017712941862999858, "loss": 11.6625, "step": 31497 }, { "epoch": 0.6593401992799129, "grad_norm": 0.24345165491104126, "learning_rate": 0.00017712802311074097, "loss": 11.6733, "step": 31498 }, { "epoch": 0.659361132043875, "grad_norm": 0.23074838519096375, "learning_rate": 0.00017712662755440633, "loss": 11.681, "step": 31499 }, { "epoch": 0.6593820648078372, "grad_norm": 0.3172370493412018, "learning_rate": 0.00017712523196099545, "loss": 11.685, "step": 31500 }, { "epoch": 0.6594029975717994, "grad_norm": 0.30692610144615173, "learning_rate": 0.00017712383633050887, "loss": 11.6677, "step": 31501 }, { "epoch": 0.6594239303357615, "grad_norm": 0.45169755816459656, "learning_rate": 0.00017712244066294734, "loss": 11.6929, "step": 31502 }, { "epoch": 0.6594448630997237, "grad_norm": 0.359608918428421, "learning_rate": 0.00017712104495831152, "loss": 11.6627, "step": 31503 }, { "epoch": 0.6594657958636858, "grad_norm": 0.2980656623840332, "learning_rate": 0.00017711964921660202, "loss": 11.6776, "step": 31504 }, { "epoch": 0.659486728627648, "grad_norm": 0.24483704566955566, "learning_rate": 0.0001771182534378196, "loss": 11.6618, "step": 31505 }, { "epoch": 0.6595076613916101, "grad_norm": 0.2966671288013458, "learning_rate": 0.00017711685762196488, "loss": 11.6659, "step": 31506 }, { "epoch": 0.6595285941555723, "grad_norm": 0.26172715425491333, "learning_rate": 0.00017711546176903855, "loss": 11.6559, "step": 31507 }, { "epoch": 0.6595495269195345, "grad_norm": 0.2891857922077179, "learning_rate": 0.00017711406587904123, "loss": 11.675, "step": 31508 }, { "epoch": 0.6595704596834966, "grad_norm": 0.22655555605888367, "learning_rate": 0.00017711266995197372, "loss": 11.6585, "step": 31509 }, { "epoch": 0.6595913924474588, "grad_norm": 0.3314477801322937, "learning_rate": 0.00017711127398783655, "loss": 11.6744, "step": 31510 }, { "epoch": 0.6596123252114209, "grad_norm": 0.2650291919708252, "learning_rate": 0.00017710987798663042, "loss": 11.6531, "step": 31511 }, { "epoch": 0.6596332579753831, "grad_norm": 0.3855729401111603, "learning_rate": 0.00017710848194835605, "loss": 11.6721, "step": 31512 }, { "epoch": 0.6596541907393452, "grad_norm": 0.3296719491481781, "learning_rate": 0.00017710708587301407, "loss": 11.6733, "step": 31513 }, { "epoch": 0.6596751235033074, "grad_norm": 0.28593704104423523, "learning_rate": 0.0001771056897606052, "loss": 11.6586, "step": 31514 }, { "epoch": 0.6596960562672696, "grad_norm": 0.32399982213974, "learning_rate": 0.00017710429361113007, "loss": 11.6656, "step": 31515 }, { "epoch": 0.6597169890312317, "grad_norm": 0.30793023109436035, "learning_rate": 0.00017710289742458937, "loss": 11.6677, "step": 31516 }, { "epoch": 0.6597379217951939, "grad_norm": 0.23625242710113525, "learning_rate": 0.00017710150120098375, "loss": 11.6454, "step": 31517 }, { "epoch": 0.659758854559156, "grad_norm": 0.311963826417923, "learning_rate": 0.0001771001049403139, "loss": 11.6715, "step": 31518 }, { "epoch": 0.6597797873231181, "grad_norm": 0.26618722081184387, "learning_rate": 0.00017709870864258048, "loss": 11.663, "step": 31519 }, { "epoch": 0.6598007200870804, "grad_norm": 0.28766778111457825, "learning_rate": 0.00017709731230778416, "loss": 11.6627, "step": 31520 }, { "epoch": 0.6598216528510424, "grad_norm": 0.3003709614276886, "learning_rate": 0.0001770959159359256, "loss": 11.676, "step": 31521 }, { "epoch": 0.6598425856150046, "grad_norm": 0.30108803510665894, "learning_rate": 0.00017709451952700552, "loss": 11.6785, "step": 31522 }, { "epoch": 0.6598635183789667, "grad_norm": 0.2730359137058258, "learning_rate": 0.00017709312308102456, "loss": 11.6783, "step": 31523 }, { "epoch": 0.6598844511429289, "grad_norm": 0.3121902346611023, "learning_rate": 0.00017709172659798338, "loss": 11.6619, "step": 31524 }, { "epoch": 0.659905383906891, "grad_norm": 0.2650384306907654, "learning_rate": 0.00017709033007788264, "loss": 11.6678, "step": 31525 }, { "epoch": 0.6599263166708532, "grad_norm": 0.32382461428642273, "learning_rate": 0.00017708893352072304, "loss": 11.6714, "step": 31526 }, { "epoch": 0.6599472494348154, "grad_norm": 0.3570510447025299, "learning_rate": 0.00017708753692650527, "loss": 11.6773, "step": 31527 }, { "epoch": 0.6599681821987775, "grad_norm": 0.2930740416049957, "learning_rate": 0.00017708614029522996, "loss": 11.6676, "step": 31528 }, { "epoch": 0.6599891149627397, "grad_norm": 0.26644617319107056, "learning_rate": 0.0001770847436268978, "loss": 11.6626, "step": 31529 }, { "epoch": 0.6600100477267018, "grad_norm": 0.340254008769989, "learning_rate": 0.0001770833469215095, "loss": 11.6717, "step": 31530 }, { "epoch": 0.660030980490664, "grad_norm": 0.2807447016239166, "learning_rate": 0.00017708195017906563, "loss": 11.6552, "step": 31531 }, { "epoch": 0.6600519132546261, "grad_norm": 0.3068079352378845, "learning_rate": 0.00017708055339956693, "loss": 11.672, "step": 31532 }, { "epoch": 0.6600728460185883, "grad_norm": 0.3102114796638489, "learning_rate": 0.0001770791565830141, "loss": 11.6758, "step": 31533 }, { "epoch": 0.6600937787825505, "grad_norm": 0.28862762451171875, "learning_rate": 0.00017707775972940773, "loss": 11.6705, "step": 31534 }, { "epoch": 0.6601147115465126, "grad_norm": 0.3335328996181488, "learning_rate": 0.00017707636283874857, "loss": 11.6727, "step": 31535 }, { "epoch": 0.6601356443104748, "grad_norm": 0.23263180255889893, "learning_rate": 0.00017707496591103726, "loss": 11.6645, "step": 31536 }, { "epoch": 0.6601565770744369, "grad_norm": 0.2943609058856964, "learning_rate": 0.00017707356894627446, "loss": 11.6647, "step": 31537 }, { "epoch": 0.6601775098383991, "grad_norm": 0.23051534593105316, "learning_rate": 0.00017707217194446083, "loss": 11.6857, "step": 31538 }, { "epoch": 0.6601984426023613, "grad_norm": 0.3161700665950775, "learning_rate": 0.00017707077490559708, "loss": 11.6733, "step": 31539 }, { "epoch": 0.6602193753663234, "grad_norm": 0.3174326717853546, "learning_rate": 0.00017706937782968388, "loss": 11.6805, "step": 31540 }, { "epoch": 0.6602403081302856, "grad_norm": 0.26009485125541687, "learning_rate": 0.00017706798071672185, "loss": 11.6525, "step": 31541 }, { "epoch": 0.6602612408942476, "grad_norm": 0.2717907130718231, "learning_rate": 0.00017706658356671172, "loss": 11.6709, "step": 31542 }, { "epoch": 0.6602821736582098, "grad_norm": 0.2652302086353302, "learning_rate": 0.00017706518637965415, "loss": 11.6575, "step": 31543 }, { "epoch": 0.6603031064221719, "grad_norm": 0.26745766401290894, "learning_rate": 0.00017706378915554978, "loss": 11.6764, "step": 31544 }, { "epoch": 0.6603240391861341, "grad_norm": 0.29693102836608887, "learning_rate": 0.0001770623918943993, "loss": 11.6589, "step": 31545 }, { "epoch": 0.6603449719500963, "grad_norm": 0.2566649317741394, "learning_rate": 0.0001770609945962034, "loss": 11.6907, "step": 31546 }, { "epoch": 0.6603659047140584, "grad_norm": 0.28836989402770996, "learning_rate": 0.00017705959726096272, "loss": 11.6676, "step": 31547 }, { "epoch": 0.6603868374780206, "grad_norm": 0.4272706210613251, "learning_rate": 0.00017705819988867797, "loss": 11.6857, "step": 31548 }, { "epoch": 0.6604077702419827, "grad_norm": 0.3391473591327667, "learning_rate": 0.00017705680247934974, "loss": 11.6591, "step": 31549 }, { "epoch": 0.6604287030059449, "grad_norm": 0.28133586049079895, "learning_rate": 0.00017705540503297883, "loss": 11.6663, "step": 31550 }, { "epoch": 0.660449635769907, "grad_norm": 0.3581923544406891, "learning_rate": 0.00017705400754956582, "loss": 11.6835, "step": 31551 }, { "epoch": 0.6604705685338692, "grad_norm": 0.23724734783172607, "learning_rate": 0.0001770526100291114, "loss": 11.6815, "step": 31552 }, { "epoch": 0.6604915012978314, "grad_norm": 0.2580319046974182, "learning_rate": 0.00017705121247161624, "loss": 11.6732, "step": 31553 }, { "epoch": 0.6605124340617935, "grad_norm": 0.22918736934661865, "learning_rate": 0.00017704981487708102, "loss": 11.6747, "step": 31554 }, { "epoch": 0.6605333668257557, "grad_norm": 0.30600592494010925, "learning_rate": 0.00017704841724550642, "loss": 11.6696, "step": 31555 }, { "epoch": 0.6605542995897178, "grad_norm": 0.2968083322048187, "learning_rate": 0.00017704701957689309, "loss": 11.6811, "step": 31556 }, { "epoch": 0.66057523235368, "grad_norm": 0.31767645478248596, "learning_rate": 0.00017704562187124173, "loss": 11.679, "step": 31557 }, { "epoch": 0.6605961651176422, "grad_norm": 0.3181050717830658, "learning_rate": 0.000177044224128553, "loss": 11.6661, "step": 31558 }, { "epoch": 0.6606170978816043, "grad_norm": 0.3310307562351227, "learning_rate": 0.00017704282634882753, "loss": 11.6656, "step": 31559 }, { "epoch": 0.6606380306455665, "grad_norm": 0.25318652391433716, "learning_rate": 0.00017704142853206608, "loss": 11.664, "step": 31560 }, { "epoch": 0.6606589634095286, "grad_norm": 0.3087090253829956, "learning_rate": 0.00017704003067826922, "loss": 11.6617, "step": 31561 }, { "epoch": 0.6606798961734908, "grad_norm": 0.23559121787548065, "learning_rate": 0.00017703863278743768, "loss": 11.6642, "step": 31562 }, { "epoch": 0.6607008289374529, "grad_norm": 0.3393458127975464, "learning_rate": 0.00017703723485957217, "loss": 11.6555, "step": 31563 }, { "epoch": 0.6607217617014151, "grad_norm": 0.29756593704223633, "learning_rate": 0.00017703583689467327, "loss": 11.6535, "step": 31564 }, { "epoch": 0.6607426944653773, "grad_norm": 1.190203309059143, "learning_rate": 0.00017703443889274172, "loss": 11.633, "step": 31565 }, { "epoch": 0.6607636272293393, "grad_norm": 0.3329566419124603, "learning_rate": 0.00017703304085377818, "loss": 11.6696, "step": 31566 }, { "epoch": 0.6607845599933015, "grad_norm": 0.34319815039634705, "learning_rate": 0.0001770316427777833, "loss": 11.6803, "step": 31567 }, { "epoch": 0.6608054927572636, "grad_norm": 0.3348607122898102, "learning_rate": 0.00017703024466475778, "loss": 11.6719, "step": 31568 }, { "epoch": 0.6608264255212258, "grad_norm": 0.30295664072036743, "learning_rate": 0.00017702884651470227, "loss": 11.6705, "step": 31569 }, { "epoch": 0.6608473582851879, "grad_norm": 0.3229999244213104, "learning_rate": 0.00017702744832761745, "loss": 11.6623, "step": 31570 }, { "epoch": 0.6608682910491501, "grad_norm": 0.2712613046169281, "learning_rate": 0.00017702605010350397, "loss": 11.6667, "step": 31571 }, { "epoch": 0.6608892238131123, "grad_norm": 0.3129662871360779, "learning_rate": 0.00017702465184236258, "loss": 11.6668, "step": 31572 }, { "epoch": 0.6609101565770744, "grad_norm": 0.29577887058258057, "learning_rate": 0.00017702325354419383, "loss": 11.6715, "step": 31573 }, { "epoch": 0.6609310893410366, "grad_norm": 0.23754169046878815, "learning_rate": 0.00017702185520899848, "loss": 11.6575, "step": 31574 }, { "epoch": 0.6609520221049987, "grad_norm": 0.27254900336265564, "learning_rate": 0.0001770204568367772, "loss": 11.6549, "step": 31575 }, { "epoch": 0.6609729548689609, "grad_norm": 0.3492421805858612, "learning_rate": 0.00017701905842753063, "loss": 11.6527, "step": 31576 }, { "epoch": 0.6609938876329231, "grad_norm": 0.3486638069152832, "learning_rate": 0.00017701765998125947, "loss": 11.6749, "step": 31577 }, { "epoch": 0.6610148203968852, "grad_norm": 0.30692949891090393, "learning_rate": 0.00017701626149796434, "loss": 11.6716, "step": 31578 }, { "epoch": 0.6610357531608474, "grad_norm": 0.38619375228881836, "learning_rate": 0.000177014862977646, "loss": 11.6585, "step": 31579 }, { "epoch": 0.6610566859248095, "grad_norm": 0.3685750961303711, "learning_rate": 0.000177013464420305, "loss": 11.6553, "step": 31580 }, { "epoch": 0.6610776186887717, "grad_norm": 0.2765950858592987, "learning_rate": 0.00017701206582594216, "loss": 11.6642, "step": 31581 }, { "epoch": 0.6610985514527338, "grad_norm": 0.2762278616428375, "learning_rate": 0.00017701066719455803, "loss": 11.6807, "step": 31582 }, { "epoch": 0.661119484216696, "grad_norm": 0.2578813433647156, "learning_rate": 0.00017700926852615335, "loss": 11.6764, "step": 31583 }, { "epoch": 0.6611404169806582, "grad_norm": 0.32307907938957214, "learning_rate": 0.00017700786982072878, "loss": 11.6616, "step": 31584 }, { "epoch": 0.6611613497446203, "grad_norm": 0.34600621461868286, "learning_rate": 0.00017700647107828497, "loss": 11.6879, "step": 31585 }, { "epoch": 0.6611822825085825, "grad_norm": 0.40428733825683594, "learning_rate": 0.00017700507229882263, "loss": 11.692, "step": 31586 }, { "epoch": 0.6612032152725446, "grad_norm": 0.32679295539855957, "learning_rate": 0.00017700367348234236, "loss": 11.647, "step": 31587 }, { "epoch": 0.6612241480365068, "grad_norm": 0.2783478796482086, "learning_rate": 0.00017700227462884492, "loss": 11.6873, "step": 31588 }, { "epoch": 0.6612450808004688, "grad_norm": 0.3746179938316345, "learning_rate": 0.00017700087573833094, "loss": 11.6659, "step": 31589 }, { "epoch": 0.661266013564431, "grad_norm": 0.28790047764778137, "learning_rate": 0.00017699947681080108, "loss": 11.6719, "step": 31590 }, { "epoch": 0.6612869463283932, "grad_norm": 0.31198650598526, "learning_rate": 0.00017699807784625601, "loss": 11.6538, "step": 31591 }, { "epoch": 0.6613078790923553, "grad_norm": 0.3425714075565338, "learning_rate": 0.00017699667884469645, "loss": 11.6755, "step": 31592 }, { "epoch": 0.6613288118563175, "grad_norm": 0.2615211009979248, "learning_rate": 0.00017699527980612304, "loss": 11.6789, "step": 31593 }, { "epoch": 0.6613497446202796, "grad_norm": 0.2870643734931946, "learning_rate": 0.00017699388073053645, "loss": 11.6555, "step": 31594 }, { "epoch": 0.6613706773842418, "grad_norm": 0.23430991172790527, "learning_rate": 0.00017699248161793735, "loss": 11.6809, "step": 31595 }, { "epoch": 0.661391610148204, "grad_norm": 0.23696254193782806, "learning_rate": 0.00017699108246832645, "loss": 11.6587, "step": 31596 }, { "epoch": 0.6614125429121661, "grad_norm": 0.34391331672668457, "learning_rate": 0.0001769896832817044, "loss": 11.658, "step": 31597 }, { "epoch": 0.6614334756761283, "grad_norm": 0.3163006007671356, "learning_rate": 0.00017698828405807182, "loss": 11.667, "step": 31598 }, { "epoch": 0.6614544084400904, "grad_norm": 0.28392839431762695, "learning_rate": 0.00017698688479742946, "loss": 11.6683, "step": 31599 }, { "epoch": 0.6614753412040526, "grad_norm": 0.315739244222641, "learning_rate": 0.00017698548549977797, "loss": 11.659, "step": 31600 }, { "epoch": 0.6614962739680147, "grad_norm": 0.2948260009288788, "learning_rate": 0.00017698408616511797, "loss": 11.6699, "step": 31601 }, { "epoch": 0.6615172067319769, "grad_norm": 0.2521054148674011, "learning_rate": 0.00017698268679345022, "loss": 11.6604, "step": 31602 }, { "epoch": 0.6615381394959391, "grad_norm": 0.33830854296684265, "learning_rate": 0.00017698128738477534, "loss": 11.6826, "step": 31603 }, { "epoch": 0.6615590722599012, "grad_norm": 0.28991949558258057, "learning_rate": 0.00017697988793909398, "loss": 11.679, "step": 31604 }, { "epoch": 0.6615800050238634, "grad_norm": 0.26950469613075256, "learning_rate": 0.0001769784884564069, "loss": 11.6603, "step": 31605 }, { "epoch": 0.6616009377878255, "grad_norm": 0.3214911222457886, "learning_rate": 0.0001769770889367147, "loss": 11.6815, "step": 31606 }, { "epoch": 0.6616218705517877, "grad_norm": 0.3012501299381256, "learning_rate": 0.00017697568938001806, "loss": 11.6591, "step": 31607 }, { "epoch": 0.6616428033157498, "grad_norm": 0.29939326643943787, "learning_rate": 0.00017697428978631768, "loss": 11.6643, "step": 31608 }, { "epoch": 0.661663736079712, "grad_norm": 0.226618230342865, "learning_rate": 0.0001769728901556142, "loss": 11.6783, "step": 31609 }, { "epoch": 0.6616846688436742, "grad_norm": 0.317574143409729, "learning_rate": 0.0001769714904879083, "loss": 11.6788, "step": 31610 }, { "epoch": 0.6617056016076363, "grad_norm": 0.3016692101955414, "learning_rate": 0.00017697009078320065, "loss": 11.6661, "step": 31611 }, { "epoch": 0.6617265343715985, "grad_norm": 0.26751312613487244, "learning_rate": 0.00017696869104149196, "loss": 11.6785, "step": 31612 }, { "epoch": 0.6617474671355605, "grad_norm": 0.23623892664909363, "learning_rate": 0.0001769672912627829, "loss": 11.6608, "step": 31613 }, { "epoch": 0.6617683998995227, "grad_norm": 0.3175552189350128, "learning_rate": 0.00017696589144707407, "loss": 11.6782, "step": 31614 }, { "epoch": 0.6617893326634849, "grad_norm": 0.4892769157886505, "learning_rate": 0.00017696449159436623, "loss": 11.6809, "step": 31615 }, { "epoch": 0.661810265427447, "grad_norm": 0.3303254246711731, "learning_rate": 0.00017696309170466, "loss": 11.682, "step": 31616 }, { "epoch": 0.6618311981914092, "grad_norm": 0.3136060833930969, "learning_rate": 0.0001769616917779561, "loss": 11.6853, "step": 31617 }, { "epoch": 0.6618521309553713, "grad_norm": 0.34584617614746094, "learning_rate": 0.00017696029181425512, "loss": 11.666, "step": 31618 }, { "epoch": 0.6618730637193335, "grad_norm": 0.30126023292541504, "learning_rate": 0.00017695889181355783, "loss": 11.6862, "step": 31619 }, { "epoch": 0.6618939964832956, "grad_norm": 0.27738478779792786, "learning_rate": 0.00017695749177586482, "loss": 11.6885, "step": 31620 }, { "epoch": 0.6619149292472578, "grad_norm": 0.3270949423313141, "learning_rate": 0.00017695609170117685, "loss": 11.7043, "step": 31621 }, { "epoch": 0.66193586201122, "grad_norm": 0.2774644196033478, "learning_rate": 0.00017695469158949452, "loss": 11.6594, "step": 31622 }, { "epoch": 0.6619567947751821, "grad_norm": 0.33111241459846497, "learning_rate": 0.0001769532914408185, "loss": 11.6741, "step": 31623 }, { "epoch": 0.6619777275391443, "grad_norm": 0.3275969326496124, "learning_rate": 0.00017695189125514955, "loss": 11.6694, "step": 31624 }, { "epoch": 0.6619986603031064, "grad_norm": 0.2755091190338135, "learning_rate": 0.0001769504910324882, "loss": 11.6742, "step": 31625 }, { "epoch": 0.6620195930670686, "grad_norm": 0.2691393196582794, "learning_rate": 0.00017694909077283527, "loss": 11.6709, "step": 31626 }, { "epoch": 0.6620405258310307, "grad_norm": 0.2832643985748291, "learning_rate": 0.00017694769047619139, "loss": 11.6763, "step": 31627 }, { "epoch": 0.6620614585949929, "grad_norm": 0.2315930426120758, "learning_rate": 0.00017694629014255718, "loss": 11.6563, "step": 31628 }, { "epoch": 0.6620823913589551, "grad_norm": 0.34710192680358887, "learning_rate": 0.00017694488977193333, "loss": 11.6693, "step": 31629 }, { "epoch": 0.6621033241229172, "grad_norm": 0.3224596083164215, "learning_rate": 0.00017694348936432054, "loss": 11.6754, "step": 31630 }, { "epoch": 0.6621242568868794, "grad_norm": 0.3079284131526947, "learning_rate": 0.0001769420889197195, "loss": 11.664, "step": 31631 }, { "epoch": 0.6621451896508415, "grad_norm": 0.2644816040992737, "learning_rate": 0.00017694068843813083, "loss": 11.6932, "step": 31632 }, { "epoch": 0.6621661224148037, "grad_norm": 0.32407504320144653, "learning_rate": 0.00017693928791955523, "loss": 11.6695, "step": 31633 }, { "epoch": 0.6621870551787657, "grad_norm": 0.35317009687423706, "learning_rate": 0.00017693788736399338, "loss": 11.6758, "step": 31634 }, { "epoch": 0.662207987942728, "grad_norm": 0.2558678090572357, "learning_rate": 0.00017693648677144595, "loss": 11.6787, "step": 31635 }, { "epoch": 0.6622289207066901, "grad_norm": 0.2691846489906311, "learning_rate": 0.0001769350861419136, "loss": 11.6828, "step": 31636 }, { "epoch": 0.6622498534706522, "grad_norm": 0.43388354778289795, "learning_rate": 0.000176933685475397, "loss": 11.6794, "step": 31637 }, { "epoch": 0.6622707862346144, "grad_norm": 0.2632666528224945, "learning_rate": 0.00017693228477189682, "loss": 11.6604, "step": 31638 }, { "epoch": 0.6622917189985765, "grad_norm": 0.2572297751903534, "learning_rate": 0.0001769308840314138, "loss": 11.6814, "step": 31639 }, { "epoch": 0.6623126517625387, "grad_norm": 0.31226903200149536, "learning_rate": 0.00017692948325394854, "loss": 11.6732, "step": 31640 }, { "epoch": 0.6623335845265009, "grad_norm": 0.3145187497138977, "learning_rate": 0.00017692808243950173, "loss": 11.6676, "step": 31641 }, { "epoch": 0.662354517290463, "grad_norm": 0.36297762393951416, "learning_rate": 0.00017692668158807406, "loss": 11.6752, "step": 31642 }, { "epoch": 0.6623754500544252, "grad_norm": 0.29738128185272217, "learning_rate": 0.00017692528069966618, "loss": 11.6755, "step": 31643 }, { "epoch": 0.6623963828183873, "grad_norm": 0.3272953927516937, "learning_rate": 0.0001769238797742788, "loss": 11.6846, "step": 31644 }, { "epoch": 0.6624173155823495, "grad_norm": 0.25388720631599426, "learning_rate": 0.00017692247881191255, "loss": 11.6837, "step": 31645 }, { "epoch": 0.6624382483463116, "grad_norm": 0.29347777366638184, "learning_rate": 0.00017692107781256812, "loss": 11.6773, "step": 31646 }, { "epoch": 0.6624591811102738, "grad_norm": 0.35159364342689514, "learning_rate": 0.0001769196767762462, "loss": 11.6845, "step": 31647 }, { "epoch": 0.662480113874236, "grad_norm": 0.326271653175354, "learning_rate": 0.00017691827570294743, "loss": 11.6708, "step": 31648 }, { "epoch": 0.6625010466381981, "grad_norm": 0.28746140003204346, "learning_rate": 0.00017691687459267253, "loss": 11.6746, "step": 31649 }, { "epoch": 0.6625219794021603, "grad_norm": 0.2799386978149414, "learning_rate": 0.00017691547344542212, "loss": 11.6646, "step": 31650 }, { "epoch": 0.6625429121661224, "grad_norm": 0.2521141469478607, "learning_rate": 0.0001769140722611969, "loss": 11.69, "step": 31651 }, { "epoch": 0.6625638449300846, "grad_norm": 0.3042464256286621, "learning_rate": 0.0001769126710399976, "loss": 11.6818, "step": 31652 }, { "epoch": 0.6625847776940467, "grad_norm": 0.3115062117576599, "learning_rate": 0.00017691126978182478, "loss": 11.668, "step": 31653 }, { "epoch": 0.6626057104580089, "grad_norm": 0.3107700049877167, "learning_rate": 0.00017690986848667916, "loss": 11.6791, "step": 31654 }, { "epoch": 0.6626266432219711, "grad_norm": 0.30443787574768066, "learning_rate": 0.00017690846715456146, "loss": 11.6759, "step": 31655 }, { "epoch": 0.6626475759859332, "grad_norm": 0.3168509304523468, "learning_rate": 0.0001769070657854723, "loss": 11.6535, "step": 31656 }, { "epoch": 0.6626685087498954, "grad_norm": 0.28550100326538086, "learning_rate": 0.0001769056643794124, "loss": 11.6747, "step": 31657 }, { "epoch": 0.6626894415138574, "grad_norm": 0.3541206419467926, "learning_rate": 0.00017690426293638238, "loss": 11.6634, "step": 31658 }, { "epoch": 0.6627103742778196, "grad_norm": 0.2606300711631775, "learning_rate": 0.00017690286145638295, "loss": 11.6676, "step": 31659 }, { "epoch": 0.6627313070417818, "grad_norm": 0.30511775612831116, "learning_rate": 0.00017690145993941477, "loss": 11.674, "step": 31660 }, { "epoch": 0.6627522398057439, "grad_norm": 0.37551864981651306, "learning_rate": 0.00017690005838547852, "loss": 11.6729, "step": 31661 }, { "epoch": 0.6627731725697061, "grad_norm": 0.2273268848657608, "learning_rate": 0.00017689865679457488, "loss": 11.6611, "step": 31662 }, { "epoch": 0.6627941053336682, "grad_norm": 0.26816099882125854, "learning_rate": 0.00017689725516670447, "loss": 11.6635, "step": 31663 }, { "epoch": 0.6628150380976304, "grad_norm": 0.26866617798805237, "learning_rate": 0.00017689585350186805, "loss": 11.6765, "step": 31664 }, { "epoch": 0.6628359708615925, "grad_norm": 0.30413973331451416, "learning_rate": 0.00017689445180006625, "loss": 11.6647, "step": 31665 }, { "epoch": 0.6628569036255547, "grad_norm": 0.24287763237953186, "learning_rate": 0.00017689305006129974, "loss": 11.6841, "step": 31666 }, { "epoch": 0.6628778363895169, "grad_norm": 0.337369829416275, "learning_rate": 0.0001768916482855692, "loss": 11.6614, "step": 31667 }, { "epoch": 0.662898769153479, "grad_norm": 0.25888413190841675, "learning_rate": 0.0001768902464728753, "loss": 11.6613, "step": 31668 }, { "epoch": 0.6629197019174412, "grad_norm": 0.251778781414032, "learning_rate": 0.00017688884462321874, "loss": 11.6509, "step": 31669 }, { "epoch": 0.6629406346814033, "grad_norm": 0.46876034140586853, "learning_rate": 0.00017688744273660015, "loss": 11.672, "step": 31670 }, { "epoch": 0.6629615674453655, "grad_norm": 0.40468981862068176, "learning_rate": 0.0001768860408130202, "loss": 11.685, "step": 31671 }, { "epoch": 0.6629825002093276, "grad_norm": 0.2524467408657074, "learning_rate": 0.00017688463885247963, "loss": 11.6555, "step": 31672 }, { "epoch": 0.6630034329732898, "grad_norm": 0.27470484375953674, "learning_rate": 0.00017688323685497906, "loss": 11.6698, "step": 31673 }, { "epoch": 0.663024365737252, "grad_norm": 0.29326385259628296, "learning_rate": 0.00017688183482051918, "loss": 11.6758, "step": 31674 }, { "epoch": 0.6630452985012141, "grad_norm": 0.29981186985969543, "learning_rate": 0.00017688043274910066, "loss": 11.6672, "step": 31675 }, { "epoch": 0.6630662312651763, "grad_norm": 0.2873483896255493, "learning_rate": 0.00017687903064072418, "loss": 11.6688, "step": 31676 }, { "epoch": 0.6630871640291384, "grad_norm": 0.26548123359680176, "learning_rate": 0.0001768776284953904, "loss": 11.6649, "step": 31677 }, { "epoch": 0.6631080967931006, "grad_norm": 0.29588910937309265, "learning_rate": 0.00017687622631309998, "loss": 11.6492, "step": 31678 }, { "epoch": 0.6631290295570628, "grad_norm": 0.35435113310813904, "learning_rate": 0.00017687482409385366, "loss": 11.6733, "step": 31679 }, { "epoch": 0.6631499623210249, "grad_norm": 0.2804584205150604, "learning_rate": 0.00017687342183765207, "loss": 11.6662, "step": 31680 }, { "epoch": 0.663170895084987, "grad_norm": 0.27044638991355896, "learning_rate": 0.00017687201954449586, "loss": 11.6675, "step": 31681 }, { "epoch": 0.6631918278489491, "grad_norm": 0.302971214056015, "learning_rate": 0.00017687061721438573, "loss": 11.6708, "step": 31682 }, { "epoch": 0.6632127606129113, "grad_norm": 0.2423257827758789, "learning_rate": 0.00017686921484732238, "loss": 11.6712, "step": 31683 }, { "epoch": 0.6632336933768734, "grad_norm": 0.25416308641433716, "learning_rate": 0.00017686781244330644, "loss": 11.6668, "step": 31684 }, { "epoch": 0.6632546261408356, "grad_norm": 0.27699410915374756, "learning_rate": 0.00017686641000233858, "loss": 11.6875, "step": 31685 }, { "epoch": 0.6632755589047978, "grad_norm": 0.3061016798019409, "learning_rate": 0.00017686500752441955, "loss": 11.675, "step": 31686 }, { "epoch": 0.6632964916687599, "grad_norm": 0.39778080582618713, "learning_rate": 0.00017686360500954992, "loss": 11.6621, "step": 31687 }, { "epoch": 0.6633174244327221, "grad_norm": 0.2749880254268646, "learning_rate": 0.00017686220245773047, "loss": 11.6675, "step": 31688 }, { "epoch": 0.6633383571966842, "grad_norm": 0.39559370279312134, "learning_rate": 0.00017686079986896177, "loss": 11.6696, "step": 31689 }, { "epoch": 0.6633592899606464, "grad_norm": 0.33816084265708923, "learning_rate": 0.00017685939724324456, "loss": 11.6656, "step": 31690 }, { "epoch": 0.6633802227246085, "grad_norm": 0.25328564643859863, "learning_rate": 0.0001768579945805795, "loss": 11.6622, "step": 31691 }, { "epoch": 0.6634011554885707, "grad_norm": 0.3243970274925232, "learning_rate": 0.00017685659188096725, "loss": 11.668, "step": 31692 }, { "epoch": 0.6634220882525329, "grad_norm": 0.3563232421875, "learning_rate": 0.0001768551891444085, "loss": 11.6571, "step": 31693 }, { "epoch": 0.663443021016495, "grad_norm": 0.2936175465583801, "learning_rate": 0.00017685378637090392, "loss": 11.6832, "step": 31694 }, { "epoch": 0.6634639537804572, "grad_norm": 0.2984655499458313, "learning_rate": 0.00017685238356045422, "loss": 11.6678, "step": 31695 }, { "epoch": 0.6634848865444193, "grad_norm": 0.2824297547340393, "learning_rate": 0.00017685098071305999, "loss": 11.6816, "step": 31696 }, { "epoch": 0.6635058193083815, "grad_norm": 0.2595508396625519, "learning_rate": 0.000176849577828722, "loss": 11.6783, "step": 31697 }, { "epoch": 0.6635267520723437, "grad_norm": 0.270123153924942, "learning_rate": 0.00017684817490744084, "loss": 11.6793, "step": 31698 }, { "epoch": 0.6635476848363058, "grad_norm": 0.3190805912017822, "learning_rate": 0.0001768467719492172, "loss": 11.6857, "step": 31699 }, { "epoch": 0.663568617600268, "grad_norm": 0.3332824110984802, "learning_rate": 0.00017684536895405182, "loss": 11.6698, "step": 31700 }, { "epoch": 0.6635895503642301, "grad_norm": 0.28751125931739807, "learning_rate": 0.0001768439659219453, "loss": 11.6616, "step": 31701 }, { "epoch": 0.6636104831281923, "grad_norm": 0.37481510639190674, "learning_rate": 0.00017684256285289838, "loss": 11.696, "step": 31702 }, { "epoch": 0.6636314158921544, "grad_norm": 0.27546122670173645, "learning_rate": 0.0001768411597469117, "loss": 11.6694, "step": 31703 }, { "epoch": 0.6636523486561166, "grad_norm": 0.25804468989372253, "learning_rate": 0.0001768397566039859, "loss": 11.6692, "step": 31704 }, { "epoch": 0.6636732814200788, "grad_norm": 0.29502415657043457, "learning_rate": 0.00017683835342412172, "loss": 11.6683, "step": 31705 }, { "epoch": 0.6636942141840408, "grad_norm": 0.31875360012054443, "learning_rate": 0.0001768369502073198, "loss": 11.6874, "step": 31706 }, { "epoch": 0.663715146948003, "grad_norm": 0.28224825859069824, "learning_rate": 0.0001768355469535808, "loss": 11.6841, "step": 31707 }, { "epoch": 0.6637360797119651, "grad_norm": 0.2917981445789337, "learning_rate": 0.00017683414366290542, "loss": 11.6741, "step": 31708 }, { "epoch": 0.6637570124759273, "grad_norm": 1.9173297882080078, "learning_rate": 0.00017683274033529432, "loss": 11.7232, "step": 31709 }, { "epoch": 0.6637779452398894, "grad_norm": 0.38502150774002075, "learning_rate": 0.0001768313369707482, "loss": 11.6776, "step": 31710 }, { "epoch": 0.6637988780038516, "grad_norm": 0.2504192888736725, "learning_rate": 0.00017682993356926772, "loss": 11.6681, "step": 31711 }, { "epoch": 0.6638198107678138, "grad_norm": 0.26457679271698, "learning_rate": 0.00017682853013085354, "loss": 11.6583, "step": 31712 }, { "epoch": 0.6638407435317759, "grad_norm": 0.36207112669944763, "learning_rate": 0.00017682712665550635, "loss": 11.6752, "step": 31713 }, { "epoch": 0.6638616762957381, "grad_norm": 0.3608021140098572, "learning_rate": 0.0001768257231432268, "loss": 11.6539, "step": 31714 }, { "epoch": 0.6638826090597002, "grad_norm": 0.2446296066045761, "learning_rate": 0.00017682431959401563, "loss": 11.6748, "step": 31715 }, { "epoch": 0.6639035418236624, "grad_norm": 0.2846020460128784, "learning_rate": 0.00017682291600787343, "loss": 11.6681, "step": 31716 }, { "epoch": 0.6639244745876246, "grad_norm": 0.2996557056903839, "learning_rate": 0.00017682151238480092, "loss": 11.6726, "step": 31717 }, { "epoch": 0.6639454073515867, "grad_norm": 0.25503623485565186, "learning_rate": 0.00017682010872479878, "loss": 11.6614, "step": 31718 }, { "epoch": 0.6639663401155489, "grad_norm": 0.2530530095100403, "learning_rate": 0.00017681870502786766, "loss": 11.6772, "step": 31719 }, { "epoch": 0.663987272879511, "grad_norm": 0.24151958525180817, "learning_rate": 0.00017681730129400825, "loss": 11.6548, "step": 31720 }, { "epoch": 0.6640082056434732, "grad_norm": 0.26349228620529175, "learning_rate": 0.00017681589752322125, "loss": 11.6776, "step": 31721 }, { "epoch": 0.6640291384074353, "grad_norm": 0.2534860670566559, "learning_rate": 0.0001768144937155073, "loss": 11.6815, "step": 31722 }, { "epoch": 0.6640500711713975, "grad_norm": 0.31548500061035156, "learning_rate": 0.00017681308987086707, "loss": 11.6763, "step": 31723 }, { "epoch": 0.6640710039353597, "grad_norm": 0.29131677746772766, "learning_rate": 0.00017681168598930125, "loss": 11.6651, "step": 31724 }, { "epoch": 0.6640919366993218, "grad_norm": 0.3170776963233948, "learning_rate": 0.0001768102820708105, "loss": 11.6732, "step": 31725 }, { "epoch": 0.664112869463284, "grad_norm": 0.24019281566143036, "learning_rate": 0.00017680887811539556, "loss": 11.6734, "step": 31726 }, { "epoch": 0.664133802227246, "grad_norm": 1.3376668691635132, "learning_rate": 0.000176807474123057, "loss": 11.6584, "step": 31727 }, { "epoch": 0.6641547349912083, "grad_norm": 0.29811280965805054, "learning_rate": 0.00017680607009379557, "loss": 11.6674, "step": 31728 }, { "epoch": 0.6641756677551703, "grad_norm": 0.311306357383728, "learning_rate": 0.0001768046660276119, "loss": 11.6556, "step": 31729 }, { "epoch": 0.6641966005191325, "grad_norm": 0.29013365507125854, "learning_rate": 0.00017680326192450672, "loss": 11.6734, "step": 31730 }, { "epoch": 0.6642175332830947, "grad_norm": 0.3708762526512146, "learning_rate": 0.00017680185778448066, "loss": 11.683, "step": 31731 }, { "epoch": 0.6642384660470568, "grad_norm": 0.29713475704193115, "learning_rate": 0.00017680045360753442, "loss": 11.6585, "step": 31732 }, { "epoch": 0.664259398811019, "grad_norm": 0.2707900404930115, "learning_rate": 0.00017679904939366865, "loss": 11.6669, "step": 31733 }, { "epoch": 0.6642803315749811, "grad_norm": 0.39309731125831604, "learning_rate": 0.00017679764514288402, "loss": 11.6735, "step": 31734 }, { "epoch": 0.6643012643389433, "grad_norm": 0.2594560980796814, "learning_rate": 0.00017679624085518124, "loss": 11.6911, "step": 31735 }, { "epoch": 0.6643221971029055, "grad_norm": 0.2920878231525421, "learning_rate": 0.000176794836530561, "loss": 11.6552, "step": 31736 }, { "epoch": 0.6643431298668676, "grad_norm": 0.23986634612083435, "learning_rate": 0.0001767934321690239, "loss": 11.6435, "step": 31737 }, { "epoch": 0.6643640626308298, "grad_norm": 0.35141241550445557, "learning_rate": 0.00017679202777057067, "loss": 11.6765, "step": 31738 }, { "epoch": 0.6643849953947919, "grad_norm": 0.2938538193702698, "learning_rate": 0.00017679062333520194, "loss": 11.6659, "step": 31739 }, { "epoch": 0.6644059281587541, "grad_norm": 0.2748223543167114, "learning_rate": 0.00017678921886291846, "loss": 11.6687, "step": 31740 }, { "epoch": 0.6644268609227162, "grad_norm": 0.2763749659061432, "learning_rate": 0.00017678781435372088, "loss": 11.6715, "step": 31741 }, { "epoch": 0.6644477936866784, "grad_norm": 0.31767773628234863, "learning_rate": 0.00017678640980760984, "loss": 11.6623, "step": 31742 }, { "epoch": 0.6644687264506406, "grad_norm": 0.42310717701911926, "learning_rate": 0.00017678500522458603, "loss": 11.6917, "step": 31743 }, { "epoch": 0.6644896592146027, "grad_norm": 0.3739089071750641, "learning_rate": 0.00017678360060465013, "loss": 11.6723, "step": 31744 }, { "epoch": 0.6645105919785649, "grad_norm": 0.33633360266685486, "learning_rate": 0.0001767821959478028, "loss": 11.6673, "step": 31745 }, { "epoch": 0.664531524742527, "grad_norm": 0.27422410249710083, "learning_rate": 0.00017678079125404473, "loss": 11.6717, "step": 31746 }, { "epoch": 0.6645524575064892, "grad_norm": 0.2787638306617737, "learning_rate": 0.00017677938652337663, "loss": 11.6803, "step": 31747 }, { "epoch": 0.6645733902704513, "grad_norm": 0.28555458784103394, "learning_rate": 0.0001767779817557991, "loss": 11.6734, "step": 31748 }, { "epoch": 0.6645943230344135, "grad_norm": 0.2868688106536865, "learning_rate": 0.00017677657695131286, "loss": 11.6714, "step": 31749 }, { "epoch": 0.6646152557983757, "grad_norm": 0.4175427556037903, "learning_rate": 0.00017677517210991863, "loss": 11.6879, "step": 31750 }, { "epoch": 0.6646361885623377, "grad_norm": 0.31052833795547485, "learning_rate": 0.000176773767231617, "loss": 11.6753, "step": 31751 }, { "epoch": 0.6646571213263, "grad_norm": 0.30397358536720276, "learning_rate": 0.00017677236231640865, "loss": 11.6755, "step": 31752 }, { "epoch": 0.664678054090262, "grad_norm": 0.3453065752983093, "learning_rate": 0.00017677095736429434, "loss": 11.6797, "step": 31753 }, { "epoch": 0.6646989868542242, "grad_norm": 0.26747867465019226, "learning_rate": 0.00017676955237527466, "loss": 11.6791, "step": 31754 }, { "epoch": 0.6647199196181864, "grad_norm": 0.2877657413482666, "learning_rate": 0.00017676814734935033, "loss": 11.6619, "step": 31755 }, { "epoch": 0.6647408523821485, "grad_norm": 0.3251034617424011, "learning_rate": 0.000176766742286522, "loss": 11.6524, "step": 31756 }, { "epoch": 0.6647617851461107, "grad_norm": 0.3044104278087616, "learning_rate": 0.00017676533718679038, "loss": 11.6663, "step": 31757 }, { "epoch": 0.6647827179100728, "grad_norm": 0.24577704071998596, "learning_rate": 0.0001767639320501561, "loss": 11.6631, "step": 31758 }, { "epoch": 0.664803650674035, "grad_norm": 0.29913225769996643, "learning_rate": 0.0001767625268766199, "loss": 11.6537, "step": 31759 }, { "epoch": 0.6648245834379971, "grad_norm": 0.343111515045166, "learning_rate": 0.00017676112166618236, "loss": 11.6849, "step": 31760 }, { "epoch": 0.6648455162019593, "grad_norm": 0.29374802112579346, "learning_rate": 0.00017675971641884424, "loss": 11.6622, "step": 31761 }, { "epoch": 0.6648664489659215, "grad_norm": 0.27274438738822937, "learning_rate": 0.0001767583111346062, "loss": 11.6847, "step": 31762 }, { "epoch": 0.6648873817298836, "grad_norm": 0.7995854020118713, "learning_rate": 0.00017675690581346888, "loss": 11.6443, "step": 31763 }, { "epoch": 0.6649083144938458, "grad_norm": 0.2809116542339325, "learning_rate": 0.000176755500455433, "loss": 11.6635, "step": 31764 }, { "epoch": 0.6649292472578079, "grad_norm": 0.26895925402641296, "learning_rate": 0.0001767540950604992, "loss": 11.6624, "step": 31765 }, { "epoch": 0.6649501800217701, "grad_norm": 0.3506382405757904, "learning_rate": 0.00017675268962866815, "loss": 11.6837, "step": 31766 }, { "epoch": 0.6649711127857322, "grad_norm": 0.3316921591758728, "learning_rate": 0.0001767512841599406, "loss": 11.6821, "step": 31767 }, { "epoch": 0.6649920455496944, "grad_norm": 0.291161447763443, "learning_rate": 0.00017674987865431708, "loss": 11.6556, "step": 31768 }, { "epoch": 0.6650129783136566, "grad_norm": 0.3031640946865082, "learning_rate": 0.00017674847311179842, "loss": 11.6787, "step": 31769 }, { "epoch": 0.6650339110776187, "grad_norm": 0.33169448375701904, "learning_rate": 0.0001767470675323852, "loss": 11.6885, "step": 31770 }, { "epoch": 0.6650548438415809, "grad_norm": 0.3105139136314392, "learning_rate": 0.00017674566191607814, "loss": 11.668, "step": 31771 }, { "epoch": 0.665075776605543, "grad_norm": 0.2547875642776489, "learning_rate": 0.00017674425626287792, "loss": 11.667, "step": 31772 }, { "epoch": 0.6650967093695052, "grad_norm": 0.2595876455307007, "learning_rate": 0.0001767428505727852, "loss": 11.6688, "step": 31773 }, { "epoch": 0.6651176421334674, "grad_norm": 0.2816169857978821, "learning_rate": 0.00017674144484580058, "loss": 11.6698, "step": 31774 }, { "epoch": 0.6651385748974294, "grad_norm": 0.3617779314517975, "learning_rate": 0.0001767400390819249, "loss": 11.6896, "step": 31775 }, { "epoch": 0.6651595076613916, "grad_norm": 0.26353535056114197, "learning_rate": 0.0001767386332811587, "loss": 11.647, "step": 31776 }, { "epoch": 0.6651804404253537, "grad_norm": 0.31193864345550537, "learning_rate": 0.00017673722744350272, "loss": 11.6698, "step": 31777 }, { "epoch": 0.6652013731893159, "grad_norm": 0.29727670550346375, "learning_rate": 0.0001767358215689576, "loss": 11.6657, "step": 31778 }, { "epoch": 0.665222305953278, "grad_norm": 0.2880598306655884, "learning_rate": 0.00017673441565752403, "loss": 11.6587, "step": 31779 }, { "epoch": 0.6652432387172402, "grad_norm": 0.3572872579097748, "learning_rate": 0.00017673300970920272, "loss": 11.6695, "step": 31780 }, { "epoch": 0.6652641714812024, "grad_norm": 0.3008691668510437, "learning_rate": 0.00017673160372399429, "loss": 11.6845, "step": 31781 }, { "epoch": 0.6652851042451645, "grad_norm": 0.3242660462856293, "learning_rate": 0.00017673019770189944, "loss": 11.6721, "step": 31782 }, { "epoch": 0.6653060370091267, "grad_norm": 0.3283024728298187, "learning_rate": 0.00017672879164291885, "loss": 11.6706, "step": 31783 }, { "epoch": 0.6653269697730888, "grad_norm": 0.3421998918056488, "learning_rate": 0.0001767273855470532, "loss": 11.6841, "step": 31784 }, { "epoch": 0.665347902537051, "grad_norm": 0.2743285298347473, "learning_rate": 0.00017672597941430316, "loss": 11.6571, "step": 31785 }, { "epoch": 0.6653688353010131, "grad_norm": 0.3435467481613159, "learning_rate": 0.00017672457324466938, "loss": 11.6664, "step": 31786 }, { "epoch": 0.6653897680649753, "grad_norm": 0.3346158564090729, "learning_rate": 0.00017672316703815256, "loss": 11.67, "step": 31787 }, { "epoch": 0.6654107008289375, "grad_norm": 0.2750881612300873, "learning_rate": 0.0001767217607947534, "loss": 11.6569, "step": 31788 }, { "epoch": 0.6654316335928996, "grad_norm": 0.27510082721710205, "learning_rate": 0.00017672035451447255, "loss": 11.6696, "step": 31789 }, { "epoch": 0.6654525663568618, "grad_norm": 0.31633126735687256, "learning_rate": 0.0001767189481973107, "loss": 11.6651, "step": 31790 }, { "epoch": 0.6654734991208239, "grad_norm": 0.3106459975242615, "learning_rate": 0.00017671754184326848, "loss": 11.6518, "step": 31791 }, { "epoch": 0.6654944318847861, "grad_norm": 0.28612688183784485, "learning_rate": 0.0001767161354523466, "loss": 11.6524, "step": 31792 }, { "epoch": 0.6655153646487483, "grad_norm": 0.34979701042175293, "learning_rate": 0.00017671472902454574, "loss": 11.6855, "step": 31793 }, { "epoch": 0.6655362974127104, "grad_norm": 0.3466397523880005, "learning_rate": 0.00017671332255986662, "loss": 11.6782, "step": 31794 }, { "epoch": 0.6655572301766726, "grad_norm": 0.3215315043926239, "learning_rate": 0.00017671191605830982, "loss": 11.6668, "step": 31795 }, { "epoch": 0.6655781629406347, "grad_norm": 0.2768641412258148, "learning_rate": 0.00017671050951987609, "loss": 11.6725, "step": 31796 }, { "epoch": 0.6655990957045969, "grad_norm": 0.3245154023170471, "learning_rate": 0.00017670910294456606, "loss": 11.6716, "step": 31797 }, { "epoch": 0.6656200284685589, "grad_norm": 0.37301263213157654, "learning_rate": 0.00017670769633238042, "loss": 11.657, "step": 31798 }, { "epoch": 0.6656409612325211, "grad_norm": 0.3065873682498932, "learning_rate": 0.0001767062896833199, "loss": 11.6689, "step": 31799 }, { "epoch": 0.6656618939964833, "grad_norm": 0.26628565788269043, "learning_rate": 0.00017670488299738508, "loss": 11.6557, "step": 31800 }, { "epoch": 0.6656828267604454, "grad_norm": 0.3224415183067322, "learning_rate": 0.0001767034762745767, "loss": 11.6778, "step": 31801 }, { "epoch": 0.6657037595244076, "grad_norm": 0.26980647444725037, "learning_rate": 0.00017670206951489543, "loss": 11.6603, "step": 31802 }, { "epoch": 0.6657246922883697, "grad_norm": 0.254627525806427, "learning_rate": 0.00017670066271834194, "loss": 11.6498, "step": 31803 }, { "epoch": 0.6657456250523319, "grad_norm": 0.27468547224998474, "learning_rate": 0.0001766992558849169, "loss": 11.677, "step": 31804 }, { "epoch": 0.665766557816294, "grad_norm": 0.39530184864997864, "learning_rate": 0.000176697849014621, "loss": 11.6857, "step": 31805 }, { "epoch": 0.6657874905802562, "grad_norm": 0.31534120440483093, "learning_rate": 0.00017669644210745487, "loss": 11.6849, "step": 31806 }, { "epoch": 0.6658084233442184, "grad_norm": 0.3284914195537567, "learning_rate": 0.00017669503516341926, "loss": 11.6792, "step": 31807 }, { "epoch": 0.6658293561081805, "grad_norm": 0.32134363055229187, "learning_rate": 0.00017669362818251478, "loss": 11.6742, "step": 31808 }, { "epoch": 0.6658502888721427, "grad_norm": 0.37749096751213074, "learning_rate": 0.00017669222116474217, "loss": 11.6738, "step": 31809 }, { "epoch": 0.6658712216361048, "grad_norm": 0.3303123712539673, "learning_rate": 0.00017669081411010205, "loss": 11.6728, "step": 31810 }, { "epoch": 0.665892154400067, "grad_norm": 0.29438894987106323, "learning_rate": 0.0001766894070185951, "loss": 11.6967, "step": 31811 }, { "epoch": 0.6659130871640292, "grad_norm": 0.28045621514320374, "learning_rate": 0.00017668799989022205, "loss": 11.6538, "step": 31812 }, { "epoch": 0.6659340199279913, "grad_norm": 0.3267209827899933, "learning_rate": 0.00017668659272498352, "loss": 11.6629, "step": 31813 }, { "epoch": 0.6659549526919535, "grad_norm": 0.26777592301368713, "learning_rate": 0.00017668518552288025, "loss": 11.6731, "step": 31814 }, { "epoch": 0.6659758854559156, "grad_norm": 0.35264602303504944, "learning_rate": 0.00017668377828391284, "loss": 11.6641, "step": 31815 }, { "epoch": 0.6659968182198778, "grad_norm": 0.3116322159767151, "learning_rate": 0.000176682371008082, "loss": 11.6713, "step": 31816 }, { "epoch": 0.6660177509838399, "grad_norm": 0.3787462115287781, "learning_rate": 0.00017668096369538843, "loss": 11.6887, "step": 31817 }, { "epoch": 0.6660386837478021, "grad_norm": 0.3549225628376007, "learning_rate": 0.00017667955634583272, "loss": 11.6929, "step": 31818 }, { "epoch": 0.6660596165117643, "grad_norm": 0.43263596296310425, "learning_rate": 0.00017667814895941568, "loss": 11.6554, "step": 31819 }, { "epoch": 0.6660805492757264, "grad_norm": 0.30200818181037903, "learning_rate": 0.0001766767415361379, "loss": 11.673, "step": 31820 }, { "epoch": 0.6661014820396886, "grad_norm": 0.37022244930267334, "learning_rate": 0.00017667533407600007, "loss": 11.6799, "step": 31821 }, { "epoch": 0.6661224148036506, "grad_norm": 0.2979555130004883, "learning_rate": 0.00017667392657900286, "loss": 11.6678, "step": 31822 }, { "epoch": 0.6661433475676128, "grad_norm": 0.28655001521110535, "learning_rate": 0.00017667251904514696, "loss": 11.6801, "step": 31823 }, { "epoch": 0.6661642803315749, "grad_norm": 0.29173922538757324, "learning_rate": 0.00017667111147443305, "loss": 11.6759, "step": 31824 }, { "epoch": 0.6661852130955371, "grad_norm": 0.22406893968582153, "learning_rate": 0.0001766697038668618, "loss": 11.6578, "step": 31825 }, { "epoch": 0.6662061458594993, "grad_norm": 0.2681543827056885, "learning_rate": 0.0001766682962224339, "loss": 11.6823, "step": 31826 }, { "epoch": 0.6662270786234614, "grad_norm": 0.330704003572464, "learning_rate": 0.00017666688854115, "loss": 11.6695, "step": 31827 }, { "epoch": 0.6662480113874236, "grad_norm": 0.29474034905433655, "learning_rate": 0.00017666548082301078, "loss": 11.6858, "step": 31828 }, { "epoch": 0.6662689441513857, "grad_norm": 0.30173400044441223, "learning_rate": 0.00017666407306801696, "loss": 11.6539, "step": 31829 }, { "epoch": 0.6662898769153479, "grad_norm": 0.47409334778785706, "learning_rate": 0.00017666266527616915, "loss": 11.6787, "step": 31830 }, { "epoch": 0.66631080967931, "grad_norm": 0.41453540325164795, "learning_rate": 0.00017666125744746807, "loss": 11.6754, "step": 31831 }, { "epoch": 0.6663317424432722, "grad_norm": 0.28858113288879395, "learning_rate": 0.00017665984958191436, "loss": 11.687, "step": 31832 }, { "epoch": 0.6663526752072344, "grad_norm": 0.305635929107666, "learning_rate": 0.00017665844167950878, "loss": 11.6779, "step": 31833 }, { "epoch": 0.6663736079711965, "grad_norm": 0.2982884645462036, "learning_rate": 0.0001766570337402519, "loss": 11.6429, "step": 31834 }, { "epoch": 0.6663945407351587, "grad_norm": 0.2805444300174713, "learning_rate": 0.00017665562576414448, "loss": 11.6582, "step": 31835 }, { "epoch": 0.6664154734991208, "grad_norm": 0.28172793984413147, "learning_rate": 0.0001766542177511872, "loss": 11.6691, "step": 31836 }, { "epoch": 0.666436406263083, "grad_norm": 0.3173893094062805, "learning_rate": 0.0001766528097013806, "loss": 11.6849, "step": 31837 }, { "epoch": 0.6664573390270452, "grad_norm": 0.32944783568382263, "learning_rate": 0.00017665140161472555, "loss": 11.6728, "step": 31838 }, { "epoch": 0.6664782717910073, "grad_norm": 0.2443418800830841, "learning_rate": 0.00017664999349122258, "loss": 11.6746, "step": 31839 }, { "epoch": 0.6664992045549695, "grad_norm": 0.2977003753185272, "learning_rate": 0.00017664858533087246, "loss": 11.6681, "step": 31840 }, { "epoch": 0.6665201373189316, "grad_norm": 0.30129536986351013, "learning_rate": 0.0001766471771336758, "loss": 11.6742, "step": 31841 }, { "epoch": 0.6665410700828938, "grad_norm": 0.406341552734375, "learning_rate": 0.00017664576889963332, "loss": 11.6773, "step": 31842 }, { "epoch": 0.6665620028468559, "grad_norm": 0.2857764959335327, "learning_rate": 0.00017664436062874567, "loss": 11.6589, "step": 31843 }, { "epoch": 0.666582935610818, "grad_norm": 0.29628098011016846, "learning_rate": 0.00017664295232101355, "loss": 11.6642, "step": 31844 }, { "epoch": 0.6666038683747803, "grad_norm": 0.3406859040260315, "learning_rate": 0.00017664154397643763, "loss": 11.6811, "step": 31845 }, { "epoch": 0.6666248011387423, "grad_norm": 0.3247189521789551, "learning_rate": 0.00017664013559501857, "loss": 11.6613, "step": 31846 }, { "epoch": 0.6666457339027045, "grad_norm": 0.3434719443321228, "learning_rate": 0.00017663872717675708, "loss": 11.6641, "step": 31847 }, { "epoch": 0.6666666666666666, "grad_norm": 0.32515889406204224, "learning_rate": 0.0001766373187216538, "loss": 11.6615, "step": 31848 }, { "epoch": 0.6666875994306288, "grad_norm": 0.31704825162887573, "learning_rate": 0.00017663591022970944, "loss": 11.6857, "step": 31849 }, { "epoch": 0.6667085321945909, "grad_norm": 0.322440505027771, "learning_rate": 0.00017663450170092465, "loss": 11.663, "step": 31850 }, { "epoch": 0.6667294649585531, "grad_norm": 0.2631317973136902, "learning_rate": 0.00017663309313530013, "loss": 11.6648, "step": 31851 }, { "epoch": 0.6667503977225153, "grad_norm": 0.2913275957107544, "learning_rate": 0.00017663168453283654, "loss": 11.6617, "step": 31852 }, { "epoch": 0.6667713304864774, "grad_norm": 0.26863181591033936, "learning_rate": 0.00017663027589353458, "loss": 11.679, "step": 31853 }, { "epoch": 0.6667922632504396, "grad_norm": 0.24008801579475403, "learning_rate": 0.0001766288672173949, "loss": 11.6713, "step": 31854 }, { "epoch": 0.6668131960144017, "grad_norm": 0.24479028582572937, "learning_rate": 0.00017662745850441816, "loss": 11.6593, "step": 31855 }, { "epoch": 0.6668341287783639, "grad_norm": 0.33141493797302246, "learning_rate": 0.0001766260497546051, "loss": 11.6944, "step": 31856 }, { "epoch": 0.6668550615423261, "grad_norm": 0.24776078760623932, "learning_rate": 0.00017662464096795635, "loss": 11.6713, "step": 31857 }, { "epoch": 0.6668759943062882, "grad_norm": 0.2811781167984009, "learning_rate": 0.0001766232321444726, "loss": 11.6773, "step": 31858 }, { "epoch": 0.6668969270702504, "grad_norm": 0.3144887685775757, "learning_rate": 0.00017662182328415454, "loss": 11.6894, "step": 31859 }, { "epoch": 0.6669178598342125, "grad_norm": 0.27553123235702515, "learning_rate": 0.00017662041438700282, "loss": 11.6615, "step": 31860 }, { "epoch": 0.6669387925981747, "grad_norm": 0.29719680547714233, "learning_rate": 0.00017661900545301812, "loss": 11.671, "step": 31861 }, { "epoch": 0.6669597253621368, "grad_norm": 0.3781020939350128, "learning_rate": 0.00017661759648220115, "loss": 11.6653, "step": 31862 }, { "epoch": 0.666980658126099, "grad_norm": 0.26312851905822754, "learning_rate": 0.00017661618747455258, "loss": 11.6728, "step": 31863 }, { "epoch": 0.6670015908900612, "grad_norm": 0.2659880816936493, "learning_rate": 0.00017661477843007305, "loss": 11.6711, "step": 31864 }, { "epoch": 0.6670225236540233, "grad_norm": 0.2535274922847748, "learning_rate": 0.00017661336934876324, "loss": 11.6802, "step": 31865 }, { "epoch": 0.6670434564179855, "grad_norm": 0.35607436299324036, "learning_rate": 0.00017661196023062388, "loss": 11.6641, "step": 31866 }, { "epoch": 0.6670643891819475, "grad_norm": 0.3379272520542145, "learning_rate": 0.00017661055107565557, "loss": 11.6828, "step": 31867 }, { "epoch": 0.6670853219459097, "grad_norm": 0.2742457389831543, "learning_rate": 0.0001766091418838591, "loss": 11.6921, "step": 31868 }, { "epoch": 0.6671062547098718, "grad_norm": 0.28570276498794556, "learning_rate": 0.00017660773265523505, "loss": 11.6746, "step": 31869 }, { "epoch": 0.667127187473834, "grad_norm": 0.27025720477104187, "learning_rate": 0.0001766063233897841, "loss": 11.6834, "step": 31870 }, { "epoch": 0.6671481202377962, "grad_norm": 0.27641206979751587, "learning_rate": 0.000176604914087507, "loss": 11.6624, "step": 31871 }, { "epoch": 0.6671690530017583, "grad_norm": 0.3244410455226898, "learning_rate": 0.00017660350474840434, "loss": 11.6753, "step": 31872 }, { "epoch": 0.6671899857657205, "grad_norm": 0.32215020060539246, "learning_rate": 0.00017660209537247688, "loss": 11.6732, "step": 31873 }, { "epoch": 0.6672109185296826, "grad_norm": 0.28101521730422974, "learning_rate": 0.00017660068595972526, "loss": 11.6506, "step": 31874 }, { "epoch": 0.6672318512936448, "grad_norm": 0.212507426738739, "learning_rate": 0.0001765992765101501, "loss": 11.662, "step": 31875 }, { "epoch": 0.667252784057607, "grad_norm": 0.27723461389541626, "learning_rate": 0.0001765978670237522, "loss": 11.6655, "step": 31876 }, { "epoch": 0.6672737168215691, "grad_norm": 0.27293914556503296, "learning_rate": 0.00017659645750053214, "loss": 11.6729, "step": 31877 }, { "epoch": 0.6672946495855313, "grad_norm": 0.30412569642066956, "learning_rate": 0.00017659504794049062, "loss": 11.6612, "step": 31878 }, { "epoch": 0.6673155823494934, "grad_norm": 0.40683645009994507, "learning_rate": 0.00017659363834362832, "loss": 11.6792, "step": 31879 }, { "epoch": 0.6673365151134556, "grad_norm": 0.2664724588394165, "learning_rate": 0.00017659222870994597, "loss": 11.6595, "step": 31880 }, { "epoch": 0.6673574478774177, "grad_norm": 0.29365959763526917, "learning_rate": 0.00017659081903944414, "loss": 11.6743, "step": 31881 }, { "epoch": 0.6673783806413799, "grad_norm": 0.32477420568466187, "learning_rate": 0.0001765894093321236, "loss": 11.6811, "step": 31882 }, { "epoch": 0.6673993134053421, "grad_norm": 0.2807910442352295, "learning_rate": 0.000176587999587985, "loss": 11.6695, "step": 31883 }, { "epoch": 0.6674202461693042, "grad_norm": 0.2633923590183258, "learning_rate": 0.000176586589807029, "loss": 11.6753, "step": 31884 }, { "epoch": 0.6674411789332664, "grad_norm": 0.24605385959148407, "learning_rate": 0.0001765851799892563, "loss": 11.6545, "step": 31885 }, { "epoch": 0.6674621116972285, "grad_norm": 0.2334388792514801, "learning_rate": 0.00017658377013466756, "loss": 11.672, "step": 31886 }, { "epoch": 0.6674830444611907, "grad_norm": 0.2872169613838196, "learning_rate": 0.00017658236024326347, "loss": 11.671, "step": 31887 }, { "epoch": 0.6675039772251528, "grad_norm": 0.30835479497909546, "learning_rate": 0.00017658095031504472, "loss": 11.6798, "step": 31888 }, { "epoch": 0.667524909989115, "grad_norm": 0.30176636576652527, "learning_rate": 0.00017657954035001195, "loss": 11.6714, "step": 31889 }, { "epoch": 0.6675458427530772, "grad_norm": 0.292695552110672, "learning_rate": 0.00017657813034816587, "loss": 11.6693, "step": 31890 }, { "epoch": 0.6675667755170392, "grad_norm": 0.363665908575058, "learning_rate": 0.00017657672030950715, "loss": 11.7013, "step": 31891 }, { "epoch": 0.6675877082810014, "grad_norm": 0.25771865248680115, "learning_rate": 0.00017657531023403645, "loss": 11.6835, "step": 31892 }, { "epoch": 0.6676086410449635, "grad_norm": 0.29270532727241516, "learning_rate": 0.00017657390012175447, "loss": 11.6697, "step": 31893 }, { "epoch": 0.6676295738089257, "grad_norm": 0.2710303068161011, "learning_rate": 0.00017657248997266192, "loss": 11.6572, "step": 31894 }, { "epoch": 0.6676505065728879, "grad_norm": 0.2734121084213257, "learning_rate": 0.0001765710797867594, "loss": 11.6549, "step": 31895 }, { "epoch": 0.66767143933685, "grad_norm": 0.21442186832427979, "learning_rate": 0.00017656966956404764, "loss": 11.6729, "step": 31896 }, { "epoch": 0.6676923721008122, "grad_norm": 0.2735198140144348, "learning_rate": 0.0001765682593045273, "loss": 11.66, "step": 31897 }, { "epoch": 0.6677133048647743, "grad_norm": 0.2936704456806183, "learning_rate": 0.00017656684900819907, "loss": 11.6901, "step": 31898 }, { "epoch": 0.6677342376287365, "grad_norm": 0.3020375370979309, "learning_rate": 0.00017656543867506361, "loss": 11.6689, "step": 31899 }, { "epoch": 0.6677551703926986, "grad_norm": 0.3379296064376831, "learning_rate": 0.00017656402830512163, "loss": 11.6856, "step": 31900 }, { "epoch": 0.6677761031566608, "grad_norm": 0.2717267572879791, "learning_rate": 0.00017656261789837375, "loss": 11.6601, "step": 31901 }, { "epoch": 0.667797035920623, "grad_norm": 0.26248934864997864, "learning_rate": 0.00017656120745482073, "loss": 11.6565, "step": 31902 }, { "epoch": 0.6678179686845851, "grad_norm": 0.3823312222957611, "learning_rate": 0.00017655979697446316, "loss": 11.6663, "step": 31903 }, { "epoch": 0.6678389014485473, "grad_norm": 0.6219094395637512, "learning_rate": 0.00017655838645730181, "loss": 11.6823, "step": 31904 }, { "epoch": 0.6678598342125094, "grad_norm": 0.283342570066452, "learning_rate": 0.00017655697590333725, "loss": 11.6572, "step": 31905 }, { "epoch": 0.6678807669764716, "grad_norm": 0.2575669288635254, "learning_rate": 0.00017655556531257025, "loss": 11.6811, "step": 31906 }, { "epoch": 0.6679016997404337, "grad_norm": 0.3656969368457794, "learning_rate": 0.00017655415468500145, "loss": 11.681, "step": 31907 }, { "epoch": 0.6679226325043959, "grad_norm": 0.2955866754055023, "learning_rate": 0.00017655274402063155, "loss": 11.6332, "step": 31908 }, { "epoch": 0.6679435652683581, "grad_norm": 0.27309879660606384, "learning_rate": 0.00017655133331946122, "loss": 11.6737, "step": 31909 }, { "epoch": 0.6679644980323202, "grad_norm": 0.27037346363067627, "learning_rate": 0.0001765499225814911, "loss": 11.6683, "step": 31910 }, { "epoch": 0.6679854307962824, "grad_norm": 0.3028751313686371, "learning_rate": 0.0001765485118067219, "loss": 11.6671, "step": 31911 }, { "epoch": 0.6680063635602445, "grad_norm": 0.35751938819885254, "learning_rate": 0.0001765471009951543, "loss": 11.6681, "step": 31912 }, { "epoch": 0.6680272963242067, "grad_norm": 0.26187318563461304, "learning_rate": 0.00017654569014678899, "loss": 11.6723, "step": 31913 }, { "epoch": 0.6680482290881689, "grad_norm": 0.3562338650226593, "learning_rate": 0.00017654427926162662, "loss": 11.6548, "step": 31914 }, { "epoch": 0.6680691618521309, "grad_norm": 0.2353595346212387, "learning_rate": 0.00017654286833966787, "loss": 11.6613, "step": 31915 }, { "epoch": 0.6680900946160931, "grad_norm": 0.32988929748535156, "learning_rate": 0.00017654145738091345, "loss": 11.6706, "step": 31916 }, { "epoch": 0.6681110273800552, "grad_norm": 0.27129846811294556, "learning_rate": 0.00017654004638536402, "loss": 11.659, "step": 31917 }, { "epoch": 0.6681319601440174, "grad_norm": 0.3747316300868988, "learning_rate": 0.00017653863535302023, "loss": 11.657, "step": 31918 }, { "epoch": 0.6681528929079795, "grad_norm": 0.2884221076965332, "learning_rate": 0.0001765372242838828, "loss": 11.6496, "step": 31919 }, { "epoch": 0.6681738256719417, "grad_norm": 0.5490388870239258, "learning_rate": 0.0001765358131779524, "loss": 11.6579, "step": 31920 }, { "epoch": 0.6681947584359039, "grad_norm": 0.6033632755279541, "learning_rate": 0.0001765344020352297, "loss": 11.6725, "step": 31921 }, { "epoch": 0.668215691199866, "grad_norm": 0.3130267560482025, "learning_rate": 0.00017653299085571534, "loss": 11.6728, "step": 31922 }, { "epoch": 0.6682366239638282, "grad_norm": 0.2881518006324768, "learning_rate": 0.0001765315796394101, "loss": 11.6777, "step": 31923 }, { "epoch": 0.6682575567277903, "grad_norm": 0.32600513100624084, "learning_rate": 0.00017653016838631457, "loss": 11.6649, "step": 31924 }, { "epoch": 0.6682784894917525, "grad_norm": 0.31260165572166443, "learning_rate": 0.00017652875709642945, "loss": 11.68, "step": 31925 }, { "epoch": 0.6682994222557146, "grad_norm": 0.2227308303117752, "learning_rate": 0.00017652734576975544, "loss": 11.6626, "step": 31926 }, { "epoch": 0.6683203550196768, "grad_norm": 0.27379944920539856, "learning_rate": 0.00017652593440629314, "loss": 11.6759, "step": 31927 }, { "epoch": 0.668341287783639, "grad_norm": 0.4469528794288635, "learning_rate": 0.00017652452300604337, "loss": 11.6595, "step": 31928 }, { "epoch": 0.6683622205476011, "grad_norm": 0.2771158218383789, "learning_rate": 0.00017652311156900668, "loss": 11.6838, "step": 31929 }, { "epoch": 0.6683831533115633, "grad_norm": 0.2785550653934479, "learning_rate": 0.0001765217000951838, "loss": 11.6786, "step": 31930 }, { "epoch": 0.6684040860755254, "grad_norm": 0.27793824672698975, "learning_rate": 0.00017652028858457542, "loss": 11.6746, "step": 31931 }, { "epoch": 0.6684250188394876, "grad_norm": 0.35065555572509766, "learning_rate": 0.00017651887703718218, "loss": 11.6521, "step": 31932 }, { "epoch": 0.6684459516034498, "grad_norm": 0.2525327205657959, "learning_rate": 0.0001765174654530048, "loss": 11.669, "step": 31933 }, { "epoch": 0.6684668843674119, "grad_norm": 0.30272695422172546, "learning_rate": 0.00017651605383204398, "loss": 11.6694, "step": 31934 }, { "epoch": 0.6684878171313741, "grad_norm": 0.3170035481452942, "learning_rate": 0.0001765146421743003, "loss": 11.676, "step": 31935 }, { "epoch": 0.6685087498953362, "grad_norm": 0.28070947527885437, "learning_rate": 0.00017651323047977453, "loss": 11.6855, "step": 31936 }, { "epoch": 0.6685296826592984, "grad_norm": 0.3294329345226288, "learning_rate": 0.0001765118187484673, "loss": 11.6767, "step": 31937 }, { "epoch": 0.6685506154232604, "grad_norm": 0.3230587840080261, "learning_rate": 0.0001765104069803793, "loss": 11.6864, "step": 31938 }, { "epoch": 0.6685715481872226, "grad_norm": 0.3279239237308502, "learning_rate": 0.00017650899517551122, "loss": 11.6733, "step": 31939 }, { "epoch": 0.6685924809511848, "grad_norm": 0.3394983112812042, "learning_rate": 0.00017650758333386375, "loss": 11.6892, "step": 31940 }, { "epoch": 0.6686134137151469, "grad_norm": 0.2979986071586609, "learning_rate": 0.00017650617145543754, "loss": 11.6696, "step": 31941 }, { "epoch": 0.6686343464791091, "grad_norm": 0.2782261073589325, "learning_rate": 0.0001765047595402333, "loss": 11.6715, "step": 31942 }, { "epoch": 0.6686552792430712, "grad_norm": 0.3479993939399719, "learning_rate": 0.00017650334758825164, "loss": 11.6753, "step": 31943 }, { "epoch": 0.6686762120070334, "grad_norm": 0.33972764015197754, "learning_rate": 0.00017650193559949333, "loss": 11.7011, "step": 31944 }, { "epoch": 0.6686971447709955, "grad_norm": 0.3869363069534302, "learning_rate": 0.00017650052357395896, "loss": 11.6865, "step": 31945 }, { "epoch": 0.6687180775349577, "grad_norm": 0.4499382972717285, "learning_rate": 0.0001764991115116493, "loss": 11.6976, "step": 31946 }, { "epoch": 0.6687390102989199, "grad_norm": 0.26497480273246765, "learning_rate": 0.000176497699412565, "loss": 11.6645, "step": 31947 }, { "epoch": 0.668759943062882, "grad_norm": 0.23340333998203278, "learning_rate": 0.00017649628727670668, "loss": 11.6631, "step": 31948 }, { "epoch": 0.6687808758268442, "grad_norm": 0.3704274594783783, "learning_rate": 0.00017649487510407505, "loss": 11.6521, "step": 31949 }, { "epoch": 0.6688018085908063, "grad_norm": 0.3416070342063904, "learning_rate": 0.00017649346289467084, "loss": 11.6929, "step": 31950 }, { "epoch": 0.6688227413547685, "grad_norm": 0.3496636152267456, "learning_rate": 0.00017649205064849467, "loss": 11.6786, "step": 31951 }, { "epoch": 0.6688436741187307, "grad_norm": 0.2532685101032257, "learning_rate": 0.00017649063836554726, "loss": 11.6878, "step": 31952 }, { "epoch": 0.6688646068826928, "grad_norm": 0.310699999332428, "learning_rate": 0.00017648922604582924, "loss": 11.6853, "step": 31953 }, { "epoch": 0.668885539646655, "grad_norm": 0.38660290837287903, "learning_rate": 0.00017648781368934132, "loss": 11.6685, "step": 31954 }, { "epoch": 0.6689064724106171, "grad_norm": 0.28699028491973877, "learning_rate": 0.0001764864012960842, "loss": 11.6699, "step": 31955 }, { "epoch": 0.6689274051745793, "grad_norm": 0.3493005037307739, "learning_rate": 0.0001764849888660585, "loss": 11.657, "step": 31956 }, { "epoch": 0.6689483379385414, "grad_norm": 0.30736061930656433, "learning_rate": 0.00017648357639926497, "loss": 11.6683, "step": 31957 }, { "epoch": 0.6689692707025036, "grad_norm": 0.2946498990058899, "learning_rate": 0.00017648216389570424, "loss": 11.6808, "step": 31958 }, { "epoch": 0.6689902034664658, "grad_norm": 0.3174385130405426, "learning_rate": 0.000176480751355377, "loss": 11.6667, "step": 31959 }, { "epoch": 0.6690111362304278, "grad_norm": 0.32700979709625244, "learning_rate": 0.0001764793387782839, "loss": 11.6792, "step": 31960 }, { "epoch": 0.66903206899439, "grad_norm": 0.28466981649398804, "learning_rate": 0.0001764779261644257, "loss": 11.6548, "step": 31961 }, { "epoch": 0.6690530017583521, "grad_norm": 0.2842864990234375, "learning_rate": 0.000176476513513803, "loss": 11.6756, "step": 31962 }, { "epoch": 0.6690739345223143, "grad_norm": 0.3851430416107178, "learning_rate": 0.0001764751008264165, "loss": 11.6808, "step": 31963 }, { "epoch": 0.6690948672862764, "grad_norm": 0.24511224031448364, "learning_rate": 0.0001764736881022669, "loss": 11.6576, "step": 31964 }, { "epoch": 0.6691158000502386, "grad_norm": 0.32336878776550293, "learning_rate": 0.00017647227534135483, "loss": 11.675, "step": 31965 }, { "epoch": 0.6691367328142008, "grad_norm": 0.29213666915893555, "learning_rate": 0.00017647086254368106, "loss": 11.6727, "step": 31966 }, { "epoch": 0.6691576655781629, "grad_norm": 0.27560243010520935, "learning_rate": 0.0001764694497092462, "loss": 11.6683, "step": 31967 }, { "epoch": 0.6691785983421251, "grad_norm": 0.3743983507156372, "learning_rate": 0.0001764680368380509, "loss": 11.6689, "step": 31968 }, { "epoch": 0.6691995311060872, "grad_norm": 0.3636983633041382, "learning_rate": 0.00017646662393009592, "loss": 11.6725, "step": 31969 }, { "epoch": 0.6692204638700494, "grad_norm": 0.2610595226287842, "learning_rate": 0.0001764652109853819, "loss": 11.6697, "step": 31970 }, { "epoch": 0.6692413966340116, "grad_norm": 0.2747051417827606, "learning_rate": 0.00017646379800390953, "loss": 11.6812, "step": 31971 }, { "epoch": 0.6692623293979737, "grad_norm": 0.26472237706184387, "learning_rate": 0.00017646238498567946, "loss": 11.673, "step": 31972 }, { "epoch": 0.6692832621619359, "grad_norm": 0.3327707052230835, "learning_rate": 0.0001764609719306924, "loss": 11.6842, "step": 31973 }, { "epoch": 0.669304194925898, "grad_norm": 0.31273993849754333, "learning_rate": 0.000176459558838949, "loss": 11.6856, "step": 31974 }, { "epoch": 0.6693251276898602, "grad_norm": 0.28963983058929443, "learning_rate": 0.00017645814571045, "loss": 11.694, "step": 31975 }, { "epoch": 0.6693460604538223, "grad_norm": 0.3039834499359131, "learning_rate": 0.00017645673254519596, "loss": 11.6588, "step": 31976 }, { "epoch": 0.6693669932177845, "grad_norm": 0.3596802055835724, "learning_rate": 0.00017645531934318772, "loss": 11.673, "step": 31977 }, { "epoch": 0.6693879259817467, "grad_norm": 0.3582831025123596, "learning_rate": 0.00017645390610442584, "loss": 11.6599, "step": 31978 }, { "epoch": 0.6694088587457088, "grad_norm": 0.2531583905220032, "learning_rate": 0.00017645249282891102, "loss": 11.6758, "step": 31979 }, { "epoch": 0.669429791509671, "grad_norm": 0.2544734477996826, "learning_rate": 0.000176451079516644, "loss": 11.6714, "step": 31980 }, { "epoch": 0.6694507242736331, "grad_norm": 0.26746031641960144, "learning_rate": 0.00017644966616762537, "loss": 11.6802, "step": 31981 }, { "epoch": 0.6694716570375953, "grad_norm": 0.3706131875514984, "learning_rate": 0.00017644825278185586, "loss": 11.6638, "step": 31982 }, { "epoch": 0.6694925898015573, "grad_norm": 0.34956493973731995, "learning_rate": 0.00017644683935933616, "loss": 11.6613, "step": 31983 }, { "epoch": 0.6695135225655195, "grad_norm": 0.2738783061504364, "learning_rate": 0.00017644542590006693, "loss": 11.6671, "step": 31984 }, { "epoch": 0.6695344553294817, "grad_norm": 0.2850906550884247, "learning_rate": 0.0001764440124040488, "loss": 11.6745, "step": 31985 }, { "epoch": 0.6695553880934438, "grad_norm": 0.3362727463245392, "learning_rate": 0.00017644259887128258, "loss": 11.6737, "step": 31986 }, { "epoch": 0.669576320857406, "grad_norm": 0.3101594150066376, "learning_rate": 0.00017644118530176883, "loss": 11.665, "step": 31987 }, { "epoch": 0.6695972536213681, "grad_norm": 0.3172861933708191, "learning_rate": 0.00017643977169550827, "loss": 11.6702, "step": 31988 }, { "epoch": 0.6696181863853303, "grad_norm": 0.26675617694854736, "learning_rate": 0.0001764383580525016, "loss": 11.6666, "step": 31989 }, { "epoch": 0.6696391191492925, "grad_norm": 0.2737399935722351, "learning_rate": 0.00017643694437274946, "loss": 11.6649, "step": 31990 }, { "epoch": 0.6696600519132546, "grad_norm": 0.3343547582626343, "learning_rate": 0.00017643553065625257, "loss": 11.6816, "step": 31991 }, { "epoch": 0.6696809846772168, "grad_norm": 0.264858216047287, "learning_rate": 0.00017643411690301158, "loss": 11.6658, "step": 31992 }, { "epoch": 0.6697019174411789, "grad_norm": 0.3027823269367218, "learning_rate": 0.00017643270311302716, "loss": 11.6695, "step": 31993 }, { "epoch": 0.6697228502051411, "grad_norm": 0.26368674635887146, "learning_rate": 0.00017643128928630003, "loss": 11.654, "step": 31994 }, { "epoch": 0.6697437829691032, "grad_norm": 0.2778545022010803, "learning_rate": 0.00017642987542283085, "loss": 11.6809, "step": 31995 }, { "epoch": 0.6697647157330654, "grad_norm": 0.2801229953765869, "learning_rate": 0.00017642846152262026, "loss": 11.6783, "step": 31996 }, { "epoch": 0.6697856484970276, "grad_norm": 0.30792033672332764, "learning_rate": 0.00017642704758566902, "loss": 11.6889, "step": 31997 }, { "epoch": 0.6698065812609897, "grad_norm": 0.3728531002998352, "learning_rate": 0.00017642563361197774, "loss": 11.6751, "step": 31998 }, { "epoch": 0.6698275140249519, "grad_norm": 0.41830024123191833, "learning_rate": 0.00017642421960154713, "loss": 11.6536, "step": 31999 }, { "epoch": 0.669848446788914, "grad_norm": 0.31488317251205444, "learning_rate": 0.00017642280555437787, "loss": 11.6839, "step": 32000 }, { "epoch": 0.669848446788914, "eval_loss": 11.671031951904297, "eval_runtime": 34.385, "eval_samples_per_second": 27.948, "eval_steps_per_second": 7.009, "step": 32000 }, { "epoch": 0.6698693795528762, "grad_norm": 0.34180453419685364, "learning_rate": 0.00017642139147047066, "loss": 11.6887, "step": 32001 }, { "epoch": 0.6698903123168383, "grad_norm": 0.302584707736969, "learning_rate": 0.0001764199773498261, "loss": 11.674, "step": 32002 }, { "epoch": 0.6699112450808005, "grad_norm": 0.32997745275497437, "learning_rate": 0.00017641856319244495, "loss": 11.6727, "step": 32003 }, { "epoch": 0.6699321778447627, "grad_norm": 0.44845497608184814, "learning_rate": 0.00017641714899832792, "loss": 11.6875, "step": 32004 }, { "epoch": 0.6699531106087248, "grad_norm": 0.23800069093704224, "learning_rate": 0.00017641573476747557, "loss": 11.6684, "step": 32005 }, { "epoch": 0.669974043372687, "grad_norm": 0.2791934907436371, "learning_rate": 0.00017641432049988865, "loss": 11.6853, "step": 32006 }, { "epoch": 0.669994976136649, "grad_norm": 0.3354228734970093, "learning_rate": 0.00017641290619556786, "loss": 11.6894, "step": 32007 }, { "epoch": 0.6700159089006112, "grad_norm": 0.29217755794525146, "learning_rate": 0.00017641149185451384, "loss": 11.6828, "step": 32008 }, { "epoch": 0.6700368416645733, "grad_norm": 0.4612618386745453, "learning_rate": 0.00017641007747672732, "loss": 11.6797, "step": 32009 }, { "epoch": 0.6700577744285355, "grad_norm": 0.34056198596954346, "learning_rate": 0.0001764086630622089, "loss": 11.6769, "step": 32010 }, { "epoch": 0.6700787071924977, "grad_norm": 0.2963313162326813, "learning_rate": 0.00017640724861095933, "loss": 11.6873, "step": 32011 }, { "epoch": 0.6700996399564598, "grad_norm": 0.25930026173591614, "learning_rate": 0.00017640583412297924, "loss": 11.6656, "step": 32012 }, { "epoch": 0.670120572720422, "grad_norm": 0.28777220845222473, "learning_rate": 0.00017640441959826934, "loss": 11.6688, "step": 32013 }, { "epoch": 0.6701415054843841, "grad_norm": 0.35500234365463257, "learning_rate": 0.0001764030050368303, "loss": 11.6739, "step": 32014 }, { "epoch": 0.6701624382483463, "grad_norm": 0.2615029513835907, "learning_rate": 0.0001764015904386628, "loss": 11.6627, "step": 32015 }, { "epoch": 0.6701833710123085, "grad_norm": 0.25873449444770813, "learning_rate": 0.00017640017580376754, "loss": 11.6849, "step": 32016 }, { "epoch": 0.6702043037762706, "grad_norm": 0.35434404015541077, "learning_rate": 0.00017639876113214519, "loss": 11.6625, "step": 32017 }, { "epoch": 0.6702252365402328, "grad_norm": 0.28915759921073914, "learning_rate": 0.00017639734642379643, "loss": 11.6649, "step": 32018 }, { "epoch": 0.6702461693041949, "grad_norm": 0.2929728925228119, "learning_rate": 0.0001763959316787219, "loss": 11.667, "step": 32019 }, { "epoch": 0.6702671020681571, "grad_norm": 0.307979017496109, "learning_rate": 0.00017639451689692234, "loss": 11.6614, "step": 32020 }, { "epoch": 0.6702880348321192, "grad_norm": 0.36679747700691223, "learning_rate": 0.00017639310207839842, "loss": 11.6682, "step": 32021 }, { "epoch": 0.6703089675960814, "grad_norm": 0.2618553042411804, "learning_rate": 0.00017639168722315078, "loss": 11.675, "step": 32022 }, { "epoch": 0.6703299003600436, "grad_norm": 0.28997328877449036, "learning_rate": 0.00017639027233118013, "loss": 11.6878, "step": 32023 }, { "epoch": 0.6703508331240057, "grad_norm": 0.3477570116519928, "learning_rate": 0.00017638885740248715, "loss": 11.667, "step": 32024 }, { "epoch": 0.6703717658879679, "grad_norm": 0.2994159758090973, "learning_rate": 0.00017638744243707254, "loss": 11.6614, "step": 32025 }, { "epoch": 0.67039269865193, "grad_norm": 0.27732694149017334, "learning_rate": 0.0001763860274349369, "loss": 11.6483, "step": 32026 }, { "epoch": 0.6704136314158922, "grad_norm": 0.2528041899204254, "learning_rate": 0.00017638461239608097, "loss": 11.6743, "step": 32027 }, { "epoch": 0.6704345641798543, "grad_norm": 0.27089324593544006, "learning_rate": 0.00017638319732050546, "loss": 11.6631, "step": 32028 }, { "epoch": 0.6704554969438165, "grad_norm": 0.3812768757343292, "learning_rate": 0.000176381782208211, "loss": 11.6863, "step": 32029 }, { "epoch": 0.6704764297077787, "grad_norm": 0.29533788561820984, "learning_rate": 0.00017638036705919832, "loss": 11.6743, "step": 32030 }, { "epoch": 0.6704973624717407, "grad_norm": 0.336678147315979, "learning_rate": 0.00017637895187346802, "loss": 11.6572, "step": 32031 }, { "epoch": 0.6705182952357029, "grad_norm": 0.28590744733810425, "learning_rate": 0.00017637753665102084, "loss": 11.6808, "step": 32032 }, { "epoch": 0.670539227999665, "grad_norm": 0.37545618414878845, "learning_rate": 0.00017637612139185746, "loss": 11.6798, "step": 32033 }, { "epoch": 0.6705601607636272, "grad_norm": 0.2938733994960785, "learning_rate": 0.00017637470609597854, "loss": 11.6929, "step": 32034 }, { "epoch": 0.6705810935275894, "grad_norm": 0.3685237169265747, "learning_rate": 0.0001763732907633848, "loss": 11.666, "step": 32035 }, { "epoch": 0.6706020262915515, "grad_norm": 0.3164801597595215, "learning_rate": 0.00017637187539407684, "loss": 11.6642, "step": 32036 }, { "epoch": 0.6706229590555137, "grad_norm": 0.3007679879665375, "learning_rate": 0.0001763704599880554, "loss": 11.6714, "step": 32037 }, { "epoch": 0.6706438918194758, "grad_norm": 0.3191777765750885, "learning_rate": 0.00017636904454532116, "loss": 11.6654, "step": 32038 }, { "epoch": 0.670664824583438, "grad_norm": 0.27927863597869873, "learning_rate": 0.00017636762906587478, "loss": 11.6638, "step": 32039 }, { "epoch": 0.6706857573474001, "grad_norm": 0.3117794096469879, "learning_rate": 0.00017636621354971696, "loss": 11.6676, "step": 32040 }, { "epoch": 0.6707066901113623, "grad_norm": 0.2805062234401703, "learning_rate": 0.00017636479799684835, "loss": 11.6543, "step": 32041 }, { "epoch": 0.6707276228753245, "grad_norm": 0.25946930050849915, "learning_rate": 0.0001763633824072697, "loss": 11.6635, "step": 32042 }, { "epoch": 0.6707485556392866, "grad_norm": 0.3736647963523865, "learning_rate": 0.0001763619667809816, "loss": 11.6712, "step": 32043 }, { "epoch": 0.6707694884032488, "grad_norm": 0.2752824127674103, "learning_rate": 0.00017636055111798477, "loss": 11.6715, "step": 32044 }, { "epoch": 0.6707904211672109, "grad_norm": 0.32466283440589905, "learning_rate": 0.0001763591354182799, "loss": 11.6645, "step": 32045 }, { "epoch": 0.6708113539311731, "grad_norm": 0.31144362688064575, "learning_rate": 0.00017635771968186768, "loss": 11.6895, "step": 32046 }, { "epoch": 0.6708322866951352, "grad_norm": 0.33044952154159546, "learning_rate": 0.00017635630390874874, "loss": 11.6729, "step": 32047 }, { "epoch": 0.6708532194590974, "grad_norm": 0.3637262284755707, "learning_rate": 0.00017635488809892384, "loss": 11.6698, "step": 32048 }, { "epoch": 0.6708741522230596, "grad_norm": 0.2674057185649872, "learning_rate": 0.00017635347225239357, "loss": 11.6726, "step": 32049 }, { "epoch": 0.6708950849870217, "grad_norm": 0.2756684124469757, "learning_rate": 0.0001763520563691587, "loss": 11.6747, "step": 32050 }, { "epoch": 0.6709160177509839, "grad_norm": 0.3987683653831482, "learning_rate": 0.00017635064044921984, "loss": 11.6677, "step": 32051 }, { "epoch": 0.670936950514946, "grad_norm": 0.31203410029411316, "learning_rate": 0.00017634922449257773, "loss": 11.6779, "step": 32052 }, { "epoch": 0.6709578832789082, "grad_norm": 0.38194599747657776, "learning_rate": 0.00017634780849923296, "loss": 11.6999, "step": 32053 }, { "epoch": 0.6709788160428704, "grad_norm": 0.34227922558784485, "learning_rate": 0.0001763463924691863, "loss": 11.6664, "step": 32054 }, { "epoch": 0.6709997488068324, "grad_norm": 0.33895766735076904, "learning_rate": 0.00017634497640243838, "loss": 11.6724, "step": 32055 }, { "epoch": 0.6710206815707946, "grad_norm": 0.3597862422466278, "learning_rate": 0.00017634356029898993, "loss": 11.6558, "step": 32056 }, { "epoch": 0.6710416143347567, "grad_norm": 0.279811829328537, "learning_rate": 0.0001763421441588416, "loss": 11.662, "step": 32057 }, { "epoch": 0.6710625470987189, "grad_norm": 0.28359052538871765, "learning_rate": 0.00017634072798199406, "loss": 11.6709, "step": 32058 }, { "epoch": 0.671083479862681, "grad_norm": 0.43960580229759216, "learning_rate": 0.00017633931176844797, "loss": 11.6914, "step": 32059 }, { "epoch": 0.6711044126266432, "grad_norm": 0.307370126247406, "learning_rate": 0.0001763378955182041, "loss": 11.6799, "step": 32060 }, { "epoch": 0.6711253453906054, "grad_norm": 0.33124637603759766, "learning_rate": 0.00017633647923126304, "loss": 11.6626, "step": 32061 }, { "epoch": 0.6711462781545675, "grad_norm": 0.30579662322998047, "learning_rate": 0.00017633506290762552, "loss": 11.6631, "step": 32062 }, { "epoch": 0.6711672109185297, "grad_norm": 0.2522202432155609, "learning_rate": 0.0001763336465472922, "loss": 11.6622, "step": 32063 }, { "epoch": 0.6711881436824918, "grad_norm": 0.3492831289768219, "learning_rate": 0.00017633223015026377, "loss": 11.6866, "step": 32064 }, { "epoch": 0.671209076446454, "grad_norm": 0.26621049642562866, "learning_rate": 0.00017633081371654089, "loss": 11.6637, "step": 32065 }, { "epoch": 0.6712300092104161, "grad_norm": 0.3437405228614807, "learning_rate": 0.00017632939724612429, "loss": 11.666, "step": 32066 }, { "epoch": 0.6712509419743783, "grad_norm": 0.2941974997520447, "learning_rate": 0.00017632798073901457, "loss": 11.6682, "step": 32067 }, { "epoch": 0.6712718747383405, "grad_norm": 0.25571849942207336, "learning_rate": 0.0001763265641952125, "loss": 11.6918, "step": 32068 }, { "epoch": 0.6712928075023026, "grad_norm": 0.38595637679100037, "learning_rate": 0.0001763251476147187, "loss": 11.6787, "step": 32069 }, { "epoch": 0.6713137402662648, "grad_norm": 0.3123033940792084, "learning_rate": 0.0001763237309975339, "loss": 11.6698, "step": 32070 }, { "epoch": 0.6713346730302269, "grad_norm": 0.29565855860710144, "learning_rate": 0.0001763223143436587, "loss": 11.6581, "step": 32071 }, { "epoch": 0.6713556057941891, "grad_norm": 0.25434982776641846, "learning_rate": 0.00017632089765309388, "loss": 11.6896, "step": 32072 }, { "epoch": 0.6713765385581513, "grad_norm": 0.2695407569408417, "learning_rate": 0.00017631948092584006, "loss": 11.6801, "step": 32073 }, { "epoch": 0.6713974713221134, "grad_norm": 0.2540564239025116, "learning_rate": 0.00017631806416189795, "loss": 11.6584, "step": 32074 }, { "epoch": 0.6714184040860756, "grad_norm": 0.39414459466934204, "learning_rate": 0.0001763166473612682, "loss": 11.6756, "step": 32075 }, { "epoch": 0.6714393368500376, "grad_norm": 0.2779008746147156, "learning_rate": 0.00017631523052395154, "loss": 11.6804, "step": 32076 }, { "epoch": 0.6714602696139998, "grad_norm": 0.2355060875415802, "learning_rate": 0.00017631381364994857, "loss": 11.6616, "step": 32077 }, { "epoch": 0.6714812023779619, "grad_norm": 0.26955386996269226, "learning_rate": 0.00017631239673926004, "loss": 11.6656, "step": 32078 }, { "epoch": 0.6715021351419241, "grad_norm": 0.25958094000816345, "learning_rate": 0.00017631097979188662, "loss": 11.6841, "step": 32079 }, { "epoch": 0.6715230679058863, "grad_norm": 0.2945495545864105, "learning_rate": 0.00017630956280782897, "loss": 11.6748, "step": 32080 }, { "epoch": 0.6715440006698484, "grad_norm": 0.2906332314014435, "learning_rate": 0.0001763081457870878, "loss": 11.6734, "step": 32081 }, { "epoch": 0.6715649334338106, "grad_norm": 0.33661407232284546, "learning_rate": 0.0001763067287296638, "loss": 11.6597, "step": 32082 }, { "epoch": 0.6715858661977727, "grad_norm": 0.2840481400489807, "learning_rate": 0.00017630531163555756, "loss": 11.664, "step": 32083 }, { "epoch": 0.6716067989617349, "grad_norm": 0.3391551971435547, "learning_rate": 0.00017630389450476987, "loss": 11.6817, "step": 32084 }, { "epoch": 0.671627731725697, "grad_norm": 0.30757591128349304, "learning_rate": 0.00017630247733730137, "loss": 11.6621, "step": 32085 }, { "epoch": 0.6716486644896592, "grad_norm": 0.279492050409317, "learning_rate": 0.00017630106013315273, "loss": 11.6718, "step": 32086 }, { "epoch": 0.6716695972536214, "grad_norm": 0.31384167075157166, "learning_rate": 0.00017629964289232462, "loss": 11.6836, "step": 32087 }, { "epoch": 0.6716905300175835, "grad_norm": 0.24852697551250458, "learning_rate": 0.0001762982256148178, "loss": 11.6621, "step": 32088 }, { "epoch": 0.6717114627815457, "grad_norm": 0.2683013081550598, "learning_rate": 0.00017629680830063285, "loss": 11.6705, "step": 32089 }, { "epoch": 0.6717323955455078, "grad_norm": 0.2937861680984497, "learning_rate": 0.0001762953909497705, "loss": 11.6766, "step": 32090 }, { "epoch": 0.67175332830947, "grad_norm": 0.2904650866985321, "learning_rate": 0.00017629397356223143, "loss": 11.6593, "step": 32091 }, { "epoch": 0.6717742610734322, "grad_norm": 0.4588756561279297, "learning_rate": 0.0001762925561380163, "loss": 11.6569, "step": 32092 }, { "epoch": 0.6717951938373943, "grad_norm": 0.2671079635620117, "learning_rate": 0.00017629113867712585, "loss": 11.6419, "step": 32093 }, { "epoch": 0.6718161266013565, "grad_norm": 0.31910309195518494, "learning_rate": 0.0001762897211795607, "loss": 11.6837, "step": 32094 }, { "epoch": 0.6718370593653186, "grad_norm": 0.2715710401535034, "learning_rate": 0.00017628830364532154, "loss": 11.6818, "step": 32095 }, { "epoch": 0.6718579921292808, "grad_norm": 0.42993053793907166, "learning_rate": 0.00017628688607440908, "loss": 11.6943, "step": 32096 }, { "epoch": 0.6718789248932429, "grad_norm": 0.3207973539829254, "learning_rate": 0.00017628546846682396, "loss": 11.6589, "step": 32097 }, { "epoch": 0.6718998576572051, "grad_norm": 0.4187926650047302, "learning_rate": 0.0001762840508225669, "loss": 11.6793, "step": 32098 }, { "epoch": 0.6719207904211673, "grad_norm": 0.311283141374588, "learning_rate": 0.0001762826331416386, "loss": 11.6692, "step": 32099 }, { "epoch": 0.6719417231851293, "grad_norm": 0.26466163992881775, "learning_rate": 0.0001762812154240397, "loss": 11.6753, "step": 32100 }, { "epoch": 0.6719626559490915, "grad_norm": 0.2579142153263092, "learning_rate": 0.00017627979766977086, "loss": 11.6733, "step": 32101 }, { "epoch": 0.6719835887130536, "grad_norm": 0.3921447992324829, "learning_rate": 0.0001762783798788328, "loss": 11.6851, "step": 32102 }, { "epoch": 0.6720045214770158, "grad_norm": 0.27579355239868164, "learning_rate": 0.0001762769620512262, "loss": 11.6739, "step": 32103 }, { "epoch": 0.6720254542409779, "grad_norm": 0.26509466767311096, "learning_rate": 0.00017627554418695173, "loss": 11.6724, "step": 32104 }, { "epoch": 0.6720463870049401, "grad_norm": 0.3460184633731842, "learning_rate": 0.0001762741262860101, "loss": 11.675, "step": 32105 }, { "epoch": 0.6720673197689023, "grad_norm": 0.2522730231285095, "learning_rate": 0.00017627270834840193, "loss": 11.6587, "step": 32106 }, { "epoch": 0.6720882525328644, "grad_norm": 0.2633766233921051, "learning_rate": 0.00017627129037412796, "loss": 11.6601, "step": 32107 }, { "epoch": 0.6721091852968266, "grad_norm": 0.2969820499420166, "learning_rate": 0.00017626987236318885, "loss": 11.6675, "step": 32108 }, { "epoch": 0.6721301180607887, "grad_norm": 0.307939738035202, "learning_rate": 0.0001762684543155853, "loss": 11.6769, "step": 32109 }, { "epoch": 0.6721510508247509, "grad_norm": 0.26894789934158325, "learning_rate": 0.00017626703623131797, "loss": 11.6649, "step": 32110 }, { "epoch": 0.6721719835887131, "grad_norm": 0.296112596988678, "learning_rate": 0.00017626561811038752, "loss": 11.6515, "step": 32111 }, { "epoch": 0.6721929163526752, "grad_norm": 0.4220797121524811, "learning_rate": 0.0001762641999527947, "loss": 11.679, "step": 32112 }, { "epoch": 0.6722138491166374, "grad_norm": 0.2678889036178589, "learning_rate": 0.00017626278175854015, "loss": 11.6678, "step": 32113 }, { "epoch": 0.6722347818805995, "grad_norm": 0.3857923746109009, "learning_rate": 0.00017626136352762452, "loss": 11.6612, "step": 32114 }, { "epoch": 0.6722557146445617, "grad_norm": 0.2939596176147461, "learning_rate": 0.00017625994526004855, "loss": 11.6868, "step": 32115 }, { "epoch": 0.6722766474085238, "grad_norm": 0.29563868045806885, "learning_rate": 0.00017625852695581288, "loss": 11.6531, "step": 32116 }, { "epoch": 0.672297580172486, "grad_norm": 0.24516722559928894, "learning_rate": 0.0001762571086149182, "loss": 11.6662, "step": 32117 }, { "epoch": 0.6723185129364482, "grad_norm": 0.2755962908267975, "learning_rate": 0.00017625569023736523, "loss": 11.6686, "step": 32118 }, { "epoch": 0.6723394457004103, "grad_norm": 0.2557268738746643, "learning_rate": 0.00017625427182315463, "loss": 11.6835, "step": 32119 }, { "epoch": 0.6723603784643725, "grad_norm": 0.2961770296096802, "learning_rate": 0.000176252853372287, "loss": 11.6698, "step": 32120 }, { "epoch": 0.6723813112283346, "grad_norm": 0.3539501428604126, "learning_rate": 0.00017625143488476317, "loss": 11.6818, "step": 32121 }, { "epoch": 0.6724022439922968, "grad_norm": 0.26872506737709045, "learning_rate": 0.0001762500163605837, "loss": 11.6779, "step": 32122 }, { "epoch": 0.6724231767562588, "grad_norm": 0.2967723608016968, "learning_rate": 0.00017624859779974937, "loss": 11.663, "step": 32123 }, { "epoch": 0.672444109520221, "grad_norm": 0.29689303040504456, "learning_rate": 0.00017624717920226076, "loss": 11.6804, "step": 32124 }, { "epoch": 0.6724650422841832, "grad_norm": 0.36928436160087585, "learning_rate": 0.0001762457605681186, "loss": 11.6922, "step": 32125 }, { "epoch": 0.6724859750481453, "grad_norm": 0.2488718181848526, "learning_rate": 0.00017624434189732362, "loss": 11.6747, "step": 32126 }, { "epoch": 0.6725069078121075, "grad_norm": 0.31382834911346436, "learning_rate": 0.00017624292318987642, "loss": 11.6723, "step": 32127 }, { "epoch": 0.6725278405760696, "grad_norm": 0.31082549691200256, "learning_rate": 0.00017624150444577773, "loss": 11.6792, "step": 32128 }, { "epoch": 0.6725487733400318, "grad_norm": 0.3383561670780182, "learning_rate": 0.00017624008566502822, "loss": 11.6646, "step": 32129 }, { "epoch": 0.672569706103994, "grad_norm": 0.26493898034095764, "learning_rate": 0.0001762386668476286, "loss": 11.6744, "step": 32130 }, { "epoch": 0.6725906388679561, "grad_norm": 0.29665717482566833, "learning_rate": 0.0001762372479935795, "loss": 11.6627, "step": 32131 }, { "epoch": 0.6726115716319183, "grad_norm": 0.21821554005146027, "learning_rate": 0.00017623582910288163, "loss": 11.6763, "step": 32132 }, { "epoch": 0.6726325043958804, "grad_norm": 0.36095839738845825, "learning_rate": 0.00017623441017553567, "loss": 11.6841, "step": 32133 }, { "epoch": 0.6726534371598426, "grad_norm": 0.3097226321697235, "learning_rate": 0.0001762329912115423, "loss": 11.6648, "step": 32134 }, { "epoch": 0.6726743699238047, "grad_norm": 0.34371456503868103, "learning_rate": 0.0001762315722109022, "loss": 11.6916, "step": 32135 }, { "epoch": 0.6726953026877669, "grad_norm": 0.3528929352760315, "learning_rate": 0.00017623015317361606, "loss": 11.6838, "step": 32136 }, { "epoch": 0.6727162354517291, "grad_norm": 0.3077046573162079, "learning_rate": 0.00017622873409968456, "loss": 11.6623, "step": 32137 }, { "epoch": 0.6727371682156912, "grad_norm": 0.2581374943256378, "learning_rate": 0.00017622731498910836, "loss": 11.6667, "step": 32138 }, { "epoch": 0.6727581009796534, "grad_norm": 0.36328575015068054, "learning_rate": 0.0001762258958418882, "loss": 11.6699, "step": 32139 }, { "epoch": 0.6727790337436155, "grad_norm": 0.2858964502811432, "learning_rate": 0.00017622447665802468, "loss": 11.6551, "step": 32140 }, { "epoch": 0.6727999665075777, "grad_norm": 0.34365490078926086, "learning_rate": 0.00017622305743751854, "loss": 11.6789, "step": 32141 }, { "epoch": 0.6728208992715398, "grad_norm": 3.159223794937134, "learning_rate": 0.00017622163818037048, "loss": 11.6631, "step": 32142 }, { "epoch": 0.672841832035502, "grad_norm": 0.2892836928367615, "learning_rate": 0.00017622021888658113, "loss": 11.6554, "step": 32143 }, { "epoch": 0.6728627647994642, "grad_norm": 0.3132714629173279, "learning_rate": 0.0001762187995561512, "loss": 11.6436, "step": 32144 }, { "epoch": 0.6728836975634263, "grad_norm": 0.30694809556007385, "learning_rate": 0.00017621738018908136, "loss": 11.6657, "step": 32145 }, { "epoch": 0.6729046303273885, "grad_norm": 0.3586786985397339, "learning_rate": 0.00017621596078537232, "loss": 11.6648, "step": 32146 }, { "epoch": 0.6729255630913505, "grad_norm": 0.32446080446243286, "learning_rate": 0.0001762145413450247, "loss": 11.6538, "step": 32147 }, { "epoch": 0.6729464958553127, "grad_norm": 0.2746477723121643, "learning_rate": 0.00017621312186803922, "loss": 11.6766, "step": 32148 }, { "epoch": 0.6729674286192749, "grad_norm": 0.4331068992614746, "learning_rate": 0.00017621170235441658, "loss": 11.6746, "step": 32149 }, { "epoch": 0.672988361383237, "grad_norm": 0.31676846742630005, "learning_rate": 0.00017621028280415745, "loss": 11.6826, "step": 32150 }, { "epoch": 0.6730092941471992, "grad_norm": 0.5260539054870605, "learning_rate": 0.00017620886321726253, "loss": 11.6567, "step": 32151 }, { "epoch": 0.6730302269111613, "grad_norm": 0.278799831867218, "learning_rate": 0.00017620744359373244, "loss": 11.6746, "step": 32152 }, { "epoch": 0.6730511596751235, "grad_norm": 0.25494229793548584, "learning_rate": 0.00017620602393356792, "loss": 11.654, "step": 32153 }, { "epoch": 0.6730720924390856, "grad_norm": 0.31269314885139465, "learning_rate": 0.00017620460423676968, "loss": 11.6493, "step": 32154 }, { "epoch": 0.6730930252030478, "grad_norm": 0.2749577760696411, "learning_rate": 0.0001762031845033383, "loss": 11.6635, "step": 32155 }, { "epoch": 0.67311395796701, "grad_norm": 0.26819130778312683, "learning_rate": 0.00017620176473327455, "loss": 11.6736, "step": 32156 }, { "epoch": 0.6731348907309721, "grad_norm": 0.3245604932308197, "learning_rate": 0.00017620034492657908, "loss": 11.6632, "step": 32157 }, { "epoch": 0.6731558234949343, "grad_norm": 0.2658800482749939, "learning_rate": 0.00017619892508325256, "loss": 11.6651, "step": 32158 }, { "epoch": 0.6731767562588964, "grad_norm": 0.33122897148132324, "learning_rate": 0.0001761975052032957, "loss": 11.6742, "step": 32159 }, { "epoch": 0.6731976890228586, "grad_norm": 0.3086733818054199, "learning_rate": 0.00017619608528670922, "loss": 11.6627, "step": 32160 }, { "epoch": 0.6732186217868207, "grad_norm": 0.26646557450294495, "learning_rate": 0.0001761946653334937, "loss": 11.673, "step": 32161 }, { "epoch": 0.6732395545507829, "grad_norm": 0.31431347131729126, "learning_rate": 0.00017619324534364988, "loss": 11.6688, "step": 32162 }, { "epoch": 0.6732604873147451, "grad_norm": 0.3530191481113434, "learning_rate": 0.00017619182531717847, "loss": 11.6805, "step": 32163 }, { "epoch": 0.6732814200787072, "grad_norm": 0.31966760754585266, "learning_rate": 0.0001761904052540801, "loss": 11.674, "step": 32164 }, { "epoch": 0.6733023528426694, "grad_norm": 0.3132725954055786, "learning_rate": 0.0001761889851543555, "loss": 11.6566, "step": 32165 }, { "epoch": 0.6733232856066315, "grad_norm": 0.2721750736236572, "learning_rate": 0.0001761875650180053, "loss": 11.6685, "step": 32166 }, { "epoch": 0.6733442183705937, "grad_norm": 0.308000385761261, "learning_rate": 0.0001761861448450302, "loss": 11.6856, "step": 32167 }, { "epoch": 0.6733651511345559, "grad_norm": 0.4006405770778656, "learning_rate": 0.00017618472463543094, "loss": 11.6842, "step": 32168 }, { "epoch": 0.673386083898518, "grad_norm": 0.27335259318351746, "learning_rate": 0.00017618330438920812, "loss": 11.6645, "step": 32169 }, { "epoch": 0.6734070166624802, "grad_norm": 0.2837047576904297, "learning_rate": 0.00017618188410636245, "loss": 11.6757, "step": 32170 }, { "epoch": 0.6734279494264422, "grad_norm": 0.2732217311859131, "learning_rate": 0.00017618046378689464, "loss": 11.6555, "step": 32171 }, { "epoch": 0.6734488821904044, "grad_norm": 0.3009684085845947, "learning_rate": 0.00017617904343080536, "loss": 11.6841, "step": 32172 }, { "epoch": 0.6734698149543665, "grad_norm": 0.3103383481502533, "learning_rate": 0.00017617762303809528, "loss": 11.6778, "step": 32173 }, { "epoch": 0.6734907477183287, "grad_norm": 0.2440892606973648, "learning_rate": 0.00017617620260876508, "loss": 11.6768, "step": 32174 }, { "epoch": 0.6735116804822909, "grad_norm": 0.2981453239917755, "learning_rate": 0.00017617478214281546, "loss": 11.6756, "step": 32175 }, { "epoch": 0.673532613246253, "grad_norm": 0.25124573707580566, "learning_rate": 0.0001761733616402471, "loss": 11.6658, "step": 32176 }, { "epoch": 0.6735535460102152, "grad_norm": 0.2655794024467468, "learning_rate": 0.00017617194110106066, "loss": 11.6673, "step": 32177 }, { "epoch": 0.6735744787741773, "grad_norm": 0.40501588582992554, "learning_rate": 0.0001761705205252569, "loss": 11.6745, "step": 32178 }, { "epoch": 0.6735954115381395, "grad_norm": 0.4558175802230835, "learning_rate": 0.0001761690999128364, "loss": 11.6841, "step": 32179 }, { "epoch": 0.6736163443021016, "grad_norm": 0.30937445163726807, "learning_rate": 0.00017616767926379988, "loss": 11.6698, "step": 32180 }, { "epoch": 0.6736372770660638, "grad_norm": 0.2909258008003235, "learning_rate": 0.00017616625857814805, "loss": 11.6711, "step": 32181 }, { "epoch": 0.673658209830026, "grad_norm": 0.34700578451156616, "learning_rate": 0.00017616483785588155, "loss": 11.6802, "step": 32182 }, { "epoch": 0.6736791425939881, "grad_norm": 0.2605505883693695, "learning_rate": 0.0001761634170970011, "loss": 11.6613, "step": 32183 }, { "epoch": 0.6737000753579503, "grad_norm": 0.27217692136764526, "learning_rate": 0.00017616199630150737, "loss": 11.6742, "step": 32184 }, { "epoch": 0.6737210081219124, "grad_norm": 0.2621268033981323, "learning_rate": 0.00017616057546940102, "loss": 11.6694, "step": 32185 }, { "epoch": 0.6737419408858746, "grad_norm": 0.3156384229660034, "learning_rate": 0.0001761591546006828, "loss": 11.6618, "step": 32186 }, { "epoch": 0.6737628736498368, "grad_norm": 0.3280806541442871, "learning_rate": 0.00017615773369535333, "loss": 11.6737, "step": 32187 }, { "epoch": 0.6737838064137989, "grad_norm": 0.3389841914176941, "learning_rate": 0.00017615631275341332, "loss": 11.6564, "step": 32188 }, { "epoch": 0.6738047391777611, "grad_norm": 0.24182942509651184, "learning_rate": 0.0001761548917748634, "loss": 11.6765, "step": 32189 }, { "epoch": 0.6738256719417232, "grad_norm": 0.3678227961063385, "learning_rate": 0.00017615347075970435, "loss": 11.6795, "step": 32190 }, { "epoch": 0.6738466047056854, "grad_norm": 0.2809672951698303, "learning_rate": 0.00017615204970793674, "loss": 11.6859, "step": 32191 }, { "epoch": 0.6738675374696474, "grad_norm": 0.26640480756759644, "learning_rate": 0.00017615062861956138, "loss": 11.681, "step": 32192 }, { "epoch": 0.6738884702336096, "grad_norm": 0.290755033493042, "learning_rate": 0.00017614920749457885, "loss": 11.6507, "step": 32193 }, { "epoch": 0.6739094029975718, "grad_norm": 0.3322850465774536, "learning_rate": 0.00017614778633298988, "loss": 11.6836, "step": 32194 }, { "epoch": 0.6739303357615339, "grad_norm": 0.36513763666152954, "learning_rate": 0.00017614636513479514, "loss": 11.6724, "step": 32195 }, { "epoch": 0.6739512685254961, "grad_norm": 0.34268027544021606, "learning_rate": 0.00017614494389999531, "loss": 11.681, "step": 32196 }, { "epoch": 0.6739722012894582, "grad_norm": 0.32659536600112915, "learning_rate": 0.00017614352262859112, "loss": 11.6795, "step": 32197 }, { "epoch": 0.6739931340534204, "grad_norm": 0.32106077671051025, "learning_rate": 0.00017614210132058314, "loss": 11.6798, "step": 32198 }, { "epoch": 0.6740140668173825, "grad_norm": 0.2933121621608734, "learning_rate": 0.00017614067997597218, "loss": 11.6556, "step": 32199 }, { "epoch": 0.6740349995813447, "grad_norm": 0.2954854369163513, "learning_rate": 0.00017613925859475885, "loss": 11.6865, "step": 32200 }, { "epoch": 0.6740559323453069, "grad_norm": 0.2929138243198395, "learning_rate": 0.00017613783717694386, "loss": 11.6674, "step": 32201 }, { "epoch": 0.674076865109269, "grad_norm": 0.29235005378723145, "learning_rate": 0.00017613641572252788, "loss": 11.6837, "step": 32202 }, { "epoch": 0.6740977978732312, "grad_norm": 0.2630603611469269, "learning_rate": 0.00017613499423151162, "loss": 11.6766, "step": 32203 }, { "epoch": 0.6741187306371933, "grad_norm": 0.27881819009780884, "learning_rate": 0.0001761335727038957, "loss": 11.6774, "step": 32204 }, { "epoch": 0.6741396634011555, "grad_norm": 0.443202942609787, "learning_rate": 0.0001761321511396809, "loss": 11.675, "step": 32205 }, { "epoch": 0.6741605961651176, "grad_norm": 0.32013052701950073, "learning_rate": 0.0001761307295388678, "loss": 11.669, "step": 32206 }, { "epoch": 0.6741815289290798, "grad_norm": 0.3104820251464844, "learning_rate": 0.00017612930790145717, "loss": 11.6672, "step": 32207 }, { "epoch": 0.674202461693042, "grad_norm": 0.36552855372428894, "learning_rate": 0.00017612788622744962, "loss": 11.6699, "step": 32208 }, { "epoch": 0.6742233944570041, "grad_norm": 0.31614601612091064, "learning_rate": 0.0001761264645168459, "loss": 11.6638, "step": 32209 }, { "epoch": 0.6742443272209663, "grad_norm": 0.23738190531730652, "learning_rate": 0.00017612504276964664, "loss": 11.6555, "step": 32210 }, { "epoch": 0.6742652599849284, "grad_norm": 0.2826150059700012, "learning_rate": 0.00017612362098585256, "loss": 11.6796, "step": 32211 }, { "epoch": 0.6742861927488906, "grad_norm": 0.3299444317817688, "learning_rate": 0.00017612219916546433, "loss": 11.6579, "step": 32212 }, { "epoch": 0.6743071255128528, "grad_norm": 0.28233468532562256, "learning_rate": 0.00017612077730848262, "loss": 11.6759, "step": 32213 }, { "epoch": 0.6743280582768149, "grad_norm": 0.40788277983665466, "learning_rate": 0.00017611935541490814, "loss": 11.6796, "step": 32214 }, { "epoch": 0.6743489910407771, "grad_norm": 0.2658267915248871, "learning_rate": 0.00017611793348474154, "loss": 11.6688, "step": 32215 }, { "epoch": 0.6743699238047391, "grad_norm": 0.2924385964870453, "learning_rate": 0.00017611651151798357, "loss": 11.6664, "step": 32216 }, { "epoch": 0.6743908565687013, "grad_norm": 0.25842124223709106, "learning_rate": 0.00017611508951463484, "loss": 11.656, "step": 32217 }, { "epoch": 0.6744117893326634, "grad_norm": 0.3205133378505707, "learning_rate": 0.00017611366747469605, "loss": 11.6611, "step": 32218 }, { "epoch": 0.6744327220966256, "grad_norm": 0.2694196105003357, "learning_rate": 0.0001761122453981679, "loss": 11.6829, "step": 32219 }, { "epoch": 0.6744536548605878, "grad_norm": 0.2798430621623993, "learning_rate": 0.00017611082328505107, "loss": 11.6766, "step": 32220 }, { "epoch": 0.6744745876245499, "grad_norm": 0.2998303472995758, "learning_rate": 0.00017610940113534627, "loss": 11.6652, "step": 32221 }, { "epoch": 0.6744955203885121, "grad_norm": 0.31940844655036926, "learning_rate": 0.00017610797894905412, "loss": 11.6675, "step": 32222 }, { "epoch": 0.6745164531524742, "grad_norm": 0.33560895919799805, "learning_rate": 0.00017610655672617534, "loss": 11.6653, "step": 32223 }, { "epoch": 0.6745373859164364, "grad_norm": 0.24217990040779114, "learning_rate": 0.00017610513446671063, "loss": 11.6548, "step": 32224 }, { "epoch": 0.6745583186803985, "grad_norm": 0.31401655077934265, "learning_rate": 0.00017610371217066065, "loss": 11.6729, "step": 32225 }, { "epoch": 0.6745792514443607, "grad_norm": 0.3101947009563446, "learning_rate": 0.0001761022898380261, "loss": 11.6537, "step": 32226 }, { "epoch": 0.6746001842083229, "grad_norm": 0.27840691804885864, "learning_rate": 0.00017610086746880764, "loss": 11.6811, "step": 32227 }, { "epoch": 0.674621116972285, "grad_norm": 0.24555200338363647, "learning_rate": 0.00017609944506300596, "loss": 11.6685, "step": 32228 }, { "epoch": 0.6746420497362472, "grad_norm": 0.2939852178096771, "learning_rate": 0.00017609802262062179, "loss": 11.6677, "step": 32229 }, { "epoch": 0.6746629825002093, "grad_norm": 0.30884644389152527, "learning_rate": 0.00017609660014165573, "loss": 11.6676, "step": 32230 }, { "epoch": 0.6746839152641715, "grad_norm": 0.30437859892845154, "learning_rate": 0.0001760951776261085, "loss": 11.6859, "step": 32231 }, { "epoch": 0.6747048480281337, "grad_norm": 0.28051742911338806, "learning_rate": 0.00017609375507398085, "loss": 11.6811, "step": 32232 }, { "epoch": 0.6747257807920958, "grad_norm": 0.2901879549026489, "learning_rate": 0.00017609233248527337, "loss": 11.6523, "step": 32233 }, { "epoch": 0.674746713556058, "grad_norm": 0.2339196503162384, "learning_rate": 0.0001760909098599868, "loss": 11.6632, "step": 32234 }, { "epoch": 0.6747676463200201, "grad_norm": 0.27391064167022705, "learning_rate": 0.00017608948719812179, "loss": 11.6631, "step": 32235 }, { "epoch": 0.6747885790839823, "grad_norm": 0.2506709396839142, "learning_rate": 0.00017608806449967906, "loss": 11.6841, "step": 32236 }, { "epoch": 0.6748095118479444, "grad_norm": 0.2481977343559265, "learning_rate": 0.00017608664176465926, "loss": 11.657, "step": 32237 }, { "epoch": 0.6748304446119066, "grad_norm": 0.2981763780117035, "learning_rate": 0.00017608521899306308, "loss": 11.6651, "step": 32238 }, { "epoch": 0.6748513773758688, "grad_norm": 0.3300979435443878, "learning_rate": 0.0001760837961848912, "loss": 11.6746, "step": 32239 }, { "epoch": 0.6748723101398308, "grad_norm": 0.3069475293159485, "learning_rate": 0.00017608237334014432, "loss": 11.6839, "step": 32240 }, { "epoch": 0.674893242903793, "grad_norm": 0.42630302906036377, "learning_rate": 0.00017608095045882314, "loss": 11.6735, "step": 32241 }, { "epoch": 0.6749141756677551, "grad_norm": 0.2904060482978821, "learning_rate": 0.00017607952754092828, "loss": 11.6839, "step": 32242 }, { "epoch": 0.6749351084317173, "grad_norm": 0.2817164361476898, "learning_rate": 0.00017607810458646054, "loss": 11.6751, "step": 32243 }, { "epoch": 0.6749560411956794, "grad_norm": 0.28151994943618774, "learning_rate": 0.00017607668159542046, "loss": 11.6721, "step": 32244 }, { "epoch": 0.6749769739596416, "grad_norm": 0.27031180262565613, "learning_rate": 0.00017607525856780883, "loss": 11.6711, "step": 32245 }, { "epoch": 0.6749979067236038, "grad_norm": 0.3205380141735077, "learning_rate": 0.00017607383550362628, "loss": 11.6748, "step": 32246 }, { "epoch": 0.6750188394875659, "grad_norm": 0.25101199746131897, "learning_rate": 0.0001760724124028735, "loss": 11.6781, "step": 32247 }, { "epoch": 0.6750397722515281, "grad_norm": 0.25794240832328796, "learning_rate": 0.00017607098926555124, "loss": 11.6596, "step": 32248 }, { "epoch": 0.6750607050154902, "grad_norm": 0.2779442369937897, "learning_rate": 0.00017606956609166006, "loss": 11.6771, "step": 32249 }, { "epoch": 0.6750816377794524, "grad_norm": 0.30792325735092163, "learning_rate": 0.00017606814288120078, "loss": 11.6584, "step": 32250 }, { "epoch": 0.6751025705434146, "grad_norm": 0.3161596953868866, "learning_rate": 0.000176066719634174, "loss": 11.6635, "step": 32251 }, { "epoch": 0.6751235033073767, "grad_norm": 0.28466957807540894, "learning_rate": 0.0001760652963505804, "loss": 11.6884, "step": 32252 }, { "epoch": 0.6751444360713389, "grad_norm": 0.2897622287273407, "learning_rate": 0.0001760638730304207, "loss": 11.6749, "step": 32253 }, { "epoch": 0.675165368835301, "grad_norm": 0.31693634390830994, "learning_rate": 0.0001760624496736956, "loss": 11.7057, "step": 32254 }, { "epoch": 0.6751863015992632, "grad_norm": 0.31714436411857605, "learning_rate": 0.00017606102628040573, "loss": 11.6848, "step": 32255 }, { "epoch": 0.6752072343632253, "grad_norm": 0.30494919419288635, "learning_rate": 0.00017605960285055182, "loss": 11.6823, "step": 32256 }, { "epoch": 0.6752281671271875, "grad_norm": 0.26923614740371704, "learning_rate": 0.0001760581793841345, "loss": 11.6693, "step": 32257 }, { "epoch": 0.6752490998911497, "grad_norm": 0.3084370195865631, "learning_rate": 0.0001760567558811545, "loss": 11.6596, "step": 32258 }, { "epoch": 0.6752700326551118, "grad_norm": 0.41238465905189514, "learning_rate": 0.00017605533234161248, "loss": 11.6677, "step": 32259 }, { "epoch": 0.675290965419074, "grad_norm": 0.3493724465370178, "learning_rate": 0.00017605390876550918, "loss": 11.6536, "step": 32260 }, { "epoch": 0.675311898183036, "grad_norm": 0.2565608322620392, "learning_rate": 0.00017605248515284523, "loss": 11.6785, "step": 32261 }, { "epoch": 0.6753328309469983, "grad_norm": 0.29589325189590454, "learning_rate": 0.0001760510615036213, "loss": 11.664, "step": 32262 }, { "epoch": 0.6753537637109603, "grad_norm": 0.3003690540790558, "learning_rate": 0.00017604963781783813, "loss": 11.6684, "step": 32263 }, { "epoch": 0.6753746964749225, "grad_norm": 0.276083379983902, "learning_rate": 0.00017604821409549634, "loss": 11.6608, "step": 32264 }, { "epoch": 0.6753956292388847, "grad_norm": 0.2466726154088974, "learning_rate": 0.00017604679033659667, "loss": 11.6522, "step": 32265 }, { "epoch": 0.6754165620028468, "grad_norm": 0.35901939868927, "learning_rate": 0.0001760453665411398, "loss": 11.6689, "step": 32266 }, { "epoch": 0.675437494766809, "grad_norm": 0.261664479970932, "learning_rate": 0.00017604394270912636, "loss": 11.6689, "step": 32267 }, { "epoch": 0.6754584275307711, "grad_norm": 0.2804050147533417, "learning_rate": 0.0001760425188405571, "loss": 11.6732, "step": 32268 }, { "epoch": 0.6754793602947333, "grad_norm": 0.28736066818237305, "learning_rate": 0.0001760410949354327, "loss": 11.6747, "step": 32269 }, { "epoch": 0.6755002930586955, "grad_norm": 0.31081709265708923, "learning_rate": 0.0001760396709937538, "loss": 11.672, "step": 32270 }, { "epoch": 0.6755212258226576, "grad_norm": 0.24356389045715332, "learning_rate": 0.00017603824701552107, "loss": 11.6815, "step": 32271 }, { "epoch": 0.6755421585866198, "grad_norm": 0.3339202105998993, "learning_rate": 0.00017603682300073526, "loss": 11.6744, "step": 32272 }, { "epoch": 0.6755630913505819, "grad_norm": 0.2719449996948242, "learning_rate": 0.00017603539894939701, "loss": 11.6756, "step": 32273 }, { "epoch": 0.6755840241145441, "grad_norm": 0.32251906394958496, "learning_rate": 0.00017603397486150706, "loss": 11.668, "step": 32274 }, { "epoch": 0.6756049568785062, "grad_norm": 0.23196762800216675, "learning_rate": 0.00017603255073706604, "loss": 11.6841, "step": 32275 }, { "epoch": 0.6756258896424684, "grad_norm": 0.3768753409385681, "learning_rate": 0.00017603112657607465, "loss": 11.6571, "step": 32276 }, { "epoch": 0.6756468224064306, "grad_norm": 0.26545196771621704, "learning_rate": 0.00017602970237853355, "loss": 11.6598, "step": 32277 }, { "epoch": 0.6756677551703927, "grad_norm": 0.21961072087287903, "learning_rate": 0.00017602827814444346, "loss": 11.6748, "step": 32278 }, { "epoch": 0.6756886879343549, "grad_norm": 0.2427334189414978, "learning_rate": 0.00017602685387380508, "loss": 11.6717, "step": 32279 }, { "epoch": 0.675709620698317, "grad_norm": 0.2803395688533783, "learning_rate": 0.00017602542956661902, "loss": 11.6681, "step": 32280 }, { "epoch": 0.6757305534622792, "grad_norm": 0.3483276665210724, "learning_rate": 0.00017602400522288606, "loss": 11.6737, "step": 32281 }, { "epoch": 0.6757514862262413, "grad_norm": 0.2850811183452606, "learning_rate": 0.0001760225808426068, "loss": 11.6707, "step": 32282 }, { "epoch": 0.6757724189902035, "grad_norm": 0.2720224857330322, "learning_rate": 0.00017602115642578197, "loss": 11.6686, "step": 32283 }, { "epoch": 0.6757933517541657, "grad_norm": 0.31996023654937744, "learning_rate": 0.00017601973197241226, "loss": 11.6805, "step": 32284 }, { "epoch": 0.6758142845181278, "grad_norm": 0.2949526011943817, "learning_rate": 0.00017601830748249832, "loss": 11.6825, "step": 32285 }, { "epoch": 0.67583521728209, "grad_norm": 0.2841593027114868, "learning_rate": 0.00017601688295604086, "loss": 11.652, "step": 32286 }, { "epoch": 0.675856150046052, "grad_norm": 0.42423108220100403, "learning_rate": 0.00017601545839304058, "loss": 11.6772, "step": 32287 }, { "epoch": 0.6758770828100142, "grad_norm": 0.28659141063690186, "learning_rate": 0.00017601403379349814, "loss": 11.6811, "step": 32288 }, { "epoch": 0.6758980155739764, "grad_norm": 0.31676265597343445, "learning_rate": 0.00017601260915741423, "loss": 11.6718, "step": 32289 }, { "epoch": 0.6759189483379385, "grad_norm": 0.31535565853118896, "learning_rate": 0.00017601118448478954, "loss": 11.6691, "step": 32290 }, { "epoch": 0.6759398811019007, "grad_norm": 0.25850000977516174, "learning_rate": 0.00017600975977562474, "loss": 11.6773, "step": 32291 }, { "epoch": 0.6759608138658628, "grad_norm": 0.28485727310180664, "learning_rate": 0.0001760083350299205, "loss": 11.6601, "step": 32292 }, { "epoch": 0.675981746629825, "grad_norm": 0.34419336915016174, "learning_rate": 0.00017600691024767756, "loss": 11.6637, "step": 32293 }, { "epoch": 0.6760026793937871, "grad_norm": 0.3549039661884308, "learning_rate": 0.00017600548542889662, "loss": 11.6553, "step": 32294 }, { "epoch": 0.6760236121577493, "grad_norm": 0.33651742339134216, "learning_rate": 0.00017600406057357825, "loss": 11.6594, "step": 32295 }, { "epoch": 0.6760445449217115, "grad_norm": 0.27486202120780945, "learning_rate": 0.00017600263568172322, "loss": 11.6575, "step": 32296 }, { "epoch": 0.6760654776856736, "grad_norm": 0.24668623507022858, "learning_rate": 0.0001760012107533322, "loss": 11.6537, "step": 32297 }, { "epoch": 0.6760864104496358, "grad_norm": 0.3689057230949402, "learning_rate": 0.00017599978578840587, "loss": 11.6634, "step": 32298 }, { "epoch": 0.6761073432135979, "grad_norm": 0.2737612724304199, "learning_rate": 0.00017599836078694496, "loss": 11.6736, "step": 32299 }, { "epoch": 0.6761282759775601, "grad_norm": 0.2875952422618866, "learning_rate": 0.00017599693574895007, "loss": 11.6725, "step": 32300 }, { "epoch": 0.6761492087415222, "grad_norm": 0.3605508804321289, "learning_rate": 0.00017599551067442194, "loss": 11.6684, "step": 32301 }, { "epoch": 0.6761701415054844, "grad_norm": 0.29998916387557983, "learning_rate": 0.00017599408556336128, "loss": 11.6605, "step": 32302 }, { "epoch": 0.6761910742694466, "grad_norm": 0.2928931415081024, "learning_rate": 0.0001759926604157687, "loss": 11.6657, "step": 32303 }, { "epoch": 0.6762120070334087, "grad_norm": 0.23614168167114258, "learning_rate": 0.00017599123523164494, "loss": 11.6834, "step": 32304 }, { "epoch": 0.6762329397973709, "grad_norm": 0.26336365938186646, "learning_rate": 0.0001759898100109907, "loss": 11.6591, "step": 32305 }, { "epoch": 0.676253872561333, "grad_norm": 0.2689061462879181, "learning_rate": 0.00017598838475380656, "loss": 11.6789, "step": 32306 }, { "epoch": 0.6762748053252952, "grad_norm": 0.35562944412231445, "learning_rate": 0.00017598695946009334, "loss": 11.6674, "step": 32307 }, { "epoch": 0.6762957380892574, "grad_norm": 0.33109787106513977, "learning_rate": 0.00017598553412985165, "loss": 11.6706, "step": 32308 }, { "epoch": 0.6763166708532194, "grad_norm": 0.4134564995765686, "learning_rate": 0.00017598410876308221, "loss": 11.6799, "step": 32309 }, { "epoch": 0.6763376036171816, "grad_norm": 0.3053153157234192, "learning_rate": 0.00017598268335978565, "loss": 11.6837, "step": 32310 }, { "epoch": 0.6763585363811437, "grad_norm": 0.30421221256256104, "learning_rate": 0.00017598125791996273, "loss": 11.6854, "step": 32311 }, { "epoch": 0.6763794691451059, "grad_norm": 0.2562049627304077, "learning_rate": 0.00017597983244361406, "loss": 11.6724, "step": 32312 }, { "epoch": 0.676400401909068, "grad_norm": 0.3663102984428406, "learning_rate": 0.00017597840693074038, "loss": 11.6735, "step": 32313 }, { "epoch": 0.6764213346730302, "grad_norm": 0.2767341732978821, "learning_rate": 0.00017597698138134234, "loss": 11.689, "step": 32314 }, { "epoch": 0.6764422674369924, "grad_norm": 0.3960282802581787, "learning_rate": 0.00017597555579542065, "loss": 11.6754, "step": 32315 }, { "epoch": 0.6764632002009545, "grad_norm": 0.27250707149505615, "learning_rate": 0.000175974130172976, "loss": 11.677, "step": 32316 }, { "epoch": 0.6764841329649167, "grad_norm": 0.3787091374397278, "learning_rate": 0.00017597270451400908, "loss": 11.6686, "step": 32317 }, { "epoch": 0.6765050657288788, "grad_norm": 0.28162881731987, "learning_rate": 0.00017597127881852052, "loss": 11.6744, "step": 32318 }, { "epoch": 0.676525998492841, "grad_norm": 0.33863842487335205, "learning_rate": 0.00017596985308651107, "loss": 11.6817, "step": 32319 }, { "epoch": 0.6765469312568031, "grad_norm": 0.3137950599193573, "learning_rate": 0.00017596842731798135, "loss": 11.6745, "step": 32320 }, { "epoch": 0.6765678640207653, "grad_norm": 0.3203456997871399, "learning_rate": 0.00017596700151293213, "loss": 11.6707, "step": 32321 }, { "epoch": 0.6765887967847275, "grad_norm": 0.28026899695396423, "learning_rate": 0.00017596557567136403, "loss": 11.6653, "step": 32322 }, { "epoch": 0.6766097295486896, "grad_norm": 0.3071969747543335, "learning_rate": 0.00017596414979327774, "loss": 11.6668, "step": 32323 }, { "epoch": 0.6766306623126518, "grad_norm": 0.2935665547847748, "learning_rate": 0.000175962723878674, "loss": 11.6542, "step": 32324 }, { "epoch": 0.6766515950766139, "grad_norm": 0.335676372051239, "learning_rate": 0.00017596129792755343, "loss": 11.6686, "step": 32325 }, { "epoch": 0.6766725278405761, "grad_norm": 0.3734687566757202, "learning_rate": 0.00017595987193991675, "loss": 11.6898, "step": 32326 }, { "epoch": 0.6766934606045383, "grad_norm": 0.24175116419792175, "learning_rate": 0.00017595844591576464, "loss": 11.6758, "step": 32327 }, { "epoch": 0.6767143933685004, "grad_norm": 0.2787242829799652, "learning_rate": 0.0001759570198550978, "loss": 11.6644, "step": 32328 }, { "epoch": 0.6767353261324626, "grad_norm": 0.28997647762298584, "learning_rate": 0.00017595559375791685, "loss": 11.6717, "step": 32329 }, { "epoch": 0.6767562588964247, "grad_norm": 0.29503682255744934, "learning_rate": 0.00017595416762422256, "loss": 11.6728, "step": 32330 }, { "epoch": 0.6767771916603869, "grad_norm": 0.26544636487960815, "learning_rate": 0.00017595274145401556, "loss": 11.6811, "step": 32331 }, { "epoch": 0.676798124424349, "grad_norm": 0.287645161151886, "learning_rate": 0.00017595131524729657, "loss": 11.6632, "step": 32332 }, { "epoch": 0.6768190571883111, "grad_norm": 0.2587166130542755, "learning_rate": 0.00017594988900406625, "loss": 11.6732, "step": 32333 }, { "epoch": 0.6768399899522733, "grad_norm": 0.41362708806991577, "learning_rate": 0.0001759484627243253, "loss": 11.6624, "step": 32334 }, { "epoch": 0.6768609227162354, "grad_norm": 0.24266573786735535, "learning_rate": 0.00017594703640807442, "loss": 11.6813, "step": 32335 }, { "epoch": 0.6768818554801976, "grad_norm": 0.2769148051738739, "learning_rate": 0.00017594561005531426, "loss": 11.6665, "step": 32336 }, { "epoch": 0.6769027882441597, "grad_norm": 0.37704384326934814, "learning_rate": 0.00017594418366604552, "loss": 11.665, "step": 32337 }, { "epoch": 0.6769237210081219, "grad_norm": 0.363572895526886, "learning_rate": 0.0001759427572402689, "loss": 11.6875, "step": 32338 }, { "epoch": 0.676944653772084, "grad_norm": 0.3226867914199829, "learning_rate": 0.00017594133077798504, "loss": 11.6681, "step": 32339 }, { "epoch": 0.6769655865360462, "grad_norm": 0.2871227264404297, "learning_rate": 0.00017593990427919472, "loss": 11.6869, "step": 32340 }, { "epoch": 0.6769865193000084, "grad_norm": 0.31431058049201965, "learning_rate": 0.0001759384777438985, "loss": 11.6535, "step": 32341 }, { "epoch": 0.6770074520639705, "grad_norm": 0.3051740825176239, "learning_rate": 0.0001759370511720972, "loss": 11.6673, "step": 32342 }, { "epoch": 0.6770283848279327, "grad_norm": 0.31025636196136475, "learning_rate": 0.0001759356245637914, "loss": 11.6851, "step": 32343 }, { "epoch": 0.6770493175918948, "grad_norm": 0.3283141851425171, "learning_rate": 0.00017593419791898184, "loss": 11.6742, "step": 32344 }, { "epoch": 0.677070250355857, "grad_norm": 0.24969331920146942, "learning_rate": 0.00017593277123766917, "loss": 11.6634, "step": 32345 }, { "epoch": 0.6770911831198192, "grad_norm": 0.31090831756591797, "learning_rate": 0.0001759313445198541, "loss": 11.6732, "step": 32346 }, { "epoch": 0.6771121158837813, "grad_norm": 0.36718618869781494, "learning_rate": 0.0001759299177655373, "loss": 11.6787, "step": 32347 }, { "epoch": 0.6771330486477435, "grad_norm": 0.30446958541870117, "learning_rate": 0.00017592849097471949, "loss": 11.6636, "step": 32348 }, { "epoch": 0.6771539814117056, "grad_norm": 0.2560208737850189, "learning_rate": 0.00017592706414740132, "loss": 11.6686, "step": 32349 }, { "epoch": 0.6771749141756678, "grad_norm": 0.32946932315826416, "learning_rate": 0.0001759256372835835, "loss": 11.6745, "step": 32350 }, { "epoch": 0.6771958469396299, "grad_norm": 0.30828526616096497, "learning_rate": 0.0001759242103832667, "loss": 11.6487, "step": 32351 }, { "epoch": 0.6772167797035921, "grad_norm": 0.3193237781524658, "learning_rate": 0.00017592278344645158, "loss": 11.6577, "step": 32352 }, { "epoch": 0.6772377124675543, "grad_norm": 0.2937436103820801, "learning_rate": 0.0001759213564731389, "loss": 11.6604, "step": 32353 }, { "epoch": 0.6772586452315164, "grad_norm": 0.27347755432128906, "learning_rate": 0.0001759199294633293, "loss": 11.6698, "step": 32354 }, { "epoch": 0.6772795779954786, "grad_norm": 0.31618860363960266, "learning_rate": 0.00017591850241702344, "loss": 11.6862, "step": 32355 }, { "epoch": 0.6773005107594406, "grad_norm": 0.33706894516944885, "learning_rate": 0.00017591707533422203, "loss": 11.6801, "step": 32356 }, { "epoch": 0.6773214435234028, "grad_norm": 0.3695332407951355, "learning_rate": 0.00017591564821492579, "loss": 11.6648, "step": 32357 }, { "epoch": 0.6773423762873649, "grad_norm": 0.2871805429458618, "learning_rate": 0.00017591422105913538, "loss": 11.6664, "step": 32358 }, { "epoch": 0.6773633090513271, "grad_norm": 0.2857103943824768, "learning_rate": 0.00017591279386685145, "loss": 11.6733, "step": 32359 }, { "epoch": 0.6773842418152893, "grad_norm": 0.3234766721725464, "learning_rate": 0.00017591136663807475, "loss": 11.658, "step": 32360 }, { "epoch": 0.6774051745792514, "grad_norm": 0.3204272389411926, "learning_rate": 0.0001759099393728059, "loss": 11.6683, "step": 32361 }, { "epoch": 0.6774261073432136, "grad_norm": 0.30829423666000366, "learning_rate": 0.00017590851207104565, "loss": 11.6574, "step": 32362 }, { "epoch": 0.6774470401071757, "grad_norm": 0.27462345361709595, "learning_rate": 0.00017590708473279467, "loss": 11.6819, "step": 32363 }, { "epoch": 0.6774679728711379, "grad_norm": 0.3191550374031067, "learning_rate": 0.00017590565735805361, "loss": 11.6834, "step": 32364 }, { "epoch": 0.6774889056351001, "grad_norm": 0.28628572821617126, "learning_rate": 0.0001759042299468232, "loss": 11.6656, "step": 32365 }, { "epoch": 0.6775098383990622, "grad_norm": 0.3274070620536804, "learning_rate": 0.00017590280249910406, "loss": 11.6601, "step": 32366 }, { "epoch": 0.6775307711630244, "grad_norm": 0.5836201906204224, "learning_rate": 0.00017590137501489697, "loss": 11.7041, "step": 32367 }, { "epoch": 0.6775517039269865, "grad_norm": 0.2920921742916107, "learning_rate": 0.00017589994749420257, "loss": 11.6841, "step": 32368 }, { "epoch": 0.6775726366909487, "grad_norm": 0.36284008622169495, "learning_rate": 0.00017589851993702149, "loss": 11.6635, "step": 32369 }, { "epoch": 0.6775935694549108, "grad_norm": 0.2639811038970947, "learning_rate": 0.0001758970923433545, "loss": 11.6789, "step": 32370 }, { "epoch": 0.677614502218873, "grad_norm": 0.2788461148738861, "learning_rate": 0.00017589566471320226, "loss": 11.6591, "step": 32371 }, { "epoch": 0.6776354349828352, "grad_norm": 0.32018962502479553, "learning_rate": 0.00017589423704656543, "loss": 11.6718, "step": 32372 }, { "epoch": 0.6776563677467973, "grad_norm": 0.2659517824649811, "learning_rate": 0.00017589280934344478, "loss": 11.6794, "step": 32373 }, { "epoch": 0.6776773005107595, "grad_norm": 0.26426756381988525, "learning_rate": 0.00017589138160384085, "loss": 11.6711, "step": 32374 }, { "epoch": 0.6776982332747216, "grad_norm": 0.2649412453174591, "learning_rate": 0.00017588995382775447, "loss": 11.6721, "step": 32375 }, { "epoch": 0.6777191660386838, "grad_norm": 0.3314674198627472, "learning_rate": 0.0001758885260151863, "loss": 11.6796, "step": 32376 }, { "epoch": 0.6777400988026459, "grad_norm": 0.317848801612854, "learning_rate": 0.0001758870981661369, "loss": 11.6752, "step": 32377 }, { "epoch": 0.677761031566608, "grad_norm": 0.29513421654701233, "learning_rate": 0.00017588567028060714, "loss": 11.6742, "step": 32378 }, { "epoch": 0.6777819643305703, "grad_norm": 0.30590394139289856, "learning_rate": 0.00017588424235859759, "loss": 11.673, "step": 32379 }, { "epoch": 0.6778028970945323, "grad_norm": 0.28531309962272644, "learning_rate": 0.00017588281440010894, "loss": 11.6687, "step": 32380 }, { "epoch": 0.6778238298584945, "grad_norm": 0.40755170583724976, "learning_rate": 0.00017588138640514192, "loss": 11.6792, "step": 32381 }, { "epoch": 0.6778447626224566, "grad_norm": 0.2747724950313568, "learning_rate": 0.0001758799583736972, "loss": 11.6685, "step": 32382 }, { "epoch": 0.6778656953864188, "grad_norm": 0.26694831252098083, "learning_rate": 0.00017587853030577545, "loss": 11.6778, "step": 32383 }, { "epoch": 0.6778866281503809, "grad_norm": 0.2843712270259857, "learning_rate": 0.00017587710220137736, "loss": 11.6822, "step": 32384 }, { "epoch": 0.6779075609143431, "grad_norm": 0.2721388339996338, "learning_rate": 0.00017587567406050365, "loss": 11.6785, "step": 32385 }, { "epoch": 0.6779284936783053, "grad_norm": 0.24913442134857178, "learning_rate": 0.00017587424588315497, "loss": 11.666, "step": 32386 }, { "epoch": 0.6779494264422674, "grad_norm": 0.2652355134487152, "learning_rate": 0.00017587281766933202, "loss": 11.6742, "step": 32387 }, { "epoch": 0.6779703592062296, "grad_norm": 0.24359361827373505, "learning_rate": 0.00017587138941903548, "loss": 11.6778, "step": 32388 }, { "epoch": 0.6779912919701917, "grad_norm": 0.27276095747947693, "learning_rate": 0.00017586996113226605, "loss": 11.673, "step": 32389 }, { "epoch": 0.6780122247341539, "grad_norm": 0.30227091908454895, "learning_rate": 0.00017586853280902442, "loss": 11.6884, "step": 32390 }, { "epoch": 0.6780331574981161, "grad_norm": 0.31544041633605957, "learning_rate": 0.00017586710444931125, "loss": 11.669, "step": 32391 }, { "epoch": 0.6780540902620782, "grad_norm": 0.2852611243724823, "learning_rate": 0.00017586567605312726, "loss": 11.6708, "step": 32392 }, { "epoch": 0.6780750230260404, "grad_norm": 0.2353994995355606, "learning_rate": 0.0001758642476204731, "loss": 11.6573, "step": 32393 }, { "epoch": 0.6780959557900025, "grad_norm": 0.4186396598815918, "learning_rate": 0.00017586281915134947, "loss": 11.6896, "step": 32394 }, { "epoch": 0.6781168885539647, "grad_norm": 0.24731957912445068, "learning_rate": 0.00017586139064575712, "loss": 11.6718, "step": 32395 }, { "epoch": 0.6781378213179268, "grad_norm": 0.28930121660232544, "learning_rate": 0.00017585996210369662, "loss": 11.6524, "step": 32396 }, { "epoch": 0.678158754081889, "grad_norm": 0.2628556787967682, "learning_rate": 0.00017585853352516874, "loss": 11.6651, "step": 32397 }, { "epoch": 0.6781796868458512, "grad_norm": 0.30233970284461975, "learning_rate": 0.00017585710491017413, "loss": 11.6627, "step": 32398 }, { "epoch": 0.6782006196098133, "grad_norm": 0.3342982828617096, "learning_rate": 0.00017585567625871349, "loss": 11.6695, "step": 32399 }, { "epoch": 0.6782215523737755, "grad_norm": 0.26185551285743713, "learning_rate": 0.00017585424757078752, "loss": 11.6669, "step": 32400 }, { "epoch": 0.6782424851377375, "grad_norm": 0.28570470213890076, "learning_rate": 0.00017585281884639688, "loss": 11.6726, "step": 32401 }, { "epoch": 0.6782634179016997, "grad_norm": 0.30824244022369385, "learning_rate": 0.00017585139008554227, "loss": 11.6814, "step": 32402 }, { "epoch": 0.6782843506656618, "grad_norm": 0.2906968295574188, "learning_rate": 0.00017584996128822437, "loss": 11.6568, "step": 32403 }, { "epoch": 0.678305283429624, "grad_norm": 0.3304395377635956, "learning_rate": 0.0001758485324544439, "loss": 11.6833, "step": 32404 }, { "epoch": 0.6783262161935862, "grad_norm": 0.31422796845436096, "learning_rate": 0.0001758471035842015, "loss": 11.6713, "step": 32405 }, { "epoch": 0.6783471489575483, "grad_norm": 0.2993881404399872, "learning_rate": 0.0001758456746774979, "loss": 11.6669, "step": 32406 }, { "epoch": 0.6783680817215105, "grad_norm": 0.3354100286960602, "learning_rate": 0.00017584424573433374, "loss": 11.6786, "step": 32407 }, { "epoch": 0.6783890144854726, "grad_norm": 0.2980476915836334, "learning_rate": 0.00017584281675470974, "loss": 11.6643, "step": 32408 }, { "epoch": 0.6784099472494348, "grad_norm": 0.3398352563381195, "learning_rate": 0.00017584138773862654, "loss": 11.661, "step": 32409 }, { "epoch": 0.678430880013397, "grad_norm": 0.3484322726726532, "learning_rate": 0.00017583995868608493, "loss": 11.6875, "step": 32410 }, { "epoch": 0.6784518127773591, "grad_norm": 0.2627773582935333, "learning_rate": 0.0001758385295970855, "loss": 11.6531, "step": 32411 }, { "epoch": 0.6784727455413213, "grad_norm": 0.277687668800354, "learning_rate": 0.00017583710047162896, "loss": 11.6581, "step": 32412 }, { "epoch": 0.6784936783052834, "grad_norm": 0.544319748878479, "learning_rate": 0.000175835671309716, "loss": 11.6734, "step": 32413 }, { "epoch": 0.6785146110692456, "grad_norm": 0.29041239619255066, "learning_rate": 0.00017583424211134737, "loss": 11.6647, "step": 32414 }, { "epoch": 0.6785355438332077, "grad_norm": 0.3602324426174164, "learning_rate": 0.00017583281287652364, "loss": 11.6744, "step": 32415 }, { "epoch": 0.6785564765971699, "grad_norm": 0.2897811532020569, "learning_rate": 0.00017583138360524557, "loss": 11.6748, "step": 32416 }, { "epoch": 0.6785774093611321, "grad_norm": 0.3153994381427765, "learning_rate": 0.00017582995429751385, "loss": 11.6893, "step": 32417 }, { "epoch": 0.6785983421250942, "grad_norm": 0.38833898305892944, "learning_rate": 0.00017582852495332912, "loss": 11.68, "step": 32418 }, { "epoch": 0.6786192748890564, "grad_norm": 0.32400089502334595, "learning_rate": 0.00017582709557269216, "loss": 11.6676, "step": 32419 }, { "epoch": 0.6786402076530185, "grad_norm": 0.31098344922065735, "learning_rate": 0.00017582566615560356, "loss": 11.6699, "step": 32420 }, { "epoch": 0.6786611404169807, "grad_norm": 0.31201720237731934, "learning_rate": 0.00017582423670206403, "loss": 11.6599, "step": 32421 }, { "epoch": 0.6786820731809428, "grad_norm": 0.3869929611682892, "learning_rate": 0.00017582280721207427, "loss": 11.6658, "step": 32422 }, { "epoch": 0.678703005944905, "grad_norm": 0.4190043807029724, "learning_rate": 0.000175821377685635, "loss": 11.6876, "step": 32423 }, { "epoch": 0.6787239387088672, "grad_norm": 0.3944958448410034, "learning_rate": 0.00017581994812274684, "loss": 11.6795, "step": 32424 }, { "epoch": 0.6787448714728292, "grad_norm": 0.31055182218551636, "learning_rate": 0.00017581851852341052, "loss": 11.66, "step": 32425 }, { "epoch": 0.6787658042367914, "grad_norm": 0.2644589841365814, "learning_rate": 0.0001758170888876267, "loss": 11.6711, "step": 32426 }, { "epoch": 0.6787867370007535, "grad_norm": 0.279755562543869, "learning_rate": 0.00017581565921539612, "loss": 11.6677, "step": 32427 }, { "epoch": 0.6788076697647157, "grad_norm": 0.2959614396095276, "learning_rate": 0.00017581422950671942, "loss": 11.68, "step": 32428 }, { "epoch": 0.6788286025286779, "grad_norm": 0.23690104484558105, "learning_rate": 0.0001758127997615973, "loss": 11.6823, "step": 32429 }, { "epoch": 0.67884953529264, "grad_norm": 0.2724432349205017, "learning_rate": 0.00017581136998003047, "loss": 11.6693, "step": 32430 }, { "epoch": 0.6788704680566022, "grad_norm": 0.26424261927604675, "learning_rate": 0.00017580994016201955, "loss": 11.6737, "step": 32431 }, { "epoch": 0.6788914008205643, "grad_norm": 0.3238469958305359, "learning_rate": 0.00017580851030756528, "loss": 11.6697, "step": 32432 }, { "epoch": 0.6789123335845265, "grad_norm": 0.28012704849243164, "learning_rate": 0.00017580708041666838, "loss": 11.6463, "step": 32433 }, { "epoch": 0.6789332663484886, "grad_norm": 0.2569231390953064, "learning_rate": 0.00017580565048932946, "loss": 11.6628, "step": 32434 }, { "epoch": 0.6789541991124508, "grad_norm": 0.3705234229564667, "learning_rate": 0.0001758042205255493, "loss": 11.6739, "step": 32435 }, { "epoch": 0.678975131876413, "grad_norm": 0.2887583076953888, "learning_rate": 0.00017580279052532846, "loss": 11.6582, "step": 32436 }, { "epoch": 0.6789960646403751, "grad_norm": 0.3019247353076935, "learning_rate": 0.00017580136048866773, "loss": 11.6633, "step": 32437 }, { "epoch": 0.6790169974043373, "grad_norm": 0.30971258878707886, "learning_rate": 0.00017579993041556778, "loss": 11.6793, "step": 32438 }, { "epoch": 0.6790379301682994, "grad_norm": 0.3537902235984802, "learning_rate": 0.00017579850030602926, "loss": 11.6901, "step": 32439 }, { "epoch": 0.6790588629322616, "grad_norm": 0.341770738363266, "learning_rate": 0.00017579707016005292, "loss": 11.6882, "step": 32440 }, { "epoch": 0.6790797956962237, "grad_norm": 0.30052581429481506, "learning_rate": 0.00017579563997763934, "loss": 11.6755, "step": 32441 }, { "epoch": 0.6791007284601859, "grad_norm": 0.3284982442855835, "learning_rate": 0.00017579420975878934, "loss": 11.6911, "step": 32442 }, { "epoch": 0.6791216612241481, "grad_norm": 0.37028929591178894, "learning_rate": 0.00017579277950350352, "loss": 11.6639, "step": 32443 }, { "epoch": 0.6791425939881102, "grad_norm": 0.3204450309276581, "learning_rate": 0.0001757913492117826, "loss": 11.6659, "step": 32444 }, { "epoch": 0.6791635267520724, "grad_norm": 0.28106167912483215, "learning_rate": 0.00017578991888362726, "loss": 11.6638, "step": 32445 }, { "epoch": 0.6791844595160345, "grad_norm": 0.37842899560928345, "learning_rate": 0.00017578848851903817, "loss": 11.6736, "step": 32446 }, { "epoch": 0.6792053922799967, "grad_norm": 0.3108336925506592, "learning_rate": 0.00017578705811801604, "loss": 11.6656, "step": 32447 }, { "epoch": 0.6792263250439589, "grad_norm": 0.29886114597320557, "learning_rate": 0.00017578562768056158, "loss": 11.6654, "step": 32448 }, { "epoch": 0.679247257807921, "grad_norm": 0.27959829568862915, "learning_rate": 0.00017578419720667542, "loss": 11.6698, "step": 32449 }, { "epoch": 0.6792681905718831, "grad_norm": 0.2615004777908325, "learning_rate": 0.0001757827666963583, "loss": 11.6741, "step": 32450 }, { "epoch": 0.6792891233358452, "grad_norm": 0.3295437693595886, "learning_rate": 0.00017578133614961088, "loss": 11.6832, "step": 32451 }, { "epoch": 0.6793100560998074, "grad_norm": 0.2985670566558838, "learning_rate": 0.00017577990556643384, "loss": 11.6678, "step": 32452 }, { "epoch": 0.6793309888637695, "grad_norm": 0.28810691833496094, "learning_rate": 0.00017577847494682788, "loss": 11.6793, "step": 32453 }, { "epoch": 0.6793519216277317, "grad_norm": 0.2591439187526703, "learning_rate": 0.0001757770442907937, "loss": 11.6694, "step": 32454 }, { "epoch": 0.6793728543916939, "grad_norm": 0.268754780292511, "learning_rate": 0.00017577561359833195, "loss": 11.6748, "step": 32455 }, { "epoch": 0.679393787155656, "grad_norm": 0.2759145200252533, "learning_rate": 0.00017577418286944337, "loss": 11.6716, "step": 32456 }, { "epoch": 0.6794147199196182, "grad_norm": 0.3152281939983368, "learning_rate": 0.00017577275210412863, "loss": 11.6629, "step": 32457 }, { "epoch": 0.6794356526835803, "grad_norm": 0.3155069053173065, "learning_rate": 0.0001757713213023884, "loss": 11.6806, "step": 32458 }, { "epoch": 0.6794565854475425, "grad_norm": 0.3143978416919708, "learning_rate": 0.00017576989046422337, "loss": 11.662, "step": 32459 }, { "epoch": 0.6794775182115046, "grad_norm": 0.37760838866233826, "learning_rate": 0.00017576845958963423, "loss": 11.6601, "step": 32460 }, { "epoch": 0.6794984509754668, "grad_norm": 0.2741592228412628, "learning_rate": 0.00017576702867862167, "loss": 11.6793, "step": 32461 }, { "epoch": 0.679519383739429, "grad_norm": 0.31836986541748047, "learning_rate": 0.00017576559773118641, "loss": 11.6686, "step": 32462 }, { "epoch": 0.6795403165033911, "grad_norm": 0.27418914437294006, "learning_rate": 0.0001757641667473291, "loss": 11.6703, "step": 32463 }, { "epoch": 0.6795612492673533, "grad_norm": 0.28427666425704956, "learning_rate": 0.00017576273572705042, "loss": 11.6661, "step": 32464 }, { "epoch": 0.6795821820313154, "grad_norm": 0.2516964375972748, "learning_rate": 0.0001757613046703511, "loss": 11.6834, "step": 32465 }, { "epoch": 0.6796031147952776, "grad_norm": 0.2775883078575134, "learning_rate": 0.00017575987357723176, "loss": 11.6613, "step": 32466 }, { "epoch": 0.6796240475592398, "grad_norm": 0.26702243089675903, "learning_rate": 0.00017575844244769318, "loss": 11.669, "step": 32467 }, { "epoch": 0.6796449803232019, "grad_norm": 0.23992857336997986, "learning_rate": 0.00017575701128173595, "loss": 11.665, "step": 32468 }, { "epoch": 0.6796659130871641, "grad_norm": 0.29529622197151184, "learning_rate": 0.00017575558007936083, "loss": 11.662, "step": 32469 }, { "epoch": 0.6796868458511262, "grad_norm": 0.2826681435108185, "learning_rate": 0.0001757541488405685, "loss": 11.6668, "step": 32470 }, { "epoch": 0.6797077786150884, "grad_norm": 0.3077602982521057, "learning_rate": 0.00017575271756535964, "loss": 11.6686, "step": 32471 }, { "epoch": 0.6797287113790504, "grad_norm": 0.3156644105911255, "learning_rate": 0.0001757512862537349, "loss": 11.6876, "step": 32472 }, { "epoch": 0.6797496441430126, "grad_norm": 0.3664799928665161, "learning_rate": 0.000175749854905695, "loss": 11.6727, "step": 32473 }, { "epoch": 0.6797705769069748, "grad_norm": 0.3255115747451782, "learning_rate": 0.00017574842352124063, "loss": 11.6693, "step": 32474 }, { "epoch": 0.6797915096709369, "grad_norm": 0.2990107834339142, "learning_rate": 0.00017574699210037248, "loss": 11.6781, "step": 32475 }, { "epoch": 0.6798124424348991, "grad_norm": 0.2619074881076813, "learning_rate": 0.00017574556064309122, "loss": 11.6598, "step": 32476 }, { "epoch": 0.6798333751988612, "grad_norm": 0.2608685791492462, "learning_rate": 0.0001757441291493976, "loss": 11.6579, "step": 32477 }, { "epoch": 0.6798543079628234, "grad_norm": 0.28649693727493286, "learning_rate": 0.0001757426976192922, "loss": 11.6632, "step": 32478 }, { "epoch": 0.6798752407267855, "grad_norm": 0.2632328271865845, "learning_rate": 0.0001757412660527758, "loss": 11.6673, "step": 32479 }, { "epoch": 0.6798961734907477, "grad_norm": 0.2847275137901306, "learning_rate": 0.00017573983444984904, "loss": 11.669, "step": 32480 }, { "epoch": 0.6799171062547099, "grad_norm": 0.2670397460460663, "learning_rate": 0.0001757384028105126, "loss": 11.6667, "step": 32481 }, { "epoch": 0.679938039018672, "grad_norm": 0.27974560856819153, "learning_rate": 0.00017573697113476723, "loss": 11.6718, "step": 32482 }, { "epoch": 0.6799589717826342, "grad_norm": 0.3520734906196594, "learning_rate": 0.0001757355394226136, "loss": 11.6656, "step": 32483 }, { "epoch": 0.6799799045465963, "grad_norm": 0.2763536870479584, "learning_rate": 0.00017573410767405233, "loss": 11.6581, "step": 32484 }, { "epoch": 0.6800008373105585, "grad_norm": 0.32173702120780945, "learning_rate": 0.00017573267588908418, "loss": 11.6672, "step": 32485 }, { "epoch": 0.6800217700745207, "grad_norm": 0.33597105741500854, "learning_rate": 0.00017573124406770982, "loss": 11.6653, "step": 32486 }, { "epoch": 0.6800427028384828, "grad_norm": 0.27021804451942444, "learning_rate": 0.0001757298122099299, "loss": 11.6811, "step": 32487 }, { "epoch": 0.680063635602445, "grad_norm": 0.2942473292350769, "learning_rate": 0.0001757283803157452, "loss": 11.6623, "step": 32488 }, { "epoch": 0.6800845683664071, "grad_norm": 0.2622050642967224, "learning_rate": 0.0001757269483851563, "loss": 11.6533, "step": 32489 }, { "epoch": 0.6801055011303693, "grad_norm": 0.37283238768577576, "learning_rate": 0.00017572551641816396, "loss": 11.6783, "step": 32490 }, { "epoch": 0.6801264338943314, "grad_norm": 0.36247387528419495, "learning_rate": 0.0001757240844147688, "loss": 11.6772, "step": 32491 }, { "epoch": 0.6801473666582936, "grad_norm": 0.2883906662464142, "learning_rate": 0.00017572265237497163, "loss": 11.6821, "step": 32492 }, { "epoch": 0.6801682994222558, "grad_norm": 0.2693460285663605, "learning_rate": 0.00017572122029877303, "loss": 11.6605, "step": 32493 }, { "epoch": 0.6801892321862179, "grad_norm": 0.2799746096134186, "learning_rate": 0.0001757197881861737, "loss": 11.6843, "step": 32494 }, { "epoch": 0.68021016495018, "grad_norm": 0.29706260561943054, "learning_rate": 0.00017571835603717438, "loss": 11.6757, "step": 32495 }, { "epoch": 0.6802310977141421, "grad_norm": 0.3158937692642212, "learning_rate": 0.00017571692385177573, "loss": 11.6515, "step": 32496 }, { "epoch": 0.6802520304781043, "grad_norm": 0.33639228343963623, "learning_rate": 0.00017571549162997843, "loss": 11.6758, "step": 32497 }, { "epoch": 0.6802729632420664, "grad_norm": 0.26211073994636536, "learning_rate": 0.00017571405937178317, "loss": 11.6706, "step": 32498 }, { "epoch": 0.6802938960060286, "grad_norm": 0.3035039007663727, "learning_rate": 0.00017571262707719062, "loss": 11.6854, "step": 32499 }, { "epoch": 0.6803148287699908, "grad_norm": 0.25526314973831177, "learning_rate": 0.00017571119474620156, "loss": 11.6884, "step": 32500 }, { "epoch": 0.6803357615339529, "grad_norm": 0.34724950790405273, "learning_rate": 0.00017570976237881657, "loss": 11.678, "step": 32501 }, { "epoch": 0.6803566942979151, "grad_norm": 0.3541651666164398, "learning_rate": 0.0001757083299750364, "loss": 11.6885, "step": 32502 }, { "epoch": 0.6803776270618772, "grad_norm": 0.3263864815235138, "learning_rate": 0.00017570689753486167, "loss": 11.6705, "step": 32503 }, { "epoch": 0.6803985598258394, "grad_norm": 0.28866803646087646, "learning_rate": 0.00017570546505829316, "loss": 11.6695, "step": 32504 }, { "epoch": 0.6804194925898016, "grad_norm": 0.3234182894229889, "learning_rate": 0.0001757040325453315, "loss": 11.6802, "step": 32505 }, { "epoch": 0.6804404253537637, "grad_norm": 0.2883076071739197, "learning_rate": 0.00017570259999597737, "loss": 11.685, "step": 32506 }, { "epoch": 0.6804613581177259, "grad_norm": 0.2739211916923523, "learning_rate": 0.00017570116741023151, "loss": 11.67, "step": 32507 }, { "epoch": 0.680482290881688, "grad_norm": 0.28682392835617065, "learning_rate": 0.0001756997347880946, "loss": 11.6685, "step": 32508 }, { "epoch": 0.6805032236456502, "grad_norm": 0.291782021522522, "learning_rate": 0.00017569830212956728, "loss": 11.664, "step": 32509 }, { "epoch": 0.6805241564096123, "grad_norm": 0.34136274456977844, "learning_rate": 0.0001756968694346503, "loss": 11.6591, "step": 32510 }, { "epoch": 0.6805450891735745, "grad_norm": 0.2584429085254669, "learning_rate": 0.00017569543670334432, "loss": 11.6578, "step": 32511 }, { "epoch": 0.6805660219375367, "grad_norm": 0.23984426259994507, "learning_rate": 0.00017569400393564999, "loss": 11.6501, "step": 32512 }, { "epoch": 0.6805869547014988, "grad_norm": 0.25948092341423035, "learning_rate": 0.00017569257113156806, "loss": 11.6828, "step": 32513 }, { "epoch": 0.680607887465461, "grad_norm": 0.32296526432037354, "learning_rate": 0.00017569113829109918, "loss": 11.6822, "step": 32514 }, { "epoch": 0.6806288202294231, "grad_norm": 0.30097585916519165, "learning_rate": 0.00017568970541424406, "loss": 11.6836, "step": 32515 }, { "epoch": 0.6806497529933853, "grad_norm": 0.3805551826953888, "learning_rate": 0.00017568827250100339, "loss": 11.6699, "step": 32516 }, { "epoch": 0.6806706857573473, "grad_norm": 0.3541586101055145, "learning_rate": 0.00017568683955137784, "loss": 11.6801, "step": 32517 }, { "epoch": 0.6806916185213095, "grad_norm": 0.34346312284469604, "learning_rate": 0.0001756854065653681, "loss": 11.6619, "step": 32518 }, { "epoch": 0.6807125512852717, "grad_norm": 0.2692251205444336, "learning_rate": 0.0001756839735429749, "loss": 11.6724, "step": 32519 }, { "epoch": 0.6807334840492338, "grad_norm": 0.3311125636100769, "learning_rate": 0.00017568254048419885, "loss": 11.667, "step": 32520 }, { "epoch": 0.680754416813196, "grad_norm": 0.4490927457809448, "learning_rate": 0.0001756811073890407, "loss": 11.6697, "step": 32521 }, { "epoch": 0.6807753495771581, "grad_norm": 0.24832524359226227, "learning_rate": 0.00017567967425750116, "loss": 11.6519, "step": 32522 }, { "epoch": 0.6807962823411203, "grad_norm": 0.2746991813182831, "learning_rate": 0.00017567824108958086, "loss": 11.6604, "step": 32523 }, { "epoch": 0.6808172151050825, "grad_norm": 0.31865084171295166, "learning_rate": 0.00017567680788528053, "loss": 11.6713, "step": 32524 }, { "epoch": 0.6808381478690446, "grad_norm": 0.3515988290309906, "learning_rate": 0.0001756753746446008, "loss": 11.6696, "step": 32525 }, { "epoch": 0.6808590806330068, "grad_norm": 0.3178195059299469, "learning_rate": 0.00017567394136754243, "loss": 11.669, "step": 32526 }, { "epoch": 0.6808800133969689, "grad_norm": 0.2566773295402527, "learning_rate": 0.0001756725080541061, "loss": 11.6736, "step": 32527 }, { "epoch": 0.6809009461609311, "grad_norm": 0.35155197978019714, "learning_rate": 0.00017567107470429244, "loss": 11.6787, "step": 32528 }, { "epoch": 0.6809218789248932, "grad_norm": 0.2848922312259674, "learning_rate": 0.0001756696413181022, "loss": 11.673, "step": 32529 }, { "epoch": 0.6809428116888554, "grad_norm": 0.284681111574173, "learning_rate": 0.00017566820789553607, "loss": 11.6747, "step": 32530 }, { "epoch": 0.6809637444528176, "grad_norm": 0.28939878940582275, "learning_rate": 0.00017566677443659465, "loss": 11.6696, "step": 32531 }, { "epoch": 0.6809846772167797, "grad_norm": 0.2814841866493225, "learning_rate": 0.00017566534094127874, "loss": 11.6747, "step": 32532 }, { "epoch": 0.6810056099807419, "grad_norm": 0.252676397562027, "learning_rate": 0.00017566390740958898, "loss": 11.6629, "step": 32533 }, { "epoch": 0.681026542744704, "grad_norm": 0.32003679871559143, "learning_rate": 0.00017566247384152606, "loss": 11.6649, "step": 32534 }, { "epoch": 0.6810474755086662, "grad_norm": 0.284367173910141, "learning_rate": 0.00017566104023709068, "loss": 11.6776, "step": 32535 }, { "epoch": 0.6810684082726283, "grad_norm": 0.3262735605239868, "learning_rate": 0.00017565960659628348, "loss": 11.6743, "step": 32536 }, { "epoch": 0.6810893410365905, "grad_norm": 0.44387099146842957, "learning_rate": 0.00017565817291910525, "loss": 11.6915, "step": 32537 }, { "epoch": 0.6811102738005527, "grad_norm": 0.3127439022064209, "learning_rate": 0.00017565673920555657, "loss": 11.6839, "step": 32538 }, { "epoch": 0.6811312065645148, "grad_norm": 0.2976539731025696, "learning_rate": 0.00017565530545563825, "loss": 11.6738, "step": 32539 }, { "epoch": 0.681152139328477, "grad_norm": 0.32916077971458435, "learning_rate": 0.00017565387166935086, "loss": 11.6688, "step": 32540 }, { "epoch": 0.681173072092439, "grad_norm": 0.2872481346130371, "learning_rate": 0.00017565243784669514, "loss": 11.6807, "step": 32541 }, { "epoch": 0.6811940048564012, "grad_norm": 0.39830365777015686, "learning_rate": 0.00017565100398767177, "loss": 11.6778, "step": 32542 }, { "epoch": 0.6812149376203634, "grad_norm": 0.2865583300590515, "learning_rate": 0.00017564957009228146, "loss": 11.6751, "step": 32543 }, { "epoch": 0.6812358703843255, "grad_norm": 0.35940253734588623, "learning_rate": 0.00017564813616052487, "loss": 11.652, "step": 32544 }, { "epoch": 0.6812568031482877, "grad_norm": 0.29286879301071167, "learning_rate": 0.0001756467021924027, "loss": 11.6832, "step": 32545 }, { "epoch": 0.6812777359122498, "grad_norm": 0.38083282113075256, "learning_rate": 0.00017564526818791566, "loss": 11.6786, "step": 32546 }, { "epoch": 0.681298668676212, "grad_norm": 0.2847689688205719, "learning_rate": 0.00017564383414706445, "loss": 11.6605, "step": 32547 }, { "epoch": 0.6813196014401741, "grad_norm": 0.2855056822299957, "learning_rate": 0.0001756424000698497, "loss": 11.6657, "step": 32548 }, { "epoch": 0.6813405342041363, "grad_norm": 0.3640819489955902, "learning_rate": 0.00017564096595627213, "loss": 11.6661, "step": 32549 }, { "epoch": 0.6813614669680985, "grad_norm": 0.3021327555179596, "learning_rate": 0.00017563953180633243, "loss": 11.6808, "step": 32550 }, { "epoch": 0.6813823997320606, "grad_norm": 0.34343403577804565, "learning_rate": 0.00017563809762003132, "loss": 11.6833, "step": 32551 }, { "epoch": 0.6814033324960228, "grad_norm": 0.2619085907936096, "learning_rate": 0.00017563666339736944, "loss": 11.6741, "step": 32552 }, { "epoch": 0.6814242652599849, "grad_norm": 0.23279257118701935, "learning_rate": 0.00017563522913834748, "loss": 11.6738, "step": 32553 }, { "epoch": 0.6814451980239471, "grad_norm": 0.2587060034275055, "learning_rate": 0.00017563379484296618, "loss": 11.6566, "step": 32554 }, { "epoch": 0.6814661307879092, "grad_norm": 0.28928589820861816, "learning_rate": 0.00017563236051122618, "loss": 11.6719, "step": 32555 }, { "epoch": 0.6814870635518714, "grad_norm": 0.2614816427230835, "learning_rate": 0.00017563092614312822, "loss": 11.6625, "step": 32556 }, { "epoch": 0.6815079963158336, "grad_norm": 0.35495904088020325, "learning_rate": 0.00017562949173867293, "loss": 11.6721, "step": 32557 }, { "epoch": 0.6815289290797957, "grad_norm": 1.0207117795944214, "learning_rate": 0.00017562805729786103, "loss": 11.583, "step": 32558 }, { "epoch": 0.6815498618437579, "grad_norm": 0.34573042392730713, "learning_rate": 0.00017562662282069323, "loss": 11.675, "step": 32559 }, { "epoch": 0.68157079460772, "grad_norm": 0.32995837926864624, "learning_rate": 0.00017562518830717015, "loss": 11.6606, "step": 32560 }, { "epoch": 0.6815917273716822, "grad_norm": 0.2789444327354431, "learning_rate": 0.00017562375375729258, "loss": 11.6582, "step": 32561 }, { "epoch": 0.6816126601356444, "grad_norm": 0.321414053440094, "learning_rate": 0.00017562231917106114, "loss": 11.6671, "step": 32562 }, { "epoch": 0.6816335928996065, "grad_norm": 0.2870860695838928, "learning_rate": 0.00017562088454847651, "loss": 11.687, "step": 32563 }, { "epoch": 0.6816545256635687, "grad_norm": 0.26487863063812256, "learning_rate": 0.00017561944988953943, "loss": 11.6777, "step": 32564 }, { "epoch": 0.6816754584275307, "grad_norm": 0.31522244215011597, "learning_rate": 0.00017561801519425053, "loss": 11.6764, "step": 32565 }, { "epoch": 0.681696391191493, "grad_norm": 0.23026031255722046, "learning_rate": 0.0001756165804626106, "loss": 11.6806, "step": 32566 }, { "epoch": 0.681717323955455, "grad_norm": 0.3535866141319275, "learning_rate": 0.00017561514569462022, "loss": 11.6573, "step": 32567 }, { "epoch": 0.6817382567194172, "grad_norm": 0.2689497768878937, "learning_rate": 0.00017561371089028013, "loss": 11.6564, "step": 32568 }, { "epoch": 0.6817591894833794, "grad_norm": 0.37114065885543823, "learning_rate": 0.000175612276049591, "loss": 11.6854, "step": 32569 }, { "epoch": 0.6817801222473415, "grad_norm": 0.29534828662872314, "learning_rate": 0.00017561084117255355, "loss": 11.6873, "step": 32570 }, { "epoch": 0.6818010550113037, "grad_norm": 0.2889569401741028, "learning_rate": 0.00017560940625916844, "loss": 11.6673, "step": 32571 }, { "epoch": 0.6818219877752658, "grad_norm": 0.3296014070510864, "learning_rate": 0.0001756079713094364, "loss": 11.6709, "step": 32572 }, { "epoch": 0.681842920539228, "grad_norm": 0.36295434832572937, "learning_rate": 0.0001756065363233581, "loss": 11.6687, "step": 32573 }, { "epoch": 0.6818638533031901, "grad_norm": 0.3475522994995117, "learning_rate": 0.0001756051013009342, "loss": 11.6775, "step": 32574 }, { "epoch": 0.6818847860671523, "grad_norm": 0.2864769697189331, "learning_rate": 0.0001756036662421654, "loss": 11.6715, "step": 32575 }, { "epoch": 0.6819057188311145, "grad_norm": 0.25384804606437683, "learning_rate": 0.00017560223114705244, "loss": 11.6791, "step": 32576 }, { "epoch": 0.6819266515950766, "grad_norm": 0.28442585468292236, "learning_rate": 0.00017560079601559593, "loss": 11.6735, "step": 32577 }, { "epoch": 0.6819475843590388, "grad_norm": 0.25295138359069824, "learning_rate": 0.0001755993608477966, "loss": 11.6746, "step": 32578 }, { "epoch": 0.6819685171230009, "grad_norm": 0.2730937600135803, "learning_rate": 0.00017559792564365517, "loss": 11.6648, "step": 32579 }, { "epoch": 0.6819894498869631, "grad_norm": 0.5584942102432251, "learning_rate": 0.00017559649040317228, "loss": 11.6771, "step": 32580 }, { "epoch": 0.6820103826509252, "grad_norm": 0.2604219913482666, "learning_rate": 0.00017559505512634868, "loss": 11.6476, "step": 32581 }, { "epoch": 0.6820313154148874, "grad_norm": 0.2655068337917328, "learning_rate": 0.00017559361981318497, "loss": 11.6875, "step": 32582 }, { "epoch": 0.6820522481788496, "grad_norm": 0.3360603153705597, "learning_rate": 0.00017559218446368195, "loss": 11.6724, "step": 32583 }, { "epoch": 0.6820731809428117, "grad_norm": 0.31216713786125183, "learning_rate": 0.00017559074907784022, "loss": 11.6518, "step": 32584 }, { "epoch": 0.6820941137067739, "grad_norm": 0.36745819449424744, "learning_rate": 0.0001755893136556605, "loss": 11.6885, "step": 32585 }, { "epoch": 0.682115046470736, "grad_norm": 0.2501155436038971, "learning_rate": 0.00017558787819714348, "loss": 11.6521, "step": 32586 }, { "epoch": 0.6821359792346982, "grad_norm": 0.2783721089363098, "learning_rate": 0.00017558644270228988, "loss": 11.6595, "step": 32587 }, { "epoch": 0.6821569119986604, "grad_norm": 0.30594611167907715, "learning_rate": 0.00017558500717110032, "loss": 11.657, "step": 32588 }, { "epoch": 0.6821778447626224, "grad_norm": 0.32979485392570496, "learning_rate": 0.00017558357160357558, "loss": 11.6826, "step": 32589 }, { "epoch": 0.6821987775265846, "grad_norm": 0.31061917543411255, "learning_rate": 0.00017558213599971626, "loss": 11.6759, "step": 32590 }, { "epoch": 0.6822197102905467, "grad_norm": 0.2723176181316376, "learning_rate": 0.0001755807003595231, "loss": 11.6836, "step": 32591 }, { "epoch": 0.6822406430545089, "grad_norm": 0.33554506301879883, "learning_rate": 0.0001755792646829968, "loss": 11.674, "step": 32592 }, { "epoch": 0.682261575818471, "grad_norm": 0.27002647519111633, "learning_rate": 0.00017557782897013803, "loss": 11.6563, "step": 32593 }, { "epoch": 0.6822825085824332, "grad_norm": 0.42929738759994507, "learning_rate": 0.0001755763932209475, "loss": 11.6633, "step": 32594 }, { "epoch": 0.6823034413463954, "grad_norm": 0.30628228187561035, "learning_rate": 0.00017557495743542585, "loss": 11.6674, "step": 32595 }, { "epoch": 0.6823243741103575, "grad_norm": 0.3487198054790497, "learning_rate": 0.0001755735216135738, "loss": 11.6818, "step": 32596 }, { "epoch": 0.6823453068743197, "grad_norm": 0.3792344331741333, "learning_rate": 0.00017557208575539205, "loss": 11.6874, "step": 32597 }, { "epoch": 0.6823662396382818, "grad_norm": 0.27527880668640137, "learning_rate": 0.0001755706498608813, "loss": 11.6734, "step": 32598 }, { "epoch": 0.682387172402244, "grad_norm": 0.31854379177093506, "learning_rate": 0.00017556921393004223, "loss": 11.6652, "step": 32599 }, { "epoch": 0.6824081051662061, "grad_norm": 0.34991148114204407, "learning_rate": 0.0001755677779628755, "loss": 11.6703, "step": 32600 }, { "epoch": 0.6824290379301683, "grad_norm": 0.37954947352409363, "learning_rate": 0.00017556634195938184, "loss": 11.654, "step": 32601 }, { "epoch": 0.6824499706941305, "grad_norm": 0.32130753993988037, "learning_rate": 0.00017556490591956193, "loss": 11.6629, "step": 32602 }, { "epoch": 0.6824709034580926, "grad_norm": 0.24698220193386078, "learning_rate": 0.00017556346984341642, "loss": 11.6672, "step": 32603 }, { "epoch": 0.6824918362220548, "grad_norm": 0.39066967368125916, "learning_rate": 0.00017556203373094608, "loss": 11.6771, "step": 32604 }, { "epoch": 0.6825127689860169, "grad_norm": 0.25533777475357056, "learning_rate": 0.00017556059758215152, "loss": 11.6516, "step": 32605 }, { "epoch": 0.6825337017499791, "grad_norm": 0.3117569088935852, "learning_rate": 0.0001755591613970335, "loss": 11.6692, "step": 32606 }, { "epoch": 0.6825546345139413, "grad_norm": 0.26271045207977295, "learning_rate": 0.00017555772517559266, "loss": 11.6805, "step": 32607 }, { "epoch": 0.6825755672779034, "grad_norm": 0.2765621542930603, "learning_rate": 0.0001755562889178297, "loss": 11.6694, "step": 32608 }, { "epoch": 0.6825965000418656, "grad_norm": 0.352932870388031, "learning_rate": 0.00017555485262374534, "loss": 11.67, "step": 32609 }, { "epoch": 0.6826174328058277, "grad_norm": 0.4067349135875702, "learning_rate": 0.00017555341629334024, "loss": 11.6815, "step": 32610 }, { "epoch": 0.6826383655697899, "grad_norm": 0.25217971205711365, "learning_rate": 0.00017555197992661509, "loss": 11.6771, "step": 32611 }, { "epoch": 0.6826592983337519, "grad_norm": 0.2691422402858734, "learning_rate": 0.00017555054352357056, "loss": 11.6802, "step": 32612 }, { "epoch": 0.6826802310977141, "grad_norm": 0.29451677203178406, "learning_rate": 0.0001755491070842074, "loss": 11.6542, "step": 32613 }, { "epoch": 0.6827011638616763, "grad_norm": 0.4703417420387268, "learning_rate": 0.00017554767060852628, "loss": 11.6815, "step": 32614 }, { "epoch": 0.6827220966256384, "grad_norm": 0.29983440041542053, "learning_rate": 0.0001755462340965279, "loss": 11.6684, "step": 32615 }, { "epoch": 0.6827430293896006, "grad_norm": 0.3489592373371124, "learning_rate": 0.00017554479754821288, "loss": 11.6633, "step": 32616 }, { "epoch": 0.6827639621535627, "grad_norm": 0.28492265939712524, "learning_rate": 0.000175543360963582, "loss": 11.6717, "step": 32617 }, { "epoch": 0.6827848949175249, "grad_norm": 0.23524311184883118, "learning_rate": 0.0001755419243426359, "loss": 11.6643, "step": 32618 }, { "epoch": 0.682805827681487, "grad_norm": 0.24590083956718445, "learning_rate": 0.00017554048768537528, "loss": 11.6625, "step": 32619 }, { "epoch": 0.6828267604454492, "grad_norm": 0.8679897785186768, "learning_rate": 0.00017553905099180082, "loss": 11.6131, "step": 32620 }, { "epoch": 0.6828476932094114, "grad_norm": 0.3292376697063446, "learning_rate": 0.00017553761426191325, "loss": 11.6672, "step": 32621 }, { "epoch": 0.6828686259733735, "grad_norm": 0.27629947662353516, "learning_rate": 0.0001755361774957132, "loss": 11.6536, "step": 32622 }, { "epoch": 0.6828895587373357, "grad_norm": 0.2759953737258911, "learning_rate": 0.00017553474069320144, "loss": 11.6643, "step": 32623 }, { "epoch": 0.6829104915012978, "grad_norm": 0.2959761619567871, "learning_rate": 0.00017553330385437858, "loss": 11.6682, "step": 32624 }, { "epoch": 0.68293142426526, "grad_norm": 0.3642076849937439, "learning_rate": 0.00017553186697924538, "loss": 11.6833, "step": 32625 }, { "epoch": 0.6829523570292222, "grad_norm": 0.32842448353767395, "learning_rate": 0.00017553043006780246, "loss": 11.6642, "step": 32626 }, { "epoch": 0.6829732897931843, "grad_norm": 0.24954600632190704, "learning_rate": 0.0001755289931200506, "loss": 11.6635, "step": 32627 }, { "epoch": 0.6829942225571465, "grad_norm": 0.2880231440067291, "learning_rate": 0.00017552755613599042, "loss": 11.6597, "step": 32628 }, { "epoch": 0.6830151553211086, "grad_norm": 0.26194310188293457, "learning_rate": 0.0001755261191156226, "loss": 11.6558, "step": 32629 }, { "epoch": 0.6830360880850708, "grad_norm": 0.35471734404563904, "learning_rate": 0.00017552468205894788, "loss": 11.6627, "step": 32630 }, { "epoch": 0.6830570208490329, "grad_norm": 0.4026358127593994, "learning_rate": 0.00017552324496596694, "loss": 11.6723, "step": 32631 }, { "epoch": 0.6830779536129951, "grad_norm": 0.2911205589771271, "learning_rate": 0.00017552180783668043, "loss": 11.673, "step": 32632 }, { "epoch": 0.6830988863769573, "grad_norm": 0.2527671456336975, "learning_rate": 0.00017552037067108912, "loss": 11.6738, "step": 32633 }, { "epoch": 0.6831198191409193, "grad_norm": 0.2375589907169342, "learning_rate": 0.00017551893346919362, "loss": 11.669, "step": 32634 }, { "epoch": 0.6831407519048815, "grad_norm": 0.32634034752845764, "learning_rate": 0.00017551749623099467, "loss": 11.6577, "step": 32635 }, { "epoch": 0.6831616846688436, "grad_norm": 0.24333573877811432, "learning_rate": 0.00017551605895649294, "loss": 11.6585, "step": 32636 }, { "epoch": 0.6831826174328058, "grad_norm": 0.25578078627586365, "learning_rate": 0.00017551462164568915, "loss": 11.6698, "step": 32637 }, { "epoch": 0.6832035501967679, "grad_norm": 0.32380813360214233, "learning_rate": 0.00017551318429858393, "loss": 11.6677, "step": 32638 }, { "epoch": 0.6832244829607301, "grad_norm": 0.2641511559486389, "learning_rate": 0.00017551174691517806, "loss": 11.6673, "step": 32639 }, { "epoch": 0.6832454157246923, "grad_norm": 0.3159942626953125, "learning_rate": 0.00017551030949547212, "loss": 11.6713, "step": 32640 }, { "epoch": 0.6832663484886544, "grad_norm": 0.2866814434528351, "learning_rate": 0.0001755088720394669, "loss": 11.6698, "step": 32641 }, { "epoch": 0.6832872812526166, "grad_norm": 0.3497803509235382, "learning_rate": 0.00017550743454716302, "loss": 11.658, "step": 32642 }, { "epoch": 0.6833082140165787, "grad_norm": 0.2839469909667969, "learning_rate": 0.00017550599701856125, "loss": 11.6621, "step": 32643 }, { "epoch": 0.6833291467805409, "grad_norm": 0.4311927855014801, "learning_rate": 0.0001755045594536622, "loss": 11.6827, "step": 32644 }, { "epoch": 0.6833500795445031, "grad_norm": 0.36248040199279785, "learning_rate": 0.0001755031218524666, "loss": 11.6806, "step": 32645 }, { "epoch": 0.6833710123084652, "grad_norm": 0.3311230540275574, "learning_rate": 0.00017550168421497512, "loss": 11.6945, "step": 32646 }, { "epoch": 0.6833919450724274, "grad_norm": 0.27444884181022644, "learning_rate": 0.00017550024654118852, "loss": 11.6654, "step": 32647 }, { "epoch": 0.6834128778363895, "grad_norm": 0.3662782311439514, "learning_rate": 0.0001754988088311074, "loss": 11.6801, "step": 32648 }, { "epoch": 0.6834338106003517, "grad_norm": 0.25438788533210754, "learning_rate": 0.00017549737108473249, "loss": 11.6665, "step": 32649 }, { "epoch": 0.6834547433643138, "grad_norm": 0.2641499638557434, "learning_rate": 0.0001754959333020645, "loss": 11.6524, "step": 32650 }, { "epoch": 0.683475676128276, "grad_norm": 0.348236083984375, "learning_rate": 0.00017549449548310406, "loss": 11.6638, "step": 32651 }, { "epoch": 0.6834966088922382, "grad_norm": 0.30199721455574036, "learning_rate": 0.00017549305762785196, "loss": 11.6731, "step": 32652 }, { "epoch": 0.6835175416562003, "grad_norm": 0.36197417974472046, "learning_rate": 0.0001754916197363088, "loss": 11.6674, "step": 32653 }, { "epoch": 0.6835384744201625, "grad_norm": 0.3052945137023926, "learning_rate": 0.0001754901818084753, "loss": 11.6732, "step": 32654 }, { "epoch": 0.6835594071841246, "grad_norm": 0.29165899753570557, "learning_rate": 0.00017548874384435217, "loss": 11.6513, "step": 32655 }, { "epoch": 0.6835803399480868, "grad_norm": 0.32321280241012573, "learning_rate": 0.0001754873058439401, "loss": 11.6743, "step": 32656 }, { "epoch": 0.6836012727120488, "grad_norm": 0.32402685284614563, "learning_rate": 0.00017548586780723974, "loss": 11.6547, "step": 32657 }, { "epoch": 0.683622205476011, "grad_norm": 0.24316097795963287, "learning_rate": 0.00017548442973425182, "loss": 11.6632, "step": 32658 }, { "epoch": 0.6836431382399732, "grad_norm": 0.297980397939682, "learning_rate": 0.00017548299162497704, "loss": 11.6686, "step": 32659 }, { "epoch": 0.6836640710039353, "grad_norm": 0.2844986617565155, "learning_rate": 0.00017548155347941604, "loss": 11.6637, "step": 32660 }, { "epoch": 0.6836850037678975, "grad_norm": 0.22844424843788147, "learning_rate": 0.00017548011529756958, "loss": 11.6592, "step": 32661 }, { "epoch": 0.6837059365318596, "grad_norm": 0.38046538829803467, "learning_rate": 0.0001754786770794383, "loss": 11.6808, "step": 32662 }, { "epoch": 0.6837268692958218, "grad_norm": 0.8643004298210144, "learning_rate": 0.00017547723882502288, "loss": 11.6951, "step": 32663 }, { "epoch": 0.683747802059784, "grad_norm": 0.29794642329216003, "learning_rate": 0.00017547580053432407, "loss": 11.6696, "step": 32664 }, { "epoch": 0.6837687348237461, "grad_norm": 0.28192955255508423, "learning_rate": 0.00017547436220734255, "loss": 11.6778, "step": 32665 }, { "epoch": 0.6837896675877083, "grad_norm": 0.28888365626335144, "learning_rate": 0.00017547292384407896, "loss": 11.6832, "step": 32666 }, { "epoch": 0.6838106003516704, "grad_norm": 0.2838636338710785, "learning_rate": 0.00017547148544453403, "loss": 11.675, "step": 32667 }, { "epoch": 0.6838315331156326, "grad_norm": 0.31558698415756226, "learning_rate": 0.00017547004700870845, "loss": 11.6775, "step": 32668 }, { "epoch": 0.6838524658795947, "grad_norm": 0.39168670773506165, "learning_rate": 0.00017546860853660288, "loss": 11.7, "step": 32669 }, { "epoch": 0.6838733986435569, "grad_norm": 0.3324751555919647, "learning_rate": 0.00017546717002821807, "loss": 11.6745, "step": 32670 }, { "epoch": 0.6838943314075191, "grad_norm": 0.2946029305458069, "learning_rate": 0.0001754657314835547, "loss": 11.6738, "step": 32671 }, { "epoch": 0.6839152641714812, "grad_norm": 0.3445070683956146, "learning_rate": 0.0001754642929026134, "loss": 11.6661, "step": 32672 }, { "epoch": 0.6839361969354434, "grad_norm": 0.3754700720310211, "learning_rate": 0.0001754628542853949, "loss": 11.6782, "step": 32673 }, { "epoch": 0.6839571296994055, "grad_norm": 0.29486536979675293, "learning_rate": 0.0001754614156318999, "loss": 11.6437, "step": 32674 }, { "epoch": 0.6839780624633677, "grad_norm": 0.2519231140613556, "learning_rate": 0.0001754599769421291, "loss": 11.6655, "step": 32675 }, { "epoch": 0.6839989952273298, "grad_norm": 0.24679183959960938, "learning_rate": 0.00017545853821608316, "loss": 11.6679, "step": 32676 }, { "epoch": 0.684019927991292, "grad_norm": 0.31729012727737427, "learning_rate": 0.0001754570994537628, "loss": 11.6862, "step": 32677 }, { "epoch": 0.6840408607552542, "grad_norm": 0.2494010627269745, "learning_rate": 0.0001754556606551687, "loss": 11.6604, "step": 32678 }, { "epoch": 0.6840617935192163, "grad_norm": 0.337424635887146, "learning_rate": 0.00017545422182030157, "loss": 11.6789, "step": 32679 }, { "epoch": 0.6840827262831785, "grad_norm": 0.2794061601161957, "learning_rate": 0.00017545278294916203, "loss": 11.6645, "step": 32680 }, { "epoch": 0.6841036590471405, "grad_norm": 0.34114959836006165, "learning_rate": 0.00017545134404175084, "loss": 11.6668, "step": 32681 }, { "epoch": 0.6841245918111027, "grad_norm": 0.25729674100875854, "learning_rate": 0.00017544990509806872, "loss": 11.6667, "step": 32682 }, { "epoch": 0.684145524575065, "grad_norm": 0.27985143661499023, "learning_rate": 0.0001754484661181163, "loss": 11.6605, "step": 32683 }, { "epoch": 0.684166457339027, "grad_norm": 0.29757121205329895, "learning_rate": 0.00017544702710189426, "loss": 11.6847, "step": 32684 }, { "epoch": 0.6841873901029892, "grad_norm": 0.36975759267807007, "learning_rate": 0.00017544558804940335, "loss": 11.656, "step": 32685 }, { "epoch": 0.6842083228669513, "grad_norm": 0.39245325326919556, "learning_rate": 0.00017544414896064422, "loss": 11.6711, "step": 32686 }, { "epoch": 0.6842292556309135, "grad_norm": 0.28204476833343506, "learning_rate": 0.0001754427098356176, "loss": 11.6702, "step": 32687 }, { "epoch": 0.6842501883948756, "grad_norm": 0.2584577798843384, "learning_rate": 0.00017544127067432413, "loss": 11.6572, "step": 32688 }, { "epoch": 0.6842711211588378, "grad_norm": 0.36357545852661133, "learning_rate": 0.00017543983147676455, "loss": 11.6882, "step": 32689 }, { "epoch": 0.6842920539228, "grad_norm": 0.2914780378341675, "learning_rate": 0.00017543839224293953, "loss": 11.6687, "step": 32690 }, { "epoch": 0.6843129866867621, "grad_norm": 0.27584490180015564, "learning_rate": 0.00017543695297284975, "loss": 11.6511, "step": 32691 }, { "epoch": 0.6843339194507243, "grad_norm": 0.4130093455314636, "learning_rate": 0.0001754355136664959, "loss": 11.67, "step": 32692 }, { "epoch": 0.6843548522146864, "grad_norm": 0.2555742859840393, "learning_rate": 0.0001754340743238787, "loss": 11.658, "step": 32693 }, { "epoch": 0.6843757849786486, "grad_norm": 0.35343044996261597, "learning_rate": 0.00017543263494499884, "loss": 11.6759, "step": 32694 }, { "epoch": 0.6843967177426107, "grad_norm": 0.27172115445137024, "learning_rate": 0.00017543119552985702, "loss": 11.659, "step": 32695 }, { "epoch": 0.6844176505065729, "grad_norm": 0.28176233172416687, "learning_rate": 0.00017542975607845386, "loss": 11.67, "step": 32696 }, { "epoch": 0.6844385832705351, "grad_norm": 0.29484108090400696, "learning_rate": 0.00017542831659079017, "loss": 11.6701, "step": 32697 }, { "epoch": 0.6844595160344972, "grad_norm": 0.2872985899448395, "learning_rate": 0.00017542687706686652, "loss": 11.6728, "step": 32698 }, { "epoch": 0.6844804487984594, "grad_norm": 0.2861299514770508, "learning_rate": 0.00017542543750668368, "loss": 11.6568, "step": 32699 }, { "epoch": 0.6845013815624215, "grad_norm": 0.26767441630363464, "learning_rate": 0.0001754239979102423, "loss": 11.6677, "step": 32700 }, { "epoch": 0.6845223143263837, "grad_norm": 0.2984054684638977, "learning_rate": 0.00017542255827754312, "loss": 11.6824, "step": 32701 }, { "epoch": 0.6845432470903459, "grad_norm": 0.4295247495174408, "learning_rate": 0.00017542111860858678, "loss": 11.6707, "step": 32702 }, { "epoch": 0.684564179854308, "grad_norm": 0.33704566955566406, "learning_rate": 0.00017541967890337404, "loss": 11.6579, "step": 32703 }, { "epoch": 0.6845851126182702, "grad_norm": 0.28001609444618225, "learning_rate": 0.00017541823916190552, "loss": 11.6777, "step": 32704 }, { "epoch": 0.6846060453822322, "grad_norm": 0.37741518020629883, "learning_rate": 0.00017541679938418194, "loss": 11.6752, "step": 32705 }, { "epoch": 0.6846269781461944, "grad_norm": 0.31461188197135925, "learning_rate": 0.000175415359570204, "loss": 11.6551, "step": 32706 }, { "epoch": 0.6846479109101565, "grad_norm": 0.31400156021118164, "learning_rate": 0.00017541391971997238, "loss": 11.6612, "step": 32707 }, { "epoch": 0.6846688436741187, "grad_norm": 0.2875000536441803, "learning_rate": 0.00017541247983348778, "loss": 11.6717, "step": 32708 }, { "epoch": 0.6846897764380809, "grad_norm": 0.23671972751617432, "learning_rate": 0.00017541103991075088, "loss": 11.6637, "step": 32709 }, { "epoch": 0.684710709202043, "grad_norm": 0.27683714032173157, "learning_rate": 0.0001754095999517624, "loss": 11.6639, "step": 32710 }, { "epoch": 0.6847316419660052, "grad_norm": 0.25405147671699524, "learning_rate": 0.00017540815995652301, "loss": 11.6891, "step": 32711 }, { "epoch": 0.6847525747299673, "grad_norm": 0.27878129482269287, "learning_rate": 0.0001754067199250334, "loss": 11.6889, "step": 32712 }, { "epoch": 0.6847735074939295, "grad_norm": 0.41630491614341736, "learning_rate": 0.00017540527985729427, "loss": 11.675, "step": 32713 }, { "epoch": 0.6847944402578916, "grad_norm": 0.37839338183403015, "learning_rate": 0.0001754038397533063, "loss": 11.6677, "step": 32714 }, { "epoch": 0.6848153730218538, "grad_norm": 0.310002863407135, "learning_rate": 0.0001754023996130702, "loss": 11.663, "step": 32715 }, { "epoch": 0.684836305785816, "grad_norm": 0.3861108124256134, "learning_rate": 0.00017540095943658666, "loss": 11.6623, "step": 32716 }, { "epoch": 0.6848572385497781, "grad_norm": 0.30609166622161865, "learning_rate": 0.00017539951922385637, "loss": 11.6717, "step": 32717 }, { "epoch": 0.6848781713137403, "grad_norm": 0.37505242228507996, "learning_rate": 0.00017539807897488, "loss": 11.6678, "step": 32718 }, { "epoch": 0.6848991040777024, "grad_norm": 0.2784668505191803, "learning_rate": 0.0001753966386896583, "loss": 11.6706, "step": 32719 }, { "epoch": 0.6849200368416646, "grad_norm": 0.3024473488330841, "learning_rate": 0.0001753951983681919, "loss": 11.668, "step": 32720 }, { "epoch": 0.6849409696056268, "grad_norm": 0.28303593397140503, "learning_rate": 0.0001753937580104815, "loss": 11.6717, "step": 32721 }, { "epoch": 0.6849619023695889, "grad_norm": 0.34773918986320496, "learning_rate": 0.00017539231761652785, "loss": 11.6679, "step": 32722 }, { "epoch": 0.6849828351335511, "grad_norm": 0.29395461082458496, "learning_rate": 0.0001753908771863316, "loss": 11.6744, "step": 32723 }, { "epoch": 0.6850037678975132, "grad_norm": 0.313788503408432, "learning_rate": 0.0001753894367198934, "loss": 11.6678, "step": 32724 }, { "epoch": 0.6850247006614754, "grad_norm": 0.34637337923049927, "learning_rate": 0.00017538799621721402, "loss": 11.6634, "step": 32725 }, { "epoch": 0.6850456334254375, "grad_norm": 0.3278356194496155, "learning_rate": 0.00017538655567829413, "loss": 11.6746, "step": 32726 }, { "epoch": 0.6850665661893997, "grad_norm": 0.3424876928329468, "learning_rate": 0.0001753851151031344, "loss": 11.6694, "step": 32727 }, { "epoch": 0.6850874989533619, "grad_norm": 0.3435978889465332, "learning_rate": 0.0001753836744917355, "loss": 11.6672, "step": 32728 }, { "epoch": 0.6851084317173239, "grad_norm": 0.3188796937465668, "learning_rate": 0.0001753822338440982, "loss": 11.6768, "step": 32729 }, { "epoch": 0.6851293644812861, "grad_norm": 0.27651962637901306, "learning_rate": 0.00017538079316022315, "loss": 11.6647, "step": 32730 }, { "epoch": 0.6851502972452482, "grad_norm": 0.2539333403110504, "learning_rate": 0.000175379352440111, "loss": 11.665, "step": 32731 }, { "epoch": 0.6851712300092104, "grad_norm": 0.2870970666408539, "learning_rate": 0.00017537791168376253, "loss": 11.6807, "step": 32732 }, { "epoch": 0.6851921627731725, "grad_norm": 0.30875393748283386, "learning_rate": 0.00017537647089117838, "loss": 11.6565, "step": 32733 }, { "epoch": 0.6852130955371347, "grad_norm": 0.29020366072654724, "learning_rate": 0.00017537503006235923, "loss": 11.6603, "step": 32734 }, { "epoch": 0.6852340283010969, "grad_norm": 0.2678074240684509, "learning_rate": 0.0001753735891973058, "loss": 11.6741, "step": 32735 }, { "epoch": 0.685254961065059, "grad_norm": 0.46547770500183105, "learning_rate": 0.00017537214829601878, "loss": 11.6727, "step": 32736 }, { "epoch": 0.6852758938290212, "grad_norm": 0.3107031285762787, "learning_rate": 0.00017537070735849887, "loss": 11.6588, "step": 32737 }, { "epoch": 0.6852968265929833, "grad_norm": 0.22958780825138092, "learning_rate": 0.00017536926638474673, "loss": 11.6577, "step": 32738 }, { "epoch": 0.6853177593569455, "grad_norm": 0.2907153367996216, "learning_rate": 0.00017536782537476306, "loss": 11.6859, "step": 32739 }, { "epoch": 0.6853386921209077, "grad_norm": 0.37313467264175415, "learning_rate": 0.0001753663843285486, "loss": 11.6757, "step": 32740 }, { "epoch": 0.6853596248848698, "grad_norm": 0.3296424448490143, "learning_rate": 0.00017536494324610398, "loss": 11.6722, "step": 32741 }, { "epoch": 0.685380557648832, "grad_norm": 0.39330291748046875, "learning_rate": 0.00017536350212742993, "loss": 11.6513, "step": 32742 }, { "epoch": 0.6854014904127941, "grad_norm": 0.28169316053390503, "learning_rate": 0.00017536206097252714, "loss": 11.6604, "step": 32743 }, { "epoch": 0.6854224231767563, "grad_norm": 0.25342637300491333, "learning_rate": 0.00017536061978139632, "loss": 11.6669, "step": 32744 }, { "epoch": 0.6854433559407184, "grad_norm": 0.2743973135948181, "learning_rate": 0.0001753591785540381, "loss": 11.6701, "step": 32745 }, { "epoch": 0.6854642887046806, "grad_norm": 0.4616859555244446, "learning_rate": 0.00017535773729045323, "loss": 11.6662, "step": 32746 }, { "epoch": 0.6854852214686428, "grad_norm": 0.2489912360906601, "learning_rate": 0.0001753562959906424, "loss": 11.6808, "step": 32747 }, { "epoch": 0.6855061542326049, "grad_norm": 0.3362216353416443, "learning_rate": 0.00017535485465460627, "loss": 11.6962, "step": 32748 }, { "epoch": 0.6855270869965671, "grad_norm": 0.2730496823787689, "learning_rate": 0.00017535341328234555, "loss": 11.6709, "step": 32749 }, { "epoch": 0.6855480197605291, "grad_norm": 0.41099628806114197, "learning_rate": 0.00017535197187386095, "loss": 11.6689, "step": 32750 }, { "epoch": 0.6855689525244913, "grad_norm": 0.3130401372909546, "learning_rate": 0.00017535053042915314, "loss": 11.6784, "step": 32751 }, { "epoch": 0.6855898852884534, "grad_norm": 0.3117504417896271, "learning_rate": 0.0001753490889482228, "loss": 11.6719, "step": 32752 }, { "epoch": 0.6856108180524156, "grad_norm": 0.28340286016464233, "learning_rate": 0.00017534764743107066, "loss": 11.6623, "step": 32753 }, { "epoch": 0.6856317508163778, "grad_norm": 0.4927830398082733, "learning_rate": 0.0001753462058776974, "loss": 11.6703, "step": 32754 }, { "epoch": 0.6856526835803399, "grad_norm": 0.3051774203777313, "learning_rate": 0.0001753447642881037, "loss": 11.6861, "step": 32755 }, { "epoch": 0.6856736163443021, "grad_norm": 0.3177591860294342, "learning_rate": 0.00017534332266229026, "loss": 11.6745, "step": 32756 }, { "epoch": 0.6856945491082642, "grad_norm": 0.27252402901649475, "learning_rate": 0.0001753418810002578, "loss": 11.6712, "step": 32757 }, { "epoch": 0.6857154818722264, "grad_norm": 0.2741020619869232, "learning_rate": 0.00017534043930200695, "loss": 11.6473, "step": 32758 }, { "epoch": 0.6857364146361885, "grad_norm": 0.33594369888305664, "learning_rate": 0.00017533899756753846, "loss": 11.6876, "step": 32759 }, { "epoch": 0.6857573474001507, "grad_norm": 0.2895396053791046, "learning_rate": 0.000175337555796853, "loss": 11.6571, "step": 32760 }, { "epoch": 0.6857782801641129, "grad_norm": 0.24607005715370178, "learning_rate": 0.00017533611398995127, "loss": 11.6689, "step": 32761 }, { "epoch": 0.685799212928075, "grad_norm": 0.28362035751342773, "learning_rate": 0.00017533467214683396, "loss": 11.6791, "step": 32762 }, { "epoch": 0.6858201456920372, "grad_norm": 0.36825641989707947, "learning_rate": 0.00017533323026750175, "loss": 11.6708, "step": 32763 }, { "epoch": 0.6858410784559993, "grad_norm": 0.35439735651016235, "learning_rate": 0.00017533178835195536, "loss": 11.6766, "step": 32764 }, { "epoch": 0.6858620112199615, "grad_norm": 0.29429924488067627, "learning_rate": 0.0001753303464001955, "loss": 11.6554, "step": 32765 }, { "epoch": 0.6858829439839237, "grad_norm": 0.26393476128578186, "learning_rate": 0.00017532890441222278, "loss": 11.6672, "step": 32766 }, { "epoch": 0.6859038767478858, "grad_norm": 0.2523288130760193, "learning_rate": 0.00017532746238803796, "loss": 11.6771, "step": 32767 }, { "epoch": 0.685924809511848, "grad_norm": 0.2574795186519623, "learning_rate": 0.00017532602032764173, "loss": 11.6594, "step": 32768 }, { "epoch": 0.6859457422758101, "grad_norm": 0.2837910056114197, "learning_rate": 0.00017532457823103478, "loss": 11.6629, "step": 32769 }, { "epoch": 0.6859666750397723, "grad_norm": 0.3329612612724304, "learning_rate": 0.0001753231360982178, "loss": 11.688, "step": 32770 }, { "epoch": 0.6859876078037344, "grad_norm": 0.2988382577896118, "learning_rate": 0.00017532169392919146, "loss": 11.6684, "step": 32771 }, { "epoch": 0.6860085405676966, "grad_norm": 0.2916680574417114, "learning_rate": 0.00017532025172395645, "loss": 11.6694, "step": 32772 }, { "epoch": 0.6860294733316588, "grad_norm": 0.2540111839771271, "learning_rate": 0.00017531880948251354, "loss": 11.6791, "step": 32773 }, { "epoch": 0.6860504060956208, "grad_norm": 0.2543551027774811, "learning_rate": 0.00017531736720486333, "loss": 11.6725, "step": 32774 }, { "epoch": 0.686071338859583, "grad_norm": 0.35494527220726013, "learning_rate": 0.00017531592489100658, "loss": 11.6559, "step": 32775 }, { "epoch": 0.6860922716235451, "grad_norm": 0.3380519151687622, "learning_rate": 0.00017531448254094392, "loss": 11.6378, "step": 32776 }, { "epoch": 0.6861132043875073, "grad_norm": 0.27966710925102234, "learning_rate": 0.0001753130401546761, "loss": 11.649, "step": 32777 }, { "epoch": 0.6861341371514694, "grad_norm": 0.3000469207763672, "learning_rate": 0.00017531159773220378, "loss": 11.6878, "step": 32778 }, { "epoch": 0.6861550699154316, "grad_norm": 0.3016311228275299, "learning_rate": 0.0001753101552735277, "loss": 11.6878, "step": 32779 }, { "epoch": 0.6861760026793938, "grad_norm": 0.3509073853492737, "learning_rate": 0.0001753087127786485, "loss": 11.6876, "step": 32780 }, { "epoch": 0.6861969354433559, "grad_norm": 0.2702990770339966, "learning_rate": 0.00017530727024756687, "loss": 11.6875, "step": 32781 }, { "epoch": 0.6862178682073181, "grad_norm": 0.39244505763053894, "learning_rate": 0.00017530582768028355, "loss": 11.6794, "step": 32782 }, { "epoch": 0.6862388009712802, "grad_norm": 0.2898569107055664, "learning_rate": 0.0001753043850767992, "loss": 11.6739, "step": 32783 }, { "epoch": 0.6862597337352424, "grad_norm": 0.28556692600250244, "learning_rate": 0.00017530294243711452, "loss": 11.6722, "step": 32784 }, { "epoch": 0.6862806664992046, "grad_norm": 0.3108213543891907, "learning_rate": 0.00017530149976123022, "loss": 11.6672, "step": 32785 }, { "epoch": 0.6863015992631667, "grad_norm": 0.24952973425388336, "learning_rate": 0.000175300057049147, "loss": 11.6749, "step": 32786 }, { "epoch": 0.6863225320271289, "grad_norm": 0.3258977234363556, "learning_rate": 0.0001752986143008655, "loss": 11.6594, "step": 32787 }, { "epoch": 0.686343464791091, "grad_norm": 0.3834744691848755, "learning_rate": 0.00017529717151638645, "loss": 11.6786, "step": 32788 }, { "epoch": 0.6863643975550532, "grad_norm": 0.35002195835113525, "learning_rate": 0.00017529572869571052, "loss": 11.6673, "step": 32789 }, { "epoch": 0.6863853303190153, "grad_norm": 0.3011165261268616, "learning_rate": 0.00017529428583883846, "loss": 11.6907, "step": 32790 }, { "epoch": 0.6864062630829775, "grad_norm": 0.34603843092918396, "learning_rate": 0.00017529284294577093, "loss": 11.6682, "step": 32791 }, { "epoch": 0.6864271958469397, "grad_norm": 0.3278908133506775, "learning_rate": 0.00017529140001650858, "loss": 11.6681, "step": 32792 }, { "epoch": 0.6864481286109018, "grad_norm": 0.22656193375587463, "learning_rate": 0.0001752899570510522, "loss": 11.6784, "step": 32793 }, { "epoch": 0.686469061374864, "grad_norm": 0.37085336446762085, "learning_rate": 0.00017528851404940242, "loss": 11.6846, "step": 32794 }, { "epoch": 0.686489994138826, "grad_norm": 0.21402832865715027, "learning_rate": 0.0001752870710115599, "loss": 11.6747, "step": 32795 }, { "epoch": 0.6865109269027883, "grad_norm": 0.4284840226173401, "learning_rate": 0.00017528562793752542, "loss": 11.6738, "step": 32796 }, { "epoch": 0.6865318596667503, "grad_norm": 0.3498786985874176, "learning_rate": 0.0001752841848272996, "loss": 11.6736, "step": 32797 }, { "epoch": 0.6865527924307125, "grad_norm": 0.23129595816135406, "learning_rate": 0.00017528274168088318, "loss": 11.6594, "step": 32798 }, { "epoch": 0.6865737251946747, "grad_norm": 0.34408849477767944, "learning_rate": 0.00017528129849827683, "loss": 11.6799, "step": 32799 }, { "epoch": 0.6865946579586368, "grad_norm": 0.3047725558280945, "learning_rate": 0.00017527985527948124, "loss": 11.6545, "step": 32800 }, { "epoch": 0.686615590722599, "grad_norm": 0.33407098054885864, "learning_rate": 0.00017527841202449713, "loss": 11.6857, "step": 32801 }, { "epoch": 0.6866365234865611, "grad_norm": 0.2813357412815094, "learning_rate": 0.00017527696873332517, "loss": 11.6875, "step": 32802 }, { "epoch": 0.6866574562505233, "grad_norm": 0.2515428364276886, "learning_rate": 0.00017527552540596607, "loss": 11.679, "step": 32803 }, { "epoch": 0.6866783890144855, "grad_norm": 0.27832794189453125, "learning_rate": 0.00017527408204242052, "loss": 11.6656, "step": 32804 }, { "epoch": 0.6866993217784476, "grad_norm": 0.34159064292907715, "learning_rate": 0.00017527263864268921, "loss": 11.6776, "step": 32805 }, { "epoch": 0.6867202545424098, "grad_norm": 0.31445714831352234, "learning_rate": 0.00017527119520677285, "loss": 11.6575, "step": 32806 }, { "epoch": 0.6867411873063719, "grad_norm": 0.44585269689559937, "learning_rate": 0.00017526975173467207, "loss": 11.667, "step": 32807 }, { "epoch": 0.6867621200703341, "grad_norm": 0.2388283908367157, "learning_rate": 0.00017526830822638765, "loss": 11.6648, "step": 32808 }, { "epoch": 0.6867830528342962, "grad_norm": 0.2295886129140854, "learning_rate": 0.0001752668646819202, "loss": 11.6653, "step": 32809 }, { "epoch": 0.6868039855982584, "grad_norm": 0.3083903193473816, "learning_rate": 0.00017526542110127052, "loss": 11.665, "step": 32810 }, { "epoch": 0.6868249183622206, "grad_norm": 0.396150141954422, "learning_rate": 0.00017526397748443922, "loss": 11.6722, "step": 32811 }, { "epoch": 0.6868458511261827, "grad_norm": 0.2816507816314697, "learning_rate": 0.000175262533831427, "loss": 11.6749, "step": 32812 }, { "epoch": 0.6868667838901449, "grad_norm": 0.3325754404067993, "learning_rate": 0.00017526109014223462, "loss": 11.6706, "step": 32813 }, { "epoch": 0.686887716654107, "grad_norm": 0.3897033631801605, "learning_rate": 0.00017525964641686267, "loss": 11.6697, "step": 32814 }, { "epoch": 0.6869086494180692, "grad_norm": 0.26952818036079407, "learning_rate": 0.00017525820265531193, "loss": 11.6718, "step": 32815 }, { "epoch": 0.6869295821820313, "grad_norm": 0.27262765169143677, "learning_rate": 0.00017525675885758307, "loss": 11.6602, "step": 32816 }, { "epoch": 0.6869505149459935, "grad_norm": 0.33736759424209595, "learning_rate": 0.00017525531502367677, "loss": 11.6695, "step": 32817 }, { "epoch": 0.6869714477099557, "grad_norm": 0.2904306650161743, "learning_rate": 0.00017525387115359373, "loss": 11.6663, "step": 32818 }, { "epoch": 0.6869923804739178, "grad_norm": 0.27334919571876526, "learning_rate": 0.00017525242724733463, "loss": 11.6598, "step": 32819 }, { "epoch": 0.68701331323788, "grad_norm": 0.3041553795337677, "learning_rate": 0.0001752509833049002, "loss": 11.6731, "step": 32820 }, { "epoch": 0.687034246001842, "grad_norm": 0.3654094934463501, "learning_rate": 0.0001752495393262911, "loss": 11.6765, "step": 32821 }, { "epoch": 0.6870551787658042, "grad_norm": 0.32798832654953003, "learning_rate": 0.00017524809531150806, "loss": 11.6756, "step": 32822 }, { "epoch": 0.6870761115297664, "grad_norm": 0.34376418590545654, "learning_rate": 0.00017524665126055172, "loss": 11.6759, "step": 32823 }, { "epoch": 0.6870970442937285, "grad_norm": 0.259846031665802, "learning_rate": 0.00017524520717342282, "loss": 11.6692, "step": 32824 }, { "epoch": 0.6871179770576907, "grad_norm": 0.2757721543312073, "learning_rate": 0.00017524376305012206, "loss": 11.6602, "step": 32825 }, { "epoch": 0.6871389098216528, "grad_norm": 0.28352871537208557, "learning_rate": 0.0001752423188906501, "loss": 11.6699, "step": 32826 }, { "epoch": 0.687159842585615, "grad_norm": 0.262996643781662, "learning_rate": 0.00017524087469500765, "loss": 11.6707, "step": 32827 }, { "epoch": 0.6871807753495771, "grad_norm": 0.27593836188316345, "learning_rate": 0.0001752394304631954, "loss": 11.6597, "step": 32828 }, { "epoch": 0.6872017081135393, "grad_norm": 0.44628313183784485, "learning_rate": 0.00017523798619521408, "loss": 11.662, "step": 32829 }, { "epoch": 0.6872226408775015, "grad_norm": 0.31107255816459656, "learning_rate": 0.00017523654189106433, "loss": 11.6682, "step": 32830 }, { "epoch": 0.6872435736414636, "grad_norm": 0.35901305079460144, "learning_rate": 0.00017523509755074684, "loss": 11.6582, "step": 32831 }, { "epoch": 0.6872645064054258, "grad_norm": 0.3269466161727905, "learning_rate": 0.00017523365317426238, "loss": 11.6629, "step": 32832 }, { "epoch": 0.6872854391693879, "grad_norm": 0.26161617040634155, "learning_rate": 0.00017523220876161156, "loss": 11.6553, "step": 32833 }, { "epoch": 0.6873063719333501, "grad_norm": 0.33683842420578003, "learning_rate": 0.00017523076431279512, "loss": 11.6748, "step": 32834 }, { "epoch": 0.6873273046973122, "grad_norm": 0.30107662081718445, "learning_rate": 0.00017522931982781375, "loss": 11.6757, "step": 32835 }, { "epoch": 0.6873482374612744, "grad_norm": 1.2314796447753906, "learning_rate": 0.0001752278753066681, "loss": 11.6155, "step": 32836 }, { "epoch": 0.6873691702252366, "grad_norm": 0.35877856612205505, "learning_rate": 0.00017522643074935896, "loss": 11.6671, "step": 32837 }, { "epoch": 0.6873901029891987, "grad_norm": 0.30038607120513916, "learning_rate": 0.0001752249861558869, "loss": 11.6814, "step": 32838 }, { "epoch": 0.6874110357531609, "grad_norm": 0.3307773470878601, "learning_rate": 0.00017522354152625273, "loss": 11.6769, "step": 32839 }, { "epoch": 0.687431968517123, "grad_norm": 0.33909526467323303, "learning_rate": 0.0001752220968604571, "loss": 11.6734, "step": 32840 }, { "epoch": 0.6874529012810852, "grad_norm": 0.34032103419303894, "learning_rate": 0.0001752206521585007, "loss": 11.6716, "step": 32841 }, { "epoch": 0.6874738340450474, "grad_norm": 0.3193768560886383, "learning_rate": 0.0001752192074203842, "loss": 11.6714, "step": 32842 }, { "epoch": 0.6874947668090094, "grad_norm": 0.27494698762893677, "learning_rate": 0.00017521776264610835, "loss": 11.6601, "step": 32843 }, { "epoch": 0.6875156995729717, "grad_norm": 0.33386659622192383, "learning_rate": 0.00017521631783567378, "loss": 11.6706, "step": 32844 }, { "epoch": 0.6875366323369337, "grad_norm": 0.34838202595710754, "learning_rate": 0.00017521487298908123, "loss": 11.6729, "step": 32845 }, { "epoch": 0.6875575651008959, "grad_norm": 0.2889491617679596, "learning_rate": 0.00017521342810633138, "loss": 11.68, "step": 32846 }, { "epoch": 0.687578497864858, "grad_norm": 0.2658185660839081, "learning_rate": 0.00017521198318742496, "loss": 11.6664, "step": 32847 }, { "epoch": 0.6875994306288202, "grad_norm": 0.3227101266384125, "learning_rate": 0.0001752105382323626, "loss": 11.6665, "step": 32848 }, { "epoch": 0.6876203633927824, "grad_norm": 0.30198055505752563, "learning_rate": 0.00017520909324114504, "loss": 11.6684, "step": 32849 }, { "epoch": 0.6876412961567445, "grad_norm": 0.3214026689529419, "learning_rate": 0.00017520764821377295, "loss": 11.6762, "step": 32850 }, { "epoch": 0.6876622289207067, "grad_norm": 0.2871357202529907, "learning_rate": 0.00017520620315024705, "loss": 11.6844, "step": 32851 }, { "epoch": 0.6876831616846688, "grad_norm": 0.356365829706192, "learning_rate": 0.00017520475805056802, "loss": 11.6626, "step": 32852 }, { "epoch": 0.687704094448631, "grad_norm": 0.2885514497756958, "learning_rate": 0.00017520331291473654, "loss": 11.6856, "step": 32853 }, { "epoch": 0.6877250272125931, "grad_norm": 0.3185946047306061, "learning_rate": 0.00017520186774275332, "loss": 11.671, "step": 32854 }, { "epoch": 0.6877459599765553, "grad_norm": 0.31741347908973694, "learning_rate": 0.00017520042253461909, "loss": 11.6705, "step": 32855 }, { "epoch": 0.6877668927405175, "grad_norm": 0.2529606521129608, "learning_rate": 0.00017519897729033449, "loss": 11.6694, "step": 32856 }, { "epoch": 0.6877878255044796, "grad_norm": 0.2603795528411865, "learning_rate": 0.00017519753200990023, "loss": 11.6592, "step": 32857 }, { "epoch": 0.6878087582684418, "grad_norm": 0.4082806408405304, "learning_rate": 0.00017519608669331702, "loss": 11.6702, "step": 32858 }, { "epoch": 0.6878296910324039, "grad_norm": 0.27289125323295593, "learning_rate": 0.00017519464134058553, "loss": 11.6517, "step": 32859 }, { "epoch": 0.6878506237963661, "grad_norm": 0.2274383157491684, "learning_rate": 0.00017519319595170644, "loss": 11.6689, "step": 32860 }, { "epoch": 0.6878715565603283, "grad_norm": 0.292073130607605, "learning_rate": 0.00017519175052668051, "loss": 11.6747, "step": 32861 }, { "epoch": 0.6878924893242904, "grad_norm": 0.2602698504924774, "learning_rate": 0.00017519030506550843, "loss": 11.6584, "step": 32862 }, { "epoch": 0.6879134220882526, "grad_norm": 0.34632566571235657, "learning_rate": 0.0001751888595681908, "loss": 11.6707, "step": 32863 }, { "epoch": 0.6879343548522147, "grad_norm": 0.26409465074539185, "learning_rate": 0.00017518741403472842, "loss": 11.6796, "step": 32864 }, { "epoch": 0.6879552876161769, "grad_norm": 0.36656197905540466, "learning_rate": 0.00017518596846512194, "loss": 11.6578, "step": 32865 }, { "epoch": 0.687976220380139, "grad_norm": 0.27282509207725525, "learning_rate": 0.00017518452285937203, "loss": 11.6591, "step": 32866 }, { "epoch": 0.6879971531441011, "grad_norm": 0.2737460434436798, "learning_rate": 0.00017518307721747943, "loss": 11.6716, "step": 32867 }, { "epoch": 0.6880180859080633, "grad_norm": 0.32266077399253845, "learning_rate": 0.00017518163153944482, "loss": 11.6953, "step": 32868 }, { "epoch": 0.6880390186720254, "grad_norm": 0.28596678376197815, "learning_rate": 0.0001751801858252689, "loss": 11.659, "step": 32869 }, { "epoch": 0.6880599514359876, "grad_norm": 0.3537379801273346, "learning_rate": 0.00017517874007495237, "loss": 11.6634, "step": 32870 }, { "epoch": 0.6880808841999497, "grad_norm": 0.35957837104797363, "learning_rate": 0.00017517729428849588, "loss": 11.6658, "step": 32871 }, { "epoch": 0.6881018169639119, "grad_norm": 0.38783496618270874, "learning_rate": 0.00017517584846590015, "loss": 11.6744, "step": 32872 }, { "epoch": 0.688122749727874, "grad_norm": 0.3097905218601227, "learning_rate": 0.00017517440260716592, "loss": 11.6678, "step": 32873 }, { "epoch": 0.6881436824918362, "grad_norm": 0.2702690064907074, "learning_rate": 0.00017517295671229386, "loss": 11.6774, "step": 32874 }, { "epoch": 0.6881646152557984, "grad_norm": 0.2979986369609833, "learning_rate": 0.00017517151078128463, "loss": 11.669, "step": 32875 }, { "epoch": 0.6881855480197605, "grad_norm": 0.37024855613708496, "learning_rate": 0.0001751700648141389, "loss": 11.6642, "step": 32876 }, { "epoch": 0.6882064807837227, "grad_norm": 0.2830425798892975, "learning_rate": 0.0001751686188108575, "loss": 11.6695, "step": 32877 }, { "epoch": 0.6882274135476848, "grad_norm": 0.4027040898799896, "learning_rate": 0.00017516717277144098, "loss": 11.6983, "step": 32878 }, { "epoch": 0.688248346311647, "grad_norm": 0.29250675439834595, "learning_rate": 0.00017516572669589012, "loss": 11.6647, "step": 32879 }, { "epoch": 0.6882692790756092, "grad_norm": 0.3061736524105072, "learning_rate": 0.0001751642805842056, "loss": 11.6492, "step": 32880 }, { "epoch": 0.6882902118395713, "grad_norm": 0.3233535587787628, "learning_rate": 0.00017516283443638807, "loss": 11.6845, "step": 32881 }, { "epoch": 0.6883111446035335, "grad_norm": 0.24425804615020752, "learning_rate": 0.00017516138825243824, "loss": 11.6481, "step": 32882 }, { "epoch": 0.6883320773674956, "grad_norm": 0.33638325333595276, "learning_rate": 0.00017515994203235686, "loss": 11.6742, "step": 32883 }, { "epoch": 0.6883530101314578, "grad_norm": 0.29348382353782654, "learning_rate": 0.0001751584957761446, "loss": 11.6694, "step": 32884 }, { "epoch": 0.6883739428954199, "grad_norm": 0.32863226532936096, "learning_rate": 0.00017515704948380215, "loss": 11.6707, "step": 32885 }, { "epoch": 0.6883948756593821, "grad_norm": 0.31833314895629883, "learning_rate": 0.00017515560315533018, "loss": 11.6716, "step": 32886 }, { "epoch": 0.6884158084233443, "grad_norm": 0.35243725776672363, "learning_rate": 0.0001751541567907294, "loss": 11.6769, "step": 32887 }, { "epoch": 0.6884367411873064, "grad_norm": 0.32109588384628296, "learning_rate": 0.0001751527103900005, "loss": 11.6461, "step": 32888 }, { "epoch": 0.6884576739512686, "grad_norm": 0.2846001088619232, "learning_rate": 0.00017515126395314422, "loss": 11.6685, "step": 32889 }, { "epoch": 0.6884786067152306, "grad_norm": 0.3178809583187103, "learning_rate": 0.00017514981748016121, "loss": 11.6648, "step": 32890 }, { "epoch": 0.6884995394791928, "grad_norm": 0.31517907977104187, "learning_rate": 0.00017514837097105216, "loss": 11.6693, "step": 32891 }, { "epoch": 0.6885204722431549, "grad_norm": 0.3381098806858063, "learning_rate": 0.0001751469244258178, "loss": 11.6751, "step": 32892 }, { "epoch": 0.6885414050071171, "grad_norm": 0.291812002658844, "learning_rate": 0.00017514547784445878, "loss": 11.6695, "step": 32893 }, { "epoch": 0.6885623377710793, "grad_norm": 0.3103218674659729, "learning_rate": 0.00017514403122697588, "loss": 11.6767, "step": 32894 }, { "epoch": 0.6885832705350414, "grad_norm": 0.3859521746635437, "learning_rate": 0.0001751425845733697, "loss": 11.6709, "step": 32895 }, { "epoch": 0.6886042032990036, "grad_norm": 0.2769489288330078, "learning_rate": 0.00017514113788364097, "loss": 11.6668, "step": 32896 }, { "epoch": 0.6886251360629657, "grad_norm": 0.29903843998908997, "learning_rate": 0.00017513969115779042, "loss": 11.6808, "step": 32897 }, { "epoch": 0.6886460688269279, "grad_norm": 0.2683453857898712, "learning_rate": 0.00017513824439581866, "loss": 11.6638, "step": 32898 }, { "epoch": 0.6886670015908901, "grad_norm": 0.29212847352027893, "learning_rate": 0.00017513679759772647, "loss": 11.6776, "step": 32899 }, { "epoch": 0.6886879343548522, "grad_norm": 0.27237603068351746, "learning_rate": 0.00017513535076351452, "loss": 11.6792, "step": 32900 }, { "epoch": 0.6887088671188144, "grad_norm": 0.3011302053928375, "learning_rate": 0.0001751339038931835, "loss": 11.6741, "step": 32901 }, { "epoch": 0.6887297998827765, "grad_norm": 0.32553574442863464, "learning_rate": 0.00017513245698673412, "loss": 11.6605, "step": 32902 }, { "epoch": 0.6887507326467387, "grad_norm": 0.2884492874145508, "learning_rate": 0.00017513101004416702, "loss": 11.6736, "step": 32903 }, { "epoch": 0.6887716654107008, "grad_norm": 0.344613641500473, "learning_rate": 0.00017512956306548296, "loss": 11.6881, "step": 32904 }, { "epoch": 0.688792598174663, "grad_norm": 0.29418471455574036, "learning_rate": 0.00017512811605068263, "loss": 11.6684, "step": 32905 }, { "epoch": 0.6888135309386252, "grad_norm": 0.2628190815448761, "learning_rate": 0.0001751266689997667, "loss": 11.6651, "step": 32906 }, { "epoch": 0.6888344637025873, "grad_norm": 0.2690926790237427, "learning_rate": 0.00017512522191273587, "loss": 11.676, "step": 32907 }, { "epoch": 0.6888553964665495, "grad_norm": 0.2782326638698578, "learning_rate": 0.00017512377478959087, "loss": 11.6715, "step": 32908 }, { "epoch": 0.6888763292305116, "grad_norm": 0.26088547706604004, "learning_rate": 0.0001751223276303323, "loss": 11.6785, "step": 32909 }, { "epoch": 0.6888972619944738, "grad_norm": 0.28183573484420776, "learning_rate": 0.00017512088043496098, "loss": 11.6846, "step": 32910 }, { "epoch": 0.6889181947584359, "grad_norm": 0.33383968472480774, "learning_rate": 0.0001751194332034775, "loss": 11.6685, "step": 32911 }, { "epoch": 0.688939127522398, "grad_norm": 0.287460595369339, "learning_rate": 0.00017511798593588266, "loss": 11.6609, "step": 32912 }, { "epoch": 0.6889600602863603, "grad_norm": 0.26787805557250977, "learning_rate": 0.00017511653863217706, "loss": 11.6647, "step": 32913 }, { "epoch": 0.6889809930503223, "grad_norm": 0.2715299725532532, "learning_rate": 0.00017511509129236144, "loss": 11.6988, "step": 32914 }, { "epoch": 0.6890019258142845, "grad_norm": 0.354389488697052, "learning_rate": 0.0001751136439164365, "loss": 11.6677, "step": 32915 }, { "epoch": 0.6890228585782466, "grad_norm": 0.26873695850372314, "learning_rate": 0.00017511219650440292, "loss": 11.6609, "step": 32916 }, { "epoch": 0.6890437913422088, "grad_norm": 0.3029730021953583, "learning_rate": 0.0001751107490562614, "loss": 11.6641, "step": 32917 }, { "epoch": 0.689064724106171, "grad_norm": 0.26811227202415466, "learning_rate": 0.00017510930157201262, "loss": 11.6715, "step": 32918 }, { "epoch": 0.6890856568701331, "grad_norm": 0.29224416613578796, "learning_rate": 0.00017510785405165733, "loss": 11.6606, "step": 32919 }, { "epoch": 0.6891065896340953, "grad_norm": 0.37548568844795227, "learning_rate": 0.00017510640649519616, "loss": 11.6672, "step": 32920 }, { "epoch": 0.6891275223980574, "grad_norm": 0.2900831401348114, "learning_rate": 0.00017510495890262986, "loss": 11.6772, "step": 32921 }, { "epoch": 0.6891484551620196, "grad_norm": 0.33662161231040955, "learning_rate": 0.0001751035112739591, "loss": 11.6657, "step": 32922 }, { "epoch": 0.6891693879259817, "grad_norm": 0.27944818139076233, "learning_rate": 0.00017510206360918455, "loss": 11.6954, "step": 32923 }, { "epoch": 0.6891903206899439, "grad_norm": 0.23430421948432922, "learning_rate": 0.00017510061590830694, "loss": 11.6713, "step": 32924 }, { "epoch": 0.6892112534539061, "grad_norm": 0.30563533306121826, "learning_rate": 0.00017509916817132697, "loss": 11.6851, "step": 32925 }, { "epoch": 0.6892321862178682, "grad_norm": 0.27374038100242615, "learning_rate": 0.00017509772039824532, "loss": 11.6701, "step": 32926 }, { "epoch": 0.6892531189818304, "grad_norm": 0.2689994275569916, "learning_rate": 0.0001750962725890627, "loss": 11.674, "step": 32927 }, { "epoch": 0.6892740517457925, "grad_norm": 0.286704957485199, "learning_rate": 0.0001750948247437798, "loss": 11.6633, "step": 32928 }, { "epoch": 0.6892949845097547, "grad_norm": 0.30273500084877014, "learning_rate": 0.00017509337686239727, "loss": 11.6729, "step": 32929 }, { "epoch": 0.6893159172737168, "grad_norm": 0.4092395305633545, "learning_rate": 0.0001750919289449159, "loss": 11.6791, "step": 32930 }, { "epoch": 0.689336850037679, "grad_norm": 0.30186760425567627, "learning_rate": 0.0001750904809913363, "loss": 11.6602, "step": 32931 }, { "epoch": 0.6893577828016412, "grad_norm": 0.32616016268730164, "learning_rate": 0.00017508903300165923, "loss": 11.6708, "step": 32932 }, { "epoch": 0.6893787155656033, "grad_norm": 0.2666056752204895, "learning_rate": 0.00017508758497588534, "loss": 11.6627, "step": 32933 }, { "epoch": 0.6893996483295655, "grad_norm": 0.29764148592948914, "learning_rate": 0.00017508613691401536, "loss": 11.6737, "step": 32934 }, { "epoch": 0.6894205810935276, "grad_norm": 0.288356751203537, "learning_rate": 0.00017508468881604995, "loss": 11.6705, "step": 32935 }, { "epoch": 0.6894415138574898, "grad_norm": 0.2692964971065521, "learning_rate": 0.00017508324068198983, "loss": 11.6831, "step": 32936 }, { "epoch": 0.6894624466214518, "grad_norm": 0.2746887803077698, "learning_rate": 0.0001750817925118357, "loss": 11.6631, "step": 32937 }, { "epoch": 0.689483379385414, "grad_norm": 0.3285834491252899, "learning_rate": 0.00017508034430558823, "loss": 11.6504, "step": 32938 }, { "epoch": 0.6895043121493762, "grad_norm": 0.30815744400024414, "learning_rate": 0.00017507889606324816, "loss": 11.672, "step": 32939 }, { "epoch": 0.6895252449133383, "grad_norm": 0.2628021836280823, "learning_rate": 0.00017507744778481613, "loss": 11.6663, "step": 32940 }, { "epoch": 0.6895461776773005, "grad_norm": 0.3012641966342926, "learning_rate": 0.0001750759994702929, "loss": 11.6722, "step": 32941 }, { "epoch": 0.6895671104412626, "grad_norm": 0.2997500002384186, "learning_rate": 0.0001750745511196791, "loss": 11.6673, "step": 32942 }, { "epoch": 0.6895880432052248, "grad_norm": 0.3117533028125763, "learning_rate": 0.00017507310273297548, "loss": 11.6546, "step": 32943 }, { "epoch": 0.689608975969187, "grad_norm": 0.36952632665634155, "learning_rate": 0.0001750716543101827, "loss": 11.6799, "step": 32944 }, { "epoch": 0.6896299087331491, "grad_norm": 0.312724232673645, "learning_rate": 0.0001750702058513015, "loss": 11.683, "step": 32945 }, { "epoch": 0.6896508414971113, "grad_norm": 0.2671078145503998, "learning_rate": 0.0001750687573563325, "loss": 11.6708, "step": 32946 }, { "epoch": 0.6896717742610734, "grad_norm": 0.28548991680145264, "learning_rate": 0.0001750673088252765, "loss": 11.6702, "step": 32947 }, { "epoch": 0.6896927070250356, "grad_norm": 0.33556219935417175, "learning_rate": 0.0001750658602581341, "loss": 11.6714, "step": 32948 }, { "epoch": 0.6897136397889977, "grad_norm": 0.2445230484008789, "learning_rate": 0.00017506441165490605, "loss": 11.6795, "step": 32949 }, { "epoch": 0.6897345725529599, "grad_norm": 0.2993267774581909, "learning_rate": 0.00017506296301559302, "loss": 11.6605, "step": 32950 }, { "epoch": 0.6897555053169221, "grad_norm": 0.31879082322120667, "learning_rate": 0.00017506151434019574, "loss": 11.6633, "step": 32951 }, { "epoch": 0.6897764380808842, "grad_norm": 0.2674425542354584, "learning_rate": 0.00017506006562871486, "loss": 11.6578, "step": 32952 }, { "epoch": 0.6897973708448464, "grad_norm": 0.3073529303073883, "learning_rate": 0.00017505861688115113, "loss": 11.6681, "step": 32953 }, { "epoch": 0.6898183036088085, "grad_norm": 0.27593883872032166, "learning_rate": 0.00017505716809750518, "loss": 11.6705, "step": 32954 }, { "epoch": 0.6898392363727707, "grad_norm": 0.24163901805877686, "learning_rate": 0.0001750557192777778, "loss": 11.6799, "step": 32955 }, { "epoch": 0.6898601691367328, "grad_norm": 0.3799768388271332, "learning_rate": 0.00017505427042196957, "loss": 11.6757, "step": 32956 }, { "epoch": 0.689881101900695, "grad_norm": 0.2805246412754059, "learning_rate": 0.00017505282153008133, "loss": 11.6612, "step": 32957 }, { "epoch": 0.6899020346646572, "grad_norm": 0.3107815980911255, "learning_rate": 0.00017505137260211365, "loss": 11.6754, "step": 32958 }, { "epoch": 0.6899229674286192, "grad_norm": 0.2737439274787903, "learning_rate": 0.00017504992363806724, "loss": 11.6644, "step": 32959 }, { "epoch": 0.6899439001925814, "grad_norm": 0.3275248110294342, "learning_rate": 0.00017504847463794285, "loss": 11.6469, "step": 32960 }, { "epoch": 0.6899648329565435, "grad_norm": 0.29701974987983704, "learning_rate": 0.00017504702560174117, "loss": 11.6681, "step": 32961 }, { "epoch": 0.6899857657205057, "grad_norm": 0.24554181098937988, "learning_rate": 0.00017504557652946287, "loss": 11.6803, "step": 32962 }, { "epoch": 0.6900066984844679, "grad_norm": 0.356923371553421, "learning_rate": 0.00017504412742110864, "loss": 11.6672, "step": 32963 }, { "epoch": 0.69002763124843, "grad_norm": 0.27145323157310486, "learning_rate": 0.00017504267827667924, "loss": 11.6645, "step": 32964 }, { "epoch": 0.6900485640123922, "grad_norm": 0.22876793146133423, "learning_rate": 0.0001750412290961753, "loss": 11.6591, "step": 32965 }, { "epoch": 0.6900694967763543, "grad_norm": 0.5858594179153442, "learning_rate": 0.0001750397798795975, "loss": 11.6658, "step": 32966 }, { "epoch": 0.6900904295403165, "grad_norm": 0.3071264624595642, "learning_rate": 0.0001750383306269466, "loss": 11.6786, "step": 32967 }, { "epoch": 0.6901113623042786, "grad_norm": 0.4063260555267334, "learning_rate": 0.00017503688133822326, "loss": 11.6736, "step": 32968 }, { "epoch": 0.6901322950682408, "grad_norm": 0.2901829779148102, "learning_rate": 0.0001750354320134282, "loss": 11.6672, "step": 32969 }, { "epoch": 0.690153227832203, "grad_norm": 0.38750120997428894, "learning_rate": 0.00017503398265256213, "loss": 11.6847, "step": 32970 }, { "epoch": 0.6901741605961651, "grad_norm": 0.25412052869796753, "learning_rate": 0.0001750325332556257, "loss": 11.6642, "step": 32971 }, { "epoch": 0.6901950933601273, "grad_norm": 0.29871025681495667, "learning_rate": 0.0001750310838226196, "loss": 11.6596, "step": 32972 }, { "epoch": 0.6902160261240894, "grad_norm": 0.3265184164047241, "learning_rate": 0.0001750296343535446, "loss": 11.6804, "step": 32973 }, { "epoch": 0.6902369588880516, "grad_norm": 0.2655285596847534, "learning_rate": 0.00017502818484840132, "loss": 11.6632, "step": 32974 }, { "epoch": 0.6902578916520137, "grad_norm": 0.3542432487010956, "learning_rate": 0.00017502673530719052, "loss": 11.6597, "step": 32975 }, { "epoch": 0.6902788244159759, "grad_norm": 0.34590479731559753, "learning_rate": 0.00017502528572991283, "loss": 11.6794, "step": 32976 }, { "epoch": 0.6902997571799381, "grad_norm": 0.25721561908721924, "learning_rate": 0.00017502383611656897, "loss": 11.668, "step": 32977 }, { "epoch": 0.6903206899439002, "grad_norm": 0.3658944070339203, "learning_rate": 0.0001750223864671597, "loss": 11.662, "step": 32978 }, { "epoch": 0.6903416227078624, "grad_norm": 0.28230413794517517, "learning_rate": 0.00017502093678168563, "loss": 11.6924, "step": 32979 }, { "epoch": 0.6903625554718245, "grad_norm": 0.31052687764167786, "learning_rate": 0.0001750194870601475, "loss": 11.6723, "step": 32980 }, { "epoch": 0.6903834882357867, "grad_norm": 0.2510663866996765, "learning_rate": 0.000175018037302546, "loss": 11.673, "step": 32981 }, { "epoch": 0.6904044209997489, "grad_norm": 0.35326504707336426, "learning_rate": 0.00017501658750888183, "loss": 11.6718, "step": 32982 }, { "epoch": 0.690425353763711, "grad_norm": 0.24156606197357178, "learning_rate": 0.00017501513767915566, "loss": 11.6653, "step": 32983 }, { "epoch": 0.6904462865276731, "grad_norm": 0.33195605874061584, "learning_rate": 0.00017501368781336823, "loss": 11.6787, "step": 32984 }, { "epoch": 0.6904672192916352, "grad_norm": 0.27558469772338867, "learning_rate": 0.0001750122379115202, "loss": 11.6728, "step": 32985 }, { "epoch": 0.6904881520555974, "grad_norm": 0.3158916234970093, "learning_rate": 0.00017501078797361232, "loss": 11.6622, "step": 32986 }, { "epoch": 0.6905090848195595, "grad_norm": 0.2849160432815552, "learning_rate": 0.00017500933799964523, "loss": 11.673, "step": 32987 }, { "epoch": 0.6905300175835217, "grad_norm": 0.24976679682731628, "learning_rate": 0.00017500788798961963, "loss": 11.6599, "step": 32988 }, { "epoch": 0.6905509503474839, "grad_norm": 0.272242933511734, "learning_rate": 0.0001750064379435363, "loss": 11.6642, "step": 32989 }, { "epoch": 0.690571883111446, "grad_norm": 0.2977354824542999, "learning_rate": 0.00017500498786139577, "loss": 11.6609, "step": 32990 }, { "epoch": 0.6905928158754082, "grad_norm": 0.24850818514823914, "learning_rate": 0.00017500353774319892, "loss": 11.6705, "step": 32991 }, { "epoch": 0.6906137486393703, "grad_norm": 0.2860148251056671, "learning_rate": 0.00017500208758894635, "loss": 11.6635, "step": 32992 }, { "epoch": 0.6906346814033325, "grad_norm": 0.2781623303890228, "learning_rate": 0.00017500063739863878, "loss": 11.6677, "step": 32993 }, { "epoch": 0.6906556141672946, "grad_norm": 0.2951963543891907, "learning_rate": 0.00017499918717227687, "loss": 11.6611, "step": 32994 }, { "epoch": 0.6906765469312568, "grad_norm": 0.3075410723686218, "learning_rate": 0.00017499773690986134, "loss": 11.6837, "step": 32995 }, { "epoch": 0.690697479695219, "grad_norm": 0.25599634647369385, "learning_rate": 0.00017499628661139295, "loss": 11.6755, "step": 32996 }, { "epoch": 0.6907184124591811, "grad_norm": 0.2981478273868561, "learning_rate": 0.0001749948362768723, "loss": 11.67, "step": 32997 }, { "epoch": 0.6907393452231433, "grad_norm": 0.29185017943382263, "learning_rate": 0.00017499338590630013, "loss": 11.6844, "step": 32998 }, { "epoch": 0.6907602779871054, "grad_norm": 0.3255573511123657, "learning_rate": 0.00017499193549967715, "loss": 11.6656, "step": 32999 }, { "epoch": 0.6907812107510676, "grad_norm": 0.31525591015815735, "learning_rate": 0.00017499048505700403, "loss": 11.6607, "step": 33000 }, { "epoch": 0.6907812107510676, "eval_loss": 11.670527458190918, "eval_runtime": 34.2914, "eval_samples_per_second": 28.024, "eval_steps_per_second": 7.028, "step": 33000 }, { "epoch": 0.6908021435150298, "grad_norm": 0.3238915205001831, "learning_rate": 0.0001749890345782815, "loss": 11.6842, "step": 33001 }, { "epoch": 0.6908230762789919, "grad_norm": 0.25133517384529114, "learning_rate": 0.00017498758406351024, "loss": 11.6575, "step": 33002 }, { "epoch": 0.6908440090429541, "grad_norm": 0.38111311197280884, "learning_rate": 0.00017498613351269091, "loss": 11.6727, "step": 33003 }, { "epoch": 0.6908649418069162, "grad_norm": 0.2626655697822571, "learning_rate": 0.00017498468292582428, "loss": 11.667, "step": 33004 }, { "epoch": 0.6908858745708784, "grad_norm": 0.2577216625213623, "learning_rate": 0.00017498323230291099, "loss": 11.6751, "step": 33005 }, { "epoch": 0.6909068073348404, "grad_norm": 0.3636418581008911, "learning_rate": 0.00017498178164395176, "loss": 11.6843, "step": 33006 }, { "epoch": 0.6909277400988026, "grad_norm": 0.23736228048801422, "learning_rate": 0.0001749803309489473, "loss": 11.6704, "step": 33007 }, { "epoch": 0.6909486728627648, "grad_norm": 0.28006279468536377, "learning_rate": 0.00017497888021789827, "loss": 11.6734, "step": 33008 }, { "epoch": 0.6909696056267269, "grad_norm": 0.3757841885089874, "learning_rate": 0.0001749774294508054, "loss": 11.6805, "step": 33009 }, { "epoch": 0.6909905383906891, "grad_norm": 0.34478136897087097, "learning_rate": 0.0001749759786476694, "loss": 11.6688, "step": 33010 }, { "epoch": 0.6910114711546512, "grad_norm": 0.38036537170410156, "learning_rate": 0.00017497452780849092, "loss": 11.6669, "step": 33011 }, { "epoch": 0.6910324039186134, "grad_norm": 0.36648598313331604, "learning_rate": 0.0001749730769332707, "loss": 11.6911, "step": 33012 }, { "epoch": 0.6910533366825755, "grad_norm": 0.32079431414604187, "learning_rate": 0.00017497162602200938, "loss": 11.6648, "step": 33013 }, { "epoch": 0.6910742694465377, "grad_norm": 0.4483976364135742, "learning_rate": 0.00017497017507470773, "loss": 11.6795, "step": 33014 }, { "epoch": 0.6910952022104999, "grad_norm": 0.31924667954444885, "learning_rate": 0.0001749687240913664, "loss": 11.6806, "step": 33015 }, { "epoch": 0.691116134974462, "grad_norm": 0.27758482098579407, "learning_rate": 0.0001749672730719861, "loss": 11.6741, "step": 33016 }, { "epoch": 0.6911370677384242, "grad_norm": 0.2553147077560425, "learning_rate": 0.00017496582201656754, "loss": 11.6591, "step": 33017 }, { "epoch": 0.6911580005023863, "grad_norm": 0.2979896366596222, "learning_rate": 0.00017496437092511143, "loss": 11.6758, "step": 33018 }, { "epoch": 0.6911789332663485, "grad_norm": 0.2751010060310364, "learning_rate": 0.0001749629197976184, "loss": 11.668, "step": 33019 }, { "epoch": 0.6911998660303107, "grad_norm": 0.26096829771995544, "learning_rate": 0.0001749614686340892, "loss": 11.6718, "step": 33020 }, { "epoch": 0.6912207987942728, "grad_norm": 0.36525392532348633, "learning_rate": 0.00017496001743452455, "loss": 11.6629, "step": 33021 }, { "epoch": 0.691241731558235, "grad_norm": 0.29377472400665283, "learning_rate": 0.00017495856619892508, "loss": 11.668, "step": 33022 }, { "epoch": 0.6912626643221971, "grad_norm": 0.34809643030166626, "learning_rate": 0.00017495711492729153, "loss": 11.687, "step": 33023 }, { "epoch": 0.6912835970861593, "grad_norm": 0.3269769847393036, "learning_rate": 0.00017495566361962463, "loss": 11.6808, "step": 33024 }, { "epoch": 0.6913045298501214, "grad_norm": 0.26912882924079895, "learning_rate": 0.000174954212275925, "loss": 11.6487, "step": 33025 }, { "epoch": 0.6913254626140836, "grad_norm": 0.2718549072742462, "learning_rate": 0.00017495276089619338, "loss": 11.6678, "step": 33026 }, { "epoch": 0.6913463953780458, "grad_norm": 0.2880246341228485, "learning_rate": 0.0001749513094804305, "loss": 11.6701, "step": 33027 }, { "epoch": 0.6913673281420079, "grad_norm": 0.3246571719646454, "learning_rate": 0.00017494985802863703, "loss": 11.6642, "step": 33028 }, { "epoch": 0.69138826090597, "grad_norm": 0.30924636125564575, "learning_rate": 0.00017494840654081362, "loss": 11.6739, "step": 33029 }, { "epoch": 0.6914091936699321, "grad_norm": 0.2538090646266937, "learning_rate": 0.00017494695501696104, "loss": 11.6573, "step": 33030 }, { "epoch": 0.6914301264338943, "grad_norm": 0.3042949438095093, "learning_rate": 0.00017494550345707993, "loss": 11.6859, "step": 33031 }, { "epoch": 0.6914510591978564, "grad_norm": 0.3247295320034027, "learning_rate": 0.000174944051861171, "loss": 11.6616, "step": 33032 }, { "epoch": 0.6914719919618186, "grad_norm": 0.28046348690986633, "learning_rate": 0.000174942600229235, "loss": 11.6689, "step": 33033 }, { "epoch": 0.6914929247257808, "grad_norm": 0.31289297342300415, "learning_rate": 0.0001749411485612726, "loss": 11.677, "step": 33034 }, { "epoch": 0.6915138574897429, "grad_norm": 0.2716098427772522, "learning_rate": 0.00017493969685728446, "loss": 11.6603, "step": 33035 }, { "epoch": 0.6915347902537051, "grad_norm": 0.43005770444869995, "learning_rate": 0.0001749382451172713, "loss": 11.6909, "step": 33036 }, { "epoch": 0.6915557230176672, "grad_norm": 0.24771347641944885, "learning_rate": 0.00017493679334123385, "loss": 11.668, "step": 33037 }, { "epoch": 0.6915766557816294, "grad_norm": 0.2838120460510254, "learning_rate": 0.00017493534152917276, "loss": 11.6727, "step": 33038 }, { "epoch": 0.6915975885455916, "grad_norm": 0.2743614614009857, "learning_rate": 0.00017493388968108876, "loss": 11.6732, "step": 33039 }, { "epoch": 0.6916185213095537, "grad_norm": 0.31336843967437744, "learning_rate": 0.00017493243779698254, "loss": 11.6582, "step": 33040 }, { "epoch": 0.6916394540735159, "grad_norm": 0.3023315966129303, "learning_rate": 0.00017493098587685477, "loss": 11.6685, "step": 33041 }, { "epoch": 0.691660386837478, "grad_norm": 0.3233173191547394, "learning_rate": 0.00017492953392070622, "loss": 11.6829, "step": 33042 }, { "epoch": 0.6916813196014402, "grad_norm": 0.2465590387582779, "learning_rate": 0.0001749280819285375, "loss": 11.6748, "step": 33043 }, { "epoch": 0.6917022523654023, "grad_norm": 0.2759849727153778, "learning_rate": 0.00017492662990034935, "loss": 11.6648, "step": 33044 }, { "epoch": 0.6917231851293645, "grad_norm": 0.4028407335281372, "learning_rate": 0.0001749251778361425, "loss": 11.6954, "step": 33045 }, { "epoch": 0.6917441178933267, "grad_norm": 0.32031309604644775, "learning_rate": 0.00017492372573591757, "loss": 11.6915, "step": 33046 }, { "epoch": 0.6917650506572888, "grad_norm": 0.2797684073448181, "learning_rate": 0.00017492227359967535, "loss": 11.6547, "step": 33047 }, { "epoch": 0.691785983421251, "grad_norm": 0.315677285194397, "learning_rate": 0.00017492082142741647, "loss": 11.6773, "step": 33048 }, { "epoch": 0.6918069161852131, "grad_norm": 0.29066622257232666, "learning_rate": 0.00017491936921914164, "loss": 11.6691, "step": 33049 }, { "epoch": 0.6918278489491753, "grad_norm": 0.3011740744113922, "learning_rate": 0.0001749179169748516, "loss": 11.6686, "step": 33050 }, { "epoch": 0.6918487817131374, "grad_norm": 0.3471924960613251, "learning_rate": 0.00017491646469454698, "loss": 11.6881, "step": 33051 }, { "epoch": 0.6918697144770996, "grad_norm": 0.2809518575668335, "learning_rate": 0.00017491501237822853, "loss": 11.6623, "step": 33052 }, { "epoch": 0.6918906472410618, "grad_norm": 0.3205622136592865, "learning_rate": 0.00017491356002589695, "loss": 11.6753, "step": 33053 }, { "epoch": 0.6919115800050238, "grad_norm": 0.30888593196868896, "learning_rate": 0.0001749121076375529, "loss": 11.662, "step": 33054 }, { "epoch": 0.691932512768986, "grad_norm": 0.23495107889175415, "learning_rate": 0.00017491065521319708, "loss": 11.6687, "step": 33055 }, { "epoch": 0.6919534455329481, "grad_norm": 0.2594645917415619, "learning_rate": 0.00017490920275283026, "loss": 11.6612, "step": 33056 }, { "epoch": 0.6919743782969103, "grad_norm": 0.2808155417442322, "learning_rate": 0.00017490775025645302, "loss": 11.6746, "step": 33057 }, { "epoch": 0.6919953110608725, "grad_norm": 0.3240768015384674, "learning_rate": 0.00017490629772406617, "loss": 11.6752, "step": 33058 }, { "epoch": 0.6920162438248346, "grad_norm": 0.30866047739982605, "learning_rate": 0.00017490484515567033, "loss": 11.6684, "step": 33059 }, { "epoch": 0.6920371765887968, "grad_norm": 0.35495200753211975, "learning_rate": 0.00017490339255126628, "loss": 11.6675, "step": 33060 }, { "epoch": 0.6920581093527589, "grad_norm": 0.2734575569629669, "learning_rate": 0.00017490193991085464, "loss": 11.6609, "step": 33061 }, { "epoch": 0.6920790421167211, "grad_norm": 0.2866969108581543, "learning_rate": 0.00017490048723443612, "loss": 11.6795, "step": 33062 }, { "epoch": 0.6920999748806832, "grad_norm": 0.3092067241668701, "learning_rate": 0.00017489903452201145, "loss": 11.6774, "step": 33063 }, { "epoch": 0.6921209076446454, "grad_norm": 0.2833339273929596, "learning_rate": 0.0001748975817735813, "loss": 11.6756, "step": 33064 }, { "epoch": 0.6921418404086076, "grad_norm": 0.25465431809425354, "learning_rate": 0.0001748961289891464, "loss": 11.6616, "step": 33065 }, { "epoch": 0.6921627731725697, "grad_norm": 0.28289979696273804, "learning_rate": 0.0001748946761687074, "loss": 11.6623, "step": 33066 }, { "epoch": 0.6921837059365319, "grad_norm": 0.3164204955101013, "learning_rate": 0.00017489322331226505, "loss": 11.6896, "step": 33067 }, { "epoch": 0.692204638700494, "grad_norm": 0.25823619961738586, "learning_rate": 0.00017489177041982002, "loss": 11.6713, "step": 33068 }, { "epoch": 0.6922255714644562, "grad_norm": 0.26144111156463623, "learning_rate": 0.00017489031749137304, "loss": 11.6716, "step": 33069 }, { "epoch": 0.6922465042284183, "grad_norm": 0.3464048206806183, "learning_rate": 0.00017488886452692476, "loss": 11.6483, "step": 33070 }, { "epoch": 0.6922674369923805, "grad_norm": 0.3052208423614502, "learning_rate": 0.0001748874115264759, "loss": 11.6849, "step": 33071 }, { "epoch": 0.6922883697563427, "grad_norm": 0.25971585512161255, "learning_rate": 0.00017488595849002718, "loss": 11.6727, "step": 33072 }, { "epoch": 0.6923093025203048, "grad_norm": 0.30619940161705017, "learning_rate": 0.00017488450541757925, "loss": 11.6562, "step": 33073 }, { "epoch": 0.692330235284267, "grad_norm": 0.308408260345459, "learning_rate": 0.00017488305230913285, "loss": 11.6519, "step": 33074 }, { "epoch": 0.692351168048229, "grad_norm": 0.516179084777832, "learning_rate": 0.0001748815991646887, "loss": 11.6759, "step": 33075 }, { "epoch": 0.6923721008121912, "grad_norm": 0.28243979811668396, "learning_rate": 0.00017488014598424743, "loss": 11.6491, "step": 33076 }, { "epoch": 0.6923930335761534, "grad_norm": 0.3643127977848053, "learning_rate": 0.00017487869276780978, "loss": 11.6697, "step": 33077 }, { "epoch": 0.6924139663401155, "grad_norm": 0.3162285089492798, "learning_rate": 0.00017487723951537643, "loss": 11.6603, "step": 33078 }, { "epoch": 0.6924348991040777, "grad_norm": 0.3016867935657501, "learning_rate": 0.0001748757862269481, "loss": 11.6565, "step": 33079 }, { "epoch": 0.6924558318680398, "grad_norm": 0.33072105050086975, "learning_rate": 0.00017487433290252547, "loss": 11.6848, "step": 33080 }, { "epoch": 0.692476764632002, "grad_norm": 0.2896297574043274, "learning_rate": 0.00017487287954210927, "loss": 11.6381, "step": 33081 }, { "epoch": 0.6924976973959641, "grad_norm": 0.31399863958358765, "learning_rate": 0.00017487142614570016, "loss": 11.6745, "step": 33082 }, { "epoch": 0.6925186301599263, "grad_norm": 0.2643221616744995, "learning_rate": 0.00017486997271329885, "loss": 11.6548, "step": 33083 }, { "epoch": 0.6925395629238885, "grad_norm": 0.26103073358535767, "learning_rate": 0.00017486851924490606, "loss": 11.6887, "step": 33084 }, { "epoch": 0.6925604956878506, "grad_norm": 0.3857460021972656, "learning_rate": 0.00017486706574052247, "loss": 11.6682, "step": 33085 }, { "epoch": 0.6925814284518128, "grad_norm": 0.3716515302658081, "learning_rate": 0.00017486561220014877, "loss": 11.6688, "step": 33086 }, { "epoch": 0.6926023612157749, "grad_norm": 0.3483436107635498, "learning_rate": 0.00017486415862378565, "loss": 11.6885, "step": 33087 }, { "epoch": 0.6926232939797371, "grad_norm": 0.3112540543079376, "learning_rate": 0.00017486270501143388, "loss": 11.675, "step": 33088 }, { "epoch": 0.6926442267436992, "grad_norm": 0.32890820503234863, "learning_rate": 0.00017486125136309408, "loss": 11.6772, "step": 33089 }, { "epoch": 0.6926651595076614, "grad_norm": 0.24124081432819366, "learning_rate": 0.00017485979767876698, "loss": 11.6629, "step": 33090 }, { "epoch": 0.6926860922716236, "grad_norm": 0.28803086280822754, "learning_rate": 0.00017485834395845326, "loss": 11.6677, "step": 33091 }, { "epoch": 0.6927070250355857, "grad_norm": 0.2820350229740143, "learning_rate": 0.00017485689020215364, "loss": 11.6733, "step": 33092 }, { "epoch": 0.6927279577995479, "grad_norm": 0.3058569133281708, "learning_rate": 0.00017485543640986885, "loss": 11.66, "step": 33093 }, { "epoch": 0.69274889056351, "grad_norm": 0.30594131350517273, "learning_rate": 0.00017485398258159951, "loss": 11.6751, "step": 33094 }, { "epoch": 0.6927698233274722, "grad_norm": 0.3448801636695862, "learning_rate": 0.00017485252871734635, "loss": 11.6756, "step": 33095 }, { "epoch": 0.6927907560914344, "grad_norm": 0.25091123580932617, "learning_rate": 0.00017485107481711012, "loss": 11.6715, "step": 33096 }, { "epoch": 0.6928116888553965, "grad_norm": 0.3424427807331085, "learning_rate": 0.00017484962088089146, "loss": 11.6859, "step": 33097 }, { "epoch": 0.6928326216193587, "grad_norm": 0.3470610976219177, "learning_rate": 0.00017484816690869107, "loss": 11.6756, "step": 33098 }, { "epoch": 0.6928535543833207, "grad_norm": 0.3119378089904785, "learning_rate": 0.00017484671290050972, "loss": 11.6647, "step": 33099 }, { "epoch": 0.692874487147283, "grad_norm": 0.4020414352416992, "learning_rate": 0.000174845258856348, "loss": 11.6635, "step": 33100 }, { "epoch": 0.692895419911245, "grad_norm": 0.343365877866745, "learning_rate": 0.0001748438047762067, "loss": 11.6508, "step": 33101 }, { "epoch": 0.6929163526752072, "grad_norm": 0.2544870674610138, "learning_rate": 0.00017484235066008642, "loss": 11.6776, "step": 33102 }, { "epoch": 0.6929372854391694, "grad_norm": 0.34455862641334534, "learning_rate": 0.000174840896507988, "loss": 11.6693, "step": 33103 }, { "epoch": 0.6929582182031315, "grad_norm": 0.22970353066921234, "learning_rate": 0.00017483944231991202, "loss": 11.6667, "step": 33104 }, { "epoch": 0.6929791509670937, "grad_norm": 0.363120973110199, "learning_rate": 0.00017483798809585924, "loss": 11.67, "step": 33105 }, { "epoch": 0.6930000837310558, "grad_norm": 0.34878307580947876, "learning_rate": 0.00017483653383583032, "loss": 11.6753, "step": 33106 }, { "epoch": 0.693021016495018, "grad_norm": 0.28579068183898926, "learning_rate": 0.00017483507953982595, "loss": 11.6771, "step": 33107 }, { "epoch": 0.6930419492589801, "grad_norm": 0.3313085436820984, "learning_rate": 0.00017483362520784692, "loss": 11.6856, "step": 33108 }, { "epoch": 0.6930628820229423, "grad_norm": 0.2935073673725128, "learning_rate": 0.00017483217083989386, "loss": 11.6711, "step": 33109 }, { "epoch": 0.6930838147869045, "grad_norm": 0.2935010492801666, "learning_rate": 0.00017483071643596744, "loss": 11.6641, "step": 33110 }, { "epoch": 0.6931047475508666, "grad_norm": 0.3136177361011505, "learning_rate": 0.0001748292619960684, "loss": 11.6908, "step": 33111 }, { "epoch": 0.6931256803148288, "grad_norm": 0.2529017925262451, "learning_rate": 0.00017482780752019745, "loss": 11.6756, "step": 33112 }, { "epoch": 0.6931466130787909, "grad_norm": 0.32120537757873535, "learning_rate": 0.0001748263530083553, "loss": 11.6704, "step": 33113 }, { "epoch": 0.6931675458427531, "grad_norm": 0.3975119888782501, "learning_rate": 0.00017482489846054256, "loss": 11.6795, "step": 33114 }, { "epoch": 0.6931884786067153, "grad_norm": 0.27340272068977356, "learning_rate": 0.00017482344387676003, "loss": 11.6638, "step": 33115 }, { "epoch": 0.6932094113706774, "grad_norm": 0.27929767966270447, "learning_rate": 0.00017482198925700837, "loss": 11.6637, "step": 33116 }, { "epoch": 0.6932303441346396, "grad_norm": 0.27674704790115356, "learning_rate": 0.0001748205346012883, "loss": 11.6704, "step": 33117 }, { "epoch": 0.6932512768986017, "grad_norm": 0.34831705689430237, "learning_rate": 0.00017481907990960042, "loss": 11.6811, "step": 33118 }, { "epoch": 0.6932722096625639, "grad_norm": 0.32967811822891235, "learning_rate": 0.0001748176251819456, "loss": 11.6747, "step": 33119 }, { "epoch": 0.693293142426526, "grad_norm": 0.30647775530815125, "learning_rate": 0.0001748161704183244, "loss": 11.6762, "step": 33120 }, { "epoch": 0.6933140751904882, "grad_norm": 0.30485615134239197, "learning_rate": 0.00017481471561873758, "loss": 11.678, "step": 33121 }, { "epoch": 0.6933350079544504, "grad_norm": 0.22543363273143768, "learning_rate": 0.00017481326078318583, "loss": 11.6593, "step": 33122 }, { "epoch": 0.6933559407184124, "grad_norm": 0.34064987301826477, "learning_rate": 0.00017481180591166984, "loss": 11.6772, "step": 33123 }, { "epoch": 0.6933768734823746, "grad_norm": 0.37622132897377014, "learning_rate": 0.0001748103510041903, "loss": 11.6659, "step": 33124 }, { "epoch": 0.6933978062463367, "grad_norm": 0.27607953548431396, "learning_rate": 0.00017480889606074794, "loss": 11.6856, "step": 33125 }, { "epoch": 0.6934187390102989, "grad_norm": 0.41617074608802795, "learning_rate": 0.00017480744108134346, "loss": 11.6683, "step": 33126 }, { "epoch": 0.693439671774261, "grad_norm": 0.28649330139160156, "learning_rate": 0.00017480598606597753, "loss": 11.6635, "step": 33127 }, { "epoch": 0.6934606045382232, "grad_norm": 0.3522234261035919, "learning_rate": 0.00017480453101465087, "loss": 11.6764, "step": 33128 }, { "epoch": 0.6934815373021854, "grad_norm": 0.22164779901504517, "learning_rate": 0.00017480307592736418, "loss": 11.6806, "step": 33129 }, { "epoch": 0.6935024700661475, "grad_norm": 0.2560197412967682, "learning_rate": 0.00017480162080411814, "loss": 11.662, "step": 33130 }, { "epoch": 0.6935234028301097, "grad_norm": 0.3152601420879364, "learning_rate": 0.00017480016564491347, "loss": 11.6668, "step": 33131 }, { "epoch": 0.6935443355940718, "grad_norm": 0.28866657614707947, "learning_rate": 0.00017479871044975083, "loss": 11.669, "step": 33132 }, { "epoch": 0.693565268358034, "grad_norm": 0.24614541232585907, "learning_rate": 0.00017479725521863098, "loss": 11.6624, "step": 33133 }, { "epoch": 0.6935862011219961, "grad_norm": 0.33851975202560425, "learning_rate": 0.0001747957999515546, "loss": 11.6875, "step": 33134 }, { "epoch": 0.6936071338859583, "grad_norm": 0.2745307385921478, "learning_rate": 0.00017479434464852235, "loss": 11.6644, "step": 33135 }, { "epoch": 0.6936280666499205, "grad_norm": 0.3141345679759979, "learning_rate": 0.00017479288930953498, "loss": 11.657, "step": 33136 }, { "epoch": 0.6936489994138826, "grad_norm": 0.2982082962989807, "learning_rate": 0.00017479143393459316, "loss": 11.6668, "step": 33137 }, { "epoch": 0.6936699321778448, "grad_norm": 0.2509441077709198, "learning_rate": 0.00017478997852369757, "loss": 11.6754, "step": 33138 }, { "epoch": 0.6936908649418069, "grad_norm": 0.3061133027076721, "learning_rate": 0.00017478852307684897, "loss": 11.6828, "step": 33139 }, { "epoch": 0.6937117977057691, "grad_norm": 0.35979586839675903, "learning_rate": 0.00017478706759404804, "loss": 11.664, "step": 33140 }, { "epoch": 0.6937327304697313, "grad_norm": 0.3300159275531769, "learning_rate": 0.00017478561207529548, "loss": 11.6633, "step": 33141 }, { "epoch": 0.6937536632336934, "grad_norm": 0.27885329723358154, "learning_rate": 0.00017478415652059194, "loss": 11.6541, "step": 33142 }, { "epoch": 0.6937745959976556, "grad_norm": 0.2750381529331207, "learning_rate": 0.00017478270092993818, "loss": 11.656, "step": 33143 }, { "epoch": 0.6937955287616177, "grad_norm": 0.2946259677410126, "learning_rate": 0.00017478124530333487, "loss": 11.6577, "step": 33144 }, { "epoch": 0.6938164615255799, "grad_norm": 0.2780205011367798, "learning_rate": 0.0001747797896407827, "loss": 11.6691, "step": 33145 }, { "epoch": 0.6938373942895419, "grad_norm": 0.2806585729122162, "learning_rate": 0.00017477833394228244, "loss": 11.6625, "step": 33146 }, { "epoch": 0.6938583270535041, "grad_norm": 0.31394872069358826, "learning_rate": 0.00017477687820783465, "loss": 11.6891, "step": 33147 }, { "epoch": 0.6938792598174663, "grad_norm": 0.34927189350128174, "learning_rate": 0.00017477542243744018, "loss": 11.6902, "step": 33148 }, { "epoch": 0.6939001925814284, "grad_norm": 0.2755122482776642, "learning_rate": 0.00017477396663109963, "loss": 11.6756, "step": 33149 }, { "epoch": 0.6939211253453906, "grad_norm": 0.24026541411876678, "learning_rate": 0.0001747725107888138, "loss": 11.6677, "step": 33150 }, { "epoch": 0.6939420581093527, "grad_norm": 0.36493822932243347, "learning_rate": 0.00017477105491058327, "loss": 11.6859, "step": 33151 }, { "epoch": 0.6939629908733149, "grad_norm": 0.2891879975795746, "learning_rate": 0.00017476959899640878, "loss": 11.6733, "step": 33152 }, { "epoch": 0.693983923637277, "grad_norm": 0.2790372371673584, "learning_rate": 0.00017476814304629107, "loss": 11.6787, "step": 33153 }, { "epoch": 0.6940048564012392, "grad_norm": 0.3141481280326843, "learning_rate": 0.00017476668706023084, "loss": 11.6814, "step": 33154 }, { "epoch": 0.6940257891652014, "grad_norm": 0.2902134954929352, "learning_rate": 0.00017476523103822876, "loss": 11.6787, "step": 33155 }, { "epoch": 0.6940467219291635, "grad_norm": 0.2939157485961914, "learning_rate": 0.00017476377498028552, "loss": 11.6715, "step": 33156 }, { "epoch": 0.6940676546931257, "grad_norm": 0.3301129639148712, "learning_rate": 0.0001747623188864018, "loss": 11.6774, "step": 33157 }, { "epoch": 0.6940885874570878, "grad_norm": 0.31896254420280457, "learning_rate": 0.00017476086275657838, "loss": 11.6589, "step": 33158 }, { "epoch": 0.69410952022105, "grad_norm": 0.28875255584716797, "learning_rate": 0.0001747594065908159, "loss": 11.6667, "step": 33159 }, { "epoch": 0.6941304529850122, "grad_norm": 0.3292306959629059, "learning_rate": 0.00017475795038911506, "loss": 11.6691, "step": 33160 }, { "epoch": 0.6941513857489743, "grad_norm": 0.28659212589263916, "learning_rate": 0.0001747564941514766, "loss": 11.6748, "step": 33161 }, { "epoch": 0.6941723185129365, "grad_norm": 0.30934372544288635, "learning_rate": 0.0001747550378779012, "loss": 11.6619, "step": 33162 }, { "epoch": 0.6941932512768986, "grad_norm": 0.4128739535808563, "learning_rate": 0.00017475358156838954, "loss": 11.679, "step": 33163 }, { "epoch": 0.6942141840408608, "grad_norm": 0.25418299436569214, "learning_rate": 0.00017475212522294236, "loss": 11.6609, "step": 33164 }, { "epoch": 0.6942351168048229, "grad_norm": 0.40897563099861145, "learning_rate": 0.00017475066884156028, "loss": 11.6909, "step": 33165 }, { "epoch": 0.6942560495687851, "grad_norm": 0.2578399181365967, "learning_rate": 0.0001747492124242441, "loss": 11.6732, "step": 33166 }, { "epoch": 0.6942769823327473, "grad_norm": 0.37415188550949097, "learning_rate": 0.00017474775597099446, "loss": 11.6755, "step": 33167 }, { "epoch": 0.6942979150967094, "grad_norm": 0.4123595058917999, "learning_rate": 0.00017474629948181208, "loss": 11.6803, "step": 33168 }, { "epoch": 0.6943188478606716, "grad_norm": 0.3234442472457886, "learning_rate": 0.00017474484295669767, "loss": 11.6825, "step": 33169 }, { "epoch": 0.6943397806246336, "grad_norm": 0.3459678292274475, "learning_rate": 0.00017474338639565187, "loss": 11.6706, "step": 33170 }, { "epoch": 0.6943607133885958, "grad_norm": 0.29832586646080017, "learning_rate": 0.00017474192979867548, "loss": 11.6572, "step": 33171 }, { "epoch": 0.6943816461525579, "grad_norm": 0.24895977973937988, "learning_rate": 0.00017474047316576912, "loss": 11.6586, "step": 33172 }, { "epoch": 0.6944025789165201, "grad_norm": 0.280964195728302, "learning_rate": 0.00017473901649693355, "loss": 11.658, "step": 33173 }, { "epoch": 0.6944235116804823, "grad_norm": 0.3059593439102173, "learning_rate": 0.00017473755979216938, "loss": 11.6575, "step": 33174 }, { "epoch": 0.6944444444444444, "grad_norm": 0.3241753578186035, "learning_rate": 0.00017473610305147741, "loss": 11.6599, "step": 33175 }, { "epoch": 0.6944653772084066, "grad_norm": 0.28581202030181885, "learning_rate": 0.0001747346462748583, "loss": 11.6624, "step": 33176 }, { "epoch": 0.6944863099723687, "grad_norm": 0.2973017990589142, "learning_rate": 0.00017473318946231272, "loss": 11.6864, "step": 33177 }, { "epoch": 0.6945072427363309, "grad_norm": 0.2295638471841812, "learning_rate": 0.0001747317326138414, "loss": 11.6733, "step": 33178 }, { "epoch": 0.6945281755002931, "grad_norm": 0.22393299639225006, "learning_rate": 0.00017473027572944506, "loss": 11.6561, "step": 33179 }, { "epoch": 0.6945491082642552, "grad_norm": 0.2795194685459137, "learning_rate": 0.00017472881880912437, "loss": 11.6861, "step": 33180 }, { "epoch": 0.6945700410282174, "grad_norm": 0.29879072308540344, "learning_rate": 0.00017472736185288003, "loss": 11.6657, "step": 33181 }, { "epoch": 0.6945909737921795, "grad_norm": 0.3012152910232544, "learning_rate": 0.00017472590486071276, "loss": 11.6762, "step": 33182 }, { "epoch": 0.6946119065561417, "grad_norm": 0.41273319721221924, "learning_rate": 0.00017472444783262324, "loss": 11.6815, "step": 33183 }, { "epoch": 0.6946328393201038, "grad_norm": 0.32584747672080994, "learning_rate": 0.00017472299076861218, "loss": 11.6666, "step": 33184 }, { "epoch": 0.694653772084066, "grad_norm": 0.3378034830093384, "learning_rate": 0.0001747215336686803, "loss": 11.6649, "step": 33185 }, { "epoch": 0.6946747048480282, "grad_norm": 0.29356464743614197, "learning_rate": 0.00017472007653282826, "loss": 11.684, "step": 33186 }, { "epoch": 0.6946956376119903, "grad_norm": 0.3684786856174469, "learning_rate": 0.0001747186193610568, "loss": 11.6824, "step": 33187 }, { "epoch": 0.6947165703759525, "grad_norm": 0.25448933243751526, "learning_rate": 0.0001747171621533666, "loss": 11.6791, "step": 33188 }, { "epoch": 0.6947375031399146, "grad_norm": 0.3136937618255615, "learning_rate": 0.00017471570490975834, "loss": 11.6594, "step": 33189 }, { "epoch": 0.6947584359038768, "grad_norm": 0.36385834217071533, "learning_rate": 0.00017471424763023277, "loss": 11.6694, "step": 33190 }, { "epoch": 0.6947793686678388, "grad_norm": 0.3382614850997925, "learning_rate": 0.00017471279031479054, "loss": 11.6804, "step": 33191 }, { "epoch": 0.694800301431801, "grad_norm": 0.29591456055641174, "learning_rate": 0.0001747113329634324, "loss": 11.6836, "step": 33192 }, { "epoch": 0.6948212341957632, "grad_norm": 0.266174852848053, "learning_rate": 0.00017470987557615902, "loss": 11.6757, "step": 33193 }, { "epoch": 0.6948421669597253, "grad_norm": 0.3004172742366791, "learning_rate": 0.0001747084181529711, "loss": 11.6846, "step": 33194 }, { "epoch": 0.6948630997236875, "grad_norm": 0.3555130064487457, "learning_rate": 0.00017470696069386935, "loss": 11.6742, "step": 33195 }, { "epoch": 0.6948840324876496, "grad_norm": 0.3139190375804901, "learning_rate": 0.00017470550319885443, "loss": 11.6568, "step": 33196 }, { "epoch": 0.6949049652516118, "grad_norm": 0.2525123059749603, "learning_rate": 0.0001747040456679271, "loss": 11.6741, "step": 33197 }, { "epoch": 0.694925898015574, "grad_norm": 0.34017807245254517, "learning_rate": 0.00017470258810108806, "loss": 11.6814, "step": 33198 }, { "epoch": 0.6949468307795361, "grad_norm": 0.2818639278411865, "learning_rate": 0.00017470113049833797, "loss": 11.6589, "step": 33199 }, { "epoch": 0.6949677635434983, "grad_norm": 0.3776363730430603, "learning_rate": 0.00017469967285967756, "loss": 11.6653, "step": 33200 }, { "epoch": 0.6949886963074604, "grad_norm": 0.2979179322719574, "learning_rate": 0.0001746982151851075, "loss": 11.6462, "step": 33201 }, { "epoch": 0.6950096290714226, "grad_norm": 0.337624192237854, "learning_rate": 0.00017469675747462855, "loss": 11.6722, "step": 33202 }, { "epoch": 0.6950305618353847, "grad_norm": 0.2971116304397583, "learning_rate": 0.00017469529972824135, "loss": 11.6697, "step": 33203 }, { "epoch": 0.6950514945993469, "grad_norm": 0.2661762535572052, "learning_rate": 0.0001746938419459466, "loss": 11.6723, "step": 33204 }, { "epoch": 0.6950724273633091, "grad_norm": 0.2948790490627289, "learning_rate": 0.00017469238412774507, "loss": 11.6615, "step": 33205 }, { "epoch": 0.6950933601272712, "grad_norm": 0.25541627407073975, "learning_rate": 0.00017469092627363738, "loss": 11.6528, "step": 33206 }, { "epoch": 0.6951142928912334, "grad_norm": 0.31159546971321106, "learning_rate": 0.00017468946838362427, "loss": 11.6566, "step": 33207 }, { "epoch": 0.6951352256551955, "grad_norm": 0.3225012421607971, "learning_rate": 0.00017468801045770644, "loss": 11.6749, "step": 33208 }, { "epoch": 0.6951561584191577, "grad_norm": 0.2596997916698456, "learning_rate": 0.0001746865524958846, "loss": 11.6773, "step": 33209 }, { "epoch": 0.6951770911831198, "grad_norm": 0.2958676517009735, "learning_rate": 0.00017468509449815943, "loss": 11.6756, "step": 33210 }, { "epoch": 0.695198023947082, "grad_norm": 0.3255689740180969, "learning_rate": 0.0001746836364645316, "loss": 11.6662, "step": 33211 }, { "epoch": 0.6952189567110442, "grad_norm": 0.2989363372325897, "learning_rate": 0.0001746821783950019, "loss": 11.6597, "step": 33212 }, { "epoch": 0.6952398894750063, "grad_norm": 0.3246425986289978, "learning_rate": 0.00017468072028957094, "loss": 11.6567, "step": 33213 }, { "epoch": 0.6952608222389685, "grad_norm": 0.2528386414051056, "learning_rate": 0.0001746792621482395, "loss": 11.6644, "step": 33214 }, { "epoch": 0.6952817550029305, "grad_norm": 0.2502022385597229, "learning_rate": 0.0001746778039710082, "loss": 11.6619, "step": 33215 }, { "epoch": 0.6953026877668927, "grad_norm": 0.34336185455322266, "learning_rate": 0.0001746763457578778, "loss": 11.6475, "step": 33216 }, { "epoch": 0.695323620530855, "grad_norm": 0.31428882479667664, "learning_rate": 0.000174674887508849, "loss": 11.6779, "step": 33217 }, { "epoch": 0.695344553294817, "grad_norm": 0.2739323079586029, "learning_rate": 0.0001746734292239225, "loss": 11.6777, "step": 33218 }, { "epoch": 0.6953654860587792, "grad_norm": 0.2583954334259033, "learning_rate": 0.00017467197090309893, "loss": 11.6646, "step": 33219 }, { "epoch": 0.6953864188227413, "grad_norm": 0.27591729164123535, "learning_rate": 0.0001746705125463791, "loss": 11.675, "step": 33220 }, { "epoch": 0.6954073515867035, "grad_norm": 0.30477818846702576, "learning_rate": 0.00017466905415376363, "loss": 11.6775, "step": 33221 }, { "epoch": 0.6954282843506656, "grad_norm": 0.31423836946487427, "learning_rate": 0.00017466759572525327, "loss": 11.6597, "step": 33222 }, { "epoch": 0.6954492171146278, "grad_norm": 0.3372931480407715, "learning_rate": 0.00017466613726084868, "loss": 11.6738, "step": 33223 }, { "epoch": 0.69547014987859, "grad_norm": 0.3802237808704376, "learning_rate": 0.00017466467876055058, "loss": 11.6587, "step": 33224 }, { "epoch": 0.6954910826425521, "grad_norm": 0.2583482265472412, "learning_rate": 0.0001746632202243597, "loss": 11.6719, "step": 33225 }, { "epoch": 0.6955120154065143, "grad_norm": 0.3423890471458435, "learning_rate": 0.00017466176165227668, "loss": 11.6689, "step": 33226 }, { "epoch": 0.6955329481704764, "grad_norm": 0.25883787870407104, "learning_rate": 0.00017466030304430228, "loss": 11.6723, "step": 33227 }, { "epoch": 0.6955538809344386, "grad_norm": 0.3167935609817505, "learning_rate": 0.00017465884440043717, "loss": 11.6753, "step": 33228 }, { "epoch": 0.6955748136984007, "grad_norm": 0.38609030842781067, "learning_rate": 0.00017465738572068203, "loss": 11.6731, "step": 33229 }, { "epoch": 0.6955957464623629, "grad_norm": 0.2641924023628235, "learning_rate": 0.00017465592700503763, "loss": 11.6573, "step": 33230 }, { "epoch": 0.6956166792263251, "grad_norm": 0.28950878977775574, "learning_rate": 0.0001746544682535046, "loss": 11.656, "step": 33231 }, { "epoch": 0.6956376119902872, "grad_norm": 0.33248043060302734, "learning_rate": 0.0001746530094660837, "loss": 11.67, "step": 33232 }, { "epoch": 0.6956585447542494, "grad_norm": 0.35717013478279114, "learning_rate": 0.00017465155064277558, "loss": 11.6665, "step": 33233 }, { "epoch": 0.6956794775182115, "grad_norm": 0.28878331184387207, "learning_rate": 0.00017465009178358097, "loss": 11.6761, "step": 33234 }, { "epoch": 0.6957004102821737, "grad_norm": 0.3236987292766571, "learning_rate": 0.00017464863288850054, "loss": 11.6891, "step": 33235 }, { "epoch": 0.6957213430461359, "grad_norm": 0.34236854314804077, "learning_rate": 0.00017464717395753506, "loss": 11.6692, "step": 33236 }, { "epoch": 0.695742275810098, "grad_norm": 0.33852308988571167, "learning_rate": 0.00017464571499068517, "loss": 11.669, "step": 33237 }, { "epoch": 0.6957632085740602, "grad_norm": 0.2944372892379761, "learning_rate": 0.00017464425598795157, "loss": 11.6714, "step": 33238 }, { "epoch": 0.6957841413380222, "grad_norm": 0.28497058153152466, "learning_rate": 0.00017464279694933497, "loss": 11.6611, "step": 33239 }, { "epoch": 0.6958050741019844, "grad_norm": 0.3002737760543823, "learning_rate": 0.00017464133787483614, "loss": 11.6732, "step": 33240 }, { "epoch": 0.6958260068659465, "grad_norm": 0.29853394627571106, "learning_rate": 0.00017463987876445569, "loss": 11.6488, "step": 33241 }, { "epoch": 0.6958469396299087, "grad_norm": 0.3081689774990082, "learning_rate": 0.00017463841961819435, "loss": 11.6559, "step": 33242 }, { "epoch": 0.6958678723938709, "grad_norm": 0.27816101908683777, "learning_rate": 0.00017463696043605283, "loss": 11.685, "step": 33243 }, { "epoch": 0.695888805157833, "grad_norm": 0.2842980623245239, "learning_rate": 0.00017463550121803184, "loss": 11.6733, "step": 33244 }, { "epoch": 0.6959097379217952, "grad_norm": 0.26612550020217896, "learning_rate": 0.00017463404196413205, "loss": 11.6741, "step": 33245 }, { "epoch": 0.6959306706857573, "grad_norm": 0.28938961029052734, "learning_rate": 0.0001746325826743542, "loss": 11.6756, "step": 33246 }, { "epoch": 0.6959516034497195, "grad_norm": 0.4316721558570862, "learning_rate": 0.00017463112334869895, "loss": 11.6781, "step": 33247 }, { "epoch": 0.6959725362136816, "grad_norm": 0.37858423590660095, "learning_rate": 0.00017462966398716705, "loss": 11.6601, "step": 33248 }, { "epoch": 0.6959934689776438, "grad_norm": 0.2347654104232788, "learning_rate": 0.00017462820458975914, "loss": 11.6713, "step": 33249 }, { "epoch": 0.696014401741606, "grad_norm": 0.24266324937343597, "learning_rate": 0.00017462674515647596, "loss": 11.6673, "step": 33250 }, { "epoch": 0.6960353345055681, "grad_norm": 0.290094256401062, "learning_rate": 0.00017462528568731823, "loss": 11.6679, "step": 33251 }, { "epoch": 0.6960562672695303, "grad_norm": 0.2709312438964844, "learning_rate": 0.00017462382618228664, "loss": 11.671, "step": 33252 }, { "epoch": 0.6960772000334924, "grad_norm": 0.3784579038619995, "learning_rate": 0.00017462236664138184, "loss": 11.6675, "step": 33253 }, { "epoch": 0.6960981327974546, "grad_norm": 0.2795654237270355, "learning_rate": 0.0001746209070646046, "loss": 11.6733, "step": 33254 }, { "epoch": 0.6961190655614168, "grad_norm": 0.28934770822525024, "learning_rate": 0.0001746194474519556, "loss": 11.6868, "step": 33255 }, { "epoch": 0.6961399983253789, "grad_norm": 0.25319719314575195, "learning_rate": 0.00017461798780343552, "loss": 11.658, "step": 33256 }, { "epoch": 0.6961609310893411, "grad_norm": 0.2725476920604706, "learning_rate": 0.0001746165281190451, "loss": 11.6662, "step": 33257 }, { "epoch": 0.6961818638533032, "grad_norm": 0.28567633032798767, "learning_rate": 0.00017461506839878502, "loss": 11.6653, "step": 33258 }, { "epoch": 0.6962027966172654, "grad_norm": 0.4334237277507782, "learning_rate": 0.00017461360864265595, "loss": 11.6719, "step": 33259 }, { "epoch": 0.6962237293812275, "grad_norm": 0.2841723561286926, "learning_rate": 0.00017461214885065865, "loss": 11.6651, "step": 33260 }, { "epoch": 0.6962446621451897, "grad_norm": 0.2949756681919098, "learning_rate": 0.00017461068902279377, "loss": 11.688, "step": 33261 }, { "epoch": 0.6962655949091519, "grad_norm": 0.2727953791618347, "learning_rate": 0.00017460922915906203, "loss": 11.665, "step": 33262 }, { "epoch": 0.6962865276731139, "grad_norm": 0.2925041615962982, "learning_rate": 0.00017460776925946417, "loss": 11.6786, "step": 33263 }, { "epoch": 0.6963074604370761, "grad_norm": 0.2825815677642822, "learning_rate": 0.00017460630932400085, "loss": 11.6742, "step": 33264 }, { "epoch": 0.6963283932010382, "grad_norm": 0.31611746549606323, "learning_rate": 0.00017460484935267277, "loss": 11.6877, "step": 33265 }, { "epoch": 0.6963493259650004, "grad_norm": 0.3258844316005707, "learning_rate": 0.0001746033893454807, "loss": 11.6894, "step": 33266 }, { "epoch": 0.6963702587289625, "grad_norm": 0.2950250804424286, "learning_rate": 0.0001746019293024252, "loss": 11.6802, "step": 33267 }, { "epoch": 0.6963911914929247, "grad_norm": 0.25794753432273865, "learning_rate": 0.00017460046922350712, "loss": 11.6891, "step": 33268 }, { "epoch": 0.6964121242568869, "grad_norm": 0.2998214364051819, "learning_rate": 0.00017459900910872706, "loss": 11.6774, "step": 33269 }, { "epoch": 0.696433057020849, "grad_norm": 0.30414989590644836, "learning_rate": 0.00017459754895808577, "loss": 11.6761, "step": 33270 }, { "epoch": 0.6964539897848112, "grad_norm": 0.3275793194770813, "learning_rate": 0.00017459608877158398, "loss": 11.6748, "step": 33271 }, { "epoch": 0.6964749225487733, "grad_norm": 0.34799274802207947, "learning_rate": 0.00017459462854922233, "loss": 11.6726, "step": 33272 }, { "epoch": 0.6964958553127355, "grad_norm": 0.302292138338089, "learning_rate": 0.00017459316829100154, "loss": 11.6746, "step": 33273 }, { "epoch": 0.6965167880766977, "grad_norm": 0.32284072041511536, "learning_rate": 0.00017459170799692233, "loss": 11.663, "step": 33274 }, { "epoch": 0.6965377208406598, "grad_norm": 0.3299095928668976, "learning_rate": 0.00017459024766698538, "loss": 11.6633, "step": 33275 }, { "epoch": 0.696558653604622, "grad_norm": 0.2804591655731201, "learning_rate": 0.00017458878730119138, "loss": 11.67, "step": 33276 }, { "epoch": 0.6965795863685841, "grad_norm": 0.2646598815917969, "learning_rate": 0.0001745873268995411, "loss": 11.6706, "step": 33277 }, { "epoch": 0.6966005191325463, "grad_norm": 0.22743839025497437, "learning_rate": 0.00017458586646203517, "loss": 11.6597, "step": 33278 }, { "epoch": 0.6966214518965084, "grad_norm": 0.28000301122665405, "learning_rate": 0.00017458440598867433, "loss": 11.6854, "step": 33279 }, { "epoch": 0.6966423846604706, "grad_norm": 0.3122115135192871, "learning_rate": 0.00017458294547945927, "loss": 11.6936, "step": 33280 }, { "epoch": 0.6966633174244328, "grad_norm": 0.33566173911094666, "learning_rate": 0.0001745814849343907, "loss": 11.6563, "step": 33281 }, { "epoch": 0.6966842501883949, "grad_norm": 0.3594447076320648, "learning_rate": 0.00017458002435346933, "loss": 11.6865, "step": 33282 }, { "epoch": 0.6967051829523571, "grad_norm": 0.33734628558158875, "learning_rate": 0.0001745785637366958, "loss": 11.6926, "step": 33283 }, { "epoch": 0.6967261157163191, "grad_norm": 0.28010568022727966, "learning_rate": 0.00017457710308407088, "loss": 11.677, "step": 33284 }, { "epoch": 0.6967470484802814, "grad_norm": 0.2914979159832001, "learning_rate": 0.00017457564239559527, "loss": 11.6674, "step": 33285 }, { "epoch": 0.6967679812442434, "grad_norm": 0.2535138726234436, "learning_rate": 0.00017457418167126965, "loss": 11.6735, "step": 33286 }, { "epoch": 0.6967889140082056, "grad_norm": 0.2689764201641083, "learning_rate": 0.00017457272091109474, "loss": 11.6895, "step": 33287 }, { "epoch": 0.6968098467721678, "grad_norm": 0.27031633257865906, "learning_rate": 0.00017457126011507119, "loss": 11.671, "step": 33288 }, { "epoch": 0.6968307795361299, "grad_norm": 0.3904877007007599, "learning_rate": 0.00017456979928319975, "loss": 11.6706, "step": 33289 }, { "epoch": 0.6968517123000921, "grad_norm": 0.27681681513786316, "learning_rate": 0.00017456833841548113, "loss": 11.6642, "step": 33290 }, { "epoch": 0.6968726450640542, "grad_norm": 0.26232317090034485, "learning_rate": 0.00017456687751191602, "loss": 11.6589, "step": 33291 }, { "epoch": 0.6968935778280164, "grad_norm": 0.2242307811975479, "learning_rate": 0.0001745654165725051, "loss": 11.6759, "step": 33292 }, { "epoch": 0.6969145105919786, "grad_norm": 0.2507372200489044, "learning_rate": 0.0001745639555972491, "loss": 11.671, "step": 33293 }, { "epoch": 0.6969354433559407, "grad_norm": 0.2833016812801361, "learning_rate": 0.0001745624945861487, "loss": 11.6753, "step": 33294 }, { "epoch": 0.6969563761199029, "grad_norm": 0.23226939141750336, "learning_rate": 0.00017456103353920461, "loss": 11.6619, "step": 33295 }, { "epoch": 0.696977308883865, "grad_norm": 0.35788387060165405, "learning_rate": 0.00017455957245641758, "loss": 11.667, "step": 33296 }, { "epoch": 0.6969982416478272, "grad_norm": 0.23348107933998108, "learning_rate": 0.00017455811133778822, "loss": 11.6801, "step": 33297 }, { "epoch": 0.6970191744117893, "grad_norm": 0.3398536145687103, "learning_rate": 0.00017455665018331732, "loss": 11.6767, "step": 33298 }, { "epoch": 0.6970401071757515, "grad_norm": 0.32958826422691345, "learning_rate": 0.0001745551889930055, "loss": 11.6707, "step": 33299 }, { "epoch": 0.6970610399397137, "grad_norm": 0.32462579011917114, "learning_rate": 0.00017455372776685352, "loss": 11.647, "step": 33300 }, { "epoch": 0.6970819727036758, "grad_norm": 0.2588035464286804, "learning_rate": 0.0001745522665048621, "loss": 11.6732, "step": 33301 }, { "epoch": 0.697102905467638, "grad_norm": 0.3139875829219818, "learning_rate": 0.0001745508052070319, "loss": 11.677, "step": 33302 }, { "epoch": 0.6971238382316001, "grad_norm": 0.22775070369243622, "learning_rate": 0.0001745493438733636, "loss": 11.6677, "step": 33303 }, { "epoch": 0.6971447709955623, "grad_norm": 0.28575077652931213, "learning_rate": 0.00017454788250385796, "loss": 11.6853, "step": 33304 }, { "epoch": 0.6971657037595244, "grad_norm": 0.31010982394218445, "learning_rate": 0.00017454642109851565, "loss": 11.6773, "step": 33305 }, { "epoch": 0.6971866365234866, "grad_norm": 0.30027422308921814, "learning_rate": 0.0001745449596573374, "loss": 11.6678, "step": 33306 }, { "epoch": 0.6972075692874488, "grad_norm": 0.3962758183479309, "learning_rate": 0.00017454349818032388, "loss": 11.6703, "step": 33307 }, { "epoch": 0.6972285020514108, "grad_norm": 0.2823069989681244, "learning_rate": 0.00017454203666747583, "loss": 11.6668, "step": 33308 }, { "epoch": 0.697249434815373, "grad_norm": 0.3385559618473053, "learning_rate": 0.00017454057511879392, "loss": 11.6763, "step": 33309 }, { "epoch": 0.6972703675793351, "grad_norm": 0.3641456663608551, "learning_rate": 0.00017453911353427882, "loss": 11.6452, "step": 33310 }, { "epoch": 0.6972913003432973, "grad_norm": 0.36444634199142456, "learning_rate": 0.0001745376519139313, "loss": 11.693, "step": 33311 }, { "epoch": 0.6973122331072594, "grad_norm": 0.27874234318733215, "learning_rate": 0.00017453619025775208, "loss": 11.6688, "step": 33312 }, { "epoch": 0.6973331658712216, "grad_norm": 0.3624707758426666, "learning_rate": 0.0001745347285657418, "loss": 11.6649, "step": 33313 }, { "epoch": 0.6973540986351838, "grad_norm": 0.3071529269218445, "learning_rate": 0.00017453326683790113, "loss": 11.658, "step": 33314 }, { "epoch": 0.6973750313991459, "grad_norm": 0.2576294541358948, "learning_rate": 0.00017453180507423087, "loss": 11.6725, "step": 33315 }, { "epoch": 0.6973959641631081, "grad_norm": 0.2691175043582916, "learning_rate": 0.00017453034327473166, "loss": 11.657, "step": 33316 }, { "epoch": 0.6974168969270702, "grad_norm": 0.34888574481010437, "learning_rate": 0.00017452888143940423, "loss": 11.6706, "step": 33317 }, { "epoch": 0.6974378296910324, "grad_norm": 0.27822309732437134, "learning_rate": 0.00017452741956824926, "loss": 11.6756, "step": 33318 }, { "epoch": 0.6974587624549946, "grad_norm": 0.2709072232246399, "learning_rate": 0.00017452595766126748, "loss": 11.6615, "step": 33319 }, { "epoch": 0.6974796952189567, "grad_norm": 0.2587352693080902, "learning_rate": 0.00017452449571845962, "loss": 11.6692, "step": 33320 }, { "epoch": 0.6975006279829189, "grad_norm": 0.3147255778312683, "learning_rate": 0.00017452303373982628, "loss": 11.6738, "step": 33321 }, { "epoch": 0.697521560746881, "grad_norm": 0.34199750423431396, "learning_rate": 0.00017452157172536824, "loss": 11.6644, "step": 33322 }, { "epoch": 0.6975424935108432, "grad_norm": 0.19890575110912323, "learning_rate": 0.00017452010967508618, "loss": 11.6623, "step": 33323 }, { "epoch": 0.6975634262748053, "grad_norm": 0.32469984889030457, "learning_rate": 0.00017451864758898084, "loss": 11.6839, "step": 33324 }, { "epoch": 0.6975843590387675, "grad_norm": 0.2986231744289398, "learning_rate": 0.00017451718546705286, "loss": 11.6701, "step": 33325 }, { "epoch": 0.6976052918027297, "grad_norm": 0.42118656635284424, "learning_rate": 0.000174515723309303, "loss": 11.6901, "step": 33326 }, { "epoch": 0.6976262245666918, "grad_norm": 0.30602961778640747, "learning_rate": 0.0001745142611157319, "loss": 11.6574, "step": 33327 }, { "epoch": 0.697647157330654, "grad_norm": 0.2709851562976837, "learning_rate": 0.00017451279888634034, "loss": 11.659, "step": 33328 }, { "epoch": 0.6976680900946161, "grad_norm": 0.27392247319221497, "learning_rate": 0.00017451133662112897, "loss": 11.6596, "step": 33329 }, { "epoch": 0.6976890228585783, "grad_norm": 0.28392016887664795, "learning_rate": 0.00017450987432009854, "loss": 11.6612, "step": 33330 }, { "epoch": 0.6977099556225403, "grad_norm": 0.319118469953537, "learning_rate": 0.00017450841198324968, "loss": 11.6656, "step": 33331 }, { "epoch": 0.6977308883865025, "grad_norm": 0.25635236501693726, "learning_rate": 0.00017450694961058312, "loss": 11.6783, "step": 33332 }, { "epoch": 0.6977518211504647, "grad_norm": 0.31675371527671814, "learning_rate": 0.0001745054872020996, "loss": 11.6832, "step": 33333 }, { "epoch": 0.6977727539144268, "grad_norm": 0.26984673738479614, "learning_rate": 0.0001745040247577998, "loss": 11.6806, "step": 33334 }, { "epoch": 0.697793686678389, "grad_norm": 0.4112294018268585, "learning_rate": 0.00017450256227768445, "loss": 11.6622, "step": 33335 }, { "epoch": 0.6978146194423511, "grad_norm": 0.35929378867149353, "learning_rate": 0.00017450109976175417, "loss": 11.66, "step": 33336 }, { "epoch": 0.6978355522063133, "grad_norm": 0.294107586145401, "learning_rate": 0.00017449963721000974, "loss": 11.6861, "step": 33337 }, { "epoch": 0.6978564849702755, "grad_norm": 0.27031001448631287, "learning_rate": 0.00017449817462245184, "loss": 11.6669, "step": 33338 }, { "epoch": 0.6978774177342376, "grad_norm": 0.2723747193813324, "learning_rate": 0.0001744967119990812, "loss": 11.6702, "step": 33339 }, { "epoch": 0.6978983504981998, "grad_norm": 0.30546727776527405, "learning_rate": 0.00017449524933989846, "loss": 11.6631, "step": 33340 }, { "epoch": 0.6979192832621619, "grad_norm": 0.30796828866004944, "learning_rate": 0.0001744937866449044, "loss": 11.6706, "step": 33341 }, { "epoch": 0.6979402160261241, "grad_norm": 0.2917928695678711, "learning_rate": 0.00017449232391409964, "loss": 11.673, "step": 33342 }, { "epoch": 0.6979611487900862, "grad_norm": 0.27464306354522705, "learning_rate": 0.00017449086114748496, "loss": 11.665, "step": 33343 }, { "epoch": 0.6979820815540484, "grad_norm": 0.2643860876560211, "learning_rate": 0.00017448939834506101, "loss": 11.6661, "step": 33344 }, { "epoch": 0.6980030143180106, "grad_norm": 0.27008798718452454, "learning_rate": 0.00017448793550682855, "loss": 11.6577, "step": 33345 }, { "epoch": 0.6980239470819727, "grad_norm": 0.35709869861602783, "learning_rate": 0.00017448647263278822, "loss": 11.6587, "step": 33346 }, { "epoch": 0.6980448798459349, "grad_norm": 0.2333834320306778, "learning_rate": 0.00017448500972294074, "loss": 11.6572, "step": 33347 }, { "epoch": 0.698065812609897, "grad_norm": 0.3833468556404114, "learning_rate": 0.00017448354677728682, "loss": 11.6816, "step": 33348 }, { "epoch": 0.6980867453738592, "grad_norm": 0.3932717442512512, "learning_rate": 0.00017448208379582718, "loss": 11.6774, "step": 33349 }, { "epoch": 0.6981076781378213, "grad_norm": 0.40585562586784363, "learning_rate": 0.0001744806207785625, "loss": 11.6565, "step": 33350 }, { "epoch": 0.6981286109017835, "grad_norm": 0.28108665347099304, "learning_rate": 0.00017447915772549354, "loss": 11.6693, "step": 33351 }, { "epoch": 0.6981495436657457, "grad_norm": 0.27797526121139526, "learning_rate": 0.0001744776946366209, "loss": 11.6761, "step": 33352 }, { "epoch": 0.6981704764297078, "grad_norm": 0.28026679158210754, "learning_rate": 0.00017447623151194534, "loss": 11.6832, "step": 33353 }, { "epoch": 0.69819140919367, "grad_norm": 0.3690861761569977, "learning_rate": 0.00017447476835146758, "loss": 11.6725, "step": 33354 }, { "epoch": 0.698212341957632, "grad_norm": 0.26268601417541504, "learning_rate": 0.0001744733051551883, "loss": 11.6616, "step": 33355 }, { "epoch": 0.6982332747215942, "grad_norm": 0.260610431432724, "learning_rate": 0.00017447184192310823, "loss": 11.6682, "step": 33356 }, { "epoch": 0.6982542074855564, "grad_norm": 0.2423655241727829, "learning_rate": 0.00017447037865522805, "loss": 11.6653, "step": 33357 }, { "epoch": 0.6982751402495185, "grad_norm": 0.2806645929813385, "learning_rate": 0.00017446891535154846, "loss": 11.6616, "step": 33358 }, { "epoch": 0.6982960730134807, "grad_norm": 0.23783551156520844, "learning_rate": 0.00017446745201207014, "loss": 11.6483, "step": 33359 }, { "epoch": 0.6983170057774428, "grad_norm": 0.34866106510162354, "learning_rate": 0.00017446598863679385, "loss": 11.6783, "step": 33360 }, { "epoch": 0.698337938541405, "grad_norm": 0.4080159068107605, "learning_rate": 0.00017446452522572027, "loss": 11.6558, "step": 33361 }, { "epoch": 0.6983588713053671, "grad_norm": 0.3009107708930969, "learning_rate": 0.00017446306177885013, "loss": 11.6726, "step": 33362 }, { "epoch": 0.6983798040693293, "grad_norm": 0.28968924283981323, "learning_rate": 0.00017446159829618405, "loss": 11.6564, "step": 33363 }, { "epoch": 0.6984007368332915, "grad_norm": 0.274739146232605, "learning_rate": 0.00017446013477772282, "loss": 11.672, "step": 33364 }, { "epoch": 0.6984216695972536, "grad_norm": 0.2824184000492096, "learning_rate": 0.00017445867122346708, "loss": 11.6845, "step": 33365 }, { "epoch": 0.6984426023612158, "grad_norm": 0.3683280050754547, "learning_rate": 0.00017445720763341758, "loss": 11.678, "step": 33366 }, { "epoch": 0.6984635351251779, "grad_norm": 0.3975135087966919, "learning_rate": 0.00017445574400757504, "loss": 11.6559, "step": 33367 }, { "epoch": 0.6984844678891401, "grad_norm": 0.28896480798721313, "learning_rate": 0.0001744542803459401, "loss": 11.67, "step": 33368 }, { "epoch": 0.6985054006531022, "grad_norm": 0.2879185378551483, "learning_rate": 0.0001744528166485135, "loss": 11.6748, "step": 33369 }, { "epoch": 0.6985263334170644, "grad_norm": 0.3089222013950348, "learning_rate": 0.00017445135291529592, "loss": 11.6591, "step": 33370 }, { "epoch": 0.6985472661810266, "grad_norm": 0.33965378999710083, "learning_rate": 0.00017444988914628811, "loss": 11.6703, "step": 33371 }, { "epoch": 0.6985681989449887, "grad_norm": 0.3787997364997864, "learning_rate": 0.00017444842534149074, "loss": 11.6512, "step": 33372 }, { "epoch": 0.6985891317089509, "grad_norm": 0.3122464716434479, "learning_rate": 0.0001744469615009045, "loss": 11.6424, "step": 33373 }, { "epoch": 0.698610064472913, "grad_norm": 0.2634042799472809, "learning_rate": 0.00017444549762453014, "loss": 11.6685, "step": 33374 }, { "epoch": 0.6986309972368752, "grad_norm": 0.3177871108055115, "learning_rate": 0.00017444403371236832, "loss": 11.6873, "step": 33375 }, { "epoch": 0.6986519300008374, "grad_norm": 0.3163047730922699, "learning_rate": 0.00017444256976441978, "loss": 11.6762, "step": 33376 }, { "epoch": 0.6986728627647995, "grad_norm": 0.281696081161499, "learning_rate": 0.0001744411057806852, "loss": 11.6732, "step": 33377 }, { "epoch": 0.6986937955287617, "grad_norm": 0.31460195779800415, "learning_rate": 0.00017443964176116526, "loss": 11.6693, "step": 33378 }, { "epoch": 0.6987147282927237, "grad_norm": 0.29862087965011597, "learning_rate": 0.00017443817770586073, "loss": 11.6471, "step": 33379 }, { "epoch": 0.6987356610566859, "grad_norm": 0.2718327045440674, "learning_rate": 0.00017443671361477226, "loss": 11.6791, "step": 33380 }, { "epoch": 0.698756593820648, "grad_norm": 0.3195256292819977, "learning_rate": 0.00017443524948790057, "loss": 11.6712, "step": 33381 }, { "epoch": 0.6987775265846102, "grad_norm": 0.3090957701206207, "learning_rate": 0.00017443378532524636, "loss": 11.6742, "step": 33382 }, { "epoch": 0.6987984593485724, "grad_norm": 0.29636162519454956, "learning_rate": 0.00017443232112681036, "loss": 11.6655, "step": 33383 }, { "epoch": 0.6988193921125345, "grad_norm": 0.3164781928062439, "learning_rate": 0.00017443085689259325, "loss": 11.6668, "step": 33384 }, { "epoch": 0.6988403248764967, "grad_norm": 0.2714468538761139, "learning_rate": 0.0001744293926225957, "loss": 11.672, "step": 33385 }, { "epoch": 0.6988612576404588, "grad_norm": 0.3392913341522217, "learning_rate": 0.00017442792831681846, "loss": 11.6868, "step": 33386 }, { "epoch": 0.698882190404421, "grad_norm": 0.23943385481834412, "learning_rate": 0.00017442646397526222, "loss": 11.6581, "step": 33387 }, { "epoch": 0.6989031231683831, "grad_norm": 0.2740641236305237, "learning_rate": 0.00017442499959792774, "loss": 11.6807, "step": 33388 }, { "epoch": 0.6989240559323453, "grad_norm": 0.268586128950119, "learning_rate": 0.00017442353518481562, "loss": 11.6686, "step": 33389 }, { "epoch": 0.6989449886963075, "grad_norm": 0.23873865604400635, "learning_rate": 0.00017442207073592662, "loss": 11.6677, "step": 33390 }, { "epoch": 0.6989659214602696, "grad_norm": 0.347578227519989, "learning_rate": 0.00017442060625126143, "loss": 11.6772, "step": 33391 }, { "epoch": 0.6989868542242318, "grad_norm": 0.2800809144973755, "learning_rate": 0.00017441914173082078, "loss": 11.6675, "step": 33392 }, { "epoch": 0.6990077869881939, "grad_norm": 0.301797091960907, "learning_rate": 0.00017441767717460538, "loss": 11.6727, "step": 33393 }, { "epoch": 0.6990287197521561, "grad_norm": 0.32647237181663513, "learning_rate": 0.0001744162125826159, "loss": 11.6802, "step": 33394 }, { "epoch": 0.6990496525161183, "grad_norm": 0.26997047662734985, "learning_rate": 0.00017441474795485302, "loss": 11.6812, "step": 33395 }, { "epoch": 0.6990705852800804, "grad_norm": 0.39808914065361023, "learning_rate": 0.00017441328329131749, "loss": 11.6599, "step": 33396 }, { "epoch": 0.6990915180440426, "grad_norm": 0.3336198627948761, "learning_rate": 0.00017441181859201003, "loss": 11.6606, "step": 33397 }, { "epoch": 0.6991124508080047, "grad_norm": 0.2801715135574341, "learning_rate": 0.0001744103538569313, "loss": 11.6745, "step": 33398 }, { "epoch": 0.6991333835719669, "grad_norm": 0.31171485781669617, "learning_rate": 0.00017440888908608205, "loss": 11.6773, "step": 33399 }, { "epoch": 0.699154316335929, "grad_norm": 0.24358420073986053, "learning_rate": 0.00017440742427946292, "loss": 11.6634, "step": 33400 }, { "epoch": 0.6991752490998911, "grad_norm": 0.31388431787490845, "learning_rate": 0.00017440595943707466, "loss": 11.6808, "step": 33401 }, { "epoch": 0.6991961818638533, "grad_norm": 0.30353373289108276, "learning_rate": 0.00017440449455891795, "loss": 11.6673, "step": 33402 }, { "epoch": 0.6992171146278154, "grad_norm": 0.26658517122268677, "learning_rate": 0.00017440302964499352, "loss": 11.6454, "step": 33403 }, { "epoch": 0.6992380473917776, "grad_norm": 0.29424557089805603, "learning_rate": 0.00017440156469530207, "loss": 11.6845, "step": 33404 }, { "epoch": 0.6992589801557397, "grad_norm": 0.3164208233356476, "learning_rate": 0.0001744000997098443, "loss": 11.6643, "step": 33405 }, { "epoch": 0.6992799129197019, "grad_norm": 0.32712164521217346, "learning_rate": 0.00017439863468862092, "loss": 11.6645, "step": 33406 }, { "epoch": 0.699300845683664, "grad_norm": 0.24557608366012573, "learning_rate": 0.0001743971696316326, "loss": 11.6926, "step": 33407 }, { "epoch": 0.6993217784476262, "grad_norm": 0.3516521453857422, "learning_rate": 0.00017439570453888008, "loss": 11.6715, "step": 33408 }, { "epoch": 0.6993427112115884, "grad_norm": 0.35006576776504517, "learning_rate": 0.00017439423941036406, "loss": 11.6977, "step": 33409 }, { "epoch": 0.6993636439755505, "grad_norm": 0.31734344363212585, "learning_rate": 0.00017439277424608523, "loss": 11.6517, "step": 33410 }, { "epoch": 0.6993845767395127, "grad_norm": 0.29583558440208435, "learning_rate": 0.0001743913090460443, "loss": 11.6568, "step": 33411 }, { "epoch": 0.6994055095034748, "grad_norm": 0.4220404028892517, "learning_rate": 0.00017438984381024198, "loss": 11.6688, "step": 33412 }, { "epoch": 0.699426442267437, "grad_norm": 0.3399064540863037, "learning_rate": 0.00017438837853867898, "loss": 11.6739, "step": 33413 }, { "epoch": 0.6994473750313992, "grad_norm": 0.3178297281265259, "learning_rate": 0.000174386913231356, "loss": 11.6807, "step": 33414 }, { "epoch": 0.6994683077953613, "grad_norm": 0.32605981826782227, "learning_rate": 0.0001743854478882737, "loss": 11.6845, "step": 33415 }, { "epoch": 0.6994892405593235, "grad_norm": 0.36918798089027405, "learning_rate": 0.00017438398250943285, "loss": 11.6795, "step": 33416 }, { "epoch": 0.6995101733232856, "grad_norm": 0.34236228466033936, "learning_rate": 0.00017438251709483413, "loss": 11.6539, "step": 33417 }, { "epoch": 0.6995311060872478, "grad_norm": 0.3437288701534271, "learning_rate": 0.00017438105164447825, "loss": 11.6749, "step": 33418 }, { "epoch": 0.6995520388512099, "grad_norm": 0.2763266861438751, "learning_rate": 0.00017437958615836588, "loss": 11.6839, "step": 33419 }, { "epoch": 0.6995729716151721, "grad_norm": 0.292335569858551, "learning_rate": 0.0001743781206364978, "loss": 11.6648, "step": 33420 }, { "epoch": 0.6995939043791343, "grad_norm": 0.40267258882522583, "learning_rate": 0.00017437665507887462, "loss": 11.6718, "step": 33421 }, { "epoch": 0.6996148371430964, "grad_norm": 0.25006088614463806, "learning_rate": 0.00017437518948549711, "loss": 11.6697, "step": 33422 }, { "epoch": 0.6996357699070586, "grad_norm": 0.3844459354877472, "learning_rate": 0.00017437372385636598, "loss": 11.6574, "step": 33423 }, { "epoch": 0.6996567026710206, "grad_norm": 0.3004126250743866, "learning_rate": 0.0001743722581914819, "loss": 11.691, "step": 33424 }, { "epoch": 0.6996776354349828, "grad_norm": 0.29264241456985474, "learning_rate": 0.00017437079249084557, "loss": 11.6777, "step": 33425 }, { "epoch": 0.6996985681989449, "grad_norm": 0.3109828531742096, "learning_rate": 0.0001743693267544577, "loss": 11.6541, "step": 33426 }, { "epoch": 0.6997195009629071, "grad_norm": 0.3345434069633484, "learning_rate": 0.000174367860982319, "loss": 11.6651, "step": 33427 }, { "epoch": 0.6997404337268693, "grad_norm": 0.34079256653785706, "learning_rate": 0.0001743663951744302, "loss": 11.6817, "step": 33428 }, { "epoch": 0.6997613664908314, "grad_norm": 0.2596689462661743, "learning_rate": 0.000174364929330792, "loss": 11.6678, "step": 33429 }, { "epoch": 0.6997822992547936, "grad_norm": 0.30229198932647705, "learning_rate": 0.00017436346345140507, "loss": 11.6756, "step": 33430 }, { "epoch": 0.6998032320187557, "grad_norm": 0.40651169419288635, "learning_rate": 0.0001743619975362701, "loss": 11.6901, "step": 33431 }, { "epoch": 0.6998241647827179, "grad_norm": 0.23254477977752686, "learning_rate": 0.00017436053158538788, "loss": 11.6727, "step": 33432 }, { "epoch": 0.6998450975466801, "grad_norm": 0.41745463013648987, "learning_rate": 0.00017435906559875903, "loss": 11.6734, "step": 33433 }, { "epoch": 0.6998660303106422, "grad_norm": 0.33477798104286194, "learning_rate": 0.00017435759957638432, "loss": 11.6681, "step": 33434 }, { "epoch": 0.6998869630746044, "grad_norm": 0.339067280292511, "learning_rate": 0.0001743561335182644, "loss": 11.6739, "step": 33435 }, { "epoch": 0.6999078958385665, "grad_norm": 0.25857025384902954, "learning_rate": 0.0001743546674244, "loss": 11.6638, "step": 33436 }, { "epoch": 0.6999288286025287, "grad_norm": 0.2867691218852997, "learning_rate": 0.0001743532012947918, "loss": 11.6722, "step": 33437 }, { "epoch": 0.6999497613664908, "grad_norm": 0.30861473083496094, "learning_rate": 0.00017435173512944053, "loss": 11.6699, "step": 33438 }, { "epoch": 0.699970694130453, "grad_norm": 0.28818345069885254, "learning_rate": 0.0001743502689283469, "loss": 11.6699, "step": 33439 }, { "epoch": 0.6999916268944152, "grad_norm": 0.31759417057037354, "learning_rate": 0.00017434880269151163, "loss": 11.646, "step": 33440 }, { "epoch": 0.7000125596583773, "grad_norm": 0.33054137229919434, "learning_rate": 0.00017434733641893537, "loss": 11.6758, "step": 33441 }, { "epoch": 0.7000334924223395, "grad_norm": 0.3416439890861511, "learning_rate": 0.00017434587011061888, "loss": 11.6799, "step": 33442 }, { "epoch": 0.7000544251863016, "grad_norm": 0.275713175535202, "learning_rate": 0.00017434440376656282, "loss": 11.6624, "step": 33443 }, { "epoch": 0.7000753579502638, "grad_norm": 0.31416499614715576, "learning_rate": 0.00017434293738676792, "loss": 11.6727, "step": 33444 }, { "epoch": 0.7000962907142259, "grad_norm": 0.30635109543800354, "learning_rate": 0.0001743414709712349, "loss": 11.6741, "step": 33445 }, { "epoch": 0.700117223478188, "grad_norm": 0.2666395902633667, "learning_rate": 0.00017434000451996441, "loss": 11.6718, "step": 33446 }, { "epoch": 0.7001381562421503, "grad_norm": 0.29693886637687683, "learning_rate": 0.0001743385380329572, "loss": 11.6602, "step": 33447 }, { "epoch": 0.7001590890061123, "grad_norm": 0.2900051772594452, "learning_rate": 0.00017433707151021398, "loss": 11.6615, "step": 33448 }, { "epoch": 0.7001800217700745, "grad_norm": 0.2740894854068756, "learning_rate": 0.00017433560495173542, "loss": 11.6804, "step": 33449 }, { "epoch": 0.7002009545340366, "grad_norm": 0.25520193576812744, "learning_rate": 0.00017433413835752227, "loss": 11.6762, "step": 33450 }, { "epoch": 0.7002218872979988, "grad_norm": 0.28157129883766174, "learning_rate": 0.0001743326717275752, "loss": 11.6719, "step": 33451 }, { "epoch": 0.700242820061961, "grad_norm": 0.3254488706588745, "learning_rate": 0.0001743312050618949, "loss": 11.6648, "step": 33452 }, { "epoch": 0.7002637528259231, "grad_norm": 0.25033149123191833, "learning_rate": 0.00017432973836048213, "loss": 11.6731, "step": 33453 }, { "epoch": 0.7002846855898853, "grad_norm": 0.29203274846076965, "learning_rate": 0.00017432827162333755, "loss": 11.6703, "step": 33454 }, { "epoch": 0.7003056183538474, "grad_norm": 0.23383691906929016, "learning_rate": 0.00017432680485046192, "loss": 11.6547, "step": 33455 }, { "epoch": 0.7003265511178096, "grad_norm": 0.32123422622680664, "learning_rate": 0.00017432533804185587, "loss": 11.6708, "step": 33456 }, { "epoch": 0.7003474838817717, "grad_norm": 0.2985130548477173, "learning_rate": 0.00017432387119752015, "loss": 11.6758, "step": 33457 }, { "epoch": 0.7003684166457339, "grad_norm": 0.3120955526828766, "learning_rate": 0.00017432240431745544, "loss": 11.6618, "step": 33458 }, { "epoch": 0.7003893494096961, "grad_norm": 0.3438991904258728, "learning_rate": 0.00017432093740166248, "loss": 11.6657, "step": 33459 }, { "epoch": 0.7004102821736582, "grad_norm": 0.24803845584392548, "learning_rate": 0.00017431947045014193, "loss": 11.6739, "step": 33460 }, { "epoch": 0.7004312149376204, "grad_norm": 0.2744601368904114, "learning_rate": 0.00017431800346289454, "loss": 11.6653, "step": 33461 }, { "epoch": 0.7004521477015825, "grad_norm": 0.388262003660202, "learning_rate": 0.00017431653643992098, "loss": 11.6647, "step": 33462 }, { "epoch": 0.7004730804655447, "grad_norm": 0.2939513921737671, "learning_rate": 0.000174315069381222, "loss": 11.6633, "step": 33463 }, { "epoch": 0.7004940132295068, "grad_norm": 0.33574536442756653, "learning_rate": 0.00017431360228679826, "loss": 11.6835, "step": 33464 }, { "epoch": 0.700514945993469, "grad_norm": 0.42113471031188965, "learning_rate": 0.00017431213515665047, "loss": 11.6773, "step": 33465 }, { "epoch": 0.7005358787574312, "grad_norm": 0.27751460671424866, "learning_rate": 0.00017431066799077937, "loss": 11.6619, "step": 33466 }, { "epoch": 0.7005568115213933, "grad_norm": 0.3011230528354645, "learning_rate": 0.00017430920078918564, "loss": 11.6719, "step": 33467 }, { "epoch": 0.7005777442853555, "grad_norm": 0.28847959637641907, "learning_rate": 0.00017430773355187, "loss": 11.6595, "step": 33468 }, { "epoch": 0.7005986770493176, "grad_norm": 0.37666311860084534, "learning_rate": 0.0001743062662788331, "loss": 11.6579, "step": 33469 }, { "epoch": 0.7006196098132798, "grad_norm": 0.36860737204551697, "learning_rate": 0.00017430479897007572, "loss": 11.684, "step": 33470 }, { "epoch": 0.700640542577242, "grad_norm": 0.3285745680332184, "learning_rate": 0.00017430333162559854, "loss": 11.6684, "step": 33471 }, { "epoch": 0.700661475341204, "grad_norm": 0.2795964181423187, "learning_rate": 0.00017430186424540224, "loss": 11.6718, "step": 33472 }, { "epoch": 0.7006824081051662, "grad_norm": 0.3758215308189392, "learning_rate": 0.00017430039682948755, "loss": 11.6647, "step": 33473 }, { "epoch": 0.7007033408691283, "grad_norm": 0.2611984610557556, "learning_rate": 0.0001742989293778552, "loss": 11.6645, "step": 33474 }, { "epoch": 0.7007242736330905, "grad_norm": 0.28661850094795227, "learning_rate": 0.0001742974618905058, "loss": 11.6629, "step": 33475 }, { "epoch": 0.7007452063970526, "grad_norm": 0.2687542736530304, "learning_rate": 0.00017429599436744018, "loss": 11.6613, "step": 33476 }, { "epoch": 0.7007661391610148, "grad_norm": 0.37848418951034546, "learning_rate": 0.00017429452680865897, "loss": 11.659, "step": 33477 }, { "epoch": 0.700787071924977, "grad_norm": 0.2981019914150238, "learning_rate": 0.00017429305921416288, "loss": 11.6793, "step": 33478 }, { "epoch": 0.7008080046889391, "grad_norm": 0.29403114318847656, "learning_rate": 0.00017429159158395262, "loss": 11.6667, "step": 33479 }, { "epoch": 0.7008289374529013, "grad_norm": 0.29811760783195496, "learning_rate": 0.00017429012391802893, "loss": 11.6777, "step": 33480 }, { "epoch": 0.7008498702168634, "grad_norm": 0.25613102316856384, "learning_rate": 0.00017428865621639249, "loss": 11.6718, "step": 33481 }, { "epoch": 0.7008708029808256, "grad_norm": 0.24386586248874664, "learning_rate": 0.00017428718847904397, "loss": 11.6593, "step": 33482 }, { "epoch": 0.7008917357447877, "grad_norm": 0.38762423396110535, "learning_rate": 0.00017428572070598414, "loss": 11.6654, "step": 33483 }, { "epoch": 0.7009126685087499, "grad_norm": 0.28939321637153625, "learning_rate": 0.0001742842528972137, "loss": 11.6581, "step": 33484 }, { "epoch": 0.7009336012727121, "grad_norm": 0.3169184923171997, "learning_rate": 0.00017428278505273327, "loss": 11.6738, "step": 33485 }, { "epoch": 0.7009545340366742, "grad_norm": 0.29169562458992004, "learning_rate": 0.00017428131717254363, "loss": 11.6619, "step": 33486 }, { "epoch": 0.7009754668006364, "grad_norm": 0.2732726037502289, "learning_rate": 0.00017427984925664548, "loss": 11.6561, "step": 33487 }, { "epoch": 0.7009963995645985, "grad_norm": 0.2832094430923462, "learning_rate": 0.00017427838130503953, "loss": 11.6674, "step": 33488 }, { "epoch": 0.7010173323285607, "grad_norm": 0.33689451217651367, "learning_rate": 0.0001742769133177265, "loss": 11.6638, "step": 33489 }, { "epoch": 0.7010382650925229, "grad_norm": 0.33040255308151245, "learning_rate": 0.00017427544529470703, "loss": 11.6673, "step": 33490 }, { "epoch": 0.701059197856485, "grad_norm": 0.35385575890541077, "learning_rate": 0.00017427397723598188, "loss": 11.659, "step": 33491 }, { "epoch": 0.7010801306204472, "grad_norm": 0.37577024102211, "learning_rate": 0.0001742725091415517, "loss": 11.6727, "step": 33492 }, { "epoch": 0.7011010633844093, "grad_norm": 0.5396711826324463, "learning_rate": 0.00017427104101141726, "loss": 11.6723, "step": 33493 }, { "epoch": 0.7011219961483715, "grad_norm": 0.3440474569797516, "learning_rate": 0.00017426957284557928, "loss": 11.6894, "step": 33494 }, { "epoch": 0.7011429289123335, "grad_norm": 0.3040601313114166, "learning_rate": 0.0001742681046440384, "loss": 11.6706, "step": 33495 }, { "epoch": 0.7011638616762957, "grad_norm": 0.32365870475769043, "learning_rate": 0.00017426663640679536, "loss": 11.6762, "step": 33496 }, { "epoch": 0.7011847944402579, "grad_norm": 0.2640615403652191, "learning_rate": 0.00017426516813385084, "loss": 11.6715, "step": 33497 }, { "epoch": 0.70120572720422, "grad_norm": 0.25664880871772766, "learning_rate": 0.0001742636998252056, "loss": 11.6589, "step": 33498 }, { "epoch": 0.7012266599681822, "grad_norm": 0.2723977565765381, "learning_rate": 0.00017426223148086028, "loss": 11.6687, "step": 33499 }, { "epoch": 0.7012475927321443, "grad_norm": 0.3131890892982483, "learning_rate": 0.0001742607631008156, "loss": 11.6731, "step": 33500 }, { "epoch": 0.7012685254961065, "grad_norm": 0.293343722820282, "learning_rate": 0.00017425929468507234, "loss": 11.6886, "step": 33501 }, { "epoch": 0.7012894582600686, "grad_norm": 0.24078162014484406, "learning_rate": 0.00017425782623363108, "loss": 11.6676, "step": 33502 }, { "epoch": 0.7013103910240308, "grad_norm": 0.3549453914165497, "learning_rate": 0.00017425635774649266, "loss": 11.6793, "step": 33503 }, { "epoch": 0.701331323787993, "grad_norm": 0.27829378843307495, "learning_rate": 0.0001742548892236577, "loss": 11.6677, "step": 33504 }, { "epoch": 0.7013522565519551, "grad_norm": 0.2794674038887024, "learning_rate": 0.00017425342066512692, "loss": 11.6668, "step": 33505 }, { "epoch": 0.7013731893159173, "grad_norm": 0.263579785823822, "learning_rate": 0.00017425195207090104, "loss": 11.6734, "step": 33506 }, { "epoch": 0.7013941220798794, "grad_norm": 0.32353293895721436, "learning_rate": 0.00017425048344098075, "loss": 11.6687, "step": 33507 }, { "epoch": 0.7014150548438416, "grad_norm": 0.3060999810695648, "learning_rate": 0.00017424901477536677, "loss": 11.6824, "step": 33508 }, { "epoch": 0.7014359876078037, "grad_norm": 0.4716029465198517, "learning_rate": 0.0001742475460740598, "loss": 11.692, "step": 33509 }, { "epoch": 0.7014569203717659, "grad_norm": 0.2459695190191269, "learning_rate": 0.00017424607733706053, "loss": 11.665, "step": 33510 }, { "epoch": 0.7014778531357281, "grad_norm": 0.24178530275821686, "learning_rate": 0.00017424460856436968, "loss": 11.6669, "step": 33511 }, { "epoch": 0.7014987858996902, "grad_norm": 0.29435962438583374, "learning_rate": 0.000174243139755988, "loss": 11.6661, "step": 33512 }, { "epoch": 0.7015197186636524, "grad_norm": 0.40738168358802795, "learning_rate": 0.0001742416709119161, "loss": 11.6775, "step": 33513 }, { "epoch": 0.7015406514276145, "grad_norm": 0.2716277539730072, "learning_rate": 0.0001742402020321548, "loss": 11.6655, "step": 33514 }, { "epoch": 0.7015615841915767, "grad_norm": 0.33321860432624817, "learning_rate": 0.00017423873311670472, "loss": 11.6598, "step": 33515 }, { "epoch": 0.7015825169555389, "grad_norm": 0.30276820063591003, "learning_rate": 0.00017423726416556658, "loss": 11.6642, "step": 33516 }, { "epoch": 0.701603449719501, "grad_norm": 0.35436657071113586, "learning_rate": 0.00017423579517874112, "loss": 11.6604, "step": 33517 }, { "epoch": 0.7016243824834631, "grad_norm": 0.32567957043647766, "learning_rate": 0.000174234326156229, "loss": 11.6713, "step": 33518 }, { "epoch": 0.7016453152474252, "grad_norm": 0.24117548763751984, "learning_rate": 0.000174232857098031, "loss": 11.6658, "step": 33519 }, { "epoch": 0.7016662480113874, "grad_norm": 0.3347495496273041, "learning_rate": 0.00017423138800414772, "loss": 11.6688, "step": 33520 }, { "epoch": 0.7016871807753495, "grad_norm": 0.24583233892917633, "learning_rate": 0.00017422991887457997, "loss": 11.6594, "step": 33521 }, { "epoch": 0.7017081135393117, "grad_norm": 0.2780587673187256, "learning_rate": 0.00017422844970932836, "loss": 11.6702, "step": 33522 }, { "epoch": 0.7017290463032739, "grad_norm": 0.23182329535484314, "learning_rate": 0.00017422698050839367, "loss": 11.6695, "step": 33523 }, { "epoch": 0.701749979067236, "grad_norm": 0.27132752537727356, "learning_rate": 0.0001742255112717766, "loss": 11.6734, "step": 33524 }, { "epoch": 0.7017709118311982, "grad_norm": 0.27196118235588074, "learning_rate": 0.00017422404199947785, "loss": 11.665, "step": 33525 }, { "epoch": 0.7017918445951603, "grad_norm": 0.28478291630744934, "learning_rate": 0.0001742225726914981, "loss": 11.6629, "step": 33526 }, { "epoch": 0.7018127773591225, "grad_norm": 0.2621628940105438, "learning_rate": 0.00017422110334783806, "loss": 11.6777, "step": 33527 }, { "epoch": 0.7018337101230846, "grad_norm": 0.37208059430122375, "learning_rate": 0.00017421963396849845, "loss": 11.6809, "step": 33528 }, { "epoch": 0.7018546428870468, "grad_norm": 0.26007312536239624, "learning_rate": 0.00017421816455348, "loss": 11.6705, "step": 33529 }, { "epoch": 0.701875575651009, "grad_norm": 0.2720460891723633, "learning_rate": 0.00017421669510278334, "loss": 11.6702, "step": 33530 }, { "epoch": 0.7018965084149711, "grad_norm": 0.3101125657558441, "learning_rate": 0.00017421522561640926, "loss": 11.6686, "step": 33531 }, { "epoch": 0.7019174411789333, "grad_norm": 0.251801997423172, "learning_rate": 0.00017421375609435845, "loss": 11.6697, "step": 33532 }, { "epoch": 0.7019383739428954, "grad_norm": 0.32232967019081116, "learning_rate": 0.00017421228653663159, "loss": 11.6752, "step": 33533 }, { "epoch": 0.7019593067068576, "grad_norm": 0.2848920226097107, "learning_rate": 0.00017421081694322935, "loss": 11.6823, "step": 33534 }, { "epoch": 0.7019802394708198, "grad_norm": 0.3052166998386383, "learning_rate": 0.00017420934731415252, "loss": 11.6887, "step": 33535 }, { "epoch": 0.7020011722347819, "grad_norm": 0.345149964094162, "learning_rate": 0.00017420787764940178, "loss": 11.6832, "step": 33536 }, { "epoch": 0.7020221049987441, "grad_norm": 0.2951919436454773, "learning_rate": 0.0001742064079489778, "loss": 11.6647, "step": 33537 }, { "epoch": 0.7020430377627062, "grad_norm": 0.2893379032611847, "learning_rate": 0.00017420493821288134, "loss": 11.6764, "step": 33538 }, { "epoch": 0.7020639705266684, "grad_norm": 0.27127015590667725, "learning_rate": 0.00017420346844111307, "loss": 11.6682, "step": 33539 }, { "epoch": 0.7020849032906304, "grad_norm": 0.3328896164894104, "learning_rate": 0.00017420199863367371, "loss": 11.676, "step": 33540 }, { "epoch": 0.7021058360545926, "grad_norm": 0.24985402822494507, "learning_rate": 0.00017420052879056397, "loss": 11.6675, "step": 33541 }, { "epoch": 0.7021267688185548, "grad_norm": 0.33498847484588623, "learning_rate": 0.0001741990589117845, "loss": 11.6658, "step": 33542 }, { "epoch": 0.7021477015825169, "grad_norm": 0.2609390318393707, "learning_rate": 0.00017419758899733613, "loss": 11.6642, "step": 33543 }, { "epoch": 0.7021686343464791, "grad_norm": 0.3335888683795929, "learning_rate": 0.00017419611904721945, "loss": 11.6709, "step": 33544 }, { "epoch": 0.7021895671104412, "grad_norm": 0.3287988603115082, "learning_rate": 0.0001741946490614352, "loss": 11.6749, "step": 33545 }, { "epoch": 0.7022104998744034, "grad_norm": 0.2889833450317383, "learning_rate": 0.00017419317903998412, "loss": 11.6797, "step": 33546 }, { "epoch": 0.7022314326383655, "grad_norm": 0.24436257779598236, "learning_rate": 0.00017419170898286688, "loss": 11.655, "step": 33547 }, { "epoch": 0.7022523654023277, "grad_norm": 0.30510103702545166, "learning_rate": 0.00017419023889008418, "loss": 11.6967, "step": 33548 }, { "epoch": 0.7022732981662899, "grad_norm": 0.2810538113117218, "learning_rate": 0.00017418876876163677, "loss": 11.6805, "step": 33549 }, { "epoch": 0.702294230930252, "grad_norm": 0.27975690364837646, "learning_rate": 0.00017418729859752533, "loss": 11.6537, "step": 33550 }, { "epoch": 0.7023151636942142, "grad_norm": 0.30801600217819214, "learning_rate": 0.00017418582839775058, "loss": 11.6921, "step": 33551 }, { "epoch": 0.7023360964581763, "grad_norm": 0.4671268165111542, "learning_rate": 0.00017418435816231317, "loss": 11.6963, "step": 33552 }, { "epoch": 0.7023570292221385, "grad_norm": 0.3109269142150879, "learning_rate": 0.00017418288789121391, "loss": 11.6716, "step": 33553 }, { "epoch": 0.7023779619861007, "grad_norm": 0.3009911775588989, "learning_rate": 0.0001741814175844534, "loss": 11.6815, "step": 33554 }, { "epoch": 0.7023988947500628, "grad_norm": 0.2835160493850708, "learning_rate": 0.00017417994724203243, "loss": 11.6678, "step": 33555 }, { "epoch": 0.702419827514025, "grad_norm": 0.30476492643356323, "learning_rate": 0.00017417847686395166, "loss": 11.6633, "step": 33556 }, { "epoch": 0.7024407602779871, "grad_norm": 0.2715913653373718, "learning_rate": 0.0001741770064502118, "loss": 11.6658, "step": 33557 }, { "epoch": 0.7024616930419493, "grad_norm": 0.4000771641731262, "learning_rate": 0.00017417553600081358, "loss": 11.6855, "step": 33558 }, { "epoch": 0.7024826258059114, "grad_norm": 0.3653917908668518, "learning_rate": 0.00017417406551575767, "loss": 11.6709, "step": 33559 }, { "epoch": 0.7025035585698736, "grad_norm": 0.32675692439079285, "learning_rate": 0.00017417259499504482, "loss": 11.6708, "step": 33560 }, { "epoch": 0.7025244913338358, "grad_norm": 0.30509161949157715, "learning_rate": 0.00017417112443867574, "loss": 11.6548, "step": 33561 }, { "epoch": 0.7025454240977979, "grad_norm": 0.280560165643692, "learning_rate": 0.00017416965384665106, "loss": 11.6628, "step": 33562 }, { "epoch": 0.70256635686176, "grad_norm": 0.3079472780227661, "learning_rate": 0.0001741681832189716, "loss": 11.6646, "step": 33563 }, { "epoch": 0.7025872896257221, "grad_norm": 0.2656491696834564, "learning_rate": 0.00017416671255563796, "loss": 11.6716, "step": 33564 }, { "epoch": 0.7026082223896843, "grad_norm": 0.32815906405448914, "learning_rate": 0.00017416524185665094, "loss": 11.6714, "step": 33565 }, { "epoch": 0.7026291551536464, "grad_norm": 0.2791980803012848, "learning_rate": 0.00017416377112201115, "loss": 11.6755, "step": 33566 }, { "epoch": 0.7026500879176086, "grad_norm": 1.3500378131866455, "learning_rate": 0.00017416230035171938, "loss": 11.659, "step": 33567 }, { "epoch": 0.7026710206815708, "grad_norm": 0.20441676676273346, "learning_rate": 0.0001741608295457763, "loss": 11.6624, "step": 33568 }, { "epoch": 0.7026919534455329, "grad_norm": 0.2904605567455292, "learning_rate": 0.00017415935870418263, "loss": 11.6669, "step": 33569 }, { "epoch": 0.7027128862094951, "grad_norm": 0.2777037024497986, "learning_rate": 0.00017415788782693906, "loss": 11.6782, "step": 33570 }, { "epoch": 0.7027338189734572, "grad_norm": 0.2596374750137329, "learning_rate": 0.00017415641691404633, "loss": 11.6868, "step": 33571 }, { "epoch": 0.7027547517374194, "grad_norm": 0.30791592597961426, "learning_rate": 0.0001741549459655051, "loss": 11.6785, "step": 33572 }, { "epoch": 0.7027756845013816, "grad_norm": 0.2644539177417755, "learning_rate": 0.0001741534749813161, "loss": 11.6778, "step": 33573 }, { "epoch": 0.7027966172653437, "grad_norm": 0.3106386363506317, "learning_rate": 0.00017415200396148002, "loss": 11.6568, "step": 33574 }, { "epoch": 0.7028175500293059, "grad_norm": 0.2560214400291443, "learning_rate": 0.00017415053290599763, "loss": 11.6744, "step": 33575 }, { "epoch": 0.702838482793268, "grad_norm": 0.26250210404396057, "learning_rate": 0.0001741490618148696, "loss": 11.6593, "step": 33576 }, { "epoch": 0.7028594155572302, "grad_norm": 0.3148074150085449, "learning_rate": 0.0001741475906880966, "loss": 11.6746, "step": 33577 }, { "epoch": 0.7028803483211923, "grad_norm": 0.27117836475372314, "learning_rate": 0.00017414611952567936, "loss": 11.6644, "step": 33578 }, { "epoch": 0.7029012810851545, "grad_norm": 0.30486413836479187, "learning_rate": 0.0001741446483276186, "loss": 11.6733, "step": 33579 }, { "epoch": 0.7029222138491167, "grad_norm": 0.23592376708984375, "learning_rate": 0.00017414317709391503, "loss": 11.6725, "step": 33580 }, { "epoch": 0.7029431466130788, "grad_norm": 0.32518884539604187, "learning_rate": 0.00017414170582456934, "loss": 11.6773, "step": 33581 }, { "epoch": 0.702964079377041, "grad_norm": 0.3111569285392761, "learning_rate": 0.00017414023451958226, "loss": 11.6627, "step": 33582 }, { "epoch": 0.7029850121410031, "grad_norm": 0.2947755753993988, "learning_rate": 0.00017413876317895447, "loss": 11.6663, "step": 33583 }, { "epoch": 0.7030059449049653, "grad_norm": 0.3577699661254883, "learning_rate": 0.0001741372918026867, "loss": 11.6704, "step": 33584 }, { "epoch": 0.7030268776689274, "grad_norm": 0.3008898198604584, "learning_rate": 0.00017413582039077966, "loss": 11.6796, "step": 33585 }, { "epoch": 0.7030478104328896, "grad_norm": 0.3315022885799408, "learning_rate": 0.000174134348943234, "loss": 11.6911, "step": 33586 }, { "epoch": 0.7030687431968518, "grad_norm": 0.37796199321746826, "learning_rate": 0.00017413287746005055, "loss": 11.6873, "step": 33587 }, { "epoch": 0.7030896759608138, "grad_norm": 0.33258920907974243, "learning_rate": 0.00017413140594122988, "loss": 11.6571, "step": 33588 }, { "epoch": 0.703110608724776, "grad_norm": 0.2457512617111206, "learning_rate": 0.0001741299343867728, "loss": 11.6696, "step": 33589 }, { "epoch": 0.7031315414887381, "grad_norm": 0.5004287958145142, "learning_rate": 0.00017412846279667992, "loss": 11.6856, "step": 33590 }, { "epoch": 0.7031524742527003, "grad_norm": 0.2919125556945801, "learning_rate": 0.00017412699117095204, "loss": 11.6701, "step": 33591 }, { "epoch": 0.7031734070166625, "grad_norm": 0.274932324886322, "learning_rate": 0.0001741255195095898, "loss": 11.6668, "step": 33592 }, { "epoch": 0.7031943397806246, "grad_norm": 0.3730439245700836, "learning_rate": 0.00017412404781259397, "loss": 11.688, "step": 33593 }, { "epoch": 0.7032152725445868, "grad_norm": 0.2939394414424896, "learning_rate": 0.0001741225760799652, "loss": 11.6672, "step": 33594 }, { "epoch": 0.7032362053085489, "grad_norm": 0.2980988621711731, "learning_rate": 0.00017412110431170422, "loss": 11.6605, "step": 33595 }, { "epoch": 0.7032571380725111, "grad_norm": 0.3610285222530365, "learning_rate": 0.00017411963250781176, "loss": 11.6923, "step": 33596 }, { "epoch": 0.7032780708364732, "grad_norm": 0.2778293788433075, "learning_rate": 0.0001741181606682885, "loss": 11.6557, "step": 33597 }, { "epoch": 0.7032990036004354, "grad_norm": 0.24657966196537018, "learning_rate": 0.00017411668879313514, "loss": 11.6814, "step": 33598 }, { "epoch": 0.7033199363643976, "grad_norm": 0.2948700189590454, "learning_rate": 0.00017411521688235243, "loss": 11.6856, "step": 33599 }, { "epoch": 0.7033408691283597, "grad_norm": 0.327601820230484, "learning_rate": 0.00017411374493594105, "loss": 11.6715, "step": 33600 }, { "epoch": 0.7033618018923219, "grad_norm": 0.31774500012397766, "learning_rate": 0.00017411227295390167, "loss": 11.6822, "step": 33601 }, { "epoch": 0.703382734656284, "grad_norm": 0.3083418011665344, "learning_rate": 0.00017411080093623506, "loss": 11.6662, "step": 33602 }, { "epoch": 0.7034036674202462, "grad_norm": 0.21714824438095093, "learning_rate": 0.0001741093288829419, "loss": 11.6583, "step": 33603 }, { "epoch": 0.7034246001842083, "grad_norm": 0.2329854816198349, "learning_rate": 0.00017410785679402291, "loss": 11.6558, "step": 33604 }, { "epoch": 0.7034455329481705, "grad_norm": 0.35039666295051575, "learning_rate": 0.0001741063846694788, "loss": 11.6785, "step": 33605 }, { "epoch": 0.7034664657121327, "grad_norm": 0.2943791449069977, "learning_rate": 0.00017410491250931026, "loss": 11.659, "step": 33606 }, { "epoch": 0.7034873984760948, "grad_norm": 0.28881511092185974, "learning_rate": 0.00017410344031351798, "loss": 11.6772, "step": 33607 }, { "epoch": 0.703508331240057, "grad_norm": 0.2882467210292816, "learning_rate": 0.00017410196808210267, "loss": 11.6779, "step": 33608 }, { "epoch": 0.703529264004019, "grad_norm": 0.28204530477523804, "learning_rate": 0.00017410049581506511, "loss": 11.6768, "step": 33609 }, { "epoch": 0.7035501967679813, "grad_norm": 0.28290992975234985, "learning_rate": 0.00017409902351240596, "loss": 11.6635, "step": 33610 }, { "epoch": 0.7035711295319435, "grad_norm": 0.3090096414089203, "learning_rate": 0.00017409755117412588, "loss": 11.672, "step": 33611 }, { "epoch": 0.7035920622959055, "grad_norm": 0.32003846764564514, "learning_rate": 0.00017409607880022564, "loss": 11.6757, "step": 33612 }, { "epoch": 0.7036129950598677, "grad_norm": 0.41260144114494324, "learning_rate": 0.00017409460639070595, "loss": 11.6773, "step": 33613 }, { "epoch": 0.7036339278238298, "grad_norm": 0.2818356156349182, "learning_rate": 0.00017409313394556748, "loss": 11.6721, "step": 33614 }, { "epoch": 0.703654860587792, "grad_norm": 0.2673996388912201, "learning_rate": 0.00017409166146481095, "loss": 11.6699, "step": 33615 }, { "epoch": 0.7036757933517541, "grad_norm": 0.28093600273132324, "learning_rate": 0.00017409018894843706, "loss": 11.6581, "step": 33616 }, { "epoch": 0.7036967261157163, "grad_norm": 0.3753765821456909, "learning_rate": 0.00017408871639644656, "loss": 11.6857, "step": 33617 }, { "epoch": 0.7037176588796785, "grad_norm": 0.31439408659935, "learning_rate": 0.00017408724380884013, "loss": 11.6859, "step": 33618 }, { "epoch": 0.7037385916436406, "grad_norm": 0.22679054737091064, "learning_rate": 0.0001740857711856185, "loss": 11.6708, "step": 33619 }, { "epoch": 0.7037595244076028, "grad_norm": 0.7608126997947693, "learning_rate": 0.0001740842985267823, "loss": 11.5737, "step": 33620 }, { "epoch": 0.7037804571715649, "grad_norm": 0.3833160698413849, "learning_rate": 0.00017408282583233233, "loss": 11.6535, "step": 33621 }, { "epoch": 0.7038013899355271, "grad_norm": 0.34777653217315674, "learning_rate": 0.00017408135310226924, "loss": 11.6878, "step": 33622 }, { "epoch": 0.7038223226994892, "grad_norm": 0.34578368067741394, "learning_rate": 0.00017407988033659377, "loss": 11.6924, "step": 33623 }, { "epoch": 0.7038432554634514, "grad_norm": 0.28844180703163147, "learning_rate": 0.0001740784075353066, "loss": 11.6726, "step": 33624 }, { "epoch": 0.7038641882274136, "grad_norm": 0.28407904505729675, "learning_rate": 0.00017407693469840847, "loss": 11.6605, "step": 33625 }, { "epoch": 0.7038851209913757, "grad_norm": 0.3456234633922577, "learning_rate": 0.00017407546182590008, "loss": 11.6655, "step": 33626 }, { "epoch": 0.7039060537553379, "grad_norm": 0.3197292685508728, "learning_rate": 0.00017407398891778212, "loss": 11.6677, "step": 33627 }, { "epoch": 0.7039269865193, "grad_norm": 0.2875744700431824, "learning_rate": 0.0001740725159740553, "loss": 11.6643, "step": 33628 }, { "epoch": 0.7039479192832622, "grad_norm": 0.2690233290195465, "learning_rate": 0.00017407104299472034, "loss": 11.6716, "step": 33629 }, { "epoch": 0.7039688520472244, "grad_norm": 0.30926015973091125, "learning_rate": 0.00017406956997977798, "loss": 11.6847, "step": 33630 }, { "epoch": 0.7039897848111865, "grad_norm": 0.27048927545547485, "learning_rate": 0.00017406809692922885, "loss": 11.6816, "step": 33631 }, { "epoch": 0.7040107175751487, "grad_norm": 0.30817973613739014, "learning_rate": 0.00017406662384307373, "loss": 11.6848, "step": 33632 }, { "epoch": 0.7040316503391107, "grad_norm": 0.27139613032341003, "learning_rate": 0.00017406515072131327, "loss": 11.6708, "step": 33633 }, { "epoch": 0.704052583103073, "grad_norm": 0.2742311358451843, "learning_rate": 0.00017406367756394824, "loss": 11.6889, "step": 33634 }, { "epoch": 0.704073515867035, "grad_norm": 0.3082827925682068, "learning_rate": 0.0001740622043709793, "loss": 11.6551, "step": 33635 }, { "epoch": 0.7040944486309972, "grad_norm": 0.30709540843963623, "learning_rate": 0.00017406073114240717, "loss": 11.6647, "step": 33636 }, { "epoch": 0.7041153813949594, "grad_norm": 0.3165963292121887, "learning_rate": 0.00017405925787823256, "loss": 11.6665, "step": 33637 }, { "epoch": 0.7041363141589215, "grad_norm": 0.26168572902679443, "learning_rate": 0.00017405778457845622, "loss": 11.6793, "step": 33638 }, { "epoch": 0.7041572469228837, "grad_norm": 0.3380975127220154, "learning_rate": 0.00017405631124307877, "loss": 11.6662, "step": 33639 }, { "epoch": 0.7041781796868458, "grad_norm": 0.2419251948595047, "learning_rate": 0.00017405483787210098, "loss": 11.6617, "step": 33640 }, { "epoch": 0.704199112450808, "grad_norm": 0.35082101821899414, "learning_rate": 0.00017405336446552357, "loss": 11.6586, "step": 33641 }, { "epoch": 0.7042200452147701, "grad_norm": 0.27113088965415955, "learning_rate": 0.0001740518910233472, "loss": 11.67, "step": 33642 }, { "epoch": 0.7042409779787323, "grad_norm": 0.2646844685077667, "learning_rate": 0.0001740504175455726, "loss": 11.6572, "step": 33643 }, { "epoch": 0.7042619107426945, "grad_norm": 0.324057400226593, "learning_rate": 0.0001740489440322005, "loss": 11.6714, "step": 33644 }, { "epoch": 0.7042828435066566, "grad_norm": 0.33176353573799133, "learning_rate": 0.00017404747048323157, "loss": 11.6729, "step": 33645 }, { "epoch": 0.7043037762706188, "grad_norm": 0.3174537718296051, "learning_rate": 0.0001740459968986665, "loss": 11.6678, "step": 33646 }, { "epoch": 0.7043247090345809, "grad_norm": 0.2598121166229248, "learning_rate": 0.0001740445232785061, "loss": 11.6763, "step": 33647 }, { "epoch": 0.7043456417985431, "grad_norm": 0.2905329465866089, "learning_rate": 0.000174043049622751, "loss": 11.6765, "step": 33648 }, { "epoch": 0.7043665745625053, "grad_norm": 0.2995181381702423, "learning_rate": 0.0001740415759314019, "loss": 11.6735, "step": 33649 }, { "epoch": 0.7043875073264674, "grad_norm": 0.3435656428337097, "learning_rate": 0.00017404010220445953, "loss": 11.6766, "step": 33650 }, { "epoch": 0.7044084400904296, "grad_norm": 0.3022252023220062, "learning_rate": 0.00017403862844192464, "loss": 11.685, "step": 33651 }, { "epoch": 0.7044293728543917, "grad_norm": 0.25052735209465027, "learning_rate": 0.00017403715464379786, "loss": 11.675, "step": 33652 }, { "epoch": 0.7044503056183539, "grad_norm": 0.739532470703125, "learning_rate": 0.00017403568081007995, "loss": 11.709, "step": 33653 }, { "epoch": 0.704471238382316, "grad_norm": 0.28181010484695435, "learning_rate": 0.00017403420694077161, "loss": 11.6666, "step": 33654 }, { "epoch": 0.7044921711462782, "grad_norm": 0.3153548240661621, "learning_rate": 0.00017403273303587353, "loss": 11.6839, "step": 33655 }, { "epoch": 0.7045131039102404, "grad_norm": 0.3692573606967926, "learning_rate": 0.00017403125909538645, "loss": 11.6583, "step": 33656 }, { "epoch": 0.7045340366742024, "grad_norm": 0.2573779225349426, "learning_rate": 0.00017402978511931103, "loss": 11.6701, "step": 33657 }, { "epoch": 0.7045549694381646, "grad_norm": 0.276187002658844, "learning_rate": 0.00017402831110764803, "loss": 11.656, "step": 33658 }, { "epoch": 0.7045759022021267, "grad_norm": 0.22557495534420013, "learning_rate": 0.00017402683706039813, "loss": 11.6793, "step": 33659 }, { "epoch": 0.7045968349660889, "grad_norm": 0.35120195150375366, "learning_rate": 0.00017402536297756206, "loss": 11.6717, "step": 33660 }, { "epoch": 0.704617767730051, "grad_norm": 0.25225067138671875, "learning_rate": 0.0001740238888591405, "loss": 11.6501, "step": 33661 }, { "epoch": 0.7046387004940132, "grad_norm": 0.2524941563606262, "learning_rate": 0.00017402241470513416, "loss": 11.6659, "step": 33662 }, { "epoch": 0.7046596332579754, "grad_norm": 0.29227980971336365, "learning_rate": 0.00017402094051554377, "loss": 11.6713, "step": 33663 }, { "epoch": 0.7046805660219375, "grad_norm": 0.2865767180919647, "learning_rate": 0.00017401946629037006, "loss": 11.6794, "step": 33664 }, { "epoch": 0.7047014987858997, "grad_norm": 0.24639028310775757, "learning_rate": 0.00017401799202961367, "loss": 11.6568, "step": 33665 }, { "epoch": 0.7047224315498618, "grad_norm": 0.24410630762577057, "learning_rate": 0.00017401651773327537, "loss": 11.6659, "step": 33666 }, { "epoch": 0.704743364313824, "grad_norm": 0.27944591641426086, "learning_rate": 0.00017401504340135583, "loss": 11.6661, "step": 33667 }, { "epoch": 0.7047642970777862, "grad_norm": 0.2885866165161133, "learning_rate": 0.0001740135690338558, "loss": 11.6753, "step": 33668 }, { "epoch": 0.7047852298417483, "grad_norm": 0.289695680141449, "learning_rate": 0.00017401209463077595, "loss": 11.6701, "step": 33669 }, { "epoch": 0.7048061626057105, "grad_norm": 0.29115059971809387, "learning_rate": 0.000174010620192117, "loss": 11.6502, "step": 33670 }, { "epoch": 0.7048270953696726, "grad_norm": 0.26289525628089905, "learning_rate": 0.00017400914571787966, "loss": 11.6728, "step": 33671 }, { "epoch": 0.7048480281336348, "grad_norm": 0.287544846534729, "learning_rate": 0.00017400767120806466, "loss": 11.6755, "step": 33672 }, { "epoch": 0.7048689608975969, "grad_norm": 0.2681966722011566, "learning_rate": 0.00017400619666267265, "loss": 11.6823, "step": 33673 }, { "epoch": 0.7048898936615591, "grad_norm": 0.3334580957889557, "learning_rate": 0.00017400472208170442, "loss": 11.6694, "step": 33674 }, { "epoch": 0.7049108264255213, "grad_norm": 0.2812364399433136, "learning_rate": 0.00017400324746516063, "loss": 11.6537, "step": 33675 }, { "epoch": 0.7049317591894834, "grad_norm": 0.29178741574287415, "learning_rate": 0.00017400177281304198, "loss": 11.6737, "step": 33676 }, { "epoch": 0.7049526919534456, "grad_norm": 0.30902931094169617, "learning_rate": 0.0001740002981253492, "loss": 11.6697, "step": 33677 }, { "epoch": 0.7049736247174077, "grad_norm": 0.26220884919166565, "learning_rate": 0.00017399882340208298, "loss": 11.6663, "step": 33678 }, { "epoch": 0.7049945574813699, "grad_norm": 0.30182406306266785, "learning_rate": 0.00017399734864324406, "loss": 11.675, "step": 33679 }, { "epoch": 0.7050154902453319, "grad_norm": 0.28663092851638794, "learning_rate": 0.00017399587384883314, "loss": 11.6505, "step": 33680 }, { "epoch": 0.7050364230092941, "grad_norm": 0.26160117983818054, "learning_rate": 0.0001739943990188509, "loss": 11.682, "step": 33681 }, { "epoch": 0.7050573557732563, "grad_norm": 0.33244940638542175, "learning_rate": 0.0001739929241532981, "loss": 11.6698, "step": 33682 }, { "epoch": 0.7050782885372184, "grad_norm": 0.3506900370121002, "learning_rate": 0.00017399144925217539, "loss": 11.6805, "step": 33683 }, { "epoch": 0.7050992213011806, "grad_norm": 0.276350736618042, "learning_rate": 0.0001739899743154835, "loss": 11.661, "step": 33684 }, { "epoch": 0.7051201540651427, "grad_norm": 0.2736481726169586, "learning_rate": 0.00017398849934322316, "loss": 11.6748, "step": 33685 }, { "epoch": 0.7051410868291049, "grad_norm": 0.3544689416885376, "learning_rate": 0.00017398702433539507, "loss": 11.6829, "step": 33686 }, { "epoch": 0.705162019593067, "grad_norm": 0.35558828711509705, "learning_rate": 0.00017398554929199994, "loss": 11.6755, "step": 33687 }, { "epoch": 0.7051829523570292, "grad_norm": 0.2745991349220276, "learning_rate": 0.00017398407421303849, "loss": 11.6856, "step": 33688 }, { "epoch": 0.7052038851209914, "grad_norm": 0.3099786341190338, "learning_rate": 0.00017398259909851138, "loss": 11.6792, "step": 33689 }, { "epoch": 0.7052248178849535, "grad_norm": 0.3297930359840393, "learning_rate": 0.00017398112394841935, "loss": 11.6924, "step": 33690 }, { "epoch": 0.7052457506489157, "grad_norm": 0.2388712614774704, "learning_rate": 0.00017397964876276312, "loss": 11.6645, "step": 33691 }, { "epoch": 0.7052666834128778, "grad_norm": 0.3922553062438965, "learning_rate": 0.0001739781735415434, "loss": 11.6807, "step": 33692 }, { "epoch": 0.70528761617684, "grad_norm": 0.3062627613544464, "learning_rate": 0.00017397669828476085, "loss": 11.6783, "step": 33693 }, { "epoch": 0.7053085489408022, "grad_norm": 0.22809863090515137, "learning_rate": 0.00017397522299241627, "loss": 11.6734, "step": 33694 }, { "epoch": 0.7053294817047643, "grad_norm": 0.39095571637153625, "learning_rate": 0.00017397374766451028, "loss": 11.6799, "step": 33695 }, { "epoch": 0.7053504144687265, "grad_norm": 0.31649601459503174, "learning_rate": 0.00017397227230104366, "loss": 11.6633, "step": 33696 }, { "epoch": 0.7053713472326886, "grad_norm": 0.2543971836566925, "learning_rate": 0.00017397079690201707, "loss": 11.6681, "step": 33697 }, { "epoch": 0.7053922799966508, "grad_norm": 0.2868657112121582, "learning_rate": 0.00017396932146743126, "loss": 11.6751, "step": 33698 }, { "epoch": 0.7054132127606129, "grad_norm": 0.32873454689979553, "learning_rate": 0.00017396784599728687, "loss": 11.6727, "step": 33699 }, { "epoch": 0.7054341455245751, "grad_norm": 0.2658120095729828, "learning_rate": 0.00017396637049158468, "loss": 11.6624, "step": 33700 }, { "epoch": 0.7054550782885373, "grad_norm": 0.2850452959537506, "learning_rate": 0.00017396489495032539, "loss": 11.6695, "step": 33701 }, { "epoch": 0.7054760110524994, "grad_norm": 0.2712765336036682, "learning_rate": 0.00017396341937350968, "loss": 11.6785, "step": 33702 }, { "epoch": 0.7054969438164616, "grad_norm": 0.42951536178588867, "learning_rate": 0.00017396194376113823, "loss": 11.6696, "step": 33703 }, { "epoch": 0.7055178765804236, "grad_norm": 0.28011348843574524, "learning_rate": 0.00017396046811321182, "loss": 11.6748, "step": 33704 }, { "epoch": 0.7055388093443858, "grad_norm": 0.31272611021995544, "learning_rate": 0.00017395899242973116, "loss": 11.6797, "step": 33705 }, { "epoch": 0.7055597421083479, "grad_norm": 0.2968178987503052, "learning_rate": 0.00017395751671069688, "loss": 11.675, "step": 33706 }, { "epoch": 0.7055806748723101, "grad_norm": 0.24935364723205566, "learning_rate": 0.00017395604095610977, "loss": 11.666, "step": 33707 }, { "epoch": 0.7056016076362723, "grad_norm": 0.2831825613975525, "learning_rate": 0.0001739545651659705, "loss": 11.6683, "step": 33708 }, { "epoch": 0.7056225404002344, "grad_norm": 0.3448464870452881, "learning_rate": 0.0001739530893402798, "loss": 11.6695, "step": 33709 }, { "epoch": 0.7056434731641966, "grad_norm": 0.2978319227695465, "learning_rate": 0.0001739516134790384, "loss": 11.6752, "step": 33710 }, { "epoch": 0.7056644059281587, "grad_norm": 0.28470227122306824, "learning_rate": 0.0001739501375822469, "loss": 11.6783, "step": 33711 }, { "epoch": 0.7056853386921209, "grad_norm": 0.34904587268829346, "learning_rate": 0.00017394866164990614, "loss": 11.6716, "step": 33712 }, { "epoch": 0.7057062714560831, "grad_norm": 0.27243420481681824, "learning_rate": 0.00017394718568201675, "loss": 11.6795, "step": 33713 }, { "epoch": 0.7057272042200452, "grad_norm": 0.4325674772262573, "learning_rate": 0.0001739457096785795, "loss": 11.6749, "step": 33714 }, { "epoch": 0.7057481369840074, "grad_norm": 0.24585352838039398, "learning_rate": 0.00017394423363959502, "loss": 11.6721, "step": 33715 }, { "epoch": 0.7057690697479695, "grad_norm": 0.33074718713760376, "learning_rate": 0.0001739427575650641, "loss": 11.6932, "step": 33716 }, { "epoch": 0.7057900025119317, "grad_norm": 0.2767069339752197, "learning_rate": 0.0001739412814549874, "loss": 11.6613, "step": 33717 }, { "epoch": 0.7058109352758938, "grad_norm": 0.3093099594116211, "learning_rate": 0.00017393980530936565, "loss": 11.6666, "step": 33718 }, { "epoch": 0.705831868039856, "grad_norm": 0.3031144142150879, "learning_rate": 0.00017393832912819955, "loss": 11.6685, "step": 33719 }, { "epoch": 0.7058528008038182, "grad_norm": 0.32079488039016724, "learning_rate": 0.0001739368529114898, "loss": 11.691, "step": 33720 }, { "epoch": 0.7058737335677803, "grad_norm": 0.2833738923072815, "learning_rate": 0.00017393537665923715, "loss": 11.6797, "step": 33721 }, { "epoch": 0.7058946663317425, "grad_norm": 0.2803194522857666, "learning_rate": 0.00017393390037144228, "loss": 11.6738, "step": 33722 }, { "epoch": 0.7059155990957046, "grad_norm": 0.3138085603713989, "learning_rate": 0.0001739324240481059, "loss": 11.6783, "step": 33723 }, { "epoch": 0.7059365318596668, "grad_norm": 0.26305630803108215, "learning_rate": 0.0001739309476892287, "loss": 11.6661, "step": 33724 }, { "epoch": 0.7059574646236288, "grad_norm": 0.305739164352417, "learning_rate": 0.00017392947129481144, "loss": 11.6696, "step": 33725 }, { "epoch": 0.705978397387591, "grad_norm": 0.31144222617149353, "learning_rate": 0.00017392799486485477, "loss": 11.6749, "step": 33726 }, { "epoch": 0.7059993301515533, "grad_norm": 0.33138570189476013, "learning_rate": 0.00017392651839935946, "loss": 11.6835, "step": 33727 }, { "epoch": 0.7060202629155153, "grad_norm": 0.2763657867908478, "learning_rate": 0.00017392504189832616, "loss": 11.6518, "step": 33728 }, { "epoch": 0.7060411956794775, "grad_norm": 0.3006053566932678, "learning_rate": 0.00017392356536175566, "loss": 11.6764, "step": 33729 }, { "epoch": 0.7060621284434396, "grad_norm": 0.35375550389289856, "learning_rate": 0.00017392208878964859, "loss": 11.6731, "step": 33730 }, { "epoch": 0.7060830612074018, "grad_norm": 0.29226604104042053, "learning_rate": 0.00017392061218200568, "loss": 11.6712, "step": 33731 }, { "epoch": 0.706103993971364, "grad_norm": 0.3972037136554718, "learning_rate": 0.00017391913553882767, "loss": 11.6812, "step": 33732 }, { "epoch": 0.7061249267353261, "grad_norm": 0.2972992956638336, "learning_rate": 0.00017391765886011525, "loss": 11.6758, "step": 33733 }, { "epoch": 0.7061458594992883, "grad_norm": 0.3764856457710266, "learning_rate": 0.00017391618214586911, "loss": 11.6612, "step": 33734 }, { "epoch": 0.7061667922632504, "grad_norm": 0.4000943601131439, "learning_rate": 0.00017391470539609, "loss": 11.6583, "step": 33735 }, { "epoch": 0.7061877250272126, "grad_norm": 0.2483513057231903, "learning_rate": 0.00017391322861077862, "loss": 11.6502, "step": 33736 }, { "epoch": 0.7062086577911747, "grad_norm": 0.3211227059364319, "learning_rate": 0.00017391175178993565, "loss": 11.6613, "step": 33737 }, { "epoch": 0.7062295905551369, "grad_norm": 0.34606337547302246, "learning_rate": 0.0001739102749335618, "loss": 11.6663, "step": 33738 }, { "epoch": 0.7062505233190991, "grad_norm": 0.31223276257514954, "learning_rate": 0.00017390879804165784, "loss": 11.6589, "step": 33739 }, { "epoch": 0.7062714560830612, "grad_norm": 0.27608683705329895, "learning_rate": 0.0001739073211142244, "loss": 11.6556, "step": 33740 }, { "epoch": 0.7062923888470234, "grad_norm": 0.3417898714542389, "learning_rate": 0.00017390584415126226, "loss": 11.6814, "step": 33741 }, { "epoch": 0.7063133216109855, "grad_norm": 0.2754233777523041, "learning_rate": 0.0001739043671527721, "loss": 11.6559, "step": 33742 }, { "epoch": 0.7063342543749477, "grad_norm": 0.37209275364875793, "learning_rate": 0.0001739028901187546, "loss": 11.6713, "step": 33743 }, { "epoch": 0.7063551871389098, "grad_norm": 0.24016200006008148, "learning_rate": 0.00017390141304921052, "loss": 11.6788, "step": 33744 }, { "epoch": 0.706376119902872, "grad_norm": 0.23605401813983917, "learning_rate": 0.00017389993594414059, "loss": 11.6534, "step": 33745 }, { "epoch": 0.7063970526668342, "grad_norm": 0.2634411156177521, "learning_rate": 0.00017389845880354542, "loss": 11.6901, "step": 33746 }, { "epoch": 0.7064179854307963, "grad_norm": 0.24393533170223236, "learning_rate": 0.0001738969816274258, "loss": 11.6788, "step": 33747 }, { "epoch": 0.7064389181947585, "grad_norm": 0.3228206932544708, "learning_rate": 0.00017389550441578242, "loss": 11.6726, "step": 33748 }, { "epoch": 0.7064598509587205, "grad_norm": 0.3314160406589508, "learning_rate": 0.00017389402716861598, "loss": 11.6711, "step": 33749 }, { "epoch": 0.7064807837226827, "grad_norm": 0.29210609197616577, "learning_rate": 0.00017389254988592722, "loss": 11.6922, "step": 33750 }, { "epoch": 0.706501716486645, "grad_norm": 0.3079579472541809, "learning_rate": 0.00017389107256771683, "loss": 11.6828, "step": 33751 }, { "epoch": 0.706522649250607, "grad_norm": 0.39820656180381775, "learning_rate": 0.00017388959521398548, "loss": 11.654, "step": 33752 }, { "epoch": 0.7065435820145692, "grad_norm": 0.301635205745697, "learning_rate": 0.00017388811782473396, "loss": 11.6696, "step": 33753 }, { "epoch": 0.7065645147785313, "grad_norm": 0.24673926830291748, "learning_rate": 0.00017388664039996295, "loss": 11.6564, "step": 33754 }, { "epoch": 0.7065854475424935, "grad_norm": 0.3157525360584259, "learning_rate": 0.00017388516293967313, "loss": 11.6691, "step": 33755 }, { "epoch": 0.7066063803064556, "grad_norm": 0.309924453496933, "learning_rate": 0.00017388368544386524, "loss": 11.671, "step": 33756 }, { "epoch": 0.7066273130704178, "grad_norm": 0.2349713295698166, "learning_rate": 0.00017388220791253998, "loss": 11.6706, "step": 33757 }, { "epoch": 0.70664824583438, "grad_norm": 0.33436205983161926, "learning_rate": 0.00017388073034569804, "loss": 11.6693, "step": 33758 }, { "epoch": 0.7066691785983421, "grad_norm": 0.3421935737133026, "learning_rate": 0.0001738792527433402, "loss": 11.6773, "step": 33759 }, { "epoch": 0.7066901113623043, "grad_norm": 0.3643018901348114, "learning_rate": 0.00017387777510546708, "loss": 11.6875, "step": 33760 }, { "epoch": 0.7067110441262664, "grad_norm": 0.2846479117870331, "learning_rate": 0.00017387629743207945, "loss": 11.6771, "step": 33761 }, { "epoch": 0.7067319768902286, "grad_norm": 0.31202760338783264, "learning_rate": 0.000173874819723178, "loss": 11.671, "step": 33762 }, { "epoch": 0.7067529096541907, "grad_norm": 0.26556703448295593, "learning_rate": 0.00017387334197876343, "loss": 11.6617, "step": 33763 }, { "epoch": 0.7067738424181529, "grad_norm": 0.30270957946777344, "learning_rate": 0.0001738718641988365, "loss": 11.6679, "step": 33764 }, { "epoch": 0.7067947751821151, "grad_norm": 0.38870739936828613, "learning_rate": 0.00017387038638339787, "loss": 11.6697, "step": 33765 }, { "epoch": 0.7068157079460772, "grad_norm": 0.25723472237586975, "learning_rate": 0.00017386890853244823, "loss": 11.6614, "step": 33766 }, { "epoch": 0.7068366407100394, "grad_norm": 0.2972109019756317, "learning_rate": 0.00017386743064598837, "loss": 11.6621, "step": 33767 }, { "epoch": 0.7068575734740015, "grad_norm": 0.30129820108413696, "learning_rate": 0.00017386595272401894, "loss": 11.6611, "step": 33768 }, { "epoch": 0.7068785062379637, "grad_norm": 0.3146406412124634, "learning_rate": 0.00017386447476654067, "loss": 11.6612, "step": 33769 }, { "epoch": 0.7068994390019259, "grad_norm": 0.2992136478424072, "learning_rate": 0.00017386299677355424, "loss": 11.6684, "step": 33770 }, { "epoch": 0.706920371765888, "grad_norm": 0.2781982421875, "learning_rate": 0.00017386151874506042, "loss": 11.6735, "step": 33771 }, { "epoch": 0.7069413045298502, "grad_norm": 0.3406878113746643, "learning_rate": 0.00017386004068105989, "loss": 11.6753, "step": 33772 }, { "epoch": 0.7069622372938122, "grad_norm": 0.323935866355896, "learning_rate": 0.00017385856258155332, "loss": 11.6577, "step": 33773 }, { "epoch": 0.7069831700577744, "grad_norm": 0.3562234342098236, "learning_rate": 0.00017385708444654153, "loss": 11.6719, "step": 33774 }, { "epoch": 0.7070041028217365, "grad_norm": 0.3091665506362915, "learning_rate": 0.0001738556062760251, "loss": 11.6635, "step": 33775 }, { "epoch": 0.7070250355856987, "grad_norm": 0.25652840733528137, "learning_rate": 0.0001738541280700048, "loss": 11.6522, "step": 33776 }, { "epoch": 0.7070459683496609, "grad_norm": 0.2987505793571472, "learning_rate": 0.00017385264982848136, "loss": 11.6601, "step": 33777 }, { "epoch": 0.707066901113623, "grad_norm": 0.3960975706577301, "learning_rate": 0.00017385117155145548, "loss": 11.6781, "step": 33778 }, { "epoch": 0.7070878338775852, "grad_norm": 0.2763100266456604, "learning_rate": 0.00017384969323892783, "loss": 11.6734, "step": 33779 }, { "epoch": 0.7071087666415473, "grad_norm": 0.3591424226760864, "learning_rate": 0.00017384821489089918, "loss": 11.6852, "step": 33780 }, { "epoch": 0.7071296994055095, "grad_norm": 0.42068785429000854, "learning_rate": 0.0001738467365073702, "loss": 11.6546, "step": 33781 }, { "epoch": 0.7071506321694716, "grad_norm": 0.2554837465286255, "learning_rate": 0.0001738452580883416, "loss": 11.6725, "step": 33782 }, { "epoch": 0.7071715649334338, "grad_norm": 0.33021432161331177, "learning_rate": 0.00017384377963381413, "loss": 11.6721, "step": 33783 }, { "epoch": 0.707192497697396, "grad_norm": 0.34338971972465515, "learning_rate": 0.00017384230114378846, "loss": 11.6477, "step": 33784 }, { "epoch": 0.7072134304613581, "grad_norm": 0.3122347295284271, "learning_rate": 0.00017384082261826534, "loss": 11.6781, "step": 33785 }, { "epoch": 0.7072343632253203, "grad_norm": 0.374916136264801, "learning_rate": 0.00017383934405724544, "loss": 11.6651, "step": 33786 }, { "epoch": 0.7072552959892824, "grad_norm": 0.3248193562030792, "learning_rate": 0.00017383786546072946, "loss": 11.6715, "step": 33787 }, { "epoch": 0.7072762287532446, "grad_norm": 0.269991934299469, "learning_rate": 0.0001738363868287182, "loss": 11.6471, "step": 33788 }, { "epoch": 0.7072971615172068, "grad_norm": 0.3046831488609314, "learning_rate": 0.00017383490816121224, "loss": 11.6727, "step": 33789 }, { "epoch": 0.7073180942811689, "grad_norm": 0.29046401381492615, "learning_rate": 0.00017383342945821243, "loss": 11.6618, "step": 33790 }, { "epoch": 0.7073390270451311, "grad_norm": 0.23953473567962646, "learning_rate": 0.00017383195071971934, "loss": 11.6716, "step": 33791 }, { "epoch": 0.7073599598090932, "grad_norm": 0.2975582480430603, "learning_rate": 0.0001738304719457338, "loss": 11.6652, "step": 33792 }, { "epoch": 0.7073808925730554, "grad_norm": 0.2715595066547394, "learning_rate": 0.00017382899313625645, "loss": 11.6661, "step": 33793 }, { "epoch": 0.7074018253370175, "grad_norm": 0.27011173963546753, "learning_rate": 0.00017382751429128806, "loss": 11.6657, "step": 33794 }, { "epoch": 0.7074227581009797, "grad_norm": 0.37371450662612915, "learning_rate": 0.00017382603541082924, "loss": 11.6787, "step": 33795 }, { "epoch": 0.7074436908649419, "grad_norm": 0.3215506374835968, "learning_rate": 0.00017382455649488082, "loss": 11.6517, "step": 33796 }, { "epoch": 0.7074646236289039, "grad_norm": 0.29461321234703064, "learning_rate": 0.00017382307754344343, "loss": 11.6626, "step": 33797 }, { "epoch": 0.7074855563928661, "grad_norm": 0.33877500891685486, "learning_rate": 0.00017382159855651783, "loss": 11.6711, "step": 33798 }, { "epoch": 0.7075064891568282, "grad_norm": 0.3012746572494507, "learning_rate": 0.0001738201195341047, "loss": 11.6685, "step": 33799 }, { "epoch": 0.7075274219207904, "grad_norm": 0.29191967844963074, "learning_rate": 0.00017381864047620474, "loss": 11.6719, "step": 33800 }, { "epoch": 0.7075483546847525, "grad_norm": 0.3033107817173004, "learning_rate": 0.0001738171613828187, "loss": 11.6783, "step": 33801 }, { "epoch": 0.7075692874487147, "grad_norm": 0.3294147551059723, "learning_rate": 0.00017381568225394725, "loss": 11.6538, "step": 33802 }, { "epoch": 0.7075902202126769, "grad_norm": 0.2902080714702606, "learning_rate": 0.00017381420308959115, "loss": 11.6745, "step": 33803 }, { "epoch": 0.707611152976639, "grad_norm": 0.2648649513721466, "learning_rate": 0.00017381272388975106, "loss": 11.6568, "step": 33804 }, { "epoch": 0.7076320857406012, "grad_norm": 0.2912169396877289, "learning_rate": 0.00017381124465442772, "loss": 11.6633, "step": 33805 }, { "epoch": 0.7076530185045633, "grad_norm": 0.31537237763404846, "learning_rate": 0.00017380976538362185, "loss": 11.6747, "step": 33806 }, { "epoch": 0.7076739512685255, "grad_norm": 0.28973478078842163, "learning_rate": 0.00017380828607733414, "loss": 11.6657, "step": 33807 }, { "epoch": 0.7076948840324877, "grad_norm": 0.328661173582077, "learning_rate": 0.0001738068067355653, "loss": 11.6862, "step": 33808 }, { "epoch": 0.7077158167964498, "grad_norm": 0.31891152262687683, "learning_rate": 0.00017380532735831604, "loss": 11.6729, "step": 33809 }, { "epoch": 0.707736749560412, "grad_norm": 0.30142441391944885, "learning_rate": 0.0001738038479455871, "loss": 11.6739, "step": 33810 }, { "epoch": 0.7077576823243741, "grad_norm": 0.29681190848350525, "learning_rate": 0.00017380236849737918, "loss": 11.6762, "step": 33811 }, { "epoch": 0.7077786150883363, "grad_norm": 0.28392913937568665, "learning_rate": 0.00017380088901369298, "loss": 11.6779, "step": 33812 }, { "epoch": 0.7077995478522984, "grad_norm": 0.2591255009174347, "learning_rate": 0.00017379940949452917, "loss": 11.6574, "step": 33813 }, { "epoch": 0.7078204806162606, "grad_norm": 0.24621880054473877, "learning_rate": 0.00017379792993988855, "loss": 11.6655, "step": 33814 }, { "epoch": 0.7078414133802228, "grad_norm": 0.3165534734725952, "learning_rate": 0.00017379645034977176, "loss": 11.6655, "step": 33815 }, { "epoch": 0.7078623461441849, "grad_norm": 0.28172510862350464, "learning_rate": 0.00017379497072417957, "loss": 11.65, "step": 33816 }, { "epoch": 0.7078832789081471, "grad_norm": 0.27150237560272217, "learning_rate": 0.0001737934910631126, "loss": 11.6819, "step": 33817 }, { "epoch": 0.7079042116721092, "grad_norm": 0.37862443923950195, "learning_rate": 0.00017379201136657167, "loss": 11.6749, "step": 33818 }, { "epoch": 0.7079251444360714, "grad_norm": 0.3612746596336365, "learning_rate": 0.00017379053163455745, "loss": 11.665, "step": 33819 }, { "epoch": 0.7079460772000334, "grad_norm": 0.22666718065738678, "learning_rate": 0.00017378905186707063, "loss": 11.6584, "step": 33820 }, { "epoch": 0.7079670099639956, "grad_norm": 0.3372184932231903, "learning_rate": 0.0001737875720641119, "loss": 11.6795, "step": 33821 }, { "epoch": 0.7079879427279578, "grad_norm": 0.29387399554252625, "learning_rate": 0.00017378609222568205, "loss": 11.6695, "step": 33822 }, { "epoch": 0.7080088754919199, "grad_norm": 0.36291375756263733, "learning_rate": 0.0001737846123517817, "loss": 11.6514, "step": 33823 }, { "epoch": 0.7080298082558821, "grad_norm": 0.2470996379852295, "learning_rate": 0.00017378313244241167, "loss": 11.6714, "step": 33824 }, { "epoch": 0.7080507410198442, "grad_norm": 0.2802947163581848, "learning_rate": 0.00017378165249757257, "loss": 11.6725, "step": 33825 }, { "epoch": 0.7080716737838064, "grad_norm": 0.2696581482887268, "learning_rate": 0.00017378017251726516, "loss": 11.6611, "step": 33826 }, { "epoch": 0.7080926065477686, "grad_norm": 0.26082509756088257, "learning_rate": 0.00017377869250149015, "loss": 11.6759, "step": 33827 }, { "epoch": 0.7081135393117307, "grad_norm": 0.22980178892612457, "learning_rate": 0.00017377721245024822, "loss": 11.6579, "step": 33828 }, { "epoch": 0.7081344720756929, "grad_norm": 0.2907353639602661, "learning_rate": 0.00017377573236354013, "loss": 11.6704, "step": 33829 }, { "epoch": 0.708155404839655, "grad_norm": 0.28298860788345337, "learning_rate": 0.00017377425224136654, "loss": 11.6665, "step": 33830 }, { "epoch": 0.7081763376036172, "grad_norm": 0.2742326557636261, "learning_rate": 0.00017377277208372818, "loss": 11.6758, "step": 33831 }, { "epoch": 0.7081972703675793, "grad_norm": 0.2723649740219116, "learning_rate": 0.00017377129189062582, "loss": 11.6783, "step": 33832 }, { "epoch": 0.7082182031315415, "grad_norm": 0.29177525639533997, "learning_rate": 0.0001737698116620601, "loss": 11.6613, "step": 33833 }, { "epoch": 0.7082391358955037, "grad_norm": 0.2777281105518341, "learning_rate": 0.00017376833139803175, "loss": 11.6613, "step": 33834 }, { "epoch": 0.7082600686594658, "grad_norm": 0.4029599726200104, "learning_rate": 0.00017376685109854147, "loss": 11.6716, "step": 33835 }, { "epoch": 0.708281001423428, "grad_norm": 0.26289260387420654, "learning_rate": 0.00017376537076359, "loss": 11.6799, "step": 33836 }, { "epoch": 0.7083019341873901, "grad_norm": 0.29176998138427734, "learning_rate": 0.00017376389039317806, "loss": 11.6856, "step": 33837 }, { "epoch": 0.7083228669513523, "grad_norm": 0.31042277812957764, "learning_rate": 0.0001737624099873063, "loss": 11.6866, "step": 33838 }, { "epoch": 0.7083437997153144, "grad_norm": 0.29080918431282043, "learning_rate": 0.0001737609295459755, "loss": 11.6705, "step": 33839 }, { "epoch": 0.7083647324792766, "grad_norm": 0.35658136010169983, "learning_rate": 0.00017375944906918632, "loss": 11.6577, "step": 33840 }, { "epoch": 0.7083856652432388, "grad_norm": 0.2964606285095215, "learning_rate": 0.00017375796855693954, "loss": 11.6764, "step": 33841 }, { "epoch": 0.7084065980072008, "grad_norm": 0.27626025676727295, "learning_rate": 0.00017375648800923576, "loss": 11.6664, "step": 33842 }, { "epoch": 0.708427530771163, "grad_norm": 0.36930540204048157, "learning_rate": 0.00017375500742607583, "loss": 11.6825, "step": 33843 }, { "epoch": 0.7084484635351251, "grad_norm": 0.33802512288093567, "learning_rate": 0.00017375352680746033, "loss": 11.6946, "step": 33844 }, { "epoch": 0.7084693962990873, "grad_norm": 0.3077668249607086, "learning_rate": 0.00017375204615339006, "loss": 11.6688, "step": 33845 }, { "epoch": 0.7084903290630495, "grad_norm": 0.3497442305088043, "learning_rate": 0.00017375056546386567, "loss": 11.6661, "step": 33846 }, { "epoch": 0.7085112618270116, "grad_norm": 0.26313167810440063, "learning_rate": 0.00017374908473888792, "loss": 11.6744, "step": 33847 }, { "epoch": 0.7085321945909738, "grad_norm": 0.8535274863243103, "learning_rate": 0.00017374760397845755, "loss": 11.6319, "step": 33848 }, { "epoch": 0.7085531273549359, "grad_norm": 0.3668677806854248, "learning_rate": 0.00017374612318257518, "loss": 11.6802, "step": 33849 }, { "epoch": 0.7085740601188981, "grad_norm": 0.24450504779815674, "learning_rate": 0.00017374464235124162, "loss": 11.6737, "step": 33850 }, { "epoch": 0.7085949928828602, "grad_norm": 0.3247922360897064, "learning_rate": 0.00017374316148445747, "loss": 11.6594, "step": 33851 }, { "epoch": 0.7086159256468224, "grad_norm": 0.28698140382766724, "learning_rate": 0.00017374168058222354, "loss": 11.6709, "step": 33852 }, { "epoch": 0.7086368584107846, "grad_norm": 0.26852908730506897, "learning_rate": 0.0001737401996445405, "loss": 11.6687, "step": 33853 }, { "epoch": 0.7086577911747467, "grad_norm": 0.26362356543540955, "learning_rate": 0.0001737387186714091, "loss": 11.6695, "step": 33854 }, { "epoch": 0.7086787239387089, "grad_norm": 0.314710408449173, "learning_rate": 0.00017373723766283, "loss": 11.6674, "step": 33855 }, { "epoch": 0.708699656702671, "grad_norm": 0.32657530903816223, "learning_rate": 0.0001737357566188039, "loss": 11.674, "step": 33856 }, { "epoch": 0.7087205894666332, "grad_norm": 0.27842745184898376, "learning_rate": 0.00017373427553933155, "loss": 11.6662, "step": 33857 }, { "epoch": 0.7087415222305953, "grad_norm": 0.3014088273048401, "learning_rate": 0.00017373279442441369, "loss": 11.6653, "step": 33858 }, { "epoch": 0.7087624549945575, "grad_norm": 0.29885146021842957, "learning_rate": 0.00017373131327405095, "loss": 11.6602, "step": 33859 }, { "epoch": 0.7087833877585197, "grad_norm": 0.3601674735546112, "learning_rate": 0.00017372983208824414, "loss": 11.6816, "step": 33860 }, { "epoch": 0.7088043205224818, "grad_norm": 0.29039466381073, "learning_rate": 0.0001737283508669939, "loss": 11.6627, "step": 33861 }, { "epoch": 0.708825253286444, "grad_norm": 0.23796769976615906, "learning_rate": 0.00017372686961030097, "loss": 11.6871, "step": 33862 }, { "epoch": 0.7088461860504061, "grad_norm": 0.29252925515174866, "learning_rate": 0.00017372538831816607, "loss": 11.6733, "step": 33863 }, { "epoch": 0.7088671188143683, "grad_norm": 0.3292423486709595, "learning_rate": 0.00017372390699058987, "loss": 11.657, "step": 33864 }, { "epoch": 0.7088880515783305, "grad_norm": 0.22891314327716827, "learning_rate": 0.00017372242562757313, "loss": 11.6841, "step": 33865 }, { "epoch": 0.7089089843422925, "grad_norm": 0.2642604410648346, "learning_rate": 0.0001737209442291165, "loss": 11.6766, "step": 33866 }, { "epoch": 0.7089299171062547, "grad_norm": 0.3333972692489624, "learning_rate": 0.0001737194627952208, "loss": 11.6677, "step": 33867 }, { "epoch": 0.7089508498702168, "grad_norm": 0.3238922655582428, "learning_rate": 0.00017371798132588664, "loss": 11.6791, "step": 33868 }, { "epoch": 0.708971782634179, "grad_norm": 0.34615933895111084, "learning_rate": 0.00017371649982111478, "loss": 11.6613, "step": 33869 }, { "epoch": 0.7089927153981411, "grad_norm": 0.32469022274017334, "learning_rate": 0.0001737150182809059, "loss": 11.6733, "step": 33870 }, { "epoch": 0.7090136481621033, "grad_norm": 0.2917696535587311, "learning_rate": 0.00017371353670526075, "loss": 11.6563, "step": 33871 }, { "epoch": 0.7090345809260655, "grad_norm": 0.2691234052181244, "learning_rate": 0.00017371205509418002, "loss": 11.6717, "step": 33872 }, { "epoch": 0.7090555136900276, "grad_norm": 0.39475446939468384, "learning_rate": 0.00017371057344766445, "loss": 11.6605, "step": 33873 }, { "epoch": 0.7090764464539898, "grad_norm": 0.2875230014324188, "learning_rate": 0.0001737090917657147, "loss": 11.6676, "step": 33874 }, { "epoch": 0.7090973792179519, "grad_norm": 0.29422828555107117, "learning_rate": 0.00017370761004833156, "loss": 11.664, "step": 33875 }, { "epoch": 0.7091183119819141, "grad_norm": 0.3242359757423401, "learning_rate": 0.00017370612829551566, "loss": 11.6842, "step": 33876 }, { "epoch": 0.7091392447458762, "grad_norm": 0.284123033285141, "learning_rate": 0.00017370464650726773, "loss": 11.6733, "step": 33877 }, { "epoch": 0.7091601775098384, "grad_norm": 0.2767094671726227, "learning_rate": 0.0001737031646835885, "loss": 11.6836, "step": 33878 }, { "epoch": 0.7091811102738006, "grad_norm": 0.30898317694664, "learning_rate": 0.00017370168282447872, "loss": 11.6639, "step": 33879 }, { "epoch": 0.7092020430377627, "grad_norm": 0.30370739102363586, "learning_rate": 0.00017370020092993905, "loss": 11.6749, "step": 33880 }, { "epoch": 0.7092229758017249, "grad_norm": 0.30414724349975586, "learning_rate": 0.0001736987189999702, "loss": 11.6642, "step": 33881 }, { "epoch": 0.709243908565687, "grad_norm": 0.3411453366279602, "learning_rate": 0.0001736972370345729, "loss": 11.676, "step": 33882 }, { "epoch": 0.7092648413296492, "grad_norm": 0.30629628896713257, "learning_rate": 0.00017369575503374787, "loss": 11.6597, "step": 33883 }, { "epoch": 0.7092857740936113, "grad_norm": 0.3035754859447479, "learning_rate": 0.00017369427299749582, "loss": 11.6733, "step": 33884 }, { "epoch": 0.7093067068575735, "grad_norm": 0.2806384861469269, "learning_rate": 0.00017369279092581744, "loss": 11.6644, "step": 33885 }, { "epoch": 0.7093276396215357, "grad_norm": 0.4307851791381836, "learning_rate": 0.00017369130881871348, "loss": 11.687, "step": 33886 }, { "epoch": 0.7093485723854978, "grad_norm": 0.30127543210983276, "learning_rate": 0.0001736898266761846, "loss": 11.6657, "step": 33887 }, { "epoch": 0.70936950514946, "grad_norm": 0.30874454975128174, "learning_rate": 0.00017368834449823156, "loss": 11.6719, "step": 33888 }, { "epoch": 0.709390437913422, "grad_norm": 0.33174917101860046, "learning_rate": 0.00017368686228485508, "loss": 11.6827, "step": 33889 }, { "epoch": 0.7094113706773842, "grad_norm": 0.30249103903770447, "learning_rate": 0.00017368538003605582, "loss": 11.6704, "step": 33890 }, { "epoch": 0.7094323034413464, "grad_norm": 0.29995912313461304, "learning_rate": 0.00017368389775183454, "loss": 11.6697, "step": 33891 }, { "epoch": 0.7094532362053085, "grad_norm": 0.2698787748813629, "learning_rate": 0.0001736824154321919, "loss": 11.6615, "step": 33892 }, { "epoch": 0.7094741689692707, "grad_norm": 0.279093861579895, "learning_rate": 0.0001736809330771287, "loss": 11.6729, "step": 33893 }, { "epoch": 0.7094951017332328, "grad_norm": 0.28394773602485657, "learning_rate": 0.00017367945068664554, "loss": 11.6619, "step": 33894 }, { "epoch": 0.709516034497195, "grad_norm": 0.2667514681816101, "learning_rate": 0.00017367796826074324, "loss": 11.6738, "step": 33895 }, { "epoch": 0.7095369672611571, "grad_norm": 0.2785898447036743, "learning_rate": 0.0001736764857994224, "loss": 11.6752, "step": 33896 }, { "epoch": 0.7095579000251193, "grad_norm": 0.323968768119812, "learning_rate": 0.00017367500330268387, "loss": 11.6785, "step": 33897 }, { "epoch": 0.7095788327890815, "grad_norm": 0.3191859722137451, "learning_rate": 0.00017367352077052827, "loss": 11.6727, "step": 33898 }, { "epoch": 0.7095997655530436, "grad_norm": 0.26672428846359253, "learning_rate": 0.00017367203820295633, "loss": 11.6572, "step": 33899 }, { "epoch": 0.7096206983170058, "grad_norm": 0.2671007215976715, "learning_rate": 0.00017367055559996874, "loss": 11.6776, "step": 33900 }, { "epoch": 0.7096416310809679, "grad_norm": 0.5042053461074829, "learning_rate": 0.00017366907296156625, "loss": 11.6851, "step": 33901 }, { "epoch": 0.7096625638449301, "grad_norm": 0.3580425977706909, "learning_rate": 0.00017366759028774955, "loss": 11.6857, "step": 33902 }, { "epoch": 0.7096834966088922, "grad_norm": 0.26470375061035156, "learning_rate": 0.0001736661075785194, "loss": 11.6646, "step": 33903 }, { "epoch": 0.7097044293728544, "grad_norm": 0.2825916111469269, "learning_rate": 0.00017366462483387642, "loss": 11.6598, "step": 33904 }, { "epoch": 0.7097253621368166, "grad_norm": 0.2888649106025696, "learning_rate": 0.0001736631420538214, "loss": 11.6694, "step": 33905 }, { "epoch": 0.7097462949007787, "grad_norm": 0.3261151909828186, "learning_rate": 0.0001736616592383551, "loss": 11.6663, "step": 33906 }, { "epoch": 0.7097672276647409, "grad_norm": 0.2574615776538849, "learning_rate": 0.00017366017638747807, "loss": 11.6815, "step": 33907 }, { "epoch": 0.709788160428703, "grad_norm": 0.3133297264575958, "learning_rate": 0.00017365869350119116, "loss": 11.6704, "step": 33908 }, { "epoch": 0.7098090931926652, "grad_norm": 0.35838043689727783, "learning_rate": 0.00017365721057949505, "loss": 11.6805, "step": 33909 }, { "epoch": 0.7098300259566274, "grad_norm": 0.3067834973335266, "learning_rate": 0.00017365572762239042, "loss": 11.6732, "step": 33910 }, { "epoch": 0.7098509587205895, "grad_norm": 0.3072202801704407, "learning_rate": 0.00017365424462987801, "loss": 11.6895, "step": 33911 }, { "epoch": 0.7098718914845517, "grad_norm": 0.30669066309928894, "learning_rate": 0.00017365276160195852, "loss": 11.6724, "step": 33912 }, { "epoch": 0.7098928242485137, "grad_norm": 0.23993873596191406, "learning_rate": 0.0001736512785386327, "loss": 11.6711, "step": 33913 }, { "epoch": 0.7099137570124759, "grad_norm": 0.3164878785610199, "learning_rate": 0.00017364979543990118, "loss": 11.6791, "step": 33914 }, { "epoch": 0.709934689776438, "grad_norm": 0.297539621591568, "learning_rate": 0.0001736483123057648, "loss": 11.6604, "step": 33915 }, { "epoch": 0.7099556225404002, "grad_norm": 0.2984132468700409, "learning_rate": 0.00017364682913622415, "loss": 11.6705, "step": 33916 }, { "epoch": 0.7099765553043624, "grad_norm": 0.32301652431488037, "learning_rate": 0.00017364534593128, "loss": 11.6694, "step": 33917 }, { "epoch": 0.7099974880683245, "grad_norm": 0.33922654390335083, "learning_rate": 0.00017364386269093309, "loss": 11.6782, "step": 33918 }, { "epoch": 0.7100184208322867, "grad_norm": 0.32237762212753296, "learning_rate": 0.00017364237941518406, "loss": 11.679, "step": 33919 }, { "epoch": 0.7100393535962488, "grad_norm": 0.24462097883224487, "learning_rate": 0.00017364089610403367, "loss": 11.6601, "step": 33920 }, { "epoch": 0.710060286360211, "grad_norm": 0.3132820427417755, "learning_rate": 0.00017363941275748263, "loss": 11.665, "step": 33921 }, { "epoch": 0.7100812191241731, "grad_norm": 0.2885455787181854, "learning_rate": 0.00017363792937553164, "loss": 11.6754, "step": 33922 }, { "epoch": 0.7101021518881353, "grad_norm": 0.32044678926467896, "learning_rate": 0.00017363644595818145, "loss": 11.6568, "step": 33923 }, { "epoch": 0.7101230846520975, "grad_norm": 0.2812243103981018, "learning_rate": 0.0001736349625054327, "loss": 11.674, "step": 33924 }, { "epoch": 0.7101440174160596, "grad_norm": 0.2988155484199524, "learning_rate": 0.0001736334790172862, "loss": 11.6458, "step": 33925 }, { "epoch": 0.7101649501800218, "grad_norm": 0.31613290309906006, "learning_rate": 0.00017363199549374258, "loss": 11.6851, "step": 33926 }, { "epoch": 0.7101858829439839, "grad_norm": 0.3170158863067627, "learning_rate": 0.0001736305119348026, "loss": 11.6807, "step": 33927 }, { "epoch": 0.7102068157079461, "grad_norm": 0.25786420702934265, "learning_rate": 0.00017362902834046695, "loss": 11.6676, "step": 33928 }, { "epoch": 0.7102277484719083, "grad_norm": 0.37052229046821594, "learning_rate": 0.00017362754471073632, "loss": 11.6693, "step": 33929 }, { "epoch": 0.7102486812358704, "grad_norm": 0.2594922184944153, "learning_rate": 0.0001736260610456115, "loss": 11.6632, "step": 33930 }, { "epoch": 0.7102696139998326, "grad_norm": 0.3205273747444153, "learning_rate": 0.0001736245773450931, "loss": 11.6767, "step": 33931 }, { "epoch": 0.7102905467637947, "grad_norm": 0.40455079078674316, "learning_rate": 0.00017362309360918196, "loss": 11.6848, "step": 33932 }, { "epoch": 0.7103114795277569, "grad_norm": 0.25033387541770935, "learning_rate": 0.0001736216098378787, "loss": 11.6751, "step": 33933 }, { "epoch": 0.710332412291719, "grad_norm": 0.2770397365093231, "learning_rate": 0.00017362012603118406, "loss": 11.676, "step": 33934 }, { "epoch": 0.7103533450556812, "grad_norm": 0.31487521529197693, "learning_rate": 0.00017361864218909872, "loss": 11.6757, "step": 33935 }, { "epoch": 0.7103742778196434, "grad_norm": 0.3047218918800354, "learning_rate": 0.00017361715831162343, "loss": 11.6666, "step": 33936 }, { "epoch": 0.7103952105836054, "grad_norm": 0.39625418186187744, "learning_rate": 0.0001736156743987589, "loss": 11.6824, "step": 33937 }, { "epoch": 0.7104161433475676, "grad_norm": 0.31248897314071655, "learning_rate": 0.00017361419045050587, "loss": 11.6778, "step": 33938 }, { "epoch": 0.7104370761115297, "grad_norm": 0.3102853000164032, "learning_rate": 0.00017361270646686498, "loss": 11.6695, "step": 33939 }, { "epoch": 0.7104580088754919, "grad_norm": 0.25705569982528687, "learning_rate": 0.00017361122244783703, "loss": 11.6646, "step": 33940 }, { "epoch": 0.710478941639454, "grad_norm": 0.5838955640792847, "learning_rate": 0.00017360973839342268, "loss": 11.6689, "step": 33941 }, { "epoch": 0.7104998744034162, "grad_norm": 0.2961689233779907, "learning_rate": 0.00017360825430362263, "loss": 11.67, "step": 33942 }, { "epoch": 0.7105208071673784, "grad_norm": 0.28263628482818604, "learning_rate": 0.00017360677017843763, "loss": 11.6501, "step": 33943 }, { "epoch": 0.7105417399313405, "grad_norm": 0.224518820643425, "learning_rate": 0.0001736052860178684, "loss": 11.6762, "step": 33944 }, { "epoch": 0.7105626726953027, "grad_norm": 0.3962555527687073, "learning_rate": 0.00017360380182191557, "loss": 11.6698, "step": 33945 }, { "epoch": 0.7105836054592648, "grad_norm": 0.27744510769844055, "learning_rate": 0.00017360231759057997, "loss": 11.6733, "step": 33946 }, { "epoch": 0.710604538223227, "grad_norm": 0.2849424481391907, "learning_rate": 0.00017360083332386224, "loss": 11.6746, "step": 33947 }, { "epoch": 0.7106254709871892, "grad_norm": 0.3670254945755005, "learning_rate": 0.00017359934902176315, "loss": 11.6821, "step": 33948 }, { "epoch": 0.7106464037511513, "grad_norm": 0.27725470066070557, "learning_rate": 0.00017359786468428335, "loss": 11.6676, "step": 33949 }, { "epoch": 0.7106673365151135, "grad_norm": 0.2962631285190582, "learning_rate": 0.0001735963803114236, "loss": 11.6622, "step": 33950 }, { "epoch": 0.7106882692790756, "grad_norm": 0.3064073920249939, "learning_rate": 0.00017359489590318457, "loss": 11.6709, "step": 33951 }, { "epoch": 0.7107092020430378, "grad_norm": 0.2923491597175598, "learning_rate": 0.00017359341145956704, "loss": 11.6708, "step": 33952 }, { "epoch": 0.7107301348069999, "grad_norm": 0.28122639656066895, "learning_rate": 0.00017359192698057163, "loss": 11.6784, "step": 33953 }, { "epoch": 0.7107510675709621, "grad_norm": 0.30113524198532104, "learning_rate": 0.00017359044246619912, "loss": 11.6739, "step": 33954 }, { "epoch": 0.7107720003349243, "grad_norm": 0.3187539577484131, "learning_rate": 0.0001735889579164502, "loss": 11.6646, "step": 33955 }, { "epoch": 0.7107929330988864, "grad_norm": 0.3124508261680603, "learning_rate": 0.00017358747333132564, "loss": 11.6617, "step": 33956 }, { "epoch": 0.7108138658628486, "grad_norm": 0.26197126507759094, "learning_rate": 0.00017358598871082607, "loss": 11.6674, "step": 33957 }, { "epoch": 0.7108347986268106, "grad_norm": 0.35307496786117554, "learning_rate": 0.00017358450405495225, "loss": 11.6503, "step": 33958 }, { "epoch": 0.7108557313907728, "grad_norm": 0.30657777190208435, "learning_rate": 0.00017358301936370486, "loss": 11.6705, "step": 33959 }, { "epoch": 0.7108766641547349, "grad_norm": 0.3028666079044342, "learning_rate": 0.00017358153463708468, "loss": 11.668, "step": 33960 }, { "epoch": 0.7108975969186971, "grad_norm": 0.30286678671836853, "learning_rate": 0.00017358004987509237, "loss": 11.6617, "step": 33961 }, { "epoch": 0.7109185296826593, "grad_norm": 0.27542024850845337, "learning_rate": 0.00017357856507772864, "loss": 11.681, "step": 33962 }, { "epoch": 0.7109394624466214, "grad_norm": 0.24326959252357483, "learning_rate": 0.00017357708024499422, "loss": 11.6575, "step": 33963 }, { "epoch": 0.7109603952105836, "grad_norm": 0.27187952399253845, "learning_rate": 0.00017357559537688983, "loss": 11.6666, "step": 33964 }, { "epoch": 0.7109813279745457, "grad_norm": 0.35046541690826416, "learning_rate": 0.00017357411047341618, "loss": 11.6886, "step": 33965 }, { "epoch": 0.7110022607385079, "grad_norm": 0.34558558464050293, "learning_rate": 0.000173572625534574, "loss": 11.6748, "step": 33966 }, { "epoch": 0.7110231935024701, "grad_norm": 0.3527621626853943, "learning_rate": 0.00017357114056036394, "loss": 11.6776, "step": 33967 }, { "epoch": 0.7110441262664322, "grad_norm": 0.3193332254886627, "learning_rate": 0.00017356965555078677, "loss": 11.6758, "step": 33968 }, { "epoch": 0.7110650590303944, "grad_norm": 0.34072867035865784, "learning_rate": 0.00017356817050584323, "loss": 11.6735, "step": 33969 }, { "epoch": 0.7110859917943565, "grad_norm": 0.2985926568508148, "learning_rate": 0.00017356668542553396, "loss": 11.6772, "step": 33970 }, { "epoch": 0.7111069245583187, "grad_norm": 0.3079618215560913, "learning_rate": 0.0001735652003098597, "loss": 11.6733, "step": 33971 }, { "epoch": 0.7111278573222808, "grad_norm": 0.2824799418449402, "learning_rate": 0.0001735637151588212, "loss": 11.6624, "step": 33972 }, { "epoch": 0.711148790086243, "grad_norm": 0.3182966411113739, "learning_rate": 0.00017356222997241916, "loss": 11.6684, "step": 33973 }, { "epoch": 0.7111697228502052, "grad_norm": 0.30019259452819824, "learning_rate": 0.00017356074475065425, "loss": 11.6698, "step": 33974 }, { "epoch": 0.7111906556141673, "grad_norm": 0.28210699558258057, "learning_rate": 0.00017355925949352722, "loss": 11.6478, "step": 33975 }, { "epoch": 0.7112115883781295, "grad_norm": 0.23758964240550995, "learning_rate": 0.0001735577742010388, "loss": 11.6698, "step": 33976 }, { "epoch": 0.7112325211420916, "grad_norm": 0.27446940541267395, "learning_rate": 0.00017355628887318967, "loss": 11.6706, "step": 33977 }, { "epoch": 0.7112534539060538, "grad_norm": 0.35424724221229553, "learning_rate": 0.00017355480350998056, "loss": 11.6555, "step": 33978 }, { "epoch": 0.7112743866700159, "grad_norm": 0.30526429414749146, "learning_rate": 0.00017355331811141217, "loss": 11.6875, "step": 33979 }, { "epoch": 0.7112953194339781, "grad_norm": 0.37330201268196106, "learning_rate": 0.00017355183267748524, "loss": 11.6764, "step": 33980 }, { "epoch": 0.7113162521979403, "grad_norm": 0.31064191460609436, "learning_rate": 0.00017355034720820047, "loss": 11.6657, "step": 33981 }, { "epoch": 0.7113371849619023, "grad_norm": 0.28778284788131714, "learning_rate": 0.00017354886170355856, "loss": 11.6675, "step": 33982 }, { "epoch": 0.7113581177258645, "grad_norm": 0.26384976506233215, "learning_rate": 0.00017354737616356026, "loss": 11.6703, "step": 33983 }, { "epoch": 0.7113790504898266, "grad_norm": 0.291102796792984, "learning_rate": 0.00017354589058820626, "loss": 11.6889, "step": 33984 }, { "epoch": 0.7113999832537888, "grad_norm": 0.3033786416053772, "learning_rate": 0.00017354440497749726, "loss": 11.669, "step": 33985 }, { "epoch": 0.711420916017751, "grad_norm": 0.3356943726539612, "learning_rate": 0.000173542919331434, "loss": 11.6759, "step": 33986 }, { "epoch": 0.7114418487817131, "grad_norm": 0.30113786458969116, "learning_rate": 0.0001735414336500172, "loss": 11.6766, "step": 33987 }, { "epoch": 0.7114627815456753, "grad_norm": 0.3267574608325958, "learning_rate": 0.00017353994793324753, "loss": 11.6671, "step": 33988 }, { "epoch": 0.7114837143096374, "grad_norm": 0.46001607179641724, "learning_rate": 0.00017353846218112575, "loss": 11.6742, "step": 33989 }, { "epoch": 0.7115046470735996, "grad_norm": 0.3184090554714203, "learning_rate": 0.00017353697639365255, "loss": 11.6569, "step": 33990 }, { "epoch": 0.7115255798375617, "grad_norm": 0.26365044713020325, "learning_rate": 0.00017353549057082864, "loss": 11.6609, "step": 33991 }, { "epoch": 0.7115465126015239, "grad_norm": 0.2949771285057068, "learning_rate": 0.00017353400471265478, "loss": 11.6721, "step": 33992 }, { "epoch": 0.7115674453654861, "grad_norm": 0.2999906837940216, "learning_rate": 0.00017353251881913165, "loss": 11.6891, "step": 33993 }, { "epoch": 0.7115883781294482, "grad_norm": 0.3398823142051697, "learning_rate": 0.00017353103289025992, "loss": 11.6836, "step": 33994 }, { "epoch": 0.7116093108934104, "grad_norm": 0.2552995979785919, "learning_rate": 0.00017352954692604038, "loss": 11.6777, "step": 33995 }, { "epoch": 0.7116302436573725, "grad_norm": 0.2875002324581146, "learning_rate": 0.00017352806092647371, "loss": 11.6653, "step": 33996 }, { "epoch": 0.7116511764213347, "grad_norm": 0.30865874886512756, "learning_rate": 0.00017352657489156062, "loss": 11.6678, "step": 33997 }, { "epoch": 0.7116721091852968, "grad_norm": 0.32363662123680115, "learning_rate": 0.00017352508882130185, "loss": 11.6692, "step": 33998 }, { "epoch": 0.711693041949259, "grad_norm": 0.3332323133945465, "learning_rate": 0.00017352360271569807, "loss": 11.6782, "step": 33999 }, { "epoch": 0.7117139747132212, "grad_norm": 0.29123055934906006, "learning_rate": 0.00017352211657475003, "loss": 11.6772, "step": 34000 }, { "epoch": 0.7117139747132212, "eval_loss": 11.670578002929688, "eval_runtime": 34.4098, "eval_samples_per_second": 27.928, "eval_steps_per_second": 7.004, "step": 34000 }, { "epoch": 0.7117349074771833, "grad_norm": 0.34959176182746887, "learning_rate": 0.00017352063039845846, "loss": 11.6586, "step": 34001 }, { "epoch": 0.7117558402411455, "grad_norm": 0.2582622170448303, "learning_rate": 0.000173519144186824, "loss": 11.675, "step": 34002 }, { "epoch": 0.7117767730051076, "grad_norm": 0.30620187520980835, "learning_rate": 0.00017351765793984747, "loss": 11.675, "step": 34003 }, { "epoch": 0.7117977057690698, "grad_norm": 0.31867510080337524, "learning_rate": 0.0001735161716575295, "loss": 11.6755, "step": 34004 }, { "epoch": 0.711818638533032, "grad_norm": 0.2810840904712677, "learning_rate": 0.00017351468533987084, "loss": 11.6747, "step": 34005 }, { "epoch": 0.711839571296994, "grad_norm": 0.3418501317501068, "learning_rate": 0.00017351319898687218, "loss": 11.6659, "step": 34006 }, { "epoch": 0.7118605040609562, "grad_norm": 0.3658755421638489, "learning_rate": 0.00017351171259853427, "loss": 11.6715, "step": 34007 }, { "epoch": 0.7118814368249183, "grad_norm": 0.25146782398223877, "learning_rate": 0.0001735102261748578, "loss": 11.6609, "step": 34008 }, { "epoch": 0.7119023695888805, "grad_norm": 0.35472071170806885, "learning_rate": 0.0001735087397158435, "loss": 11.6718, "step": 34009 }, { "epoch": 0.7119233023528426, "grad_norm": 0.2960747480392456, "learning_rate": 0.00017350725322149205, "loss": 11.68, "step": 34010 }, { "epoch": 0.7119442351168048, "grad_norm": 0.2550755441188812, "learning_rate": 0.00017350576669180424, "loss": 11.6758, "step": 34011 }, { "epoch": 0.711965167880767, "grad_norm": 0.29630815982818604, "learning_rate": 0.00017350428012678068, "loss": 11.6737, "step": 34012 }, { "epoch": 0.7119861006447291, "grad_norm": 0.24891947209835052, "learning_rate": 0.0001735027935264222, "loss": 11.6584, "step": 34013 }, { "epoch": 0.7120070334086913, "grad_norm": 0.25450775027275085, "learning_rate": 0.00017350130689072939, "loss": 11.6743, "step": 34014 }, { "epoch": 0.7120279661726534, "grad_norm": 0.305011510848999, "learning_rate": 0.00017349982021970303, "loss": 11.6681, "step": 34015 }, { "epoch": 0.7120488989366156, "grad_norm": 0.2661609947681427, "learning_rate": 0.00017349833351334388, "loss": 11.6703, "step": 34016 }, { "epoch": 0.7120698317005777, "grad_norm": 0.29190707206726074, "learning_rate": 0.00017349684677165259, "loss": 11.6847, "step": 34017 }, { "epoch": 0.7120907644645399, "grad_norm": 0.3108765780925751, "learning_rate": 0.00017349535999462988, "loss": 11.6664, "step": 34018 }, { "epoch": 0.7121116972285021, "grad_norm": 0.32288658618927, "learning_rate": 0.00017349387318227647, "loss": 11.689, "step": 34019 }, { "epoch": 0.7121326299924642, "grad_norm": 0.27108415961265564, "learning_rate": 0.00017349238633459314, "loss": 11.6761, "step": 34020 }, { "epoch": 0.7121535627564264, "grad_norm": 0.2511478066444397, "learning_rate": 0.00017349089945158048, "loss": 11.6833, "step": 34021 }, { "epoch": 0.7121744955203885, "grad_norm": 0.38521188497543335, "learning_rate": 0.00017348941253323932, "loss": 11.6794, "step": 34022 }, { "epoch": 0.7121954282843507, "grad_norm": 0.3291807770729065, "learning_rate": 0.0001734879255795703, "loss": 11.6589, "step": 34023 }, { "epoch": 0.7122163610483129, "grad_norm": 0.2832174003124237, "learning_rate": 0.00017348643859057416, "loss": 11.6705, "step": 34024 }, { "epoch": 0.712237293812275, "grad_norm": 0.32224443554878235, "learning_rate": 0.00017348495156625164, "loss": 11.6568, "step": 34025 }, { "epoch": 0.7122582265762372, "grad_norm": 0.3340810239315033, "learning_rate": 0.00017348346450660342, "loss": 11.6693, "step": 34026 }, { "epoch": 0.7122791593401993, "grad_norm": 0.2476336508989334, "learning_rate": 0.00017348197741163022, "loss": 11.6644, "step": 34027 }, { "epoch": 0.7123000921041615, "grad_norm": 0.2752784192562103, "learning_rate": 0.00017348049028133276, "loss": 11.6609, "step": 34028 }, { "epoch": 0.7123210248681235, "grad_norm": 0.31239837408065796, "learning_rate": 0.00017347900311571175, "loss": 11.6904, "step": 34029 }, { "epoch": 0.7123419576320857, "grad_norm": 0.29888585209846497, "learning_rate": 0.00017347751591476792, "loss": 11.6577, "step": 34030 }, { "epoch": 0.7123628903960479, "grad_norm": 0.33150020241737366, "learning_rate": 0.00017347602867850197, "loss": 11.6766, "step": 34031 }, { "epoch": 0.71238382316001, "grad_norm": 0.2738206088542938, "learning_rate": 0.00017347454140691465, "loss": 11.6767, "step": 34032 }, { "epoch": 0.7124047559239722, "grad_norm": 0.3746553957462311, "learning_rate": 0.00017347305410000662, "loss": 11.6866, "step": 34033 }, { "epoch": 0.7124256886879343, "grad_norm": 0.2514229416847229, "learning_rate": 0.00017347156675777861, "loss": 11.654, "step": 34034 }, { "epoch": 0.7124466214518965, "grad_norm": 0.271190881729126, "learning_rate": 0.00017347007938023134, "loss": 11.6636, "step": 34035 }, { "epoch": 0.7124675542158586, "grad_norm": 0.3779138922691345, "learning_rate": 0.00017346859196736553, "loss": 11.6651, "step": 34036 }, { "epoch": 0.7124884869798208, "grad_norm": 0.30837053060531616, "learning_rate": 0.00017346710451918192, "loss": 11.6852, "step": 34037 }, { "epoch": 0.712509419743783, "grad_norm": 0.25538894534111023, "learning_rate": 0.0001734656170356812, "loss": 11.677, "step": 34038 }, { "epoch": 0.7125303525077451, "grad_norm": 0.22859424352645874, "learning_rate": 0.00017346412951686408, "loss": 11.68, "step": 34039 }, { "epoch": 0.7125512852717073, "grad_norm": 0.38031989336013794, "learning_rate": 0.00017346264196273125, "loss": 11.6823, "step": 34040 }, { "epoch": 0.7125722180356694, "grad_norm": 0.23755212128162384, "learning_rate": 0.0001734611543732835, "loss": 11.6578, "step": 34041 }, { "epoch": 0.7125931507996316, "grad_norm": 0.29552921652793884, "learning_rate": 0.0001734596667485215, "loss": 11.67, "step": 34042 }, { "epoch": 0.7126140835635938, "grad_norm": 0.24801933765411377, "learning_rate": 0.00017345817908844596, "loss": 11.6691, "step": 34043 }, { "epoch": 0.7126350163275559, "grad_norm": 0.2781975269317627, "learning_rate": 0.00017345669139305758, "loss": 11.6678, "step": 34044 }, { "epoch": 0.7126559490915181, "grad_norm": 0.30896076560020447, "learning_rate": 0.00017345520366235712, "loss": 11.6633, "step": 34045 }, { "epoch": 0.7126768818554802, "grad_norm": 0.3047839105129242, "learning_rate": 0.00017345371589634525, "loss": 11.6746, "step": 34046 }, { "epoch": 0.7126978146194424, "grad_norm": 0.41038084030151367, "learning_rate": 0.00017345222809502273, "loss": 11.676, "step": 34047 }, { "epoch": 0.7127187473834045, "grad_norm": 0.34017401933670044, "learning_rate": 0.00017345074025839022, "loss": 11.685, "step": 34048 }, { "epoch": 0.7127396801473667, "grad_norm": 0.3243226408958435, "learning_rate": 0.0001734492523864485, "loss": 11.6821, "step": 34049 }, { "epoch": 0.7127606129113289, "grad_norm": 0.29879218339920044, "learning_rate": 0.00017344776447919823, "loss": 11.669, "step": 34050 }, { "epoch": 0.712781545675291, "grad_norm": 0.2945832312107086, "learning_rate": 0.00017344627653664016, "loss": 11.6647, "step": 34051 }, { "epoch": 0.7128024784392532, "grad_norm": 0.2925474941730499, "learning_rate": 0.00017344478855877497, "loss": 11.6812, "step": 34052 }, { "epoch": 0.7128234112032152, "grad_norm": 0.2901124954223633, "learning_rate": 0.00017344330054560342, "loss": 11.6584, "step": 34053 }, { "epoch": 0.7128443439671774, "grad_norm": 0.23952451348304749, "learning_rate": 0.0001734418124971262, "loss": 11.6739, "step": 34054 }, { "epoch": 0.7128652767311395, "grad_norm": 0.2917731702327728, "learning_rate": 0.00017344032441334405, "loss": 11.6523, "step": 34055 }, { "epoch": 0.7128862094951017, "grad_norm": 0.2630043029785156, "learning_rate": 0.00017343883629425766, "loss": 11.6607, "step": 34056 }, { "epoch": 0.7129071422590639, "grad_norm": 0.3164072632789612, "learning_rate": 0.0001734373481398677, "loss": 11.6716, "step": 34057 }, { "epoch": 0.712928075023026, "grad_norm": 0.27094992995262146, "learning_rate": 0.000173435859950175, "loss": 11.6831, "step": 34058 }, { "epoch": 0.7129490077869882, "grad_norm": 0.2851305902004242, "learning_rate": 0.00017343437172518016, "loss": 11.666, "step": 34059 }, { "epoch": 0.7129699405509503, "grad_norm": 0.32985931634902954, "learning_rate": 0.00017343288346488396, "loss": 11.6617, "step": 34060 }, { "epoch": 0.7129908733149125, "grad_norm": 0.2474411278963089, "learning_rate": 0.0001734313951692871, "loss": 11.6813, "step": 34061 }, { "epoch": 0.7130118060788746, "grad_norm": 0.3150203824043274, "learning_rate": 0.0001734299068383903, "loss": 11.6655, "step": 34062 }, { "epoch": 0.7130327388428368, "grad_norm": 0.2367257922887802, "learning_rate": 0.00017342841847219428, "loss": 11.6778, "step": 34063 }, { "epoch": 0.713053671606799, "grad_norm": 0.2878349721431732, "learning_rate": 0.00017342693007069975, "loss": 11.6791, "step": 34064 }, { "epoch": 0.7130746043707611, "grad_norm": 0.2843294143676758, "learning_rate": 0.00017342544163390745, "loss": 11.6771, "step": 34065 }, { "epoch": 0.7130955371347233, "grad_norm": 0.342708945274353, "learning_rate": 0.000173423953161818, "loss": 11.6734, "step": 34066 }, { "epoch": 0.7131164698986854, "grad_norm": 0.2614230811595917, "learning_rate": 0.00017342246465443223, "loss": 11.6715, "step": 34067 }, { "epoch": 0.7131374026626476, "grad_norm": 0.24265502393245697, "learning_rate": 0.0001734209761117508, "loss": 11.6641, "step": 34068 }, { "epoch": 0.7131583354266098, "grad_norm": 0.30736324191093445, "learning_rate": 0.00017341948753377443, "loss": 11.6574, "step": 34069 }, { "epoch": 0.7131792681905719, "grad_norm": 0.22905315458774567, "learning_rate": 0.00017341799892050386, "loss": 11.6651, "step": 34070 }, { "epoch": 0.7132002009545341, "grad_norm": 0.3214164972305298, "learning_rate": 0.00017341651027193974, "loss": 11.6755, "step": 34071 }, { "epoch": 0.7132211337184962, "grad_norm": 0.4330612123012543, "learning_rate": 0.0001734150215880829, "loss": 11.6793, "step": 34072 }, { "epoch": 0.7132420664824584, "grad_norm": 0.3358965218067169, "learning_rate": 0.00017341353286893396, "loss": 11.6659, "step": 34073 }, { "epoch": 0.7132629992464204, "grad_norm": 0.2830120921134949, "learning_rate": 0.00017341204411449365, "loss": 11.6677, "step": 34074 }, { "epoch": 0.7132839320103826, "grad_norm": 0.3129216730594635, "learning_rate": 0.0001734105553247627, "loss": 11.6835, "step": 34075 }, { "epoch": 0.7133048647743448, "grad_norm": 0.25134217739105225, "learning_rate": 0.00017340906649974182, "loss": 11.6641, "step": 34076 }, { "epoch": 0.7133257975383069, "grad_norm": 0.27985572814941406, "learning_rate": 0.00017340757763943174, "loss": 11.6689, "step": 34077 }, { "epoch": 0.7133467303022691, "grad_norm": 0.5295104384422302, "learning_rate": 0.00017340608874383316, "loss": 11.6749, "step": 34078 }, { "epoch": 0.7133676630662312, "grad_norm": 0.26741883158683777, "learning_rate": 0.0001734045998129468, "loss": 11.6614, "step": 34079 }, { "epoch": 0.7133885958301934, "grad_norm": 0.23325301706790924, "learning_rate": 0.00017340311084677342, "loss": 11.6722, "step": 34080 }, { "epoch": 0.7134095285941555, "grad_norm": 0.2701508402824402, "learning_rate": 0.00017340162184531366, "loss": 11.6709, "step": 34081 }, { "epoch": 0.7134304613581177, "grad_norm": 0.2634943127632141, "learning_rate": 0.00017340013280856827, "loss": 11.6854, "step": 34082 }, { "epoch": 0.7134513941220799, "grad_norm": 0.3077176511287689, "learning_rate": 0.00017339864373653796, "loss": 11.6854, "step": 34083 }, { "epoch": 0.713472326886042, "grad_norm": 0.32771095633506775, "learning_rate": 0.00017339715462922345, "loss": 11.6613, "step": 34084 }, { "epoch": 0.7134932596500042, "grad_norm": 0.29792553186416626, "learning_rate": 0.00017339566548662546, "loss": 11.6774, "step": 34085 }, { "epoch": 0.7135141924139663, "grad_norm": 0.2271583080291748, "learning_rate": 0.00017339417630874473, "loss": 11.6726, "step": 34086 }, { "epoch": 0.7135351251779285, "grad_norm": 0.3241962492465973, "learning_rate": 0.0001733926870955819, "loss": 11.6781, "step": 34087 }, { "epoch": 0.7135560579418907, "grad_norm": 0.2696765065193176, "learning_rate": 0.00017339119784713777, "loss": 11.6716, "step": 34088 }, { "epoch": 0.7135769907058528, "grad_norm": 0.3435436487197876, "learning_rate": 0.00017338970856341302, "loss": 11.6699, "step": 34089 }, { "epoch": 0.713597923469815, "grad_norm": 0.2873942255973816, "learning_rate": 0.00017338821924440834, "loss": 11.6658, "step": 34090 }, { "epoch": 0.7136188562337771, "grad_norm": 0.32856032252311707, "learning_rate": 0.00017338672989012448, "loss": 11.6707, "step": 34091 }, { "epoch": 0.7136397889977393, "grad_norm": 0.33778300881385803, "learning_rate": 0.00017338524050056217, "loss": 11.669, "step": 34092 }, { "epoch": 0.7136607217617014, "grad_norm": 0.28394490480422974, "learning_rate": 0.0001733837510757221, "loss": 11.6811, "step": 34093 }, { "epoch": 0.7136816545256636, "grad_norm": 0.24191947281360626, "learning_rate": 0.00017338226161560497, "loss": 11.6492, "step": 34094 }, { "epoch": 0.7137025872896258, "grad_norm": 0.24614045023918152, "learning_rate": 0.00017338077212021151, "loss": 11.6754, "step": 34095 }, { "epoch": 0.7137235200535879, "grad_norm": 0.2576928436756134, "learning_rate": 0.00017337928258954249, "loss": 11.6593, "step": 34096 }, { "epoch": 0.7137444528175501, "grad_norm": 0.29016587138175964, "learning_rate": 0.00017337779302359853, "loss": 11.6537, "step": 34097 }, { "epoch": 0.7137653855815121, "grad_norm": 0.3201620876789093, "learning_rate": 0.00017337630342238042, "loss": 11.6739, "step": 34098 }, { "epoch": 0.7137863183454743, "grad_norm": 0.263659805059433, "learning_rate": 0.0001733748137858888, "loss": 11.6793, "step": 34099 }, { "epoch": 0.7138072511094364, "grad_norm": 0.3368784487247467, "learning_rate": 0.00017337332411412453, "loss": 11.6494, "step": 34100 }, { "epoch": 0.7138281838733986, "grad_norm": 0.3061804473400116, "learning_rate": 0.00017337183440708817, "loss": 11.6699, "step": 34101 }, { "epoch": 0.7138491166373608, "grad_norm": 0.37950000166893005, "learning_rate": 0.0001733703446647805, "loss": 11.6778, "step": 34102 }, { "epoch": 0.7138700494013229, "grad_norm": 0.3076585531234741, "learning_rate": 0.00017336885488720226, "loss": 11.6734, "step": 34103 }, { "epoch": 0.7138909821652851, "grad_norm": 0.7030141353607178, "learning_rate": 0.00017336736507435414, "loss": 11.5941, "step": 34104 }, { "epoch": 0.7139119149292472, "grad_norm": 0.22812597453594208, "learning_rate": 0.0001733658752262368, "loss": 11.6765, "step": 34105 }, { "epoch": 0.7139328476932094, "grad_norm": 0.3240189850330353, "learning_rate": 0.0001733643853428511, "loss": 11.6762, "step": 34106 }, { "epoch": 0.7139537804571716, "grad_norm": 0.29987525939941406, "learning_rate": 0.0001733628954241976, "loss": 11.6742, "step": 34107 }, { "epoch": 0.7139747132211337, "grad_norm": 0.37461140751838684, "learning_rate": 0.00017336140547027711, "loss": 11.6663, "step": 34108 }, { "epoch": 0.7139956459850959, "grad_norm": 0.3717837929725647, "learning_rate": 0.00017335991548109034, "loss": 11.6631, "step": 34109 }, { "epoch": 0.714016578749058, "grad_norm": 0.29655739665031433, "learning_rate": 0.00017335842545663797, "loss": 11.6761, "step": 34110 }, { "epoch": 0.7140375115130202, "grad_norm": 0.2690771222114563, "learning_rate": 0.00017335693539692075, "loss": 11.6673, "step": 34111 }, { "epoch": 0.7140584442769823, "grad_norm": 0.2539563477039337, "learning_rate": 0.00017335544530193935, "loss": 11.6683, "step": 34112 }, { "epoch": 0.7140793770409445, "grad_norm": 0.32316240668296814, "learning_rate": 0.00017335395517169454, "loss": 11.6475, "step": 34113 }, { "epoch": 0.7141003098049067, "grad_norm": 0.25804418325424194, "learning_rate": 0.00017335246500618703, "loss": 11.6616, "step": 34114 }, { "epoch": 0.7141212425688688, "grad_norm": 0.32528457045555115, "learning_rate": 0.0001733509748054175, "loss": 11.656, "step": 34115 }, { "epoch": 0.714142175332831, "grad_norm": 0.30379486083984375, "learning_rate": 0.0001733494845693867, "loss": 11.672, "step": 34116 }, { "epoch": 0.7141631080967931, "grad_norm": 0.29259419441223145, "learning_rate": 0.0001733479942980953, "loss": 11.6765, "step": 34117 }, { "epoch": 0.7141840408607553, "grad_norm": 0.25579604506492615, "learning_rate": 0.00017334650399154406, "loss": 11.6588, "step": 34118 }, { "epoch": 0.7142049736247174, "grad_norm": 0.3582240641117096, "learning_rate": 0.0001733450136497337, "loss": 11.6798, "step": 34119 }, { "epoch": 0.7142259063886796, "grad_norm": 0.2944725453853607, "learning_rate": 0.00017334352327266493, "loss": 11.6559, "step": 34120 }, { "epoch": 0.7142468391526418, "grad_norm": 0.338684618473053, "learning_rate": 0.00017334203286033844, "loss": 11.6767, "step": 34121 }, { "epoch": 0.7142677719166038, "grad_norm": 0.38918569684028625, "learning_rate": 0.00017334054241275497, "loss": 11.6922, "step": 34122 }, { "epoch": 0.714288704680566, "grad_norm": 0.2790801525115967, "learning_rate": 0.00017333905192991523, "loss": 11.6602, "step": 34123 }, { "epoch": 0.7143096374445281, "grad_norm": 0.2836554944515228, "learning_rate": 0.00017333756141181994, "loss": 11.6624, "step": 34124 }, { "epoch": 0.7143305702084903, "grad_norm": 0.28450754284858704, "learning_rate": 0.00017333607085846982, "loss": 11.6611, "step": 34125 }, { "epoch": 0.7143515029724525, "grad_norm": 0.3059070110321045, "learning_rate": 0.00017333458026986556, "loss": 11.667, "step": 34126 }, { "epoch": 0.7143724357364146, "grad_norm": 0.2814899682998657, "learning_rate": 0.00017333308964600792, "loss": 11.6588, "step": 34127 }, { "epoch": 0.7143933685003768, "grad_norm": 0.29247158765792847, "learning_rate": 0.00017333159898689758, "loss": 11.6922, "step": 34128 }, { "epoch": 0.7144143012643389, "grad_norm": 0.3589034378528595, "learning_rate": 0.00017333010829253528, "loss": 11.645, "step": 34129 }, { "epoch": 0.7144352340283011, "grad_norm": 0.2723366916179657, "learning_rate": 0.00017332861756292174, "loss": 11.6794, "step": 34130 }, { "epoch": 0.7144561667922632, "grad_norm": 0.26826146245002747, "learning_rate": 0.00017332712679805762, "loss": 11.6675, "step": 34131 }, { "epoch": 0.7144770995562254, "grad_norm": 0.27007293701171875, "learning_rate": 0.00017332563599794373, "loss": 11.6856, "step": 34132 }, { "epoch": 0.7144980323201876, "grad_norm": 0.2940625846385956, "learning_rate": 0.0001733241451625807, "loss": 11.6703, "step": 34133 }, { "epoch": 0.7145189650841497, "grad_norm": 0.31936269998550415, "learning_rate": 0.00017332265429196932, "loss": 11.6749, "step": 34134 }, { "epoch": 0.7145398978481119, "grad_norm": 0.24024680256843567, "learning_rate": 0.00017332116338611027, "loss": 11.6511, "step": 34135 }, { "epoch": 0.714560830612074, "grad_norm": 0.3287988007068634, "learning_rate": 0.00017331967244500423, "loss": 11.6898, "step": 34136 }, { "epoch": 0.7145817633760362, "grad_norm": 0.2718120217323303, "learning_rate": 0.00017331818146865198, "loss": 11.6662, "step": 34137 }, { "epoch": 0.7146026961399983, "grad_norm": 0.30854079127311707, "learning_rate": 0.00017331669045705418, "loss": 11.67, "step": 34138 }, { "epoch": 0.7146236289039605, "grad_norm": 0.26724952459335327, "learning_rate": 0.00017331519941021163, "loss": 11.6878, "step": 34139 }, { "epoch": 0.7146445616679227, "grad_norm": 0.29728275537490845, "learning_rate": 0.00017331370832812497, "loss": 11.6883, "step": 34140 }, { "epoch": 0.7146654944318848, "grad_norm": 0.32712486386299133, "learning_rate": 0.0001733122172107949, "loss": 11.6574, "step": 34141 }, { "epoch": 0.714686427195847, "grad_norm": 0.24358117580413818, "learning_rate": 0.00017331072605822226, "loss": 11.6521, "step": 34142 }, { "epoch": 0.714707359959809, "grad_norm": 0.31713929772377014, "learning_rate": 0.00017330923487040762, "loss": 11.6552, "step": 34143 }, { "epoch": 0.7147282927237713, "grad_norm": 0.3185711205005646, "learning_rate": 0.0001733077436473518, "loss": 11.6736, "step": 34144 }, { "epoch": 0.7147492254877335, "grad_norm": 0.2688536047935486, "learning_rate": 0.00017330625238905545, "loss": 11.6694, "step": 34145 }, { "epoch": 0.7147701582516955, "grad_norm": 0.3443465828895569, "learning_rate": 0.0001733047610955193, "loss": 11.6624, "step": 34146 }, { "epoch": 0.7147910910156577, "grad_norm": 0.2664877772331238, "learning_rate": 0.00017330326976674413, "loss": 11.674, "step": 34147 }, { "epoch": 0.7148120237796198, "grad_norm": 0.31392088532447815, "learning_rate": 0.0001733017784027306, "loss": 11.6717, "step": 34148 }, { "epoch": 0.714832956543582, "grad_norm": 0.304019033908844, "learning_rate": 0.0001733002870034794, "loss": 11.6749, "step": 34149 }, { "epoch": 0.7148538893075441, "grad_norm": 0.2932484745979309, "learning_rate": 0.0001732987955689913, "loss": 11.6608, "step": 34150 }, { "epoch": 0.7148748220715063, "grad_norm": 0.2550244629383087, "learning_rate": 0.000173297304099267, "loss": 11.6626, "step": 34151 }, { "epoch": 0.7148957548354685, "grad_norm": 0.3096819519996643, "learning_rate": 0.00017329581259430726, "loss": 11.6751, "step": 34152 }, { "epoch": 0.7149166875994306, "grad_norm": 0.3011608123779297, "learning_rate": 0.0001732943210541127, "loss": 11.6775, "step": 34153 }, { "epoch": 0.7149376203633928, "grad_norm": 0.2387627512216568, "learning_rate": 0.0001732928294786841, "loss": 11.6759, "step": 34154 }, { "epoch": 0.7149585531273549, "grad_norm": 0.27654317021369934, "learning_rate": 0.0001732913378680222, "loss": 11.662, "step": 34155 }, { "epoch": 0.7149794858913171, "grad_norm": 0.31797102093696594, "learning_rate": 0.00017328984622212766, "loss": 11.6769, "step": 34156 }, { "epoch": 0.7150004186552792, "grad_norm": 0.3686082363128662, "learning_rate": 0.0001732883545410012, "loss": 11.6708, "step": 34157 }, { "epoch": 0.7150213514192414, "grad_norm": 0.27267250418663025, "learning_rate": 0.0001732868628246436, "loss": 11.6698, "step": 34158 }, { "epoch": 0.7150422841832036, "grad_norm": 0.37289342284202576, "learning_rate": 0.00017328537107305555, "loss": 11.6945, "step": 34159 }, { "epoch": 0.7150632169471657, "grad_norm": 0.32604461908340454, "learning_rate": 0.0001732838792862377, "loss": 11.6885, "step": 34160 }, { "epoch": 0.7150841497111279, "grad_norm": 0.40714937448501587, "learning_rate": 0.00017328238746419086, "loss": 11.6875, "step": 34161 }, { "epoch": 0.71510508247509, "grad_norm": 0.24504803121089935, "learning_rate": 0.00017328089560691566, "loss": 11.6742, "step": 34162 }, { "epoch": 0.7151260152390522, "grad_norm": 0.2903294265270233, "learning_rate": 0.00017327940371441293, "loss": 11.6894, "step": 34163 }, { "epoch": 0.7151469480030144, "grad_norm": 0.2882980406284332, "learning_rate": 0.00017327791178668327, "loss": 11.6688, "step": 34164 }, { "epoch": 0.7151678807669765, "grad_norm": 0.3150152564048767, "learning_rate": 0.00017327641982372747, "loss": 11.6634, "step": 34165 }, { "epoch": 0.7151888135309387, "grad_norm": 0.2899979054927826, "learning_rate": 0.00017327492782554625, "loss": 11.6722, "step": 34166 }, { "epoch": 0.7152097462949008, "grad_norm": 0.285681813955307, "learning_rate": 0.00017327343579214027, "loss": 11.6757, "step": 34167 }, { "epoch": 0.715230679058863, "grad_norm": 0.3133096396923065, "learning_rate": 0.00017327194372351027, "loss": 11.6769, "step": 34168 }, { "epoch": 0.715251611822825, "grad_norm": 0.30806317925453186, "learning_rate": 0.000173270451619657, "loss": 11.6614, "step": 34169 }, { "epoch": 0.7152725445867872, "grad_norm": 0.2869341969490051, "learning_rate": 0.00017326895948058116, "loss": 11.6613, "step": 34170 }, { "epoch": 0.7152934773507494, "grad_norm": 0.2960214912891388, "learning_rate": 0.00017326746730628344, "loss": 11.6681, "step": 34171 }, { "epoch": 0.7153144101147115, "grad_norm": 0.364723265171051, "learning_rate": 0.0001732659750967646, "loss": 11.6706, "step": 34172 }, { "epoch": 0.7153353428786737, "grad_norm": 0.2878401279449463, "learning_rate": 0.00017326448285202536, "loss": 11.6567, "step": 34173 }, { "epoch": 0.7153562756426358, "grad_norm": 0.2793472111225128, "learning_rate": 0.00017326299057206636, "loss": 11.6645, "step": 34174 }, { "epoch": 0.715377208406598, "grad_norm": 0.2632994055747986, "learning_rate": 0.0001732614982568884, "loss": 11.6762, "step": 34175 }, { "epoch": 0.7153981411705601, "grad_norm": 0.2634302079677582, "learning_rate": 0.00017326000590649218, "loss": 11.6786, "step": 34176 }, { "epoch": 0.7154190739345223, "grad_norm": 0.30198317766189575, "learning_rate": 0.0001732585135208784, "loss": 11.6862, "step": 34177 }, { "epoch": 0.7154400066984845, "grad_norm": 0.23604562878608704, "learning_rate": 0.00017325702110004777, "loss": 11.6786, "step": 34178 }, { "epoch": 0.7154609394624466, "grad_norm": 0.3149622082710266, "learning_rate": 0.00017325552864400106, "loss": 11.6755, "step": 34179 }, { "epoch": 0.7154818722264088, "grad_norm": 0.2933705747127533, "learning_rate": 0.00017325403615273892, "loss": 11.678, "step": 34180 }, { "epoch": 0.7155028049903709, "grad_norm": 0.32036125659942627, "learning_rate": 0.00017325254362626208, "loss": 11.6761, "step": 34181 }, { "epoch": 0.7155237377543331, "grad_norm": 0.29931238293647766, "learning_rate": 0.0001732510510645713, "loss": 11.6744, "step": 34182 }, { "epoch": 0.7155446705182953, "grad_norm": 0.2663916051387787, "learning_rate": 0.00017324955846766726, "loss": 11.6532, "step": 34183 }, { "epoch": 0.7155656032822574, "grad_norm": 0.2629072070121765, "learning_rate": 0.00017324806583555072, "loss": 11.675, "step": 34184 }, { "epoch": 0.7155865360462196, "grad_norm": 0.30025750398635864, "learning_rate": 0.00017324657316822233, "loss": 11.673, "step": 34185 }, { "epoch": 0.7156074688101817, "grad_norm": 0.38685065507888794, "learning_rate": 0.00017324508046568285, "loss": 11.6783, "step": 34186 }, { "epoch": 0.7156284015741439, "grad_norm": 0.30973532795906067, "learning_rate": 0.00017324358772793298, "loss": 11.6913, "step": 34187 }, { "epoch": 0.715649334338106, "grad_norm": 0.2786925435066223, "learning_rate": 0.00017324209495497348, "loss": 11.6655, "step": 34188 }, { "epoch": 0.7156702671020682, "grad_norm": 0.2810911238193512, "learning_rate": 0.000173240602146805, "loss": 11.664, "step": 34189 }, { "epoch": 0.7156911998660304, "grad_norm": 0.24599279463291168, "learning_rate": 0.00017323910930342837, "loss": 11.6776, "step": 34190 }, { "epoch": 0.7157121326299924, "grad_norm": 0.32600948214530945, "learning_rate": 0.00017323761642484416, "loss": 11.6804, "step": 34191 }, { "epoch": 0.7157330653939546, "grad_norm": 0.3525083065032959, "learning_rate": 0.0001732361235110532, "loss": 11.6737, "step": 34192 }, { "epoch": 0.7157539981579167, "grad_norm": 0.2603975832462311, "learning_rate": 0.00017323463056205613, "loss": 11.6842, "step": 34193 }, { "epoch": 0.7157749309218789, "grad_norm": 0.31739088892936707, "learning_rate": 0.00017323313757785375, "loss": 11.668, "step": 34194 }, { "epoch": 0.715795863685841, "grad_norm": 0.35294944047927856, "learning_rate": 0.0001732316445584467, "loss": 11.6687, "step": 34195 }, { "epoch": 0.7158167964498032, "grad_norm": 0.30490151047706604, "learning_rate": 0.00017323015150383576, "loss": 11.6746, "step": 34196 }, { "epoch": 0.7158377292137654, "grad_norm": 0.28578245639801025, "learning_rate": 0.00017322865841402156, "loss": 11.6742, "step": 34197 }, { "epoch": 0.7158586619777275, "grad_norm": 0.4585915505886078, "learning_rate": 0.00017322716528900492, "loss": 11.6735, "step": 34198 }, { "epoch": 0.7158795947416897, "grad_norm": 0.28592607378959656, "learning_rate": 0.00017322567212878652, "loss": 11.6548, "step": 34199 }, { "epoch": 0.7159005275056518, "grad_norm": 0.312627375125885, "learning_rate": 0.00017322417893336707, "loss": 11.6766, "step": 34200 }, { "epoch": 0.715921460269614, "grad_norm": 0.33581531047821045, "learning_rate": 0.00017322268570274727, "loss": 11.6633, "step": 34201 }, { "epoch": 0.7159423930335762, "grad_norm": 0.24461102485656738, "learning_rate": 0.00017322119243692787, "loss": 11.663, "step": 34202 }, { "epoch": 0.7159633257975383, "grad_norm": 0.33751851320266724, "learning_rate": 0.00017321969913590958, "loss": 11.6781, "step": 34203 }, { "epoch": 0.7159842585615005, "grad_norm": 0.3222067654132843, "learning_rate": 0.0001732182057996931, "loss": 11.67, "step": 34204 }, { "epoch": 0.7160051913254626, "grad_norm": 0.317668616771698, "learning_rate": 0.00017321671242827917, "loss": 11.6511, "step": 34205 }, { "epoch": 0.7160261240894248, "grad_norm": 0.26657429337501526, "learning_rate": 0.00017321521902166848, "loss": 11.6746, "step": 34206 }, { "epoch": 0.7160470568533869, "grad_norm": 0.29203516244888306, "learning_rate": 0.0001732137255798618, "loss": 11.641, "step": 34207 }, { "epoch": 0.7160679896173491, "grad_norm": 0.3259193003177643, "learning_rate": 0.0001732122321028598, "loss": 11.6591, "step": 34208 }, { "epoch": 0.7160889223813113, "grad_norm": 0.3379446864128113, "learning_rate": 0.00017321073859066322, "loss": 11.6604, "step": 34209 }, { "epoch": 0.7161098551452734, "grad_norm": 0.25754186511039734, "learning_rate": 0.00017320924504327274, "loss": 11.6633, "step": 34210 }, { "epoch": 0.7161307879092356, "grad_norm": 0.2640848457813263, "learning_rate": 0.00017320775146068912, "loss": 11.6532, "step": 34211 }, { "epoch": 0.7161517206731977, "grad_norm": 0.2856983244419098, "learning_rate": 0.00017320625784291309, "loss": 11.6646, "step": 34212 }, { "epoch": 0.7161726534371599, "grad_norm": 0.28195711970329285, "learning_rate": 0.00017320476418994534, "loss": 11.6627, "step": 34213 }, { "epoch": 0.716193586201122, "grad_norm": 0.29800620675086975, "learning_rate": 0.00017320327050178658, "loss": 11.666, "step": 34214 }, { "epoch": 0.7162145189650841, "grad_norm": 0.23620037734508514, "learning_rate": 0.00017320177677843758, "loss": 11.6609, "step": 34215 }, { "epoch": 0.7162354517290463, "grad_norm": 0.2594029903411865, "learning_rate": 0.00017320028301989898, "loss": 11.6719, "step": 34216 }, { "epoch": 0.7162563844930084, "grad_norm": 0.2572470009326935, "learning_rate": 0.0001731987892261715, "loss": 11.6881, "step": 34217 }, { "epoch": 0.7162773172569706, "grad_norm": 0.2855706810951233, "learning_rate": 0.00017319729539725596, "loss": 11.6859, "step": 34218 }, { "epoch": 0.7162982500209327, "grad_norm": 0.28441399335861206, "learning_rate": 0.000173195801533153, "loss": 11.6584, "step": 34219 }, { "epoch": 0.7163191827848949, "grad_norm": 0.311693400144577, "learning_rate": 0.00017319430763386334, "loss": 11.6784, "step": 34220 }, { "epoch": 0.7163401155488571, "grad_norm": 0.30152010917663574, "learning_rate": 0.0001731928136993877, "loss": 11.6752, "step": 34221 }, { "epoch": 0.7163610483128192, "grad_norm": 0.3636484146118164, "learning_rate": 0.00017319131972972683, "loss": 11.6766, "step": 34222 }, { "epoch": 0.7163819810767814, "grad_norm": 0.30395761132240295, "learning_rate": 0.0001731898257248814, "loss": 11.6561, "step": 34223 }, { "epoch": 0.7164029138407435, "grad_norm": 0.40121403336524963, "learning_rate": 0.00017318833168485217, "loss": 11.6569, "step": 34224 }, { "epoch": 0.7164238466047057, "grad_norm": 0.30847862362861633, "learning_rate": 0.00017318683760963986, "loss": 11.666, "step": 34225 }, { "epoch": 0.7164447793686678, "grad_norm": 0.289238303899765, "learning_rate": 0.00017318534349924515, "loss": 11.6691, "step": 34226 }, { "epoch": 0.71646571213263, "grad_norm": 0.32913607358932495, "learning_rate": 0.0001731838493536688, "loss": 11.6776, "step": 34227 }, { "epoch": 0.7164866448965922, "grad_norm": 0.2589741349220276, "learning_rate": 0.00017318235517291145, "loss": 11.6931, "step": 34228 }, { "epoch": 0.7165075776605543, "grad_norm": 0.2695668041706085, "learning_rate": 0.00017318086095697392, "loss": 11.6695, "step": 34229 }, { "epoch": 0.7165285104245165, "grad_norm": 0.3361767530441284, "learning_rate": 0.00017317936670585687, "loss": 11.6815, "step": 34230 }, { "epoch": 0.7165494431884786, "grad_norm": 0.23819869756698608, "learning_rate": 0.00017317787241956104, "loss": 11.6705, "step": 34231 }, { "epoch": 0.7165703759524408, "grad_norm": 0.2925550043582916, "learning_rate": 0.00017317637809808716, "loss": 11.667, "step": 34232 }, { "epoch": 0.7165913087164029, "grad_norm": 0.27811989188194275, "learning_rate": 0.00017317488374143589, "loss": 11.6579, "step": 34233 }, { "epoch": 0.7166122414803651, "grad_norm": 0.358271986246109, "learning_rate": 0.00017317338934960802, "loss": 11.6735, "step": 34234 }, { "epoch": 0.7166331742443273, "grad_norm": 0.2770225405693054, "learning_rate": 0.00017317189492260422, "loss": 11.674, "step": 34235 }, { "epoch": 0.7166541070082894, "grad_norm": 0.3274865746498108, "learning_rate": 0.0001731704004604252, "loss": 11.6598, "step": 34236 }, { "epoch": 0.7166750397722516, "grad_norm": 0.27142924070358276, "learning_rate": 0.00017316890596307173, "loss": 11.6615, "step": 34237 }, { "epoch": 0.7166959725362136, "grad_norm": 0.24934138357639313, "learning_rate": 0.00017316741143054448, "loss": 11.6843, "step": 34238 }, { "epoch": 0.7167169053001758, "grad_norm": 0.31252747774124146, "learning_rate": 0.00017316591686284422, "loss": 11.6642, "step": 34239 }, { "epoch": 0.716737838064138, "grad_norm": 0.42735761404037476, "learning_rate": 0.00017316442225997163, "loss": 11.6692, "step": 34240 }, { "epoch": 0.7167587708281001, "grad_norm": 0.26814743876457214, "learning_rate": 0.00017316292762192744, "loss": 11.6795, "step": 34241 }, { "epoch": 0.7167797035920623, "grad_norm": 0.30419179797172546, "learning_rate": 0.00017316143294871235, "loss": 11.6619, "step": 34242 }, { "epoch": 0.7168006363560244, "grad_norm": 0.2667909264564514, "learning_rate": 0.00017315993824032708, "loss": 11.6558, "step": 34243 }, { "epoch": 0.7168215691199866, "grad_norm": 0.3293270766735077, "learning_rate": 0.00017315844349677237, "loss": 11.6718, "step": 34244 }, { "epoch": 0.7168425018839487, "grad_norm": 0.2945464551448822, "learning_rate": 0.00017315694871804894, "loss": 11.6827, "step": 34245 }, { "epoch": 0.7168634346479109, "grad_norm": 0.30844637751579285, "learning_rate": 0.0001731554539041575, "loss": 11.6487, "step": 34246 }, { "epoch": 0.7168843674118731, "grad_norm": 0.26546546816825867, "learning_rate": 0.00017315395905509877, "loss": 11.6722, "step": 34247 }, { "epoch": 0.7169053001758352, "grad_norm": 0.2925609052181244, "learning_rate": 0.00017315246417087346, "loss": 11.6727, "step": 34248 }, { "epoch": 0.7169262329397974, "grad_norm": 0.30493852496147156, "learning_rate": 0.00017315096925148228, "loss": 11.6749, "step": 34249 }, { "epoch": 0.7169471657037595, "grad_norm": 0.2992052137851715, "learning_rate": 0.00017314947429692597, "loss": 11.6809, "step": 34250 }, { "epoch": 0.7169680984677217, "grad_norm": 0.2614575922489166, "learning_rate": 0.00017314797930720528, "loss": 11.6687, "step": 34251 }, { "epoch": 0.7169890312316838, "grad_norm": 0.27518683671951294, "learning_rate": 0.00017314648428232084, "loss": 11.664, "step": 34252 }, { "epoch": 0.717009963995646, "grad_norm": 0.30244430899620056, "learning_rate": 0.00017314498922227345, "loss": 11.6857, "step": 34253 }, { "epoch": 0.7170308967596082, "grad_norm": 0.2794760465621948, "learning_rate": 0.00017314349412706376, "loss": 11.6631, "step": 34254 }, { "epoch": 0.7170518295235703, "grad_norm": 0.27415090799331665, "learning_rate": 0.00017314199899669255, "loss": 11.6763, "step": 34255 }, { "epoch": 0.7170727622875325, "grad_norm": 0.23810499906539917, "learning_rate": 0.00017314050383116053, "loss": 11.6781, "step": 34256 }, { "epoch": 0.7170936950514946, "grad_norm": 0.23002181947231293, "learning_rate": 0.0001731390086304684, "loss": 11.6672, "step": 34257 }, { "epoch": 0.7171146278154568, "grad_norm": 0.3046947419643402, "learning_rate": 0.00017313751339461688, "loss": 11.6745, "step": 34258 }, { "epoch": 0.7171355605794189, "grad_norm": 0.29865530133247375, "learning_rate": 0.00017313601812360668, "loss": 11.67, "step": 34259 }, { "epoch": 0.717156493343381, "grad_norm": 0.2860887348651886, "learning_rate": 0.00017313452281743854, "loss": 11.6758, "step": 34260 }, { "epoch": 0.7171774261073433, "grad_norm": 0.25065913796424866, "learning_rate": 0.00017313302747611317, "loss": 11.6889, "step": 34261 }, { "epoch": 0.7171983588713053, "grad_norm": 0.3900470733642578, "learning_rate": 0.00017313153209963126, "loss": 11.6872, "step": 34262 }, { "epoch": 0.7172192916352675, "grad_norm": 0.2587220072746277, "learning_rate": 0.0001731300366879936, "loss": 11.6678, "step": 34263 }, { "epoch": 0.7172402243992296, "grad_norm": 0.3877573013305664, "learning_rate": 0.00017312854124120082, "loss": 11.6705, "step": 34264 }, { "epoch": 0.7172611571631918, "grad_norm": 0.23163864016532898, "learning_rate": 0.00017312704575925372, "loss": 11.6669, "step": 34265 }, { "epoch": 0.717282089927154, "grad_norm": 0.2795632779598236, "learning_rate": 0.00017312555024215297, "loss": 11.6754, "step": 34266 }, { "epoch": 0.7173030226911161, "grad_norm": 0.34806913137435913, "learning_rate": 0.00017312405468989934, "loss": 11.6473, "step": 34267 }, { "epoch": 0.7173239554550783, "grad_norm": 0.32089316844940186, "learning_rate": 0.00017312255910249347, "loss": 11.6652, "step": 34268 }, { "epoch": 0.7173448882190404, "grad_norm": 0.30470794439315796, "learning_rate": 0.00017312106347993614, "loss": 11.6774, "step": 34269 }, { "epoch": 0.7173658209830026, "grad_norm": 0.3217290937900543, "learning_rate": 0.00017311956782222802, "loss": 11.6685, "step": 34270 }, { "epoch": 0.7173867537469647, "grad_norm": 0.2838341295719147, "learning_rate": 0.00017311807212936988, "loss": 11.6713, "step": 34271 }, { "epoch": 0.7174076865109269, "grad_norm": 0.29235419631004333, "learning_rate": 0.00017311657640136242, "loss": 11.6674, "step": 34272 }, { "epoch": 0.7174286192748891, "grad_norm": 0.3208886384963989, "learning_rate": 0.00017311508063820634, "loss": 11.6661, "step": 34273 }, { "epoch": 0.7174495520388512, "grad_norm": 0.2758347690105438, "learning_rate": 0.0001731135848399024, "loss": 11.6746, "step": 34274 }, { "epoch": 0.7174704848028134, "grad_norm": 0.274372935295105, "learning_rate": 0.00017311208900645128, "loss": 11.6646, "step": 34275 }, { "epoch": 0.7174914175667755, "grad_norm": 0.2654343843460083, "learning_rate": 0.00017311059313785372, "loss": 11.6565, "step": 34276 }, { "epoch": 0.7175123503307377, "grad_norm": 0.2615987956523895, "learning_rate": 0.00017310909723411042, "loss": 11.6781, "step": 34277 }, { "epoch": 0.7175332830946998, "grad_norm": 0.24215000867843628, "learning_rate": 0.00017310760129522212, "loss": 11.6642, "step": 34278 }, { "epoch": 0.717554215858662, "grad_norm": 0.27845317125320435, "learning_rate": 0.00017310610532118953, "loss": 11.668, "step": 34279 }, { "epoch": 0.7175751486226242, "grad_norm": 0.2545529305934906, "learning_rate": 0.00017310460931201338, "loss": 11.6666, "step": 34280 }, { "epoch": 0.7175960813865863, "grad_norm": 0.29294851422309875, "learning_rate": 0.00017310311326769435, "loss": 11.6723, "step": 34281 }, { "epoch": 0.7176170141505485, "grad_norm": 0.29556766152381897, "learning_rate": 0.00017310161718823324, "loss": 11.6681, "step": 34282 }, { "epoch": 0.7176379469145105, "grad_norm": 0.2359970510005951, "learning_rate": 0.00017310012107363067, "loss": 11.6514, "step": 34283 }, { "epoch": 0.7176588796784727, "grad_norm": 0.3430598974227905, "learning_rate": 0.00017309862492388743, "loss": 11.6632, "step": 34284 }, { "epoch": 0.717679812442435, "grad_norm": 0.27511656284332275, "learning_rate": 0.0001730971287390042, "loss": 11.6705, "step": 34285 }, { "epoch": 0.717700745206397, "grad_norm": 0.4657849073410034, "learning_rate": 0.00017309563251898173, "loss": 11.6788, "step": 34286 }, { "epoch": 0.7177216779703592, "grad_norm": 0.2701103091239929, "learning_rate": 0.00017309413626382072, "loss": 11.6722, "step": 34287 }, { "epoch": 0.7177426107343213, "grad_norm": 0.265678346157074, "learning_rate": 0.0001730926399735219, "loss": 11.6694, "step": 34288 }, { "epoch": 0.7177635434982835, "grad_norm": 0.2784714698791504, "learning_rate": 0.00017309114364808597, "loss": 11.6632, "step": 34289 }, { "epoch": 0.7177844762622456, "grad_norm": 0.28464335203170776, "learning_rate": 0.00017308964728751365, "loss": 11.6662, "step": 34290 }, { "epoch": 0.7178054090262078, "grad_norm": 0.27702322602272034, "learning_rate": 0.0001730881508918057, "loss": 11.6716, "step": 34291 }, { "epoch": 0.71782634179017, "grad_norm": 0.2715129852294922, "learning_rate": 0.0001730866544609628, "loss": 11.6644, "step": 34292 }, { "epoch": 0.7178472745541321, "grad_norm": 0.3680909276008606, "learning_rate": 0.0001730851579949857, "loss": 11.6656, "step": 34293 }, { "epoch": 0.7178682073180943, "grad_norm": 0.2873697280883789, "learning_rate": 0.00017308366149387507, "loss": 11.6778, "step": 34294 }, { "epoch": 0.7178891400820564, "grad_norm": 0.2911376953125, "learning_rate": 0.00017308216495763164, "loss": 11.6635, "step": 34295 }, { "epoch": 0.7179100728460186, "grad_norm": 0.25224751234054565, "learning_rate": 0.00017308066838625622, "loss": 11.668, "step": 34296 }, { "epoch": 0.7179310056099807, "grad_norm": 0.289419561624527, "learning_rate": 0.0001730791717797494, "loss": 11.6645, "step": 34297 }, { "epoch": 0.7179519383739429, "grad_norm": 0.3247937262058258, "learning_rate": 0.00017307767513811197, "loss": 11.6509, "step": 34298 }, { "epoch": 0.7179728711379051, "grad_norm": 0.2881254255771637, "learning_rate": 0.00017307617846134464, "loss": 11.6694, "step": 34299 }, { "epoch": 0.7179938039018672, "grad_norm": 0.3053050637245178, "learning_rate": 0.00017307468174944813, "loss": 11.699, "step": 34300 }, { "epoch": 0.7180147366658294, "grad_norm": 0.2579382061958313, "learning_rate": 0.00017307318500242315, "loss": 11.6525, "step": 34301 }, { "epoch": 0.7180356694297915, "grad_norm": 0.3355395793914795, "learning_rate": 0.00017307168822027044, "loss": 11.6656, "step": 34302 }, { "epoch": 0.7180566021937537, "grad_norm": 0.3624787926673889, "learning_rate": 0.0001730701914029907, "loss": 11.6861, "step": 34303 }, { "epoch": 0.7180775349577159, "grad_norm": 0.28048548102378845, "learning_rate": 0.00017306869455058465, "loss": 11.6639, "step": 34304 }, { "epoch": 0.718098467721678, "grad_norm": 0.3062196671962738, "learning_rate": 0.00017306719766305303, "loss": 11.6683, "step": 34305 }, { "epoch": 0.7181194004856402, "grad_norm": 0.32963827252388, "learning_rate": 0.0001730657007403965, "loss": 11.681, "step": 34306 }, { "epoch": 0.7181403332496022, "grad_norm": 0.2802453637123108, "learning_rate": 0.00017306420378261587, "loss": 11.6718, "step": 34307 }, { "epoch": 0.7181612660135644, "grad_norm": 0.39298489689826965, "learning_rate": 0.0001730627067897118, "loss": 11.6718, "step": 34308 }, { "epoch": 0.7181821987775265, "grad_norm": 0.2633172571659088, "learning_rate": 0.000173061209761685, "loss": 11.6594, "step": 34309 }, { "epoch": 0.7182031315414887, "grad_norm": 0.29060518741607666, "learning_rate": 0.00017305971269853624, "loss": 11.6773, "step": 34310 }, { "epoch": 0.7182240643054509, "grad_norm": 0.2699965834617615, "learning_rate": 0.0001730582156002662, "loss": 11.664, "step": 34311 }, { "epoch": 0.718244997069413, "grad_norm": 0.2520483732223511, "learning_rate": 0.0001730567184668756, "loss": 11.6693, "step": 34312 }, { "epoch": 0.7182659298333752, "grad_norm": 0.21936024725437164, "learning_rate": 0.0001730552212983652, "loss": 11.6688, "step": 34313 }, { "epoch": 0.7182868625973373, "grad_norm": 0.2832717299461365, "learning_rate": 0.0001730537240947357, "loss": 11.6569, "step": 34314 }, { "epoch": 0.7183077953612995, "grad_norm": 0.26976731419563293, "learning_rate": 0.00017305222685598775, "loss": 11.6634, "step": 34315 }, { "epoch": 0.7183287281252616, "grad_norm": 0.28712743520736694, "learning_rate": 0.00017305072958212218, "loss": 11.6694, "step": 34316 }, { "epoch": 0.7183496608892238, "grad_norm": 0.3393356502056122, "learning_rate": 0.00017304923227313968, "loss": 11.6607, "step": 34317 }, { "epoch": 0.718370593653186, "grad_norm": 0.2651076018810272, "learning_rate": 0.0001730477349290409, "loss": 11.6659, "step": 34318 }, { "epoch": 0.7183915264171481, "grad_norm": 0.2769608795642853, "learning_rate": 0.00017304623754982662, "loss": 11.6665, "step": 34319 }, { "epoch": 0.7184124591811103, "grad_norm": 0.2877173125743866, "learning_rate": 0.00017304474013549753, "loss": 11.6786, "step": 34320 }, { "epoch": 0.7184333919450724, "grad_norm": 0.32061177492141724, "learning_rate": 0.00017304324268605444, "loss": 11.6636, "step": 34321 }, { "epoch": 0.7184543247090346, "grad_norm": 0.26710596680641174, "learning_rate": 0.00017304174520149795, "loss": 11.6702, "step": 34322 }, { "epoch": 0.7184752574729968, "grad_norm": 0.2596873641014099, "learning_rate": 0.00017304024768182883, "loss": 11.6743, "step": 34323 }, { "epoch": 0.7184961902369589, "grad_norm": 0.28204071521759033, "learning_rate": 0.0001730387501270478, "loss": 11.6656, "step": 34324 }, { "epoch": 0.7185171230009211, "grad_norm": 0.2818087339401245, "learning_rate": 0.0001730372525371556, "loss": 11.6761, "step": 34325 }, { "epoch": 0.7185380557648832, "grad_norm": 0.4098687171936035, "learning_rate": 0.0001730357549121529, "loss": 11.6815, "step": 34326 }, { "epoch": 0.7185589885288454, "grad_norm": 0.29728540778160095, "learning_rate": 0.00017303425725204047, "loss": 11.6786, "step": 34327 }, { "epoch": 0.7185799212928075, "grad_norm": 0.24526898562908173, "learning_rate": 0.000173032759556819, "loss": 11.6746, "step": 34328 }, { "epoch": 0.7186008540567697, "grad_norm": 0.3552754819393158, "learning_rate": 0.0001730312618264892, "loss": 11.6744, "step": 34329 }, { "epoch": 0.7186217868207319, "grad_norm": 0.24969011545181274, "learning_rate": 0.00017302976406105184, "loss": 11.6625, "step": 34330 }, { "epoch": 0.718642719584694, "grad_norm": 0.33847948908805847, "learning_rate": 0.0001730282662605076, "loss": 11.6682, "step": 34331 }, { "epoch": 0.7186636523486561, "grad_norm": 0.3327459394931793, "learning_rate": 0.0001730267684248572, "loss": 11.6695, "step": 34332 }, { "epoch": 0.7186845851126182, "grad_norm": 0.4298208951950073, "learning_rate": 0.00017302527055410138, "loss": 11.6546, "step": 34333 }, { "epoch": 0.7187055178765804, "grad_norm": 0.2662735879421234, "learning_rate": 0.00017302377264824084, "loss": 11.6807, "step": 34334 }, { "epoch": 0.7187264506405425, "grad_norm": 0.2768825888633728, "learning_rate": 0.00017302227470727632, "loss": 11.6697, "step": 34335 }, { "epoch": 0.7187473834045047, "grad_norm": 0.3194195330142975, "learning_rate": 0.0001730207767312085, "loss": 11.6884, "step": 34336 }, { "epoch": 0.7187683161684669, "grad_norm": 0.2878623306751251, "learning_rate": 0.00017301927872003816, "loss": 11.6913, "step": 34337 }, { "epoch": 0.718789248932429, "grad_norm": 0.2542622685432434, "learning_rate": 0.000173017780673766, "loss": 11.6716, "step": 34338 }, { "epoch": 0.7188101816963912, "grad_norm": 0.2812250852584839, "learning_rate": 0.00017301628259239272, "loss": 11.6629, "step": 34339 }, { "epoch": 0.7188311144603533, "grad_norm": 0.3886677026748657, "learning_rate": 0.00017301478447591905, "loss": 11.6778, "step": 34340 }, { "epoch": 0.7188520472243155, "grad_norm": 0.36709728837013245, "learning_rate": 0.00017301328632434568, "loss": 11.6802, "step": 34341 }, { "epoch": 0.7188729799882777, "grad_norm": 0.30061087012290955, "learning_rate": 0.00017301178813767338, "loss": 11.6635, "step": 34342 }, { "epoch": 0.7188939127522398, "grad_norm": 0.2719109058380127, "learning_rate": 0.00017301028991590283, "loss": 11.6772, "step": 34343 }, { "epoch": 0.718914845516202, "grad_norm": 0.2652686536312103, "learning_rate": 0.00017300879165903482, "loss": 11.6659, "step": 34344 }, { "epoch": 0.7189357782801641, "grad_norm": 0.2735610008239746, "learning_rate": 0.00017300729336706998, "loss": 11.6644, "step": 34345 }, { "epoch": 0.7189567110441263, "grad_norm": 0.3281702399253845, "learning_rate": 0.0001730057950400091, "loss": 11.677, "step": 34346 }, { "epoch": 0.7189776438080884, "grad_norm": 0.3180862069129944, "learning_rate": 0.00017300429667785284, "loss": 11.6798, "step": 34347 }, { "epoch": 0.7189985765720506, "grad_norm": 0.33320221304893494, "learning_rate": 0.00017300279828060195, "loss": 11.6627, "step": 34348 }, { "epoch": 0.7190195093360128, "grad_norm": 0.29008910059928894, "learning_rate": 0.00017300129984825716, "loss": 11.6734, "step": 34349 }, { "epoch": 0.7190404420999749, "grad_norm": 0.3276894688606262, "learning_rate": 0.00017299980138081922, "loss": 11.6773, "step": 34350 }, { "epoch": 0.7190613748639371, "grad_norm": 0.3419203460216522, "learning_rate": 0.00017299830287828877, "loss": 11.6829, "step": 34351 }, { "epoch": 0.7190823076278992, "grad_norm": 0.22700859606266022, "learning_rate": 0.00017299680434066658, "loss": 11.6677, "step": 34352 }, { "epoch": 0.7191032403918614, "grad_norm": 0.3129928708076477, "learning_rate": 0.00017299530576795336, "loss": 11.6706, "step": 34353 }, { "epoch": 0.7191241731558234, "grad_norm": 0.36375290155410767, "learning_rate": 0.00017299380716014986, "loss": 11.6561, "step": 34354 }, { "epoch": 0.7191451059197856, "grad_norm": 0.29822906851768494, "learning_rate": 0.00017299230851725676, "loss": 11.6677, "step": 34355 }, { "epoch": 0.7191660386837478, "grad_norm": 0.3698696494102478, "learning_rate": 0.00017299080983927478, "loss": 11.6554, "step": 34356 }, { "epoch": 0.7191869714477099, "grad_norm": 0.3068459928035736, "learning_rate": 0.00017298931112620468, "loss": 11.6549, "step": 34357 }, { "epoch": 0.7192079042116721, "grad_norm": 0.2862752079963684, "learning_rate": 0.00017298781237804715, "loss": 11.6738, "step": 34358 }, { "epoch": 0.7192288369756342, "grad_norm": 0.32288888096809387, "learning_rate": 0.00017298631359480288, "loss": 11.678, "step": 34359 }, { "epoch": 0.7192497697395964, "grad_norm": 0.2765456736087799, "learning_rate": 0.00017298481477647269, "loss": 11.662, "step": 34360 }, { "epoch": 0.7192707025035586, "grad_norm": 0.30593234300613403, "learning_rate": 0.00017298331592305717, "loss": 11.6649, "step": 34361 }, { "epoch": 0.7192916352675207, "grad_norm": 0.2589980661869049, "learning_rate": 0.00017298181703455715, "loss": 11.6671, "step": 34362 }, { "epoch": 0.7193125680314829, "grad_norm": 0.23495838046073914, "learning_rate": 0.00017298031811097331, "loss": 11.6644, "step": 34363 }, { "epoch": 0.719333500795445, "grad_norm": 0.29838287830352783, "learning_rate": 0.00017297881915230633, "loss": 11.67, "step": 34364 }, { "epoch": 0.7193544335594072, "grad_norm": 0.3500900864601135, "learning_rate": 0.000172977320158557, "loss": 11.6847, "step": 34365 }, { "epoch": 0.7193753663233693, "grad_norm": 0.30057021975517273, "learning_rate": 0.00017297582112972603, "loss": 11.6643, "step": 34366 }, { "epoch": 0.7193962990873315, "grad_norm": 0.26628243923187256, "learning_rate": 0.00017297432206581407, "loss": 11.6678, "step": 34367 }, { "epoch": 0.7194172318512937, "grad_norm": 0.283796101808548, "learning_rate": 0.0001729728229668219, "loss": 11.6734, "step": 34368 }, { "epoch": 0.7194381646152558, "grad_norm": 0.3395788073539734, "learning_rate": 0.00017297132383275026, "loss": 11.6862, "step": 34369 }, { "epoch": 0.719459097379218, "grad_norm": 0.31773608922958374, "learning_rate": 0.00017296982466359984, "loss": 11.6875, "step": 34370 }, { "epoch": 0.7194800301431801, "grad_norm": 0.2970012426376343, "learning_rate": 0.00017296832545937133, "loss": 11.6591, "step": 34371 }, { "epoch": 0.7195009629071423, "grad_norm": 0.29898393154144287, "learning_rate": 0.00017296682622006552, "loss": 11.683, "step": 34372 }, { "epoch": 0.7195218956711044, "grad_norm": 0.321074903011322, "learning_rate": 0.00017296532694568306, "loss": 11.6602, "step": 34373 }, { "epoch": 0.7195428284350666, "grad_norm": 0.3029899001121521, "learning_rate": 0.00017296382763622474, "loss": 11.6743, "step": 34374 }, { "epoch": 0.7195637611990288, "grad_norm": 0.34020382165908813, "learning_rate": 0.00017296232829169122, "loss": 11.6628, "step": 34375 }, { "epoch": 0.7195846939629909, "grad_norm": 0.3334355354309082, "learning_rate": 0.00017296082891208326, "loss": 11.6772, "step": 34376 }, { "epoch": 0.719605626726953, "grad_norm": 0.32027190923690796, "learning_rate": 0.00017295932949740157, "loss": 11.6792, "step": 34377 }, { "epoch": 0.7196265594909151, "grad_norm": 0.2696648836135864, "learning_rate": 0.00017295783004764689, "loss": 11.6741, "step": 34378 }, { "epoch": 0.7196474922548773, "grad_norm": 0.3129774034023285, "learning_rate": 0.00017295633056281986, "loss": 11.672, "step": 34379 }, { "epoch": 0.7196684250188395, "grad_norm": 0.2690247595310211, "learning_rate": 0.00017295483104292128, "loss": 11.6631, "step": 34380 }, { "epoch": 0.7196893577828016, "grad_norm": 0.305380254983902, "learning_rate": 0.00017295333148795188, "loss": 11.688, "step": 34381 }, { "epoch": 0.7197102905467638, "grad_norm": 0.30389463901519775, "learning_rate": 0.00017295183189791234, "loss": 11.6659, "step": 34382 }, { "epoch": 0.7197312233107259, "grad_norm": 0.38288015127182007, "learning_rate": 0.00017295033227280338, "loss": 11.6652, "step": 34383 }, { "epoch": 0.7197521560746881, "grad_norm": 0.3609132468700409, "learning_rate": 0.00017294883261262573, "loss": 11.6739, "step": 34384 }, { "epoch": 0.7197730888386502, "grad_norm": 0.29395100474357605, "learning_rate": 0.00017294733291738012, "loss": 11.6682, "step": 34385 }, { "epoch": 0.7197940216026124, "grad_norm": 0.2967107892036438, "learning_rate": 0.00017294583318706728, "loss": 11.655, "step": 34386 }, { "epoch": 0.7198149543665746, "grad_norm": 0.30238643288612366, "learning_rate": 0.0001729443334216879, "loss": 11.6785, "step": 34387 }, { "epoch": 0.7198358871305367, "grad_norm": 0.3139341175556183, "learning_rate": 0.00017294283362124274, "loss": 11.6758, "step": 34388 }, { "epoch": 0.7198568198944989, "grad_norm": 0.30289575457572937, "learning_rate": 0.00017294133378573245, "loss": 11.6819, "step": 34389 }, { "epoch": 0.719877752658461, "grad_norm": 0.32914629578590393, "learning_rate": 0.00017293983391515784, "loss": 11.6751, "step": 34390 }, { "epoch": 0.7198986854224232, "grad_norm": 0.2888195216655731, "learning_rate": 0.0001729383340095196, "loss": 11.6766, "step": 34391 }, { "epoch": 0.7199196181863853, "grad_norm": 0.29181405901908875, "learning_rate": 0.0001729368340688184, "loss": 11.6795, "step": 34392 }, { "epoch": 0.7199405509503475, "grad_norm": 0.292477548122406, "learning_rate": 0.00017293533409305502, "loss": 11.675, "step": 34393 }, { "epoch": 0.7199614837143097, "grad_norm": 0.3478836715221405, "learning_rate": 0.00017293383408223018, "loss": 11.6529, "step": 34394 }, { "epoch": 0.7199824164782718, "grad_norm": 0.3624795079231262, "learning_rate": 0.00017293233403634455, "loss": 11.6609, "step": 34395 }, { "epoch": 0.720003349242234, "grad_norm": 0.26901066303253174, "learning_rate": 0.00017293083395539892, "loss": 11.67, "step": 34396 }, { "epoch": 0.7200242820061961, "grad_norm": 0.267090767621994, "learning_rate": 0.00017292933383939395, "loss": 11.6756, "step": 34397 }, { "epoch": 0.7200452147701583, "grad_norm": 0.32781919836997986, "learning_rate": 0.0001729278336883304, "loss": 11.6656, "step": 34398 }, { "epoch": 0.7200661475341205, "grad_norm": 0.28584304451942444, "learning_rate": 0.00017292633350220898, "loss": 11.6882, "step": 34399 }, { "epoch": 0.7200870802980825, "grad_norm": 0.2984316051006317, "learning_rate": 0.0001729248332810304, "loss": 11.6419, "step": 34400 }, { "epoch": 0.7201080130620447, "grad_norm": 0.29800280928611755, "learning_rate": 0.00017292333302479541, "loss": 11.6809, "step": 34401 }, { "epoch": 0.7201289458260068, "grad_norm": 0.27727147936820984, "learning_rate": 0.00017292183273350472, "loss": 11.6858, "step": 34402 }, { "epoch": 0.720149878589969, "grad_norm": 0.31141194701194763, "learning_rate": 0.00017292033240715903, "loss": 11.6762, "step": 34403 }, { "epoch": 0.7201708113539311, "grad_norm": 0.30509307980537415, "learning_rate": 0.00017291883204575905, "loss": 11.6724, "step": 34404 }, { "epoch": 0.7201917441178933, "grad_norm": 0.27559220790863037, "learning_rate": 0.00017291733164930554, "loss": 11.67, "step": 34405 }, { "epoch": 0.7202126768818555, "grad_norm": 0.2707788050174713, "learning_rate": 0.00017291583121779923, "loss": 11.6584, "step": 34406 }, { "epoch": 0.7202336096458176, "grad_norm": 0.2782944440841675, "learning_rate": 0.00017291433075124083, "loss": 11.6554, "step": 34407 }, { "epoch": 0.7202545424097798, "grad_norm": 0.3011532127857208, "learning_rate": 0.000172912830249631, "loss": 11.6501, "step": 34408 }, { "epoch": 0.7202754751737419, "grad_norm": 0.24562497437000275, "learning_rate": 0.00017291132971297053, "loss": 11.6859, "step": 34409 }, { "epoch": 0.7202964079377041, "grad_norm": 0.34058114886283875, "learning_rate": 0.00017290982914126013, "loss": 11.6601, "step": 34410 }, { "epoch": 0.7203173407016662, "grad_norm": 0.24776361882686615, "learning_rate": 0.0001729083285345005, "loss": 11.6593, "step": 34411 }, { "epoch": 0.7203382734656284, "grad_norm": 0.3139081299304962, "learning_rate": 0.0001729068278926924, "loss": 11.678, "step": 34412 }, { "epoch": 0.7203592062295906, "grad_norm": 0.2885602116584778, "learning_rate": 0.0001729053272158365, "loss": 11.6602, "step": 34413 }, { "epoch": 0.7203801389935527, "grad_norm": 0.38476356863975525, "learning_rate": 0.00017290382650393357, "loss": 11.6868, "step": 34414 }, { "epoch": 0.7204010717575149, "grad_norm": 0.2435634732246399, "learning_rate": 0.0001729023257569843, "loss": 11.6643, "step": 34415 }, { "epoch": 0.720422004521477, "grad_norm": 0.3381974697113037, "learning_rate": 0.00017290082497498942, "loss": 11.6677, "step": 34416 }, { "epoch": 0.7204429372854392, "grad_norm": 0.2652340233325958, "learning_rate": 0.00017289932415794965, "loss": 11.6708, "step": 34417 }, { "epoch": 0.7204638700494014, "grad_norm": 0.3364638090133667, "learning_rate": 0.00017289782330586574, "loss": 11.6832, "step": 34418 }, { "epoch": 0.7204848028133635, "grad_norm": 0.395254522562027, "learning_rate": 0.00017289632241873835, "loss": 11.6736, "step": 34419 }, { "epoch": 0.7205057355773257, "grad_norm": 0.28096234798431396, "learning_rate": 0.00017289482149656827, "loss": 11.6685, "step": 34420 }, { "epoch": 0.7205266683412878, "grad_norm": 0.3134285509586334, "learning_rate": 0.00017289332053935615, "loss": 11.6853, "step": 34421 }, { "epoch": 0.72054760110525, "grad_norm": 0.32238316535949707, "learning_rate": 0.00017289181954710276, "loss": 11.6815, "step": 34422 }, { "epoch": 0.720568533869212, "grad_norm": 0.2670676112174988, "learning_rate": 0.00017289031851980884, "loss": 11.6715, "step": 34423 }, { "epoch": 0.7205894666331742, "grad_norm": 0.36304864287376404, "learning_rate": 0.00017288881745747506, "loss": 11.6738, "step": 34424 }, { "epoch": 0.7206103993971364, "grad_norm": 0.29623815417289734, "learning_rate": 0.00017288731636010216, "loss": 11.6743, "step": 34425 }, { "epoch": 0.7206313321610985, "grad_norm": 0.3034379780292511, "learning_rate": 0.00017288581522769086, "loss": 11.6613, "step": 34426 }, { "epoch": 0.7206522649250607, "grad_norm": 0.32618170976638794, "learning_rate": 0.00017288431406024192, "loss": 11.6616, "step": 34427 }, { "epoch": 0.7206731976890228, "grad_norm": 0.36928895115852356, "learning_rate": 0.00017288281285775603, "loss": 11.6712, "step": 34428 }, { "epoch": 0.720694130452985, "grad_norm": 0.3397524058818817, "learning_rate": 0.00017288131162023388, "loss": 11.6521, "step": 34429 }, { "epoch": 0.7207150632169471, "grad_norm": 0.27867117524147034, "learning_rate": 0.00017287981034767622, "loss": 11.6728, "step": 34430 }, { "epoch": 0.7207359959809093, "grad_norm": 0.31209951639175415, "learning_rate": 0.0001728783090400838, "loss": 11.678, "step": 34431 }, { "epoch": 0.7207569287448715, "grad_norm": 0.26052001118659973, "learning_rate": 0.00017287680769745731, "loss": 11.6698, "step": 34432 }, { "epoch": 0.7207778615088336, "grad_norm": 0.37217801809310913, "learning_rate": 0.00017287530631979748, "loss": 11.6802, "step": 34433 }, { "epoch": 0.7207987942727958, "grad_norm": 0.28452160954475403, "learning_rate": 0.00017287380490710503, "loss": 11.6655, "step": 34434 }, { "epoch": 0.7208197270367579, "grad_norm": 0.27825698256492615, "learning_rate": 0.0001728723034593807, "loss": 11.6804, "step": 34435 }, { "epoch": 0.7208406598007201, "grad_norm": 0.3175612986087799, "learning_rate": 0.00017287080197662515, "loss": 11.652, "step": 34436 }, { "epoch": 0.7208615925646822, "grad_norm": 0.3845698833465576, "learning_rate": 0.00017286930045883915, "loss": 11.6495, "step": 34437 }, { "epoch": 0.7208825253286444, "grad_norm": 0.26088830828666687, "learning_rate": 0.00017286779890602345, "loss": 11.6696, "step": 34438 }, { "epoch": 0.7209034580926066, "grad_norm": 0.3705897331237793, "learning_rate": 0.0001728662973181787, "loss": 11.6679, "step": 34439 }, { "epoch": 0.7209243908565687, "grad_norm": 0.25179725885391235, "learning_rate": 0.00017286479569530568, "loss": 11.6635, "step": 34440 }, { "epoch": 0.7209453236205309, "grad_norm": 0.33159470558166504, "learning_rate": 0.00017286329403740512, "loss": 11.6772, "step": 34441 }, { "epoch": 0.720966256384493, "grad_norm": 0.27059486508369446, "learning_rate": 0.0001728617923444777, "loss": 11.6652, "step": 34442 }, { "epoch": 0.7209871891484552, "grad_norm": 0.23861245810985565, "learning_rate": 0.0001728602906165241, "loss": 11.6693, "step": 34443 }, { "epoch": 0.7210081219124174, "grad_norm": 0.3097482919692993, "learning_rate": 0.00017285878885354517, "loss": 11.6695, "step": 34444 }, { "epoch": 0.7210290546763795, "grad_norm": 0.26054391264915466, "learning_rate": 0.00017285728705554153, "loss": 11.667, "step": 34445 }, { "epoch": 0.7210499874403417, "grad_norm": 0.366036593914032, "learning_rate": 0.00017285578522251393, "loss": 11.6984, "step": 34446 }, { "epoch": 0.7210709202043037, "grad_norm": 0.338095486164093, "learning_rate": 0.0001728542833544631, "loss": 11.666, "step": 34447 }, { "epoch": 0.721091852968266, "grad_norm": 0.28455764055252075, "learning_rate": 0.00017285278145138974, "loss": 11.6612, "step": 34448 }, { "epoch": 0.721112785732228, "grad_norm": 0.28794658184051514, "learning_rate": 0.00017285127951329463, "loss": 11.651, "step": 34449 }, { "epoch": 0.7211337184961902, "grad_norm": 0.26795604825019836, "learning_rate": 0.0001728497775401784, "loss": 11.6833, "step": 34450 }, { "epoch": 0.7211546512601524, "grad_norm": 0.2644071877002716, "learning_rate": 0.0001728482755320419, "loss": 11.6738, "step": 34451 }, { "epoch": 0.7211755840241145, "grad_norm": 0.30704203248023987, "learning_rate": 0.00017284677348888567, "loss": 11.6821, "step": 34452 }, { "epoch": 0.7211965167880767, "grad_norm": 0.2602076232433319, "learning_rate": 0.00017284527141071059, "loss": 11.6728, "step": 34453 }, { "epoch": 0.7212174495520388, "grad_norm": 0.337258517742157, "learning_rate": 0.00017284376929751734, "loss": 11.6718, "step": 34454 }, { "epoch": 0.721238382316001, "grad_norm": 0.30927151441574097, "learning_rate": 0.00017284226714930662, "loss": 11.6726, "step": 34455 }, { "epoch": 0.7212593150799631, "grad_norm": 0.3068159520626068, "learning_rate": 0.00017284076496607915, "loss": 11.6552, "step": 34456 }, { "epoch": 0.7212802478439253, "grad_norm": 0.2674466371536255, "learning_rate": 0.00017283926274783569, "loss": 11.6573, "step": 34457 }, { "epoch": 0.7213011806078875, "grad_norm": 0.30883970856666565, "learning_rate": 0.00017283776049457688, "loss": 11.6799, "step": 34458 }, { "epoch": 0.7213221133718496, "grad_norm": 0.3636738955974579, "learning_rate": 0.00017283625820630353, "loss": 11.6595, "step": 34459 }, { "epoch": 0.7213430461358118, "grad_norm": 0.25219929218292236, "learning_rate": 0.00017283475588301635, "loss": 11.6654, "step": 34460 }, { "epoch": 0.7213639788997739, "grad_norm": 0.3362619876861572, "learning_rate": 0.00017283325352471604, "loss": 11.6674, "step": 34461 }, { "epoch": 0.7213849116637361, "grad_norm": 0.30426907539367676, "learning_rate": 0.0001728317511314033, "loss": 11.6667, "step": 34462 }, { "epoch": 0.7214058444276983, "grad_norm": 0.3163401782512665, "learning_rate": 0.0001728302487030789, "loss": 11.6938, "step": 34463 }, { "epoch": 0.7214267771916604, "grad_norm": 0.3454127609729767, "learning_rate": 0.00017282874623974353, "loss": 11.6798, "step": 34464 }, { "epoch": 0.7214477099556226, "grad_norm": 0.28353607654571533, "learning_rate": 0.00017282724374139793, "loss": 11.6743, "step": 34465 }, { "epoch": 0.7214686427195847, "grad_norm": 0.2770177721977234, "learning_rate": 0.00017282574120804282, "loss": 11.6643, "step": 34466 }, { "epoch": 0.7214895754835469, "grad_norm": 0.35287806391716003, "learning_rate": 0.00017282423863967892, "loss": 11.6778, "step": 34467 }, { "epoch": 0.721510508247509, "grad_norm": 0.3126088082790375, "learning_rate": 0.00017282273603630693, "loss": 11.6926, "step": 34468 }, { "epoch": 0.7215314410114712, "grad_norm": 0.2489648461341858, "learning_rate": 0.0001728212333979276, "loss": 11.6704, "step": 34469 }, { "epoch": 0.7215523737754334, "grad_norm": 0.2873033881187439, "learning_rate": 0.00017281973072454164, "loss": 11.6635, "step": 34470 }, { "epoch": 0.7215733065393954, "grad_norm": 0.33828580379486084, "learning_rate": 0.00017281822801614976, "loss": 11.6688, "step": 34471 }, { "epoch": 0.7215942393033576, "grad_norm": 0.3399020731449127, "learning_rate": 0.00017281672527275272, "loss": 11.6858, "step": 34472 }, { "epoch": 0.7216151720673197, "grad_norm": 0.29982078075408936, "learning_rate": 0.00017281522249435122, "loss": 11.6626, "step": 34473 }, { "epoch": 0.7216361048312819, "grad_norm": 0.28550243377685547, "learning_rate": 0.00017281371968094598, "loss": 11.6706, "step": 34474 }, { "epoch": 0.721657037595244, "grad_norm": 0.3704497218132019, "learning_rate": 0.0001728122168325377, "loss": 11.6577, "step": 34475 }, { "epoch": 0.7216779703592062, "grad_norm": 0.23762711882591248, "learning_rate": 0.0001728107139491272, "loss": 11.6714, "step": 34476 }, { "epoch": 0.7216989031231684, "grad_norm": 0.2617037892341614, "learning_rate": 0.00017280921103071507, "loss": 11.6726, "step": 34477 }, { "epoch": 0.7217198358871305, "grad_norm": 0.30896949768066406, "learning_rate": 0.00017280770807730211, "loss": 11.6766, "step": 34478 }, { "epoch": 0.7217407686510927, "grad_norm": 0.3548426926136017, "learning_rate": 0.000172806205088889, "loss": 11.6649, "step": 34479 }, { "epoch": 0.7217617014150548, "grad_norm": 0.38022249937057495, "learning_rate": 0.0001728047020654765, "loss": 11.684, "step": 34480 }, { "epoch": 0.721782634179017, "grad_norm": 0.2912936806678772, "learning_rate": 0.00017280319900706538, "loss": 11.6739, "step": 34481 }, { "epoch": 0.7218035669429792, "grad_norm": 0.308943510055542, "learning_rate": 0.00017280169591365625, "loss": 11.6505, "step": 34482 }, { "epoch": 0.7218244997069413, "grad_norm": 0.320269912481308, "learning_rate": 0.0001728001927852499, "loss": 11.6615, "step": 34483 }, { "epoch": 0.7218454324709035, "grad_norm": 0.2716921269893646, "learning_rate": 0.000172798689621847, "loss": 11.6666, "step": 34484 }, { "epoch": 0.7218663652348656, "grad_norm": 0.3021918535232544, "learning_rate": 0.00017279718642344833, "loss": 11.6795, "step": 34485 }, { "epoch": 0.7218872979988278, "grad_norm": 0.3259652256965637, "learning_rate": 0.0001727956831900546, "loss": 11.668, "step": 34486 }, { "epoch": 0.7219082307627899, "grad_norm": 0.2982328236103058, "learning_rate": 0.00017279417992166655, "loss": 11.6754, "step": 34487 }, { "epoch": 0.7219291635267521, "grad_norm": 0.30099964141845703, "learning_rate": 0.00017279267661828487, "loss": 11.6663, "step": 34488 }, { "epoch": 0.7219500962907143, "grad_norm": 0.2916281223297119, "learning_rate": 0.00017279117327991027, "loss": 11.6825, "step": 34489 }, { "epoch": 0.7219710290546764, "grad_norm": 0.44658154249191284, "learning_rate": 0.00017278966990654353, "loss": 11.6888, "step": 34490 }, { "epoch": 0.7219919618186386, "grad_norm": 0.3567885458469391, "learning_rate": 0.00017278816649818527, "loss": 11.6604, "step": 34491 }, { "epoch": 0.7220128945826007, "grad_norm": 0.2938884198665619, "learning_rate": 0.00017278666305483635, "loss": 11.6729, "step": 34492 }, { "epoch": 0.7220338273465629, "grad_norm": 0.24470484256744385, "learning_rate": 0.0001727851595764974, "loss": 11.6686, "step": 34493 }, { "epoch": 0.7220547601105249, "grad_norm": 0.28478240966796875, "learning_rate": 0.00017278365606316915, "loss": 11.6574, "step": 34494 }, { "epoch": 0.7220756928744871, "grad_norm": 0.26375484466552734, "learning_rate": 0.00017278215251485235, "loss": 11.659, "step": 34495 }, { "epoch": 0.7220966256384493, "grad_norm": 0.26314452290534973, "learning_rate": 0.0001727806489315477, "loss": 11.6502, "step": 34496 }, { "epoch": 0.7221175584024114, "grad_norm": 0.3178071677684784, "learning_rate": 0.00017277914531325594, "loss": 11.6727, "step": 34497 }, { "epoch": 0.7221384911663736, "grad_norm": 0.31307151913642883, "learning_rate": 0.00017277764165997777, "loss": 11.6713, "step": 34498 }, { "epoch": 0.7221594239303357, "grad_norm": 0.29599764943122864, "learning_rate": 0.00017277613797171397, "loss": 11.6513, "step": 34499 }, { "epoch": 0.7221803566942979, "grad_norm": 0.31136012077331543, "learning_rate": 0.0001727746342484652, "loss": 11.6645, "step": 34500 }, { "epoch": 0.7222012894582601, "grad_norm": 0.275957852602005, "learning_rate": 0.0001727731304902322, "loss": 11.6716, "step": 34501 }, { "epoch": 0.7222222222222222, "grad_norm": 0.21348440647125244, "learning_rate": 0.0001727716266970157, "loss": 11.6691, "step": 34502 }, { "epoch": 0.7222431549861844, "grad_norm": 0.3075391352176666, "learning_rate": 0.00017277012286881643, "loss": 11.6783, "step": 34503 }, { "epoch": 0.7222640877501465, "grad_norm": 0.2447095364332199, "learning_rate": 0.0001727686190056351, "loss": 11.6568, "step": 34504 }, { "epoch": 0.7222850205141087, "grad_norm": 0.30677780508995056, "learning_rate": 0.00017276711510747243, "loss": 11.6757, "step": 34505 }, { "epoch": 0.7223059532780708, "grad_norm": 0.26282593607902527, "learning_rate": 0.00017276561117432918, "loss": 11.6638, "step": 34506 }, { "epoch": 0.722326886042033, "grad_norm": 0.26328548789024353, "learning_rate": 0.00017276410720620596, "loss": 11.6652, "step": 34507 }, { "epoch": 0.7223478188059952, "grad_norm": 0.2978379428386688, "learning_rate": 0.00017276260320310365, "loss": 11.6621, "step": 34508 }, { "epoch": 0.7223687515699573, "grad_norm": 0.24599012732505798, "learning_rate": 0.0001727610991650229, "loss": 11.661, "step": 34509 }, { "epoch": 0.7223896843339195, "grad_norm": 0.3455601930618286, "learning_rate": 0.0001727595950919644, "loss": 11.672, "step": 34510 }, { "epoch": 0.7224106170978816, "grad_norm": 0.27475661039352417, "learning_rate": 0.0001727580909839289, "loss": 11.6539, "step": 34511 }, { "epoch": 0.7224315498618438, "grad_norm": 0.32895368337631226, "learning_rate": 0.00017275658684091715, "loss": 11.6562, "step": 34512 }, { "epoch": 0.7224524826258059, "grad_norm": 0.2458905428647995, "learning_rate": 0.00017275508266292983, "loss": 11.6723, "step": 34513 }, { "epoch": 0.7224734153897681, "grad_norm": 0.27974769473075867, "learning_rate": 0.0001727535784499677, "loss": 11.667, "step": 34514 }, { "epoch": 0.7224943481537303, "grad_norm": 0.38364481925964355, "learning_rate": 0.00017275207420203148, "loss": 11.6882, "step": 34515 }, { "epoch": 0.7225152809176923, "grad_norm": 0.3140445351600647, "learning_rate": 0.00017275056991912188, "loss": 11.6606, "step": 34516 }, { "epoch": 0.7225362136816545, "grad_norm": 0.5305556654930115, "learning_rate": 0.0001727490656012396, "loss": 11.7069, "step": 34517 }, { "epoch": 0.7225571464456166, "grad_norm": 0.370481938123703, "learning_rate": 0.00017274756124838536, "loss": 11.6666, "step": 34518 }, { "epoch": 0.7225780792095788, "grad_norm": 0.33974745869636536, "learning_rate": 0.00017274605686055995, "loss": 11.6718, "step": 34519 }, { "epoch": 0.722599011973541, "grad_norm": 0.26550355553627014, "learning_rate": 0.00017274455243776403, "loss": 11.677, "step": 34520 }, { "epoch": 0.7226199447375031, "grad_norm": 0.30566924810409546, "learning_rate": 0.0001727430479799984, "loss": 11.6538, "step": 34521 }, { "epoch": 0.7226408775014653, "grad_norm": 0.35173383355140686, "learning_rate": 0.00017274154348726368, "loss": 11.6586, "step": 34522 }, { "epoch": 0.7226618102654274, "grad_norm": 0.37295445799827576, "learning_rate": 0.00017274003895956062, "loss": 11.6652, "step": 34523 }, { "epoch": 0.7226827430293896, "grad_norm": 0.4455729126930237, "learning_rate": 0.00017273853439689002, "loss": 11.6732, "step": 34524 }, { "epoch": 0.7227036757933517, "grad_norm": 0.4535079896450043, "learning_rate": 0.0001727370297992525, "loss": 11.6734, "step": 34525 }, { "epoch": 0.7227246085573139, "grad_norm": 0.27044129371643066, "learning_rate": 0.0001727355251666489, "loss": 11.6533, "step": 34526 }, { "epoch": 0.7227455413212761, "grad_norm": 0.2983216941356659, "learning_rate": 0.00017273402049907983, "loss": 11.6657, "step": 34527 }, { "epoch": 0.7227664740852382, "grad_norm": 0.3102913796901703, "learning_rate": 0.00017273251579654606, "loss": 11.6616, "step": 34528 }, { "epoch": 0.7227874068492004, "grad_norm": 0.3788222074508667, "learning_rate": 0.00017273101105904831, "loss": 11.6696, "step": 34529 }, { "epoch": 0.7228083396131625, "grad_norm": 0.31173649430274963, "learning_rate": 0.00017272950628658733, "loss": 11.6822, "step": 34530 }, { "epoch": 0.7228292723771247, "grad_norm": 0.3125397562980652, "learning_rate": 0.0001727280014791638, "loss": 11.6863, "step": 34531 }, { "epoch": 0.7228502051410868, "grad_norm": 0.30057990550994873, "learning_rate": 0.00017272649663677844, "loss": 11.6748, "step": 34532 }, { "epoch": 0.722871137905049, "grad_norm": 0.2607399523258209, "learning_rate": 0.00017272499175943203, "loss": 11.6512, "step": 34533 }, { "epoch": 0.7228920706690112, "grad_norm": 0.40406984090805054, "learning_rate": 0.00017272348684712526, "loss": 11.6741, "step": 34534 }, { "epoch": 0.7229130034329733, "grad_norm": 0.24872766435146332, "learning_rate": 0.0001727219818998588, "loss": 11.6578, "step": 34535 }, { "epoch": 0.7229339361969355, "grad_norm": 0.3711431920528412, "learning_rate": 0.00017272047691763346, "loss": 11.6801, "step": 34536 }, { "epoch": 0.7229548689608976, "grad_norm": 0.2796802222728729, "learning_rate": 0.00017271897190044997, "loss": 11.6679, "step": 34537 }, { "epoch": 0.7229758017248598, "grad_norm": 0.3098628520965576, "learning_rate": 0.00017271746684830897, "loss": 11.6703, "step": 34538 }, { "epoch": 0.722996734488822, "grad_norm": 0.25741997361183167, "learning_rate": 0.00017271596176121121, "loss": 11.6782, "step": 34539 }, { "epoch": 0.723017667252784, "grad_norm": 0.34311598539352417, "learning_rate": 0.00017271445663915745, "loss": 11.6819, "step": 34540 }, { "epoch": 0.7230386000167462, "grad_norm": 0.3393406569957733, "learning_rate": 0.0001727129514821484, "loss": 11.6647, "step": 34541 }, { "epoch": 0.7230595327807083, "grad_norm": 0.3205333352088928, "learning_rate": 0.00017271144629018477, "loss": 11.6747, "step": 34542 }, { "epoch": 0.7230804655446705, "grad_norm": 0.30609047412872314, "learning_rate": 0.00017270994106326733, "loss": 11.6733, "step": 34543 }, { "epoch": 0.7231013983086326, "grad_norm": 0.36797723174095154, "learning_rate": 0.0001727084358013967, "loss": 11.6793, "step": 34544 }, { "epoch": 0.7231223310725948, "grad_norm": 0.221498042345047, "learning_rate": 0.00017270693050457368, "loss": 11.6487, "step": 34545 }, { "epoch": 0.723143263836557, "grad_norm": 0.3361455798149109, "learning_rate": 0.000172705425172799, "loss": 11.6771, "step": 34546 }, { "epoch": 0.7231641966005191, "grad_norm": 0.22284553945064545, "learning_rate": 0.0001727039198060734, "loss": 11.6784, "step": 34547 }, { "epoch": 0.7231851293644813, "grad_norm": 0.36468762159347534, "learning_rate": 0.00017270241440439752, "loss": 11.6814, "step": 34548 }, { "epoch": 0.7232060621284434, "grad_norm": 0.2784726023674011, "learning_rate": 0.00017270090896777217, "loss": 11.6787, "step": 34549 }, { "epoch": 0.7232269948924056, "grad_norm": 0.27774566411972046, "learning_rate": 0.000172699403496198, "loss": 11.6778, "step": 34550 }, { "epoch": 0.7232479276563677, "grad_norm": 0.26077213883399963, "learning_rate": 0.00017269789798967576, "loss": 11.673, "step": 34551 }, { "epoch": 0.7232688604203299, "grad_norm": 0.2915979027748108, "learning_rate": 0.00017269639244820622, "loss": 11.6627, "step": 34552 }, { "epoch": 0.7232897931842921, "grad_norm": 0.26895228028297424, "learning_rate": 0.00017269488687179002, "loss": 11.6587, "step": 34553 }, { "epoch": 0.7233107259482542, "grad_norm": 0.5418742895126343, "learning_rate": 0.00017269338126042798, "loss": 11.6538, "step": 34554 }, { "epoch": 0.7233316587122164, "grad_norm": 0.2732848823070526, "learning_rate": 0.00017269187561412075, "loss": 11.6718, "step": 34555 }, { "epoch": 0.7233525914761785, "grad_norm": 0.4347127676010132, "learning_rate": 0.00017269036993286906, "loss": 11.6673, "step": 34556 }, { "epoch": 0.7233735242401407, "grad_norm": 0.3634011149406433, "learning_rate": 0.0001726888642166737, "loss": 11.6645, "step": 34557 }, { "epoch": 0.7233944570041029, "grad_norm": 0.25030481815338135, "learning_rate": 0.00017268735846553534, "loss": 11.6629, "step": 34558 }, { "epoch": 0.723415389768065, "grad_norm": 0.3290247619152069, "learning_rate": 0.00017268585267945466, "loss": 11.6845, "step": 34559 }, { "epoch": 0.7234363225320272, "grad_norm": 0.24542106688022614, "learning_rate": 0.00017268434685843248, "loss": 11.6624, "step": 34560 }, { "epoch": 0.7234572552959893, "grad_norm": 0.2696336507797241, "learning_rate": 0.00017268284100246947, "loss": 11.6925, "step": 34561 }, { "epoch": 0.7234781880599515, "grad_norm": 0.31336525082588196, "learning_rate": 0.00017268133511156637, "loss": 11.6843, "step": 34562 }, { "epoch": 0.7234991208239135, "grad_norm": 0.25545912981033325, "learning_rate": 0.00017267982918572387, "loss": 11.6769, "step": 34563 }, { "epoch": 0.7235200535878757, "grad_norm": 0.31013646721839905, "learning_rate": 0.00017267832322494274, "loss": 11.6743, "step": 34564 }, { "epoch": 0.723540986351838, "grad_norm": 0.2587176263332367, "learning_rate": 0.0001726768172292237, "loss": 11.6661, "step": 34565 }, { "epoch": 0.7235619191158, "grad_norm": 0.3107187747955322, "learning_rate": 0.0001726753111985674, "loss": 11.6575, "step": 34566 }, { "epoch": 0.7235828518797622, "grad_norm": 0.29904112219810486, "learning_rate": 0.00017267380513297467, "loss": 11.6739, "step": 34567 }, { "epoch": 0.7236037846437243, "grad_norm": 0.22008618712425232, "learning_rate": 0.00017267229903244617, "loss": 11.6772, "step": 34568 }, { "epoch": 0.7236247174076865, "grad_norm": 0.3456976115703583, "learning_rate": 0.00017267079289698262, "loss": 11.6547, "step": 34569 }, { "epoch": 0.7236456501716486, "grad_norm": 0.2784223258495331, "learning_rate": 0.0001726692867265848, "loss": 11.6729, "step": 34570 }, { "epoch": 0.7236665829356108, "grad_norm": 0.3030458390712738, "learning_rate": 0.00017266778052125336, "loss": 11.67, "step": 34571 }, { "epoch": 0.723687515699573, "grad_norm": 0.25081366300582886, "learning_rate": 0.00017266627428098912, "loss": 11.6681, "step": 34572 }, { "epoch": 0.7237084484635351, "grad_norm": 0.23788025975227356, "learning_rate": 0.00017266476800579267, "loss": 11.6636, "step": 34573 }, { "epoch": 0.7237293812274973, "grad_norm": 0.28326407074928284, "learning_rate": 0.00017266326169566488, "loss": 11.6697, "step": 34574 }, { "epoch": 0.7237503139914594, "grad_norm": 0.3601617217063904, "learning_rate": 0.00017266175535060637, "loss": 11.682, "step": 34575 }, { "epoch": 0.7237712467554216, "grad_norm": 0.3203168213367462, "learning_rate": 0.0001726602489706179, "loss": 11.6824, "step": 34576 }, { "epoch": 0.7237921795193838, "grad_norm": 0.2937350571155548, "learning_rate": 0.00017265874255570018, "loss": 11.6564, "step": 34577 }, { "epoch": 0.7238131122833459, "grad_norm": 0.2810337543487549, "learning_rate": 0.00017265723610585397, "loss": 11.6765, "step": 34578 }, { "epoch": 0.7238340450473081, "grad_norm": 0.2755001485347748, "learning_rate": 0.00017265572962107996, "loss": 11.6693, "step": 34579 }, { "epoch": 0.7238549778112702, "grad_norm": 0.3082193434238434, "learning_rate": 0.00017265422310137887, "loss": 11.6793, "step": 34580 }, { "epoch": 0.7238759105752324, "grad_norm": 0.30634063482284546, "learning_rate": 0.00017265271654675146, "loss": 11.6843, "step": 34581 }, { "epoch": 0.7238968433391945, "grad_norm": 0.3034479022026062, "learning_rate": 0.00017265120995719842, "loss": 11.6571, "step": 34582 }, { "epoch": 0.7239177761031567, "grad_norm": 0.22103051841259003, "learning_rate": 0.0001726497033327205, "loss": 11.682, "step": 34583 }, { "epoch": 0.7239387088671189, "grad_norm": 0.2684937119483948, "learning_rate": 0.0001726481966733184, "loss": 11.6658, "step": 34584 }, { "epoch": 0.723959641631081, "grad_norm": 0.26441773772239685, "learning_rate": 0.00017264668997899287, "loss": 11.6687, "step": 34585 }, { "epoch": 0.7239805743950432, "grad_norm": 0.29512694478034973, "learning_rate": 0.00017264518324974461, "loss": 11.6596, "step": 34586 }, { "epoch": 0.7240015071590052, "grad_norm": 0.27505427598953247, "learning_rate": 0.00017264367648557437, "loss": 11.6734, "step": 34587 }, { "epoch": 0.7240224399229674, "grad_norm": 0.2939107120037079, "learning_rate": 0.00017264216968648283, "loss": 11.6953, "step": 34588 }, { "epoch": 0.7240433726869295, "grad_norm": 0.3362225592136383, "learning_rate": 0.00017264066285247075, "loss": 11.6869, "step": 34589 }, { "epoch": 0.7240643054508917, "grad_norm": 0.3205176591873169, "learning_rate": 0.00017263915598353887, "loss": 11.663, "step": 34590 }, { "epoch": 0.7240852382148539, "grad_norm": 0.2670627534389496, "learning_rate": 0.00017263764907968787, "loss": 11.6464, "step": 34591 }, { "epoch": 0.724106170978816, "grad_norm": 0.4067031443119049, "learning_rate": 0.0001726361421409185, "loss": 11.6677, "step": 34592 }, { "epoch": 0.7241271037427782, "grad_norm": 0.28132590651512146, "learning_rate": 0.0001726346351672315, "loss": 11.6726, "step": 34593 }, { "epoch": 0.7241480365067403, "grad_norm": 0.28987860679626465, "learning_rate": 0.00017263312815862756, "loss": 11.6503, "step": 34594 }, { "epoch": 0.7241689692707025, "grad_norm": 0.2887553870677948, "learning_rate": 0.0001726316211151074, "loss": 11.6675, "step": 34595 }, { "epoch": 0.7241899020346647, "grad_norm": 0.2563118636608124, "learning_rate": 0.00017263011403667177, "loss": 11.655, "step": 34596 }, { "epoch": 0.7242108347986268, "grad_norm": 0.34485700726509094, "learning_rate": 0.00017262860692332144, "loss": 11.6705, "step": 34597 }, { "epoch": 0.724231767562589, "grad_norm": 0.2899494767189026, "learning_rate": 0.00017262709977505702, "loss": 11.6727, "step": 34598 }, { "epoch": 0.7242527003265511, "grad_norm": 0.37593749165534973, "learning_rate": 0.00017262559259187933, "loss": 11.6765, "step": 34599 }, { "epoch": 0.7242736330905133, "grad_norm": 0.28645050525665283, "learning_rate": 0.00017262408537378906, "loss": 11.6717, "step": 34600 }, { "epoch": 0.7242945658544754, "grad_norm": 0.3889175355434418, "learning_rate": 0.00017262257812078693, "loss": 11.6774, "step": 34601 }, { "epoch": 0.7243154986184376, "grad_norm": 0.3020963966846466, "learning_rate": 0.00017262107083287366, "loss": 11.6873, "step": 34602 }, { "epoch": 0.7243364313823998, "grad_norm": 0.3569795787334442, "learning_rate": 0.00017261956351005003, "loss": 11.6767, "step": 34603 }, { "epoch": 0.7243573641463619, "grad_norm": 0.2734089493751526, "learning_rate": 0.0001726180561523167, "loss": 11.6578, "step": 34604 }, { "epoch": 0.7243782969103241, "grad_norm": 0.2703147530555725, "learning_rate": 0.0001726165487596744, "loss": 11.6774, "step": 34605 }, { "epoch": 0.7243992296742862, "grad_norm": 0.3559832274913788, "learning_rate": 0.00017261504133212388, "loss": 11.6602, "step": 34606 }, { "epoch": 0.7244201624382484, "grad_norm": 0.34708571434020996, "learning_rate": 0.00017261353386966582, "loss": 11.6666, "step": 34607 }, { "epoch": 0.7244410952022104, "grad_norm": 0.43445026874542236, "learning_rate": 0.000172612026372301, "loss": 11.6763, "step": 34608 }, { "epoch": 0.7244620279661727, "grad_norm": 0.28405535221099854, "learning_rate": 0.00017261051884003017, "loss": 11.6801, "step": 34609 }, { "epoch": 0.7244829607301349, "grad_norm": 0.2760743796825409, "learning_rate": 0.00017260901127285396, "loss": 11.6589, "step": 34610 }, { "epoch": 0.7245038934940969, "grad_norm": 0.2796086072921753, "learning_rate": 0.00017260750367077313, "loss": 11.682, "step": 34611 }, { "epoch": 0.7245248262580591, "grad_norm": 0.2943912744522095, "learning_rate": 0.00017260599603378845, "loss": 11.6835, "step": 34612 }, { "epoch": 0.7245457590220212, "grad_norm": 0.29050445556640625, "learning_rate": 0.0001726044883619006, "loss": 11.6725, "step": 34613 }, { "epoch": 0.7245666917859834, "grad_norm": 0.2523787319660187, "learning_rate": 0.00017260298065511034, "loss": 11.667, "step": 34614 }, { "epoch": 0.7245876245499456, "grad_norm": 0.2672213912010193, "learning_rate": 0.00017260147291341834, "loss": 11.6696, "step": 34615 }, { "epoch": 0.7246085573139077, "grad_norm": 0.3195834159851074, "learning_rate": 0.0001725999651368254, "loss": 11.6636, "step": 34616 }, { "epoch": 0.7246294900778699, "grad_norm": 0.3838420510292053, "learning_rate": 0.00017259845732533216, "loss": 11.6726, "step": 34617 }, { "epoch": 0.724650422841832, "grad_norm": 0.29165974259376526, "learning_rate": 0.0001725969494789394, "loss": 11.669, "step": 34618 }, { "epoch": 0.7246713556057942, "grad_norm": 0.330914705991745, "learning_rate": 0.00017259544159764782, "loss": 11.6585, "step": 34619 }, { "epoch": 0.7246922883697563, "grad_norm": 0.2625904977321625, "learning_rate": 0.00017259393368145818, "loss": 11.6746, "step": 34620 }, { "epoch": 0.7247132211337185, "grad_norm": 0.3717927038669586, "learning_rate": 0.00017259242573037118, "loss": 11.6604, "step": 34621 }, { "epoch": 0.7247341538976807, "grad_norm": 0.2712933421134949, "learning_rate": 0.0001725909177443875, "loss": 11.6479, "step": 34622 }, { "epoch": 0.7247550866616428, "grad_norm": 0.30979540944099426, "learning_rate": 0.00017258940972350795, "loss": 11.6605, "step": 34623 }, { "epoch": 0.724776019425605, "grad_norm": 0.3778413236141205, "learning_rate": 0.00017258790166773321, "loss": 11.6534, "step": 34624 }, { "epoch": 0.7247969521895671, "grad_norm": 0.3710753619670868, "learning_rate": 0.00017258639357706402, "loss": 11.6667, "step": 34625 }, { "epoch": 0.7248178849535293, "grad_norm": 0.34266164898872375, "learning_rate": 0.0001725848854515011, "loss": 11.6835, "step": 34626 }, { "epoch": 0.7248388177174914, "grad_norm": 0.345344215631485, "learning_rate": 0.00017258337729104516, "loss": 11.6535, "step": 34627 }, { "epoch": 0.7248597504814536, "grad_norm": 0.32764366269111633, "learning_rate": 0.00017258186909569693, "loss": 11.6668, "step": 34628 }, { "epoch": 0.7248806832454158, "grad_norm": 0.3100413978099823, "learning_rate": 0.00017258036086545715, "loss": 11.6717, "step": 34629 }, { "epoch": 0.7249016160093779, "grad_norm": 0.3948173522949219, "learning_rate": 0.00017257885260032652, "loss": 11.6931, "step": 34630 }, { "epoch": 0.7249225487733401, "grad_norm": 0.26433756947517395, "learning_rate": 0.0001725773443003058, "loss": 11.66, "step": 34631 }, { "epoch": 0.7249434815373021, "grad_norm": 0.3505008816719055, "learning_rate": 0.0001725758359653957, "loss": 11.6723, "step": 34632 }, { "epoch": 0.7249644143012643, "grad_norm": 0.2730235755443573, "learning_rate": 0.00017257432759559694, "loss": 11.6815, "step": 34633 }, { "epoch": 0.7249853470652264, "grad_norm": 0.3377133905887604, "learning_rate": 0.00017257281919091022, "loss": 11.6666, "step": 34634 }, { "epoch": 0.7250062798291886, "grad_norm": 0.309334397315979, "learning_rate": 0.00017257131075133634, "loss": 11.67, "step": 34635 }, { "epoch": 0.7250272125931508, "grad_norm": 0.32214421033859253, "learning_rate": 0.00017256980227687595, "loss": 11.6686, "step": 34636 }, { "epoch": 0.7250481453571129, "grad_norm": 0.3407442271709442, "learning_rate": 0.00017256829376752978, "loss": 11.6687, "step": 34637 }, { "epoch": 0.7250690781210751, "grad_norm": 0.35451775789260864, "learning_rate": 0.0001725667852232986, "loss": 11.6804, "step": 34638 }, { "epoch": 0.7250900108850372, "grad_norm": 0.2854253351688385, "learning_rate": 0.0001725652766441831, "loss": 11.6593, "step": 34639 }, { "epoch": 0.7251109436489994, "grad_norm": 0.31137174367904663, "learning_rate": 0.00017256376803018403, "loss": 11.6729, "step": 34640 }, { "epoch": 0.7251318764129616, "grad_norm": 0.29154303669929504, "learning_rate": 0.00017256225938130207, "loss": 11.6671, "step": 34641 }, { "epoch": 0.7251528091769237, "grad_norm": 0.2909843623638153, "learning_rate": 0.000172560750697538, "loss": 11.6674, "step": 34642 }, { "epoch": 0.7251737419408859, "grad_norm": 0.25054073333740234, "learning_rate": 0.00017255924197889254, "loss": 11.6616, "step": 34643 }, { "epoch": 0.725194674704848, "grad_norm": 0.3169711232185364, "learning_rate": 0.0001725577332253664, "loss": 11.6811, "step": 34644 }, { "epoch": 0.7252156074688102, "grad_norm": 0.43928688764572144, "learning_rate": 0.0001725562244369603, "loss": 11.6729, "step": 34645 }, { "epoch": 0.7252365402327723, "grad_norm": 0.2641926109790802, "learning_rate": 0.00017255471561367495, "loss": 11.6689, "step": 34646 }, { "epoch": 0.7252574729967345, "grad_norm": 0.37715253233909607, "learning_rate": 0.00017255320675551108, "loss": 11.6691, "step": 34647 }, { "epoch": 0.7252784057606967, "grad_norm": 0.304293155670166, "learning_rate": 0.00017255169786246945, "loss": 11.6628, "step": 34648 }, { "epoch": 0.7252993385246588, "grad_norm": 0.3232532739639282, "learning_rate": 0.0001725501889345508, "loss": 11.6742, "step": 34649 }, { "epoch": 0.725320271288621, "grad_norm": 0.30049851536750793, "learning_rate": 0.00017254867997175576, "loss": 11.6617, "step": 34650 }, { "epoch": 0.7253412040525831, "grad_norm": 0.2966683804988861, "learning_rate": 0.00017254717097408518, "loss": 11.6551, "step": 34651 }, { "epoch": 0.7253621368165453, "grad_norm": 0.28101280331611633, "learning_rate": 0.00017254566194153966, "loss": 11.696, "step": 34652 }, { "epoch": 0.7253830695805074, "grad_norm": 0.34129154682159424, "learning_rate": 0.00017254415287412003, "loss": 11.669, "step": 34653 }, { "epoch": 0.7254040023444696, "grad_norm": 0.30611616373062134, "learning_rate": 0.00017254264377182695, "loss": 11.6602, "step": 34654 }, { "epoch": 0.7254249351084318, "grad_norm": 0.3664970099925995, "learning_rate": 0.00017254113463466115, "loss": 11.6654, "step": 34655 }, { "epoch": 0.7254458678723938, "grad_norm": 0.24285557866096497, "learning_rate": 0.00017253962546262338, "loss": 11.6561, "step": 34656 }, { "epoch": 0.725466800636356, "grad_norm": 0.30261698365211487, "learning_rate": 0.00017253811625571439, "loss": 11.6727, "step": 34657 }, { "epoch": 0.7254877334003181, "grad_norm": 0.32663029432296753, "learning_rate": 0.00017253660701393485, "loss": 11.6641, "step": 34658 }, { "epoch": 0.7255086661642803, "grad_norm": 0.2659134566783905, "learning_rate": 0.00017253509773728552, "loss": 11.6695, "step": 34659 }, { "epoch": 0.7255295989282425, "grad_norm": 0.2750762403011322, "learning_rate": 0.0001725335884257671, "loss": 11.6934, "step": 34660 }, { "epoch": 0.7255505316922046, "grad_norm": 0.4852440059185028, "learning_rate": 0.00017253207907938034, "loss": 11.6666, "step": 34661 }, { "epoch": 0.7255714644561668, "grad_norm": 0.31782740354537964, "learning_rate": 0.00017253056969812594, "loss": 11.6847, "step": 34662 }, { "epoch": 0.7255923972201289, "grad_norm": 0.3225483000278473, "learning_rate": 0.00017252906028200466, "loss": 11.674, "step": 34663 }, { "epoch": 0.7256133299840911, "grad_norm": 0.33352577686309814, "learning_rate": 0.0001725275508310172, "loss": 11.6607, "step": 34664 }, { "epoch": 0.7256342627480532, "grad_norm": 0.25484582781791687, "learning_rate": 0.00017252604134516432, "loss": 11.6539, "step": 34665 }, { "epoch": 0.7256551955120154, "grad_norm": 0.2894428074359894, "learning_rate": 0.00017252453182444667, "loss": 11.6899, "step": 34666 }, { "epoch": 0.7256761282759776, "grad_norm": 0.2919002175331116, "learning_rate": 0.00017252302226886506, "loss": 11.6758, "step": 34667 }, { "epoch": 0.7256970610399397, "grad_norm": 0.3051703870296478, "learning_rate": 0.0001725215126784202, "loss": 11.6681, "step": 34668 }, { "epoch": 0.7257179938039019, "grad_norm": 0.3096855580806732, "learning_rate": 0.00017252000305311276, "loss": 11.6764, "step": 34669 }, { "epoch": 0.725738926567864, "grad_norm": 0.3210606873035431, "learning_rate": 0.00017251849339294348, "loss": 11.6667, "step": 34670 }, { "epoch": 0.7257598593318262, "grad_norm": 0.2544768750667572, "learning_rate": 0.00017251698369791314, "loss": 11.6577, "step": 34671 }, { "epoch": 0.7257807920957883, "grad_norm": 0.2946755290031433, "learning_rate": 0.00017251547396802242, "loss": 11.6898, "step": 34672 }, { "epoch": 0.7258017248597505, "grad_norm": 0.3637092113494873, "learning_rate": 0.00017251396420327205, "loss": 11.689, "step": 34673 }, { "epoch": 0.7258226576237127, "grad_norm": 0.2623191177845001, "learning_rate": 0.0001725124544036628, "loss": 11.6804, "step": 34674 }, { "epoch": 0.7258435903876748, "grad_norm": 0.2790824770927429, "learning_rate": 0.00017251094456919533, "loss": 11.6668, "step": 34675 }, { "epoch": 0.725864523151637, "grad_norm": 0.5670427680015564, "learning_rate": 0.0001725094346998704, "loss": 11.5909, "step": 34676 }, { "epoch": 0.725885455915599, "grad_norm": 0.322601854801178, "learning_rate": 0.00017250792479568872, "loss": 11.6615, "step": 34677 }, { "epoch": 0.7259063886795613, "grad_norm": 0.2515619397163391, "learning_rate": 0.00017250641485665107, "loss": 11.686, "step": 34678 }, { "epoch": 0.7259273214435235, "grad_norm": 0.3197750449180603, "learning_rate": 0.0001725049048827581, "loss": 11.6979, "step": 34679 }, { "epoch": 0.7259482542074855, "grad_norm": 0.31368333101272583, "learning_rate": 0.0001725033948740106, "loss": 11.6799, "step": 34680 }, { "epoch": 0.7259691869714477, "grad_norm": 0.3472190499305725, "learning_rate": 0.00017250188483040923, "loss": 11.6644, "step": 34681 }, { "epoch": 0.7259901197354098, "grad_norm": 0.3703356385231018, "learning_rate": 0.00017250037475195475, "loss": 11.666, "step": 34682 }, { "epoch": 0.726011052499372, "grad_norm": 0.36060917377471924, "learning_rate": 0.00017249886463864788, "loss": 11.6667, "step": 34683 }, { "epoch": 0.7260319852633341, "grad_norm": 0.30642107129096985, "learning_rate": 0.0001724973544904894, "loss": 11.6645, "step": 34684 }, { "epoch": 0.7260529180272963, "grad_norm": 0.38604629039764404, "learning_rate": 0.00017249584430747996, "loss": 11.678, "step": 34685 }, { "epoch": 0.7260738507912585, "grad_norm": 0.2684496343135834, "learning_rate": 0.0001724943340896203, "loss": 11.659, "step": 34686 }, { "epoch": 0.7260947835552206, "grad_norm": 0.324544757604599, "learning_rate": 0.00017249282383691118, "loss": 11.6654, "step": 34687 }, { "epoch": 0.7261157163191828, "grad_norm": 0.31805136799812317, "learning_rate": 0.0001724913135493533, "loss": 11.673, "step": 34688 }, { "epoch": 0.7261366490831449, "grad_norm": 0.3320125937461853, "learning_rate": 0.0001724898032269474, "loss": 11.6584, "step": 34689 }, { "epoch": 0.7261575818471071, "grad_norm": 0.3233874440193176, "learning_rate": 0.0001724882928696942, "loss": 11.6618, "step": 34690 }, { "epoch": 0.7261785146110692, "grad_norm": 0.39440232515335083, "learning_rate": 0.00017248678247759443, "loss": 11.7008, "step": 34691 }, { "epoch": 0.7261994473750314, "grad_norm": 0.3934762179851532, "learning_rate": 0.00017248527205064877, "loss": 11.6614, "step": 34692 }, { "epoch": 0.7262203801389936, "grad_norm": 0.3331090211868286, "learning_rate": 0.00017248376158885803, "loss": 11.6766, "step": 34693 }, { "epoch": 0.7262413129029557, "grad_norm": 0.34722816944122314, "learning_rate": 0.00017248225109222287, "loss": 11.6834, "step": 34694 }, { "epoch": 0.7262622456669179, "grad_norm": 0.331897109746933, "learning_rate": 0.00017248074056074405, "loss": 11.675, "step": 34695 }, { "epoch": 0.72628317843088, "grad_norm": 0.27918741106987, "learning_rate": 0.0001724792299944223, "loss": 11.6701, "step": 34696 }, { "epoch": 0.7263041111948422, "grad_norm": 0.30960091948509216, "learning_rate": 0.00017247771939325832, "loss": 11.6816, "step": 34697 }, { "epoch": 0.7263250439588044, "grad_norm": 0.2763884663581848, "learning_rate": 0.00017247620875725285, "loss": 11.6795, "step": 34698 }, { "epoch": 0.7263459767227665, "grad_norm": 0.35455042123794556, "learning_rate": 0.0001724746980864066, "loss": 11.6774, "step": 34699 }, { "epoch": 0.7263669094867287, "grad_norm": 0.2971803843975067, "learning_rate": 0.00017247318738072032, "loss": 11.6737, "step": 34700 }, { "epoch": 0.7263878422506908, "grad_norm": 0.2805904150009155, "learning_rate": 0.00017247167664019472, "loss": 11.6814, "step": 34701 }, { "epoch": 0.726408775014653, "grad_norm": 0.27156415581703186, "learning_rate": 0.00017247016586483053, "loss": 11.6538, "step": 34702 }, { "epoch": 0.726429707778615, "grad_norm": 0.3097970187664032, "learning_rate": 0.0001724686550546285, "loss": 11.668, "step": 34703 }, { "epoch": 0.7264506405425772, "grad_norm": 0.23520538210868835, "learning_rate": 0.00017246714420958933, "loss": 11.6773, "step": 34704 }, { "epoch": 0.7264715733065394, "grad_norm": 0.2517932057380676, "learning_rate": 0.00017246563332971372, "loss": 11.6669, "step": 34705 }, { "epoch": 0.7264925060705015, "grad_norm": 0.3394889533519745, "learning_rate": 0.00017246412241500245, "loss": 11.6671, "step": 34706 }, { "epoch": 0.7265134388344637, "grad_norm": 0.2600764334201813, "learning_rate": 0.00017246261146545625, "loss": 11.6599, "step": 34707 }, { "epoch": 0.7265343715984258, "grad_norm": 0.32432594895362854, "learning_rate": 0.0001724611004810758, "loss": 11.6646, "step": 34708 }, { "epoch": 0.726555304362388, "grad_norm": 0.28584563732147217, "learning_rate": 0.00017245958946186183, "loss": 11.6595, "step": 34709 }, { "epoch": 0.7265762371263501, "grad_norm": 0.28553780913352966, "learning_rate": 0.0001724580784078151, "loss": 11.6717, "step": 34710 }, { "epoch": 0.7265971698903123, "grad_norm": 0.27514541149139404, "learning_rate": 0.00017245656731893633, "loss": 11.6753, "step": 34711 }, { "epoch": 0.7266181026542745, "grad_norm": 0.2310260832309723, "learning_rate": 0.0001724550561952262, "loss": 11.675, "step": 34712 }, { "epoch": 0.7266390354182366, "grad_norm": 0.3441651165485382, "learning_rate": 0.0001724535450366855, "loss": 11.6577, "step": 34713 }, { "epoch": 0.7266599681821988, "grad_norm": 0.2759724259376526, "learning_rate": 0.00017245203384331492, "loss": 11.6702, "step": 34714 }, { "epoch": 0.7266809009461609, "grad_norm": 0.2670731246471405, "learning_rate": 0.0001724505226151152, "loss": 11.6711, "step": 34715 }, { "epoch": 0.7267018337101231, "grad_norm": 0.265469491481781, "learning_rate": 0.00017244901135208705, "loss": 11.6694, "step": 34716 }, { "epoch": 0.7267227664740853, "grad_norm": 1.2823320627212524, "learning_rate": 0.00017244750005423126, "loss": 11.6515, "step": 34717 }, { "epoch": 0.7267436992380474, "grad_norm": 0.27377283573150635, "learning_rate": 0.00017244598872154842, "loss": 11.6673, "step": 34718 }, { "epoch": 0.7267646320020096, "grad_norm": 0.3146786689758301, "learning_rate": 0.0001724444773540394, "loss": 11.6663, "step": 34719 }, { "epoch": 0.7267855647659717, "grad_norm": 0.35039040446281433, "learning_rate": 0.00017244296595170485, "loss": 11.6616, "step": 34720 }, { "epoch": 0.7268064975299339, "grad_norm": 0.269813597202301, "learning_rate": 0.00017244145451454553, "loss": 11.6709, "step": 34721 }, { "epoch": 0.726827430293896, "grad_norm": 0.31861987709999084, "learning_rate": 0.00017243994304256215, "loss": 11.6832, "step": 34722 }, { "epoch": 0.7268483630578582, "grad_norm": 0.3668162524700165, "learning_rate": 0.00017243843153575542, "loss": 11.6594, "step": 34723 }, { "epoch": 0.7268692958218204, "grad_norm": 0.29373377561569214, "learning_rate": 0.0001724369199941261, "loss": 11.6913, "step": 34724 }, { "epoch": 0.7268902285857824, "grad_norm": 0.30554771423339844, "learning_rate": 0.00017243540841767487, "loss": 11.6497, "step": 34725 }, { "epoch": 0.7269111613497447, "grad_norm": 0.27464330196380615, "learning_rate": 0.00017243389680640252, "loss": 11.6752, "step": 34726 }, { "epoch": 0.7269320941137067, "grad_norm": 0.27399471402168274, "learning_rate": 0.0001724323851603097, "loss": 11.675, "step": 34727 }, { "epoch": 0.7269530268776689, "grad_norm": 0.2975768446922302, "learning_rate": 0.00017243087347939724, "loss": 11.6806, "step": 34728 }, { "epoch": 0.726973959641631, "grad_norm": 0.3187474310398102, "learning_rate": 0.00017242936176366578, "loss": 11.6759, "step": 34729 }, { "epoch": 0.7269948924055932, "grad_norm": 0.28355666995048523, "learning_rate": 0.00017242785001311608, "loss": 11.6632, "step": 34730 }, { "epoch": 0.7270158251695554, "grad_norm": 0.3133273422718048, "learning_rate": 0.00017242633822774886, "loss": 11.6704, "step": 34731 }, { "epoch": 0.7270367579335175, "grad_norm": 0.30476221442222595, "learning_rate": 0.00017242482640756484, "loss": 11.6615, "step": 34732 }, { "epoch": 0.7270576906974797, "grad_norm": 0.3028985559940338, "learning_rate": 0.00017242331455256474, "loss": 11.6693, "step": 34733 }, { "epoch": 0.7270786234614418, "grad_norm": 0.3467325270175934, "learning_rate": 0.00017242180266274933, "loss": 11.6835, "step": 34734 }, { "epoch": 0.727099556225404, "grad_norm": 0.2779602110385895, "learning_rate": 0.0001724202907381193, "loss": 11.6788, "step": 34735 }, { "epoch": 0.7271204889893662, "grad_norm": 0.32217174768447876, "learning_rate": 0.0001724187787786754, "loss": 11.6884, "step": 34736 }, { "epoch": 0.7271414217533283, "grad_norm": 0.35962367057800293, "learning_rate": 0.00017241726678441833, "loss": 11.6759, "step": 34737 }, { "epoch": 0.7271623545172905, "grad_norm": 0.31627151370048523, "learning_rate": 0.0001724157547553488, "loss": 11.6752, "step": 34738 }, { "epoch": 0.7271832872812526, "grad_norm": 0.35341760516166687, "learning_rate": 0.0001724142426914676, "loss": 11.673, "step": 34739 }, { "epoch": 0.7272042200452148, "grad_norm": 0.2667682468891144, "learning_rate": 0.0001724127305927754, "loss": 11.6684, "step": 34740 }, { "epoch": 0.7272251528091769, "grad_norm": 0.2702273726463318, "learning_rate": 0.00017241121845927297, "loss": 11.6717, "step": 34741 }, { "epoch": 0.7272460855731391, "grad_norm": 0.3549637496471405, "learning_rate": 0.00017240970629096101, "loss": 11.6882, "step": 34742 }, { "epoch": 0.7272670183371013, "grad_norm": 0.291570246219635, "learning_rate": 0.00017240819408784023, "loss": 11.6776, "step": 34743 }, { "epoch": 0.7272879511010634, "grad_norm": 0.3312375545501709, "learning_rate": 0.00017240668184991141, "loss": 11.6524, "step": 34744 }, { "epoch": 0.7273088838650256, "grad_norm": 0.2867591381072998, "learning_rate": 0.00017240516957717526, "loss": 11.6606, "step": 34745 }, { "epoch": 0.7273298166289877, "grad_norm": 0.2973616123199463, "learning_rate": 0.00017240365726963245, "loss": 11.6792, "step": 34746 }, { "epoch": 0.7273507493929499, "grad_norm": 0.29214486479759216, "learning_rate": 0.00017240214492728376, "loss": 11.6737, "step": 34747 }, { "epoch": 0.727371682156912, "grad_norm": 0.37297534942626953, "learning_rate": 0.00017240063255012994, "loss": 11.6835, "step": 34748 }, { "epoch": 0.7273926149208741, "grad_norm": 0.307378888130188, "learning_rate": 0.00017239912013817166, "loss": 11.6726, "step": 34749 }, { "epoch": 0.7274135476848363, "grad_norm": 0.3057137131690979, "learning_rate": 0.0001723976076914097, "loss": 11.6721, "step": 34750 }, { "epoch": 0.7274344804487984, "grad_norm": 0.23974521458148956, "learning_rate": 0.0001723960952098447, "loss": 11.6589, "step": 34751 }, { "epoch": 0.7274554132127606, "grad_norm": 0.3101722300052643, "learning_rate": 0.0001723945826934775, "loss": 11.6793, "step": 34752 }, { "epoch": 0.7274763459767227, "grad_norm": 0.2655957341194153, "learning_rate": 0.00017239307014230878, "loss": 11.6685, "step": 34753 }, { "epoch": 0.7274972787406849, "grad_norm": 0.38927268981933594, "learning_rate": 0.00017239155755633922, "loss": 11.6729, "step": 34754 }, { "epoch": 0.7275182115046471, "grad_norm": 0.2889934778213501, "learning_rate": 0.0001723900449355696, "loss": 11.6702, "step": 34755 }, { "epoch": 0.7275391442686092, "grad_norm": 0.2948407828807831, "learning_rate": 0.00017238853228000064, "loss": 11.672, "step": 34756 }, { "epoch": 0.7275600770325714, "grad_norm": 0.28262096643447876, "learning_rate": 0.00017238701958963306, "loss": 11.665, "step": 34757 }, { "epoch": 0.7275810097965335, "grad_norm": 0.31086069345474243, "learning_rate": 0.0001723855068644676, "loss": 11.6564, "step": 34758 }, { "epoch": 0.7276019425604957, "grad_norm": 0.27416372299194336, "learning_rate": 0.00017238399410450495, "loss": 11.6842, "step": 34759 }, { "epoch": 0.7276228753244578, "grad_norm": 0.2581944763660431, "learning_rate": 0.0001723824813097459, "loss": 11.6489, "step": 34760 }, { "epoch": 0.72764380808842, "grad_norm": 0.31691908836364746, "learning_rate": 0.00017238096848019112, "loss": 11.6663, "step": 34761 }, { "epoch": 0.7276647408523822, "grad_norm": 0.3915267586708069, "learning_rate": 0.00017237945561584134, "loss": 11.6921, "step": 34762 }, { "epoch": 0.7276856736163443, "grad_norm": 0.30023953318595886, "learning_rate": 0.00017237794271669733, "loss": 11.6914, "step": 34763 }, { "epoch": 0.7277066063803065, "grad_norm": 0.33587872982025146, "learning_rate": 0.00017237642978275977, "loss": 11.6795, "step": 34764 }, { "epoch": 0.7277275391442686, "grad_norm": 0.2887687385082245, "learning_rate": 0.00017237491681402944, "loss": 11.6609, "step": 34765 }, { "epoch": 0.7277484719082308, "grad_norm": 0.3072710335254669, "learning_rate": 0.00017237340381050703, "loss": 11.6873, "step": 34766 }, { "epoch": 0.7277694046721929, "grad_norm": 0.3134688138961792, "learning_rate": 0.00017237189077219328, "loss": 11.6592, "step": 34767 }, { "epoch": 0.7277903374361551, "grad_norm": 0.32810214161872864, "learning_rate": 0.0001723703776990889, "loss": 11.6717, "step": 34768 }, { "epoch": 0.7278112702001173, "grad_norm": 0.42665159702301025, "learning_rate": 0.00017236886459119462, "loss": 11.6769, "step": 34769 }, { "epoch": 0.7278322029640794, "grad_norm": 0.29650482535362244, "learning_rate": 0.00017236735144851117, "loss": 11.6728, "step": 34770 }, { "epoch": 0.7278531357280416, "grad_norm": 0.28701919317245483, "learning_rate": 0.00017236583827103931, "loss": 11.6705, "step": 34771 }, { "epoch": 0.7278740684920036, "grad_norm": 0.3499810993671417, "learning_rate": 0.00017236432505877972, "loss": 11.6624, "step": 34772 }, { "epoch": 0.7278950012559658, "grad_norm": 0.27671459317207336, "learning_rate": 0.00017236281181173317, "loss": 11.663, "step": 34773 }, { "epoch": 0.727915934019928, "grad_norm": 0.3810173273086548, "learning_rate": 0.00017236129852990034, "loss": 11.6588, "step": 34774 }, { "epoch": 0.7279368667838901, "grad_norm": 0.35267573595046997, "learning_rate": 0.00017235978521328203, "loss": 11.6746, "step": 34775 }, { "epoch": 0.7279577995478523, "grad_norm": 0.272726446390152, "learning_rate": 0.00017235827186187885, "loss": 11.6829, "step": 34776 }, { "epoch": 0.7279787323118144, "grad_norm": 0.32906433939933777, "learning_rate": 0.00017235675847569162, "loss": 11.668, "step": 34777 }, { "epoch": 0.7279996650757766, "grad_norm": 0.2841005325317383, "learning_rate": 0.0001723552450547211, "loss": 11.6645, "step": 34778 }, { "epoch": 0.7280205978397387, "grad_norm": 0.3202458918094635, "learning_rate": 0.00017235373159896794, "loss": 11.6611, "step": 34779 }, { "epoch": 0.7280415306037009, "grad_norm": 0.3196689486503601, "learning_rate": 0.00017235221810843284, "loss": 11.6548, "step": 34780 }, { "epoch": 0.7280624633676631, "grad_norm": 0.3622724711894989, "learning_rate": 0.00017235070458311664, "loss": 11.6708, "step": 34781 }, { "epoch": 0.7280833961316252, "grad_norm": 0.3071463704109192, "learning_rate": 0.00017234919102302, "loss": 11.6787, "step": 34782 }, { "epoch": 0.7281043288955874, "grad_norm": 0.3346311151981354, "learning_rate": 0.0001723476774281436, "loss": 11.6504, "step": 34783 }, { "epoch": 0.7281252616595495, "grad_norm": 0.2813505530357361, "learning_rate": 0.00017234616379848826, "loss": 11.668, "step": 34784 }, { "epoch": 0.7281461944235117, "grad_norm": 0.2650209963321686, "learning_rate": 0.0001723446501340547, "loss": 11.6812, "step": 34785 }, { "epoch": 0.7281671271874738, "grad_norm": 0.2665417492389679, "learning_rate": 0.00017234313643484358, "loss": 11.6669, "step": 34786 }, { "epoch": 0.728188059951436, "grad_norm": 0.3066207468509674, "learning_rate": 0.00017234162270085568, "loss": 11.6892, "step": 34787 }, { "epoch": 0.7282089927153982, "grad_norm": 0.2954583466053009, "learning_rate": 0.0001723401089320917, "loss": 11.6603, "step": 34788 }, { "epoch": 0.7282299254793603, "grad_norm": 0.27034661173820496, "learning_rate": 0.00017233859512855238, "loss": 11.6429, "step": 34789 }, { "epoch": 0.7282508582433225, "grad_norm": 0.24804814159870148, "learning_rate": 0.00017233708129023846, "loss": 11.6688, "step": 34790 }, { "epoch": 0.7282717910072846, "grad_norm": 0.3073410987854004, "learning_rate": 0.00017233556741715061, "loss": 11.6694, "step": 34791 }, { "epoch": 0.7282927237712468, "grad_norm": 0.33611613512039185, "learning_rate": 0.00017233405350928967, "loss": 11.6614, "step": 34792 }, { "epoch": 0.728313656535209, "grad_norm": 0.32642853260040283, "learning_rate": 0.00017233253956665625, "loss": 11.6688, "step": 34793 }, { "epoch": 0.728334589299171, "grad_norm": 0.36343908309936523, "learning_rate": 0.00017233102558925114, "loss": 11.6613, "step": 34794 }, { "epoch": 0.7283555220631333, "grad_norm": 0.3542044162750244, "learning_rate": 0.00017232951157707506, "loss": 11.6868, "step": 34795 }, { "epoch": 0.7283764548270953, "grad_norm": 0.34020644426345825, "learning_rate": 0.00017232799753012876, "loss": 11.6871, "step": 34796 }, { "epoch": 0.7283973875910575, "grad_norm": 0.23824100196361542, "learning_rate": 0.0001723264834484129, "loss": 11.6657, "step": 34797 }, { "epoch": 0.7284183203550196, "grad_norm": 0.2813721001148224, "learning_rate": 0.00017232496933192827, "loss": 11.6724, "step": 34798 }, { "epoch": 0.7284392531189818, "grad_norm": 0.2665341794490814, "learning_rate": 0.00017232345518067562, "loss": 11.6522, "step": 34799 }, { "epoch": 0.728460185882944, "grad_norm": 0.2749665081501007, "learning_rate": 0.00017232194099465556, "loss": 11.6614, "step": 34800 }, { "epoch": 0.7284811186469061, "grad_norm": 0.33928951621055603, "learning_rate": 0.00017232042677386896, "loss": 11.6772, "step": 34801 }, { "epoch": 0.7285020514108683, "grad_norm": 0.39426472783088684, "learning_rate": 0.00017231891251831644, "loss": 11.6533, "step": 34802 }, { "epoch": 0.7285229841748304, "grad_norm": 0.2958250045776367, "learning_rate": 0.00017231739822799876, "loss": 11.667, "step": 34803 }, { "epoch": 0.7285439169387926, "grad_norm": 0.2669079601764679, "learning_rate": 0.00017231588390291669, "loss": 11.6722, "step": 34804 }, { "epoch": 0.7285648497027547, "grad_norm": 0.25592565536499023, "learning_rate": 0.00017231436954307088, "loss": 11.6461, "step": 34805 }, { "epoch": 0.7285857824667169, "grad_norm": 0.27435773611068726, "learning_rate": 0.00017231285514846217, "loss": 11.6465, "step": 34806 }, { "epoch": 0.7286067152306791, "grad_norm": 0.2861135005950928, "learning_rate": 0.00017231134071909118, "loss": 11.6643, "step": 34807 }, { "epoch": 0.7286276479946412, "grad_norm": 0.27853208780288696, "learning_rate": 0.00017230982625495868, "loss": 11.6754, "step": 34808 }, { "epoch": 0.7286485807586034, "grad_norm": 0.2602761387825012, "learning_rate": 0.0001723083117560654, "loss": 11.6734, "step": 34809 }, { "epoch": 0.7286695135225655, "grad_norm": 0.3030385375022888, "learning_rate": 0.00017230679722241208, "loss": 11.6817, "step": 34810 }, { "epoch": 0.7286904462865277, "grad_norm": 0.364883154630661, "learning_rate": 0.00017230528265399942, "loss": 11.6802, "step": 34811 }, { "epoch": 0.7287113790504898, "grad_norm": 0.2785629630088806, "learning_rate": 0.00017230376805082816, "loss": 11.6693, "step": 34812 }, { "epoch": 0.728732311814452, "grad_norm": 0.2537103295326233, "learning_rate": 0.00017230225341289903, "loss": 11.6662, "step": 34813 }, { "epoch": 0.7287532445784142, "grad_norm": 0.28568848967552185, "learning_rate": 0.0001723007387402128, "loss": 11.6902, "step": 34814 }, { "epoch": 0.7287741773423763, "grad_norm": 0.3016655445098877, "learning_rate": 0.0001722992240327701, "loss": 11.6759, "step": 34815 }, { "epoch": 0.7287951101063385, "grad_norm": 0.31855326890945435, "learning_rate": 0.00017229770929057172, "loss": 11.6823, "step": 34816 }, { "epoch": 0.7288160428703006, "grad_norm": 0.2612282633781433, "learning_rate": 0.00017229619451361838, "loss": 11.66, "step": 34817 }, { "epoch": 0.7288369756342628, "grad_norm": 0.2726493775844574, "learning_rate": 0.00017229467970191083, "loss": 11.6498, "step": 34818 }, { "epoch": 0.728857908398225, "grad_norm": 0.3554592430591583, "learning_rate": 0.00017229316485544978, "loss": 11.6733, "step": 34819 }, { "epoch": 0.728878841162187, "grad_norm": 0.29415175318717957, "learning_rate": 0.00017229164997423593, "loss": 11.6719, "step": 34820 }, { "epoch": 0.7288997739261492, "grad_norm": 0.23495204746723175, "learning_rate": 0.00017229013505827006, "loss": 11.6712, "step": 34821 }, { "epoch": 0.7289207066901113, "grad_norm": 0.3981403708457947, "learning_rate": 0.00017228862010755286, "loss": 11.6813, "step": 34822 }, { "epoch": 0.7289416394540735, "grad_norm": 0.2942848205566406, "learning_rate": 0.0001722871051220851, "loss": 11.6604, "step": 34823 }, { "epoch": 0.7289625722180356, "grad_norm": 0.30882126092910767, "learning_rate": 0.00017228559010186746, "loss": 11.6727, "step": 34824 }, { "epoch": 0.7289835049819978, "grad_norm": 0.28236180543899536, "learning_rate": 0.00017228407504690067, "loss": 11.6765, "step": 34825 }, { "epoch": 0.72900443774596, "grad_norm": 0.3343599736690521, "learning_rate": 0.00017228255995718552, "loss": 11.6567, "step": 34826 }, { "epoch": 0.7290253705099221, "grad_norm": 0.29050466418266296, "learning_rate": 0.00017228104483272264, "loss": 11.6851, "step": 34827 }, { "epoch": 0.7290463032738843, "grad_norm": 0.5398470759391785, "learning_rate": 0.00017227952967351285, "loss": 11.6507, "step": 34828 }, { "epoch": 0.7290672360378464, "grad_norm": 0.36332952976226807, "learning_rate": 0.00017227801447955684, "loss": 11.6719, "step": 34829 }, { "epoch": 0.7290881688018086, "grad_norm": 0.2618193030357361, "learning_rate": 0.00017227649925085533, "loss": 11.6896, "step": 34830 }, { "epoch": 0.7291091015657707, "grad_norm": 0.27224069833755493, "learning_rate": 0.00017227498398740906, "loss": 11.6663, "step": 34831 }, { "epoch": 0.7291300343297329, "grad_norm": 0.2849724292755127, "learning_rate": 0.00017227346868921875, "loss": 11.6737, "step": 34832 }, { "epoch": 0.7291509670936951, "grad_norm": 0.40625298023223877, "learning_rate": 0.00017227195335628516, "loss": 11.6882, "step": 34833 }, { "epoch": 0.7291718998576572, "grad_norm": 0.24108946323394775, "learning_rate": 0.00017227043798860898, "loss": 11.6807, "step": 34834 }, { "epoch": 0.7291928326216194, "grad_norm": 0.32487449049949646, "learning_rate": 0.00017226892258619095, "loss": 11.6759, "step": 34835 }, { "epoch": 0.7292137653855815, "grad_norm": 0.26055967807769775, "learning_rate": 0.0001722674071490318, "loss": 11.6623, "step": 34836 }, { "epoch": 0.7292346981495437, "grad_norm": 0.2762923538684845, "learning_rate": 0.00017226589167713226, "loss": 11.6874, "step": 34837 }, { "epoch": 0.7292556309135059, "grad_norm": 0.28073951601982117, "learning_rate": 0.00017226437617049305, "loss": 11.6605, "step": 34838 }, { "epoch": 0.729276563677468, "grad_norm": 0.34396421909332275, "learning_rate": 0.00017226286062911491, "loss": 11.6376, "step": 34839 }, { "epoch": 0.7292974964414302, "grad_norm": 0.3699915409088135, "learning_rate": 0.00017226134505299857, "loss": 11.6671, "step": 34840 }, { "epoch": 0.7293184292053922, "grad_norm": 0.31414052844047546, "learning_rate": 0.0001722598294421448, "loss": 11.678, "step": 34841 }, { "epoch": 0.7293393619693544, "grad_norm": 0.38223186135292053, "learning_rate": 0.0001722583137965542, "loss": 11.6723, "step": 34842 }, { "epoch": 0.7293602947333165, "grad_norm": 0.3523465692996979, "learning_rate": 0.00017225679811622762, "loss": 11.6552, "step": 34843 }, { "epoch": 0.7293812274972787, "grad_norm": 0.3188529908657074, "learning_rate": 0.00017225528240116575, "loss": 11.6676, "step": 34844 }, { "epoch": 0.7294021602612409, "grad_norm": 0.3214094936847687, "learning_rate": 0.0001722537666513693, "loss": 11.68, "step": 34845 }, { "epoch": 0.729423093025203, "grad_norm": 0.29764002561569214, "learning_rate": 0.00017225225086683904, "loss": 11.6689, "step": 34846 }, { "epoch": 0.7294440257891652, "grad_norm": 0.2708335518836975, "learning_rate": 0.00017225073504757566, "loss": 11.6701, "step": 34847 }, { "epoch": 0.7294649585531273, "grad_norm": 0.3189455270767212, "learning_rate": 0.00017224921919357987, "loss": 11.6719, "step": 34848 }, { "epoch": 0.7294858913170895, "grad_norm": 0.2858416736125946, "learning_rate": 0.0001722477033048525, "loss": 11.6727, "step": 34849 }, { "epoch": 0.7295068240810516, "grad_norm": 0.39750951528549194, "learning_rate": 0.00017224618738139418, "loss": 11.6637, "step": 34850 }, { "epoch": 0.7295277568450138, "grad_norm": 0.2598026990890503, "learning_rate": 0.00017224467142320566, "loss": 11.6721, "step": 34851 }, { "epoch": 0.729548689608976, "grad_norm": 0.31166449189186096, "learning_rate": 0.00017224315543028768, "loss": 11.6669, "step": 34852 }, { "epoch": 0.7295696223729381, "grad_norm": 0.23133668303489685, "learning_rate": 0.00017224163940264098, "loss": 11.6752, "step": 34853 }, { "epoch": 0.7295905551369003, "grad_norm": 0.33337390422821045, "learning_rate": 0.00017224012334026627, "loss": 11.6902, "step": 34854 }, { "epoch": 0.7296114879008624, "grad_norm": 0.32548853754997253, "learning_rate": 0.0001722386072431643, "loss": 11.6738, "step": 34855 }, { "epoch": 0.7296324206648246, "grad_norm": 0.25468140840530396, "learning_rate": 0.00017223709111133576, "loss": 11.6971, "step": 34856 }, { "epoch": 0.7296533534287868, "grad_norm": 0.38289979100227356, "learning_rate": 0.00017223557494478142, "loss": 11.6785, "step": 34857 }, { "epoch": 0.7296742861927489, "grad_norm": 0.2530517578125, "learning_rate": 0.00017223405874350198, "loss": 11.6772, "step": 34858 }, { "epoch": 0.7296952189567111, "grad_norm": 0.28683197498321533, "learning_rate": 0.0001722325425074982, "loss": 11.6665, "step": 34859 }, { "epoch": 0.7297161517206732, "grad_norm": 0.2980535924434662, "learning_rate": 0.00017223102623677077, "loss": 11.6643, "step": 34860 }, { "epoch": 0.7297370844846354, "grad_norm": 0.2631118893623352, "learning_rate": 0.00017222950993132043, "loss": 11.6566, "step": 34861 }, { "epoch": 0.7297580172485975, "grad_norm": 0.2531166970729828, "learning_rate": 0.00017222799359114795, "loss": 11.6794, "step": 34862 }, { "epoch": 0.7297789500125597, "grad_norm": 0.2977084815502167, "learning_rate": 0.000172226477216254, "loss": 11.6683, "step": 34863 }, { "epoch": 0.7297998827765219, "grad_norm": 1.5217541456222534, "learning_rate": 0.00017222496080663934, "loss": 11.6206, "step": 34864 }, { "epoch": 0.729820815540484, "grad_norm": 0.3437403440475464, "learning_rate": 0.00017222344436230468, "loss": 11.6677, "step": 34865 }, { "epoch": 0.7298417483044461, "grad_norm": 0.30911898612976074, "learning_rate": 0.00017222192788325077, "loss": 11.6628, "step": 34866 }, { "epoch": 0.7298626810684082, "grad_norm": 0.2786697447299957, "learning_rate": 0.00017222041136947836, "loss": 11.6645, "step": 34867 }, { "epoch": 0.7298836138323704, "grad_norm": 0.22617675364017487, "learning_rate": 0.00017221889482098814, "loss": 11.6842, "step": 34868 }, { "epoch": 0.7299045465963325, "grad_norm": 0.2400987297296524, "learning_rate": 0.00017221737823778084, "loss": 11.6587, "step": 34869 }, { "epoch": 0.7299254793602947, "grad_norm": 0.3225158751010895, "learning_rate": 0.0001722158616198572, "loss": 11.6537, "step": 34870 }, { "epoch": 0.7299464121242569, "grad_norm": 0.28009238839149475, "learning_rate": 0.00017221434496721795, "loss": 11.6531, "step": 34871 }, { "epoch": 0.729967344888219, "grad_norm": 0.28530153632164, "learning_rate": 0.00017221282827986381, "loss": 11.673, "step": 34872 }, { "epoch": 0.7299882776521812, "grad_norm": 0.2898887097835541, "learning_rate": 0.0001722113115577955, "loss": 11.6637, "step": 34873 }, { "epoch": 0.7300092104161433, "grad_norm": 0.30955061316490173, "learning_rate": 0.0001722097948010138, "loss": 11.6754, "step": 34874 }, { "epoch": 0.7300301431801055, "grad_norm": 0.2960253357887268, "learning_rate": 0.0001722082780095194, "loss": 11.6762, "step": 34875 }, { "epoch": 0.7300510759440677, "grad_norm": 0.4308336675167084, "learning_rate": 0.00017220676118331302, "loss": 11.6712, "step": 34876 }, { "epoch": 0.7300720087080298, "grad_norm": 0.2636508047580719, "learning_rate": 0.0001722052443223954, "loss": 11.6623, "step": 34877 }, { "epoch": 0.730092941471992, "grad_norm": 0.26830899715423584, "learning_rate": 0.0001722037274267673, "loss": 11.6789, "step": 34878 }, { "epoch": 0.7301138742359541, "grad_norm": 0.36513033509254456, "learning_rate": 0.00017220221049642938, "loss": 11.6807, "step": 34879 }, { "epoch": 0.7301348069999163, "grad_norm": 0.27964088320732117, "learning_rate": 0.00017220069353138243, "loss": 11.6867, "step": 34880 }, { "epoch": 0.7301557397638784, "grad_norm": 0.26904159784317017, "learning_rate": 0.00017219917653162718, "loss": 11.6544, "step": 34881 }, { "epoch": 0.7301766725278406, "grad_norm": 0.2689301073551178, "learning_rate": 0.0001721976594971643, "loss": 11.6565, "step": 34882 }, { "epoch": 0.7301976052918028, "grad_norm": 0.3066878616809845, "learning_rate": 0.00017219614242799459, "loss": 11.6672, "step": 34883 }, { "epoch": 0.7302185380557649, "grad_norm": 0.31740593910217285, "learning_rate": 0.00017219462532411874, "loss": 11.6682, "step": 34884 }, { "epoch": 0.7302394708197271, "grad_norm": 0.25655853748321533, "learning_rate": 0.00017219310818553747, "loss": 11.6485, "step": 34885 }, { "epoch": 0.7302604035836892, "grad_norm": 0.26390939950942993, "learning_rate": 0.00017219159101225154, "loss": 11.6824, "step": 34886 }, { "epoch": 0.7302813363476514, "grad_norm": 0.3162318468093872, "learning_rate": 0.00017219007380426166, "loss": 11.6644, "step": 34887 }, { "epoch": 0.7303022691116134, "grad_norm": 0.28277114033699036, "learning_rate": 0.00017218855656156857, "loss": 11.6795, "step": 34888 }, { "epoch": 0.7303232018755756, "grad_norm": 0.34815531969070435, "learning_rate": 0.00017218703928417302, "loss": 11.666, "step": 34889 }, { "epoch": 0.7303441346395378, "grad_norm": 0.349167138338089, "learning_rate": 0.0001721855219720757, "loss": 11.6819, "step": 34890 }, { "epoch": 0.7303650674034999, "grad_norm": 0.432939350605011, "learning_rate": 0.0001721840046252773, "loss": 11.6714, "step": 34891 }, { "epoch": 0.7303860001674621, "grad_norm": 0.36040857434272766, "learning_rate": 0.00017218248724377866, "loss": 11.6816, "step": 34892 }, { "epoch": 0.7304069329314242, "grad_norm": 0.29289716482162476, "learning_rate": 0.00017218096982758043, "loss": 11.6641, "step": 34893 }, { "epoch": 0.7304278656953864, "grad_norm": 0.28974106907844543, "learning_rate": 0.00017217945237668335, "loss": 11.6538, "step": 34894 }, { "epoch": 0.7304487984593486, "grad_norm": 0.28840941190719604, "learning_rate": 0.00017217793489108818, "loss": 11.6573, "step": 34895 }, { "epoch": 0.7304697312233107, "grad_norm": 0.30559754371643066, "learning_rate": 0.00017217641737079565, "loss": 11.6912, "step": 34896 }, { "epoch": 0.7304906639872729, "grad_norm": 0.32894009351730347, "learning_rate": 0.00017217489981580643, "loss": 11.6666, "step": 34897 }, { "epoch": 0.730511596751235, "grad_norm": 0.30578625202178955, "learning_rate": 0.0001721733822261213, "loss": 11.6684, "step": 34898 }, { "epoch": 0.7305325295151972, "grad_norm": 0.37186235189437866, "learning_rate": 0.000172171864601741, "loss": 11.6775, "step": 34899 }, { "epoch": 0.7305534622791593, "grad_norm": 0.3424259126186371, "learning_rate": 0.00017217034694266622, "loss": 11.6721, "step": 34900 }, { "epoch": 0.7305743950431215, "grad_norm": 0.2988162636756897, "learning_rate": 0.0001721688292488977, "loss": 11.6782, "step": 34901 }, { "epoch": 0.7305953278070837, "grad_norm": 0.260515421628952, "learning_rate": 0.0001721673115204362, "loss": 11.6731, "step": 34902 }, { "epoch": 0.7306162605710458, "grad_norm": 0.2787831723690033, "learning_rate": 0.0001721657937572824, "loss": 11.653, "step": 34903 }, { "epoch": 0.730637193335008, "grad_norm": 0.36874520778656006, "learning_rate": 0.00017216427595943706, "loss": 11.6838, "step": 34904 }, { "epoch": 0.7306581260989701, "grad_norm": 0.31808459758758545, "learning_rate": 0.00017216275812690092, "loss": 11.673, "step": 34905 }, { "epoch": 0.7306790588629323, "grad_norm": 0.26686710119247437, "learning_rate": 0.0001721612402596747, "loss": 11.6834, "step": 34906 }, { "epoch": 0.7306999916268944, "grad_norm": 0.32003000378608704, "learning_rate": 0.00017215972235775913, "loss": 11.6791, "step": 34907 }, { "epoch": 0.7307209243908566, "grad_norm": 0.2957298457622528, "learning_rate": 0.00017215820442115494, "loss": 11.6647, "step": 34908 }, { "epoch": 0.7307418571548188, "grad_norm": 0.25550565123558044, "learning_rate": 0.00017215668644986282, "loss": 11.6584, "step": 34909 }, { "epoch": 0.7307627899187809, "grad_norm": 0.30746108293533325, "learning_rate": 0.00017215516844388358, "loss": 11.6605, "step": 34910 }, { "epoch": 0.730783722682743, "grad_norm": 0.2810806930065155, "learning_rate": 0.00017215365040321786, "loss": 11.6668, "step": 34911 }, { "epoch": 0.7308046554467051, "grad_norm": 0.2540895640850067, "learning_rate": 0.00017215213232786645, "loss": 11.6543, "step": 34912 }, { "epoch": 0.7308255882106673, "grad_norm": 0.31664976477622986, "learning_rate": 0.00017215061421783006, "loss": 11.6659, "step": 34913 }, { "epoch": 0.7308465209746295, "grad_norm": 0.31530141830444336, "learning_rate": 0.00017214909607310945, "loss": 11.6794, "step": 34914 }, { "epoch": 0.7308674537385916, "grad_norm": 0.21844129264354706, "learning_rate": 0.0001721475778937053, "loss": 11.6685, "step": 34915 }, { "epoch": 0.7308883865025538, "grad_norm": 0.3378547132015228, "learning_rate": 0.00017214605967961837, "loss": 11.6966, "step": 34916 }, { "epoch": 0.7309093192665159, "grad_norm": 0.3019234836101532, "learning_rate": 0.0001721445414308494, "loss": 11.6649, "step": 34917 }, { "epoch": 0.7309302520304781, "grad_norm": 0.3459886908531189, "learning_rate": 0.00017214302314739905, "loss": 11.664, "step": 34918 }, { "epoch": 0.7309511847944402, "grad_norm": 0.2852168679237366, "learning_rate": 0.00017214150482926813, "loss": 11.6655, "step": 34919 }, { "epoch": 0.7309721175584024, "grad_norm": 0.4054517149925232, "learning_rate": 0.00017213998647645738, "loss": 11.6635, "step": 34920 }, { "epoch": 0.7309930503223646, "grad_norm": 0.2414247691631317, "learning_rate": 0.00017213846808896745, "loss": 11.6669, "step": 34921 }, { "epoch": 0.7310139830863267, "grad_norm": 0.4013660252094269, "learning_rate": 0.00017213694966679914, "loss": 11.6735, "step": 34922 }, { "epoch": 0.7310349158502889, "grad_norm": 0.3200124502182007, "learning_rate": 0.00017213543120995315, "loss": 11.6623, "step": 34923 }, { "epoch": 0.731055848614251, "grad_norm": 0.4620964825153351, "learning_rate": 0.00017213391271843017, "loss": 11.6857, "step": 34924 }, { "epoch": 0.7310767813782132, "grad_norm": 0.3159835934638977, "learning_rate": 0.00017213239419223102, "loss": 11.6763, "step": 34925 }, { "epoch": 0.7310977141421753, "grad_norm": 0.28965282440185547, "learning_rate": 0.00017213087563135634, "loss": 11.6675, "step": 34926 }, { "epoch": 0.7311186469061375, "grad_norm": 0.30267855525016785, "learning_rate": 0.00017212935703580696, "loss": 11.668, "step": 34927 }, { "epoch": 0.7311395796700997, "grad_norm": 0.25588494539260864, "learning_rate": 0.0001721278384055835, "loss": 11.6573, "step": 34928 }, { "epoch": 0.7311605124340618, "grad_norm": 0.30010542273521423, "learning_rate": 0.0001721263197406868, "loss": 11.6589, "step": 34929 }, { "epoch": 0.731181445198024, "grad_norm": 0.48192310333251953, "learning_rate": 0.00017212480104111748, "loss": 11.6815, "step": 34930 }, { "epoch": 0.7312023779619861, "grad_norm": 0.40274500846862793, "learning_rate": 0.00017212328230687633, "loss": 11.6637, "step": 34931 }, { "epoch": 0.7312233107259483, "grad_norm": 0.280115008354187, "learning_rate": 0.00017212176353796407, "loss": 11.6777, "step": 34932 }, { "epoch": 0.7312442434899105, "grad_norm": 0.29721692204475403, "learning_rate": 0.00017212024473438147, "loss": 11.6813, "step": 34933 }, { "epoch": 0.7312651762538726, "grad_norm": 0.4020627439022064, "learning_rate": 0.0001721187258961292, "loss": 11.6789, "step": 34934 }, { "epoch": 0.7312861090178348, "grad_norm": 0.3893025815486908, "learning_rate": 0.000172117207023208, "loss": 11.6735, "step": 34935 }, { "epoch": 0.7313070417817968, "grad_norm": 0.2610922157764435, "learning_rate": 0.00017211568811561863, "loss": 11.6887, "step": 34936 }, { "epoch": 0.731327974545759, "grad_norm": 0.34959906339645386, "learning_rate": 0.00017211416917336177, "loss": 11.6752, "step": 34937 }, { "epoch": 0.7313489073097211, "grad_norm": 0.2455754578113556, "learning_rate": 0.00017211265019643826, "loss": 11.6536, "step": 34938 }, { "epoch": 0.7313698400736833, "grad_norm": 0.3043610751628876, "learning_rate": 0.00017211113118484868, "loss": 11.6634, "step": 34939 }, { "epoch": 0.7313907728376455, "grad_norm": 0.31432774662971497, "learning_rate": 0.00017210961213859387, "loss": 11.6639, "step": 34940 }, { "epoch": 0.7314117056016076, "grad_norm": 0.28315114974975586, "learning_rate": 0.00017210809305767452, "loss": 11.6799, "step": 34941 }, { "epoch": 0.7314326383655698, "grad_norm": 0.29766717553138733, "learning_rate": 0.00017210657394209136, "loss": 11.6715, "step": 34942 }, { "epoch": 0.7314535711295319, "grad_norm": 0.3146524131298065, "learning_rate": 0.0001721050547918451, "loss": 11.6538, "step": 34943 }, { "epoch": 0.7314745038934941, "grad_norm": 0.2534555494785309, "learning_rate": 0.00017210353560693652, "loss": 11.6556, "step": 34944 }, { "epoch": 0.7314954366574562, "grad_norm": 0.30299273133277893, "learning_rate": 0.00017210201638736633, "loss": 11.6709, "step": 34945 }, { "epoch": 0.7315163694214184, "grad_norm": 0.27766549587249756, "learning_rate": 0.00017210049713313526, "loss": 11.6608, "step": 34946 }, { "epoch": 0.7315373021853806, "grad_norm": 0.2476293444633484, "learning_rate": 0.00017209897784424403, "loss": 11.665, "step": 34947 }, { "epoch": 0.7315582349493427, "grad_norm": 0.3047054409980774, "learning_rate": 0.00017209745852069335, "loss": 11.6841, "step": 34948 }, { "epoch": 0.7315791677133049, "grad_norm": 0.2833174765110016, "learning_rate": 0.00017209593916248404, "loss": 11.6649, "step": 34949 }, { "epoch": 0.731600100477267, "grad_norm": 0.2667754590511322, "learning_rate": 0.0001720944197696167, "loss": 11.6664, "step": 34950 }, { "epoch": 0.7316210332412292, "grad_norm": 0.28593623638153076, "learning_rate": 0.00017209290034209219, "loss": 11.6654, "step": 34951 }, { "epoch": 0.7316419660051914, "grad_norm": 0.39740192890167236, "learning_rate": 0.00017209138087991114, "loss": 11.6579, "step": 34952 }, { "epoch": 0.7316628987691535, "grad_norm": 0.2888972759246826, "learning_rate": 0.00017208986138307427, "loss": 11.6842, "step": 34953 }, { "epoch": 0.7316838315331157, "grad_norm": 0.3053382933139801, "learning_rate": 0.00017208834185158245, "loss": 11.6546, "step": 34954 }, { "epoch": 0.7317047642970778, "grad_norm": 0.5317960977554321, "learning_rate": 0.00017208682228543625, "loss": 11.6489, "step": 34955 }, { "epoch": 0.73172569706104, "grad_norm": 0.3602483868598938, "learning_rate": 0.00017208530268463651, "loss": 11.6503, "step": 34956 }, { "epoch": 0.731746629825002, "grad_norm": 0.27166077494621277, "learning_rate": 0.0001720837830491839, "loss": 11.6834, "step": 34957 }, { "epoch": 0.7317675625889642, "grad_norm": 0.3456254005432129, "learning_rate": 0.0001720822633790792, "loss": 11.6707, "step": 34958 }, { "epoch": 0.7317884953529264, "grad_norm": 0.3302859961986542, "learning_rate": 0.00017208074367432307, "loss": 11.6654, "step": 34959 }, { "epoch": 0.7318094281168885, "grad_norm": 0.33204782009124756, "learning_rate": 0.00017207922393491632, "loss": 11.6921, "step": 34960 }, { "epoch": 0.7318303608808507, "grad_norm": 0.2620781362056732, "learning_rate": 0.0001720777041608596, "loss": 11.6719, "step": 34961 }, { "epoch": 0.7318512936448128, "grad_norm": 0.33349764347076416, "learning_rate": 0.00017207618435215375, "loss": 11.6694, "step": 34962 }, { "epoch": 0.731872226408775, "grad_norm": 0.27019745111465454, "learning_rate": 0.0001720746645087994, "loss": 11.6723, "step": 34963 }, { "epoch": 0.7318931591727371, "grad_norm": 0.353405624628067, "learning_rate": 0.0001720731446307973, "loss": 11.6598, "step": 34964 }, { "epoch": 0.7319140919366993, "grad_norm": 0.2791823446750641, "learning_rate": 0.00017207162471814818, "loss": 11.6713, "step": 34965 }, { "epoch": 0.7319350247006615, "grad_norm": 0.31355082988739014, "learning_rate": 0.00017207010477085283, "loss": 11.6681, "step": 34966 }, { "epoch": 0.7319559574646236, "grad_norm": 0.2888486981391907, "learning_rate": 0.0001720685847889119, "loss": 11.6595, "step": 34967 }, { "epoch": 0.7319768902285858, "grad_norm": 0.3582671582698822, "learning_rate": 0.00017206706477232618, "loss": 11.6764, "step": 34968 }, { "epoch": 0.7319978229925479, "grad_norm": 0.29156234860420227, "learning_rate": 0.00017206554472109638, "loss": 11.6501, "step": 34969 }, { "epoch": 0.7320187557565101, "grad_norm": 0.2511841654777527, "learning_rate": 0.0001720640246352232, "loss": 11.6873, "step": 34970 }, { "epoch": 0.7320396885204723, "grad_norm": 0.31470468640327454, "learning_rate": 0.00017206250451470742, "loss": 11.666, "step": 34971 }, { "epoch": 0.7320606212844344, "grad_norm": 0.23678560554981232, "learning_rate": 0.00017206098435954977, "loss": 11.6773, "step": 34972 }, { "epoch": 0.7320815540483966, "grad_norm": 0.4227805435657501, "learning_rate": 0.0001720594641697509, "loss": 11.6582, "step": 34973 }, { "epoch": 0.7321024868123587, "grad_norm": 0.2894662022590637, "learning_rate": 0.00017205794394531165, "loss": 11.6837, "step": 34974 }, { "epoch": 0.7321234195763209, "grad_norm": 0.2556978762149811, "learning_rate": 0.0001720564236862327, "loss": 11.6693, "step": 34975 }, { "epoch": 0.732144352340283, "grad_norm": 0.35650724172592163, "learning_rate": 0.00017205490339251478, "loss": 11.6589, "step": 34976 }, { "epoch": 0.7321652851042452, "grad_norm": 0.36855098605155945, "learning_rate": 0.00017205338306415863, "loss": 11.676, "step": 34977 }, { "epoch": 0.7321862178682074, "grad_norm": 0.31404346227645874, "learning_rate": 0.00017205186270116495, "loss": 11.6754, "step": 34978 }, { "epoch": 0.7322071506321695, "grad_norm": 0.3759436011314392, "learning_rate": 0.0001720503423035345, "loss": 11.6645, "step": 34979 }, { "epoch": 0.7322280833961317, "grad_norm": 0.25261199474334717, "learning_rate": 0.00017204882187126804, "loss": 11.6773, "step": 34980 }, { "epoch": 0.7322490161600937, "grad_norm": 0.30262720584869385, "learning_rate": 0.00017204730140436624, "loss": 11.6735, "step": 34981 }, { "epoch": 0.732269948924056, "grad_norm": 0.29891207814216614, "learning_rate": 0.00017204578090282985, "loss": 11.6767, "step": 34982 }, { "epoch": 0.732290881688018, "grad_norm": 0.2830756604671478, "learning_rate": 0.00017204426036665962, "loss": 11.6644, "step": 34983 }, { "epoch": 0.7323118144519802, "grad_norm": 0.284424751996994, "learning_rate": 0.0001720427397958563, "loss": 11.6678, "step": 34984 }, { "epoch": 0.7323327472159424, "grad_norm": 0.2679104208946228, "learning_rate": 0.00017204121919042056, "loss": 11.6752, "step": 34985 }, { "epoch": 0.7323536799799045, "grad_norm": 0.2511271834373474, "learning_rate": 0.00017203969855035314, "loss": 11.6671, "step": 34986 }, { "epoch": 0.7323746127438667, "grad_norm": 0.3214937150478363, "learning_rate": 0.00017203817787565483, "loss": 11.6552, "step": 34987 }, { "epoch": 0.7323955455078288, "grad_norm": 0.2689177691936493, "learning_rate": 0.0001720366571663263, "loss": 11.671, "step": 34988 }, { "epoch": 0.732416478271791, "grad_norm": 0.3622346520423889, "learning_rate": 0.00017203513642236832, "loss": 11.6784, "step": 34989 }, { "epoch": 0.7324374110357531, "grad_norm": 0.31132474541664124, "learning_rate": 0.00017203361564378162, "loss": 11.6626, "step": 34990 }, { "epoch": 0.7324583437997153, "grad_norm": 0.38264673948287964, "learning_rate": 0.0001720320948305669, "loss": 11.6689, "step": 34991 }, { "epoch": 0.7324792765636775, "grad_norm": 0.3002287745475769, "learning_rate": 0.00017203057398272492, "loss": 11.6723, "step": 34992 }, { "epoch": 0.7325002093276396, "grad_norm": 0.31368088722229004, "learning_rate": 0.00017202905310025637, "loss": 11.6877, "step": 34993 }, { "epoch": 0.7325211420916018, "grad_norm": 0.251539945602417, "learning_rate": 0.00017202753218316204, "loss": 11.6668, "step": 34994 }, { "epoch": 0.7325420748555639, "grad_norm": 0.32949134707450867, "learning_rate": 0.00017202601123144262, "loss": 11.6745, "step": 34995 }, { "epoch": 0.7325630076195261, "grad_norm": 0.2955942749977112, "learning_rate": 0.00017202449024509886, "loss": 11.6487, "step": 34996 }, { "epoch": 0.7325839403834883, "grad_norm": 0.2955927848815918, "learning_rate": 0.00017202296922413148, "loss": 11.6937, "step": 34997 }, { "epoch": 0.7326048731474504, "grad_norm": 0.25247669219970703, "learning_rate": 0.0001720214481685412, "loss": 11.6544, "step": 34998 }, { "epoch": 0.7326258059114126, "grad_norm": 0.27271485328674316, "learning_rate": 0.0001720199270783288, "loss": 11.6803, "step": 34999 }, { "epoch": 0.7326467386753747, "grad_norm": 0.2751217782497406, "learning_rate": 0.00017201840595349496, "loss": 11.6693, "step": 35000 }, { "epoch": 0.7326467386753747, "eval_loss": 11.670615196228027, "eval_runtime": 34.3381, "eval_samples_per_second": 27.986, "eval_steps_per_second": 7.018, "step": 35000 }, { "epoch": 0.7326676714393369, "grad_norm": 0.27796027064323425, "learning_rate": 0.0001720168847940404, "loss": 11.6588, "step": 35001 }, { "epoch": 0.732688604203299, "grad_norm": 0.29634276032447815, "learning_rate": 0.00017201536359996592, "loss": 11.6528, "step": 35002 }, { "epoch": 0.7327095369672612, "grad_norm": 0.35851341485977173, "learning_rate": 0.0001720138423712722, "loss": 11.6812, "step": 35003 }, { "epoch": 0.7327304697312234, "grad_norm": 0.31018733978271484, "learning_rate": 0.00017201232110795997, "loss": 11.6683, "step": 35004 }, { "epoch": 0.7327514024951854, "grad_norm": 0.30083951354026794, "learning_rate": 0.00017201079981003, "loss": 11.6871, "step": 35005 }, { "epoch": 0.7327723352591476, "grad_norm": 0.3144458532333374, "learning_rate": 0.00017200927847748298, "loss": 11.6821, "step": 35006 }, { "epoch": 0.7327932680231097, "grad_norm": 0.3367284834384918, "learning_rate": 0.00017200775711031968, "loss": 11.6733, "step": 35007 }, { "epoch": 0.7328142007870719, "grad_norm": 0.30175018310546875, "learning_rate": 0.00017200623570854076, "loss": 11.6704, "step": 35008 }, { "epoch": 0.732835133551034, "grad_norm": 0.2924351692199707, "learning_rate": 0.00017200471427214703, "loss": 11.6564, "step": 35009 }, { "epoch": 0.7328560663149962, "grad_norm": 0.26617902517318726, "learning_rate": 0.0001720031928011392, "loss": 11.6849, "step": 35010 }, { "epoch": 0.7328769990789584, "grad_norm": 0.3951423168182373, "learning_rate": 0.00017200167129551797, "loss": 11.6891, "step": 35011 }, { "epoch": 0.7328979318429205, "grad_norm": 0.3733401894569397, "learning_rate": 0.0001720001497552841, "loss": 11.6692, "step": 35012 }, { "epoch": 0.7329188646068827, "grad_norm": 0.29277369379997253, "learning_rate": 0.0001719986281804383, "loss": 11.6684, "step": 35013 }, { "epoch": 0.7329397973708448, "grad_norm": 0.2999162971973419, "learning_rate": 0.00017199710657098136, "loss": 11.653, "step": 35014 }, { "epoch": 0.732960730134807, "grad_norm": 0.2722235321998596, "learning_rate": 0.00017199558492691393, "loss": 11.6783, "step": 35015 }, { "epoch": 0.7329816628987692, "grad_norm": 0.2980034053325653, "learning_rate": 0.0001719940632482368, "loss": 11.6604, "step": 35016 }, { "epoch": 0.7330025956627313, "grad_norm": 0.2932646870613098, "learning_rate": 0.0001719925415349507, "loss": 11.6687, "step": 35017 }, { "epoch": 0.7330235284266935, "grad_norm": 0.31116795539855957, "learning_rate": 0.00017199101978705627, "loss": 11.6887, "step": 35018 }, { "epoch": 0.7330444611906556, "grad_norm": 0.3557867705821991, "learning_rate": 0.0001719894980045544, "loss": 11.6783, "step": 35019 }, { "epoch": 0.7330653939546178, "grad_norm": 0.2594701945781708, "learning_rate": 0.00017198797618744568, "loss": 11.6492, "step": 35020 }, { "epoch": 0.7330863267185799, "grad_norm": 0.23515202105045319, "learning_rate": 0.0001719864543357309, "loss": 11.6615, "step": 35021 }, { "epoch": 0.7331072594825421, "grad_norm": 0.23943166434764862, "learning_rate": 0.00017198493244941081, "loss": 11.6671, "step": 35022 }, { "epoch": 0.7331281922465043, "grad_norm": 0.26606234908103943, "learning_rate": 0.0001719834105284861, "loss": 11.6649, "step": 35023 }, { "epoch": 0.7331491250104664, "grad_norm": 0.28583988547325134, "learning_rate": 0.00017198188857295754, "loss": 11.6732, "step": 35024 }, { "epoch": 0.7331700577744286, "grad_norm": 0.23296478390693665, "learning_rate": 0.00017198036658282583, "loss": 11.6592, "step": 35025 }, { "epoch": 0.7331909905383907, "grad_norm": 0.4083253741264343, "learning_rate": 0.00017197884455809174, "loss": 11.6628, "step": 35026 }, { "epoch": 0.7332119233023529, "grad_norm": 0.2647593915462494, "learning_rate": 0.00017197732249875597, "loss": 11.6571, "step": 35027 }, { "epoch": 0.7332328560663149, "grad_norm": 0.2876124680042267, "learning_rate": 0.0001719758004048192, "loss": 11.672, "step": 35028 }, { "epoch": 0.7332537888302771, "grad_norm": 0.27006232738494873, "learning_rate": 0.00017197427827628228, "loss": 11.6567, "step": 35029 }, { "epoch": 0.7332747215942393, "grad_norm": 0.30850183963775635, "learning_rate": 0.0001719727561131459, "loss": 11.6725, "step": 35030 }, { "epoch": 0.7332956543582014, "grad_norm": 0.34537968039512634, "learning_rate": 0.00017197123391541073, "loss": 11.6728, "step": 35031 }, { "epoch": 0.7333165871221636, "grad_norm": 0.3341485261917114, "learning_rate": 0.00017196971168307755, "loss": 11.6677, "step": 35032 }, { "epoch": 0.7333375198861257, "grad_norm": 0.3253253102302551, "learning_rate": 0.00017196818941614712, "loss": 11.6754, "step": 35033 }, { "epoch": 0.7333584526500879, "grad_norm": 0.34146180748939514, "learning_rate": 0.0001719666671146201, "loss": 11.6856, "step": 35034 }, { "epoch": 0.7333793854140501, "grad_norm": 0.35912656784057617, "learning_rate": 0.00017196514477849727, "loss": 11.6772, "step": 35035 }, { "epoch": 0.7334003181780122, "grad_norm": 0.33307138085365295, "learning_rate": 0.00017196362240777936, "loss": 11.6522, "step": 35036 }, { "epoch": 0.7334212509419744, "grad_norm": 0.2795703411102295, "learning_rate": 0.00017196210000246711, "loss": 11.6748, "step": 35037 }, { "epoch": 0.7334421837059365, "grad_norm": 0.37191492319107056, "learning_rate": 0.00017196057756256123, "loss": 11.6617, "step": 35038 }, { "epoch": 0.7334631164698987, "grad_norm": 0.4344560503959656, "learning_rate": 0.00017195905508806246, "loss": 11.6861, "step": 35039 }, { "epoch": 0.7334840492338608, "grad_norm": 0.3467407524585724, "learning_rate": 0.0001719575325789715, "loss": 11.6674, "step": 35040 }, { "epoch": 0.733504981997823, "grad_norm": 0.3049328029155731, "learning_rate": 0.00017195601003528913, "loss": 11.6617, "step": 35041 }, { "epoch": 0.7335259147617852, "grad_norm": 0.3667978048324585, "learning_rate": 0.0001719544874570161, "loss": 11.6775, "step": 35042 }, { "epoch": 0.7335468475257473, "grad_norm": 0.284518301486969, "learning_rate": 0.00017195296484415304, "loss": 11.6664, "step": 35043 }, { "epoch": 0.7335677802897095, "grad_norm": 0.30502235889434814, "learning_rate": 0.00017195144219670077, "loss": 11.6392, "step": 35044 }, { "epoch": 0.7335887130536716, "grad_norm": 0.3007056415081024, "learning_rate": 0.00017194991951466001, "loss": 11.6571, "step": 35045 }, { "epoch": 0.7336096458176338, "grad_norm": 0.2810959219932556, "learning_rate": 0.0001719483967980315, "loss": 11.6934, "step": 35046 }, { "epoch": 0.7336305785815959, "grad_norm": 0.2750711739063263, "learning_rate": 0.00017194687404681593, "loss": 11.6861, "step": 35047 }, { "epoch": 0.7336515113455581, "grad_norm": 0.2914794087409973, "learning_rate": 0.00017194535126101404, "loss": 11.6773, "step": 35048 }, { "epoch": 0.7336724441095203, "grad_norm": 0.3704964518547058, "learning_rate": 0.0001719438284406266, "loss": 11.6576, "step": 35049 }, { "epoch": 0.7336933768734824, "grad_norm": 0.3243381679058075, "learning_rate": 0.00017194230558565434, "loss": 11.6753, "step": 35050 }, { "epoch": 0.7337143096374446, "grad_norm": 0.33379513025283813, "learning_rate": 0.0001719407826960979, "loss": 11.6729, "step": 35051 }, { "epoch": 0.7337352424014066, "grad_norm": 0.2715117931365967, "learning_rate": 0.00017193925977195814, "loss": 11.6806, "step": 35052 }, { "epoch": 0.7337561751653688, "grad_norm": 0.30562782287597656, "learning_rate": 0.00017193773681323577, "loss": 11.6606, "step": 35053 }, { "epoch": 0.733777107929331, "grad_norm": 0.30882135033607483, "learning_rate": 0.00017193621381993143, "loss": 11.668, "step": 35054 }, { "epoch": 0.7337980406932931, "grad_norm": 0.33588677644729614, "learning_rate": 0.00017193469079204593, "loss": 11.6675, "step": 35055 }, { "epoch": 0.7338189734572553, "grad_norm": 0.2807721197605133, "learning_rate": 0.00017193316772958, "loss": 11.6635, "step": 35056 }, { "epoch": 0.7338399062212174, "grad_norm": 0.2765938341617584, "learning_rate": 0.00017193164463253431, "loss": 11.6834, "step": 35057 }, { "epoch": 0.7338608389851796, "grad_norm": 0.27190667390823364, "learning_rate": 0.00017193012150090966, "loss": 11.6794, "step": 35058 }, { "epoch": 0.7338817717491417, "grad_norm": 0.330768883228302, "learning_rate": 0.00017192859833470677, "loss": 11.674, "step": 35059 }, { "epoch": 0.7339027045131039, "grad_norm": 0.3699095845222473, "learning_rate": 0.00017192707513392637, "loss": 11.6807, "step": 35060 }, { "epoch": 0.7339236372770661, "grad_norm": 0.2834906578063965, "learning_rate": 0.00017192555189856915, "loss": 11.6704, "step": 35061 }, { "epoch": 0.7339445700410282, "grad_norm": 0.3122255504131317, "learning_rate": 0.00017192402862863587, "loss": 11.6657, "step": 35062 }, { "epoch": 0.7339655028049904, "grad_norm": 0.32911568880081177, "learning_rate": 0.0001719225053241273, "loss": 11.6593, "step": 35063 }, { "epoch": 0.7339864355689525, "grad_norm": 0.26345598697662354, "learning_rate": 0.00017192098198504416, "loss": 11.6584, "step": 35064 }, { "epoch": 0.7340073683329147, "grad_norm": 0.2864645719528198, "learning_rate": 0.00017191945861138713, "loss": 11.6708, "step": 35065 }, { "epoch": 0.7340283010968768, "grad_norm": 0.30869269371032715, "learning_rate": 0.00017191793520315697, "loss": 11.6649, "step": 35066 }, { "epoch": 0.734049233860839, "grad_norm": 0.2560504376888275, "learning_rate": 0.00017191641176035442, "loss": 11.6594, "step": 35067 }, { "epoch": 0.7340701666248012, "grad_norm": 0.38407090306282043, "learning_rate": 0.00017191488828298025, "loss": 11.6991, "step": 35068 }, { "epoch": 0.7340910993887633, "grad_norm": 0.517062246799469, "learning_rate": 0.0001719133647710351, "loss": 11.5968, "step": 35069 }, { "epoch": 0.7341120321527255, "grad_norm": 0.2917449176311493, "learning_rate": 0.00017191184122451975, "loss": 11.6513, "step": 35070 }, { "epoch": 0.7341329649166876, "grad_norm": 0.2963530421257019, "learning_rate": 0.00017191031764343496, "loss": 11.6785, "step": 35071 }, { "epoch": 0.7341538976806498, "grad_norm": 0.2862634062767029, "learning_rate": 0.00017190879402778143, "loss": 11.6776, "step": 35072 }, { "epoch": 0.734174830444612, "grad_norm": 0.2814403772354126, "learning_rate": 0.00017190727037755995, "loss": 11.677, "step": 35073 }, { "epoch": 0.734195763208574, "grad_norm": 0.23369158804416656, "learning_rate": 0.00017190574669277115, "loss": 11.6699, "step": 35074 }, { "epoch": 0.7342166959725362, "grad_norm": 0.3168766498565674, "learning_rate": 0.0001719042229734158, "loss": 11.6593, "step": 35075 }, { "epoch": 0.7342376287364983, "grad_norm": 0.3080408573150635, "learning_rate": 0.0001719026992194947, "loss": 11.6798, "step": 35076 }, { "epoch": 0.7342585615004605, "grad_norm": 0.3008452355861664, "learning_rate": 0.00017190117543100853, "loss": 11.6733, "step": 35077 }, { "epoch": 0.7342794942644226, "grad_norm": 0.34444209933280945, "learning_rate": 0.00017189965160795798, "loss": 11.6718, "step": 35078 }, { "epoch": 0.7343004270283848, "grad_norm": 0.28581878542900085, "learning_rate": 0.00017189812775034383, "loss": 11.6569, "step": 35079 }, { "epoch": 0.734321359792347, "grad_norm": 0.2555367946624756, "learning_rate": 0.00017189660385816686, "loss": 11.6535, "step": 35080 }, { "epoch": 0.7343422925563091, "grad_norm": 0.3162328600883484, "learning_rate": 0.0001718950799314277, "loss": 11.6647, "step": 35081 }, { "epoch": 0.7343632253202713, "grad_norm": 0.3500492572784424, "learning_rate": 0.00017189355597012717, "loss": 11.6653, "step": 35082 }, { "epoch": 0.7343841580842334, "grad_norm": 0.2709366977214813, "learning_rate": 0.00017189203197426597, "loss": 11.6868, "step": 35083 }, { "epoch": 0.7344050908481956, "grad_norm": 0.3476596176624298, "learning_rate": 0.0001718905079438448, "loss": 11.6778, "step": 35084 }, { "epoch": 0.7344260236121577, "grad_norm": 0.29058393836021423, "learning_rate": 0.00017188898387886443, "loss": 11.6898, "step": 35085 }, { "epoch": 0.7344469563761199, "grad_norm": 0.2825779616832733, "learning_rate": 0.00017188745977932558, "loss": 11.6871, "step": 35086 }, { "epoch": 0.7344678891400821, "grad_norm": 0.3040984272956848, "learning_rate": 0.000171885935645229, "loss": 11.6435, "step": 35087 }, { "epoch": 0.7344888219040442, "grad_norm": 0.27294087409973145, "learning_rate": 0.0001718844114765754, "loss": 11.6658, "step": 35088 }, { "epoch": 0.7345097546680064, "grad_norm": 0.2928674817085266, "learning_rate": 0.00017188288727336556, "loss": 11.6728, "step": 35089 }, { "epoch": 0.7345306874319685, "grad_norm": 0.30905598402023315, "learning_rate": 0.00017188136303560015, "loss": 11.6783, "step": 35090 }, { "epoch": 0.7345516201959307, "grad_norm": 0.2613098919391632, "learning_rate": 0.0001718798387632799, "loss": 11.6761, "step": 35091 }, { "epoch": 0.7345725529598929, "grad_norm": 0.296530157327652, "learning_rate": 0.0001718783144564056, "loss": 11.6617, "step": 35092 }, { "epoch": 0.734593485723855, "grad_norm": 0.381586492061615, "learning_rate": 0.00017187679011497795, "loss": 11.6704, "step": 35093 }, { "epoch": 0.7346144184878172, "grad_norm": 0.2711818814277649, "learning_rate": 0.0001718752657389977, "loss": 11.663, "step": 35094 }, { "epoch": 0.7346353512517793, "grad_norm": 0.27203935384750366, "learning_rate": 0.00017187374132846553, "loss": 11.6628, "step": 35095 }, { "epoch": 0.7346562840157415, "grad_norm": 0.39816156029701233, "learning_rate": 0.00017187221688338226, "loss": 11.6652, "step": 35096 }, { "epoch": 0.7346772167797035, "grad_norm": 0.2528309226036072, "learning_rate": 0.00017187069240374853, "loss": 11.6683, "step": 35097 }, { "epoch": 0.7346981495436657, "grad_norm": 0.2521461248397827, "learning_rate": 0.00017186916788956516, "loss": 11.6707, "step": 35098 }, { "epoch": 0.734719082307628, "grad_norm": 0.2366577386856079, "learning_rate": 0.00017186764334083284, "loss": 11.6667, "step": 35099 }, { "epoch": 0.73474001507159, "grad_norm": 0.3255854547023773, "learning_rate": 0.00017186611875755228, "loss": 11.6711, "step": 35100 }, { "epoch": 0.7347609478355522, "grad_norm": 0.2715352773666382, "learning_rate": 0.00017186459413972422, "loss": 11.663, "step": 35101 }, { "epoch": 0.7347818805995143, "grad_norm": 0.23074239492416382, "learning_rate": 0.00017186306948734943, "loss": 11.6771, "step": 35102 }, { "epoch": 0.7348028133634765, "grad_norm": 0.30443891882896423, "learning_rate": 0.0001718615448004286, "loss": 11.6716, "step": 35103 }, { "epoch": 0.7348237461274386, "grad_norm": 0.29066333174705505, "learning_rate": 0.00017186002007896252, "loss": 11.6714, "step": 35104 }, { "epoch": 0.7348446788914008, "grad_norm": 0.3090169131755829, "learning_rate": 0.0001718584953229519, "loss": 11.6799, "step": 35105 }, { "epoch": 0.734865611655363, "grad_norm": 0.24029754102230072, "learning_rate": 0.0001718569705323974, "loss": 11.6582, "step": 35106 }, { "epoch": 0.7348865444193251, "grad_norm": 0.283623605966568, "learning_rate": 0.00017185544570729987, "loss": 11.6559, "step": 35107 }, { "epoch": 0.7349074771832873, "grad_norm": 0.30171677470207214, "learning_rate": 0.00017185392084766, "loss": 11.6728, "step": 35108 }, { "epoch": 0.7349284099472494, "grad_norm": 0.3133205473423004, "learning_rate": 0.00017185239595347843, "loss": 11.6857, "step": 35109 }, { "epoch": 0.7349493427112116, "grad_norm": 0.29018911719322205, "learning_rate": 0.00017185087102475604, "loss": 11.6775, "step": 35110 }, { "epoch": 0.7349702754751738, "grad_norm": 0.34636247158050537, "learning_rate": 0.00017184934606149347, "loss": 11.6629, "step": 35111 }, { "epoch": 0.7349912082391359, "grad_norm": 0.3093571662902832, "learning_rate": 0.0001718478210636915, "loss": 11.6627, "step": 35112 }, { "epoch": 0.7350121410030981, "grad_norm": 0.2542259097099304, "learning_rate": 0.00017184629603135085, "loss": 11.655, "step": 35113 }, { "epoch": 0.7350330737670602, "grad_norm": 0.33245179057121277, "learning_rate": 0.0001718447709644722, "loss": 11.658, "step": 35114 }, { "epoch": 0.7350540065310224, "grad_norm": 0.2829020023345947, "learning_rate": 0.00017184324586305637, "loss": 11.67, "step": 35115 }, { "epoch": 0.7350749392949845, "grad_norm": 0.27264150977134705, "learning_rate": 0.00017184172072710403, "loss": 11.6699, "step": 35116 }, { "epoch": 0.7350958720589467, "grad_norm": 0.3940761089324951, "learning_rate": 0.00017184019555661594, "loss": 11.6628, "step": 35117 }, { "epoch": 0.7351168048229089, "grad_norm": 0.26599180698394775, "learning_rate": 0.00017183867035159284, "loss": 11.6724, "step": 35118 }, { "epoch": 0.735137737586871, "grad_norm": 0.3154914379119873, "learning_rate": 0.00017183714511203544, "loss": 11.6693, "step": 35119 }, { "epoch": 0.7351586703508332, "grad_norm": 0.2850053608417511, "learning_rate": 0.0001718356198379445, "loss": 11.6557, "step": 35120 }, { "epoch": 0.7351796031147952, "grad_norm": 0.29428863525390625, "learning_rate": 0.0001718340945293207, "loss": 11.6573, "step": 35121 }, { "epoch": 0.7352005358787574, "grad_norm": 0.37428975105285645, "learning_rate": 0.00017183256918616483, "loss": 11.6822, "step": 35122 }, { "epoch": 0.7352214686427195, "grad_norm": 0.26964646577835083, "learning_rate": 0.00017183104380847763, "loss": 11.6613, "step": 35123 }, { "epoch": 0.7352424014066817, "grad_norm": 0.48748883605003357, "learning_rate": 0.00017182951839625978, "loss": 11.6865, "step": 35124 }, { "epoch": 0.7352633341706439, "grad_norm": 0.32094067335128784, "learning_rate": 0.00017182799294951205, "loss": 11.6789, "step": 35125 }, { "epoch": 0.735284266934606, "grad_norm": 0.263510137796402, "learning_rate": 0.00017182646746823516, "loss": 11.6681, "step": 35126 }, { "epoch": 0.7353051996985682, "grad_norm": 0.30354878306388855, "learning_rate": 0.00017182494195242987, "loss": 11.6655, "step": 35127 }, { "epoch": 0.7353261324625303, "grad_norm": 0.2943178713321686, "learning_rate": 0.00017182341640209687, "loss": 11.6789, "step": 35128 }, { "epoch": 0.7353470652264925, "grad_norm": 0.4295722544193268, "learning_rate": 0.00017182189081723693, "loss": 11.6862, "step": 35129 }, { "epoch": 0.7353679979904547, "grad_norm": 0.22441458702087402, "learning_rate": 0.0001718203651978507, "loss": 11.6689, "step": 35130 }, { "epoch": 0.7353889307544168, "grad_norm": 0.2939213514328003, "learning_rate": 0.00017181883954393908, "loss": 11.6634, "step": 35131 }, { "epoch": 0.735409863518379, "grad_norm": 0.2973495423793793, "learning_rate": 0.00017181731385550265, "loss": 11.6542, "step": 35132 }, { "epoch": 0.7354307962823411, "grad_norm": 0.3905848562717438, "learning_rate": 0.0001718157881325422, "loss": 11.6707, "step": 35133 }, { "epoch": 0.7354517290463033, "grad_norm": 0.39972254633903503, "learning_rate": 0.00017181426237505845, "loss": 11.6777, "step": 35134 }, { "epoch": 0.7354726618102654, "grad_norm": 0.29123735427856445, "learning_rate": 0.0001718127365830522, "loss": 11.6697, "step": 35135 }, { "epoch": 0.7354935945742276, "grad_norm": 0.3210518956184387, "learning_rate": 0.0001718112107565241, "loss": 11.675, "step": 35136 }, { "epoch": 0.7355145273381898, "grad_norm": 0.24720916152000427, "learning_rate": 0.0001718096848954749, "loss": 11.6701, "step": 35137 }, { "epoch": 0.7355354601021519, "grad_norm": 0.33022382855415344, "learning_rate": 0.00017180815899990536, "loss": 11.6396, "step": 35138 }, { "epoch": 0.7355563928661141, "grad_norm": 0.3052949011325836, "learning_rate": 0.00017180663306981618, "loss": 11.6669, "step": 35139 }, { "epoch": 0.7355773256300762, "grad_norm": 0.2857534885406494, "learning_rate": 0.00017180510710520808, "loss": 11.6772, "step": 35140 }, { "epoch": 0.7355982583940384, "grad_norm": 0.3191159665584564, "learning_rate": 0.0001718035811060819, "loss": 11.6844, "step": 35141 }, { "epoch": 0.7356191911580005, "grad_norm": 0.25272923707962036, "learning_rate": 0.00017180205507243826, "loss": 11.6704, "step": 35142 }, { "epoch": 0.7356401239219627, "grad_norm": 0.2673429250717163, "learning_rate": 0.00017180052900427794, "loss": 11.6741, "step": 35143 }, { "epoch": 0.7356610566859249, "grad_norm": 0.27241408824920654, "learning_rate": 0.00017179900290160166, "loss": 11.6556, "step": 35144 }, { "epoch": 0.7356819894498869, "grad_norm": 0.3346257209777832, "learning_rate": 0.00017179747676441016, "loss": 11.6789, "step": 35145 }, { "epoch": 0.7357029222138491, "grad_norm": 0.26578328013420105, "learning_rate": 0.0001717959505927042, "loss": 11.67, "step": 35146 }, { "epoch": 0.7357238549778112, "grad_norm": 0.28577926754951477, "learning_rate": 0.00017179442438648447, "loss": 11.6728, "step": 35147 }, { "epoch": 0.7357447877417734, "grad_norm": 0.4148426353931427, "learning_rate": 0.0001717928981457517, "loss": 11.674, "step": 35148 }, { "epoch": 0.7357657205057356, "grad_norm": 0.4031405448913574, "learning_rate": 0.00017179137187050666, "loss": 11.6761, "step": 35149 }, { "epoch": 0.7357866532696977, "grad_norm": 0.3905605375766754, "learning_rate": 0.00017178984556075007, "loss": 11.6857, "step": 35150 }, { "epoch": 0.7358075860336599, "grad_norm": 0.30933547019958496, "learning_rate": 0.00017178831921648268, "loss": 11.6812, "step": 35151 }, { "epoch": 0.735828518797622, "grad_norm": 0.2838510572910309, "learning_rate": 0.0001717867928377052, "loss": 11.6798, "step": 35152 }, { "epoch": 0.7358494515615842, "grad_norm": 0.30894234776496887, "learning_rate": 0.00017178526642441837, "loss": 11.6716, "step": 35153 }, { "epoch": 0.7358703843255463, "grad_norm": 0.29486680030822754, "learning_rate": 0.00017178373997662292, "loss": 11.6564, "step": 35154 }, { "epoch": 0.7358913170895085, "grad_norm": 0.2907141447067261, "learning_rate": 0.00017178221349431957, "loss": 11.6754, "step": 35155 }, { "epoch": 0.7359122498534707, "grad_norm": 0.34787705540657043, "learning_rate": 0.0001717806869775091, "loss": 11.6594, "step": 35156 }, { "epoch": 0.7359331826174328, "grad_norm": 0.4076278805732727, "learning_rate": 0.0001717791604261922, "loss": 11.6802, "step": 35157 }, { "epoch": 0.735954115381395, "grad_norm": 0.3098595142364502, "learning_rate": 0.00017177763384036961, "loss": 11.6775, "step": 35158 }, { "epoch": 0.7359750481453571, "grad_norm": 0.33370834589004517, "learning_rate": 0.0001717761072200421, "loss": 11.6727, "step": 35159 }, { "epoch": 0.7359959809093193, "grad_norm": 0.3449336588382721, "learning_rate": 0.00017177458056521035, "loss": 11.6738, "step": 35160 }, { "epoch": 0.7360169136732814, "grad_norm": 0.5215007662773132, "learning_rate": 0.00017177305387587513, "loss": 11.6744, "step": 35161 }, { "epoch": 0.7360378464372436, "grad_norm": 0.2966731786727905, "learning_rate": 0.00017177152715203717, "loss": 11.6632, "step": 35162 }, { "epoch": 0.7360587792012058, "grad_norm": 0.23945167660713196, "learning_rate": 0.0001717700003936972, "loss": 11.6667, "step": 35163 }, { "epoch": 0.7360797119651679, "grad_norm": 0.24254700541496277, "learning_rate": 0.00017176847360085592, "loss": 11.6682, "step": 35164 }, { "epoch": 0.7361006447291301, "grad_norm": 0.25853821635246277, "learning_rate": 0.00017176694677351414, "loss": 11.6741, "step": 35165 }, { "epoch": 0.7361215774930921, "grad_norm": 0.3037451505661011, "learning_rate": 0.00017176541991167254, "loss": 11.6863, "step": 35166 }, { "epoch": 0.7361425102570543, "grad_norm": 0.35486724972724915, "learning_rate": 0.00017176389301533184, "loss": 11.6752, "step": 35167 }, { "epoch": 0.7361634430210166, "grad_norm": 0.32511106133461, "learning_rate": 0.00017176236608449282, "loss": 11.6898, "step": 35168 }, { "epoch": 0.7361843757849786, "grad_norm": 0.28872397541999817, "learning_rate": 0.0001717608391191562, "loss": 11.6696, "step": 35169 }, { "epoch": 0.7362053085489408, "grad_norm": 0.4470973610877991, "learning_rate": 0.0001717593121193227, "loss": 11.6762, "step": 35170 }, { "epoch": 0.7362262413129029, "grad_norm": 0.2507561147212982, "learning_rate": 0.00017175778508499306, "loss": 11.6912, "step": 35171 }, { "epoch": 0.7362471740768651, "grad_norm": 0.321102112531662, "learning_rate": 0.00017175625801616802, "loss": 11.6815, "step": 35172 }, { "epoch": 0.7362681068408272, "grad_norm": 0.3581046164035797, "learning_rate": 0.0001717547309128483, "loss": 11.6603, "step": 35173 }, { "epoch": 0.7362890396047894, "grad_norm": 0.32427167892456055, "learning_rate": 0.00017175320377503463, "loss": 11.67, "step": 35174 }, { "epoch": 0.7363099723687516, "grad_norm": 0.27815964818000793, "learning_rate": 0.00017175167660272775, "loss": 11.7005, "step": 35175 }, { "epoch": 0.7363309051327137, "grad_norm": 0.35366106033325195, "learning_rate": 0.00017175014939592844, "loss": 11.6642, "step": 35176 }, { "epoch": 0.7363518378966759, "grad_norm": 0.30996429920196533, "learning_rate": 0.00017174862215463738, "loss": 11.6514, "step": 35177 }, { "epoch": 0.736372770660638, "grad_norm": 0.3716924488544464, "learning_rate": 0.00017174709487885532, "loss": 11.6743, "step": 35178 }, { "epoch": 0.7363937034246002, "grad_norm": 0.3928034007549286, "learning_rate": 0.00017174556756858298, "loss": 11.6739, "step": 35179 }, { "epoch": 0.7364146361885623, "grad_norm": 0.3127676844596863, "learning_rate": 0.00017174404022382111, "loss": 11.6797, "step": 35180 }, { "epoch": 0.7364355689525245, "grad_norm": 0.3024749457836151, "learning_rate": 0.00017174251284457045, "loss": 11.6685, "step": 35181 }, { "epoch": 0.7364565017164867, "grad_norm": 0.35735976696014404, "learning_rate": 0.00017174098543083172, "loss": 11.6825, "step": 35182 }, { "epoch": 0.7364774344804488, "grad_norm": 0.2778598666191101, "learning_rate": 0.00017173945798260566, "loss": 11.6797, "step": 35183 }, { "epoch": 0.736498367244411, "grad_norm": 0.3192540109157562, "learning_rate": 0.00017173793049989302, "loss": 11.6802, "step": 35184 }, { "epoch": 0.7365193000083731, "grad_norm": 0.26851189136505127, "learning_rate": 0.00017173640298269452, "loss": 11.6714, "step": 35185 }, { "epoch": 0.7365402327723353, "grad_norm": 0.4439859688282013, "learning_rate": 0.00017173487543101088, "loss": 11.683, "step": 35186 }, { "epoch": 0.7365611655362974, "grad_norm": 0.28970059752464294, "learning_rate": 0.00017173334784484286, "loss": 11.6782, "step": 35187 }, { "epoch": 0.7365820983002596, "grad_norm": 0.36187613010406494, "learning_rate": 0.00017173182022419114, "loss": 11.6474, "step": 35188 }, { "epoch": 0.7366030310642218, "grad_norm": 0.3087356984615326, "learning_rate": 0.00017173029256905656, "loss": 11.6638, "step": 35189 }, { "epoch": 0.7366239638281838, "grad_norm": 0.2983347177505493, "learning_rate": 0.00017172876487943977, "loss": 11.6642, "step": 35190 }, { "epoch": 0.736644896592146, "grad_norm": 0.3335464894771576, "learning_rate": 0.00017172723715534153, "loss": 11.6741, "step": 35191 }, { "epoch": 0.7366658293561081, "grad_norm": 0.3100977838039398, "learning_rate": 0.00017172570939676254, "loss": 11.6656, "step": 35192 }, { "epoch": 0.7366867621200703, "grad_norm": 0.272443562746048, "learning_rate": 0.00017172418160370356, "loss": 11.6698, "step": 35193 }, { "epoch": 0.7367076948840325, "grad_norm": 0.314579576253891, "learning_rate": 0.00017172265377616536, "loss": 11.6568, "step": 35194 }, { "epoch": 0.7367286276479946, "grad_norm": 0.2913103401660919, "learning_rate": 0.00017172112591414863, "loss": 11.6661, "step": 35195 }, { "epoch": 0.7367495604119568, "grad_norm": 0.34408724308013916, "learning_rate": 0.00017171959801765411, "loss": 11.662, "step": 35196 }, { "epoch": 0.7367704931759189, "grad_norm": 0.27337008714675903, "learning_rate": 0.00017171807008668254, "loss": 11.6702, "step": 35197 }, { "epoch": 0.7367914259398811, "grad_norm": 0.28261926770210266, "learning_rate": 0.00017171654212123468, "loss": 11.6681, "step": 35198 }, { "epoch": 0.7368123587038432, "grad_norm": 0.24683594703674316, "learning_rate": 0.0001717150141213112, "loss": 11.675, "step": 35199 }, { "epoch": 0.7368332914678054, "grad_norm": 0.28421327471733093, "learning_rate": 0.00017171348608691292, "loss": 11.6712, "step": 35200 }, { "epoch": 0.7368542242317676, "grad_norm": 0.3274960517883301, "learning_rate": 0.00017171195801804053, "loss": 11.6734, "step": 35201 }, { "epoch": 0.7368751569957297, "grad_norm": 0.2728465795516968, "learning_rate": 0.00017171042991469474, "loss": 11.6747, "step": 35202 }, { "epoch": 0.7368960897596919, "grad_norm": 0.3115949034690857, "learning_rate": 0.00017170890177687633, "loss": 11.671, "step": 35203 }, { "epoch": 0.736917022523654, "grad_norm": 0.3051207959651947, "learning_rate": 0.000171707373604586, "loss": 11.6668, "step": 35204 }, { "epoch": 0.7369379552876162, "grad_norm": 0.3010806441307068, "learning_rate": 0.0001717058453978245, "loss": 11.6836, "step": 35205 }, { "epoch": 0.7369588880515783, "grad_norm": 0.279981404542923, "learning_rate": 0.00017170431715659255, "loss": 11.6685, "step": 35206 }, { "epoch": 0.7369798208155405, "grad_norm": 0.3428593575954437, "learning_rate": 0.0001717027888808909, "loss": 11.6635, "step": 35207 }, { "epoch": 0.7370007535795027, "grad_norm": 0.369253545999527, "learning_rate": 0.00017170126057072032, "loss": 11.6732, "step": 35208 }, { "epoch": 0.7370216863434648, "grad_norm": 0.34312278032302856, "learning_rate": 0.00017169973222608146, "loss": 11.6704, "step": 35209 }, { "epoch": 0.737042619107427, "grad_norm": 0.314009428024292, "learning_rate": 0.00017169820384697513, "loss": 11.6833, "step": 35210 }, { "epoch": 0.737063551871389, "grad_norm": 0.2509676218032837, "learning_rate": 0.00017169667543340204, "loss": 11.6651, "step": 35211 }, { "epoch": 0.7370844846353513, "grad_norm": 0.3221839368343353, "learning_rate": 0.0001716951469853629, "loss": 11.6571, "step": 35212 }, { "epoch": 0.7371054173993135, "grad_norm": 0.26548653841018677, "learning_rate": 0.00017169361850285846, "loss": 11.6823, "step": 35213 }, { "epoch": 0.7371263501632755, "grad_norm": 0.3137364089488983, "learning_rate": 0.00017169208998588952, "loss": 11.6772, "step": 35214 }, { "epoch": 0.7371472829272377, "grad_norm": 0.2793636620044708, "learning_rate": 0.0001716905614344567, "loss": 11.6738, "step": 35215 }, { "epoch": 0.7371682156911998, "grad_norm": 0.32984429597854614, "learning_rate": 0.0001716890328485608, "loss": 11.665, "step": 35216 }, { "epoch": 0.737189148455162, "grad_norm": 0.27207332849502563, "learning_rate": 0.00017168750422820255, "loss": 11.6573, "step": 35217 }, { "epoch": 0.7372100812191241, "grad_norm": 0.3312544524669647, "learning_rate": 0.00017168597557338266, "loss": 11.6888, "step": 35218 }, { "epoch": 0.7372310139830863, "grad_norm": 0.30343466997146606, "learning_rate": 0.0001716844468841019, "loss": 11.6776, "step": 35219 }, { "epoch": 0.7372519467470485, "grad_norm": 0.2703215479850769, "learning_rate": 0.00017168291816036101, "loss": 11.6562, "step": 35220 }, { "epoch": 0.7372728795110106, "grad_norm": 0.28590691089630127, "learning_rate": 0.0001716813894021607, "loss": 11.6591, "step": 35221 }, { "epoch": 0.7372938122749728, "grad_norm": 0.26475170254707336, "learning_rate": 0.00017167986060950168, "loss": 11.6644, "step": 35222 }, { "epoch": 0.7373147450389349, "grad_norm": 0.30627331137657166, "learning_rate": 0.00017167833178238472, "loss": 11.6619, "step": 35223 }, { "epoch": 0.7373356778028971, "grad_norm": 0.2345830351114273, "learning_rate": 0.0001716768029208106, "loss": 11.6748, "step": 35224 }, { "epoch": 0.7373566105668592, "grad_norm": 0.3598504066467285, "learning_rate": 0.00017167527402477993, "loss": 11.6639, "step": 35225 }, { "epoch": 0.7373775433308214, "grad_norm": 0.3505205810070038, "learning_rate": 0.00017167374509429356, "loss": 11.6694, "step": 35226 }, { "epoch": 0.7373984760947836, "grad_norm": 0.41065672039985657, "learning_rate": 0.0001716722161293522, "loss": 11.6647, "step": 35227 }, { "epoch": 0.7374194088587457, "grad_norm": 0.31208035349845886, "learning_rate": 0.00017167068712995652, "loss": 11.6764, "step": 35228 }, { "epoch": 0.7374403416227079, "grad_norm": 0.2887277603149414, "learning_rate": 0.00017166915809610736, "loss": 11.664, "step": 35229 }, { "epoch": 0.73746127438667, "grad_norm": 0.28940320014953613, "learning_rate": 0.00017166762902780537, "loss": 11.6743, "step": 35230 }, { "epoch": 0.7374822071506322, "grad_norm": 0.26224851608276367, "learning_rate": 0.0001716660999250513, "loss": 11.6641, "step": 35231 }, { "epoch": 0.7375031399145944, "grad_norm": 0.3381686806678772, "learning_rate": 0.00017166457078784592, "loss": 11.688, "step": 35232 }, { "epoch": 0.7375240726785565, "grad_norm": 0.2887246310710907, "learning_rate": 0.00017166304161618995, "loss": 11.676, "step": 35233 }, { "epoch": 0.7375450054425187, "grad_norm": 0.28522324562072754, "learning_rate": 0.00017166151241008413, "loss": 11.6746, "step": 35234 }, { "epoch": 0.7375659382064808, "grad_norm": 0.5075901746749878, "learning_rate": 0.00017165998316952917, "loss": 11.693, "step": 35235 }, { "epoch": 0.737586870970443, "grad_norm": 0.2805296778678894, "learning_rate": 0.0001716584538945258, "loss": 11.6693, "step": 35236 }, { "epoch": 0.737607803734405, "grad_norm": 0.3137982487678528, "learning_rate": 0.00017165692458507477, "loss": 11.6825, "step": 35237 }, { "epoch": 0.7376287364983672, "grad_norm": 0.4228808283805847, "learning_rate": 0.00017165539524117686, "loss": 11.6832, "step": 35238 }, { "epoch": 0.7376496692623294, "grad_norm": 0.39638248085975647, "learning_rate": 0.00017165386586283273, "loss": 11.6786, "step": 35239 }, { "epoch": 0.7376706020262915, "grad_norm": 0.3031436800956726, "learning_rate": 0.00017165233645004318, "loss": 11.683, "step": 35240 }, { "epoch": 0.7376915347902537, "grad_norm": 0.35731741786003113, "learning_rate": 0.00017165080700280892, "loss": 11.692, "step": 35241 }, { "epoch": 0.7377124675542158, "grad_norm": 0.26917022466659546, "learning_rate": 0.00017164927752113065, "loss": 11.6679, "step": 35242 }, { "epoch": 0.737733400318178, "grad_norm": 0.25408270955085754, "learning_rate": 0.00017164774800500915, "loss": 11.6645, "step": 35243 }, { "epoch": 0.7377543330821401, "grad_norm": 0.3116522431373596, "learning_rate": 0.00017164621845444514, "loss": 11.6653, "step": 35244 }, { "epoch": 0.7377752658461023, "grad_norm": 0.27838557958602905, "learning_rate": 0.00017164468886943937, "loss": 11.6723, "step": 35245 }, { "epoch": 0.7377961986100645, "grad_norm": 0.30504924058914185, "learning_rate": 0.00017164315924999256, "loss": 11.6927, "step": 35246 }, { "epoch": 0.7378171313740266, "grad_norm": 0.4094013571739197, "learning_rate": 0.00017164162959610544, "loss": 11.6612, "step": 35247 }, { "epoch": 0.7378380641379888, "grad_norm": 0.23908615112304688, "learning_rate": 0.00017164009990777875, "loss": 11.681, "step": 35248 }, { "epoch": 0.7378589969019509, "grad_norm": 0.3631795644760132, "learning_rate": 0.00017163857018501322, "loss": 11.6712, "step": 35249 }, { "epoch": 0.7378799296659131, "grad_norm": 0.2773705720901489, "learning_rate": 0.0001716370404278096, "loss": 11.67, "step": 35250 }, { "epoch": 0.7379008624298753, "grad_norm": 0.3542165458202362, "learning_rate": 0.00017163551063616864, "loss": 11.6598, "step": 35251 }, { "epoch": 0.7379217951938374, "grad_norm": 0.35502341389656067, "learning_rate": 0.00017163398081009102, "loss": 11.6765, "step": 35252 }, { "epoch": 0.7379427279577996, "grad_norm": 0.3513796925544739, "learning_rate": 0.00017163245094957755, "loss": 11.6809, "step": 35253 }, { "epoch": 0.7379636607217617, "grad_norm": 0.24293926358222961, "learning_rate": 0.00017163092105462887, "loss": 11.6664, "step": 35254 }, { "epoch": 0.7379845934857239, "grad_norm": 0.3079248368740082, "learning_rate": 0.00017162939112524582, "loss": 11.6736, "step": 35255 }, { "epoch": 0.738005526249686, "grad_norm": 0.2818501889705658, "learning_rate": 0.00017162786116142907, "loss": 11.6687, "step": 35256 }, { "epoch": 0.7380264590136482, "grad_norm": 0.29744288325309753, "learning_rate": 0.0001716263311631794, "loss": 11.6813, "step": 35257 }, { "epoch": 0.7380473917776104, "grad_norm": 0.3026994466781616, "learning_rate": 0.0001716248011304975, "loss": 11.6673, "step": 35258 }, { "epoch": 0.7380683245415725, "grad_norm": 0.2720445990562439, "learning_rate": 0.0001716232710633841, "loss": 11.6624, "step": 35259 }, { "epoch": 0.7380892573055347, "grad_norm": 0.44670939445495605, "learning_rate": 0.00017162174096183996, "loss": 11.6599, "step": 35260 }, { "epoch": 0.7381101900694967, "grad_norm": 0.44057247042655945, "learning_rate": 0.0001716202108258658, "loss": 11.6566, "step": 35261 }, { "epoch": 0.7381311228334589, "grad_norm": 0.3095381557941437, "learning_rate": 0.0001716186806554624, "loss": 11.6738, "step": 35262 }, { "epoch": 0.738152055597421, "grad_norm": 0.27934491634368896, "learning_rate": 0.00017161715045063046, "loss": 11.6737, "step": 35263 }, { "epoch": 0.7381729883613832, "grad_norm": 0.3246193528175354, "learning_rate": 0.00017161562021137075, "loss": 11.6708, "step": 35264 }, { "epoch": 0.7381939211253454, "grad_norm": 0.3316121995449066, "learning_rate": 0.0001716140899376839, "loss": 11.6726, "step": 35265 }, { "epoch": 0.7382148538893075, "grad_norm": 0.32803836464881897, "learning_rate": 0.0001716125596295708, "loss": 11.6553, "step": 35266 }, { "epoch": 0.7382357866532697, "grad_norm": 0.27588337659835815, "learning_rate": 0.00017161102928703206, "loss": 11.6726, "step": 35267 }, { "epoch": 0.7382567194172318, "grad_norm": 0.2552247643470764, "learning_rate": 0.0001716094989100685, "loss": 11.6579, "step": 35268 }, { "epoch": 0.738277652181194, "grad_norm": 0.26185303926467896, "learning_rate": 0.0001716079684986808, "loss": 11.6575, "step": 35269 }, { "epoch": 0.7382985849451562, "grad_norm": 0.3501763939857483, "learning_rate": 0.00017160643805286968, "loss": 11.6479, "step": 35270 }, { "epoch": 0.7383195177091183, "grad_norm": 0.3193682134151459, "learning_rate": 0.00017160490757263595, "loss": 11.6678, "step": 35271 }, { "epoch": 0.7383404504730805, "grad_norm": 0.2719956040382385, "learning_rate": 0.00017160337705798031, "loss": 11.667, "step": 35272 }, { "epoch": 0.7383613832370426, "grad_norm": 0.3807324469089508, "learning_rate": 0.0001716018465089035, "loss": 11.6847, "step": 35273 }, { "epoch": 0.7383823160010048, "grad_norm": 0.3129556477069855, "learning_rate": 0.0001716003159254062, "loss": 11.67, "step": 35274 }, { "epoch": 0.7384032487649669, "grad_norm": 0.40401890873908997, "learning_rate": 0.00017159878530748922, "loss": 11.6599, "step": 35275 }, { "epoch": 0.7384241815289291, "grad_norm": 0.27454593777656555, "learning_rate": 0.00017159725465515325, "loss": 11.6721, "step": 35276 }, { "epoch": 0.7384451142928913, "grad_norm": 0.24500519037246704, "learning_rate": 0.0001715957239683991, "loss": 11.6778, "step": 35277 }, { "epoch": 0.7384660470568534, "grad_norm": 0.3449438214302063, "learning_rate": 0.0001715941932472274, "loss": 11.6789, "step": 35278 }, { "epoch": 0.7384869798208156, "grad_norm": 0.29449787735939026, "learning_rate": 0.00017159266249163895, "loss": 11.673, "step": 35279 }, { "epoch": 0.7385079125847777, "grad_norm": 0.30432531237602234, "learning_rate": 0.00017159113170163445, "loss": 11.6823, "step": 35280 }, { "epoch": 0.7385288453487399, "grad_norm": 0.30199751257896423, "learning_rate": 0.0001715896008772147, "loss": 11.6757, "step": 35281 }, { "epoch": 0.738549778112702, "grad_norm": 0.25480589270591736, "learning_rate": 0.0001715880700183804, "loss": 11.6693, "step": 35282 }, { "epoch": 0.7385707108766641, "grad_norm": 0.34233447909355164, "learning_rate": 0.00017158653912513223, "loss": 11.6679, "step": 35283 }, { "epoch": 0.7385916436406263, "grad_norm": 0.3314628303050995, "learning_rate": 0.000171585008197471, "loss": 11.6673, "step": 35284 }, { "epoch": 0.7386125764045884, "grad_norm": 0.4491911232471466, "learning_rate": 0.00017158347723539744, "loss": 11.6663, "step": 35285 }, { "epoch": 0.7386335091685506, "grad_norm": 0.3293556571006775, "learning_rate": 0.00017158194623891223, "loss": 11.6696, "step": 35286 }, { "epoch": 0.7386544419325127, "grad_norm": 0.3462854325771332, "learning_rate": 0.00017158041520801617, "loss": 11.6572, "step": 35287 }, { "epoch": 0.7386753746964749, "grad_norm": 0.3367859423160553, "learning_rate": 0.00017157888414270995, "loss": 11.6819, "step": 35288 }, { "epoch": 0.7386963074604371, "grad_norm": 0.29047584533691406, "learning_rate": 0.00017157735304299435, "loss": 11.6732, "step": 35289 }, { "epoch": 0.7387172402243992, "grad_norm": 0.2680774927139282, "learning_rate": 0.00017157582190887007, "loss": 11.6691, "step": 35290 }, { "epoch": 0.7387381729883614, "grad_norm": 0.28922998905181885, "learning_rate": 0.00017157429074033789, "loss": 11.6697, "step": 35291 }, { "epoch": 0.7387591057523235, "grad_norm": 0.30747511982917786, "learning_rate": 0.00017157275953739846, "loss": 11.6777, "step": 35292 }, { "epoch": 0.7387800385162857, "grad_norm": 0.254348486661911, "learning_rate": 0.0001715712283000526, "loss": 11.6757, "step": 35293 }, { "epoch": 0.7388009712802478, "grad_norm": 0.30010733008384705, "learning_rate": 0.00017156969702830101, "loss": 11.6738, "step": 35294 }, { "epoch": 0.73882190404421, "grad_norm": 0.27703380584716797, "learning_rate": 0.00017156816572214446, "loss": 11.6606, "step": 35295 }, { "epoch": 0.7388428368081722, "grad_norm": 0.2519284784793854, "learning_rate": 0.0001715666343815836, "loss": 11.6708, "step": 35296 }, { "epoch": 0.7388637695721343, "grad_norm": 0.3221437931060791, "learning_rate": 0.00017156510300661925, "loss": 11.674, "step": 35297 }, { "epoch": 0.7388847023360965, "grad_norm": 0.3174917697906494, "learning_rate": 0.00017156357159725213, "loss": 11.6624, "step": 35298 }, { "epoch": 0.7389056351000586, "grad_norm": 0.34051740169525146, "learning_rate": 0.000171562040153483, "loss": 11.6759, "step": 35299 }, { "epoch": 0.7389265678640208, "grad_norm": 0.3935607075691223, "learning_rate": 0.0001715605086753125, "loss": 11.6597, "step": 35300 }, { "epoch": 0.7389475006279829, "grad_norm": 0.3313707113265991, "learning_rate": 0.00017155897716274144, "loss": 11.6627, "step": 35301 }, { "epoch": 0.7389684333919451, "grad_norm": 0.2903004586696625, "learning_rate": 0.0001715574456157706, "loss": 11.6724, "step": 35302 }, { "epoch": 0.7389893661559073, "grad_norm": 0.30105507373809814, "learning_rate": 0.00017155591403440062, "loss": 11.6776, "step": 35303 }, { "epoch": 0.7390102989198694, "grad_norm": 0.24847234785556793, "learning_rate": 0.00017155438241863227, "loss": 11.6602, "step": 35304 }, { "epoch": 0.7390312316838316, "grad_norm": 0.2843424677848816, "learning_rate": 0.0001715528507684663, "loss": 11.664, "step": 35305 }, { "epoch": 0.7390521644477936, "grad_norm": 0.28446412086486816, "learning_rate": 0.00017155131908390346, "loss": 11.6671, "step": 35306 }, { "epoch": 0.7390730972117558, "grad_norm": 0.3160066306591034, "learning_rate": 0.00017154978736494444, "loss": 11.6734, "step": 35307 }, { "epoch": 0.739094029975718, "grad_norm": 0.3191593289375305, "learning_rate": 0.00017154825561159002, "loss": 11.6849, "step": 35308 }, { "epoch": 0.7391149627396801, "grad_norm": 0.32301679253578186, "learning_rate": 0.00017154672382384094, "loss": 11.6463, "step": 35309 }, { "epoch": 0.7391358955036423, "grad_norm": 0.27134421467781067, "learning_rate": 0.0001715451920016979, "loss": 11.6699, "step": 35310 }, { "epoch": 0.7391568282676044, "grad_norm": 0.2896216809749603, "learning_rate": 0.00017154366014516165, "loss": 11.6607, "step": 35311 }, { "epoch": 0.7391777610315666, "grad_norm": 0.3461145758628845, "learning_rate": 0.00017154212825423292, "loss": 11.6718, "step": 35312 }, { "epoch": 0.7391986937955287, "grad_norm": 0.35609129071235657, "learning_rate": 0.00017154059632891247, "loss": 11.6774, "step": 35313 }, { "epoch": 0.7392196265594909, "grad_norm": 0.3090076148509979, "learning_rate": 0.00017153906436920098, "loss": 11.6646, "step": 35314 }, { "epoch": 0.7392405593234531, "grad_norm": 0.3459596037864685, "learning_rate": 0.00017153753237509925, "loss": 11.6754, "step": 35315 }, { "epoch": 0.7392614920874152, "grad_norm": 0.32978492975234985, "learning_rate": 0.00017153600034660804, "loss": 11.6713, "step": 35316 }, { "epoch": 0.7392824248513774, "grad_norm": 0.34430935978889465, "learning_rate": 0.000171534468283728, "loss": 11.6689, "step": 35317 }, { "epoch": 0.7393033576153395, "grad_norm": 0.30632129311561584, "learning_rate": 0.0001715329361864599, "loss": 11.6544, "step": 35318 }, { "epoch": 0.7393242903793017, "grad_norm": 0.31998151540756226, "learning_rate": 0.00017153140405480453, "loss": 11.6747, "step": 35319 }, { "epoch": 0.7393452231432638, "grad_norm": 0.3162531852722168, "learning_rate": 0.00017152987188876254, "loss": 11.6651, "step": 35320 }, { "epoch": 0.739366155907226, "grad_norm": 0.36258891224861145, "learning_rate": 0.0001715283396883347, "loss": 11.6686, "step": 35321 }, { "epoch": 0.7393870886711882, "grad_norm": 0.30151432752609253, "learning_rate": 0.00017152680745352177, "loss": 11.6773, "step": 35322 }, { "epoch": 0.7394080214351503, "grad_norm": 0.26486048102378845, "learning_rate": 0.00017152527518432447, "loss": 11.6705, "step": 35323 }, { "epoch": 0.7394289541991125, "grad_norm": 0.29250994324684143, "learning_rate": 0.00017152374288074355, "loss": 11.6667, "step": 35324 }, { "epoch": 0.7394498869630746, "grad_norm": 0.3627723455429077, "learning_rate": 0.0001715222105427797, "loss": 11.6927, "step": 35325 }, { "epoch": 0.7394708197270368, "grad_norm": 0.3224467933177948, "learning_rate": 0.00017152067817043372, "loss": 11.668, "step": 35326 }, { "epoch": 0.739491752490999, "grad_norm": 0.3274552822113037, "learning_rate": 0.0001715191457637063, "loss": 11.6811, "step": 35327 }, { "epoch": 0.739512685254961, "grad_norm": 0.23786863684654236, "learning_rate": 0.00017151761332259818, "loss": 11.666, "step": 35328 }, { "epoch": 0.7395336180189233, "grad_norm": 0.26457154750823975, "learning_rate": 0.00017151608084711015, "loss": 11.6807, "step": 35329 }, { "epoch": 0.7395545507828853, "grad_norm": 0.2862321436405182, "learning_rate": 0.0001715145483372429, "loss": 11.6859, "step": 35330 }, { "epoch": 0.7395754835468475, "grad_norm": 0.2262314409017563, "learning_rate": 0.00017151301579299715, "loss": 11.6716, "step": 35331 }, { "epoch": 0.7395964163108096, "grad_norm": 0.2838711142539978, "learning_rate": 0.00017151148321437366, "loss": 11.6552, "step": 35332 }, { "epoch": 0.7396173490747718, "grad_norm": 0.2805153429508209, "learning_rate": 0.00017150995060137317, "loss": 11.6746, "step": 35333 }, { "epoch": 0.739638281838734, "grad_norm": 0.3123942017555237, "learning_rate": 0.00017150841795399643, "loss": 11.6525, "step": 35334 }, { "epoch": 0.7396592146026961, "grad_norm": 0.2812204957008362, "learning_rate": 0.00017150688527224416, "loss": 11.6563, "step": 35335 }, { "epoch": 0.7396801473666583, "grad_norm": 0.31148412823677063, "learning_rate": 0.00017150535255611708, "loss": 11.6797, "step": 35336 }, { "epoch": 0.7397010801306204, "grad_norm": 0.338139146566391, "learning_rate": 0.00017150381980561596, "loss": 11.6777, "step": 35337 }, { "epoch": 0.7397220128945826, "grad_norm": 0.26815736293792725, "learning_rate": 0.0001715022870207415, "loss": 11.6599, "step": 35338 }, { "epoch": 0.7397429456585447, "grad_norm": 0.28852173686027527, "learning_rate": 0.0001715007542014945, "loss": 11.6628, "step": 35339 }, { "epoch": 0.7397638784225069, "grad_norm": 0.21261006593704224, "learning_rate": 0.0001714992213478756, "loss": 11.6845, "step": 35340 }, { "epoch": 0.7397848111864691, "grad_norm": 0.28219202160835266, "learning_rate": 0.00017149768845988562, "loss": 11.6718, "step": 35341 }, { "epoch": 0.7398057439504312, "grad_norm": 0.27809375524520874, "learning_rate": 0.00017149615553752528, "loss": 11.6629, "step": 35342 }, { "epoch": 0.7398266767143934, "grad_norm": 0.28961440920829773, "learning_rate": 0.00017149462258079528, "loss": 11.6728, "step": 35343 }, { "epoch": 0.7398476094783555, "grad_norm": 0.2660478353500366, "learning_rate": 0.0001714930895896964, "loss": 11.6572, "step": 35344 }, { "epoch": 0.7398685422423177, "grad_norm": 0.30150261521339417, "learning_rate": 0.00017149155656422934, "loss": 11.6726, "step": 35345 }, { "epoch": 0.7398894750062799, "grad_norm": 0.37482479214668274, "learning_rate": 0.00017149002350439488, "loss": 11.6755, "step": 35346 }, { "epoch": 0.739910407770242, "grad_norm": 0.3219822943210602, "learning_rate": 0.0001714884904101937, "loss": 11.6651, "step": 35347 }, { "epoch": 0.7399313405342042, "grad_norm": 0.23941275477409363, "learning_rate": 0.0001714869572816266, "loss": 11.6755, "step": 35348 }, { "epoch": 0.7399522732981663, "grad_norm": 0.2802467346191406, "learning_rate": 0.0001714854241186943, "loss": 11.6706, "step": 35349 }, { "epoch": 0.7399732060621285, "grad_norm": 0.275912344455719, "learning_rate": 0.0001714838909213975, "loss": 11.6785, "step": 35350 }, { "epoch": 0.7399941388260906, "grad_norm": 0.36737507581710815, "learning_rate": 0.00017148235768973694, "loss": 11.6807, "step": 35351 }, { "epoch": 0.7400150715900528, "grad_norm": 0.24027948081493378, "learning_rate": 0.0001714808244237134, "loss": 11.6697, "step": 35352 }, { "epoch": 0.740036004354015, "grad_norm": 0.3319072723388672, "learning_rate": 0.0001714792911233276, "loss": 11.6797, "step": 35353 }, { "epoch": 0.740056937117977, "grad_norm": 0.3195952773094177, "learning_rate": 0.00017147775778858027, "loss": 11.6774, "step": 35354 }, { "epoch": 0.7400778698819392, "grad_norm": 0.28384634852409363, "learning_rate": 0.00017147622441947215, "loss": 11.6675, "step": 35355 }, { "epoch": 0.7400988026459013, "grad_norm": 0.2837047874927521, "learning_rate": 0.00017147469101600398, "loss": 11.6537, "step": 35356 }, { "epoch": 0.7401197354098635, "grad_norm": 0.32082676887512207, "learning_rate": 0.0001714731575781765, "loss": 11.6846, "step": 35357 }, { "epoch": 0.7401406681738256, "grad_norm": 0.2731592655181885, "learning_rate": 0.00017147162410599044, "loss": 11.6788, "step": 35358 }, { "epoch": 0.7401616009377878, "grad_norm": 0.36923137307167053, "learning_rate": 0.0001714700905994465, "loss": 11.6624, "step": 35359 }, { "epoch": 0.74018253370175, "grad_norm": 0.29213038086891174, "learning_rate": 0.00017146855705854548, "loss": 11.6576, "step": 35360 }, { "epoch": 0.7402034664657121, "grad_norm": 0.9811877608299255, "learning_rate": 0.0001714670234832881, "loss": 11.6661, "step": 35361 }, { "epoch": 0.7402243992296743, "grad_norm": 0.2887418270111084, "learning_rate": 0.00017146548987367508, "loss": 11.6803, "step": 35362 }, { "epoch": 0.7402453319936364, "grad_norm": 0.20855310559272766, "learning_rate": 0.00017146395622970716, "loss": 11.6551, "step": 35363 }, { "epoch": 0.7402662647575986, "grad_norm": 0.3072860836982727, "learning_rate": 0.0001714624225513851, "loss": 11.6551, "step": 35364 }, { "epoch": 0.7402871975215607, "grad_norm": 0.3114137351512909, "learning_rate": 0.00017146088883870963, "loss": 11.6442, "step": 35365 }, { "epoch": 0.7403081302855229, "grad_norm": 0.33942052721977234, "learning_rate": 0.00017145935509168148, "loss": 11.6651, "step": 35366 }, { "epoch": 0.7403290630494851, "grad_norm": 0.39309704303741455, "learning_rate": 0.00017145782131030134, "loss": 11.6872, "step": 35367 }, { "epoch": 0.7403499958134472, "grad_norm": 0.3355443775653839, "learning_rate": 0.00017145628749457004, "loss": 11.6737, "step": 35368 }, { "epoch": 0.7403709285774094, "grad_norm": 0.30089014768600464, "learning_rate": 0.00017145475364448824, "loss": 11.6622, "step": 35369 }, { "epoch": 0.7403918613413715, "grad_norm": 0.26451513171195984, "learning_rate": 0.00017145321976005674, "loss": 11.6595, "step": 35370 }, { "epoch": 0.7404127941053337, "grad_norm": 0.3145037591457367, "learning_rate": 0.00017145168584127622, "loss": 11.6671, "step": 35371 }, { "epoch": 0.7404337268692959, "grad_norm": 0.23837454617023468, "learning_rate": 0.00017145015188814745, "loss": 11.6543, "step": 35372 }, { "epoch": 0.740454659633258, "grad_norm": 0.35274121165275574, "learning_rate": 0.00017144861790067118, "loss": 11.671, "step": 35373 }, { "epoch": 0.7404755923972202, "grad_norm": 0.31979018449783325, "learning_rate": 0.0001714470838788481, "loss": 11.6829, "step": 35374 }, { "epoch": 0.7404965251611823, "grad_norm": 0.2894665002822876, "learning_rate": 0.00017144554982267897, "loss": 11.672, "step": 35375 }, { "epoch": 0.7405174579251445, "grad_norm": 0.3197704553604126, "learning_rate": 0.00017144401573216455, "loss": 11.6788, "step": 35376 }, { "epoch": 0.7405383906891065, "grad_norm": 0.29902783036231995, "learning_rate": 0.00017144248160730555, "loss": 11.6653, "step": 35377 }, { "epoch": 0.7405593234530687, "grad_norm": 0.2858913242816925, "learning_rate": 0.00017144094744810272, "loss": 11.65, "step": 35378 }, { "epoch": 0.7405802562170309, "grad_norm": 0.33724215626716614, "learning_rate": 0.0001714394132545568, "loss": 11.6715, "step": 35379 }, { "epoch": 0.740601188980993, "grad_norm": 0.2981398105621338, "learning_rate": 0.00017143787902666852, "loss": 11.6793, "step": 35380 }, { "epoch": 0.7406221217449552, "grad_norm": 0.2931632697582245, "learning_rate": 0.0001714363447644386, "loss": 11.657, "step": 35381 }, { "epoch": 0.7406430545089173, "grad_norm": 0.3357861340045929, "learning_rate": 0.00017143481046786785, "loss": 11.6702, "step": 35382 }, { "epoch": 0.7406639872728795, "grad_norm": 0.24417316913604736, "learning_rate": 0.0001714332761369569, "loss": 11.6661, "step": 35383 }, { "epoch": 0.7406849200368416, "grad_norm": 0.30610600113868713, "learning_rate": 0.00017143174177170658, "loss": 11.6721, "step": 35384 }, { "epoch": 0.7407058528008038, "grad_norm": 0.2663578391075134, "learning_rate": 0.00017143020737211758, "loss": 11.6687, "step": 35385 }, { "epoch": 0.740726785564766, "grad_norm": 0.274716854095459, "learning_rate": 0.00017142867293819066, "loss": 11.6622, "step": 35386 }, { "epoch": 0.7407477183287281, "grad_norm": 0.21781101822853088, "learning_rate": 0.0001714271384699265, "loss": 11.6488, "step": 35387 }, { "epoch": 0.7407686510926903, "grad_norm": 0.29267793893814087, "learning_rate": 0.00017142560396732592, "loss": 11.659, "step": 35388 }, { "epoch": 0.7407895838566524, "grad_norm": 0.25644922256469727, "learning_rate": 0.00017142406943038962, "loss": 11.6748, "step": 35389 }, { "epoch": 0.7408105166206146, "grad_norm": 0.3333752453327179, "learning_rate": 0.00017142253485911835, "loss": 11.6668, "step": 35390 }, { "epoch": 0.7408314493845768, "grad_norm": 0.2901031970977783, "learning_rate": 0.00017142100025351278, "loss": 11.6687, "step": 35391 }, { "epoch": 0.7408523821485389, "grad_norm": 0.22236166894435883, "learning_rate": 0.00017141946561357376, "loss": 11.6609, "step": 35392 }, { "epoch": 0.7408733149125011, "grad_norm": 0.2965680658817291, "learning_rate": 0.00017141793093930195, "loss": 11.6715, "step": 35393 }, { "epoch": 0.7408942476764632, "grad_norm": 0.3795454502105713, "learning_rate": 0.00017141639623069813, "loss": 11.684, "step": 35394 }, { "epoch": 0.7409151804404254, "grad_norm": 0.34473735094070435, "learning_rate": 0.000171414861487763, "loss": 11.6603, "step": 35395 }, { "epoch": 0.7409361132043875, "grad_norm": 0.3246554434299469, "learning_rate": 0.00017141332671049729, "loss": 11.6514, "step": 35396 }, { "epoch": 0.7409570459683497, "grad_norm": 0.3381035029888153, "learning_rate": 0.0001714117918989018, "loss": 11.6652, "step": 35397 }, { "epoch": 0.7409779787323119, "grad_norm": 0.2895287871360779, "learning_rate": 0.0001714102570529772, "loss": 11.6712, "step": 35398 }, { "epoch": 0.740998911496274, "grad_norm": 0.2899506390094757, "learning_rate": 0.0001714087221727243, "loss": 11.6848, "step": 35399 }, { "epoch": 0.7410198442602361, "grad_norm": 0.25020986795425415, "learning_rate": 0.00017140718725814375, "loss": 11.6763, "step": 35400 }, { "epoch": 0.7410407770241982, "grad_norm": 0.26504772901535034, "learning_rate": 0.00017140565230923637, "loss": 11.6654, "step": 35401 }, { "epoch": 0.7410617097881604, "grad_norm": 0.28827935457229614, "learning_rate": 0.00017140411732600286, "loss": 11.6673, "step": 35402 }, { "epoch": 0.7410826425521225, "grad_norm": 0.3280256390571594, "learning_rate": 0.00017140258230844392, "loss": 11.6617, "step": 35403 }, { "epoch": 0.7411035753160847, "grad_norm": 0.3229728937149048, "learning_rate": 0.00017140104725656035, "loss": 11.678, "step": 35404 }, { "epoch": 0.7411245080800469, "grad_norm": 0.3280026912689209, "learning_rate": 0.00017139951217035287, "loss": 11.6592, "step": 35405 }, { "epoch": 0.741145440844009, "grad_norm": 0.32251670956611633, "learning_rate": 0.00017139797704982223, "loss": 11.6739, "step": 35406 }, { "epoch": 0.7411663736079712, "grad_norm": 0.24751310050487518, "learning_rate": 0.00017139644189496914, "loss": 11.6668, "step": 35407 }, { "epoch": 0.7411873063719333, "grad_norm": 0.3102966248989105, "learning_rate": 0.00017139490670579435, "loss": 11.6594, "step": 35408 }, { "epoch": 0.7412082391358955, "grad_norm": 0.32988929748535156, "learning_rate": 0.00017139337148229858, "loss": 11.6547, "step": 35409 }, { "epoch": 0.7412291718998577, "grad_norm": 0.26558923721313477, "learning_rate": 0.00017139183622448263, "loss": 11.6789, "step": 35410 }, { "epoch": 0.7412501046638198, "grad_norm": 0.2703890800476074, "learning_rate": 0.00017139030093234715, "loss": 11.6871, "step": 35411 }, { "epoch": 0.741271037427782, "grad_norm": 0.2944064140319824, "learning_rate": 0.0001713887656058929, "loss": 11.6578, "step": 35412 }, { "epoch": 0.7412919701917441, "grad_norm": 0.30988025665283203, "learning_rate": 0.00017138723024512072, "loss": 11.6687, "step": 35413 }, { "epoch": 0.7413129029557063, "grad_norm": 0.32971858978271484, "learning_rate": 0.0001713856948500312, "loss": 11.6624, "step": 35414 }, { "epoch": 0.7413338357196684, "grad_norm": 0.2824646532535553, "learning_rate": 0.00017138415942062518, "loss": 11.6758, "step": 35415 }, { "epoch": 0.7413547684836306, "grad_norm": 0.2874199151992798, "learning_rate": 0.00017138262395690335, "loss": 11.6689, "step": 35416 }, { "epoch": 0.7413757012475928, "grad_norm": 0.2896425127983093, "learning_rate": 0.00017138108845886647, "loss": 11.6693, "step": 35417 }, { "epoch": 0.7413966340115549, "grad_norm": 0.28512147068977356, "learning_rate": 0.00017137955292651524, "loss": 11.6651, "step": 35418 }, { "epoch": 0.7414175667755171, "grad_norm": 0.2906765043735504, "learning_rate": 0.00017137801735985045, "loss": 11.6672, "step": 35419 }, { "epoch": 0.7414384995394792, "grad_norm": 0.27146831154823303, "learning_rate": 0.00017137648175887283, "loss": 11.6687, "step": 35420 }, { "epoch": 0.7414594323034414, "grad_norm": 0.24842774868011475, "learning_rate": 0.0001713749461235831, "loss": 11.6775, "step": 35421 }, { "epoch": 0.7414803650674034, "grad_norm": 0.3468076288700104, "learning_rate": 0.000171373410453982, "loss": 11.6623, "step": 35422 }, { "epoch": 0.7415012978313656, "grad_norm": 0.2626955211162567, "learning_rate": 0.00017137187475007028, "loss": 11.6555, "step": 35423 }, { "epoch": 0.7415222305953278, "grad_norm": 0.2604994475841522, "learning_rate": 0.00017137033901184866, "loss": 11.6651, "step": 35424 }, { "epoch": 0.7415431633592899, "grad_norm": 0.25966840982437134, "learning_rate": 0.0001713688032393179, "loss": 11.6803, "step": 35425 }, { "epoch": 0.7415640961232521, "grad_norm": 0.2853061258792877, "learning_rate": 0.00017136726743247868, "loss": 11.6681, "step": 35426 }, { "epoch": 0.7415850288872142, "grad_norm": 0.304523229598999, "learning_rate": 0.00017136573159133182, "loss": 11.669, "step": 35427 }, { "epoch": 0.7416059616511764, "grad_norm": 0.30952581763267517, "learning_rate": 0.00017136419571587804, "loss": 11.6739, "step": 35428 }, { "epoch": 0.7416268944151386, "grad_norm": 0.3239162266254425, "learning_rate": 0.00017136265980611802, "loss": 11.661, "step": 35429 }, { "epoch": 0.7416478271791007, "grad_norm": 0.23471178114414215, "learning_rate": 0.00017136112386205253, "loss": 11.6619, "step": 35430 }, { "epoch": 0.7416687599430629, "grad_norm": 0.2951795160770416, "learning_rate": 0.00017135958788368235, "loss": 11.664, "step": 35431 }, { "epoch": 0.741689692707025, "grad_norm": 0.2865127623081207, "learning_rate": 0.0001713580518710082, "loss": 11.6609, "step": 35432 }, { "epoch": 0.7417106254709872, "grad_norm": 0.29307904839515686, "learning_rate": 0.00017135651582403082, "loss": 11.6736, "step": 35433 }, { "epoch": 0.7417315582349493, "grad_norm": 0.23373988270759583, "learning_rate": 0.00017135497974275088, "loss": 11.6666, "step": 35434 }, { "epoch": 0.7417524909989115, "grad_norm": 0.3800264894962311, "learning_rate": 0.0001713534436271692, "loss": 11.6775, "step": 35435 }, { "epoch": 0.7417734237628737, "grad_norm": 0.3157605528831482, "learning_rate": 0.00017135190747728646, "loss": 11.6814, "step": 35436 }, { "epoch": 0.7417943565268358, "grad_norm": 0.36549559235572815, "learning_rate": 0.00017135037129310346, "loss": 11.6868, "step": 35437 }, { "epoch": 0.741815289290798, "grad_norm": 0.34640049934387207, "learning_rate": 0.0001713488350746209, "loss": 11.671, "step": 35438 }, { "epoch": 0.7418362220547601, "grad_norm": 0.29570266604423523, "learning_rate": 0.0001713472988218395, "loss": 11.6822, "step": 35439 }, { "epoch": 0.7418571548187223, "grad_norm": 0.39404910802841187, "learning_rate": 0.00017134576253476005, "loss": 11.6956, "step": 35440 }, { "epoch": 0.7418780875826844, "grad_norm": 0.37437906861305237, "learning_rate": 0.00017134422621338326, "loss": 11.6656, "step": 35441 }, { "epoch": 0.7418990203466466, "grad_norm": 0.2616249620914459, "learning_rate": 0.00017134268985770986, "loss": 11.6649, "step": 35442 }, { "epoch": 0.7419199531106088, "grad_norm": 0.39076709747314453, "learning_rate": 0.00017134115346774062, "loss": 11.6706, "step": 35443 }, { "epoch": 0.7419408858745709, "grad_norm": 0.22794777154922485, "learning_rate": 0.00017133961704347622, "loss": 11.6676, "step": 35444 }, { "epoch": 0.741961818638533, "grad_norm": 0.27315428853034973, "learning_rate": 0.0001713380805849175, "loss": 11.674, "step": 35445 }, { "epoch": 0.7419827514024951, "grad_norm": 0.22755050659179688, "learning_rate": 0.00017133654409206506, "loss": 11.6763, "step": 35446 }, { "epoch": 0.7420036841664573, "grad_norm": 0.2655471861362457, "learning_rate": 0.00017133500756491977, "loss": 11.6613, "step": 35447 }, { "epoch": 0.7420246169304195, "grad_norm": 0.2553790211677551, "learning_rate": 0.0001713334710034823, "loss": 11.6623, "step": 35448 }, { "epoch": 0.7420455496943816, "grad_norm": 0.3321421146392822, "learning_rate": 0.0001713319344077534, "loss": 11.6672, "step": 35449 }, { "epoch": 0.7420664824583438, "grad_norm": 0.27558714151382446, "learning_rate": 0.0001713303977777338, "loss": 11.6622, "step": 35450 }, { "epoch": 0.7420874152223059, "grad_norm": 0.2980630099773407, "learning_rate": 0.00017132886111342424, "loss": 11.6596, "step": 35451 }, { "epoch": 0.7421083479862681, "grad_norm": 0.32219719886779785, "learning_rate": 0.0001713273244148255, "loss": 11.6495, "step": 35452 }, { "epoch": 0.7421292807502302, "grad_norm": 0.32931655645370483, "learning_rate": 0.00017132578768193826, "loss": 11.6714, "step": 35453 }, { "epoch": 0.7421502135141924, "grad_norm": 0.3850933909416199, "learning_rate": 0.00017132425091476327, "loss": 11.6815, "step": 35454 }, { "epoch": 0.7421711462781546, "grad_norm": 0.26895585656166077, "learning_rate": 0.00017132271411330133, "loss": 11.6857, "step": 35455 }, { "epoch": 0.7421920790421167, "grad_norm": 0.32483237981796265, "learning_rate": 0.0001713211772775531, "loss": 11.6658, "step": 35456 }, { "epoch": 0.7422130118060789, "grad_norm": 0.3236805200576782, "learning_rate": 0.00017131964040751932, "loss": 11.6736, "step": 35457 }, { "epoch": 0.742233944570041, "grad_norm": 0.308169424533844, "learning_rate": 0.0001713181035032008, "loss": 11.6705, "step": 35458 }, { "epoch": 0.7422548773340032, "grad_norm": 0.3086314797401428, "learning_rate": 0.00017131656656459824, "loss": 11.6661, "step": 35459 }, { "epoch": 0.7422758100979653, "grad_norm": 0.2719019651412964, "learning_rate": 0.00017131502959171236, "loss": 11.6841, "step": 35460 }, { "epoch": 0.7422967428619275, "grad_norm": 0.3320292830467224, "learning_rate": 0.00017131349258454393, "loss": 11.6708, "step": 35461 }, { "epoch": 0.7423176756258897, "grad_norm": 0.23439311981201172, "learning_rate": 0.00017131195554309368, "loss": 11.672, "step": 35462 }, { "epoch": 0.7423386083898518, "grad_norm": 0.3289117217063904, "learning_rate": 0.00017131041846736234, "loss": 11.6502, "step": 35463 }, { "epoch": 0.742359541153814, "grad_norm": 0.39320746064186096, "learning_rate": 0.00017130888135735063, "loss": 11.6468, "step": 35464 }, { "epoch": 0.7423804739177761, "grad_norm": 0.33177366852760315, "learning_rate": 0.00017130734421305933, "loss": 11.67, "step": 35465 }, { "epoch": 0.7424014066817383, "grad_norm": 0.3777225911617279, "learning_rate": 0.00017130580703448916, "loss": 11.6544, "step": 35466 }, { "epoch": 0.7424223394457005, "grad_norm": 0.3169926404953003, "learning_rate": 0.00017130426982164089, "loss": 11.6702, "step": 35467 }, { "epoch": 0.7424432722096626, "grad_norm": 0.290789932012558, "learning_rate": 0.00017130273257451518, "loss": 11.6669, "step": 35468 }, { "epoch": 0.7424642049736248, "grad_norm": 0.28223541378974915, "learning_rate": 0.00017130119529311282, "loss": 11.6727, "step": 35469 }, { "epoch": 0.7424851377375868, "grad_norm": 0.357953280210495, "learning_rate": 0.00017129965797743458, "loss": 11.6709, "step": 35470 }, { "epoch": 0.742506070501549, "grad_norm": 0.3086935877799988, "learning_rate": 0.00017129812062748113, "loss": 11.658, "step": 35471 }, { "epoch": 0.7425270032655111, "grad_norm": 0.2640412747859955, "learning_rate": 0.0001712965832432533, "loss": 11.6378, "step": 35472 }, { "epoch": 0.7425479360294733, "grad_norm": 0.30429401993751526, "learning_rate": 0.0001712950458247517, "loss": 11.6564, "step": 35473 }, { "epoch": 0.7425688687934355, "grad_norm": 0.27208003401756287, "learning_rate": 0.0001712935083719772, "loss": 11.6667, "step": 35474 }, { "epoch": 0.7425898015573976, "grad_norm": 0.31321603059768677, "learning_rate": 0.00017129197088493046, "loss": 11.6732, "step": 35475 }, { "epoch": 0.7426107343213598, "grad_norm": 0.2694735527038574, "learning_rate": 0.00017129043336361224, "loss": 11.6768, "step": 35476 }, { "epoch": 0.7426316670853219, "grad_norm": 0.30481913685798645, "learning_rate": 0.00017128889580802326, "loss": 11.6677, "step": 35477 }, { "epoch": 0.7426525998492841, "grad_norm": 0.21751642227172852, "learning_rate": 0.0001712873582181643, "loss": 11.6727, "step": 35478 }, { "epoch": 0.7426735326132462, "grad_norm": 0.3442683517932892, "learning_rate": 0.00017128582059403608, "loss": 11.6601, "step": 35479 }, { "epoch": 0.7426944653772084, "grad_norm": 0.31687280535697937, "learning_rate": 0.00017128428293563934, "loss": 11.6833, "step": 35480 }, { "epoch": 0.7427153981411706, "grad_norm": 0.24458418786525726, "learning_rate": 0.0001712827452429748, "loss": 11.6491, "step": 35481 }, { "epoch": 0.7427363309051327, "grad_norm": 0.31522291898727417, "learning_rate": 0.00017128120751604324, "loss": 11.6641, "step": 35482 }, { "epoch": 0.7427572636690949, "grad_norm": 0.26508471369743347, "learning_rate": 0.00017127966975484537, "loss": 11.6721, "step": 35483 }, { "epoch": 0.742778196433057, "grad_norm": 0.28512945771217346, "learning_rate": 0.00017127813195938192, "loss": 11.667, "step": 35484 }, { "epoch": 0.7427991291970192, "grad_norm": 0.33516085147857666, "learning_rate": 0.00017127659412965364, "loss": 11.669, "step": 35485 }, { "epoch": 0.7428200619609814, "grad_norm": 0.3971147835254669, "learning_rate": 0.0001712750562656613, "loss": 11.6869, "step": 35486 }, { "epoch": 0.7428409947249435, "grad_norm": 0.3161323070526123, "learning_rate": 0.00017127351836740558, "loss": 11.6868, "step": 35487 }, { "epoch": 0.7428619274889057, "grad_norm": 0.25757738947868347, "learning_rate": 0.00017127198043488727, "loss": 11.6621, "step": 35488 }, { "epoch": 0.7428828602528678, "grad_norm": 0.36556878685951233, "learning_rate": 0.00017127044246810708, "loss": 11.6736, "step": 35489 }, { "epoch": 0.74290379301683, "grad_norm": 0.3549436628818512, "learning_rate": 0.00017126890446706578, "loss": 11.6843, "step": 35490 }, { "epoch": 0.742924725780792, "grad_norm": 0.29413914680480957, "learning_rate": 0.00017126736643176407, "loss": 11.6617, "step": 35491 }, { "epoch": 0.7429456585447543, "grad_norm": 0.23128490149974823, "learning_rate": 0.00017126582836220273, "loss": 11.6681, "step": 35492 }, { "epoch": 0.7429665913087165, "grad_norm": 0.3123672902584076, "learning_rate": 0.00017126429025838245, "loss": 11.6687, "step": 35493 }, { "epoch": 0.7429875240726785, "grad_norm": 0.3334389925003052, "learning_rate": 0.000171262752120304, "loss": 11.6818, "step": 35494 }, { "epoch": 0.7430084568366407, "grad_norm": 0.2612732946872711, "learning_rate": 0.00017126121394796815, "loss": 11.6664, "step": 35495 }, { "epoch": 0.7430293896006028, "grad_norm": 0.3134554922580719, "learning_rate": 0.00017125967574137558, "loss": 11.6595, "step": 35496 }, { "epoch": 0.743050322364565, "grad_norm": 0.3560885190963745, "learning_rate": 0.00017125813750052706, "loss": 11.6617, "step": 35497 }, { "epoch": 0.7430712551285271, "grad_norm": 0.2703804671764374, "learning_rate": 0.00017125659922542332, "loss": 11.6557, "step": 35498 }, { "epoch": 0.7430921878924893, "grad_norm": 0.4902733862400055, "learning_rate": 0.00017125506091606512, "loss": 11.6545, "step": 35499 }, { "epoch": 0.7431131206564515, "grad_norm": 0.32005220651626587, "learning_rate": 0.0001712535225724532, "loss": 11.664, "step": 35500 }, { "epoch": 0.7431340534204136, "grad_norm": 0.3191401958465576, "learning_rate": 0.00017125198419458824, "loss": 11.6881, "step": 35501 }, { "epoch": 0.7431549861843758, "grad_norm": 0.30613574385643005, "learning_rate": 0.00017125044578247104, "loss": 11.6828, "step": 35502 }, { "epoch": 0.7431759189483379, "grad_norm": 0.2860943675041199, "learning_rate": 0.00017124890733610234, "loss": 11.6736, "step": 35503 }, { "epoch": 0.7431968517123001, "grad_norm": 0.3259929120540619, "learning_rate": 0.00017124736885548286, "loss": 11.683, "step": 35504 }, { "epoch": 0.7432177844762623, "grad_norm": 0.29344117641448975, "learning_rate": 0.0001712458303406133, "loss": 11.6656, "step": 35505 }, { "epoch": 0.7432387172402244, "grad_norm": 0.24133320152759552, "learning_rate": 0.00017124429179149448, "loss": 11.6736, "step": 35506 }, { "epoch": 0.7432596500041866, "grad_norm": 0.2257125973701477, "learning_rate": 0.00017124275320812708, "loss": 11.6723, "step": 35507 }, { "epoch": 0.7432805827681487, "grad_norm": 0.2528809905052185, "learning_rate": 0.00017124121459051187, "loss": 11.6589, "step": 35508 }, { "epoch": 0.7433015155321109, "grad_norm": 0.29024508595466614, "learning_rate": 0.0001712396759386496, "loss": 11.6633, "step": 35509 }, { "epoch": 0.743322448296073, "grad_norm": 0.3477051556110382, "learning_rate": 0.00017123813725254097, "loss": 11.6658, "step": 35510 }, { "epoch": 0.7433433810600352, "grad_norm": 0.2558479309082031, "learning_rate": 0.00017123659853218674, "loss": 11.66, "step": 35511 }, { "epoch": 0.7433643138239974, "grad_norm": 0.31041714549064636, "learning_rate": 0.00017123505977758763, "loss": 11.6631, "step": 35512 }, { "epoch": 0.7433852465879595, "grad_norm": 0.33522921800613403, "learning_rate": 0.00017123352098874443, "loss": 11.682, "step": 35513 }, { "epoch": 0.7434061793519217, "grad_norm": 0.3504248857498169, "learning_rate": 0.00017123198216565785, "loss": 11.6603, "step": 35514 }, { "epoch": 0.7434271121158837, "grad_norm": 0.296594500541687, "learning_rate": 0.00017123044330832863, "loss": 11.6696, "step": 35515 }, { "epoch": 0.743448044879846, "grad_norm": 0.31734544038772583, "learning_rate": 0.0001712289044167575, "loss": 11.6694, "step": 35516 }, { "epoch": 0.743468977643808, "grad_norm": 0.30893146991729736, "learning_rate": 0.00017122736549094519, "loss": 11.6827, "step": 35517 }, { "epoch": 0.7434899104077702, "grad_norm": 0.3071962893009186, "learning_rate": 0.00017122582653089248, "loss": 11.6777, "step": 35518 }, { "epoch": 0.7435108431717324, "grad_norm": 0.30990511178970337, "learning_rate": 0.0001712242875366001, "loss": 11.6748, "step": 35519 }, { "epoch": 0.7435317759356945, "grad_norm": 0.3237372636795044, "learning_rate": 0.00017122274850806874, "loss": 11.6614, "step": 35520 }, { "epoch": 0.7435527086996567, "grad_norm": 0.3115268647670746, "learning_rate": 0.0001712212094452992, "loss": 11.6561, "step": 35521 }, { "epoch": 0.7435736414636188, "grad_norm": 0.28146088123321533, "learning_rate": 0.0001712196703482922, "loss": 11.6771, "step": 35522 }, { "epoch": 0.743594574227581, "grad_norm": 0.31248655915260315, "learning_rate": 0.00017121813121704846, "loss": 11.6713, "step": 35523 }, { "epoch": 0.7436155069915432, "grad_norm": 0.2448599487543106, "learning_rate": 0.00017121659205156874, "loss": 11.6737, "step": 35524 }, { "epoch": 0.7436364397555053, "grad_norm": 0.30184826254844666, "learning_rate": 0.0001712150528518538, "loss": 11.6643, "step": 35525 }, { "epoch": 0.7436573725194675, "grad_norm": 0.31310176849365234, "learning_rate": 0.00017121351361790434, "loss": 11.6608, "step": 35526 }, { "epoch": 0.7436783052834296, "grad_norm": 0.40282100439071655, "learning_rate": 0.00017121197434972113, "loss": 11.6758, "step": 35527 }, { "epoch": 0.7436992380473918, "grad_norm": 0.32061058282852173, "learning_rate": 0.00017121043504730488, "loss": 11.673, "step": 35528 }, { "epoch": 0.7437201708113539, "grad_norm": 0.3502505123615265, "learning_rate": 0.00017120889571065634, "loss": 11.6432, "step": 35529 }, { "epoch": 0.7437411035753161, "grad_norm": 0.33698999881744385, "learning_rate": 0.0001712073563397763, "loss": 11.6768, "step": 35530 }, { "epoch": 0.7437620363392783, "grad_norm": 0.27773982286453247, "learning_rate": 0.0001712058169346654, "loss": 11.6571, "step": 35531 }, { "epoch": 0.7437829691032404, "grad_norm": 0.3651716709136963, "learning_rate": 0.00017120427749532446, "loss": 11.6605, "step": 35532 }, { "epoch": 0.7438039018672026, "grad_norm": 0.27941083908081055, "learning_rate": 0.00017120273802175422, "loss": 11.6561, "step": 35533 }, { "epoch": 0.7438248346311647, "grad_norm": 0.31401553750038147, "learning_rate": 0.00017120119851395536, "loss": 11.6539, "step": 35534 }, { "epoch": 0.7438457673951269, "grad_norm": 0.3175595998764038, "learning_rate": 0.0001711996589719287, "loss": 11.6707, "step": 35535 }, { "epoch": 0.743866700159089, "grad_norm": 0.31930187344551086, "learning_rate": 0.0001711981193956749, "loss": 11.6664, "step": 35536 }, { "epoch": 0.7438876329230512, "grad_norm": 0.26588818430900574, "learning_rate": 0.00017119657978519474, "loss": 11.6748, "step": 35537 }, { "epoch": 0.7439085656870134, "grad_norm": 0.34280434250831604, "learning_rate": 0.00017119504014048896, "loss": 11.6713, "step": 35538 }, { "epoch": 0.7439294984509754, "grad_norm": 0.33402860164642334, "learning_rate": 0.0001711935004615583, "loss": 11.6599, "step": 35539 }, { "epoch": 0.7439504312149376, "grad_norm": 0.3424321413040161, "learning_rate": 0.0001711919607484035, "loss": 11.6593, "step": 35540 }, { "epoch": 0.7439713639788997, "grad_norm": 0.302908718585968, "learning_rate": 0.0001711904210010253, "loss": 11.6695, "step": 35541 }, { "epoch": 0.7439922967428619, "grad_norm": 0.258761465549469, "learning_rate": 0.00017118888121942446, "loss": 11.6426, "step": 35542 }, { "epoch": 0.7440132295068241, "grad_norm": 0.2869667708873749, "learning_rate": 0.00017118734140360168, "loss": 11.6621, "step": 35543 }, { "epoch": 0.7440341622707862, "grad_norm": 0.3257898986339569, "learning_rate": 0.0001711858015535577, "loss": 11.6807, "step": 35544 }, { "epoch": 0.7440550950347484, "grad_norm": 0.3587244749069214, "learning_rate": 0.00017118426166929325, "loss": 11.6676, "step": 35545 }, { "epoch": 0.7440760277987105, "grad_norm": 0.3016511797904968, "learning_rate": 0.00017118272175080916, "loss": 11.6676, "step": 35546 }, { "epoch": 0.7440969605626727, "grad_norm": 0.2808135747909546, "learning_rate": 0.00017118118179810612, "loss": 11.672, "step": 35547 }, { "epoch": 0.7441178933266348, "grad_norm": 0.35489845275878906, "learning_rate": 0.0001711796418111848, "loss": 11.6672, "step": 35548 }, { "epoch": 0.744138826090597, "grad_norm": 0.2683136463165283, "learning_rate": 0.00017117810179004606, "loss": 11.6713, "step": 35549 }, { "epoch": 0.7441597588545592, "grad_norm": 0.3002530038356781, "learning_rate": 0.00017117656173469054, "loss": 11.6732, "step": 35550 }, { "epoch": 0.7441806916185213, "grad_norm": 0.31199175119400024, "learning_rate": 0.00017117502164511902, "loss": 11.6755, "step": 35551 }, { "epoch": 0.7442016243824835, "grad_norm": 0.2983306646347046, "learning_rate": 0.00017117348152133227, "loss": 11.6699, "step": 35552 }, { "epoch": 0.7442225571464456, "grad_norm": 0.3224383294582367, "learning_rate": 0.00017117194136333097, "loss": 11.6814, "step": 35553 }, { "epoch": 0.7442434899104078, "grad_norm": 0.2903018295764923, "learning_rate": 0.0001711704011711159, "loss": 11.677, "step": 35554 }, { "epoch": 0.7442644226743699, "grad_norm": 0.30752041935920715, "learning_rate": 0.0001711688609446878, "loss": 11.6712, "step": 35555 }, { "epoch": 0.7442853554383321, "grad_norm": 0.36103516817092896, "learning_rate": 0.0001711673206840474, "loss": 11.6781, "step": 35556 }, { "epoch": 0.7443062882022943, "grad_norm": 0.31187087297439575, "learning_rate": 0.0001711657803891954, "loss": 11.6832, "step": 35557 }, { "epoch": 0.7443272209662564, "grad_norm": 0.32806718349456787, "learning_rate": 0.00017116424006013265, "loss": 11.6674, "step": 35558 }, { "epoch": 0.7443481537302186, "grad_norm": 0.3498815596103668, "learning_rate": 0.0001711626996968598, "loss": 11.6656, "step": 35559 }, { "epoch": 0.7443690864941807, "grad_norm": 0.3161967694759369, "learning_rate": 0.00017116115929937759, "loss": 11.6691, "step": 35560 }, { "epoch": 0.7443900192581429, "grad_norm": 0.3212192952632904, "learning_rate": 0.0001711596188676868, "loss": 11.6638, "step": 35561 }, { "epoch": 0.7444109520221049, "grad_norm": 0.33990323543548584, "learning_rate": 0.00017115807840178818, "loss": 11.6832, "step": 35562 }, { "epoch": 0.7444318847860671, "grad_norm": 0.30726268887519836, "learning_rate": 0.00017115653790168242, "loss": 11.6595, "step": 35563 }, { "epoch": 0.7444528175500293, "grad_norm": 0.2989965081214905, "learning_rate": 0.00017115499736737027, "loss": 11.6749, "step": 35564 }, { "epoch": 0.7444737503139914, "grad_norm": 0.4173703193664551, "learning_rate": 0.00017115345679885255, "loss": 11.6774, "step": 35565 }, { "epoch": 0.7444946830779536, "grad_norm": 0.33831673860549927, "learning_rate": 0.0001711519161961299, "loss": 11.6774, "step": 35566 }, { "epoch": 0.7445156158419157, "grad_norm": 0.30501827597618103, "learning_rate": 0.00017115037555920308, "loss": 11.6708, "step": 35567 }, { "epoch": 0.7445365486058779, "grad_norm": 0.26458609104156494, "learning_rate": 0.00017114883488807286, "loss": 11.6591, "step": 35568 }, { "epoch": 0.7445574813698401, "grad_norm": 0.27083784341812134, "learning_rate": 0.00017114729418273998, "loss": 11.6677, "step": 35569 }, { "epoch": 0.7445784141338022, "grad_norm": 0.39608150720596313, "learning_rate": 0.00017114575344320517, "loss": 11.6734, "step": 35570 }, { "epoch": 0.7445993468977644, "grad_norm": 0.2630484104156494, "learning_rate": 0.00017114421266946917, "loss": 11.6768, "step": 35571 }, { "epoch": 0.7446202796617265, "grad_norm": 0.30960792303085327, "learning_rate": 0.0001711426718615327, "loss": 11.6685, "step": 35572 }, { "epoch": 0.7446412124256887, "grad_norm": 0.33338838815689087, "learning_rate": 0.00017114113101939654, "loss": 11.6576, "step": 35573 }, { "epoch": 0.7446621451896508, "grad_norm": 0.3097199499607086, "learning_rate": 0.0001711395901430614, "loss": 11.6704, "step": 35574 }, { "epoch": 0.744683077953613, "grad_norm": 0.7366117835044861, "learning_rate": 0.00017113804923252803, "loss": 11.5642, "step": 35575 }, { "epoch": 0.7447040107175752, "grad_norm": 0.46339353919029236, "learning_rate": 0.0001711365082877972, "loss": 11.6866, "step": 35576 }, { "epoch": 0.7447249434815373, "grad_norm": 0.3197387754917145, "learning_rate": 0.00017113496730886962, "loss": 11.6619, "step": 35577 }, { "epoch": 0.7447458762454995, "grad_norm": 0.2684664726257324, "learning_rate": 0.00017113342629574603, "loss": 11.6688, "step": 35578 }, { "epoch": 0.7447668090094616, "grad_norm": 0.301693856716156, "learning_rate": 0.00017113188524842715, "loss": 11.6748, "step": 35579 }, { "epoch": 0.7447877417734238, "grad_norm": 0.36531862616539, "learning_rate": 0.00017113034416691377, "loss": 11.6589, "step": 35580 }, { "epoch": 0.7448086745373859, "grad_norm": 0.2697838544845581, "learning_rate": 0.00017112880305120663, "loss": 11.6671, "step": 35581 }, { "epoch": 0.7448296073013481, "grad_norm": 0.2983509600162506, "learning_rate": 0.00017112726190130642, "loss": 11.6804, "step": 35582 }, { "epoch": 0.7448505400653103, "grad_norm": 0.29849973320961, "learning_rate": 0.0001711257207172139, "loss": 11.6745, "step": 35583 }, { "epoch": 0.7448714728292724, "grad_norm": 0.3152044415473938, "learning_rate": 0.00017112417949892984, "loss": 11.668, "step": 35584 }, { "epoch": 0.7448924055932346, "grad_norm": 0.38184285163879395, "learning_rate": 0.000171122638246455, "loss": 11.6679, "step": 35585 }, { "epoch": 0.7449133383571966, "grad_norm": 0.2895222008228302, "learning_rate": 0.00017112109695979, "loss": 11.6714, "step": 35586 }, { "epoch": 0.7449342711211588, "grad_norm": 0.2520141303539276, "learning_rate": 0.00017111955563893573, "loss": 11.6815, "step": 35587 }, { "epoch": 0.744955203885121, "grad_norm": 0.2822878956794739, "learning_rate": 0.00017111801428389283, "loss": 11.6967, "step": 35588 }, { "epoch": 0.7449761366490831, "grad_norm": 0.40616336464881897, "learning_rate": 0.00017111647289466206, "loss": 11.6841, "step": 35589 }, { "epoch": 0.7449970694130453, "grad_norm": 0.3655117154121399, "learning_rate": 0.00017111493147124423, "loss": 11.6649, "step": 35590 }, { "epoch": 0.7450180021770074, "grad_norm": 0.2893655598163605, "learning_rate": 0.00017111339001364, "loss": 11.6807, "step": 35591 }, { "epoch": 0.7450389349409696, "grad_norm": 0.3425512909889221, "learning_rate": 0.00017111184852185014, "loss": 11.6669, "step": 35592 }, { "epoch": 0.7450598677049317, "grad_norm": 0.2806457281112671, "learning_rate": 0.00017111030699587537, "loss": 11.6645, "step": 35593 }, { "epoch": 0.7450808004688939, "grad_norm": 0.2665700912475586, "learning_rate": 0.00017110876543571647, "loss": 11.6676, "step": 35594 }, { "epoch": 0.7451017332328561, "grad_norm": 0.279103547334671, "learning_rate": 0.00017110722384137413, "loss": 11.6721, "step": 35595 }, { "epoch": 0.7451226659968182, "grad_norm": 0.333119660615921, "learning_rate": 0.00017110568221284918, "loss": 11.6828, "step": 35596 }, { "epoch": 0.7451435987607804, "grad_norm": 0.30153751373291016, "learning_rate": 0.00017110414055014226, "loss": 11.6858, "step": 35597 }, { "epoch": 0.7451645315247425, "grad_norm": 0.2589690685272217, "learning_rate": 0.0001711025988532542, "loss": 11.6743, "step": 35598 }, { "epoch": 0.7451854642887047, "grad_norm": 0.35945507884025574, "learning_rate": 0.00017110105712218564, "loss": 11.6749, "step": 35599 }, { "epoch": 0.7452063970526668, "grad_norm": 0.32165494561195374, "learning_rate": 0.0001710995153569374, "loss": 11.6651, "step": 35600 }, { "epoch": 0.745227329816629, "grad_norm": 0.39559948444366455, "learning_rate": 0.00017109797355751018, "loss": 11.6585, "step": 35601 }, { "epoch": 0.7452482625805912, "grad_norm": 0.28226739168167114, "learning_rate": 0.00017109643172390478, "loss": 11.6835, "step": 35602 }, { "epoch": 0.7452691953445533, "grad_norm": 0.30751803517341614, "learning_rate": 0.00017109488985612187, "loss": 11.6789, "step": 35603 }, { "epoch": 0.7452901281085155, "grad_norm": 0.3596994876861572, "learning_rate": 0.00017109334795416224, "loss": 11.6741, "step": 35604 }, { "epoch": 0.7453110608724776, "grad_norm": 0.3460332453250885, "learning_rate": 0.0001710918060180266, "loss": 11.6669, "step": 35605 }, { "epoch": 0.7453319936364398, "grad_norm": 0.2836100161075592, "learning_rate": 0.0001710902640477157, "loss": 11.6875, "step": 35606 }, { "epoch": 0.745352926400402, "grad_norm": 0.27525344491004944, "learning_rate": 0.00017108872204323029, "loss": 11.6789, "step": 35607 }, { "epoch": 0.745373859164364, "grad_norm": 0.38784193992614746, "learning_rate": 0.00017108718000457111, "loss": 11.6496, "step": 35608 }, { "epoch": 0.7453947919283263, "grad_norm": 0.35561448335647583, "learning_rate": 0.0001710856379317389, "loss": 11.6719, "step": 35609 }, { "epoch": 0.7454157246922883, "grad_norm": 0.4223025143146515, "learning_rate": 0.00017108409582473438, "loss": 11.687, "step": 35610 }, { "epoch": 0.7454366574562505, "grad_norm": 0.3262288570404053, "learning_rate": 0.00017108255368355834, "loss": 11.6728, "step": 35611 }, { "epoch": 0.7454575902202126, "grad_norm": 0.29436326026916504, "learning_rate": 0.0001710810115082115, "loss": 11.6612, "step": 35612 }, { "epoch": 0.7454785229841748, "grad_norm": 0.2712590992450714, "learning_rate": 0.00017107946929869456, "loss": 11.6633, "step": 35613 }, { "epoch": 0.745499455748137, "grad_norm": 0.3965291678905487, "learning_rate": 0.0001710779270550083, "loss": 11.6772, "step": 35614 }, { "epoch": 0.7455203885120991, "grad_norm": 0.24048855900764465, "learning_rate": 0.00017107638477715348, "loss": 11.6729, "step": 35615 }, { "epoch": 0.7455413212760613, "grad_norm": 0.3277967870235443, "learning_rate": 0.0001710748424651308, "loss": 11.6659, "step": 35616 }, { "epoch": 0.7455622540400234, "grad_norm": 0.3406684100627899, "learning_rate": 0.00017107330011894103, "loss": 11.6671, "step": 35617 }, { "epoch": 0.7455831868039856, "grad_norm": 0.3199023902416229, "learning_rate": 0.0001710717577385849, "loss": 11.6591, "step": 35618 }, { "epoch": 0.7456041195679477, "grad_norm": 0.25679466128349304, "learning_rate": 0.00017107021532406313, "loss": 11.677, "step": 35619 }, { "epoch": 0.7456250523319099, "grad_norm": 0.3143952786922455, "learning_rate": 0.00017106867287537648, "loss": 11.6637, "step": 35620 }, { "epoch": 0.7456459850958721, "grad_norm": 0.21955075860023499, "learning_rate": 0.00017106713039252572, "loss": 11.6678, "step": 35621 }, { "epoch": 0.7456669178598342, "grad_norm": 0.29864662885665894, "learning_rate": 0.00017106558787551154, "loss": 11.6765, "step": 35622 }, { "epoch": 0.7456878506237964, "grad_norm": 0.3884347081184387, "learning_rate": 0.00017106404532433474, "loss": 11.6733, "step": 35623 }, { "epoch": 0.7457087833877585, "grad_norm": 0.3353561460971832, "learning_rate": 0.000171062502738996, "loss": 11.6787, "step": 35624 }, { "epoch": 0.7457297161517207, "grad_norm": 0.3168042302131653, "learning_rate": 0.00017106096011949612, "loss": 11.6631, "step": 35625 }, { "epoch": 0.7457506489156829, "grad_norm": 0.2615717649459839, "learning_rate": 0.00017105941746583582, "loss": 11.6803, "step": 35626 }, { "epoch": 0.745771581679645, "grad_norm": 0.24878759682178497, "learning_rate": 0.00017105787477801577, "loss": 11.658, "step": 35627 }, { "epoch": 0.7457925144436072, "grad_norm": 0.3302120268344879, "learning_rate": 0.00017105633205603682, "loss": 11.6622, "step": 35628 }, { "epoch": 0.7458134472075693, "grad_norm": 0.3177138864994049, "learning_rate": 0.00017105478929989966, "loss": 11.6763, "step": 35629 }, { "epoch": 0.7458343799715315, "grad_norm": 0.3374760150909424, "learning_rate": 0.00017105324650960502, "loss": 11.6826, "step": 35630 }, { "epoch": 0.7458553127354935, "grad_norm": 0.2653234004974365, "learning_rate": 0.00017105170368515374, "loss": 11.6766, "step": 35631 }, { "epoch": 0.7458762454994557, "grad_norm": 0.272733598947525, "learning_rate": 0.0001710501608265464, "loss": 11.6799, "step": 35632 }, { "epoch": 0.745897178263418, "grad_norm": 0.2971307337284088, "learning_rate": 0.00017104861793378385, "loss": 11.6737, "step": 35633 }, { "epoch": 0.74591811102738, "grad_norm": 0.36129245162010193, "learning_rate": 0.0001710470750068668, "loss": 11.6663, "step": 35634 }, { "epoch": 0.7459390437913422, "grad_norm": 0.3014172315597534, "learning_rate": 0.000171045532045796, "loss": 11.6592, "step": 35635 }, { "epoch": 0.7459599765553043, "grad_norm": 0.273739218711853, "learning_rate": 0.0001710439890505722, "loss": 11.6688, "step": 35636 }, { "epoch": 0.7459809093192665, "grad_norm": 0.39275801181793213, "learning_rate": 0.00017104244602119613, "loss": 11.6746, "step": 35637 }, { "epoch": 0.7460018420832286, "grad_norm": 0.293717622756958, "learning_rate": 0.0001710409029576685, "loss": 11.6735, "step": 35638 }, { "epoch": 0.7460227748471908, "grad_norm": 0.30548641085624695, "learning_rate": 0.00017103935985999011, "loss": 11.6661, "step": 35639 }, { "epoch": 0.746043707611153, "grad_norm": 0.2465551346540451, "learning_rate": 0.00017103781672816166, "loss": 11.6615, "step": 35640 }, { "epoch": 0.7460646403751151, "grad_norm": 0.4583887457847595, "learning_rate": 0.00017103627356218395, "loss": 11.6684, "step": 35641 }, { "epoch": 0.7460855731390773, "grad_norm": 0.35212892293930054, "learning_rate": 0.00017103473036205765, "loss": 11.6794, "step": 35642 }, { "epoch": 0.7461065059030394, "grad_norm": 0.4031876027584076, "learning_rate": 0.00017103318712778352, "loss": 11.6763, "step": 35643 }, { "epoch": 0.7461274386670016, "grad_norm": 0.27562880516052246, "learning_rate": 0.00017103164385936232, "loss": 11.6647, "step": 35644 }, { "epoch": 0.7461483714309638, "grad_norm": 0.32544800639152527, "learning_rate": 0.0001710301005567948, "loss": 11.6876, "step": 35645 }, { "epoch": 0.7461693041949259, "grad_norm": 0.2870819866657257, "learning_rate": 0.0001710285572200817, "loss": 11.6593, "step": 35646 }, { "epoch": 0.7461902369588881, "grad_norm": 0.28333136439323425, "learning_rate": 0.00017102701384922372, "loss": 11.6658, "step": 35647 }, { "epoch": 0.7462111697228502, "grad_norm": 0.27875688672065735, "learning_rate": 0.0001710254704442216, "loss": 11.6716, "step": 35648 }, { "epoch": 0.7462321024868124, "grad_norm": 0.312490850687027, "learning_rate": 0.00017102392700507615, "loss": 11.6743, "step": 35649 }, { "epoch": 0.7462530352507745, "grad_norm": 0.2890472412109375, "learning_rate": 0.0001710223835317881, "loss": 11.6596, "step": 35650 }, { "epoch": 0.7462739680147367, "grad_norm": 0.28261125087738037, "learning_rate": 0.00017102084002435815, "loss": 11.6747, "step": 35651 }, { "epoch": 0.7462949007786989, "grad_norm": 0.33682671189308167, "learning_rate": 0.00017101929648278706, "loss": 11.6608, "step": 35652 }, { "epoch": 0.746315833542661, "grad_norm": 0.3129670023918152, "learning_rate": 0.00017101775290707556, "loss": 11.6627, "step": 35653 }, { "epoch": 0.7463367663066232, "grad_norm": 0.21868547797203064, "learning_rate": 0.0001710162092972244, "loss": 11.6686, "step": 35654 }, { "epoch": 0.7463576990705852, "grad_norm": 0.2993462383747101, "learning_rate": 0.00017101466565323431, "loss": 11.6544, "step": 35655 }, { "epoch": 0.7463786318345474, "grad_norm": 0.3792531490325928, "learning_rate": 0.0001710131219751061, "loss": 11.666, "step": 35656 }, { "epoch": 0.7463995645985095, "grad_norm": 0.3780359625816345, "learning_rate": 0.0001710115782628404, "loss": 11.6675, "step": 35657 }, { "epoch": 0.7464204973624717, "grad_norm": 0.337555468082428, "learning_rate": 0.00017101003451643805, "loss": 11.6507, "step": 35658 }, { "epoch": 0.7464414301264339, "grad_norm": 0.3238089680671692, "learning_rate": 0.00017100849073589975, "loss": 11.6776, "step": 35659 }, { "epoch": 0.746462362890396, "grad_norm": 0.24853992462158203, "learning_rate": 0.00017100694692122624, "loss": 11.6553, "step": 35660 }, { "epoch": 0.7464832956543582, "grad_norm": 0.42498114705085754, "learning_rate": 0.00017100540307241825, "loss": 11.6774, "step": 35661 }, { "epoch": 0.7465042284183203, "grad_norm": 0.3287677764892578, "learning_rate": 0.00017100385918947657, "loss": 11.6826, "step": 35662 }, { "epoch": 0.7465251611822825, "grad_norm": 0.27584517002105713, "learning_rate": 0.0001710023152724019, "loss": 11.6626, "step": 35663 }, { "epoch": 0.7465460939462447, "grad_norm": 0.3021588623523712, "learning_rate": 0.000171000771321195, "loss": 11.6676, "step": 35664 }, { "epoch": 0.7465670267102068, "grad_norm": 0.3073869049549103, "learning_rate": 0.0001709992273358566, "loss": 11.6691, "step": 35665 }, { "epoch": 0.746587959474169, "grad_norm": 0.26937180757522583, "learning_rate": 0.0001709976833163874, "loss": 11.6688, "step": 35666 }, { "epoch": 0.7466088922381311, "grad_norm": 0.35261350870132446, "learning_rate": 0.00017099613926278826, "loss": 11.6712, "step": 35667 }, { "epoch": 0.7466298250020933, "grad_norm": 0.36822688579559326, "learning_rate": 0.0001709945951750598, "loss": 11.6772, "step": 35668 }, { "epoch": 0.7466507577660554, "grad_norm": 0.25514501333236694, "learning_rate": 0.00017099305105320288, "loss": 11.6637, "step": 35669 }, { "epoch": 0.7466716905300176, "grad_norm": 0.3597189784049988, "learning_rate": 0.00017099150689721813, "loss": 11.6761, "step": 35670 }, { "epoch": 0.7466926232939798, "grad_norm": 0.3310767114162445, "learning_rate": 0.00017098996270710636, "loss": 11.6653, "step": 35671 }, { "epoch": 0.7467135560579419, "grad_norm": 0.30045080184936523, "learning_rate": 0.0001709884184828683, "loss": 11.656, "step": 35672 }, { "epoch": 0.7467344888219041, "grad_norm": 0.2929893136024475, "learning_rate": 0.00017098687422450467, "loss": 11.6903, "step": 35673 }, { "epoch": 0.7467554215858662, "grad_norm": 0.30317583680152893, "learning_rate": 0.00017098532993201624, "loss": 11.669, "step": 35674 }, { "epoch": 0.7467763543498284, "grad_norm": 0.2929207384586334, "learning_rate": 0.00017098378560540371, "loss": 11.6651, "step": 35675 }, { "epoch": 0.7467972871137905, "grad_norm": 0.3076342046260834, "learning_rate": 0.00017098224124466788, "loss": 11.6716, "step": 35676 }, { "epoch": 0.7468182198777527, "grad_norm": 0.35288578271865845, "learning_rate": 0.00017098069684980943, "loss": 11.6771, "step": 35677 }, { "epoch": 0.7468391526417149, "grad_norm": 0.29540297389030457, "learning_rate": 0.00017097915242082916, "loss": 11.6522, "step": 35678 }, { "epoch": 0.7468600854056769, "grad_norm": 0.3142146170139313, "learning_rate": 0.00017097760795772778, "loss": 11.6795, "step": 35679 }, { "epoch": 0.7468810181696391, "grad_norm": 0.32904690504074097, "learning_rate": 0.00017097606346050607, "loss": 11.6723, "step": 35680 }, { "epoch": 0.7469019509336012, "grad_norm": 0.30520668625831604, "learning_rate": 0.00017097451892916473, "loss": 11.677, "step": 35681 }, { "epoch": 0.7469228836975634, "grad_norm": 0.28661343455314636, "learning_rate": 0.0001709729743637045, "loss": 11.67, "step": 35682 }, { "epoch": 0.7469438164615256, "grad_norm": 0.2770831286907196, "learning_rate": 0.00017097142976412616, "loss": 11.6783, "step": 35683 }, { "epoch": 0.7469647492254877, "grad_norm": 0.35568952560424805, "learning_rate": 0.00017096988513043039, "loss": 11.6729, "step": 35684 }, { "epoch": 0.7469856819894499, "grad_norm": 0.2749788165092468, "learning_rate": 0.00017096834046261803, "loss": 11.6655, "step": 35685 }, { "epoch": 0.747006614753412, "grad_norm": 0.2762262523174286, "learning_rate": 0.00017096679576068973, "loss": 11.6632, "step": 35686 }, { "epoch": 0.7470275475173742, "grad_norm": 0.29866987466812134, "learning_rate": 0.00017096525102464627, "loss": 11.6623, "step": 35687 }, { "epoch": 0.7470484802813363, "grad_norm": 0.2586401402950287, "learning_rate": 0.0001709637062544884, "loss": 11.6733, "step": 35688 }, { "epoch": 0.7470694130452985, "grad_norm": 0.31833615899086, "learning_rate": 0.00017096216145021686, "loss": 11.6705, "step": 35689 }, { "epoch": 0.7470903458092607, "grad_norm": 0.32611560821533203, "learning_rate": 0.0001709606166118324, "loss": 11.6639, "step": 35690 }, { "epoch": 0.7471112785732228, "grad_norm": 0.27131038904190063, "learning_rate": 0.00017095907173933573, "loss": 11.6517, "step": 35691 }, { "epoch": 0.747132211337185, "grad_norm": 0.3313872218132019, "learning_rate": 0.0001709575268327276, "loss": 11.663, "step": 35692 }, { "epoch": 0.7471531441011471, "grad_norm": 0.2860935628414154, "learning_rate": 0.00017095598189200879, "loss": 11.6739, "step": 35693 }, { "epoch": 0.7471740768651093, "grad_norm": 0.28569504618644714, "learning_rate": 0.00017095443691718, "loss": 11.6632, "step": 35694 }, { "epoch": 0.7471950096290714, "grad_norm": 0.26825591921806335, "learning_rate": 0.000170952891908242, "loss": 11.6669, "step": 35695 }, { "epoch": 0.7472159423930336, "grad_norm": 0.296799898147583, "learning_rate": 0.00017095134686519553, "loss": 11.6705, "step": 35696 }, { "epoch": 0.7472368751569958, "grad_norm": 0.27925071120262146, "learning_rate": 0.0001709498017880413, "loss": 11.6683, "step": 35697 }, { "epoch": 0.7472578079209579, "grad_norm": 0.3429533839225769, "learning_rate": 0.0001709482566767801, "loss": 11.6768, "step": 35698 }, { "epoch": 0.7472787406849201, "grad_norm": 0.46016618609428406, "learning_rate": 0.00017094671153141265, "loss": 11.677, "step": 35699 }, { "epoch": 0.7472996734488822, "grad_norm": 0.3327236473560333, "learning_rate": 0.0001709451663519397, "loss": 11.6858, "step": 35700 }, { "epoch": 0.7473206062128444, "grad_norm": 0.3000609576702118, "learning_rate": 0.00017094362113836194, "loss": 11.6696, "step": 35701 }, { "epoch": 0.7473415389768066, "grad_norm": 0.3499327003955841, "learning_rate": 0.0001709420758906802, "loss": 11.6579, "step": 35702 }, { "epoch": 0.7473624717407686, "grad_norm": 0.2891506552696228, "learning_rate": 0.0001709405306088952, "loss": 11.6535, "step": 35703 }, { "epoch": 0.7473834045047308, "grad_norm": 0.27719777822494507, "learning_rate": 0.00017093898529300762, "loss": 11.6649, "step": 35704 }, { "epoch": 0.7474043372686929, "grad_norm": 0.2794775068759918, "learning_rate": 0.00017093743994301826, "loss": 11.666, "step": 35705 }, { "epoch": 0.7474252700326551, "grad_norm": 0.33014747500419617, "learning_rate": 0.0001709358945589279, "loss": 11.6615, "step": 35706 }, { "epoch": 0.7474462027966172, "grad_norm": 0.26477912068367004, "learning_rate": 0.00017093434914073718, "loss": 11.6761, "step": 35707 }, { "epoch": 0.7474671355605794, "grad_norm": 0.24941587448120117, "learning_rate": 0.0001709328036884469, "loss": 11.6549, "step": 35708 }, { "epoch": 0.7474880683245416, "grad_norm": 0.26214075088500977, "learning_rate": 0.00017093125820205783, "loss": 11.6647, "step": 35709 }, { "epoch": 0.7475090010885037, "grad_norm": 0.35718873143196106, "learning_rate": 0.00017092971268157066, "loss": 11.6786, "step": 35710 }, { "epoch": 0.7475299338524659, "grad_norm": 0.3102942109107971, "learning_rate": 0.00017092816712698617, "loss": 11.6697, "step": 35711 }, { "epoch": 0.747550866616428, "grad_norm": 0.28944122791290283, "learning_rate": 0.00017092662153830505, "loss": 11.6668, "step": 35712 }, { "epoch": 0.7475717993803902, "grad_norm": 0.23152223229408264, "learning_rate": 0.00017092507591552814, "loss": 11.6622, "step": 35713 }, { "epoch": 0.7475927321443523, "grad_norm": 0.2609061896800995, "learning_rate": 0.0001709235302586561, "loss": 11.6793, "step": 35714 }, { "epoch": 0.7476136649083145, "grad_norm": 0.3415341079235077, "learning_rate": 0.00017092198456768965, "loss": 11.681, "step": 35715 }, { "epoch": 0.7476345976722767, "grad_norm": 0.31501275300979614, "learning_rate": 0.00017092043884262966, "loss": 11.6742, "step": 35716 }, { "epoch": 0.7476555304362388, "grad_norm": 0.29887038469314575, "learning_rate": 0.00017091889308347674, "loss": 11.6603, "step": 35717 }, { "epoch": 0.747676463200201, "grad_norm": 0.2501613199710846, "learning_rate": 0.00017091734729023173, "loss": 11.676, "step": 35718 }, { "epoch": 0.7476973959641631, "grad_norm": 0.26780951023101807, "learning_rate": 0.0001709158014628953, "loss": 11.6562, "step": 35719 }, { "epoch": 0.7477183287281253, "grad_norm": 0.29033362865448, "learning_rate": 0.0001709142556014682, "loss": 11.6542, "step": 35720 }, { "epoch": 0.7477392614920875, "grad_norm": 0.3410637080669403, "learning_rate": 0.00017091270970595124, "loss": 11.6663, "step": 35721 }, { "epoch": 0.7477601942560496, "grad_norm": 0.30456972122192383, "learning_rate": 0.0001709111637763451, "loss": 11.6731, "step": 35722 }, { "epoch": 0.7477811270200118, "grad_norm": 0.36701488494873047, "learning_rate": 0.00017090961781265053, "loss": 11.6795, "step": 35723 }, { "epoch": 0.7478020597839738, "grad_norm": 0.2818042039871216, "learning_rate": 0.0001709080718148683, "loss": 11.6414, "step": 35724 }, { "epoch": 0.747822992547936, "grad_norm": 0.3422297537326813, "learning_rate": 0.00017090652578299916, "loss": 11.6845, "step": 35725 }, { "epoch": 0.7478439253118981, "grad_norm": 0.28580740094184875, "learning_rate": 0.0001709049797170438, "loss": 11.6658, "step": 35726 }, { "epoch": 0.7478648580758603, "grad_norm": 0.2359512895345688, "learning_rate": 0.000170903433617003, "loss": 11.6672, "step": 35727 }, { "epoch": 0.7478857908398225, "grad_norm": 0.3057677745819092, "learning_rate": 0.00017090188748287752, "loss": 11.6697, "step": 35728 }, { "epoch": 0.7479067236037846, "grad_norm": 0.24224364757537842, "learning_rate": 0.00017090034131466808, "loss": 11.6592, "step": 35729 }, { "epoch": 0.7479276563677468, "grad_norm": 0.2612738609313965, "learning_rate": 0.0001708987951123754, "loss": 11.6702, "step": 35730 }, { "epoch": 0.7479485891317089, "grad_norm": 0.34696173667907715, "learning_rate": 0.00017089724887600028, "loss": 11.6678, "step": 35731 }, { "epoch": 0.7479695218956711, "grad_norm": 0.3627983331680298, "learning_rate": 0.00017089570260554342, "loss": 11.6478, "step": 35732 }, { "epoch": 0.7479904546596332, "grad_norm": 0.2809673845767975, "learning_rate": 0.00017089415630100553, "loss": 11.6589, "step": 35733 }, { "epoch": 0.7480113874235954, "grad_norm": 0.3259449899196625, "learning_rate": 0.00017089260996238745, "loss": 11.6761, "step": 35734 }, { "epoch": 0.7480323201875576, "grad_norm": 0.31841179728507996, "learning_rate": 0.00017089106358968988, "loss": 11.6517, "step": 35735 }, { "epoch": 0.7480532529515197, "grad_norm": 0.2761150598526001, "learning_rate": 0.00017088951718291352, "loss": 11.649, "step": 35736 }, { "epoch": 0.7480741857154819, "grad_norm": 0.3568720817565918, "learning_rate": 0.00017088797074205916, "loss": 11.6736, "step": 35737 }, { "epoch": 0.748095118479444, "grad_norm": 0.5070633888244629, "learning_rate": 0.00017088642426712754, "loss": 11.6694, "step": 35738 }, { "epoch": 0.7481160512434062, "grad_norm": 0.3049783408641815, "learning_rate": 0.0001708848777581194, "loss": 11.6651, "step": 35739 }, { "epoch": 0.7481369840073683, "grad_norm": 0.2725543677806854, "learning_rate": 0.00017088333121503547, "loss": 11.6665, "step": 35740 }, { "epoch": 0.7481579167713305, "grad_norm": 0.3653641641139984, "learning_rate": 0.00017088178463787646, "loss": 11.6601, "step": 35741 }, { "epoch": 0.7481788495352927, "grad_norm": 0.27217984199523926, "learning_rate": 0.00017088023802664323, "loss": 11.6832, "step": 35742 }, { "epoch": 0.7481997822992548, "grad_norm": 0.315211683511734, "learning_rate": 0.00017087869138133642, "loss": 11.6716, "step": 35743 }, { "epoch": 0.748220715063217, "grad_norm": 0.3092883825302124, "learning_rate": 0.00017087714470195677, "loss": 11.677, "step": 35744 }, { "epoch": 0.7482416478271791, "grad_norm": 0.3414127826690674, "learning_rate": 0.0001708755979885051, "loss": 11.6942, "step": 35745 }, { "epoch": 0.7482625805911413, "grad_norm": 0.2988668382167816, "learning_rate": 0.00017087405124098208, "loss": 11.6723, "step": 35746 }, { "epoch": 0.7482835133551035, "grad_norm": 0.29850172996520996, "learning_rate": 0.0001708725044593885, "loss": 11.6631, "step": 35747 }, { "epoch": 0.7483044461190655, "grad_norm": 0.32949501276016235, "learning_rate": 0.00017087095764372508, "loss": 11.6806, "step": 35748 }, { "epoch": 0.7483253788830277, "grad_norm": 0.29823359847068787, "learning_rate": 0.00017086941079399258, "loss": 11.6641, "step": 35749 }, { "epoch": 0.7483463116469898, "grad_norm": 0.3060014545917511, "learning_rate": 0.0001708678639101917, "loss": 11.6581, "step": 35750 }, { "epoch": 0.748367244410952, "grad_norm": 0.3866627812385559, "learning_rate": 0.00017086631699232327, "loss": 11.6812, "step": 35751 }, { "epoch": 0.7483881771749141, "grad_norm": 0.2778002619743347, "learning_rate": 0.00017086477004038794, "loss": 11.6684, "step": 35752 }, { "epoch": 0.7484091099388763, "grad_norm": 0.3822802007198334, "learning_rate": 0.0001708632230543865, "loss": 11.6669, "step": 35753 }, { "epoch": 0.7484300427028385, "grad_norm": 0.32504943013191223, "learning_rate": 0.00017086167603431971, "loss": 11.6536, "step": 35754 }, { "epoch": 0.7484509754668006, "grad_norm": 0.2866661548614502, "learning_rate": 0.0001708601289801883, "loss": 11.6711, "step": 35755 }, { "epoch": 0.7484719082307628, "grad_norm": 2.3571889400482178, "learning_rate": 0.000170858581891993, "loss": 11.7426, "step": 35756 }, { "epoch": 0.7484928409947249, "grad_norm": 0.36496400833129883, "learning_rate": 0.00017085703476973454, "loss": 11.6831, "step": 35757 }, { "epoch": 0.7485137737586871, "grad_norm": 0.2936921715736389, "learning_rate": 0.00017085548761341368, "loss": 11.674, "step": 35758 }, { "epoch": 0.7485347065226492, "grad_norm": 0.3367595672607422, "learning_rate": 0.00017085394042303117, "loss": 11.6749, "step": 35759 }, { "epoch": 0.7485556392866114, "grad_norm": 0.25971922278404236, "learning_rate": 0.00017085239319858777, "loss": 11.6759, "step": 35760 }, { "epoch": 0.7485765720505736, "grad_norm": 0.3155675232410431, "learning_rate": 0.0001708508459400842, "loss": 11.6586, "step": 35761 }, { "epoch": 0.7485975048145357, "grad_norm": 0.3118494153022766, "learning_rate": 0.0001708492986475212, "loss": 11.6834, "step": 35762 }, { "epoch": 0.7486184375784979, "grad_norm": 0.2956767678260803, "learning_rate": 0.00017084775132089952, "loss": 11.6627, "step": 35763 }, { "epoch": 0.74863937034246, "grad_norm": 0.3375340402126312, "learning_rate": 0.0001708462039602199, "loss": 11.6721, "step": 35764 }, { "epoch": 0.7486603031064222, "grad_norm": 0.3029879927635193, "learning_rate": 0.0001708446565654831, "loss": 11.6703, "step": 35765 }, { "epoch": 0.7486812358703844, "grad_norm": 0.3542468249797821, "learning_rate": 0.00017084310913668987, "loss": 11.6779, "step": 35766 }, { "epoch": 0.7487021686343465, "grad_norm": 0.358138769865036, "learning_rate": 0.0001708415616738409, "loss": 11.6665, "step": 35767 }, { "epoch": 0.7487231013983087, "grad_norm": 0.30832308530807495, "learning_rate": 0.00017084001417693703, "loss": 11.6632, "step": 35768 }, { "epoch": 0.7487440341622708, "grad_norm": 0.3245459496974945, "learning_rate": 0.0001708384666459789, "loss": 11.6823, "step": 35769 }, { "epoch": 0.748764966926233, "grad_norm": 0.27364781498908997, "learning_rate": 0.00017083691908096729, "loss": 11.6677, "step": 35770 }, { "epoch": 0.748785899690195, "grad_norm": 0.3518971800804138, "learning_rate": 0.00017083537148190295, "loss": 11.6776, "step": 35771 }, { "epoch": 0.7488068324541572, "grad_norm": 0.40449875593185425, "learning_rate": 0.00017083382384878667, "loss": 11.6753, "step": 35772 }, { "epoch": 0.7488277652181194, "grad_norm": 0.28671666979789734, "learning_rate": 0.00017083227618161915, "loss": 11.6637, "step": 35773 }, { "epoch": 0.7488486979820815, "grad_norm": 0.2467823177576065, "learning_rate": 0.00017083072848040113, "loss": 11.6662, "step": 35774 }, { "epoch": 0.7488696307460437, "grad_norm": 0.3034474849700928, "learning_rate": 0.00017082918074513332, "loss": 11.6559, "step": 35775 }, { "epoch": 0.7488905635100058, "grad_norm": 0.2746365964412689, "learning_rate": 0.00017082763297581653, "loss": 11.6687, "step": 35776 }, { "epoch": 0.748911496273968, "grad_norm": 0.29548418521881104, "learning_rate": 0.00017082608517245146, "loss": 11.675, "step": 35777 }, { "epoch": 0.7489324290379301, "grad_norm": 0.3747265934944153, "learning_rate": 0.0001708245373350389, "loss": 11.6595, "step": 35778 }, { "epoch": 0.7489533618018923, "grad_norm": 0.2702605128288269, "learning_rate": 0.00017082298946357956, "loss": 11.6649, "step": 35779 }, { "epoch": 0.7489742945658545, "grad_norm": 0.3499888777732849, "learning_rate": 0.00017082144155807419, "loss": 11.6622, "step": 35780 }, { "epoch": 0.7489952273298166, "grad_norm": 0.27804628014564514, "learning_rate": 0.0001708198936185235, "loss": 11.6668, "step": 35781 }, { "epoch": 0.7490161600937788, "grad_norm": 0.3663742244243622, "learning_rate": 0.0001708183456449283, "loss": 11.6741, "step": 35782 }, { "epoch": 0.7490370928577409, "grad_norm": 0.35593846440315247, "learning_rate": 0.0001708167976372893, "loss": 11.6871, "step": 35783 }, { "epoch": 0.7490580256217031, "grad_norm": 0.2828093469142914, "learning_rate": 0.00017081524959560725, "loss": 11.6562, "step": 35784 }, { "epoch": 0.7490789583856653, "grad_norm": 0.29661640524864197, "learning_rate": 0.00017081370151988287, "loss": 11.6802, "step": 35785 }, { "epoch": 0.7490998911496274, "grad_norm": 0.2447177916765213, "learning_rate": 0.00017081215341011697, "loss": 11.6616, "step": 35786 }, { "epoch": 0.7491208239135896, "grad_norm": 0.2963944673538208, "learning_rate": 0.0001708106052663102, "loss": 11.669, "step": 35787 }, { "epoch": 0.7491417566775517, "grad_norm": 0.3050091862678528, "learning_rate": 0.0001708090570884634, "loss": 11.6729, "step": 35788 }, { "epoch": 0.7491626894415139, "grad_norm": 0.3001987040042877, "learning_rate": 0.00017080750887657722, "loss": 11.6696, "step": 35789 }, { "epoch": 0.749183622205476, "grad_norm": 0.30624595284461975, "learning_rate": 0.00017080596063065247, "loss": 11.6565, "step": 35790 }, { "epoch": 0.7492045549694382, "grad_norm": 0.3080730140209198, "learning_rate": 0.00017080441235068988, "loss": 11.6695, "step": 35791 }, { "epoch": 0.7492254877334004, "grad_norm": 0.2938360571861267, "learning_rate": 0.00017080286403669016, "loss": 11.669, "step": 35792 }, { "epoch": 0.7492464204973625, "grad_norm": 0.2680341303348541, "learning_rate": 0.0001708013156886541, "loss": 11.6719, "step": 35793 }, { "epoch": 0.7492673532613247, "grad_norm": 0.29388925433158875, "learning_rate": 0.00017079976730658244, "loss": 11.6611, "step": 35794 }, { "epoch": 0.7492882860252867, "grad_norm": 0.33958712220191956, "learning_rate": 0.00017079821889047592, "loss": 11.6645, "step": 35795 }, { "epoch": 0.7493092187892489, "grad_norm": 0.2603019177913666, "learning_rate": 0.00017079667044033524, "loss": 11.6464, "step": 35796 }, { "epoch": 0.749330151553211, "grad_norm": 0.2857820391654968, "learning_rate": 0.00017079512195616124, "loss": 11.6624, "step": 35797 }, { "epoch": 0.7493510843171732, "grad_norm": 0.551362156867981, "learning_rate": 0.00017079357343795457, "loss": 11.6852, "step": 35798 }, { "epoch": 0.7493720170811354, "grad_norm": 0.3868453800678253, "learning_rate": 0.000170792024885716, "loss": 11.6778, "step": 35799 }, { "epoch": 0.7493929498450975, "grad_norm": 0.2879643440246582, "learning_rate": 0.00017079047629944635, "loss": 11.6726, "step": 35800 }, { "epoch": 0.7494138826090597, "grad_norm": 0.29129138588905334, "learning_rate": 0.0001707889276791462, "loss": 11.6645, "step": 35801 }, { "epoch": 0.7494348153730218, "grad_norm": 0.23464874923229218, "learning_rate": 0.00017078737902481646, "loss": 11.6523, "step": 35802 }, { "epoch": 0.749455748136984, "grad_norm": 0.27962440252304077, "learning_rate": 0.00017078583033645778, "loss": 11.6672, "step": 35803 }, { "epoch": 0.7494766809009462, "grad_norm": 0.3256130814552307, "learning_rate": 0.00017078428161407095, "loss": 11.6627, "step": 35804 }, { "epoch": 0.7494976136649083, "grad_norm": 0.27192002534866333, "learning_rate": 0.0001707827328576567, "loss": 11.6773, "step": 35805 }, { "epoch": 0.7495185464288705, "grad_norm": 0.31539326906204224, "learning_rate": 0.00017078118406721576, "loss": 11.653, "step": 35806 }, { "epoch": 0.7495394791928326, "grad_norm": 0.2886430025100708, "learning_rate": 0.00017077963524274892, "loss": 11.6818, "step": 35807 }, { "epoch": 0.7495604119567948, "grad_norm": 0.3921700417995453, "learning_rate": 0.00017077808638425683, "loss": 11.6712, "step": 35808 }, { "epoch": 0.7495813447207569, "grad_norm": 0.24038900434970856, "learning_rate": 0.00017077653749174033, "loss": 11.66, "step": 35809 }, { "epoch": 0.7496022774847191, "grad_norm": 0.25194111466407776, "learning_rate": 0.00017077498856520013, "loss": 11.6393, "step": 35810 }, { "epoch": 0.7496232102486813, "grad_norm": 0.387455016374588, "learning_rate": 0.000170773439604637, "loss": 11.6717, "step": 35811 }, { "epoch": 0.7496441430126434, "grad_norm": 0.2955837547779083, "learning_rate": 0.00017077189061005162, "loss": 11.6881, "step": 35812 }, { "epoch": 0.7496650757766056, "grad_norm": 0.30089759826660156, "learning_rate": 0.0001707703415814448, "loss": 11.677, "step": 35813 }, { "epoch": 0.7496860085405677, "grad_norm": 0.38546136021614075, "learning_rate": 0.00017076879251881722, "loss": 11.6777, "step": 35814 }, { "epoch": 0.7497069413045299, "grad_norm": 0.3116283416748047, "learning_rate": 0.00017076724342216968, "loss": 11.676, "step": 35815 }, { "epoch": 0.749727874068492, "grad_norm": 0.32089608907699585, "learning_rate": 0.00017076569429150292, "loss": 11.6693, "step": 35816 }, { "epoch": 0.7497488068324542, "grad_norm": 0.28509584069252014, "learning_rate": 0.00017076414512681766, "loss": 11.664, "step": 35817 }, { "epoch": 0.7497697395964164, "grad_norm": 0.32312026619911194, "learning_rate": 0.00017076259592811466, "loss": 11.682, "step": 35818 }, { "epoch": 0.7497906723603784, "grad_norm": 0.29995694756507874, "learning_rate": 0.00017076104669539466, "loss": 11.6472, "step": 35819 }, { "epoch": 0.7498116051243406, "grad_norm": 0.3653595745563507, "learning_rate": 0.0001707594974286584, "loss": 11.6811, "step": 35820 }, { "epoch": 0.7498325378883027, "grad_norm": 0.27930304408073425, "learning_rate": 0.00017075794812790668, "loss": 11.682, "step": 35821 }, { "epoch": 0.7498534706522649, "grad_norm": 0.32781752943992615, "learning_rate": 0.00017075639879314016, "loss": 11.6708, "step": 35822 }, { "epoch": 0.7498744034162271, "grad_norm": 0.24387003481388092, "learning_rate": 0.00017075484942435958, "loss": 11.6562, "step": 35823 }, { "epoch": 0.7498953361801892, "grad_norm": 0.25692784786224365, "learning_rate": 0.0001707533000215658, "loss": 11.6855, "step": 35824 }, { "epoch": 0.7499162689441514, "grad_norm": 0.35053640604019165, "learning_rate": 0.00017075175058475946, "loss": 11.6811, "step": 35825 }, { "epoch": 0.7499372017081135, "grad_norm": 0.3533616364002228, "learning_rate": 0.00017075020111394134, "loss": 11.682, "step": 35826 }, { "epoch": 0.7499581344720757, "grad_norm": 0.3353854715824127, "learning_rate": 0.00017074865160911218, "loss": 11.6717, "step": 35827 }, { "epoch": 0.7499790672360378, "grad_norm": 0.26104268431663513, "learning_rate": 0.0001707471020702727, "loss": 11.6748, "step": 35828 }, { "epoch": 0.75, "grad_norm": 0.35184985399246216, "learning_rate": 0.0001707455524974237, "loss": 11.6728, "step": 35829 }, { "epoch": 0.7500209327639622, "grad_norm": 0.3468216359615326, "learning_rate": 0.0001707440028905659, "loss": 11.6538, "step": 35830 }, { "epoch": 0.7500418655279243, "grad_norm": 0.36102572083473206, "learning_rate": 0.0001707424532497, "loss": 11.6766, "step": 35831 }, { "epoch": 0.7500627982918865, "grad_norm": 0.33038583397865295, "learning_rate": 0.0001707409035748268, "loss": 11.658, "step": 35832 }, { "epoch": 0.7500837310558486, "grad_norm": 0.259133517742157, "learning_rate": 0.00017073935386594706, "loss": 11.6662, "step": 35833 }, { "epoch": 0.7501046638198108, "grad_norm": 0.3151705861091614, "learning_rate": 0.00017073780412306147, "loss": 11.6695, "step": 35834 }, { "epoch": 0.7501255965837729, "grad_norm": 0.32260143756866455, "learning_rate": 0.0001707362543461708, "loss": 11.6669, "step": 35835 }, { "epoch": 0.7501465293477351, "grad_norm": 0.39809802174568176, "learning_rate": 0.0001707347045352758, "loss": 11.68, "step": 35836 }, { "epoch": 0.7501674621116973, "grad_norm": 0.28925973176956177, "learning_rate": 0.0001707331546903772, "loss": 11.6723, "step": 35837 }, { "epoch": 0.7501883948756594, "grad_norm": 0.37175220251083374, "learning_rate": 0.00017073160481147578, "loss": 11.6588, "step": 35838 }, { "epoch": 0.7502093276396216, "grad_norm": 0.26767244935035706, "learning_rate": 0.00017073005489857225, "loss": 11.6792, "step": 35839 }, { "epoch": 0.7502302604035836, "grad_norm": 0.3206498622894287, "learning_rate": 0.00017072850495166738, "loss": 11.656, "step": 35840 }, { "epoch": 0.7502511931675458, "grad_norm": 0.3351666033267975, "learning_rate": 0.00017072695497076187, "loss": 11.6685, "step": 35841 }, { "epoch": 0.750272125931508, "grad_norm": 0.3105296492576599, "learning_rate": 0.0001707254049558565, "loss": 11.6592, "step": 35842 }, { "epoch": 0.7502930586954701, "grad_norm": 0.2704389691352844, "learning_rate": 0.00017072385490695202, "loss": 11.6727, "step": 35843 }, { "epoch": 0.7503139914594323, "grad_norm": 0.33844852447509766, "learning_rate": 0.00017072230482404916, "loss": 11.6718, "step": 35844 }, { "epoch": 0.7503349242233944, "grad_norm": 0.2796694338321686, "learning_rate": 0.00017072075470714868, "loss": 11.6593, "step": 35845 }, { "epoch": 0.7503558569873566, "grad_norm": 0.24115589261054993, "learning_rate": 0.0001707192045562513, "loss": 11.6544, "step": 35846 }, { "epoch": 0.7503767897513187, "grad_norm": 0.28989169001579285, "learning_rate": 0.00017071765437135777, "loss": 11.6638, "step": 35847 }, { "epoch": 0.7503977225152809, "grad_norm": 0.3745371103286743, "learning_rate": 0.00017071610415246889, "loss": 11.6703, "step": 35848 }, { "epoch": 0.7504186552792431, "grad_norm": 0.25119510293006897, "learning_rate": 0.00017071455389958536, "loss": 11.6692, "step": 35849 }, { "epoch": 0.7504395880432052, "grad_norm": 0.41581088304519653, "learning_rate": 0.0001707130036127079, "loss": 11.6711, "step": 35850 }, { "epoch": 0.7504605208071674, "grad_norm": 0.3391117453575134, "learning_rate": 0.0001707114532918373, "loss": 11.6707, "step": 35851 }, { "epoch": 0.7504814535711295, "grad_norm": 0.3548978567123413, "learning_rate": 0.00017070990293697425, "loss": 11.6663, "step": 35852 }, { "epoch": 0.7505023863350917, "grad_norm": 0.3747021555900574, "learning_rate": 0.00017070835254811954, "loss": 11.6752, "step": 35853 }, { "epoch": 0.7505233190990538, "grad_norm": 0.4392734467983246, "learning_rate": 0.00017070680212527393, "loss": 11.662, "step": 35854 }, { "epoch": 0.750544251863016, "grad_norm": 0.3184124529361725, "learning_rate": 0.00017070525166843815, "loss": 11.6516, "step": 35855 }, { "epoch": 0.7505651846269782, "grad_norm": 0.3251815438270569, "learning_rate": 0.00017070370117761293, "loss": 11.6829, "step": 35856 }, { "epoch": 0.7505861173909403, "grad_norm": 0.29800945520401, "learning_rate": 0.000170702150652799, "loss": 11.6723, "step": 35857 }, { "epoch": 0.7506070501549025, "grad_norm": 0.24385714530944824, "learning_rate": 0.00017070060009399715, "loss": 11.6754, "step": 35858 }, { "epoch": 0.7506279829188646, "grad_norm": 0.35267648100852966, "learning_rate": 0.00017069904950120813, "loss": 11.6675, "step": 35859 }, { "epoch": 0.7506489156828268, "grad_norm": 0.2580684721469879, "learning_rate": 0.00017069749887443263, "loss": 11.6512, "step": 35860 }, { "epoch": 0.750669848446789, "grad_norm": 0.3365701735019684, "learning_rate": 0.0001706959482136714, "loss": 11.6746, "step": 35861 }, { "epoch": 0.7506907812107511, "grad_norm": 0.3054298460483551, "learning_rate": 0.00017069439751892526, "loss": 11.6687, "step": 35862 }, { "epoch": 0.7507117139747133, "grad_norm": 0.5554733872413635, "learning_rate": 0.0001706928467901949, "loss": 11.6778, "step": 35863 }, { "epoch": 0.7507326467386753, "grad_norm": 0.27227890491485596, "learning_rate": 0.00017069129602748103, "loss": 11.6691, "step": 35864 }, { "epoch": 0.7507535795026375, "grad_norm": 0.34691908955574036, "learning_rate": 0.0001706897452307845, "loss": 11.6735, "step": 35865 }, { "epoch": 0.7507745122665996, "grad_norm": 0.342308908700943, "learning_rate": 0.00017068819440010596, "loss": 11.6717, "step": 35866 }, { "epoch": 0.7507954450305618, "grad_norm": 0.37546002864837646, "learning_rate": 0.00017068664353544618, "loss": 11.6673, "step": 35867 }, { "epoch": 0.750816377794524, "grad_norm": 0.3295362591743469, "learning_rate": 0.00017068509263680592, "loss": 11.6497, "step": 35868 }, { "epoch": 0.7508373105584861, "grad_norm": 0.3214530646800995, "learning_rate": 0.00017068354170418594, "loss": 11.6699, "step": 35869 }, { "epoch": 0.7508582433224483, "grad_norm": 0.3490692675113678, "learning_rate": 0.00017068199073758691, "loss": 11.6659, "step": 35870 }, { "epoch": 0.7508791760864104, "grad_norm": 0.2841387391090393, "learning_rate": 0.0001706804397370097, "loss": 11.6707, "step": 35871 }, { "epoch": 0.7509001088503726, "grad_norm": 0.2659304141998291, "learning_rate": 0.00017067888870245495, "loss": 11.6608, "step": 35872 }, { "epoch": 0.7509210416143347, "grad_norm": 0.30693796277046204, "learning_rate": 0.00017067733763392342, "loss": 11.6539, "step": 35873 }, { "epoch": 0.7509419743782969, "grad_norm": 0.28047069907188416, "learning_rate": 0.0001706757865314159, "loss": 11.6628, "step": 35874 }, { "epoch": 0.7509629071422591, "grad_norm": 0.37906408309936523, "learning_rate": 0.00017067423539493315, "loss": 11.6659, "step": 35875 }, { "epoch": 0.7509838399062212, "grad_norm": 0.309436172246933, "learning_rate": 0.00017067268422447582, "loss": 11.6738, "step": 35876 }, { "epoch": 0.7510047726701834, "grad_norm": 0.3684021234512329, "learning_rate": 0.00017067113302004475, "loss": 11.6852, "step": 35877 }, { "epoch": 0.7510257054341455, "grad_norm": 0.36940109729766846, "learning_rate": 0.00017066958178164063, "loss": 11.6757, "step": 35878 }, { "epoch": 0.7510466381981077, "grad_norm": 0.2696022689342499, "learning_rate": 0.00017066803050926424, "loss": 11.668, "step": 35879 }, { "epoch": 0.7510675709620699, "grad_norm": 0.4722646474838257, "learning_rate": 0.0001706664792029163, "loss": 11.7034, "step": 35880 }, { "epoch": 0.751088503726032, "grad_norm": 0.25687330961227417, "learning_rate": 0.00017066492786259758, "loss": 11.6669, "step": 35881 }, { "epoch": 0.7511094364899942, "grad_norm": 0.25425365567207336, "learning_rate": 0.00017066337648830878, "loss": 11.6913, "step": 35882 }, { "epoch": 0.7511303692539563, "grad_norm": 0.26085060834884644, "learning_rate": 0.00017066182508005073, "loss": 11.6618, "step": 35883 }, { "epoch": 0.7511513020179185, "grad_norm": 0.3357151746749878, "learning_rate": 0.0001706602736378241, "loss": 11.6538, "step": 35884 }, { "epoch": 0.7511722347818806, "grad_norm": 0.27964767813682556, "learning_rate": 0.00017065872216162967, "loss": 11.6713, "step": 35885 }, { "epoch": 0.7511931675458428, "grad_norm": 0.2615993916988373, "learning_rate": 0.00017065717065146814, "loss": 11.6731, "step": 35886 }, { "epoch": 0.751214100309805, "grad_norm": 0.3326960504055023, "learning_rate": 0.00017065561910734034, "loss": 11.6577, "step": 35887 }, { "epoch": 0.751235033073767, "grad_norm": 0.2523491084575653, "learning_rate": 0.00017065406752924693, "loss": 11.6628, "step": 35888 }, { "epoch": 0.7512559658377292, "grad_norm": 0.2473219633102417, "learning_rate": 0.0001706525159171887, "loss": 11.6588, "step": 35889 }, { "epoch": 0.7512768986016913, "grad_norm": 0.3016607165336609, "learning_rate": 0.00017065096427116642, "loss": 11.6659, "step": 35890 }, { "epoch": 0.7512978313656535, "grad_norm": 0.32644668221473694, "learning_rate": 0.00017064941259118077, "loss": 11.6776, "step": 35891 }, { "epoch": 0.7513187641296156, "grad_norm": 0.287962406873703, "learning_rate": 0.00017064786087723257, "loss": 11.6745, "step": 35892 }, { "epoch": 0.7513396968935778, "grad_norm": 0.2715151309967041, "learning_rate": 0.0001706463091293225, "loss": 11.6684, "step": 35893 }, { "epoch": 0.75136062965754, "grad_norm": 0.35152488946914673, "learning_rate": 0.00017064475734745131, "loss": 11.6893, "step": 35894 }, { "epoch": 0.7513815624215021, "grad_norm": 0.34918561577796936, "learning_rate": 0.0001706432055316198, "loss": 11.6699, "step": 35895 }, { "epoch": 0.7514024951854643, "grad_norm": 0.25768545269966125, "learning_rate": 0.00017064165368182869, "loss": 11.6517, "step": 35896 }, { "epoch": 0.7514234279494264, "grad_norm": 0.27613475918769836, "learning_rate": 0.00017064010179807873, "loss": 11.6601, "step": 35897 }, { "epoch": 0.7514443607133886, "grad_norm": 0.2704591751098633, "learning_rate": 0.00017063854988037062, "loss": 11.6706, "step": 35898 }, { "epoch": 0.7514652934773508, "grad_norm": 0.2788948118686676, "learning_rate": 0.0001706369979287052, "loss": 11.6753, "step": 35899 }, { "epoch": 0.7514862262413129, "grad_norm": 0.2783948481082916, "learning_rate": 0.0001706354459430831, "loss": 11.6651, "step": 35900 }, { "epoch": 0.7515071590052751, "grad_norm": 0.2942907214164734, "learning_rate": 0.00017063389392350516, "loss": 11.6775, "step": 35901 }, { "epoch": 0.7515280917692372, "grad_norm": 0.30502375960350037, "learning_rate": 0.00017063234186997204, "loss": 11.6703, "step": 35902 }, { "epoch": 0.7515490245331994, "grad_norm": 0.41896241903305054, "learning_rate": 0.0001706307897824846, "loss": 11.6739, "step": 35903 }, { "epoch": 0.7515699572971615, "grad_norm": 0.2783319056034088, "learning_rate": 0.0001706292376610435, "loss": 11.6635, "step": 35904 }, { "epoch": 0.7515908900611237, "grad_norm": 0.2583133578300476, "learning_rate": 0.00017062768550564955, "loss": 11.6614, "step": 35905 }, { "epoch": 0.7516118228250859, "grad_norm": 0.36290332674980164, "learning_rate": 0.0001706261333163034, "loss": 11.6584, "step": 35906 }, { "epoch": 0.751632755589048, "grad_norm": 0.3168981969356537, "learning_rate": 0.00017062458109300588, "loss": 11.6672, "step": 35907 }, { "epoch": 0.7516536883530102, "grad_norm": 0.35181567072868347, "learning_rate": 0.00017062302883575768, "loss": 11.6739, "step": 35908 }, { "epoch": 0.7516746211169723, "grad_norm": 0.3101594150066376, "learning_rate": 0.0001706214765445596, "loss": 11.6707, "step": 35909 }, { "epoch": 0.7516955538809345, "grad_norm": 0.2826799750328064, "learning_rate": 0.00017061992421941235, "loss": 11.6715, "step": 35910 }, { "epoch": 0.7517164866448965, "grad_norm": 0.40327781438827515, "learning_rate": 0.0001706183718603167, "loss": 11.6651, "step": 35911 }, { "epoch": 0.7517374194088587, "grad_norm": 0.328616738319397, "learning_rate": 0.0001706168194672734, "loss": 11.6781, "step": 35912 }, { "epoch": 0.7517583521728209, "grad_norm": 0.34318748116493225, "learning_rate": 0.00017061526704028313, "loss": 11.6784, "step": 35913 }, { "epoch": 0.751779284936783, "grad_norm": 0.319878488779068, "learning_rate": 0.00017061371457934672, "loss": 11.6782, "step": 35914 }, { "epoch": 0.7518002177007452, "grad_norm": 0.23255594074726105, "learning_rate": 0.00017061216208446485, "loss": 11.6911, "step": 35915 }, { "epoch": 0.7518211504647073, "grad_norm": 0.3006382882595062, "learning_rate": 0.00017061060955563835, "loss": 11.6613, "step": 35916 }, { "epoch": 0.7518420832286695, "grad_norm": 0.2436751127243042, "learning_rate": 0.00017060905699286787, "loss": 11.6754, "step": 35917 }, { "epoch": 0.7518630159926317, "grad_norm": 0.34992027282714844, "learning_rate": 0.00017060750439615424, "loss": 11.6825, "step": 35918 }, { "epoch": 0.7518839487565938, "grad_norm": 0.2812798321247101, "learning_rate": 0.00017060595176549814, "loss": 11.6672, "step": 35919 }, { "epoch": 0.751904881520556, "grad_norm": 0.31314817070961, "learning_rate": 0.00017060439910090037, "loss": 11.6565, "step": 35920 }, { "epoch": 0.7519258142845181, "grad_norm": 0.30350741744041443, "learning_rate": 0.0001706028464023616, "loss": 11.6698, "step": 35921 }, { "epoch": 0.7519467470484803, "grad_norm": 0.35861194133758545, "learning_rate": 0.0001706012936698827, "loss": 11.6658, "step": 35922 }, { "epoch": 0.7519676798124424, "grad_norm": 0.33716490864753723, "learning_rate": 0.00017059974090346427, "loss": 11.6735, "step": 35923 }, { "epoch": 0.7519886125764046, "grad_norm": 0.3551047146320343, "learning_rate": 0.00017059818810310717, "loss": 11.6726, "step": 35924 }, { "epoch": 0.7520095453403668, "grad_norm": 0.29771819710731506, "learning_rate": 0.0001705966352688121, "loss": 11.6774, "step": 35925 }, { "epoch": 0.7520304781043289, "grad_norm": 0.27278146147727966, "learning_rate": 0.0001705950824005798, "loss": 11.6643, "step": 35926 }, { "epoch": 0.7520514108682911, "grad_norm": 0.3578847646713257, "learning_rate": 0.00017059352949841104, "loss": 11.6685, "step": 35927 }, { "epoch": 0.7520723436322532, "grad_norm": 0.2678295075893402, "learning_rate": 0.00017059197656230657, "loss": 11.6746, "step": 35928 }, { "epoch": 0.7520932763962154, "grad_norm": 0.42816734313964844, "learning_rate": 0.00017059042359226712, "loss": 11.6734, "step": 35929 }, { "epoch": 0.7521142091601775, "grad_norm": 0.28270798921585083, "learning_rate": 0.0001705888705882934, "loss": 11.6819, "step": 35930 }, { "epoch": 0.7521351419241397, "grad_norm": 0.30018311738967896, "learning_rate": 0.00017058731755038623, "loss": 11.674, "step": 35931 }, { "epoch": 0.7521560746881019, "grad_norm": 0.3266603648662567, "learning_rate": 0.0001705857644785463, "loss": 11.6741, "step": 35932 }, { "epoch": 0.752177007452064, "grad_norm": 0.4221355617046356, "learning_rate": 0.0001705842113727744, "loss": 11.6628, "step": 35933 }, { "epoch": 0.7521979402160262, "grad_norm": 0.2630273401737213, "learning_rate": 0.00017058265823307125, "loss": 11.6732, "step": 35934 }, { "epoch": 0.7522188729799882, "grad_norm": 0.34533610939979553, "learning_rate": 0.00017058110505943758, "loss": 11.673, "step": 35935 }, { "epoch": 0.7522398057439504, "grad_norm": 0.3012472689151764, "learning_rate": 0.0001705795518518742, "loss": 11.6701, "step": 35936 }, { "epoch": 0.7522607385079125, "grad_norm": 0.3319011330604553, "learning_rate": 0.00017057799861038177, "loss": 11.6615, "step": 35937 }, { "epoch": 0.7522816712718747, "grad_norm": 0.3074733018875122, "learning_rate": 0.0001705764453349611, "loss": 11.6669, "step": 35938 }, { "epoch": 0.7523026040358369, "grad_norm": 0.3243635594844818, "learning_rate": 0.00017057489202561293, "loss": 11.6776, "step": 35939 }, { "epoch": 0.752323536799799, "grad_norm": 0.3608187735080719, "learning_rate": 0.00017057333868233802, "loss": 11.6582, "step": 35940 }, { "epoch": 0.7523444695637612, "grad_norm": 0.2752807140350342, "learning_rate": 0.00017057178530513703, "loss": 11.6569, "step": 35941 }, { "epoch": 0.7523654023277233, "grad_norm": 0.2986081540584564, "learning_rate": 0.0001705702318940108, "loss": 11.6676, "step": 35942 }, { "epoch": 0.7523863350916855, "grad_norm": 0.2933773100376129, "learning_rate": 0.00017056867844896003, "loss": 11.6674, "step": 35943 }, { "epoch": 0.7524072678556477, "grad_norm": 0.2866809666156769, "learning_rate": 0.0001705671249699855, "loss": 11.6614, "step": 35944 }, { "epoch": 0.7524282006196098, "grad_norm": 0.2883074879646301, "learning_rate": 0.0001705655714570879, "loss": 11.6687, "step": 35945 }, { "epoch": 0.752449133383572, "grad_norm": 0.34096914529800415, "learning_rate": 0.0001705640179102681, "loss": 11.6628, "step": 35946 }, { "epoch": 0.7524700661475341, "grad_norm": 0.2976933717727661, "learning_rate": 0.0001705624643295267, "loss": 11.6813, "step": 35947 }, { "epoch": 0.7524909989114963, "grad_norm": 0.3674238324165344, "learning_rate": 0.00017056091071486452, "loss": 11.6739, "step": 35948 }, { "epoch": 0.7525119316754584, "grad_norm": 0.3326709270477295, "learning_rate": 0.00017055935706628228, "loss": 11.6766, "step": 35949 }, { "epoch": 0.7525328644394206, "grad_norm": 0.28329089283943176, "learning_rate": 0.00017055780338378078, "loss": 11.6609, "step": 35950 }, { "epoch": 0.7525537972033828, "grad_norm": 0.3136393427848816, "learning_rate": 0.0001705562496673607, "loss": 11.666, "step": 35951 }, { "epoch": 0.7525747299673449, "grad_norm": 0.3847963809967041, "learning_rate": 0.00017055469591702283, "loss": 11.6608, "step": 35952 }, { "epoch": 0.7525956627313071, "grad_norm": 0.306893914937973, "learning_rate": 0.00017055314213276792, "loss": 11.6507, "step": 35953 }, { "epoch": 0.7526165954952692, "grad_norm": 0.3315337300300598, "learning_rate": 0.00017055158831459665, "loss": 11.6773, "step": 35954 }, { "epoch": 0.7526375282592314, "grad_norm": 0.30049261450767517, "learning_rate": 0.00017055003446250986, "loss": 11.6663, "step": 35955 }, { "epoch": 0.7526584610231934, "grad_norm": 0.23916883766651154, "learning_rate": 0.00017054848057650824, "loss": 11.6846, "step": 35956 }, { "epoch": 0.7526793937871556, "grad_norm": 0.3234044015407562, "learning_rate": 0.00017054692665659254, "loss": 11.6581, "step": 35957 }, { "epoch": 0.7527003265511178, "grad_norm": 0.25871944427490234, "learning_rate": 0.00017054537270276354, "loss": 11.6524, "step": 35958 }, { "epoch": 0.7527212593150799, "grad_norm": 0.44739583134651184, "learning_rate": 0.00017054381871502196, "loss": 11.6895, "step": 35959 }, { "epoch": 0.7527421920790421, "grad_norm": 0.25592538714408875, "learning_rate": 0.00017054226469336855, "loss": 11.6686, "step": 35960 }, { "epoch": 0.7527631248430042, "grad_norm": 0.30125120282173157, "learning_rate": 0.00017054071063780409, "loss": 11.6769, "step": 35961 }, { "epoch": 0.7527840576069664, "grad_norm": 0.2744368314743042, "learning_rate": 0.00017053915654832925, "loss": 11.6731, "step": 35962 }, { "epoch": 0.7528049903709286, "grad_norm": 0.3424280285835266, "learning_rate": 0.00017053760242494485, "loss": 11.6659, "step": 35963 }, { "epoch": 0.7528259231348907, "grad_norm": 0.298911452293396, "learning_rate": 0.0001705360482676516, "loss": 11.6817, "step": 35964 }, { "epoch": 0.7528468558988529, "grad_norm": 0.3185688555240631, "learning_rate": 0.00017053449407645026, "loss": 11.6833, "step": 35965 }, { "epoch": 0.752867788662815, "grad_norm": 0.3203486502170563, "learning_rate": 0.00017053293985134157, "loss": 11.6646, "step": 35966 }, { "epoch": 0.7528887214267772, "grad_norm": 0.29081153869628906, "learning_rate": 0.0001705313855923263, "loss": 11.6748, "step": 35967 }, { "epoch": 0.7529096541907393, "grad_norm": 0.30909135937690735, "learning_rate": 0.00017052983129940517, "loss": 11.6571, "step": 35968 }, { "epoch": 0.7529305869547015, "grad_norm": 0.356545090675354, "learning_rate": 0.0001705282769725789, "loss": 11.6617, "step": 35969 }, { "epoch": 0.7529515197186637, "grad_norm": 0.2561669647693634, "learning_rate": 0.00017052672261184832, "loss": 11.6601, "step": 35970 }, { "epoch": 0.7529724524826258, "grad_norm": 0.28512293100357056, "learning_rate": 0.00017052516821721412, "loss": 11.6713, "step": 35971 }, { "epoch": 0.752993385246588, "grad_norm": 0.356089323759079, "learning_rate": 0.00017052361378867706, "loss": 11.6663, "step": 35972 }, { "epoch": 0.7530143180105501, "grad_norm": 0.2921696603298187, "learning_rate": 0.0001705220593262379, "loss": 11.6574, "step": 35973 }, { "epoch": 0.7530352507745123, "grad_norm": 0.3042115569114685, "learning_rate": 0.00017052050482989733, "loss": 11.6681, "step": 35974 }, { "epoch": 0.7530561835384744, "grad_norm": 0.32888707518577576, "learning_rate": 0.00017051895029965617, "loss": 11.6545, "step": 35975 }, { "epoch": 0.7530771163024366, "grad_norm": 0.41198521852493286, "learning_rate": 0.00017051739573551515, "loss": 11.6663, "step": 35976 }, { "epoch": 0.7530980490663988, "grad_norm": 0.3695579171180725, "learning_rate": 0.00017051584113747498, "loss": 11.6729, "step": 35977 }, { "epoch": 0.7531189818303609, "grad_norm": 0.24550187587738037, "learning_rate": 0.00017051428650553642, "loss": 11.6602, "step": 35978 }, { "epoch": 0.7531399145943231, "grad_norm": 0.3149205446243286, "learning_rate": 0.00017051273183970027, "loss": 11.6647, "step": 35979 }, { "epoch": 0.7531608473582851, "grad_norm": 0.2591232657432556, "learning_rate": 0.00017051117713996723, "loss": 11.6746, "step": 35980 }, { "epoch": 0.7531817801222473, "grad_norm": 0.29510608315467834, "learning_rate": 0.000170509622406338, "loss": 11.6814, "step": 35981 }, { "epoch": 0.7532027128862095, "grad_norm": 0.2987681031227112, "learning_rate": 0.00017050806763881343, "loss": 11.6681, "step": 35982 }, { "epoch": 0.7532236456501716, "grad_norm": 0.28897905349731445, "learning_rate": 0.00017050651283739422, "loss": 11.6742, "step": 35983 }, { "epoch": 0.7532445784141338, "grad_norm": 0.2761997878551483, "learning_rate": 0.0001705049580020811, "loss": 11.6689, "step": 35984 }, { "epoch": 0.7532655111780959, "grad_norm": 0.2484450489282608, "learning_rate": 0.00017050340313287484, "loss": 11.6637, "step": 35985 }, { "epoch": 0.7532864439420581, "grad_norm": 0.2726197838783264, "learning_rate": 0.0001705018482297762, "loss": 11.6772, "step": 35986 }, { "epoch": 0.7533073767060202, "grad_norm": 0.32332760095596313, "learning_rate": 0.00017050029329278586, "loss": 11.6642, "step": 35987 }, { "epoch": 0.7533283094699824, "grad_norm": 0.29825448989868164, "learning_rate": 0.0001704987383219047, "loss": 11.6685, "step": 35988 }, { "epoch": 0.7533492422339446, "grad_norm": 0.28082790970802307, "learning_rate": 0.0001704971833171333, "loss": 11.6825, "step": 35989 }, { "epoch": 0.7533701749979067, "grad_norm": 0.2366330772638321, "learning_rate": 0.00017049562827847251, "loss": 11.6552, "step": 35990 }, { "epoch": 0.7533911077618689, "grad_norm": 0.35486921668052673, "learning_rate": 0.00017049407320592308, "loss": 11.6638, "step": 35991 }, { "epoch": 0.753412040525831, "grad_norm": 0.32431331276893616, "learning_rate": 0.0001704925180994857, "loss": 11.6551, "step": 35992 }, { "epoch": 0.7534329732897932, "grad_norm": 0.2776777148246765, "learning_rate": 0.0001704909629591612, "loss": 11.6748, "step": 35993 }, { "epoch": 0.7534539060537553, "grad_norm": 0.26511985063552856, "learning_rate": 0.00017048940778495027, "loss": 11.6711, "step": 35994 }, { "epoch": 0.7534748388177175, "grad_norm": 0.2684192955493927, "learning_rate": 0.00017048785257685366, "loss": 11.6637, "step": 35995 }, { "epoch": 0.7534957715816797, "grad_norm": 0.33502304553985596, "learning_rate": 0.0001704862973348721, "loss": 11.6829, "step": 35996 }, { "epoch": 0.7535167043456418, "grad_norm": 0.26134300231933594, "learning_rate": 0.0001704847420590064, "loss": 11.6562, "step": 35997 }, { "epoch": 0.753537637109604, "grad_norm": 0.2554415762424469, "learning_rate": 0.0001704831867492573, "loss": 11.68, "step": 35998 }, { "epoch": 0.7535585698735661, "grad_norm": 0.2942545711994171, "learning_rate": 0.00017048163140562545, "loss": 11.6729, "step": 35999 }, { "epoch": 0.7535795026375283, "grad_norm": 0.33546358346939087, "learning_rate": 0.00017048007602811174, "loss": 11.6737, "step": 36000 }, { "epoch": 0.7535795026375283, "eval_loss": 11.6700439453125, "eval_runtime": 34.3265, "eval_samples_per_second": 27.996, "eval_steps_per_second": 7.021, "step": 36000 }, { "epoch": 0.7536004354014905, "grad_norm": 0.47079363465309143, "learning_rate": 0.0001704785206167168, "loss": 11.6706, "step": 36001 }, { "epoch": 0.7536213681654526, "grad_norm": 0.2631748616695404, "learning_rate": 0.00017047696517144144, "loss": 11.6787, "step": 36002 }, { "epoch": 0.7536423009294148, "grad_norm": 0.2841275632381439, "learning_rate": 0.00017047540969228635, "loss": 11.6514, "step": 36003 }, { "epoch": 0.7536632336933768, "grad_norm": 0.274432897567749, "learning_rate": 0.00017047385417925235, "loss": 11.6547, "step": 36004 }, { "epoch": 0.753684166457339, "grad_norm": 0.31348031759262085, "learning_rate": 0.00017047229863234015, "loss": 11.6648, "step": 36005 }, { "epoch": 0.7537050992213011, "grad_norm": 0.2676912546157837, "learning_rate": 0.0001704707430515505, "loss": 11.672, "step": 36006 }, { "epoch": 0.7537260319852633, "grad_norm": 0.24712465703487396, "learning_rate": 0.00017046918743688418, "loss": 11.6594, "step": 36007 }, { "epoch": 0.7537469647492255, "grad_norm": 0.2764896750450134, "learning_rate": 0.00017046763178834188, "loss": 11.6689, "step": 36008 }, { "epoch": 0.7537678975131876, "grad_norm": 0.3254071772098541, "learning_rate": 0.00017046607610592438, "loss": 11.6673, "step": 36009 }, { "epoch": 0.7537888302771498, "grad_norm": 0.2593710124492645, "learning_rate": 0.00017046452038963243, "loss": 11.6583, "step": 36010 }, { "epoch": 0.7538097630411119, "grad_norm": 0.34215500950813293, "learning_rate": 0.00017046296463946677, "loss": 11.6588, "step": 36011 }, { "epoch": 0.7538306958050741, "grad_norm": 0.27914732694625854, "learning_rate": 0.00017046140885542817, "loss": 11.6767, "step": 36012 }, { "epoch": 0.7538516285690362, "grad_norm": 0.31992998719215393, "learning_rate": 0.00017045985303751733, "loss": 11.6834, "step": 36013 }, { "epoch": 0.7538725613329984, "grad_norm": 0.3433693051338196, "learning_rate": 0.00017045829718573505, "loss": 11.6682, "step": 36014 }, { "epoch": 0.7538934940969606, "grad_norm": 0.2724977731704712, "learning_rate": 0.000170456741300082, "loss": 11.6684, "step": 36015 }, { "epoch": 0.7539144268609227, "grad_norm": 0.2963431775569916, "learning_rate": 0.00017045518538055907, "loss": 11.6556, "step": 36016 }, { "epoch": 0.7539353596248849, "grad_norm": 0.37392833828926086, "learning_rate": 0.00017045362942716687, "loss": 11.6728, "step": 36017 }, { "epoch": 0.753956292388847, "grad_norm": 0.4337664842605591, "learning_rate": 0.0001704520734399062, "loss": 11.6665, "step": 36018 }, { "epoch": 0.7539772251528092, "grad_norm": 0.333501935005188, "learning_rate": 0.00017045051741877782, "loss": 11.6808, "step": 36019 }, { "epoch": 0.7539981579167714, "grad_norm": 0.32966551184654236, "learning_rate": 0.00017044896136378244, "loss": 11.6732, "step": 36020 }, { "epoch": 0.7540190906807335, "grad_norm": 0.3369203805923462, "learning_rate": 0.00017044740527492086, "loss": 11.6795, "step": 36021 }, { "epoch": 0.7540400234446957, "grad_norm": 0.4040283262729645, "learning_rate": 0.00017044584915219376, "loss": 11.6753, "step": 36022 }, { "epoch": 0.7540609562086578, "grad_norm": 0.30351483821868896, "learning_rate": 0.000170444292995602, "loss": 11.6689, "step": 36023 }, { "epoch": 0.75408188897262, "grad_norm": 0.29408785700798035, "learning_rate": 0.0001704427368051462, "loss": 11.657, "step": 36024 }, { "epoch": 0.754102821736582, "grad_norm": 0.24384862184524536, "learning_rate": 0.00017044118058082716, "loss": 11.676, "step": 36025 }, { "epoch": 0.7541237545005443, "grad_norm": 0.2859691381454468, "learning_rate": 0.00017043962432264565, "loss": 11.6491, "step": 36026 }, { "epoch": 0.7541446872645065, "grad_norm": 0.33793193101882935, "learning_rate": 0.00017043806803060242, "loss": 11.6678, "step": 36027 }, { "epoch": 0.7541656200284685, "grad_norm": 0.32660284638404846, "learning_rate": 0.0001704365117046982, "loss": 11.6732, "step": 36028 }, { "epoch": 0.7541865527924307, "grad_norm": 0.2941240966320038, "learning_rate": 0.0001704349553449337, "loss": 11.6576, "step": 36029 }, { "epoch": 0.7542074855563928, "grad_norm": 0.3313888609409332, "learning_rate": 0.00017043339895130973, "loss": 11.6766, "step": 36030 }, { "epoch": 0.754228418320355, "grad_norm": 0.2754700779914856, "learning_rate": 0.000170431842523827, "loss": 11.6732, "step": 36031 }, { "epoch": 0.7542493510843171, "grad_norm": 0.2914373576641083, "learning_rate": 0.0001704302860624863, "loss": 11.6539, "step": 36032 }, { "epoch": 0.7542702838482793, "grad_norm": 0.33096054196357727, "learning_rate": 0.0001704287295672883, "loss": 11.6629, "step": 36033 }, { "epoch": 0.7542912166122415, "grad_norm": 0.2946828305721283, "learning_rate": 0.00017042717303823385, "loss": 11.6769, "step": 36034 }, { "epoch": 0.7543121493762036, "grad_norm": 0.3936012387275696, "learning_rate": 0.00017042561647532362, "loss": 11.6733, "step": 36035 }, { "epoch": 0.7543330821401658, "grad_norm": 0.2759968936443329, "learning_rate": 0.00017042405987855838, "loss": 11.6745, "step": 36036 }, { "epoch": 0.7543540149041279, "grad_norm": 0.24143411219120026, "learning_rate": 0.00017042250324793892, "loss": 11.6548, "step": 36037 }, { "epoch": 0.7543749476680901, "grad_norm": 0.32910656929016113, "learning_rate": 0.0001704209465834659, "loss": 11.6624, "step": 36038 }, { "epoch": 0.7543958804320523, "grad_norm": 0.29545313119888306, "learning_rate": 0.00017041938988514015, "loss": 11.6732, "step": 36039 }, { "epoch": 0.7544168131960144, "grad_norm": 0.29629069566726685, "learning_rate": 0.00017041783315296238, "loss": 11.6538, "step": 36040 }, { "epoch": 0.7544377459599766, "grad_norm": 0.2461668998003006, "learning_rate": 0.00017041627638693334, "loss": 11.6606, "step": 36041 }, { "epoch": 0.7544586787239387, "grad_norm": 0.23788557946681976, "learning_rate": 0.0001704147195870538, "loss": 11.6803, "step": 36042 }, { "epoch": 0.7544796114879009, "grad_norm": 0.3459590971469879, "learning_rate": 0.00017041316275332447, "loss": 11.6691, "step": 36043 }, { "epoch": 0.754500544251863, "grad_norm": 0.330685555934906, "learning_rate": 0.00017041160588574618, "loss": 11.6816, "step": 36044 }, { "epoch": 0.7545214770158252, "grad_norm": 0.2627783715724945, "learning_rate": 0.00017041004898431955, "loss": 11.666, "step": 36045 }, { "epoch": 0.7545424097797874, "grad_norm": 0.34488341212272644, "learning_rate": 0.00017040849204904543, "loss": 11.6739, "step": 36046 }, { "epoch": 0.7545633425437495, "grad_norm": 0.27031373977661133, "learning_rate": 0.00017040693507992452, "loss": 11.6658, "step": 36047 }, { "epoch": 0.7545842753077117, "grad_norm": 0.32220664620399475, "learning_rate": 0.0001704053780769576, "loss": 11.6778, "step": 36048 }, { "epoch": 0.7546052080716737, "grad_norm": 0.34916946291923523, "learning_rate": 0.00017040382104014542, "loss": 11.668, "step": 36049 }, { "epoch": 0.754626140835636, "grad_norm": 0.2982726991176605, "learning_rate": 0.00017040226396948868, "loss": 11.6653, "step": 36050 }, { "epoch": 0.754647073599598, "grad_norm": 0.29498517513275146, "learning_rate": 0.0001704007068649882, "loss": 11.6769, "step": 36051 }, { "epoch": 0.7546680063635602, "grad_norm": 0.3160424530506134, "learning_rate": 0.00017039914972664466, "loss": 11.6695, "step": 36052 }, { "epoch": 0.7546889391275224, "grad_norm": 0.23622027039527893, "learning_rate": 0.00017039759255445882, "loss": 11.6719, "step": 36053 }, { "epoch": 0.7547098718914845, "grad_norm": 0.2581103444099426, "learning_rate": 0.00017039603534843147, "loss": 11.6675, "step": 36054 }, { "epoch": 0.7547308046554467, "grad_norm": 0.25884872674942017, "learning_rate": 0.00017039447810856336, "loss": 11.6718, "step": 36055 }, { "epoch": 0.7547517374194088, "grad_norm": 1.1899060010910034, "learning_rate": 0.00017039292083485518, "loss": 11.6467, "step": 36056 }, { "epoch": 0.754772670183371, "grad_norm": 0.37305712699890137, "learning_rate": 0.00017039136352730772, "loss": 11.662, "step": 36057 }, { "epoch": 0.7547936029473332, "grad_norm": 0.2961595356464386, "learning_rate": 0.00017038980618592172, "loss": 11.6646, "step": 36058 }, { "epoch": 0.7548145357112953, "grad_norm": 0.23487265408039093, "learning_rate": 0.00017038824881069792, "loss": 11.6711, "step": 36059 }, { "epoch": 0.7548354684752575, "grad_norm": 0.2874479591846466, "learning_rate": 0.00017038669140163708, "loss": 11.6723, "step": 36060 }, { "epoch": 0.7548564012392196, "grad_norm": 0.404336154460907, "learning_rate": 0.00017038513395874, "loss": 11.6656, "step": 36061 }, { "epoch": 0.7548773340031818, "grad_norm": 0.2645518183708191, "learning_rate": 0.00017038357648200728, "loss": 11.668, "step": 36062 }, { "epoch": 0.7548982667671439, "grad_norm": 0.2997690737247467, "learning_rate": 0.00017038201897143983, "loss": 11.6796, "step": 36063 }, { "epoch": 0.7549191995311061, "grad_norm": 0.27830609679222107, "learning_rate": 0.00017038046142703832, "loss": 11.6837, "step": 36064 }, { "epoch": 0.7549401322950683, "grad_norm": 0.2681579291820526, "learning_rate": 0.0001703789038488035, "loss": 11.6764, "step": 36065 }, { "epoch": 0.7549610650590304, "grad_norm": 0.2775689959526062, "learning_rate": 0.00017037734623673615, "loss": 11.697, "step": 36066 }, { "epoch": 0.7549819978229926, "grad_norm": 0.2835332751274109, "learning_rate": 0.00017037578859083698, "loss": 11.6862, "step": 36067 }, { "epoch": 0.7550029305869547, "grad_norm": 0.3355284333229065, "learning_rate": 0.00017037423091110677, "loss": 11.6824, "step": 36068 }, { "epoch": 0.7550238633509169, "grad_norm": 0.3412368893623352, "learning_rate": 0.00017037267319754625, "loss": 11.6764, "step": 36069 }, { "epoch": 0.755044796114879, "grad_norm": 0.3199652433395386, "learning_rate": 0.0001703711154501562, "loss": 11.6762, "step": 36070 }, { "epoch": 0.7550657288788412, "grad_norm": 0.3303194046020508, "learning_rate": 0.0001703695576689373, "loss": 11.6703, "step": 36071 }, { "epoch": 0.7550866616428034, "grad_norm": 0.2858184278011322, "learning_rate": 0.00017036799985389037, "loss": 11.6763, "step": 36072 }, { "epoch": 0.7551075944067654, "grad_norm": 0.3319416046142578, "learning_rate": 0.00017036644200501617, "loss": 11.6788, "step": 36073 }, { "epoch": 0.7551285271707276, "grad_norm": 0.34899961948394775, "learning_rate": 0.00017036488412231534, "loss": 11.6643, "step": 36074 }, { "epoch": 0.7551494599346897, "grad_norm": 0.28427654504776, "learning_rate": 0.00017036332620578874, "loss": 11.6677, "step": 36075 }, { "epoch": 0.7551703926986519, "grad_norm": 0.3196268379688263, "learning_rate": 0.00017036176825543708, "loss": 11.6725, "step": 36076 }, { "epoch": 0.7551913254626141, "grad_norm": 0.28253474831581116, "learning_rate": 0.0001703602102712611, "loss": 11.6647, "step": 36077 }, { "epoch": 0.7552122582265762, "grad_norm": 0.3115803897380829, "learning_rate": 0.00017035865225326156, "loss": 11.681, "step": 36078 }, { "epoch": 0.7552331909905384, "grad_norm": 0.3265114724636078, "learning_rate": 0.0001703570942014392, "loss": 11.6633, "step": 36079 }, { "epoch": 0.7552541237545005, "grad_norm": 0.2641846239566803, "learning_rate": 0.00017035553611579476, "loss": 11.6662, "step": 36080 }, { "epoch": 0.7552750565184627, "grad_norm": 0.29408419132232666, "learning_rate": 0.000170353977996329, "loss": 11.675, "step": 36081 }, { "epoch": 0.7552959892824248, "grad_norm": 0.2753238081932068, "learning_rate": 0.0001703524198430427, "loss": 11.6602, "step": 36082 }, { "epoch": 0.755316922046387, "grad_norm": 0.32093140482902527, "learning_rate": 0.00017035086165593662, "loss": 11.6616, "step": 36083 }, { "epoch": 0.7553378548103492, "grad_norm": 0.26065120100975037, "learning_rate": 0.0001703493034350114, "loss": 11.6753, "step": 36084 }, { "epoch": 0.7553587875743113, "grad_norm": 0.4423089623451233, "learning_rate": 0.0001703477451802679, "loss": 11.6594, "step": 36085 }, { "epoch": 0.7553797203382735, "grad_norm": 0.3663363754749298, "learning_rate": 0.0001703461868917068, "loss": 11.6716, "step": 36086 }, { "epoch": 0.7554006531022356, "grad_norm": 0.34081247448921204, "learning_rate": 0.0001703446285693289, "loss": 11.6682, "step": 36087 }, { "epoch": 0.7554215858661978, "grad_norm": 0.2913220226764679, "learning_rate": 0.0001703430702131349, "loss": 11.6656, "step": 36088 }, { "epoch": 0.7554425186301599, "grad_norm": 0.28618159890174866, "learning_rate": 0.00017034151182312562, "loss": 11.6715, "step": 36089 }, { "epoch": 0.7554634513941221, "grad_norm": 0.2735610604286194, "learning_rate": 0.00017033995339930176, "loss": 11.6529, "step": 36090 }, { "epoch": 0.7554843841580843, "grad_norm": 0.354046493768692, "learning_rate": 0.00017033839494166403, "loss": 11.6796, "step": 36091 }, { "epoch": 0.7555053169220464, "grad_norm": 0.24252888560295105, "learning_rate": 0.00017033683645021323, "loss": 11.6559, "step": 36092 }, { "epoch": 0.7555262496860086, "grad_norm": 0.2694252133369446, "learning_rate": 0.00017033527792495017, "loss": 11.6675, "step": 36093 }, { "epoch": 0.7555471824499707, "grad_norm": 0.2734922170639038, "learning_rate": 0.00017033371936587549, "loss": 11.6604, "step": 36094 }, { "epoch": 0.7555681152139329, "grad_norm": 0.3455670177936554, "learning_rate": 0.00017033216077298998, "loss": 11.67, "step": 36095 }, { "epoch": 0.7555890479778951, "grad_norm": 0.29576435685157776, "learning_rate": 0.00017033060214629434, "loss": 11.6815, "step": 36096 }, { "epoch": 0.7556099807418571, "grad_norm": 0.3386550843715668, "learning_rate": 0.00017032904348578945, "loss": 11.6694, "step": 36097 }, { "epoch": 0.7556309135058193, "grad_norm": 0.2392713725566864, "learning_rate": 0.00017032748479147594, "loss": 11.6601, "step": 36098 }, { "epoch": 0.7556518462697814, "grad_norm": 0.3896838426589966, "learning_rate": 0.00017032592606335461, "loss": 11.6697, "step": 36099 }, { "epoch": 0.7556727790337436, "grad_norm": 0.23423759639263153, "learning_rate": 0.0001703243673014262, "loss": 11.6718, "step": 36100 }, { "epoch": 0.7556937117977057, "grad_norm": 0.2910193204879761, "learning_rate": 0.00017032280850569145, "loss": 11.674, "step": 36101 }, { "epoch": 0.7557146445616679, "grad_norm": 0.2994016110897064, "learning_rate": 0.00017032124967615112, "loss": 11.664, "step": 36102 }, { "epoch": 0.7557355773256301, "grad_norm": 0.2343241423368454, "learning_rate": 0.00017031969081280596, "loss": 11.6454, "step": 36103 }, { "epoch": 0.7557565100895922, "grad_norm": 0.3084678649902344, "learning_rate": 0.00017031813191565668, "loss": 11.6679, "step": 36104 }, { "epoch": 0.7557774428535544, "grad_norm": 0.3580176532268524, "learning_rate": 0.00017031657298470414, "loss": 11.6629, "step": 36105 }, { "epoch": 0.7557983756175165, "grad_norm": 0.2770105302333832, "learning_rate": 0.00017031501401994895, "loss": 11.6642, "step": 36106 }, { "epoch": 0.7558193083814787, "grad_norm": 0.2449626624584198, "learning_rate": 0.00017031345502139194, "loss": 11.6679, "step": 36107 }, { "epoch": 0.7558402411454408, "grad_norm": 0.29308250546455383, "learning_rate": 0.00017031189598903386, "loss": 11.6725, "step": 36108 }, { "epoch": 0.755861173909403, "grad_norm": 0.3020628094673157, "learning_rate": 0.0001703103369228754, "loss": 11.671, "step": 36109 }, { "epoch": 0.7558821066733652, "grad_norm": 0.3362700641155243, "learning_rate": 0.0001703087778229174, "loss": 11.6698, "step": 36110 }, { "epoch": 0.7559030394373273, "grad_norm": 0.26035717129707336, "learning_rate": 0.00017030721868916055, "loss": 11.6693, "step": 36111 }, { "epoch": 0.7559239722012895, "grad_norm": 0.31652435660362244, "learning_rate": 0.00017030565952160559, "loss": 11.6652, "step": 36112 }, { "epoch": 0.7559449049652516, "grad_norm": 0.36310839653015137, "learning_rate": 0.00017030410032025333, "loss": 11.6668, "step": 36113 }, { "epoch": 0.7559658377292138, "grad_norm": 0.2518608868122101, "learning_rate": 0.00017030254108510443, "loss": 11.6855, "step": 36114 }, { "epoch": 0.7559867704931759, "grad_norm": 0.31934061646461487, "learning_rate": 0.00017030098181615972, "loss": 11.6522, "step": 36115 }, { "epoch": 0.7560077032571381, "grad_norm": 0.3294576108455658, "learning_rate": 0.0001702994225134199, "loss": 11.6625, "step": 36116 }, { "epoch": 0.7560286360211003, "grad_norm": 0.2954123914241791, "learning_rate": 0.00017029786317688576, "loss": 11.6924, "step": 36117 }, { "epoch": 0.7560495687850624, "grad_norm": 0.3333481550216675, "learning_rate": 0.00017029630380655802, "loss": 11.6758, "step": 36118 }, { "epoch": 0.7560705015490246, "grad_norm": 0.33902207016944885, "learning_rate": 0.00017029474440243743, "loss": 11.6706, "step": 36119 }, { "epoch": 0.7560914343129866, "grad_norm": 0.2967073917388916, "learning_rate": 0.00017029318496452475, "loss": 11.6767, "step": 36120 }, { "epoch": 0.7561123670769488, "grad_norm": 0.3349306583404541, "learning_rate": 0.00017029162549282077, "loss": 11.655, "step": 36121 }, { "epoch": 0.756133299840911, "grad_norm": 0.28234052658081055, "learning_rate": 0.00017029006598732614, "loss": 11.682, "step": 36122 }, { "epoch": 0.7561542326048731, "grad_norm": 0.29944396018981934, "learning_rate": 0.00017028850644804167, "loss": 11.667, "step": 36123 }, { "epoch": 0.7561751653688353, "grad_norm": 0.27524998784065247, "learning_rate": 0.00017028694687496815, "loss": 11.6651, "step": 36124 }, { "epoch": 0.7561960981327974, "grad_norm": 0.29477497935295105, "learning_rate": 0.00017028538726810626, "loss": 11.676, "step": 36125 }, { "epoch": 0.7562170308967596, "grad_norm": 0.27842825651168823, "learning_rate": 0.00017028382762745677, "loss": 11.6647, "step": 36126 }, { "epoch": 0.7562379636607217, "grad_norm": 0.26302000880241394, "learning_rate": 0.00017028226795302046, "loss": 11.6656, "step": 36127 }, { "epoch": 0.7562588964246839, "grad_norm": 0.30738094449043274, "learning_rate": 0.00017028070824479802, "loss": 11.6602, "step": 36128 }, { "epoch": 0.7562798291886461, "grad_norm": 0.24687011539936066, "learning_rate": 0.00017027914850279024, "loss": 11.6863, "step": 36129 }, { "epoch": 0.7563007619526082, "grad_norm": 0.39255568385124207, "learning_rate": 0.00017027758872699788, "loss": 11.6833, "step": 36130 }, { "epoch": 0.7563216947165704, "grad_norm": 0.26391705870628357, "learning_rate": 0.00017027602891742167, "loss": 11.6882, "step": 36131 }, { "epoch": 0.7563426274805325, "grad_norm": 0.37227141857147217, "learning_rate": 0.00017027446907406238, "loss": 11.67, "step": 36132 }, { "epoch": 0.7563635602444947, "grad_norm": 0.2691003382205963, "learning_rate": 0.00017027290919692073, "loss": 11.6469, "step": 36133 }, { "epoch": 0.7563844930084568, "grad_norm": 0.2640029489994049, "learning_rate": 0.00017027134928599747, "loss": 11.6822, "step": 36134 }, { "epoch": 0.756405425772419, "grad_norm": 0.3214889168739319, "learning_rate": 0.00017026978934129336, "loss": 11.6633, "step": 36135 }, { "epoch": 0.7564263585363812, "grad_norm": 0.27441900968551636, "learning_rate": 0.00017026822936280919, "loss": 11.6589, "step": 36136 }, { "epoch": 0.7564472913003433, "grad_norm": 0.2667291760444641, "learning_rate": 0.00017026666935054564, "loss": 11.6665, "step": 36137 }, { "epoch": 0.7564682240643055, "grad_norm": 0.3907681703567505, "learning_rate": 0.00017026510930450353, "loss": 11.6783, "step": 36138 }, { "epoch": 0.7564891568282676, "grad_norm": 0.273512601852417, "learning_rate": 0.00017026354922468356, "loss": 11.6793, "step": 36139 }, { "epoch": 0.7565100895922298, "grad_norm": 0.302906334400177, "learning_rate": 0.0001702619891110865, "loss": 11.6655, "step": 36140 }, { "epoch": 0.756531022356192, "grad_norm": 0.385189950466156, "learning_rate": 0.00017026042896371306, "loss": 11.6826, "step": 36141 }, { "epoch": 0.756551955120154, "grad_norm": 0.3305220603942871, "learning_rate": 0.00017025886878256405, "loss": 11.6606, "step": 36142 }, { "epoch": 0.7565728878841163, "grad_norm": 0.40477776527404785, "learning_rate": 0.00017025730856764018, "loss": 11.6754, "step": 36143 }, { "epoch": 0.7565938206480783, "grad_norm": 0.3054448664188385, "learning_rate": 0.00017025574831894226, "loss": 11.6799, "step": 36144 }, { "epoch": 0.7566147534120405, "grad_norm": 0.263823002576828, "learning_rate": 0.00017025418803647094, "loss": 11.6536, "step": 36145 }, { "epoch": 0.7566356861760026, "grad_norm": 0.2900514304637909, "learning_rate": 0.00017025262772022703, "loss": 11.641, "step": 36146 }, { "epoch": 0.7566566189399648, "grad_norm": 0.27570831775665283, "learning_rate": 0.00017025106737021128, "loss": 11.6781, "step": 36147 }, { "epoch": 0.756677551703927, "grad_norm": 0.2931928038597107, "learning_rate": 0.00017024950698642444, "loss": 11.6805, "step": 36148 }, { "epoch": 0.7566984844678891, "grad_norm": 0.2858645021915436, "learning_rate": 0.00017024794656886723, "loss": 11.6752, "step": 36149 }, { "epoch": 0.7567194172318513, "grad_norm": 0.26169952750205994, "learning_rate": 0.00017024638611754046, "loss": 11.6575, "step": 36150 }, { "epoch": 0.7567403499958134, "grad_norm": 0.318694531917572, "learning_rate": 0.00017024482563244485, "loss": 11.678, "step": 36151 }, { "epoch": 0.7567612827597756, "grad_norm": 0.3017253279685974, "learning_rate": 0.00017024326511358112, "loss": 11.6685, "step": 36152 }, { "epoch": 0.7567822155237377, "grad_norm": 0.29493987560272217, "learning_rate": 0.00017024170456095004, "loss": 11.6675, "step": 36153 }, { "epoch": 0.7568031482876999, "grad_norm": 0.3737122416496277, "learning_rate": 0.00017024014397455238, "loss": 11.6566, "step": 36154 }, { "epoch": 0.7568240810516621, "grad_norm": 0.35255417227745056, "learning_rate": 0.00017023858335438887, "loss": 11.6745, "step": 36155 }, { "epoch": 0.7568450138156242, "grad_norm": 0.40134599804878235, "learning_rate": 0.00017023702270046028, "loss": 11.6641, "step": 36156 }, { "epoch": 0.7568659465795864, "grad_norm": 0.2682948112487793, "learning_rate": 0.00017023546201276732, "loss": 11.6734, "step": 36157 }, { "epoch": 0.7568868793435485, "grad_norm": 0.35741886496543884, "learning_rate": 0.00017023390129131078, "loss": 11.6836, "step": 36158 }, { "epoch": 0.7569078121075107, "grad_norm": 0.3052443265914917, "learning_rate": 0.0001702323405360914, "loss": 11.6606, "step": 36159 }, { "epoch": 0.7569287448714729, "grad_norm": 0.35139480233192444, "learning_rate": 0.00017023077974710993, "loss": 11.664, "step": 36160 }, { "epoch": 0.756949677635435, "grad_norm": 0.24112847447395325, "learning_rate": 0.0001702292189243671, "loss": 11.6734, "step": 36161 }, { "epoch": 0.7569706103993972, "grad_norm": 0.36978864669799805, "learning_rate": 0.00017022765806786366, "loss": 11.683, "step": 36162 }, { "epoch": 0.7569915431633593, "grad_norm": 0.31228581070899963, "learning_rate": 0.0001702260971776004, "loss": 11.6545, "step": 36163 }, { "epoch": 0.7570124759273215, "grad_norm": 0.27109163999557495, "learning_rate": 0.00017022453625357807, "loss": 11.6673, "step": 36164 }, { "epoch": 0.7570334086912835, "grad_norm": 0.35879912972450256, "learning_rate": 0.0001702229752957974, "loss": 11.6694, "step": 36165 }, { "epoch": 0.7570543414552457, "grad_norm": 0.26322540640830994, "learning_rate": 0.00017022141430425908, "loss": 11.6722, "step": 36166 }, { "epoch": 0.757075274219208, "grad_norm": 0.2635929584503174, "learning_rate": 0.00017021985327896397, "loss": 11.6698, "step": 36167 }, { "epoch": 0.75709620698317, "grad_norm": 0.2952120900154114, "learning_rate": 0.00017021829221991274, "loss": 11.669, "step": 36168 }, { "epoch": 0.7571171397471322, "grad_norm": 0.2678494155406952, "learning_rate": 0.00017021673112710618, "loss": 11.657, "step": 36169 }, { "epoch": 0.7571380725110943, "grad_norm": 0.26093873381614685, "learning_rate": 0.00017021517000054504, "loss": 11.651, "step": 36170 }, { "epoch": 0.7571590052750565, "grad_norm": 0.39430075883865356, "learning_rate": 0.00017021360884023005, "loss": 11.668, "step": 36171 }, { "epoch": 0.7571799380390186, "grad_norm": 0.270254522562027, "learning_rate": 0.000170212047646162, "loss": 11.675, "step": 36172 }, { "epoch": 0.7572008708029808, "grad_norm": 0.2756791114807129, "learning_rate": 0.00017021048641834157, "loss": 11.6786, "step": 36173 }, { "epoch": 0.757221803566943, "grad_norm": 0.28982606530189514, "learning_rate": 0.00017020892515676956, "loss": 11.6486, "step": 36174 }, { "epoch": 0.7572427363309051, "grad_norm": 0.32424765825271606, "learning_rate": 0.00017020736386144672, "loss": 11.6832, "step": 36175 }, { "epoch": 0.7572636690948673, "grad_norm": 0.24288450181484222, "learning_rate": 0.00017020580253237382, "loss": 11.6753, "step": 36176 }, { "epoch": 0.7572846018588294, "grad_norm": 0.2594524919986725, "learning_rate": 0.00017020424116955156, "loss": 11.657, "step": 36177 }, { "epoch": 0.7573055346227916, "grad_norm": 0.23523947596549988, "learning_rate": 0.0001702026797729807, "loss": 11.6474, "step": 36178 }, { "epoch": 0.7573264673867538, "grad_norm": 0.43221232295036316, "learning_rate": 0.000170201118342662, "loss": 11.6713, "step": 36179 }, { "epoch": 0.7573474001507159, "grad_norm": 0.33340275287628174, "learning_rate": 0.00017019955687859624, "loss": 11.6757, "step": 36180 }, { "epoch": 0.7573683329146781, "grad_norm": 0.2086775302886963, "learning_rate": 0.00017019799538078417, "loss": 11.6409, "step": 36181 }, { "epoch": 0.7573892656786402, "grad_norm": 0.3670059144496918, "learning_rate": 0.0001701964338492265, "loss": 11.6674, "step": 36182 }, { "epoch": 0.7574101984426024, "grad_norm": 0.2813158929347992, "learning_rate": 0.00017019487228392394, "loss": 11.6573, "step": 36183 }, { "epoch": 0.7574311312065645, "grad_norm": 0.2730178236961365, "learning_rate": 0.00017019331068487734, "loss": 11.6601, "step": 36184 }, { "epoch": 0.7574520639705267, "grad_norm": 0.2993873953819275, "learning_rate": 0.0001701917490520874, "loss": 11.6488, "step": 36185 }, { "epoch": 0.7574729967344889, "grad_norm": 0.3648853302001953, "learning_rate": 0.00017019018738555493, "loss": 11.6545, "step": 36186 }, { "epoch": 0.757493929498451, "grad_norm": 0.3049842417240143, "learning_rate": 0.00017018862568528057, "loss": 11.6626, "step": 36187 }, { "epoch": 0.7575148622624132, "grad_norm": 0.31127554178237915, "learning_rate": 0.00017018706395126518, "loss": 11.6746, "step": 36188 }, { "epoch": 0.7575357950263752, "grad_norm": 0.3106630742549896, "learning_rate": 0.00017018550218350943, "loss": 11.6668, "step": 36189 }, { "epoch": 0.7575567277903374, "grad_norm": 0.306516170501709, "learning_rate": 0.0001701839403820141, "loss": 11.6687, "step": 36190 }, { "epoch": 0.7575776605542995, "grad_norm": 0.32292938232421875, "learning_rate": 0.00017018237854677995, "loss": 11.6634, "step": 36191 }, { "epoch": 0.7575985933182617, "grad_norm": 0.3295068144798279, "learning_rate": 0.0001701808166778077, "loss": 11.6898, "step": 36192 }, { "epoch": 0.7576195260822239, "grad_norm": 0.2865491211414337, "learning_rate": 0.00017017925477509817, "loss": 11.6699, "step": 36193 }, { "epoch": 0.757640458846186, "grad_norm": 0.35358306765556335, "learning_rate": 0.00017017769283865204, "loss": 11.6578, "step": 36194 }, { "epoch": 0.7576613916101482, "grad_norm": 0.29547613859176636, "learning_rate": 0.00017017613086847008, "loss": 11.6705, "step": 36195 }, { "epoch": 0.7576823243741103, "grad_norm": 0.24709151685237885, "learning_rate": 0.00017017456886455308, "loss": 11.654, "step": 36196 }, { "epoch": 0.7577032571380725, "grad_norm": 0.35400325059890747, "learning_rate": 0.00017017300682690173, "loss": 11.6721, "step": 36197 }, { "epoch": 0.7577241899020347, "grad_norm": 0.25963228940963745, "learning_rate": 0.0001701714447555168, "loss": 11.668, "step": 36198 }, { "epoch": 0.7577451226659968, "grad_norm": 0.36241230368614197, "learning_rate": 0.00017016988265039908, "loss": 11.6739, "step": 36199 }, { "epoch": 0.757766055429959, "grad_norm": 0.3002358376979828, "learning_rate": 0.00017016832051154927, "loss": 11.6562, "step": 36200 }, { "epoch": 0.7577869881939211, "grad_norm": 0.29273319244384766, "learning_rate": 0.00017016675833896817, "loss": 11.6635, "step": 36201 }, { "epoch": 0.7578079209578833, "grad_norm": 0.2979114353656769, "learning_rate": 0.00017016519613265646, "loss": 11.6675, "step": 36202 }, { "epoch": 0.7578288537218454, "grad_norm": 0.30396005511283875, "learning_rate": 0.00017016363389261497, "loss": 11.673, "step": 36203 }, { "epoch": 0.7578497864858076, "grad_norm": 0.3261297941207886, "learning_rate": 0.0001701620716188444, "loss": 11.6737, "step": 36204 }, { "epoch": 0.7578707192497698, "grad_norm": 0.2453644871711731, "learning_rate": 0.00017016050931134552, "loss": 11.6629, "step": 36205 }, { "epoch": 0.7578916520137319, "grad_norm": 0.3054039478302002, "learning_rate": 0.00017015894697011908, "loss": 11.6749, "step": 36206 }, { "epoch": 0.7579125847776941, "grad_norm": 0.3041529357433319, "learning_rate": 0.00017015738459516582, "loss": 11.6602, "step": 36207 }, { "epoch": 0.7579335175416562, "grad_norm": 0.30141228437423706, "learning_rate": 0.00017015582218648652, "loss": 11.6755, "step": 36208 }, { "epoch": 0.7579544503056184, "grad_norm": 0.2816251814365387, "learning_rate": 0.00017015425974408187, "loss": 11.671, "step": 36209 }, { "epoch": 0.7579753830695805, "grad_norm": 0.32265615463256836, "learning_rate": 0.0001701526972679527, "loss": 11.6775, "step": 36210 }, { "epoch": 0.7579963158335427, "grad_norm": 0.2994433045387268, "learning_rate": 0.0001701511347580997, "loss": 11.6703, "step": 36211 }, { "epoch": 0.7580172485975049, "grad_norm": 0.2993679940700531, "learning_rate": 0.00017014957221452365, "loss": 11.6813, "step": 36212 }, { "epoch": 0.758038181361467, "grad_norm": 0.33793285489082336, "learning_rate": 0.0001701480096372253, "loss": 11.6635, "step": 36213 }, { "epoch": 0.7580591141254291, "grad_norm": 0.2977391183376312, "learning_rate": 0.0001701464470262054, "loss": 11.6717, "step": 36214 }, { "epoch": 0.7580800468893912, "grad_norm": 0.36083531379699707, "learning_rate": 0.00017014488438146468, "loss": 11.677, "step": 36215 }, { "epoch": 0.7581009796533534, "grad_norm": 0.25724804401397705, "learning_rate": 0.0001701433217030039, "loss": 11.6698, "step": 36216 }, { "epoch": 0.7581219124173156, "grad_norm": 0.339731365442276, "learning_rate": 0.00017014175899082384, "loss": 11.6637, "step": 36217 }, { "epoch": 0.7581428451812777, "grad_norm": 0.3407057523727417, "learning_rate": 0.00017014019624492525, "loss": 11.6643, "step": 36218 }, { "epoch": 0.7581637779452399, "grad_norm": 0.2994658946990967, "learning_rate": 0.00017013863346530882, "loss": 11.6471, "step": 36219 }, { "epoch": 0.758184710709202, "grad_norm": 0.41993528604507446, "learning_rate": 0.00017013707065197537, "loss": 11.6623, "step": 36220 }, { "epoch": 0.7582056434731642, "grad_norm": 0.2618211507797241, "learning_rate": 0.00017013550780492566, "loss": 11.6693, "step": 36221 }, { "epoch": 0.7582265762371263, "grad_norm": 0.2911691665649414, "learning_rate": 0.00017013394492416034, "loss": 11.6657, "step": 36222 }, { "epoch": 0.7582475090010885, "grad_norm": 0.3182973265647888, "learning_rate": 0.00017013238200968024, "loss": 11.6706, "step": 36223 }, { "epoch": 0.7582684417650507, "grad_norm": 0.269987016916275, "learning_rate": 0.00017013081906148611, "loss": 11.6738, "step": 36224 }, { "epoch": 0.7582893745290128, "grad_norm": 0.2606460452079773, "learning_rate": 0.0001701292560795787, "loss": 11.6586, "step": 36225 }, { "epoch": 0.758310307292975, "grad_norm": 0.3493959307670593, "learning_rate": 0.00017012769306395871, "loss": 11.664, "step": 36226 }, { "epoch": 0.7583312400569371, "grad_norm": 0.2961171567440033, "learning_rate": 0.00017012613001462698, "loss": 11.6601, "step": 36227 }, { "epoch": 0.7583521728208993, "grad_norm": 0.28904303908348083, "learning_rate": 0.0001701245669315842, "loss": 11.6666, "step": 36228 }, { "epoch": 0.7583731055848614, "grad_norm": 0.2975746691226959, "learning_rate": 0.00017012300381483113, "loss": 11.6715, "step": 36229 }, { "epoch": 0.7583940383488236, "grad_norm": 0.24125324189662933, "learning_rate": 0.0001701214406643685, "loss": 11.6546, "step": 36230 }, { "epoch": 0.7584149711127858, "grad_norm": 0.33550262451171875, "learning_rate": 0.00017011987748019716, "loss": 11.6783, "step": 36231 }, { "epoch": 0.7584359038767479, "grad_norm": 0.29479458928108215, "learning_rate": 0.00017011831426231775, "loss": 11.6847, "step": 36232 }, { "epoch": 0.7584568366407101, "grad_norm": 0.27844148874282837, "learning_rate": 0.00017011675101073106, "loss": 11.6674, "step": 36233 }, { "epoch": 0.7584777694046722, "grad_norm": 0.2961413562297821, "learning_rate": 0.00017011518772543785, "loss": 11.6943, "step": 36234 }, { "epoch": 0.7584987021686344, "grad_norm": 0.2890315353870392, "learning_rate": 0.00017011362440643883, "loss": 11.6715, "step": 36235 }, { "epoch": 0.7585196349325966, "grad_norm": 0.2722203731536865, "learning_rate": 0.00017011206105373484, "loss": 11.6899, "step": 36236 }, { "epoch": 0.7585405676965586, "grad_norm": 0.2593642771244049, "learning_rate": 0.00017011049766732655, "loss": 11.6697, "step": 36237 }, { "epoch": 0.7585615004605208, "grad_norm": 0.2825601100921631, "learning_rate": 0.00017010893424721472, "loss": 11.6695, "step": 36238 }, { "epoch": 0.7585824332244829, "grad_norm": 0.2639685571193695, "learning_rate": 0.00017010737079340012, "loss": 11.6726, "step": 36239 }, { "epoch": 0.7586033659884451, "grad_norm": 0.2883937358856201, "learning_rate": 0.00017010580730588353, "loss": 11.6763, "step": 36240 }, { "epoch": 0.7586242987524072, "grad_norm": 0.28034400939941406, "learning_rate": 0.00017010424378466565, "loss": 11.6849, "step": 36241 }, { "epoch": 0.7586452315163694, "grad_norm": 0.26663652062416077, "learning_rate": 0.00017010268022974728, "loss": 11.6686, "step": 36242 }, { "epoch": 0.7586661642803316, "grad_norm": 0.252375990152359, "learning_rate": 0.00017010111664112913, "loss": 11.649, "step": 36243 }, { "epoch": 0.7586870970442937, "grad_norm": 0.3059383034706116, "learning_rate": 0.00017009955301881198, "loss": 11.6752, "step": 36244 }, { "epoch": 0.7587080298082559, "grad_norm": 0.2565572261810303, "learning_rate": 0.00017009798936279657, "loss": 11.6659, "step": 36245 }, { "epoch": 0.758728962572218, "grad_norm": 0.3273810148239136, "learning_rate": 0.00017009642567308365, "loss": 11.6812, "step": 36246 }, { "epoch": 0.7587498953361802, "grad_norm": 0.28319963812828064, "learning_rate": 0.00017009486194967396, "loss": 11.6741, "step": 36247 }, { "epoch": 0.7587708281001423, "grad_norm": 0.4334604740142822, "learning_rate": 0.00017009329819256828, "loss": 11.6758, "step": 36248 }, { "epoch": 0.7587917608641045, "grad_norm": 0.2587875723838806, "learning_rate": 0.00017009173440176733, "loss": 11.6547, "step": 36249 }, { "epoch": 0.7588126936280667, "grad_norm": 0.3317091464996338, "learning_rate": 0.0001700901705772719, "loss": 11.6681, "step": 36250 }, { "epoch": 0.7588336263920288, "grad_norm": 0.25936922430992126, "learning_rate": 0.00017008860671908268, "loss": 11.6728, "step": 36251 }, { "epoch": 0.758854559155991, "grad_norm": 0.3664582669734955, "learning_rate": 0.00017008704282720053, "loss": 11.6906, "step": 36252 }, { "epoch": 0.7588754919199531, "grad_norm": 0.2749718129634857, "learning_rate": 0.00017008547890162606, "loss": 11.6712, "step": 36253 }, { "epoch": 0.7588964246839153, "grad_norm": 0.25774502754211426, "learning_rate": 0.00017008391494236015, "loss": 11.6533, "step": 36254 }, { "epoch": 0.7589173574478775, "grad_norm": 0.31365516781806946, "learning_rate": 0.0001700823509494035, "loss": 11.6744, "step": 36255 }, { "epoch": 0.7589382902118396, "grad_norm": 0.3070521652698517, "learning_rate": 0.0001700807869227568, "loss": 11.6438, "step": 36256 }, { "epoch": 0.7589592229758018, "grad_norm": 0.2635336220264435, "learning_rate": 0.0001700792228624209, "loss": 11.6558, "step": 36257 }, { "epoch": 0.7589801557397639, "grad_norm": 0.27226635813713074, "learning_rate": 0.00017007765876839653, "loss": 11.675, "step": 36258 }, { "epoch": 0.759001088503726, "grad_norm": 0.23179960250854492, "learning_rate": 0.00017007609464068438, "loss": 11.6618, "step": 36259 }, { "epoch": 0.7590220212676881, "grad_norm": 0.3093956410884857, "learning_rate": 0.00017007453047928527, "loss": 11.6533, "step": 36260 }, { "epoch": 0.7590429540316503, "grad_norm": 0.3149545192718506, "learning_rate": 0.00017007296628419996, "loss": 11.6706, "step": 36261 }, { "epoch": 0.7590638867956125, "grad_norm": 0.2737277150154114, "learning_rate": 0.00017007140205542914, "loss": 11.6766, "step": 36262 }, { "epoch": 0.7590848195595746, "grad_norm": 0.33717870712280273, "learning_rate": 0.0001700698377929736, "loss": 11.6827, "step": 36263 }, { "epoch": 0.7591057523235368, "grad_norm": 0.39267653226852417, "learning_rate": 0.00017006827349683407, "loss": 11.6587, "step": 36264 }, { "epoch": 0.7591266850874989, "grad_norm": 0.2549571990966797, "learning_rate": 0.00017006670916701135, "loss": 11.6479, "step": 36265 }, { "epoch": 0.7591476178514611, "grad_norm": 0.29177966713905334, "learning_rate": 0.00017006514480350613, "loss": 11.6525, "step": 36266 }, { "epoch": 0.7591685506154232, "grad_norm": 0.37784191966056824, "learning_rate": 0.00017006358040631917, "loss": 11.6694, "step": 36267 }, { "epoch": 0.7591894833793854, "grad_norm": 0.32619738578796387, "learning_rate": 0.00017006201597545128, "loss": 11.6723, "step": 36268 }, { "epoch": 0.7592104161433476, "grad_norm": 0.3074261546134949, "learning_rate": 0.00017006045151090316, "loss": 11.6743, "step": 36269 }, { "epoch": 0.7592313489073097, "grad_norm": 0.32278361916542053, "learning_rate": 0.00017005888701267558, "loss": 11.6747, "step": 36270 }, { "epoch": 0.7592522816712719, "grad_norm": 0.2603890895843506, "learning_rate": 0.00017005732248076932, "loss": 11.6497, "step": 36271 }, { "epoch": 0.759273214435234, "grad_norm": 0.24736087024211884, "learning_rate": 0.00017005575791518508, "loss": 11.6715, "step": 36272 }, { "epoch": 0.7592941471991962, "grad_norm": 0.28888627886772156, "learning_rate": 0.0001700541933159236, "loss": 11.6614, "step": 36273 }, { "epoch": 0.7593150799631584, "grad_norm": 0.3117770552635193, "learning_rate": 0.0001700526286829857, "loss": 11.6779, "step": 36274 }, { "epoch": 0.7593360127271205, "grad_norm": 0.294628381729126, "learning_rate": 0.00017005106401637207, "loss": 11.6693, "step": 36275 }, { "epoch": 0.7593569454910827, "grad_norm": 0.32560262084007263, "learning_rate": 0.00017004949931608354, "loss": 11.6852, "step": 36276 }, { "epoch": 0.7593778782550448, "grad_norm": 0.2581879198551178, "learning_rate": 0.00017004793458212078, "loss": 11.6875, "step": 36277 }, { "epoch": 0.759398811019007, "grad_norm": 0.26462873816490173, "learning_rate": 0.00017004636981448457, "loss": 11.6772, "step": 36278 }, { "epoch": 0.7594197437829691, "grad_norm": 0.2722145915031433, "learning_rate": 0.00017004480501317567, "loss": 11.6717, "step": 36279 }, { "epoch": 0.7594406765469313, "grad_norm": 0.33394524455070496, "learning_rate": 0.0001700432401781948, "loss": 11.6649, "step": 36280 }, { "epoch": 0.7594616093108935, "grad_norm": 0.3129551112651825, "learning_rate": 0.0001700416753095428, "loss": 11.6863, "step": 36281 }, { "epoch": 0.7594825420748555, "grad_norm": 0.34161141514778137, "learning_rate": 0.00017004011040722033, "loss": 11.6629, "step": 36282 }, { "epoch": 0.7595034748388177, "grad_norm": 0.3310759663581848, "learning_rate": 0.00017003854547122815, "loss": 11.6815, "step": 36283 }, { "epoch": 0.7595244076027798, "grad_norm": 0.25502076745033264, "learning_rate": 0.0001700369805015671, "loss": 11.6691, "step": 36284 }, { "epoch": 0.759545340366742, "grad_norm": 0.4300549328327179, "learning_rate": 0.0001700354154982378, "loss": 11.6893, "step": 36285 }, { "epoch": 0.7595662731307041, "grad_norm": 0.30054959654808044, "learning_rate": 0.00017003385046124113, "loss": 11.6734, "step": 36286 }, { "epoch": 0.7595872058946663, "grad_norm": 0.3714703619480133, "learning_rate": 0.00017003228539057776, "loss": 11.6622, "step": 36287 }, { "epoch": 0.7596081386586285, "grad_norm": 0.315550297498703, "learning_rate": 0.00017003072028624848, "loss": 11.6769, "step": 36288 }, { "epoch": 0.7596290714225906, "grad_norm": 0.34250959753990173, "learning_rate": 0.000170029155148254, "loss": 11.661, "step": 36289 }, { "epoch": 0.7596500041865528, "grad_norm": 0.3130801022052765, "learning_rate": 0.00017002758997659512, "loss": 11.6754, "step": 36290 }, { "epoch": 0.7596709369505149, "grad_norm": 0.30636218190193176, "learning_rate": 0.00017002602477127259, "loss": 11.6957, "step": 36291 }, { "epoch": 0.7596918697144771, "grad_norm": 0.32210564613342285, "learning_rate": 0.00017002445953228712, "loss": 11.695, "step": 36292 }, { "epoch": 0.7597128024784393, "grad_norm": 0.3137284219264984, "learning_rate": 0.00017002289425963952, "loss": 11.6929, "step": 36293 }, { "epoch": 0.7597337352424014, "grad_norm": 0.291606068611145, "learning_rate": 0.00017002132895333047, "loss": 11.6764, "step": 36294 }, { "epoch": 0.7597546680063636, "grad_norm": 0.27783438563346863, "learning_rate": 0.0001700197636133608, "loss": 11.6642, "step": 36295 }, { "epoch": 0.7597756007703257, "grad_norm": 0.3323717713356018, "learning_rate": 0.00017001819823973121, "loss": 11.6669, "step": 36296 }, { "epoch": 0.7597965335342879, "grad_norm": 0.2792816460132599, "learning_rate": 0.00017001663283244247, "loss": 11.6779, "step": 36297 }, { "epoch": 0.75981746629825, "grad_norm": 0.3234308958053589, "learning_rate": 0.00017001506739149533, "loss": 11.6809, "step": 36298 }, { "epoch": 0.7598383990622122, "grad_norm": 0.38487115502357483, "learning_rate": 0.00017001350191689056, "loss": 11.6681, "step": 36299 }, { "epoch": 0.7598593318261744, "grad_norm": 0.32763147354125977, "learning_rate": 0.00017001193640862886, "loss": 11.665, "step": 36300 }, { "epoch": 0.7598802645901365, "grad_norm": 0.2807998061180115, "learning_rate": 0.00017001037086671105, "loss": 11.6679, "step": 36301 }, { "epoch": 0.7599011973540987, "grad_norm": 0.3162354826927185, "learning_rate": 0.00017000880529113784, "loss": 11.6599, "step": 36302 }, { "epoch": 0.7599221301180608, "grad_norm": 0.2449706792831421, "learning_rate": 0.00017000723968191, "loss": 11.6565, "step": 36303 }, { "epoch": 0.759943062882023, "grad_norm": 0.3836630582809448, "learning_rate": 0.00017000567403902827, "loss": 11.653, "step": 36304 }, { "epoch": 0.759963995645985, "grad_norm": 0.3416324555873871, "learning_rate": 0.0001700041083624934, "loss": 11.6893, "step": 36305 }, { "epoch": 0.7599849284099472, "grad_norm": 0.23934254050254822, "learning_rate": 0.0001700025426523062, "loss": 11.6555, "step": 36306 }, { "epoch": 0.7600058611739094, "grad_norm": 0.3856661915779114, "learning_rate": 0.00017000097690846733, "loss": 11.6838, "step": 36307 }, { "epoch": 0.7600267939378715, "grad_norm": 0.35375356674194336, "learning_rate": 0.0001699994111309776, "loss": 11.6596, "step": 36308 }, { "epoch": 0.7600477267018337, "grad_norm": 0.3154267966747284, "learning_rate": 0.00016999784531983775, "loss": 11.6677, "step": 36309 }, { "epoch": 0.7600686594657958, "grad_norm": 0.25198236107826233, "learning_rate": 0.00016999627947504852, "loss": 11.6538, "step": 36310 }, { "epoch": 0.760089592229758, "grad_norm": 0.27328935265541077, "learning_rate": 0.0001699947135966107, "loss": 11.6602, "step": 36311 }, { "epoch": 0.7601105249937201, "grad_norm": 0.27622032165527344, "learning_rate": 0.000169993147684525, "loss": 11.6595, "step": 36312 }, { "epoch": 0.7601314577576823, "grad_norm": 0.32443079352378845, "learning_rate": 0.0001699915817387922, "loss": 11.6799, "step": 36313 }, { "epoch": 0.7601523905216445, "grad_norm": 0.3692864775657654, "learning_rate": 0.00016999001575941305, "loss": 11.6732, "step": 36314 }, { "epoch": 0.7601733232856066, "grad_norm": 0.34883859753608704, "learning_rate": 0.0001699884497463883, "loss": 11.706, "step": 36315 }, { "epoch": 0.7601942560495688, "grad_norm": 0.35905441641807556, "learning_rate": 0.00016998688369971866, "loss": 11.6633, "step": 36316 }, { "epoch": 0.7602151888135309, "grad_norm": 0.33498692512512207, "learning_rate": 0.00016998531761940495, "loss": 11.6516, "step": 36317 }, { "epoch": 0.7602361215774931, "grad_norm": 0.2964326739311218, "learning_rate": 0.00016998375150544792, "loss": 11.6522, "step": 36318 }, { "epoch": 0.7602570543414553, "grad_norm": 0.364472895860672, "learning_rate": 0.00016998218535784826, "loss": 11.6906, "step": 36319 }, { "epoch": 0.7602779871054174, "grad_norm": 0.28652292490005493, "learning_rate": 0.00016998061917660676, "loss": 11.6749, "step": 36320 }, { "epoch": 0.7602989198693796, "grad_norm": 0.29731470346450806, "learning_rate": 0.0001699790529617242, "loss": 11.6693, "step": 36321 }, { "epoch": 0.7603198526333417, "grad_norm": 0.263143390417099, "learning_rate": 0.0001699774867132013, "loss": 11.6754, "step": 36322 }, { "epoch": 0.7603407853973039, "grad_norm": 0.2991928458213806, "learning_rate": 0.0001699759204310388, "loss": 11.6632, "step": 36323 }, { "epoch": 0.760361718161266, "grad_norm": 0.29691585898399353, "learning_rate": 0.0001699743541152375, "loss": 11.6728, "step": 36324 }, { "epoch": 0.7603826509252282, "grad_norm": 0.331304132938385, "learning_rate": 0.00016997278776579813, "loss": 11.6799, "step": 36325 }, { "epoch": 0.7604035836891904, "grad_norm": 0.3014395833015442, "learning_rate": 0.00016997122138272142, "loss": 11.6561, "step": 36326 }, { "epoch": 0.7604245164531525, "grad_norm": 0.5276086926460266, "learning_rate": 0.00016996965496600814, "loss": 11.6867, "step": 36327 }, { "epoch": 0.7604454492171147, "grad_norm": 0.24639315903186798, "learning_rate": 0.00016996808851565905, "loss": 11.6765, "step": 36328 }, { "epoch": 0.7604663819810767, "grad_norm": 0.30789151787757874, "learning_rate": 0.00016996652203167488, "loss": 11.6771, "step": 36329 }, { "epoch": 0.760487314745039, "grad_norm": 0.28643766045570374, "learning_rate": 0.00016996495551405643, "loss": 11.6692, "step": 36330 }, { "epoch": 0.760508247509001, "grad_norm": 0.2268049716949463, "learning_rate": 0.00016996338896280443, "loss": 11.6752, "step": 36331 }, { "epoch": 0.7605291802729632, "grad_norm": 0.27965253591537476, "learning_rate": 0.00016996182237791958, "loss": 11.6733, "step": 36332 }, { "epoch": 0.7605501130369254, "grad_norm": 0.2827088534832001, "learning_rate": 0.0001699602557594027, "loss": 11.6728, "step": 36333 }, { "epoch": 0.7605710458008875, "grad_norm": 0.27881690859794617, "learning_rate": 0.00016995868910725455, "loss": 11.6567, "step": 36334 }, { "epoch": 0.7605919785648497, "grad_norm": 0.288784384727478, "learning_rate": 0.00016995712242147583, "loss": 11.6571, "step": 36335 }, { "epoch": 0.7606129113288118, "grad_norm": 0.3418952226638794, "learning_rate": 0.00016995555570206732, "loss": 11.6793, "step": 36336 }, { "epoch": 0.760633844092774, "grad_norm": 0.27275916934013367, "learning_rate": 0.0001699539889490298, "loss": 11.6547, "step": 36337 }, { "epoch": 0.7606547768567362, "grad_norm": 0.2573411464691162, "learning_rate": 0.00016995242216236397, "loss": 11.6823, "step": 36338 }, { "epoch": 0.7606757096206983, "grad_norm": 0.2905670702457428, "learning_rate": 0.0001699508553420706, "loss": 11.6616, "step": 36339 }, { "epoch": 0.7606966423846605, "grad_norm": 0.282783180475235, "learning_rate": 0.0001699492884881505, "loss": 11.6643, "step": 36340 }, { "epoch": 0.7607175751486226, "grad_norm": 0.2909705936908722, "learning_rate": 0.0001699477216006043, "loss": 11.6813, "step": 36341 }, { "epoch": 0.7607385079125848, "grad_norm": 0.310774564743042, "learning_rate": 0.0001699461546794329, "loss": 11.6691, "step": 36342 }, { "epoch": 0.7607594406765469, "grad_norm": 0.39249250292778015, "learning_rate": 0.00016994458772463694, "loss": 11.6674, "step": 36343 }, { "epoch": 0.7607803734405091, "grad_norm": 0.32500767707824707, "learning_rate": 0.0001699430207362172, "loss": 11.6769, "step": 36344 }, { "epoch": 0.7608013062044713, "grad_norm": 0.28199347853660583, "learning_rate": 0.0001699414537141745, "loss": 11.6542, "step": 36345 }, { "epoch": 0.7608222389684334, "grad_norm": 0.33634600043296814, "learning_rate": 0.00016993988665850947, "loss": 11.6621, "step": 36346 }, { "epoch": 0.7608431717323956, "grad_norm": 0.3499554395675659, "learning_rate": 0.000169938319569223, "loss": 11.6727, "step": 36347 }, { "epoch": 0.7608641044963577, "grad_norm": 0.4185498356819153, "learning_rate": 0.00016993675244631577, "loss": 11.6919, "step": 36348 }, { "epoch": 0.7608850372603199, "grad_norm": 0.2887320816516876, "learning_rate": 0.00016993518528978852, "loss": 11.6615, "step": 36349 }, { "epoch": 0.760905970024282, "grad_norm": 0.2853013575077057, "learning_rate": 0.00016993361809964204, "loss": 11.6577, "step": 36350 }, { "epoch": 0.7609269027882442, "grad_norm": 0.4404395818710327, "learning_rate": 0.00016993205087587708, "loss": 11.6659, "step": 36351 }, { "epoch": 0.7609478355522064, "grad_norm": 0.2549532651901245, "learning_rate": 0.00016993048361849437, "loss": 11.6656, "step": 36352 }, { "epoch": 0.7609687683161684, "grad_norm": 0.30786073207855225, "learning_rate": 0.00016992891632749467, "loss": 11.6615, "step": 36353 }, { "epoch": 0.7609897010801306, "grad_norm": 0.34568750858306885, "learning_rate": 0.0001699273490028787, "loss": 11.6705, "step": 36354 }, { "epoch": 0.7610106338440927, "grad_norm": 0.3160022795200348, "learning_rate": 0.0001699257816446473, "loss": 11.6693, "step": 36355 }, { "epoch": 0.7610315666080549, "grad_norm": 0.34681206941604614, "learning_rate": 0.0001699242142528012, "loss": 11.6825, "step": 36356 }, { "epoch": 0.7610524993720171, "grad_norm": 0.3044549822807312, "learning_rate": 0.00016992264682734107, "loss": 11.6703, "step": 36357 }, { "epoch": 0.7610734321359792, "grad_norm": 0.3082088232040405, "learning_rate": 0.00016992107936826775, "loss": 11.6729, "step": 36358 }, { "epoch": 0.7610943648999414, "grad_norm": 0.2874886393547058, "learning_rate": 0.00016991951187558197, "loss": 11.661, "step": 36359 }, { "epoch": 0.7611152976639035, "grad_norm": 0.2490520477294922, "learning_rate": 0.0001699179443492845, "loss": 11.6759, "step": 36360 }, { "epoch": 0.7611362304278657, "grad_norm": 0.39052441716194153, "learning_rate": 0.00016991637678937603, "loss": 11.6662, "step": 36361 }, { "epoch": 0.7611571631918278, "grad_norm": 0.26433324813842773, "learning_rate": 0.00016991480919585736, "loss": 11.6669, "step": 36362 }, { "epoch": 0.76117809595579, "grad_norm": 0.29581665992736816, "learning_rate": 0.00016991324156872926, "loss": 11.6722, "step": 36363 }, { "epoch": 0.7611990287197522, "grad_norm": 0.23692016303539276, "learning_rate": 0.00016991167390799245, "loss": 11.6562, "step": 36364 }, { "epoch": 0.7612199614837143, "grad_norm": 0.26611223816871643, "learning_rate": 0.00016991010621364772, "loss": 11.6586, "step": 36365 }, { "epoch": 0.7612408942476765, "grad_norm": 0.24779821932315826, "learning_rate": 0.00016990853848569576, "loss": 11.6599, "step": 36366 }, { "epoch": 0.7612618270116386, "grad_norm": 0.2722793519496918, "learning_rate": 0.00016990697072413738, "loss": 11.6501, "step": 36367 }, { "epoch": 0.7612827597756008, "grad_norm": 0.425805002450943, "learning_rate": 0.00016990540292897334, "loss": 11.6655, "step": 36368 }, { "epoch": 0.7613036925395629, "grad_norm": 0.3874906599521637, "learning_rate": 0.00016990383510020435, "loss": 11.6733, "step": 36369 }, { "epoch": 0.7613246253035251, "grad_norm": 0.323894202709198, "learning_rate": 0.00016990226723783122, "loss": 11.6812, "step": 36370 }, { "epoch": 0.7613455580674873, "grad_norm": 0.2968387007713318, "learning_rate": 0.00016990069934185462, "loss": 11.6648, "step": 36371 }, { "epoch": 0.7613664908314494, "grad_norm": 0.2561280429363251, "learning_rate": 0.00016989913141227537, "loss": 11.6814, "step": 36372 }, { "epoch": 0.7613874235954116, "grad_norm": 0.290033757686615, "learning_rate": 0.00016989756344909423, "loss": 11.6642, "step": 36373 }, { "epoch": 0.7614083563593737, "grad_norm": 0.3175086975097656, "learning_rate": 0.00016989599545231193, "loss": 11.6705, "step": 36374 }, { "epoch": 0.7614292891233359, "grad_norm": 0.24044354259967804, "learning_rate": 0.00016989442742192917, "loss": 11.6657, "step": 36375 }, { "epoch": 0.761450221887298, "grad_norm": 0.28647688031196594, "learning_rate": 0.0001698928593579468, "loss": 11.6577, "step": 36376 }, { "epoch": 0.7614711546512601, "grad_norm": 0.3742649555206299, "learning_rate": 0.00016989129126036555, "loss": 11.6532, "step": 36377 }, { "epoch": 0.7614920874152223, "grad_norm": 0.2948654294013977, "learning_rate": 0.00016988972312918614, "loss": 11.6788, "step": 36378 }, { "epoch": 0.7615130201791844, "grad_norm": 0.2766578495502472, "learning_rate": 0.00016988815496440932, "loss": 11.6723, "step": 36379 }, { "epoch": 0.7615339529431466, "grad_norm": 0.3310756981372833, "learning_rate": 0.00016988658676603588, "loss": 11.6606, "step": 36380 }, { "epoch": 0.7615548857071087, "grad_norm": 0.3149385154247284, "learning_rate": 0.00016988501853406655, "loss": 11.672, "step": 36381 }, { "epoch": 0.7615758184710709, "grad_norm": 0.2957318425178528, "learning_rate": 0.0001698834502685021, "loss": 11.6723, "step": 36382 }, { "epoch": 0.7615967512350331, "grad_norm": 0.45477724075317383, "learning_rate": 0.00016988188196934324, "loss": 11.6756, "step": 36383 }, { "epoch": 0.7616176839989952, "grad_norm": 0.34341949224472046, "learning_rate": 0.00016988031363659084, "loss": 11.6733, "step": 36384 }, { "epoch": 0.7616386167629574, "grad_norm": 0.29188475012779236, "learning_rate": 0.0001698787452702455, "loss": 11.6696, "step": 36385 }, { "epoch": 0.7616595495269195, "grad_norm": 0.3692660927772522, "learning_rate": 0.00016987717687030806, "loss": 11.6851, "step": 36386 }, { "epoch": 0.7616804822908817, "grad_norm": 0.26165851950645447, "learning_rate": 0.00016987560843677928, "loss": 11.676, "step": 36387 }, { "epoch": 0.7617014150548438, "grad_norm": 0.28382301330566406, "learning_rate": 0.00016987403996965986, "loss": 11.6751, "step": 36388 }, { "epoch": 0.761722347818806, "grad_norm": 0.3095532953739166, "learning_rate": 0.00016987247146895063, "loss": 11.6673, "step": 36389 }, { "epoch": 0.7617432805827682, "grad_norm": 0.2635822296142578, "learning_rate": 0.00016987090293465226, "loss": 11.6517, "step": 36390 }, { "epoch": 0.7617642133467303, "grad_norm": 0.2682296335697174, "learning_rate": 0.0001698693343667656, "loss": 11.6732, "step": 36391 }, { "epoch": 0.7617851461106925, "grad_norm": 0.3759341835975647, "learning_rate": 0.0001698677657652913, "loss": 11.6431, "step": 36392 }, { "epoch": 0.7618060788746546, "grad_norm": 0.2616214156150818, "learning_rate": 0.0001698661971302302, "loss": 11.6728, "step": 36393 }, { "epoch": 0.7618270116386168, "grad_norm": 0.35168978571891785, "learning_rate": 0.00016986462846158303, "loss": 11.6758, "step": 36394 }, { "epoch": 0.761847944402579, "grad_norm": 0.31017136573791504, "learning_rate": 0.00016986305975935049, "loss": 11.6698, "step": 36395 }, { "epoch": 0.7618688771665411, "grad_norm": 0.2502034306526184, "learning_rate": 0.0001698614910235334, "loss": 11.672, "step": 36396 }, { "epoch": 0.7618898099305033, "grad_norm": 0.3235902488231659, "learning_rate": 0.00016985992225413248, "loss": 11.6901, "step": 36397 }, { "epoch": 0.7619107426944653, "grad_norm": 0.3051145076751709, "learning_rate": 0.0001698583534511485, "loss": 11.6714, "step": 36398 }, { "epoch": 0.7619316754584275, "grad_norm": 0.2908729612827301, "learning_rate": 0.0001698567846145822, "loss": 11.681, "step": 36399 }, { "epoch": 0.7619526082223896, "grad_norm": 0.35183581709861755, "learning_rate": 0.00016985521574443437, "loss": 11.6844, "step": 36400 }, { "epoch": 0.7619735409863518, "grad_norm": 0.28648319840431213, "learning_rate": 0.00016985364684070576, "loss": 11.6534, "step": 36401 }, { "epoch": 0.761994473750314, "grad_norm": 0.4392259418964386, "learning_rate": 0.00016985207790339704, "loss": 11.6747, "step": 36402 }, { "epoch": 0.7620154065142761, "grad_norm": 0.2963956296443939, "learning_rate": 0.00016985050893250905, "loss": 11.6702, "step": 36403 }, { "epoch": 0.7620363392782383, "grad_norm": 0.314073771238327, "learning_rate": 0.00016984893992804252, "loss": 11.6944, "step": 36404 }, { "epoch": 0.7620572720422004, "grad_norm": 0.36858755350112915, "learning_rate": 0.0001698473708899982, "loss": 11.6797, "step": 36405 }, { "epoch": 0.7620782048061626, "grad_norm": 0.33431753516197205, "learning_rate": 0.00016984580181837687, "loss": 11.6625, "step": 36406 }, { "epoch": 0.7620991375701247, "grad_norm": 0.2726686894893646, "learning_rate": 0.00016984423271317923, "loss": 11.6625, "step": 36407 }, { "epoch": 0.7621200703340869, "grad_norm": 0.5138498544692993, "learning_rate": 0.0001698426635744061, "loss": 11.6566, "step": 36408 }, { "epoch": 0.7621410030980491, "grad_norm": 0.2681242525577545, "learning_rate": 0.0001698410944020582, "loss": 11.6504, "step": 36409 }, { "epoch": 0.7621619358620112, "grad_norm": 0.43007394671440125, "learning_rate": 0.00016983952519613625, "loss": 11.6729, "step": 36410 }, { "epoch": 0.7621828686259734, "grad_norm": 0.2785223722457886, "learning_rate": 0.0001698379559566411, "loss": 11.6665, "step": 36411 }, { "epoch": 0.7622038013899355, "grad_norm": 0.3233356773853302, "learning_rate": 0.0001698363866835734, "loss": 11.6687, "step": 36412 }, { "epoch": 0.7622247341538977, "grad_norm": 0.26631706953048706, "learning_rate": 0.00016983481737693396, "loss": 11.6798, "step": 36413 }, { "epoch": 0.7622456669178599, "grad_norm": 0.39489373564720154, "learning_rate": 0.00016983324803672352, "loss": 11.6569, "step": 36414 }, { "epoch": 0.762266599681822, "grad_norm": 0.2804791033267975, "learning_rate": 0.00016983167866294285, "loss": 11.6468, "step": 36415 }, { "epoch": 0.7622875324457842, "grad_norm": 0.3505663275718689, "learning_rate": 0.00016983010925559268, "loss": 11.6609, "step": 36416 }, { "epoch": 0.7623084652097463, "grad_norm": 0.3297669589519501, "learning_rate": 0.00016982853981467377, "loss": 11.6837, "step": 36417 }, { "epoch": 0.7623293979737085, "grad_norm": 0.3327062726020813, "learning_rate": 0.0001698269703401869, "loss": 11.672, "step": 36418 }, { "epoch": 0.7623503307376706, "grad_norm": 0.3062549829483032, "learning_rate": 0.0001698254008321328, "loss": 11.6454, "step": 36419 }, { "epoch": 0.7623712635016328, "grad_norm": 0.2734168469905853, "learning_rate": 0.00016982383129051222, "loss": 11.6661, "step": 36420 }, { "epoch": 0.762392196265595, "grad_norm": 0.3201538026332855, "learning_rate": 0.00016982226171532595, "loss": 11.6672, "step": 36421 }, { "epoch": 0.762413129029557, "grad_norm": 0.26443344354629517, "learning_rate": 0.0001698206921065747, "loss": 11.6616, "step": 36422 }, { "epoch": 0.7624340617935192, "grad_norm": 0.32106029987335205, "learning_rate": 0.00016981912246425926, "loss": 11.6654, "step": 36423 }, { "epoch": 0.7624549945574813, "grad_norm": 0.2758626639842987, "learning_rate": 0.00016981755278838034, "loss": 11.6601, "step": 36424 }, { "epoch": 0.7624759273214435, "grad_norm": 0.28386595845222473, "learning_rate": 0.00016981598307893876, "loss": 11.6589, "step": 36425 }, { "epoch": 0.7624968600854056, "grad_norm": 0.32971280813217163, "learning_rate": 0.0001698144133359352, "loss": 11.6778, "step": 36426 }, { "epoch": 0.7625177928493678, "grad_norm": 0.29298022389411926, "learning_rate": 0.00016981284355937047, "loss": 11.6754, "step": 36427 }, { "epoch": 0.76253872561333, "grad_norm": 0.3068832755088806, "learning_rate": 0.00016981127374924532, "loss": 11.6711, "step": 36428 }, { "epoch": 0.7625596583772921, "grad_norm": 0.35713791847229004, "learning_rate": 0.00016980970390556048, "loss": 11.6684, "step": 36429 }, { "epoch": 0.7625805911412543, "grad_norm": 0.3827199935913086, "learning_rate": 0.00016980813402831674, "loss": 11.6787, "step": 36430 }, { "epoch": 0.7626015239052164, "grad_norm": 0.3324935734272003, "learning_rate": 0.0001698065641175148, "loss": 11.6645, "step": 36431 }, { "epoch": 0.7626224566691786, "grad_norm": 0.3181750476360321, "learning_rate": 0.00016980499417315544, "loss": 11.6664, "step": 36432 }, { "epoch": 0.7626433894331408, "grad_norm": 0.36609598994255066, "learning_rate": 0.00016980342419523945, "loss": 11.6685, "step": 36433 }, { "epoch": 0.7626643221971029, "grad_norm": 0.30429819226264954, "learning_rate": 0.0001698018541837675, "loss": 11.6667, "step": 36434 }, { "epoch": 0.7626852549610651, "grad_norm": 0.2897481322288513, "learning_rate": 0.00016980028413874048, "loss": 11.6865, "step": 36435 }, { "epoch": 0.7627061877250272, "grad_norm": 0.37622520327568054, "learning_rate": 0.00016979871406015899, "loss": 11.6868, "step": 36436 }, { "epoch": 0.7627271204889894, "grad_norm": 0.27761170268058777, "learning_rate": 0.0001697971439480239, "loss": 11.6804, "step": 36437 }, { "epoch": 0.7627480532529515, "grad_norm": 0.2939397692680359, "learning_rate": 0.00016979557380233592, "loss": 11.6636, "step": 36438 }, { "epoch": 0.7627689860169137, "grad_norm": 0.27992314100265503, "learning_rate": 0.00016979400362309582, "loss": 11.6662, "step": 36439 }, { "epoch": 0.7627899187808759, "grad_norm": 0.35602498054504395, "learning_rate": 0.00016979243341030433, "loss": 11.6502, "step": 36440 }, { "epoch": 0.762810851544838, "grad_norm": 0.399082213640213, "learning_rate": 0.0001697908631639622, "loss": 11.6701, "step": 36441 }, { "epoch": 0.7628317843088002, "grad_norm": 0.2509075999259949, "learning_rate": 0.0001697892928840702, "loss": 11.6729, "step": 36442 }, { "epoch": 0.7628527170727623, "grad_norm": 0.2932797372341156, "learning_rate": 0.00016978772257062914, "loss": 11.6613, "step": 36443 }, { "epoch": 0.7628736498367245, "grad_norm": 0.31435415148735046, "learning_rate": 0.0001697861522236397, "loss": 11.6658, "step": 36444 }, { "epoch": 0.7628945826006865, "grad_norm": 0.30274099111557007, "learning_rate": 0.00016978458184310264, "loss": 11.6555, "step": 36445 }, { "epoch": 0.7629155153646487, "grad_norm": 0.38293325901031494, "learning_rate": 0.00016978301142901874, "loss": 11.6917, "step": 36446 }, { "epoch": 0.762936448128611, "grad_norm": 0.277422159910202, "learning_rate": 0.00016978144098138875, "loss": 11.6801, "step": 36447 }, { "epoch": 0.762957380892573, "grad_norm": 0.25746241211891174, "learning_rate": 0.00016977987050021342, "loss": 11.6573, "step": 36448 }, { "epoch": 0.7629783136565352, "grad_norm": 0.2462434321641922, "learning_rate": 0.00016977829998549352, "loss": 11.6674, "step": 36449 }, { "epoch": 0.7629992464204973, "grad_norm": 0.303092896938324, "learning_rate": 0.0001697767294372298, "loss": 11.6674, "step": 36450 }, { "epoch": 0.7630201791844595, "grad_norm": 0.419260710477829, "learning_rate": 0.00016977515885542298, "loss": 11.6933, "step": 36451 }, { "epoch": 0.7630411119484217, "grad_norm": 0.26197096705436707, "learning_rate": 0.00016977358824007385, "loss": 11.6754, "step": 36452 }, { "epoch": 0.7630620447123838, "grad_norm": 0.3227844834327698, "learning_rate": 0.00016977201759118318, "loss": 11.6842, "step": 36453 }, { "epoch": 0.763082977476346, "grad_norm": 0.3250148892402649, "learning_rate": 0.00016977044690875167, "loss": 11.6873, "step": 36454 }, { "epoch": 0.7631039102403081, "grad_norm": 0.3141281008720398, "learning_rate": 0.00016976887619278013, "loss": 11.6634, "step": 36455 }, { "epoch": 0.7631248430042703, "grad_norm": 0.32029178738594055, "learning_rate": 0.00016976730544326929, "loss": 11.6761, "step": 36456 }, { "epoch": 0.7631457757682324, "grad_norm": 0.26020172238349915, "learning_rate": 0.0001697657346602199, "loss": 11.6629, "step": 36457 }, { "epoch": 0.7631667085321946, "grad_norm": 0.2970905900001526, "learning_rate": 0.00016976416384363274, "loss": 11.6677, "step": 36458 }, { "epoch": 0.7631876412961568, "grad_norm": 0.2666636109352112, "learning_rate": 0.00016976259299350855, "loss": 11.6543, "step": 36459 }, { "epoch": 0.7632085740601189, "grad_norm": 0.3854585289955139, "learning_rate": 0.00016976102210984807, "loss": 11.6962, "step": 36460 }, { "epoch": 0.7632295068240811, "grad_norm": 0.3101634979248047, "learning_rate": 0.00016975945119265205, "loss": 11.6748, "step": 36461 }, { "epoch": 0.7632504395880432, "grad_norm": 0.2355044037103653, "learning_rate": 0.0001697578802419213, "loss": 11.665, "step": 36462 }, { "epoch": 0.7632713723520054, "grad_norm": 0.24479074776172638, "learning_rate": 0.0001697563092576565, "loss": 11.6779, "step": 36463 }, { "epoch": 0.7632923051159675, "grad_norm": 0.32593753933906555, "learning_rate": 0.00016975473823985847, "loss": 11.6772, "step": 36464 }, { "epoch": 0.7633132378799297, "grad_norm": 0.37725329399108887, "learning_rate": 0.00016975316718852794, "loss": 11.6696, "step": 36465 }, { "epoch": 0.7633341706438919, "grad_norm": 0.3838755786418915, "learning_rate": 0.00016975159610366566, "loss": 11.6777, "step": 36466 }, { "epoch": 0.763355103407854, "grad_norm": 0.2498132288455963, "learning_rate": 0.0001697500249852724, "loss": 11.6584, "step": 36467 }, { "epoch": 0.7633760361718162, "grad_norm": 0.3147321045398712, "learning_rate": 0.0001697484538333489, "loss": 11.6755, "step": 36468 }, { "epoch": 0.7633969689357782, "grad_norm": 0.28842365741729736, "learning_rate": 0.0001697468826478959, "loss": 11.6853, "step": 36469 }, { "epoch": 0.7634179016997404, "grad_norm": 0.35522258281707764, "learning_rate": 0.00016974531142891421, "loss": 11.6812, "step": 36470 }, { "epoch": 0.7634388344637026, "grad_norm": 0.34399595856666565, "learning_rate": 0.00016974374017640452, "loss": 11.6728, "step": 36471 }, { "epoch": 0.7634597672276647, "grad_norm": 0.33161237835884094, "learning_rate": 0.00016974216889036764, "loss": 11.6607, "step": 36472 }, { "epoch": 0.7634806999916269, "grad_norm": 0.3965985178947449, "learning_rate": 0.00016974059757080427, "loss": 11.6836, "step": 36473 }, { "epoch": 0.763501632755589, "grad_norm": 0.3153911232948303, "learning_rate": 0.00016973902621771525, "loss": 11.6754, "step": 36474 }, { "epoch": 0.7635225655195512, "grad_norm": 0.3436530530452728, "learning_rate": 0.0001697374548311012, "loss": 11.6664, "step": 36475 }, { "epoch": 0.7635434982835133, "grad_norm": 0.3871200978755951, "learning_rate": 0.00016973588341096302, "loss": 11.6787, "step": 36476 }, { "epoch": 0.7635644310474755, "grad_norm": 0.3167109787464142, "learning_rate": 0.00016973431195730143, "loss": 11.6638, "step": 36477 }, { "epoch": 0.7635853638114377, "grad_norm": 0.32989320158958435, "learning_rate": 0.0001697327404701171, "loss": 11.6549, "step": 36478 }, { "epoch": 0.7636062965753998, "grad_norm": 0.25296059250831604, "learning_rate": 0.0001697311689494109, "loss": 11.6538, "step": 36479 }, { "epoch": 0.763627229339362, "grad_norm": 0.32577404379844666, "learning_rate": 0.00016972959739518345, "loss": 11.6668, "step": 36480 }, { "epoch": 0.7636481621033241, "grad_norm": 0.2639584243297577, "learning_rate": 0.00016972802580743563, "loss": 11.6725, "step": 36481 }, { "epoch": 0.7636690948672863, "grad_norm": 0.2800123989582062, "learning_rate": 0.00016972645418616815, "loss": 11.6758, "step": 36482 }, { "epoch": 0.7636900276312484, "grad_norm": 0.3171868622303009, "learning_rate": 0.0001697248825313818, "loss": 11.656, "step": 36483 }, { "epoch": 0.7637109603952106, "grad_norm": 0.2915373742580414, "learning_rate": 0.00016972331084307726, "loss": 11.6726, "step": 36484 }, { "epoch": 0.7637318931591728, "grad_norm": 0.27546754479408264, "learning_rate": 0.00016972173912125534, "loss": 11.6594, "step": 36485 }, { "epoch": 0.7637528259231349, "grad_norm": 0.3004986345767975, "learning_rate": 0.00016972016736591675, "loss": 11.6767, "step": 36486 }, { "epoch": 0.7637737586870971, "grad_norm": 0.35968348383903503, "learning_rate": 0.00016971859557706232, "loss": 11.6712, "step": 36487 }, { "epoch": 0.7637946914510592, "grad_norm": 0.2895621657371521, "learning_rate": 0.00016971702375469274, "loss": 11.681, "step": 36488 }, { "epoch": 0.7638156242150214, "grad_norm": 0.23439186811447144, "learning_rate": 0.0001697154518988088, "loss": 11.6539, "step": 36489 }, { "epoch": 0.7638365569789834, "grad_norm": 0.27531465888023376, "learning_rate": 0.00016971388000941125, "loss": 11.6564, "step": 36490 }, { "epoch": 0.7638574897429457, "grad_norm": 0.26615965366363525, "learning_rate": 0.00016971230808650084, "loss": 11.6834, "step": 36491 }, { "epoch": 0.7638784225069079, "grad_norm": 0.26297619938850403, "learning_rate": 0.00016971073613007833, "loss": 11.6649, "step": 36492 }, { "epoch": 0.7638993552708699, "grad_norm": 0.4689635634422302, "learning_rate": 0.00016970916414014447, "loss": 11.6579, "step": 36493 }, { "epoch": 0.7639202880348321, "grad_norm": 0.293147474527359, "learning_rate": 0.0001697075921167, "loss": 11.6568, "step": 36494 }, { "epoch": 0.7639412207987942, "grad_norm": 0.3340289294719696, "learning_rate": 0.00016970602005974568, "loss": 11.6546, "step": 36495 }, { "epoch": 0.7639621535627564, "grad_norm": 0.26645857095718384, "learning_rate": 0.00016970444796928234, "loss": 11.6598, "step": 36496 }, { "epoch": 0.7639830863267186, "grad_norm": 0.34089386463165283, "learning_rate": 0.00016970287584531065, "loss": 11.6734, "step": 36497 }, { "epoch": 0.7640040190906807, "grad_norm": 0.2540642023086548, "learning_rate": 0.00016970130368783136, "loss": 11.6705, "step": 36498 }, { "epoch": 0.7640249518546429, "grad_norm": 0.29048070311546326, "learning_rate": 0.0001696997314968453, "loss": 11.6739, "step": 36499 }, { "epoch": 0.764045884618605, "grad_norm": 0.29777079820632935, "learning_rate": 0.00016969815927235312, "loss": 11.6683, "step": 36500 }, { "epoch": 0.7640668173825672, "grad_norm": 0.2650335729122162, "learning_rate": 0.00016969658701435569, "loss": 11.6678, "step": 36501 }, { "epoch": 0.7640877501465293, "grad_norm": 0.2924206852912903, "learning_rate": 0.00016969501472285372, "loss": 11.6783, "step": 36502 }, { "epoch": 0.7641086829104915, "grad_norm": 0.30893567204475403, "learning_rate": 0.00016969344239784793, "loss": 11.6752, "step": 36503 }, { "epoch": 0.7641296156744537, "grad_norm": 0.40129879117012024, "learning_rate": 0.00016969187003933913, "loss": 11.6732, "step": 36504 }, { "epoch": 0.7641505484384158, "grad_norm": 0.3158710300922394, "learning_rate": 0.00016969029764732802, "loss": 11.6743, "step": 36505 }, { "epoch": 0.764171481202378, "grad_norm": 0.30228814482688904, "learning_rate": 0.00016968872522181541, "loss": 11.667, "step": 36506 }, { "epoch": 0.7641924139663401, "grad_norm": 0.24358633160591125, "learning_rate": 0.00016968715276280202, "loss": 11.6587, "step": 36507 }, { "epoch": 0.7642133467303023, "grad_norm": 0.2935481369495392, "learning_rate": 0.00016968558027028862, "loss": 11.6647, "step": 36508 }, { "epoch": 0.7642342794942644, "grad_norm": 0.33775001764297485, "learning_rate": 0.000169684007744276, "loss": 11.6604, "step": 36509 }, { "epoch": 0.7642552122582266, "grad_norm": 0.34358352422714233, "learning_rate": 0.00016968243518476484, "loss": 11.6678, "step": 36510 }, { "epoch": 0.7642761450221888, "grad_norm": 0.29614579677581787, "learning_rate": 0.00016968086259175596, "loss": 11.693, "step": 36511 }, { "epoch": 0.7642970777861509, "grad_norm": 0.2678017020225525, "learning_rate": 0.00016967928996525008, "loss": 11.6627, "step": 36512 }, { "epoch": 0.7643180105501131, "grad_norm": 0.34781092405319214, "learning_rate": 0.00016967771730524796, "loss": 11.6852, "step": 36513 }, { "epoch": 0.7643389433140751, "grad_norm": 0.2857927680015564, "learning_rate": 0.0001696761446117504, "loss": 11.6771, "step": 36514 }, { "epoch": 0.7643598760780373, "grad_norm": 0.31412991881370544, "learning_rate": 0.00016967457188475808, "loss": 11.667, "step": 36515 }, { "epoch": 0.7643808088419995, "grad_norm": 0.2866732180118561, "learning_rate": 0.00016967299912427184, "loss": 11.6729, "step": 36516 }, { "epoch": 0.7644017416059616, "grad_norm": 0.3910354673862457, "learning_rate": 0.00016967142633029237, "loss": 11.6602, "step": 36517 }, { "epoch": 0.7644226743699238, "grad_norm": 0.30995404720306396, "learning_rate": 0.00016966985350282045, "loss": 11.6619, "step": 36518 }, { "epoch": 0.7644436071338859, "grad_norm": 0.24049273133277893, "learning_rate": 0.0001696682806418568, "loss": 11.6563, "step": 36519 }, { "epoch": 0.7644645398978481, "grad_norm": 0.34136122465133667, "learning_rate": 0.00016966670774740225, "loss": 11.6809, "step": 36520 }, { "epoch": 0.7644854726618102, "grad_norm": 0.30286848545074463, "learning_rate": 0.00016966513481945751, "loss": 11.6467, "step": 36521 }, { "epoch": 0.7645064054257724, "grad_norm": 0.33420330286026, "learning_rate": 0.00016966356185802334, "loss": 11.6789, "step": 36522 }, { "epoch": 0.7645273381897346, "grad_norm": 0.3099990487098694, "learning_rate": 0.00016966198886310055, "loss": 11.6619, "step": 36523 }, { "epoch": 0.7645482709536967, "grad_norm": 0.2931160032749176, "learning_rate": 0.00016966041583468976, "loss": 11.6567, "step": 36524 }, { "epoch": 0.7645692037176589, "grad_norm": 0.31179311871528625, "learning_rate": 0.00016965884277279187, "loss": 11.6784, "step": 36525 }, { "epoch": 0.764590136481621, "grad_norm": 0.2571931481361389, "learning_rate": 0.00016965726967740755, "loss": 11.6555, "step": 36526 }, { "epoch": 0.7646110692455832, "grad_norm": 0.4265284538269043, "learning_rate": 0.0001696556965485376, "loss": 11.6632, "step": 36527 }, { "epoch": 0.7646320020095453, "grad_norm": 0.3325086236000061, "learning_rate": 0.00016965412338618277, "loss": 11.6877, "step": 36528 }, { "epoch": 0.7646529347735075, "grad_norm": 0.31412506103515625, "learning_rate": 0.00016965255019034379, "loss": 11.6544, "step": 36529 }, { "epoch": 0.7646738675374697, "grad_norm": 0.3811328113079071, "learning_rate": 0.00016965097696102142, "loss": 11.6722, "step": 36530 }, { "epoch": 0.7646948003014318, "grad_norm": 0.3208370804786682, "learning_rate": 0.00016964940369821645, "loss": 11.6585, "step": 36531 }, { "epoch": 0.764715733065394, "grad_norm": 0.309099942445755, "learning_rate": 0.00016964783040192963, "loss": 11.6804, "step": 36532 }, { "epoch": 0.7647366658293561, "grad_norm": 0.2664477527141571, "learning_rate": 0.00016964625707216168, "loss": 11.6593, "step": 36533 }, { "epoch": 0.7647575985933183, "grad_norm": 0.2737651467323303, "learning_rate": 0.00016964468370891337, "loss": 11.6558, "step": 36534 }, { "epoch": 0.7647785313572805, "grad_norm": 0.2877562344074249, "learning_rate": 0.00016964311031218547, "loss": 11.67, "step": 36535 }, { "epoch": 0.7647994641212426, "grad_norm": 0.31997886300086975, "learning_rate": 0.00016964153688197876, "loss": 11.6531, "step": 36536 }, { "epoch": 0.7648203968852048, "grad_norm": 0.27512362599372864, "learning_rate": 0.00016963996341829395, "loss": 11.6709, "step": 36537 }, { "epoch": 0.7648413296491668, "grad_norm": 0.27537497878074646, "learning_rate": 0.0001696383899211318, "loss": 11.6602, "step": 36538 }, { "epoch": 0.764862262413129, "grad_norm": 0.39251306653022766, "learning_rate": 0.0001696368163904931, "loss": 11.6762, "step": 36539 }, { "epoch": 0.7648831951770911, "grad_norm": 0.30885598063468933, "learning_rate": 0.0001696352428263786, "loss": 11.6716, "step": 36540 }, { "epoch": 0.7649041279410533, "grad_norm": 0.30450350046157837, "learning_rate": 0.00016963366922878898, "loss": 11.6701, "step": 36541 }, { "epoch": 0.7649250607050155, "grad_norm": 0.25308576226234436, "learning_rate": 0.0001696320955977251, "loss": 11.6606, "step": 36542 }, { "epoch": 0.7649459934689776, "grad_norm": 0.40053725242614746, "learning_rate": 0.00016963052193318768, "loss": 11.6679, "step": 36543 }, { "epoch": 0.7649669262329398, "grad_norm": 0.29036954045295715, "learning_rate": 0.00016962894823517748, "loss": 11.6735, "step": 36544 }, { "epoch": 0.7649878589969019, "grad_norm": 0.35094088315963745, "learning_rate": 0.00016962737450369525, "loss": 11.6764, "step": 36545 }, { "epoch": 0.7650087917608641, "grad_norm": 0.3161355257034302, "learning_rate": 0.00016962580073874171, "loss": 11.6659, "step": 36546 }, { "epoch": 0.7650297245248262, "grad_norm": 0.34903615713119507, "learning_rate": 0.0001696242269403177, "loss": 11.6632, "step": 36547 }, { "epoch": 0.7650506572887884, "grad_norm": 0.303570032119751, "learning_rate": 0.00016962265310842392, "loss": 11.6821, "step": 36548 }, { "epoch": 0.7650715900527506, "grad_norm": 0.27237120270729065, "learning_rate": 0.00016962107924306107, "loss": 11.6442, "step": 36549 }, { "epoch": 0.7650925228167127, "grad_norm": 0.27138859033584595, "learning_rate": 0.00016961950534423006, "loss": 11.6574, "step": 36550 }, { "epoch": 0.7651134555806749, "grad_norm": 0.3080703020095825, "learning_rate": 0.0001696179314119315, "loss": 11.6629, "step": 36551 }, { "epoch": 0.765134388344637, "grad_norm": 0.34648606181144714, "learning_rate": 0.0001696163574461662, "loss": 11.6568, "step": 36552 }, { "epoch": 0.7651553211085992, "grad_norm": 0.3257621228694916, "learning_rate": 0.000169614783446935, "loss": 11.6746, "step": 36553 }, { "epoch": 0.7651762538725614, "grad_norm": 0.260940819978714, "learning_rate": 0.00016961320941423852, "loss": 11.6639, "step": 36554 }, { "epoch": 0.7651971866365235, "grad_norm": 0.2955452501773834, "learning_rate": 0.00016961163534807757, "loss": 11.65, "step": 36555 }, { "epoch": 0.7652181194004857, "grad_norm": 0.31929224729537964, "learning_rate": 0.00016961006124845292, "loss": 11.6698, "step": 36556 }, { "epoch": 0.7652390521644478, "grad_norm": 0.3510802388191223, "learning_rate": 0.00016960848711536532, "loss": 11.6811, "step": 36557 }, { "epoch": 0.76525998492841, "grad_norm": 0.27238789200782776, "learning_rate": 0.00016960691294881555, "loss": 11.6713, "step": 36558 }, { "epoch": 0.765280917692372, "grad_norm": 0.2481730580329895, "learning_rate": 0.00016960533874880432, "loss": 11.6682, "step": 36559 }, { "epoch": 0.7653018504563343, "grad_norm": 0.3380166292190552, "learning_rate": 0.0001696037645153324, "loss": 11.6731, "step": 36560 }, { "epoch": 0.7653227832202965, "grad_norm": 0.26981765031814575, "learning_rate": 0.00016960219024840056, "loss": 11.6714, "step": 36561 }, { "epoch": 0.7653437159842585, "grad_norm": 0.24161092936992645, "learning_rate": 0.00016960061594800956, "loss": 11.6567, "step": 36562 }, { "epoch": 0.7653646487482207, "grad_norm": 0.2804189920425415, "learning_rate": 0.00016959904161416013, "loss": 11.6646, "step": 36563 }, { "epoch": 0.7653855815121828, "grad_norm": 0.2690368890762329, "learning_rate": 0.0001695974672468531, "loss": 11.669, "step": 36564 }, { "epoch": 0.765406514276145, "grad_norm": 0.28210949897766113, "learning_rate": 0.00016959589284608911, "loss": 11.6646, "step": 36565 }, { "epoch": 0.7654274470401071, "grad_norm": 0.325317919254303, "learning_rate": 0.000169594318411869, "loss": 11.679, "step": 36566 }, { "epoch": 0.7654483798040693, "grad_norm": 0.2933379113674164, "learning_rate": 0.0001695927439441935, "loss": 11.6698, "step": 36567 }, { "epoch": 0.7654693125680315, "grad_norm": 0.2505965530872345, "learning_rate": 0.00016959116944306342, "loss": 11.6634, "step": 36568 }, { "epoch": 0.7654902453319936, "grad_norm": 0.2632332444190979, "learning_rate": 0.0001695895949084794, "loss": 11.6682, "step": 36569 }, { "epoch": 0.7655111780959558, "grad_norm": 0.24323545396327972, "learning_rate": 0.00016958802034044233, "loss": 11.6673, "step": 36570 }, { "epoch": 0.7655321108599179, "grad_norm": 0.2914885878562927, "learning_rate": 0.00016958644573895285, "loss": 11.6712, "step": 36571 }, { "epoch": 0.7655530436238801, "grad_norm": 0.2795955538749695, "learning_rate": 0.00016958487110401183, "loss": 11.6603, "step": 36572 }, { "epoch": 0.7655739763878423, "grad_norm": 0.279313862323761, "learning_rate": 0.00016958329643561993, "loss": 11.6839, "step": 36573 }, { "epoch": 0.7655949091518044, "grad_norm": 0.3056442141532898, "learning_rate": 0.00016958172173377794, "loss": 11.6768, "step": 36574 }, { "epoch": 0.7656158419157666, "grad_norm": 0.2865944504737854, "learning_rate": 0.0001695801469984866, "loss": 11.6699, "step": 36575 }, { "epoch": 0.7656367746797287, "grad_norm": 0.3369302749633789, "learning_rate": 0.00016957857222974672, "loss": 11.6807, "step": 36576 }, { "epoch": 0.7656577074436909, "grad_norm": 0.3169116675853729, "learning_rate": 0.00016957699742755903, "loss": 11.6729, "step": 36577 }, { "epoch": 0.765678640207653, "grad_norm": 0.38244396448135376, "learning_rate": 0.0001695754225919243, "loss": 11.6634, "step": 36578 }, { "epoch": 0.7656995729716152, "grad_norm": 0.3038569688796997, "learning_rate": 0.00016957384772284324, "loss": 11.6773, "step": 36579 }, { "epoch": 0.7657205057355774, "grad_norm": 0.2899945378303528, "learning_rate": 0.00016957227282031668, "loss": 11.6645, "step": 36580 }, { "epoch": 0.7657414384995395, "grad_norm": 0.3371853530406952, "learning_rate": 0.00016957069788434525, "loss": 11.6761, "step": 36581 }, { "epoch": 0.7657623712635017, "grad_norm": 0.28092482686042786, "learning_rate": 0.0001695691229149299, "loss": 11.6646, "step": 36582 }, { "epoch": 0.7657833040274638, "grad_norm": 0.33246177434921265, "learning_rate": 0.00016956754791207118, "loss": 11.6827, "step": 36583 }, { "epoch": 0.765804236791426, "grad_norm": 0.5604219436645508, "learning_rate": 0.00016956597287577, "loss": 11.6794, "step": 36584 }, { "epoch": 0.765825169555388, "grad_norm": 0.29827681183815, "learning_rate": 0.00016956439780602703, "loss": 11.6647, "step": 36585 }, { "epoch": 0.7658461023193502, "grad_norm": 0.3360059857368469, "learning_rate": 0.0001695628227028431, "loss": 11.6603, "step": 36586 }, { "epoch": 0.7658670350833124, "grad_norm": 0.2743746042251587, "learning_rate": 0.0001695612475662189, "loss": 11.655, "step": 36587 }, { "epoch": 0.7658879678472745, "grad_norm": 0.24614153802394867, "learning_rate": 0.00016955967239615523, "loss": 11.6794, "step": 36588 }, { "epoch": 0.7659089006112367, "grad_norm": 0.2547518312931061, "learning_rate": 0.00016955809719265282, "loss": 11.6642, "step": 36589 }, { "epoch": 0.7659298333751988, "grad_norm": 0.27199307084083557, "learning_rate": 0.00016955652195571244, "loss": 11.6764, "step": 36590 }, { "epoch": 0.765950766139161, "grad_norm": 0.3285207450389862, "learning_rate": 0.00016955494668533482, "loss": 11.6542, "step": 36591 }, { "epoch": 0.7659716989031232, "grad_norm": 0.292816162109375, "learning_rate": 0.00016955337138152077, "loss": 11.6716, "step": 36592 }, { "epoch": 0.7659926316670853, "grad_norm": 0.27376413345336914, "learning_rate": 0.00016955179604427102, "loss": 11.6679, "step": 36593 }, { "epoch": 0.7660135644310475, "grad_norm": 0.2895156145095825, "learning_rate": 0.00016955022067358634, "loss": 11.6657, "step": 36594 }, { "epoch": 0.7660344971950096, "grad_norm": 0.2753194570541382, "learning_rate": 0.00016954864526946747, "loss": 11.6664, "step": 36595 }, { "epoch": 0.7660554299589718, "grad_norm": 0.280031681060791, "learning_rate": 0.00016954706983191516, "loss": 11.6547, "step": 36596 }, { "epoch": 0.7660763627229339, "grad_norm": 0.26666614413261414, "learning_rate": 0.00016954549436093018, "loss": 11.6672, "step": 36597 }, { "epoch": 0.7660972954868961, "grad_norm": 0.2775585353374481, "learning_rate": 0.00016954391885651328, "loss": 11.683, "step": 36598 }, { "epoch": 0.7661182282508583, "grad_norm": 0.2587413191795349, "learning_rate": 0.00016954234331866523, "loss": 11.6666, "step": 36599 }, { "epoch": 0.7661391610148204, "grad_norm": 0.26785486936569214, "learning_rate": 0.0001695407677473868, "loss": 11.6611, "step": 36600 }, { "epoch": 0.7661600937787826, "grad_norm": 0.3226046860218048, "learning_rate": 0.00016953919214267874, "loss": 11.6612, "step": 36601 }, { "epoch": 0.7661810265427447, "grad_norm": 0.2527143359184265, "learning_rate": 0.00016953761650454174, "loss": 11.683, "step": 36602 }, { "epoch": 0.7662019593067069, "grad_norm": 0.31710708141326904, "learning_rate": 0.00016953604083297665, "loss": 11.6914, "step": 36603 }, { "epoch": 0.766222892070669, "grad_norm": 0.27479034662246704, "learning_rate": 0.00016953446512798422, "loss": 11.6423, "step": 36604 }, { "epoch": 0.7662438248346312, "grad_norm": 0.2762272357940674, "learning_rate": 0.00016953288938956513, "loss": 11.6744, "step": 36605 }, { "epoch": 0.7662647575985934, "grad_norm": 0.36528265476226807, "learning_rate": 0.0001695313136177202, "loss": 11.6793, "step": 36606 }, { "epoch": 0.7662856903625554, "grad_norm": 0.3158976137638092, "learning_rate": 0.0001695297378124502, "loss": 11.652, "step": 36607 }, { "epoch": 0.7663066231265176, "grad_norm": 0.277763694524765, "learning_rate": 0.00016952816197375584, "loss": 11.681, "step": 36608 }, { "epoch": 0.7663275558904797, "grad_norm": 0.329658567905426, "learning_rate": 0.00016952658610163787, "loss": 11.6565, "step": 36609 }, { "epoch": 0.7663484886544419, "grad_norm": 0.2898719310760498, "learning_rate": 0.00016952501019609715, "loss": 11.6666, "step": 36610 }, { "epoch": 0.7663694214184041, "grad_norm": 0.3089331090450287, "learning_rate": 0.0001695234342571343, "loss": 11.6666, "step": 36611 }, { "epoch": 0.7663903541823662, "grad_norm": 0.3278174102306366, "learning_rate": 0.00016952185828475016, "loss": 11.6954, "step": 36612 }, { "epoch": 0.7664112869463284, "grad_norm": 0.28445538878440857, "learning_rate": 0.0001695202822789455, "loss": 11.6804, "step": 36613 }, { "epoch": 0.7664322197102905, "grad_norm": 0.3320484459400177, "learning_rate": 0.000169518706239721, "loss": 11.6717, "step": 36614 }, { "epoch": 0.7664531524742527, "grad_norm": 0.27760037779808044, "learning_rate": 0.0001695171301670775, "loss": 11.6849, "step": 36615 }, { "epoch": 0.7664740852382148, "grad_norm": 0.30010950565338135, "learning_rate": 0.0001695155540610157, "loss": 11.6771, "step": 36616 }, { "epoch": 0.766495018002177, "grad_norm": 0.2800162136554718, "learning_rate": 0.00016951397792153638, "loss": 11.6635, "step": 36617 }, { "epoch": 0.7665159507661392, "grad_norm": 0.3087526559829712, "learning_rate": 0.0001695124017486403, "loss": 11.6578, "step": 36618 }, { "epoch": 0.7665368835301013, "grad_norm": 0.3088741898536682, "learning_rate": 0.00016951082554232823, "loss": 11.6601, "step": 36619 }, { "epoch": 0.7665578162940635, "grad_norm": 0.3532962203025818, "learning_rate": 0.00016950924930260087, "loss": 11.6805, "step": 36620 }, { "epoch": 0.7665787490580256, "grad_norm": 0.3640216886997223, "learning_rate": 0.0001695076730294591, "loss": 11.6705, "step": 36621 }, { "epoch": 0.7665996818219878, "grad_norm": 0.3239554464817047, "learning_rate": 0.00016950609672290354, "loss": 11.6693, "step": 36622 }, { "epoch": 0.7666206145859499, "grad_norm": 0.273788183927536, "learning_rate": 0.000169504520382935, "loss": 11.668, "step": 36623 }, { "epoch": 0.7666415473499121, "grad_norm": 0.3236002027988434, "learning_rate": 0.0001695029440095543, "loss": 11.6849, "step": 36624 }, { "epoch": 0.7666624801138743, "grad_norm": 0.26843488216400146, "learning_rate": 0.0001695013676027621, "loss": 11.6592, "step": 36625 }, { "epoch": 0.7666834128778364, "grad_norm": 0.3199401795864105, "learning_rate": 0.0001694997911625592, "loss": 11.6772, "step": 36626 }, { "epoch": 0.7667043456417986, "grad_norm": 0.2601143717765808, "learning_rate": 0.00016949821468894636, "loss": 11.669, "step": 36627 }, { "epoch": 0.7667252784057607, "grad_norm": 0.27122873067855835, "learning_rate": 0.00016949663818192434, "loss": 11.6617, "step": 36628 }, { "epoch": 0.7667462111697229, "grad_norm": 0.3336026668548584, "learning_rate": 0.0001694950616414939, "loss": 11.6654, "step": 36629 }, { "epoch": 0.7667671439336851, "grad_norm": 0.26445919275283813, "learning_rate": 0.00016949348506765577, "loss": 11.6608, "step": 36630 }, { "epoch": 0.7667880766976471, "grad_norm": 0.2673525810241699, "learning_rate": 0.00016949190846041073, "loss": 11.6798, "step": 36631 }, { "epoch": 0.7668090094616093, "grad_norm": 0.39833325147628784, "learning_rate": 0.00016949033181975955, "loss": 11.6629, "step": 36632 }, { "epoch": 0.7668299422255714, "grad_norm": 0.2939856946468353, "learning_rate": 0.000169488755145703, "loss": 11.6783, "step": 36633 }, { "epoch": 0.7668508749895336, "grad_norm": 0.3224800229072571, "learning_rate": 0.0001694871784382418, "loss": 11.6739, "step": 36634 }, { "epoch": 0.7668718077534957, "grad_norm": 0.2929381728172302, "learning_rate": 0.00016948560169737666, "loss": 11.6773, "step": 36635 }, { "epoch": 0.7668927405174579, "grad_norm": 0.2572418749332428, "learning_rate": 0.00016948402492310846, "loss": 11.667, "step": 36636 }, { "epoch": 0.7669136732814201, "grad_norm": 0.3240463435649872, "learning_rate": 0.00016948244811543786, "loss": 11.6794, "step": 36637 }, { "epoch": 0.7669346060453822, "grad_norm": 0.270918607711792, "learning_rate": 0.0001694808712743657, "loss": 11.6605, "step": 36638 }, { "epoch": 0.7669555388093444, "grad_norm": 0.295497864484787, "learning_rate": 0.00016947929439989267, "loss": 11.6746, "step": 36639 }, { "epoch": 0.7669764715733065, "grad_norm": 0.40952154994010925, "learning_rate": 0.00016947771749201954, "loss": 11.674, "step": 36640 }, { "epoch": 0.7669974043372687, "grad_norm": 0.2849102318286896, "learning_rate": 0.0001694761405507471, "loss": 11.6661, "step": 36641 }, { "epoch": 0.7670183371012308, "grad_norm": 0.3062073290348053, "learning_rate": 0.00016947456357607608, "loss": 11.6815, "step": 36642 }, { "epoch": 0.767039269865193, "grad_norm": 0.2972705364227295, "learning_rate": 0.00016947298656800724, "loss": 11.6787, "step": 36643 }, { "epoch": 0.7670602026291552, "grad_norm": 0.25281181931495667, "learning_rate": 0.00016947140952654134, "loss": 11.6705, "step": 36644 }, { "epoch": 0.7670811353931173, "grad_norm": 0.30548086762428284, "learning_rate": 0.00016946983245167915, "loss": 11.6735, "step": 36645 }, { "epoch": 0.7671020681570795, "grad_norm": 0.4210948646068573, "learning_rate": 0.00016946825534342144, "loss": 11.6635, "step": 36646 }, { "epoch": 0.7671230009210416, "grad_norm": 0.31582343578338623, "learning_rate": 0.00016946667820176892, "loss": 11.6878, "step": 36647 }, { "epoch": 0.7671439336850038, "grad_norm": 0.2765289545059204, "learning_rate": 0.00016946510102672238, "loss": 11.6787, "step": 36648 }, { "epoch": 0.767164866448966, "grad_norm": 0.2813844084739685, "learning_rate": 0.00016946352381828255, "loss": 11.6955, "step": 36649 }, { "epoch": 0.7671857992129281, "grad_norm": 0.33579084277153015, "learning_rate": 0.00016946194657645023, "loss": 11.6836, "step": 36650 }, { "epoch": 0.7672067319768903, "grad_norm": 0.25324854254722595, "learning_rate": 0.00016946036930122618, "loss": 11.6925, "step": 36651 }, { "epoch": 0.7672276647408524, "grad_norm": 0.3454922139644623, "learning_rate": 0.00016945879199261113, "loss": 11.6694, "step": 36652 }, { "epoch": 0.7672485975048146, "grad_norm": 0.3363674283027649, "learning_rate": 0.00016945721465060586, "loss": 11.6854, "step": 36653 }, { "epoch": 0.7672695302687766, "grad_norm": 0.24336165189743042, "learning_rate": 0.00016945563727521106, "loss": 11.6696, "step": 36654 }, { "epoch": 0.7672904630327388, "grad_norm": 0.2621912956237793, "learning_rate": 0.0001694540598664276, "loss": 11.6657, "step": 36655 }, { "epoch": 0.767311395796701, "grad_norm": 0.27144184708595276, "learning_rate": 0.00016945248242425615, "loss": 11.6709, "step": 36656 }, { "epoch": 0.7673323285606631, "grad_norm": 0.3193696141242981, "learning_rate": 0.00016945090494869752, "loss": 11.6813, "step": 36657 }, { "epoch": 0.7673532613246253, "grad_norm": 0.30364710092544556, "learning_rate": 0.00016944932743975243, "loss": 11.6712, "step": 36658 }, { "epoch": 0.7673741940885874, "grad_norm": 0.28680846095085144, "learning_rate": 0.00016944774989742167, "loss": 11.6763, "step": 36659 }, { "epoch": 0.7673951268525496, "grad_norm": 0.297617107629776, "learning_rate": 0.00016944617232170597, "loss": 11.6648, "step": 36660 }, { "epoch": 0.7674160596165117, "grad_norm": 0.3629031479358673, "learning_rate": 0.00016944459471260613, "loss": 11.6737, "step": 36661 }, { "epoch": 0.7674369923804739, "grad_norm": 0.2813427746295929, "learning_rate": 0.00016944301707012285, "loss": 11.6818, "step": 36662 }, { "epoch": 0.7674579251444361, "grad_norm": 0.28039857745170593, "learning_rate": 0.00016944143939425695, "loss": 11.6601, "step": 36663 }, { "epoch": 0.7674788579083982, "grad_norm": 0.33100688457489014, "learning_rate": 0.00016943986168500911, "loss": 11.664, "step": 36664 }, { "epoch": 0.7674997906723604, "grad_norm": 0.33625051379203796, "learning_rate": 0.0001694382839423802, "loss": 11.6689, "step": 36665 }, { "epoch": 0.7675207234363225, "grad_norm": 0.2887594997882843, "learning_rate": 0.00016943670616637087, "loss": 11.6644, "step": 36666 }, { "epoch": 0.7675416562002847, "grad_norm": 0.2696986198425293, "learning_rate": 0.00016943512835698195, "loss": 11.6728, "step": 36667 }, { "epoch": 0.7675625889642469, "grad_norm": 0.5366118550300598, "learning_rate": 0.00016943355051421413, "loss": 11.6588, "step": 36668 }, { "epoch": 0.767583521728209, "grad_norm": 0.3129426836967468, "learning_rate": 0.00016943197263806826, "loss": 11.6778, "step": 36669 }, { "epoch": 0.7676044544921712, "grad_norm": 0.3161923289299011, "learning_rate": 0.00016943039472854503, "loss": 11.6747, "step": 36670 }, { "epoch": 0.7676253872561333, "grad_norm": 0.35834625363349915, "learning_rate": 0.00016942881678564523, "loss": 11.6779, "step": 36671 }, { "epoch": 0.7676463200200955, "grad_norm": 0.2909797132015228, "learning_rate": 0.0001694272388093696, "loss": 11.6587, "step": 36672 }, { "epoch": 0.7676672527840576, "grad_norm": 0.3220113515853882, "learning_rate": 0.00016942566079971888, "loss": 11.6729, "step": 36673 }, { "epoch": 0.7676881855480198, "grad_norm": 0.27888166904449463, "learning_rate": 0.00016942408275669388, "loss": 11.6834, "step": 36674 }, { "epoch": 0.767709118311982, "grad_norm": 0.3162980079650879, "learning_rate": 0.00016942250468029535, "loss": 11.6814, "step": 36675 }, { "epoch": 0.767730051075944, "grad_norm": 0.2968996465206146, "learning_rate": 0.000169420926570524, "loss": 11.6752, "step": 36676 }, { "epoch": 0.7677509838399063, "grad_norm": 0.2934114634990692, "learning_rate": 0.00016941934842738064, "loss": 11.665, "step": 36677 }, { "epoch": 0.7677719166038683, "grad_norm": 0.294109046459198, "learning_rate": 0.00016941777025086596, "loss": 11.6661, "step": 36678 }, { "epoch": 0.7677928493678305, "grad_norm": 0.2527696490287781, "learning_rate": 0.0001694161920409808, "loss": 11.6595, "step": 36679 }, { "epoch": 0.7678137821317926, "grad_norm": 0.29046207666397095, "learning_rate": 0.0001694146137977259, "loss": 11.6854, "step": 36680 }, { "epoch": 0.7678347148957548, "grad_norm": 0.3275732398033142, "learning_rate": 0.00016941303552110197, "loss": 11.6706, "step": 36681 }, { "epoch": 0.767855647659717, "grad_norm": 0.26043701171875, "learning_rate": 0.00016941145721110985, "loss": 11.6647, "step": 36682 }, { "epoch": 0.7678765804236791, "grad_norm": 0.2609696090221405, "learning_rate": 0.00016940987886775023, "loss": 11.6734, "step": 36683 }, { "epoch": 0.7678975131876413, "grad_norm": 0.2726339101791382, "learning_rate": 0.00016940830049102384, "loss": 11.6863, "step": 36684 }, { "epoch": 0.7679184459516034, "grad_norm": 0.5020561218261719, "learning_rate": 0.00016940672208093157, "loss": 11.6758, "step": 36685 }, { "epoch": 0.7679393787155656, "grad_norm": 0.2815961241722107, "learning_rate": 0.00016940514363747403, "loss": 11.6546, "step": 36686 }, { "epoch": 0.7679603114795277, "grad_norm": 0.36748939752578735, "learning_rate": 0.00016940356516065213, "loss": 11.6686, "step": 36687 }, { "epoch": 0.7679812442434899, "grad_norm": 0.3390345275402069, "learning_rate": 0.00016940198665046645, "loss": 11.6783, "step": 36688 }, { "epoch": 0.7680021770074521, "grad_norm": 0.29550978541374207, "learning_rate": 0.0001694004081069179, "loss": 11.6697, "step": 36689 }, { "epoch": 0.7680231097714142, "grad_norm": 0.25922101736068726, "learning_rate": 0.00016939882953000715, "loss": 11.6749, "step": 36690 }, { "epoch": 0.7680440425353764, "grad_norm": 0.34102386236190796, "learning_rate": 0.00016939725091973503, "loss": 11.6732, "step": 36691 }, { "epoch": 0.7680649752993385, "grad_norm": 0.2679104208946228, "learning_rate": 0.00016939567227610227, "loss": 11.6859, "step": 36692 }, { "epoch": 0.7680859080633007, "grad_norm": 0.28145894408226013, "learning_rate": 0.0001693940935991096, "loss": 11.6657, "step": 36693 }, { "epoch": 0.7681068408272629, "grad_norm": 0.29802703857421875, "learning_rate": 0.00016939251488875775, "loss": 11.6575, "step": 36694 }, { "epoch": 0.768127773591225, "grad_norm": 0.303451269865036, "learning_rate": 0.0001693909361450476, "loss": 11.6657, "step": 36695 }, { "epoch": 0.7681487063551872, "grad_norm": 0.2904547452926636, "learning_rate": 0.0001693893573679798, "loss": 11.6717, "step": 36696 }, { "epoch": 0.7681696391191493, "grad_norm": 0.348218709230423, "learning_rate": 0.00016938777855755513, "loss": 11.668, "step": 36697 }, { "epoch": 0.7681905718831115, "grad_norm": 0.355276882648468, "learning_rate": 0.0001693861997137744, "loss": 11.6804, "step": 36698 }, { "epoch": 0.7682115046470736, "grad_norm": 0.27977612614631653, "learning_rate": 0.0001693846208366383, "loss": 11.663, "step": 36699 }, { "epoch": 0.7682324374110358, "grad_norm": 0.31189703941345215, "learning_rate": 0.00016938304192614764, "loss": 11.6764, "step": 36700 }, { "epoch": 0.768253370174998, "grad_norm": 0.29805490374565125, "learning_rate": 0.00016938146298230318, "loss": 11.6704, "step": 36701 }, { "epoch": 0.76827430293896, "grad_norm": 0.2987874448299408, "learning_rate": 0.00016937988400510565, "loss": 11.6626, "step": 36702 }, { "epoch": 0.7682952357029222, "grad_norm": 0.31180262565612793, "learning_rate": 0.0001693783049945558, "loss": 11.6641, "step": 36703 }, { "epoch": 0.7683161684668843, "grad_norm": 0.25221991539001465, "learning_rate": 0.0001693767259506544, "loss": 11.676, "step": 36704 }, { "epoch": 0.7683371012308465, "grad_norm": 0.3455228805541992, "learning_rate": 0.00016937514687340226, "loss": 11.6674, "step": 36705 }, { "epoch": 0.7683580339948086, "grad_norm": 0.31637775897979736, "learning_rate": 0.00016937356776280008, "loss": 11.6728, "step": 36706 }, { "epoch": 0.7683789667587708, "grad_norm": 0.2635413110256195, "learning_rate": 0.00016937198861884863, "loss": 11.6728, "step": 36707 }, { "epoch": 0.768399899522733, "grad_norm": 0.2783423364162445, "learning_rate": 0.00016937040944154869, "loss": 11.6517, "step": 36708 }, { "epoch": 0.7684208322866951, "grad_norm": 0.3515833914279938, "learning_rate": 0.00016936883023090096, "loss": 11.6789, "step": 36709 }, { "epoch": 0.7684417650506573, "grad_norm": 0.2542274296283722, "learning_rate": 0.0001693672509869063, "loss": 11.6656, "step": 36710 }, { "epoch": 0.7684626978146194, "grad_norm": 0.48342254757881165, "learning_rate": 0.00016936567170956537, "loss": 11.6786, "step": 36711 }, { "epoch": 0.7684836305785816, "grad_norm": 0.39250314235687256, "learning_rate": 0.000169364092398879, "loss": 11.6765, "step": 36712 }, { "epoch": 0.7685045633425438, "grad_norm": 0.42503777146339417, "learning_rate": 0.00016936251305484787, "loss": 11.6708, "step": 36713 }, { "epoch": 0.7685254961065059, "grad_norm": 0.3242357075214386, "learning_rate": 0.00016936093367747287, "loss": 11.6798, "step": 36714 }, { "epoch": 0.7685464288704681, "grad_norm": 0.30245041847229004, "learning_rate": 0.0001693593542667546, "loss": 11.6748, "step": 36715 }, { "epoch": 0.7685673616344302, "grad_norm": 0.39113789796829224, "learning_rate": 0.00016935777482269396, "loss": 11.6834, "step": 36716 }, { "epoch": 0.7685882943983924, "grad_norm": 0.3101884126663208, "learning_rate": 0.0001693561953452916, "loss": 11.6629, "step": 36717 }, { "epoch": 0.7686092271623545, "grad_norm": 0.30884093046188354, "learning_rate": 0.00016935461583454835, "loss": 11.6695, "step": 36718 }, { "epoch": 0.7686301599263167, "grad_norm": 0.2622620463371277, "learning_rate": 0.00016935303629046495, "loss": 11.6559, "step": 36719 }, { "epoch": 0.7686510926902789, "grad_norm": 0.32066822052001953, "learning_rate": 0.00016935145671304213, "loss": 11.6746, "step": 36720 }, { "epoch": 0.768672025454241, "grad_norm": 0.3477109968662262, "learning_rate": 0.0001693498771022807, "loss": 11.6677, "step": 36721 }, { "epoch": 0.7686929582182032, "grad_norm": 0.24482972919940948, "learning_rate": 0.0001693482974581814, "loss": 11.6523, "step": 36722 }, { "epoch": 0.7687138909821652, "grad_norm": 0.35808801651000977, "learning_rate": 0.00016934671778074498, "loss": 11.6733, "step": 36723 }, { "epoch": 0.7687348237461274, "grad_norm": 0.295092910528183, "learning_rate": 0.0001693451380699722, "loss": 11.6698, "step": 36724 }, { "epoch": 0.7687557565100895, "grad_norm": 0.287180632352829, "learning_rate": 0.0001693435583258638, "loss": 11.6646, "step": 36725 }, { "epoch": 0.7687766892740517, "grad_norm": 0.32305052876472473, "learning_rate": 0.0001693419785484206, "loss": 11.6936, "step": 36726 }, { "epoch": 0.7687976220380139, "grad_norm": 0.3556648790836334, "learning_rate": 0.0001693403987376433, "loss": 11.6734, "step": 36727 }, { "epoch": 0.768818554801976, "grad_norm": 0.2389204353094101, "learning_rate": 0.00016933881889353268, "loss": 11.6697, "step": 36728 }, { "epoch": 0.7688394875659382, "grad_norm": 0.3294129967689514, "learning_rate": 0.0001693372390160895, "loss": 11.6633, "step": 36729 }, { "epoch": 0.7688604203299003, "grad_norm": 0.2845872640609741, "learning_rate": 0.00016933565910531452, "loss": 11.6707, "step": 36730 }, { "epoch": 0.7688813530938625, "grad_norm": 0.28726911544799805, "learning_rate": 0.0001693340791612085, "loss": 11.6624, "step": 36731 }, { "epoch": 0.7689022858578247, "grad_norm": 0.31687095761299133, "learning_rate": 0.0001693324991837722, "loss": 11.6736, "step": 36732 }, { "epoch": 0.7689232186217868, "grad_norm": 0.2658711373806, "learning_rate": 0.00016933091917300635, "loss": 11.6907, "step": 36733 }, { "epoch": 0.768944151385749, "grad_norm": 0.31487804651260376, "learning_rate": 0.0001693293391289118, "loss": 11.665, "step": 36734 }, { "epoch": 0.7689650841497111, "grad_norm": 0.2839412987232208, "learning_rate": 0.0001693277590514892, "loss": 11.6736, "step": 36735 }, { "epoch": 0.7689860169136733, "grad_norm": 0.29726946353912354, "learning_rate": 0.0001693261789407394, "loss": 11.668, "step": 36736 }, { "epoch": 0.7690069496776354, "grad_norm": 0.29830583930015564, "learning_rate": 0.00016932459879666307, "loss": 11.6635, "step": 36737 }, { "epoch": 0.7690278824415976, "grad_norm": 0.2989027798175812, "learning_rate": 0.00016932301861926106, "loss": 11.6797, "step": 36738 }, { "epoch": 0.7690488152055598, "grad_norm": 0.2831578254699707, "learning_rate": 0.00016932143840853403, "loss": 11.65, "step": 36739 }, { "epoch": 0.7690697479695219, "grad_norm": 0.264766663312912, "learning_rate": 0.0001693198581644828, "loss": 11.6582, "step": 36740 }, { "epoch": 0.7690906807334841, "grad_norm": 0.3117254674434662, "learning_rate": 0.00016931827788710816, "loss": 11.6669, "step": 36741 }, { "epoch": 0.7691116134974462, "grad_norm": 0.27838966250419617, "learning_rate": 0.00016931669757641084, "loss": 11.6643, "step": 36742 }, { "epoch": 0.7691325462614084, "grad_norm": 0.38833263516426086, "learning_rate": 0.0001693151172323916, "loss": 11.6728, "step": 36743 }, { "epoch": 0.7691534790253705, "grad_norm": 0.2749769389629364, "learning_rate": 0.00016931353685505115, "loss": 11.6736, "step": 36744 }, { "epoch": 0.7691744117893327, "grad_norm": 0.2936086058616638, "learning_rate": 0.0001693119564443903, "loss": 11.6733, "step": 36745 }, { "epoch": 0.7691953445532949, "grad_norm": 0.28470975160598755, "learning_rate": 0.00016931037600040983, "loss": 11.6863, "step": 36746 }, { "epoch": 0.769216277317257, "grad_norm": 0.23817096650600433, "learning_rate": 0.00016930879552311048, "loss": 11.6623, "step": 36747 }, { "epoch": 0.7692372100812191, "grad_norm": 0.3482801914215088, "learning_rate": 0.00016930721501249297, "loss": 11.692, "step": 36748 }, { "epoch": 0.7692581428451812, "grad_norm": 0.32308509945869446, "learning_rate": 0.00016930563446855812, "loss": 11.6635, "step": 36749 }, { "epoch": 0.7692790756091434, "grad_norm": 0.253686785697937, "learning_rate": 0.00016930405389130662, "loss": 11.6762, "step": 36750 }, { "epoch": 0.7693000083731056, "grad_norm": 0.26255226135253906, "learning_rate": 0.00016930247328073933, "loss": 11.6476, "step": 36751 }, { "epoch": 0.7693209411370677, "grad_norm": 0.2929622530937195, "learning_rate": 0.00016930089263685692, "loss": 11.6697, "step": 36752 }, { "epoch": 0.7693418739010299, "grad_norm": 0.2866995334625244, "learning_rate": 0.0001692993119596602, "loss": 11.6613, "step": 36753 }, { "epoch": 0.769362806664992, "grad_norm": 0.29442963004112244, "learning_rate": 0.00016929773124914988, "loss": 11.6648, "step": 36754 }, { "epoch": 0.7693837394289542, "grad_norm": 0.36862486600875854, "learning_rate": 0.00016929615050532678, "loss": 11.66, "step": 36755 }, { "epoch": 0.7694046721929163, "grad_norm": 0.3074605464935303, "learning_rate": 0.0001692945697281916, "loss": 11.6693, "step": 36756 }, { "epoch": 0.7694256049568785, "grad_norm": 0.2638534605503082, "learning_rate": 0.00016929298891774517, "loss": 11.6774, "step": 36757 }, { "epoch": 0.7694465377208407, "grad_norm": 0.349323570728302, "learning_rate": 0.0001692914080739882, "loss": 11.6662, "step": 36758 }, { "epoch": 0.7694674704848028, "grad_norm": 0.31104451417922974, "learning_rate": 0.00016928982719692149, "loss": 11.6743, "step": 36759 }, { "epoch": 0.769488403248765, "grad_norm": 0.29896607995033264, "learning_rate": 0.0001692882462865457, "loss": 11.6699, "step": 36760 }, { "epoch": 0.7695093360127271, "grad_norm": 0.30474910140037537, "learning_rate": 0.00016928666534286172, "loss": 11.6806, "step": 36761 }, { "epoch": 0.7695302687766893, "grad_norm": 0.23200583457946777, "learning_rate": 0.00016928508436587022, "loss": 11.6626, "step": 36762 }, { "epoch": 0.7695512015406514, "grad_norm": 0.25866034626960754, "learning_rate": 0.000169283503355572, "loss": 11.6697, "step": 36763 }, { "epoch": 0.7695721343046136, "grad_norm": 0.2476511150598526, "learning_rate": 0.00016928192231196784, "loss": 11.6622, "step": 36764 }, { "epoch": 0.7695930670685758, "grad_norm": 0.3220303952693939, "learning_rate": 0.00016928034123505844, "loss": 11.6729, "step": 36765 }, { "epoch": 0.7696139998325379, "grad_norm": 0.26613688468933105, "learning_rate": 0.00016927876012484458, "loss": 11.6586, "step": 36766 }, { "epoch": 0.7696349325965001, "grad_norm": 0.25348570942878723, "learning_rate": 0.00016927717898132706, "loss": 11.6593, "step": 36767 }, { "epoch": 0.7696558653604622, "grad_norm": 0.4104480743408203, "learning_rate": 0.00016927559780450662, "loss": 11.6715, "step": 36768 }, { "epoch": 0.7696767981244244, "grad_norm": 0.2903044819831848, "learning_rate": 0.00016927401659438398, "loss": 11.6735, "step": 36769 }, { "epoch": 0.7696977308883866, "grad_norm": 0.33533576130867004, "learning_rate": 0.00016927243535095997, "loss": 11.6827, "step": 36770 }, { "epoch": 0.7697186636523486, "grad_norm": 0.37876030802726746, "learning_rate": 0.00016927085407423525, "loss": 11.7041, "step": 36771 }, { "epoch": 0.7697395964163108, "grad_norm": 0.34747114777565, "learning_rate": 0.0001692692727642107, "loss": 11.6631, "step": 36772 }, { "epoch": 0.7697605291802729, "grad_norm": 0.25729116797447205, "learning_rate": 0.000169267691420887, "loss": 11.6567, "step": 36773 }, { "epoch": 0.7697814619442351, "grad_norm": 0.33099693059921265, "learning_rate": 0.00016926611004426494, "loss": 11.6815, "step": 36774 }, { "epoch": 0.7698023947081972, "grad_norm": 0.30959466099739075, "learning_rate": 0.00016926452863434528, "loss": 11.6863, "step": 36775 }, { "epoch": 0.7698233274721594, "grad_norm": 0.2925817370414734, "learning_rate": 0.00016926294719112875, "loss": 11.6645, "step": 36776 }, { "epoch": 0.7698442602361216, "grad_norm": 0.33509451150894165, "learning_rate": 0.00016926136571461612, "loss": 11.6802, "step": 36777 }, { "epoch": 0.7698651930000837, "grad_norm": 0.31242433190345764, "learning_rate": 0.00016925978420480821, "loss": 11.6673, "step": 36778 }, { "epoch": 0.7698861257640459, "grad_norm": 0.3527827858924866, "learning_rate": 0.00016925820266170568, "loss": 11.6612, "step": 36779 }, { "epoch": 0.769907058528008, "grad_norm": 0.3125337064266205, "learning_rate": 0.0001692566210853094, "loss": 11.6835, "step": 36780 }, { "epoch": 0.7699279912919702, "grad_norm": 0.24241776764392853, "learning_rate": 0.00016925503947562006, "loss": 11.6728, "step": 36781 }, { "epoch": 0.7699489240559323, "grad_norm": 0.25055593252182007, "learning_rate": 0.0001692534578326384, "loss": 11.6585, "step": 36782 }, { "epoch": 0.7699698568198945, "grad_norm": 0.33315521478652954, "learning_rate": 0.00016925187615636524, "loss": 11.6862, "step": 36783 }, { "epoch": 0.7699907895838567, "grad_norm": 0.2998979091644287, "learning_rate": 0.00016925029444680133, "loss": 11.6631, "step": 36784 }, { "epoch": 0.7700117223478188, "grad_norm": 0.29185810685157776, "learning_rate": 0.0001692487127039474, "loss": 11.6826, "step": 36785 }, { "epoch": 0.770032655111781, "grad_norm": 0.22258338332176208, "learning_rate": 0.00016924713092780422, "loss": 11.6599, "step": 36786 }, { "epoch": 0.7700535878757431, "grad_norm": 0.3504367470741272, "learning_rate": 0.00016924554911837257, "loss": 11.6778, "step": 36787 }, { "epoch": 0.7700745206397053, "grad_norm": 0.26917126774787903, "learning_rate": 0.0001692439672756532, "loss": 11.6553, "step": 36788 }, { "epoch": 0.7700954534036675, "grad_norm": 0.5871591567993164, "learning_rate": 0.00016924238539964685, "loss": 11.6957, "step": 36789 }, { "epoch": 0.7701163861676296, "grad_norm": 0.2968660295009613, "learning_rate": 0.00016924080349035433, "loss": 11.6684, "step": 36790 }, { "epoch": 0.7701373189315918, "grad_norm": 0.28542476892471313, "learning_rate": 0.0001692392215477763, "loss": 11.6808, "step": 36791 }, { "epoch": 0.7701582516955539, "grad_norm": 0.2851111888885498, "learning_rate": 0.00016923763957191367, "loss": 11.6691, "step": 36792 }, { "epoch": 0.770179184459516, "grad_norm": 0.3265693783760071, "learning_rate": 0.00016923605756276707, "loss": 11.6729, "step": 36793 }, { "epoch": 0.7702001172234781, "grad_norm": 0.3347422182559967, "learning_rate": 0.0001692344755203373, "loss": 11.6665, "step": 36794 }, { "epoch": 0.7702210499874403, "grad_norm": 0.3283422887325287, "learning_rate": 0.00016923289344462515, "loss": 11.6597, "step": 36795 }, { "epoch": 0.7702419827514025, "grad_norm": 0.26110848784446716, "learning_rate": 0.00016923131133563137, "loss": 11.6509, "step": 36796 }, { "epoch": 0.7702629155153646, "grad_norm": 0.32075756788253784, "learning_rate": 0.0001692297291933567, "loss": 11.6704, "step": 36797 }, { "epoch": 0.7702838482793268, "grad_norm": 0.28254273533821106, "learning_rate": 0.00016922814701780193, "loss": 11.6745, "step": 36798 }, { "epoch": 0.7703047810432889, "grad_norm": 0.38346484303474426, "learning_rate": 0.00016922656480896774, "loss": 11.6662, "step": 36799 }, { "epoch": 0.7703257138072511, "grad_norm": 0.3059454560279846, "learning_rate": 0.00016922498256685505, "loss": 11.6757, "step": 36800 }, { "epoch": 0.7703466465712132, "grad_norm": 0.28662559390068054, "learning_rate": 0.00016922340029146447, "loss": 11.6627, "step": 36801 }, { "epoch": 0.7703675793351754, "grad_norm": 0.30559462308883667, "learning_rate": 0.0001692218179827968, "loss": 11.6532, "step": 36802 }, { "epoch": 0.7703885120991376, "grad_norm": 0.28164827823638916, "learning_rate": 0.00016922023564085284, "loss": 11.6715, "step": 36803 }, { "epoch": 0.7704094448630997, "grad_norm": 0.28287455439567566, "learning_rate": 0.0001692186532656333, "loss": 11.6741, "step": 36804 }, { "epoch": 0.7704303776270619, "grad_norm": 0.29254043102264404, "learning_rate": 0.000169217070857139, "loss": 11.679, "step": 36805 }, { "epoch": 0.770451310391024, "grad_norm": 0.26988905668258667, "learning_rate": 0.00016921548841537063, "loss": 11.6705, "step": 36806 }, { "epoch": 0.7704722431549862, "grad_norm": 0.26023223996162415, "learning_rate": 0.00016921390594032903, "loss": 11.6579, "step": 36807 }, { "epoch": 0.7704931759189484, "grad_norm": 0.3068180978298187, "learning_rate": 0.00016921232343201489, "loss": 11.6672, "step": 36808 }, { "epoch": 0.7705141086829105, "grad_norm": 0.27312391996383667, "learning_rate": 0.00016921074089042902, "loss": 11.6843, "step": 36809 }, { "epoch": 0.7705350414468727, "grad_norm": 0.3126775622367859, "learning_rate": 0.0001692091583155721, "loss": 11.6829, "step": 36810 }, { "epoch": 0.7705559742108348, "grad_norm": 0.27563437819480896, "learning_rate": 0.00016920757570744504, "loss": 11.6736, "step": 36811 }, { "epoch": 0.770576906974797, "grad_norm": 0.46812719106674194, "learning_rate": 0.00016920599306604846, "loss": 11.6736, "step": 36812 }, { "epoch": 0.7705978397387591, "grad_norm": 0.3643437623977661, "learning_rate": 0.00016920441039138318, "loss": 11.6665, "step": 36813 }, { "epoch": 0.7706187725027213, "grad_norm": 0.40165430307388306, "learning_rate": 0.00016920282768344996, "loss": 11.6748, "step": 36814 }, { "epoch": 0.7706397052666835, "grad_norm": 0.28757813572883606, "learning_rate": 0.00016920124494224954, "loss": 11.6708, "step": 36815 }, { "epoch": 0.7706606380306456, "grad_norm": 0.2737647593021393, "learning_rate": 0.0001691996621677827, "loss": 11.6608, "step": 36816 }, { "epoch": 0.7706815707946078, "grad_norm": 0.3082471191883087, "learning_rate": 0.00016919807936005018, "loss": 11.6638, "step": 36817 }, { "epoch": 0.7707025035585698, "grad_norm": 0.28507280349731445, "learning_rate": 0.00016919649651905277, "loss": 11.6911, "step": 36818 }, { "epoch": 0.770723436322532, "grad_norm": 0.305696576833725, "learning_rate": 0.00016919491364479123, "loss": 11.6853, "step": 36819 }, { "epoch": 0.7707443690864941, "grad_norm": 0.24592043459415436, "learning_rate": 0.00016919333073726631, "loss": 11.6667, "step": 36820 }, { "epoch": 0.7707653018504563, "grad_norm": 0.32194995880126953, "learning_rate": 0.00016919174779647875, "loss": 11.6655, "step": 36821 }, { "epoch": 0.7707862346144185, "grad_norm": 0.27700087428092957, "learning_rate": 0.00016919016482242936, "loss": 11.6482, "step": 36822 }, { "epoch": 0.7708071673783806, "grad_norm": 0.24771739542484283, "learning_rate": 0.00016918858181511884, "loss": 11.669, "step": 36823 }, { "epoch": 0.7708281001423428, "grad_norm": 0.3836807310581207, "learning_rate": 0.00016918699877454797, "loss": 11.6652, "step": 36824 }, { "epoch": 0.7708490329063049, "grad_norm": 0.33445948362350464, "learning_rate": 0.00016918541570071756, "loss": 11.6682, "step": 36825 }, { "epoch": 0.7708699656702671, "grad_norm": 0.3313753604888916, "learning_rate": 0.00016918383259362832, "loss": 11.6789, "step": 36826 }, { "epoch": 0.7708908984342293, "grad_norm": 0.34538277983665466, "learning_rate": 0.00016918224945328104, "loss": 11.671, "step": 36827 }, { "epoch": 0.7709118311981914, "grad_norm": 0.322448194026947, "learning_rate": 0.00016918066627967643, "loss": 11.673, "step": 36828 }, { "epoch": 0.7709327639621536, "grad_norm": 0.31501710414886475, "learning_rate": 0.00016917908307281532, "loss": 11.6894, "step": 36829 }, { "epoch": 0.7709536967261157, "grad_norm": 0.30829063057899475, "learning_rate": 0.00016917749983269843, "loss": 11.6807, "step": 36830 }, { "epoch": 0.7709746294900779, "grad_norm": 0.37571951746940613, "learning_rate": 0.00016917591655932652, "loss": 11.6753, "step": 36831 }, { "epoch": 0.77099556225404, "grad_norm": 0.2600105404853821, "learning_rate": 0.00016917433325270035, "loss": 11.6759, "step": 36832 }, { "epoch": 0.7710164950180022, "grad_norm": 0.30243733525276184, "learning_rate": 0.00016917274991282074, "loss": 11.6722, "step": 36833 }, { "epoch": 0.7710374277819644, "grad_norm": 0.36606737971305847, "learning_rate": 0.00016917116653968837, "loss": 11.6659, "step": 36834 }, { "epoch": 0.7710583605459265, "grad_norm": 0.3285045921802521, "learning_rate": 0.00016916958313330403, "loss": 11.6684, "step": 36835 }, { "epoch": 0.7710792933098887, "grad_norm": 0.35767966508865356, "learning_rate": 0.0001691679996936685, "loss": 11.6527, "step": 36836 }, { "epoch": 0.7711002260738508, "grad_norm": 0.2880367040634155, "learning_rate": 0.0001691664162207825, "loss": 11.6675, "step": 36837 }, { "epoch": 0.771121158837813, "grad_norm": 0.2980346977710724, "learning_rate": 0.00016916483271464685, "loss": 11.6844, "step": 36838 }, { "epoch": 0.771142091601775, "grad_norm": 0.2869119346141815, "learning_rate": 0.00016916324917526224, "loss": 11.6947, "step": 36839 }, { "epoch": 0.7711630243657372, "grad_norm": 0.25221768021583557, "learning_rate": 0.0001691616656026295, "loss": 11.6693, "step": 36840 }, { "epoch": 0.7711839571296994, "grad_norm": 0.37333250045776367, "learning_rate": 0.00016916008199674936, "loss": 11.6852, "step": 36841 }, { "epoch": 0.7712048898936615, "grad_norm": 0.3386063277721405, "learning_rate": 0.00016915849835762257, "loss": 11.6601, "step": 36842 }, { "epoch": 0.7712258226576237, "grad_norm": 0.30402442812919617, "learning_rate": 0.0001691569146852499, "loss": 11.6628, "step": 36843 }, { "epoch": 0.7712467554215858, "grad_norm": 0.33422061800956726, "learning_rate": 0.00016915533097963215, "loss": 11.6605, "step": 36844 }, { "epoch": 0.771267688185548, "grad_norm": 0.31388336420059204, "learning_rate": 0.00016915374724077, "loss": 11.6669, "step": 36845 }, { "epoch": 0.7712886209495102, "grad_norm": 0.26164501905441284, "learning_rate": 0.00016915216346866433, "loss": 11.6652, "step": 36846 }, { "epoch": 0.7713095537134723, "grad_norm": 0.30717700719833374, "learning_rate": 0.00016915057966331577, "loss": 11.6786, "step": 36847 }, { "epoch": 0.7713304864774345, "grad_norm": 0.2519274353981018, "learning_rate": 0.00016914899582472513, "loss": 11.6766, "step": 36848 }, { "epoch": 0.7713514192413966, "grad_norm": 0.26397082209587097, "learning_rate": 0.0001691474119528932, "loss": 11.673, "step": 36849 }, { "epoch": 0.7713723520053588, "grad_norm": 0.28926363587379456, "learning_rate": 0.00016914582804782074, "loss": 11.6627, "step": 36850 }, { "epoch": 0.7713932847693209, "grad_norm": 0.27974599599838257, "learning_rate": 0.0001691442441095085, "loss": 11.6759, "step": 36851 }, { "epoch": 0.7714142175332831, "grad_norm": 0.3182912766933441, "learning_rate": 0.00016914266013795718, "loss": 11.6859, "step": 36852 }, { "epoch": 0.7714351502972453, "grad_norm": 0.35321855545043945, "learning_rate": 0.00016914107613316765, "loss": 11.6842, "step": 36853 }, { "epoch": 0.7714560830612074, "grad_norm": 0.29266807436943054, "learning_rate": 0.0001691394920951406, "loss": 11.6722, "step": 36854 }, { "epoch": 0.7714770158251696, "grad_norm": 0.3066652715206146, "learning_rate": 0.00016913790802387682, "loss": 11.6879, "step": 36855 }, { "epoch": 0.7714979485891317, "grad_norm": 0.24439822137355804, "learning_rate": 0.00016913632391937707, "loss": 11.6674, "step": 36856 }, { "epoch": 0.7715188813530939, "grad_norm": 0.26915812492370605, "learning_rate": 0.00016913473978164207, "loss": 11.6573, "step": 36857 }, { "epoch": 0.771539814117056, "grad_norm": 0.3035781979560852, "learning_rate": 0.00016913315561067264, "loss": 11.6599, "step": 36858 }, { "epoch": 0.7715607468810182, "grad_norm": 0.3130723536014557, "learning_rate": 0.0001691315714064695, "loss": 11.6647, "step": 36859 }, { "epoch": 0.7715816796449804, "grad_norm": 0.3098725974559784, "learning_rate": 0.00016912998716903343, "loss": 11.6758, "step": 36860 }, { "epoch": 0.7716026124089425, "grad_norm": 0.24283522367477417, "learning_rate": 0.0001691284028983652, "loss": 11.6589, "step": 36861 }, { "epoch": 0.7716235451729047, "grad_norm": 0.3489251136779785, "learning_rate": 0.00016912681859446556, "loss": 11.652, "step": 36862 }, { "epoch": 0.7716444779368667, "grad_norm": 0.32598355412483215, "learning_rate": 0.0001691252342573353, "loss": 11.6673, "step": 36863 }, { "epoch": 0.771665410700829, "grad_norm": 0.30281487107276917, "learning_rate": 0.0001691236498869751, "loss": 11.6751, "step": 36864 }, { "epoch": 0.771686343464791, "grad_norm": 0.25461456179618835, "learning_rate": 0.00016912206548338582, "loss": 11.6764, "step": 36865 }, { "epoch": 0.7717072762287532, "grad_norm": 0.28041863441467285, "learning_rate": 0.00016912048104656817, "loss": 11.683, "step": 36866 }, { "epoch": 0.7717282089927154, "grad_norm": 0.35299956798553467, "learning_rate": 0.0001691188965765229, "loss": 11.6696, "step": 36867 }, { "epoch": 0.7717491417566775, "grad_norm": 0.31877073645591736, "learning_rate": 0.0001691173120732508, "loss": 11.664, "step": 36868 }, { "epoch": 0.7717700745206397, "grad_norm": 0.3332737982273102, "learning_rate": 0.0001691157275367526, "loss": 11.6649, "step": 36869 }, { "epoch": 0.7717910072846018, "grad_norm": 0.28237149119377136, "learning_rate": 0.00016911414296702913, "loss": 11.668, "step": 36870 }, { "epoch": 0.771811940048564, "grad_norm": 0.3098170757293701, "learning_rate": 0.00016911255836408112, "loss": 11.6686, "step": 36871 }, { "epoch": 0.7718328728125262, "grad_norm": 0.33201685547828674, "learning_rate": 0.00016911097372790924, "loss": 11.6693, "step": 36872 }, { "epoch": 0.7718538055764883, "grad_norm": 0.26656395196914673, "learning_rate": 0.0001691093890585144, "loss": 11.6683, "step": 36873 }, { "epoch": 0.7718747383404505, "grad_norm": 0.26946908235549927, "learning_rate": 0.00016910780435589726, "loss": 11.6681, "step": 36874 }, { "epoch": 0.7718956711044126, "grad_norm": 0.2566118538379669, "learning_rate": 0.00016910621962005863, "loss": 11.6638, "step": 36875 }, { "epoch": 0.7719166038683748, "grad_norm": 0.333988755941391, "learning_rate": 0.00016910463485099924, "loss": 11.6615, "step": 36876 }, { "epoch": 0.7719375366323369, "grad_norm": 0.3060370087623596, "learning_rate": 0.0001691030500487199, "loss": 11.6711, "step": 36877 }, { "epoch": 0.7719584693962991, "grad_norm": 0.25046634674072266, "learning_rate": 0.0001691014652132213, "loss": 11.6617, "step": 36878 }, { "epoch": 0.7719794021602613, "grad_norm": 0.28018835186958313, "learning_rate": 0.00016909988034450425, "loss": 11.6631, "step": 36879 }, { "epoch": 0.7720003349242234, "grad_norm": 0.26367759704589844, "learning_rate": 0.0001690982954425695, "loss": 11.676, "step": 36880 }, { "epoch": 0.7720212676881856, "grad_norm": 0.27553603053092957, "learning_rate": 0.00016909671050741783, "loss": 11.6679, "step": 36881 }, { "epoch": 0.7720422004521477, "grad_norm": 0.33267661929130554, "learning_rate": 0.00016909512553904996, "loss": 11.6804, "step": 36882 }, { "epoch": 0.7720631332161099, "grad_norm": 0.339081734418869, "learning_rate": 0.00016909354053746672, "loss": 11.6831, "step": 36883 }, { "epoch": 0.772084065980072, "grad_norm": 0.26068753004074097, "learning_rate": 0.00016909195550266878, "loss": 11.6666, "step": 36884 }, { "epoch": 0.7721049987440342, "grad_norm": 0.35067424178123474, "learning_rate": 0.000169090370434657, "loss": 11.6759, "step": 36885 }, { "epoch": 0.7721259315079964, "grad_norm": 0.340846985578537, "learning_rate": 0.00016908878533343207, "loss": 11.6587, "step": 36886 }, { "epoch": 0.7721468642719584, "grad_norm": 0.274446964263916, "learning_rate": 0.00016908720019899475, "loss": 11.6628, "step": 36887 }, { "epoch": 0.7721677970359206, "grad_norm": 0.2483183741569519, "learning_rate": 0.0001690856150313459, "loss": 11.6566, "step": 36888 }, { "epoch": 0.7721887297998827, "grad_norm": 0.3724949061870575, "learning_rate": 0.00016908402983048615, "loss": 11.667, "step": 36889 }, { "epoch": 0.7722096625638449, "grad_norm": 0.26921212673187256, "learning_rate": 0.00016908244459641636, "loss": 11.6665, "step": 36890 }, { "epoch": 0.7722305953278071, "grad_norm": 0.27334684133529663, "learning_rate": 0.00016908085932913723, "loss": 11.6657, "step": 36891 }, { "epoch": 0.7722515280917692, "grad_norm": 0.30193012952804565, "learning_rate": 0.00016907927402864953, "loss": 11.6608, "step": 36892 }, { "epoch": 0.7722724608557314, "grad_norm": 0.3033040761947632, "learning_rate": 0.00016907768869495407, "loss": 11.6676, "step": 36893 }, { "epoch": 0.7722933936196935, "grad_norm": 0.2705994248390198, "learning_rate": 0.00016907610332805154, "loss": 11.6599, "step": 36894 }, { "epoch": 0.7723143263836557, "grad_norm": 0.3295598030090332, "learning_rate": 0.0001690745179279428, "loss": 11.6727, "step": 36895 }, { "epoch": 0.7723352591476178, "grad_norm": 0.3648149371147156, "learning_rate": 0.00016907293249462853, "loss": 11.6715, "step": 36896 }, { "epoch": 0.77235619191158, "grad_norm": 0.2843099534511566, "learning_rate": 0.0001690713470281095, "loss": 11.6654, "step": 36897 }, { "epoch": 0.7723771246755422, "grad_norm": 0.32434728741645813, "learning_rate": 0.0001690697615283865, "loss": 11.68, "step": 36898 }, { "epoch": 0.7723980574395043, "grad_norm": 0.2954353988170624, "learning_rate": 0.00016906817599546029, "loss": 11.6773, "step": 36899 }, { "epoch": 0.7724189902034665, "grad_norm": 0.28955167531967163, "learning_rate": 0.00016906659042933164, "loss": 11.6637, "step": 36900 }, { "epoch": 0.7724399229674286, "grad_norm": 0.3274228572845459, "learning_rate": 0.00016906500483000126, "loss": 11.6772, "step": 36901 }, { "epoch": 0.7724608557313908, "grad_norm": 0.35789886116981506, "learning_rate": 0.00016906341919747, "loss": 11.6641, "step": 36902 }, { "epoch": 0.7724817884953529, "grad_norm": 0.3473539352416992, "learning_rate": 0.00016906183353173853, "loss": 11.688, "step": 36903 }, { "epoch": 0.7725027212593151, "grad_norm": 0.32438671588897705, "learning_rate": 0.00016906024783280762, "loss": 11.6642, "step": 36904 }, { "epoch": 0.7725236540232773, "grad_norm": 0.3683798611164093, "learning_rate": 0.00016905866210067814, "loss": 11.6822, "step": 36905 }, { "epoch": 0.7725445867872394, "grad_norm": 0.2600732147693634, "learning_rate": 0.0001690570763353507, "loss": 11.6723, "step": 36906 }, { "epoch": 0.7725655195512016, "grad_norm": 0.359443336725235, "learning_rate": 0.00016905549053682618, "loss": 11.6751, "step": 36907 }, { "epoch": 0.7725864523151637, "grad_norm": 0.27647119760513306, "learning_rate": 0.00016905390470510533, "loss": 11.6552, "step": 36908 }, { "epoch": 0.7726073850791259, "grad_norm": 0.2839508056640625, "learning_rate": 0.0001690523188401888, "loss": 11.6701, "step": 36909 }, { "epoch": 0.772628317843088, "grad_norm": 0.42011719942092896, "learning_rate": 0.00016905073294207754, "loss": 11.6742, "step": 36910 }, { "epoch": 0.7726492506070501, "grad_norm": 0.36286434531211853, "learning_rate": 0.00016904914701077215, "loss": 11.6699, "step": 36911 }, { "epoch": 0.7726701833710123, "grad_norm": 0.26661786437034607, "learning_rate": 0.00016904756104627345, "loss": 11.6824, "step": 36912 }, { "epoch": 0.7726911161349744, "grad_norm": 0.33702704310417175, "learning_rate": 0.0001690459750485822, "loss": 11.6675, "step": 36913 }, { "epoch": 0.7727120488989366, "grad_norm": 0.3478063941001892, "learning_rate": 0.0001690443890176992, "loss": 11.6744, "step": 36914 }, { "epoch": 0.7727329816628987, "grad_norm": 0.313664048910141, "learning_rate": 0.00016904280295362516, "loss": 11.6752, "step": 36915 }, { "epoch": 0.7727539144268609, "grad_norm": 0.2805934250354767, "learning_rate": 0.00016904121685636087, "loss": 11.6667, "step": 36916 }, { "epoch": 0.7727748471908231, "grad_norm": 0.32147273421287537, "learning_rate": 0.00016903963072590708, "loss": 11.6659, "step": 36917 }, { "epoch": 0.7727957799547852, "grad_norm": 0.28698253631591797, "learning_rate": 0.00016903804456226454, "loss": 11.6754, "step": 36918 }, { "epoch": 0.7728167127187474, "grad_norm": 0.3085034191608429, "learning_rate": 0.00016903645836543403, "loss": 11.654, "step": 36919 }, { "epoch": 0.7728376454827095, "grad_norm": 0.30423519015312195, "learning_rate": 0.00016903487213541633, "loss": 11.6762, "step": 36920 }, { "epoch": 0.7728585782466717, "grad_norm": 0.2574470043182373, "learning_rate": 0.00016903328587221215, "loss": 11.6737, "step": 36921 }, { "epoch": 0.7728795110106338, "grad_norm": 0.25999125838279724, "learning_rate": 0.0001690316995758223, "loss": 11.6593, "step": 36922 }, { "epoch": 0.772900443774596, "grad_norm": 0.30800774693489075, "learning_rate": 0.00016903011324624756, "loss": 11.6666, "step": 36923 }, { "epoch": 0.7729213765385582, "grad_norm": 0.30603161454200745, "learning_rate": 0.00016902852688348866, "loss": 11.6804, "step": 36924 }, { "epoch": 0.7729423093025203, "grad_norm": 0.3223744034767151, "learning_rate": 0.00016902694048754634, "loss": 11.6535, "step": 36925 }, { "epoch": 0.7729632420664825, "grad_norm": 0.32398515939712524, "learning_rate": 0.00016902535405842136, "loss": 11.6694, "step": 36926 }, { "epoch": 0.7729841748304446, "grad_norm": 0.260877400636673, "learning_rate": 0.00016902376759611457, "loss": 11.6628, "step": 36927 }, { "epoch": 0.7730051075944068, "grad_norm": 0.3409534692764282, "learning_rate": 0.0001690221811006266, "loss": 11.6746, "step": 36928 }, { "epoch": 0.773026040358369, "grad_norm": 0.2936646342277527, "learning_rate": 0.00016902059457195832, "loss": 11.6755, "step": 36929 }, { "epoch": 0.7730469731223311, "grad_norm": 0.32967066764831543, "learning_rate": 0.00016901900801011047, "loss": 11.6583, "step": 36930 }, { "epoch": 0.7730679058862933, "grad_norm": 0.43724995851516724, "learning_rate": 0.00016901742141508378, "loss": 11.6777, "step": 36931 }, { "epoch": 0.7730888386502553, "grad_norm": 0.2706523537635803, "learning_rate": 0.00016901583478687906, "loss": 11.659, "step": 36932 }, { "epoch": 0.7731097714142176, "grad_norm": 0.4210945665836334, "learning_rate": 0.00016901424812549706, "loss": 11.6921, "step": 36933 }, { "epoch": 0.7731307041781796, "grad_norm": 0.29041481018066406, "learning_rate": 0.00016901266143093846, "loss": 11.6565, "step": 36934 }, { "epoch": 0.7731516369421418, "grad_norm": 0.3277434706687927, "learning_rate": 0.00016901107470320415, "loss": 11.692, "step": 36935 }, { "epoch": 0.773172569706104, "grad_norm": 0.33501216769218445, "learning_rate": 0.0001690094879422948, "loss": 11.6745, "step": 36936 }, { "epoch": 0.7731935024700661, "grad_norm": 0.22132602334022522, "learning_rate": 0.00016900790114821122, "loss": 11.6574, "step": 36937 }, { "epoch": 0.7732144352340283, "grad_norm": 0.31811320781707764, "learning_rate": 0.00016900631432095414, "loss": 11.666, "step": 36938 }, { "epoch": 0.7732353679979904, "grad_norm": 0.3103875517845154, "learning_rate": 0.00016900472746052435, "loss": 11.6578, "step": 36939 }, { "epoch": 0.7732563007619526, "grad_norm": 0.27492353320121765, "learning_rate": 0.0001690031405669226, "loss": 11.6621, "step": 36940 }, { "epoch": 0.7732772335259147, "grad_norm": 0.2823631763458252, "learning_rate": 0.00016900155364014968, "loss": 11.6875, "step": 36941 }, { "epoch": 0.7732981662898769, "grad_norm": 0.26764121651649475, "learning_rate": 0.0001689999666802063, "loss": 11.662, "step": 36942 }, { "epoch": 0.7733190990538391, "grad_norm": 0.321556955575943, "learning_rate": 0.0001689983796870933, "loss": 11.6627, "step": 36943 }, { "epoch": 0.7733400318178012, "grad_norm": 0.27961307764053345, "learning_rate": 0.00016899679266081137, "loss": 11.6511, "step": 36944 }, { "epoch": 0.7733609645817634, "grad_norm": 0.2564241290092468, "learning_rate": 0.00016899520560136128, "loss": 11.6687, "step": 36945 }, { "epoch": 0.7733818973457255, "grad_norm": 0.3201131224632263, "learning_rate": 0.00016899361850874385, "loss": 11.6725, "step": 36946 }, { "epoch": 0.7734028301096877, "grad_norm": 0.2735704481601715, "learning_rate": 0.00016899203138295979, "loss": 11.6578, "step": 36947 }, { "epoch": 0.7734237628736499, "grad_norm": 0.3141309916973114, "learning_rate": 0.00016899044422400986, "loss": 11.6578, "step": 36948 }, { "epoch": 0.773444695637612, "grad_norm": 0.34198668599128723, "learning_rate": 0.00016898885703189485, "loss": 11.6716, "step": 36949 }, { "epoch": 0.7734656284015742, "grad_norm": 0.2886371314525604, "learning_rate": 0.00016898726980661553, "loss": 11.6688, "step": 36950 }, { "epoch": 0.7734865611655363, "grad_norm": 0.29561635851860046, "learning_rate": 0.00016898568254817265, "loss": 11.6776, "step": 36951 }, { "epoch": 0.7735074939294985, "grad_norm": 0.34228605031967163, "learning_rate": 0.00016898409525656694, "loss": 11.6705, "step": 36952 }, { "epoch": 0.7735284266934606, "grad_norm": 0.5224062204360962, "learning_rate": 0.0001689825079317992, "loss": 11.6862, "step": 36953 }, { "epoch": 0.7735493594574228, "grad_norm": 0.34397298097610474, "learning_rate": 0.0001689809205738702, "loss": 11.6551, "step": 36954 }, { "epoch": 0.773570292221385, "grad_norm": 0.27295416593551636, "learning_rate": 0.00016897933318278066, "loss": 11.667, "step": 36955 }, { "epoch": 0.773591224985347, "grad_norm": 0.237738698720932, "learning_rate": 0.0001689777457585314, "loss": 11.664, "step": 36956 }, { "epoch": 0.7736121577493092, "grad_norm": 0.2880452275276184, "learning_rate": 0.00016897615830112318, "loss": 11.6718, "step": 36957 }, { "epoch": 0.7736330905132713, "grad_norm": 0.3532673418521881, "learning_rate": 0.00016897457081055668, "loss": 11.6846, "step": 36958 }, { "epoch": 0.7736540232772335, "grad_norm": 0.3460839092731476, "learning_rate": 0.00016897298328683274, "loss": 11.6744, "step": 36959 }, { "epoch": 0.7736749560411956, "grad_norm": 0.30177363753318787, "learning_rate": 0.00016897139572995214, "loss": 11.6808, "step": 36960 }, { "epoch": 0.7736958888051578, "grad_norm": 0.34555575251579285, "learning_rate": 0.0001689698081399156, "loss": 11.6762, "step": 36961 }, { "epoch": 0.77371682156912, "grad_norm": 0.46948954463005066, "learning_rate": 0.00016896822051672383, "loss": 11.6806, "step": 36962 }, { "epoch": 0.7737377543330821, "grad_norm": 0.32685717940330505, "learning_rate": 0.0001689666328603777, "loss": 11.697, "step": 36963 }, { "epoch": 0.7737586870970443, "grad_norm": 0.27663952112197876, "learning_rate": 0.00016896504517087792, "loss": 11.655, "step": 36964 }, { "epoch": 0.7737796198610064, "grad_norm": 0.27927064895629883, "learning_rate": 0.00016896345744822527, "loss": 11.674, "step": 36965 }, { "epoch": 0.7738005526249686, "grad_norm": 0.3304773271083832, "learning_rate": 0.0001689618696924205, "loss": 11.671, "step": 36966 }, { "epoch": 0.7738214853889308, "grad_norm": 0.26813337206840515, "learning_rate": 0.00016896028190346437, "loss": 11.6516, "step": 36967 }, { "epoch": 0.7738424181528929, "grad_norm": 0.26607728004455566, "learning_rate": 0.00016895869408135766, "loss": 11.6618, "step": 36968 }, { "epoch": 0.7738633509168551, "grad_norm": 0.2976207435131073, "learning_rate": 0.00016895710622610115, "loss": 11.6617, "step": 36969 }, { "epoch": 0.7738842836808172, "grad_norm": 0.3098345398902893, "learning_rate": 0.00016895551833769553, "loss": 11.6761, "step": 36970 }, { "epoch": 0.7739052164447794, "grad_norm": 0.24948212504386902, "learning_rate": 0.00016895393041614165, "loss": 11.643, "step": 36971 }, { "epoch": 0.7739261492087415, "grad_norm": 0.2419021725654602, "learning_rate": 0.0001689523424614402, "loss": 11.6567, "step": 36972 }, { "epoch": 0.7739470819727037, "grad_norm": 0.29685068130493164, "learning_rate": 0.000168950754473592, "loss": 11.6687, "step": 36973 }, { "epoch": 0.7739680147366659, "grad_norm": 0.49703794717788696, "learning_rate": 0.00016894916645259777, "loss": 11.6799, "step": 36974 }, { "epoch": 0.773988947500628, "grad_norm": 0.33345213532447815, "learning_rate": 0.0001689475783984583, "loss": 11.6727, "step": 36975 }, { "epoch": 0.7740098802645902, "grad_norm": 0.3009989559650421, "learning_rate": 0.00016894599031117436, "loss": 11.6574, "step": 36976 }, { "epoch": 0.7740308130285523, "grad_norm": 0.284542977809906, "learning_rate": 0.00016894440219074666, "loss": 11.6595, "step": 36977 }, { "epoch": 0.7740517457925145, "grad_norm": 0.37503013014793396, "learning_rate": 0.00016894281403717604, "loss": 11.672, "step": 36978 }, { "epoch": 0.7740726785564765, "grad_norm": 0.24028493463993073, "learning_rate": 0.00016894122585046323, "loss": 11.6741, "step": 36979 }, { "epoch": 0.7740936113204387, "grad_norm": 0.27829089760780334, "learning_rate": 0.000168939637630609, "loss": 11.6667, "step": 36980 }, { "epoch": 0.774114544084401, "grad_norm": 0.3151693344116211, "learning_rate": 0.00016893804937761407, "loss": 11.6816, "step": 36981 }, { "epoch": 0.774135476848363, "grad_norm": 0.270126610994339, "learning_rate": 0.00016893646109147924, "loss": 11.6592, "step": 36982 }, { "epoch": 0.7741564096123252, "grad_norm": 0.3502766191959381, "learning_rate": 0.00016893487277220532, "loss": 11.6634, "step": 36983 }, { "epoch": 0.7741773423762873, "grad_norm": 0.25241392850875854, "learning_rate": 0.000168933284419793, "loss": 11.6727, "step": 36984 }, { "epoch": 0.7741982751402495, "grad_norm": 0.2801942229270935, "learning_rate": 0.00016893169603424306, "loss": 11.6517, "step": 36985 }, { "epoch": 0.7742192079042117, "grad_norm": 0.27929386496543884, "learning_rate": 0.00016893010761555627, "loss": 11.6663, "step": 36986 }, { "epoch": 0.7742401406681738, "grad_norm": 0.3792955279350281, "learning_rate": 0.0001689285191637334, "loss": 11.6884, "step": 36987 }, { "epoch": 0.774261073432136, "grad_norm": 0.2779597043991089, "learning_rate": 0.00016892693067877522, "loss": 11.6771, "step": 36988 }, { "epoch": 0.7742820061960981, "grad_norm": 0.3045414090156555, "learning_rate": 0.00016892534216068246, "loss": 11.6718, "step": 36989 }, { "epoch": 0.7743029389600603, "grad_norm": 0.3181528151035309, "learning_rate": 0.00016892375360945595, "loss": 11.6811, "step": 36990 }, { "epoch": 0.7743238717240224, "grad_norm": 0.3765808641910553, "learning_rate": 0.00016892216502509638, "loss": 11.6663, "step": 36991 }, { "epoch": 0.7743448044879846, "grad_norm": 0.33253583312034607, "learning_rate": 0.0001689205764076045, "loss": 11.6681, "step": 36992 }, { "epoch": 0.7743657372519468, "grad_norm": 0.29625651240348816, "learning_rate": 0.00016891898775698119, "loss": 11.6592, "step": 36993 }, { "epoch": 0.7743866700159089, "grad_norm": 0.2585362195968628, "learning_rate": 0.0001689173990732271, "loss": 11.6665, "step": 36994 }, { "epoch": 0.7744076027798711, "grad_norm": 0.28338566422462463, "learning_rate": 0.00016891581035634307, "loss": 11.6632, "step": 36995 }, { "epoch": 0.7744285355438332, "grad_norm": 0.22339850664138794, "learning_rate": 0.0001689142216063298, "loss": 11.6666, "step": 36996 }, { "epoch": 0.7744494683077954, "grad_norm": 0.2986636757850647, "learning_rate": 0.00016891263282318807, "loss": 11.6665, "step": 36997 }, { "epoch": 0.7744704010717575, "grad_norm": 0.2769662141799927, "learning_rate": 0.00016891104400691868, "loss": 11.6783, "step": 36998 }, { "epoch": 0.7744913338357197, "grad_norm": 0.289447546005249, "learning_rate": 0.00016890945515752237, "loss": 11.6662, "step": 36999 }, { "epoch": 0.7745122665996819, "grad_norm": 0.36813488602638245, "learning_rate": 0.00016890786627499994, "loss": 11.6815, "step": 37000 }, { "epoch": 0.7745122665996819, "eval_loss": 11.669808387756348, "eval_runtime": 34.3874, "eval_samples_per_second": 27.946, "eval_steps_per_second": 7.008, "step": 37000 }, { "epoch": 0.774533199363644, "grad_norm": 0.2714853584766388, "learning_rate": 0.00016890627735935204, "loss": 11.6465, "step": 37001 }, { "epoch": 0.7745541321276062, "grad_norm": 0.2879411578178406, "learning_rate": 0.00016890468841057957, "loss": 11.6724, "step": 37002 }, { "epoch": 0.7745750648915682, "grad_norm": 0.42914116382598877, "learning_rate": 0.0001689030994286832, "loss": 11.6665, "step": 37003 }, { "epoch": 0.7745959976555304, "grad_norm": 0.286065012216568, "learning_rate": 0.00016890151041366376, "loss": 11.674, "step": 37004 }, { "epoch": 0.7746169304194926, "grad_norm": 0.27701354026794434, "learning_rate": 0.00016889992136552196, "loss": 11.6666, "step": 37005 }, { "epoch": 0.7746378631834547, "grad_norm": 0.3130984902381897, "learning_rate": 0.0001688983322842586, "loss": 11.6794, "step": 37006 }, { "epoch": 0.7746587959474169, "grad_norm": 0.36686941981315613, "learning_rate": 0.00016889674316987444, "loss": 11.674, "step": 37007 }, { "epoch": 0.774679728711379, "grad_norm": 0.320056676864624, "learning_rate": 0.0001688951540223702, "loss": 11.6791, "step": 37008 }, { "epoch": 0.7747006614753412, "grad_norm": 0.33047738671302795, "learning_rate": 0.0001688935648417467, "loss": 11.6807, "step": 37009 }, { "epoch": 0.7747215942393033, "grad_norm": 0.27901166677474976, "learning_rate": 0.0001688919756280047, "loss": 11.6654, "step": 37010 }, { "epoch": 0.7747425270032655, "grad_norm": 0.34175339341163635, "learning_rate": 0.00016889038638114492, "loss": 11.6733, "step": 37011 }, { "epoch": 0.7747634597672277, "grad_norm": 0.3225160241127014, "learning_rate": 0.00016888879710116815, "loss": 11.6683, "step": 37012 }, { "epoch": 0.7747843925311898, "grad_norm": 0.35035258531570435, "learning_rate": 0.00016888720778807516, "loss": 11.6755, "step": 37013 }, { "epoch": 0.774805325295152, "grad_norm": 0.2713562846183777, "learning_rate": 0.00016888561844186672, "loss": 11.6661, "step": 37014 }, { "epoch": 0.7748262580591141, "grad_norm": 0.27578070759773254, "learning_rate": 0.00016888402906254358, "loss": 11.6733, "step": 37015 }, { "epoch": 0.7748471908230763, "grad_norm": 0.270504355430603, "learning_rate": 0.0001688824396501065, "loss": 11.6581, "step": 37016 }, { "epoch": 0.7748681235870384, "grad_norm": 0.27387934923171997, "learning_rate": 0.00016888085020455628, "loss": 11.6642, "step": 37017 }, { "epoch": 0.7748890563510006, "grad_norm": 0.3484691083431244, "learning_rate": 0.0001688792607258936, "loss": 11.6763, "step": 37018 }, { "epoch": 0.7749099891149628, "grad_norm": 0.2700980305671692, "learning_rate": 0.0001688776712141193, "loss": 11.6736, "step": 37019 }, { "epoch": 0.7749309218789249, "grad_norm": 0.26376378536224365, "learning_rate": 0.00016887608166923416, "loss": 11.6665, "step": 37020 }, { "epoch": 0.7749518546428871, "grad_norm": 0.25970056653022766, "learning_rate": 0.00016887449209123886, "loss": 11.6579, "step": 37021 }, { "epoch": 0.7749727874068492, "grad_norm": 0.28030267357826233, "learning_rate": 0.00016887290248013422, "loss": 11.6656, "step": 37022 }, { "epoch": 0.7749937201708114, "grad_norm": 0.265542209148407, "learning_rate": 0.00016887131283592103, "loss": 11.6744, "step": 37023 }, { "epoch": 0.7750146529347736, "grad_norm": 0.33647188544273376, "learning_rate": 0.00016886972315859996, "loss": 11.679, "step": 37024 }, { "epoch": 0.7750355856987357, "grad_norm": 0.31215983629226685, "learning_rate": 0.0001688681334481719, "loss": 11.6728, "step": 37025 }, { "epoch": 0.7750565184626979, "grad_norm": 0.2282021939754486, "learning_rate": 0.0001688665437046375, "loss": 11.6818, "step": 37026 }, { "epoch": 0.7750774512266599, "grad_norm": 0.30894726514816284, "learning_rate": 0.00016886495392799762, "loss": 11.6714, "step": 37027 }, { "epoch": 0.7750983839906221, "grad_norm": 0.35738685727119446, "learning_rate": 0.0001688633641182529, "loss": 11.6792, "step": 37028 }, { "epoch": 0.7751193167545842, "grad_norm": 0.4383680820465088, "learning_rate": 0.00016886177427540426, "loss": 11.6683, "step": 37029 }, { "epoch": 0.7751402495185464, "grad_norm": 0.2934320867061615, "learning_rate": 0.00016886018439945234, "loss": 11.6742, "step": 37030 }, { "epoch": 0.7751611822825086, "grad_norm": 0.2805725932121277, "learning_rate": 0.00016885859449039796, "loss": 11.6771, "step": 37031 }, { "epoch": 0.7751821150464707, "grad_norm": 0.36364611983299255, "learning_rate": 0.00016885700454824187, "loss": 11.6579, "step": 37032 }, { "epoch": 0.7752030478104329, "grad_norm": 0.30101990699768066, "learning_rate": 0.00016885541457298486, "loss": 11.6833, "step": 37033 }, { "epoch": 0.775223980574395, "grad_norm": 0.2897701859474182, "learning_rate": 0.00016885382456462765, "loss": 11.6795, "step": 37034 }, { "epoch": 0.7752449133383572, "grad_norm": 0.29757410287857056, "learning_rate": 0.00016885223452317103, "loss": 11.6732, "step": 37035 }, { "epoch": 0.7752658461023193, "grad_norm": 0.32418134808540344, "learning_rate": 0.00016885064444861577, "loss": 11.6799, "step": 37036 }, { "epoch": 0.7752867788662815, "grad_norm": 0.3274845778942108, "learning_rate": 0.0001688490543409626, "loss": 11.6754, "step": 37037 }, { "epoch": 0.7753077116302437, "grad_norm": 0.29421237111091614, "learning_rate": 0.00016884746420021233, "loss": 11.6831, "step": 37038 }, { "epoch": 0.7753286443942058, "grad_norm": 0.24740727245807648, "learning_rate": 0.00016884587402636573, "loss": 11.6646, "step": 37039 }, { "epoch": 0.775349577158168, "grad_norm": 0.33210623264312744, "learning_rate": 0.0001688442838194235, "loss": 11.6877, "step": 37040 }, { "epoch": 0.7753705099221301, "grad_norm": 0.31716832518577576, "learning_rate": 0.00016884269357938644, "loss": 11.6856, "step": 37041 }, { "epoch": 0.7753914426860923, "grad_norm": 0.27174776792526245, "learning_rate": 0.00016884110330625535, "loss": 11.6757, "step": 37042 }, { "epoch": 0.7754123754500544, "grad_norm": 0.32864436507225037, "learning_rate": 0.0001688395130000309, "loss": 11.6692, "step": 37043 }, { "epoch": 0.7754333082140166, "grad_norm": 0.2990913987159729, "learning_rate": 0.00016883792266071402, "loss": 11.669, "step": 37044 }, { "epoch": 0.7754542409779788, "grad_norm": 0.30302688479423523, "learning_rate": 0.00016883633228830528, "loss": 11.6786, "step": 37045 }, { "epoch": 0.7754751737419409, "grad_norm": 0.4206870496273041, "learning_rate": 0.00016883474188280557, "loss": 11.6595, "step": 37046 }, { "epoch": 0.7754961065059031, "grad_norm": 0.2855050563812256, "learning_rate": 0.00016883315144421564, "loss": 11.6733, "step": 37047 }, { "epoch": 0.7755170392698651, "grad_norm": 0.3062458038330078, "learning_rate": 0.0001688315609725362, "loss": 11.6874, "step": 37048 }, { "epoch": 0.7755379720338273, "grad_norm": 0.49419236183166504, "learning_rate": 0.00016882997046776806, "loss": 11.6762, "step": 37049 }, { "epoch": 0.7755589047977896, "grad_norm": 0.235635906457901, "learning_rate": 0.00016882837992991196, "loss": 11.6869, "step": 37050 }, { "epoch": 0.7755798375617516, "grad_norm": 0.2854158580303192, "learning_rate": 0.0001688267893589687, "loss": 11.68, "step": 37051 }, { "epoch": 0.7756007703257138, "grad_norm": 0.32044756412506104, "learning_rate": 0.00016882519875493902, "loss": 11.6735, "step": 37052 }, { "epoch": 0.7756217030896759, "grad_norm": 0.26150259375572205, "learning_rate": 0.0001688236081178237, "loss": 11.6585, "step": 37053 }, { "epoch": 0.7756426358536381, "grad_norm": 0.28563550114631653, "learning_rate": 0.00016882201744762348, "loss": 11.6598, "step": 37054 }, { "epoch": 0.7756635686176002, "grad_norm": 0.27962201833724976, "learning_rate": 0.0001688204267443391, "loss": 11.6694, "step": 37055 }, { "epoch": 0.7756845013815624, "grad_norm": 0.2889162600040436, "learning_rate": 0.00016881883600797142, "loss": 11.6758, "step": 37056 }, { "epoch": 0.7757054341455246, "grad_norm": 0.2889574468135834, "learning_rate": 0.0001688172452385211, "loss": 11.6586, "step": 37057 }, { "epoch": 0.7757263669094867, "grad_norm": 0.31714051961898804, "learning_rate": 0.000168815654435989, "loss": 11.6532, "step": 37058 }, { "epoch": 0.7757472996734489, "grad_norm": 0.26685672998428345, "learning_rate": 0.0001688140636003758, "loss": 11.6719, "step": 37059 }, { "epoch": 0.775768232437411, "grad_norm": 0.2703494131565094, "learning_rate": 0.00016881247273168231, "loss": 11.6603, "step": 37060 }, { "epoch": 0.7757891652013732, "grad_norm": 0.2351071536540985, "learning_rate": 0.00016881088182990928, "loss": 11.6595, "step": 37061 }, { "epoch": 0.7758100979653353, "grad_norm": 0.4369858503341675, "learning_rate": 0.0001688092908950575, "loss": 11.6766, "step": 37062 }, { "epoch": 0.7758310307292975, "grad_norm": 0.30943676829338074, "learning_rate": 0.00016880769992712766, "loss": 11.6771, "step": 37063 }, { "epoch": 0.7758519634932597, "grad_norm": 0.37599360942840576, "learning_rate": 0.00016880610892612064, "loss": 11.6765, "step": 37064 }, { "epoch": 0.7758728962572218, "grad_norm": 0.36652258038520813, "learning_rate": 0.00016880451789203714, "loss": 11.6788, "step": 37065 }, { "epoch": 0.775893829021184, "grad_norm": 0.5251744985580444, "learning_rate": 0.0001688029268248779, "loss": 11.6669, "step": 37066 }, { "epoch": 0.7759147617851461, "grad_norm": 0.2935829758644104, "learning_rate": 0.00016880133572464372, "loss": 11.6595, "step": 37067 }, { "epoch": 0.7759356945491083, "grad_norm": 0.3408808708190918, "learning_rate": 0.00016879974459133536, "loss": 11.6742, "step": 37068 }, { "epoch": 0.7759566273130705, "grad_norm": 0.3048152029514313, "learning_rate": 0.0001687981534249536, "loss": 11.6621, "step": 37069 }, { "epoch": 0.7759775600770326, "grad_norm": 0.25350436568260193, "learning_rate": 0.00016879656222549918, "loss": 11.6606, "step": 37070 }, { "epoch": 0.7759984928409948, "grad_norm": 0.29054462909698486, "learning_rate": 0.00016879497099297285, "loss": 11.662, "step": 37071 }, { "epoch": 0.7760194256049568, "grad_norm": 0.2812977433204651, "learning_rate": 0.00016879337972737545, "loss": 11.6627, "step": 37072 }, { "epoch": 0.776040358368919, "grad_norm": 0.41598740220069885, "learning_rate": 0.00016879178842870766, "loss": 11.6718, "step": 37073 }, { "epoch": 0.7760612911328811, "grad_norm": 0.31941309571266174, "learning_rate": 0.00016879019709697027, "loss": 11.6601, "step": 37074 }, { "epoch": 0.7760822238968433, "grad_norm": 0.2824608385562897, "learning_rate": 0.00016878860573216406, "loss": 11.6688, "step": 37075 }, { "epoch": 0.7761031566608055, "grad_norm": 0.38347873091697693, "learning_rate": 0.00016878701433428983, "loss": 11.668, "step": 37076 }, { "epoch": 0.7761240894247676, "grad_norm": 0.36746394634246826, "learning_rate": 0.00016878542290334826, "loss": 11.6805, "step": 37077 }, { "epoch": 0.7761450221887298, "grad_norm": 0.2594509720802307, "learning_rate": 0.0001687838314393402, "loss": 11.6705, "step": 37078 }, { "epoch": 0.7761659549526919, "grad_norm": 0.2866310775279999, "learning_rate": 0.00016878223994226633, "loss": 11.6609, "step": 37079 }, { "epoch": 0.7761868877166541, "grad_norm": 0.22985713183879852, "learning_rate": 0.00016878064841212745, "loss": 11.6733, "step": 37080 }, { "epoch": 0.7762078204806162, "grad_norm": 0.3205503225326538, "learning_rate": 0.00016877905684892434, "loss": 11.6726, "step": 37081 }, { "epoch": 0.7762287532445784, "grad_norm": 0.2752530574798584, "learning_rate": 0.0001687774652526578, "loss": 11.6726, "step": 37082 }, { "epoch": 0.7762496860085406, "grad_norm": 0.30611294507980347, "learning_rate": 0.00016877587362332851, "loss": 11.6595, "step": 37083 }, { "epoch": 0.7762706187725027, "grad_norm": 0.7135556936264038, "learning_rate": 0.0001687742819609373, "loss": 11.6763, "step": 37084 }, { "epoch": 0.7762915515364649, "grad_norm": 0.27462634444236755, "learning_rate": 0.0001687726902654849, "loss": 11.6775, "step": 37085 }, { "epoch": 0.776312484300427, "grad_norm": 0.2802116274833679, "learning_rate": 0.00016877109853697213, "loss": 11.6904, "step": 37086 }, { "epoch": 0.7763334170643892, "grad_norm": 0.3305070996284485, "learning_rate": 0.00016876950677539968, "loss": 11.6823, "step": 37087 }, { "epoch": 0.7763543498283514, "grad_norm": 0.27569907903671265, "learning_rate": 0.00016876791498076835, "loss": 11.6734, "step": 37088 }, { "epoch": 0.7763752825923135, "grad_norm": 0.26567158102989197, "learning_rate": 0.0001687663231530789, "loss": 11.6812, "step": 37089 }, { "epoch": 0.7763962153562757, "grad_norm": 0.2924033999443054, "learning_rate": 0.00016876473129233212, "loss": 11.6768, "step": 37090 }, { "epoch": 0.7764171481202378, "grad_norm": 0.47998058795928955, "learning_rate": 0.00016876313939852877, "loss": 11.6044, "step": 37091 }, { "epoch": 0.7764380808842, "grad_norm": 0.32140687108039856, "learning_rate": 0.00016876154747166955, "loss": 11.6685, "step": 37092 }, { "epoch": 0.776459013648162, "grad_norm": 0.2919599711894989, "learning_rate": 0.00016875995551175535, "loss": 11.6468, "step": 37093 }, { "epoch": 0.7764799464121243, "grad_norm": 0.2759537100791931, "learning_rate": 0.00016875836351878677, "loss": 11.6673, "step": 37094 }, { "epoch": 0.7765008791760865, "grad_norm": 0.35503923892974854, "learning_rate": 0.00016875677149276471, "loss": 11.6563, "step": 37095 }, { "epoch": 0.7765218119400485, "grad_norm": 0.32015496492385864, "learning_rate": 0.0001687551794336899, "loss": 11.6698, "step": 37096 }, { "epoch": 0.7765427447040107, "grad_norm": 0.2896227240562439, "learning_rate": 0.0001687535873415631, "loss": 11.6853, "step": 37097 }, { "epoch": 0.7765636774679728, "grad_norm": 0.3982534408569336, "learning_rate": 0.00016875199521638508, "loss": 11.6562, "step": 37098 }, { "epoch": 0.776584610231935, "grad_norm": 0.25316038727760315, "learning_rate": 0.00016875040305815658, "loss": 11.6525, "step": 37099 }, { "epoch": 0.7766055429958971, "grad_norm": 0.287447452545166, "learning_rate": 0.0001687488108668784, "loss": 11.6834, "step": 37100 }, { "epoch": 0.7766264757598593, "grad_norm": 0.2912844717502594, "learning_rate": 0.00016874721864255126, "loss": 11.6471, "step": 37101 }, { "epoch": 0.7766474085238215, "grad_norm": 0.26970261335372925, "learning_rate": 0.00016874562638517597, "loss": 11.6717, "step": 37102 }, { "epoch": 0.7766683412877836, "grad_norm": 0.3296765387058258, "learning_rate": 0.00016874403409475328, "loss": 11.6674, "step": 37103 }, { "epoch": 0.7766892740517458, "grad_norm": 0.274368554353714, "learning_rate": 0.00016874244177128396, "loss": 11.6477, "step": 37104 }, { "epoch": 0.7767102068157079, "grad_norm": 0.3514323830604553, "learning_rate": 0.0001687408494147688, "loss": 11.666, "step": 37105 }, { "epoch": 0.7767311395796701, "grad_norm": 0.3498837947845459, "learning_rate": 0.00016873925702520846, "loss": 11.6735, "step": 37106 }, { "epoch": 0.7767520723436323, "grad_norm": 0.4562516510486603, "learning_rate": 0.00016873766460260382, "loss": 11.6692, "step": 37107 }, { "epoch": 0.7767730051075944, "grad_norm": 0.25780948996543884, "learning_rate": 0.00016873607214695562, "loss": 11.6649, "step": 37108 }, { "epoch": 0.7767939378715566, "grad_norm": 0.3223778009414673, "learning_rate": 0.00016873447965826458, "loss": 11.6702, "step": 37109 }, { "epoch": 0.7768148706355187, "grad_norm": 0.24852176010608673, "learning_rate": 0.00016873288713653155, "loss": 11.6702, "step": 37110 }, { "epoch": 0.7768358033994809, "grad_norm": 0.2716425955295563, "learning_rate": 0.0001687312945817572, "loss": 11.6729, "step": 37111 }, { "epoch": 0.776856736163443, "grad_norm": 0.31290704011917114, "learning_rate": 0.00016872970199394236, "loss": 11.6776, "step": 37112 }, { "epoch": 0.7768776689274052, "grad_norm": 0.2621136009693146, "learning_rate": 0.00016872810937308775, "loss": 11.6668, "step": 37113 }, { "epoch": 0.7768986016913674, "grad_norm": 0.2948116362094879, "learning_rate": 0.00016872651671919416, "loss": 11.6717, "step": 37114 }, { "epoch": 0.7769195344553295, "grad_norm": 0.2771669328212738, "learning_rate": 0.00016872492403226241, "loss": 11.6691, "step": 37115 }, { "epoch": 0.7769404672192917, "grad_norm": 0.3133395314216614, "learning_rate": 0.00016872333131229316, "loss": 11.681, "step": 37116 }, { "epoch": 0.7769613999832538, "grad_norm": 0.29584336280822754, "learning_rate": 0.00016872173855928723, "loss": 11.6782, "step": 37117 }, { "epoch": 0.776982332747216, "grad_norm": 0.3651360869407654, "learning_rate": 0.00016872014577324541, "loss": 11.6648, "step": 37118 }, { "epoch": 0.777003265511178, "grad_norm": 0.2975972592830658, "learning_rate": 0.00016871855295416842, "loss": 11.6677, "step": 37119 }, { "epoch": 0.7770241982751402, "grad_norm": 0.30882909893989563, "learning_rate": 0.00016871696010205706, "loss": 11.6594, "step": 37120 }, { "epoch": 0.7770451310391024, "grad_norm": 0.4192940294742584, "learning_rate": 0.00016871536721691207, "loss": 11.6791, "step": 37121 }, { "epoch": 0.7770660638030645, "grad_norm": 0.3518393933773041, "learning_rate": 0.00016871377429873423, "loss": 11.6957, "step": 37122 }, { "epoch": 0.7770869965670267, "grad_norm": 0.34216734766960144, "learning_rate": 0.00016871218134752433, "loss": 11.6651, "step": 37123 }, { "epoch": 0.7771079293309888, "grad_norm": 0.27133721113204956, "learning_rate": 0.00016871058836328304, "loss": 11.6849, "step": 37124 }, { "epoch": 0.777128862094951, "grad_norm": 0.2574659287929535, "learning_rate": 0.00016870899534601126, "loss": 11.6744, "step": 37125 }, { "epoch": 0.7771497948589132, "grad_norm": 0.3453865647315979, "learning_rate": 0.00016870740229570964, "loss": 11.6668, "step": 37126 }, { "epoch": 0.7771707276228753, "grad_norm": 0.3165714144706726, "learning_rate": 0.00016870580921237902, "loss": 11.6781, "step": 37127 }, { "epoch": 0.7771916603868375, "grad_norm": 0.2606751024723053, "learning_rate": 0.0001687042160960201, "loss": 11.6661, "step": 37128 }, { "epoch": 0.7772125931507996, "grad_norm": 0.30104929208755493, "learning_rate": 0.00016870262294663374, "loss": 11.674, "step": 37129 }, { "epoch": 0.7772335259147618, "grad_norm": 0.27968692779541016, "learning_rate": 0.00016870102976422065, "loss": 11.6582, "step": 37130 }, { "epoch": 0.7772544586787239, "grad_norm": 0.27612006664276123, "learning_rate": 0.00016869943654878155, "loss": 11.6639, "step": 37131 }, { "epoch": 0.7772753914426861, "grad_norm": 0.277118980884552, "learning_rate": 0.00016869784330031731, "loss": 11.6678, "step": 37132 }, { "epoch": 0.7772963242066483, "grad_norm": 0.2746342122554779, "learning_rate": 0.0001686962500188286, "loss": 11.6511, "step": 37133 }, { "epoch": 0.7773172569706104, "grad_norm": 0.4987852871417999, "learning_rate": 0.00016869465670431625, "loss": 11.6822, "step": 37134 }, { "epoch": 0.7773381897345726, "grad_norm": 0.2616281509399414, "learning_rate": 0.00016869306335678098, "loss": 11.656, "step": 37135 }, { "epoch": 0.7773591224985347, "grad_norm": 0.3378311097621918, "learning_rate": 0.0001686914699762236, "loss": 11.6506, "step": 37136 }, { "epoch": 0.7773800552624969, "grad_norm": 0.23236270248889923, "learning_rate": 0.00016868987656264487, "loss": 11.6656, "step": 37137 }, { "epoch": 0.777400988026459, "grad_norm": 0.27839621901512146, "learning_rate": 0.0001686882831160455, "loss": 11.6718, "step": 37138 }, { "epoch": 0.7774219207904212, "grad_norm": 0.33860331773757935, "learning_rate": 0.00016868668963642628, "loss": 11.6767, "step": 37139 }, { "epoch": 0.7774428535543834, "grad_norm": 0.3553614020347595, "learning_rate": 0.00016868509612378804, "loss": 11.6758, "step": 37140 }, { "epoch": 0.7774637863183455, "grad_norm": 0.3306494653224945, "learning_rate": 0.0001686835025781315, "loss": 11.6409, "step": 37141 }, { "epoch": 0.7774847190823077, "grad_norm": 0.34858202934265137, "learning_rate": 0.0001686819089994574, "loss": 11.6782, "step": 37142 }, { "epoch": 0.7775056518462697, "grad_norm": 0.3122691214084625, "learning_rate": 0.0001686803153877665, "loss": 11.6657, "step": 37143 }, { "epoch": 0.7775265846102319, "grad_norm": 0.2946438491344452, "learning_rate": 0.00016867872174305965, "loss": 11.6773, "step": 37144 }, { "epoch": 0.7775475173741941, "grad_norm": 0.25655704736709595, "learning_rate": 0.00016867712806533755, "loss": 11.6858, "step": 37145 }, { "epoch": 0.7775684501381562, "grad_norm": 0.2753184735774994, "learning_rate": 0.00016867553435460095, "loss": 11.6776, "step": 37146 }, { "epoch": 0.7775893829021184, "grad_norm": 0.3620801568031311, "learning_rate": 0.00016867394061085067, "loss": 11.6772, "step": 37147 }, { "epoch": 0.7776103156660805, "grad_norm": 0.29021281003952026, "learning_rate": 0.00016867234683408744, "loss": 11.6676, "step": 37148 }, { "epoch": 0.7776312484300427, "grad_norm": 0.27083075046539307, "learning_rate": 0.00016867075302431206, "loss": 11.6613, "step": 37149 }, { "epoch": 0.7776521811940048, "grad_norm": 0.2979530394077301, "learning_rate": 0.00016866915918152523, "loss": 11.6588, "step": 37150 }, { "epoch": 0.777673113957967, "grad_norm": 0.2968791425228119, "learning_rate": 0.00016866756530572777, "loss": 11.6583, "step": 37151 }, { "epoch": 0.7776940467219292, "grad_norm": 0.33139586448669434, "learning_rate": 0.00016866597139692047, "loss": 11.6744, "step": 37152 }, { "epoch": 0.7777149794858913, "grad_norm": 0.24149686098098755, "learning_rate": 0.00016866437745510405, "loss": 11.6544, "step": 37153 }, { "epoch": 0.7777359122498535, "grad_norm": 0.2465977817773819, "learning_rate": 0.00016866278348027928, "loss": 11.6623, "step": 37154 }, { "epoch": 0.7777568450138156, "grad_norm": 0.2958156168460846, "learning_rate": 0.00016866118947244692, "loss": 11.6791, "step": 37155 }, { "epoch": 0.7777777777777778, "grad_norm": 0.2885379493236542, "learning_rate": 0.00016865959543160773, "loss": 11.6638, "step": 37156 }, { "epoch": 0.7777987105417399, "grad_norm": 0.30106863379478455, "learning_rate": 0.00016865800135776256, "loss": 11.6682, "step": 37157 }, { "epoch": 0.7778196433057021, "grad_norm": 0.3322066366672516, "learning_rate": 0.00016865640725091208, "loss": 11.6607, "step": 37158 }, { "epoch": 0.7778405760696643, "grad_norm": 0.25574544072151184, "learning_rate": 0.0001686548131110571, "loss": 11.6602, "step": 37159 }, { "epoch": 0.7778615088336264, "grad_norm": 0.28592902421951294, "learning_rate": 0.00016865321893819835, "loss": 11.6722, "step": 37160 }, { "epoch": 0.7778824415975886, "grad_norm": 0.32030072808265686, "learning_rate": 0.00016865162473233663, "loss": 11.6597, "step": 37161 }, { "epoch": 0.7779033743615507, "grad_norm": 0.3087480068206787, "learning_rate": 0.0001686500304934727, "loss": 11.6664, "step": 37162 }, { "epoch": 0.7779243071255129, "grad_norm": 0.28870609402656555, "learning_rate": 0.00016864843622160734, "loss": 11.696, "step": 37163 }, { "epoch": 0.7779452398894751, "grad_norm": 0.2932834029197693, "learning_rate": 0.0001686468419167413, "loss": 11.6562, "step": 37164 }, { "epoch": 0.7779661726534371, "grad_norm": 0.3274855613708496, "learning_rate": 0.00016864524757887535, "loss": 11.6754, "step": 37165 }, { "epoch": 0.7779871054173993, "grad_norm": 0.28176960349082947, "learning_rate": 0.00016864365320801023, "loss": 11.6741, "step": 37166 }, { "epoch": 0.7780080381813614, "grad_norm": 0.3010099232196808, "learning_rate": 0.00016864205880414674, "loss": 11.6634, "step": 37167 }, { "epoch": 0.7780289709453236, "grad_norm": 0.30998560786247253, "learning_rate": 0.00016864046436728566, "loss": 11.6638, "step": 37168 }, { "epoch": 0.7780499037092857, "grad_norm": 0.3125348687171936, "learning_rate": 0.0001686388698974277, "loss": 11.6608, "step": 37169 }, { "epoch": 0.7780708364732479, "grad_norm": 0.28060051798820496, "learning_rate": 0.00016863727539457365, "loss": 11.661, "step": 37170 }, { "epoch": 0.7780917692372101, "grad_norm": 0.3652168810367584, "learning_rate": 0.00016863568085872434, "loss": 11.6756, "step": 37171 }, { "epoch": 0.7781127020011722, "grad_norm": 0.2822892963886261, "learning_rate": 0.00016863408628988043, "loss": 11.6753, "step": 37172 }, { "epoch": 0.7781336347651344, "grad_norm": 0.24239851534366608, "learning_rate": 0.00016863249168804276, "loss": 11.6629, "step": 37173 }, { "epoch": 0.7781545675290965, "grad_norm": 0.2845759689807892, "learning_rate": 0.00016863089705321208, "loss": 11.6523, "step": 37174 }, { "epoch": 0.7781755002930587, "grad_norm": 0.2993002235889435, "learning_rate": 0.00016862930238538914, "loss": 11.6484, "step": 37175 }, { "epoch": 0.7781964330570208, "grad_norm": 0.25798341631889343, "learning_rate": 0.00016862770768457476, "loss": 11.6778, "step": 37176 }, { "epoch": 0.778217365820983, "grad_norm": 0.3550070822238922, "learning_rate": 0.00016862611295076962, "loss": 11.671, "step": 37177 }, { "epoch": 0.7782382985849452, "grad_norm": 0.3171025812625885, "learning_rate": 0.00016862451818397455, "loss": 11.6639, "step": 37178 }, { "epoch": 0.7782592313489073, "grad_norm": 0.3345237672328949, "learning_rate": 0.0001686229233841903, "loss": 11.674, "step": 37179 }, { "epoch": 0.7782801641128695, "grad_norm": 0.3212585747241974, "learning_rate": 0.0001686213285514176, "loss": 11.6783, "step": 37180 }, { "epoch": 0.7783010968768316, "grad_norm": 0.286699503660202, "learning_rate": 0.0001686197336856573, "loss": 11.6595, "step": 37181 }, { "epoch": 0.7783220296407938, "grad_norm": 0.33909115195274353, "learning_rate": 0.0001686181387869101, "loss": 11.6676, "step": 37182 }, { "epoch": 0.778342962404756, "grad_norm": 0.33349424600601196, "learning_rate": 0.00016861654385517678, "loss": 11.6645, "step": 37183 }, { "epoch": 0.7783638951687181, "grad_norm": 0.368854284286499, "learning_rate": 0.00016861494889045813, "loss": 11.6777, "step": 37184 }, { "epoch": 0.7783848279326803, "grad_norm": 0.275947630405426, "learning_rate": 0.00016861335389275488, "loss": 11.6679, "step": 37185 }, { "epoch": 0.7784057606966424, "grad_norm": 0.3097822964191437, "learning_rate": 0.00016861175886206782, "loss": 11.6505, "step": 37186 }, { "epoch": 0.7784266934606046, "grad_norm": 0.26744747161865234, "learning_rate": 0.00016861016379839772, "loss": 11.6655, "step": 37187 }, { "epoch": 0.7784476262245666, "grad_norm": 0.2584201693534851, "learning_rate": 0.00016860856870174533, "loss": 11.6707, "step": 37188 }, { "epoch": 0.7784685589885288, "grad_norm": 0.34410181641578674, "learning_rate": 0.00016860697357211143, "loss": 11.6835, "step": 37189 }, { "epoch": 0.778489491752491, "grad_norm": 0.29781439900398254, "learning_rate": 0.0001686053784094968, "loss": 11.6578, "step": 37190 }, { "epoch": 0.7785104245164531, "grad_norm": 0.31530001759529114, "learning_rate": 0.00016860378321390215, "loss": 11.6722, "step": 37191 }, { "epoch": 0.7785313572804153, "grad_norm": 0.25267258286476135, "learning_rate": 0.00016860218798532832, "loss": 11.661, "step": 37192 }, { "epoch": 0.7785522900443774, "grad_norm": 0.31161704659461975, "learning_rate": 0.00016860059272377602, "loss": 11.6641, "step": 37193 }, { "epoch": 0.7785732228083396, "grad_norm": 0.2894624173641205, "learning_rate": 0.00016859899742924608, "loss": 11.663, "step": 37194 }, { "epoch": 0.7785941555723017, "grad_norm": 0.28087595105171204, "learning_rate": 0.00016859740210173918, "loss": 11.6656, "step": 37195 }, { "epoch": 0.7786150883362639, "grad_norm": 0.3043134808540344, "learning_rate": 0.00016859580674125616, "loss": 11.6682, "step": 37196 }, { "epoch": 0.7786360211002261, "grad_norm": 0.3540094494819641, "learning_rate": 0.00016859421134779776, "loss": 11.6851, "step": 37197 }, { "epoch": 0.7786569538641882, "grad_norm": 0.3284124433994293, "learning_rate": 0.00016859261592136476, "loss": 11.6479, "step": 37198 }, { "epoch": 0.7786778866281504, "grad_norm": 0.26872262358665466, "learning_rate": 0.0001685910204619579, "loss": 11.6643, "step": 37199 }, { "epoch": 0.7786988193921125, "grad_norm": 0.3735451400279999, "learning_rate": 0.00016858942496957794, "loss": 11.671, "step": 37200 }, { "epoch": 0.7787197521560747, "grad_norm": 0.3382411301136017, "learning_rate": 0.0001685878294442257, "loss": 11.659, "step": 37201 }, { "epoch": 0.7787406849200369, "grad_norm": 0.3032912015914917, "learning_rate": 0.00016858623388590189, "loss": 11.6451, "step": 37202 }, { "epoch": 0.778761617683999, "grad_norm": 0.2378002554178238, "learning_rate": 0.00016858463829460732, "loss": 11.6777, "step": 37203 }, { "epoch": 0.7787825504479612, "grad_norm": 0.29554057121276855, "learning_rate": 0.00016858304267034275, "loss": 11.6783, "step": 37204 }, { "epoch": 0.7788034832119233, "grad_norm": 0.28266507387161255, "learning_rate": 0.0001685814470131089, "loss": 11.6634, "step": 37205 }, { "epoch": 0.7788244159758855, "grad_norm": 0.2265225201845169, "learning_rate": 0.00016857985132290665, "loss": 11.6706, "step": 37206 }, { "epoch": 0.7788453487398476, "grad_norm": 0.2899670898914337, "learning_rate": 0.00016857825559973662, "loss": 11.6581, "step": 37207 }, { "epoch": 0.7788662815038098, "grad_norm": 0.3154008686542511, "learning_rate": 0.00016857665984359966, "loss": 11.666, "step": 37208 }, { "epoch": 0.778887214267772, "grad_norm": 0.282335102558136, "learning_rate": 0.00016857506405449653, "loss": 11.6701, "step": 37209 }, { "epoch": 0.778908147031734, "grad_norm": 0.2747757136821747, "learning_rate": 0.00016857346823242802, "loss": 11.6649, "step": 37210 }, { "epoch": 0.7789290797956963, "grad_norm": 0.4359038174152374, "learning_rate": 0.00016857187237739483, "loss": 11.6851, "step": 37211 }, { "epoch": 0.7789500125596583, "grad_norm": 0.2944655418395996, "learning_rate": 0.00016857027648939778, "loss": 11.6757, "step": 37212 }, { "epoch": 0.7789709453236205, "grad_norm": 0.3020426630973816, "learning_rate": 0.00016856868056843764, "loss": 11.6787, "step": 37213 }, { "epoch": 0.7789918780875826, "grad_norm": 0.35029691457748413, "learning_rate": 0.00016856708461451513, "loss": 11.6567, "step": 37214 }, { "epoch": 0.7790128108515448, "grad_norm": 0.28751933574676514, "learning_rate": 0.00016856548862763108, "loss": 11.673, "step": 37215 }, { "epoch": 0.779033743615507, "grad_norm": 0.3305205702781677, "learning_rate": 0.0001685638926077862, "loss": 11.6796, "step": 37216 }, { "epoch": 0.7790546763794691, "grad_norm": 0.3151865303516388, "learning_rate": 0.0001685622965549813, "loss": 11.6765, "step": 37217 }, { "epoch": 0.7790756091434313, "grad_norm": 0.312488317489624, "learning_rate": 0.00016856070046921713, "loss": 11.6776, "step": 37218 }, { "epoch": 0.7790965419073934, "grad_norm": 0.35436418652534485, "learning_rate": 0.00016855910435049446, "loss": 11.6667, "step": 37219 }, { "epoch": 0.7791174746713556, "grad_norm": 0.3018905222415924, "learning_rate": 0.00016855750819881404, "loss": 11.6718, "step": 37220 }, { "epoch": 0.7791384074353178, "grad_norm": 0.3274272680282593, "learning_rate": 0.00016855591201417666, "loss": 11.6582, "step": 37221 }, { "epoch": 0.7791593401992799, "grad_norm": 0.28241193294525146, "learning_rate": 0.00016855431579658307, "loss": 11.6684, "step": 37222 }, { "epoch": 0.7791802729632421, "grad_norm": 0.39667919278144836, "learning_rate": 0.00016855271954603407, "loss": 11.6897, "step": 37223 }, { "epoch": 0.7792012057272042, "grad_norm": 0.2914465665817261, "learning_rate": 0.00016855112326253038, "loss": 11.6872, "step": 37224 }, { "epoch": 0.7792221384911664, "grad_norm": 0.3079914450645447, "learning_rate": 0.0001685495269460728, "loss": 11.688, "step": 37225 }, { "epoch": 0.7792430712551285, "grad_norm": 0.25965359807014465, "learning_rate": 0.0001685479305966621, "loss": 11.6661, "step": 37226 }, { "epoch": 0.7792640040190907, "grad_norm": 0.35007619857788086, "learning_rate": 0.000168546334214299, "loss": 11.6866, "step": 37227 }, { "epoch": 0.7792849367830529, "grad_norm": 0.29759708046913147, "learning_rate": 0.00016854473779898433, "loss": 11.6814, "step": 37228 }, { "epoch": 0.779305869547015, "grad_norm": 0.2718287408351898, "learning_rate": 0.00016854314135071884, "loss": 11.6636, "step": 37229 }, { "epoch": 0.7793268023109772, "grad_norm": 0.26614123582839966, "learning_rate": 0.00016854154486950327, "loss": 11.6579, "step": 37230 }, { "epoch": 0.7793477350749393, "grad_norm": 0.2657404839992523, "learning_rate": 0.00016853994835533843, "loss": 11.6637, "step": 37231 }, { "epoch": 0.7793686678389015, "grad_norm": 0.26206517219543457, "learning_rate": 0.00016853835180822503, "loss": 11.6714, "step": 37232 }, { "epoch": 0.7793896006028636, "grad_norm": 0.2745704650878906, "learning_rate": 0.0001685367552281639, "loss": 11.6706, "step": 37233 }, { "epoch": 0.7794105333668258, "grad_norm": 0.2740944027900696, "learning_rate": 0.00016853515861515574, "loss": 11.6682, "step": 37234 }, { "epoch": 0.779431466130788, "grad_norm": 0.3534291982650757, "learning_rate": 0.0001685335619692014, "loss": 11.6623, "step": 37235 }, { "epoch": 0.77945239889475, "grad_norm": 0.2746847867965698, "learning_rate": 0.0001685319652903016, "loss": 11.6742, "step": 37236 }, { "epoch": 0.7794733316587122, "grad_norm": 0.3022423982620239, "learning_rate": 0.00016853036857845708, "loss": 11.67, "step": 37237 }, { "epoch": 0.7794942644226743, "grad_norm": 0.2940424382686615, "learning_rate": 0.00016852877183366863, "loss": 11.6783, "step": 37238 }, { "epoch": 0.7795151971866365, "grad_norm": 0.2941111922264099, "learning_rate": 0.00016852717505593708, "loss": 11.6744, "step": 37239 }, { "epoch": 0.7795361299505986, "grad_norm": 0.5088827610015869, "learning_rate": 0.0001685255782452631, "loss": 11.6866, "step": 37240 }, { "epoch": 0.7795570627145608, "grad_norm": 0.3358926475048065, "learning_rate": 0.00016852398140164752, "loss": 11.6576, "step": 37241 }, { "epoch": 0.779577995478523, "grad_norm": 0.35432732105255127, "learning_rate": 0.00016852238452509107, "loss": 11.6571, "step": 37242 }, { "epoch": 0.7795989282424851, "grad_norm": 0.2639811336994171, "learning_rate": 0.0001685207876155946, "loss": 11.6561, "step": 37243 }, { "epoch": 0.7796198610064473, "grad_norm": 0.23194551467895508, "learning_rate": 0.00016851919067315874, "loss": 11.6502, "step": 37244 }, { "epoch": 0.7796407937704094, "grad_norm": 0.32753342390060425, "learning_rate": 0.00016851759369778435, "loss": 11.666, "step": 37245 }, { "epoch": 0.7796617265343716, "grad_norm": 0.3319995403289795, "learning_rate": 0.0001685159966894722, "loss": 11.6558, "step": 37246 }, { "epoch": 0.7796826592983338, "grad_norm": 0.24123458564281464, "learning_rate": 0.00016851439964822302, "loss": 11.6637, "step": 37247 }, { "epoch": 0.7797035920622959, "grad_norm": 0.32036349177360535, "learning_rate": 0.00016851280257403759, "loss": 11.6764, "step": 37248 }, { "epoch": 0.7797245248262581, "grad_norm": 0.2712302803993225, "learning_rate": 0.0001685112054669167, "loss": 11.6722, "step": 37249 }, { "epoch": 0.7797454575902202, "grad_norm": 0.3006964921951294, "learning_rate": 0.00016850960832686106, "loss": 11.6586, "step": 37250 }, { "epoch": 0.7797663903541824, "grad_norm": 0.4041633903980255, "learning_rate": 0.00016850801115387152, "loss": 11.6705, "step": 37251 }, { "epoch": 0.7797873231181445, "grad_norm": 0.3066388964653015, "learning_rate": 0.00016850641394794882, "loss": 11.6965, "step": 37252 }, { "epoch": 0.7798082558821067, "grad_norm": 0.3196287155151367, "learning_rate": 0.00016850481670909365, "loss": 11.6583, "step": 37253 }, { "epoch": 0.7798291886460689, "grad_norm": 0.2694225311279297, "learning_rate": 0.0001685032194373069, "loss": 11.6782, "step": 37254 }, { "epoch": 0.779850121410031, "grad_norm": 0.25633230805397034, "learning_rate": 0.00016850162213258925, "loss": 11.6446, "step": 37255 }, { "epoch": 0.7798710541739932, "grad_norm": 0.34366998076438904, "learning_rate": 0.00016850002479494152, "loss": 11.6826, "step": 37256 }, { "epoch": 0.7798919869379553, "grad_norm": 0.39215025305747986, "learning_rate": 0.00016849842742436443, "loss": 11.6767, "step": 37257 }, { "epoch": 0.7799129197019175, "grad_norm": 0.24952419102191925, "learning_rate": 0.0001684968300208588, "loss": 11.6803, "step": 37258 }, { "epoch": 0.7799338524658795, "grad_norm": 0.34831973910331726, "learning_rate": 0.00016849523258442536, "loss": 11.668, "step": 37259 }, { "epoch": 0.7799547852298417, "grad_norm": 0.29243820905685425, "learning_rate": 0.0001684936351150649, "loss": 11.6806, "step": 37260 }, { "epoch": 0.7799757179938039, "grad_norm": 0.3540128171443939, "learning_rate": 0.00016849203761277814, "loss": 11.6784, "step": 37261 }, { "epoch": 0.779996650757766, "grad_norm": 0.31468138098716736, "learning_rate": 0.00016849044007756595, "loss": 11.6657, "step": 37262 }, { "epoch": 0.7800175835217282, "grad_norm": 0.39387282729148865, "learning_rate": 0.00016848884250942895, "loss": 11.654, "step": 37263 }, { "epoch": 0.7800385162856903, "grad_norm": 0.3121417164802551, "learning_rate": 0.00016848724490836804, "loss": 11.6729, "step": 37264 }, { "epoch": 0.7800594490496525, "grad_norm": 0.41679972410202026, "learning_rate": 0.00016848564727438395, "loss": 11.6771, "step": 37265 }, { "epoch": 0.7800803818136147, "grad_norm": 0.3210301995277405, "learning_rate": 0.0001684840496074774, "loss": 11.6718, "step": 37266 }, { "epoch": 0.7801013145775768, "grad_norm": 0.37655285000801086, "learning_rate": 0.00016848245190764923, "loss": 11.6827, "step": 37267 }, { "epoch": 0.780122247341539, "grad_norm": 0.3899962306022644, "learning_rate": 0.00016848085417490015, "loss": 11.6545, "step": 37268 }, { "epoch": 0.7801431801055011, "grad_norm": 0.331983357667923, "learning_rate": 0.00016847925640923098, "loss": 11.67, "step": 37269 }, { "epoch": 0.7801641128694633, "grad_norm": 0.3154776990413666, "learning_rate": 0.00016847765861064242, "loss": 11.6498, "step": 37270 }, { "epoch": 0.7801850456334254, "grad_norm": 0.33097973465919495, "learning_rate": 0.00016847606077913528, "loss": 11.6609, "step": 37271 }, { "epoch": 0.7802059783973876, "grad_norm": 0.36244943737983704, "learning_rate": 0.00016847446291471033, "loss": 11.6618, "step": 37272 }, { "epoch": 0.7802269111613498, "grad_norm": 0.35227081179618835, "learning_rate": 0.00016847286501736836, "loss": 11.665, "step": 37273 }, { "epoch": 0.7802478439253119, "grad_norm": 0.26981765031814575, "learning_rate": 0.00016847126708711009, "loss": 11.6599, "step": 37274 }, { "epoch": 0.7802687766892741, "grad_norm": 0.268704354763031, "learning_rate": 0.0001684696691239363, "loss": 11.6679, "step": 37275 }, { "epoch": 0.7802897094532362, "grad_norm": 0.3871007263660431, "learning_rate": 0.00016846807112784782, "loss": 11.6724, "step": 37276 }, { "epoch": 0.7803106422171984, "grad_norm": 0.3504740297794342, "learning_rate": 0.00016846647309884532, "loss": 11.6809, "step": 37277 }, { "epoch": 0.7803315749811605, "grad_norm": 0.2995426058769226, "learning_rate": 0.0001684648750369296, "loss": 11.6627, "step": 37278 }, { "epoch": 0.7803525077451227, "grad_norm": 0.31756076216697693, "learning_rate": 0.00016846327694210146, "loss": 11.6671, "step": 37279 }, { "epoch": 0.7803734405090849, "grad_norm": 0.26973235607147217, "learning_rate": 0.00016846167881436167, "loss": 11.674, "step": 37280 }, { "epoch": 0.780394373273047, "grad_norm": 0.3478764295578003, "learning_rate": 0.00016846008065371097, "loss": 11.665, "step": 37281 }, { "epoch": 0.7804153060370091, "grad_norm": 0.3144153654575348, "learning_rate": 0.00016845848246015014, "loss": 11.6461, "step": 37282 }, { "epoch": 0.7804362388009712, "grad_norm": 0.30356675386428833, "learning_rate": 0.00016845688423367992, "loss": 11.6837, "step": 37283 }, { "epoch": 0.7804571715649334, "grad_norm": 0.33236798644065857, "learning_rate": 0.00016845528597430112, "loss": 11.677, "step": 37284 }, { "epoch": 0.7804781043288956, "grad_norm": 0.2875380516052246, "learning_rate": 0.00016845368768201452, "loss": 11.6582, "step": 37285 }, { "epoch": 0.7804990370928577, "grad_norm": 0.2609240412712097, "learning_rate": 0.0001684520893568208, "loss": 11.6735, "step": 37286 }, { "epoch": 0.7805199698568199, "grad_norm": 0.2971446216106415, "learning_rate": 0.00016845049099872082, "loss": 11.6618, "step": 37287 }, { "epoch": 0.780540902620782, "grad_norm": 0.3488743305206299, "learning_rate": 0.00016844889260771533, "loss": 11.6581, "step": 37288 }, { "epoch": 0.7805618353847442, "grad_norm": 0.2791453003883362, "learning_rate": 0.00016844729418380506, "loss": 11.6529, "step": 37289 }, { "epoch": 0.7805827681487063, "grad_norm": 0.367321640253067, "learning_rate": 0.00016844569572699083, "loss": 11.6848, "step": 37290 }, { "epoch": 0.7806037009126685, "grad_norm": 0.32700854539871216, "learning_rate": 0.0001684440972372734, "loss": 11.6673, "step": 37291 }, { "epoch": 0.7806246336766307, "grad_norm": 0.3585801124572754, "learning_rate": 0.00016844249871465346, "loss": 11.6703, "step": 37292 }, { "epoch": 0.7806455664405928, "grad_norm": 0.35375362634658813, "learning_rate": 0.00016844090015913186, "loss": 11.6742, "step": 37293 }, { "epoch": 0.780666499204555, "grad_norm": 0.311982125043869, "learning_rate": 0.0001684393015707094, "loss": 11.6658, "step": 37294 }, { "epoch": 0.7806874319685171, "grad_norm": 0.3418479263782501, "learning_rate": 0.00016843770294938676, "loss": 11.6815, "step": 37295 }, { "epoch": 0.7807083647324793, "grad_norm": 0.32590726017951965, "learning_rate": 0.00016843610429516473, "loss": 11.6709, "step": 37296 }, { "epoch": 0.7807292974964414, "grad_norm": 0.263336181640625, "learning_rate": 0.0001684345056080441, "loss": 11.6683, "step": 37297 }, { "epoch": 0.7807502302604036, "grad_norm": 0.39827316999435425, "learning_rate": 0.00016843290688802563, "loss": 11.668, "step": 37298 }, { "epoch": 0.7807711630243658, "grad_norm": 0.3250720798969269, "learning_rate": 0.0001684313081351101, "loss": 11.656, "step": 37299 }, { "epoch": 0.7807920957883279, "grad_norm": 0.28548911213874817, "learning_rate": 0.00016842970934929826, "loss": 11.6656, "step": 37300 }, { "epoch": 0.7808130285522901, "grad_norm": 0.2829466164112091, "learning_rate": 0.0001684281105305909, "loss": 11.6572, "step": 37301 }, { "epoch": 0.7808339613162522, "grad_norm": 0.2990228831768036, "learning_rate": 0.00016842651167898876, "loss": 11.6758, "step": 37302 }, { "epoch": 0.7808548940802144, "grad_norm": 0.542716920375824, "learning_rate": 0.0001684249127944926, "loss": 11.6836, "step": 37303 }, { "epoch": 0.7808758268441766, "grad_norm": 0.2407987117767334, "learning_rate": 0.00016842331387710327, "loss": 11.6726, "step": 37304 }, { "epoch": 0.7808967596081386, "grad_norm": 0.3137069046497345, "learning_rate": 0.00016842171492682147, "loss": 11.6598, "step": 37305 }, { "epoch": 0.7809176923721008, "grad_norm": 0.3409239947795868, "learning_rate": 0.00016842011594364798, "loss": 11.6681, "step": 37306 }, { "epoch": 0.7809386251360629, "grad_norm": 0.2574414908885956, "learning_rate": 0.00016841851692758354, "loss": 11.6645, "step": 37307 }, { "epoch": 0.7809595579000251, "grad_norm": 0.3375842273235321, "learning_rate": 0.00016841691787862896, "loss": 11.6463, "step": 37308 }, { "epoch": 0.7809804906639872, "grad_norm": 0.29529041051864624, "learning_rate": 0.000168415318796785, "loss": 11.6735, "step": 37309 }, { "epoch": 0.7810014234279494, "grad_norm": 0.3781028091907501, "learning_rate": 0.0001684137196820524, "loss": 11.6946, "step": 37310 }, { "epoch": 0.7810223561919116, "grad_norm": 0.29666948318481445, "learning_rate": 0.00016841212053443202, "loss": 11.6802, "step": 37311 }, { "epoch": 0.7810432889558737, "grad_norm": 0.2766129672527313, "learning_rate": 0.00016841052135392452, "loss": 11.6749, "step": 37312 }, { "epoch": 0.7810642217198359, "grad_norm": 0.2649773359298706, "learning_rate": 0.00016840892214053073, "loss": 11.6584, "step": 37313 }, { "epoch": 0.781085154483798, "grad_norm": 0.38168588280677795, "learning_rate": 0.00016840732289425135, "loss": 11.6747, "step": 37314 }, { "epoch": 0.7811060872477602, "grad_norm": 0.41674932837486267, "learning_rate": 0.00016840572361508723, "loss": 11.6775, "step": 37315 }, { "epoch": 0.7811270200117223, "grad_norm": 0.3113171458244324, "learning_rate": 0.00016840412430303914, "loss": 11.6857, "step": 37316 }, { "epoch": 0.7811479527756845, "grad_norm": 0.25440046191215515, "learning_rate": 0.00016840252495810778, "loss": 11.6788, "step": 37317 }, { "epoch": 0.7811688855396467, "grad_norm": 0.316143661737442, "learning_rate": 0.00016840092558029397, "loss": 11.675, "step": 37318 }, { "epoch": 0.7811898183036088, "grad_norm": 0.30134445428848267, "learning_rate": 0.00016839932616959845, "loss": 11.6646, "step": 37319 }, { "epoch": 0.781210751067571, "grad_norm": 0.24358423054218292, "learning_rate": 0.00016839772672602198, "loss": 11.6619, "step": 37320 }, { "epoch": 0.7812316838315331, "grad_norm": 0.35896238684654236, "learning_rate": 0.00016839612724956542, "loss": 11.6589, "step": 37321 }, { "epoch": 0.7812526165954953, "grad_norm": 0.317430317401886, "learning_rate": 0.0001683945277402294, "loss": 11.6665, "step": 37322 }, { "epoch": 0.7812735493594575, "grad_norm": 0.30134516954421997, "learning_rate": 0.00016839292819801482, "loss": 11.6683, "step": 37323 }, { "epoch": 0.7812944821234196, "grad_norm": 0.3532428741455078, "learning_rate": 0.00016839132862292235, "loss": 11.666, "step": 37324 }, { "epoch": 0.7813154148873818, "grad_norm": 0.2673713266849518, "learning_rate": 0.00016838972901495283, "loss": 11.6744, "step": 37325 }, { "epoch": 0.7813363476513439, "grad_norm": 0.3054240047931671, "learning_rate": 0.00016838812937410697, "loss": 11.6732, "step": 37326 }, { "epoch": 0.781357280415306, "grad_norm": 0.28942158818244934, "learning_rate": 0.00016838652970038557, "loss": 11.6683, "step": 37327 }, { "epoch": 0.7813782131792681, "grad_norm": 0.3102048635482788, "learning_rate": 0.0001683849299937894, "loss": 11.6765, "step": 37328 }, { "epoch": 0.7813991459432303, "grad_norm": 0.28328296542167664, "learning_rate": 0.00016838333025431923, "loss": 11.663, "step": 37329 }, { "epoch": 0.7814200787071925, "grad_norm": 0.3043806850910187, "learning_rate": 0.0001683817304819758, "loss": 11.6651, "step": 37330 }, { "epoch": 0.7814410114711546, "grad_norm": 0.2662295699119568, "learning_rate": 0.00016838013067675992, "loss": 11.6649, "step": 37331 }, { "epoch": 0.7814619442351168, "grad_norm": 0.33201315999031067, "learning_rate": 0.00016837853083867231, "loss": 11.6756, "step": 37332 }, { "epoch": 0.7814828769990789, "grad_norm": 0.2768717408180237, "learning_rate": 0.00016837693096771384, "loss": 11.6697, "step": 37333 }, { "epoch": 0.7815038097630411, "grad_norm": 0.3127764165401459, "learning_rate": 0.00016837533106388514, "loss": 11.6449, "step": 37334 }, { "epoch": 0.7815247425270032, "grad_norm": 0.3083149194717407, "learning_rate": 0.0001683737311271871, "loss": 11.6827, "step": 37335 }, { "epoch": 0.7815456752909654, "grad_norm": 0.28006115555763245, "learning_rate": 0.00016837213115762038, "loss": 11.6632, "step": 37336 }, { "epoch": 0.7815666080549276, "grad_norm": 0.3429843783378601, "learning_rate": 0.00016837053115518585, "loss": 11.6746, "step": 37337 }, { "epoch": 0.7815875408188897, "grad_norm": 0.29099979996681213, "learning_rate": 0.00016836893111988422, "loss": 11.6787, "step": 37338 }, { "epoch": 0.7816084735828519, "grad_norm": 0.4471949636936188, "learning_rate": 0.00016836733105171628, "loss": 11.6744, "step": 37339 }, { "epoch": 0.781629406346814, "grad_norm": 0.34003177285194397, "learning_rate": 0.00016836573095068279, "loss": 11.6613, "step": 37340 }, { "epoch": 0.7816503391107762, "grad_norm": 0.41052424907684326, "learning_rate": 0.00016836413081678452, "loss": 11.6819, "step": 37341 }, { "epoch": 0.7816712718747384, "grad_norm": 0.4071502685546875, "learning_rate": 0.00016836253065002224, "loss": 11.6699, "step": 37342 }, { "epoch": 0.7816922046387005, "grad_norm": 0.3585416078567505, "learning_rate": 0.00016836093045039674, "loss": 11.6804, "step": 37343 }, { "epoch": 0.7817131374026627, "grad_norm": 0.23993973433971405, "learning_rate": 0.00016835933021790874, "loss": 11.669, "step": 37344 }, { "epoch": 0.7817340701666248, "grad_norm": 0.36750829219818115, "learning_rate": 0.00016835772995255906, "loss": 11.6679, "step": 37345 }, { "epoch": 0.781755002930587, "grad_norm": 0.3634064495563507, "learning_rate": 0.00016835612965434847, "loss": 11.6893, "step": 37346 }, { "epoch": 0.7817759356945491, "grad_norm": 0.2663864195346832, "learning_rate": 0.00016835452932327768, "loss": 11.6769, "step": 37347 }, { "epoch": 0.7817968684585113, "grad_norm": 0.32095539569854736, "learning_rate": 0.00016835292895934752, "loss": 11.6927, "step": 37348 }, { "epoch": 0.7818178012224735, "grad_norm": 0.2782139778137207, "learning_rate": 0.00016835132856255874, "loss": 11.6883, "step": 37349 }, { "epoch": 0.7818387339864356, "grad_norm": 0.3076971769332886, "learning_rate": 0.0001683497281329121, "loss": 11.68, "step": 37350 }, { "epoch": 0.7818596667503978, "grad_norm": 0.22875919938087463, "learning_rate": 0.00016834812767040837, "loss": 11.6692, "step": 37351 }, { "epoch": 0.7818805995143598, "grad_norm": 0.2615126669406891, "learning_rate": 0.00016834652717504834, "loss": 11.6594, "step": 37352 }, { "epoch": 0.781901532278322, "grad_norm": 0.34790557622909546, "learning_rate": 0.00016834492664683274, "loss": 11.6728, "step": 37353 }, { "epoch": 0.7819224650422841, "grad_norm": 0.28703275322914124, "learning_rate": 0.00016834332608576237, "loss": 11.6902, "step": 37354 }, { "epoch": 0.7819433978062463, "grad_norm": 0.25106075406074524, "learning_rate": 0.000168341725491838, "loss": 11.6805, "step": 37355 }, { "epoch": 0.7819643305702085, "grad_norm": 0.29202866554260254, "learning_rate": 0.0001683401248650604, "loss": 11.6664, "step": 37356 }, { "epoch": 0.7819852633341706, "grad_norm": 0.2645069658756256, "learning_rate": 0.00016833852420543034, "loss": 11.6707, "step": 37357 }, { "epoch": 0.7820061960981328, "grad_norm": 0.27496686577796936, "learning_rate": 0.00016833692351294856, "loss": 11.6651, "step": 37358 }, { "epoch": 0.7820271288620949, "grad_norm": 0.2654194235801697, "learning_rate": 0.00016833532278761584, "loss": 11.664, "step": 37359 }, { "epoch": 0.7820480616260571, "grad_norm": 0.3487429916858673, "learning_rate": 0.00016833372202943302, "loss": 11.6707, "step": 37360 }, { "epoch": 0.7820689943900193, "grad_norm": 0.41426166892051697, "learning_rate": 0.00016833212123840076, "loss": 11.6801, "step": 37361 }, { "epoch": 0.7820899271539814, "grad_norm": 0.3417889475822449, "learning_rate": 0.0001683305204145199, "loss": 11.6659, "step": 37362 }, { "epoch": 0.7821108599179436, "grad_norm": 0.3698365390300751, "learning_rate": 0.00016832891955779116, "loss": 11.6773, "step": 37363 }, { "epoch": 0.7821317926819057, "grad_norm": 0.3111494481563568, "learning_rate": 0.00016832731866821537, "loss": 11.6821, "step": 37364 }, { "epoch": 0.7821527254458679, "grad_norm": 0.345787912607193, "learning_rate": 0.00016832571774579326, "loss": 11.695, "step": 37365 }, { "epoch": 0.78217365820983, "grad_norm": 0.3577972650527954, "learning_rate": 0.0001683241167905256, "loss": 11.6463, "step": 37366 }, { "epoch": 0.7821945909737922, "grad_norm": 0.2971300482749939, "learning_rate": 0.00016832251580241317, "loss": 11.6733, "step": 37367 }, { "epoch": 0.7822155237377544, "grad_norm": 0.2977573871612549, "learning_rate": 0.00016832091478145676, "loss": 11.6696, "step": 37368 }, { "epoch": 0.7822364565017165, "grad_norm": 0.31311389803886414, "learning_rate": 0.0001683193137276571, "loss": 11.6795, "step": 37369 }, { "epoch": 0.7822573892656787, "grad_norm": 0.4063776731491089, "learning_rate": 0.00016831771264101496, "loss": 11.6858, "step": 37370 }, { "epoch": 0.7822783220296408, "grad_norm": 0.2970290780067444, "learning_rate": 0.00016831611152153114, "loss": 11.6658, "step": 37371 }, { "epoch": 0.782299254793603, "grad_norm": 0.2604427635669708, "learning_rate": 0.0001683145103692064, "loss": 11.6632, "step": 37372 }, { "epoch": 0.782320187557565, "grad_norm": 0.2988545894622803, "learning_rate": 0.0001683129091840415, "loss": 11.6584, "step": 37373 }, { "epoch": 0.7823411203215273, "grad_norm": 0.40579208731651306, "learning_rate": 0.00016831130796603726, "loss": 11.671, "step": 37374 }, { "epoch": 0.7823620530854895, "grad_norm": 0.3417021632194519, "learning_rate": 0.00016830970671519432, "loss": 11.6731, "step": 37375 }, { "epoch": 0.7823829858494515, "grad_norm": 0.30805981159210205, "learning_rate": 0.0001683081054315136, "loss": 11.6651, "step": 37376 }, { "epoch": 0.7824039186134137, "grad_norm": 0.3318510353565216, "learning_rate": 0.00016830650411499577, "loss": 11.6698, "step": 37377 }, { "epoch": 0.7824248513773758, "grad_norm": 0.33088254928588867, "learning_rate": 0.00016830490276564165, "loss": 11.6585, "step": 37378 }, { "epoch": 0.782445784141338, "grad_norm": 0.29250797629356384, "learning_rate": 0.00016830330138345204, "loss": 11.6575, "step": 37379 }, { "epoch": 0.7824667169053002, "grad_norm": 0.34617722034454346, "learning_rate": 0.00016830169996842758, "loss": 11.6831, "step": 37380 }, { "epoch": 0.7824876496692623, "grad_norm": 0.24582375586032867, "learning_rate": 0.00016830009852056917, "loss": 11.6678, "step": 37381 }, { "epoch": 0.7825085824332245, "grad_norm": 0.36311182379722595, "learning_rate": 0.00016829849703987754, "loss": 11.6662, "step": 37382 }, { "epoch": 0.7825295151971866, "grad_norm": 0.3079044222831726, "learning_rate": 0.00016829689552635345, "loss": 11.6728, "step": 37383 }, { "epoch": 0.7825504479611488, "grad_norm": 0.3122498691082001, "learning_rate": 0.00016829529397999763, "loss": 11.6675, "step": 37384 }, { "epoch": 0.7825713807251109, "grad_norm": 0.34739187359809875, "learning_rate": 0.00016829369240081094, "loss": 11.6884, "step": 37385 }, { "epoch": 0.7825923134890731, "grad_norm": 0.3469182848930359, "learning_rate": 0.00016829209078879408, "loss": 11.6762, "step": 37386 }, { "epoch": 0.7826132462530353, "grad_norm": 0.33501309156417847, "learning_rate": 0.00016829048914394784, "loss": 11.682, "step": 37387 }, { "epoch": 0.7826341790169974, "grad_norm": 0.34564903378486633, "learning_rate": 0.00016828888746627305, "loss": 11.6708, "step": 37388 }, { "epoch": 0.7826551117809596, "grad_norm": 0.3722706437110901, "learning_rate": 0.00016828728575577037, "loss": 11.6681, "step": 37389 }, { "epoch": 0.7826760445449217, "grad_norm": 0.24784448742866516, "learning_rate": 0.00016828568401244063, "loss": 11.6667, "step": 37390 }, { "epoch": 0.7826969773088839, "grad_norm": 0.37352174520492554, "learning_rate": 0.0001682840822362846, "loss": 11.6698, "step": 37391 }, { "epoch": 0.782717910072846, "grad_norm": 0.34251460433006287, "learning_rate": 0.00016828248042730304, "loss": 11.6612, "step": 37392 }, { "epoch": 0.7827388428368082, "grad_norm": 0.3214183449745178, "learning_rate": 0.0001682808785854967, "loss": 11.6654, "step": 37393 }, { "epoch": 0.7827597756007704, "grad_norm": 0.28809478878974915, "learning_rate": 0.00016827927671086643, "loss": 11.6694, "step": 37394 }, { "epoch": 0.7827807083647325, "grad_norm": 0.2688910961151123, "learning_rate": 0.0001682776748034129, "loss": 11.6692, "step": 37395 }, { "epoch": 0.7828016411286947, "grad_norm": 0.2604871988296509, "learning_rate": 0.00016827607286313694, "loss": 11.6756, "step": 37396 }, { "epoch": 0.7828225738926567, "grad_norm": 0.3334074914455414, "learning_rate": 0.00016827447089003928, "loss": 11.6758, "step": 37397 }, { "epoch": 0.782843506656619, "grad_norm": 0.2756977379322052, "learning_rate": 0.00016827286888412073, "loss": 11.6734, "step": 37398 }, { "epoch": 0.7828644394205811, "grad_norm": 0.2671971619129181, "learning_rate": 0.00016827126684538206, "loss": 11.6738, "step": 37399 }, { "epoch": 0.7828853721845432, "grad_norm": 0.2798328101634979, "learning_rate": 0.00016826966477382399, "loss": 11.6834, "step": 37400 }, { "epoch": 0.7829063049485054, "grad_norm": 0.3741086721420288, "learning_rate": 0.00016826806266944736, "loss": 11.678, "step": 37401 }, { "epoch": 0.7829272377124675, "grad_norm": 0.3519262969493866, "learning_rate": 0.0001682664605322529, "loss": 11.6541, "step": 37402 }, { "epoch": 0.7829481704764297, "grad_norm": 0.38542985916137695, "learning_rate": 0.00016826485836224134, "loss": 11.6671, "step": 37403 }, { "epoch": 0.7829691032403918, "grad_norm": 0.26940104365348816, "learning_rate": 0.00016826325615941354, "loss": 11.6707, "step": 37404 }, { "epoch": 0.782990036004354, "grad_norm": 0.3105551600456238, "learning_rate": 0.0001682616539237702, "loss": 11.6683, "step": 37405 }, { "epoch": 0.7830109687683162, "grad_norm": 0.29696226119995117, "learning_rate": 0.00016826005165531213, "loss": 11.6681, "step": 37406 }, { "epoch": 0.7830319015322783, "grad_norm": 0.2647700011730194, "learning_rate": 0.0001682584493540401, "loss": 11.6603, "step": 37407 }, { "epoch": 0.7830528342962405, "grad_norm": 0.2589566111564636, "learning_rate": 0.0001682568470199548, "loss": 11.6762, "step": 37408 }, { "epoch": 0.7830737670602026, "grad_norm": 0.32911306619644165, "learning_rate": 0.00016825524465305717, "loss": 11.6676, "step": 37409 }, { "epoch": 0.7830946998241648, "grad_norm": 0.3445326089859009, "learning_rate": 0.0001682536422533478, "loss": 11.6494, "step": 37410 }, { "epoch": 0.7831156325881269, "grad_norm": 0.3219868242740631, "learning_rate": 0.00016825203982082754, "loss": 11.6582, "step": 37411 }, { "epoch": 0.7831365653520891, "grad_norm": 0.3102589547634125, "learning_rate": 0.00016825043735549717, "loss": 11.6624, "step": 37412 }, { "epoch": 0.7831574981160513, "grad_norm": 0.34612053632736206, "learning_rate": 0.00016824883485735745, "loss": 11.6763, "step": 37413 }, { "epoch": 0.7831784308800134, "grad_norm": 0.2899186909198761, "learning_rate": 0.00016824723232640915, "loss": 11.6742, "step": 37414 }, { "epoch": 0.7831993636439756, "grad_norm": 0.3350101411342621, "learning_rate": 0.00016824562976265302, "loss": 11.6794, "step": 37415 }, { "epoch": 0.7832202964079377, "grad_norm": 0.3342377543449402, "learning_rate": 0.0001682440271660899, "loss": 11.6629, "step": 37416 }, { "epoch": 0.7832412291718999, "grad_norm": 0.3221418559551239, "learning_rate": 0.00016824242453672045, "loss": 11.664, "step": 37417 }, { "epoch": 0.783262161935862, "grad_norm": 0.3188098073005676, "learning_rate": 0.0001682408218745455, "loss": 11.6607, "step": 37418 }, { "epoch": 0.7832830946998242, "grad_norm": 0.42698055505752563, "learning_rate": 0.00016823921917956587, "loss": 11.677, "step": 37419 }, { "epoch": 0.7833040274637864, "grad_norm": 0.3171127736568451, "learning_rate": 0.0001682376164517822, "loss": 11.6938, "step": 37420 }, { "epoch": 0.7833249602277484, "grad_norm": 0.2916933596134186, "learning_rate": 0.00016823601369119541, "loss": 11.681, "step": 37421 }, { "epoch": 0.7833458929917106, "grad_norm": 0.3111289441585541, "learning_rate": 0.0001682344108978062, "loss": 11.6643, "step": 37422 }, { "epoch": 0.7833668257556727, "grad_norm": 0.3631458580493927, "learning_rate": 0.0001682328080716153, "loss": 11.6907, "step": 37423 }, { "epoch": 0.7833877585196349, "grad_norm": 0.4661068618297577, "learning_rate": 0.00016823120521262356, "loss": 11.6919, "step": 37424 }, { "epoch": 0.7834086912835971, "grad_norm": 0.3750484585762024, "learning_rate": 0.00016822960232083166, "loss": 11.6807, "step": 37425 }, { "epoch": 0.7834296240475592, "grad_norm": 0.30946269631385803, "learning_rate": 0.00016822799939624047, "loss": 11.6794, "step": 37426 }, { "epoch": 0.7834505568115214, "grad_norm": 0.2793349027633667, "learning_rate": 0.00016822639643885069, "loss": 11.6647, "step": 37427 }, { "epoch": 0.7834714895754835, "grad_norm": 0.30588680505752563, "learning_rate": 0.00016822479344866316, "loss": 11.6604, "step": 37428 }, { "epoch": 0.7834924223394457, "grad_norm": 0.2780963182449341, "learning_rate": 0.00016822319042567854, "loss": 11.6524, "step": 37429 }, { "epoch": 0.7835133551034078, "grad_norm": 0.28614455461502075, "learning_rate": 0.0001682215873698977, "loss": 11.6515, "step": 37430 }, { "epoch": 0.78353428786737, "grad_norm": 0.26557987928390503, "learning_rate": 0.00016821998428132137, "loss": 11.6683, "step": 37431 }, { "epoch": 0.7835552206313322, "grad_norm": 0.27904412150382996, "learning_rate": 0.00016821838115995032, "loss": 11.6764, "step": 37432 }, { "epoch": 0.7835761533952943, "grad_norm": 0.28531619906425476, "learning_rate": 0.00016821677800578534, "loss": 11.6777, "step": 37433 }, { "epoch": 0.7835970861592565, "grad_norm": 0.268415629863739, "learning_rate": 0.00016821517481882718, "loss": 11.6647, "step": 37434 }, { "epoch": 0.7836180189232186, "grad_norm": 0.2963639199733734, "learning_rate": 0.00016821357159907662, "loss": 11.6821, "step": 37435 }, { "epoch": 0.7836389516871808, "grad_norm": 0.28785377740859985, "learning_rate": 0.00016821196834653442, "loss": 11.6613, "step": 37436 }, { "epoch": 0.7836598844511429, "grad_norm": 0.29168900847435, "learning_rate": 0.0001682103650612014, "loss": 11.688, "step": 37437 }, { "epoch": 0.7836808172151051, "grad_norm": 0.34016895294189453, "learning_rate": 0.00016820876174307821, "loss": 11.6821, "step": 37438 }, { "epoch": 0.7837017499790673, "grad_norm": 0.27199316024780273, "learning_rate": 0.00016820715839216578, "loss": 11.6839, "step": 37439 }, { "epoch": 0.7837226827430294, "grad_norm": 0.36574679613113403, "learning_rate": 0.00016820555500846477, "loss": 11.6855, "step": 37440 }, { "epoch": 0.7837436155069916, "grad_norm": 0.2894309461116791, "learning_rate": 0.000168203951591976, "loss": 11.6676, "step": 37441 }, { "epoch": 0.7837645482709537, "grad_norm": 0.5141122341156006, "learning_rate": 0.0001682023481427002, "loss": 11.6814, "step": 37442 }, { "epoch": 0.7837854810349159, "grad_norm": 0.2780854105949402, "learning_rate": 0.0001682007446606382, "loss": 11.6704, "step": 37443 }, { "epoch": 0.783806413798878, "grad_norm": 0.25787779688835144, "learning_rate": 0.00016819914114579067, "loss": 11.6669, "step": 37444 }, { "epoch": 0.7838273465628401, "grad_norm": 0.3496687114238739, "learning_rate": 0.0001681975375981585, "loss": 11.6532, "step": 37445 }, { "epoch": 0.7838482793268023, "grad_norm": 0.35229918360710144, "learning_rate": 0.00016819593401774235, "loss": 11.6596, "step": 37446 }, { "epoch": 0.7838692120907644, "grad_norm": 0.27412840723991394, "learning_rate": 0.00016819433040454312, "loss": 11.6671, "step": 37447 }, { "epoch": 0.7838901448547266, "grad_norm": 0.2835572361946106, "learning_rate": 0.00016819272675856149, "loss": 11.6763, "step": 37448 }, { "epoch": 0.7839110776186887, "grad_norm": 0.25043851137161255, "learning_rate": 0.00016819112307979821, "loss": 11.6736, "step": 37449 }, { "epoch": 0.7839320103826509, "grad_norm": 0.27326884865760803, "learning_rate": 0.00016818951936825415, "loss": 11.6607, "step": 37450 }, { "epoch": 0.7839529431466131, "grad_norm": 0.4001614451408386, "learning_rate": 0.00016818791562392998, "loss": 11.6493, "step": 37451 }, { "epoch": 0.7839738759105752, "grad_norm": 0.35164743661880493, "learning_rate": 0.00016818631184682648, "loss": 11.6892, "step": 37452 }, { "epoch": 0.7839948086745374, "grad_norm": 0.32487016916275024, "learning_rate": 0.00016818470803694452, "loss": 11.6638, "step": 37453 }, { "epoch": 0.7840157414384995, "grad_norm": 0.4183599650859833, "learning_rate": 0.0001681831041942848, "loss": 11.6696, "step": 37454 }, { "epoch": 0.7840366742024617, "grad_norm": 0.3359285295009613, "learning_rate": 0.00016818150031884806, "loss": 11.6742, "step": 37455 }, { "epoch": 0.7840576069664238, "grad_norm": 0.2406679391860962, "learning_rate": 0.0001681798964106351, "loss": 11.6612, "step": 37456 }, { "epoch": 0.784078539730386, "grad_norm": 0.3177434206008911, "learning_rate": 0.00016817829246964676, "loss": 11.6707, "step": 37457 }, { "epoch": 0.7840994724943482, "grad_norm": 0.2707973122596741, "learning_rate": 0.0001681766884958837, "loss": 11.67, "step": 37458 }, { "epoch": 0.7841204052583103, "grad_norm": 0.3252662718296051, "learning_rate": 0.00016817508448934673, "loss": 11.6998, "step": 37459 }, { "epoch": 0.7841413380222725, "grad_norm": 0.3585386574268341, "learning_rate": 0.00016817348045003664, "loss": 11.6751, "step": 37460 }, { "epoch": 0.7841622707862346, "grad_norm": 0.3058958649635315, "learning_rate": 0.00016817187637795423, "loss": 11.671, "step": 37461 }, { "epoch": 0.7841832035501968, "grad_norm": 0.2553209960460663, "learning_rate": 0.00016817027227310018, "loss": 11.669, "step": 37462 }, { "epoch": 0.784204136314159, "grad_norm": 0.2578391134738922, "learning_rate": 0.00016816866813547536, "loss": 11.6622, "step": 37463 }, { "epoch": 0.7842250690781211, "grad_norm": 0.34961816668510437, "learning_rate": 0.00016816706396508043, "loss": 11.6625, "step": 37464 }, { "epoch": 0.7842460018420833, "grad_norm": 0.2909836173057556, "learning_rate": 0.00016816545976191632, "loss": 11.6679, "step": 37465 }, { "epoch": 0.7842669346060454, "grad_norm": 0.2652314007282257, "learning_rate": 0.00016816385552598364, "loss": 11.6749, "step": 37466 }, { "epoch": 0.7842878673700076, "grad_norm": 0.4073227345943451, "learning_rate": 0.00016816225125728326, "loss": 11.6638, "step": 37467 }, { "epoch": 0.7843088001339696, "grad_norm": 0.23166237771511078, "learning_rate": 0.0001681606469558159, "loss": 11.671, "step": 37468 }, { "epoch": 0.7843297328979318, "grad_norm": 0.32194995880126953, "learning_rate": 0.00016815904262158232, "loss": 11.68, "step": 37469 }, { "epoch": 0.784350665661894, "grad_norm": 0.2732475996017456, "learning_rate": 0.00016815743825458336, "loss": 11.674, "step": 37470 }, { "epoch": 0.7843715984258561, "grad_norm": 0.25465038418769836, "learning_rate": 0.00016815583385481974, "loss": 11.6653, "step": 37471 }, { "epoch": 0.7843925311898183, "grad_norm": 0.26317930221557617, "learning_rate": 0.00016815422942229228, "loss": 11.6511, "step": 37472 }, { "epoch": 0.7844134639537804, "grad_norm": 0.34159520268440247, "learning_rate": 0.00016815262495700168, "loss": 11.6597, "step": 37473 }, { "epoch": 0.7844343967177426, "grad_norm": 0.3137870132923126, "learning_rate": 0.00016815102045894875, "loss": 11.6972, "step": 37474 }, { "epoch": 0.7844553294817047, "grad_norm": 0.2982266843318939, "learning_rate": 0.00016814941592813428, "loss": 11.6775, "step": 37475 }, { "epoch": 0.7844762622456669, "grad_norm": 0.34481656551361084, "learning_rate": 0.000168147811364559, "loss": 11.6736, "step": 37476 }, { "epoch": 0.7844971950096291, "grad_norm": 0.33889055252075195, "learning_rate": 0.00016814620676822373, "loss": 11.6756, "step": 37477 }, { "epoch": 0.7845181277735912, "grad_norm": 0.2810271084308624, "learning_rate": 0.00016814460213912917, "loss": 11.6729, "step": 37478 }, { "epoch": 0.7845390605375534, "grad_norm": 0.3932252824306488, "learning_rate": 0.00016814299747727618, "loss": 11.6899, "step": 37479 }, { "epoch": 0.7845599933015155, "grad_norm": 0.30362603068351746, "learning_rate": 0.00016814139278266546, "loss": 11.6521, "step": 37480 }, { "epoch": 0.7845809260654777, "grad_norm": 0.2950442135334015, "learning_rate": 0.0001681397880552978, "loss": 11.6592, "step": 37481 }, { "epoch": 0.7846018588294399, "grad_norm": 0.31930264830589294, "learning_rate": 0.00016813818329517398, "loss": 11.6741, "step": 37482 }, { "epoch": 0.784622791593402, "grad_norm": 0.29761406779289246, "learning_rate": 0.00016813657850229475, "loss": 11.6568, "step": 37483 }, { "epoch": 0.7846437243573642, "grad_norm": 0.2812845706939697, "learning_rate": 0.00016813497367666096, "loss": 11.6628, "step": 37484 }, { "epoch": 0.7846646571213263, "grad_norm": 0.362068772315979, "learning_rate": 0.00016813336881827328, "loss": 11.6778, "step": 37485 }, { "epoch": 0.7846855898852885, "grad_norm": 0.2882671356201172, "learning_rate": 0.00016813176392713254, "loss": 11.6838, "step": 37486 }, { "epoch": 0.7847065226492506, "grad_norm": 0.32805728912353516, "learning_rate": 0.0001681301590032395, "loss": 11.6661, "step": 37487 }, { "epoch": 0.7847274554132128, "grad_norm": 0.29391539096832275, "learning_rate": 0.0001681285540465949, "loss": 11.6662, "step": 37488 }, { "epoch": 0.784748388177175, "grad_norm": 0.31860873103141785, "learning_rate": 0.00016812694905719958, "loss": 11.6679, "step": 37489 }, { "epoch": 0.784769320941137, "grad_norm": 0.28723153471946716, "learning_rate": 0.00016812534403505422, "loss": 11.6627, "step": 37490 }, { "epoch": 0.7847902537050992, "grad_norm": 0.2545418441295624, "learning_rate": 0.00016812373898015967, "loss": 11.6531, "step": 37491 }, { "epoch": 0.7848111864690613, "grad_norm": 0.3372846841812134, "learning_rate": 0.0001681221338925167, "loss": 11.679, "step": 37492 }, { "epoch": 0.7848321192330235, "grad_norm": 0.3243946433067322, "learning_rate": 0.000168120528772126, "loss": 11.6674, "step": 37493 }, { "epoch": 0.7848530519969856, "grad_norm": 0.3662620484828949, "learning_rate": 0.00016811892361898845, "loss": 11.6673, "step": 37494 }, { "epoch": 0.7848739847609478, "grad_norm": 0.33598828315734863, "learning_rate": 0.00016811731843310472, "loss": 11.6741, "step": 37495 }, { "epoch": 0.78489491752491, "grad_norm": 0.3014257550239563, "learning_rate": 0.00016811571321447566, "loss": 11.6762, "step": 37496 }, { "epoch": 0.7849158502888721, "grad_norm": 0.28215470910072327, "learning_rate": 0.000168114107963102, "loss": 11.6648, "step": 37497 }, { "epoch": 0.7849367830528343, "grad_norm": 0.35098522901535034, "learning_rate": 0.00016811250267898453, "loss": 11.6689, "step": 37498 }, { "epoch": 0.7849577158167964, "grad_norm": 0.32120004296302795, "learning_rate": 0.00016811089736212402, "loss": 11.6587, "step": 37499 }, { "epoch": 0.7849786485807586, "grad_norm": 0.28154170513153076, "learning_rate": 0.0001681092920125212, "loss": 11.6794, "step": 37500 }, { "epoch": 0.7849995813447208, "grad_norm": 0.38915300369262695, "learning_rate": 0.00016810768663017692, "loss": 11.6572, "step": 37501 }, { "epoch": 0.7850205141086829, "grad_norm": 0.2924419939517975, "learning_rate": 0.0001681060812150919, "loss": 11.669, "step": 37502 }, { "epoch": 0.7850414468726451, "grad_norm": 0.23998397588729858, "learning_rate": 0.00016810447576726692, "loss": 11.6758, "step": 37503 }, { "epoch": 0.7850623796366072, "grad_norm": 0.3036191463470459, "learning_rate": 0.00016810287028670275, "loss": 11.66, "step": 37504 }, { "epoch": 0.7850833124005694, "grad_norm": 0.36358579993247986, "learning_rate": 0.00016810126477340015, "loss": 11.6713, "step": 37505 }, { "epoch": 0.7851042451645315, "grad_norm": 0.30531173944473267, "learning_rate": 0.00016809965922735996, "loss": 11.6706, "step": 37506 }, { "epoch": 0.7851251779284937, "grad_norm": 0.36076828837394714, "learning_rate": 0.00016809805364858285, "loss": 11.6653, "step": 37507 }, { "epoch": 0.7851461106924559, "grad_norm": 0.33142417669296265, "learning_rate": 0.00016809644803706964, "loss": 11.6664, "step": 37508 }, { "epoch": 0.785167043456418, "grad_norm": 0.3257535994052887, "learning_rate": 0.00016809484239282114, "loss": 11.6544, "step": 37509 }, { "epoch": 0.7851879762203802, "grad_norm": 0.40097111463546753, "learning_rate": 0.00016809323671583806, "loss": 11.6948, "step": 37510 }, { "epoch": 0.7852089089843423, "grad_norm": 0.2985386252403259, "learning_rate": 0.0001680916310061212, "loss": 11.6551, "step": 37511 }, { "epoch": 0.7852298417483045, "grad_norm": 0.2906137704849243, "learning_rate": 0.00016809002526367133, "loss": 11.6793, "step": 37512 }, { "epoch": 0.7852507745122665, "grad_norm": 0.24849194288253784, "learning_rate": 0.0001680884194884892, "loss": 11.6764, "step": 37513 }, { "epoch": 0.7852717072762287, "grad_norm": 0.301528662443161, "learning_rate": 0.00016808681368057565, "loss": 11.6723, "step": 37514 }, { "epoch": 0.785292640040191, "grad_norm": 0.28147411346435547, "learning_rate": 0.00016808520783993136, "loss": 11.6651, "step": 37515 }, { "epoch": 0.785313572804153, "grad_norm": 0.40052828192710876, "learning_rate": 0.00016808360196655716, "loss": 11.6729, "step": 37516 }, { "epoch": 0.7853345055681152, "grad_norm": 0.26668494939804077, "learning_rate": 0.0001680819960604538, "loss": 11.6716, "step": 37517 }, { "epoch": 0.7853554383320773, "grad_norm": 0.26665374636650085, "learning_rate": 0.00016808039012162208, "loss": 11.6715, "step": 37518 }, { "epoch": 0.7853763710960395, "grad_norm": 0.3231586515903473, "learning_rate": 0.00016807878415006273, "loss": 11.6856, "step": 37519 }, { "epoch": 0.7853973038600017, "grad_norm": 0.27797913551330566, "learning_rate": 0.00016807717814577657, "loss": 11.6597, "step": 37520 }, { "epoch": 0.7854182366239638, "grad_norm": 0.33171749114990234, "learning_rate": 0.0001680755721087643, "loss": 11.6702, "step": 37521 }, { "epoch": 0.785439169387926, "grad_norm": 0.7042468190193176, "learning_rate": 0.0001680739660390268, "loss": 11.6893, "step": 37522 }, { "epoch": 0.7854601021518881, "grad_norm": 0.29066741466522217, "learning_rate": 0.00016807235993656475, "loss": 11.676, "step": 37523 }, { "epoch": 0.7854810349158503, "grad_norm": 0.362739622592926, "learning_rate": 0.00016807075380137896, "loss": 11.6702, "step": 37524 }, { "epoch": 0.7855019676798124, "grad_norm": 0.34330862760543823, "learning_rate": 0.00016806914763347017, "loss": 11.6669, "step": 37525 }, { "epoch": 0.7855229004437746, "grad_norm": 0.32347404956817627, "learning_rate": 0.0001680675414328392, "loss": 11.672, "step": 37526 }, { "epoch": 0.7855438332077368, "grad_norm": 0.40630602836608887, "learning_rate": 0.0001680659351994868, "loss": 11.6809, "step": 37527 }, { "epoch": 0.7855647659716989, "grad_norm": 0.290374755859375, "learning_rate": 0.0001680643289334137, "loss": 11.6718, "step": 37528 }, { "epoch": 0.7855856987356611, "grad_norm": 0.30378326773643494, "learning_rate": 0.00016806272263462075, "loss": 11.6686, "step": 37529 }, { "epoch": 0.7856066314996232, "grad_norm": 0.3741638660430908, "learning_rate": 0.00016806111630310867, "loss": 11.6721, "step": 37530 }, { "epoch": 0.7856275642635854, "grad_norm": 0.2806086242198944, "learning_rate": 0.00016805950993887829, "loss": 11.6692, "step": 37531 }, { "epoch": 0.7856484970275475, "grad_norm": 0.3062361776828766, "learning_rate": 0.0001680579035419303, "loss": 11.6602, "step": 37532 }, { "epoch": 0.7856694297915097, "grad_norm": 0.2447020411491394, "learning_rate": 0.00016805629711226551, "loss": 11.6679, "step": 37533 }, { "epoch": 0.7856903625554719, "grad_norm": 0.3023715019226074, "learning_rate": 0.00016805469064988472, "loss": 11.6459, "step": 37534 }, { "epoch": 0.785711295319434, "grad_norm": 0.3765155076980591, "learning_rate": 0.00016805308415478865, "loss": 11.6647, "step": 37535 }, { "epoch": 0.7857322280833962, "grad_norm": 0.33502402901649475, "learning_rate": 0.0001680514776269781, "loss": 11.666, "step": 37536 }, { "epoch": 0.7857531608473582, "grad_norm": 0.30323171615600586, "learning_rate": 0.00016804987106645383, "loss": 11.6708, "step": 37537 }, { "epoch": 0.7857740936113204, "grad_norm": 0.2719168961048126, "learning_rate": 0.0001680482644732167, "loss": 11.677, "step": 37538 }, { "epoch": 0.7857950263752826, "grad_norm": 0.32311931252479553, "learning_rate": 0.0001680466578472673, "loss": 11.6649, "step": 37539 }, { "epoch": 0.7858159591392447, "grad_norm": 0.25791582465171814, "learning_rate": 0.00016804505118860657, "loss": 11.6785, "step": 37540 }, { "epoch": 0.7858368919032069, "grad_norm": 0.351087749004364, "learning_rate": 0.00016804344449723523, "loss": 11.6801, "step": 37541 }, { "epoch": 0.785857824667169, "grad_norm": 0.23104137182235718, "learning_rate": 0.00016804183777315398, "loss": 11.67, "step": 37542 }, { "epoch": 0.7858787574311312, "grad_norm": 0.3178010880947113, "learning_rate": 0.00016804023101636372, "loss": 11.6734, "step": 37543 }, { "epoch": 0.7858996901950933, "grad_norm": 0.3413853347301483, "learning_rate": 0.00016803862422686515, "loss": 11.6816, "step": 37544 }, { "epoch": 0.7859206229590555, "grad_norm": 0.3991013467311859, "learning_rate": 0.000168037017404659, "loss": 11.6793, "step": 37545 }, { "epoch": 0.7859415557230177, "grad_norm": 0.28397735953330994, "learning_rate": 0.00016803541054974614, "loss": 11.659, "step": 37546 }, { "epoch": 0.7859624884869798, "grad_norm": 0.2852763533592224, "learning_rate": 0.00016803380366212727, "loss": 11.68, "step": 37547 }, { "epoch": 0.785983421250942, "grad_norm": 0.4112832844257355, "learning_rate": 0.00016803219674180322, "loss": 11.6737, "step": 37548 }, { "epoch": 0.7860043540149041, "grad_norm": 0.2933729588985443, "learning_rate": 0.00016803058978877468, "loss": 11.6493, "step": 37549 }, { "epoch": 0.7860252867788663, "grad_norm": 0.287293404340744, "learning_rate": 0.0001680289828030425, "loss": 11.6888, "step": 37550 }, { "epoch": 0.7860462195428284, "grad_norm": 0.46393340826034546, "learning_rate": 0.00016802737578460743, "loss": 11.6685, "step": 37551 }, { "epoch": 0.7860671523067906, "grad_norm": 0.3555554449558258, "learning_rate": 0.00016802576873347022, "loss": 11.6737, "step": 37552 }, { "epoch": 0.7860880850707528, "grad_norm": 0.27356982231140137, "learning_rate": 0.00016802416164963168, "loss": 11.6715, "step": 37553 }, { "epoch": 0.7861090178347149, "grad_norm": 0.3567855954170227, "learning_rate": 0.00016802255453309255, "loss": 11.6667, "step": 37554 }, { "epoch": 0.7861299505986771, "grad_norm": 0.25493088364601135, "learning_rate": 0.0001680209473838536, "loss": 11.6697, "step": 37555 }, { "epoch": 0.7861508833626392, "grad_norm": 0.34831154346466064, "learning_rate": 0.00016801934020191565, "loss": 11.6732, "step": 37556 }, { "epoch": 0.7861718161266014, "grad_norm": 0.3142407238483429, "learning_rate": 0.00016801773298727942, "loss": 11.6751, "step": 37557 }, { "epoch": 0.7861927488905636, "grad_norm": 0.3138580322265625, "learning_rate": 0.00016801612573994572, "loss": 11.6825, "step": 37558 }, { "epoch": 0.7862136816545257, "grad_norm": 0.3168836534023285, "learning_rate": 0.0001680145184599153, "loss": 11.6663, "step": 37559 }, { "epoch": 0.7862346144184879, "grad_norm": 0.3128208816051483, "learning_rate": 0.0001680129111471889, "loss": 11.6718, "step": 37560 }, { "epoch": 0.7862555471824499, "grad_norm": 0.2634250223636627, "learning_rate": 0.00016801130380176738, "loss": 11.684, "step": 37561 }, { "epoch": 0.7862764799464121, "grad_norm": 0.3015388250350952, "learning_rate": 0.00016800969642365144, "loss": 11.6613, "step": 37562 }, { "epoch": 0.7862974127103742, "grad_norm": 0.31923624873161316, "learning_rate": 0.0001680080890128419, "loss": 11.6868, "step": 37563 }, { "epoch": 0.7863183454743364, "grad_norm": 0.2805251181125641, "learning_rate": 0.00016800648156933945, "loss": 11.6681, "step": 37564 }, { "epoch": 0.7863392782382986, "grad_norm": 0.31744256615638733, "learning_rate": 0.00016800487409314498, "loss": 11.6721, "step": 37565 }, { "epoch": 0.7863602110022607, "grad_norm": 0.2498692274093628, "learning_rate": 0.00016800326658425918, "loss": 11.6678, "step": 37566 }, { "epoch": 0.7863811437662229, "grad_norm": 0.2935314476490021, "learning_rate": 0.00016800165904268285, "loss": 11.6641, "step": 37567 }, { "epoch": 0.786402076530185, "grad_norm": 0.3736999034881592, "learning_rate": 0.00016800005146841676, "loss": 11.6711, "step": 37568 }, { "epoch": 0.7864230092941472, "grad_norm": 0.31252437829971313, "learning_rate": 0.00016799844386146168, "loss": 11.6831, "step": 37569 }, { "epoch": 0.7864439420581093, "grad_norm": 0.2690703868865967, "learning_rate": 0.00016799683622181842, "loss": 11.6862, "step": 37570 }, { "epoch": 0.7864648748220715, "grad_norm": 0.3192990720272064, "learning_rate": 0.00016799522854948765, "loss": 11.6761, "step": 37571 }, { "epoch": 0.7864858075860337, "grad_norm": 0.3459849953651428, "learning_rate": 0.00016799362084447025, "loss": 11.6595, "step": 37572 }, { "epoch": 0.7865067403499958, "grad_norm": 0.3729512393474579, "learning_rate": 0.00016799201310676695, "loss": 11.6576, "step": 37573 }, { "epoch": 0.786527673113958, "grad_norm": 0.2659258544445038, "learning_rate": 0.00016799040533637853, "loss": 11.6754, "step": 37574 }, { "epoch": 0.7865486058779201, "grad_norm": 0.3357563018798828, "learning_rate": 0.00016798879753330577, "loss": 11.6623, "step": 37575 }, { "epoch": 0.7865695386418823, "grad_norm": 0.2993699908256531, "learning_rate": 0.00016798718969754942, "loss": 11.6846, "step": 37576 }, { "epoch": 0.7865904714058445, "grad_norm": 0.244526207447052, "learning_rate": 0.00016798558182911025, "loss": 11.6591, "step": 37577 }, { "epoch": 0.7866114041698066, "grad_norm": 0.2893027365207672, "learning_rate": 0.00016798397392798906, "loss": 11.6688, "step": 37578 }, { "epoch": 0.7866323369337688, "grad_norm": 0.32070210576057434, "learning_rate": 0.00016798236599418662, "loss": 11.6726, "step": 37579 }, { "epoch": 0.7866532696977309, "grad_norm": 0.3550964891910553, "learning_rate": 0.0001679807580277037, "loss": 11.6716, "step": 37580 }, { "epoch": 0.7866742024616931, "grad_norm": 0.2517394423484802, "learning_rate": 0.00016797915002854106, "loss": 11.6577, "step": 37581 }, { "epoch": 0.7866951352256552, "grad_norm": 0.3289087116718292, "learning_rate": 0.00016797754199669948, "loss": 11.6656, "step": 37582 }, { "epoch": 0.7867160679896174, "grad_norm": 0.38693758845329285, "learning_rate": 0.00016797593393217972, "loss": 11.6702, "step": 37583 }, { "epoch": 0.7867370007535796, "grad_norm": 0.30390360951423645, "learning_rate": 0.00016797432583498257, "loss": 11.6773, "step": 37584 }, { "epoch": 0.7867579335175416, "grad_norm": 0.29034823179244995, "learning_rate": 0.00016797271770510884, "loss": 11.6805, "step": 37585 }, { "epoch": 0.7867788662815038, "grad_norm": 0.2743704915046692, "learning_rate": 0.0001679711095425592, "loss": 11.6534, "step": 37586 }, { "epoch": 0.7867997990454659, "grad_norm": 0.3531193733215332, "learning_rate": 0.00016796950134733454, "loss": 11.6631, "step": 37587 }, { "epoch": 0.7868207318094281, "grad_norm": 0.2688106596469879, "learning_rate": 0.00016796789311943552, "loss": 11.6716, "step": 37588 }, { "epoch": 0.7868416645733902, "grad_norm": 0.3276277780532837, "learning_rate": 0.000167966284858863, "loss": 11.6784, "step": 37589 }, { "epoch": 0.7868625973373524, "grad_norm": 0.4621177613735199, "learning_rate": 0.00016796467656561773, "loss": 11.6708, "step": 37590 }, { "epoch": 0.7868835301013146, "grad_norm": 0.35000330209732056, "learning_rate": 0.00016796306823970046, "loss": 11.6907, "step": 37591 }, { "epoch": 0.7869044628652767, "grad_norm": 0.2870805561542511, "learning_rate": 0.00016796145988111203, "loss": 11.6919, "step": 37592 }, { "epoch": 0.7869253956292389, "grad_norm": 0.27407804131507874, "learning_rate": 0.00016795985148985315, "loss": 11.6513, "step": 37593 }, { "epoch": 0.786946328393201, "grad_norm": 0.3033418655395508, "learning_rate": 0.00016795824306592457, "loss": 11.6594, "step": 37594 }, { "epoch": 0.7869672611571632, "grad_norm": 0.3419065773487091, "learning_rate": 0.00016795663460932714, "loss": 11.674, "step": 37595 }, { "epoch": 0.7869881939211254, "grad_norm": 0.28093501925468445, "learning_rate": 0.00016795502612006156, "loss": 11.6769, "step": 37596 }, { "epoch": 0.7870091266850875, "grad_norm": 0.2867428958415985, "learning_rate": 0.00016795341759812864, "loss": 11.6788, "step": 37597 }, { "epoch": 0.7870300594490497, "grad_norm": 0.290876179933548, "learning_rate": 0.00016795180904352917, "loss": 11.6522, "step": 37598 }, { "epoch": 0.7870509922130118, "grad_norm": 0.3360133171081543, "learning_rate": 0.0001679502004562639, "loss": 11.6886, "step": 37599 }, { "epoch": 0.787071924976974, "grad_norm": 0.2847594916820526, "learning_rate": 0.00016794859183633362, "loss": 11.6662, "step": 37600 }, { "epoch": 0.7870928577409361, "grad_norm": 0.2688145339488983, "learning_rate": 0.00016794698318373905, "loss": 11.6578, "step": 37601 }, { "epoch": 0.7871137905048983, "grad_norm": 0.34836509823799133, "learning_rate": 0.00016794537449848105, "loss": 11.6744, "step": 37602 }, { "epoch": 0.7871347232688605, "grad_norm": 0.2848038673400879, "learning_rate": 0.00016794376578056032, "loss": 11.6787, "step": 37603 }, { "epoch": 0.7871556560328226, "grad_norm": 0.3476399779319763, "learning_rate": 0.00016794215702997764, "loss": 11.6772, "step": 37604 }, { "epoch": 0.7871765887967848, "grad_norm": 0.2895139455795288, "learning_rate": 0.0001679405482467338, "loss": 11.6726, "step": 37605 }, { "epoch": 0.7871975215607468, "grad_norm": 0.3567202687263489, "learning_rate": 0.00016793893943082964, "loss": 11.6794, "step": 37606 }, { "epoch": 0.787218454324709, "grad_norm": 0.26886221766471863, "learning_rate": 0.00016793733058226583, "loss": 11.6723, "step": 37607 }, { "epoch": 0.7872393870886711, "grad_norm": 0.28855428099632263, "learning_rate": 0.0001679357217010432, "loss": 11.6729, "step": 37608 }, { "epoch": 0.7872603198526333, "grad_norm": 0.31094393134117126, "learning_rate": 0.00016793411278716248, "loss": 11.6583, "step": 37609 }, { "epoch": 0.7872812526165955, "grad_norm": 0.2553448975086212, "learning_rate": 0.0001679325038406245, "loss": 11.6579, "step": 37610 }, { "epoch": 0.7873021853805576, "grad_norm": 0.26842716336250305, "learning_rate": 0.00016793089486143, "loss": 11.6588, "step": 37611 }, { "epoch": 0.7873231181445198, "grad_norm": 0.32590848207473755, "learning_rate": 0.00016792928584957972, "loss": 11.668, "step": 37612 }, { "epoch": 0.7873440509084819, "grad_norm": 0.2996680438518524, "learning_rate": 0.0001679276768050745, "loss": 11.6747, "step": 37613 }, { "epoch": 0.7873649836724441, "grad_norm": 0.30249306559562683, "learning_rate": 0.0001679260677279151, "loss": 11.6691, "step": 37614 }, { "epoch": 0.7873859164364062, "grad_norm": 0.2618393898010254, "learning_rate": 0.00016792445861810225, "loss": 11.648, "step": 37615 }, { "epoch": 0.7874068492003684, "grad_norm": 0.39211341738700867, "learning_rate": 0.00016792284947563676, "loss": 11.6556, "step": 37616 }, { "epoch": 0.7874277819643306, "grad_norm": 0.285889595746994, "learning_rate": 0.00016792124030051939, "loss": 11.6588, "step": 37617 }, { "epoch": 0.7874487147282927, "grad_norm": 0.31293609738349915, "learning_rate": 0.00016791963109275094, "loss": 11.6775, "step": 37618 }, { "epoch": 0.7874696474922549, "grad_norm": 0.298380583524704, "learning_rate": 0.00016791802185233215, "loss": 11.6584, "step": 37619 }, { "epoch": 0.787490580256217, "grad_norm": 0.29778286814689636, "learning_rate": 0.00016791641257926377, "loss": 11.671, "step": 37620 }, { "epoch": 0.7875115130201792, "grad_norm": 0.29388248920440674, "learning_rate": 0.00016791480327354666, "loss": 11.6657, "step": 37621 }, { "epoch": 0.7875324457841414, "grad_norm": 0.30195707082748413, "learning_rate": 0.00016791319393518153, "loss": 11.664, "step": 37622 }, { "epoch": 0.7875533785481035, "grad_norm": 0.4358924329280853, "learning_rate": 0.00016791158456416915, "loss": 11.6653, "step": 37623 }, { "epoch": 0.7875743113120657, "grad_norm": 0.2702380120754242, "learning_rate": 0.00016790997516051036, "loss": 11.6676, "step": 37624 }, { "epoch": 0.7875952440760278, "grad_norm": 0.29967769980430603, "learning_rate": 0.0001679083657242058, "loss": 11.6581, "step": 37625 }, { "epoch": 0.78761617683999, "grad_norm": 0.3236696422100067, "learning_rate": 0.0001679067562552564, "loss": 11.6852, "step": 37626 }, { "epoch": 0.7876371096039521, "grad_norm": 0.32908329367637634, "learning_rate": 0.0001679051467536628, "loss": 11.662, "step": 37627 }, { "epoch": 0.7876580423679143, "grad_norm": 0.3055492639541626, "learning_rate": 0.00016790353721942588, "loss": 11.664, "step": 37628 }, { "epoch": 0.7876789751318765, "grad_norm": 0.28123873472213745, "learning_rate": 0.00016790192765254636, "loss": 11.6692, "step": 37629 }, { "epoch": 0.7876999078958385, "grad_norm": 0.25751903653144836, "learning_rate": 0.00016790031805302504, "loss": 11.6691, "step": 37630 }, { "epoch": 0.7877208406598007, "grad_norm": 0.2976692020893097, "learning_rate": 0.00016789870842086267, "loss": 11.6581, "step": 37631 }, { "epoch": 0.7877417734237628, "grad_norm": 0.24583685398101807, "learning_rate": 0.00016789709875606002, "loss": 11.6671, "step": 37632 }, { "epoch": 0.787762706187725, "grad_norm": 0.32274672389030457, "learning_rate": 0.00016789548905861787, "loss": 11.6847, "step": 37633 }, { "epoch": 0.7877836389516871, "grad_norm": 0.29788070917129517, "learning_rate": 0.000167893879328537, "loss": 11.6827, "step": 37634 }, { "epoch": 0.7878045717156493, "grad_norm": 0.33042776584625244, "learning_rate": 0.00016789226956581818, "loss": 11.6704, "step": 37635 }, { "epoch": 0.7878255044796115, "grad_norm": 0.34423840045928955, "learning_rate": 0.0001678906597704622, "loss": 11.6659, "step": 37636 }, { "epoch": 0.7878464372435736, "grad_norm": 0.8908085227012634, "learning_rate": 0.0001678890499424698, "loss": 11.6068, "step": 37637 }, { "epoch": 0.7878673700075358, "grad_norm": 0.3562162518501282, "learning_rate": 0.00016788744008184176, "loss": 11.7045, "step": 37638 }, { "epoch": 0.7878883027714979, "grad_norm": 0.3092590868473053, "learning_rate": 0.0001678858301885789, "loss": 11.67, "step": 37639 }, { "epoch": 0.7879092355354601, "grad_norm": 0.3383086919784546, "learning_rate": 0.00016788422026268193, "loss": 11.6671, "step": 37640 }, { "epoch": 0.7879301682994223, "grad_norm": 0.41255253553390503, "learning_rate": 0.0001678826103041517, "loss": 11.6647, "step": 37641 }, { "epoch": 0.7879511010633844, "grad_norm": 0.2323896437883377, "learning_rate": 0.0001678810003129889, "loss": 11.6777, "step": 37642 }, { "epoch": 0.7879720338273466, "grad_norm": 0.2632928192615509, "learning_rate": 0.00016787939028919436, "loss": 11.6606, "step": 37643 }, { "epoch": 0.7879929665913087, "grad_norm": 0.32243800163269043, "learning_rate": 0.00016787778023276883, "loss": 11.6827, "step": 37644 }, { "epoch": 0.7880138993552709, "grad_norm": 0.44434699416160583, "learning_rate": 0.00016787617014371307, "loss": 11.6851, "step": 37645 }, { "epoch": 0.788034832119233, "grad_norm": 0.38734501600265503, "learning_rate": 0.00016787456002202792, "loss": 11.6749, "step": 37646 }, { "epoch": 0.7880557648831952, "grad_norm": 0.3127048909664154, "learning_rate": 0.0001678729498677141, "loss": 11.6759, "step": 37647 }, { "epoch": 0.7880766976471574, "grad_norm": 0.28437450528144836, "learning_rate": 0.00016787133968077238, "loss": 11.6709, "step": 37648 }, { "epoch": 0.7880976304111195, "grad_norm": 0.38457977771759033, "learning_rate": 0.00016786972946120353, "loss": 11.6753, "step": 37649 }, { "epoch": 0.7881185631750817, "grad_norm": 0.3950546979904175, "learning_rate": 0.00016786811920900836, "loss": 11.6745, "step": 37650 }, { "epoch": 0.7881394959390438, "grad_norm": 0.30646812915802, "learning_rate": 0.00016786650892418762, "loss": 11.6741, "step": 37651 }, { "epoch": 0.788160428703006, "grad_norm": 0.29745209217071533, "learning_rate": 0.0001678648986067421, "loss": 11.6725, "step": 37652 }, { "epoch": 0.788181361466968, "grad_norm": 0.30614006519317627, "learning_rate": 0.00016786328825667255, "loss": 11.6915, "step": 37653 }, { "epoch": 0.7882022942309302, "grad_norm": 0.2631128430366516, "learning_rate": 0.00016786167787397974, "loss": 11.666, "step": 37654 }, { "epoch": 0.7882232269948924, "grad_norm": 0.29168447852134705, "learning_rate": 0.0001678600674586645, "loss": 11.6773, "step": 37655 }, { "epoch": 0.7882441597588545, "grad_norm": 0.32109421491622925, "learning_rate": 0.00016785845701072755, "loss": 11.6607, "step": 37656 }, { "epoch": 0.7882650925228167, "grad_norm": 0.24046799540519714, "learning_rate": 0.00016785684653016967, "loss": 11.6814, "step": 37657 }, { "epoch": 0.7882860252867788, "grad_norm": 0.3279862701892853, "learning_rate": 0.00016785523601699166, "loss": 11.6669, "step": 37658 }, { "epoch": 0.788306958050741, "grad_norm": 0.3246726989746094, "learning_rate": 0.00016785362547119424, "loss": 11.6644, "step": 37659 }, { "epoch": 0.7883278908147032, "grad_norm": 0.2835330367088318, "learning_rate": 0.0001678520148927783, "loss": 11.6791, "step": 37660 }, { "epoch": 0.7883488235786653, "grad_norm": 0.2865506112575531, "learning_rate": 0.0001678504042817445, "loss": 11.6683, "step": 37661 }, { "epoch": 0.7883697563426275, "grad_norm": 0.32268208265304565, "learning_rate": 0.0001678487936380936, "loss": 11.6656, "step": 37662 }, { "epoch": 0.7883906891065896, "grad_norm": 0.28474166989326477, "learning_rate": 0.00016784718296182648, "loss": 11.6711, "step": 37663 }, { "epoch": 0.7884116218705518, "grad_norm": 0.30444061756134033, "learning_rate": 0.00016784557225294383, "loss": 11.6792, "step": 37664 }, { "epoch": 0.7884325546345139, "grad_norm": 0.301140159368515, "learning_rate": 0.00016784396151144648, "loss": 11.6711, "step": 37665 }, { "epoch": 0.7884534873984761, "grad_norm": 0.3278175890445709, "learning_rate": 0.00016784235073733517, "loss": 11.6764, "step": 37666 }, { "epoch": 0.7884744201624383, "grad_norm": 0.2674598693847656, "learning_rate": 0.00016784073993061067, "loss": 11.6733, "step": 37667 }, { "epoch": 0.7884953529264004, "grad_norm": 0.32797423005104065, "learning_rate": 0.0001678391290912738, "loss": 11.6695, "step": 37668 }, { "epoch": 0.7885162856903626, "grad_norm": 0.30124765634536743, "learning_rate": 0.00016783751821932525, "loss": 11.6763, "step": 37669 }, { "epoch": 0.7885372184543247, "grad_norm": 0.29293814301490784, "learning_rate": 0.0001678359073147659, "loss": 11.6577, "step": 37670 }, { "epoch": 0.7885581512182869, "grad_norm": 0.3559720814228058, "learning_rate": 0.00016783429637759643, "loss": 11.6716, "step": 37671 }, { "epoch": 0.788579083982249, "grad_norm": 0.27813488245010376, "learning_rate": 0.00016783268540781764, "loss": 11.6799, "step": 37672 }, { "epoch": 0.7886000167462112, "grad_norm": 0.26416581869125366, "learning_rate": 0.00016783107440543037, "loss": 11.6741, "step": 37673 }, { "epoch": 0.7886209495101734, "grad_norm": 0.3061053156852722, "learning_rate": 0.00016782946337043532, "loss": 11.6654, "step": 37674 }, { "epoch": 0.7886418822741355, "grad_norm": 0.2890906035900116, "learning_rate": 0.0001678278523028333, "loss": 11.6654, "step": 37675 }, { "epoch": 0.7886628150380977, "grad_norm": 0.26231205463409424, "learning_rate": 0.00016782624120262505, "loss": 11.6688, "step": 37676 }, { "epoch": 0.7886837478020597, "grad_norm": 0.314150869846344, "learning_rate": 0.00016782463006981139, "loss": 11.6639, "step": 37677 }, { "epoch": 0.7887046805660219, "grad_norm": 0.2823238670825958, "learning_rate": 0.00016782301890439305, "loss": 11.6519, "step": 37678 }, { "epoch": 0.7887256133299841, "grad_norm": 0.3123399019241333, "learning_rate": 0.0001678214077063708, "loss": 11.6762, "step": 37679 }, { "epoch": 0.7887465460939462, "grad_norm": 0.2947918176651001, "learning_rate": 0.00016781979647574548, "loss": 11.6665, "step": 37680 }, { "epoch": 0.7887674788579084, "grad_norm": 0.3248622715473175, "learning_rate": 0.00016781818521251782, "loss": 11.6406, "step": 37681 }, { "epoch": 0.7887884116218705, "grad_norm": 0.34366849064826965, "learning_rate": 0.0001678165739166886, "loss": 11.6619, "step": 37682 }, { "epoch": 0.7888093443858327, "grad_norm": 0.3034619688987732, "learning_rate": 0.0001678149625882586, "loss": 11.6726, "step": 37683 }, { "epoch": 0.7888302771497948, "grad_norm": 0.2672561705112457, "learning_rate": 0.00016781335122722856, "loss": 11.6829, "step": 37684 }, { "epoch": 0.788851209913757, "grad_norm": 0.28261974453926086, "learning_rate": 0.0001678117398335993, "loss": 11.673, "step": 37685 }, { "epoch": 0.7888721426777192, "grad_norm": 0.2727351784706116, "learning_rate": 0.0001678101284073716, "loss": 11.6678, "step": 37686 }, { "epoch": 0.7888930754416813, "grad_norm": 0.3239521384239197, "learning_rate": 0.0001678085169485462, "loss": 11.649, "step": 37687 }, { "epoch": 0.7889140082056435, "grad_norm": 0.39277219772338867, "learning_rate": 0.00016780690545712386, "loss": 11.6484, "step": 37688 }, { "epoch": 0.7889349409696056, "grad_norm": 0.2897093594074249, "learning_rate": 0.0001678052939331054, "loss": 11.6749, "step": 37689 }, { "epoch": 0.7889558737335678, "grad_norm": 0.4839327335357666, "learning_rate": 0.00016780368237649158, "loss": 11.6758, "step": 37690 }, { "epoch": 0.7889768064975299, "grad_norm": 0.375592976808548, "learning_rate": 0.00016780207078728317, "loss": 11.6513, "step": 37691 }, { "epoch": 0.7889977392614921, "grad_norm": 0.3873896896839142, "learning_rate": 0.00016780045916548093, "loss": 11.6728, "step": 37692 }, { "epoch": 0.7890186720254543, "grad_norm": 0.2644613981246948, "learning_rate": 0.00016779884751108565, "loss": 11.6811, "step": 37693 }, { "epoch": 0.7890396047894164, "grad_norm": 0.2846178114414215, "learning_rate": 0.00016779723582409814, "loss": 11.683, "step": 37694 }, { "epoch": 0.7890605375533786, "grad_norm": 0.36425793170928955, "learning_rate": 0.0001677956241045191, "loss": 11.6789, "step": 37695 }, { "epoch": 0.7890814703173407, "grad_norm": 0.28685462474823, "learning_rate": 0.00016779401235234937, "loss": 11.6745, "step": 37696 }, { "epoch": 0.7891024030813029, "grad_norm": 0.27614566683769226, "learning_rate": 0.00016779240056758973, "loss": 11.6699, "step": 37697 }, { "epoch": 0.7891233358452651, "grad_norm": 0.23338167369365692, "learning_rate": 0.00016779078875024087, "loss": 11.6787, "step": 37698 }, { "epoch": 0.7891442686092272, "grad_norm": 0.29185977578163147, "learning_rate": 0.00016778917690030363, "loss": 11.6762, "step": 37699 }, { "epoch": 0.7891652013731894, "grad_norm": 0.2884390652179718, "learning_rate": 0.0001677875650177788, "loss": 11.6706, "step": 37700 }, { "epoch": 0.7891861341371514, "grad_norm": 0.3470096290111542, "learning_rate": 0.0001677859531026671, "loss": 11.6691, "step": 37701 }, { "epoch": 0.7892070669011136, "grad_norm": 0.31219282746315, "learning_rate": 0.00016778434115496935, "loss": 11.6685, "step": 37702 }, { "epoch": 0.7892279996650757, "grad_norm": 0.32802140712738037, "learning_rate": 0.0001677827291746863, "loss": 11.6671, "step": 37703 }, { "epoch": 0.7892489324290379, "grad_norm": 0.2740883231163025, "learning_rate": 0.00016778111716181875, "loss": 11.6774, "step": 37704 }, { "epoch": 0.7892698651930001, "grad_norm": 0.32280412316322327, "learning_rate": 0.00016777950511636744, "loss": 11.6586, "step": 37705 }, { "epoch": 0.7892907979569622, "grad_norm": 0.6749688386917114, "learning_rate": 0.00016777789303833315, "loss": 11.6807, "step": 37706 }, { "epoch": 0.7893117307209244, "grad_norm": 0.3375507891178131, "learning_rate": 0.0001677762809277167, "loss": 11.6701, "step": 37707 }, { "epoch": 0.7893326634848865, "grad_norm": 0.2519869804382324, "learning_rate": 0.00016777466878451882, "loss": 11.6691, "step": 37708 }, { "epoch": 0.7893535962488487, "grad_norm": 0.31795984506607056, "learning_rate": 0.00016777305660874029, "loss": 11.6767, "step": 37709 }, { "epoch": 0.7893745290128108, "grad_norm": 0.3175925612449646, "learning_rate": 0.0001677714444003819, "loss": 11.6596, "step": 37710 }, { "epoch": 0.789395461776773, "grad_norm": 0.3114674687385559, "learning_rate": 0.00016776983215944445, "loss": 11.6704, "step": 37711 }, { "epoch": 0.7894163945407352, "grad_norm": 0.27098721265792847, "learning_rate": 0.00016776821988592864, "loss": 11.6595, "step": 37712 }, { "epoch": 0.7894373273046973, "grad_norm": 0.3564106822013855, "learning_rate": 0.00016776660757983529, "loss": 11.6737, "step": 37713 }, { "epoch": 0.7894582600686595, "grad_norm": 0.25831565260887146, "learning_rate": 0.0001677649952411652, "loss": 11.6731, "step": 37714 }, { "epoch": 0.7894791928326216, "grad_norm": 0.30898672342300415, "learning_rate": 0.00016776338286991912, "loss": 11.6824, "step": 37715 }, { "epoch": 0.7895001255965838, "grad_norm": 0.34187185764312744, "learning_rate": 0.0001677617704660978, "loss": 11.659, "step": 37716 }, { "epoch": 0.789521058360546, "grad_norm": 0.312895268201828, "learning_rate": 0.00016776015802970204, "loss": 11.6745, "step": 37717 }, { "epoch": 0.7895419911245081, "grad_norm": 0.3700239658355713, "learning_rate": 0.00016775854556073263, "loss": 11.6579, "step": 37718 }, { "epoch": 0.7895629238884703, "grad_norm": 0.3514717221260071, "learning_rate": 0.00016775693305919035, "loss": 11.6719, "step": 37719 }, { "epoch": 0.7895838566524324, "grad_norm": 0.2939964532852173, "learning_rate": 0.0001677553205250759, "loss": 11.6649, "step": 37720 }, { "epoch": 0.7896047894163946, "grad_norm": 0.3295139968395233, "learning_rate": 0.00016775370795839013, "loss": 11.6832, "step": 37721 }, { "epoch": 0.7896257221803566, "grad_norm": 0.31755489110946655, "learning_rate": 0.0001677520953591338, "loss": 11.6712, "step": 37722 }, { "epoch": 0.7896466549443188, "grad_norm": 0.22482529282569885, "learning_rate": 0.00016775048272730766, "loss": 11.6673, "step": 37723 }, { "epoch": 0.789667587708281, "grad_norm": 0.31944024562835693, "learning_rate": 0.00016774887006291257, "loss": 11.6859, "step": 37724 }, { "epoch": 0.7896885204722431, "grad_norm": 0.3269476294517517, "learning_rate": 0.00016774725736594918, "loss": 11.6679, "step": 37725 }, { "epoch": 0.7897094532362053, "grad_norm": 0.3089357316493988, "learning_rate": 0.00016774564463641834, "loss": 11.6882, "step": 37726 }, { "epoch": 0.7897303860001674, "grad_norm": 0.28580930829048157, "learning_rate": 0.0001677440318743208, "loss": 11.6634, "step": 37727 }, { "epoch": 0.7897513187641296, "grad_norm": 0.3513014316558838, "learning_rate": 0.00016774241907965736, "loss": 11.6777, "step": 37728 }, { "epoch": 0.7897722515280917, "grad_norm": 0.34284934401512146, "learning_rate": 0.0001677408062524288, "loss": 11.664, "step": 37729 }, { "epoch": 0.7897931842920539, "grad_norm": 0.3159066438674927, "learning_rate": 0.00016773919339263586, "loss": 11.6724, "step": 37730 }, { "epoch": 0.7898141170560161, "grad_norm": 0.28430625796318054, "learning_rate": 0.00016773758050027935, "loss": 11.6828, "step": 37731 }, { "epoch": 0.7898350498199782, "grad_norm": 0.22692978382110596, "learning_rate": 0.00016773596757536, "loss": 11.6689, "step": 37732 }, { "epoch": 0.7898559825839404, "grad_norm": 0.27286481857299805, "learning_rate": 0.0001677343546178786, "loss": 11.6641, "step": 37733 }, { "epoch": 0.7898769153479025, "grad_norm": 0.24457412958145142, "learning_rate": 0.000167732741627836, "loss": 11.6705, "step": 37734 }, { "epoch": 0.7898978481118647, "grad_norm": 0.45582273602485657, "learning_rate": 0.00016773112860523284, "loss": 11.6715, "step": 37735 }, { "epoch": 0.7899187808758269, "grad_norm": 0.2817875146865845, "learning_rate": 0.00016772951555007, "loss": 11.6704, "step": 37736 }, { "epoch": 0.789939713639789, "grad_norm": 0.2791109085083008, "learning_rate": 0.00016772790246234823, "loss": 11.6731, "step": 37737 }, { "epoch": 0.7899606464037512, "grad_norm": 0.3050895035266876, "learning_rate": 0.0001677262893420683, "loss": 11.6762, "step": 37738 }, { "epoch": 0.7899815791677133, "grad_norm": 0.2923409640789032, "learning_rate": 0.00016772467618923097, "loss": 11.6809, "step": 37739 }, { "epoch": 0.7900025119316755, "grad_norm": 0.2995794713497162, "learning_rate": 0.00016772306300383706, "loss": 11.6727, "step": 37740 }, { "epoch": 0.7900234446956376, "grad_norm": 0.39426958560943604, "learning_rate": 0.00016772144978588732, "loss": 11.6508, "step": 37741 }, { "epoch": 0.7900443774595998, "grad_norm": 0.2997681200504303, "learning_rate": 0.00016771983653538248, "loss": 11.6662, "step": 37742 }, { "epoch": 0.790065310223562, "grad_norm": 0.36862754821777344, "learning_rate": 0.0001677182232523234, "loss": 11.6841, "step": 37743 }, { "epoch": 0.7900862429875241, "grad_norm": 0.27008965611457825, "learning_rate": 0.00016771660993671078, "loss": 11.6758, "step": 37744 }, { "epoch": 0.7901071757514863, "grad_norm": 0.26642924547195435, "learning_rate": 0.00016771499658854543, "loss": 11.6542, "step": 37745 }, { "epoch": 0.7901281085154483, "grad_norm": 0.3130228519439697, "learning_rate": 0.00016771338320782815, "loss": 11.6824, "step": 37746 }, { "epoch": 0.7901490412794105, "grad_norm": 0.32498475909233093, "learning_rate": 0.00016771176979455966, "loss": 11.6697, "step": 37747 }, { "epoch": 0.7901699740433726, "grad_norm": 0.30188077688217163, "learning_rate": 0.0001677101563487408, "loss": 11.6491, "step": 37748 }, { "epoch": 0.7901909068073348, "grad_norm": 0.29026055335998535, "learning_rate": 0.00016770854287037227, "loss": 11.6909, "step": 37749 }, { "epoch": 0.790211839571297, "grad_norm": 0.2977602481842041, "learning_rate": 0.00016770692935945492, "loss": 11.6746, "step": 37750 }, { "epoch": 0.7902327723352591, "grad_norm": 0.2771066725254059, "learning_rate": 0.00016770531581598947, "loss": 11.6755, "step": 37751 }, { "epoch": 0.7902537050992213, "grad_norm": 0.31983622908592224, "learning_rate": 0.00016770370223997673, "loss": 11.6672, "step": 37752 }, { "epoch": 0.7902746378631834, "grad_norm": 0.3194541037082672, "learning_rate": 0.0001677020886314175, "loss": 11.6693, "step": 37753 }, { "epoch": 0.7902955706271456, "grad_norm": 0.30067187547683716, "learning_rate": 0.00016770047499031242, "loss": 11.6616, "step": 37754 }, { "epoch": 0.7903165033911078, "grad_norm": 0.2844502627849579, "learning_rate": 0.00016769886131666245, "loss": 11.69, "step": 37755 }, { "epoch": 0.7903374361550699, "grad_norm": 0.34435439109802246, "learning_rate": 0.00016769724761046825, "loss": 11.6867, "step": 37756 }, { "epoch": 0.7903583689190321, "grad_norm": 0.3112941384315491, "learning_rate": 0.00016769563387173064, "loss": 11.6365, "step": 37757 }, { "epoch": 0.7903793016829942, "grad_norm": 0.4215140640735626, "learning_rate": 0.00016769402010045035, "loss": 11.6832, "step": 37758 }, { "epoch": 0.7904002344469564, "grad_norm": 0.31731998920440674, "learning_rate": 0.00016769240629662822, "loss": 11.6737, "step": 37759 }, { "epoch": 0.7904211672109185, "grad_norm": 0.29636287689208984, "learning_rate": 0.00016769079246026501, "loss": 11.6651, "step": 37760 }, { "epoch": 0.7904420999748807, "grad_norm": 0.2652612626552582, "learning_rate": 0.00016768917859136145, "loss": 11.6857, "step": 37761 }, { "epoch": 0.7904630327388429, "grad_norm": 0.24881893396377563, "learning_rate": 0.00016768756468991836, "loss": 11.6713, "step": 37762 }, { "epoch": 0.790483965502805, "grad_norm": 0.3446410298347473, "learning_rate": 0.00016768595075593648, "loss": 11.6546, "step": 37763 }, { "epoch": 0.7905048982667672, "grad_norm": 0.3675723075866699, "learning_rate": 0.00016768433678941662, "loss": 11.6773, "step": 37764 }, { "epoch": 0.7905258310307293, "grad_norm": 0.3062461018562317, "learning_rate": 0.00016768272279035952, "loss": 11.669, "step": 37765 }, { "epoch": 0.7905467637946915, "grad_norm": 0.2962106466293335, "learning_rate": 0.00016768110875876603, "loss": 11.664, "step": 37766 }, { "epoch": 0.7905676965586536, "grad_norm": 0.2760709822177887, "learning_rate": 0.00016767949469463682, "loss": 11.662, "step": 37767 }, { "epoch": 0.7905886293226158, "grad_norm": 0.27031412720680237, "learning_rate": 0.00016767788059797277, "loss": 11.6705, "step": 37768 }, { "epoch": 0.790609562086578, "grad_norm": 0.3160209357738495, "learning_rate": 0.00016767626646877458, "loss": 11.6676, "step": 37769 }, { "epoch": 0.79063049485054, "grad_norm": 0.2991909086704254, "learning_rate": 0.00016767465230704305, "loss": 11.6752, "step": 37770 }, { "epoch": 0.7906514276145022, "grad_norm": 0.28880032896995544, "learning_rate": 0.00016767303811277896, "loss": 11.6732, "step": 37771 }, { "epoch": 0.7906723603784643, "grad_norm": 0.2914789021015167, "learning_rate": 0.00016767142388598308, "loss": 11.6535, "step": 37772 }, { "epoch": 0.7906932931424265, "grad_norm": 0.3109149634838104, "learning_rate": 0.0001676698096266562, "loss": 11.6746, "step": 37773 }, { "epoch": 0.7907142259063887, "grad_norm": 0.25085750222206116, "learning_rate": 0.00016766819533479908, "loss": 11.6651, "step": 37774 }, { "epoch": 0.7907351586703508, "grad_norm": 0.32258304953575134, "learning_rate": 0.00016766658101041251, "loss": 11.6603, "step": 37775 }, { "epoch": 0.790756091434313, "grad_norm": 0.28157299757003784, "learning_rate": 0.00016766496665349725, "loss": 11.6688, "step": 37776 }, { "epoch": 0.7907770241982751, "grad_norm": 0.28689220547676086, "learning_rate": 0.0001676633522640541, "loss": 11.6752, "step": 37777 }, { "epoch": 0.7907979569622373, "grad_norm": 0.26431944966316223, "learning_rate": 0.0001676617378420838, "loss": 11.6773, "step": 37778 }, { "epoch": 0.7908188897261994, "grad_norm": 0.365754634141922, "learning_rate": 0.00016766012338758715, "loss": 11.6691, "step": 37779 }, { "epoch": 0.7908398224901616, "grad_norm": 0.4094778597354889, "learning_rate": 0.00016765850890056497, "loss": 11.6656, "step": 37780 }, { "epoch": 0.7908607552541238, "grad_norm": 0.35281962156295776, "learning_rate": 0.00016765689438101792, "loss": 11.6876, "step": 37781 }, { "epoch": 0.7908816880180859, "grad_norm": 0.23979859054088593, "learning_rate": 0.00016765527982894689, "loss": 11.6743, "step": 37782 }, { "epoch": 0.7909026207820481, "grad_norm": 0.3286156952381134, "learning_rate": 0.0001676536652443526, "loss": 11.671, "step": 37783 }, { "epoch": 0.7909235535460102, "grad_norm": 0.3791476786136627, "learning_rate": 0.00016765205062723582, "loss": 11.6607, "step": 37784 }, { "epoch": 0.7909444863099724, "grad_norm": 0.3764228820800781, "learning_rate": 0.00016765043597759735, "loss": 11.6663, "step": 37785 }, { "epoch": 0.7909654190739345, "grad_norm": 0.3065030574798584, "learning_rate": 0.00016764882129543794, "loss": 11.6645, "step": 37786 }, { "epoch": 0.7909863518378967, "grad_norm": 0.27601301670074463, "learning_rate": 0.00016764720658075843, "loss": 11.6457, "step": 37787 }, { "epoch": 0.7910072846018589, "grad_norm": 0.31284767389297485, "learning_rate": 0.00016764559183355955, "loss": 11.6738, "step": 37788 }, { "epoch": 0.791028217365821, "grad_norm": 0.28398406505584717, "learning_rate": 0.00016764397705384206, "loss": 11.6581, "step": 37789 }, { "epoch": 0.7910491501297832, "grad_norm": 0.23656867444515228, "learning_rate": 0.00016764236224160674, "loss": 11.6734, "step": 37790 }, { "epoch": 0.7910700828937453, "grad_norm": 0.2903793156147003, "learning_rate": 0.0001676407473968544, "loss": 11.6691, "step": 37791 }, { "epoch": 0.7910910156577075, "grad_norm": 0.3935483396053314, "learning_rate": 0.0001676391325195858, "loss": 11.6714, "step": 37792 }, { "epoch": 0.7911119484216695, "grad_norm": 0.2385479211807251, "learning_rate": 0.00016763751760980172, "loss": 11.6636, "step": 37793 }, { "epoch": 0.7911328811856317, "grad_norm": 0.32010507583618164, "learning_rate": 0.0001676359026675029, "loss": 11.6727, "step": 37794 }, { "epoch": 0.7911538139495939, "grad_norm": 0.28507259488105774, "learning_rate": 0.00016763428769269017, "loss": 11.6539, "step": 37795 }, { "epoch": 0.791174746713556, "grad_norm": 0.3306312561035156, "learning_rate": 0.00016763267268536428, "loss": 11.6657, "step": 37796 }, { "epoch": 0.7911956794775182, "grad_norm": 0.373795747756958, "learning_rate": 0.000167631057645526, "loss": 11.6796, "step": 37797 }, { "epoch": 0.7912166122414803, "grad_norm": 0.3652423918247223, "learning_rate": 0.00016762944257317614, "loss": 11.6744, "step": 37798 }, { "epoch": 0.7912375450054425, "grad_norm": 0.30752453207969666, "learning_rate": 0.0001676278274683154, "loss": 11.652, "step": 37799 }, { "epoch": 0.7912584777694047, "grad_norm": 0.40767332911491394, "learning_rate": 0.00016762621233094467, "loss": 11.6688, "step": 37800 }, { "epoch": 0.7912794105333668, "grad_norm": 0.35499605536460876, "learning_rate": 0.00016762459716106464, "loss": 11.6795, "step": 37801 }, { "epoch": 0.791300343297329, "grad_norm": 0.2782471179962158, "learning_rate": 0.0001676229819586761, "loss": 11.6541, "step": 37802 }, { "epoch": 0.7913212760612911, "grad_norm": 0.27033770084381104, "learning_rate": 0.00016762136672377983, "loss": 11.663, "step": 37803 }, { "epoch": 0.7913422088252533, "grad_norm": 0.4295167922973633, "learning_rate": 0.0001676197514563766, "loss": 11.6819, "step": 37804 }, { "epoch": 0.7913631415892154, "grad_norm": 0.30854490399360657, "learning_rate": 0.00016761813615646725, "loss": 11.6727, "step": 37805 }, { "epoch": 0.7913840743531776, "grad_norm": 0.29040420055389404, "learning_rate": 0.0001676165208240525, "loss": 11.6675, "step": 37806 }, { "epoch": 0.7914050071171398, "grad_norm": 0.3079879581928253, "learning_rate": 0.0001676149054591331, "loss": 11.6774, "step": 37807 }, { "epoch": 0.7914259398811019, "grad_norm": 0.2743724286556244, "learning_rate": 0.00016761329006170986, "loss": 11.683, "step": 37808 }, { "epoch": 0.7914468726450641, "grad_norm": 0.3632081151008606, "learning_rate": 0.0001676116746317836, "loss": 11.688, "step": 37809 }, { "epoch": 0.7914678054090262, "grad_norm": 0.3140866756439209, "learning_rate": 0.00016761005916935502, "loss": 11.6635, "step": 37810 }, { "epoch": 0.7914887381729884, "grad_norm": 0.3145098090171814, "learning_rate": 0.00016760844367442494, "loss": 11.6501, "step": 37811 }, { "epoch": 0.7915096709369505, "grad_norm": 0.2759295403957367, "learning_rate": 0.00016760682814699414, "loss": 11.6486, "step": 37812 }, { "epoch": 0.7915306037009127, "grad_norm": 0.32499879598617554, "learning_rate": 0.00016760521258706336, "loss": 11.6731, "step": 37813 }, { "epoch": 0.7915515364648749, "grad_norm": 0.2916460335254669, "learning_rate": 0.0001676035969946334, "loss": 11.6676, "step": 37814 }, { "epoch": 0.791572469228837, "grad_norm": 0.3198065757751465, "learning_rate": 0.00016760198136970503, "loss": 11.6566, "step": 37815 }, { "epoch": 0.7915934019927992, "grad_norm": 0.3044133186340332, "learning_rate": 0.00016760036571227907, "loss": 11.6669, "step": 37816 }, { "epoch": 0.7916143347567612, "grad_norm": 0.26157182455062866, "learning_rate": 0.00016759875002235624, "loss": 11.6676, "step": 37817 }, { "epoch": 0.7916352675207234, "grad_norm": 0.310139924287796, "learning_rate": 0.00016759713429993732, "loss": 11.6717, "step": 37818 }, { "epoch": 0.7916562002846856, "grad_norm": 0.39387112855911255, "learning_rate": 0.00016759551854502312, "loss": 11.6702, "step": 37819 }, { "epoch": 0.7916771330486477, "grad_norm": 0.31029725074768066, "learning_rate": 0.00016759390275761438, "loss": 11.6797, "step": 37820 }, { "epoch": 0.7916980658126099, "grad_norm": 0.2699797749519348, "learning_rate": 0.0001675922869377119, "loss": 11.6583, "step": 37821 }, { "epoch": 0.791718998576572, "grad_norm": 0.30786874890327454, "learning_rate": 0.00016759067108531648, "loss": 11.661, "step": 37822 }, { "epoch": 0.7917399313405342, "grad_norm": 0.28620660305023193, "learning_rate": 0.00016758905520042883, "loss": 11.6824, "step": 37823 }, { "epoch": 0.7917608641044963, "grad_norm": 0.5460495948791504, "learning_rate": 0.0001675874392830498, "loss": 11.6777, "step": 37824 }, { "epoch": 0.7917817968684585, "grad_norm": 0.27901333570480347, "learning_rate": 0.0001675858233331801, "loss": 11.6843, "step": 37825 }, { "epoch": 0.7918027296324207, "grad_norm": 0.26627981662750244, "learning_rate": 0.00016758420735082057, "loss": 11.675, "step": 37826 }, { "epoch": 0.7918236623963828, "grad_norm": 0.3440936505794525, "learning_rate": 0.00016758259133597197, "loss": 11.6716, "step": 37827 }, { "epoch": 0.791844595160345, "grad_norm": 0.2778896689414978, "learning_rate": 0.00016758097528863503, "loss": 11.6737, "step": 37828 }, { "epoch": 0.7918655279243071, "grad_norm": 0.2954181432723999, "learning_rate": 0.0001675793592088106, "loss": 11.6822, "step": 37829 }, { "epoch": 0.7918864606882693, "grad_norm": 0.25951242446899414, "learning_rate": 0.00016757774309649937, "loss": 11.6633, "step": 37830 }, { "epoch": 0.7919073934522314, "grad_norm": 0.3315992057323456, "learning_rate": 0.00016757612695170217, "loss": 11.6699, "step": 37831 }, { "epoch": 0.7919283262161936, "grad_norm": 0.27226102352142334, "learning_rate": 0.0001675745107744198, "loss": 11.6729, "step": 37832 }, { "epoch": 0.7919492589801558, "grad_norm": 0.3410203158855438, "learning_rate": 0.00016757289456465299, "loss": 11.6629, "step": 37833 }, { "epoch": 0.7919701917441179, "grad_norm": 0.2868277430534363, "learning_rate": 0.00016757127832240254, "loss": 11.6517, "step": 37834 }, { "epoch": 0.7919911245080801, "grad_norm": 0.5258651971817017, "learning_rate": 0.00016756966204766922, "loss": 11.6704, "step": 37835 }, { "epoch": 0.7920120572720422, "grad_norm": 0.31184467673301697, "learning_rate": 0.00016756804574045382, "loss": 11.6699, "step": 37836 }, { "epoch": 0.7920329900360044, "grad_norm": 0.30783069133758545, "learning_rate": 0.0001675664294007571, "loss": 11.6656, "step": 37837 }, { "epoch": 0.7920539227999666, "grad_norm": 0.35639429092407227, "learning_rate": 0.00016756481302857985, "loss": 11.6586, "step": 37838 }, { "epoch": 0.7920748555639286, "grad_norm": 0.4038444459438324, "learning_rate": 0.00016756319662392284, "loss": 11.6588, "step": 37839 }, { "epoch": 0.7920957883278908, "grad_norm": 0.30263879895210266, "learning_rate": 0.00016756158018678681, "loss": 11.6595, "step": 37840 }, { "epoch": 0.7921167210918529, "grad_norm": 0.3148615062236786, "learning_rate": 0.00016755996371717262, "loss": 11.6734, "step": 37841 }, { "epoch": 0.7921376538558151, "grad_norm": 0.30187132954597473, "learning_rate": 0.00016755834721508098, "loss": 11.6872, "step": 37842 }, { "epoch": 0.7921585866197772, "grad_norm": 0.3261711299419403, "learning_rate": 0.0001675567306805127, "loss": 11.6774, "step": 37843 }, { "epoch": 0.7921795193837394, "grad_norm": 0.3010222315788269, "learning_rate": 0.00016755511411346854, "loss": 11.6618, "step": 37844 }, { "epoch": 0.7922004521477016, "grad_norm": 0.41893869638442993, "learning_rate": 0.00016755349751394928, "loss": 11.6717, "step": 37845 }, { "epoch": 0.7922213849116637, "grad_norm": 0.30115365982055664, "learning_rate": 0.00016755188088195573, "loss": 11.6815, "step": 37846 }, { "epoch": 0.7922423176756259, "grad_norm": 0.24659228324890137, "learning_rate": 0.0001675502642174886, "loss": 11.68, "step": 37847 }, { "epoch": 0.792263250439588, "grad_norm": 0.29900914430618286, "learning_rate": 0.00016754864752054874, "loss": 11.6432, "step": 37848 }, { "epoch": 0.7922841832035502, "grad_norm": 0.31782999634742737, "learning_rate": 0.00016754703079113683, "loss": 11.6754, "step": 37849 }, { "epoch": 0.7923051159675123, "grad_norm": 0.3073454201221466, "learning_rate": 0.00016754541402925373, "loss": 11.6783, "step": 37850 }, { "epoch": 0.7923260487314745, "grad_norm": 0.2558300793170929, "learning_rate": 0.00016754379723490023, "loss": 11.6633, "step": 37851 }, { "epoch": 0.7923469814954367, "grad_norm": 0.35530972480773926, "learning_rate": 0.00016754218040807706, "loss": 11.6737, "step": 37852 }, { "epoch": 0.7923679142593988, "grad_norm": 0.2890792489051819, "learning_rate": 0.000167540563548785, "loss": 11.6612, "step": 37853 }, { "epoch": 0.792388847023361, "grad_norm": 0.3140731751918793, "learning_rate": 0.00016753894665702486, "loss": 11.6723, "step": 37854 }, { "epoch": 0.7924097797873231, "grad_norm": 0.25629496574401855, "learning_rate": 0.00016753732973279737, "loss": 11.6697, "step": 37855 }, { "epoch": 0.7924307125512853, "grad_norm": 0.36036843061447144, "learning_rate": 0.00016753571277610335, "loss": 11.6525, "step": 37856 }, { "epoch": 0.7924516453152475, "grad_norm": 0.3040115535259247, "learning_rate": 0.00016753409578694353, "loss": 11.6782, "step": 37857 }, { "epoch": 0.7924725780792096, "grad_norm": 0.34688419103622437, "learning_rate": 0.00016753247876531875, "loss": 11.6777, "step": 37858 }, { "epoch": 0.7924935108431718, "grad_norm": 0.28525879979133606, "learning_rate": 0.0001675308617112297, "loss": 11.6559, "step": 37859 }, { "epoch": 0.7925144436071339, "grad_norm": 0.2679556906223297, "learning_rate": 0.00016752924462467726, "loss": 11.6731, "step": 37860 }, { "epoch": 0.7925353763710961, "grad_norm": 0.2657722234725952, "learning_rate": 0.00016752762750566218, "loss": 11.6566, "step": 37861 }, { "epoch": 0.7925563091350581, "grad_norm": 0.36219334602355957, "learning_rate": 0.00016752601035418518, "loss": 11.6692, "step": 37862 }, { "epoch": 0.7925772418990203, "grad_norm": 0.37013423442840576, "learning_rate": 0.0001675243931702471, "loss": 11.665, "step": 37863 }, { "epoch": 0.7925981746629825, "grad_norm": 0.3354994058609009, "learning_rate": 0.00016752277595384865, "loss": 11.6723, "step": 37864 }, { "epoch": 0.7926191074269446, "grad_norm": 0.4217473864555359, "learning_rate": 0.00016752115870499065, "loss": 11.6624, "step": 37865 }, { "epoch": 0.7926400401909068, "grad_norm": 0.2972410023212433, "learning_rate": 0.0001675195414236739, "loss": 11.6795, "step": 37866 }, { "epoch": 0.7926609729548689, "grad_norm": 0.29170721769332886, "learning_rate": 0.00016751792410989915, "loss": 11.6585, "step": 37867 }, { "epoch": 0.7926819057188311, "grad_norm": 0.37850242853164673, "learning_rate": 0.0001675163067636672, "loss": 11.6748, "step": 37868 }, { "epoch": 0.7927028384827932, "grad_norm": 0.3211849331855774, "learning_rate": 0.00016751468938497877, "loss": 11.6825, "step": 37869 }, { "epoch": 0.7927237712467554, "grad_norm": 0.36330336332321167, "learning_rate": 0.0001675130719738347, "loss": 11.6886, "step": 37870 }, { "epoch": 0.7927447040107176, "grad_norm": 0.2957238554954529, "learning_rate": 0.00016751145453023573, "loss": 11.6802, "step": 37871 }, { "epoch": 0.7927656367746797, "grad_norm": 0.2876003682613373, "learning_rate": 0.00016750983705418265, "loss": 11.6561, "step": 37872 }, { "epoch": 0.7927865695386419, "grad_norm": 0.3318217694759369, "learning_rate": 0.00016750821954567626, "loss": 11.6556, "step": 37873 }, { "epoch": 0.792807502302604, "grad_norm": 0.33183279633522034, "learning_rate": 0.0001675066020047173, "loss": 11.671, "step": 37874 }, { "epoch": 0.7928284350665662, "grad_norm": 0.36989542841911316, "learning_rate": 0.00016750498443130656, "loss": 11.6786, "step": 37875 }, { "epoch": 0.7928493678305284, "grad_norm": 0.2993851602077484, "learning_rate": 0.00016750336682544483, "loss": 11.6719, "step": 37876 }, { "epoch": 0.7928703005944905, "grad_norm": 0.3395244777202606, "learning_rate": 0.00016750174918713287, "loss": 11.671, "step": 37877 }, { "epoch": 0.7928912333584527, "grad_norm": 0.3702354431152344, "learning_rate": 0.00016750013151637148, "loss": 11.6687, "step": 37878 }, { "epoch": 0.7929121661224148, "grad_norm": 0.3263479769229889, "learning_rate": 0.00016749851381316143, "loss": 11.6595, "step": 37879 }, { "epoch": 0.792933098886377, "grad_norm": 0.30699223279953003, "learning_rate": 0.00016749689607750345, "loss": 11.6626, "step": 37880 }, { "epoch": 0.7929540316503391, "grad_norm": 0.24246028065681458, "learning_rate": 0.0001674952783093984, "loss": 11.656, "step": 37881 }, { "epoch": 0.7929749644143013, "grad_norm": 0.3397093415260315, "learning_rate": 0.00016749366050884698, "loss": 11.651, "step": 37882 }, { "epoch": 0.7929958971782635, "grad_norm": 0.38650161027908325, "learning_rate": 0.00016749204267585007, "loss": 11.6769, "step": 37883 }, { "epoch": 0.7930168299422256, "grad_norm": 0.3571012616157532, "learning_rate": 0.00016749042481040832, "loss": 11.6616, "step": 37884 }, { "epoch": 0.7930377627061878, "grad_norm": 0.3030156195163727, "learning_rate": 0.0001674888069125226, "loss": 11.6802, "step": 37885 }, { "epoch": 0.7930586954701498, "grad_norm": 0.27327144145965576, "learning_rate": 0.00016748718898219365, "loss": 11.6551, "step": 37886 }, { "epoch": 0.793079628234112, "grad_norm": 0.33982059359550476, "learning_rate": 0.00016748557101942224, "loss": 11.6457, "step": 37887 }, { "epoch": 0.7931005609980741, "grad_norm": 0.31895971298217773, "learning_rate": 0.0001674839530242092, "loss": 11.6774, "step": 37888 }, { "epoch": 0.7931214937620363, "grad_norm": 0.3933756947517395, "learning_rate": 0.00016748233499655523, "loss": 11.6665, "step": 37889 }, { "epoch": 0.7931424265259985, "grad_norm": 0.28521841764450073, "learning_rate": 0.00016748071693646117, "loss": 11.6772, "step": 37890 }, { "epoch": 0.7931633592899606, "grad_norm": 0.2949213683605194, "learning_rate": 0.00016747909884392778, "loss": 11.6695, "step": 37891 }, { "epoch": 0.7931842920539228, "grad_norm": 0.26792362332344055, "learning_rate": 0.00016747748071895582, "loss": 11.6681, "step": 37892 }, { "epoch": 0.7932052248178849, "grad_norm": 0.30017703771591187, "learning_rate": 0.0001674758625615461, "loss": 11.6803, "step": 37893 }, { "epoch": 0.7932261575818471, "grad_norm": 0.5205782055854797, "learning_rate": 0.00016747424437169937, "loss": 11.6643, "step": 37894 }, { "epoch": 0.7932470903458093, "grad_norm": 0.3518071174621582, "learning_rate": 0.00016747262614941644, "loss": 11.6652, "step": 37895 }, { "epoch": 0.7932680231097714, "grad_norm": 0.30391091108322144, "learning_rate": 0.00016747100789469803, "loss": 11.6908, "step": 37896 }, { "epoch": 0.7932889558737336, "grad_norm": 0.30367329716682434, "learning_rate": 0.000167469389607545, "loss": 11.6472, "step": 37897 }, { "epoch": 0.7933098886376957, "grad_norm": 0.3474540710449219, "learning_rate": 0.00016746777128795804, "loss": 11.6751, "step": 37898 }, { "epoch": 0.7933308214016579, "grad_norm": 0.33551013469696045, "learning_rate": 0.000167466152935938, "loss": 11.6608, "step": 37899 }, { "epoch": 0.79335175416562, "grad_norm": 0.3111577033996582, "learning_rate": 0.0001674645345514856, "loss": 11.6902, "step": 37900 }, { "epoch": 0.7933726869295822, "grad_norm": 0.2870032787322998, "learning_rate": 0.00016746291613460164, "loss": 11.6701, "step": 37901 }, { "epoch": 0.7933936196935444, "grad_norm": 0.30049973726272583, "learning_rate": 0.00016746129768528694, "loss": 11.6739, "step": 37902 }, { "epoch": 0.7934145524575065, "grad_norm": 0.3298070430755615, "learning_rate": 0.00016745967920354224, "loss": 11.6962, "step": 37903 }, { "epoch": 0.7934354852214687, "grad_norm": 0.3234574794769287, "learning_rate": 0.00016745806068936832, "loss": 11.6685, "step": 37904 }, { "epoch": 0.7934564179854308, "grad_norm": 0.33931729197502136, "learning_rate": 0.0001674564421427659, "loss": 11.6855, "step": 37905 }, { "epoch": 0.793477350749393, "grad_norm": 0.343665212392807, "learning_rate": 0.00016745482356373588, "loss": 11.6698, "step": 37906 }, { "epoch": 0.793498283513355, "grad_norm": 0.2259312868118286, "learning_rate": 0.000167453204952279, "loss": 11.6606, "step": 37907 }, { "epoch": 0.7935192162773173, "grad_norm": 0.2696809470653534, "learning_rate": 0.00016745158630839595, "loss": 11.6604, "step": 37908 }, { "epoch": 0.7935401490412795, "grad_norm": 0.39702022075653076, "learning_rate": 0.0001674499676320876, "loss": 11.6897, "step": 37909 }, { "epoch": 0.7935610818052415, "grad_norm": 0.31380942463874817, "learning_rate": 0.00016744834892335467, "loss": 11.6685, "step": 37910 }, { "epoch": 0.7935820145692037, "grad_norm": 0.34795624017715454, "learning_rate": 0.00016744673018219799, "loss": 11.6761, "step": 37911 }, { "epoch": 0.7936029473331658, "grad_norm": 0.23618240654468536, "learning_rate": 0.0001674451114086183, "loss": 11.67, "step": 37912 }, { "epoch": 0.793623880097128, "grad_norm": 0.27294015884399414, "learning_rate": 0.00016744349260261642, "loss": 11.658, "step": 37913 }, { "epoch": 0.7936448128610902, "grad_norm": 0.3304062783718109, "learning_rate": 0.0001674418737641931, "loss": 11.6807, "step": 37914 }, { "epoch": 0.7936657456250523, "grad_norm": 0.24432432651519775, "learning_rate": 0.00016744025489334909, "loss": 11.6612, "step": 37915 }, { "epoch": 0.7936866783890145, "grad_norm": 0.48143404722213745, "learning_rate": 0.0001674386359900852, "loss": 11.6746, "step": 37916 }, { "epoch": 0.7937076111529766, "grad_norm": 0.4371202290058136, "learning_rate": 0.00016743701705440223, "loss": 11.6755, "step": 37917 }, { "epoch": 0.7937285439169388, "grad_norm": 0.3616020977497101, "learning_rate": 0.00016743539808630091, "loss": 11.6786, "step": 37918 }, { "epoch": 0.7937494766809009, "grad_norm": 0.32059982419013977, "learning_rate": 0.00016743377908578206, "loss": 11.6643, "step": 37919 }, { "epoch": 0.7937704094448631, "grad_norm": 0.28790682554244995, "learning_rate": 0.00016743216005284646, "loss": 11.6627, "step": 37920 }, { "epoch": 0.7937913422088253, "grad_norm": 0.30719923973083496, "learning_rate": 0.00016743054098749483, "loss": 11.6794, "step": 37921 }, { "epoch": 0.7938122749727874, "grad_norm": 0.34441104531288147, "learning_rate": 0.00016742892188972803, "loss": 11.6621, "step": 37922 }, { "epoch": 0.7938332077367496, "grad_norm": 0.3175428509712219, "learning_rate": 0.00016742730275954676, "loss": 11.6622, "step": 37923 }, { "epoch": 0.7938541405007117, "grad_norm": 0.3195529878139496, "learning_rate": 0.00016742568359695182, "loss": 11.6532, "step": 37924 }, { "epoch": 0.7938750732646739, "grad_norm": 0.3213321566581726, "learning_rate": 0.00016742406440194404, "loss": 11.6844, "step": 37925 }, { "epoch": 0.793896006028636, "grad_norm": 0.2970277965068817, "learning_rate": 0.00016742244517452414, "loss": 11.6764, "step": 37926 }, { "epoch": 0.7939169387925982, "grad_norm": 0.35165685415267944, "learning_rate": 0.00016742082591469294, "loss": 11.6684, "step": 37927 }, { "epoch": 0.7939378715565604, "grad_norm": 0.39770203828811646, "learning_rate": 0.0001674192066224512, "loss": 11.685, "step": 37928 }, { "epoch": 0.7939588043205225, "grad_norm": 0.3278917074203491, "learning_rate": 0.0001674175872977997, "loss": 11.684, "step": 37929 }, { "epoch": 0.7939797370844847, "grad_norm": 0.3098738491535187, "learning_rate": 0.00016741596794073917, "loss": 11.6777, "step": 37930 }, { "epoch": 0.7940006698484467, "grad_norm": 0.29903680086135864, "learning_rate": 0.00016741434855127048, "loss": 11.6628, "step": 37931 }, { "epoch": 0.794021602612409, "grad_norm": 0.31870630383491516, "learning_rate": 0.00016741272912939431, "loss": 11.6605, "step": 37932 }, { "epoch": 0.7940425353763712, "grad_norm": 0.2810630798339844, "learning_rate": 0.00016741110967511154, "loss": 11.6456, "step": 37933 }, { "epoch": 0.7940634681403332, "grad_norm": 0.29817014932632446, "learning_rate": 0.0001674094901884229, "loss": 11.668, "step": 37934 }, { "epoch": 0.7940844009042954, "grad_norm": 0.29088717699050903, "learning_rate": 0.00016740787066932912, "loss": 11.6568, "step": 37935 }, { "epoch": 0.7941053336682575, "grad_norm": 0.2534063458442688, "learning_rate": 0.00016740625111783105, "loss": 11.6856, "step": 37936 }, { "epoch": 0.7941262664322197, "grad_norm": 0.288311243057251, "learning_rate": 0.00016740463153392947, "loss": 11.6656, "step": 37937 }, { "epoch": 0.7941471991961818, "grad_norm": 0.3422680199146271, "learning_rate": 0.00016740301191762509, "loss": 11.6786, "step": 37938 }, { "epoch": 0.794168131960144, "grad_norm": 0.2837081849575043, "learning_rate": 0.00016740139226891878, "loss": 11.6695, "step": 37939 }, { "epoch": 0.7941890647241062, "grad_norm": 0.32293781638145447, "learning_rate": 0.00016739977258781123, "loss": 11.6791, "step": 37940 }, { "epoch": 0.7942099974880683, "grad_norm": 0.3056349456310272, "learning_rate": 0.00016739815287430325, "loss": 11.6616, "step": 37941 }, { "epoch": 0.7942309302520305, "grad_norm": 0.28341639041900635, "learning_rate": 0.00016739653312839567, "loss": 11.6744, "step": 37942 }, { "epoch": 0.7942518630159926, "grad_norm": 0.3109400272369385, "learning_rate": 0.0001673949133500892, "loss": 11.6722, "step": 37943 }, { "epoch": 0.7942727957799548, "grad_norm": 0.28370729088783264, "learning_rate": 0.00016739329353938467, "loss": 11.655, "step": 37944 }, { "epoch": 0.7942937285439169, "grad_norm": 0.34262752532958984, "learning_rate": 0.00016739167369628279, "loss": 11.6677, "step": 37945 }, { "epoch": 0.7943146613078791, "grad_norm": 0.37795451283454895, "learning_rate": 0.0001673900538207844, "loss": 11.67, "step": 37946 }, { "epoch": 0.7943355940718413, "grad_norm": 0.26682955026626587, "learning_rate": 0.00016738843391289027, "loss": 11.6647, "step": 37947 }, { "epoch": 0.7943565268358034, "grad_norm": 0.3398766815662384, "learning_rate": 0.00016738681397260115, "loss": 11.6675, "step": 37948 }, { "epoch": 0.7943774595997656, "grad_norm": 0.3394105136394501, "learning_rate": 0.00016738519399991787, "loss": 11.6679, "step": 37949 }, { "epoch": 0.7943983923637277, "grad_norm": 0.2659735083580017, "learning_rate": 0.00016738357399484114, "loss": 11.6752, "step": 37950 }, { "epoch": 0.7944193251276899, "grad_norm": 0.37791284918785095, "learning_rate": 0.0001673819539573718, "loss": 11.6781, "step": 37951 }, { "epoch": 0.7944402578916521, "grad_norm": 0.3082415461540222, "learning_rate": 0.00016738033388751062, "loss": 11.6805, "step": 37952 }, { "epoch": 0.7944611906556142, "grad_norm": 0.2804604768753052, "learning_rate": 0.0001673787137852583, "loss": 11.6624, "step": 37953 }, { "epoch": 0.7944821234195764, "grad_norm": 0.6241700649261475, "learning_rate": 0.00016737709365061577, "loss": 11.6871, "step": 37954 }, { "epoch": 0.7945030561835384, "grad_norm": 0.41448915004730225, "learning_rate": 0.00016737547348358366, "loss": 11.6787, "step": 37955 }, { "epoch": 0.7945239889475006, "grad_norm": 0.43136414885520935, "learning_rate": 0.00016737385328416285, "loss": 11.6942, "step": 37956 }, { "epoch": 0.7945449217114627, "grad_norm": 0.35926729440689087, "learning_rate": 0.00016737223305235402, "loss": 11.6711, "step": 37957 }, { "epoch": 0.7945658544754249, "grad_norm": 0.31003516912460327, "learning_rate": 0.00016737061278815804, "loss": 11.6807, "step": 37958 }, { "epoch": 0.7945867872393871, "grad_norm": 0.2106677144765854, "learning_rate": 0.00016736899249157565, "loss": 11.6643, "step": 37959 }, { "epoch": 0.7946077200033492, "grad_norm": 0.2924819588661194, "learning_rate": 0.00016736737216260764, "loss": 11.6669, "step": 37960 }, { "epoch": 0.7946286527673114, "grad_norm": 0.30976492166519165, "learning_rate": 0.0001673657518012548, "loss": 11.6746, "step": 37961 }, { "epoch": 0.7946495855312735, "grad_norm": 0.354572057723999, "learning_rate": 0.00016736413140751788, "loss": 11.6654, "step": 37962 }, { "epoch": 0.7946705182952357, "grad_norm": 0.35998159646987915, "learning_rate": 0.00016736251098139766, "loss": 11.6656, "step": 37963 }, { "epoch": 0.7946914510591978, "grad_norm": 0.3060899078845978, "learning_rate": 0.00016736089052289495, "loss": 11.6593, "step": 37964 }, { "epoch": 0.79471238382316, "grad_norm": 0.27199798822402954, "learning_rate": 0.0001673592700320105, "loss": 11.6621, "step": 37965 }, { "epoch": 0.7947333165871222, "grad_norm": 0.23748765885829926, "learning_rate": 0.0001673576495087451, "loss": 11.6799, "step": 37966 }, { "epoch": 0.7947542493510843, "grad_norm": 0.29963812232017517, "learning_rate": 0.00016735602895309954, "loss": 11.6687, "step": 37967 }, { "epoch": 0.7947751821150465, "grad_norm": 0.3023170232772827, "learning_rate": 0.00016735440836507457, "loss": 11.6758, "step": 37968 }, { "epoch": 0.7947961148790086, "grad_norm": 0.429913729429245, "learning_rate": 0.00016735278774467101, "loss": 11.692, "step": 37969 }, { "epoch": 0.7948170476429708, "grad_norm": 0.33178508281707764, "learning_rate": 0.00016735116709188958, "loss": 11.6661, "step": 37970 }, { "epoch": 0.794837980406933, "grad_norm": 0.2858124077320099, "learning_rate": 0.00016734954640673113, "loss": 11.6717, "step": 37971 }, { "epoch": 0.7948589131708951, "grad_norm": 0.2602509558200836, "learning_rate": 0.00016734792568919637, "loss": 11.6766, "step": 37972 }, { "epoch": 0.7948798459348573, "grad_norm": 0.33185845613479614, "learning_rate": 0.00016734630493928614, "loss": 11.6772, "step": 37973 }, { "epoch": 0.7949007786988194, "grad_norm": 0.2922019064426422, "learning_rate": 0.00016734468415700117, "loss": 11.6742, "step": 37974 }, { "epoch": 0.7949217114627816, "grad_norm": 0.3634527921676636, "learning_rate": 0.00016734306334234228, "loss": 11.6663, "step": 37975 }, { "epoch": 0.7949426442267437, "grad_norm": 0.28544917702674866, "learning_rate": 0.0001673414424953102, "loss": 11.6553, "step": 37976 }, { "epoch": 0.7949635769907059, "grad_norm": 0.8769912719726562, "learning_rate": 0.00016733982161590575, "loss": 11.6035, "step": 37977 }, { "epoch": 0.7949845097546681, "grad_norm": 0.272758811712265, "learning_rate": 0.00016733820070412972, "loss": 11.6816, "step": 37978 }, { "epoch": 0.7950054425186301, "grad_norm": 0.2860296070575714, "learning_rate": 0.00016733657975998286, "loss": 11.6684, "step": 37979 }, { "epoch": 0.7950263752825923, "grad_norm": 0.46092015504837036, "learning_rate": 0.00016733495878346594, "loss": 11.6692, "step": 37980 }, { "epoch": 0.7950473080465544, "grad_norm": 0.30206015706062317, "learning_rate": 0.00016733333777457977, "loss": 11.6833, "step": 37981 }, { "epoch": 0.7950682408105166, "grad_norm": 0.2911284863948822, "learning_rate": 0.00016733171673332512, "loss": 11.6495, "step": 37982 }, { "epoch": 0.7950891735744787, "grad_norm": 0.38146907091140747, "learning_rate": 0.00016733009565970275, "loss": 11.6741, "step": 37983 }, { "epoch": 0.7951101063384409, "grad_norm": 0.33933958411216736, "learning_rate": 0.00016732847455371344, "loss": 11.6657, "step": 37984 }, { "epoch": 0.7951310391024031, "grad_norm": 0.4128289222717285, "learning_rate": 0.00016732685341535803, "loss": 11.6763, "step": 37985 }, { "epoch": 0.7951519718663652, "grad_norm": 0.3379380404949188, "learning_rate": 0.00016732523224463722, "loss": 11.6834, "step": 37986 }, { "epoch": 0.7951729046303274, "grad_norm": 0.24098512530326843, "learning_rate": 0.00016732361104155184, "loss": 11.6621, "step": 37987 }, { "epoch": 0.7951938373942895, "grad_norm": 0.3226071894168854, "learning_rate": 0.00016732198980610262, "loss": 11.6672, "step": 37988 }, { "epoch": 0.7952147701582517, "grad_norm": 0.3925155699253082, "learning_rate": 0.0001673203685382904, "loss": 11.6679, "step": 37989 }, { "epoch": 0.7952357029222138, "grad_norm": 0.3026562035083771, "learning_rate": 0.0001673187472381159, "loss": 11.661, "step": 37990 }, { "epoch": 0.795256635686176, "grad_norm": 0.30371537804603577, "learning_rate": 0.00016731712590557996, "loss": 11.6644, "step": 37991 }, { "epoch": 0.7952775684501382, "grad_norm": 0.30130288004875183, "learning_rate": 0.0001673155045406833, "loss": 11.6613, "step": 37992 }, { "epoch": 0.7952985012141003, "grad_norm": 0.31001248955726624, "learning_rate": 0.00016731388314342676, "loss": 11.6616, "step": 37993 }, { "epoch": 0.7953194339780625, "grad_norm": 0.3558642268180847, "learning_rate": 0.00016731226171381108, "loss": 11.6733, "step": 37994 }, { "epoch": 0.7953403667420246, "grad_norm": 1.1593294143676758, "learning_rate": 0.00016731064025183703, "loss": 11.6086, "step": 37995 }, { "epoch": 0.7953612995059868, "grad_norm": 0.3430405557155609, "learning_rate": 0.0001673090187575054, "loss": 11.6635, "step": 37996 }, { "epoch": 0.795382232269949, "grad_norm": 0.3337595760822296, "learning_rate": 0.000167307397230817, "loss": 11.682, "step": 37997 }, { "epoch": 0.7954031650339111, "grad_norm": 0.3405175507068634, "learning_rate": 0.00016730577567177258, "loss": 11.6632, "step": 37998 }, { "epoch": 0.7954240977978733, "grad_norm": 0.27161291241645813, "learning_rate": 0.00016730415408037293, "loss": 11.6766, "step": 37999 }, { "epoch": 0.7954450305618354, "grad_norm": 0.298955500125885, "learning_rate": 0.0001673025324566188, "loss": 11.685, "step": 38000 }, { "epoch": 0.7954450305618354, "eval_loss": 11.670167922973633, "eval_runtime": 34.2813, "eval_samples_per_second": 28.033, "eval_steps_per_second": 7.03, "step": 38000 }, { "epoch": 0.7954659633257976, "grad_norm": 0.31092068552970886, "learning_rate": 0.00016730091080051102, "loss": 11.6674, "step": 38001 }, { "epoch": 0.7954868960897596, "grad_norm": 0.30514708161354065, "learning_rate": 0.0001672992891120503, "loss": 11.6836, "step": 38002 }, { "epoch": 0.7955078288537218, "grad_norm": 0.31912556290626526, "learning_rate": 0.00016729766739123752, "loss": 11.6762, "step": 38003 }, { "epoch": 0.795528761617684, "grad_norm": 0.2616405487060547, "learning_rate": 0.00016729604563807336, "loss": 11.6701, "step": 38004 }, { "epoch": 0.7955496943816461, "grad_norm": 0.3603302240371704, "learning_rate": 0.00016729442385255867, "loss": 11.6668, "step": 38005 }, { "epoch": 0.7955706271456083, "grad_norm": 0.2848905324935913, "learning_rate": 0.0001672928020346942, "loss": 11.6621, "step": 38006 }, { "epoch": 0.7955915599095704, "grad_norm": 0.2896347939968109, "learning_rate": 0.00016729118018448072, "loss": 11.6813, "step": 38007 }, { "epoch": 0.7956124926735326, "grad_norm": 0.2552835941314697, "learning_rate": 0.00016728955830191904, "loss": 11.6502, "step": 38008 }, { "epoch": 0.7956334254374947, "grad_norm": 0.3046906292438507, "learning_rate": 0.00016728793638700988, "loss": 11.6813, "step": 38009 }, { "epoch": 0.7956543582014569, "grad_norm": 0.30974987149238586, "learning_rate": 0.0001672863144397541, "loss": 11.6614, "step": 38010 }, { "epoch": 0.7956752909654191, "grad_norm": 0.3529394865036011, "learning_rate": 0.00016728469246015242, "loss": 11.6502, "step": 38011 }, { "epoch": 0.7956962237293812, "grad_norm": 0.3249224126338959, "learning_rate": 0.00016728307044820565, "loss": 11.6507, "step": 38012 }, { "epoch": 0.7957171564933434, "grad_norm": 0.2818480432033539, "learning_rate": 0.00016728144840391456, "loss": 11.6745, "step": 38013 }, { "epoch": 0.7957380892573055, "grad_norm": 0.4009718894958496, "learning_rate": 0.0001672798263272799, "loss": 11.6819, "step": 38014 }, { "epoch": 0.7957590220212677, "grad_norm": 0.3677590787410736, "learning_rate": 0.00016727820421830254, "loss": 11.6596, "step": 38015 }, { "epoch": 0.7957799547852299, "grad_norm": 0.3408452570438385, "learning_rate": 0.00016727658207698313, "loss": 11.6653, "step": 38016 }, { "epoch": 0.795800887549192, "grad_norm": 0.3498556613922119, "learning_rate": 0.00016727495990332257, "loss": 11.6574, "step": 38017 }, { "epoch": 0.7958218203131542, "grad_norm": 0.24064557254314423, "learning_rate": 0.00016727333769732156, "loss": 11.6627, "step": 38018 }, { "epoch": 0.7958427530771163, "grad_norm": 0.3005785346031189, "learning_rate": 0.0001672717154589809, "loss": 11.6829, "step": 38019 }, { "epoch": 0.7958636858410785, "grad_norm": 0.27051612734794617, "learning_rate": 0.0001672700931883014, "loss": 11.681, "step": 38020 }, { "epoch": 0.7958846186050406, "grad_norm": 0.27185752987861633, "learning_rate": 0.00016726847088528385, "loss": 11.6521, "step": 38021 }, { "epoch": 0.7959055513690028, "grad_norm": 0.2676282227039337, "learning_rate": 0.00016726684854992895, "loss": 11.6745, "step": 38022 }, { "epoch": 0.795926484132965, "grad_norm": 0.35587334632873535, "learning_rate": 0.00016726522618223754, "loss": 11.659, "step": 38023 }, { "epoch": 0.795947416896927, "grad_norm": 0.3953093886375427, "learning_rate": 0.0001672636037822104, "loss": 11.6709, "step": 38024 }, { "epoch": 0.7959683496608893, "grad_norm": 0.26857542991638184, "learning_rate": 0.00016726198134984826, "loss": 11.6619, "step": 38025 }, { "epoch": 0.7959892824248513, "grad_norm": 0.3110383450984955, "learning_rate": 0.00016726035888515198, "loss": 11.6698, "step": 38026 }, { "epoch": 0.7960102151888135, "grad_norm": 0.37111714482307434, "learning_rate": 0.0001672587363881223, "loss": 11.654, "step": 38027 }, { "epoch": 0.7960311479527756, "grad_norm": 0.31528472900390625, "learning_rate": 0.00016725711385875996, "loss": 11.6752, "step": 38028 }, { "epoch": 0.7960520807167378, "grad_norm": 0.2751331031322479, "learning_rate": 0.0001672554912970658, "loss": 11.673, "step": 38029 }, { "epoch": 0.7960730134807, "grad_norm": 0.27820059657096863, "learning_rate": 0.0001672538687030406, "loss": 11.6671, "step": 38030 }, { "epoch": 0.7960939462446621, "grad_norm": 0.301431804895401, "learning_rate": 0.00016725224607668505, "loss": 11.6635, "step": 38031 }, { "epoch": 0.7961148790086243, "grad_norm": 0.34439268708229065, "learning_rate": 0.00016725062341800006, "loss": 11.6545, "step": 38032 }, { "epoch": 0.7961358117725864, "grad_norm": 0.29142242670059204, "learning_rate": 0.00016724900072698633, "loss": 11.6502, "step": 38033 }, { "epoch": 0.7961567445365486, "grad_norm": 0.30697232484817505, "learning_rate": 0.00016724737800364464, "loss": 11.6632, "step": 38034 }, { "epoch": 0.7961776773005108, "grad_norm": 0.3568876385688782, "learning_rate": 0.00016724575524797581, "loss": 11.6774, "step": 38035 }, { "epoch": 0.7961986100644729, "grad_norm": 0.4078356921672821, "learning_rate": 0.00016724413245998058, "loss": 11.6597, "step": 38036 }, { "epoch": 0.7962195428284351, "grad_norm": 0.294035404920578, "learning_rate": 0.00016724250963965975, "loss": 11.6816, "step": 38037 }, { "epoch": 0.7962404755923972, "grad_norm": 0.3732595145702362, "learning_rate": 0.0001672408867870141, "loss": 11.6772, "step": 38038 }, { "epoch": 0.7962614083563594, "grad_norm": 0.2728367745876312, "learning_rate": 0.00016723926390204444, "loss": 11.6772, "step": 38039 }, { "epoch": 0.7962823411203215, "grad_norm": 0.3309527039527893, "learning_rate": 0.0001672376409847515, "loss": 11.6682, "step": 38040 }, { "epoch": 0.7963032738842837, "grad_norm": 0.3960530161857605, "learning_rate": 0.00016723601803513604, "loss": 11.6801, "step": 38041 }, { "epoch": 0.7963242066482459, "grad_norm": 0.29044827818870544, "learning_rate": 0.00016723439505319894, "loss": 11.69, "step": 38042 }, { "epoch": 0.796345139412208, "grad_norm": 0.36594200134277344, "learning_rate": 0.00016723277203894088, "loss": 11.659, "step": 38043 }, { "epoch": 0.7963660721761702, "grad_norm": 0.25634562969207764, "learning_rate": 0.00016723114899236266, "loss": 11.6608, "step": 38044 }, { "epoch": 0.7963870049401323, "grad_norm": 0.27922889590263367, "learning_rate": 0.00016722952591346511, "loss": 11.6764, "step": 38045 }, { "epoch": 0.7964079377040945, "grad_norm": 0.260188490152359, "learning_rate": 0.000167227902802249, "loss": 11.6702, "step": 38046 }, { "epoch": 0.7964288704680565, "grad_norm": 0.27754145860671997, "learning_rate": 0.00016722627965871508, "loss": 11.683, "step": 38047 }, { "epoch": 0.7964498032320187, "grad_norm": 0.3474651873111725, "learning_rate": 0.0001672246564828641, "loss": 11.6497, "step": 38048 }, { "epoch": 0.796470735995981, "grad_norm": 0.2491903156042099, "learning_rate": 0.00016722303327469692, "loss": 11.6642, "step": 38049 }, { "epoch": 0.796491668759943, "grad_norm": 0.3812970519065857, "learning_rate": 0.00016722141003421424, "loss": 11.6623, "step": 38050 }, { "epoch": 0.7965126015239052, "grad_norm": 2.468670606613159, "learning_rate": 0.00016721978676141692, "loss": 11.635, "step": 38051 }, { "epoch": 0.7965335342878673, "grad_norm": 0.2825700640678406, "learning_rate": 0.0001672181634563057, "loss": 11.6665, "step": 38052 }, { "epoch": 0.7965544670518295, "grad_norm": 0.3362054228782654, "learning_rate": 0.00016721654011888135, "loss": 11.6691, "step": 38053 }, { "epoch": 0.7965753998157917, "grad_norm": 0.36167898774147034, "learning_rate": 0.00016721491674914467, "loss": 11.6653, "step": 38054 }, { "epoch": 0.7965963325797538, "grad_norm": 0.37190207839012146, "learning_rate": 0.0001672132933470964, "loss": 11.6708, "step": 38055 }, { "epoch": 0.796617265343716, "grad_norm": 0.2924271821975708, "learning_rate": 0.0001672116699127374, "loss": 11.6656, "step": 38056 }, { "epoch": 0.7966381981076781, "grad_norm": 0.269484281539917, "learning_rate": 0.0001672100464460684, "loss": 11.6578, "step": 38057 }, { "epoch": 0.7966591308716403, "grad_norm": 0.3075859844684601, "learning_rate": 0.00016720842294709013, "loss": 11.6588, "step": 38058 }, { "epoch": 0.7966800636356024, "grad_norm": 0.2699775993824005, "learning_rate": 0.00016720679941580348, "loss": 11.6628, "step": 38059 }, { "epoch": 0.7967009963995646, "grad_norm": 0.27619168162345886, "learning_rate": 0.00016720517585220917, "loss": 11.6698, "step": 38060 }, { "epoch": 0.7967219291635268, "grad_norm": 0.37008094787597656, "learning_rate": 0.00016720355225630798, "loss": 11.6747, "step": 38061 }, { "epoch": 0.7967428619274889, "grad_norm": 0.3082074224948883, "learning_rate": 0.00016720192862810067, "loss": 11.6782, "step": 38062 }, { "epoch": 0.7967637946914511, "grad_norm": 0.28000393509864807, "learning_rate": 0.00016720030496758805, "loss": 11.6669, "step": 38063 }, { "epoch": 0.7967847274554132, "grad_norm": 0.2437586933374405, "learning_rate": 0.0001671986812747709, "loss": 11.6686, "step": 38064 }, { "epoch": 0.7968056602193754, "grad_norm": 0.2736547291278839, "learning_rate": 0.00016719705754965003, "loss": 11.6576, "step": 38065 }, { "epoch": 0.7968265929833375, "grad_norm": 0.332450807094574, "learning_rate": 0.00016719543379222618, "loss": 11.6769, "step": 38066 }, { "epoch": 0.7968475257472997, "grad_norm": 0.368581086397171, "learning_rate": 0.00016719381000250013, "loss": 11.673, "step": 38067 }, { "epoch": 0.7968684585112619, "grad_norm": 0.2455260008573532, "learning_rate": 0.00016719218618047263, "loss": 11.6672, "step": 38068 }, { "epoch": 0.796889391275224, "grad_norm": 0.2954171299934387, "learning_rate": 0.00016719056232614453, "loss": 11.654, "step": 38069 }, { "epoch": 0.7969103240391862, "grad_norm": 0.34392112493515015, "learning_rate": 0.00016718893843951658, "loss": 11.6507, "step": 38070 }, { "epoch": 0.7969312568031482, "grad_norm": 0.27384111285209656, "learning_rate": 0.00016718731452058957, "loss": 11.6621, "step": 38071 }, { "epoch": 0.7969521895671104, "grad_norm": 0.2681390941143036, "learning_rate": 0.00016718569056936426, "loss": 11.6725, "step": 38072 }, { "epoch": 0.7969731223310726, "grad_norm": 0.27450501918792725, "learning_rate": 0.00016718406658584146, "loss": 11.6716, "step": 38073 }, { "epoch": 0.7969940550950347, "grad_norm": 0.28573501110076904, "learning_rate": 0.0001671824425700219, "loss": 11.669, "step": 38074 }, { "epoch": 0.7970149878589969, "grad_norm": 0.2854732275009155, "learning_rate": 0.0001671808185219064, "loss": 11.6741, "step": 38075 }, { "epoch": 0.797035920622959, "grad_norm": 0.3264962136745453, "learning_rate": 0.00016717919444149577, "loss": 11.6766, "step": 38076 }, { "epoch": 0.7970568533869212, "grad_norm": 0.33004990220069885, "learning_rate": 0.00016717757032879072, "loss": 11.6542, "step": 38077 }, { "epoch": 0.7970777861508833, "grad_norm": 0.28499871492385864, "learning_rate": 0.00016717594618379208, "loss": 11.6675, "step": 38078 }, { "epoch": 0.7970987189148455, "grad_norm": 0.28343021869659424, "learning_rate": 0.00016717432200650062, "loss": 11.6653, "step": 38079 }, { "epoch": 0.7971196516788077, "grad_norm": 0.2483915388584137, "learning_rate": 0.0001671726977969171, "loss": 11.6727, "step": 38080 }, { "epoch": 0.7971405844427698, "grad_norm": 0.4155890941619873, "learning_rate": 0.00016717107355504232, "loss": 11.6713, "step": 38081 }, { "epoch": 0.797161517206732, "grad_norm": 0.325730562210083, "learning_rate": 0.00016716944928087707, "loss": 11.6934, "step": 38082 }, { "epoch": 0.7971824499706941, "grad_norm": 0.27991965413093567, "learning_rate": 0.00016716782497442213, "loss": 11.6732, "step": 38083 }, { "epoch": 0.7972033827346563, "grad_norm": 0.4883565902709961, "learning_rate": 0.00016716620063567827, "loss": 11.6706, "step": 38084 }, { "epoch": 0.7972243154986184, "grad_norm": 0.28438496589660645, "learning_rate": 0.00016716457626464625, "loss": 11.6831, "step": 38085 }, { "epoch": 0.7972452482625806, "grad_norm": 0.31983494758605957, "learning_rate": 0.00016716295186132687, "loss": 11.6706, "step": 38086 }, { "epoch": 0.7972661810265428, "grad_norm": 0.2590491473674774, "learning_rate": 0.00016716132742572093, "loss": 11.6646, "step": 38087 }, { "epoch": 0.7972871137905049, "grad_norm": 0.273457795381546, "learning_rate": 0.0001671597029578292, "loss": 11.6688, "step": 38088 }, { "epoch": 0.7973080465544671, "grad_norm": 0.254987508058548, "learning_rate": 0.00016715807845765244, "loss": 11.659, "step": 38089 }, { "epoch": 0.7973289793184292, "grad_norm": 0.3454773724079132, "learning_rate": 0.00016715645392519144, "loss": 11.6663, "step": 38090 }, { "epoch": 0.7973499120823914, "grad_norm": 0.30870622396469116, "learning_rate": 0.000167154829360447, "loss": 11.6743, "step": 38091 }, { "epoch": 0.7973708448463536, "grad_norm": 0.3940100371837616, "learning_rate": 0.0001671532047634199, "loss": 11.6883, "step": 38092 }, { "epoch": 0.7973917776103157, "grad_norm": 0.3135506510734558, "learning_rate": 0.00016715158013411087, "loss": 11.6558, "step": 38093 }, { "epoch": 0.7974127103742779, "grad_norm": 0.2765977084636688, "learning_rate": 0.00016714995547252075, "loss": 11.6751, "step": 38094 }, { "epoch": 0.79743364313824, "grad_norm": 0.32012325525283813, "learning_rate": 0.0001671483307786503, "loss": 11.6724, "step": 38095 }, { "epoch": 0.7974545759022021, "grad_norm": 0.34699195623397827, "learning_rate": 0.0001671467060525003, "loss": 11.6834, "step": 38096 }, { "epoch": 0.7974755086661642, "grad_norm": 0.3369666635990143, "learning_rate": 0.00016714508129407154, "loss": 11.66, "step": 38097 }, { "epoch": 0.7974964414301264, "grad_norm": 0.27669084072113037, "learning_rate": 0.0001671434565033648, "loss": 11.6703, "step": 38098 }, { "epoch": 0.7975173741940886, "grad_norm": 0.3075515329837799, "learning_rate": 0.00016714183168038086, "loss": 11.659, "step": 38099 }, { "epoch": 0.7975383069580507, "grad_norm": 0.31325119733810425, "learning_rate": 0.00016714020682512048, "loss": 11.6673, "step": 38100 }, { "epoch": 0.7975592397220129, "grad_norm": 0.29485756158828735, "learning_rate": 0.00016713858193758444, "loss": 11.6575, "step": 38101 }, { "epoch": 0.797580172485975, "grad_norm": 0.32198336720466614, "learning_rate": 0.00016713695701777356, "loss": 11.6442, "step": 38102 }, { "epoch": 0.7976011052499372, "grad_norm": 0.26982682943344116, "learning_rate": 0.0001671353320656886, "loss": 11.6642, "step": 38103 }, { "epoch": 0.7976220380138993, "grad_norm": 0.33476462960243225, "learning_rate": 0.00016713370708133034, "loss": 11.6593, "step": 38104 }, { "epoch": 0.7976429707778615, "grad_norm": 0.25747087597846985, "learning_rate": 0.00016713208206469956, "loss": 11.6775, "step": 38105 }, { "epoch": 0.7976639035418237, "grad_norm": 0.3969934284687042, "learning_rate": 0.00016713045701579704, "loss": 11.6735, "step": 38106 }, { "epoch": 0.7976848363057858, "grad_norm": 0.2551781237125397, "learning_rate": 0.00016712883193462357, "loss": 11.6698, "step": 38107 }, { "epoch": 0.797705769069748, "grad_norm": 0.3260385990142822, "learning_rate": 0.0001671272068211799, "loss": 11.671, "step": 38108 }, { "epoch": 0.7977267018337101, "grad_norm": 0.29183369874954224, "learning_rate": 0.0001671255816754669, "loss": 11.6705, "step": 38109 }, { "epoch": 0.7977476345976723, "grad_norm": 0.4295802116394043, "learning_rate": 0.00016712395649748523, "loss": 11.6593, "step": 38110 }, { "epoch": 0.7977685673616345, "grad_norm": 0.2817612886428833, "learning_rate": 0.00016712233128723573, "loss": 11.6563, "step": 38111 }, { "epoch": 0.7977895001255966, "grad_norm": 0.3479014039039612, "learning_rate": 0.00016712070604471918, "loss": 11.6668, "step": 38112 }, { "epoch": 0.7978104328895588, "grad_norm": 0.2690390646457672, "learning_rate": 0.00016711908076993642, "loss": 11.673, "step": 38113 }, { "epoch": 0.7978313656535209, "grad_norm": 0.3798283040523529, "learning_rate": 0.00016711745546288807, "loss": 11.6779, "step": 38114 }, { "epoch": 0.7978522984174831, "grad_norm": 0.3866097629070282, "learning_rate": 0.0001671158301235751, "loss": 11.6822, "step": 38115 }, { "epoch": 0.7978732311814452, "grad_norm": 0.3263185918331146, "learning_rate": 0.00016711420475199818, "loss": 11.6621, "step": 38116 }, { "epoch": 0.7978941639454074, "grad_norm": 0.3114587962627411, "learning_rate": 0.0001671125793481581, "loss": 11.6737, "step": 38117 }, { "epoch": 0.7979150967093696, "grad_norm": 0.3196844756603241, "learning_rate": 0.0001671109539120557, "loss": 11.6673, "step": 38118 }, { "epoch": 0.7979360294733316, "grad_norm": 0.35609087347984314, "learning_rate": 0.00016710932844369168, "loss": 11.6646, "step": 38119 }, { "epoch": 0.7979569622372938, "grad_norm": 0.44605204463005066, "learning_rate": 0.00016710770294306684, "loss": 11.6728, "step": 38120 }, { "epoch": 0.7979778950012559, "grad_norm": 0.25510555505752563, "learning_rate": 0.00016710607741018203, "loss": 11.6771, "step": 38121 }, { "epoch": 0.7979988277652181, "grad_norm": 0.33701759576797485, "learning_rate": 0.00016710445184503798, "loss": 11.6732, "step": 38122 }, { "epoch": 0.7980197605291802, "grad_norm": 0.27458685636520386, "learning_rate": 0.00016710282624763547, "loss": 11.6544, "step": 38123 }, { "epoch": 0.7980406932931424, "grad_norm": 0.29792073369026184, "learning_rate": 0.00016710120061797526, "loss": 11.6777, "step": 38124 }, { "epoch": 0.7980616260571046, "grad_norm": 0.2577073872089386, "learning_rate": 0.00016709957495605822, "loss": 11.6527, "step": 38125 }, { "epoch": 0.7980825588210667, "grad_norm": 0.3546619117259979, "learning_rate": 0.000167097949261885, "loss": 11.6691, "step": 38126 }, { "epoch": 0.7981034915850289, "grad_norm": 0.26628199219703674, "learning_rate": 0.00016709632353545647, "loss": 11.6752, "step": 38127 }, { "epoch": 0.798124424348991, "grad_norm": 0.38342154026031494, "learning_rate": 0.0001670946977767734, "loss": 11.6782, "step": 38128 }, { "epoch": 0.7981453571129532, "grad_norm": 0.37056177854537964, "learning_rate": 0.00016709307198583656, "loss": 11.6654, "step": 38129 }, { "epoch": 0.7981662898769154, "grad_norm": 0.36742836236953735, "learning_rate": 0.00016709144616264675, "loss": 11.6712, "step": 38130 }, { "epoch": 0.7981872226408775, "grad_norm": 0.3193998634815216, "learning_rate": 0.00016708982030720474, "loss": 11.6726, "step": 38131 }, { "epoch": 0.7982081554048397, "grad_norm": 0.3394744098186493, "learning_rate": 0.0001670881944195113, "loss": 11.6702, "step": 38132 }, { "epoch": 0.7982290881688018, "grad_norm": 0.3642342686653137, "learning_rate": 0.00016708656849956724, "loss": 11.6933, "step": 38133 }, { "epoch": 0.798250020932764, "grad_norm": 0.2746576964855194, "learning_rate": 0.00016708494254737328, "loss": 11.6732, "step": 38134 }, { "epoch": 0.7982709536967261, "grad_norm": 0.34646305441856384, "learning_rate": 0.00016708331656293027, "loss": 11.6585, "step": 38135 }, { "epoch": 0.7982918864606883, "grad_norm": 0.29371076822280884, "learning_rate": 0.00016708169054623895, "loss": 11.6677, "step": 38136 }, { "epoch": 0.7983128192246505, "grad_norm": 0.305981308221817, "learning_rate": 0.00016708006449730014, "loss": 11.6543, "step": 38137 }, { "epoch": 0.7983337519886126, "grad_norm": 0.30536341667175293, "learning_rate": 0.00016707843841611458, "loss": 11.6612, "step": 38138 }, { "epoch": 0.7983546847525748, "grad_norm": 0.32752737402915955, "learning_rate": 0.00016707681230268307, "loss": 11.68, "step": 38139 }, { "epoch": 0.7983756175165369, "grad_norm": 0.25213590264320374, "learning_rate": 0.0001670751861570064, "loss": 11.6529, "step": 38140 }, { "epoch": 0.798396550280499, "grad_norm": 0.23911531269550323, "learning_rate": 0.00016707355997908535, "loss": 11.6622, "step": 38141 }, { "epoch": 0.7984174830444611, "grad_norm": 0.3326965868473053, "learning_rate": 0.0001670719337689207, "loss": 11.6787, "step": 38142 }, { "epoch": 0.7984384158084233, "grad_norm": 0.2211114764213562, "learning_rate": 0.0001670703075265132, "loss": 11.6684, "step": 38143 }, { "epoch": 0.7984593485723855, "grad_norm": 0.3166799545288086, "learning_rate": 0.00016706868125186366, "loss": 11.6707, "step": 38144 }, { "epoch": 0.7984802813363476, "grad_norm": 0.4273635447025299, "learning_rate": 0.00016706705494497288, "loss": 11.6732, "step": 38145 }, { "epoch": 0.7985012141003098, "grad_norm": 0.3068428933620453, "learning_rate": 0.0001670654286058416, "loss": 11.6743, "step": 38146 }, { "epoch": 0.7985221468642719, "grad_norm": 0.25526055693626404, "learning_rate": 0.00016706380223447068, "loss": 11.6622, "step": 38147 }, { "epoch": 0.7985430796282341, "grad_norm": 0.2751069962978363, "learning_rate": 0.0001670621758308608, "loss": 11.6795, "step": 38148 }, { "epoch": 0.7985640123921963, "grad_norm": 0.3975469768047333, "learning_rate": 0.00016706054939501276, "loss": 11.6724, "step": 38149 }, { "epoch": 0.7985849451561584, "grad_norm": 0.2639795243740082, "learning_rate": 0.00016705892292692744, "loss": 11.6644, "step": 38150 }, { "epoch": 0.7986058779201206, "grad_norm": 0.29869768023490906, "learning_rate": 0.0001670572964266055, "loss": 11.6749, "step": 38151 }, { "epoch": 0.7986268106840827, "grad_norm": 0.39616286754608154, "learning_rate": 0.0001670556698940478, "loss": 11.6739, "step": 38152 }, { "epoch": 0.7986477434480449, "grad_norm": 0.2753501534461975, "learning_rate": 0.0001670540433292551, "loss": 11.6778, "step": 38153 }, { "epoch": 0.798668676212007, "grad_norm": 0.3126978576183319, "learning_rate": 0.00016705241673222818, "loss": 11.6701, "step": 38154 }, { "epoch": 0.7986896089759692, "grad_norm": 0.3008732199668884, "learning_rate": 0.00016705079010296777, "loss": 11.6749, "step": 38155 }, { "epoch": 0.7987105417399314, "grad_norm": 0.3773413300514221, "learning_rate": 0.00016704916344147474, "loss": 11.6664, "step": 38156 }, { "epoch": 0.7987314745038935, "grad_norm": 0.3007645905017853, "learning_rate": 0.00016704753674774985, "loss": 11.6537, "step": 38157 }, { "epoch": 0.7987524072678557, "grad_norm": 0.3113258481025696, "learning_rate": 0.0001670459100217938, "loss": 11.6877, "step": 38158 }, { "epoch": 0.7987733400318178, "grad_norm": 0.23540449142456055, "learning_rate": 0.00016704428326360754, "loss": 11.6614, "step": 38159 }, { "epoch": 0.79879427279578, "grad_norm": 0.30095869302749634, "learning_rate": 0.00016704265647319165, "loss": 11.6765, "step": 38160 }, { "epoch": 0.7988152055597421, "grad_norm": 0.3652839660644531, "learning_rate": 0.00016704102965054706, "loss": 11.6708, "step": 38161 }, { "epoch": 0.7988361383237043, "grad_norm": 0.3382069766521454, "learning_rate": 0.00016703940279567448, "loss": 11.6667, "step": 38162 }, { "epoch": 0.7988570710876665, "grad_norm": 0.2978048026561737, "learning_rate": 0.00016703777590857476, "loss": 11.6677, "step": 38163 }, { "epoch": 0.7988780038516285, "grad_norm": 0.2914442718029022, "learning_rate": 0.0001670361489892486, "loss": 11.6746, "step": 38164 }, { "epoch": 0.7988989366155907, "grad_norm": 0.32464927434921265, "learning_rate": 0.00016703452203769686, "loss": 11.6685, "step": 38165 }, { "epoch": 0.7989198693795528, "grad_norm": 0.3378276228904724, "learning_rate": 0.0001670328950539202, "loss": 11.6788, "step": 38166 }, { "epoch": 0.798940802143515, "grad_norm": 0.2659095525741577, "learning_rate": 0.00016703126803791957, "loss": 11.6636, "step": 38167 }, { "epoch": 0.7989617349074771, "grad_norm": 0.3472149670124054, "learning_rate": 0.00016702964098969564, "loss": 11.6674, "step": 38168 }, { "epoch": 0.7989826676714393, "grad_norm": 0.2829311788082123, "learning_rate": 0.00016702801390924921, "loss": 11.6653, "step": 38169 }, { "epoch": 0.7990036004354015, "grad_norm": 0.27768179774284363, "learning_rate": 0.00016702638679658106, "loss": 11.6756, "step": 38170 }, { "epoch": 0.7990245331993636, "grad_norm": 0.2681349217891693, "learning_rate": 0.00016702475965169197, "loss": 11.6687, "step": 38171 }, { "epoch": 0.7990454659633258, "grad_norm": 0.35875341296195984, "learning_rate": 0.0001670231324745828, "loss": 11.6565, "step": 38172 }, { "epoch": 0.7990663987272879, "grad_norm": 0.24987733364105225, "learning_rate": 0.0001670215052652542, "loss": 11.6656, "step": 38173 }, { "epoch": 0.7990873314912501, "grad_norm": 0.36383530497550964, "learning_rate": 0.00016701987802370707, "loss": 11.6537, "step": 38174 }, { "epoch": 0.7991082642552123, "grad_norm": 0.29062771797180176, "learning_rate": 0.00016701825074994208, "loss": 11.686, "step": 38175 }, { "epoch": 0.7991291970191744, "grad_norm": 0.31415310502052307, "learning_rate": 0.00016701662344396011, "loss": 11.6744, "step": 38176 }, { "epoch": 0.7991501297831366, "grad_norm": 0.2931003272533417, "learning_rate": 0.00016701499610576192, "loss": 11.6593, "step": 38177 }, { "epoch": 0.7991710625470987, "grad_norm": 0.26134535670280457, "learning_rate": 0.00016701336873534825, "loss": 11.6744, "step": 38178 }, { "epoch": 0.7991919953110609, "grad_norm": 0.3454311490058899, "learning_rate": 0.00016701174133271996, "loss": 11.6865, "step": 38179 }, { "epoch": 0.799212928075023, "grad_norm": 0.34363988041877747, "learning_rate": 0.00016701011389787772, "loss": 11.6661, "step": 38180 }, { "epoch": 0.7992338608389852, "grad_norm": 0.40636277198791504, "learning_rate": 0.00016700848643082242, "loss": 11.6789, "step": 38181 }, { "epoch": 0.7992547936029474, "grad_norm": 0.35346871614456177, "learning_rate": 0.00016700685893155477, "loss": 11.6705, "step": 38182 }, { "epoch": 0.7992757263669095, "grad_norm": 0.326403945684433, "learning_rate": 0.00016700523140007558, "loss": 11.667, "step": 38183 }, { "epoch": 0.7992966591308717, "grad_norm": 0.2786133289337158, "learning_rate": 0.00016700360383638566, "loss": 11.6684, "step": 38184 }, { "epoch": 0.7993175918948338, "grad_norm": 0.3119369149208069, "learning_rate": 0.00016700197624048576, "loss": 11.666, "step": 38185 }, { "epoch": 0.799338524658796, "grad_norm": 0.2844897210597992, "learning_rate": 0.00016700034861237664, "loss": 11.6735, "step": 38186 }, { "epoch": 0.799359457422758, "grad_norm": 0.3272257149219513, "learning_rate": 0.00016699872095205914, "loss": 11.6755, "step": 38187 }, { "epoch": 0.7993803901867202, "grad_norm": 0.4701680541038513, "learning_rate": 0.000166997093259534, "loss": 11.6511, "step": 38188 }, { "epoch": 0.7994013229506824, "grad_norm": 0.2670307457447052, "learning_rate": 0.000166995465534802, "loss": 11.6725, "step": 38189 }, { "epoch": 0.7994222557146445, "grad_norm": 0.2861507833003998, "learning_rate": 0.00016699383777786395, "loss": 11.6728, "step": 38190 }, { "epoch": 0.7994431884786067, "grad_norm": 0.2463599294424057, "learning_rate": 0.00016699220998872062, "loss": 11.6648, "step": 38191 }, { "epoch": 0.7994641212425688, "grad_norm": 0.23431476950645447, "learning_rate": 0.00016699058216737277, "loss": 11.6633, "step": 38192 }, { "epoch": 0.799485054006531, "grad_norm": 0.23777735233306885, "learning_rate": 0.00016698895431382124, "loss": 11.6661, "step": 38193 }, { "epoch": 0.7995059867704932, "grad_norm": 0.48241180181503296, "learning_rate": 0.00016698732642806677, "loss": 11.6858, "step": 38194 }, { "epoch": 0.7995269195344553, "grad_norm": 0.3021637797355652, "learning_rate": 0.00016698569851011013, "loss": 11.6645, "step": 38195 }, { "epoch": 0.7995478522984175, "grad_norm": 0.309698224067688, "learning_rate": 0.00016698407055995213, "loss": 11.6673, "step": 38196 }, { "epoch": 0.7995687850623796, "grad_norm": 0.2821126878261566, "learning_rate": 0.00016698244257759356, "loss": 11.6647, "step": 38197 }, { "epoch": 0.7995897178263418, "grad_norm": 0.3630259931087494, "learning_rate": 0.0001669808145630352, "loss": 11.683, "step": 38198 }, { "epoch": 0.7996106505903039, "grad_norm": 0.6619381904602051, "learning_rate": 0.00016697918651627778, "loss": 11.5688, "step": 38199 }, { "epoch": 0.7996315833542661, "grad_norm": 0.3117094933986664, "learning_rate": 0.00016697755843732214, "loss": 11.6748, "step": 38200 }, { "epoch": 0.7996525161182283, "grad_norm": 0.2777281105518341, "learning_rate": 0.00016697593032616905, "loss": 11.6764, "step": 38201 }, { "epoch": 0.7996734488821904, "grad_norm": 0.28813835978507996, "learning_rate": 0.00016697430218281929, "loss": 11.6816, "step": 38202 }, { "epoch": 0.7996943816461526, "grad_norm": 0.3097333014011383, "learning_rate": 0.00016697267400727364, "loss": 11.6632, "step": 38203 }, { "epoch": 0.7997153144101147, "grad_norm": 0.3371273875236511, "learning_rate": 0.00016697104579953287, "loss": 11.6797, "step": 38204 }, { "epoch": 0.7997362471740769, "grad_norm": 0.38148033618927, "learning_rate": 0.00016696941755959776, "loss": 11.6861, "step": 38205 }, { "epoch": 0.799757179938039, "grad_norm": 0.2976330518722534, "learning_rate": 0.00016696778928746914, "loss": 11.6891, "step": 38206 }, { "epoch": 0.7997781127020012, "grad_norm": 0.31756290793418884, "learning_rate": 0.00016696616098314777, "loss": 11.6855, "step": 38207 }, { "epoch": 0.7997990454659634, "grad_norm": 0.2553996741771698, "learning_rate": 0.0001669645326466344, "loss": 11.6839, "step": 38208 }, { "epoch": 0.7998199782299255, "grad_norm": 0.27734246850013733, "learning_rate": 0.00016696290427792984, "loss": 11.6662, "step": 38209 }, { "epoch": 0.7998409109938877, "grad_norm": 0.2836940884590149, "learning_rate": 0.00016696127587703487, "loss": 11.6784, "step": 38210 }, { "epoch": 0.7998618437578497, "grad_norm": 0.3432863652706146, "learning_rate": 0.00016695964744395028, "loss": 11.6742, "step": 38211 }, { "epoch": 0.799882776521812, "grad_norm": 0.29624563455581665, "learning_rate": 0.00016695801897867685, "loss": 11.6671, "step": 38212 }, { "epoch": 0.7999037092857741, "grad_norm": 0.26951470971107483, "learning_rate": 0.00016695639048121537, "loss": 11.6624, "step": 38213 }, { "epoch": 0.7999246420497362, "grad_norm": 0.34311172366142273, "learning_rate": 0.0001669547619515666, "loss": 11.6741, "step": 38214 }, { "epoch": 0.7999455748136984, "grad_norm": 0.24758103489875793, "learning_rate": 0.00016695313338973132, "loss": 11.6695, "step": 38215 }, { "epoch": 0.7999665075776605, "grad_norm": 0.26260828971862793, "learning_rate": 0.00016695150479571034, "loss": 11.6766, "step": 38216 }, { "epoch": 0.7999874403416227, "grad_norm": 0.2872815430164337, "learning_rate": 0.0001669498761695044, "loss": 11.6678, "step": 38217 }, { "epoch": 0.8000083731055848, "grad_norm": 0.26070070266723633, "learning_rate": 0.00016694824751111435, "loss": 11.674, "step": 38218 }, { "epoch": 0.800029305869547, "grad_norm": 0.3068212866783142, "learning_rate": 0.00016694661882054094, "loss": 11.6755, "step": 38219 }, { "epoch": 0.8000502386335092, "grad_norm": 0.3555518090724945, "learning_rate": 0.00016694499009778493, "loss": 11.671, "step": 38220 }, { "epoch": 0.8000711713974713, "grad_norm": 0.32503706216812134, "learning_rate": 0.00016694336134284713, "loss": 11.6675, "step": 38221 }, { "epoch": 0.8000921041614335, "grad_norm": 0.390969455242157, "learning_rate": 0.0001669417325557283, "loss": 11.6605, "step": 38222 }, { "epoch": 0.8001130369253956, "grad_norm": 0.3316739499568939, "learning_rate": 0.00016694010373642925, "loss": 11.655, "step": 38223 }, { "epoch": 0.8001339696893578, "grad_norm": 0.2433246672153473, "learning_rate": 0.00016693847488495075, "loss": 11.6687, "step": 38224 }, { "epoch": 0.8001549024533199, "grad_norm": 0.3550771176815033, "learning_rate": 0.00016693684600129359, "loss": 11.6724, "step": 38225 }, { "epoch": 0.8001758352172821, "grad_norm": 0.34677115082740784, "learning_rate": 0.00016693521708545852, "loss": 11.6782, "step": 38226 }, { "epoch": 0.8001967679812443, "grad_norm": 0.33662134408950806, "learning_rate": 0.00016693358813744635, "loss": 11.6735, "step": 38227 }, { "epoch": 0.8002177007452064, "grad_norm": 0.2952177822589874, "learning_rate": 0.0001669319591572579, "loss": 11.6668, "step": 38228 }, { "epoch": 0.8002386335091686, "grad_norm": 0.28063735365867615, "learning_rate": 0.0001669303301448939, "loss": 11.6797, "step": 38229 }, { "epoch": 0.8002595662731307, "grad_norm": 0.4054931700229645, "learning_rate": 0.00016692870110035515, "loss": 11.6546, "step": 38230 }, { "epoch": 0.8002804990370929, "grad_norm": 0.3278752565383911, "learning_rate": 0.0001669270720236424, "loss": 11.6617, "step": 38231 }, { "epoch": 0.8003014318010551, "grad_norm": 0.24147477746009827, "learning_rate": 0.00016692544291475653, "loss": 11.6799, "step": 38232 }, { "epoch": 0.8003223645650172, "grad_norm": 0.3188403844833374, "learning_rate": 0.0001669238137736982, "loss": 11.6639, "step": 38233 }, { "epoch": 0.8003432973289794, "grad_norm": 0.3097442090511322, "learning_rate": 0.00016692218460046828, "loss": 11.68, "step": 38234 }, { "epoch": 0.8003642300929414, "grad_norm": 0.2991405129432678, "learning_rate": 0.0001669205553950675, "loss": 11.6803, "step": 38235 }, { "epoch": 0.8003851628569036, "grad_norm": 0.29388999938964844, "learning_rate": 0.0001669189261574967, "loss": 11.6704, "step": 38236 }, { "epoch": 0.8004060956208657, "grad_norm": 0.26236969232559204, "learning_rate": 0.00016691729688775658, "loss": 11.6642, "step": 38237 }, { "epoch": 0.8004270283848279, "grad_norm": 0.3030921518802643, "learning_rate": 0.00016691566758584805, "loss": 11.6615, "step": 38238 }, { "epoch": 0.8004479611487901, "grad_norm": 0.3977813124656677, "learning_rate": 0.00016691403825177175, "loss": 11.6867, "step": 38239 }, { "epoch": 0.8004688939127522, "grad_norm": 0.282744437456131, "learning_rate": 0.0001669124088855286, "loss": 11.6645, "step": 38240 }, { "epoch": 0.8004898266767144, "grad_norm": 0.2847446799278259, "learning_rate": 0.00016691077948711923, "loss": 11.6613, "step": 38241 }, { "epoch": 0.8005107594406765, "grad_norm": 0.3224063813686371, "learning_rate": 0.0001669091500565446, "loss": 11.6583, "step": 38242 }, { "epoch": 0.8005316922046387, "grad_norm": 0.3160589635372162, "learning_rate": 0.00016690752059380533, "loss": 11.6833, "step": 38243 }, { "epoch": 0.8005526249686008, "grad_norm": 0.2788943350315094, "learning_rate": 0.00016690589109890226, "loss": 11.6719, "step": 38244 }, { "epoch": 0.800573557732563, "grad_norm": 0.36642056703567505, "learning_rate": 0.00016690426157183624, "loss": 11.6638, "step": 38245 }, { "epoch": 0.8005944904965252, "grad_norm": 0.3374694585800171, "learning_rate": 0.000166902632012608, "loss": 11.6703, "step": 38246 }, { "epoch": 0.8006154232604873, "grad_norm": 0.3194127678871155, "learning_rate": 0.00016690100242121832, "loss": 11.6722, "step": 38247 }, { "epoch": 0.8006363560244495, "grad_norm": 0.25662660598754883, "learning_rate": 0.00016689937279766793, "loss": 11.6555, "step": 38248 }, { "epoch": 0.8006572887884116, "grad_norm": 0.32894033193588257, "learning_rate": 0.00016689774314195775, "loss": 11.6826, "step": 38249 }, { "epoch": 0.8006782215523738, "grad_norm": 0.35339489579200745, "learning_rate": 0.00016689611345408845, "loss": 11.6792, "step": 38250 }, { "epoch": 0.800699154316336, "grad_norm": 0.29513877630233765, "learning_rate": 0.00016689448373406082, "loss": 11.6685, "step": 38251 }, { "epoch": 0.8007200870802981, "grad_norm": 0.3210729658603668, "learning_rate": 0.00016689285398187572, "loss": 11.6792, "step": 38252 }, { "epoch": 0.8007410198442603, "grad_norm": 0.3408602774143219, "learning_rate": 0.00016689122419753387, "loss": 11.6875, "step": 38253 }, { "epoch": 0.8007619526082224, "grad_norm": 0.2711334526538849, "learning_rate": 0.00016688959438103604, "loss": 11.6619, "step": 38254 }, { "epoch": 0.8007828853721846, "grad_norm": 0.25437065958976746, "learning_rate": 0.00016688796453238306, "loss": 11.6514, "step": 38255 }, { "epoch": 0.8008038181361467, "grad_norm": 0.32611003518104553, "learning_rate": 0.0001668863346515757, "loss": 11.6652, "step": 38256 }, { "epoch": 0.8008247509001089, "grad_norm": 0.3773892819881439, "learning_rate": 0.00016688470473861475, "loss": 11.665, "step": 38257 }, { "epoch": 0.800845683664071, "grad_norm": 0.32241764664649963, "learning_rate": 0.00016688307479350095, "loss": 11.6792, "step": 38258 }, { "epoch": 0.8008666164280331, "grad_norm": 0.23923537135124207, "learning_rate": 0.00016688144481623513, "loss": 11.6777, "step": 38259 }, { "epoch": 0.8008875491919953, "grad_norm": 0.3761320412158966, "learning_rate": 0.00016687981480681806, "loss": 11.666, "step": 38260 }, { "epoch": 0.8009084819559574, "grad_norm": 0.31751933693885803, "learning_rate": 0.00016687818476525051, "loss": 11.684, "step": 38261 }, { "epoch": 0.8009294147199196, "grad_norm": 0.35761570930480957, "learning_rate": 0.0001668765546915333, "loss": 11.6722, "step": 38262 }, { "epoch": 0.8009503474838817, "grad_norm": 0.28555503487586975, "learning_rate": 0.0001668749245856672, "loss": 11.6635, "step": 38263 }, { "epoch": 0.8009712802478439, "grad_norm": 0.359881728887558, "learning_rate": 0.00016687329444765297, "loss": 11.6619, "step": 38264 }, { "epoch": 0.8009922130118061, "grad_norm": 0.4094175398349762, "learning_rate": 0.0001668716642774914, "loss": 11.6588, "step": 38265 }, { "epoch": 0.8010131457757682, "grad_norm": 0.3685741126537323, "learning_rate": 0.0001668700340751833, "loss": 11.6781, "step": 38266 }, { "epoch": 0.8010340785397304, "grad_norm": 0.33753228187561035, "learning_rate": 0.00016686840384072941, "loss": 11.6573, "step": 38267 }, { "epoch": 0.8010550113036925, "grad_norm": 0.3860035538673401, "learning_rate": 0.00016686677357413053, "loss": 11.6561, "step": 38268 }, { "epoch": 0.8010759440676547, "grad_norm": 0.326008677482605, "learning_rate": 0.00016686514327538747, "loss": 11.6716, "step": 38269 }, { "epoch": 0.8010968768316169, "grad_norm": 0.2523523271083832, "learning_rate": 0.000166863512944501, "loss": 11.682, "step": 38270 }, { "epoch": 0.801117809595579, "grad_norm": 0.33025968074798584, "learning_rate": 0.00016686188258147188, "loss": 11.6624, "step": 38271 }, { "epoch": 0.8011387423595412, "grad_norm": 0.36116325855255127, "learning_rate": 0.00016686025218630093, "loss": 11.6774, "step": 38272 }, { "epoch": 0.8011596751235033, "grad_norm": 0.329995334148407, "learning_rate": 0.00016685862175898892, "loss": 11.6747, "step": 38273 }, { "epoch": 0.8011806078874655, "grad_norm": 0.2803023159503937, "learning_rate": 0.00016685699129953663, "loss": 11.6575, "step": 38274 }, { "epoch": 0.8012015406514276, "grad_norm": 0.3290676176548004, "learning_rate": 0.00016685536080794484, "loss": 11.6813, "step": 38275 }, { "epoch": 0.8012224734153898, "grad_norm": 0.30271613597869873, "learning_rate": 0.00016685373028421432, "loss": 11.655, "step": 38276 }, { "epoch": 0.801243406179352, "grad_norm": 0.2874181270599365, "learning_rate": 0.0001668520997283459, "loss": 11.6854, "step": 38277 }, { "epoch": 0.8012643389433141, "grad_norm": 0.2608521580696106, "learning_rate": 0.0001668504691403403, "loss": 11.6549, "step": 38278 }, { "epoch": 0.8012852717072763, "grad_norm": 0.3161933720111847, "learning_rate": 0.00016684883852019838, "loss": 11.6694, "step": 38279 }, { "epoch": 0.8013062044712383, "grad_norm": 0.26197174191474915, "learning_rate": 0.00016684720786792087, "loss": 11.6664, "step": 38280 }, { "epoch": 0.8013271372352005, "grad_norm": 0.2570542097091675, "learning_rate": 0.00016684557718350855, "loss": 11.6719, "step": 38281 }, { "epoch": 0.8013480699991626, "grad_norm": 0.2878299653530121, "learning_rate": 0.00016684394646696227, "loss": 11.6577, "step": 38282 }, { "epoch": 0.8013690027631248, "grad_norm": 0.3280895948410034, "learning_rate": 0.0001668423157182827, "loss": 11.6716, "step": 38283 }, { "epoch": 0.801389935527087, "grad_norm": 0.28217175602912903, "learning_rate": 0.00016684068493747075, "loss": 11.6711, "step": 38284 }, { "epoch": 0.8014108682910491, "grad_norm": 0.3593786656856537, "learning_rate": 0.0001668390541245271, "loss": 11.674, "step": 38285 }, { "epoch": 0.8014318010550113, "grad_norm": 0.30240264534950256, "learning_rate": 0.0001668374232794526, "loss": 11.656, "step": 38286 }, { "epoch": 0.8014527338189734, "grad_norm": 0.3000154495239258, "learning_rate": 0.000166835792402248, "loss": 11.6592, "step": 38287 }, { "epoch": 0.8014736665829356, "grad_norm": 0.35542434453964233, "learning_rate": 0.00016683416149291408, "loss": 11.6803, "step": 38288 }, { "epoch": 0.8014945993468978, "grad_norm": 0.4760013222694397, "learning_rate": 0.00016683253055145168, "loss": 11.6794, "step": 38289 }, { "epoch": 0.8015155321108599, "grad_norm": 0.29464712738990784, "learning_rate": 0.0001668308995778615, "loss": 11.6665, "step": 38290 }, { "epoch": 0.8015364648748221, "grad_norm": 0.3409935534000397, "learning_rate": 0.00016682926857214443, "loss": 11.6801, "step": 38291 }, { "epoch": 0.8015573976387842, "grad_norm": 0.2615227699279785, "learning_rate": 0.0001668276375343011, "loss": 11.679, "step": 38292 }, { "epoch": 0.8015783304027464, "grad_norm": 0.30431511998176575, "learning_rate": 0.00016682600646433245, "loss": 11.6781, "step": 38293 }, { "epoch": 0.8015992631667085, "grad_norm": 0.2397182732820511, "learning_rate": 0.00016682437536223922, "loss": 11.6644, "step": 38294 }, { "epoch": 0.8016201959306707, "grad_norm": 0.3133751451969147, "learning_rate": 0.0001668227442280221, "loss": 11.6571, "step": 38295 }, { "epoch": 0.8016411286946329, "grad_norm": 0.35831785202026367, "learning_rate": 0.000166821113061682, "loss": 11.6757, "step": 38296 }, { "epoch": 0.801662061458595, "grad_norm": 0.3024733066558838, "learning_rate": 0.00016681948186321962, "loss": 11.6729, "step": 38297 }, { "epoch": 0.8016829942225572, "grad_norm": 0.2832067012786865, "learning_rate": 0.0001668178506326358, "loss": 11.6829, "step": 38298 }, { "epoch": 0.8017039269865193, "grad_norm": 0.2903519570827484, "learning_rate": 0.00016681621936993132, "loss": 11.6619, "step": 38299 }, { "epoch": 0.8017248597504815, "grad_norm": 0.2532433867454529, "learning_rate": 0.00016681458807510691, "loss": 11.6661, "step": 38300 }, { "epoch": 0.8017457925144436, "grad_norm": 0.27632808685302734, "learning_rate": 0.00016681295674816338, "loss": 11.6774, "step": 38301 }, { "epoch": 0.8017667252784058, "grad_norm": 0.3458297848701477, "learning_rate": 0.00016681132538910154, "loss": 11.662, "step": 38302 }, { "epoch": 0.801787658042368, "grad_norm": 0.3534805178642273, "learning_rate": 0.00016680969399792214, "loss": 11.6629, "step": 38303 }, { "epoch": 0.80180859080633, "grad_norm": 0.29393962025642395, "learning_rate": 0.000166808062574626, "loss": 11.679, "step": 38304 }, { "epoch": 0.8018295235702922, "grad_norm": 0.3321033716201782, "learning_rate": 0.0001668064311192139, "loss": 11.6637, "step": 38305 }, { "epoch": 0.8018504563342543, "grad_norm": 0.37051844596862793, "learning_rate": 0.0001668047996316866, "loss": 11.6683, "step": 38306 }, { "epoch": 0.8018713890982165, "grad_norm": 0.2763488292694092, "learning_rate": 0.00016680316811204486, "loss": 11.6668, "step": 38307 }, { "epoch": 0.8018923218621787, "grad_norm": 0.2853432595729828, "learning_rate": 0.00016680153656028956, "loss": 11.6757, "step": 38308 }, { "epoch": 0.8019132546261408, "grad_norm": 0.36912038922309875, "learning_rate": 0.00016679990497642138, "loss": 11.6746, "step": 38309 }, { "epoch": 0.801934187390103, "grad_norm": 0.3329235315322876, "learning_rate": 0.00016679827336044115, "loss": 11.6839, "step": 38310 }, { "epoch": 0.8019551201540651, "grad_norm": 0.39339300990104675, "learning_rate": 0.00016679664171234966, "loss": 11.6758, "step": 38311 }, { "epoch": 0.8019760529180273, "grad_norm": 0.3422795832157135, "learning_rate": 0.00016679501003214768, "loss": 11.6795, "step": 38312 }, { "epoch": 0.8019969856819894, "grad_norm": 0.3201063275337219, "learning_rate": 0.000166793378319836, "loss": 11.6675, "step": 38313 }, { "epoch": 0.8020179184459516, "grad_norm": 0.3862452507019043, "learning_rate": 0.0001667917465754154, "loss": 11.6625, "step": 38314 }, { "epoch": 0.8020388512099138, "grad_norm": 0.29876598715782166, "learning_rate": 0.00016679011479888667, "loss": 11.6832, "step": 38315 }, { "epoch": 0.8020597839738759, "grad_norm": 0.2735278904438019, "learning_rate": 0.0001667884829902506, "loss": 11.6666, "step": 38316 }, { "epoch": 0.8020807167378381, "grad_norm": 0.30320876836776733, "learning_rate": 0.00016678685114950796, "loss": 11.6706, "step": 38317 }, { "epoch": 0.8021016495018002, "grad_norm": 0.31702277064323425, "learning_rate": 0.00016678521927665954, "loss": 11.6732, "step": 38318 }, { "epoch": 0.8021225822657624, "grad_norm": 0.32255467772483826, "learning_rate": 0.00016678358737170617, "loss": 11.691, "step": 38319 }, { "epoch": 0.8021435150297245, "grad_norm": 0.25742092728614807, "learning_rate": 0.00016678195543464854, "loss": 11.6491, "step": 38320 }, { "epoch": 0.8021644477936867, "grad_norm": 0.3628285825252533, "learning_rate": 0.0001667803234654875, "loss": 11.6738, "step": 38321 }, { "epoch": 0.8021853805576489, "grad_norm": 0.3298124372959137, "learning_rate": 0.00016677869146422382, "loss": 11.6719, "step": 38322 }, { "epoch": 0.802206313321611, "grad_norm": 0.33262261748313904, "learning_rate": 0.00016677705943085827, "loss": 11.6725, "step": 38323 }, { "epoch": 0.8022272460855732, "grad_norm": 0.3072856664657593, "learning_rate": 0.0001667754273653917, "loss": 11.6725, "step": 38324 }, { "epoch": 0.8022481788495353, "grad_norm": 0.2729741632938385, "learning_rate": 0.0001667737952678248, "loss": 11.6578, "step": 38325 }, { "epoch": 0.8022691116134975, "grad_norm": 0.4455118775367737, "learning_rate": 0.0001667721631381584, "loss": 11.677, "step": 38326 }, { "epoch": 0.8022900443774597, "grad_norm": 0.2610820531845093, "learning_rate": 0.0001667705309763933, "loss": 11.6449, "step": 38327 }, { "epoch": 0.8023109771414217, "grad_norm": 0.28651902079582214, "learning_rate": 0.00016676889878253025, "loss": 11.6707, "step": 38328 }, { "epoch": 0.8023319099053839, "grad_norm": 0.27148208022117615, "learning_rate": 0.00016676726655657007, "loss": 11.6678, "step": 38329 }, { "epoch": 0.802352842669346, "grad_norm": 0.24620920419692993, "learning_rate": 0.00016676563429851351, "loss": 11.6641, "step": 38330 }, { "epoch": 0.8023737754333082, "grad_norm": 0.2440282702445984, "learning_rate": 0.0001667640020083614, "loss": 11.6617, "step": 38331 }, { "epoch": 0.8023947081972703, "grad_norm": 0.3167555034160614, "learning_rate": 0.00016676236968611447, "loss": 11.6868, "step": 38332 }, { "epoch": 0.8024156409612325, "grad_norm": 0.33872637152671814, "learning_rate": 0.00016676073733177353, "loss": 11.6622, "step": 38333 }, { "epoch": 0.8024365737251947, "grad_norm": 0.3776485323905945, "learning_rate": 0.00016675910494533939, "loss": 11.688, "step": 38334 }, { "epoch": 0.8024575064891568, "grad_norm": 0.3486925959587097, "learning_rate": 0.00016675747252681278, "loss": 11.6731, "step": 38335 }, { "epoch": 0.802478439253119, "grad_norm": 0.29415616393089294, "learning_rate": 0.00016675584007619452, "loss": 11.6457, "step": 38336 }, { "epoch": 0.8024993720170811, "grad_norm": 0.3446415662765503, "learning_rate": 0.00016675420759348542, "loss": 11.6682, "step": 38337 }, { "epoch": 0.8025203047810433, "grad_norm": 0.2886528968811035, "learning_rate": 0.00016675257507868621, "loss": 11.6465, "step": 38338 }, { "epoch": 0.8025412375450054, "grad_norm": 0.34015101194381714, "learning_rate": 0.0001667509425317977, "loss": 11.6696, "step": 38339 }, { "epoch": 0.8025621703089676, "grad_norm": 0.25896257162094116, "learning_rate": 0.00016674930995282065, "loss": 11.6804, "step": 38340 }, { "epoch": 0.8025831030729298, "grad_norm": 0.29616791009902954, "learning_rate": 0.00016674767734175592, "loss": 11.6682, "step": 38341 }, { "epoch": 0.8026040358368919, "grad_norm": 0.31304481625556946, "learning_rate": 0.0001667460446986042, "loss": 11.6634, "step": 38342 }, { "epoch": 0.8026249686008541, "grad_norm": 0.27872779965400696, "learning_rate": 0.00016674441202336635, "loss": 11.666, "step": 38343 }, { "epoch": 0.8026459013648162, "grad_norm": 0.312122106552124, "learning_rate": 0.00016674277931604312, "loss": 11.6795, "step": 38344 }, { "epoch": 0.8026668341287784, "grad_norm": 0.376287043094635, "learning_rate": 0.00016674114657663528, "loss": 11.6701, "step": 38345 }, { "epoch": 0.8026877668927406, "grad_norm": 0.30691513419151306, "learning_rate": 0.00016673951380514364, "loss": 11.6655, "step": 38346 }, { "epoch": 0.8027086996567027, "grad_norm": 0.300529420375824, "learning_rate": 0.00016673788100156898, "loss": 11.6479, "step": 38347 }, { "epoch": 0.8027296324206649, "grad_norm": 0.33503180742263794, "learning_rate": 0.0001667362481659121, "loss": 11.6873, "step": 38348 }, { "epoch": 0.802750565184627, "grad_norm": 0.3702578544616699, "learning_rate": 0.00016673461529817375, "loss": 11.6624, "step": 38349 }, { "epoch": 0.8027714979485892, "grad_norm": 0.29059088230133057, "learning_rate": 0.00016673298239835472, "loss": 11.6532, "step": 38350 }, { "epoch": 0.8027924307125512, "grad_norm": 0.2867702841758728, "learning_rate": 0.00016673134946645584, "loss": 11.6634, "step": 38351 }, { "epoch": 0.8028133634765134, "grad_norm": 0.35504424571990967, "learning_rate": 0.00016672971650247783, "loss": 11.6642, "step": 38352 }, { "epoch": 0.8028342962404756, "grad_norm": 0.332819402217865, "learning_rate": 0.00016672808350642152, "loss": 11.676, "step": 38353 }, { "epoch": 0.8028552290044377, "grad_norm": 0.28531986474990845, "learning_rate": 0.0001667264504782877, "loss": 11.6611, "step": 38354 }, { "epoch": 0.8028761617683999, "grad_norm": 0.28716757893562317, "learning_rate": 0.00016672481741807714, "loss": 11.6799, "step": 38355 }, { "epoch": 0.802897094532362, "grad_norm": 0.37683963775634766, "learning_rate": 0.0001667231843257906, "loss": 11.6493, "step": 38356 }, { "epoch": 0.8029180272963242, "grad_norm": 0.3060489296913147, "learning_rate": 0.0001667215512014289, "loss": 11.6863, "step": 38357 }, { "epoch": 0.8029389600602863, "grad_norm": 0.2799392342567444, "learning_rate": 0.0001667199180449928, "loss": 11.6707, "step": 38358 }, { "epoch": 0.8029598928242485, "grad_norm": 0.29820516705513, "learning_rate": 0.0001667182848564831, "loss": 11.6595, "step": 38359 }, { "epoch": 0.8029808255882107, "grad_norm": 0.25500190258026123, "learning_rate": 0.00016671665163590062, "loss": 11.6666, "step": 38360 }, { "epoch": 0.8030017583521728, "grad_norm": 0.24084582924842834, "learning_rate": 0.00016671501838324605, "loss": 11.661, "step": 38361 }, { "epoch": 0.803022691116135, "grad_norm": 0.2898303270339966, "learning_rate": 0.0001667133850985203, "loss": 11.6662, "step": 38362 }, { "epoch": 0.8030436238800971, "grad_norm": 0.32202231884002686, "learning_rate": 0.00016671175178172406, "loss": 11.6915, "step": 38363 }, { "epoch": 0.8030645566440593, "grad_norm": 0.23812048137187958, "learning_rate": 0.00016671011843285812, "loss": 11.6629, "step": 38364 }, { "epoch": 0.8030854894080214, "grad_norm": 0.39716386795043945, "learning_rate": 0.0001667084850519233, "loss": 11.6711, "step": 38365 }, { "epoch": 0.8031064221719836, "grad_norm": 0.2618269622325897, "learning_rate": 0.00016670685163892038, "loss": 11.6766, "step": 38366 }, { "epoch": 0.8031273549359458, "grad_norm": 0.3798638880252838, "learning_rate": 0.00016670521819385015, "loss": 11.6661, "step": 38367 }, { "epoch": 0.8031482876999079, "grad_norm": 0.2738945782184601, "learning_rate": 0.00016670358471671337, "loss": 11.6843, "step": 38368 }, { "epoch": 0.8031692204638701, "grad_norm": 0.32946717739105225, "learning_rate": 0.00016670195120751084, "loss": 11.668, "step": 38369 }, { "epoch": 0.8031901532278322, "grad_norm": 0.4256715476512909, "learning_rate": 0.00016670031766624334, "loss": 11.6627, "step": 38370 }, { "epoch": 0.8032110859917944, "grad_norm": 0.2623230814933777, "learning_rate": 0.00016669868409291167, "loss": 11.6699, "step": 38371 }, { "epoch": 0.8032320187557566, "grad_norm": 0.29376012086868286, "learning_rate": 0.0001666970504875166, "loss": 11.6722, "step": 38372 }, { "epoch": 0.8032529515197186, "grad_norm": 0.21837981045246124, "learning_rate": 0.00016669541685005894, "loss": 11.6582, "step": 38373 }, { "epoch": 0.8032738842836809, "grad_norm": 0.42348337173461914, "learning_rate": 0.00016669378318053942, "loss": 11.6795, "step": 38374 }, { "epoch": 0.8032948170476429, "grad_norm": 0.33784234523773193, "learning_rate": 0.00016669214947895894, "loss": 11.6825, "step": 38375 }, { "epoch": 0.8033157498116051, "grad_norm": 0.2813546359539032, "learning_rate": 0.0001666905157453181, "loss": 11.6723, "step": 38376 }, { "epoch": 0.8033366825755672, "grad_norm": 0.26213744282722473, "learning_rate": 0.00016668888197961788, "loss": 11.6745, "step": 38377 }, { "epoch": 0.8033576153395294, "grad_norm": 0.33331140875816345, "learning_rate": 0.00016668724818185893, "loss": 11.6608, "step": 38378 }, { "epoch": 0.8033785481034916, "grad_norm": 0.32069072127342224, "learning_rate": 0.0001666856143520421, "loss": 11.6739, "step": 38379 }, { "epoch": 0.8033994808674537, "grad_norm": 0.3195241093635559, "learning_rate": 0.00016668398049016819, "loss": 11.678, "step": 38380 }, { "epoch": 0.8034204136314159, "grad_norm": 0.28914570808410645, "learning_rate": 0.0001666823465962379, "loss": 11.6814, "step": 38381 }, { "epoch": 0.803441346395378, "grad_norm": 0.41298216581344604, "learning_rate": 0.0001666807126702521, "loss": 11.6736, "step": 38382 }, { "epoch": 0.8034622791593402, "grad_norm": 0.29971423745155334, "learning_rate": 0.0001666790787122115, "loss": 11.6668, "step": 38383 }, { "epoch": 0.8034832119233023, "grad_norm": 0.27588537335395813, "learning_rate": 0.000166677444722117, "loss": 11.6847, "step": 38384 }, { "epoch": 0.8035041446872645, "grad_norm": 0.29121488332748413, "learning_rate": 0.00016667581069996926, "loss": 11.6805, "step": 38385 }, { "epoch": 0.8035250774512267, "grad_norm": 0.3456766903400421, "learning_rate": 0.00016667417664576915, "loss": 11.6716, "step": 38386 }, { "epoch": 0.8035460102151888, "grad_norm": 0.28244999051094055, "learning_rate": 0.00016667254255951742, "loss": 11.6778, "step": 38387 }, { "epoch": 0.803566942979151, "grad_norm": 0.30608972907066345, "learning_rate": 0.00016667090844121483, "loss": 11.6688, "step": 38388 }, { "epoch": 0.8035878757431131, "grad_norm": 0.2776941657066345, "learning_rate": 0.00016666927429086228, "loss": 11.678, "step": 38389 }, { "epoch": 0.8036088085070753, "grad_norm": 0.27618351578712463, "learning_rate": 0.0001666676401084604, "loss": 11.6493, "step": 38390 }, { "epoch": 0.8036297412710375, "grad_norm": 0.3094186782836914, "learning_rate": 0.00016666600589401007, "loss": 11.6656, "step": 38391 }, { "epoch": 0.8036506740349996, "grad_norm": 0.32475796341896057, "learning_rate": 0.00016666437164751207, "loss": 11.6801, "step": 38392 }, { "epoch": 0.8036716067989618, "grad_norm": 0.28588730096817017, "learning_rate": 0.00016666273736896717, "loss": 11.6691, "step": 38393 }, { "epoch": 0.8036925395629239, "grad_norm": 0.28548380732536316, "learning_rate": 0.00016666110305837615, "loss": 11.6593, "step": 38394 }, { "epoch": 0.8037134723268861, "grad_norm": 0.32745012640953064, "learning_rate": 0.0001666594687157398, "loss": 11.6675, "step": 38395 }, { "epoch": 0.8037344050908481, "grad_norm": 0.42408376932144165, "learning_rate": 0.00016665783434105888, "loss": 11.6634, "step": 38396 }, { "epoch": 0.8037553378548103, "grad_norm": 0.2613201439380646, "learning_rate": 0.00016665619993433425, "loss": 11.6858, "step": 38397 }, { "epoch": 0.8037762706187725, "grad_norm": 0.284412682056427, "learning_rate": 0.00016665456549556663, "loss": 11.6631, "step": 38398 }, { "epoch": 0.8037972033827346, "grad_norm": 0.3025361895561218, "learning_rate": 0.00016665293102475682, "loss": 11.6691, "step": 38399 }, { "epoch": 0.8038181361466968, "grad_norm": 0.2792952358722687, "learning_rate": 0.0001666512965219056, "loss": 11.6946, "step": 38400 }, { "epoch": 0.8038390689106589, "grad_norm": 0.2623156011104584, "learning_rate": 0.00016664966198701379, "loss": 11.6651, "step": 38401 }, { "epoch": 0.8038600016746211, "grad_norm": 0.2636537551879883, "learning_rate": 0.00016664802742008214, "loss": 11.6744, "step": 38402 }, { "epoch": 0.8038809344385832, "grad_norm": 0.2536441683769226, "learning_rate": 0.00016664639282111144, "loss": 11.6765, "step": 38403 }, { "epoch": 0.8039018672025454, "grad_norm": 0.2886557877063751, "learning_rate": 0.0001666447581901025, "loss": 11.6735, "step": 38404 }, { "epoch": 0.8039227999665076, "grad_norm": 0.38750535249710083, "learning_rate": 0.00016664312352705605, "loss": 11.6832, "step": 38405 }, { "epoch": 0.8039437327304697, "grad_norm": 0.33318015933036804, "learning_rate": 0.00016664148883197298, "loss": 11.6694, "step": 38406 }, { "epoch": 0.8039646654944319, "grad_norm": 0.29496195912361145, "learning_rate": 0.00016663985410485398, "loss": 11.6683, "step": 38407 }, { "epoch": 0.803985598258394, "grad_norm": 0.30771613121032715, "learning_rate": 0.00016663821934569985, "loss": 11.6711, "step": 38408 }, { "epoch": 0.8040065310223562, "grad_norm": 0.3600921630859375, "learning_rate": 0.00016663658455451144, "loss": 11.6489, "step": 38409 }, { "epoch": 0.8040274637863184, "grad_norm": 0.3179391622543335, "learning_rate": 0.00016663494973128944, "loss": 11.683, "step": 38410 }, { "epoch": 0.8040483965502805, "grad_norm": 0.33334988355636597, "learning_rate": 0.0001666333148760347, "loss": 11.6709, "step": 38411 }, { "epoch": 0.8040693293142427, "grad_norm": 0.3000096082687378, "learning_rate": 0.000166631679988748, "loss": 11.6532, "step": 38412 }, { "epoch": 0.8040902620782048, "grad_norm": 0.33880946040153503, "learning_rate": 0.00016663004506943006, "loss": 11.6687, "step": 38413 }, { "epoch": 0.804111194842167, "grad_norm": 0.24567636847496033, "learning_rate": 0.0001666284101180818, "loss": 11.6627, "step": 38414 }, { "epoch": 0.8041321276061291, "grad_norm": 0.3358442783355713, "learning_rate": 0.0001666267751347039, "loss": 11.6668, "step": 38415 }, { "epoch": 0.8041530603700913, "grad_norm": 0.21474069356918335, "learning_rate": 0.00016662514011929719, "loss": 11.6636, "step": 38416 }, { "epoch": 0.8041739931340535, "grad_norm": 0.25457340478897095, "learning_rate": 0.0001666235050718624, "loss": 11.6645, "step": 38417 }, { "epoch": 0.8041949258980156, "grad_norm": 0.24937644600868225, "learning_rate": 0.00016662186999240035, "loss": 11.6702, "step": 38418 }, { "epoch": 0.8042158586619778, "grad_norm": 0.3721236288547516, "learning_rate": 0.00016662023488091187, "loss": 11.6725, "step": 38419 }, { "epoch": 0.8042367914259398, "grad_norm": 0.2729273736476898, "learning_rate": 0.00016661859973739768, "loss": 11.6702, "step": 38420 }, { "epoch": 0.804257724189902, "grad_norm": 0.2803531587123871, "learning_rate": 0.00016661696456185863, "loss": 11.6827, "step": 38421 }, { "epoch": 0.8042786569538641, "grad_norm": 0.3295285999774933, "learning_rate": 0.00016661532935429544, "loss": 11.6597, "step": 38422 }, { "epoch": 0.8042995897178263, "grad_norm": 0.3326262831687927, "learning_rate": 0.00016661369411470896, "loss": 11.6824, "step": 38423 }, { "epoch": 0.8043205224817885, "grad_norm": 0.3234328627586365, "learning_rate": 0.0001666120588430999, "loss": 11.6516, "step": 38424 }, { "epoch": 0.8043414552457506, "grad_norm": 0.3099967837333679, "learning_rate": 0.0001666104235394691, "loss": 11.6695, "step": 38425 }, { "epoch": 0.8043623880097128, "grad_norm": 0.299558162689209, "learning_rate": 0.00016660878820381735, "loss": 11.6842, "step": 38426 }, { "epoch": 0.8043833207736749, "grad_norm": 0.3020874559879303, "learning_rate": 0.00016660715283614542, "loss": 11.6454, "step": 38427 }, { "epoch": 0.8044042535376371, "grad_norm": 0.3085481524467468, "learning_rate": 0.00016660551743645407, "loss": 11.6639, "step": 38428 }, { "epoch": 0.8044251863015993, "grad_norm": 0.28401604294776917, "learning_rate": 0.00016660388200474412, "loss": 11.6698, "step": 38429 }, { "epoch": 0.8044461190655614, "grad_norm": 0.2674728333950043, "learning_rate": 0.00016660224654101638, "loss": 11.6684, "step": 38430 }, { "epoch": 0.8044670518295236, "grad_norm": 0.27009838819503784, "learning_rate": 0.00016660061104527158, "loss": 11.6524, "step": 38431 }, { "epoch": 0.8044879845934857, "grad_norm": 0.37678301334381104, "learning_rate": 0.00016659897551751053, "loss": 11.6725, "step": 38432 }, { "epoch": 0.8045089173574479, "grad_norm": 0.2623460590839386, "learning_rate": 0.00016659733995773405, "loss": 11.6788, "step": 38433 }, { "epoch": 0.80452985012141, "grad_norm": 0.3488495945930481, "learning_rate": 0.00016659570436594286, "loss": 11.6707, "step": 38434 }, { "epoch": 0.8045507828853722, "grad_norm": 0.4069957435131073, "learning_rate": 0.00016659406874213777, "loss": 11.6863, "step": 38435 }, { "epoch": 0.8045717156493344, "grad_norm": 0.35708415508270264, "learning_rate": 0.0001665924330863196, "loss": 11.6698, "step": 38436 }, { "epoch": 0.8045926484132965, "grad_norm": 0.34120312333106995, "learning_rate": 0.0001665907973984891, "loss": 11.6603, "step": 38437 }, { "epoch": 0.8046135811772587, "grad_norm": 0.3689386248588562, "learning_rate": 0.0001665891616786471, "loss": 11.6705, "step": 38438 }, { "epoch": 0.8046345139412208, "grad_norm": 0.28727713227272034, "learning_rate": 0.00016658752592679433, "loss": 11.6554, "step": 38439 }, { "epoch": 0.804655446705183, "grad_norm": 0.3285965919494629, "learning_rate": 0.00016658589014293158, "loss": 11.6787, "step": 38440 }, { "epoch": 0.804676379469145, "grad_norm": 0.3476659655570984, "learning_rate": 0.0001665842543270597, "loss": 11.6697, "step": 38441 }, { "epoch": 0.8046973122331073, "grad_norm": 0.2604939937591553, "learning_rate": 0.0001665826184791794, "loss": 11.6646, "step": 38442 }, { "epoch": 0.8047182449970695, "grad_norm": 0.255184143781662, "learning_rate": 0.00016658098259929153, "loss": 11.6637, "step": 38443 }, { "epoch": 0.8047391777610315, "grad_norm": 0.2933500111103058, "learning_rate": 0.00016657934668739684, "loss": 11.6647, "step": 38444 }, { "epoch": 0.8047601105249937, "grad_norm": 0.2649562954902649, "learning_rate": 0.00016657771074349612, "loss": 11.6485, "step": 38445 }, { "epoch": 0.8047810432889558, "grad_norm": 0.26205071806907654, "learning_rate": 0.0001665760747675902, "loss": 11.6739, "step": 38446 }, { "epoch": 0.804801976052918, "grad_norm": 0.3239108920097351, "learning_rate": 0.00016657443875967975, "loss": 11.6624, "step": 38447 }, { "epoch": 0.8048229088168802, "grad_norm": 0.24088773131370544, "learning_rate": 0.00016657280271976566, "loss": 11.6612, "step": 38448 }, { "epoch": 0.8048438415808423, "grad_norm": 0.32531026005744934, "learning_rate": 0.00016657116664784872, "loss": 11.6804, "step": 38449 }, { "epoch": 0.8048647743448045, "grad_norm": 0.3402107059955597, "learning_rate": 0.00016656953054392965, "loss": 11.6685, "step": 38450 }, { "epoch": 0.8048857071087666, "grad_norm": 0.32864195108413696, "learning_rate": 0.0001665678944080093, "loss": 11.6645, "step": 38451 }, { "epoch": 0.8049066398727288, "grad_norm": 0.275560587644577, "learning_rate": 0.00016656625824008843, "loss": 11.6667, "step": 38452 }, { "epoch": 0.8049275726366909, "grad_norm": 0.30729520320892334, "learning_rate": 0.00016656462204016781, "loss": 11.6707, "step": 38453 }, { "epoch": 0.8049485054006531, "grad_norm": 0.26847535371780396, "learning_rate": 0.00016656298580824822, "loss": 11.6677, "step": 38454 }, { "epoch": 0.8049694381646153, "grad_norm": 0.3102884888648987, "learning_rate": 0.0001665613495443305, "loss": 11.6746, "step": 38455 }, { "epoch": 0.8049903709285774, "grad_norm": 0.2975633442401886, "learning_rate": 0.0001665597132484154, "loss": 11.6632, "step": 38456 }, { "epoch": 0.8050113036925396, "grad_norm": 0.2813585102558136, "learning_rate": 0.0001665580769205037, "loss": 11.6773, "step": 38457 }, { "epoch": 0.8050322364565017, "grad_norm": 0.2682294547557831, "learning_rate": 0.00016655644056059622, "loss": 11.6611, "step": 38458 }, { "epoch": 0.8050531692204639, "grad_norm": 0.33384349942207336, "learning_rate": 0.00016655480416869373, "loss": 11.6847, "step": 38459 }, { "epoch": 0.805074101984426, "grad_norm": 0.3224143981933594, "learning_rate": 0.00016655316774479698, "loss": 11.6773, "step": 38460 }, { "epoch": 0.8050950347483882, "grad_norm": 0.3455347716808319, "learning_rate": 0.0001665515312889068, "loss": 11.6584, "step": 38461 }, { "epoch": 0.8051159675123504, "grad_norm": 0.27204424142837524, "learning_rate": 0.00016654989480102396, "loss": 11.6776, "step": 38462 }, { "epoch": 0.8051369002763125, "grad_norm": 0.3403608500957489, "learning_rate": 0.0001665482582811493, "loss": 11.6569, "step": 38463 }, { "epoch": 0.8051578330402747, "grad_norm": 0.27992138266563416, "learning_rate": 0.0001665466217292835, "loss": 11.6605, "step": 38464 }, { "epoch": 0.8051787658042368, "grad_norm": 0.3201957941055298, "learning_rate": 0.00016654498514542744, "loss": 11.6695, "step": 38465 }, { "epoch": 0.805199698568199, "grad_norm": 0.41082045435905457, "learning_rate": 0.00016654334852958183, "loss": 11.6767, "step": 38466 }, { "epoch": 0.8052206313321612, "grad_norm": 0.29061731696128845, "learning_rate": 0.00016654171188174756, "loss": 11.6942, "step": 38467 }, { "epoch": 0.8052415640961232, "grad_norm": 0.35274583101272583, "learning_rate": 0.00016654007520192532, "loss": 11.6751, "step": 38468 }, { "epoch": 0.8052624968600854, "grad_norm": 0.4413713812828064, "learning_rate": 0.00016653843849011592, "loss": 11.6044, "step": 38469 }, { "epoch": 0.8052834296240475, "grad_norm": 0.36770620942115784, "learning_rate": 0.0001665368017463202, "loss": 11.6929, "step": 38470 }, { "epoch": 0.8053043623880097, "grad_norm": 0.26560503244400024, "learning_rate": 0.00016653516497053887, "loss": 11.6917, "step": 38471 }, { "epoch": 0.8053252951519718, "grad_norm": 0.31374070048332214, "learning_rate": 0.00016653352816277278, "loss": 11.6729, "step": 38472 }, { "epoch": 0.805346227915934, "grad_norm": 0.3092510402202606, "learning_rate": 0.00016653189132302267, "loss": 11.6678, "step": 38473 }, { "epoch": 0.8053671606798962, "grad_norm": 0.2816981077194214, "learning_rate": 0.00016653025445128932, "loss": 11.6604, "step": 38474 }, { "epoch": 0.8053880934438583, "grad_norm": 0.31195271015167236, "learning_rate": 0.00016652861754757359, "loss": 11.6645, "step": 38475 }, { "epoch": 0.8054090262078205, "grad_norm": 0.3006475269794464, "learning_rate": 0.0001665269806118762, "loss": 11.6714, "step": 38476 }, { "epoch": 0.8054299589717826, "grad_norm": 0.352319598197937, "learning_rate": 0.00016652534364419797, "loss": 11.6713, "step": 38477 }, { "epoch": 0.8054508917357448, "grad_norm": 0.2356034219264984, "learning_rate": 0.00016652370664453967, "loss": 11.6746, "step": 38478 }, { "epoch": 0.8054718244997069, "grad_norm": 0.2613431215286255, "learning_rate": 0.00016652206961290206, "loss": 11.6681, "step": 38479 }, { "epoch": 0.8054927572636691, "grad_norm": 0.38049930334091187, "learning_rate": 0.000166520432549286, "loss": 11.6561, "step": 38480 }, { "epoch": 0.8055136900276313, "grad_norm": 0.4486103951931, "learning_rate": 0.0001665187954536922, "loss": 11.6686, "step": 38481 }, { "epoch": 0.8055346227915934, "grad_norm": 0.22497832775115967, "learning_rate": 0.00016651715832612153, "loss": 11.6757, "step": 38482 }, { "epoch": 0.8055555555555556, "grad_norm": 0.3842504620552063, "learning_rate": 0.00016651552116657466, "loss": 11.6597, "step": 38483 }, { "epoch": 0.8055764883195177, "grad_norm": 0.23801060020923615, "learning_rate": 0.0001665138839750525, "loss": 11.6674, "step": 38484 }, { "epoch": 0.8055974210834799, "grad_norm": 0.3477611541748047, "learning_rate": 0.00016651224675155577, "loss": 11.6549, "step": 38485 }, { "epoch": 0.8056183538474421, "grad_norm": 0.3012821674346924, "learning_rate": 0.00016651060949608528, "loss": 11.6741, "step": 38486 }, { "epoch": 0.8056392866114042, "grad_norm": 0.2734607458114624, "learning_rate": 0.0001665089722086418, "loss": 11.6609, "step": 38487 }, { "epoch": 0.8056602193753664, "grad_norm": 0.2795317769050598, "learning_rate": 0.0001665073348892261, "loss": 11.6656, "step": 38488 }, { "epoch": 0.8056811521393284, "grad_norm": 0.24353092908859253, "learning_rate": 0.00016650569753783905, "loss": 11.6624, "step": 38489 }, { "epoch": 0.8057020849032906, "grad_norm": 0.32082632184028625, "learning_rate": 0.0001665040601544813, "loss": 11.6627, "step": 38490 }, { "epoch": 0.8057230176672527, "grad_norm": 0.3441940248012543, "learning_rate": 0.00016650242273915375, "loss": 11.666, "step": 38491 }, { "epoch": 0.8057439504312149, "grad_norm": 0.2503313720226288, "learning_rate": 0.00016650078529185718, "loss": 11.6627, "step": 38492 }, { "epoch": 0.8057648831951771, "grad_norm": 0.2891578674316406, "learning_rate": 0.00016649914781259232, "loss": 11.6506, "step": 38493 }, { "epoch": 0.8057858159591392, "grad_norm": 0.23992477357387543, "learning_rate": 0.00016649751030136, "loss": 11.6658, "step": 38494 }, { "epoch": 0.8058067487231014, "grad_norm": 0.2671882212162018, "learning_rate": 0.000166495872758161, "loss": 11.6569, "step": 38495 }, { "epoch": 0.8058276814870635, "grad_norm": 0.32576364278793335, "learning_rate": 0.00016649423518299607, "loss": 11.6613, "step": 38496 }, { "epoch": 0.8058486142510257, "grad_norm": 0.26695170998573303, "learning_rate": 0.00016649259757586606, "loss": 11.6653, "step": 38497 }, { "epoch": 0.8058695470149878, "grad_norm": 0.33721020817756653, "learning_rate": 0.00016649095993677171, "loss": 11.6526, "step": 38498 }, { "epoch": 0.80589047977895, "grad_norm": 0.26907727122306824, "learning_rate": 0.00016648932226571384, "loss": 11.6663, "step": 38499 }, { "epoch": 0.8059114125429122, "grad_norm": 0.2948831021785736, "learning_rate": 0.0001664876845626932, "loss": 11.6615, "step": 38500 }, { "epoch": 0.8059323453068743, "grad_norm": 0.27992063760757446, "learning_rate": 0.0001664860468277106, "loss": 11.6621, "step": 38501 }, { "epoch": 0.8059532780708365, "grad_norm": 0.4568926990032196, "learning_rate": 0.00016648440906076684, "loss": 11.6749, "step": 38502 }, { "epoch": 0.8059742108347986, "grad_norm": 0.26314693689346313, "learning_rate": 0.00016648277126186267, "loss": 11.6564, "step": 38503 }, { "epoch": 0.8059951435987608, "grad_norm": 0.2890739440917969, "learning_rate": 0.00016648113343099892, "loss": 11.6734, "step": 38504 }, { "epoch": 0.806016076362723, "grad_norm": 0.30092257261276245, "learning_rate": 0.00016647949556817634, "loss": 11.6737, "step": 38505 }, { "epoch": 0.8060370091266851, "grad_norm": 0.3363928198814392, "learning_rate": 0.00016647785767339574, "loss": 11.6673, "step": 38506 }, { "epoch": 0.8060579418906473, "grad_norm": 0.3157958388328552, "learning_rate": 0.00016647621974665792, "loss": 11.6728, "step": 38507 }, { "epoch": 0.8060788746546094, "grad_norm": 0.28804242610931396, "learning_rate": 0.00016647458178796362, "loss": 11.6615, "step": 38508 }, { "epoch": 0.8060998074185716, "grad_norm": 0.39031657576560974, "learning_rate": 0.00016647294379731369, "loss": 11.6619, "step": 38509 }, { "epoch": 0.8061207401825337, "grad_norm": 0.31991448998451233, "learning_rate": 0.00016647130577470884, "loss": 11.6812, "step": 38510 }, { "epoch": 0.8061416729464959, "grad_norm": 0.2621525228023529, "learning_rate": 0.00016646966772014996, "loss": 11.6822, "step": 38511 }, { "epoch": 0.8061626057104581, "grad_norm": 0.2831382751464844, "learning_rate": 0.0001664680296336377, "loss": 11.6637, "step": 38512 }, { "epoch": 0.8061835384744201, "grad_norm": 0.27837473154067993, "learning_rate": 0.000166466391515173, "loss": 11.6781, "step": 38513 }, { "epoch": 0.8062044712383823, "grad_norm": 0.3981374502182007, "learning_rate": 0.00016646475336475652, "loss": 11.6651, "step": 38514 }, { "epoch": 0.8062254040023444, "grad_norm": 0.23376800119876862, "learning_rate": 0.00016646311518238912, "loss": 11.6793, "step": 38515 }, { "epoch": 0.8062463367663066, "grad_norm": 0.2824457287788391, "learning_rate": 0.00016646147696807158, "loss": 11.6701, "step": 38516 }, { "epoch": 0.8062672695302687, "grad_norm": 0.36340397596359253, "learning_rate": 0.0001664598387218047, "loss": 11.6801, "step": 38517 }, { "epoch": 0.8062882022942309, "grad_norm": 0.31237128376960754, "learning_rate": 0.0001664582004435892, "loss": 11.678, "step": 38518 }, { "epoch": 0.8063091350581931, "grad_norm": 0.25308749079704285, "learning_rate": 0.00016645656213342593, "loss": 11.6647, "step": 38519 }, { "epoch": 0.8063300678221552, "grad_norm": 0.3321131765842438, "learning_rate": 0.00016645492379131565, "loss": 11.6624, "step": 38520 }, { "epoch": 0.8063510005861174, "grad_norm": 0.36181578040122986, "learning_rate": 0.00016645328541725916, "loss": 11.6638, "step": 38521 }, { "epoch": 0.8063719333500795, "grad_norm": 0.32002490758895874, "learning_rate": 0.00016645164701125723, "loss": 11.6659, "step": 38522 }, { "epoch": 0.8063928661140417, "grad_norm": 0.36355599761009216, "learning_rate": 0.0001664500085733107, "loss": 11.676, "step": 38523 }, { "epoch": 0.8064137988780039, "grad_norm": 0.3870050013065338, "learning_rate": 0.0001664483701034203, "loss": 11.6627, "step": 38524 }, { "epoch": 0.806434731641966, "grad_norm": 0.3096647262573242, "learning_rate": 0.0001664467316015868, "loss": 11.683, "step": 38525 }, { "epoch": 0.8064556644059282, "grad_norm": 0.3976292908191681, "learning_rate": 0.00016644509306781106, "loss": 11.6756, "step": 38526 }, { "epoch": 0.8064765971698903, "grad_norm": 0.31483978033065796, "learning_rate": 0.00016644345450209382, "loss": 11.685, "step": 38527 }, { "epoch": 0.8064975299338525, "grad_norm": 0.33788198232650757, "learning_rate": 0.0001664418159044359, "loss": 11.6575, "step": 38528 }, { "epoch": 0.8065184626978146, "grad_norm": 0.2501106262207031, "learning_rate": 0.00016644017727483808, "loss": 11.6653, "step": 38529 }, { "epoch": 0.8065393954617768, "grad_norm": 0.3283396363258362, "learning_rate": 0.00016643853861330108, "loss": 11.6623, "step": 38530 }, { "epoch": 0.806560328225739, "grad_norm": 0.2893470525741577, "learning_rate": 0.00016643689991982578, "loss": 11.6458, "step": 38531 }, { "epoch": 0.8065812609897011, "grad_norm": 0.3080863952636719, "learning_rate": 0.00016643526119441293, "loss": 11.6624, "step": 38532 }, { "epoch": 0.8066021937536633, "grad_norm": 0.8167074918746948, "learning_rate": 0.00016643362243706333, "loss": 11.5797, "step": 38533 }, { "epoch": 0.8066231265176254, "grad_norm": 0.2769928574562073, "learning_rate": 0.0001664319836477777, "loss": 11.6716, "step": 38534 }, { "epoch": 0.8066440592815876, "grad_norm": 0.3578583598136902, "learning_rate": 0.00016643034482655695, "loss": 11.6735, "step": 38535 }, { "epoch": 0.8066649920455496, "grad_norm": 0.30506250262260437, "learning_rate": 0.00016642870597340177, "loss": 11.6666, "step": 38536 }, { "epoch": 0.8066859248095118, "grad_norm": 0.31717777252197266, "learning_rate": 0.00016642706708831298, "loss": 11.6761, "step": 38537 }, { "epoch": 0.806706857573474, "grad_norm": 0.3163982927799225, "learning_rate": 0.00016642542817129136, "loss": 11.6748, "step": 38538 }, { "epoch": 0.8067277903374361, "grad_norm": 0.28255659341812134, "learning_rate": 0.00016642378922233775, "loss": 11.6725, "step": 38539 }, { "epoch": 0.8067487231013983, "grad_norm": 0.2913455665111542, "learning_rate": 0.00016642215024145285, "loss": 11.6615, "step": 38540 }, { "epoch": 0.8067696558653604, "grad_norm": 0.319568932056427, "learning_rate": 0.0001664205112286375, "loss": 11.6665, "step": 38541 }, { "epoch": 0.8067905886293226, "grad_norm": 0.3097587823867798, "learning_rate": 0.00016641887218389248, "loss": 11.6578, "step": 38542 }, { "epoch": 0.8068115213932847, "grad_norm": 0.2964785695075989, "learning_rate": 0.0001664172331072186, "loss": 11.6725, "step": 38543 }, { "epoch": 0.8068324541572469, "grad_norm": 0.2768472731113434, "learning_rate": 0.0001664155939986166, "loss": 11.6498, "step": 38544 }, { "epoch": 0.8068533869212091, "grad_norm": 0.3740065395832062, "learning_rate": 0.0001664139548580873, "loss": 11.6559, "step": 38545 }, { "epoch": 0.8068743196851712, "grad_norm": 0.3009909987449646, "learning_rate": 0.00016641231568563148, "loss": 11.6666, "step": 38546 }, { "epoch": 0.8068952524491334, "grad_norm": 0.2679648697376251, "learning_rate": 0.0001664106764812499, "loss": 11.6716, "step": 38547 }, { "epoch": 0.8069161852130955, "grad_norm": 0.3038153350353241, "learning_rate": 0.00016640903724494342, "loss": 11.686, "step": 38548 }, { "epoch": 0.8069371179770577, "grad_norm": 0.28818202018737793, "learning_rate": 0.00016640739797671276, "loss": 11.6784, "step": 38549 }, { "epoch": 0.8069580507410199, "grad_norm": 0.32873451709747314, "learning_rate": 0.00016640575867655875, "loss": 11.6774, "step": 38550 }, { "epoch": 0.806978983504982, "grad_norm": 0.32776451110839844, "learning_rate": 0.00016640411934448215, "loss": 11.6497, "step": 38551 }, { "epoch": 0.8069999162689442, "grad_norm": 0.43752026557922363, "learning_rate": 0.00016640247998048375, "loss": 11.6838, "step": 38552 }, { "epoch": 0.8070208490329063, "grad_norm": 0.2709873914718628, "learning_rate": 0.0001664008405845644, "loss": 11.6657, "step": 38553 }, { "epoch": 0.8070417817968685, "grad_norm": 0.3503434360027313, "learning_rate": 0.00016639920115672476, "loss": 11.6653, "step": 38554 }, { "epoch": 0.8070627145608306, "grad_norm": 0.3259944021701813, "learning_rate": 0.00016639756169696575, "loss": 11.6639, "step": 38555 }, { "epoch": 0.8070836473247928, "grad_norm": 0.29593852162361145, "learning_rate": 0.00016639592220528805, "loss": 11.6593, "step": 38556 }, { "epoch": 0.807104580088755, "grad_norm": 0.2943882942199707, "learning_rate": 0.00016639428268169253, "loss": 11.654, "step": 38557 }, { "epoch": 0.807125512852717, "grad_norm": 0.3586551547050476, "learning_rate": 0.00016639264312617996, "loss": 11.6683, "step": 38558 }, { "epoch": 0.8071464456166793, "grad_norm": 0.2808155119419098, "learning_rate": 0.0001663910035387511, "loss": 11.6746, "step": 38559 }, { "epoch": 0.8071673783806413, "grad_norm": 0.33920174837112427, "learning_rate": 0.00016638936391940673, "loss": 11.6791, "step": 38560 }, { "epoch": 0.8071883111446035, "grad_norm": 0.279407799243927, "learning_rate": 0.0001663877242681477, "loss": 11.6618, "step": 38561 }, { "epoch": 0.8072092439085656, "grad_norm": 0.3266304135322571, "learning_rate": 0.00016638608458497474, "loss": 11.6862, "step": 38562 }, { "epoch": 0.8072301766725278, "grad_norm": 0.30984926223754883, "learning_rate": 0.0001663844448698887, "loss": 11.6795, "step": 38563 }, { "epoch": 0.80725110943649, "grad_norm": 0.28876450657844543, "learning_rate": 0.00016638280512289028, "loss": 11.6704, "step": 38564 }, { "epoch": 0.8072720422004521, "grad_norm": 0.2736598253250122, "learning_rate": 0.00016638116534398035, "loss": 11.6392, "step": 38565 }, { "epoch": 0.8072929749644143, "grad_norm": 0.34333521127700806, "learning_rate": 0.00016637952553315963, "loss": 11.6575, "step": 38566 }, { "epoch": 0.8073139077283764, "grad_norm": 0.37661558389663696, "learning_rate": 0.00016637788569042895, "loss": 11.6615, "step": 38567 }, { "epoch": 0.8073348404923386, "grad_norm": 0.31403467059135437, "learning_rate": 0.0001663762458157891, "loss": 11.6673, "step": 38568 }, { "epoch": 0.8073557732563008, "grad_norm": 0.3080039322376251, "learning_rate": 0.00016637460590924086, "loss": 11.6822, "step": 38569 }, { "epoch": 0.8073767060202629, "grad_norm": 0.32088854908943176, "learning_rate": 0.00016637296597078502, "loss": 11.6818, "step": 38570 }, { "epoch": 0.8073976387842251, "grad_norm": 0.346128910779953, "learning_rate": 0.00016637132600042236, "loss": 11.6682, "step": 38571 }, { "epoch": 0.8074185715481872, "grad_norm": 0.26846814155578613, "learning_rate": 0.00016636968599815366, "loss": 11.6723, "step": 38572 }, { "epoch": 0.8074395043121494, "grad_norm": 0.3032197952270508, "learning_rate": 0.00016636804596397974, "loss": 11.6843, "step": 38573 }, { "epoch": 0.8074604370761115, "grad_norm": 0.2720479667186737, "learning_rate": 0.00016636640589790135, "loss": 11.6656, "step": 38574 }, { "epoch": 0.8074813698400737, "grad_norm": 0.2917916774749756, "learning_rate": 0.0001663647657999193, "loss": 11.6722, "step": 38575 }, { "epoch": 0.8075023026040359, "grad_norm": 0.35209938883781433, "learning_rate": 0.00016636312567003438, "loss": 11.6675, "step": 38576 }, { "epoch": 0.807523235367998, "grad_norm": 0.3386928141117096, "learning_rate": 0.00016636148550824742, "loss": 11.6641, "step": 38577 }, { "epoch": 0.8075441681319602, "grad_norm": 0.3033151924610138, "learning_rate": 0.0001663598453145591, "loss": 11.6911, "step": 38578 }, { "epoch": 0.8075651008959223, "grad_norm": 0.2759319543838501, "learning_rate": 0.0001663582050889703, "loss": 11.6868, "step": 38579 }, { "epoch": 0.8075860336598845, "grad_norm": 0.3353491425514221, "learning_rate": 0.0001663565648314818, "loss": 11.6801, "step": 38580 }, { "epoch": 0.8076069664238466, "grad_norm": 0.33793696761131287, "learning_rate": 0.00016635492454209434, "loss": 11.6652, "step": 38581 }, { "epoch": 0.8076278991878088, "grad_norm": 0.2823706865310669, "learning_rate": 0.00016635328422080872, "loss": 11.6706, "step": 38582 }, { "epoch": 0.807648831951771, "grad_norm": 0.30828770995140076, "learning_rate": 0.0001663516438676258, "loss": 11.674, "step": 38583 }, { "epoch": 0.807669764715733, "grad_norm": 0.2883795499801636, "learning_rate": 0.0001663500034825463, "loss": 11.6682, "step": 38584 }, { "epoch": 0.8076906974796952, "grad_norm": 0.2806580662727356, "learning_rate": 0.000166348363065571, "loss": 11.6803, "step": 38585 }, { "epoch": 0.8077116302436573, "grad_norm": 0.4028621017932892, "learning_rate": 0.0001663467226167007, "loss": 11.6587, "step": 38586 }, { "epoch": 0.8077325630076195, "grad_norm": 0.31596165895462036, "learning_rate": 0.00016634508213593624, "loss": 11.6663, "step": 38587 }, { "epoch": 0.8077534957715817, "grad_norm": 0.30454012751579285, "learning_rate": 0.00016634344162327837, "loss": 11.6641, "step": 38588 }, { "epoch": 0.8077744285355438, "grad_norm": 0.35952040553092957, "learning_rate": 0.00016634180107872785, "loss": 11.6795, "step": 38589 }, { "epoch": 0.807795361299506, "grad_norm": 0.3014037609100342, "learning_rate": 0.00016634016050228554, "loss": 11.6665, "step": 38590 }, { "epoch": 0.8078162940634681, "grad_norm": 0.3401033580303192, "learning_rate": 0.00016633851989395214, "loss": 11.6774, "step": 38591 }, { "epoch": 0.8078372268274303, "grad_norm": 0.31836122274398804, "learning_rate": 0.0001663368792537285, "loss": 11.6711, "step": 38592 }, { "epoch": 0.8078581595913924, "grad_norm": 0.2827230393886566, "learning_rate": 0.0001663352385816154, "loss": 11.6666, "step": 38593 }, { "epoch": 0.8078790923553546, "grad_norm": 0.3438418507575989, "learning_rate": 0.0001663335978776136, "loss": 11.6828, "step": 38594 }, { "epoch": 0.8079000251193168, "grad_norm": 0.2629322409629822, "learning_rate": 0.00016633195714172393, "loss": 11.6751, "step": 38595 }, { "epoch": 0.8079209578832789, "grad_norm": 0.3122834265232086, "learning_rate": 0.00016633031637394713, "loss": 11.6706, "step": 38596 }, { "epoch": 0.8079418906472411, "grad_norm": 0.3423841595649719, "learning_rate": 0.00016632867557428405, "loss": 11.677, "step": 38597 }, { "epoch": 0.8079628234112032, "grad_norm": 0.26376646757125854, "learning_rate": 0.00016632703474273546, "loss": 11.6577, "step": 38598 }, { "epoch": 0.8079837561751654, "grad_norm": 0.30654892325401306, "learning_rate": 0.00016632539387930208, "loss": 11.6764, "step": 38599 }, { "epoch": 0.8080046889391275, "grad_norm": 0.32577991485595703, "learning_rate": 0.0001663237529839848, "loss": 11.6664, "step": 38600 }, { "epoch": 0.8080256217030897, "grad_norm": 0.33602190017700195, "learning_rate": 0.00016632211205678434, "loss": 11.6926, "step": 38601 }, { "epoch": 0.8080465544670519, "grad_norm": 0.2932741940021515, "learning_rate": 0.00016632047109770156, "loss": 11.6679, "step": 38602 }, { "epoch": 0.808067487231014, "grad_norm": 0.42822206020355225, "learning_rate": 0.00016631883010673714, "loss": 11.6453, "step": 38603 }, { "epoch": 0.8080884199949762, "grad_norm": 0.3154819905757904, "learning_rate": 0.000166317189083892, "loss": 11.6635, "step": 38604 }, { "epoch": 0.8081093527589382, "grad_norm": 0.3127763271331787, "learning_rate": 0.00016631554802916675, "loss": 11.675, "step": 38605 }, { "epoch": 0.8081302855229004, "grad_norm": 0.3557705879211426, "learning_rate": 0.00016631390694256236, "loss": 11.6664, "step": 38606 }, { "epoch": 0.8081512182868626, "grad_norm": 0.37239325046539307, "learning_rate": 0.00016631226582407952, "loss": 11.6606, "step": 38607 }, { "epoch": 0.8081721510508247, "grad_norm": 0.3832245469093323, "learning_rate": 0.00016631062467371908, "loss": 11.6675, "step": 38608 }, { "epoch": 0.8081930838147869, "grad_norm": 0.27367448806762695, "learning_rate": 0.00016630898349148177, "loss": 11.6744, "step": 38609 }, { "epoch": 0.808214016578749, "grad_norm": 0.2776012718677521, "learning_rate": 0.0001663073422773684, "loss": 11.6775, "step": 38610 }, { "epoch": 0.8082349493427112, "grad_norm": 0.3248794972896576, "learning_rate": 0.00016630570103137976, "loss": 11.6568, "step": 38611 }, { "epoch": 0.8082558821066733, "grad_norm": 0.3862018287181854, "learning_rate": 0.00016630405975351663, "loss": 11.6712, "step": 38612 }, { "epoch": 0.8082768148706355, "grad_norm": 0.3611728549003601, "learning_rate": 0.00016630241844377985, "loss": 11.6493, "step": 38613 }, { "epoch": 0.8082977476345977, "grad_norm": 0.2700539827346802, "learning_rate": 0.00016630077710217013, "loss": 11.6639, "step": 38614 }, { "epoch": 0.8083186803985598, "grad_norm": 0.3079199194908142, "learning_rate": 0.0001662991357286883, "loss": 11.6696, "step": 38615 }, { "epoch": 0.808339613162522, "grad_norm": 0.32814276218414307, "learning_rate": 0.00016629749432333518, "loss": 11.6644, "step": 38616 }, { "epoch": 0.8083605459264841, "grad_norm": 0.3292272090911865, "learning_rate": 0.00016629585288611151, "loss": 11.655, "step": 38617 }, { "epoch": 0.8083814786904463, "grad_norm": 0.30490192770957947, "learning_rate": 0.00016629421141701805, "loss": 11.6475, "step": 38618 }, { "epoch": 0.8084024114544084, "grad_norm": 0.2508311867713928, "learning_rate": 0.0001662925699160557, "loss": 11.6581, "step": 38619 }, { "epoch": 0.8084233442183706, "grad_norm": 0.3372722864151001, "learning_rate": 0.00016629092838322515, "loss": 11.6648, "step": 38620 }, { "epoch": 0.8084442769823328, "grad_norm": 0.25983133912086487, "learning_rate": 0.00016628928681852726, "loss": 11.668, "step": 38621 }, { "epoch": 0.8084652097462949, "grad_norm": 0.33479589223861694, "learning_rate": 0.00016628764522196273, "loss": 11.667, "step": 38622 }, { "epoch": 0.8084861425102571, "grad_norm": 0.3194567859172821, "learning_rate": 0.00016628600359353243, "loss": 11.6746, "step": 38623 }, { "epoch": 0.8085070752742192, "grad_norm": 0.4234633147716522, "learning_rate": 0.00016628436193323712, "loss": 11.6669, "step": 38624 }, { "epoch": 0.8085280080381814, "grad_norm": 0.28071892261505127, "learning_rate": 0.00016628272024107757, "loss": 11.6731, "step": 38625 }, { "epoch": 0.8085489408021436, "grad_norm": 0.34794795513153076, "learning_rate": 0.0001662810785170546, "loss": 11.666, "step": 38626 }, { "epoch": 0.8085698735661057, "grad_norm": 0.34680819511413574, "learning_rate": 0.00016627943676116898, "loss": 11.6784, "step": 38627 }, { "epoch": 0.8085908063300679, "grad_norm": 0.2535402178764343, "learning_rate": 0.0001662777949734215, "loss": 11.6637, "step": 38628 }, { "epoch": 0.80861173909403, "grad_norm": 0.33766478300094604, "learning_rate": 0.000166276153153813, "loss": 11.6669, "step": 38629 }, { "epoch": 0.8086326718579921, "grad_norm": 0.25068607926368713, "learning_rate": 0.00016627451130234416, "loss": 11.6561, "step": 38630 }, { "epoch": 0.8086536046219542, "grad_norm": 0.26337093114852905, "learning_rate": 0.00016627286941901586, "loss": 11.6682, "step": 38631 }, { "epoch": 0.8086745373859164, "grad_norm": 0.3115111291408539, "learning_rate": 0.00016627122750382888, "loss": 11.667, "step": 38632 }, { "epoch": 0.8086954701498786, "grad_norm": 0.32996439933776855, "learning_rate": 0.00016626958555678397, "loss": 11.6687, "step": 38633 }, { "epoch": 0.8087164029138407, "grad_norm": 0.27910926938056946, "learning_rate": 0.00016626794357788195, "loss": 11.6632, "step": 38634 }, { "epoch": 0.8087373356778029, "grad_norm": 0.30938348174095154, "learning_rate": 0.0001662663015671236, "loss": 11.6494, "step": 38635 }, { "epoch": 0.808758268441765, "grad_norm": 0.26990458369255066, "learning_rate": 0.00016626465952450975, "loss": 11.6883, "step": 38636 }, { "epoch": 0.8087792012057272, "grad_norm": 0.3082393705844879, "learning_rate": 0.0001662630174500411, "loss": 11.6502, "step": 38637 }, { "epoch": 0.8088001339696893, "grad_norm": 0.30591922998428345, "learning_rate": 0.0001662613753437185, "loss": 11.6716, "step": 38638 }, { "epoch": 0.8088210667336515, "grad_norm": 0.3014605641365051, "learning_rate": 0.00016625973320554276, "loss": 11.6537, "step": 38639 }, { "epoch": 0.8088419994976137, "grad_norm": 0.26059022545814514, "learning_rate": 0.00016625809103551462, "loss": 11.6663, "step": 38640 }, { "epoch": 0.8088629322615758, "grad_norm": 0.3121674656867981, "learning_rate": 0.00016625644883363488, "loss": 11.6717, "step": 38641 }, { "epoch": 0.808883865025538, "grad_norm": 0.27022939920425415, "learning_rate": 0.00016625480659990434, "loss": 11.649, "step": 38642 }, { "epoch": 0.8089047977895001, "grad_norm": 0.24096356332302094, "learning_rate": 0.0001662531643343238, "loss": 11.6668, "step": 38643 }, { "epoch": 0.8089257305534623, "grad_norm": 0.29387420415878296, "learning_rate": 0.000166251522036894, "loss": 11.6628, "step": 38644 }, { "epoch": 0.8089466633174245, "grad_norm": 0.3367714285850525, "learning_rate": 0.0001662498797076158, "loss": 11.6766, "step": 38645 }, { "epoch": 0.8089675960813866, "grad_norm": 0.36396029591560364, "learning_rate": 0.00016624823734648993, "loss": 11.6722, "step": 38646 }, { "epoch": 0.8089885288453488, "grad_norm": 0.25348323583602905, "learning_rate": 0.00016624659495351722, "loss": 11.6599, "step": 38647 }, { "epoch": 0.8090094616093109, "grad_norm": 0.2980644106864929, "learning_rate": 0.00016624495252869846, "loss": 11.6665, "step": 38648 }, { "epoch": 0.8090303943732731, "grad_norm": 0.2788926362991333, "learning_rate": 0.00016624331007203437, "loss": 11.6714, "step": 38649 }, { "epoch": 0.8090513271372352, "grad_norm": 0.31129229068756104, "learning_rate": 0.00016624166758352584, "loss": 11.6912, "step": 38650 }, { "epoch": 0.8090722599011974, "grad_norm": 0.2946949899196625, "learning_rate": 0.00016624002506317363, "loss": 11.683, "step": 38651 }, { "epoch": 0.8090931926651596, "grad_norm": 0.4321693778038025, "learning_rate": 0.00016623838251097846, "loss": 11.6773, "step": 38652 }, { "epoch": 0.8091141254291216, "grad_norm": 0.31295135617256165, "learning_rate": 0.0001662367399269412, "loss": 11.6597, "step": 38653 }, { "epoch": 0.8091350581930838, "grad_norm": 0.2777723968029022, "learning_rate": 0.0001662350973110626, "loss": 11.6614, "step": 38654 }, { "epoch": 0.8091559909570459, "grad_norm": 0.2822015583515167, "learning_rate": 0.0001662334546633435, "loss": 11.6729, "step": 38655 }, { "epoch": 0.8091769237210081, "grad_norm": 0.32625824213027954, "learning_rate": 0.0001662318119837846, "loss": 11.667, "step": 38656 }, { "epoch": 0.8091978564849702, "grad_norm": 0.29594334959983826, "learning_rate": 0.00016623016927238676, "loss": 11.6865, "step": 38657 }, { "epoch": 0.8092187892489324, "grad_norm": 0.3425973057746887, "learning_rate": 0.00016622852652915075, "loss": 11.6929, "step": 38658 }, { "epoch": 0.8092397220128946, "grad_norm": 0.23113854229450226, "learning_rate": 0.00016622688375407738, "loss": 11.6799, "step": 38659 }, { "epoch": 0.8092606547768567, "grad_norm": 0.42673173546791077, "learning_rate": 0.00016622524094716737, "loss": 11.6827, "step": 38660 }, { "epoch": 0.8092815875408189, "grad_norm": 0.29409894347190857, "learning_rate": 0.0001662235981084216, "loss": 11.6532, "step": 38661 }, { "epoch": 0.809302520304781, "grad_norm": 0.3611634373664856, "learning_rate": 0.00016622195523784082, "loss": 11.6867, "step": 38662 }, { "epoch": 0.8093234530687432, "grad_norm": 0.2727596163749695, "learning_rate": 0.0001662203123354258, "loss": 11.6646, "step": 38663 }, { "epoch": 0.8093443858327054, "grad_norm": 0.2945377826690674, "learning_rate": 0.00016621866940117735, "loss": 11.6792, "step": 38664 }, { "epoch": 0.8093653185966675, "grad_norm": 0.2995450794696808, "learning_rate": 0.00016621702643509627, "loss": 11.6867, "step": 38665 }, { "epoch": 0.8093862513606297, "grad_norm": 0.3254479169845581, "learning_rate": 0.00016621538343718334, "loss": 11.6755, "step": 38666 }, { "epoch": 0.8094071841245918, "grad_norm": 0.2663353383541107, "learning_rate": 0.00016621374040743937, "loss": 11.662, "step": 38667 }, { "epoch": 0.809428116888554, "grad_norm": 0.3158324658870697, "learning_rate": 0.0001662120973458651, "loss": 11.6755, "step": 38668 }, { "epoch": 0.8094490496525161, "grad_norm": 0.3202644884586334, "learning_rate": 0.00016621045425246133, "loss": 11.676, "step": 38669 }, { "epoch": 0.8094699824164783, "grad_norm": 0.3603833019733429, "learning_rate": 0.0001662088111272289, "loss": 11.6842, "step": 38670 }, { "epoch": 0.8094909151804405, "grad_norm": 0.2879908084869385, "learning_rate": 0.00016620716797016857, "loss": 11.6969, "step": 38671 }, { "epoch": 0.8095118479444026, "grad_norm": 0.348258376121521, "learning_rate": 0.0001662055247812811, "loss": 11.6642, "step": 38672 }, { "epoch": 0.8095327807083648, "grad_norm": 0.35076025128364563, "learning_rate": 0.00016620388156056732, "loss": 11.6685, "step": 38673 }, { "epoch": 0.8095537134723269, "grad_norm": 0.38165372610092163, "learning_rate": 0.000166202238308028, "loss": 11.6531, "step": 38674 }, { "epoch": 0.809574646236289, "grad_norm": 0.26846206188201904, "learning_rate": 0.00016620059502366394, "loss": 11.6412, "step": 38675 }, { "epoch": 0.8095955790002511, "grad_norm": 0.2637014389038086, "learning_rate": 0.00016619895170747593, "loss": 11.6614, "step": 38676 }, { "epoch": 0.8096165117642133, "grad_norm": 0.3286305367946625, "learning_rate": 0.00016619730835946473, "loss": 11.6565, "step": 38677 }, { "epoch": 0.8096374445281755, "grad_norm": 0.3245212435722351, "learning_rate": 0.00016619566497963123, "loss": 11.6719, "step": 38678 }, { "epoch": 0.8096583772921376, "grad_norm": 0.2737908959388733, "learning_rate": 0.00016619402156797607, "loss": 11.6789, "step": 38679 }, { "epoch": 0.8096793100560998, "grad_norm": 0.27768200635910034, "learning_rate": 0.00016619237812450017, "loss": 11.667, "step": 38680 }, { "epoch": 0.8097002428200619, "grad_norm": 0.45704665780067444, "learning_rate": 0.00016619073464920424, "loss": 11.6713, "step": 38681 }, { "epoch": 0.8097211755840241, "grad_norm": 0.2781151831150055, "learning_rate": 0.00016618909114208912, "loss": 11.667, "step": 38682 }, { "epoch": 0.8097421083479863, "grad_norm": 0.3599667549133301, "learning_rate": 0.00016618744760315556, "loss": 11.6798, "step": 38683 }, { "epoch": 0.8097630411119484, "grad_norm": 0.3398476541042328, "learning_rate": 0.00016618580403240439, "loss": 11.6868, "step": 38684 }, { "epoch": 0.8097839738759106, "grad_norm": 0.22187520563602448, "learning_rate": 0.00016618416042983632, "loss": 11.677, "step": 38685 }, { "epoch": 0.8098049066398727, "grad_norm": 0.3101770579814911, "learning_rate": 0.00016618251679545226, "loss": 11.6681, "step": 38686 }, { "epoch": 0.8098258394038349, "grad_norm": 0.3246547281742096, "learning_rate": 0.0001661808731292529, "loss": 11.6742, "step": 38687 }, { "epoch": 0.809846772167797, "grad_norm": 0.37032413482666016, "learning_rate": 0.00016617922943123909, "loss": 11.6886, "step": 38688 }, { "epoch": 0.8098677049317592, "grad_norm": 0.35039517283439636, "learning_rate": 0.00016617758570141157, "loss": 11.6803, "step": 38689 }, { "epoch": 0.8098886376957214, "grad_norm": 0.3717098832130432, "learning_rate": 0.0001661759419397712, "loss": 11.6922, "step": 38690 }, { "epoch": 0.8099095704596835, "grad_norm": 0.4117351174354553, "learning_rate": 0.0001661742981463187, "loss": 11.6674, "step": 38691 }, { "epoch": 0.8099305032236457, "grad_norm": 0.40619468688964844, "learning_rate": 0.0001661726543210549, "loss": 11.6747, "step": 38692 }, { "epoch": 0.8099514359876078, "grad_norm": 0.2779890298843384, "learning_rate": 0.00016617101046398057, "loss": 11.6561, "step": 38693 }, { "epoch": 0.80997236875157, "grad_norm": 0.2613614499568939, "learning_rate": 0.00016616936657509653, "loss": 11.6673, "step": 38694 }, { "epoch": 0.8099933015155321, "grad_norm": 0.29507550597190857, "learning_rate": 0.00016616772265440352, "loss": 11.673, "step": 38695 }, { "epoch": 0.8100142342794943, "grad_norm": 0.3608762323856354, "learning_rate": 0.00016616607870190237, "loss": 11.6673, "step": 38696 }, { "epoch": 0.8100351670434565, "grad_norm": 0.336792528629303, "learning_rate": 0.00016616443471759388, "loss": 11.6551, "step": 38697 }, { "epoch": 0.8100560998074186, "grad_norm": 0.2513379752635956, "learning_rate": 0.0001661627907014788, "loss": 11.6492, "step": 38698 }, { "epoch": 0.8100770325713808, "grad_norm": 0.3011761009693146, "learning_rate": 0.00016616114665355793, "loss": 11.6524, "step": 38699 }, { "epoch": 0.8100979653353428, "grad_norm": 0.26998135447502136, "learning_rate": 0.0001661595025738321, "loss": 11.6608, "step": 38700 }, { "epoch": 0.810118898099305, "grad_norm": 0.30052685737609863, "learning_rate": 0.00016615785846230203, "loss": 11.6704, "step": 38701 }, { "epoch": 0.8101398308632672, "grad_norm": 0.2746150493621826, "learning_rate": 0.0001661562143189686, "loss": 11.6539, "step": 38702 }, { "epoch": 0.8101607636272293, "grad_norm": 0.38048815727233887, "learning_rate": 0.00016615457014383252, "loss": 11.6809, "step": 38703 }, { "epoch": 0.8101816963911915, "grad_norm": 0.22207562625408173, "learning_rate": 0.00016615292593689463, "loss": 11.6864, "step": 38704 }, { "epoch": 0.8102026291551536, "grad_norm": 0.363147497177124, "learning_rate": 0.00016615128169815568, "loss": 11.6813, "step": 38705 }, { "epoch": 0.8102235619191158, "grad_norm": 0.3073442876338959, "learning_rate": 0.00016614963742761648, "loss": 11.6735, "step": 38706 }, { "epoch": 0.8102444946830779, "grad_norm": 0.3118492662906647, "learning_rate": 0.00016614799312527788, "loss": 11.6648, "step": 38707 }, { "epoch": 0.8102654274470401, "grad_norm": 0.29839837551116943, "learning_rate": 0.00016614634879114055, "loss": 11.6691, "step": 38708 }, { "epoch": 0.8102863602110023, "grad_norm": 0.34016963839530945, "learning_rate": 0.00016614470442520538, "loss": 11.6665, "step": 38709 }, { "epoch": 0.8103072929749644, "grad_norm": 0.2719334065914154, "learning_rate": 0.00016614306002747312, "loss": 11.6653, "step": 38710 }, { "epoch": 0.8103282257389266, "grad_norm": 0.3596566319465637, "learning_rate": 0.00016614141559794456, "loss": 11.6714, "step": 38711 }, { "epoch": 0.8103491585028887, "grad_norm": 0.25525954365730286, "learning_rate": 0.0001661397711366205, "loss": 11.6712, "step": 38712 }, { "epoch": 0.8103700912668509, "grad_norm": 0.30933359265327454, "learning_rate": 0.00016613812664350173, "loss": 11.6833, "step": 38713 }, { "epoch": 0.810391024030813, "grad_norm": 0.3452748656272888, "learning_rate": 0.00016613648211858903, "loss": 11.6635, "step": 38714 }, { "epoch": 0.8104119567947752, "grad_norm": 0.3322906792163849, "learning_rate": 0.0001661348375618832, "loss": 11.6569, "step": 38715 }, { "epoch": 0.8104328895587374, "grad_norm": 0.365113228559494, "learning_rate": 0.00016613319297338502, "loss": 11.6703, "step": 38716 }, { "epoch": 0.8104538223226995, "grad_norm": 0.27360737323760986, "learning_rate": 0.00016613154835309527, "loss": 11.6757, "step": 38717 }, { "epoch": 0.8104747550866617, "grad_norm": 0.34854426980018616, "learning_rate": 0.0001661299037010148, "loss": 11.6638, "step": 38718 }, { "epoch": 0.8104956878506238, "grad_norm": 0.3615831434726715, "learning_rate": 0.00016612825901714435, "loss": 11.6933, "step": 38719 }, { "epoch": 0.810516620614586, "grad_norm": 0.3136242628097534, "learning_rate": 0.0001661266143014847, "loss": 11.6605, "step": 38720 }, { "epoch": 0.8105375533785482, "grad_norm": 0.2679736018180847, "learning_rate": 0.0001661249695540367, "loss": 11.6742, "step": 38721 }, { "epoch": 0.8105584861425102, "grad_norm": 0.2960292398929596, "learning_rate": 0.00016612332477480108, "loss": 11.6728, "step": 38722 }, { "epoch": 0.8105794189064724, "grad_norm": 0.3861863911151886, "learning_rate": 0.00016612167996377862, "loss": 11.6764, "step": 38723 }, { "epoch": 0.8106003516704345, "grad_norm": 0.3587697446346283, "learning_rate": 0.0001661200351209702, "loss": 11.6501, "step": 38724 }, { "epoch": 0.8106212844343967, "grad_norm": 0.23553065955638885, "learning_rate": 0.0001661183902463765, "loss": 11.6554, "step": 38725 }, { "epoch": 0.8106422171983588, "grad_norm": 0.2853553295135498, "learning_rate": 0.0001661167453399984, "loss": 11.6718, "step": 38726 }, { "epoch": 0.810663149962321, "grad_norm": 0.336813360452652, "learning_rate": 0.00016611510040183666, "loss": 11.682, "step": 38727 }, { "epoch": 0.8106840827262832, "grad_norm": 0.3194640576839447, "learning_rate": 0.00016611345543189206, "loss": 11.6706, "step": 38728 }, { "epoch": 0.8107050154902453, "grad_norm": 0.3481977880001068, "learning_rate": 0.0001661118104301654, "loss": 11.6815, "step": 38729 }, { "epoch": 0.8107259482542075, "grad_norm": 0.275969922542572, "learning_rate": 0.00016611016539665744, "loss": 11.6598, "step": 38730 }, { "epoch": 0.8107468810181696, "grad_norm": 0.3022781014442444, "learning_rate": 0.00016610852033136902, "loss": 11.6812, "step": 38731 }, { "epoch": 0.8107678137821318, "grad_norm": 0.35527321696281433, "learning_rate": 0.0001661068752343009, "loss": 11.6893, "step": 38732 }, { "epoch": 0.8107887465460939, "grad_norm": 0.2754601538181305, "learning_rate": 0.0001661052301054539, "loss": 11.6603, "step": 38733 }, { "epoch": 0.8108096793100561, "grad_norm": 0.3513333797454834, "learning_rate": 0.00016610358494482879, "loss": 11.6725, "step": 38734 }, { "epoch": 0.8108306120740183, "grad_norm": 0.31248313188552856, "learning_rate": 0.00016610193975242634, "loss": 11.6583, "step": 38735 }, { "epoch": 0.8108515448379804, "grad_norm": 0.2954486906528473, "learning_rate": 0.00016610029452824738, "loss": 11.681, "step": 38736 }, { "epoch": 0.8108724776019426, "grad_norm": 0.3012588918209076, "learning_rate": 0.0001660986492722927, "loss": 11.6773, "step": 38737 }, { "epoch": 0.8108934103659047, "grad_norm": 0.26714009046554565, "learning_rate": 0.00016609700398456304, "loss": 11.6684, "step": 38738 }, { "epoch": 0.8109143431298669, "grad_norm": 0.325465589761734, "learning_rate": 0.00016609535866505924, "loss": 11.6739, "step": 38739 }, { "epoch": 0.810935275893829, "grad_norm": 0.27087995409965515, "learning_rate": 0.00016609371331378206, "loss": 11.6516, "step": 38740 }, { "epoch": 0.8109562086577912, "grad_norm": 0.2790396213531494, "learning_rate": 0.00016609206793073234, "loss": 11.6706, "step": 38741 }, { "epoch": 0.8109771414217534, "grad_norm": 0.3097476661205292, "learning_rate": 0.0001660904225159108, "loss": 11.6669, "step": 38742 }, { "epoch": 0.8109980741857155, "grad_norm": 0.4172716438770294, "learning_rate": 0.0001660887770693183, "loss": 11.6847, "step": 38743 }, { "epoch": 0.8110190069496777, "grad_norm": 0.3203580975532532, "learning_rate": 0.00016608713159095556, "loss": 11.6904, "step": 38744 }, { "epoch": 0.8110399397136397, "grad_norm": 0.2657630741596222, "learning_rate": 0.00016608548608082347, "loss": 11.6621, "step": 38745 }, { "epoch": 0.811060872477602, "grad_norm": 0.2765614688396454, "learning_rate": 0.00016608384053892273, "loss": 11.663, "step": 38746 }, { "epoch": 0.8110818052415641, "grad_norm": 0.2990517318248749, "learning_rate": 0.00016608219496525416, "loss": 11.669, "step": 38747 }, { "epoch": 0.8111027380055262, "grad_norm": 0.2568231225013733, "learning_rate": 0.00016608054935981855, "loss": 11.6766, "step": 38748 }, { "epoch": 0.8111236707694884, "grad_norm": 0.32391035556793213, "learning_rate": 0.00016607890372261674, "loss": 11.6669, "step": 38749 }, { "epoch": 0.8111446035334505, "grad_norm": 0.2586176097393036, "learning_rate": 0.00016607725805364943, "loss": 11.6755, "step": 38750 }, { "epoch": 0.8111655362974127, "grad_norm": 0.35260212421417236, "learning_rate": 0.00016607561235291746, "loss": 11.6744, "step": 38751 }, { "epoch": 0.8111864690613748, "grad_norm": 0.2669534981250763, "learning_rate": 0.00016607396662042162, "loss": 11.6699, "step": 38752 }, { "epoch": 0.811207401825337, "grad_norm": 0.34384527802467346, "learning_rate": 0.00016607232085616273, "loss": 11.6745, "step": 38753 }, { "epoch": 0.8112283345892992, "grad_norm": 0.2761881649494171, "learning_rate": 0.00016607067506014156, "loss": 11.6653, "step": 38754 }, { "epoch": 0.8112492673532613, "grad_norm": 0.2654015123844147, "learning_rate": 0.00016606902923235883, "loss": 11.6692, "step": 38755 }, { "epoch": 0.8112702001172235, "grad_norm": 0.3500474691390991, "learning_rate": 0.00016606738337281545, "loss": 11.6614, "step": 38756 }, { "epoch": 0.8112911328811856, "grad_norm": 0.28404733538627625, "learning_rate": 0.00016606573748151213, "loss": 11.6651, "step": 38757 }, { "epoch": 0.8113120656451478, "grad_norm": 0.30384644865989685, "learning_rate": 0.0001660640915584497, "loss": 11.6715, "step": 38758 }, { "epoch": 0.8113329984091099, "grad_norm": 0.30019596219062805, "learning_rate": 0.00016606244560362892, "loss": 11.6827, "step": 38759 }, { "epoch": 0.8113539311730721, "grad_norm": 0.3727007806301117, "learning_rate": 0.0001660607996170506, "loss": 11.6546, "step": 38760 }, { "epoch": 0.8113748639370343, "grad_norm": 0.2644402086734772, "learning_rate": 0.00016605915359871554, "loss": 11.6794, "step": 38761 }, { "epoch": 0.8113957967009964, "grad_norm": 0.3223366439342499, "learning_rate": 0.0001660575075486245, "loss": 11.6622, "step": 38762 }, { "epoch": 0.8114167294649586, "grad_norm": 0.34009915590286255, "learning_rate": 0.00016605586146677833, "loss": 11.6692, "step": 38763 }, { "epoch": 0.8114376622289207, "grad_norm": 0.30098989605903625, "learning_rate": 0.00016605421535317776, "loss": 11.6736, "step": 38764 }, { "epoch": 0.8114585949928829, "grad_norm": 0.37007659673690796, "learning_rate": 0.0001660525692078236, "loss": 11.6823, "step": 38765 }, { "epoch": 0.8114795277568451, "grad_norm": 0.321209579706192, "learning_rate": 0.00016605092303071666, "loss": 11.6713, "step": 38766 }, { "epoch": 0.8115004605208072, "grad_norm": 0.35600540041923523, "learning_rate": 0.00016604927682185768, "loss": 11.6883, "step": 38767 }, { "epoch": 0.8115213932847694, "grad_norm": 0.31768813729286194, "learning_rate": 0.00016604763058124754, "loss": 11.6735, "step": 38768 }, { "epoch": 0.8115423260487314, "grad_norm": 0.2991669178009033, "learning_rate": 0.00016604598430888695, "loss": 11.6555, "step": 38769 }, { "epoch": 0.8115632588126936, "grad_norm": 0.31359195709228516, "learning_rate": 0.00016604433800477674, "loss": 11.6772, "step": 38770 }, { "epoch": 0.8115841915766557, "grad_norm": 0.32001033425331116, "learning_rate": 0.00016604269166891773, "loss": 11.6676, "step": 38771 }, { "epoch": 0.8116051243406179, "grad_norm": 0.32266899943351746, "learning_rate": 0.00016604104530131063, "loss": 11.6849, "step": 38772 }, { "epoch": 0.8116260571045801, "grad_norm": 0.395293653011322, "learning_rate": 0.00016603939890195627, "loss": 11.6565, "step": 38773 }, { "epoch": 0.8116469898685422, "grad_norm": 0.33886009454727173, "learning_rate": 0.00016603775247085546, "loss": 11.6672, "step": 38774 }, { "epoch": 0.8116679226325044, "grad_norm": 0.3504188358783722, "learning_rate": 0.00016603610600800898, "loss": 11.685, "step": 38775 }, { "epoch": 0.8116888553964665, "grad_norm": 0.4535430371761322, "learning_rate": 0.00016603445951341765, "loss": 11.6703, "step": 38776 }, { "epoch": 0.8117097881604287, "grad_norm": 0.35313811898231506, "learning_rate": 0.0001660328129870822, "loss": 11.6629, "step": 38777 }, { "epoch": 0.8117307209243908, "grad_norm": 0.2785103917121887, "learning_rate": 0.00016603116642900347, "loss": 11.6608, "step": 38778 }, { "epoch": 0.811751653688353, "grad_norm": 0.38775622844696045, "learning_rate": 0.00016602951983918222, "loss": 11.6748, "step": 38779 }, { "epoch": 0.8117725864523152, "grad_norm": 0.29939648509025574, "learning_rate": 0.00016602787321761927, "loss": 11.6686, "step": 38780 }, { "epoch": 0.8117935192162773, "grad_norm": 0.2377939075231552, "learning_rate": 0.00016602622656431538, "loss": 11.6668, "step": 38781 }, { "epoch": 0.8118144519802395, "grad_norm": 0.3547742962837219, "learning_rate": 0.00016602457987927136, "loss": 11.6709, "step": 38782 }, { "epoch": 0.8118353847442016, "grad_norm": 0.30990496277809143, "learning_rate": 0.000166022933162488, "loss": 11.6735, "step": 38783 }, { "epoch": 0.8118563175081638, "grad_norm": 0.43483594059944153, "learning_rate": 0.0001660212864139661, "loss": 11.6749, "step": 38784 }, { "epoch": 0.811877250272126, "grad_norm": 0.2931477427482605, "learning_rate": 0.00016601963963370646, "loss": 11.6764, "step": 38785 }, { "epoch": 0.8118981830360881, "grad_norm": 0.47406116127967834, "learning_rate": 0.00016601799282170982, "loss": 11.6774, "step": 38786 }, { "epoch": 0.8119191158000503, "grad_norm": 0.3107089102268219, "learning_rate": 0.00016601634597797704, "loss": 11.6556, "step": 38787 }, { "epoch": 0.8119400485640124, "grad_norm": 0.325557678937912, "learning_rate": 0.00016601469910250887, "loss": 11.6794, "step": 38788 }, { "epoch": 0.8119609813279746, "grad_norm": 0.33120647072792053, "learning_rate": 0.0001660130521953061, "loss": 11.6634, "step": 38789 }, { "epoch": 0.8119819140919367, "grad_norm": 0.3049701750278473, "learning_rate": 0.00016601140525636957, "loss": 11.6762, "step": 38790 }, { "epoch": 0.8120028468558989, "grad_norm": 0.3048742115497589, "learning_rate": 0.0001660097582857, "loss": 11.6623, "step": 38791 }, { "epoch": 0.812023779619861, "grad_norm": 0.3054247200489044, "learning_rate": 0.0001660081112832982, "loss": 11.6799, "step": 38792 }, { "epoch": 0.8120447123838231, "grad_norm": 0.386719673871994, "learning_rate": 0.00016600646424916503, "loss": 11.6595, "step": 38793 }, { "epoch": 0.8120656451477853, "grad_norm": 0.28109419345855713, "learning_rate": 0.0001660048171833012, "loss": 11.6679, "step": 38794 }, { "epoch": 0.8120865779117474, "grad_norm": 0.33719730377197266, "learning_rate": 0.00016600317008570752, "loss": 11.6915, "step": 38795 }, { "epoch": 0.8121075106757096, "grad_norm": 0.2554500699043274, "learning_rate": 0.00016600152295638483, "loss": 11.6691, "step": 38796 }, { "epoch": 0.8121284434396717, "grad_norm": 0.2868924140930176, "learning_rate": 0.00016599987579533386, "loss": 11.678, "step": 38797 }, { "epoch": 0.8121493762036339, "grad_norm": 0.2816927134990692, "learning_rate": 0.00016599822860255542, "loss": 11.6622, "step": 38798 }, { "epoch": 0.8121703089675961, "grad_norm": 0.3189530670642853, "learning_rate": 0.00016599658137805032, "loss": 11.6731, "step": 38799 }, { "epoch": 0.8121912417315582, "grad_norm": 0.29066669940948486, "learning_rate": 0.00016599493412181935, "loss": 11.6786, "step": 38800 }, { "epoch": 0.8122121744955204, "grad_norm": 0.27532246708869934, "learning_rate": 0.00016599328683386327, "loss": 11.6908, "step": 38801 }, { "epoch": 0.8122331072594825, "grad_norm": 0.29416999220848083, "learning_rate": 0.00016599163951418292, "loss": 11.6629, "step": 38802 }, { "epoch": 0.8122540400234447, "grad_norm": 0.33317068219184875, "learning_rate": 0.00016598999216277903, "loss": 11.6769, "step": 38803 }, { "epoch": 0.8122749727874069, "grad_norm": 0.28235816955566406, "learning_rate": 0.00016598834477965247, "loss": 11.6837, "step": 38804 }, { "epoch": 0.812295905551369, "grad_norm": 0.32577645778656006, "learning_rate": 0.00016598669736480397, "loss": 11.663, "step": 38805 }, { "epoch": 0.8123168383153312, "grad_norm": 0.32632341980934143, "learning_rate": 0.00016598504991823437, "loss": 11.6732, "step": 38806 }, { "epoch": 0.8123377710792933, "grad_norm": 0.31329289078712463, "learning_rate": 0.00016598340243994438, "loss": 11.6595, "step": 38807 }, { "epoch": 0.8123587038432555, "grad_norm": 0.333093523979187, "learning_rate": 0.00016598175492993489, "loss": 11.6571, "step": 38808 }, { "epoch": 0.8123796366072176, "grad_norm": 0.3065679967403412, "learning_rate": 0.00016598010738820662, "loss": 11.6461, "step": 38809 }, { "epoch": 0.8124005693711798, "grad_norm": 0.32911261916160583, "learning_rate": 0.0001659784598147604, "loss": 11.6739, "step": 38810 }, { "epoch": 0.812421502135142, "grad_norm": 0.2890286147594452, "learning_rate": 0.00016597681220959702, "loss": 11.6622, "step": 38811 }, { "epoch": 0.8124424348991041, "grad_norm": 0.25056788325309753, "learning_rate": 0.00016597516457271728, "loss": 11.671, "step": 38812 }, { "epoch": 0.8124633676630663, "grad_norm": 0.28179821372032166, "learning_rate": 0.00016597351690412194, "loss": 11.6646, "step": 38813 }, { "epoch": 0.8124843004270283, "grad_norm": 0.2661411464214325, "learning_rate": 0.0001659718692038118, "loss": 11.6675, "step": 38814 }, { "epoch": 0.8125052331909906, "grad_norm": 0.3217196762561798, "learning_rate": 0.00016597022147178764, "loss": 11.6669, "step": 38815 }, { "epoch": 0.8125261659549526, "grad_norm": 0.38291436433792114, "learning_rate": 0.0001659685737080503, "loss": 11.6794, "step": 38816 }, { "epoch": 0.8125470987189148, "grad_norm": 0.2972801625728607, "learning_rate": 0.00016596692591260055, "loss": 11.6603, "step": 38817 }, { "epoch": 0.812568031482877, "grad_norm": 0.33495160937309265, "learning_rate": 0.00016596527808543916, "loss": 11.6737, "step": 38818 }, { "epoch": 0.8125889642468391, "grad_norm": 0.27827104926109314, "learning_rate": 0.00016596363022656693, "loss": 11.6664, "step": 38819 }, { "epoch": 0.8126098970108013, "grad_norm": 0.27153876423835754, "learning_rate": 0.0001659619823359847, "loss": 11.6479, "step": 38820 }, { "epoch": 0.8126308297747634, "grad_norm": 0.3191957473754883, "learning_rate": 0.0001659603344136932, "loss": 11.6826, "step": 38821 }, { "epoch": 0.8126517625387256, "grad_norm": 0.2906217575073242, "learning_rate": 0.00016595868645969325, "loss": 11.6781, "step": 38822 }, { "epoch": 0.8126726953026878, "grad_norm": 0.3272019922733307, "learning_rate": 0.00016595703847398563, "loss": 11.67, "step": 38823 }, { "epoch": 0.8126936280666499, "grad_norm": 0.295958012342453, "learning_rate": 0.00016595539045657115, "loss": 11.6535, "step": 38824 }, { "epoch": 0.8127145608306121, "grad_norm": 0.3542090654373169, "learning_rate": 0.00016595374240745055, "loss": 11.6685, "step": 38825 }, { "epoch": 0.8127354935945742, "grad_norm": 0.2532348930835724, "learning_rate": 0.00016595209432662472, "loss": 11.6595, "step": 38826 }, { "epoch": 0.8127564263585364, "grad_norm": 0.3756925165653229, "learning_rate": 0.0001659504462140944, "loss": 11.6705, "step": 38827 }, { "epoch": 0.8127773591224985, "grad_norm": 0.2854594886302948, "learning_rate": 0.00016594879806986032, "loss": 11.6519, "step": 38828 }, { "epoch": 0.8127982918864607, "grad_norm": 0.3666938543319702, "learning_rate": 0.0001659471498939234, "loss": 11.694, "step": 38829 }, { "epoch": 0.8128192246504229, "grad_norm": 0.3594217300415039, "learning_rate": 0.0001659455016862843, "loss": 11.689, "step": 38830 }, { "epoch": 0.812840157414385, "grad_norm": 0.26183295249938965, "learning_rate": 0.00016594385344694391, "loss": 11.6593, "step": 38831 }, { "epoch": 0.8128610901783472, "grad_norm": 0.28323590755462646, "learning_rate": 0.000165942205175903, "loss": 11.6712, "step": 38832 }, { "epoch": 0.8128820229423093, "grad_norm": 0.29342028498649597, "learning_rate": 0.00016594055687316233, "loss": 11.6648, "step": 38833 }, { "epoch": 0.8129029557062715, "grad_norm": 0.3637664318084717, "learning_rate": 0.0001659389085387227, "loss": 11.664, "step": 38834 }, { "epoch": 0.8129238884702336, "grad_norm": 0.3851379454135895, "learning_rate": 0.00016593726017258495, "loss": 11.6767, "step": 38835 }, { "epoch": 0.8129448212341958, "grad_norm": 0.4343002736568451, "learning_rate": 0.0001659356117747498, "loss": 11.6614, "step": 38836 }, { "epoch": 0.812965753998158, "grad_norm": 0.3667752742767334, "learning_rate": 0.0001659339633452181, "loss": 11.6685, "step": 38837 }, { "epoch": 0.81298668676212, "grad_norm": 0.2629035711288452, "learning_rate": 0.00016593231488399064, "loss": 11.6762, "step": 38838 }, { "epoch": 0.8130076195260822, "grad_norm": 0.3135707378387451, "learning_rate": 0.00016593066639106817, "loss": 11.6602, "step": 38839 }, { "epoch": 0.8130285522900443, "grad_norm": 0.39304259419441223, "learning_rate": 0.00016592901786645153, "loss": 11.6875, "step": 38840 }, { "epoch": 0.8130494850540065, "grad_norm": 0.5400828719139099, "learning_rate": 0.00016592736931014145, "loss": 11.6866, "step": 38841 }, { "epoch": 0.8130704178179687, "grad_norm": 0.3844684362411499, "learning_rate": 0.00016592572072213882, "loss": 11.6646, "step": 38842 }, { "epoch": 0.8130913505819308, "grad_norm": 0.2573237717151642, "learning_rate": 0.00016592407210244434, "loss": 11.6596, "step": 38843 }, { "epoch": 0.813112283345893, "grad_norm": 0.25880247354507446, "learning_rate": 0.00016592242345105885, "loss": 11.689, "step": 38844 }, { "epoch": 0.8131332161098551, "grad_norm": 0.3383754789829254, "learning_rate": 0.0001659207747679831, "loss": 11.6888, "step": 38845 }, { "epoch": 0.8131541488738173, "grad_norm": 0.26914867758750916, "learning_rate": 0.00016591912605321793, "loss": 11.6688, "step": 38846 }, { "epoch": 0.8131750816377794, "grad_norm": 0.23559878766536713, "learning_rate": 0.00016591747730676412, "loss": 11.6631, "step": 38847 }, { "epoch": 0.8131960144017416, "grad_norm": 0.31099462509155273, "learning_rate": 0.00016591582852862243, "loss": 11.6612, "step": 38848 }, { "epoch": 0.8132169471657038, "grad_norm": 0.29198014736175537, "learning_rate": 0.00016591417971879372, "loss": 11.6528, "step": 38849 }, { "epoch": 0.8132378799296659, "grad_norm": 0.24138957262039185, "learning_rate": 0.00016591253087727874, "loss": 11.6641, "step": 38850 }, { "epoch": 0.8132588126936281, "grad_norm": 0.3006691336631775, "learning_rate": 0.00016591088200407827, "loss": 11.6713, "step": 38851 }, { "epoch": 0.8132797454575902, "grad_norm": 0.3734908998012543, "learning_rate": 0.0001659092330991931, "loss": 11.6587, "step": 38852 }, { "epoch": 0.8133006782215524, "grad_norm": 0.2746850848197937, "learning_rate": 0.00016590758416262408, "loss": 11.6555, "step": 38853 }, { "epoch": 0.8133216109855145, "grad_norm": 0.2843743860721588, "learning_rate": 0.00016590593519437197, "loss": 11.6393, "step": 38854 }, { "epoch": 0.8133425437494767, "grad_norm": 0.36985960602760315, "learning_rate": 0.0001659042861944375, "loss": 11.6612, "step": 38855 }, { "epoch": 0.8133634765134389, "grad_norm": 0.27700668573379517, "learning_rate": 0.00016590263716282158, "loss": 11.6589, "step": 38856 }, { "epoch": 0.813384409277401, "grad_norm": 0.3618435561656952, "learning_rate": 0.0001659009880995249, "loss": 11.6693, "step": 38857 }, { "epoch": 0.8134053420413632, "grad_norm": 0.360164612531662, "learning_rate": 0.0001658993390045483, "loss": 11.6847, "step": 38858 }, { "epoch": 0.8134262748053253, "grad_norm": 0.29519450664520264, "learning_rate": 0.00016589768987789255, "loss": 11.6705, "step": 38859 }, { "epoch": 0.8134472075692875, "grad_norm": 0.41615015268325806, "learning_rate": 0.0001658960407195585, "loss": 11.6636, "step": 38860 }, { "epoch": 0.8134681403332497, "grad_norm": 0.3294401466846466, "learning_rate": 0.0001658943915295469, "loss": 11.673, "step": 38861 }, { "epoch": 0.8134890730972117, "grad_norm": 0.2644745409488678, "learning_rate": 0.00016589274230785852, "loss": 11.6734, "step": 38862 }, { "epoch": 0.813510005861174, "grad_norm": 0.32349956035614014, "learning_rate": 0.00016589109305449417, "loss": 11.6602, "step": 38863 }, { "epoch": 0.813530938625136, "grad_norm": 0.2783571183681488, "learning_rate": 0.0001658894437694547, "loss": 11.6848, "step": 38864 }, { "epoch": 0.8135518713890982, "grad_norm": 0.31402963399887085, "learning_rate": 0.00016588779445274082, "loss": 11.6721, "step": 38865 }, { "epoch": 0.8135728041530603, "grad_norm": 0.32185032963752747, "learning_rate": 0.00016588614510435335, "loss": 11.6792, "step": 38866 }, { "epoch": 0.8135937369170225, "grad_norm": 0.3730427026748657, "learning_rate": 0.0001658844957242931, "loss": 11.6673, "step": 38867 }, { "epoch": 0.8136146696809847, "grad_norm": 0.33761298656463623, "learning_rate": 0.00016588284631256087, "loss": 11.6794, "step": 38868 }, { "epoch": 0.8136356024449468, "grad_norm": 0.37154534459114075, "learning_rate": 0.00016588119686915742, "loss": 11.6583, "step": 38869 }, { "epoch": 0.813656535208909, "grad_norm": 0.31807276606559753, "learning_rate": 0.00016587954739408355, "loss": 11.6496, "step": 38870 }, { "epoch": 0.8136774679728711, "grad_norm": 0.2938431203365326, "learning_rate": 0.00016587789788734007, "loss": 11.6735, "step": 38871 }, { "epoch": 0.8136984007368333, "grad_norm": 0.25949081778526306, "learning_rate": 0.00016587624834892776, "loss": 11.6608, "step": 38872 }, { "epoch": 0.8137193335007954, "grad_norm": 0.2692491114139557, "learning_rate": 0.00016587459877884743, "loss": 11.6464, "step": 38873 }, { "epoch": 0.8137402662647576, "grad_norm": 0.33746474981307983, "learning_rate": 0.00016587294917709983, "loss": 11.6748, "step": 38874 }, { "epoch": 0.8137611990287198, "grad_norm": 0.31464242935180664, "learning_rate": 0.00016587129954368583, "loss": 11.6588, "step": 38875 }, { "epoch": 0.8137821317926819, "grad_norm": 0.41705381870269775, "learning_rate": 0.00016586964987860614, "loss": 11.6646, "step": 38876 }, { "epoch": 0.8138030645566441, "grad_norm": 0.3426836133003235, "learning_rate": 0.00016586800018186158, "loss": 11.6577, "step": 38877 }, { "epoch": 0.8138239973206062, "grad_norm": 0.3493644893169403, "learning_rate": 0.000165866350453453, "loss": 11.6791, "step": 38878 }, { "epoch": 0.8138449300845684, "grad_norm": 0.36091017723083496, "learning_rate": 0.0001658647006933811, "loss": 11.6665, "step": 38879 }, { "epoch": 0.8138658628485306, "grad_norm": 0.32394835352897644, "learning_rate": 0.00016586305090164673, "loss": 11.6757, "step": 38880 }, { "epoch": 0.8138867956124927, "grad_norm": 0.24930541217327118, "learning_rate": 0.0001658614010782507, "loss": 11.6769, "step": 38881 }, { "epoch": 0.8139077283764549, "grad_norm": 0.3225199580192566, "learning_rate": 0.00016585975122319373, "loss": 11.6691, "step": 38882 }, { "epoch": 0.813928661140417, "grad_norm": 0.28657981753349304, "learning_rate": 0.0001658581013364767, "loss": 11.6719, "step": 38883 }, { "epoch": 0.8139495939043792, "grad_norm": 0.3156115412712097, "learning_rate": 0.00016585645141810033, "loss": 11.6516, "step": 38884 }, { "epoch": 0.8139705266683412, "grad_norm": 0.3303609788417816, "learning_rate": 0.00016585480146806547, "loss": 11.6871, "step": 38885 }, { "epoch": 0.8139914594323034, "grad_norm": 0.32438284158706665, "learning_rate": 0.00016585315148637286, "loss": 11.689, "step": 38886 }, { "epoch": 0.8140123921962656, "grad_norm": 0.3650186359882355, "learning_rate": 0.00016585150147302335, "loss": 11.6681, "step": 38887 }, { "epoch": 0.8140333249602277, "grad_norm": 0.3830805718898773, "learning_rate": 0.00016584985142801767, "loss": 11.6459, "step": 38888 }, { "epoch": 0.8140542577241899, "grad_norm": 0.36351874470710754, "learning_rate": 0.00016584820135135666, "loss": 11.6803, "step": 38889 }, { "epoch": 0.814075190488152, "grad_norm": 0.32328641414642334, "learning_rate": 0.0001658465512430411, "loss": 11.683, "step": 38890 }, { "epoch": 0.8140961232521142, "grad_norm": 0.39070558547973633, "learning_rate": 0.00016584490110307178, "loss": 11.6714, "step": 38891 }, { "epoch": 0.8141170560160763, "grad_norm": 0.26959002017974854, "learning_rate": 0.0001658432509314495, "loss": 11.6712, "step": 38892 }, { "epoch": 0.8141379887800385, "grad_norm": 0.2853894829750061, "learning_rate": 0.00016584160072817506, "loss": 11.6683, "step": 38893 }, { "epoch": 0.8141589215440007, "grad_norm": 0.3319523334503174, "learning_rate": 0.00016583995049324925, "loss": 11.6737, "step": 38894 }, { "epoch": 0.8141798543079628, "grad_norm": 0.3143308460712433, "learning_rate": 0.00016583830022667283, "loss": 11.6636, "step": 38895 }, { "epoch": 0.814200787071925, "grad_norm": 0.2960745394229889, "learning_rate": 0.00016583664992844663, "loss": 11.6637, "step": 38896 }, { "epoch": 0.8142217198358871, "grad_norm": 0.3264292776584625, "learning_rate": 0.00016583499959857144, "loss": 11.6645, "step": 38897 }, { "epoch": 0.8142426525998493, "grad_norm": 0.3777768611907959, "learning_rate": 0.00016583334923704802, "loss": 11.6813, "step": 38898 }, { "epoch": 0.8142635853638115, "grad_norm": 0.2431040108203888, "learning_rate": 0.00016583169884387724, "loss": 11.6636, "step": 38899 }, { "epoch": 0.8142845181277736, "grad_norm": 0.3153570294380188, "learning_rate": 0.0001658300484190598, "loss": 11.6577, "step": 38900 }, { "epoch": 0.8143054508917358, "grad_norm": 0.2882923185825348, "learning_rate": 0.00016582839796259658, "loss": 11.662, "step": 38901 }, { "epoch": 0.8143263836556979, "grad_norm": 0.2928292751312256, "learning_rate": 0.00016582674747448828, "loss": 11.6709, "step": 38902 }, { "epoch": 0.8143473164196601, "grad_norm": 0.4727206826210022, "learning_rate": 0.00016582509695473575, "loss": 11.6577, "step": 38903 }, { "epoch": 0.8143682491836222, "grad_norm": 0.2938522696495056, "learning_rate": 0.0001658234464033398, "loss": 11.6697, "step": 38904 }, { "epoch": 0.8143891819475844, "grad_norm": 0.2827182412147522, "learning_rate": 0.00016582179582030118, "loss": 11.6466, "step": 38905 }, { "epoch": 0.8144101147115466, "grad_norm": 0.2961401641368866, "learning_rate": 0.00016582014520562073, "loss": 11.6778, "step": 38906 }, { "epoch": 0.8144310474755087, "grad_norm": 0.422321617603302, "learning_rate": 0.0001658184945592992, "loss": 11.6621, "step": 38907 }, { "epoch": 0.8144519802394709, "grad_norm": 0.2747926712036133, "learning_rate": 0.0001658168438813374, "loss": 11.6855, "step": 38908 }, { "epoch": 0.8144729130034329, "grad_norm": 0.31461483240127563, "learning_rate": 0.00016581519317173613, "loss": 11.6479, "step": 38909 }, { "epoch": 0.8144938457673951, "grad_norm": 0.2677023112773895, "learning_rate": 0.00016581354243049617, "loss": 11.6778, "step": 38910 }, { "epoch": 0.8145147785313572, "grad_norm": 0.2829027473926544, "learning_rate": 0.0001658118916576183, "loss": 11.6666, "step": 38911 }, { "epoch": 0.8145357112953194, "grad_norm": 0.3323074281215668, "learning_rate": 0.00016581024085310337, "loss": 11.6802, "step": 38912 }, { "epoch": 0.8145566440592816, "grad_norm": 0.3210941255092621, "learning_rate": 0.00016580859001695216, "loss": 11.6699, "step": 38913 }, { "epoch": 0.8145775768232437, "grad_norm": 0.31985825300216675, "learning_rate": 0.00016580693914916537, "loss": 11.6648, "step": 38914 }, { "epoch": 0.8145985095872059, "grad_norm": 0.5113242864608765, "learning_rate": 0.00016580528824974394, "loss": 11.6771, "step": 38915 }, { "epoch": 0.814619442351168, "grad_norm": 0.3833133578300476, "learning_rate": 0.00016580363731868853, "loss": 11.6509, "step": 38916 }, { "epoch": 0.8146403751151302, "grad_norm": 0.34037476778030396, "learning_rate": 0.000165801986356, "loss": 11.6735, "step": 38917 }, { "epoch": 0.8146613078790923, "grad_norm": 0.2909722924232483, "learning_rate": 0.00016580033536167917, "loss": 11.6647, "step": 38918 }, { "epoch": 0.8146822406430545, "grad_norm": 0.38361024856567383, "learning_rate": 0.00016579868433572677, "loss": 11.6585, "step": 38919 }, { "epoch": 0.8147031734070167, "grad_norm": 0.2675565183162689, "learning_rate": 0.00016579703327814364, "loss": 11.6722, "step": 38920 }, { "epoch": 0.8147241061709788, "grad_norm": 0.24985185265541077, "learning_rate": 0.00016579538218893056, "loss": 11.6527, "step": 38921 }, { "epoch": 0.814745038934941, "grad_norm": 0.3118126690387726, "learning_rate": 0.00016579373106808833, "loss": 11.6673, "step": 38922 }, { "epoch": 0.8147659716989031, "grad_norm": 0.3323442339897156, "learning_rate": 0.00016579207991561772, "loss": 11.6728, "step": 38923 }, { "epoch": 0.8147869044628653, "grad_norm": 0.32307636737823486, "learning_rate": 0.00016579042873151952, "loss": 11.6591, "step": 38924 }, { "epoch": 0.8148078372268275, "grad_norm": 0.3379132151603699, "learning_rate": 0.00016578877751579458, "loss": 11.6839, "step": 38925 }, { "epoch": 0.8148287699907896, "grad_norm": 0.35198867321014404, "learning_rate": 0.00016578712626844365, "loss": 11.6687, "step": 38926 }, { "epoch": 0.8148497027547518, "grad_norm": 0.27733707427978516, "learning_rate": 0.00016578547498946752, "loss": 11.6598, "step": 38927 }, { "epoch": 0.8148706355187139, "grad_norm": 0.2999327778816223, "learning_rate": 0.00016578382367886697, "loss": 11.6831, "step": 38928 }, { "epoch": 0.8148915682826761, "grad_norm": 0.28012800216674805, "learning_rate": 0.00016578217233664285, "loss": 11.6564, "step": 38929 }, { "epoch": 0.8149125010466381, "grad_norm": 0.34917163848876953, "learning_rate": 0.00016578052096279593, "loss": 11.6855, "step": 38930 }, { "epoch": 0.8149334338106003, "grad_norm": 0.29057639837265015, "learning_rate": 0.00016577886955732697, "loss": 11.6683, "step": 38931 }, { "epoch": 0.8149543665745625, "grad_norm": 0.252676397562027, "learning_rate": 0.00016577721812023678, "loss": 11.6604, "step": 38932 }, { "epoch": 0.8149752993385246, "grad_norm": 0.2986614406108856, "learning_rate": 0.0001657755666515262, "loss": 11.6632, "step": 38933 }, { "epoch": 0.8149962321024868, "grad_norm": 0.4211488366127014, "learning_rate": 0.00016577391515119593, "loss": 11.668, "step": 38934 }, { "epoch": 0.8150171648664489, "grad_norm": 0.3223332464694977, "learning_rate": 0.00016577226361924686, "loss": 11.6694, "step": 38935 }, { "epoch": 0.8150380976304111, "grad_norm": 0.34245237708091736, "learning_rate": 0.00016577061205567977, "loss": 11.6667, "step": 38936 }, { "epoch": 0.8150590303943732, "grad_norm": 0.30875882506370544, "learning_rate": 0.0001657689604604954, "loss": 11.675, "step": 38937 }, { "epoch": 0.8150799631583354, "grad_norm": 0.35391879081726074, "learning_rate": 0.00016576730883369455, "loss": 11.6874, "step": 38938 }, { "epoch": 0.8151008959222976, "grad_norm": 0.3309619426727295, "learning_rate": 0.00016576565717527807, "loss": 11.6802, "step": 38939 }, { "epoch": 0.8151218286862597, "grad_norm": 0.45446497201919556, "learning_rate": 0.00016576400548524672, "loss": 11.6862, "step": 38940 }, { "epoch": 0.8151427614502219, "grad_norm": 0.36022359132766724, "learning_rate": 0.00016576235376360127, "loss": 11.683, "step": 38941 }, { "epoch": 0.815163694214184, "grad_norm": 0.30964192748069763, "learning_rate": 0.00016576070201034258, "loss": 11.6681, "step": 38942 }, { "epoch": 0.8151846269781462, "grad_norm": 0.3188613951206207, "learning_rate": 0.00016575905022547135, "loss": 11.6604, "step": 38943 }, { "epoch": 0.8152055597421084, "grad_norm": 0.32418230175971985, "learning_rate": 0.00016575739840898846, "loss": 11.6648, "step": 38944 }, { "epoch": 0.8152264925060705, "grad_norm": 0.31541046500205994, "learning_rate": 0.0001657557465608947, "loss": 11.6669, "step": 38945 }, { "epoch": 0.8152474252700327, "grad_norm": 0.35789674520492554, "learning_rate": 0.00016575409468119076, "loss": 11.6678, "step": 38946 }, { "epoch": 0.8152683580339948, "grad_norm": 0.28905346989631653, "learning_rate": 0.00016575244276987756, "loss": 11.6672, "step": 38947 }, { "epoch": 0.815289290797957, "grad_norm": 0.270284503698349, "learning_rate": 0.0001657507908269558, "loss": 11.6586, "step": 38948 }, { "epoch": 0.8153102235619191, "grad_norm": 0.2663031816482544, "learning_rate": 0.00016574913885242638, "loss": 11.6717, "step": 38949 }, { "epoch": 0.8153311563258813, "grad_norm": 0.27248042821884155, "learning_rate": 0.00016574748684628998, "loss": 11.6612, "step": 38950 }, { "epoch": 0.8153520890898435, "grad_norm": 0.29029661417007446, "learning_rate": 0.00016574583480854745, "loss": 11.663, "step": 38951 }, { "epoch": 0.8153730218538056, "grad_norm": 0.2926161587238312, "learning_rate": 0.00016574418273919962, "loss": 11.683, "step": 38952 }, { "epoch": 0.8153939546177678, "grad_norm": 0.2633541524410248, "learning_rate": 0.0001657425306382472, "loss": 11.6775, "step": 38953 }, { "epoch": 0.8154148873817298, "grad_norm": 0.2614325284957886, "learning_rate": 0.00016574087850569107, "loss": 11.6729, "step": 38954 }, { "epoch": 0.815435820145692, "grad_norm": 0.3146923780441284, "learning_rate": 0.00016573922634153195, "loss": 11.6694, "step": 38955 }, { "epoch": 0.8154567529096541, "grad_norm": 0.2790992856025696, "learning_rate": 0.00016573757414577064, "loss": 11.6617, "step": 38956 }, { "epoch": 0.8154776856736163, "grad_norm": 0.2871488034725189, "learning_rate": 0.00016573592191840804, "loss": 11.6713, "step": 38957 }, { "epoch": 0.8154986184375785, "grad_norm": 0.24007439613342285, "learning_rate": 0.0001657342696594448, "loss": 11.6737, "step": 38958 }, { "epoch": 0.8155195512015406, "grad_norm": 0.2648918330669403, "learning_rate": 0.0001657326173688818, "loss": 11.6768, "step": 38959 }, { "epoch": 0.8155404839655028, "grad_norm": 0.28591811656951904, "learning_rate": 0.0001657309650467198, "loss": 11.6649, "step": 38960 }, { "epoch": 0.8155614167294649, "grad_norm": 0.35857146978378296, "learning_rate": 0.00016572931269295964, "loss": 11.6523, "step": 38961 }, { "epoch": 0.8155823494934271, "grad_norm": 0.3193778991699219, "learning_rate": 0.00016572766030760206, "loss": 11.6661, "step": 38962 }, { "epoch": 0.8156032822573893, "grad_norm": 0.30846408009529114, "learning_rate": 0.00016572600789064787, "loss": 11.6657, "step": 38963 }, { "epoch": 0.8156242150213514, "grad_norm": 0.2940088212490082, "learning_rate": 0.0001657243554420979, "loss": 11.6417, "step": 38964 }, { "epoch": 0.8156451477853136, "grad_norm": 0.2766648530960083, "learning_rate": 0.0001657227029619529, "loss": 11.689, "step": 38965 }, { "epoch": 0.8156660805492757, "grad_norm": 0.3784257769584656, "learning_rate": 0.00016572105045021366, "loss": 11.6853, "step": 38966 }, { "epoch": 0.8156870133132379, "grad_norm": 0.26075032353401184, "learning_rate": 0.000165719397906881, "loss": 11.669, "step": 38967 }, { "epoch": 0.8157079460772, "grad_norm": 0.2545379102230072, "learning_rate": 0.00016571774533195573, "loss": 11.6701, "step": 38968 }, { "epoch": 0.8157288788411622, "grad_norm": 0.3342784643173218, "learning_rate": 0.0001657160927254386, "loss": 11.6718, "step": 38969 }, { "epoch": 0.8157498116051244, "grad_norm": 0.3007090091705322, "learning_rate": 0.00016571444008733045, "loss": 11.6715, "step": 38970 }, { "epoch": 0.8157707443690865, "grad_norm": 0.2601918578147888, "learning_rate": 0.00016571278741763204, "loss": 11.6659, "step": 38971 }, { "epoch": 0.8157916771330487, "grad_norm": 0.3330645263195038, "learning_rate": 0.00016571113471634417, "loss": 11.6845, "step": 38972 }, { "epoch": 0.8158126098970108, "grad_norm": 0.34613296389579773, "learning_rate": 0.00016570948198346765, "loss": 11.6736, "step": 38973 }, { "epoch": 0.815833542660973, "grad_norm": 0.35209226608276367, "learning_rate": 0.00016570782921900324, "loss": 11.6763, "step": 38974 }, { "epoch": 0.815854475424935, "grad_norm": 0.32136228680610657, "learning_rate": 0.0001657061764229518, "loss": 11.6827, "step": 38975 }, { "epoch": 0.8158754081888973, "grad_norm": 0.3863736093044281, "learning_rate": 0.00016570452359531408, "loss": 11.681, "step": 38976 }, { "epoch": 0.8158963409528595, "grad_norm": 0.2798081636428833, "learning_rate": 0.00016570287073609086, "loss": 11.6643, "step": 38977 }, { "epoch": 0.8159172737168215, "grad_norm": 0.4202588200569153, "learning_rate": 0.00016570121784528294, "loss": 11.6773, "step": 38978 }, { "epoch": 0.8159382064807837, "grad_norm": 0.3714663088321686, "learning_rate": 0.00016569956492289115, "loss": 11.6635, "step": 38979 }, { "epoch": 0.8159591392447458, "grad_norm": 0.2874114513397217, "learning_rate": 0.00016569791196891626, "loss": 11.6776, "step": 38980 }, { "epoch": 0.815980072008708, "grad_norm": 0.2512364089488983, "learning_rate": 0.00016569625898335907, "loss": 11.6631, "step": 38981 }, { "epoch": 0.8160010047726702, "grad_norm": 0.29526016116142273, "learning_rate": 0.00016569460596622037, "loss": 11.6538, "step": 38982 }, { "epoch": 0.8160219375366323, "grad_norm": 0.2587377429008484, "learning_rate": 0.00016569295291750095, "loss": 11.6838, "step": 38983 }, { "epoch": 0.8160428703005945, "grad_norm": 0.2710776627063751, "learning_rate": 0.00016569129983720163, "loss": 11.6698, "step": 38984 }, { "epoch": 0.8160638030645566, "grad_norm": 0.26588404178619385, "learning_rate": 0.00016568964672532315, "loss": 11.6687, "step": 38985 }, { "epoch": 0.8160847358285188, "grad_norm": 0.39384737610816956, "learning_rate": 0.00016568799358186636, "loss": 11.6716, "step": 38986 }, { "epoch": 0.8161056685924809, "grad_norm": 0.5269784927368164, "learning_rate": 0.00016568634040683205, "loss": 11.6843, "step": 38987 }, { "epoch": 0.8161266013564431, "grad_norm": 0.33543387055397034, "learning_rate": 0.000165684687200221, "loss": 11.6789, "step": 38988 }, { "epoch": 0.8161475341204053, "grad_norm": 0.29535773396492004, "learning_rate": 0.00016568303396203397, "loss": 11.663, "step": 38989 }, { "epoch": 0.8161684668843674, "grad_norm": 0.3392636179924011, "learning_rate": 0.0001656813806922718, "loss": 11.6598, "step": 38990 }, { "epoch": 0.8161893996483296, "grad_norm": 0.3545564115047455, "learning_rate": 0.00016567972739093528, "loss": 11.6616, "step": 38991 }, { "epoch": 0.8162103324122917, "grad_norm": 0.3014112412929535, "learning_rate": 0.0001656780740580252, "loss": 11.6764, "step": 38992 }, { "epoch": 0.8162312651762539, "grad_norm": 0.31605228781700134, "learning_rate": 0.00016567642069354236, "loss": 11.6642, "step": 38993 }, { "epoch": 0.816252197940216, "grad_norm": 0.307271808385849, "learning_rate": 0.00016567476729748755, "loss": 11.6714, "step": 38994 }, { "epoch": 0.8162731307041782, "grad_norm": 0.25370708107948303, "learning_rate": 0.00016567311386986156, "loss": 11.664, "step": 38995 }, { "epoch": 0.8162940634681404, "grad_norm": 0.3611057996749878, "learning_rate": 0.00016567146041066518, "loss": 11.6577, "step": 38996 }, { "epoch": 0.8163149962321025, "grad_norm": 0.3351098895072937, "learning_rate": 0.00016566980691989921, "loss": 11.6731, "step": 38997 }, { "epoch": 0.8163359289960647, "grad_norm": 0.3235853314399719, "learning_rate": 0.00016566815339756445, "loss": 11.692, "step": 38998 }, { "epoch": 0.8163568617600268, "grad_norm": 0.2853372097015381, "learning_rate": 0.0001656664998436617, "loss": 11.6837, "step": 38999 }, { "epoch": 0.816377794523989, "grad_norm": 0.28315916657447815, "learning_rate": 0.00016566484625819174, "loss": 11.667, "step": 39000 }, { "epoch": 0.816377794523989, "eval_loss": 11.669824600219727, "eval_runtime": 34.3451, "eval_samples_per_second": 27.981, "eval_steps_per_second": 7.017, "step": 39000 }, { "epoch": 0.8163987272879512, "grad_norm": 0.29660356044769287, "learning_rate": 0.0001656631926411554, "loss": 11.6665, "step": 39001 }, { "epoch": 0.8164196600519132, "grad_norm": 0.34358757734298706, "learning_rate": 0.00016566153899255341, "loss": 11.6805, "step": 39002 }, { "epoch": 0.8164405928158754, "grad_norm": 0.30385622382164, "learning_rate": 0.00016565988531238663, "loss": 11.6471, "step": 39003 }, { "epoch": 0.8164615255798375, "grad_norm": 0.2778504490852356, "learning_rate": 0.00016565823160065581, "loss": 11.664, "step": 39004 }, { "epoch": 0.8164824583437997, "grad_norm": 0.32957613468170166, "learning_rate": 0.00016565657785736176, "loss": 11.6677, "step": 39005 }, { "epoch": 0.8165033911077618, "grad_norm": 0.2964209020137787, "learning_rate": 0.00016565492408250528, "loss": 11.6563, "step": 39006 }, { "epoch": 0.816524323871724, "grad_norm": 0.26972174644470215, "learning_rate": 0.0001656532702760872, "loss": 11.6692, "step": 39007 }, { "epoch": 0.8165452566356862, "grad_norm": 0.26838502287864685, "learning_rate": 0.00016565161643810824, "loss": 11.6809, "step": 39008 }, { "epoch": 0.8165661893996483, "grad_norm": 0.34335872530937195, "learning_rate": 0.00016564996256856923, "loss": 11.6595, "step": 39009 }, { "epoch": 0.8165871221636105, "grad_norm": 0.2791919708251953, "learning_rate": 0.000165648308667471, "loss": 11.6641, "step": 39010 }, { "epoch": 0.8166080549275726, "grad_norm": 0.2931641638278961, "learning_rate": 0.00016564665473481428, "loss": 11.6753, "step": 39011 }, { "epoch": 0.8166289876915348, "grad_norm": 0.29915568232536316, "learning_rate": 0.00016564500077059988, "loss": 11.6714, "step": 39012 }, { "epoch": 0.8166499204554969, "grad_norm": 0.25459980964660645, "learning_rate": 0.00016564334677482865, "loss": 11.6577, "step": 39013 }, { "epoch": 0.8166708532194591, "grad_norm": 0.2619275450706482, "learning_rate": 0.00016564169274750135, "loss": 11.664, "step": 39014 }, { "epoch": 0.8166917859834213, "grad_norm": 0.30097922682762146, "learning_rate": 0.00016564003868861874, "loss": 11.6659, "step": 39015 }, { "epoch": 0.8167127187473834, "grad_norm": 0.28480058908462524, "learning_rate": 0.00016563838459818166, "loss": 11.6666, "step": 39016 }, { "epoch": 0.8167336515113456, "grad_norm": 0.2992388606071472, "learning_rate": 0.0001656367304761909, "loss": 11.6792, "step": 39017 }, { "epoch": 0.8167545842753077, "grad_norm": 0.2934909164905548, "learning_rate": 0.00016563507632264725, "loss": 11.6755, "step": 39018 }, { "epoch": 0.8167755170392699, "grad_norm": 0.3557066023349762, "learning_rate": 0.0001656334221375515, "loss": 11.6672, "step": 39019 }, { "epoch": 0.8167964498032321, "grad_norm": 0.3762716054916382, "learning_rate": 0.00016563176792090445, "loss": 11.6813, "step": 39020 }, { "epoch": 0.8168173825671942, "grad_norm": 0.3035680949687958, "learning_rate": 0.00016563011367270688, "loss": 11.678, "step": 39021 }, { "epoch": 0.8168383153311564, "grad_norm": 0.26295533776283264, "learning_rate": 0.00016562845939295962, "loss": 11.6782, "step": 39022 }, { "epoch": 0.8168592480951185, "grad_norm": 0.25480833649635315, "learning_rate": 0.00016562680508166343, "loss": 11.6656, "step": 39023 }, { "epoch": 0.8168801808590807, "grad_norm": 0.429460734128952, "learning_rate": 0.00016562515073881913, "loss": 11.6763, "step": 39024 }, { "epoch": 0.8169011136230427, "grad_norm": 0.30796006321907043, "learning_rate": 0.0001656234963644275, "loss": 11.6606, "step": 39025 }, { "epoch": 0.8169220463870049, "grad_norm": 0.3240021765232086, "learning_rate": 0.00016562184195848936, "loss": 11.6767, "step": 39026 }, { "epoch": 0.8169429791509671, "grad_norm": 0.27992019057273865, "learning_rate": 0.00016562018752100545, "loss": 11.6613, "step": 39027 }, { "epoch": 0.8169639119149292, "grad_norm": 0.35501328110694885, "learning_rate": 0.0001656185330519766, "loss": 11.6796, "step": 39028 }, { "epoch": 0.8169848446788914, "grad_norm": 0.30917423963546753, "learning_rate": 0.00016561687855140363, "loss": 11.6768, "step": 39029 }, { "epoch": 0.8170057774428535, "grad_norm": 0.29693683981895447, "learning_rate": 0.00016561522401928734, "loss": 11.6638, "step": 39030 }, { "epoch": 0.8170267102068157, "grad_norm": 0.3603189289569855, "learning_rate": 0.00016561356945562846, "loss": 11.6873, "step": 39031 }, { "epoch": 0.8170476429707778, "grad_norm": 0.3215640187263489, "learning_rate": 0.0001656119148604278, "loss": 11.6622, "step": 39032 }, { "epoch": 0.81706857573474, "grad_norm": 0.24161291122436523, "learning_rate": 0.0001656102602336862, "loss": 11.6477, "step": 39033 }, { "epoch": 0.8170895084987022, "grad_norm": 0.2685464322566986, "learning_rate": 0.00016560860557540444, "loss": 11.6756, "step": 39034 }, { "epoch": 0.8171104412626643, "grad_norm": 0.28075140714645386, "learning_rate": 0.0001656069508855833, "loss": 11.6819, "step": 39035 }, { "epoch": 0.8171313740266265, "grad_norm": 0.3175852298736572, "learning_rate": 0.00016560529616422358, "loss": 11.6713, "step": 39036 }, { "epoch": 0.8171523067905886, "grad_norm": 0.3235185146331787, "learning_rate": 0.0001656036414113261, "loss": 11.6603, "step": 39037 }, { "epoch": 0.8171732395545508, "grad_norm": 0.30528929829597473, "learning_rate": 0.00016560198662689162, "loss": 11.665, "step": 39038 }, { "epoch": 0.817194172318513, "grad_norm": 0.28263241052627563, "learning_rate": 0.00016560033181092093, "loss": 11.6676, "step": 39039 }, { "epoch": 0.8172151050824751, "grad_norm": 0.31182974576950073, "learning_rate": 0.00016559867696341489, "loss": 11.6857, "step": 39040 }, { "epoch": 0.8172360378464373, "grad_norm": 0.25128665566444397, "learning_rate": 0.00016559702208437423, "loss": 11.645, "step": 39041 }, { "epoch": 0.8172569706103994, "grad_norm": 0.364440381526947, "learning_rate": 0.00016559536717379976, "loss": 11.6663, "step": 39042 }, { "epoch": 0.8172779033743616, "grad_norm": 0.2915739119052887, "learning_rate": 0.00016559371223169226, "loss": 11.6625, "step": 39043 }, { "epoch": 0.8172988361383237, "grad_norm": 0.3172471821308136, "learning_rate": 0.00016559205725805257, "loss": 11.674, "step": 39044 }, { "epoch": 0.8173197689022859, "grad_norm": 0.28590062260627747, "learning_rate": 0.0001655904022528815, "loss": 11.6788, "step": 39045 }, { "epoch": 0.8173407016662481, "grad_norm": 0.31698980927467346, "learning_rate": 0.00016558874721617976, "loss": 11.6714, "step": 39046 }, { "epoch": 0.8173616344302101, "grad_norm": 0.37297821044921875, "learning_rate": 0.00016558709214794823, "loss": 11.6717, "step": 39047 }, { "epoch": 0.8173825671941723, "grad_norm": 0.3064858019351959, "learning_rate": 0.00016558543704818765, "loss": 11.6804, "step": 39048 }, { "epoch": 0.8174034999581344, "grad_norm": 0.2847924828529358, "learning_rate": 0.00016558378191689884, "loss": 11.6619, "step": 39049 }, { "epoch": 0.8174244327220966, "grad_norm": 0.27446243166923523, "learning_rate": 0.0001655821267540826, "loss": 11.6666, "step": 39050 }, { "epoch": 0.8174453654860587, "grad_norm": 0.3004550337791443, "learning_rate": 0.0001655804715597397, "loss": 11.6745, "step": 39051 }, { "epoch": 0.8174662982500209, "grad_norm": 0.2620658576488495, "learning_rate": 0.00016557881633387096, "loss": 11.6658, "step": 39052 }, { "epoch": 0.8174872310139831, "grad_norm": 0.4055626690387726, "learning_rate": 0.0001655771610764772, "loss": 11.6744, "step": 39053 }, { "epoch": 0.8175081637779452, "grad_norm": 0.3700830340385437, "learning_rate": 0.00016557550578755913, "loss": 11.6758, "step": 39054 }, { "epoch": 0.8175290965419074, "grad_norm": 0.25414419174194336, "learning_rate": 0.00016557385046711765, "loss": 11.6562, "step": 39055 }, { "epoch": 0.8175500293058695, "grad_norm": 0.25796133279800415, "learning_rate": 0.00016557219511515348, "loss": 11.661, "step": 39056 }, { "epoch": 0.8175709620698317, "grad_norm": 0.33531075716018677, "learning_rate": 0.00016557053973166743, "loss": 11.6593, "step": 39057 }, { "epoch": 0.8175918948337939, "grad_norm": 0.2972966730594635, "learning_rate": 0.00016556888431666032, "loss": 11.6715, "step": 39058 }, { "epoch": 0.817612827597756, "grad_norm": 0.27847519516944885, "learning_rate": 0.00016556722887013295, "loss": 11.6756, "step": 39059 }, { "epoch": 0.8176337603617182, "grad_norm": 0.3149246573448181, "learning_rate": 0.00016556557339208604, "loss": 11.6555, "step": 39060 }, { "epoch": 0.8176546931256803, "grad_norm": 0.27817052602767944, "learning_rate": 0.0001655639178825205, "loss": 11.6558, "step": 39061 }, { "epoch": 0.8176756258896425, "grad_norm": 0.2814579904079437, "learning_rate": 0.00016556226234143708, "loss": 11.6755, "step": 39062 }, { "epoch": 0.8176965586536046, "grad_norm": 0.3089553415775299, "learning_rate": 0.00016556060676883654, "loss": 11.6499, "step": 39063 }, { "epoch": 0.8177174914175668, "grad_norm": 0.44582271575927734, "learning_rate": 0.00016555895116471972, "loss": 11.6915, "step": 39064 }, { "epoch": 0.817738424181529, "grad_norm": 0.32264795899391174, "learning_rate": 0.00016555729552908738, "loss": 11.6656, "step": 39065 }, { "epoch": 0.8177593569454911, "grad_norm": 0.3330128788948059, "learning_rate": 0.00016555563986194032, "loss": 11.6852, "step": 39066 }, { "epoch": 0.8177802897094533, "grad_norm": 0.3120129704475403, "learning_rate": 0.0001655539841632794, "loss": 11.6529, "step": 39067 }, { "epoch": 0.8178012224734154, "grad_norm": 0.285454124212265, "learning_rate": 0.0001655523284331053, "loss": 11.6756, "step": 39068 }, { "epoch": 0.8178221552373776, "grad_norm": 0.3029199242591858, "learning_rate": 0.00016555067267141896, "loss": 11.6565, "step": 39069 }, { "epoch": 0.8178430880013396, "grad_norm": 0.24904951453208923, "learning_rate": 0.00016554901687822104, "loss": 11.6686, "step": 39070 }, { "epoch": 0.8178640207653018, "grad_norm": 0.4263720214366913, "learning_rate": 0.0001655473610535124, "loss": 11.6572, "step": 39071 }, { "epoch": 0.817884953529264, "grad_norm": 0.38382217288017273, "learning_rate": 0.00016554570519729386, "loss": 11.6603, "step": 39072 }, { "epoch": 0.8179058862932261, "grad_norm": 0.362662672996521, "learning_rate": 0.00016554404930956614, "loss": 11.6763, "step": 39073 }, { "epoch": 0.8179268190571883, "grad_norm": 0.3866129517555237, "learning_rate": 0.00016554239339033013, "loss": 11.6735, "step": 39074 }, { "epoch": 0.8179477518211504, "grad_norm": 0.32342737913131714, "learning_rate": 0.00016554073743958655, "loss": 11.672, "step": 39075 }, { "epoch": 0.8179686845851126, "grad_norm": 0.37428978085517883, "learning_rate": 0.00016553908145733624, "loss": 11.6681, "step": 39076 }, { "epoch": 0.8179896173490748, "grad_norm": 0.3429654538631439, "learning_rate": 0.00016553742544357997, "loss": 11.675, "step": 39077 }, { "epoch": 0.8180105501130369, "grad_norm": 0.27101975679397583, "learning_rate": 0.00016553576939831851, "loss": 11.6687, "step": 39078 }, { "epoch": 0.8180314828769991, "grad_norm": 0.2992861866950989, "learning_rate": 0.00016553411332155275, "loss": 11.6496, "step": 39079 }, { "epoch": 0.8180524156409612, "grad_norm": 0.4066108167171478, "learning_rate": 0.0001655324572132834, "loss": 11.6793, "step": 39080 }, { "epoch": 0.8180733484049234, "grad_norm": 0.29246577620506287, "learning_rate": 0.0001655308010735113, "loss": 11.6847, "step": 39081 }, { "epoch": 0.8180942811688855, "grad_norm": 0.2446056455373764, "learning_rate": 0.00016552914490223724, "loss": 11.6744, "step": 39082 }, { "epoch": 0.8181152139328477, "grad_norm": 0.24275217950344086, "learning_rate": 0.00016552748869946198, "loss": 11.669, "step": 39083 }, { "epoch": 0.8181361466968099, "grad_norm": 0.33867383003234863, "learning_rate": 0.00016552583246518636, "loss": 11.6641, "step": 39084 }, { "epoch": 0.818157079460772, "grad_norm": 0.32729268074035645, "learning_rate": 0.00016552417619941114, "loss": 11.669, "step": 39085 }, { "epoch": 0.8181780122247342, "grad_norm": 0.3397572338581085, "learning_rate": 0.00016552251990213716, "loss": 11.6821, "step": 39086 }, { "epoch": 0.8181989449886963, "grad_norm": 0.3086735010147095, "learning_rate": 0.00016552086357336516, "loss": 11.6721, "step": 39087 }, { "epoch": 0.8182198777526585, "grad_norm": 0.29518795013427734, "learning_rate": 0.000165519207213096, "loss": 11.6696, "step": 39088 }, { "epoch": 0.8182408105166206, "grad_norm": 0.29277071356773376, "learning_rate": 0.00016551755082133042, "loss": 11.6804, "step": 39089 }, { "epoch": 0.8182617432805828, "grad_norm": 0.34227028489112854, "learning_rate": 0.00016551589439806925, "loss": 11.6671, "step": 39090 }, { "epoch": 0.818282676044545, "grad_norm": 0.27341726422309875, "learning_rate": 0.00016551423794331332, "loss": 11.6643, "step": 39091 }, { "epoch": 0.818303608808507, "grad_norm": 0.3213977515697479, "learning_rate": 0.0001655125814570633, "loss": 11.6751, "step": 39092 }, { "epoch": 0.8183245415724693, "grad_norm": 0.2600082457065582, "learning_rate": 0.00016551092493932013, "loss": 11.6733, "step": 39093 }, { "epoch": 0.8183454743364313, "grad_norm": 0.28774377703666687, "learning_rate": 0.0001655092683900845, "loss": 11.6521, "step": 39094 }, { "epoch": 0.8183664071003935, "grad_norm": 0.3127298951148987, "learning_rate": 0.0001655076118093573, "loss": 11.6883, "step": 39095 }, { "epoch": 0.8183873398643556, "grad_norm": 0.2986105978488922, "learning_rate": 0.00016550595519713928, "loss": 11.6649, "step": 39096 }, { "epoch": 0.8184082726283178, "grad_norm": 0.2631148099899292, "learning_rate": 0.00016550429855343118, "loss": 11.6653, "step": 39097 }, { "epoch": 0.81842920539228, "grad_norm": 0.2594817876815796, "learning_rate": 0.0001655026418782339, "loss": 11.6637, "step": 39098 }, { "epoch": 0.8184501381562421, "grad_norm": 0.3807549476623535, "learning_rate": 0.00016550098517154814, "loss": 11.6409, "step": 39099 }, { "epoch": 0.8184710709202043, "grad_norm": 0.4606989324092865, "learning_rate": 0.00016549932843337478, "loss": 11.6704, "step": 39100 }, { "epoch": 0.8184920036841664, "grad_norm": 0.3790939450263977, "learning_rate": 0.00016549767166371457, "loss": 11.6667, "step": 39101 }, { "epoch": 0.8185129364481286, "grad_norm": 0.33730000257492065, "learning_rate": 0.00016549601486256835, "loss": 11.672, "step": 39102 }, { "epoch": 0.8185338692120908, "grad_norm": 0.3134332299232483, "learning_rate": 0.00016549435802993686, "loss": 11.6789, "step": 39103 }, { "epoch": 0.8185548019760529, "grad_norm": 0.35561060905456543, "learning_rate": 0.0001654927011658209, "loss": 11.6891, "step": 39104 }, { "epoch": 0.8185757347400151, "grad_norm": 0.2711980938911438, "learning_rate": 0.00016549104427022128, "loss": 11.6651, "step": 39105 }, { "epoch": 0.8185966675039772, "grad_norm": 0.29376021027565, "learning_rate": 0.00016548938734313882, "loss": 11.6671, "step": 39106 }, { "epoch": 0.8186176002679394, "grad_norm": 0.3039877116680145, "learning_rate": 0.00016548773038457427, "loss": 11.6528, "step": 39107 }, { "epoch": 0.8186385330319015, "grad_norm": 0.2921238839626312, "learning_rate": 0.00016548607339452853, "loss": 11.6851, "step": 39108 }, { "epoch": 0.8186594657958637, "grad_norm": 0.34260207414627075, "learning_rate": 0.00016548441637300227, "loss": 11.6591, "step": 39109 }, { "epoch": 0.8186803985598259, "grad_norm": 0.29342126846313477, "learning_rate": 0.00016548275931999634, "loss": 11.6778, "step": 39110 }, { "epoch": 0.818701331323788, "grad_norm": 0.2805202901363373, "learning_rate": 0.00016548110223551154, "loss": 11.6686, "step": 39111 }, { "epoch": 0.8187222640877502, "grad_norm": 0.3580070734024048, "learning_rate": 0.00016547944511954867, "loss": 11.6679, "step": 39112 }, { "epoch": 0.8187431968517123, "grad_norm": 0.33270329236984253, "learning_rate": 0.00016547778797210852, "loss": 11.6688, "step": 39113 }, { "epoch": 0.8187641296156745, "grad_norm": 0.39072614908218384, "learning_rate": 0.00016547613079319186, "loss": 11.6844, "step": 39114 }, { "epoch": 0.8187850623796366, "grad_norm": 0.3408176600933075, "learning_rate": 0.00016547447358279953, "loss": 11.676, "step": 39115 }, { "epoch": 0.8188059951435988, "grad_norm": 0.4355420768260956, "learning_rate": 0.0001654728163409323, "loss": 11.6729, "step": 39116 }, { "epoch": 0.818826927907561, "grad_norm": 0.29263582825660706, "learning_rate": 0.00016547115906759098, "loss": 11.6765, "step": 39117 }, { "epoch": 0.818847860671523, "grad_norm": 0.26502108573913574, "learning_rate": 0.00016546950176277639, "loss": 11.6552, "step": 39118 }, { "epoch": 0.8188687934354852, "grad_norm": 0.3612822890281677, "learning_rate": 0.00016546784442648927, "loss": 11.6721, "step": 39119 }, { "epoch": 0.8188897261994473, "grad_norm": 0.3135310113430023, "learning_rate": 0.00016546618705873045, "loss": 11.6712, "step": 39120 }, { "epoch": 0.8189106589634095, "grad_norm": 0.3289758563041687, "learning_rate": 0.0001654645296595007, "loss": 11.6839, "step": 39121 }, { "epoch": 0.8189315917273717, "grad_norm": 0.3163582980632782, "learning_rate": 0.00016546287222880086, "loss": 11.6592, "step": 39122 }, { "epoch": 0.8189525244913338, "grad_norm": 0.4485832750797272, "learning_rate": 0.00016546121476663172, "loss": 11.6784, "step": 39123 }, { "epoch": 0.818973457255296, "grad_norm": 0.3401772081851959, "learning_rate": 0.00016545955727299404, "loss": 11.6632, "step": 39124 }, { "epoch": 0.8189943900192581, "grad_norm": 0.39137446880340576, "learning_rate": 0.00016545789974788865, "loss": 11.6588, "step": 39125 }, { "epoch": 0.8190153227832203, "grad_norm": 0.3808474540710449, "learning_rate": 0.00016545624219131631, "loss": 11.6672, "step": 39126 }, { "epoch": 0.8190362555471824, "grad_norm": 0.3088573217391968, "learning_rate": 0.00016545458460327786, "loss": 11.6726, "step": 39127 }, { "epoch": 0.8190571883111446, "grad_norm": 0.29282572865486145, "learning_rate": 0.00016545292698377407, "loss": 11.6852, "step": 39128 }, { "epoch": 0.8190781210751068, "grad_norm": 0.3216685354709625, "learning_rate": 0.00016545126933280576, "loss": 11.6869, "step": 39129 }, { "epoch": 0.8190990538390689, "grad_norm": 0.32410916686058044, "learning_rate": 0.00016544961165037374, "loss": 11.6455, "step": 39130 }, { "epoch": 0.8191199866030311, "grad_norm": 0.31303852796554565, "learning_rate": 0.00016544795393647874, "loss": 11.6649, "step": 39131 }, { "epoch": 0.8191409193669932, "grad_norm": 0.4218817353248596, "learning_rate": 0.00016544629619112163, "loss": 11.6675, "step": 39132 }, { "epoch": 0.8191618521309554, "grad_norm": 0.33231881260871887, "learning_rate": 0.00016544463841430314, "loss": 11.6794, "step": 39133 }, { "epoch": 0.8191827848949175, "grad_norm": 0.33765754103660583, "learning_rate": 0.00016544298060602414, "loss": 11.65, "step": 39134 }, { "epoch": 0.8192037176588797, "grad_norm": 0.32715538144111633, "learning_rate": 0.00016544132276628536, "loss": 11.6714, "step": 39135 }, { "epoch": 0.8192246504228419, "grad_norm": 0.3516748547554016, "learning_rate": 0.0001654396648950876, "loss": 11.6701, "step": 39136 }, { "epoch": 0.819245583186804, "grad_norm": 0.39960435032844543, "learning_rate": 0.0001654380069924317, "loss": 11.6632, "step": 39137 }, { "epoch": 0.8192665159507662, "grad_norm": 0.3378969728946686, "learning_rate": 0.00016543634905831847, "loss": 11.6731, "step": 39138 }, { "epoch": 0.8192874487147283, "grad_norm": 0.2222139835357666, "learning_rate": 0.00016543469109274862, "loss": 11.6778, "step": 39139 }, { "epoch": 0.8193083814786905, "grad_norm": 0.36839988827705383, "learning_rate": 0.00016543303309572306, "loss": 11.6832, "step": 39140 }, { "epoch": 0.8193293142426527, "grad_norm": 0.3599088788032532, "learning_rate": 0.0001654313750672425, "loss": 11.6583, "step": 39141 }, { "epoch": 0.8193502470066147, "grad_norm": 0.23762363195419312, "learning_rate": 0.00016542971700730775, "loss": 11.6668, "step": 39142 }, { "epoch": 0.8193711797705769, "grad_norm": 0.399759441614151, "learning_rate": 0.00016542805891591966, "loss": 11.6664, "step": 39143 }, { "epoch": 0.819392112534539, "grad_norm": 0.3388098180294037, "learning_rate": 0.000165426400793079, "loss": 11.6687, "step": 39144 }, { "epoch": 0.8194130452985012, "grad_norm": 0.3139989972114563, "learning_rate": 0.00016542474263878655, "loss": 11.673, "step": 39145 }, { "epoch": 0.8194339780624633, "grad_norm": 0.34512820839881897, "learning_rate": 0.00016542308445304307, "loss": 11.6638, "step": 39146 }, { "epoch": 0.8194549108264255, "grad_norm": 0.40162235498428345, "learning_rate": 0.00016542142623584945, "loss": 11.6654, "step": 39147 }, { "epoch": 0.8194758435903877, "grad_norm": 0.3050730526447296, "learning_rate": 0.0001654197679872064, "loss": 11.6668, "step": 39148 }, { "epoch": 0.8194967763543498, "grad_norm": 0.3230202794075012, "learning_rate": 0.00016541810970711475, "loss": 11.6737, "step": 39149 }, { "epoch": 0.819517709118312, "grad_norm": 0.3221115171909332, "learning_rate": 0.00016541645139557537, "loss": 11.6544, "step": 39150 }, { "epoch": 0.8195386418822741, "grad_norm": 0.31810587644577026, "learning_rate": 0.00016541479305258894, "loss": 11.6739, "step": 39151 }, { "epoch": 0.8195595746462363, "grad_norm": 0.34133443236351013, "learning_rate": 0.00016541313467815632, "loss": 11.6887, "step": 39152 }, { "epoch": 0.8195805074101984, "grad_norm": 0.3835006654262543, "learning_rate": 0.0001654114762722783, "loss": 11.6673, "step": 39153 }, { "epoch": 0.8196014401741606, "grad_norm": 0.3461964726448059, "learning_rate": 0.00016540981783495564, "loss": 11.6734, "step": 39154 }, { "epoch": 0.8196223729381228, "grad_norm": 0.3052826523780823, "learning_rate": 0.00016540815936618922, "loss": 11.6568, "step": 39155 }, { "epoch": 0.8196433057020849, "grad_norm": 0.28624656796455383, "learning_rate": 0.00016540650086597976, "loss": 11.6774, "step": 39156 }, { "epoch": 0.8196642384660471, "grad_norm": 0.2817332446575165, "learning_rate": 0.0001654048423343281, "loss": 11.6801, "step": 39157 }, { "epoch": 0.8196851712300092, "grad_norm": 0.33475834131240845, "learning_rate": 0.000165403183771235, "loss": 11.664, "step": 39158 }, { "epoch": 0.8197061039939714, "grad_norm": 0.4620792269706726, "learning_rate": 0.00016540152517670128, "loss": 11.6737, "step": 39159 }, { "epoch": 0.8197270367579336, "grad_norm": 0.3057491183280945, "learning_rate": 0.00016539986655072773, "loss": 11.6793, "step": 39160 }, { "epoch": 0.8197479695218957, "grad_norm": 0.38931700587272644, "learning_rate": 0.0001653982078933152, "loss": 11.668, "step": 39161 }, { "epoch": 0.8197689022858579, "grad_norm": 0.4311157166957855, "learning_rate": 0.0001653965492044644, "loss": 11.673, "step": 39162 }, { "epoch": 0.81978983504982, "grad_norm": 0.3169238269329071, "learning_rate": 0.00016539489048417617, "loss": 11.6708, "step": 39163 }, { "epoch": 0.8198107678137821, "grad_norm": 0.3397432267665863, "learning_rate": 0.00016539323173245132, "loss": 11.6896, "step": 39164 }, { "epoch": 0.8198317005777442, "grad_norm": 0.27660924196243286, "learning_rate": 0.0001653915729492906, "loss": 11.6748, "step": 39165 }, { "epoch": 0.8198526333417064, "grad_norm": 1.087200403213501, "learning_rate": 0.00016538991413469489, "loss": 11.666, "step": 39166 }, { "epoch": 0.8198735661056686, "grad_norm": 0.32687845826148987, "learning_rate": 0.00016538825528866492, "loss": 11.678, "step": 39167 }, { "epoch": 0.8198944988696307, "grad_norm": 0.28258922696113586, "learning_rate": 0.0001653865964112015, "loss": 11.6635, "step": 39168 }, { "epoch": 0.8199154316335929, "grad_norm": 0.3347833454608917, "learning_rate": 0.00016538493750230544, "loss": 11.6937, "step": 39169 }, { "epoch": 0.819936364397555, "grad_norm": 0.2526896595954895, "learning_rate": 0.0001653832785619775, "loss": 11.669, "step": 39170 }, { "epoch": 0.8199572971615172, "grad_norm": 0.2754112482070923, "learning_rate": 0.00016538161959021855, "loss": 11.6739, "step": 39171 }, { "epoch": 0.8199782299254793, "grad_norm": 0.27681615948677063, "learning_rate": 0.00016537996058702933, "loss": 11.6896, "step": 39172 }, { "epoch": 0.8199991626894415, "grad_norm": 0.24571657180786133, "learning_rate": 0.00016537830155241063, "loss": 11.6612, "step": 39173 }, { "epoch": 0.8200200954534037, "grad_norm": 0.28113654255867004, "learning_rate": 0.0001653766424863633, "loss": 11.6582, "step": 39174 }, { "epoch": 0.8200410282173658, "grad_norm": 0.3270702064037323, "learning_rate": 0.0001653749833888881, "loss": 11.6658, "step": 39175 }, { "epoch": 0.820061960981328, "grad_norm": 0.3146873414516449, "learning_rate": 0.00016537332425998582, "loss": 11.68, "step": 39176 }, { "epoch": 0.8200828937452901, "grad_norm": 0.3141399621963501, "learning_rate": 0.0001653716650996573, "loss": 11.6875, "step": 39177 }, { "epoch": 0.8201038265092523, "grad_norm": 0.29241102933883667, "learning_rate": 0.0001653700059079033, "loss": 11.6598, "step": 39178 }, { "epoch": 0.8201247592732145, "grad_norm": 0.2677963376045227, "learning_rate": 0.00016536834668472462, "loss": 11.6691, "step": 39179 }, { "epoch": 0.8201456920371766, "grad_norm": 0.3254697322845459, "learning_rate": 0.00016536668743012206, "loss": 11.6844, "step": 39180 }, { "epoch": 0.8201666248011388, "grad_norm": 0.30021002888679504, "learning_rate": 0.00016536502814409643, "loss": 11.6739, "step": 39181 }, { "epoch": 0.8201875575651009, "grad_norm": 0.26140663027763367, "learning_rate": 0.00016536336882664853, "loss": 11.6621, "step": 39182 }, { "epoch": 0.8202084903290631, "grad_norm": 0.3135533928871155, "learning_rate": 0.00016536170947777914, "loss": 11.676, "step": 39183 }, { "epoch": 0.8202294230930252, "grad_norm": 0.3626488447189331, "learning_rate": 0.00016536005009748907, "loss": 11.6664, "step": 39184 }, { "epoch": 0.8202503558569874, "grad_norm": 0.3243216276168823, "learning_rate": 0.0001653583906857791, "loss": 11.6857, "step": 39185 }, { "epoch": 0.8202712886209496, "grad_norm": 0.25279614329338074, "learning_rate": 0.00016535673124265008, "loss": 11.6753, "step": 39186 }, { "epoch": 0.8202922213849116, "grad_norm": 0.3800225555896759, "learning_rate": 0.00016535507176810272, "loss": 11.6776, "step": 39187 }, { "epoch": 0.8203131541488738, "grad_norm": 0.3054516017436981, "learning_rate": 0.0001653534122621379, "loss": 11.6735, "step": 39188 }, { "epoch": 0.8203340869128359, "grad_norm": 0.257647842168808, "learning_rate": 0.00016535175272475638, "loss": 11.6694, "step": 39189 }, { "epoch": 0.8203550196767981, "grad_norm": 0.2699582874774933, "learning_rate": 0.00016535009315595895, "loss": 11.6817, "step": 39190 }, { "epoch": 0.8203759524407602, "grad_norm": 0.2751602232456207, "learning_rate": 0.00016534843355574643, "loss": 11.6481, "step": 39191 }, { "epoch": 0.8203968852047224, "grad_norm": 0.32878369092941284, "learning_rate": 0.0001653467739241196, "loss": 11.6637, "step": 39192 }, { "epoch": 0.8204178179686846, "grad_norm": 0.3132472038269043, "learning_rate": 0.0001653451142610793, "loss": 11.6569, "step": 39193 }, { "epoch": 0.8204387507326467, "grad_norm": 0.34368184208869934, "learning_rate": 0.00016534345456662626, "loss": 11.671, "step": 39194 }, { "epoch": 0.8204596834966089, "grad_norm": 0.3067016005516052, "learning_rate": 0.0001653417948407613, "loss": 11.6805, "step": 39195 }, { "epoch": 0.820480616260571, "grad_norm": 0.29111436009407043, "learning_rate": 0.00016534013508348526, "loss": 11.6534, "step": 39196 }, { "epoch": 0.8205015490245332, "grad_norm": 0.2839582562446594, "learning_rate": 0.0001653384752947989, "loss": 11.6791, "step": 39197 }, { "epoch": 0.8205224817884954, "grad_norm": 0.2460659146308899, "learning_rate": 0.00016533681547470304, "loss": 11.6537, "step": 39198 }, { "epoch": 0.8205434145524575, "grad_norm": 0.325499564409256, "learning_rate": 0.00016533515562319843, "loss": 11.6752, "step": 39199 }, { "epoch": 0.8205643473164197, "grad_norm": 0.2679901421070099, "learning_rate": 0.0001653334957402859, "loss": 11.663, "step": 39200 }, { "epoch": 0.8205852800803818, "grad_norm": 0.3407355844974518, "learning_rate": 0.0001653318358259663, "loss": 11.6772, "step": 39201 }, { "epoch": 0.820606212844344, "grad_norm": 0.35116755962371826, "learning_rate": 0.00016533017588024032, "loss": 11.6641, "step": 39202 }, { "epoch": 0.8206271456083061, "grad_norm": 0.4162808060646057, "learning_rate": 0.00016532851590310885, "loss": 11.6768, "step": 39203 }, { "epoch": 0.8206480783722683, "grad_norm": 0.31051862239837646, "learning_rate": 0.00016532685589457265, "loss": 11.662, "step": 39204 }, { "epoch": 0.8206690111362305, "grad_norm": 0.268611878156662, "learning_rate": 0.0001653251958546325, "loss": 11.6581, "step": 39205 }, { "epoch": 0.8206899439001926, "grad_norm": 0.2281152456998825, "learning_rate": 0.00016532353578328925, "loss": 11.678, "step": 39206 }, { "epoch": 0.8207108766641548, "grad_norm": 0.2663955092430115, "learning_rate": 0.00016532187568054366, "loss": 11.6776, "step": 39207 }, { "epoch": 0.8207318094281169, "grad_norm": 0.3506512939929962, "learning_rate": 0.0001653202155463965, "loss": 11.6792, "step": 39208 }, { "epoch": 0.820752742192079, "grad_norm": 0.3676694929599762, "learning_rate": 0.00016531855538084864, "loss": 11.6636, "step": 39209 }, { "epoch": 0.8207736749560411, "grad_norm": 0.2499806135892868, "learning_rate": 0.00016531689518390082, "loss": 11.6591, "step": 39210 }, { "epoch": 0.8207946077200033, "grad_norm": 0.3287551999092102, "learning_rate": 0.00016531523495555387, "loss": 11.6588, "step": 39211 }, { "epoch": 0.8208155404839655, "grad_norm": 0.31962087750434875, "learning_rate": 0.0001653135746958086, "loss": 11.6772, "step": 39212 }, { "epoch": 0.8208364732479276, "grad_norm": 0.4043225944042206, "learning_rate": 0.00016531191440466575, "loss": 11.6819, "step": 39213 }, { "epoch": 0.8208574060118898, "grad_norm": 0.29931747913360596, "learning_rate": 0.00016531025408212616, "loss": 11.6808, "step": 39214 }, { "epoch": 0.8208783387758519, "grad_norm": 0.39414873719215393, "learning_rate": 0.00016530859372819063, "loss": 11.6668, "step": 39215 }, { "epoch": 0.8208992715398141, "grad_norm": 0.3006879687309265, "learning_rate": 0.00016530693334285997, "loss": 11.6795, "step": 39216 }, { "epoch": 0.8209202043037763, "grad_norm": 0.338137686252594, "learning_rate": 0.00016530527292613493, "loss": 11.6657, "step": 39217 }, { "epoch": 0.8209411370677384, "grad_norm": 0.3244530260562897, "learning_rate": 0.00016530361247801633, "loss": 11.6705, "step": 39218 }, { "epoch": 0.8209620698317006, "grad_norm": 0.3497249484062195, "learning_rate": 0.000165301951998505, "loss": 11.6931, "step": 39219 }, { "epoch": 0.8209830025956627, "grad_norm": 0.2670833468437195, "learning_rate": 0.00016530029148760172, "loss": 11.6663, "step": 39220 }, { "epoch": 0.8210039353596249, "grad_norm": 0.35337838530540466, "learning_rate": 0.00016529863094530723, "loss": 11.6669, "step": 39221 }, { "epoch": 0.821024868123587, "grad_norm": 0.28337347507476807, "learning_rate": 0.0001652969703716224, "loss": 11.6651, "step": 39222 }, { "epoch": 0.8210458008875492, "grad_norm": 0.2977236807346344, "learning_rate": 0.00016529530976654806, "loss": 11.6572, "step": 39223 }, { "epoch": 0.8210667336515114, "grad_norm": 0.30316606163978577, "learning_rate": 0.0001652936491300849, "loss": 11.6546, "step": 39224 }, { "epoch": 0.8210876664154735, "grad_norm": 0.2923673987388611, "learning_rate": 0.0001652919884622338, "loss": 11.67, "step": 39225 }, { "epoch": 0.8211085991794357, "grad_norm": 0.2680121660232544, "learning_rate": 0.00016529032776299552, "loss": 11.6595, "step": 39226 }, { "epoch": 0.8211295319433978, "grad_norm": 0.29667580127716064, "learning_rate": 0.00016528866703237086, "loss": 11.6652, "step": 39227 }, { "epoch": 0.82115046470736, "grad_norm": 0.3098072111606598, "learning_rate": 0.00016528700627036066, "loss": 11.6634, "step": 39228 }, { "epoch": 0.8211713974713221, "grad_norm": 0.29039236903190613, "learning_rate": 0.00016528534547696565, "loss": 11.668, "step": 39229 }, { "epoch": 0.8211923302352843, "grad_norm": 0.32166263461112976, "learning_rate": 0.0001652836846521867, "loss": 11.6816, "step": 39230 }, { "epoch": 0.8212132629992465, "grad_norm": 0.34931430220603943, "learning_rate": 0.00016528202379602458, "loss": 11.6532, "step": 39231 }, { "epoch": 0.8212341957632086, "grad_norm": 0.30273252725601196, "learning_rate": 0.00016528036290848006, "loss": 11.6601, "step": 39232 }, { "epoch": 0.8212551285271708, "grad_norm": 0.40212908387184143, "learning_rate": 0.00016527870198955398, "loss": 11.6604, "step": 39233 }, { "epoch": 0.8212760612911328, "grad_norm": 0.31395891308784485, "learning_rate": 0.0001652770410392471, "loss": 11.6578, "step": 39234 }, { "epoch": 0.821296994055095, "grad_norm": 0.32171374559402466, "learning_rate": 0.00016527538005756026, "loss": 11.6728, "step": 39235 }, { "epoch": 0.8213179268190572, "grad_norm": 0.33385440707206726, "learning_rate": 0.0001652737190444942, "loss": 11.6821, "step": 39236 }, { "epoch": 0.8213388595830193, "grad_norm": 0.2800716757774353, "learning_rate": 0.00016527205800004978, "loss": 11.6669, "step": 39237 }, { "epoch": 0.8213597923469815, "grad_norm": 0.3098412752151489, "learning_rate": 0.00016527039692422777, "loss": 11.6718, "step": 39238 }, { "epoch": 0.8213807251109436, "grad_norm": 0.359215646982193, "learning_rate": 0.00016526873581702896, "loss": 11.6637, "step": 39239 }, { "epoch": 0.8214016578749058, "grad_norm": 0.2724643051624298, "learning_rate": 0.0001652670746784542, "loss": 11.6673, "step": 39240 }, { "epoch": 0.8214225906388679, "grad_norm": 0.2861449420452118, "learning_rate": 0.0001652654135085042, "loss": 11.6788, "step": 39241 }, { "epoch": 0.8214435234028301, "grad_norm": 0.29199615120887756, "learning_rate": 0.00016526375230717984, "loss": 11.683, "step": 39242 }, { "epoch": 0.8214644561667923, "grad_norm": 0.3618820607662201, "learning_rate": 0.00016526209107448186, "loss": 11.6639, "step": 39243 }, { "epoch": 0.8214853889307544, "grad_norm": 0.3511008620262146, "learning_rate": 0.0001652604298104111, "loss": 11.6806, "step": 39244 }, { "epoch": 0.8215063216947166, "grad_norm": 0.30722346901893616, "learning_rate": 0.00016525876851496837, "loss": 11.664, "step": 39245 }, { "epoch": 0.8215272544586787, "grad_norm": 0.36189553141593933, "learning_rate": 0.0001652571071881544, "loss": 11.6762, "step": 39246 }, { "epoch": 0.8215481872226409, "grad_norm": 0.32773080468177795, "learning_rate": 0.00016525544582997006, "loss": 11.6892, "step": 39247 }, { "epoch": 0.821569119986603, "grad_norm": 0.2998257875442505, "learning_rate": 0.0001652537844404161, "loss": 11.6537, "step": 39248 }, { "epoch": 0.8215900527505652, "grad_norm": 0.34099280834198, "learning_rate": 0.00016525212301949336, "loss": 11.6672, "step": 39249 }, { "epoch": 0.8216109855145274, "grad_norm": 0.3129735589027405, "learning_rate": 0.0001652504615672026, "loss": 11.684, "step": 39250 }, { "epoch": 0.8216319182784895, "grad_norm": 0.33271676301956177, "learning_rate": 0.00016524880008354464, "loss": 11.6563, "step": 39251 }, { "epoch": 0.8216528510424517, "grad_norm": 0.25536513328552246, "learning_rate": 0.00016524713856852028, "loss": 11.6688, "step": 39252 }, { "epoch": 0.8216737838064138, "grad_norm": 0.28100594878196716, "learning_rate": 0.0001652454770221303, "loss": 11.6787, "step": 39253 }, { "epoch": 0.821694716570376, "grad_norm": 0.3499321937561035, "learning_rate": 0.0001652438154443755, "loss": 11.6679, "step": 39254 }, { "epoch": 0.8217156493343382, "grad_norm": 0.32099881768226624, "learning_rate": 0.00016524215383525672, "loss": 11.6675, "step": 39255 }, { "epoch": 0.8217365820983003, "grad_norm": 0.3968994915485382, "learning_rate": 0.00016524049219477472, "loss": 11.6726, "step": 39256 }, { "epoch": 0.8217575148622625, "grad_norm": 0.2774965465068817, "learning_rate": 0.0001652388305229303, "loss": 11.6679, "step": 39257 }, { "epoch": 0.8217784476262245, "grad_norm": 0.36968570947647095, "learning_rate": 0.00016523716881972428, "loss": 11.669, "step": 39258 }, { "epoch": 0.8217993803901867, "grad_norm": 0.2732837200164795, "learning_rate": 0.0001652355070851574, "loss": 11.663, "step": 39259 }, { "epoch": 0.8218203131541488, "grad_norm": 0.2995823323726654, "learning_rate": 0.00016523384531923056, "loss": 11.6684, "step": 39260 }, { "epoch": 0.821841245918111, "grad_norm": 0.3572641611099243, "learning_rate": 0.0001652321835219445, "loss": 11.682, "step": 39261 }, { "epoch": 0.8218621786820732, "grad_norm": 0.2678690254688263, "learning_rate": 0.0001652305216933, "loss": 11.6786, "step": 39262 }, { "epoch": 0.8218831114460353, "grad_norm": 0.295813649892807, "learning_rate": 0.0001652288598332979, "loss": 11.652, "step": 39263 }, { "epoch": 0.8219040442099975, "grad_norm": 0.3010903000831604, "learning_rate": 0.00016522719794193896, "loss": 11.671, "step": 39264 }, { "epoch": 0.8219249769739596, "grad_norm": 0.37805479764938354, "learning_rate": 0.00016522553601922398, "loss": 11.6532, "step": 39265 }, { "epoch": 0.8219459097379218, "grad_norm": 0.3039786219596863, "learning_rate": 0.0001652238740651538, "loss": 11.6758, "step": 39266 }, { "epoch": 0.8219668425018839, "grad_norm": 0.3107636272907257, "learning_rate": 0.00016522221207972924, "loss": 11.6579, "step": 39267 }, { "epoch": 0.8219877752658461, "grad_norm": 0.28863200545310974, "learning_rate": 0.000165220550062951, "loss": 11.6617, "step": 39268 }, { "epoch": 0.8220087080298083, "grad_norm": 0.29701390862464905, "learning_rate": 0.00016521888801481995, "loss": 11.6707, "step": 39269 }, { "epoch": 0.8220296407937704, "grad_norm": 0.33491840958595276, "learning_rate": 0.00016521722593533687, "loss": 11.6562, "step": 39270 }, { "epoch": 0.8220505735577326, "grad_norm": 0.30771496891975403, "learning_rate": 0.00016521556382450257, "loss": 11.6723, "step": 39271 }, { "epoch": 0.8220715063216947, "grad_norm": 0.3607608675956726, "learning_rate": 0.00016521390168231786, "loss": 11.6743, "step": 39272 }, { "epoch": 0.8220924390856569, "grad_norm": 0.32211577892303467, "learning_rate": 0.0001652122395087835, "loss": 11.6637, "step": 39273 }, { "epoch": 0.8221133718496191, "grad_norm": 0.24733519554138184, "learning_rate": 0.0001652105773039003, "loss": 11.6797, "step": 39274 }, { "epoch": 0.8221343046135812, "grad_norm": 0.3051501214504242, "learning_rate": 0.00016520891506766904, "loss": 11.6977, "step": 39275 }, { "epoch": 0.8221552373775434, "grad_norm": 0.26350849866867065, "learning_rate": 0.0001652072528000906, "loss": 11.6918, "step": 39276 }, { "epoch": 0.8221761701415055, "grad_norm": 0.3681040406227112, "learning_rate": 0.0001652055905011657, "loss": 11.6668, "step": 39277 }, { "epoch": 0.8221971029054677, "grad_norm": 0.2785869240760803, "learning_rate": 0.00016520392817089517, "loss": 11.6633, "step": 39278 }, { "epoch": 0.8222180356694297, "grad_norm": 0.3053569197654724, "learning_rate": 0.00016520226580927984, "loss": 11.6848, "step": 39279 }, { "epoch": 0.822238968433392, "grad_norm": 0.295844703912735, "learning_rate": 0.00016520060341632044, "loss": 11.6512, "step": 39280 }, { "epoch": 0.8222599011973541, "grad_norm": 0.3337985873222351, "learning_rate": 0.0001651989409920178, "loss": 11.6617, "step": 39281 }, { "epoch": 0.8222808339613162, "grad_norm": 0.26640623807907104, "learning_rate": 0.00016519727853637274, "loss": 11.6863, "step": 39282 }, { "epoch": 0.8223017667252784, "grad_norm": 0.3221205770969391, "learning_rate": 0.00016519561604938602, "loss": 11.6733, "step": 39283 }, { "epoch": 0.8223226994892405, "grad_norm": 0.2814594507217407, "learning_rate": 0.00016519395353105848, "loss": 11.6543, "step": 39284 }, { "epoch": 0.8223436322532027, "grad_norm": 0.2847398817539215, "learning_rate": 0.0001651922909813909, "loss": 11.6434, "step": 39285 }, { "epoch": 0.8223645650171648, "grad_norm": 0.35236042737960815, "learning_rate": 0.0001651906284003841, "loss": 11.6665, "step": 39286 }, { "epoch": 0.822385497781127, "grad_norm": 0.2900153696537018, "learning_rate": 0.00016518896578803882, "loss": 11.6629, "step": 39287 }, { "epoch": 0.8224064305450892, "grad_norm": 0.3094833493232727, "learning_rate": 0.0001651873031443559, "loss": 11.6571, "step": 39288 }, { "epoch": 0.8224273633090513, "grad_norm": 0.2838604748249054, "learning_rate": 0.00016518564046933616, "loss": 11.6598, "step": 39289 }, { "epoch": 0.8224482960730135, "grad_norm": 0.3645072877407074, "learning_rate": 0.00016518397776298037, "loss": 11.6712, "step": 39290 }, { "epoch": 0.8224692288369756, "grad_norm": 0.33987629413604736, "learning_rate": 0.00016518231502528932, "loss": 11.6623, "step": 39291 }, { "epoch": 0.8224901616009378, "grad_norm": 0.3243900537490845, "learning_rate": 0.00016518065225626383, "loss": 11.6753, "step": 39292 }, { "epoch": 0.8225110943648999, "grad_norm": 0.3103408217430115, "learning_rate": 0.0001651789894559047, "loss": 11.6727, "step": 39293 }, { "epoch": 0.8225320271288621, "grad_norm": 0.3265204131603241, "learning_rate": 0.0001651773266242127, "loss": 11.6846, "step": 39294 }, { "epoch": 0.8225529598928243, "grad_norm": 0.2878590524196625, "learning_rate": 0.00016517566376118866, "loss": 11.683, "step": 39295 }, { "epoch": 0.8225738926567864, "grad_norm": 0.454107403755188, "learning_rate": 0.00016517400086683343, "loss": 11.6884, "step": 39296 }, { "epoch": 0.8225948254207486, "grad_norm": 0.3214527368545532, "learning_rate": 0.0001651723379411477, "loss": 11.6733, "step": 39297 }, { "epoch": 0.8226157581847107, "grad_norm": 0.29556697607040405, "learning_rate": 0.00016517067498413233, "loss": 11.6549, "step": 39298 }, { "epoch": 0.8226366909486729, "grad_norm": 0.31181758642196655, "learning_rate": 0.0001651690119957881, "loss": 11.6834, "step": 39299 }, { "epoch": 0.8226576237126351, "grad_norm": 0.4767076373100281, "learning_rate": 0.00016516734897611583, "loss": 11.654, "step": 39300 }, { "epoch": 0.8226785564765972, "grad_norm": 0.27436745166778564, "learning_rate": 0.00016516568592511632, "loss": 11.657, "step": 39301 }, { "epoch": 0.8226994892405594, "grad_norm": 0.2777934968471527, "learning_rate": 0.00016516402284279033, "loss": 11.6758, "step": 39302 }, { "epoch": 0.8227204220045214, "grad_norm": 0.2847859561443329, "learning_rate": 0.0001651623597291387, "loss": 11.67, "step": 39303 }, { "epoch": 0.8227413547684836, "grad_norm": 0.3026444613933563, "learning_rate": 0.00016516069658416226, "loss": 11.6895, "step": 39304 }, { "epoch": 0.8227622875324457, "grad_norm": 0.29609283804893494, "learning_rate": 0.0001651590334078617, "loss": 11.6769, "step": 39305 }, { "epoch": 0.8227832202964079, "grad_norm": 0.31215915083885193, "learning_rate": 0.00016515737020023793, "loss": 11.6628, "step": 39306 }, { "epoch": 0.8228041530603701, "grad_norm": 0.31174132227897644, "learning_rate": 0.0001651557069612917, "loss": 11.6622, "step": 39307 }, { "epoch": 0.8228250858243322, "grad_norm": 0.3020670711994171, "learning_rate": 0.00016515404369102382, "loss": 11.6783, "step": 39308 }, { "epoch": 0.8228460185882944, "grad_norm": 0.28997063636779785, "learning_rate": 0.00016515238038943507, "loss": 11.6731, "step": 39309 }, { "epoch": 0.8228669513522565, "grad_norm": 0.2728862166404724, "learning_rate": 0.00016515071705652625, "loss": 11.6768, "step": 39310 }, { "epoch": 0.8228878841162187, "grad_norm": 0.3241429030895233, "learning_rate": 0.0001651490536922982, "loss": 11.6759, "step": 39311 }, { "epoch": 0.8229088168801808, "grad_norm": 0.31559574604034424, "learning_rate": 0.00016514739029675171, "loss": 11.6863, "step": 39312 }, { "epoch": 0.822929749644143, "grad_norm": 0.33870333433151245, "learning_rate": 0.00016514572686988753, "loss": 11.6862, "step": 39313 }, { "epoch": 0.8229506824081052, "grad_norm": 0.39491087198257446, "learning_rate": 0.0001651440634117065, "loss": 11.6687, "step": 39314 }, { "epoch": 0.8229716151720673, "grad_norm": 0.35277992486953735, "learning_rate": 0.00016514239992220943, "loss": 11.6648, "step": 39315 }, { "epoch": 0.8229925479360295, "grad_norm": 0.24985826015472412, "learning_rate": 0.00016514073640139712, "loss": 11.6717, "step": 39316 }, { "epoch": 0.8230134806999916, "grad_norm": 0.31568098068237305, "learning_rate": 0.0001651390728492703, "loss": 11.664, "step": 39317 }, { "epoch": 0.8230344134639538, "grad_norm": 0.4387149214744568, "learning_rate": 0.00016513740926582985, "loss": 11.6895, "step": 39318 }, { "epoch": 0.823055346227916, "grad_norm": 0.35922905802726746, "learning_rate": 0.00016513574565107654, "loss": 11.6714, "step": 39319 }, { "epoch": 0.8230762789918781, "grad_norm": 0.326629638671875, "learning_rate": 0.00016513408200501117, "loss": 11.6787, "step": 39320 }, { "epoch": 0.8230972117558403, "grad_norm": 0.2768756151199341, "learning_rate": 0.00016513241832763454, "loss": 11.6893, "step": 39321 }, { "epoch": 0.8231181445198024, "grad_norm": 0.3307337760925293, "learning_rate": 0.00016513075461894747, "loss": 11.6622, "step": 39322 }, { "epoch": 0.8231390772837646, "grad_norm": 0.3240987956523895, "learning_rate": 0.0001651290908789507, "loss": 11.6905, "step": 39323 }, { "epoch": 0.8231600100477267, "grad_norm": 0.2793816924095154, "learning_rate": 0.00016512742710764512, "loss": 11.6498, "step": 39324 }, { "epoch": 0.8231809428116889, "grad_norm": 0.28260689973831177, "learning_rate": 0.00016512576330503147, "loss": 11.681, "step": 39325 }, { "epoch": 0.823201875575651, "grad_norm": 0.24172669649124146, "learning_rate": 0.00016512409947111055, "loss": 11.6701, "step": 39326 }, { "epoch": 0.8232228083396131, "grad_norm": 0.40764516592025757, "learning_rate": 0.00016512243560588315, "loss": 11.6591, "step": 39327 }, { "epoch": 0.8232437411035753, "grad_norm": 0.3146563172340393, "learning_rate": 0.00016512077170935011, "loss": 11.6697, "step": 39328 }, { "epoch": 0.8232646738675374, "grad_norm": 0.36116278171539307, "learning_rate": 0.00016511910778151223, "loss": 11.6645, "step": 39329 }, { "epoch": 0.8232856066314996, "grad_norm": 0.29864200949668884, "learning_rate": 0.00016511744382237024, "loss": 11.6662, "step": 39330 }, { "epoch": 0.8233065393954617, "grad_norm": 0.34536585211753845, "learning_rate": 0.00016511577983192502, "loss": 11.6716, "step": 39331 }, { "epoch": 0.8233274721594239, "grad_norm": 0.32712477445602417, "learning_rate": 0.00016511411581017734, "loss": 11.6706, "step": 39332 }, { "epoch": 0.8233484049233861, "grad_norm": 0.26405084133148193, "learning_rate": 0.00016511245175712802, "loss": 11.6654, "step": 39333 }, { "epoch": 0.8233693376873482, "grad_norm": 0.3874315321445465, "learning_rate": 0.0001651107876727778, "loss": 11.6681, "step": 39334 }, { "epoch": 0.8233902704513104, "grad_norm": 0.3827393651008606, "learning_rate": 0.00016510912355712756, "loss": 11.6735, "step": 39335 }, { "epoch": 0.8234112032152725, "grad_norm": 0.3028516471385956, "learning_rate": 0.00016510745941017803, "loss": 11.6781, "step": 39336 }, { "epoch": 0.8234321359792347, "grad_norm": 0.3972070813179016, "learning_rate": 0.00016510579523193003, "loss": 11.6719, "step": 39337 }, { "epoch": 0.8234530687431969, "grad_norm": 0.35056355595588684, "learning_rate": 0.00016510413102238439, "loss": 11.667, "step": 39338 }, { "epoch": 0.823474001507159, "grad_norm": 0.336666077375412, "learning_rate": 0.0001651024667815419, "loss": 11.677, "step": 39339 }, { "epoch": 0.8234949342711212, "grad_norm": 0.2908214032649994, "learning_rate": 0.0001651008025094033, "loss": 11.6656, "step": 39340 }, { "epoch": 0.8235158670350833, "grad_norm": 0.3123135268688202, "learning_rate": 0.0001650991382059695, "loss": 11.6712, "step": 39341 }, { "epoch": 0.8235367997990455, "grad_norm": 0.3086979389190674, "learning_rate": 0.0001650974738712412, "loss": 11.6821, "step": 39342 }, { "epoch": 0.8235577325630076, "grad_norm": 0.2819243371486664, "learning_rate": 0.00016509580950521926, "loss": 11.6767, "step": 39343 }, { "epoch": 0.8235786653269698, "grad_norm": 0.298309326171875, "learning_rate": 0.00016509414510790444, "loss": 11.6664, "step": 39344 }, { "epoch": 0.823599598090932, "grad_norm": 0.2876291275024414, "learning_rate": 0.0001650924806792976, "loss": 11.6603, "step": 39345 }, { "epoch": 0.8236205308548941, "grad_norm": 0.3506060838699341, "learning_rate": 0.00016509081621939945, "loss": 11.6773, "step": 39346 }, { "epoch": 0.8236414636188563, "grad_norm": 0.34821051359176636, "learning_rate": 0.00016508915172821088, "loss": 11.6686, "step": 39347 }, { "epoch": 0.8236623963828184, "grad_norm": 0.27719441056251526, "learning_rate": 0.00016508748720573264, "loss": 11.6633, "step": 39348 }, { "epoch": 0.8236833291467806, "grad_norm": 0.40361863374710083, "learning_rate": 0.00016508582265196552, "loss": 11.6864, "step": 39349 }, { "epoch": 0.8237042619107426, "grad_norm": 0.3256518840789795, "learning_rate": 0.00016508415806691038, "loss": 11.6516, "step": 39350 }, { "epoch": 0.8237251946747048, "grad_norm": 0.30265164375305176, "learning_rate": 0.0001650824934505679, "loss": 11.6776, "step": 39351 }, { "epoch": 0.823746127438667, "grad_norm": 0.4479729235172272, "learning_rate": 0.00016508082880293905, "loss": 11.6616, "step": 39352 }, { "epoch": 0.8237670602026291, "grad_norm": 0.34464091062545776, "learning_rate": 0.0001650791641240245, "loss": 11.6645, "step": 39353 }, { "epoch": 0.8237879929665913, "grad_norm": 0.3565762937068939, "learning_rate": 0.00016507749941382512, "loss": 11.656, "step": 39354 }, { "epoch": 0.8238089257305534, "grad_norm": 0.31534716486930847, "learning_rate": 0.00016507583467234165, "loss": 11.688, "step": 39355 }, { "epoch": 0.8238298584945156, "grad_norm": 0.3426595628261566, "learning_rate": 0.00016507416989957493, "loss": 11.669, "step": 39356 }, { "epoch": 0.8238507912584778, "grad_norm": 0.38222262263298035, "learning_rate": 0.00016507250509552573, "loss": 11.6596, "step": 39357 }, { "epoch": 0.8238717240224399, "grad_norm": 0.3280813992023468, "learning_rate": 0.00016507084026019494, "loss": 11.6587, "step": 39358 }, { "epoch": 0.8238926567864021, "grad_norm": 0.3250921368598938, "learning_rate": 0.00016506917539358323, "loss": 11.6706, "step": 39359 }, { "epoch": 0.8239135895503642, "grad_norm": 0.26892346143722534, "learning_rate": 0.00016506751049569148, "loss": 11.6615, "step": 39360 }, { "epoch": 0.8239345223143264, "grad_norm": 0.39590731263160706, "learning_rate": 0.00016506584556652052, "loss": 11.6722, "step": 39361 }, { "epoch": 0.8239554550782885, "grad_norm": 0.35435351729393005, "learning_rate": 0.00016506418060607106, "loss": 11.6671, "step": 39362 }, { "epoch": 0.8239763878422507, "grad_norm": 0.3522055745124817, "learning_rate": 0.00016506251561434395, "loss": 11.6671, "step": 39363 }, { "epoch": 0.8239973206062129, "grad_norm": 0.2763649523258209, "learning_rate": 0.00016506085059133995, "loss": 11.683, "step": 39364 }, { "epoch": 0.824018253370175, "grad_norm": 0.32089850306510925, "learning_rate": 0.00016505918553705992, "loss": 11.6509, "step": 39365 }, { "epoch": 0.8240391861341372, "grad_norm": 0.32144275307655334, "learning_rate": 0.00016505752045150468, "loss": 11.6731, "step": 39366 }, { "epoch": 0.8240601188980993, "grad_norm": 0.3126251697540283, "learning_rate": 0.00016505585533467494, "loss": 11.665, "step": 39367 }, { "epoch": 0.8240810516620615, "grad_norm": 0.3842293322086334, "learning_rate": 0.00016505419018657155, "loss": 11.6724, "step": 39368 }, { "epoch": 0.8241019844260236, "grad_norm": 0.27456748485565186, "learning_rate": 0.0001650525250071953, "loss": 11.6958, "step": 39369 }, { "epoch": 0.8241229171899858, "grad_norm": 0.3003537356853485, "learning_rate": 0.00016505085979654703, "loss": 11.6712, "step": 39370 }, { "epoch": 0.824143849953948, "grad_norm": 0.22840774059295654, "learning_rate": 0.0001650491945546275, "loss": 11.665, "step": 39371 }, { "epoch": 0.82416478271791, "grad_norm": 0.41029053926467896, "learning_rate": 0.0001650475292814375, "loss": 11.6599, "step": 39372 }, { "epoch": 0.8241857154818722, "grad_norm": 0.29026827216148376, "learning_rate": 0.00016504586397697785, "loss": 11.6707, "step": 39373 }, { "epoch": 0.8242066482458343, "grad_norm": 0.3212136924266815, "learning_rate": 0.00016504419864124932, "loss": 11.6601, "step": 39374 }, { "epoch": 0.8242275810097965, "grad_norm": 0.3144906461238861, "learning_rate": 0.0001650425332742528, "loss": 11.6698, "step": 39375 }, { "epoch": 0.8242485137737587, "grad_norm": 0.3162729740142822, "learning_rate": 0.00016504086787598902, "loss": 11.6765, "step": 39376 }, { "epoch": 0.8242694465377208, "grad_norm": 0.35400906205177307, "learning_rate": 0.00016503920244645878, "loss": 11.6783, "step": 39377 }, { "epoch": 0.824290379301683, "grad_norm": 0.31795743107795715, "learning_rate": 0.0001650375369856629, "loss": 11.6866, "step": 39378 }, { "epoch": 0.8243113120656451, "grad_norm": 0.30473029613494873, "learning_rate": 0.00016503587149360215, "loss": 11.6731, "step": 39379 }, { "epoch": 0.8243322448296073, "grad_norm": 0.3263189196586609, "learning_rate": 0.00016503420597027737, "loss": 11.6678, "step": 39380 }, { "epoch": 0.8243531775935694, "grad_norm": 0.3167683184146881, "learning_rate": 0.00016503254041568935, "loss": 11.6739, "step": 39381 }, { "epoch": 0.8243741103575316, "grad_norm": 0.3608197867870331, "learning_rate": 0.00016503087482983887, "loss": 11.6666, "step": 39382 }, { "epoch": 0.8243950431214938, "grad_norm": 0.3771132230758667, "learning_rate": 0.00016502920921272675, "loss": 11.6728, "step": 39383 }, { "epoch": 0.8244159758854559, "grad_norm": 0.28383344411849976, "learning_rate": 0.00016502754356435378, "loss": 11.6616, "step": 39384 }, { "epoch": 0.8244369086494181, "grad_norm": 0.3150169849395752, "learning_rate": 0.00016502587788472077, "loss": 11.6642, "step": 39385 }, { "epoch": 0.8244578414133802, "grad_norm": 0.321389377117157, "learning_rate": 0.00016502421217382853, "loss": 11.6705, "step": 39386 }, { "epoch": 0.8244787741773424, "grad_norm": 0.40001288056373596, "learning_rate": 0.00016502254643167784, "loss": 11.6677, "step": 39387 }, { "epoch": 0.8244997069413045, "grad_norm": 0.30547720193862915, "learning_rate": 0.0001650208806582695, "loss": 11.6713, "step": 39388 }, { "epoch": 0.8245206397052667, "grad_norm": 0.29425957798957825, "learning_rate": 0.00016501921485360434, "loss": 11.65, "step": 39389 }, { "epoch": 0.8245415724692289, "grad_norm": 0.2662442922592163, "learning_rate": 0.00016501754901768312, "loss": 11.6666, "step": 39390 }, { "epoch": 0.824562505233191, "grad_norm": 0.32731789350509644, "learning_rate": 0.00016501588315050666, "loss": 11.6826, "step": 39391 }, { "epoch": 0.8245834379971532, "grad_norm": 0.32940417528152466, "learning_rate": 0.0001650142172520758, "loss": 11.6725, "step": 39392 }, { "epoch": 0.8246043707611153, "grad_norm": 0.3663317859172821, "learning_rate": 0.00016501255132239125, "loss": 11.6494, "step": 39393 }, { "epoch": 0.8246253035250775, "grad_norm": 0.28326061367988586, "learning_rate": 0.0001650108853614539, "loss": 11.6675, "step": 39394 }, { "epoch": 0.8246462362890397, "grad_norm": 0.2762089967727661, "learning_rate": 0.0001650092193692645, "loss": 11.6714, "step": 39395 }, { "epoch": 0.8246671690530017, "grad_norm": 0.3269067108631134, "learning_rate": 0.00016500755334582386, "loss": 11.6742, "step": 39396 }, { "epoch": 0.824688101816964, "grad_norm": 0.3830025792121887, "learning_rate": 0.0001650058872911328, "loss": 11.6778, "step": 39397 }, { "epoch": 0.824709034580926, "grad_norm": 0.30763062834739685, "learning_rate": 0.0001650042212051921, "loss": 11.6783, "step": 39398 }, { "epoch": 0.8247299673448882, "grad_norm": 0.2917865514755249, "learning_rate": 0.00016500255508800256, "loss": 11.6782, "step": 39399 }, { "epoch": 0.8247509001088503, "grad_norm": 0.25683632493019104, "learning_rate": 0.00016500088893956497, "loss": 11.6645, "step": 39400 }, { "epoch": 0.8247718328728125, "grad_norm": 0.3084496259689331, "learning_rate": 0.00016499922275988021, "loss": 11.66, "step": 39401 }, { "epoch": 0.8247927656367747, "grad_norm": 0.3100821375846863, "learning_rate": 0.000164997556548949, "loss": 11.6809, "step": 39402 }, { "epoch": 0.8248136984007368, "grad_norm": 0.46488985419273376, "learning_rate": 0.00016499589030677213, "loss": 11.6853, "step": 39403 }, { "epoch": 0.824834631164699, "grad_norm": 0.34966158866882324, "learning_rate": 0.00016499422403335045, "loss": 11.6553, "step": 39404 }, { "epoch": 0.8248555639286611, "grad_norm": 0.2943579852581024, "learning_rate": 0.00016499255772868475, "loss": 11.6659, "step": 39405 }, { "epoch": 0.8248764966926233, "grad_norm": 0.30437013506889343, "learning_rate": 0.00016499089139277584, "loss": 11.6718, "step": 39406 }, { "epoch": 0.8248974294565854, "grad_norm": 0.31113195419311523, "learning_rate": 0.00016498922502562448, "loss": 11.6583, "step": 39407 }, { "epoch": 0.8249183622205476, "grad_norm": 0.3763241171836853, "learning_rate": 0.0001649875586272315, "loss": 11.6688, "step": 39408 }, { "epoch": 0.8249392949845098, "grad_norm": 0.2742694318294525, "learning_rate": 0.0001649858921975977, "loss": 11.6861, "step": 39409 }, { "epoch": 0.8249602277484719, "grad_norm": 0.2617236375808716, "learning_rate": 0.0001649842257367239, "loss": 11.6769, "step": 39410 }, { "epoch": 0.8249811605124341, "grad_norm": 0.2826865017414093, "learning_rate": 0.00016498255924461087, "loss": 11.6753, "step": 39411 }, { "epoch": 0.8250020932763962, "grad_norm": 0.36077114939689636, "learning_rate": 0.0001649808927212594, "loss": 11.6849, "step": 39412 }, { "epoch": 0.8250230260403584, "grad_norm": 0.3548283576965332, "learning_rate": 0.00016497922616667032, "loss": 11.677, "step": 39413 }, { "epoch": 0.8250439588043206, "grad_norm": 0.33079084753990173, "learning_rate": 0.00016497755958084443, "loss": 11.6816, "step": 39414 }, { "epoch": 0.8250648915682827, "grad_norm": 0.3129456639289856, "learning_rate": 0.00016497589296378252, "loss": 11.6593, "step": 39415 }, { "epoch": 0.8250858243322449, "grad_norm": 0.3457024395465851, "learning_rate": 0.00016497422631548542, "loss": 11.6768, "step": 39416 }, { "epoch": 0.825106757096207, "grad_norm": 0.31384196877479553, "learning_rate": 0.0001649725596359539, "loss": 11.673, "step": 39417 }, { "epoch": 0.8251276898601692, "grad_norm": 0.32363495230674744, "learning_rate": 0.00016497089292518875, "loss": 11.6644, "step": 39418 }, { "epoch": 0.8251486226241312, "grad_norm": 0.32335418462753296, "learning_rate": 0.00016496922618319083, "loss": 11.6696, "step": 39419 }, { "epoch": 0.8251695553880934, "grad_norm": 0.3702022135257721, "learning_rate": 0.00016496755940996084, "loss": 11.6647, "step": 39420 }, { "epoch": 0.8251904881520556, "grad_norm": 0.24311669170856476, "learning_rate": 0.00016496589260549968, "loss": 11.6657, "step": 39421 }, { "epoch": 0.8252114209160177, "grad_norm": 0.3004315197467804, "learning_rate": 0.0001649642257698081, "loss": 11.6568, "step": 39422 }, { "epoch": 0.8252323536799799, "grad_norm": 0.37994909286499023, "learning_rate": 0.0001649625589028869, "loss": 11.6637, "step": 39423 }, { "epoch": 0.825253286443942, "grad_norm": 0.4030134975910187, "learning_rate": 0.0001649608920047369, "loss": 11.6864, "step": 39424 }, { "epoch": 0.8252742192079042, "grad_norm": 0.2479357272386551, "learning_rate": 0.00016495922507535894, "loss": 11.6721, "step": 39425 }, { "epoch": 0.8252951519718663, "grad_norm": 0.2755812704563141, "learning_rate": 0.00016495755811475377, "loss": 11.6685, "step": 39426 }, { "epoch": 0.8253160847358285, "grad_norm": 0.3778286874294281, "learning_rate": 0.00016495589112292217, "loss": 11.6686, "step": 39427 }, { "epoch": 0.8253370174997907, "grad_norm": 0.24855133891105652, "learning_rate": 0.00016495422409986497, "loss": 11.6581, "step": 39428 }, { "epoch": 0.8253579502637528, "grad_norm": 0.2887355387210846, "learning_rate": 0.000164952557045583, "loss": 11.6812, "step": 39429 }, { "epoch": 0.825378883027715, "grad_norm": 0.3163887858390808, "learning_rate": 0.000164950889960077, "loss": 11.6695, "step": 39430 }, { "epoch": 0.8253998157916771, "grad_norm": 0.4552535116672516, "learning_rate": 0.00016494922284334787, "loss": 11.6616, "step": 39431 }, { "epoch": 0.8254207485556393, "grad_norm": 0.32330548763275146, "learning_rate": 0.00016494755569539628, "loss": 11.6734, "step": 39432 }, { "epoch": 0.8254416813196015, "grad_norm": 0.29864874482154846, "learning_rate": 0.00016494588851622313, "loss": 11.6625, "step": 39433 }, { "epoch": 0.8254626140835636, "grad_norm": 0.31667032837867737, "learning_rate": 0.00016494422130582917, "loss": 11.6479, "step": 39434 }, { "epoch": 0.8254835468475258, "grad_norm": 0.44228026270866394, "learning_rate": 0.00016494255406421525, "loss": 11.6811, "step": 39435 }, { "epoch": 0.8255044796114879, "grad_norm": 0.30051690340042114, "learning_rate": 0.0001649408867913821, "loss": 11.6752, "step": 39436 }, { "epoch": 0.8255254123754501, "grad_norm": 0.37133029103279114, "learning_rate": 0.0001649392194873306, "loss": 11.6904, "step": 39437 }, { "epoch": 0.8255463451394122, "grad_norm": 0.2884576618671417, "learning_rate": 0.00016493755215206152, "loss": 11.6544, "step": 39438 }, { "epoch": 0.8255672779033744, "grad_norm": 0.31061428785324097, "learning_rate": 0.00016493588478557562, "loss": 11.659, "step": 39439 }, { "epoch": 0.8255882106673366, "grad_norm": 0.2906360924243927, "learning_rate": 0.00016493421738787375, "loss": 11.6567, "step": 39440 }, { "epoch": 0.8256091434312987, "grad_norm": 0.30538448691368103, "learning_rate": 0.00016493254995895672, "loss": 11.6585, "step": 39441 }, { "epoch": 0.8256300761952609, "grad_norm": 0.25873249769210815, "learning_rate": 0.00016493088249882533, "loss": 11.6597, "step": 39442 }, { "epoch": 0.8256510089592229, "grad_norm": 0.33049488067626953, "learning_rate": 0.00016492921500748032, "loss": 11.6745, "step": 39443 }, { "epoch": 0.8256719417231851, "grad_norm": 0.3459375202655792, "learning_rate": 0.00016492754748492257, "loss": 11.6615, "step": 39444 }, { "epoch": 0.8256928744871472, "grad_norm": 0.26383236050605774, "learning_rate": 0.00016492587993115283, "loss": 11.6678, "step": 39445 }, { "epoch": 0.8257138072511094, "grad_norm": 0.2991989552974701, "learning_rate": 0.00016492421234617192, "loss": 11.6706, "step": 39446 }, { "epoch": 0.8257347400150716, "grad_norm": 0.32124197483062744, "learning_rate": 0.00016492254472998062, "loss": 11.6601, "step": 39447 }, { "epoch": 0.8257556727790337, "grad_norm": 0.28432756662368774, "learning_rate": 0.00016492087708257978, "loss": 11.6639, "step": 39448 }, { "epoch": 0.8257766055429959, "grad_norm": 0.2805566191673279, "learning_rate": 0.00016491920940397015, "loss": 11.6657, "step": 39449 }, { "epoch": 0.825797538306958, "grad_norm": 0.3569413125514984, "learning_rate": 0.00016491754169415258, "loss": 11.6669, "step": 39450 }, { "epoch": 0.8258184710709202, "grad_norm": 0.28960826992988586, "learning_rate": 0.00016491587395312785, "loss": 11.6717, "step": 39451 }, { "epoch": 0.8258394038348824, "grad_norm": 0.32743412256240845, "learning_rate": 0.00016491420618089674, "loss": 11.6678, "step": 39452 }, { "epoch": 0.8258603365988445, "grad_norm": 0.27492648363113403, "learning_rate": 0.00016491253837746007, "loss": 11.6673, "step": 39453 }, { "epoch": 0.8258812693628067, "grad_norm": 0.32367604970932007, "learning_rate": 0.00016491087054281865, "loss": 11.6766, "step": 39454 }, { "epoch": 0.8259022021267688, "grad_norm": 0.36914968490600586, "learning_rate": 0.00016490920267697327, "loss": 11.678, "step": 39455 }, { "epoch": 0.825923134890731, "grad_norm": 0.32348230481147766, "learning_rate": 0.00016490753477992471, "loss": 11.6664, "step": 39456 }, { "epoch": 0.8259440676546931, "grad_norm": 0.3300347328186035, "learning_rate": 0.00016490586685167385, "loss": 11.6726, "step": 39457 }, { "epoch": 0.8259650004186553, "grad_norm": 0.2975545823574066, "learning_rate": 0.00016490419889222138, "loss": 11.6629, "step": 39458 }, { "epoch": 0.8259859331826175, "grad_norm": 0.32294148206710815, "learning_rate": 0.0001649025309015682, "loss": 11.6636, "step": 39459 }, { "epoch": 0.8260068659465796, "grad_norm": 0.3374990224838257, "learning_rate": 0.00016490086287971508, "loss": 11.6758, "step": 39460 }, { "epoch": 0.8260277987105418, "grad_norm": 0.3235531151294708, "learning_rate": 0.0001648991948266628, "loss": 11.6795, "step": 39461 }, { "epoch": 0.8260487314745039, "grad_norm": 0.2960229814052582, "learning_rate": 0.00016489752674241217, "loss": 11.661, "step": 39462 }, { "epoch": 0.8260696642384661, "grad_norm": 0.3289538323879242, "learning_rate": 0.000164895858626964, "loss": 11.6746, "step": 39463 }, { "epoch": 0.8260905970024282, "grad_norm": 0.24189598858356476, "learning_rate": 0.00016489419048031908, "loss": 11.6661, "step": 39464 }, { "epoch": 0.8261115297663904, "grad_norm": 0.375537246465683, "learning_rate": 0.00016489252230247824, "loss": 11.6728, "step": 39465 }, { "epoch": 0.8261324625303526, "grad_norm": 0.3205725848674774, "learning_rate": 0.00016489085409344227, "loss": 11.6689, "step": 39466 }, { "epoch": 0.8261533952943146, "grad_norm": 0.2532677948474884, "learning_rate": 0.00016488918585321196, "loss": 11.6624, "step": 39467 }, { "epoch": 0.8261743280582768, "grad_norm": 0.26424485445022583, "learning_rate": 0.00016488751758178812, "loss": 11.6735, "step": 39468 }, { "epoch": 0.8261952608222389, "grad_norm": 0.2752295732498169, "learning_rate": 0.00016488584927917154, "loss": 11.669, "step": 39469 }, { "epoch": 0.8262161935862011, "grad_norm": 0.272941529750824, "learning_rate": 0.00016488418094536304, "loss": 11.6542, "step": 39470 }, { "epoch": 0.8262371263501632, "grad_norm": 0.2561609745025635, "learning_rate": 0.00016488251258036342, "loss": 11.6813, "step": 39471 }, { "epoch": 0.8262580591141254, "grad_norm": 0.36663949489593506, "learning_rate": 0.00016488084418417347, "loss": 11.6746, "step": 39472 }, { "epoch": 0.8262789918780876, "grad_norm": 0.2575491964817047, "learning_rate": 0.00016487917575679396, "loss": 11.6704, "step": 39473 }, { "epoch": 0.8262999246420497, "grad_norm": 0.317206472158432, "learning_rate": 0.00016487750729822577, "loss": 11.6675, "step": 39474 }, { "epoch": 0.8263208574060119, "grad_norm": 0.31387317180633545, "learning_rate": 0.0001648758388084697, "loss": 11.6506, "step": 39475 }, { "epoch": 0.826341790169974, "grad_norm": 0.2618270516395569, "learning_rate": 0.00016487417028752645, "loss": 11.6733, "step": 39476 }, { "epoch": 0.8263627229339362, "grad_norm": 0.3322145640850067, "learning_rate": 0.0001648725017353969, "loss": 11.6683, "step": 39477 }, { "epoch": 0.8263836556978984, "grad_norm": 0.295391321182251, "learning_rate": 0.00016487083315208184, "loss": 11.6735, "step": 39478 }, { "epoch": 0.8264045884618605, "grad_norm": 0.24655820429325104, "learning_rate": 0.00016486916453758207, "loss": 11.6667, "step": 39479 }, { "epoch": 0.8264255212258227, "grad_norm": 0.3582656681537628, "learning_rate": 0.0001648674958918984, "loss": 11.6756, "step": 39480 }, { "epoch": 0.8264464539897848, "grad_norm": 0.3167054057121277, "learning_rate": 0.00016486582721503162, "loss": 11.6501, "step": 39481 }, { "epoch": 0.826467386753747, "grad_norm": 0.33656206727027893, "learning_rate": 0.00016486415850698255, "loss": 11.6799, "step": 39482 }, { "epoch": 0.8264883195177091, "grad_norm": 0.2709413170814514, "learning_rate": 0.00016486248976775198, "loss": 11.6668, "step": 39483 }, { "epoch": 0.8265092522816713, "grad_norm": 0.2705536484718323, "learning_rate": 0.00016486082099734069, "loss": 11.6822, "step": 39484 }, { "epoch": 0.8265301850456335, "grad_norm": 0.3020389676094055, "learning_rate": 0.00016485915219574952, "loss": 11.6545, "step": 39485 }, { "epoch": 0.8265511178095956, "grad_norm": 0.3829314410686493, "learning_rate": 0.00016485748336297922, "loss": 11.6813, "step": 39486 }, { "epoch": 0.8265720505735578, "grad_norm": 0.26894524693489075, "learning_rate": 0.0001648558144990307, "loss": 11.6669, "step": 39487 }, { "epoch": 0.8265929833375198, "grad_norm": 0.31738340854644775, "learning_rate": 0.00016485414560390464, "loss": 11.6698, "step": 39488 }, { "epoch": 0.826613916101482, "grad_norm": 0.3465786278247833, "learning_rate": 0.00016485247667760188, "loss": 11.6562, "step": 39489 }, { "epoch": 0.8266348488654441, "grad_norm": 0.25793951749801636, "learning_rate": 0.00016485080772012326, "loss": 11.679, "step": 39490 }, { "epoch": 0.8266557816294063, "grad_norm": 0.31847748160362244, "learning_rate": 0.00016484913873146954, "loss": 11.6809, "step": 39491 }, { "epoch": 0.8266767143933685, "grad_norm": 0.37091681361198425, "learning_rate": 0.00016484746971164157, "loss": 11.6683, "step": 39492 }, { "epoch": 0.8266976471573306, "grad_norm": 0.23867787420749664, "learning_rate": 0.0001648458006606401, "loss": 11.6782, "step": 39493 }, { "epoch": 0.8267185799212928, "grad_norm": 0.2776179611682892, "learning_rate": 0.00016484413157846593, "loss": 11.6725, "step": 39494 }, { "epoch": 0.8267395126852549, "grad_norm": 0.2791401445865631, "learning_rate": 0.00016484246246511994, "loss": 11.6772, "step": 39495 }, { "epoch": 0.8267604454492171, "grad_norm": 0.3515281677246094, "learning_rate": 0.00016484079332060284, "loss": 11.6715, "step": 39496 }, { "epoch": 0.8267813782131793, "grad_norm": 0.3308868408203125, "learning_rate": 0.00016483912414491552, "loss": 11.6669, "step": 39497 }, { "epoch": 0.8268023109771414, "grad_norm": 0.33707478642463684, "learning_rate": 0.0001648374549380587, "loss": 11.6553, "step": 39498 }, { "epoch": 0.8268232437411036, "grad_norm": 0.3064318597316742, "learning_rate": 0.0001648357857000332, "loss": 11.6602, "step": 39499 }, { "epoch": 0.8268441765050657, "grad_norm": 0.2625412046909332, "learning_rate": 0.00016483411643083984, "loss": 11.6775, "step": 39500 }, { "epoch": 0.8268651092690279, "grad_norm": 0.34677091240882874, "learning_rate": 0.00016483244713047943, "loss": 11.6762, "step": 39501 }, { "epoch": 0.82688604203299, "grad_norm": 0.36429107189178467, "learning_rate": 0.00016483077779895277, "loss": 11.6729, "step": 39502 }, { "epoch": 0.8269069747969522, "grad_norm": 0.32665151357650757, "learning_rate": 0.00016482910843626064, "loss": 11.6714, "step": 39503 }, { "epoch": 0.8269279075609144, "grad_norm": 0.3513784408569336, "learning_rate": 0.0001648274390424039, "loss": 11.7077, "step": 39504 }, { "epoch": 0.8269488403248765, "grad_norm": 0.29059332609176636, "learning_rate": 0.00016482576961738326, "loss": 11.6722, "step": 39505 }, { "epoch": 0.8269697730888387, "grad_norm": 0.31846895813941956, "learning_rate": 0.0001648241001611996, "loss": 11.6684, "step": 39506 }, { "epoch": 0.8269907058528008, "grad_norm": 0.27532103657722473, "learning_rate": 0.0001648224306738537, "loss": 11.6661, "step": 39507 }, { "epoch": 0.827011638616763, "grad_norm": 0.26524418592453003, "learning_rate": 0.00016482076115534634, "loss": 11.6602, "step": 39508 }, { "epoch": 0.8270325713807251, "grad_norm": 0.2757309377193451, "learning_rate": 0.00016481909160567833, "loss": 11.6741, "step": 39509 }, { "epoch": 0.8270535041446873, "grad_norm": 0.43424898386001587, "learning_rate": 0.00016481742202485052, "loss": 11.6818, "step": 39510 }, { "epoch": 0.8270744369086495, "grad_norm": 0.3080790340900421, "learning_rate": 0.00016481575241286364, "loss": 11.666, "step": 39511 }, { "epoch": 0.8270953696726115, "grad_norm": 0.32685214281082153, "learning_rate": 0.00016481408276971857, "loss": 11.6667, "step": 39512 }, { "epoch": 0.8271163024365737, "grad_norm": 0.36198902130126953, "learning_rate": 0.00016481241309541608, "loss": 11.6757, "step": 39513 }, { "epoch": 0.8271372352005358, "grad_norm": 0.304393470287323, "learning_rate": 0.00016481074338995692, "loss": 11.6769, "step": 39514 }, { "epoch": 0.827158167964498, "grad_norm": 0.30232563614845276, "learning_rate": 0.00016480907365334194, "loss": 11.6678, "step": 39515 }, { "epoch": 0.8271791007284602, "grad_norm": 0.3169408440589905, "learning_rate": 0.00016480740388557196, "loss": 11.676, "step": 39516 }, { "epoch": 0.8272000334924223, "grad_norm": 0.2978608310222626, "learning_rate": 0.00016480573408664774, "loss": 11.669, "step": 39517 }, { "epoch": 0.8272209662563845, "grad_norm": 0.3111022412776947, "learning_rate": 0.00016480406425657012, "loss": 11.6771, "step": 39518 }, { "epoch": 0.8272418990203466, "grad_norm": 0.2987709939479828, "learning_rate": 0.0001648023943953399, "loss": 11.6668, "step": 39519 }, { "epoch": 0.8272628317843088, "grad_norm": 0.34146445989608765, "learning_rate": 0.00016480072450295784, "loss": 11.6823, "step": 39520 }, { "epoch": 0.8272837645482709, "grad_norm": 0.2745459973812103, "learning_rate": 0.0001647990545794248, "loss": 11.6634, "step": 39521 }, { "epoch": 0.8273046973122331, "grad_norm": 0.27394765615463257, "learning_rate": 0.00016479738462474154, "loss": 11.672, "step": 39522 }, { "epoch": 0.8273256300761953, "grad_norm": 0.3491719663143158, "learning_rate": 0.0001647957146389089, "loss": 11.6584, "step": 39523 }, { "epoch": 0.8273465628401574, "grad_norm": 0.27064231038093567, "learning_rate": 0.00016479404462192763, "loss": 11.6614, "step": 39524 }, { "epoch": 0.8273674956041196, "grad_norm": 0.349886417388916, "learning_rate": 0.00016479237457379858, "loss": 11.6602, "step": 39525 }, { "epoch": 0.8273884283680817, "grad_norm": 0.2760883867740631, "learning_rate": 0.00016479070449452253, "loss": 11.6862, "step": 39526 }, { "epoch": 0.8274093611320439, "grad_norm": 0.3374282419681549, "learning_rate": 0.00016478903438410031, "loss": 11.6637, "step": 39527 }, { "epoch": 0.827430293896006, "grad_norm": 0.3273395001888275, "learning_rate": 0.00016478736424253268, "loss": 11.6735, "step": 39528 }, { "epoch": 0.8274512266599682, "grad_norm": 1.058664083480835, "learning_rate": 0.00016478569406982047, "loss": 11.5787, "step": 39529 }, { "epoch": 0.8274721594239304, "grad_norm": 0.37243208289146423, "learning_rate": 0.0001647840238659645, "loss": 11.6575, "step": 39530 }, { "epoch": 0.8274930921878925, "grad_norm": 0.2252536416053772, "learning_rate": 0.00016478235363096553, "loss": 11.6531, "step": 39531 }, { "epoch": 0.8275140249518547, "grad_norm": 0.38396188616752625, "learning_rate": 0.0001647806833648244, "loss": 11.6807, "step": 39532 }, { "epoch": 0.8275349577158168, "grad_norm": 0.28765320777893066, "learning_rate": 0.00016477901306754186, "loss": 11.6801, "step": 39533 }, { "epoch": 0.827555890479779, "grad_norm": 0.3556584417819977, "learning_rate": 0.0001647773427391188, "loss": 11.6651, "step": 39534 }, { "epoch": 0.8275768232437412, "grad_norm": 0.32306718826293945, "learning_rate": 0.00016477567237955593, "loss": 11.6602, "step": 39535 }, { "epoch": 0.8275977560077032, "grad_norm": 0.30488091707229614, "learning_rate": 0.00016477400198885414, "loss": 11.663, "step": 39536 }, { "epoch": 0.8276186887716654, "grad_norm": 0.3434644639492035, "learning_rate": 0.00016477233156701417, "loss": 11.6733, "step": 39537 }, { "epoch": 0.8276396215356275, "grad_norm": 0.37594181299209595, "learning_rate": 0.00016477066111403682, "loss": 11.6595, "step": 39538 }, { "epoch": 0.8276605542995897, "grad_norm": 0.34619542956352234, "learning_rate": 0.00016476899062992293, "loss": 11.684, "step": 39539 }, { "epoch": 0.8276814870635518, "grad_norm": 0.37935781478881836, "learning_rate": 0.00016476732011467328, "loss": 11.6517, "step": 39540 }, { "epoch": 0.827702419827514, "grad_norm": 0.3766559660434723, "learning_rate": 0.00016476564956828874, "loss": 11.6809, "step": 39541 }, { "epoch": 0.8277233525914762, "grad_norm": 0.27163368463516235, "learning_rate": 0.00016476397899076997, "loss": 11.6752, "step": 39542 }, { "epoch": 0.8277442853554383, "grad_norm": 0.30125781893730164, "learning_rate": 0.00016476230838211793, "loss": 11.6564, "step": 39543 }, { "epoch": 0.8277652181194005, "grad_norm": 0.3084862530231476, "learning_rate": 0.00016476063774233327, "loss": 11.6585, "step": 39544 }, { "epoch": 0.8277861508833626, "grad_norm": 0.33722516894340515, "learning_rate": 0.00016475896707141693, "loss": 11.6639, "step": 39545 }, { "epoch": 0.8278070836473248, "grad_norm": 0.2858356833457947, "learning_rate": 0.00016475729636936966, "loss": 11.6711, "step": 39546 }, { "epoch": 0.8278280164112869, "grad_norm": 0.2968699634075165, "learning_rate": 0.00016475562563619223, "loss": 11.682, "step": 39547 }, { "epoch": 0.8278489491752491, "grad_norm": 0.329532653093338, "learning_rate": 0.00016475395487188549, "loss": 11.6675, "step": 39548 }, { "epoch": 0.8278698819392113, "grad_norm": 0.2782784700393677, "learning_rate": 0.0001647522840764502, "loss": 11.6713, "step": 39549 }, { "epoch": 0.8278908147031734, "grad_norm": 0.25755566358566284, "learning_rate": 0.00016475061324988723, "loss": 11.6593, "step": 39550 }, { "epoch": 0.8279117474671356, "grad_norm": 0.27848538756370544, "learning_rate": 0.00016474894239219733, "loss": 11.6833, "step": 39551 }, { "epoch": 0.8279326802310977, "grad_norm": 0.2676735818386078, "learning_rate": 0.00016474727150338133, "loss": 11.6732, "step": 39552 }, { "epoch": 0.8279536129950599, "grad_norm": 0.2676481604576111, "learning_rate": 0.00016474560058344, "loss": 11.6678, "step": 39553 }, { "epoch": 0.8279745457590221, "grad_norm": 0.4121203124523163, "learning_rate": 0.00016474392963237414, "loss": 11.6851, "step": 39554 }, { "epoch": 0.8279954785229842, "grad_norm": 0.2878895699977875, "learning_rate": 0.0001647422586501846, "loss": 11.6727, "step": 39555 }, { "epoch": 0.8280164112869464, "grad_norm": 0.33043932914733887, "learning_rate": 0.00016474058763687216, "loss": 11.6645, "step": 39556 }, { "epoch": 0.8280373440509085, "grad_norm": 0.26770466566085815, "learning_rate": 0.00016473891659243763, "loss": 11.6682, "step": 39557 }, { "epoch": 0.8280582768148707, "grad_norm": 0.39590147137641907, "learning_rate": 0.00016473724551688181, "loss": 11.6569, "step": 39558 }, { "epoch": 0.8280792095788327, "grad_norm": 0.2920260727405548, "learning_rate": 0.0001647355744102055, "loss": 11.6642, "step": 39559 }, { "epoch": 0.8281001423427949, "grad_norm": 0.3033800721168518, "learning_rate": 0.00016473390327240948, "loss": 11.6572, "step": 39560 }, { "epoch": 0.8281210751067571, "grad_norm": 0.28858694434165955, "learning_rate": 0.00016473223210349458, "loss": 11.683, "step": 39561 }, { "epoch": 0.8281420078707192, "grad_norm": 0.3264102041721344, "learning_rate": 0.00016473056090346157, "loss": 11.6738, "step": 39562 }, { "epoch": 0.8281629406346814, "grad_norm": 0.3765208125114441, "learning_rate": 0.00016472888967231133, "loss": 11.6687, "step": 39563 }, { "epoch": 0.8281838733986435, "grad_norm": 0.29173171520233154, "learning_rate": 0.0001647272184100446, "loss": 11.6752, "step": 39564 }, { "epoch": 0.8282048061626057, "grad_norm": 0.29831886291503906, "learning_rate": 0.0001647255471166622, "loss": 11.6622, "step": 39565 }, { "epoch": 0.8282257389265678, "grad_norm": 0.30742618441581726, "learning_rate": 0.00016472387579216492, "loss": 11.661, "step": 39566 }, { "epoch": 0.82824667169053, "grad_norm": 0.30238083004951477, "learning_rate": 0.00016472220443655358, "loss": 11.6771, "step": 39567 }, { "epoch": 0.8282676044544922, "grad_norm": 0.2728825509548187, "learning_rate": 0.00016472053304982898, "loss": 11.6698, "step": 39568 }, { "epoch": 0.8282885372184543, "grad_norm": 0.350539892911911, "learning_rate": 0.00016471886163199192, "loss": 11.6648, "step": 39569 }, { "epoch": 0.8283094699824165, "grad_norm": 0.30283963680267334, "learning_rate": 0.00016471719018304324, "loss": 11.6747, "step": 39570 }, { "epoch": 0.8283304027463786, "grad_norm": 0.3401685357093811, "learning_rate": 0.00016471551870298368, "loss": 11.6529, "step": 39571 }, { "epoch": 0.8283513355103408, "grad_norm": 0.3215751647949219, "learning_rate": 0.00016471384719181405, "loss": 11.683, "step": 39572 }, { "epoch": 0.828372268274303, "grad_norm": 0.3280247449874878, "learning_rate": 0.00016471217564953522, "loss": 11.6666, "step": 39573 }, { "epoch": 0.8283932010382651, "grad_norm": 0.34439370036125183, "learning_rate": 0.00016471050407614794, "loss": 11.6602, "step": 39574 }, { "epoch": 0.8284141338022273, "grad_norm": 0.3160904049873352, "learning_rate": 0.00016470883247165304, "loss": 11.6697, "step": 39575 }, { "epoch": 0.8284350665661894, "grad_norm": 0.49768558144569397, "learning_rate": 0.00016470716083605126, "loss": 11.6861, "step": 39576 }, { "epoch": 0.8284559993301516, "grad_norm": 0.23969966173171997, "learning_rate": 0.00016470548916934348, "loss": 11.6608, "step": 39577 }, { "epoch": 0.8284769320941137, "grad_norm": 0.30600738525390625, "learning_rate": 0.00016470381747153048, "loss": 11.6664, "step": 39578 }, { "epoch": 0.8284978648580759, "grad_norm": 0.32865846157073975, "learning_rate": 0.00016470214574261304, "loss": 11.6782, "step": 39579 }, { "epoch": 0.8285187976220381, "grad_norm": 0.3827492296695709, "learning_rate": 0.000164700473982592, "loss": 11.6565, "step": 39580 }, { "epoch": 0.8285397303860002, "grad_norm": 0.2772785425186157, "learning_rate": 0.0001646988021914681, "loss": 11.6869, "step": 39581 }, { "epoch": 0.8285606631499624, "grad_norm": 0.3002772033214569, "learning_rate": 0.00016469713036924223, "loss": 11.6688, "step": 39582 }, { "epoch": 0.8285815959139244, "grad_norm": 2.783205509185791, "learning_rate": 0.00016469545851591514, "loss": 11.6781, "step": 39583 }, { "epoch": 0.8286025286778866, "grad_norm": 0.27529802918434143, "learning_rate": 0.00016469378663148767, "loss": 11.6732, "step": 39584 }, { "epoch": 0.8286234614418487, "grad_norm": 0.3273457884788513, "learning_rate": 0.00016469211471596058, "loss": 11.6701, "step": 39585 }, { "epoch": 0.8286443942058109, "grad_norm": 0.32074177265167236, "learning_rate": 0.00016469044276933473, "loss": 11.6652, "step": 39586 }, { "epoch": 0.8286653269697731, "grad_norm": 0.6027844548225403, "learning_rate": 0.00016468877079161084, "loss": 11.6665, "step": 39587 }, { "epoch": 0.8286862597337352, "grad_norm": 0.2911580204963684, "learning_rate": 0.00016468709878278979, "loss": 11.6749, "step": 39588 }, { "epoch": 0.8287071924976974, "grad_norm": 0.28465721011161804, "learning_rate": 0.00016468542674287232, "loss": 11.6608, "step": 39589 }, { "epoch": 0.8287281252616595, "grad_norm": 0.3007550835609436, "learning_rate": 0.00016468375467185931, "loss": 11.6639, "step": 39590 }, { "epoch": 0.8287490580256217, "grad_norm": 0.29619523882865906, "learning_rate": 0.00016468208256975147, "loss": 11.6677, "step": 39591 }, { "epoch": 0.8287699907895839, "grad_norm": 0.3307803273200989, "learning_rate": 0.0001646804104365497, "loss": 11.6782, "step": 39592 }, { "epoch": 0.828790923553546, "grad_norm": 0.8400987386703491, "learning_rate": 0.00016467873827225477, "loss": 11.6993, "step": 39593 }, { "epoch": 0.8288118563175082, "grad_norm": 0.29668939113616943, "learning_rate": 0.00016467706607686746, "loss": 11.6611, "step": 39594 }, { "epoch": 0.8288327890814703, "grad_norm": 0.33998432755470276, "learning_rate": 0.0001646753938503886, "loss": 11.6591, "step": 39595 }, { "epoch": 0.8288537218454325, "grad_norm": 0.2433069497346878, "learning_rate": 0.00016467372159281895, "loss": 11.6617, "step": 39596 }, { "epoch": 0.8288746546093946, "grad_norm": 0.30799421668052673, "learning_rate": 0.00016467204930415937, "loss": 11.6776, "step": 39597 }, { "epoch": 0.8288955873733568, "grad_norm": 0.2834407389163971, "learning_rate": 0.00016467037698441063, "loss": 11.6757, "step": 39598 }, { "epoch": 0.828916520137319, "grad_norm": 0.3137953281402588, "learning_rate": 0.00016466870463357352, "loss": 11.6549, "step": 39599 }, { "epoch": 0.8289374529012811, "grad_norm": 0.3843342959880829, "learning_rate": 0.00016466703225164893, "loss": 11.67, "step": 39600 }, { "epoch": 0.8289583856652433, "grad_norm": 0.3325310945510864, "learning_rate": 0.00016466535983863755, "loss": 11.6706, "step": 39601 }, { "epoch": 0.8289793184292054, "grad_norm": 0.25145646929740906, "learning_rate": 0.00016466368739454027, "loss": 11.6626, "step": 39602 }, { "epoch": 0.8290002511931676, "grad_norm": 0.3629666864871979, "learning_rate": 0.00016466201491935784, "loss": 11.6628, "step": 39603 }, { "epoch": 0.8290211839571296, "grad_norm": 0.35103076696395874, "learning_rate": 0.00016466034241309108, "loss": 11.6896, "step": 39604 }, { "epoch": 0.8290421167210918, "grad_norm": 0.32608452439308167, "learning_rate": 0.00016465866987574082, "loss": 11.6628, "step": 39605 }, { "epoch": 0.829063049485054, "grad_norm": 0.2778143286705017, "learning_rate": 0.00016465699730730779, "loss": 11.6711, "step": 39606 }, { "epoch": 0.8290839822490161, "grad_norm": 0.27870601415634155, "learning_rate": 0.0001646553247077929, "loss": 11.6675, "step": 39607 }, { "epoch": 0.8291049150129783, "grad_norm": 0.3341194689273834, "learning_rate": 0.00016465365207719688, "loss": 11.6482, "step": 39608 }, { "epoch": 0.8291258477769404, "grad_norm": 0.29247623682022095, "learning_rate": 0.00016465197941552056, "loss": 11.6833, "step": 39609 }, { "epoch": 0.8291467805409026, "grad_norm": 0.2699805498123169, "learning_rate": 0.00016465030672276472, "loss": 11.6731, "step": 39610 }, { "epoch": 0.8291677133048648, "grad_norm": 0.34843650460243225, "learning_rate": 0.0001646486339989302, "loss": 11.6684, "step": 39611 }, { "epoch": 0.8291886460688269, "grad_norm": 0.2766028046607971, "learning_rate": 0.00016464696124401777, "loss": 11.6819, "step": 39612 }, { "epoch": 0.8292095788327891, "grad_norm": 0.39800500869750977, "learning_rate": 0.00016464528845802824, "loss": 11.6478, "step": 39613 }, { "epoch": 0.8292305115967512, "grad_norm": 0.28339189291000366, "learning_rate": 0.00016464361564096246, "loss": 11.6753, "step": 39614 }, { "epoch": 0.8292514443607134, "grad_norm": 0.4212089776992798, "learning_rate": 0.0001646419427928212, "loss": 11.6718, "step": 39615 }, { "epoch": 0.8292723771246755, "grad_norm": 0.2724648118019104, "learning_rate": 0.00016464026991360522, "loss": 11.683, "step": 39616 }, { "epoch": 0.8292933098886377, "grad_norm": 0.35887396335601807, "learning_rate": 0.0001646385970033154, "loss": 11.66, "step": 39617 }, { "epoch": 0.8293142426525999, "grad_norm": 0.26362890005111694, "learning_rate": 0.0001646369240619525, "loss": 11.6544, "step": 39618 }, { "epoch": 0.829335175416562, "grad_norm": 0.40560945868492126, "learning_rate": 0.00016463525108951737, "loss": 11.6541, "step": 39619 }, { "epoch": 0.8293561081805242, "grad_norm": 0.3049190938472748, "learning_rate": 0.00016463357808601072, "loss": 11.6558, "step": 39620 }, { "epoch": 0.8293770409444863, "grad_norm": 0.3985920548439026, "learning_rate": 0.00016463190505143342, "loss": 11.6803, "step": 39621 }, { "epoch": 0.8293979737084485, "grad_norm": 0.30550557374954224, "learning_rate": 0.00016463023198578632, "loss": 11.6816, "step": 39622 }, { "epoch": 0.8294189064724106, "grad_norm": 0.3560972511768341, "learning_rate": 0.00016462855888907014, "loss": 11.6808, "step": 39623 }, { "epoch": 0.8294398392363728, "grad_norm": 0.2833670973777771, "learning_rate": 0.0001646268857612857, "loss": 11.6701, "step": 39624 }, { "epoch": 0.829460772000335, "grad_norm": 0.2929566502571106, "learning_rate": 0.00016462521260243384, "loss": 11.6702, "step": 39625 }, { "epoch": 0.8294817047642971, "grad_norm": 0.3100277781486511, "learning_rate": 0.00016462353941251533, "loss": 11.6872, "step": 39626 }, { "epoch": 0.8295026375282593, "grad_norm": 0.2781696915626526, "learning_rate": 0.00016462186619153104, "loss": 11.6594, "step": 39627 }, { "epoch": 0.8295235702922213, "grad_norm": 0.35152819752693176, "learning_rate": 0.00016462019293948167, "loss": 11.6731, "step": 39628 }, { "epoch": 0.8295445030561835, "grad_norm": 0.30387866497039795, "learning_rate": 0.0001646185196563681, "loss": 11.6593, "step": 39629 }, { "epoch": 0.8295654358201457, "grad_norm": 0.32535165548324585, "learning_rate": 0.0001646168463421911, "loss": 11.6769, "step": 39630 }, { "epoch": 0.8295863685841078, "grad_norm": 0.2682834267616272, "learning_rate": 0.0001646151729969515, "loss": 11.6691, "step": 39631 }, { "epoch": 0.82960730134807, "grad_norm": 0.34465491771698, "learning_rate": 0.00016461349962065009, "loss": 11.6813, "step": 39632 }, { "epoch": 0.8296282341120321, "grad_norm": 0.5252966284751892, "learning_rate": 0.00016461182621328767, "loss": 11.6573, "step": 39633 }, { "epoch": 0.8296491668759943, "grad_norm": 0.3300120234489441, "learning_rate": 0.00016461015277486504, "loss": 11.6627, "step": 39634 }, { "epoch": 0.8296700996399564, "grad_norm": 0.32626453042030334, "learning_rate": 0.00016460847930538304, "loss": 11.6614, "step": 39635 }, { "epoch": 0.8296910324039186, "grad_norm": 0.302282452583313, "learning_rate": 0.00016460680580484245, "loss": 11.6702, "step": 39636 }, { "epoch": 0.8297119651678808, "grad_norm": 0.26428884267807007, "learning_rate": 0.00016460513227324402, "loss": 11.68, "step": 39637 }, { "epoch": 0.8297328979318429, "grad_norm": 1.250330924987793, "learning_rate": 0.00016460345871058867, "loss": 11.6812, "step": 39638 }, { "epoch": 0.8297538306958051, "grad_norm": 0.3208487033843994, "learning_rate": 0.00016460178511687712, "loss": 11.6808, "step": 39639 }, { "epoch": 0.8297747634597672, "grad_norm": 0.35247713327407837, "learning_rate": 0.0001646001114921102, "loss": 11.6817, "step": 39640 }, { "epoch": 0.8297956962237294, "grad_norm": 0.3196355700492859, "learning_rate": 0.00016459843783628872, "loss": 11.6605, "step": 39641 }, { "epoch": 0.8298166289876915, "grad_norm": 0.33823162317276, "learning_rate": 0.00016459676414941346, "loss": 11.6727, "step": 39642 }, { "epoch": 0.8298375617516537, "grad_norm": 0.3356427848339081, "learning_rate": 0.00016459509043148524, "loss": 11.684, "step": 39643 }, { "epoch": 0.8298584945156159, "grad_norm": 0.3458981513977051, "learning_rate": 0.0001645934166825049, "loss": 11.6715, "step": 39644 }, { "epoch": 0.829879427279578, "grad_norm": 0.3513336777687073, "learning_rate": 0.00016459174290247316, "loss": 11.6864, "step": 39645 }, { "epoch": 0.8299003600435402, "grad_norm": 0.2882665693759918, "learning_rate": 0.00016459006909139091, "loss": 11.6676, "step": 39646 }, { "epoch": 0.8299212928075023, "grad_norm": 0.38567057251930237, "learning_rate": 0.0001645883952492589, "loss": 11.6724, "step": 39647 }, { "epoch": 0.8299422255714645, "grad_norm": 0.2885335385799408, "learning_rate": 0.00016458672137607794, "loss": 11.6783, "step": 39648 }, { "epoch": 0.8299631583354267, "grad_norm": 0.26055434346199036, "learning_rate": 0.00016458504747184888, "loss": 11.6727, "step": 39649 }, { "epoch": 0.8299840910993888, "grad_norm": 0.3047668933868408, "learning_rate": 0.0001645833735365725, "loss": 11.6584, "step": 39650 }, { "epoch": 0.830005023863351, "grad_norm": 0.2221127301454544, "learning_rate": 0.0001645816995702496, "loss": 11.6633, "step": 39651 }, { "epoch": 0.830025956627313, "grad_norm": 0.27655234932899475, "learning_rate": 0.00016458002557288094, "loss": 11.6708, "step": 39652 }, { "epoch": 0.8300468893912752, "grad_norm": 0.30858293175697327, "learning_rate": 0.0001645783515444674, "loss": 11.6718, "step": 39653 }, { "epoch": 0.8300678221552373, "grad_norm": 0.3119736611843109, "learning_rate": 0.00016457667748500976, "loss": 11.6869, "step": 39654 }, { "epoch": 0.8300887549191995, "grad_norm": 0.25249481201171875, "learning_rate": 0.00016457500339450877, "loss": 11.6541, "step": 39655 }, { "epoch": 0.8301096876831617, "grad_norm": 0.2537461817264557, "learning_rate": 0.00016457332927296534, "loss": 11.6698, "step": 39656 }, { "epoch": 0.8301306204471238, "grad_norm": 0.30836206674575806, "learning_rate": 0.00016457165512038017, "loss": 11.689, "step": 39657 }, { "epoch": 0.830151553211086, "grad_norm": 0.36426374316215515, "learning_rate": 0.00016456998093675415, "loss": 11.6665, "step": 39658 }, { "epoch": 0.8301724859750481, "grad_norm": 0.32792267203330994, "learning_rate": 0.000164568306722088, "loss": 11.6622, "step": 39659 }, { "epoch": 0.8301934187390103, "grad_norm": 0.2673652768135071, "learning_rate": 0.0001645666324763826, "loss": 11.6626, "step": 39660 }, { "epoch": 0.8302143515029724, "grad_norm": 0.37906619906425476, "learning_rate": 0.00016456495819963874, "loss": 11.6833, "step": 39661 }, { "epoch": 0.8302352842669346, "grad_norm": 0.26199790835380554, "learning_rate": 0.0001645632838918572, "loss": 11.6542, "step": 39662 }, { "epoch": 0.8302562170308968, "grad_norm": 0.35526612401008606, "learning_rate": 0.00016456160955303882, "loss": 11.69, "step": 39663 }, { "epoch": 0.8302771497948589, "grad_norm": 0.2568477392196655, "learning_rate": 0.0001645599351831843, "loss": 11.6714, "step": 39664 }, { "epoch": 0.8302980825588211, "grad_norm": 0.27090442180633545, "learning_rate": 0.00016455826078229458, "loss": 11.6642, "step": 39665 }, { "epoch": 0.8303190153227832, "grad_norm": 0.24306927621364594, "learning_rate": 0.0001645565863503704, "loss": 11.6844, "step": 39666 }, { "epoch": 0.8303399480867454, "grad_norm": 0.32876431941986084, "learning_rate": 0.0001645549118874126, "loss": 11.6748, "step": 39667 }, { "epoch": 0.8303608808507075, "grad_norm": 0.29394790530204773, "learning_rate": 0.00016455323739342193, "loss": 11.6494, "step": 39668 }, { "epoch": 0.8303818136146697, "grad_norm": 0.39628201723098755, "learning_rate": 0.00016455156286839925, "loss": 11.6763, "step": 39669 }, { "epoch": 0.8304027463786319, "grad_norm": 0.24463802576065063, "learning_rate": 0.0001645498883123453, "loss": 11.6746, "step": 39670 }, { "epoch": 0.830423679142594, "grad_norm": 0.29654088616371155, "learning_rate": 0.00016454821372526097, "loss": 11.6849, "step": 39671 }, { "epoch": 0.8304446119065562, "grad_norm": 0.29253560304641724, "learning_rate": 0.000164546539107147, "loss": 11.6605, "step": 39672 }, { "epoch": 0.8304655446705183, "grad_norm": 0.27195653319358826, "learning_rate": 0.00016454486445800423, "loss": 11.6532, "step": 39673 }, { "epoch": 0.8304864774344805, "grad_norm": 0.37643298506736755, "learning_rate": 0.00016454318977783344, "loss": 11.6633, "step": 39674 }, { "epoch": 0.8305074101984427, "grad_norm": 0.2958337664604187, "learning_rate": 0.00016454151506663546, "loss": 11.6789, "step": 39675 }, { "epoch": 0.8305283429624047, "grad_norm": 0.29621025919914246, "learning_rate": 0.00016453984032441103, "loss": 11.6647, "step": 39676 }, { "epoch": 0.8305492757263669, "grad_norm": 0.25335219502449036, "learning_rate": 0.00016453816555116104, "loss": 11.6663, "step": 39677 }, { "epoch": 0.830570208490329, "grad_norm": 0.30028462409973145, "learning_rate": 0.00016453649074688625, "loss": 11.6798, "step": 39678 }, { "epoch": 0.8305911412542912, "grad_norm": 0.3309844136238098, "learning_rate": 0.0001645348159115875, "loss": 11.6689, "step": 39679 }, { "epoch": 0.8306120740182533, "grad_norm": 0.4914068281650543, "learning_rate": 0.00016453314104526555, "loss": 11.6548, "step": 39680 }, { "epoch": 0.8306330067822155, "grad_norm": 0.26772668957710266, "learning_rate": 0.00016453146614792122, "loss": 11.6815, "step": 39681 }, { "epoch": 0.8306539395461777, "grad_norm": 0.3220071494579315, "learning_rate": 0.00016452979121955535, "loss": 11.6857, "step": 39682 }, { "epoch": 0.8306748723101398, "grad_norm": 0.3337877690792084, "learning_rate": 0.0001645281162601687, "loss": 11.6596, "step": 39683 }, { "epoch": 0.830695805074102, "grad_norm": 0.3363964855670929, "learning_rate": 0.0001645264412697621, "loss": 11.6569, "step": 39684 }, { "epoch": 0.8307167378380641, "grad_norm": 0.3213743567466736, "learning_rate": 0.00016452476624833633, "loss": 11.6695, "step": 39685 }, { "epoch": 0.8307376706020263, "grad_norm": 0.3177211582660675, "learning_rate": 0.00016452309119589222, "loss": 11.6722, "step": 39686 }, { "epoch": 0.8307586033659884, "grad_norm": 0.249663844704628, "learning_rate": 0.00016452141611243058, "loss": 11.6839, "step": 39687 }, { "epoch": 0.8307795361299506, "grad_norm": 0.2752735912799835, "learning_rate": 0.00016451974099795217, "loss": 11.6495, "step": 39688 }, { "epoch": 0.8308004688939128, "grad_norm": 0.28081220388412476, "learning_rate": 0.00016451806585245784, "loss": 11.6499, "step": 39689 }, { "epoch": 0.8308214016578749, "grad_norm": 0.335414320230484, "learning_rate": 0.0001645163906759484, "loss": 11.6817, "step": 39690 }, { "epoch": 0.8308423344218371, "grad_norm": 0.32680457830429077, "learning_rate": 0.0001645147154684246, "loss": 11.6518, "step": 39691 }, { "epoch": 0.8308632671857992, "grad_norm": 0.3989526033401489, "learning_rate": 0.00016451304022988735, "loss": 11.6679, "step": 39692 }, { "epoch": 0.8308841999497614, "grad_norm": 0.25185492634773254, "learning_rate": 0.0001645113649603373, "loss": 11.6622, "step": 39693 }, { "epoch": 0.8309051327137236, "grad_norm": 0.27458351850509644, "learning_rate": 0.0001645096896597754, "loss": 11.6735, "step": 39694 }, { "epoch": 0.8309260654776857, "grad_norm": 0.4207455813884735, "learning_rate": 0.00016450801432820242, "loss": 11.6826, "step": 39695 }, { "epoch": 0.8309469982416479, "grad_norm": 0.3405736982822418, "learning_rate": 0.0001645063389656191, "loss": 11.6536, "step": 39696 }, { "epoch": 0.83096793100561, "grad_norm": 0.33992651104927063, "learning_rate": 0.0001645046635720263, "loss": 11.6817, "step": 39697 }, { "epoch": 0.8309888637695722, "grad_norm": 0.29482313990592957, "learning_rate": 0.00016450298814742485, "loss": 11.6825, "step": 39698 }, { "epoch": 0.8310097965335342, "grad_norm": 0.24101358652114868, "learning_rate": 0.00016450131269181548, "loss": 11.6472, "step": 39699 }, { "epoch": 0.8310307292974964, "grad_norm": 0.2989726662635803, "learning_rate": 0.00016449963720519906, "loss": 11.6636, "step": 39700 }, { "epoch": 0.8310516620614586, "grad_norm": 0.24364694952964783, "learning_rate": 0.00016449796168757633, "loss": 11.6836, "step": 39701 }, { "epoch": 0.8310725948254207, "grad_norm": 0.35159075260162354, "learning_rate": 0.00016449628613894818, "loss": 11.6679, "step": 39702 }, { "epoch": 0.8310935275893829, "grad_norm": 0.2783138155937195, "learning_rate": 0.00016449461055931537, "loss": 11.662, "step": 39703 }, { "epoch": 0.831114460353345, "grad_norm": 0.2503949999809265, "learning_rate": 0.0001644929349486787, "loss": 11.6727, "step": 39704 }, { "epoch": 0.8311353931173072, "grad_norm": 0.33081769943237305, "learning_rate": 0.00016449125930703896, "loss": 11.6934, "step": 39705 }, { "epoch": 0.8311563258812693, "grad_norm": 0.37092325091362, "learning_rate": 0.00016448958363439703, "loss": 11.6509, "step": 39706 }, { "epoch": 0.8311772586452315, "grad_norm": 0.30433389544487, "learning_rate": 0.0001644879079307536, "loss": 11.679, "step": 39707 }, { "epoch": 0.8311981914091937, "grad_norm": 0.30809563398361206, "learning_rate": 0.0001644862321961096, "loss": 11.6754, "step": 39708 }, { "epoch": 0.8312191241731558, "grad_norm": 0.2723316550254822, "learning_rate": 0.00016448455643046573, "loss": 11.6739, "step": 39709 }, { "epoch": 0.831240056937118, "grad_norm": 0.33401650190353394, "learning_rate": 0.0001644828806338229, "loss": 11.6516, "step": 39710 }, { "epoch": 0.8312609897010801, "grad_norm": 0.31042325496673584, "learning_rate": 0.00016448120480618182, "loss": 11.685, "step": 39711 }, { "epoch": 0.8312819224650423, "grad_norm": 0.2513679265975952, "learning_rate": 0.00016447952894754333, "loss": 11.6537, "step": 39712 }, { "epoch": 0.8313028552290045, "grad_norm": 0.3623749315738678, "learning_rate": 0.00016447785305790822, "loss": 11.679, "step": 39713 }, { "epoch": 0.8313237879929666, "grad_norm": 0.28338897228240967, "learning_rate": 0.00016447617713727736, "loss": 11.6712, "step": 39714 }, { "epoch": 0.8313447207569288, "grad_norm": 0.28638970851898193, "learning_rate": 0.00016447450118565148, "loss": 11.6589, "step": 39715 }, { "epoch": 0.8313656535208909, "grad_norm": 0.29762324690818787, "learning_rate": 0.00016447282520303143, "loss": 11.6696, "step": 39716 }, { "epoch": 0.8313865862848531, "grad_norm": 0.2950426936149597, "learning_rate": 0.00016447114918941797, "loss": 11.6679, "step": 39717 }, { "epoch": 0.8314075190488152, "grad_norm": 0.25796154141426086, "learning_rate": 0.00016446947314481195, "loss": 11.6719, "step": 39718 }, { "epoch": 0.8314284518127774, "grad_norm": 0.3610004782676697, "learning_rate": 0.0001644677970692142, "loss": 11.6756, "step": 39719 }, { "epoch": 0.8314493845767396, "grad_norm": 0.2656373679637909, "learning_rate": 0.00016446612096262544, "loss": 11.6653, "step": 39720 }, { "epoch": 0.8314703173407016, "grad_norm": 0.3581995666027069, "learning_rate": 0.00016446444482504652, "loss": 11.6701, "step": 39721 }, { "epoch": 0.8314912501046638, "grad_norm": 0.41044026613235474, "learning_rate": 0.00016446276865647832, "loss": 11.6588, "step": 39722 }, { "epoch": 0.8315121828686259, "grad_norm": 0.30064302682876587, "learning_rate": 0.0001644610924569215, "loss": 11.673, "step": 39723 }, { "epoch": 0.8315331156325881, "grad_norm": 0.35609859228134155, "learning_rate": 0.00016445941622637698, "loss": 11.6633, "step": 39724 }, { "epoch": 0.8315540483965502, "grad_norm": 0.36919134855270386, "learning_rate": 0.00016445773996484552, "loss": 11.6594, "step": 39725 }, { "epoch": 0.8315749811605124, "grad_norm": 0.2694510817527771, "learning_rate": 0.0001644560636723279, "loss": 11.6546, "step": 39726 }, { "epoch": 0.8315959139244746, "grad_norm": 0.37995392084121704, "learning_rate": 0.000164454387348825, "loss": 11.6807, "step": 39727 }, { "epoch": 0.8316168466884367, "grad_norm": 0.3621516823768616, "learning_rate": 0.00016445271099433756, "loss": 11.6643, "step": 39728 }, { "epoch": 0.8316377794523989, "grad_norm": 0.24711935222148895, "learning_rate": 0.00016445103460886645, "loss": 11.6556, "step": 39729 }, { "epoch": 0.831658712216361, "grad_norm": 0.3273555636405945, "learning_rate": 0.0001644493581924124, "loss": 11.6477, "step": 39730 }, { "epoch": 0.8316796449803232, "grad_norm": 0.26997110247612, "learning_rate": 0.00016444768174497622, "loss": 11.6561, "step": 39731 }, { "epoch": 0.8317005777442854, "grad_norm": 0.28114113211631775, "learning_rate": 0.0001644460052665588, "loss": 11.664, "step": 39732 }, { "epoch": 0.8317215105082475, "grad_norm": 0.3085523247718811, "learning_rate": 0.00016444432875716087, "loss": 11.6687, "step": 39733 }, { "epoch": 0.8317424432722097, "grad_norm": 0.37517407536506653, "learning_rate": 0.0001644426522167833, "loss": 11.6622, "step": 39734 }, { "epoch": 0.8317633760361718, "grad_norm": 0.3268377482891083, "learning_rate": 0.00016444097564542683, "loss": 11.6673, "step": 39735 }, { "epoch": 0.831784308800134, "grad_norm": 0.26307857036590576, "learning_rate": 0.00016443929904309227, "loss": 11.6781, "step": 39736 }, { "epoch": 0.8318052415640961, "grad_norm": 0.28760695457458496, "learning_rate": 0.00016443762240978048, "loss": 11.6614, "step": 39737 }, { "epoch": 0.8318261743280583, "grad_norm": 0.2535071074962616, "learning_rate": 0.00016443594574549222, "loss": 11.6586, "step": 39738 }, { "epoch": 0.8318471070920205, "grad_norm": 0.44668686389923096, "learning_rate": 0.0001644342690502283, "loss": 11.6877, "step": 39739 }, { "epoch": 0.8318680398559826, "grad_norm": 0.3122086524963379, "learning_rate": 0.00016443259232398953, "loss": 11.6773, "step": 39740 }, { "epoch": 0.8318889726199448, "grad_norm": 0.24740780889987946, "learning_rate": 0.00016443091556677674, "loss": 11.6567, "step": 39741 }, { "epoch": 0.8319099053839069, "grad_norm": 0.23556596040725708, "learning_rate": 0.0001644292387785907, "loss": 11.6646, "step": 39742 }, { "epoch": 0.8319308381478691, "grad_norm": 0.25719401240348816, "learning_rate": 0.00016442756195943225, "loss": 11.6762, "step": 39743 }, { "epoch": 0.8319517709118311, "grad_norm": 0.392684668302536, "learning_rate": 0.0001644258851093022, "loss": 11.6694, "step": 39744 }, { "epoch": 0.8319727036757933, "grad_norm": 0.23363304138183594, "learning_rate": 0.0001644242082282013, "loss": 11.6713, "step": 39745 }, { "epoch": 0.8319936364397555, "grad_norm": 0.2555144131183624, "learning_rate": 0.00016442253131613042, "loss": 11.6705, "step": 39746 }, { "epoch": 0.8320145692037176, "grad_norm": 0.256961852312088, "learning_rate": 0.0001644208543730903, "loss": 11.6895, "step": 39747 }, { "epoch": 0.8320355019676798, "grad_norm": 0.2990618348121643, "learning_rate": 0.0001644191773990818, "loss": 11.6761, "step": 39748 }, { "epoch": 0.8320564347316419, "grad_norm": 0.3010408282279968, "learning_rate": 0.00016441750039410572, "loss": 11.6676, "step": 39749 }, { "epoch": 0.8320773674956041, "grad_norm": 0.311273992061615, "learning_rate": 0.00016441582335816285, "loss": 11.6906, "step": 39750 }, { "epoch": 0.8320983002595663, "grad_norm": 0.3537743389606476, "learning_rate": 0.00016441414629125403, "loss": 11.6814, "step": 39751 }, { "epoch": 0.8321192330235284, "grad_norm": 0.22376500070095062, "learning_rate": 0.00016441246919338, "loss": 11.6693, "step": 39752 }, { "epoch": 0.8321401657874906, "grad_norm": 0.2967754602432251, "learning_rate": 0.0001644107920645416, "loss": 11.6794, "step": 39753 }, { "epoch": 0.8321610985514527, "grad_norm": 0.31380459666252136, "learning_rate": 0.00016440911490473966, "loss": 11.6733, "step": 39754 }, { "epoch": 0.8321820313154149, "grad_norm": 0.34708964824676514, "learning_rate": 0.000164407437713975, "loss": 11.6663, "step": 39755 }, { "epoch": 0.832202964079377, "grad_norm": 0.311396062374115, "learning_rate": 0.00016440576049224837, "loss": 11.6589, "step": 39756 }, { "epoch": 0.8322238968433392, "grad_norm": 0.29458728432655334, "learning_rate": 0.00016440408323956058, "loss": 11.6882, "step": 39757 }, { "epoch": 0.8322448296073014, "grad_norm": 0.29900360107421875, "learning_rate": 0.00016440240595591246, "loss": 11.678, "step": 39758 }, { "epoch": 0.8322657623712635, "grad_norm": 0.3244853913784027, "learning_rate": 0.00016440072864130483, "loss": 11.6657, "step": 39759 }, { "epoch": 0.8322866951352257, "grad_norm": 0.32227402925491333, "learning_rate": 0.00016439905129573846, "loss": 11.6601, "step": 39760 }, { "epoch": 0.8323076278991878, "grad_norm": 0.6159155368804932, "learning_rate": 0.00016439737391921421, "loss": 11.6689, "step": 39761 }, { "epoch": 0.83232856066315, "grad_norm": 0.29799383878707886, "learning_rate": 0.0001643956965117328, "loss": 11.6634, "step": 39762 }, { "epoch": 0.8323494934271121, "grad_norm": 0.3249427080154419, "learning_rate": 0.00016439401907329513, "loss": 11.6606, "step": 39763 }, { "epoch": 0.8323704261910743, "grad_norm": 0.29070743918418884, "learning_rate": 0.00016439234160390195, "loss": 11.6833, "step": 39764 }, { "epoch": 0.8323913589550365, "grad_norm": 0.3931485116481781, "learning_rate": 0.0001643906641035541, "loss": 11.6702, "step": 39765 }, { "epoch": 0.8324122917189986, "grad_norm": 0.2755717933177948, "learning_rate": 0.00016438898657225234, "loss": 11.6511, "step": 39766 }, { "epoch": 0.8324332244829608, "grad_norm": 0.46082600951194763, "learning_rate": 0.00016438730900999752, "loss": 11.6803, "step": 39767 }, { "epoch": 0.8324541572469228, "grad_norm": 0.29588162899017334, "learning_rate": 0.00016438563141679043, "loss": 11.6429, "step": 39768 }, { "epoch": 0.832475090010885, "grad_norm": 0.32015278935432434, "learning_rate": 0.00016438395379263186, "loss": 11.6667, "step": 39769 }, { "epoch": 0.8324960227748472, "grad_norm": 0.33992820978164673, "learning_rate": 0.00016438227613752265, "loss": 11.6757, "step": 39770 }, { "epoch": 0.8325169555388093, "grad_norm": 0.2439434677362442, "learning_rate": 0.0001643805984514636, "loss": 11.6629, "step": 39771 }, { "epoch": 0.8325378883027715, "grad_norm": 0.31564339995384216, "learning_rate": 0.00016437892073445547, "loss": 11.6758, "step": 39772 }, { "epoch": 0.8325588210667336, "grad_norm": 0.34612977504730225, "learning_rate": 0.00016437724298649911, "loss": 11.6662, "step": 39773 }, { "epoch": 0.8325797538306958, "grad_norm": 0.3882790505886078, "learning_rate": 0.00016437556520759535, "loss": 11.6607, "step": 39774 }, { "epoch": 0.8326006865946579, "grad_norm": 0.38128119707107544, "learning_rate": 0.00016437388739774493, "loss": 11.6612, "step": 39775 }, { "epoch": 0.8326216193586201, "grad_norm": 0.2747221887111664, "learning_rate": 0.00016437220955694873, "loss": 11.6675, "step": 39776 }, { "epoch": 0.8326425521225823, "grad_norm": 0.24044080078601837, "learning_rate": 0.0001643705316852075, "loss": 11.6488, "step": 39777 }, { "epoch": 0.8326634848865444, "grad_norm": 0.3210832476615906, "learning_rate": 0.00016436885378252205, "loss": 11.6653, "step": 39778 }, { "epoch": 0.8326844176505066, "grad_norm": 0.30507227778434753, "learning_rate": 0.0001643671758488932, "loss": 11.6903, "step": 39779 }, { "epoch": 0.8327053504144687, "grad_norm": 0.30816319584846497, "learning_rate": 0.00016436549788432177, "loss": 11.6689, "step": 39780 }, { "epoch": 0.8327262831784309, "grad_norm": 0.2953324019908905, "learning_rate": 0.0001643638198888086, "loss": 11.6657, "step": 39781 }, { "epoch": 0.832747215942393, "grad_norm": 0.28067731857299805, "learning_rate": 0.0001643621418623544, "loss": 11.6534, "step": 39782 }, { "epoch": 0.8327681487063552, "grad_norm": 0.40972572565078735, "learning_rate": 0.00016436046380496005, "loss": 11.6895, "step": 39783 }, { "epoch": 0.8327890814703174, "grad_norm": 0.29812297224998474, "learning_rate": 0.0001643587857166263, "loss": 11.6641, "step": 39784 }, { "epoch": 0.8328100142342795, "grad_norm": 0.29544317722320557, "learning_rate": 0.000164357107597354, "loss": 11.6707, "step": 39785 }, { "epoch": 0.8328309469982417, "grad_norm": 0.26654675602912903, "learning_rate": 0.000164355429447144, "loss": 11.6601, "step": 39786 }, { "epoch": 0.8328518797622038, "grad_norm": 0.46869999170303345, "learning_rate": 0.000164353751265997, "loss": 11.6819, "step": 39787 }, { "epoch": 0.832872812526166, "grad_norm": 0.3336676359176636, "learning_rate": 0.00016435207305391393, "loss": 11.6715, "step": 39788 }, { "epoch": 0.8328937452901282, "grad_norm": 0.3011229932308197, "learning_rate": 0.00016435039481089544, "loss": 11.6723, "step": 39789 }, { "epoch": 0.8329146780540903, "grad_norm": 0.377687007188797, "learning_rate": 0.00016434871653694247, "loss": 11.6667, "step": 39790 }, { "epoch": 0.8329356108180525, "grad_norm": 0.3133946657180786, "learning_rate": 0.0001643470382320558, "loss": 11.6676, "step": 39791 }, { "epoch": 0.8329565435820145, "grad_norm": 0.31148144602775574, "learning_rate": 0.00016434535989623617, "loss": 11.6703, "step": 39792 }, { "epoch": 0.8329774763459767, "grad_norm": 0.36068442463874817, "learning_rate": 0.0001643436815294845, "loss": 11.6638, "step": 39793 }, { "epoch": 0.8329984091099388, "grad_norm": 0.30908676981925964, "learning_rate": 0.00016434200313180148, "loss": 11.6801, "step": 39794 }, { "epoch": 0.833019341873901, "grad_norm": 0.3222334086894989, "learning_rate": 0.00016434032470318802, "loss": 11.6774, "step": 39795 }, { "epoch": 0.8330402746378632, "grad_norm": 0.3245813250541687, "learning_rate": 0.00016433864624364484, "loss": 11.6757, "step": 39796 }, { "epoch": 0.8330612074018253, "grad_norm": 0.31543976068496704, "learning_rate": 0.00016433696775317274, "loss": 11.6546, "step": 39797 }, { "epoch": 0.8330821401657875, "grad_norm": 0.29491645097732544, "learning_rate": 0.00016433528923177266, "loss": 11.6692, "step": 39798 }, { "epoch": 0.8331030729297496, "grad_norm": 0.32406535744667053, "learning_rate": 0.00016433361067944526, "loss": 11.6578, "step": 39799 }, { "epoch": 0.8331240056937118, "grad_norm": 0.2697191834449768, "learning_rate": 0.00016433193209619142, "loss": 11.669, "step": 39800 }, { "epoch": 0.8331449384576739, "grad_norm": 0.2792038321495056, "learning_rate": 0.00016433025348201193, "loss": 11.6594, "step": 39801 }, { "epoch": 0.8331658712216361, "grad_norm": 0.2520598769187927, "learning_rate": 0.0001643285748369076, "loss": 11.6494, "step": 39802 }, { "epoch": 0.8331868039855983, "grad_norm": 0.2878229320049286, "learning_rate": 0.00016432689616087924, "loss": 11.6622, "step": 39803 }, { "epoch": 0.8332077367495604, "grad_norm": 0.2625914216041565, "learning_rate": 0.00016432521745392764, "loss": 11.6615, "step": 39804 }, { "epoch": 0.8332286695135226, "grad_norm": 0.33386698365211487, "learning_rate": 0.00016432353871605364, "loss": 11.6651, "step": 39805 }, { "epoch": 0.8332496022774847, "grad_norm": 0.28312718868255615, "learning_rate": 0.000164321859947258, "loss": 11.6779, "step": 39806 }, { "epoch": 0.8332705350414469, "grad_norm": 0.33126574754714966, "learning_rate": 0.00016432018114754155, "loss": 11.6679, "step": 39807 }, { "epoch": 0.8332914678054091, "grad_norm": 0.26073381304740906, "learning_rate": 0.0001643185023169051, "loss": 11.6586, "step": 39808 }, { "epoch": 0.8333124005693712, "grad_norm": 0.36845630407333374, "learning_rate": 0.0001643168234553495, "loss": 11.6653, "step": 39809 }, { "epoch": 0.8333333333333334, "grad_norm": 0.27333152294158936, "learning_rate": 0.00016431514456287548, "loss": 11.6545, "step": 39810 }, { "epoch": 0.8333542660972955, "grad_norm": 0.339135080575943, "learning_rate": 0.0001643134656394839, "loss": 11.6724, "step": 39811 }, { "epoch": 0.8333751988612577, "grad_norm": 0.2848415672779083, "learning_rate": 0.00016431178668517552, "loss": 11.6553, "step": 39812 }, { "epoch": 0.8333961316252197, "grad_norm": 0.231832355260849, "learning_rate": 0.00016431010769995117, "loss": 11.6732, "step": 39813 }, { "epoch": 0.833417064389182, "grad_norm": 0.32170337438583374, "learning_rate": 0.0001643084286838117, "loss": 11.6718, "step": 39814 }, { "epoch": 0.8334379971531441, "grad_norm": 0.298118531703949, "learning_rate": 0.00016430674963675786, "loss": 11.6761, "step": 39815 }, { "epoch": 0.8334589299171062, "grad_norm": 0.3352767825126648, "learning_rate": 0.00016430507055879048, "loss": 11.6685, "step": 39816 }, { "epoch": 0.8334798626810684, "grad_norm": 0.3188370168209076, "learning_rate": 0.00016430339144991036, "loss": 11.6735, "step": 39817 }, { "epoch": 0.8335007954450305, "grad_norm": 0.337703675031662, "learning_rate": 0.0001643017123101183, "loss": 11.6602, "step": 39818 }, { "epoch": 0.8335217282089927, "grad_norm": 0.2695256769657135, "learning_rate": 0.00016430003313941513, "loss": 11.6738, "step": 39819 }, { "epoch": 0.8335426609729548, "grad_norm": 0.29922303557395935, "learning_rate": 0.00016429835393780165, "loss": 11.678, "step": 39820 }, { "epoch": 0.833563593736917, "grad_norm": 0.264658659696579, "learning_rate": 0.00016429667470527868, "loss": 11.6646, "step": 39821 }, { "epoch": 0.8335845265008792, "grad_norm": 0.45078715682029724, "learning_rate": 0.00016429499544184698, "loss": 11.6601, "step": 39822 }, { "epoch": 0.8336054592648413, "grad_norm": 0.3092343211174011, "learning_rate": 0.0001642933161475074, "loss": 11.678, "step": 39823 }, { "epoch": 0.8336263920288035, "grad_norm": 0.33879554271698, "learning_rate": 0.00016429163682226073, "loss": 11.6726, "step": 39824 }, { "epoch": 0.8336473247927656, "grad_norm": 0.2689576745033264, "learning_rate": 0.00016428995746610778, "loss": 11.6756, "step": 39825 }, { "epoch": 0.8336682575567278, "grad_norm": 0.38249415159225464, "learning_rate": 0.00016428827807904935, "loss": 11.6752, "step": 39826 }, { "epoch": 0.83368919032069, "grad_norm": 0.28712403774261475, "learning_rate": 0.00016428659866108626, "loss": 11.6787, "step": 39827 }, { "epoch": 0.8337101230846521, "grad_norm": 0.2763538658618927, "learning_rate": 0.00016428491921221934, "loss": 11.6529, "step": 39828 }, { "epoch": 0.8337310558486143, "grad_norm": 0.2949061691761017, "learning_rate": 0.00016428323973244932, "loss": 11.651, "step": 39829 }, { "epoch": 0.8337519886125764, "grad_norm": 0.30949699878692627, "learning_rate": 0.00016428156022177713, "loss": 11.6814, "step": 39830 }, { "epoch": 0.8337729213765386, "grad_norm": 0.35630616545677185, "learning_rate": 0.00016427988068020346, "loss": 11.6794, "step": 39831 }, { "epoch": 0.8337938541405007, "grad_norm": 0.255604088306427, "learning_rate": 0.00016427820110772914, "loss": 11.6677, "step": 39832 }, { "epoch": 0.8338147869044629, "grad_norm": 0.2304411679506302, "learning_rate": 0.00016427652150435504, "loss": 11.6594, "step": 39833 }, { "epoch": 0.8338357196684251, "grad_norm": 0.2910088896751404, "learning_rate": 0.00016427484187008193, "loss": 11.6746, "step": 39834 }, { "epoch": 0.8338566524323872, "grad_norm": 0.2952984571456909, "learning_rate": 0.0001642731622049106, "loss": 11.6635, "step": 39835 }, { "epoch": 0.8338775851963494, "grad_norm": 0.33302924036979675, "learning_rate": 0.00016427148250884187, "loss": 11.6722, "step": 39836 }, { "epoch": 0.8338985179603114, "grad_norm": 0.2709106206893921, "learning_rate": 0.00016426980278187657, "loss": 11.6407, "step": 39837 }, { "epoch": 0.8339194507242736, "grad_norm": 0.3355311453342438, "learning_rate": 0.00016426812302401547, "loss": 11.6768, "step": 39838 }, { "epoch": 0.8339403834882357, "grad_norm": 0.2631605863571167, "learning_rate": 0.0001642664432352594, "loss": 11.6575, "step": 39839 }, { "epoch": 0.8339613162521979, "grad_norm": 0.2815013825893402, "learning_rate": 0.00016426476341560917, "loss": 11.6821, "step": 39840 }, { "epoch": 0.8339822490161601, "grad_norm": 0.3083473742008209, "learning_rate": 0.00016426308356506556, "loss": 11.6761, "step": 39841 }, { "epoch": 0.8340031817801222, "grad_norm": 0.34105709195137024, "learning_rate": 0.0001642614036836294, "loss": 11.6551, "step": 39842 }, { "epoch": 0.8340241145440844, "grad_norm": 0.30834195017814636, "learning_rate": 0.00016425972377130153, "loss": 11.6661, "step": 39843 }, { "epoch": 0.8340450473080465, "grad_norm": 0.23731426894664764, "learning_rate": 0.0001642580438280827, "loss": 11.6659, "step": 39844 }, { "epoch": 0.8340659800720087, "grad_norm": 0.3426615595817566, "learning_rate": 0.00016425636385397375, "loss": 11.6741, "step": 39845 }, { "epoch": 0.8340869128359708, "grad_norm": 0.2943728268146515, "learning_rate": 0.00016425468384897546, "loss": 11.6618, "step": 39846 }, { "epoch": 0.834107845599933, "grad_norm": 0.31502819061279297, "learning_rate": 0.00016425300381308865, "loss": 11.6685, "step": 39847 }, { "epoch": 0.8341287783638952, "grad_norm": 0.32585570216178894, "learning_rate": 0.00016425132374631415, "loss": 11.675, "step": 39848 }, { "epoch": 0.8341497111278573, "grad_norm": 0.3838004767894745, "learning_rate": 0.00016424964364865276, "loss": 11.6743, "step": 39849 }, { "epoch": 0.8341706438918195, "grad_norm": 0.27985435724258423, "learning_rate": 0.00016424796352010524, "loss": 11.6731, "step": 39850 }, { "epoch": 0.8341915766557816, "grad_norm": 0.290461927652359, "learning_rate": 0.00016424628336067246, "loss": 11.6577, "step": 39851 }, { "epoch": 0.8342125094197438, "grad_norm": 0.2850929796695709, "learning_rate": 0.00016424460317035524, "loss": 11.6726, "step": 39852 }, { "epoch": 0.834233442183706, "grad_norm": 0.27478039264678955, "learning_rate": 0.00016424292294915432, "loss": 11.6623, "step": 39853 }, { "epoch": 0.8342543749476681, "grad_norm": 0.25640609860420227, "learning_rate": 0.00016424124269707053, "loss": 11.6856, "step": 39854 }, { "epoch": 0.8342753077116303, "grad_norm": 0.28606948256492615, "learning_rate": 0.0001642395624141047, "loss": 11.6827, "step": 39855 }, { "epoch": 0.8342962404755924, "grad_norm": 0.28106018900871277, "learning_rate": 0.00016423788210025763, "loss": 11.6551, "step": 39856 }, { "epoch": 0.8343171732395546, "grad_norm": 0.2533259689807892, "learning_rate": 0.00016423620175553012, "loss": 11.6581, "step": 39857 }, { "epoch": 0.8343381060035167, "grad_norm": 0.2906017601490021, "learning_rate": 0.00016423452137992296, "loss": 11.6636, "step": 39858 }, { "epoch": 0.8343590387674789, "grad_norm": 0.6317877173423767, "learning_rate": 0.00016423284097343702, "loss": 11.5953, "step": 39859 }, { "epoch": 0.8343799715314411, "grad_norm": 0.30864161252975464, "learning_rate": 0.000164231160536073, "loss": 11.6591, "step": 39860 }, { "epoch": 0.8344009042954031, "grad_norm": 0.2875896990299225, "learning_rate": 0.00016422948006783184, "loss": 11.6844, "step": 39861 }, { "epoch": 0.8344218370593653, "grad_norm": 0.2737908661365509, "learning_rate": 0.00016422779956871424, "loss": 11.6647, "step": 39862 }, { "epoch": 0.8344427698233274, "grad_norm": 0.3058261275291443, "learning_rate": 0.00016422611903872106, "loss": 11.6562, "step": 39863 }, { "epoch": 0.8344637025872896, "grad_norm": 0.30078360438346863, "learning_rate": 0.00016422443847785313, "loss": 11.6634, "step": 39864 }, { "epoch": 0.8344846353512517, "grad_norm": 0.3552423417568207, "learning_rate": 0.00016422275788611116, "loss": 11.681, "step": 39865 }, { "epoch": 0.8345055681152139, "grad_norm": 0.3091481029987335, "learning_rate": 0.0001642210772634961, "loss": 11.67, "step": 39866 }, { "epoch": 0.8345265008791761, "grad_norm": 0.32968828082084656, "learning_rate": 0.0001642193966100086, "loss": 11.6745, "step": 39867 }, { "epoch": 0.8345474336431382, "grad_norm": 0.3353171646595001, "learning_rate": 0.00016421771592564958, "loss": 11.6571, "step": 39868 }, { "epoch": 0.8345683664071004, "grad_norm": 0.28925469517707825, "learning_rate": 0.00016421603521041983, "loss": 11.6665, "step": 39869 }, { "epoch": 0.8345892991710625, "grad_norm": 0.3658929765224457, "learning_rate": 0.00016421435446432016, "loss": 11.67, "step": 39870 }, { "epoch": 0.8346102319350247, "grad_norm": 1.1414958238601685, "learning_rate": 0.00016421267368735135, "loss": 11.6419, "step": 39871 }, { "epoch": 0.8346311646989869, "grad_norm": 0.29456838965415955, "learning_rate": 0.00016421099287951423, "loss": 11.6702, "step": 39872 }, { "epoch": 0.834652097462949, "grad_norm": 0.29481571912765503, "learning_rate": 0.00016420931204080957, "loss": 11.6702, "step": 39873 }, { "epoch": 0.8346730302269112, "grad_norm": 0.3558582365512848, "learning_rate": 0.0001642076311712382, "loss": 11.6687, "step": 39874 }, { "epoch": 0.8346939629908733, "grad_norm": 0.282193124294281, "learning_rate": 0.00016420595027080096, "loss": 11.6783, "step": 39875 }, { "epoch": 0.8347148957548355, "grad_norm": 0.31266114115715027, "learning_rate": 0.00016420426933949864, "loss": 11.6742, "step": 39876 }, { "epoch": 0.8347358285187976, "grad_norm": 0.39202073216438293, "learning_rate": 0.00016420258837733203, "loss": 11.6771, "step": 39877 }, { "epoch": 0.8347567612827598, "grad_norm": 0.28567901253700256, "learning_rate": 0.00016420090738430195, "loss": 11.6523, "step": 39878 }, { "epoch": 0.834777694046722, "grad_norm": 0.2774566113948822, "learning_rate": 0.00016419922636040922, "loss": 11.6679, "step": 39879 }, { "epoch": 0.8347986268106841, "grad_norm": 0.32143527269363403, "learning_rate": 0.00016419754530565462, "loss": 11.6751, "step": 39880 }, { "epoch": 0.8348195595746463, "grad_norm": 0.3254731595516205, "learning_rate": 0.000164195864220039, "loss": 11.6781, "step": 39881 }, { "epoch": 0.8348404923386084, "grad_norm": 0.339250385761261, "learning_rate": 0.0001641941831035631, "loss": 11.6777, "step": 39882 }, { "epoch": 0.8348614251025706, "grad_norm": 0.31438055634498596, "learning_rate": 0.0001641925019562278, "loss": 11.6589, "step": 39883 }, { "epoch": 0.8348823578665326, "grad_norm": 0.2790663242340088, "learning_rate": 0.00016419082077803385, "loss": 11.6639, "step": 39884 }, { "epoch": 0.8349032906304948, "grad_norm": 0.28015097975730896, "learning_rate": 0.0001641891395689821, "loss": 11.6664, "step": 39885 }, { "epoch": 0.834924223394457, "grad_norm": 0.2627134621143341, "learning_rate": 0.00016418745832907336, "loss": 11.6584, "step": 39886 }, { "epoch": 0.8349451561584191, "grad_norm": 0.3472633361816406, "learning_rate": 0.0001641857770583084, "loss": 11.6792, "step": 39887 }, { "epoch": 0.8349660889223813, "grad_norm": 0.32734215259552, "learning_rate": 0.00016418409575668806, "loss": 11.6912, "step": 39888 }, { "epoch": 0.8349870216863434, "grad_norm": 0.2369595170021057, "learning_rate": 0.00016418241442421315, "loss": 11.6723, "step": 39889 }, { "epoch": 0.8350079544503056, "grad_norm": 0.33051469922065735, "learning_rate": 0.00016418073306088445, "loss": 11.653, "step": 39890 }, { "epoch": 0.8350288872142678, "grad_norm": 0.3621835708618164, "learning_rate": 0.00016417905166670276, "loss": 11.6624, "step": 39891 }, { "epoch": 0.8350498199782299, "grad_norm": 0.3318089544773102, "learning_rate": 0.00016417737024166894, "loss": 11.6537, "step": 39892 }, { "epoch": 0.8350707527421921, "grad_norm": 0.32552775740623474, "learning_rate": 0.0001641756887857838, "loss": 11.6817, "step": 39893 }, { "epoch": 0.8350916855061542, "grad_norm": 0.3415771722793579, "learning_rate": 0.0001641740072990481, "loss": 11.6667, "step": 39894 }, { "epoch": 0.8351126182701164, "grad_norm": 0.2543778717517853, "learning_rate": 0.00016417232578146265, "loss": 11.6764, "step": 39895 }, { "epoch": 0.8351335510340785, "grad_norm": 0.3545922338962555, "learning_rate": 0.0001641706442330283, "loss": 11.6649, "step": 39896 }, { "epoch": 0.8351544837980407, "grad_norm": 0.2500554323196411, "learning_rate": 0.00016416896265374582, "loss": 11.6596, "step": 39897 }, { "epoch": 0.8351754165620029, "grad_norm": 0.4137142300605774, "learning_rate": 0.00016416728104361606, "loss": 11.6749, "step": 39898 }, { "epoch": 0.835196349325965, "grad_norm": 0.24148446321487427, "learning_rate": 0.00016416559940263977, "loss": 11.6688, "step": 39899 }, { "epoch": 0.8352172820899272, "grad_norm": 0.30432066321372986, "learning_rate": 0.00016416391773081782, "loss": 11.6642, "step": 39900 }, { "epoch": 0.8352382148538893, "grad_norm": 0.28940731287002563, "learning_rate": 0.00016416223602815094, "loss": 11.6872, "step": 39901 }, { "epoch": 0.8352591476178515, "grad_norm": 0.30698734521865845, "learning_rate": 0.00016416055429464003, "loss": 11.6695, "step": 39902 }, { "epoch": 0.8352800803818136, "grad_norm": 0.2927589416503906, "learning_rate": 0.00016415887253028587, "loss": 11.6596, "step": 39903 }, { "epoch": 0.8353010131457758, "grad_norm": 0.3487957715988159, "learning_rate": 0.00016415719073508922, "loss": 11.6768, "step": 39904 }, { "epoch": 0.835321945909738, "grad_norm": 0.27910545468330383, "learning_rate": 0.00016415550890905092, "loss": 11.6781, "step": 39905 }, { "epoch": 0.8353428786737, "grad_norm": 0.36060234904289246, "learning_rate": 0.00016415382705217178, "loss": 11.6596, "step": 39906 }, { "epoch": 0.8353638114376623, "grad_norm": 0.29373472929000854, "learning_rate": 0.00016415214516445263, "loss": 11.6873, "step": 39907 }, { "epoch": 0.8353847442016243, "grad_norm": 0.35818493366241455, "learning_rate": 0.00016415046324589426, "loss": 11.6701, "step": 39908 }, { "epoch": 0.8354056769655865, "grad_norm": 0.33116960525512695, "learning_rate": 0.00016414878129649747, "loss": 11.6535, "step": 39909 }, { "epoch": 0.8354266097295487, "grad_norm": 0.2628309428691864, "learning_rate": 0.0001641470993162631, "loss": 11.6766, "step": 39910 }, { "epoch": 0.8354475424935108, "grad_norm": 0.33193260431289673, "learning_rate": 0.00016414541730519188, "loss": 11.6746, "step": 39911 }, { "epoch": 0.835468475257473, "grad_norm": 0.31950995326042175, "learning_rate": 0.0001641437352632847, "loss": 11.658, "step": 39912 }, { "epoch": 0.8354894080214351, "grad_norm": 0.2753004729747772, "learning_rate": 0.00016414205319054234, "loss": 11.6706, "step": 39913 }, { "epoch": 0.8355103407853973, "grad_norm": 0.2887028157711029, "learning_rate": 0.0001641403710869656, "loss": 11.6629, "step": 39914 }, { "epoch": 0.8355312735493594, "grad_norm": 0.45464083552360535, "learning_rate": 0.0001641386889525553, "loss": 11.6829, "step": 39915 }, { "epoch": 0.8355522063133216, "grad_norm": 0.29937902092933655, "learning_rate": 0.00016413700678731227, "loss": 11.6626, "step": 39916 }, { "epoch": 0.8355731390772838, "grad_norm": 0.33353373408317566, "learning_rate": 0.00016413532459123728, "loss": 11.6862, "step": 39917 }, { "epoch": 0.8355940718412459, "grad_norm": 0.2993420362472534, "learning_rate": 0.00016413364236433114, "loss": 11.6768, "step": 39918 }, { "epoch": 0.8356150046052081, "grad_norm": 0.31829264760017395, "learning_rate": 0.0001641319601065947, "loss": 11.675, "step": 39919 }, { "epoch": 0.8356359373691702, "grad_norm": 0.33213627338409424, "learning_rate": 0.00016413027781802874, "loss": 11.6638, "step": 39920 }, { "epoch": 0.8356568701331324, "grad_norm": 0.31560543179512024, "learning_rate": 0.00016412859549863406, "loss": 11.6516, "step": 39921 }, { "epoch": 0.8356778028970945, "grad_norm": 0.3454817235469818, "learning_rate": 0.00016412691314841147, "loss": 11.6721, "step": 39922 }, { "epoch": 0.8356987356610567, "grad_norm": 0.2893848121166229, "learning_rate": 0.00016412523076736178, "loss": 11.6608, "step": 39923 }, { "epoch": 0.8357196684250189, "grad_norm": 0.2564536929130554, "learning_rate": 0.00016412354835548584, "loss": 11.6752, "step": 39924 }, { "epoch": 0.835740601188981, "grad_norm": 0.33295169472694397, "learning_rate": 0.0001641218659127844, "loss": 11.677, "step": 39925 }, { "epoch": 0.8357615339529432, "grad_norm": 0.5079537630081177, "learning_rate": 0.00016412018343925832, "loss": 11.6895, "step": 39926 }, { "epoch": 0.8357824667169053, "grad_norm": 0.323600172996521, "learning_rate": 0.00016411850093490837, "loss": 11.6586, "step": 39927 }, { "epoch": 0.8358033994808675, "grad_norm": 0.3064766228199005, "learning_rate": 0.00016411681839973536, "loss": 11.6712, "step": 39928 }, { "epoch": 0.8358243322448297, "grad_norm": 0.24212247133255005, "learning_rate": 0.0001641151358337401, "loss": 11.6663, "step": 39929 }, { "epoch": 0.8358452650087917, "grad_norm": 0.3694489896297455, "learning_rate": 0.00016411345323692343, "loss": 11.6632, "step": 39930 }, { "epoch": 0.835866197772754, "grad_norm": 0.28605520725250244, "learning_rate": 0.00016411177060928614, "loss": 11.6627, "step": 39931 }, { "epoch": 0.835887130536716, "grad_norm": 0.28471264243125916, "learning_rate": 0.00016411008795082905, "loss": 11.6612, "step": 39932 }, { "epoch": 0.8359080633006782, "grad_norm": 0.27584049105644226, "learning_rate": 0.00016410840526155293, "loss": 11.6626, "step": 39933 }, { "epoch": 0.8359289960646403, "grad_norm": 0.2979605495929718, "learning_rate": 0.00016410672254145862, "loss": 11.6565, "step": 39934 }, { "epoch": 0.8359499288286025, "grad_norm": 0.24201609194278717, "learning_rate": 0.00016410503979054692, "loss": 11.6792, "step": 39935 }, { "epoch": 0.8359708615925647, "grad_norm": 0.2898852527141571, "learning_rate": 0.00016410335700881866, "loss": 11.6765, "step": 39936 }, { "epoch": 0.8359917943565268, "grad_norm": 0.29606831073760986, "learning_rate": 0.0001641016741962746, "loss": 11.6599, "step": 39937 }, { "epoch": 0.836012727120489, "grad_norm": 0.28027328848838806, "learning_rate": 0.00016409999135291563, "loss": 11.6809, "step": 39938 }, { "epoch": 0.8360336598844511, "grad_norm": 0.26055747270584106, "learning_rate": 0.00016409830847874247, "loss": 11.6786, "step": 39939 }, { "epoch": 0.8360545926484133, "grad_norm": 0.2922966778278351, "learning_rate": 0.000164096625573756, "loss": 11.6597, "step": 39940 }, { "epoch": 0.8360755254123754, "grad_norm": 0.27683350443840027, "learning_rate": 0.00016409494263795694, "loss": 11.6714, "step": 39941 }, { "epoch": 0.8360964581763376, "grad_norm": 0.28196683526039124, "learning_rate": 0.00016409325967134622, "loss": 11.6817, "step": 39942 }, { "epoch": 0.8361173909402998, "grad_norm": 0.39073115587234497, "learning_rate": 0.00016409157667392457, "loss": 11.6819, "step": 39943 }, { "epoch": 0.8361383237042619, "grad_norm": 0.2545928657054901, "learning_rate": 0.00016408989364569276, "loss": 11.6739, "step": 39944 }, { "epoch": 0.8361592564682241, "grad_norm": 0.38042718172073364, "learning_rate": 0.00016408821058665175, "loss": 11.6855, "step": 39945 }, { "epoch": 0.8361801892321862, "grad_norm": 0.3915984332561493, "learning_rate": 0.00016408652749680215, "loss": 11.6722, "step": 39946 }, { "epoch": 0.8362011219961484, "grad_norm": 0.2596876919269562, "learning_rate": 0.00016408484437614496, "loss": 11.6619, "step": 39947 }, { "epoch": 0.8362220547601106, "grad_norm": 0.37369051575660706, "learning_rate": 0.00016408316122468083, "loss": 11.6912, "step": 39948 }, { "epoch": 0.8362429875240727, "grad_norm": 0.23473799228668213, "learning_rate": 0.0001640814780424107, "loss": 11.6593, "step": 39949 }, { "epoch": 0.8362639202880349, "grad_norm": 0.3950623571872711, "learning_rate": 0.0001640797948293353, "loss": 11.6728, "step": 39950 }, { "epoch": 0.836284853051997, "grad_norm": 0.3595142066478729, "learning_rate": 0.00016407811158545543, "loss": 11.6711, "step": 39951 }, { "epoch": 0.8363057858159592, "grad_norm": 0.2818942666053772, "learning_rate": 0.00016407642831077195, "loss": 11.6645, "step": 39952 }, { "epoch": 0.8363267185799212, "grad_norm": 0.27117058634757996, "learning_rate": 0.00016407474500528565, "loss": 11.6733, "step": 39953 }, { "epoch": 0.8363476513438834, "grad_norm": 0.3446410298347473, "learning_rate": 0.00016407306166899734, "loss": 11.6575, "step": 39954 }, { "epoch": 0.8363685841078456, "grad_norm": 0.3044282793998718, "learning_rate": 0.0001640713783019078, "loss": 11.6628, "step": 39955 }, { "epoch": 0.8363895168718077, "grad_norm": 0.3262965679168701, "learning_rate": 0.00016406969490401788, "loss": 11.68, "step": 39956 }, { "epoch": 0.8364104496357699, "grad_norm": 1.628007411956787, "learning_rate": 0.0001640680114753284, "loss": 11.6604, "step": 39957 }, { "epoch": 0.836431382399732, "grad_norm": 0.2758106589317322, "learning_rate": 0.0001640663280158401, "loss": 11.6785, "step": 39958 }, { "epoch": 0.8364523151636942, "grad_norm": 0.3496786952018738, "learning_rate": 0.00016406464452555387, "loss": 11.6669, "step": 39959 }, { "epoch": 0.8364732479276563, "grad_norm": 0.36412835121154785, "learning_rate": 0.00016406296100447042, "loss": 11.6591, "step": 39960 }, { "epoch": 0.8364941806916185, "grad_norm": 0.6367095708847046, "learning_rate": 0.00016406127745259067, "loss": 11.683, "step": 39961 }, { "epoch": 0.8365151134555807, "grad_norm": 0.30246731638908386, "learning_rate": 0.00016405959386991538, "loss": 11.664, "step": 39962 }, { "epoch": 0.8365360462195428, "grad_norm": 0.27310317754745483, "learning_rate": 0.00016405791025644536, "loss": 11.6646, "step": 39963 }, { "epoch": 0.836556978983505, "grad_norm": 0.25734636187553406, "learning_rate": 0.0001640562266121814, "loss": 11.649, "step": 39964 }, { "epoch": 0.8365779117474671, "grad_norm": 0.2600732445716858, "learning_rate": 0.00016405454293712433, "loss": 11.6796, "step": 39965 }, { "epoch": 0.8365988445114293, "grad_norm": 0.32009583711624146, "learning_rate": 0.00016405285923127498, "loss": 11.6946, "step": 39966 }, { "epoch": 0.8366197772753915, "grad_norm": 0.2962128818035126, "learning_rate": 0.00016405117549463414, "loss": 11.6658, "step": 39967 }, { "epoch": 0.8366407100393536, "grad_norm": 0.3844311535358429, "learning_rate": 0.0001640494917272026, "loss": 11.6761, "step": 39968 }, { "epoch": 0.8366616428033158, "grad_norm": 0.24614454805850983, "learning_rate": 0.0001640478079289812, "loss": 11.6785, "step": 39969 }, { "epoch": 0.8366825755672779, "grad_norm": 0.31026002764701843, "learning_rate": 0.00016404612409997072, "loss": 11.6752, "step": 39970 }, { "epoch": 0.8367035083312401, "grad_norm": 0.3897988498210907, "learning_rate": 0.00016404444024017197, "loss": 11.6848, "step": 39971 }, { "epoch": 0.8367244410952022, "grad_norm": 0.27962473034858704, "learning_rate": 0.00016404275634958579, "loss": 11.6732, "step": 39972 }, { "epoch": 0.8367453738591644, "grad_norm": 0.30845120549201965, "learning_rate": 0.00016404107242821297, "loss": 11.6495, "step": 39973 }, { "epoch": 0.8367663066231266, "grad_norm": 0.26534736156463623, "learning_rate": 0.00016403938847605432, "loss": 11.6829, "step": 39974 }, { "epoch": 0.8367872393870887, "grad_norm": 0.2629487216472626, "learning_rate": 0.00016403770449311065, "loss": 11.6497, "step": 39975 }, { "epoch": 0.8368081721510509, "grad_norm": 0.3129669427871704, "learning_rate": 0.0001640360204793828, "loss": 11.6623, "step": 39976 }, { "epoch": 0.836829104915013, "grad_norm": 0.45284503698349, "learning_rate": 0.00016403433643487155, "loss": 11.6635, "step": 39977 }, { "epoch": 0.8368500376789751, "grad_norm": 0.2924928367137909, "learning_rate": 0.00016403265235957764, "loss": 11.6787, "step": 39978 }, { "epoch": 0.8368709704429372, "grad_norm": 0.4662567973136902, "learning_rate": 0.00016403096825350205, "loss": 11.6599, "step": 39979 }, { "epoch": 0.8368919032068994, "grad_norm": 0.410266637802124, "learning_rate": 0.0001640292841166454, "loss": 11.6673, "step": 39980 }, { "epoch": 0.8369128359708616, "grad_norm": 0.26855364441871643, "learning_rate": 0.00016402759994900868, "loss": 11.674, "step": 39981 }, { "epoch": 0.8369337687348237, "grad_norm": 0.34696006774902344, "learning_rate": 0.00016402591575059253, "loss": 11.6634, "step": 39982 }, { "epoch": 0.8369547014987859, "grad_norm": 0.3070741891860962, "learning_rate": 0.00016402423152139787, "loss": 11.6686, "step": 39983 }, { "epoch": 0.836975634262748, "grad_norm": 0.24890708923339844, "learning_rate": 0.0001640225472614255, "loss": 11.6674, "step": 39984 }, { "epoch": 0.8369965670267102, "grad_norm": 0.34176769852638245, "learning_rate": 0.0001640208629706762, "loss": 11.6856, "step": 39985 }, { "epoch": 0.8370174997906724, "grad_norm": 0.30005428194999695, "learning_rate": 0.00016401917864915076, "loss": 11.6683, "step": 39986 }, { "epoch": 0.8370384325546345, "grad_norm": 0.3698892891407013, "learning_rate": 0.00016401749429685006, "loss": 11.6744, "step": 39987 }, { "epoch": 0.8370593653185967, "grad_norm": 0.3585242033004761, "learning_rate": 0.00016401580991377482, "loss": 11.6714, "step": 39988 }, { "epoch": 0.8370802980825588, "grad_norm": 0.28438809514045715, "learning_rate": 0.00016401412549992594, "loss": 11.6633, "step": 39989 }, { "epoch": 0.837101230846521, "grad_norm": 0.3381403088569641, "learning_rate": 0.00016401244105530415, "loss": 11.6758, "step": 39990 }, { "epoch": 0.8371221636104831, "grad_norm": 0.38741111755371094, "learning_rate": 0.00016401075657991032, "loss": 11.6694, "step": 39991 }, { "epoch": 0.8371430963744453, "grad_norm": 0.3131888508796692, "learning_rate": 0.00016400907207374523, "loss": 11.6776, "step": 39992 }, { "epoch": 0.8371640291384075, "grad_norm": 0.3596073389053345, "learning_rate": 0.00016400738753680972, "loss": 11.6667, "step": 39993 }, { "epoch": 0.8371849619023696, "grad_norm": 0.311767578125, "learning_rate": 0.00016400570296910453, "loss": 11.6846, "step": 39994 }, { "epoch": 0.8372058946663318, "grad_norm": 0.2543273866176605, "learning_rate": 0.00016400401837063055, "loss": 11.6746, "step": 39995 }, { "epoch": 0.8372268274302939, "grad_norm": 0.3190299868583679, "learning_rate": 0.00016400233374138856, "loss": 11.6722, "step": 39996 }, { "epoch": 0.8372477601942561, "grad_norm": 0.2822956144809723, "learning_rate": 0.00016400064908137933, "loss": 11.6584, "step": 39997 }, { "epoch": 0.8372686929582182, "grad_norm": 0.3017444610595703, "learning_rate": 0.00016399896439060375, "loss": 11.6715, "step": 39998 }, { "epoch": 0.8372896257221804, "grad_norm": 0.2643708884716034, "learning_rate": 0.00016399727966906255, "loss": 11.6693, "step": 39999 }, { "epoch": 0.8373105584861426, "grad_norm": 0.29362839460372925, "learning_rate": 0.00016399559491675657, "loss": 11.6738, "step": 40000 }, { "epoch": 0.8373105584861426, "eval_loss": 11.669519424438477, "eval_runtime": 34.356, "eval_samples_per_second": 27.972, "eval_steps_per_second": 7.015, "step": 40000 }, { "epoch": 0.8373314912501046, "grad_norm": 0.3424674868583679, "learning_rate": 0.00016399391013368665, "loss": 11.6667, "step": 40001 }, { "epoch": 0.8373524240140668, "grad_norm": 0.2401113361120224, "learning_rate": 0.00016399222531985358, "loss": 11.6839, "step": 40002 }, { "epoch": 0.8373733567780289, "grad_norm": 0.39377790689468384, "learning_rate": 0.00016399054047525814, "loss": 11.6599, "step": 40003 }, { "epoch": 0.8373942895419911, "grad_norm": 0.3009583353996277, "learning_rate": 0.00016398885559990118, "loss": 11.6846, "step": 40004 }, { "epoch": 0.8374152223059533, "grad_norm": 0.2865438759326935, "learning_rate": 0.00016398717069378348, "loss": 11.6762, "step": 40005 }, { "epoch": 0.8374361550699154, "grad_norm": 0.3027656376361847, "learning_rate": 0.0001639854857569059, "loss": 11.6726, "step": 40006 }, { "epoch": 0.8374570878338776, "grad_norm": 0.30113258957862854, "learning_rate": 0.00016398380078926918, "loss": 11.6757, "step": 40007 }, { "epoch": 0.8374780205978397, "grad_norm": 0.3104364573955536, "learning_rate": 0.0001639821157908742, "loss": 11.6583, "step": 40008 }, { "epoch": 0.8374989533618019, "grad_norm": 0.3375166356563568, "learning_rate": 0.0001639804307617217, "loss": 11.6712, "step": 40009 }, { "epoch": 0.837519886125764, "grad_norm": 0.41546446084976196, "learning_rate": 0.00016397874570181254, "loss": 11.6519, "step": 40010 }, { "epoch": 0.8375408188897262, "grad_norm": 0.2474120706319809, "learning_rate": 0.0001639770606111475, "loss": 11.6502, "step": 40011 }, { "epoch": 0.8375617516536884, "grad_norm": 0.31440469622612, "learning_rate": 0.0001639753754897274, "loss": 11.6824, "step": 40012 }, { "epoch": 0.8375826844176505, "grad_norm": 0.3313238322734833, "learning_rate": 0.0001639736903375531, "loss": 11.6647, "step": 40013 }, { "epoch": 0.8376036171816127, "grad_norm": 0.3317870497703552, "learning_rate": 0.0001639720051546253, "loss": 11.6694, "step": 40014 }, { "epoch": 0.8376245499455748, "grad_norm": 0.27080807089805603, "learning_rate": 0.00016397031994094493, "loss": 11.6674, "step": 40015 }, { "epoch": 0.837645482709537, "grad_norm": 0.4354541599750519, "learning_rate": 0.00016396863469651273, "loss": 11.6632, "step": 40016 }, { "epoch": 0.8376664154734991, "grad_norm": 0.28411614894866943, "learning_rate": 0.00016396694942132951, "loss": 11.6558, "step": 40017 }, { "epoch": 0.8376873482374613, "grad_norm": 0.26161882281303406, "learning_rate": 0.00016396526411539613, "loss": 11.6621, "step": 40018 }, { "epoch": 0.8377082810014235, "grad_norm": 0.25797897577285767, "learning_rate": 0.0001639635787787133, "loss": 11.6463, "step": 40019 }, { "epoch": 0.8377292137653856, "grad_norm": 0.3254646062850952, "learning_rate": 0.00016396189341128195, "loss": 11.6901, "step": 40020 }, { "epoch": 0.8377501465293478, "grad_norm": 0.3406299352645874, "learning_rate": 0.00016396020801310283, "loss": 11.6749, "step": 40021 }, { "epoch": 0.8377710792933099, "grad_norm": 0.3020149767398834, "learning_rate": 0.00016395852258417674, "loss": 11.6634, "step": 40022 }, { "epoch": 0.837792012057272, "grad_norm": 0.2873133420944214, "learning_rate": 0.00016395683712450455, "loss": 11.6683, "step": 40023 }, { "epoch": 0.8378129448212343, "grad_norm": 0.3221453130245209, "learning_rate": 0.00016395515163408696, "loss": 11.6745, "step": 40024 }, { "epoch": 0.8378338775851963, "grad_norm": 0.2910365164279938, "learning_rate": 0.0001639534661129249, "loss": 11.665, "step": 40025 }, { "epoch": 0.8378548103491585, "grad_norm": 0.3229566812515259, "learning_rate": 0.00016395178056101913, "loss": 11.6638, "step": 40026 }, { "epoch": 0.8378757431131206, "grad_norm": 0.25355011224746704, "learning_rate": 0.0001639500949783704, "loss": 11.6556, "step": 40027 }, { "epoch": 0.8378966758770828, "grad_norm": 0.3240433633327484, "learning_rate": 0.00016394840936497963, "loss": 11.6605, "step": 40028 }, { "epoch": 0.8379176086410449, "grad_norm": 0.3040665090084076, "learning_rate": 0.00016394672372084754, "loss": 11.671, "step": 40029 }, { "epoch": 0.8379385414050071, "grad_norm": 0.3171047568321228, "learning_rate": 0.000163945038045975, "loss": 11.6706, "step": 40030 }, { "epoch": 0.8379594741689693, "grad_norm": 0.32250332832336426, "learning_rate": 0.0001639433523403628, "loss": 11.6633, "step": 40031 }, { "epoch": 0.8379804069329314, "grad_norm": 0.34259235858917236, "learning_rate": 0.00016394166660401176, "loss": 11.6867, "step": 40032 }, { "epoch": 0.8380013396968936, "grad_norm": 0.33716699481010437, "learning_rate": 0.00016393998083692269, "loss": 11.6807, "step": 40033 }, { "epoch": 0.8380222724608557, "grad_norm": 0.29045796394348145, "learning_rate": 0.00016393829503909635, "loss": 11.6757, "step": 40034 }, { "epoch": 0.8380432052248179, "grad_norm": 0.3309776186943054, "learning_rate": 0.0001639366092105336, "loss": 11.6859, "step": 40035 }, { "epoch": 0.83806413798878, "grad_norm": 0.26471492648124695, "learning_rate": 0.00016393492335123523, "loss": 11.6536, "step": 40036 }, { "epoch": 0.8380850707527422, "grad_norm": 0.3349403142929077, "learning_rate": 0.0001639332374612021, "loss": 11.6593, "step": 40037 }, { "epoch": 0.8381060035167044, "grad_norm": 0.4077167212963104, "learning_rate": 0.00016393155154043493, "loss": 11.6662, "step": 40038 }, { "epoch": 0.8381269362806665, "grad_norm": 0.29815179109573364, "learning_rate": 0.00016392986558893464, "loss": 11.663, "step": 40039 }, { "epoch": 0.8381478690446287, "grad_norm": 0.2994590103626251, "learning_rate": 0.00016392817960670194, "loss": 11.6784, "step": 40040 }, { "epoch": 0.8381688018085908, "grad_norm": 0.26402539014816284, "learning_rate": 0.00016392649359373772, "loss": 11.6724, "step": 40041 }, { "epoch": 0.838189734572553, "grad_norm": 0.36449235677719116, "learning_rate": 0.0001639248075500427, "loss": 11.6769, "step": 40042 }, { "epoch": 0.8382106673365151, "grad_norm": 0.2786915600299835, "learning_rate": 0.0001639231214756178, "loss": 11.6861, "step": 40043 }, { "epoch": 0.8382316001004773, "grad_norm": 0.30228865146636963, "learning_rate": 0.00016392143537046373, "loss": 11.66, "step": 40044 }, { "epoch": 0.8382525328644395, "grad_norm": 0.3302310109138489, "learning_rate": 0.00016391974923458139, "loss": 11.6589, "step": 40045 }, { "epoch": 0.8382734656284015, "grad_norm": 0.32500365376472473, "learning_rate": 0.0001639180630679715, "loss": 11.6584, "step": 40046 }, { "epoch": 0.8382943983923637, "grad_norm": 0.33337968587875366, "learning_rate": 0.0001639163768706349, "loss": 11.6674, "step": 40047 }, { "epoch": 0.8383153311563258, "grad_norm": 0.2949357330799103, "learning_rate": 0.00016391469064257249, "loss": 11.6719, "step": 40048 }, { "epoch": 0.838336263920288, "grad_norm": 0.3040509819984436, "learning_rate": 0.00016391300438378496, "loss": 11.6432, "step": 40049 }, { "epoch": 0.8383571966842502, "grad_norm": 0.3042868375778198, "learning_rate": 0.00016391131809427318, "loss": 11.666, "step": 40050 }, { "epoch": 0.8383781294482123, "grad_norm": 0.3383433222770691, "learning_rate": 0.00016390963177403796, "loss": 11.6646, "step": 40051 }, { "epoch": 0.8383990622121745, "grad_norm": 0.32524025440216064, "learning_rate": 0.00016390794542308008, "loss": 11.6644, "step": 40052 }, { "epoch": 0.8384199949761366, "grad_norm": 0.25933244824409485, "learning_rate": 0.00016390625904140038, "loss": 11.6663, "step": 40053 }, { "epoch": 0.8384409277400988, "grad_norm": 0.28208431601524353, "learning_rate": 0.00016390457262899965, "loss": 11.6684, "step": 40054 }, { "epoch": 0.8384618605040609, "grad_norm": 0.38111668825149536, "learning_rate": 0.00016390288618587872, "loss": 11.6733, "step": 40055 }, { "epoch": 0.8384827932680231, "grad_norm": 0.27760744094848633, "learning_rate": 0.0001639011997120384, "loss": 11.6794, "step": 40056 }, { "epoch": 0.8385037260319853, "grad_norm": 0.31296905875205994, "learning_rate": 0.00016389951320747948, "loss": 11.6696, "step": 40057 }, { "epoch": 0.8385246587959474, "grad_norm": 0.3120945394039154, "learning_rate": 0.00016389782667220278, "loss": 11.6842, "step": 40058 }, { "epoch": 0.8385455915599096, "grad_norm": 0.45658716559410095, "learning_rate": 0.00016389614010620912, "loss": 11.6814, "step": 40059 }, { "epoch": 0.8385665243238717, "grad_norm": 0.27686309814453125, "learning_rate": 0.0001638944535094993, "loss": 11.6883, "step": 40060 }, { "epoch": 0.8385874570878339, "grad_norm": 0.25844910740852356, "learning_rate": 0.00016389276688207413, "loss": 11.6835, "step": 40061 }, { "epoch": 0.838608389851796, "grad_norm": 0.3938990533351898, "learning_rate": 0.00016389108022393445, "loss": 11.6712, "step": 40062 }, { "epoch": 0.8386293226157582, "grad_norm": 0.32275527715682983, "learning_rate": 0.000163889393535081, "loss": 11.6685, "step": 40063 }, { "epoch": 0.8386502553797204, "grad_norm": 0.3832565248012543, "learning_rate": 0.0001638877068155147, "loss": 11.6665, "step": 40064 }, { "epoch": 0.8386711881436825, "grad_norm": 0.3314451575279236, "learning_rate": 0.00016388602006523628, "loss": 11.6721, "step": 40065 }, { "epoch": 0.8386921209076447, "grad_norm": 0.3564099967479706, "learning_rate": 0.00016388433328424655, "loss": 11.6775, "step": 40066 }, { "epoch": 0.8387130536716068, "grad_norm": 0.2532339096069336, "learning_rate": 0.00016388264647254635, "loss": 11.6677, "step": 40067 }, { "epoch": 0.838733986435569, "grad_norm": 0.29012438654899597, "learning_rate": 0.00016388095963013649, "loss": 11.6801, "step": 40068 }, { "epoch": 0.8387549191995312, "grad_norm": 0.29654833674430847, "learning_rate": 0.00016387927275701779, "loss": 11.67, "step": 40069 }, { "epoch": 0.8387758519634932, "grad_norm": 0.30955296754837036, "learning_rate": 0.000163877585853191, "loss": 11.6667, "step": 40070 }, { "epoch": 0.8387967847274554, "grad_norm": 0.4707316756248474, "learning_rate": 0.00016387589891865697, "loss": 11.6832, "step": 40071 }, { "epoch": 0.8388177174914175, "grad_norm": 0.24993765354156494, "learning_rate": 0.00016387421195341654, "loss": 11.6744, "step": 40072 }, { "epoch": 0.8388386502553797, "grad_norm": 0.29193902015686035, "learning_rate": 0.0001638725249574705, "loss": 11.6541, "step": 40073 }, { "epoch": 0.8388595830193418, "grad_norm": 0.2851371169090271, "learning_rate": 0.0001638708379308197, "loss": 11.6665, "step": 40074 }, { "epoch": 0.838880515783304, "grad_norm": 0.3912666141986847, "learning_rate": 0.00016386915087346484, "loss": 11.6856, "step": 40075 }, { "epoch": 0.8389014485472662, "grad_norm": 0.33521872758865356, "learning_rate": 0.00016386746378540678, "loss": 11.6687, "step": 40076 }, { "epoch": 0.8389223813112283, "grad_norm": 0.31766027212142944, "learning_rate": 0.00016386577666664643, "loss": 11.6664, "step": 40077 }, { "epoch": 0.8389433140751905, "grad_norm": 0.3639063537120819, "learning_rate": 0.00016386408951718447, "loss": 11.6669, "step": 40078 }, { "epoch": 0.8389642468391526, "grad_norm": 0.3291463255882263, "learning_rate": 0.0001638624023370218, "loss": 11.6853, "step": 40079 }, { "epoch": 0.8389851796031148, "grad_norm": 0.36879241466522217, "learning_rate": 0.00016386071512615913, "loss": 11.6744, "step": 40080 }, { "epoch": 0.8390061123670769, "grad_norm": 0.28988945484161377, "learning_rate": 0.00016385902788459739, "loss": 11.665, "step": 40081 }, { "epoch": 0.8390270451310391, "grad_norm": 0.37078502774238586, "learning_rate": 0.0001638573406123373, "loss": 11.6802, "step": 40082 }, { "epoch": 0.8390479778950013, "grad_norm": 0.4232436716556549, "learning_rate": 0.00016385565330937973, "loss": 11.6965, "step": 40083 }, { "epoch": 0.8390689106589634, "grad_norm": 0.37662461400032043, "learning_rate": 0.00016385396597572545, "loss": 11.673, "step": 40084 }, { "epoch": 0.8390898434229256, "grad_norm": 0.31921833753585815, "learning_rate": 0.00016385227861137532, "loss": 11.6658, "step": 40085 }, { "epoch": 0.8391107761868877, "grad_norm": 0.3660433888435364, "learning_rate": 0.0001638505912163301, "loss": 11.6703, "step": 40086 }, { "epoch": 0.8391317089508499, "grad_norm": 0.2976745069026947, "learning_rate": 0.00016384890379059062, "loss": 11.6558, "step": 40087 }, { "epoch": 0.8391526417148121, "grad_norm": 0.27589160203933716, "learning_rate": 0.0001638472163341577, "loss": 11.6773, "step": 40088 }, { "epoch": 0.8391735744787742, "grad_norm": 0.2959577441215515, "learning_rate": 0.00016384552884703214, "loss": 11.6773, "step": 40089 }, { "epoch": 0.8391945072427364, "grad_norm": 0.25206151604652405, "learning_rate": 0.00016384384132921476, "loss": 11.677, "step": 40090 }, { "epoch": 0.8392154400066985, "grad_norm": 0.3150099217891693, "learning_rate": 0.00016384215378070636, "loss": 11.6553, "step": 40091 }, { "epoch": 0.8392363727706607, "grad_norm": 0.2820166349411011, "learning_rate": 0.00016384046620150774, "loss": 11.6577, "step": 40092 }, { "epoch": 0.8392573055346227, "grad_norm": 0.42015644907951355, "learning_rate": 0.00016383877859161972, "loss": 11.6923, "step": 40093 }, { "epoch": 0.8392782382985849, "grad_norm": 0.32507458329200745, "learning_rate": 0.00016383709095104315, "loss": 11.6739, "step": 40094 }, { "epoch": 0.8392991710625471, "grad_norm": 0.2907055616378784, "learning_rate": 0.00016383540327977883, "loss": 11.6763, "step": 40095 }, { "epoch": 0.8393201038265092, "grad_norm": 0.3356992304325104, "learning_rate": 0.00016383371557782752, "loss": 11.6737, "step": 40096 }, { "epoch": 0.8393410365904714, "grad_norm": 0.33734235167503357, "learning_rate": 0.00016383202784519006, "loss": 11.6666, "step": 40097 }, { "epoch": 0.8393619693544335, "grad_norm": 0.31829071044921875, "learning_rate": 0.00016383034008186727, "loss": 11.6648, "step": 40098 }, { "epoch": 0.8393829021183957, "grad_norm": 0.268677294254303, "learning_rate": 0.00016382865228785996, "loss": 11.6771, "step": 40099 }, { "epoch": 0.8394038348823578, "grad_norm": 1.899037480354309, "learning_rate": 0.00016382696446316897, "loss": 11.6409, "step": 40100 }, { "epoch": 0.83942476764632, "grad_norm": 0.31674668192863464, "learning_rate": 0.00016382527660779506, "loss": 11.6792, "step": 40101 }, { "epoch": 0.8394457004102822, "grad_norm": 0.36644062399864197, "learning_rate": 0.000163823588721739, "loss": 11.6715, "step": 40102 }, { "epoch": 0.8394666331742443, "grad_norm": 0.3258093297481537, "learning_rate": 0.00016382190080500175, "loss": 11.6819, "step": 40103 }, { "epoch": 0.8394875659382065, "grad_norm": 0.28114739060401917, "learning_rate": 0.000163820212857584, "loss": 11.6684, "step": 40104 }, { "epoch": 0.8395084987021686, "grad_norm": 0.29537728428840637, "learning_rate": 0.00016381852487948656, "loss": 11.6772, "step": 40105 }, { "epoch": 0.8395294314661308, "grad_norm": 0.35149386525154114, "learning_rate": 0.00016381683687071032, "loss": 11.6608, "step": 40106 }, { "epoch": 0.839550364230093, "grad_norm": 0.37309977412223816, "learning_rate": 0.00016381514883125604, "loss": 11.6598, "step": 40107 }, { "epoch": 0.8395712969940551, "grad_norm": 0.2995986342430115, "learning_rate": 0.00016381346076112452, "loss": 11.6682, "step": 40108 }, { "epoch": 0.8395922297580173, "grad_norm": 0.36398231983184814, "learning_rate": 0.0001638117726603166, "loss": 11.6671, "step": 40109 }, { "epoch": 0.8396131625219794, "grad_norm": 0.29137739539146423, "learning_rate": 0.0001638100845288331, "loss": 11.6741, "step": 40110 }, { "epoch": 0.8396340952859416, "grad_norm": 0.280456006526947, "learning_rate": 0.00016380839636667477, "loss": 11.6715, "step": 40111 }, { "epoch": 0.8396550280499037, "grad_norm": 0.3082890212535858, "learning_rate": 0.0001638067081738425, "loss": 11.672, "step": 40112 }, { "epoch": 0.8396759608138659, "grad_norm": 0.3772861361503601, "learning_rate": 0.00016380501995033709, "loss": 11.6628, "step": 40113 }, { "epoch": 0.8396968935778281, "grad_norm": 0.3222215175628662, "learning_rate": 0.00016380333169615927, "loss": 11.6827, "step": 40114 }, { "epoch": 0.8397178263417902, "grad_norm": 0.26427555084228516, "learning_rate": 0.00016380164341130995, "loss": 11.681, "step": 40115 }, { "epoch": 0.8397387591057524, "grad_norm": 0.30636999011039734, "learning_rate": 0.0001637999550957899, "loss": 11.6872, "step": 40116 }, { "epoch": 0.8397596918697144, "grad_norm": 0.4269827902317047, "learning_rate": 0.0001637982667495999, "loss": 11.6941, "step": 40117 }, { "epoch": 0.8397806246336766, "grad_norm": 0.2616913616657257, "learning_rate": 0.00016379657837274083, "loss": 11.6745, "step": 40118 }, { "epoch": 0.8398015573976387, "grad_norm": 0.3393152058124542, "learning_rate": 0.00016379488996521343, "loss": 11.6642, "step": 40119 }, { "epoch": 0.8398224901616009, "grad_norm": 0.24116939306259155, "learning_rate": 0.0001637932015270186, "loss": 11.65, "step": 40120 }, { "epoch": 0.8398434229255631, "grad_norm": 0.2692449688911438, "learning_rate": 0.00016379151305815707, "loss": 11.6646, "step": 40121 }, { "epoch": 0.8398643556895252, "grad_norm": 0.28053855895996094, "learning_rate": 0.00016378982455862967, "loss": 11.6629, "step": 40122 }, { "epoch": 0.8398852884534874, "grad_norm": 0.2908099889755249, "learning_rate": 0.00016378813602843726, "loss": 11.6743, "step": 40123 }, { "epoch": 0.8399062212174495, "grad_norm": 0.3328583836555481, "learning_rate": 0.0001637864474675806, "loss": 11.6786, "step": 40124 }, { "epoch": 0.8399271539814117, "grad_norm": 0.3129224479198456, "learning_rate": 0.0001637847588760605, "loss": 11.6712, "step": 40125 }, { "epoch": 0.8399480867453739, "grad_norm": 0.2724301815032959, "learning_rate": 0.00016378307025387778, "loss": 11.686, "step": 40126 }, { "epoch": 0.839969019509336, "grad_norm": 0.2910912036895752, "learning_rate": 0.00016378138160103327, "loss": 11.6676, "step": 40127 }, { "epoch": 0.8399899522732982, "grad_norm": 0.24821889400482178, "learning_rate": 0.0001637796929175278, "loss": 11.6818, "step": 40128 }, { "epoch": 0.8400108850372603, "grad_norm": 0.3284318447113037, "learning_rate": 0.0001637780042033621, "loss": 11.6725, "step": 40129 }, { "epoch": 0.8400318178012225, "grad_norm": 0.38319191336631775, "learning_rate": 0.00016377631545853709, "loss": 11.6768, "step": 40130 }, { "epoch": 0.8400527505651846, "grad_norm": 0.33657175302505493, "learning_rate": 0.0001637746266830535, "loss": 11.6619, "step": 40131 }, { "epoch": 0.8400736833291468, "grad_norm": 0.35350945591926575, "learning_rate": 0.00016377293787691216, "loss": 11.6596, "step": 40132 }, { "epoch": 0.840094616093109, "grad_norm": 0.37092795968055725, "learning_rate": 0.0001637712490401139, "loss": 11.6735, "step": 40133 }, { "epoch": 0.8401155488570711, "grad_norm": 0.25759685039520264, "learning_rate": 0.00016376956017265953, "loss": 11.6908, "step": 40134 }, { "epoch": 0.8401364816210333, "grad_norm": 0.30476894974708557, "learning_rate": 0.00016376787127454984, "loss": 11.675, "step": 40135 }, { "epoch": 0.8401574143849954, "grad_norm": 0.4150811731815338, "learning_rate": 0.00016376618234578566, "loss": 11.6729, "step": 40136 }, { "epoch": 0.8401783471489576, "grad_norm": 0.3423002064228058, "learning_rate": 0.0001637644933863678, "loss": 11.6599, "step": 40137 }, { "epoch": 0.8401992799129196, "grad_norm": 0.3197689950466156, "learning_rate": 0.0001637628043962971, "loss": 11.6726, "step": 40138 }, { "epoch": 0.8402202126768819, "grad_norm": 0.37173202633857727, "learning_rate": 0.0001637611153755743, "loss": 11.6628, "step": 40139 }, { "epoch": 0.840241145440844, "grad_norm": 0.2860107421875, "learning_rate": 0.00016375942632420028, "loss": 11.6665, "step": 40140 }, { "epoch": 0.8402620782048061, "grad_norm": 0.3405359983444214, "learning_rate": 0.00016375773724217583, "loss": 11.6775, "step": 40141 }, { "epoch": 0.8402830109687683, "grad_norm": 0.43255478143692017, "learning_rate": 0.00016375604812950175, "loss": 11.6689, "step": 40142 }, { "epoch": 0.8403039437327304, "grad_norm": 0.2581997513771057, "learning_rate": 0.00016375435898617884, "loss": 11.666, "step": 40143 }, { "epoch": 0.8403248764966926, "grad_norm": 0.31210216879844666, "learning_rate": 0.00016375266981220795, "loss": 11.6647, "step": 40144 }, { "epoch": 0.8403458092606548, "grad_norm": 0.27091097831726074, "learning_rate": 0.00016375098060758986, "loss": 11.6778, "step": 40145 }, { "epoch": 0.8403667420246169, "grad_norm": 0.31862321496009827, "learning_rate": 0.0001637492913723254, "loss": 11.6759, "step": 40146 }, { "epoch": 0.8403876747885791, "grad_norm": 0.28622764348983765, "learning_rate": 0.0001637476021064154, "loss": 11.6756, "step": 40147 }, { "epoch": 0.8404086075525412, "grad_norm": 0.34557750821113586, "learning_rate": 0.00016374591280986063, "loss": 11.6671, "step": 40148 }, { "epoch": 0.8404295403165034, "grad_norm": 0.28444549441337585, "learning_rate": 0.0001637442234826619, "loss": 11.6814, "step": 40149 }, { "epoch": 0.8404504730804655, "grad_norm": 0.29017141461372375, "learning_rate": 0.0001637425341248201, "loss": 11.6774, "step": 40150 }, { "epoch": 0.8404714058444277, "grad_norm": 0.35510268807411194, "learning_rate": 0.00016374084473633595, "loss": 11.6746, "step": 40151 }, { "epoch": 0.8404923386083899, "grad_norm": 0.2888842523097992, "learning_rate": 0.0001637391553172103, "loss": 11.6585, "step": 40152 }, { "epoch": 0.840513271372352, "grad_norm": 0.3254806697368622, "learning_rate": 0.00016373746586744397, "loss": 11.682, "step": 40153 }, { "epoch": 0.8405342041363142, "grad_norm": 0.26310357451438904, "learning_rate": 0.00016373577638703775, "loss": 11.6783, "step": 40154 }, { "epoch": 0.8405551369002763, "grad_norm": 0.31365951895713806, "learning_rate": 0.00016373408687599247, "loss": 11.6813, "step": 40155 }, { "epoch": 0.8405760696642385, "grad_norm": 0.299246609210968, "learning_rate": 0.00016373239733430897, "loss": 11.6716, "step": 40156 }, { "epoch": 0.8405970024282006, "grad_norm": 0.3140028119087219, "learning_rate": 0.00016373070776198797, "loss": 11.6687, "step": 40157 }, { "epoch": 0.8406179351921628, "grad_norm": 0.2690807580947876, "learning_rate": 0.0001637290181590304, "loss": 11.6757, "step": 40158 }, { "epoch": 0.840638867956125, "grad_norm": 0.3168582618236542, "learning_rate": 0.00016372732852543696, "loss": 11.6658, "step": 40159 }, { "epoch": 0.8406598007200871, "grad_norm": 0.2916642129421234, "learning_rate": 0.00016372563886120855, "loss": 11.6653, "step": 40160 }, { "epoch": 0.8406807334840493, "grad_norm": 0.3341037333011627, "learning_rate": 0.00016372394916634595, "loss": 11.6888, "step": 40161 }, { "epoch": 0.8407016662480113, "grad_norm": 0.3011930584907532, "learning_rate": 0.00016372225944084995, "loss": 11.678, "step": 40162 }, { "epoch": 0.8407225990119735, "grad_norm": 0.29789233207702637, "learning_rate": 0.0001637205696847214, "loss": 11.6722, "step": 40163 }, { "epoch": 0.8407435317759357, "grad_norm": 0.33844447135925293, "learning_rate": 0.00016371887989796109, "loss": 11.6646, "step": 40164 }, { "epoch": 0.8407644645398978, "grad_norm": 0.3562626838684082, "learning_rate": 0.00016371719008056985, "loss": 11.6683, "step": 40165 }, { "epoch": 0.84078539730386, "grad_norm": 0.28028327226638794, "learning_rate": 0.00016371550023254846, "loss": 11.6499, "step": 40166 }, { "epoch": 0.8408063300678221, "grad_norm": 0.357517808675766, "learning_rate": 0.00016371381035389774, "loss": 11.6702, "step": 40167 }, { "epoch": 0.8408272628317843, "grad_norm": 0.4598408639431, "learning_rate": 0.00016371212044461855, "loss": 11.6731, "step": 40168 }, { "epoch": 0.8408481955957464, "grad_norm": 0.31192705035209656, "learning_rate": 0.00016371043050471165, "loss": 11.678, "step": 40169 }, { "epoch": 0.8408691283597086, "grad_norm": 0.2693224847316742, "learning_rate": 0.00016370874053417785, "loss": 11.6734, "step": 40170 }, { "epoch": 0.8408900611236708, "grad_norm": 0.24137535691261292, "learning_rate": 0.000163707050533018, "loss": 11.6585, "step": 40171 }, { "epoch": 0.8409109938876329, "grad_norm": 0.2674597501754761, "learning_rate": 0.00016370536050123292, "loss": 11.6652, "step": 40172 }, { "epoch": 0.8409319266515951, "grad_norm": 0.3364354968070984, "learning_rate": 0.00016370367043882337, "loss": 11.6732, "step": 40173 }, { "epoch": 0.8409528594155572, "grad_norm": 0.4087088406085968, "learning_rate": 0.00016370198034579018, "loss": 11.6652, "step": 40174 }, { "epoch": 0.8409737921795194, "grad_norm": 0.3426646888256073, "learning_rate": 0.0001637002902221342, "loss": 11.6621, "step": 40175 }, { "epoch": 0.8409947249434815, "grad_norm": 0.3216027021408081, "learning_rate": 0.00016369860006785618, "loss": 11.673, "step": 40176 }, { "epoch": 0.8410156577074437, "grad_norm": 0.32839521765708923, "learning_rate": 0.000163696909882957, "loss": 11.6637, "step": 40177 }, { "epoch": 0.8410365904714059, "grad_norm": 0.4438440501689911, "learning_rate": 0.00016369521966743742, "loss": 11.6679, "step": 40178 }, { "epoch": 0.841057523235368, "grad_norm": 0.5529181361198425, "learning_rate": 0.00016369352942129827, "loss": 11.6921, "step": 40179 }, { "epoch": 0.8410784559993302, "grad_norm": 0.24048703908920288, "learning_rate": 0.00016369183914454036, "loss": 11.6475, "step": 40180 }, { "epoch": 0.8410993887632923, "grad_norm": 0.2614881992340088, "learning_rate": 0.0001636901488371645, "loss": 11.6729, "step": 40181 }, { "epoch": 0.8411203215272545, "grad_norm": 0.360876202583313, "learning_rate": 0.00016368845849917154, "loss": 11.6579, "step": 40182 }, { "epoch": 0.8411412542912167, "grad_norm": 0.2678987681865692, "learning_rate": 0.00016368676813056224, "loss": 11.6681, "step": 40183 }, { "epoch": 0.8411621870551788, "grad_norm": 0.32630982995033264, "learning_rate": 0.00016368507773133746, "loss": 11.6715, "step": 40184 }, { "epoch": 0.841183119819141, "grad_norm": 0.25911346077919006, "learning_rate": 0.00016368338730149792, "loss": 11.664, "step": 40185 }, { "epoch": 0.841204052583103, "grad_norm": 0.3023369312286377, "learning_rate": 0.00016368169684104455, "loss": 11.6591, "step": 40186 }, { "epoch": 0.8412249853470652, "grad_norm": 0.23470905423164368, "learning_rate": 0.0001636800063499781, "loss": 11.6742, "step": 40187 }, { "epoch": 0.8412459181110273, "grad_norm": 0.306431382894516, "learning_rate": 0.0001636783158282994, "loss": 11.6599, "step": 40188 }, { "epoch": 0.8412668508749895, "grad_norm": 0.29035690426826477, "learning_rate": 0.0001636766252760093, "loss": 11.6587, "step": 40189 }, { "epoch": 0.8412877836389517, "grad_norm": 0.27434882521629333, "learning_rate": 0.0001636749346931085, "loss": 11.6756, "step": 40190 }, { "epoch": 0.8413087164029138, "grad_norm": 0.39196038246154785, "learning_rate": 0.00016367324407959793, "loss": 11.6728, "step": 40191 }, { "epoch": 0.841329649166876, "grad_norm": 0.3569072186946869, "learning_rate": 0.00016367155343547833, "loss": 11.678, "step": 40192 }, { "epoch": 0.8413505819308381, "grad_norm": 0.373727947473526, "learning_rate": 0.00016366986276075053, "loss": 11.656, "step": 40193 }, { "epoch": 0.8413715146948003, "grad_norm": 0.36354947090148926, "learning_rate": 0.0001636681720554154, "loss": 11.6755, "step": 40194 }, { "epoch": 0.8413924474587624, "grad_norm": 0.33057162165641785, "learning_rate": 0.00016366648131947367, "loss": 11.6661, "step": 40195 }, { "epoch": 0.8414133802227246, "grad_norm": 0.28456223011016846, "learning_rate": 0.00016366479055292618, "loss": 11.6602, "step": 40196 }, { "epoch": 0.8414343129866868, "grad_norm": 0.3274836540222168, "learning_rate": 0.00016366309975577377, "loss": 11.6809, "step": 40197 }, { "epoch": 0.8414552457506489, "grad_norm": 0.2804376184940338, "learning_rate": 0.0001636614089280172, "loss": 11.6706, "step": 40198 }, { "epoch": 0.8414761785146111, "grad_norm": 0.3195313811302185, "learning_rate": 0.00016365971806965734, "loss": 11.6807, "step": 40199 }, { "epoch": 0.8414971112785732, "grad_norm": 0.2918829619884491, "learning_rate": 0.00016365802718069496, "loss": 11.6721, "step": 40200 }, { "epoch": 0.8415180440425354, "grad_norm": 0.38106489181518555, "learning_rate": 0.0001636563362611309, "loss": 11.6785, "step": 40201 }, { "epoch": 0.8415389768064976, "grad_norm": 0.33890995383262634, "learning_rate": 0.00016365464531096596, "loss": 11.6637, "step": 40202 }, { "epoch": 0.8415599095704597, "grad_norm": 0.2833409309387207, "learning_rate": 0.00016365295433020096, "loss": 11.6519, "step": 40203 }, { "epoch": 0.8415808423344219, "grad_norm": 0.3770799934864044, "learning_rate": 0.00016365126331883674, "loss": 11.6852, "step": 40204 }, { "epoch": 0.841601775098384, "grad_norm": 0.2982647120952606, "learning_rate": 0.00016364957227687403, "loss": 11.6677, "step": 40205 }, { "epoch": 0.8416227078623462, "grad_norm": 0.3198651671409607, "learning_rate": 0.0001636478812043137, "loss": 11.6714, "step": 40206 }, { "epoch": 0.8416436406263083, "grad_norm": 0.29093319177627563, "learning_rate": 0.00016364619010115662, "loss": 11.6623, "step": 40207 }, { "epoch": 0.8416645733902705, "grad_norm": 0.30784058570861816, "learning_rate": 0.00016364449896740348, "loss": 11.6603, "step": 40208 }, { "epoch": 0.8416855061542327, "grad_norm": 0.3567942678928375, "learning_rate": 0.00016364280780305516, "loss": 11.6795, "step": 40209 }, { "epoch": 0.8417064389181947, "grad_norm": 0.3353605568408966, "learning_rate": 0.00016364111660811248, "loss": 11.6617, "step": 40210 }, { "epoch": 0.8417273716821569, "grad_norm": 0.392403781414032, "learning_rate": 0.00016363942538257623, "loss": 11.6736, "step": 40211 }, { "epoch": 0.841748304446119, "grad_norm": 0.2734748125076294, "learning_rate": 0.00016363773412644725, "loss": 11.6652, "step": 40212 }, { "epoch": 0.8417692372100812, "grad_norm": 0.34238579869270325, "learning_rate": 0.00016363604283972633, "loss": 11.6741, "step": 40213 }, { "epoch": 0.8417901699740433, "grad_norm": 0.3396378457546234, "learning_rate": 0.0001636343515224143, "loss": 11.6689, "step": 40214 }, { "epoch": 0.8418111027380055, "grad_norm": 0.3332914710044861, "learning_rate": 0.0001636326601745119, "loss": 11.6686, "step": 40215 }, { "epoch": 0.8418320355019677, "grad_norm": 0.30793628096580505, "learning_rate": 0.00016363096879602008, "loss": 11.6605, "step": 40216 }, { "epoch": 0.8418529682659298, "grad_norm": 0.3143186569213867, "learning_rate": 0.00016362927738693955, "loss": 11.6777, "step": 40217 }, { "epoch": 0.841873901029892, "grad_norm": 0.26931074261665344, "learning_rate": 0.00016362758594727116, "loss": 11.6759, "step": 40218 }, { "epoch": 0.8418948337938541, "grad_norm": 0.2979162633419037, "learning_rate": 0.0001636258944770157, "loss": 11.68, "step": 40219 }, { "epoch": 0.8419157665578163, "grad_norm": 0.27821943163871765, "learning_rate": 0.000163624202976174, "loss": 11.6634, "step": 40220 }, { "epoch": 0.8419366993217784, "grad_norm": 0.4122184216976166, "learning_rate": 0.0001636225114447469, "loss": 11.6952, "step": 40221 }, { "epoch": 0.8419576320857406, "grad_norm": 0.29742759466171265, "learning_rate": 0.00016362081988273514, "loss": 11.667, "step": 40222 }, { "epoch": 0.8419785648497028, "grad_norm": 0.26356786489486694, "learning_rate": 0.0001636191282901396, "loss": 11.6552, "step": 40223 }, { "epoch": 0.8419994976136649, "grad_norm": 0.30785608291625977, "learning_rate": 0.00016361743666696104, "loss": 11.6565, "step": 40224 }, { "epoch": 0.8420204303776271, "grad_norm": 0.29230648279190063, "learning_rate": 0.00016361574501320034, "loss": 11.6572, "step": 40225 }, { "epoch": 0.8420413631415892, "grad_norm": 0.33420756459236145, "learning_rate": 0.00016361405332885828, "loss": 11.6733, "step": 40226 }, { "epoch": 0.8420622959055514, "grad_norm": 0.2899010181427002, "learning_rate": 0.00016361236161393565, "loss": 11.6576, "step": 40227 }, { "epoch": 0.8420832286695136, "grad_norm": 0.31646203994750977, "learning_rate": 0.0001636106698684333, "loss": 11.6689, "step": 40228 }, { "epoch": 0.8421041614334757, "grad_norm": 0.2978184223175049, "learning_rate": 0.000163608978092352, "loss": 11.6512, "step": 40229 }, { "epoch": 0.8421250941974379, "grad_norm": 0.27413466572761536, "learning_rate": 0.0001636072862856926, "loss": 11.6511, "step": 40230 }, { "epoch": 0.8421460269614, "grad_norm": 0.2808758020401001, "learning_rate": 0.0001636055944484559, "loss": 11.6825, "step": 40231 }, { "epoch": 0.8421669597253622, "grad_norm": 0.2747938334941864, "learning_rate": 0.0001636039025806427, "loss": 11.6602, "step": 40232 }, { "epoch": 0.8421878924893242, "grad_norm": 0.3234601318836212, "learning_rate": 0.00016360221068225387, "loss": 11.6812, "step": 40233 }, { "epoch": 0.8422088252532864, "grad_norm": 0.32437214255332947, "learning_rate": 0.00016360051875329015, "loss": 11.668, "step": 40234 }, { "epoch": 0.8422297580172486, "grad_norm": 0.2647271454334259, "learning_rate": 0.0001635988267937524, "loss": 11.668, "step": 40235 }, { "epoch": 0.8422506907812107, "grad_norm": 0.3077884912490845, "learning_rate": 0.00016359713480364145, "loss": 11.6783, "step": 40236 }, { "epoch": 0.8422716235451729, "grad_norm": 0.28306135535240173, "learning_rate": 0.00016359544278295803, "loss": 11.6531, "step": 40237 }, { "epoch": 0.842292556309135, "grad_norm": 0.29798567295074463, "learning_rate": 0.00016359375073170303, "loss": 11.6653, "step": 40238 }, { "epoch": 0.8423134890730972, "grad_norm": 0.3380371630191803, "learning_rate": 0.00016359205864987722, "loss": 11.6611, "step": 40239 }, { "epoch": 0.8423344218370593, "grad_norm": 0.26923710107803345, "learning_rate": 0.0001635903665374815, "loss": 11.6624, "step": 40240 }, { "epoch": 0.8423553546010215, "grad_norm": 0.28026288747787476, "learning_rate": 0.00016358867439451654, "loss": 11.6644, "step": 40241 }, { "epoch": 0.8423762873649837, "grad_norm": 0.2849390208721161, "learning_rate": 0.00016358698222098325, "loss": 11.6788, "step": 40242 }, { "epoch": 0.8423972201289458, "grad_norm": 0.26490798592567444, "learning_rate": 0.00016358529001688244, "loss": 11.6821, "step": 40243 }, { "epoch": 0.842418152892908, "grad_norm": 0.2517586052417755, "learning_rate": 0.0001635835977822149, "loss": 11.6523, "step": 40244 }, { "epoch": 0.8424390856568701, "grad_norm": 0.2960301339626312, "learning_rate": 0.00016358190551698148, "loss": 11.6675, "step": 40245 }, { "epoch": 0.8424600184208323, "grad_norm": 0.4191882610321045, "learning_rate": 0.0001635802132211829, "loss": 11.6703, "step": 40246 }, { "epoch": 0.8424809511847945, "grad_norm": 0.3256548047065735, "learning_rate": 0.00016357852089482006, "loss": 11.6797, "step": 40247 }, { "epoch": 0.8425018839487566, "grad_norm": 0.32373595237731934, "learning_rate": 0.00016357682853789375, "loss": 11.6799, "step": 40248 }, { "epoch": 0.8425228167127188, "grad_norm": 0.2936096787452698, "learning_rate": 0.0001635751361504048, "loss": 11.6757, "step": 40249 }, { "epoch": 0.8425437494766809, "grad_norm": 0.27383357286453247, "learning_rate": 0.00016357344373235403, "loss": 11.675, "step": 40250 }, { "epoch": 0.8425646822406431, "grad_norm": 0.3789249360561371, "learning_rate": 0.0001635717512837422, "loss": 11.6679, "step": 40251 }, { "epoch": 0.8425856150046052, "grad_norm": 0.27667927742004395, "learning_rate": 0.00016357005880457013, "loss": 11.6736, "step": 40252 }, { "epoch": 0.8426065477685674, "grad_norm": 0.29758119583129883, "learning_rate": 0.0001635683662948387, "loss": 11.6683, "step": 40253 }, { "epoch": 0.8426274805325296, "grad_norm": 0.27219638228416443, "learning_rate": 0.00016356667375454865, "loss": 11.6711, "step": 40254 }, { "epoch": 0.8426484132964916, "grad_norm": 0.3564451336860657, "learning_rate": 0.00016356498118370088, "loss": 11.6732, "step": 40255 }, { "epoch": 0.8426693460604538, "grad_norm": 0.31100407242774963, "learning_rate": 0.0001635632885822961, "loss": 11.6644, "step": 40256 }, { "epoch": 0.8426902788244159, "grad_norm": 0.35356026887893677, "learning_rate": 0.0001635615959503352, "loss": 11.6528, "step": 40257 }, { "epoch": 0.8427112115883781, "grad_norm": 0.28768742084503174, "learning_rate": 0.00016355990328781897, "loss": 11.6721, "step": 40258 }, { "epoch": 0.8427321443523402, "grad_norm": 0.3564850091934204, "learning_rate": 0.0001635582105947482, "loss": 11.6708, "step": 40259 }, { "epoch": 0.8427530771163024, "grad_norm": 0.3077084720134735, "learning_rate": 0.00016355651787112373, "loss": 11.6653, "step": 40260 }, { "epoch": 0.8427740098802646, "grad_norm": 0.31385794281959534, "learning_rate": 0.00016355482511694637, "loss": 11.6728, "step": 40261 }, { "epoch": 0.8427949426442267, "grad_norm": 0.3532032072544098, "learning_rate": 0.00016355313233221695, "loss": 11.6823, "step": 40262 }, { "epoch": 0.8428158754081889, "grad_norm": 0.310983806848526, "learning_rate": 0.00016355143951693623, "loss": 11.6733, "step": 40263 }, { "epoch": 0.842836808172151, "grad_norm": 0.3529216945171356, "learning_rate": 0.00016354974667110507, "loss": 11.6694, "step": 40264 }, { "epoch": 0.8428577409361132, "grad_norm": 0.25305965542793274, "learning_rate": 0.00016354805379472428, "loss": 11.6686, "step": 40265 }, { "epoch": 0.8428786737000754, "grad_norm": 0.2774280905723572, "learning_rate": 0.00016354636088779467, "loss": 11.6709, "step": 40266 }, { "epoch": 0.8428996064640375, "grad_norm": 0.31896090507507324, "learning_rate": 0.00016354466795031705, "loss": 11.6734, "step": 40267 }, { "epoch": 0.8429205392279997, "grad_norm": 0.3764544725418091, "learning_rate": 0.00016354297498229222, "loss": 11.6637, "step": 40268 }, { "epoch": 0.8429414719919618, "grad_norm": 0.3223098814487457, "learning_rate": 0.00016354128198372103, "loss": 11.6795, "step": 40269 }, { "epoch": 0.842962404755924, "grad_norm": 0.3106556236743927, "learning_rate": 0.00016353958895460425, "loss": 11.6788, "step": 40270 }, { "epoch": 0.8429833375198861, "grad_norm": 0.4361123740673065, "learning_rate": 0.00016353789589494273, "loss": 11.6848, "step": 40271 }, { "epoch": 0.8430042702838483, "grad_norm": 0.33633846044540405, "learning_rate": 0.00016353620280473727, "loss": 11.686, "step": 40272 }, { "epoch": 0.8430252030478105, "grad_norm": 0.28439128398895264, "learning_rate": 0.00016353450968398868, "loss": 11.6585, "step": 40273 }, { "epoch": 0.8430461358117726, "grad_norm": 0.3691810965538025, "learning_rate": 0.0001635328165326978, "loss": 11.6842, "step": 40274 }, { "epoch": 0.8430670685757348, "grad_norm": 0.3015793263912201, "learning_rate": 0.0001635311233508654, "loss": 11.667, "step": 40275 }, { "epoch": 0.8430880013396969, "grad_norm": 0.3355412483215332, "learning_rate": 0.00016352943013849233, "loss": 11.6632, "step": 40276 }, { "epoch": 0.8431089341036591, "grad_norm": 0.2842198610305786, "learning_rate": 0.00016352773689557937, "loss": 11.6762, "step": 40277 }, { "epoch": 0.8431298668676211, "grad_norm": 0.2671637535095215, "learning_rate": 0.00016352604362212736, "loss": 11.6576, "step": 40278 }, { "epoch": 0.8431507996315833, "grad_norm": 0.3737185299396515, "learning_rate": 0.00016352435031813712, "loss": 11.6592, "step": 40279 }, { "epoch": 0.8431717323955455, "grad_norm": 0.30701735615730286, "learning_rate": 0.00016352265698360943, "loss": 11.6749, "step": 40280 }, { "epoch": 0.8431926651595076, "grad_norm": 0.2553219199180603, "learning_rate": 0.00016352096361854515, "loss": 11.6572, "step": 40281 }, { "epoch": 0.8432135979234698, "grad_norm": 0.3570215106010437, "learning_rate": 0.00016351927022294506, "loss": 11.685, "step": 40282 }, { "epoch": 0.8432345306874319, "grad_norm": 0.3356691896915436, "learning_rate": 0.00016351757679680998, "loss": 11.6581, "step": 40283 }, { "epoch": 0.8432554634513941, "grad_norm": 0.3352442979812622, "learning_rate": 0.0001635158833401407, "loss": 11.6741, "step": 40284 }, { "epoch": 0.8432763962153563, "grad_norm": 0.3025902211666107, "learning_rate": 0.0001635141898529381, "loss": 11.6638, "step": 40285 }, { "epoch": 0.8432973289793184, "grad_norm": 0.3466378450393677, "learning_rate": 0.00016351249633520296, "loss": 11.6677, "step": 40286 }, { "epoch": 0.8433182617432806, "grad_norm": 0.26483702659606934, "learning_rate": 0.0001635108027869361, "loss": 11.6658, "step": 40287 }, { "epoch": 0.8433391945072427, "grad_norm": 0.33302071690559387, "learning_rate": 0.00016350910920813827, "loss": 11.6789, "step": 40288 }, { "epoch": 0.8433601272712049, "grad_norm": 0.3272804319858551, "learning_rate": 0.00016350741559881038, "loss": 11.6601, "step": 40289 }, { "epoch": 0.843381060035167, "grad_norm": 0.33134448528289795, "learning_rate": 0.0001635057219589532, "loss": 11.6652, "step": 40290 }, { "epoch": 0.8434019927991292, "grad_norm": 0.29525113105773926, "learning_rate": 0.00016350402828856756, "loss": 11.6658, "step": 40291 }, { "epoch": 0.8434229255630914, "grad_norm": 0.30356234312057495, "learning_rate": 0.00016350233458765423, "loss": 11.6692, "step": 40292 }, { "epoch": 0.8434438583270535, "grad_norm": 0.25192344188690186, "learning_rate": 0.00016350064085621403, "loss": 11.6541, "step": 40293 }, { "epoch": 0.8434647910910157, "grad_norm": 0.42780882120132446, "learning_rate": 0.00016349894709424787, "loss": 11.6684, "step": 40294 }, { "epoch": 0.8434857238549778, "grad_norm": 0.2678000032901764, "learning_rate": 0.00016349725330175644, "loss": 11.6582, "step": 40295 }, { "epoch": 0.84350665661894, "grad_norm": 0.31066107749938965, "learning_rate": 0.00016349555947874062, "loss": 11.6697, "step": 40296 }, { "epoch": 0.8435275893829021, "grad_norm": 0.4189987778663635, "learning_rate": 0.00016349386562520123, "loss": 11.6834, "step": 40297 }, { "epoch": 0.8435485221468643, "grad_norm": 0.2539219260215759, "learning_rate": 0.00016349217174113908, "loss": 11.6525, "step": 40298 }, { "epoch": 0.8435694549108265, "grad_norm": 0.3022332787513733, "learning_rate": 0.00016349047782655493, "loss": 11.6613, "step": 40299 }, { "epoch": 0.8435903876747886, "grad_norm": 0.2770511507987976, "learning_rate": 0.00016348878388144965, "loss": 11.6777, "step": 40300 }, { "epoch": 0.8436113204387508, "grad_norm": 0.32929614186286926, "learning_rate": 0.00016348708990582403, "loss": 11.6709, "step": 40301 }, { "epoch": 0.8436322532027128, "grad_norm": 0.32582545280456543, "learning_rate": 0.00016348539589967892, "loss": 11.6741, "step": 40302 }, { "epoch": 0.843653185966675, "grad_norm": 0.32860618829727173, "learning_rate": 0.0001634837018630151, "loss": 11.6675, "step": 40303 }, { "epoch": 0.8436741187306372, "grad_norm": 0.29682251811027527, "learning_rate": 0.00016348200779583337, "loss": 11.6842, "step": 40304 }, { "epoch": 0.8436950514945993, "grad_norm": 0.4876078963279724, "learning_rate": 0.00016348031369813458, "loss": 11.6768, "step": 40305 }, { "epoch": 0.8437159842585615, "grad_norm": 0.33907750248908997, "learning_rate": 0.00016347861956991953, "loss": 11.6671, "step": 40306 }, { "epoch": 0.8437369170225236, "grad_norm": 0.28744131326675415, "learning_rate": 0.00016347692541118905, "loss": 11.6676, "step": 40307 }, { "epoch": 0.8437578497864858, "grad_norm": 0.2969227433204651, "learning_rate": 0.0001634752312219439, "loss": 11.6633, "step": 40308 }, { "epoch": 0.8437787825504479, "grad_norm": 0.32491564750671387, "learning_rate": 0.000163473537002185, "loss": 11.6732, "step": 40309 }, { "epoch": 0.8437997153144101, "grad_norm": 0.3608047366142273, "learning_rate": 0.00016347184275191304, "loss": 11.6832, "step": 40310 }, { "epoch": 0.8438206480783723, "grad_norm": 0.31610268354415894, "learning_rate": 0.00016347014847112892, "loss": 11.6808, "step": 40311 }, { "epoch": 0.8438415808423344, "grad_norm": 0.2898937165737152, "learning_rate": 0.00016346845415983344, "loss": 11.6725, "step": 40312 }, { "epoch": 0.8438625136062966, "grad_norm": 0.46877458691596985, "learning_rate": 0.00016346675981802734, "loss": 11.6753, "step": 40313 }, { "epoch": 0.8438834463702587, "grad_norm": 0.37654194235801697, "learning_rate": 0.00016346506544571157, "loss": 11.6672, "step": 40314 }, { "epoch": 0.8439043791342209, "grad_norm": 0.3699777126312256, "learning_rate": 0.00016346337104288682, "loss": 11.6809, "step": 40315 }, { "epoch": 0.843925311898183, "grad_norm": 0.29035094380378723, "learning_rate": 0.00016346167660955397, "loss": 11.6503, "step": 40316 }, { "epoch": 0.8439462446621452, "grad_norm": 0.31235283613204956, "learning_rate": 0.00016345998214571385, "loss": 11.6729, "step": 40317 }, { "epoch": 0.8439671774261074, "grad_norm": 0.28921693563461304, "learning_rate": 0.0001634582876513672, "loss": 11.6826, "step": 40318 }, { "epoch": 0.8439881101900695, "grad_norm": 0.28128644824028015, "learning_rate": 0.00016345659312651495, "loss": 11.6695, "step": 40319 }, { "epoch": 0.8440090429540317, "grad_norm": 0.29448068141937256, "learning_rate": 0.00016345489857115775, "loss": 11.6828, "step": 40320 }, { "epoch": 0.8440299757179938, "grad_norm": 0.26809221506118774, "learning_rate": 0.00016345320398529654, "loss": 11.6788, "step": 40321 }, { "epoch": 0.844050908481956, "grad_norm": 0.2347566932439804, "learning_rate": 0.00016345150936893216, "loss": 11.6635, "step": 40322 }, { "epoch": 0.8440718412459182, "grad_norm": 0.2698507010936737, "learning_rate": 0.00016344981472206528, "loss": 11.6738, "step": 40323 }, { "epoch": 0.8440927740098803, "grad_norm": 0.3195616602897644, "learning_rate": 0.00016344812004469688, "loss": 11.6824, "step": 40324 }, { "epoch": 0.8441137067738425, "grad_norm": 0.3210156261920929, "learning_rate": 0.00016344642533682767, "loss": 11.6657, "step": 40325 }, { "epoch": 0.8441346395378045, "grad_norm": 0.3145112097263336, "learning_rate": 0.0001634447305984585, "loss": 11.6693, "step": 40326 }, { "epoch": 0.8441555723017667, "grad_norm": 0.28571081161499023, "learning_rate": 0.00016344303582959015, "loss": 11.6595, "step": 40327 }, { "epoch": 0.8441765050657288, "grad_norm": 0.2818470299243927, "learning_rate": 0.0001634413410302235, "loss": 11.6802, "step": 40328 }, { "epoch": 0.844197437829691, "grad_norm": 0.4161181151866913, "learning_rate": 0.00016343964620035926, "loss": 11.6747, "step": 40329 }, { "epoch": 0.8442183705936532, "grad_norm": 0.3067597448825836, "learning_rate": 0.00016343795133999834, "loss": 11.6539, "step": 40330 }, { "epoch": 0.8442393033576153, "grad_norm": 0.343194842338562, "learning_rate": 0.00016343625644914156, "loss": 11.6684, "step": 40331 }, { "epoch": 0.8442602361215775, "grad_norm": 0.3903956115245819, "learning_rate": 0.00016343456152778966, "loss": 11.6746, "step": 40332 }, { "epoch": 0.8442811688855396, "grad_norm": 0.32444196939468384, "learning_rate": 0.00016343286657594352, "loss": 11.663, "step": 40333 }, { "epoch": 0.8443021016495018, "grad_norm": 0.34042543172836304, "learning_rate": 0.00016343117159360392, "loss": 11.6644, "step": 40334 }, { "epoch": 0.8443230344134639, "grad_norm": 0.32162532210350037, "learning_rate": 0.00016342947658077168, "loss": 11.6737, "step": 40335 }, { "epoch": 0.8443439671774261, "grad_norm": 0.37739068269729614, "learning_rate": 0.00016342778153744764, "loss": 11.6762, "step": 40336 }, { "epoch": 0.8443648999413883, "grad_norm": 0.30692437291145325, "learning_rate": 0.00016342608646363255, "loss": 11.6645, "step": 40337 }, { "epoch": 0.8443858327053504, "grad_norm": 0.25228801369667053, "learning_rate": 0.0001634243913593273, "loss": 11.6693, "step": 40338 }, { "epoch": 0.8444067654693126, "grad_norm": 0.34958332777023315, "learning_rate": 0.0001634226962245327, "loss": 11.6856, "step": 40339 }, { "epoch": 0.8444276982332747, "grad_norm": 0.48434141278266907, "learning_rate": 0.0001634210010592495, "loss": 11.6555, "step": 40340 }, { "epoch": 0.8444486309972369, "grad_norm": 0.2693319618701935, "learning_rate": 0.00016341930586347858, "loss": 11.6627, "step": 40341 }, { "epoch": 0.8444695637611991, "grad_norm": 0.3585335612297058, "learning_rate": 0.00016341761063722073, "loss": 11.6745, "step": 40342 }, { "epoch": 0.8444904965251612, "grad_norm": 0.2793755829334259, "learning_rate": 0.00016341591538047675, "loss": 11.6688, "step": 40343 }, { "epoch": 0.8445114292891234, "grad_norm": 0.320339173078537, "learning_rate": 0.00016341422009324744, "loss": 11.671, "step": 40344 }, { "epoch": 0.8445323620530855, "grad_norm": 0.31465649604797363, "learning_rate": 0.00016341252477553367, "loss": 11.69, "step": 40345 }, { "epoch": 0.8445532948170477, "grad_norm": 0.3264671862125397, "learning_rate": 0.00016341082942733623, "loss": 11.6604, "step": 40346 }, { "epoch": 0.8445742275810098, "grad_norm": 0.3009842038154602, "learning_rate": 0.00016340913404865593, "loss": 11.682, "step": 40347 }, { "epoch": 0.844595160344972, "grad_norm": 0.35364773869514465, "learning_rate": 0.0001634074386394936, "loss": 11.6751, "step": 40348 }, { "epoch": 0.8446160931089342, "grad_norm": 0.32667726278305054, "learning_rate": 0.00016340574319985004, "loss": 11.6768, "step": 40349 }, { "epoch": 0.8446370258728962, "grad_norm": 0.32095828652381897, "learning_rate": 0.00016340404772972606, "loss": 11.6573, "step": 40350 }, { "epoch": 0.8446579586368584, "grad_norm": 0.2713441550731659, "learning_rate": 0.0001634023522291225, "loss": 11.6697, "step": 40351 }, { "epoch": 0.8446788914008205, "grad_norm": 0.3152732849121094, "learning_rate": 0.00016340065669804012, "loss": 11.6672, "step": 40352 }, { "epoch": 0.8446998241647827, "grad_norm": 0.42162561416625977, "learning_rate": 0.00016339896113647982, "loss": 11.6711, "step": 40353 }, { "epoch": 0.8447207569287448, "grad_norm": 0.31896737217903137, "learning_rate": 0.00016339726554444233, "loss": 11.6755, "step": 40354 }, { "epoch": 0.844741689692707, "grad_norm": 0.3225385844707489, "learning_rate": 0.00016339556992192854, "loss": 11.6592, "step": 40355 }, { "epoch": 0.8447626224566692, "grad_norm": 0.4990248680114746, "learning_rate": 0.00016339387426893918, "loss": 11.6822, "step": 40356 }, { "epoch": 0.8447835552206313, "grad_norm": 0.24965794384479523, "learning_rate": 0.00016339217858547515, "loss": 11.6723, "step": 40357 }, { "epoch": 0.8448044879845935, "grad_norm": 0.266061931848526, "learning_rate": 0.00016339048287153724, "loss": 11.6794, "step": 40358 }, { "epoch": 0.8448254207485556, "grad_norm": 0.31221237778663635, "learning_rate": 0.00016338878712712624, "loss": 11.6784, "step": 40359 }, { "epoch": 0.8448463535125178, "grad_norm": 0.3547174334526062, "learning_rate": 0.00016338709135224296, "loss": 11.6642, "step": 40360 }, { "epoch": 0.84486728627648, "grad_norm": 0.24764461815357208, "learning_rate": 0.0001633853955468883, "loss": 11.6439, "step": 40361 }, { "epoch": 0.8448882190404421, "grad_norm": 0.28243130445480347, "learning_rate": 0.00016338369971106293, "loss": 11.668, "step": 40362 }, { "epoch": 0.8449091518044043, "grad_norm": 0.31872275471687317, "learning_rate": 0.0001633820038447678, "loss": 11.6817, "step": 40363 }, { "epoch": 0.8449300845683664, "grad_norm": 0.2823815643787384, "learning_rate": 0.00016338030794800363, "loss": 11.6741, "step": 40364 }, { "epoch": 0.8449510173323286, "grad_norm": 0.3405340611934662, "learning_rate": 0.0001633786120207713, "loss": 11.644, "step": 40365 }, { "epoch": 0.8449719500962907, "grad_norm": 0.32502034306526184, "learning_rate": 0.0001633769160630716, "loss": 11.656, "step": 40366 }, { "epoch": 0.8449928828602529, "grad_norm": 0.34352537989616394, "learning_rate": 0.00016337522007490532, "loss": 11.6736, "step": 40367 }, { "epoch": 0.8450138156242151, "grad_norm": 0.3028814196586609, "learning_rate": 0.00016337352405627336, "loss": 11.665, "step": 40368 }, { "epoch": 0.8450347483881772, "grad_norm": 0.251247763633728, "learning_rate": 0.0001633718280071764, "loss": 11.6584, "step": 40369 }, { "epoch": 0.8450556811521394, "grad_norm": 0.26683327555656433, "learning_rate": 0.0001633701319276154, "loss": 11.6718, "step": 40370 }, { "epoch": 0.8450766139161014, "grad_norm": 0.24301670491695404, "learning_rate": 0.00016336843581759108, "loss": 11.6759, "step": 40371 }, { "epoch": 0.8450975466800636, "grad_norm": 0.28411003947257996, "learning_rate": 0.0001633667396771043, "loss": 11.6666, "step": 40372 }, { "epoch": 0.8451184794440257, "grad_norm": 0.3161334693431854, "learning_rate": 0.00016336504350615582, "loss": 11.6782, "step": 40373 }, { "epoch": 0.8451394122079879, "grad_norm": 0.28714290261268616, "learning_rate": 0.00016336334730474654, "loss": 11.6444, "step": 40374 }, { "epoch": 0.8451603449719501, "grad_norm": 0.2547372877597809, "learning_rate": 0.0001633616510728772, "loss": 11.6735, "step": 40375 }, { "epoch": 0.8451812777359122, "grad_norm": 0.2664775550365448, "learning_rate": 0.00016335995481054865, "loss": 11.6668, "step": 40376 }, { "epoch": 0.8452022104998744, "grad_norm": 0.29697978496551514, "learning_rate": 0.0001633582585177617, "loss": 11.6751, "step": 40377 }, { "epoch": 0.8452231432638365, "grad_norm": 0.2987104058265686, "learning_rate": 0.00016335656219451715, "loss": 11.6534, "step": 40378 }, { "epoch": 0.8452440760277987, "grad_norm": 0.29062342643737793, "learning_rate": 0.00016335486584081584, "loss": 11.665, "step": 40379 }, { "epoch": 0.8452650087917609, "grad_norm": 0.32962971925735474, "learning_rate": 0.00016335316945665862, "loss": 11.6666, "step": 40380 }, { "epoch": 0.845285941555723, "grad_norm": 0.30316659808158875, "learning_rate": 0.0001633514730420462, "loss": 11.6753, "step": 40381 }, { "epoch": 0.8453068743196852, "grad_norm": 0.2867332994937897, "learning_rate": 0.0001633497765969795, "loss": 11.6555, "step": 40382 }, { "epoch": 0.8453278070836473, "grad_norm": 0.33393141627311707, "learning_rate": 0.00016334808012145925, "loss": 11.658, "step": 40383 }, { "epoch": 0.8453487398476095, "grad_norm": 0.31100741028785706, "learning_rate": 0.00016334638361548636, "loss": 11.6837, "step": 40384 }, { "epoch": 0.8453696726115716, "grad_norm": 0.3492835462093353, "learning_rate": 0.00016334468707906153, "loss": 11.6727, "step": 40385 }, { "epoch": 0.8453906053755338, "grad_norm": 0.31690531969070435, "learning_rate": 0.00016334299051218572, "loss": 11.6727, "step": 40386 }, { "epoch": 0.845411538139496, "grad_norm": 0.2897304594516754, "learning_rate": 0.0001633412939148596, "loss": 11.6635, "step": 40387 }, { "epoch": 0.8454324709034581, "grad_norm": 0.3108791410923004, "learning_rate": 0.00016333959728708408, "loss": 11.6638, "step": 40388 }, { "epoch": 0.8454534036674203, "grad_norm": 0.3383415937423706, "learning_rate": 0.00016333790062885993, "loss": 11.6759, "step": 40389 }, { "epoch": 0.8454743364313824, "grad_norm": 0.2986017167568207, "learning_rate": 0.00016333620394018802, "loss": 11.6759, "step": 40390 }, { "epoch": 0.8454952691953446, "grad_norm": 0.35876789689064026, "learning_rate": 0.00016333450722106908, "loss": 11.6625, "step": 40391 }, { "epoch": 0.8455162019593067, "grad_norm": 0.27608370780944824, "learning_rate": 0.00016333281047150398, "loss": 11.6894, "step": 40392 }, { "epoch": 0.8455371347232689, "grad_norm": 0.24325156211853027, "learning_rate": 0.00016333111369149354, "loss": 11.6663, "step": 40393 }, { "epoch": 0.8455580674872311, "grad_norm": 0.3831334412097931, "learning_rate": 0.00016332941688103857, "loss": 11.6658, "step": 40394 }, { "epoch": 0.8455790002511931, "grad_norm": 0.251307874917984, "learning_rate": 0.00016332772004013987, "loss": 11.6638, "step": 40395 }, { "epoch": 0.8455999330151553, "grad_norm": 0.3822430968284607, "learning_rate": 0.00016332602316879825, "loss": 11.6859, "step": 40396 }, { "epoch": 0.8456208657791174, "grad_norm": 0.291342556476593, "learning_rate": 0.00016332432626701458, "loss": 11.6692, "step": 40397 }, { "epoch": 0.8456417985430796, "grad_norm": 0.32171544432640076, "learning_rate": 0.00016332262933478964, "loss": 11.6661, "step": 40398 }, { "epoch": 0.8456627313070418, "grad_norm": 0.3280559182167053, "learning_rate": 0.00016332093237212422, "loss": 11.6638, "step": 40399 }, { "epoch": 0.8456836640710039, "grad_norm": 0.30966123938560486, "learning_rate": 0.00016331923537901916, "loss": 11.6604, "step": 40400 }, { "epoch": 0.8457045968349661, "grad_norm": 0.33933061361312866, "learning_rate": 0.00016331753835547525, "loss": 11.6643, "step": 40401 }, { "epoch": 0.8457255295989282, "grad_norm": 0.24971462786197662, "learning_rate": 0.00016331584130149337, "loss": 11.6666, "step": 40402 }, { "epoch": 0.8457464623628904, "grad_norm": 0.3901101350784302, "learning_rate": 0.0001633141442170743, "loss": 11.6653, "step": 40403 }, { "epoch": 0.8457673951268525, "grad_norm": 0.32567504048347473, "learning_rate": 0.0001633124471022188, "loss": 11.6788, "step": 40404 }, { "epoch": 0.8457883278908147, "grad_norm": 0.31396573781967163, "learning_rate": 0.00016331074995692778, "loss": 11.6675, "step": 40405 }, { "epoch": 0.8458092606547769, "grad_norm": 0.29165440797805786, "learning_rate": 0.000163309052781202, "loss": 11.6815, "step": 40406 }, { "epoch": 0.845830193418739, "grad_norm": 0.3082236051559448, "learning_rate": 0.0001633073555750423, "loss": 11.6711, "step": 40407 }, { "epoch": 0.8458511261827012, "grad_norm": 0.294646680355072, "learning_rate": 0.0001633056583384495, "loss": 11.6672, "step": 40408 }, { "epoch": 0.8458720589466633, "grad_norm": 0.293600469827652, "learning_rate": 0.00016330396107142438, "loss": 11.6681, "step": 40409 }, { "epoch": 0.8458929917106255, "grad_norm": 0.26151683926582336, "learning_rate": 0.00016330226377396777, "loss": 11.6621, "step": 40410 }, { "epoch": 0.8459139244745876, "grad_norm": 0.28326013684272766, "learning_rate": 0.00016330056644608048, "loss": 11.6724, "step": 40411 }, { "epoch": 0.8459348572385498, "grad_norm": 0.2871707081794739, "learning_rate": 0.0001632988690877634, "loss": 11.6529, "step": 40412 }, { "epoch": 0.845955790002512, "grad_norm": 0.3569072484970093, "learning_rate": 0.00016329717169901725, "loss": 11.6634, "step": 40413 }, { "epoch": 0.8459767227664741, "grad_norm": 0.3797702491283417, "learning_rate": 0.0001632954742798429, "loss": 11.6699, "step": 40414 }, { "epoch": 0.8459976555304363, "grad_norm": 0.29161322116851807, "learning_rate": 0.0001632937768302411, "loss": 11.668, "step": 40415 }, { "epoch": 0.8460185882943984, "grad_norm": 0.3568337559700012, "learning_rate": 0.00016329207935021273, "loss": 11.6589, "step": 40416 }, { "epoch": 0.8460395210583606, "grad_norm": 0.3547915518283844, "learning_rate": 0.0001632903818397586, "loss": 11.669, "step": 40417 }, { "epoch": 0.8460604538223226, "grad_norm": 0.23766133189201355, "learning_rate": 0.0001632886842988795, "loss": 11.6577, "step": 40418 }, { "epoch": 0.8460813865862848, "grad_norm": 0.38334232568740845, "learning_rate": 0.0001632869867275763, "loss": 11.6673, "step": 40419 }, { "epoch": 0.846102319350247, "grad_norm": 0.3393062651157379, "learning_rate": 0.00016328528912584975, "loss": 11.6812, "step": 40420 }, { "epoch": 0.8461232521142091, "grad_norm": 0.2388073205947876, "learning_rate": 0.0001632835914937007, "loss": 11.687, "step": 40421 }, { "epoch": 0.8461441848781713, "grad_norm": 0.3063308000564575, "learning_rate": 0.00016328189383112992, "loss": 11.6652, "step": 40422 }, { "epoch": 0.8461651176421334, "grad_norm": 0.2692165970802307, "learning_rate": 0.00016328019613813833, "loss": 11.6661, "step": 40423 }, { "epoch": 0.8461860504060956, "grad_norm": 0.33754512667655945, "learning_rate": 0.00016327849841472664, "loss": 11.6686, "step": 40424 }, { "epoch": 0.8462069831700578, "grad_norm": 0.23578087985515594, "learning_rate": 0.00016327680066089572, "loss": 11.6743, "step": 40425 }, { "epoch": 0.8462279159340199, "grad_norm": 0.32055187225341797, "learning_rate": 0.00016327510287664638, "loss": 11.6686, "step": 40426 }, { "epoch": 0.8462488486979821, "grad_norm": 0.42875936627388, "learning_rate": 0.00016327340506197942, "loss": 11.686, "step": 40427 }, { "epoch": 0.8462697814619442, "grad_norm": 0.33109867572784424, "learning_rate": 0.00016327170721689566, "loss": 11.6763, "step": 40428 }, { "epoch": 0.8462907142259064, "grad_norm": 0.3079073429107666, "learning_rate": 0.00016327000934139593, "loss": 11.6821, "step": 40429 }, { "epoch": 0.8463116469898685, "grad_norm": 0.29150715470314026, "learning_rate": 0.00016326831143548102, "loss": 11.6736, "step": 40430 }, { "epoch": 0.8463325797538307, "grad_norm": 0.38365086913108826, "learning_rate": 0.0001632666134991518, "loss": 11.6875, "step": 40431 }, { "epoch": 0.8463535125177929, "grad_norm": 0.2970592677593231, "learning_rate": 0.00016326491553240904, "loss": 11.6726, "step": 40432 }, { "epoch": 0.846374445281755, "grad_norm": 0.4394167959690094, "learning_rate": 0.00016326321753525356, "loss": 11.6565, "step": 40433 }, { "epoch": 0.8463953780457172, "grad_norm": 0.2665935754776001, "learning_rate": 0.0001632615195076862, "loss": 11.661, "step": 40434 }, { "epoch": 0.8464163108096793, "grad_norm": 0.2947029769420624, "learning_rate": 0.00016325982144970772, "loss": 11.685, "step": 40435 }, { "epoch": 0.8464372435736415, "grad_norm": 0.31237533688545227, "learning_rate": 0.00016325812336131902, "loss": 11.6807, "step": 40436 }, { "epoch": 0.8464581763376036, "grad_norm": 0.2553609609603882, "learning_rate": 0.00016325642524252085, "loss": 11.6733, "step": 40437 }, { "epoch": 0.8464791091015658, "grad_norm": 0.26446452736854553, "learning_rate": 0.00016325472709331407, "loss": 11.6635, "step": 40438 }, { "epoch": 0.846500041865528, "grad_norm": 0.43942615389823914, "learning_rate": 0.00016325302891369945, "loss": 11.6801, "step": 40439 }, { "epoch": 0.84652097462949, "grad_norm": 0.3540719747543335, "learning_rate": 0.00016325133070367782, "loss": 11.6728, "step": 40440 }, { "epoch": 0.8465419073934523, "grad_norm": 0.2939513027667999, "learning_rate": 0.00016324963246325003, "loss": 11.6729, "step": 40441 }, { "epoch": 0.8465628401574143, "grad_norm": 0.2850920855998993, "learning_rate": 0.00016324793419241688, "loss": 11.6643, "step": 40442 }, { "epoch": 0.8465837729213765, "grad_norm": 0.30365872383117676, "learning_rate": 0.00016324623589117918, "loss": 11.6817, "step": 40443 }, { "epoch": 0.8466047056853387, "grad_norm": 0.4660455584526062, "learning_rate": 0.00016324453755953773, "loss": 11.6922, "step": 40444 }, { "epoch": 0.8466256384493008, "grad_norm": 0.3476380705833435, "learning_rate": 0.00016324283919749336, "loss": 11.6733, "step": 40445 }, { "epoch": 0.846646571213263, "grad_norm": 0.27587956190109253, "learning_rate": 0.00016324114080504693, "loss": 11.6718, "step": 40446 }, { "epoch": 0.8466675039772251, "grad_norm": 0.2898353040218353, "learning_rate": 0.00016323944238219918, "loss": 11.6697, "step": 40447 }, { "epoch": 0.8466884367411873, "grad_norm": 0.3228232264518738, "learning_rate": 0.00016323774392895096, "loss": 11.6844, "step": 40448 }, { "epoch": 0.8467093695051494, "grad_norm": 0.2987232208251953, "learning_rate": 0.0001632360454453031, "loss": 11.6618, "step": 40449 }, { "epoch": 0.8467303022691116, "grad_norm": 0.308508962392807, "learning_rate": 0.0001632343469312564, "loss": 11.6653, "step": 40450 }, { "epoch": 0.8467512350330738, "grad_norm": 0.4489627778530121, "learning_rate": 0.00016323264838681171, "loss": 11.6545, "step": 40451 }, { "epoch": 0.8467721677970359, "grad_norm": 0.3295113146305084, "learning_rate": 0.0001632309498119698, "loss": 11.6594, "step": 40452 }, { "epoch": 0.8467931005609981, "grad_norm": 0.3252656161785126, "learning_rate": 0.0001632292512067315, "loss": 11.6885, "step": 40453 }, { "epoch": 0.8468140333249602, "grad_norm": 0.321726530790329, "learning_rate": 0.0001632275525710976, "loss": 11.657, "step": 40454 }, { "epoch": 0.8468349660889224, "grad_norm": 0.29249292612075806, "learning_rate": 0.000163225853905069, "loss": 11.6739, "step": 40455 }, { "epoch": 0.8468558988528845, "grad_norm": 0.2754981219768524, "learning_rate": 0.00016322415520864648, "loss": 11.6808, "step": 40456 }, { "epoch": 0.8468768316168467, "grad_norm": 0.4255065619945526, "learning_rate": 0.00016322245648183077, "loss": 11.6889, "step": 40457 }, { "epoch": 0.8468977643808089, "grad_norm": 0.2822383642196655, "learning_rate": 0.0001632207577246228, "loss": 11.6686, "step": 40458 }, { "epoch": 0.846918697144771, "grad_norm": 0.3237033486366272, "learning_rate": 0.00016321905893702335, "loss": 11.6474, "step": 40459 }, { "epoch": 0.8469396299087332, "grad_norm": 0.42948517203330994, "learning_rate": 0.0001632173601190332, "loss": 11.6748, "step": 40460 }, { "epoch": 0.8469605626726953, "grad_norm": 0.47617924213409424, "learning_rate": 0.00016321566127065325, "loss": 11.6718, "step": 40461 }, { "epoch": 0.8469814954366575, "grad_norm": 0.2654132544994354, "learning_rate": 0.00016321396239188423, "loss": 11.6659, "step": 40462 }, { "epoch": 0.8470024282006197, "grad_norm": 0.32704243063926697, "learning_rate": 0.000163212263482727, "loss": 11.6615, "step": 40463 }, { "epoch": 0.8470233609645818, "grad_norm": 0.335946649312973, "learning_rate": 0.0001632105645431824, "loss": 11.6662, "step": 40464 }, { "epoch": 0.847044293728544, "grad_norm": 0.3429763615131378, "learning_rate": 0.00016320886557325115, "loss": 11.6624, "step": 40465 }, { "epoch": 0.847065226492506, "grad_norm": 0.3283393681049347, "learning_rate": 0.00016320716657293417, "loss": 11.6653, "step": 40466 }, { "epoch": 0.8470861592564682, "grad_norm": 0.43935638666152954, "learning_rate": 0.0001632054675422322, "loss": 11.7011, "step": 40467 }, { "epoch": 0.8471070920204303, "grad_norm": 0.2623019516468048, "learning_rate": 0.00016320376848114613, "loss": 11.6709, "step": 40468 }, { "epoch": 0.8471280247843925, "grad_norm": 0.3130813241004944, "learning_rate": 0.00016320206938967674, "loss": 11.6752, "step": 40469 }, { "epoch": 0.8471489575483547, "grad_norm": 0.37768006324768066, "learning_rate": 0.00016320037026782487, "loss": 11.6674, "step": 40470 }, { "epoch": 0.8471698903123168, "grad_norm": 0.28314515948295593, "learning_rate": 0.00016319867111559127, "loss": 11.6815, "step": 40471 }, { "epoch": 0.847190823076279, "grad_norm": 0.2549648880958557, "learning_rate": 0.00016319697193297684, "loss": 11.6637, "step": 40472 }, { "epoch": 0.8472117558402411, "grad_norm": 0.39494216442108154, "learning_rate": 0.00016319527271998235, "loss": 11.657, "step": 40473 }, { "epoch": 0.8472326886042033, "grad_norm": 0.37461793422698975, "learning_rate": 0.0001631935734766086, "loss": 11.6885, "step": 40474 }, { "epoch": 0.8472536213681654, "grad_norm": 0.2918367385864258, "learning_rate": 0.00016319187420285646, "loss": 11.6735, "step": 40475 }, { "epoch": 0.8472745541321276, "grad_norm": 0.29662594199180603, "learning_rate": 0.0001631901748987267, "loss": 11.6893, "step": 40476 }, { "epoch": 0.8472954868960898, "grad_norm": 0.3144616186618805, "learning_rate": 0.00016318847556422018, "loss": 11.6724, "step": 40477 }, { "epoch": 0.8473164196600519, "grad_norm": 0.36047497391700745, "learning_rate": 0.00016318677619933767, "loss": 11.6717, "step": 40478 }, { "epoch": 0.8473373524240141, "grad_norm": 0.3398999273777008, "learning_rate": 0.00016318507680408, "loss": 11.6796, "step": 40479 }, { "epoch": 0.8473582851879762, "grad_norm": 0.22811132669448853, "learning_rate": 0.00016318337737844806, "loss": 11.6763, "step": 40480 }, { "epoch": 0.8473792179519384, "grad_norm": 0.33038878440856934, "learning_rate": 0.00016318167792244256, "loss": 11.6741, "step": 40481 }, { "epoch": 0.8474001507159006, "grad_norm": 0.3121867775917053, "learning_rate": 0.00016317997843606435, "loss": 11.6632, "step": 40482 }, { "epoch": 0.8474210834798627, "grad_norm": 0.3479434847831726, "learning_rate": 0.00016317827891931428, "loss": 11.6681, "step": 40483 }, { "epoch": 0.8474420162438249, "grad_norm": 0.31654977798461914, "learning_rate": 0.00016317657937219314, "loss": 11.6791, "step": 40484 }, { "epoch": 0.847462949007787, "grad_norm": 0.30062344670295715, "learning_rate": 0.00016317487979470175, "loss": 11.6765, "step": 40485 }, { "epoch": 0.8474838817717492, "grad_norm": 0.33309847116470337, "learning_rate": 0.00016317318018684094, "loss": 11.6633, "step": 40486 }, { "epoch": 0.8475048145357112, "grad_norm": 0.30622103810310364, "learning_rate": 0.0001631714805486115, "loss": 11.6679, "step": 40487 }, { "epoch": 0.8475257472996734, "grad_norm": 0.2749719023704529, "learning_rate": 0.00016316978088001425, "loss": 11.6601, "step": 40488 }, { "epoch": 0.8475466800636356, "grad_norm": 0.28045302629470825, "learning_rate": 0.00016316808118105005, "loss": 11.6716, "step": 40489 }, { "epoch": 0.8475676128275977, "grad_norm": 0.36988890171051025, "learning_rate": 0.00016316638145171967, "loss": 11.6639, "step": 40490 }, { "epoch": 0.8475885455915599, "grad_norm": 0.46747103333473206, "learning_rate": 0.00016316468169202394, "loss": 11.665, "step": 40491 }, { "epoch": 0.847609478355522, "grad_norm": 0.3089941143989563, "learning_rate": 0.00016316298190196374, "loss": 11.686, "step": 40492 }, { "epoch": 0.8476304111194842, "grad_norm": 0.33657458424568176, "learning_rate": 0.00016316128208153976, "loss": 11.688, "step": 40493 }, { "epoch": 0.8476513438834463, "grad_norm": 0.3197484612464905, "learning_rate": 0.00016315958223075288, "loss": 11.6574, "step": 40494 }, { "epoch": 0.8476722766474085, "grad_norm": 0.2969968318939209, "learning_rate": 0.00016315788234960395, "loss": 11.6579, "step": 40495 }, { "epoch": 0.8476932094113707, "grad_norm": 0.2843592166900635, "learning_rate": 0.0001631561824380938, "loss": 11.67, "step": 40496 }, { "epoch": 0.8477141421753328, "grad_norm": 0.3270570635795593, "learning_rate": 0.00016315448249622317, "loss": 11.6642, "step": 40497 }, { "epoch": 0.847735074939295, "grad_norm": 0.39357811212539673, "learning_rate": 0.00016315278252399288, "loss": 11.6653, "step": 40498 }, { "epoch": 0.8477560077032571, "grad_norm": 0.28067854046821594, "learning_rate": 0.00016315108252140382, "loss": 11.6623, "step": 40499 }, { "epoch": 0.8477769404672193, "grad_norm": 0.30736663937568665, "learning_rate": 0.00016314938248845678, "loss": 11.6663, "step": 40500 }, { "epoch": 0.8477978732311815, "grad_norm": 0.3148680329322815, "learning_rate": 0.00016314768242515254, "loss": 11.6718, "step": 40501 }, { "epoch": 0.8478188059951436, "grad_norm": 0.36653992533683777, "learning_rate": 0.00016314598233149196, "loss": 11.6677, "step": 40502 }, { "epoch": 0.8478397387591058, "grad_norm": 0.29783400893211365, "learning_rate": 0.00016314428220747583, "loss": 11.668, "step": 40503 }, { "epoch": 0.8478606715230679, "grad_norm": 0.27815404534339905, "learning_rate": 0.000163142582053105, "loss": 11.669, "step": 40504 }, { "epoch": 0.8478816042870301, "grad_norm": 0.2900395095348358, "learning_rate": 0.00016314088186838023, "loss": 11.6806, "step": 40505 }, { "epoch": 0.8479025370509922, "grad_norm": 0.29348981380462646, "learning_rate": 0.0001631391816533024, "loss": 11.679, "step": 40506 }, { "epoch": 0.8479234698149544, "grad_norm": 0.30145034193992615, "learning_rate": 0.00016313748140787227, "loss": 11.6717, "step": 40507 }, { "epoch": 0.8479444025789166, "grad_norm": 0.3544493615627289, "learning_rate": 0.00016313578113209075, "loss": 11.6782, "step": 40508 }, { "epoch": 0.8479653353428787, "grad_norm": 0.3169718086719513, "learning_rate": 0.00016313408082595853, "loss": 11.6674, "step": 40509 }, { "epoch": 0.8479862681068409, "grad_norm": 0.2853129208087921, "learning_rate": 0.0001631323804894765, "loss": 11.6676, "step": 40510 }, { "epoch": 0.848007200870803, "grad_norm": 0.25878286361694336, "learning_rate": 0.0001631306801226455, "loss": 11.6484, "step": 40511 }, { "epoch": 0.8480281336347651, "grad_norm": 0.3273886740207672, "learning_rate": 0.0001631289797254663, "loss": 11.6571, "step": 40512 }, { "epoch": 0.8480490663987272, "grad_norm": 0.33000606298446655, "learning_rate": 0.00016312727929793973, "loss": 11.6694, "step": 40513 }, { "epoch": 0.8480699991626894, "grad_norm": 0.3283202648162842, "learning_rate": 0.00016312557884006664, "loss": 11.6834, "step": 40514 }, { "epoch": 0.8480909319266516, "grad_norm": 0.24885383248329163, "learning_rate": 0.0001631238783518478, "loss": 11.6783, "step": 40515 }, { "epoch": 0.8481118646906137, "grad_norm": 0.3773044943809509, "learning_rate": 0.00016312217783328402, "loss": 11.6512, "step": 40516 }, { "epoch": 0.8481327974545759, "grad_norm": 0.3096030652523041, "learning_rate": 0.0001631204772843762, "loss": 11.6795, "step": 40517 }, { "epoch": 0.848153730218538, "grad_norm": 0.3230718970298767, "learning_rate": 0.00016311877670512504, "loss": 11.6659, "step": 40518 }, { "epoch": 0.8481746629825002, "grad_norm": 0.27009889483451843, "learning_rate": 0.00016311707609553146, "loss": 11.6531, "step": 40519 }, { "epoch": 0.8481955957464624, "grad_norm": 0.34775474667549133, "learning_rate": 0.00016311537545559622, "loss": 11.6786, "step": 40520 }, { "epoch": 0.8482165285104245, "grad_norm": 0.31816422939300537, "learning_rate": 0.00016311367478532017, "loss": 11.6693, "step": 40521 }, { "epoch": 0.8482374612743867, "grad_norm": 0.31997764110565186, "learning_rate": 0.0001631119740847041, "loss": 11.6795, "step": 40522 }, { "epoch": 0.8482583940383488, "grad_norm": 0.3534063398838043, "learning_rate": 0.00016311027335374885, "loss": 11.6837, "step": 40523 }, { "epoch": 0.848279326802311, "grad_norm": 0.40277448296546936, "learning_rate": 0.00016310857259245522, "loss": 11.6642, "step": 40524 }, { "epoch": 0.8483002595662731, "grad_norm": 0.2947123348712921, "learning_rate": 0.000163106871800824, "loss": 11.666, "step": 40525 }, { "epoch": 0.8483211923302353, "grad_norm": 0.4458824098110199, "learning_rate": 0.00016310517097885608, "loss": 11.6781, "step": 40526 }, { "epoch": 0.8483421250941975, "grad_norm": 0.35052502155303955, "learning_rate": 0.00016310347012655224, "loss": 11.663, "step": 40527 }, { "epoch": 0.8483630578581596, "grad_norm": 0.32913273572921753, "learning_rate": 0.0001631017692439133, "loss": 11.6723, "step": 40528 }, { "epoch": 0.8483839906221218, "grad_norm": 0.2719402611255646, "learning_rate": 0.00016310006833094006, "loss": 11.6685, "step": 40529 }, { "epoch": 0.8484049233860839, "grad_norm": 0.3300516605377197, "learning_rate": 0.00016309836738763337, "loss": 11.6589, "step": 40530 }, { "epoch": 0.8484258561500461, "grad_norm": 0.2988063097000122, "learning_rate": 0.00016309666641399403, "loss": 11.6588, "step": 40531 }, { "epoch": 0.8484467889140082, "grad_norm": 0.3208429515361786, "learning_rate": 0.00016309496541002283, "loss": 11.6801, "step": 40532 }, { "epoch": 0.8484677216779704, "grad_norm": 0.3199491500854492, "learning_rate": 0.00016309326437572062, "loss": 11.6645, "step": 40533 }, { "epoch": 0.8484886544419326, "grad_norm": 0.3333527147769928, "learning_rate": 0.00016309156331108823, "loss": 11.6791, "step": 40534 }, { "epoch": 0.8485095872058946, "grad_norm": 0.34101420640945435, "learning_rate": 0.00016308986221612645, "loss": 11.6654, "step": 40535 }, { "epoch": 0.8485305199698568, "grad_norm": 0.3711792230606079, "learning_rate": 0.00016308816109083613, "loss": 11.6677, "step": 40536 }, { "epoch": 0.8485514527338189, "grad_norm": 0.3438384234905243, "learning_rate": 0.00016308645993521806, "loss": 11.6603, "step": 40537 }, { "epoch": 0.8485723854977811, "grad_norm": 0.41902294754981995, "learning_rate": 0.00016308475874927302, "loss": 11.6693, "step": 40538 }, { "epoch": 0.8485933182617433, "grad_norm": 0.2839201092720032, "learning_rate": 0.00016308305753300192, "loss": 11.6845, "step": 40539 }, { "epoch": 0.8486142510257054, "grad_norm": 0.2773512303829193, "learning_rate": 0.00016308135628640552, "loss": 11.668, "step": 40540 }, { "epoch": 0.8486351837896676, "grad_norm": 0.40106603503227234, "learning_rate": 0.00016307965500948467, "loss": 11.6895, "step": 40541 }, { "epoch": 0.8486561165536297, "grad_norm": 0.34532487392425537, "learning_rate": 0.00016307795370224012, "loss": 11.6537, "step": 40542 }, { "epoch": 0.8486770493175919, "grad_norm": 0.3769342601299286, "learning_rate": 0.00016307625236467276, "loss": 11.6603, "step": 40543 }, { "epoch": 0.848697982081554, "grad_norm": 0.30708906054496765, "learning_rate": 0.0001630745509967834, "loss": 11.6724, "step": 40544 }, { "epoch": 0.8487189148455162, "grad_norm": 0.26935622096061707, "learning_rate": 0.0001630728495985728, "loss": 11.6609, "step": 40545 }, { "epoch": 0.8487398476094784, "grad_norm": 0.3497489094734192, "learning_rate": 0.00016307114817004187, "loss": 11.6677, "step": 40546 }, { "epoch": 0.8487607803734405, "grad_norm": 0.38291165232658386, "learning_rate": 0.00016306944671119132, "loss": 11.6698, "step": 40547 }, { "epoch": 0.8487817131374027, "grad_norm": 0.31761589646339417, "learning_rate": 0.000163067745222022, "loss": 11.6808, "step": 40548 }, { "epoch": 0.8488026459013648, "grad_norm": 0.22288718819618225, "learning_rate": 0.00016306604370253482, "loss": 11.6793, "step": 40549 }, { "epoch": 0.848823578665327, "grad_norm": 0.41632384061813354, "learning_rate": 0.0001630643421527305, "loss": 11.6632, "step": 40550 }, { "epoch": 0.8488445114292891, "grad_norm": 0.31806138157844543, "learning_rate": 0.00016306264057260988, "loss": 11.6624, "step": 40551 }, { "epoch": 0.8488654441932513, "grad_norm": 0.37785181403160095, "learning_rate": 0.0001630609389621738, "loss": 11.6798, "step": 40552 }, { "epoch": 0.8488863769572135, "grad_norm": 0.32922279834747314, "learning_rate": 0.00016305923732142308, "loss": 11.6632, "step": 40553 }, { "epoch": 0.8489073097211756, "grad_norm": 0.2976846396923065, "learning_rate": 0.0001630575356503585, "loss": 11.6742, "step": 40554 }, { "epoch": 0.8489282424851378, "grad_norm": 0.3852764666080475, "learning_rate": 0.00016305583394898087, "loss": 11.6646, "step": 40555 }, { "epoch": 0.8489491752490999, "grad_norm": 0.33136430382728577, "learning_rate": 0.00016305413221729106, "loss": 11.6717, "step": 40556 }, { "epoch": 0.848970108013062, "grad_norm": 0.3502141833305359, "learning_rate": 0.00016305243045528989, "loss": 11.66, "step": 40557 }, { "epoch": 0.8489910407770243, "grad_norm": 0.25294938683509827, "learning_rate": 0.00016305072866297816, "loss": 11.6838, "step": 40558 }, { "epoch": 0.8490119735409863, "grad_norm": 0.31515783071517944, "learning_rate": 0.00016304902684035661, "loss": 11.6825, "step": 40559 }, { "epoch": 0.8490329063049485, "grad_norm": 0.37248918414115906, "learning_rate": 0.00016304732498742617, "loss": 11.686, "step": 40560 }, { "epoch": 0.8490538390689106, "grad_norm": 0.33673295378685, "learning_rate": 0.00016304562310418764, "loss": 11.6881, "step": 40561 }, { "epoch": 0.8490747718328728, "grad_norm": 0.2960594892501831, "learning_rate": 0.00016304392119064178, "loss": 11.6755, "step": 40562 }, { "epoch": 0.8490957045968349, "grad_norm": 0.3223221004009247, "learning_rate": 0.0001630422192467895, "loss": 11.6594, "step": 40563 }, { "epoch": 0.8491166373607971, "grad_norm": 0.292800635099411, "learning_rate": 0.00016304051727263148, "loss": 11.668, "step": 40564 }, { "epoch": 0.8491375701247593, "grad_norm": 0.3793843686580658, "learning_rate": 0.00016303881526816866, "loss": 11.6454, "step": 40565 }, { "epoch": 0.8491585028887214, "grad_norm": 0.3127160370349884, "learning_rate": 0.00016303711323340184, "loss": 11.662, "step": 40566 }, { "epoch": 0.8491794356526836, "grad_norm": 0.48986485600471497, "learning_rate": 0.0001630354111683318, "loss": 11.6484, "step": 40567 }, { "epoch": 0.8492003684166457, "grad_norm": 0.3294469714164734, "learning_rate": 0.00016303370907295937, "loss": 11.6813, "step": 40568 }, { "epoch": 0.8492213011806079, "grad_norm": 0.6437450051307678, "learning_rate": 0.00016303200694728535, "loss": 11.6109, "step": 40569 }, { "epoch": 0.84924223394457, "grad_norm": 0.27806803584098816, "learning_rate": 0.00016303030479131061, "loss": 11.6779, "step": 40570 }, { "epoch": 0.8492631667085322, "grad_norm": 0.24031294882297516, "learning_rate": 0.00016302860260503592, "loss": 11.6714, "step": 40571 }, { "epoch": 0.8492840994724944, "grad_norm": 0.31910833716392517, "learning_rate": 0.00016302690038846215, "loss": 11.6645, "step": 40572 }, { "epoch": 0.8493050322364565, "grad_norm": 0.3300321400165558, "learning_rate": 0.00016302519814159004, "loss": 11.6579, "step": 40573 }, { "epoch": 0.8493259650004187, "grad_norm": 0.28795260190963745, "learning_rate": 0.0001630234958644205, "loss": 11.6671, "step": 40574 }, { "epoch": 0.8493468977643808, "grad_norm": 0.26740822196006775, "learning_rate": 0.0001630217935569543, "loss": 11.6564, "step": 40575 }, { "epoch": 0.849367830528343, "grad_norm": 0.3135569989681244, "learning_rate": 0.00016302009121919224, "loss": 11.664, "step": 40576 }, { "epoch": 0.8493887632923052, "grad_norm": 0.3544210195541382, "learning_rate": 0.00016301838885113515, "loss": 11.6843, "step": 40577 }, { "epoch": 0.8494096960562673, "grad_norm": 0.36661094427108765, "learning_rate": 0.00016301668645278387, "loss": 11.6558, "step": 40578 }, { "epoch": 0.8494306288202295, "grad_norm": 0.30858752131462097, "learning_rate": 0.0001630149840241392, "loss": 11.6685, "step": 40579 }, { "epoch": 0.8494515615841916, "grad_norm": 0.25142380595207214, "learning_rate": 0.000163013281565202, "loss": 11.6636, "step": 40580 }, { "epoch": 0.8494724943481538, "grad_norm": 0.5117033123970032, "learning_rate": 0.00016301157907597303, "loss": 11.6667, "step": 40581 }, { "epoch": 0.8494934271121158, "grad_norm": 0.39532557129859924, "learning_rate": 0.00016300987655645308, "loss": 11.6634, "step": 40582 }, { "epoch": 0.849514359876078, "grad_norm": 0.3002423942089081, "learning_rate": 0.0001630081740066431, "loss": 11.6508, "step": 40583 }, { "epoch": 0.8495352926400402, "grad_norm": 0.3797335624694824, "learning_rate": 0.00016300647142654376, "loss": 11.6664, "step": 40584 }, { "epoch": 0.8495562254040023, "grad_norm": 0.3051406443119049, "learning_rate": 0.00016300476881615602, "loss": 11.6539, "step": 40585 }, { "epoch": 0.8495771581679645, "grad_norm": 0.29651299118995667, "learning_rate": 0.00016300306617548057, "loss": 11.6633, "step": 40586 }, { "epoch": 0.8495980909319266, "grad_norm": 0.32596465945243835, "learning_rate": 0.00016300136350451825, "loss": 11.689, "step": 40587 }, { "epoch": 0.8496190236958888, "grad_norm": 0.3484823405742645, "learning_rate": 0.00016299966080327, "loss": 11.6702, "step": 40588 }, { "epoch": 0.8496399564598509, "grad_norm": 0.31803447008132935, "learning_rate": 0.0001629979580717365, "loss": 11.6642, "step": 40589 }, { "epoch": 0.8496608892238131, "grad_norm": 0.47355377674102783, "learning_rate": 0.00016299625530991866, "loss": 11.6871, "step": 40590 }, { "epoch": 0.8496818219877753, "grad_norm": 0.30839255452156067, "learning_rate": 0.0001629945525178172, "loss": 11.6615, "step": 40591 }, { "epoch": 0.8497027547517374, "grad_norm": 0.33304449915885925, "learning_rate": 0.00016299284969543303, "loss": 11.6554, "step": 40592 }, { "epoch": 0.8497236875156996, "grad_norm": 0.30815207958221436, "learning_rate": 0.00016299114684276692, "loss": 11.6751, "step": 40593 }, { "epoch": 0.8497446202796617, "grad_norm": 0.38713937997817993, "learning_rate": 0.00016298944395981972, "loss": 11.6727, "step": 40594 }, { "epoch": 0.8497655530436239, "grad_norm": 0.2900761663913727, "learning_rate": 0.00016298774104659222, "loss": 11.6713, "step": 40595 }, { "epoch": 0.849786485807586, "grad_norm": 0.30817171931266785, "learning_rate": 0.00016298603810308525, "loss": 11.6514, "step": 40596 }, { "epoch": 0.8498074185715482, "grad_norm": 0.2837691605091095, "learning_rate": 0.00016298433512929966, "loss": 11.6698, "step": 40597 }, { "epoch": 0.8498283513355104, "grad_norm": 0.29162725806236267, "learning_rate": 0.00016298263212523618, "loss": 11.652, "step": 40598 }, { "epoch": 0.8498492840994725, "grad_norm": 0.39218759536743164, "learning_rate": 0.00016298092909089573, "loss": 11.6763, "step": 40599 }, { "epoch": 0.8498702168634347, "grad_norm": 0.29035601019859314, "learning_rate": 0.00016297922602627907, "loss": 11.6668, "step": 40600 }, { "epoch": 0.8498911496273968, "grad_norm": 0.2993698716163635, "learning_rate": 0.00016297752293138704, "loss": 11.6684, "step": 40601 }, { "epoch": 0.849912082391359, "grad_norm": 0.3608846664428711, "learning_rate": 0.00016297581980622046, "loss": 11.6621, "step": 40602 }, { "epoch": 0.8499330151553212, "grad_norm": 0.3680787980556488, "learning_rate": 0.0001629741166507801, "loss": 11.6818, "step": 40603 }, { "epoch": 0.8499539479192832, "grad_norm": 0.30277878046035767, "learning_rate": 0.0001629724134650669, "loss": 11.6769, "step": 40604 }, { "epoch": 0.8499748806832454, "grad_norm": 0.2635309398174286, "learning_rate": 0.00016297071024908152, "loss": 11.671, "step": 40605 }, { "epoch": 0.8499958134472075, "grad_norm": 0.2519659996032715, "learning_rate": 0.00016296900700282487, "loss": 11.6726, "step": 40606 }, { "epoch": 0.8500167462111697, "grad_norm": 0.3430703580379486, "learning_rate": 0.0001629673037262978, "loss": 11.6786, "step": 40607 }, { "epoch": 0.8500376789751318, "grad_norm": 0.34480544924736023, "learning_rate": 0.00016296560041950103, "loss": 11.669, "step": 40608 }, { "epoch": 0.850058611739094, "grad_norm": 0.2929154634475708, "learning_rate": 0.00016296389708243548, "loss": 11.6854, "step": 40609 }, { "epoch": 0.8500795445030562, "grad_norm": 0.2736392319202423, "learning_rate": 0.0001629621937151019, "loss": 11.665, "step": 40610 }, { "epoch": 0.8501004772670183, "grad_norm": 0.3590606153011322, "learning_rate": 0.00016296049031750114, "loss": 11.6764, "step": 40611 }, { "epoch": 0.8501214100309805, "grad_norm": 0.2823004126548767, "learning_rate": 0.000162958786889634, "loss": 11.6598, "step": 40612 }, { "epoch": 0.8501423427949426, "grad_norm": 0.40609562397003174, "learning_rate": 0.00016295708343150132, "loss": 11.6716, "step": 40613 }, { "epoch": 0.8501632755589048, "grad_norm": 0.2959239184856415, "learning_rate": 0.00016295537994310392, "loss": 11.6667, "step": 40614 }, { "epoch": 0.8501842083228669, "grad_norm": 0.4099336564540863, "learning_rate": 0.0001629536764244426, "loss": 11.6642, "step": 40615 }, { "epoch": 0.8502051410868291, "grad_norm": 0.2720009684562683, "learning_rate": 0.00016295197287551818, "loss": 11.6688, "step": 40616 }, { "epoch": 0.8502260738507913, "grad_norm": 0.39824536442756653, "learning_rate": 0.0001629502692963315, "loss": 11.6767, "step": 40617 }, { "epoch": 0.8502470066147534, "grad_norm": 0.2646592855453491, "learning_rate": 0.00016294856568688332, "loss": 11.6696, "step": 40618 }, { "epoch": 0.8502679393787156, "grad_norm": 0.3479088544845581, "learning_rate": 0.00016294686204717452, "loss": 11.655, "step": 40619 }, { "epoch": 0.8502888721426777, "grad_norm": 0.3486779034137726, "learning_rate": 0.00016294515837720591, "loss": 11.6684, "step": 40620 }, { "epoch": 0.8503098049066399, "grad_norm": 0.313400000333786, "learning_rate": 0.0001629434546769783, "loss": 11.6863, "step": 40621 }, { "epoch": 0.8503307376706021, "grad_norm": 0.2599705159664154, "learning_rate": 0.00016294175094649257, "loss": 11.6687, "step": 40622 }, { "epoch": 0.8503516704345642, "grad_norm": 0.2887107729911804, "learning_rate": 0.0001629400471857494, "loss": 11.6603, "step": 40623 }, { "epoch": 0.8503726031985264, "grad_norm": 0.3402940332889557, "learning_rate": 0.0001629383433947497, "loss": 11.6551, "step": 40624 }, { "epoch": 0.8503935359624885, "grad_norm": 0.31359970569610596, "learning_rate": 0.0001629366395734943, "loss": 11.6505, "step": 40625 }, { "epoch": 0.8504144687264507, "grad_norm": 0.42119595408439636, "learning_rate": 0.00016293493572198397, "loss": 11.6809, "step": 40626 }, { "epoch": 0.8504354014904127, "grad_norm": 0.32104024291038513, "learning_rate": 0.00016293323184021958, "loss": 11.6617, "step": 40627 }, { "epoch": 0.850456334254375, "grad_norm": 0.2359786033630371, "learning_rate": 0.0001629315279282019, "loss": 11.6779, "step": 40628 }, { "epoch": 0.8504772670183371, "grad_norm": 0.2624551057815552, "learning_rate": 0.00016292982398593178, "loss": 11.6719, "step": 40629 }, { "epoch": 0.8504981997822992, "grad_norm": 0.2712320387363434, "learning_rate": 0.00016292812001341008, "loss": 11.6612, "step": 40630 }, { "epoch": 0.8505191325462614, "grad_norm": 0.2769581079483032, "learning_rate": 0.00016292641601063748, "loss": 11.6703, "step": 40631 }, { "epoch": 0.8505400653102235, "grad_norm": 0.2892954647541046, "learning_rate": 0.000162924711977615, "loss": 11.6825, "step": 40632 }, { "epoch": 0.8505609980741857, "grad_norm": 0.28366634249687195, "learning_rate": 0.00016292300791434324, "loss": 11.6576, "step": 40633 }, { "epoch": 0.8505819308381478, "grad_norm": 0.25853922963142395, "learning_rate": 0.00016292130382082322, "loss": 11.6734, "step": 40634 }, { "epoch": 0.85060286360211, "grad_norm": 0.3695984482765198, "learning_rate": 0.0001629195996970556, "loss": 11.6814, "step": 40635 }, { "epoch": 0.8506237963660722, "grad_norm": 0.33639979362487793, "learning_rate": 0.0001629178955430413, "loss": 11.6577, "step": 40636 }, { "epoch": 0.8506447291300343, "grad_norm": 0.35674846172332764, "learning_rate": 0.0001629161913587811, "loss": 11.6755, "step": 40637 }, { "epoch": 0.8506656618939965, "grad_norm": 0.3510604202747345, "learning_rate": 0.00016291448714427585, "loss": 11.6553, "step": 40638 }, { "epoch": 0.8506865946579586, "grad_norm": 0.2665995955467224, "learning_rate": 0.00016291278289952632, "loss": 11.6735, "step": 40639 }, { "epoch": 0.8507075274219208, "grad_norm": 0.2349424809217453, "learning_rate": 0.00016291107862453336, "loss": 11.6596, "step": 40640 }, { "epoch": 0.850728460185883, "grad_norm": 0.27358561754226685, "learning_rate": 0.00016290937431929782, "loss": 11.6624, "step": 40641 }, { "epoch": 0.8507493929498451, "grad_norm": 0.3559092879295349, "learning_rate": 0.0001629076699838204, "loss": 11.6658, "step": 40642 }, { "epoch": 0.8507703257138073, "grad_norm": 0.4651302099227905, "learning_rate": 0.00016290596561810205, "loss": 11.6808, "step": 40643 }, { "epoch": 0.8507912584777694, "grad_norm": 0.25551432371139526, "learning_rate": 0.00016290426122214356, "loss": 11.6485, "step": 40644 }, { "epoch": 0.8508121912417316, "grad_norm": 0.3097212314605713, "learning_rate": 0.00016290255679594567, "loss": 11.674, "step": 40645 }, { "epoch": 0.8508331240056937, "grad_norm": 0.2763642966747284, "learning_rate": 0.00016290085233950933, "loss": 11.6746, "step": 40646 }, { "epoch": 0.8508540567696559, "grad_norm": 0.28906145691871643, "learning_rate": 0.00016289914785283527, "loss": 11.6631, "step": 40647 }, { "epoch": 0.8508749895336181, "grad_norm": 0.2703879177570343, "learning_rate": 0.00016289744333592432, "loss": 11.678, "step": 40648 }, { "epoch": 0.8508959222975802, "grad_norm": 0.30049633979797363, "learning_rate": 0.0001628957387887773, "loss": 11.6788, "step": 40649 }, { "epoch": 0.8509168550615424, "grad_norm": 0.2863095700740814, "learning_rate": 0.00016289403421139504, "loss": 11.6717, "step": 40650 }, { "epoch": 0.8509377878255044, "grad_norm": 0.27463603019714355, "learning_rate": 0.00016289232960377836, "loss": 11.6422, "step": 40651 }, { "epoch": 0.8509587205894666, "grad_norm": 0.30782970786094666, "learning_rate": 0.00016289062496592808, "loss": 11.6602, "step": 40652 }, { "epoch": 0.8509796533534287, "grad_norm": 0.28089064359664917, "learning_rate": 0.00016288892029784501, "loss": 11.6601, "step": 40653 }, { "epoch": 0.8510005861173909, "grad_norm": 0.3131904900074005, "learning_rate": 0.00016288721559953002, "loss": 11.6771, "step": 40654 }, { "epoch": 0.8510215188813531, "grad_norm": 0.3587454855442047, "learning_rate": 0.00016288551087098382, "loss": 11.6518, "step": 40655 }, { "epoch": 0.8510424516453152, "grad_norm": 0.3125525712966919, "learning_rate": 0.00016288380611220732, "loss": 11.6765, "step": 40656 }, { "epoch": 0.8510633844092774, "grad_norm": 0.2917167842388153, "learning_rate": 0.0001628821013232013, "loss": 11.6696, "step": 40657 }, { "epoch": 0.8510843171732395, "grad_norm": 0.3624359369277954, "learning_rate": 0.00016288039650396664, "loss": 11.6836, "step": 40658 }, { "epoch": 0.8511052499372017, "grad_norm": 0.3446916937828064, "learning_rate": 0.00016287869165450408, "loss": 11.6807, "step": 40659 }, { "epoch": 0.8511261827011639, "grad_norm": 0.30250266194343567, "learning_rate": 0.00016287698677481448, "loss": 11.6657, "step": 40660 }, { "epoch": 0.851147115465126, "grad_norm": 0.27792659401893616, "learning_rate": 0.00016287528186489865, "loss": 11.6848, "step": 40661 }, { "epoch": 0.8511680482290882, "grad_norm": 0.36022812128067017, "learning_rate": 0.0001628735769247574, "loss": 11.6685, "step": 40662 }, { "epoch": 0.8511889809930503, "grad_norm": 0.25708991289138794, "learning_rate": 0.00016287187195439156, "loss": 11.6749, "step": 40663 }, { "epoch": 0.8512099137570125, "grad_norm": 0.30532902479171753, "learning_rate": 0.00016287016695380198, "loss": 11.6649, "step": 40664 }, { "epoch": 0.8512308465209746, "grad_norm": 0.28019264340400696, "learning_rate": 0.00016286846192298943, "loss": 11.6771, "step": 40665 }, { "epoch": 0.8512517792849368, "grad_norm": 0.2488236427307129, "learning_rate": 0.00016286675686195476, "loss": 11.6665, "step": 40666 }, { "epoch": 0.851272712048899, "grad_norm": 0.28300389647483826, "learning_rate": 0.00016286505177069878, "loss": 11.6542, "step": 40667 }, { "epoch": 0.8512936448128611, "grad_norm": 0.3083256185054779, "learning_rate": 0.00016286334664922228, "loss": 11.673, "step": 40668 }, { "epoch": 0.8513145775768233, "grad_norm": 0.31458792090415955, "learning_rate": 0.00016286164149752614, "loss": 11.6876, "step": 40669 }, { "epoch": 0.8513355103407854, "grad_norm": 0.33616697788238525, "learning_rate": 0.00016285993631561115, "loss": 11.6724, "step": 40670 }, { "epoch": 0.8513564431047476, "grad_norm": 0.34616219997406006, "learning_rate": 0.00016285823110347814, "loss": 11.6678, "step": 40671 }, { "epoch": 0.8513773758687097, "grad_norm": 0.3390997350215912, "learning_rate": 0.00016285652586112788, "loss": 11.672, "step": 40672 }, { "epoch": 0.8513983086326719, "grad_norm": 0.3172599673271179, "learning_rate": 0.00016285482058856125, "loss": 11.6792, "step": 40673 }, { "epoch": 0.851419241396634, "grad_norm": 0.3146420419216156, "learning_rate": 0.00016285311528577907, "loss": 11.6693, "step": 40674 }, { "epoch": 0.8514401741605961, "grad_norm": 0.36721891164779663, "learning_rate": 0.0001628514099527821, "loss": 11.6633, "step": 40675 }, { "epoch": 0.8514611069245583, "grad_norm": 0.35526007413864136, "learning_rate": 0.00016284970458957122, "loss": 11.698, "step": 40676 }, { "epoch": 0.8514820396885204, "grad_norm": 0.4329136908054352, "learning_rate": 0.00016284799919614722, "loss": 11.6727, "step": 40677 }, { "epoch": 0.8515029724524826, "grad_norm": 0.36866071820259094, "learning_rate": 0.00016284629377251092, "loss": 11.6785, "step": 40678 }, { "epoch": 0.8515239052164448, "grad_norm": 0.30013421177864075, "learning_rate": 0.00016284458831866317, "loss": 11.6728, "step": 40679 }, { "epoch": 0.8515448379804069, "grad_norm": 0.29230859875679016, "learning_rate": 0.00016284288283460475, "loss": 11.6535, "step": 40680 }, { "epoch": 0.8515657707443691, "grad_norm": 0.35764771699905396, "learning_rate": 0.00016284117732033647, "loss": 11.6815, "step": 40681 }, { "epoch": 0.8515867035083312, "grad_norm": 0.29050517082214355, "learning_rate": 0.00016283947177585923, "loss": 11.6746, "step": 40682 }, { "epoch": 0.8516076362722934, "grad_norm": 0.35094887018203735, "learning_rate": 0.00016283776620117378, "loss": 11.6918, "step": 40683 }, { "epoch": 0.8516285690362555, "grad_norm": 0.2835085988044739, "learning_rate": 0.00016283606059628095, "loss": 11.6698, "step": 40684 }, { "epoch": 0.8516495018002177, "grad_norm": 0.34666040539741516, "learning_rate": 0.00016283435496118153, "loss": 11.6688, "step": 40685 }, { "epoch": 0.8516704345641799, "grad_norm": 0.2759227156639099, "learning_rate": 0.00016283264929587644, "loss": 11.6748, "step": 40686 }, { "epoch": 0.851691367328142, "grad_norm": 0.3049512207508087, "learning_rate": 0.0001628309436003664, "loss": 11.6936, "step": 40687 }, { "epoch": 0.8517123000921042, "grad_norm": 0.3773881793022156, "learning_rate": 0.00016282923787465226, "loss": 11.6812, "step": 40688 }, { "epoch": 0.8517332328560663, "grad_norm": 0.28875619173049927, "learning_rate": 0.00016282753211873486, "loss": 11.6457, "step": 40689 }, { "epoch": 0.8517541656200285, "grad_norm": 0.3479233682155609, "learning_rate": 0.00016282582633261503, "loss": 11.6856, "step": 40690 }, { "epoch": 0.8517750983839906, "grad_norm": 0.3203223645687103, "learning_rate": 0.00016282412051629353, "loss": 11.6708, "step": 40691 }, { "epoch": 0.8517960311479528, "grad_norm": 0.27164262533187866, "learning_rate": 0.00016282241466977122, "loss": 11.6698, "step": 40692 }, { "epoch": 0.851816963911915, "grad_norm": 0.29637426137924194, "learning_rate": 0.00016282070879304893, "loss": 11.6869, "step": 40693 }, { "epoch": 0.8518378966758771, "grad_norm": 0.37668466567993164, "learning_rate": 0.00016281900288612745, "loss": 11.6711, "step": 40694 }, { "epoch": 0.8518588294398393, "grad_norm": 0.3932672441005707, "learning_rate": 0.00016281729694900764, "loss": 11.6674, "step": 40695 }, { "epoch": 0.8518797622038013, "grad_norm": 0.29193344712257385, "learning_rate": 0.00016281559098169028, "loss": 11.6779, "step": 40696 }, { "epoch": 0.8519006949677635, "grad_norm": 0.2550751864910126, "learning_rate": 0.00016281388498417623, "loss": 11.6775, "step": 40697 }, { "epoch": 0.8519216277317258, "grad_norm": 0.3808445930480957, "learning_rate": 0.00016281217895646626, "loss": 11.6878, "step": 40698 }, { "epoch": 0.8519425604956878, "grad_norm": 0.31750616431236267, "learning_rate": 0.0001628104728985612, "loss": 11.6809, "step": 40699 }, { "epoch": 0.85196349325965, "grad_norm": 0.28000545501708984, "learning_rate": 0.00016280876681046196, "loss": 11.6685, "step": 40700 }, { "epoch": 0.8519844260236121, "grad_norm": 0.27737346291542053, "learning_rate": 0.00016280706069216923, "loss": 11.6715, "step": 40701 }, { "epoch": 0.8520053587875743, "grad_norm": 0.34851664304733276, "learning_rate": 0.00016280535454368385, "loss": 11.6666, "step": 40702 }, { "epoch": 0.8520262915515364, "grad_norm": 0.2633921802043915, "learning_rate": 0.00016280364836500675, "loss": 11.6712, "step": 40703 }, { "epoch": 0.8520472243154986, "grad_norm": 0.3439578413963318, "learning_rate": 0.00016280194215613863, "loss": 11.65, "step": 40704 }, { "epoch": 0.8520681570794608, "grad_norm": 0.32271501421928406, "learning_rate": 0.0001628002359170804, "loss": 11.6808, "step": 40705 }, { "epoch": 0.8520890898434229, "grad_norm": 0.27296510338783264, "learning_rate": 0.0001627985296478328, "loss": 11.6759, "step": 40706 }, { "epoch": 0.8521100226073851, "grad_norm": 0.32744669914245605, "learning_rate": 0.00016279682334839672, "loss": 11.669, "step": 40707 }, { "epoch": 0.8521309553713472, "grad_norm": 0.34692659974098206, "learning_rate": 0.00016279511701877293, "loss": 11.6752, "step": 40708 }, { "epoch": 0.8521518881353094, "grad_norm": 0.3074098825454712, "learning_rate": 0.00016279341065896226, "loss": 11.6748, "step": 40709 }, { "epoch": 0.8521728208992715, "grad_norm": 0.287229061126709, "learning_rate": 0.00016279170426896557, "loss": 11.6562, "step": 40710 }, { "epoch": 0.8521937536632337, "grad_norm": 0.34326204657554626, "learning_rate": 0.00016278999784878363, "loss": 11.6854, "step": 40711 }, { "epoch": 0.8522146864271959, "grad_norm": 0.34096214175224304, "learning_rate": 0.0001627882913984173, "loss": 11.6804, "step": 40712 }, { "epoch": 0.852235619191158, "grad_norm": 0.40500232577323914, "learning_rate": 0.00016278658491786734, "loss": 11.6664, "step": 40713 }, { "epoch": 0.8522565519551202, "grad_norm": 0.2945719361305237, "learning_rate": 0.00016278487840713465, "loss": 11.6867, "step": 40714 }, { "epoch": 0.8522774847190823, "grad_norm": 0.3204408586025238, "learning_rate": 0.00016278317186621996, "loss": 11.6867, "step": 40715 }, { "epoch": 0.8522984174830445, "grad_norm": 0.3517926037311554, "learning_rate": 0.00016278146529512418, "loss": 11.6602, "step": 40716 }, { "epoch": 0.8523193502470067, "grad_norm": 0.31669971346855164, "learning_rate": 0.00016277975869384808, "loss": 11.6601, "step": 40717 }, { "epoch": 0.8523402830109688, "grad_norm": 0.343012273311615, "learning_rate": 0.0001627780520623925, "loss": 11.674, "step": 40718 }, { "epoch": 0.852361215774931, "grad_norm": 0.37087929248809814, "learning_rate": 0.00016277634540075826, "loss": 11.6722, "step": 40719 }, { "epoch": 0.852382148538893, "grad_norm": 0.33457234501838684, "learning_rate": 0.00016277463870894616, "loss": 11.6617, "step": 40720 }, { "epoch": 0.8524030813028552, "grad_norm": 0.29446128010749817, "learning_rate": 0.00016277293198695706, "loss": 11.6809, "step": 40721 }, { "epoch": 0.8524240140668173, "grad_norm": 0.34299197793006897, "learning_rate": 0.0001627712252347917, "loss": 11.6913, "step": 40722 }, { "epoch": 0.8524449468307795, "grad_norm": 0.3651779592037201, "learning_rate": 0.00016276951845245097, "loss": 11.6847, "step": 40723 }, { "epoch": 0.8524658795947417, "grad_norm": 0.3664301037788391, "learning_rate": 0.00016276781163993568, "loss": 11.6582, "step": 40724 }, { "epoch": 0.8524868123587038, "grad_norm": 0.3043466806411743, "learning_rate": 0.00016276610479724668, "loss": 11.6763, "step": 40725 }, { "epoch": 0.852507745122666, "grad_norm": 0.3839707672595978, "learning_rate": 0.00016276439792438472, "loss": 11.6808, "step": 40726 }, { "epoch": 0.8525286778866281, "grad_norm": 0.3832838833332062, "learning_rate": 0.00016276269102135065, "loss": 11.6706, "step": 40727 }, { "epoch": 0.8525496106505903, "grad_norm": 0.27521130442619324, "learning_rate": 0.0001627609840881453, "loss": 11.6755, "step": 40728 }, { "epoch": 0.8525705434145524, "grad_norm": 0.3108169436454773, "learning_rate": 0.0001627592771247695, "loss": 11.6482, "step": 40729 }, { "epoch": 0.8525914761785146, "grad_norm": 0.3924404978752136, "learning_rate": 0.00016275757013122405, "loss": 11.6771, "step": 40730 }, { "epoch": 0.8526124089424768, "grad_norm": 0.36163899302482605, "learning_rate": 0.0001627558631075098, "loss": 11.6682, "step": 40731 }, { "epoch": 0.8526333417064389, "grad_norm": 0.29596322774887085, "learning_rate": 0.00016275415605362755, "loss": 11.671, "step": 40732 }, { "epoch": 0.8526542744704011, "grad_norm": 0.3591034710407257, "learning_rate": 0.0001627524489695781, "loss": 11.6671, "step": 40733 }, { "epoch": 0.8526752072343632, "grad_norm": 0.33156004548072815, "learning_rate": 0.00016275074185536227, "loss": 11.6688, "step": 40734 }, { "epoch": 0.8526961399983254, "grad_norm": 0.2792825400829315, "learning_rate": 0.00016274903471098093, "loss": 11.6474, "step": 40735 }, { "epoch": 0.8527170727622876, "grad_norm": 0.3066912591457367, "learning_rate": 0.00016274732753643486, "loss": 11.6839, "step": 40736 }, { "epoch": 0.8527380055262497, "grad_norm": 0.3328866958618164, "learning_rate": 0.0001627456203317249, "loss": 11.6581, "step": 40737 }, { "epoch": 0.8527589382902119, "grad_norm": 0.23662276566028595, "learning_rate": 0.00016274391309685187, "loss": 11.652, "step": 40738 }, { "epoch": 0.852779871054174, "grad_norm": 0.31851014494895935, "learning_rate": 0.00016274220583181655, "loss": 11.6755, "step": 40739 }, { "epoch": 0.8528008038181362, "grad_norm": 0.28751203417778015, "learning_rate": 0.00016274049853661984, "loss": 11.666, "step": 40740 }, { "epoch": 0.8528217365820983, "grad_norm": 0.37196221947669983, "learning_rate": 0.00016273879121126247, "loss": 11.6779, "step": 40741 }, { "epoch": 0.8528426693460605, "grad_norm": 0.28105470538139343, "learning_rate": 0.00016273708385574533, "loss": 11.6637, "step": 40742 }, { "epoch": 0.8528636021100227, "grad_norm": 0.2886112928390503, "learning_rate": 0.0001627353764700692, "loss": 11.6594, "step": 40743 }, { "epoch": 0.8528845348739847, "grad_norm": 0.2828524112701416, "learning_rate": 0.00016273366905423498, "loss": 11.6928, "step": 40744 }, { "epoch": 0.852905467637947, "grad_norm": 0.28567370772361755, "learning_rate": 0.00016273196160824336, "loss": 11.6525, "step": 40745 }, { "epoch": 0.852926400401909, "grad_norm": 0.333438903093338, "learning_rate": 0.00016273025413209523, "loss": 11.6689, "step": 40746 }, { "epoch": 0.8529473331658712, "grad_norm": 0.3434959053993225, "learning_rate": 0.00016272854662579146, "loss": 11.6576, "step": 40747 }, { "epoch": 0.8529682659298333, "grad_norm": 0.27219414710998535, "learning_rate": 0.00016272683908933277, "loss": 11.6678, "step": 40748 }, { "epoch": 0.8529891986937955, "grad_norm": 0.3586190938949585, "learning_rate": 0.00016272513152272005, "loss": 11.6718, "step": 40749 }, { "epoch": 0.8530101314577577, "grad_norm": 0.28468331694602966, "learning_rate": 0.00016272342392595414, "loss": 11.6707, "step": 40750 }, { "epoch": 0.8530310642217198, "grad_norm": 0.27555394172668457, "learning_rate": 0.00016272171629903574, "loss": 11.6516, "step": 40751 }, { "epoch": 0.853051996985682, "grad_norm": 0.41871628165245056, "learning_rate": 0.00016272000864196582, "loss": 11.6574, "step": 40752 }, { "epoch": 0.8530729297496441, "grad_norm": 0.31098926067352295, "learning_rate": 0.0001627183009547451, "loss": 11.6777, "step": 40753 }, { "epoch": 0.8530938625136063, "grad_norm": 0.2795964479446411, "learning_rate": 0.00016271659323737446, "loss": 11.6795, "step": 40754 }, { "epoch": 0.8531147952775685, "grad_norm": 0.2696712613105774, "learning_rate": 0.00016271488548985467, "loss": 11.6686, "step": 40755 }, { "epoch": 0.8531357280415306, "grad_norm": 0.33805564045906067, "learning_rate": 0.0001627131777121866, "loss": 11.6813, "step": 40756 }, { "epoch": 0.8531566608054928, "grad_norm": 0.3518674373626709, "learning_rate": 0.00016271146990437106, "loss": 11.6898, "step": 40757 }, { "epoch": 0.8531775935694549, "grad_norm": 0.29316532611846924, "learning_rate": 0.00016270976206640881, "loss": 11.6547, "step": 40758 }, { "epoch": 0.8531985263334171, "grad_norm": 0.34562939405441284, "learning_rate": 0.00016270805419830078, "loss": 11.6657, "step": 40759 }, { "epoch": 0.8532194590973792, "grad_norm": 0.33126822113990784, "learning_rate": 0.0001627063463000477, "loss": 11.6731, "step": 40760 }, { "epoch": 0.8532403918613414, "grad_norm": 0.33490562438964844, "learning_rate": 0.00016270463837165044, "loss": 11.6814, "step": 40761 }, { "epoch": 0.8532613246253036, "grad_norm": 0.28103727102279663, "learning_rate": 0.0001627029304131098, "loss": 11.6669, "step": 40762 }, { "epoch": 0.8532822573892657, "grad_norm": 0.3005002439022064, "learning_rate": 0.00016270122242442657, "loss": 11.6562, "step": 40763 }, { "epoch": 0.8533031901532279, "grad_norm": 0.3662984073162079, "learning_rate": 0.00016269951440560165, "loss": 11.6715, "step": 40764 }, { "epoch": 0.85332412291719, "grad_norm": 0.283197283744812, "learning_rate": 0.0001626978063566358, "loss": 11.6679, "step": 40765 }, { "epoch": 0.8533450556811522, "grad_norm": 0.31068724393844604, "learning_rate": 0.00016269609827752985, "loss": 11.6617, "step": 40766 }, { "epoch": 0.8533659884451142, "grad_norm": 0.2713937759399414, "learning_rate": 0.00016269439016828466, "loss": 11.6523, "step": 40767 }, { "epoch": 0.8533869212090764, "grad_norm": 0.2805789113044739, "learning_rate": 0.00016269268202890097, "loss": 11.6679, "step": 40768 }, { "epoch": 0.8534078539730386, "grad_norm": 0.27196529507637024, "learning_rate": 0.00016269097385937967, "loss": 11.6696, "step": 40769 }, { "epoch": 0.8534287867370007, "grad_norm": 0.25009647011756897, "learning_rate": 0.00016268926565972158, "loss": 11.6552, "step": 40770 }, { "epoch": 0.8534497195009629, "grad_norm": 0.3022288978099823, "learning_rate": 0.0001626875574299275, "loss": 11.6709, "step": 40771 }, { "epoch": 0.853470652264925, "grad_norm": 0.2486056089401245, "learning_rate": 0.00016268584916999824, "loss": 11.6559, "step": 40772 }, { "epoch": 0.8534915850288872, "grad_norm": 0.3004709780216217, "learning_rate": 0.00016268414087993463, "loss": 11.6733, "step": 40773 }, { "epoch": 0.8535125177928494, "grad_norm": 0.3031511604785919, "learning_rate": 0.00016268243255973755, "loss": 11.6731, "step": 40774 }, { "epoch": 0.8535334505568115, "grad_norm": 0.2814471423625946, "learning_rate": 0.00016268072420940773, "loss": 11.6645, "step": 40775 }, { "epoch": 0.8535543833207737, "grad_norm": 0.2892810106277466, "learning_rate": 0.00016267901582894603, "loss": 11.6759, "step": 40776 }, { "epoch": 0.8535753160847358, "grad_norm": 0.36979472637176514, "learning_rate": 0.00016267730741835327, "loss": 11.6667, "step": 40777 }, { "epoch": 0.853596248848698, "grad_norm": 0.337808221578598, "learning_rate": 0.00016267559897763028, "loss": 11.6834, "step": 40778 }, { "epoch": 0.8536171816126601, "grad_norm": 0.2820945382118225, "learning_rate": 0.00016267389050677786, "loss": 11.6687, "step": 40779 }, { "epoch": 0.8536381143766223, "grad_norm": 0.30633091926574707, "learning_rate": 0.00016267218200579684, "loss": 11.6518, "step": 40780 }, { "epoch": 0.8536590471405845, "grad_norm": 0.24998502433300018, "learning_rate": 0.00016267047347468807, "loss": 11.6843, "step": 40781 }, { "epoch": 0.8536799799045466, "grad_norm": 0.32829898595809937, "learning_rate": 0.00016266876491345233, "loss": 11.6689, "step": 40782 }, { "epoch": 0.8537009126685088, "grad_norm": 0.3873397409915924, "learning_rate": 0.00016266705632209048, "loss": 11.6582, "step": 40783 }, { "epoch": 0.8537218454324709, "grad_norm": 0.3503989577293396, "learning_rate": 0.0001626653477006033, "loss": 11.6762, "step": 40784 }, { "epoch": 0.8537427781964331, "grad_norm": 0.3521091938018799, "learning_rate": 0.00016266363904899163, "loss": 11.6528, "step": 40785 }, { "epoch": 0.8537637109603952, "grad_norm": 0.2596279978752136, "learning_rate": 0.0001626619303672563, "loss": 11.6672, "step": 40786 }, { "epoch": 0.8537846437243574, "grad_norm": 0.27457621693611145, "learning_rate": 0.00016266022165539813, "loss": 11.6652, "step": 40787 }, { "epoch": 0.8538055764883196, "grad_norm": 0.29202359914779663, "learning_rate": 0.00016265851291341794, "loss": 11.6685, "step": 40788 }, { "epoch": 0.8538265092522817, "grad_norm": 0.2896798551082611, "learning_rate": 0.00016265680414131653, "loss": 11.6752, "step": 40789 }, { "epoch": 0.8538474420162439, "grad_norm": 0.29055652022361755, "learning_rate": 0.0001626550953390947, "loss": 11.6709, "step": 40790 }, { "epoch": 0.8538683747802059, "grad_norm": 0.30673718452453613, "learning_rate": 0.0001626533865067534, "loss": 11.6685, "step": 40791 }, { "epoch": 0.8538893075441681, "grad_norm": 0.3295055627822876, "learning_rate": 0.0001626516776442933, "loss": 11.6679, "step": 40792 }, { "epoch": 0.8539102403081302, "grad_norm": 0.31127476692199707, "learning_rate": 0.0001626499687517153, "loss": 11.6635, "step": 40793 }, { "epoch": 0.8539311730720924, "grad_norm": 0.49668291211128235, "learning_rate": 0.00016264825982902022, "loss": 11.6858, "step": 40794 }, { "epoch": 0.8539521058360546, "grad_norm": 0.31253260374069214, "learning_rate": 0.00016264655087620884, "loss": 11.6807, "step": 40795 }, { "epoch": 0.8539730386000167, "grad_norm": 0.2710600197315216, "learning_rate": 0.00016264484189328201, "loss": 11.6501, "step": 40796 }, { "epoch": 0.8539939713639789, "grad_norm": 0.28648826479911804, "learning_rate": 0.00016264313288024055, "loss": 11.6714, "step": 40797 }, { "epoch": 0.854014904127941, "grad_norm": 0.29740604758262634, "learning_rate": 0.0001626414238370853, "loss": 11.6694, "step": 40798 }, { "epoch": 0.8540358368919032, "grad_norm": 0.29312795400619507, "learning_rate": 0.00016263971476381705, "loss": 11.6581, "step": 40799 }, { "epoch": 0.8540567696558654, "grad_norm": 0.3605350852012634, "learning_rate": 0.00016263800566043663, "loss": 11.6691, "step": 40800 }, { "epoch": 0.8540777024198275, "grad_norm": 0.25418615341186523, "learning_rate": 0.00016263629652694486, "loss": 11.6643, "step": 40801 }, { "epoch": 0.8540986351837897, "grad_norm": 0.31761810183525085, "learning_rate": 0.00016263458736334258, "loss": 11.6764, "step": 40802 }, { "epoch": 0.8541195679477518, "grad_norm": 0.25856083631515503, "learning_rate": 0.00016263287816963058, "loss": 11.671, "step": 40803 }, { "epoch": 0.854140500711714, "grad_norm": 0.2576001286506653, "learning_rate": 0.00016263116894580973, "loss": 11.6444, "step": 40804 }, { "epoch": 0.8541614334756761, "grad_norm": 0.3721950948238373, "learning_rate": 0.00016262945969188082, "loss": 11.6741, "step": 40805 }, { "epoch": 0.8541823662396383, "grad_norm": 0.3291324973106384, "learning_rate": 0.00016262775040784465, "loss": 11.684, "step": 40806 }, { "epoch": 0.8542032990036005, "grad_norm": 0.2989729344844818, "learning_rate": 0.0001626260410937021, "loss": 11.692, "step": 40807 }, { "epoch": 0.8542242317675626, "grad_norm": 0.32721978425979614, "learning_rate": 0.00016262433174945394, "loss": 11.6526, "step": 40808 }, { "epoch": 0.8542451645315248, "grad_norm": 0.407585471868515, "learning_rate": 0.00016262262237510096, "loss": 11.6639, "step": 40809 }, { "epoch": 0.8542660972954869, "grad_norm": 0.32680627703666687, "learning_rate": 0.0001626209129706441, "loss": 11.671, "step": 40810 }, { "epoch": 0.8542870300594491, "grad_norm": 0.29268014430999756, "learning_rate": 0.00016261920353608406, "loss": 11.66, "step": 40811 }, { "epoch": 0.8543079628234111, "grad_norm": 0.3401218354701996, "learning_rate": 0.00016261749407142176, "loss": 11.6691, "step": 40812 }, { "epoch": 0.8543288955873733, "grad_norm": 0.239791139960289, "learning_rate": 0.00016261578457665797, "loss": 11.6754, "step": 40813 }, { "epoch": 0.8543498283513355, "grad_norm": 0.3501925468444824, "learning_rate": 0.0001626140750517935, "loss": 11.6575, "step": 40814 }, { "epoch": 0.8543707611152976, "grad_norm": 0.3420194089412689, "learning_rate": 0.0001626123654968292, "loss": 11.693, "step": 40815 }, { "epoch": 0.8543916938792598, "grad_norm": 0.34337902069091797, "learning_rate": 0.00016261065591176586, "loss": 11.6679, "step": 40816 }, { "epoch": 0.8544126266432219, "grad_norm": 0.29971790313720703, "learning_rate": 0.0001626089462966043, "loss": 11.6719, "step": 40817 }, { "epoch": 0.8544335594071841, "grad_norm": 0.31499627232551575, "learning_rate": 0.0001626072366513454, "loss": 11.6618, "step": 40818 }, { "epoch": 0.8544544921711463, "grad_norm": 0.30271029472351074, "learning_rate": 0.00016260552697598994, "loss": 11.6937, "step": 40819 }, { "epoch": 0.8544754249351084, "grad_norm": 0.3341377079486847, "learning_rate": 0.00016260381727053878, "loss": 11.6573, "step": 40820 }, { "epoch": 0.8544963576990706, "grad_norm": 0.449008971452713, "learning_rate": 0.00016260210753499268, "loss": 11.6816, "step": 40821 }, { "epoch": 0.8545172904630327, "grad_norm": 0.31297311186790466, "learning_rate": 0.00016260039776935253, "loss": 11.6611, "step": 40822 }, { "epoch": 0.8545382232269949, "grad_norm": 0.26483672857284546, "learning_rate": 0.00016259868797361906, "loss": 11.6656, "step": 40823 }, { "epoch": 0.854559155990957, "grad_norm": 0.35139161348342896, "learning_rate": 0.00016259697814779316, "loss": 11.6474, "step": 40824 }, { "epoch": 0.8545800887549192, "grad_norm": 0.2536066770553589, "learning_rate": 0.00016259526829187567, "loss": 11.6744, "step": 40825 }, { "epoch": 0.8546010215188814, "grad_norm": 0.30717185139656067, "learning_rate": 0.00016259355840586733, "loss": 11.6615, "step": 40826 }, { "epoch": 0.8546219542828435, "grad_norm": 0.2731783390045166, "learning_rate": 0.00016259184848976905, "loss": 11.6596, "step": 40827 }, { "epoch": 0.8546428870468057, "grad_norm": 0.27761030197143555, "learning_rate": 0.0001625901385435816, "loss": 11.6717, "step": 40828 }, { "epoch": 0.8546638198107678, "grad_norm": 0.40418004989624023, "learning_rate": 0.00016258842856730583, "loss": 11.6887, "step": 40829 }, { "epoch": 0.85468475257473, "grad_norm": 0.3277680575847626, "learning_rate": 0.0001625867185609425, "loss": 11.6746, "step": 40830 }, { "epoch": 0.8547056853386921, "grad_norm": 0.26408252120018005, "learning_rate": 0.00016258500852449253, "loss": 11.6666, "step": 40831 }, { "epoch": 0.8547266181026543, "grad_norm": 0.24100418388843536, "learning_rate": 0.00016258329845795667, "loss": 11.6681, "step": 40832 }, { "epoch": 0.8547475508666165, "grad_norm": 0.2767849266529083, "learning_rate": 0.0001625815883613358, "loss": 11.6601, "step": 40833 }, { "epoch": 0.8547684836305786, "grad_norm": 0.3163813650608063, "learning_rate": 0.00016257987823463065, "loss": 11.6547, "step": 40834 }, { "epoch": 0.8547894163945408, "grad_norm": 0.2785432040691376, "learning_rate": 0.00016257816807784217, "loss": 11.6749, "step": 40835 }, { "epoch": 0.8548103491585028, "grad_norm": 0.27118122577667236, "learning_rate": 0.00016257645789097104, "loss": 11.6906, "step": 40836 }, { "epoch": 0.854831281922465, "grad_norm": 0.27653229236602783, "learning_rate": 0.0001625747476740182, "loss": 11.6794, "step": 40837 }, { "epoch": 0.8548522146864272, "grad_norm": 0.2884751260280609, "learning_rate": 0.0001625730374269844, "loss": 11.6497, "step": 40838 }, { "epoch": 0.8548731474503893, "grad_norm": 0.3282117247581482, "learning_rate": 0.00016257132714987048, "loss": 11.6643, "step": 40839 }, { "epoch": 0.8548940802143515, "grad_norm": 0.31415170431137085, "learning_rate": 0.00016256961684267727, "loss": 11.6596, "step": 40840 }, { "epoch": 0.8549150129783136, "grad_norm": 0.3266335725784302, "learning_rate": 0.00016256790650540564, "loss": 11.6788, "step": 40841 }, { "epoch": 0.8549359457422758, "grad_norm": 0.31151121854782104, "learning_rate": 0.00016256619613805632, "loss": 11.6626, "step": 40842 }, { "epoch": 0.8549568785062379, "grad_norm": 0.3647398352622986, "learning_rate": 0.0001625644857406302, "loss": 11.6791, "step": 40843 }, { "epoch": 0.8549778112702001, "grad_norm": 0.38651594519615173, "learning_rate": 0.00016256277531312804, "loss": 11.6802, "step": 40844 }, { "epoch": 0.8549987440341623, "grad_norm": 0.32175979018211365, "learning_rate": 0.0001625610648555507, "loss": 11.6681, "step": 40845 }, { "epoch": 0.8550196767981244, "grad_norm": 0.28170743584632874, "learning_rate": 0.000162559354367899, "loss": 11.6746, "step": 40846 }, { "epoch": 0.8550406095620866, "grad_norm": 0.2524636387825012, "learning_rate": 0.00016255764385017383, "loss": 11.6694, "step": 40847 }, { "epoch": 0.8550615423260487, "grad_norm": 0.25650203227996826, "learning_rate": 0.00016255593330237589, "loss": 11.6689, "step": 40848 }, { "epoch": 0.8550824750900109, "grad_norm": 0.31813672184944153, "learning_rate": 0.00016255422272450608, "loss": 11.6695, "step": 40849 }, { "epoch": 0.855103407853973, "grad_norm": 0.25276774168014526, "learning_rate": 0.0001625525121165652, "loss": 11.6635, "step": 40850 }, { "epoch": 0.8551243406179352, "grad_norm": 0.31154608726501465, "learning_rate": 0.00016255080147855406, "loss": 11.6689, "step": 40851 }, { "epoch": 0.8551452733818974, "grad_norm": 0.35194846987724304, "learning_rate": 0.0001625490908104735, "loss": 11.6642, "step": 40852 }, { "epoch": 0.8551662061458595, "grad_norm": 0.34609368443489075, "learning_rate": 0.00016254738011232435, "loss": 11.6558, "step": 40853 }, { "epoch": 0.8551871389098217, "grad_norm": 0.3125108778476715, "learning_rate": 0.0001625456693841074, "loss": 11.662, "step": 40854 }, { "epoch": 0.8552080716737838, "grad_norm": 0.28479981422424316, "learning_rate": 0.00016254395862582354, "loss": 11.6691, "step": 40855 }, { "epoch": 0.855229004437746, "grad_norm": 0.24879275262355804, "learning_rate": 0.0001625422478374735, "loss": 11.6607, "step": 40856 }, { "epoch": 0.8552499372017082, "grad_norm": 0.35564178228378296, "learning_rate": 0.00016254053701905815, "loss": 11.6734, "step": 40857 }, { "epoch": 0.8552708699656703, "grad_norm": 0.3052408993244171, "learning_rate": 0.00016253882617057834, "loss": 11.6787, "step": 40858 }, { "epoch": 0.8552918027296325, "grad_norm": 0.27669838070869446, "learning_rate": 0.00016253711529203485, "loss": 11.6512, "step": 40859 }, { "epoch": 0.8553127354935945, "grad_norm": 0.33726173639297485, "learning_rate": 0.0001625354043834285, "loss": 11.6729, "step": 40860 }, { "epoch": 0.8553336682575567, "grad_norm": 0.2980039119720459, "learning_rate": 0.00016253369344476016, "loss": 11.6763, "step": 40861 }, { "epoch": 0.8553546010215188, "grad_norm": 0.39657333493232727, "learning_rate": 0.0001625319824760306, "loss": 11.6804, "step": 40862 }, { "epoch": 0.855375533785481, "grad_norm": 0.36846181750297546, "learning_rate": 0.00016253027147724062, "loss": 11.6812, "step": 40863 }, { "epoch": 0.8553964665494432, "grad_norm": 0.30110469460487366, "learning_rate": 0.00016252856044839115, "loss": 11.6645, "step": 40864 }, { "epoch": 0.8554173993134053, "grad_norm": 0.3476616442203522, "learning_rate": 0.00016252684938948292, "loss": 11.676, "step": 40865 }, { "epoch": 0.8554383320773675, "grad_norm": 0.4381335377693176, "learning_rate": 0.0001625251383005168, "loss": 11.6851, "step": 40866 }, { "epoch": 0.8554592648413296, "grad_norm": 0.3123415410518646, "learning_rate": 0.00016252342718149358, "loss": 11.6693, "step": 40867 }, { "epoch": 0.8554801976052918, "grad_norm": 0.3068656623363495, "learning_rate": 0.00016252171603241408, "loss": 11.6758, "step": 40868 }, { "epoch": 0.8555011303692539, "grad_norm": 0.2626236081123352, "learning_rate": 0.00016252000485327916, "loss": 11.6502, "step": 40869 }, { "epoch": 0.8555220631332161, "grad_norm": 0.3502982556819916, "learning_rate": 0.00016251829364408963, "loss": 11.6637, "step": 40870 }, { "epoch": 0.8555429958971783, "grad_norm": 0.5496237874031067, "learning_rate": 0.00016251658240484627, "loss": 11.6804, "step": 40871 }, { "epoch": 0.8555639286611404, "grad_norm": 0.25946158170700073, "learning_rate": 0.00016251487113554995, "loss": 11.6648, "step": 40872 }, { "epoch": 0.8555848614251026, "grad_norm": 0.2673434019088745, "learning_rate": 0.00016251315983620146, "loss": 11.6741, "step": 40873 }, { "epoch": 0.8556057941890647, "grad_norm": 0.30102047324180603, "learning_rate": 0.0001625114485068017, "loss": 11.6736, "step": 40874 }, { "epoch": 0.8556267269530269, "grad_norm": 0.38506171107292175, "learning_rate": 0.00016250973714735135, "loss": 11.6655, "step": 40875 }, { "epoch": 0.8556476597169891, "grad_norm": 0.2822480797767639, "learning_rate": 0.0001625080257578514, "loss": 11.6703, "step": 40876 }, { "epoch": 0.8556685924809512, "grad_norm": 0.3141055703163147, "learning_rate": 0.00016250631433830254, "loss": 11.6723, "step": 40877 }, { "epoch": 0.8556895252449134, "grad_norm": 0.38208478689193726, "learning_rate": 0.00016250460288870565, "loss": 11.6621, "step": 40878 }, { "epoch": 0.8557104580088755, "grad_norm": 0.3254272937774658, "learning_rate": 0.00016250289140906155, "loss": 11.6602, "step": 40879 }, { "epoch": 0.8557313907728377, "grad_norm": 0.31431064009666443, "learning_rate": 0.00016250117989937107, "loss": 11.6858, "step": 40880 }, { "epoch": 0.8557523235367998, "grad_norm": 0.31375595927238464, "learning_rate": 0.00016249946835963501, "loss": 11.6669, "step": 40881 }, { "epoch": 0.855773256300762, "grad_norm": 0.2582603693008423, "learning_rate": 0.0001624977567898542, "loss": 11.6647, "step": 40882 }, { "epoch": 0.8557941890647242, "grad_norm": 0.26178619265556335, "learning_rate": 0.00016249604519002944, "loss": 11.6738, "step": 40883 }, { "epoch": 0.8558151218286862, "grad_norm": 0.3305073380470276, "learning_rate": 0.00016249433356016165, "loss": 11.6763, "step": 40884 }, { "epoch": 0.8558360545926484, "grad_norm": 0.35863062739372253, "learning_rate": 0.00016249262190025149, "loss": 11.6859, "step": 40885 }, { "epoch": 0.8558569873566105, "grad_norm": 0.2956008315086365, "learning_rate": 0.00016249091021029995, "loss": 11.6449, "step": 40886 }, { "epoch": 0.8558779201205727, "grad_norm": 0.35646766424179077, "learning_rate": 0.00016248919849030772, "loss": 11.6615, "step": 40887 }, { "epoch": 0.8558988528845348, "grad_norm": 0.2555736005306244, "learning_rate": 0.00016248748674027575, "loss": 11.6742, "step": 40888 }, { "epoch": 0.855919785648497, "grad_norm": 0.29378199577331543, "learning_rate": 0.0001624857749602047, "loss": 11.682, "step": 40889 }, { "epoch": 0.8559407184124592, "grad_norm": 0.2875673174858093, "learning_rate": 0.00016248406315009554, "loss": 11.6808, "step": 40890 }, { "epoch": 0.8559616511764213, "grad_norm": 0.33639636635780334, "learning_rate": 0.00016248235130994907, "loss": 11.6702, "step": 40891 }, { "epoch": 0.8559825839403835, "grad_norm": 0.3754991292953491, "learning_rate": 0.00016248063943976603, "loss": 11.6507, "step": 40892 }, { "epoch": 0.8560035167043456, "grad_norm": 0.4014092981815338, "learning_rate": 0.0001624789275395473, "loss": 11.6843, "step": 40893 }, { "epoch": 0.8560244494683078, "grad_norm": 0.2771742641925812, "learning_rate": 0.00016247721560929372, "loss": 11.6831, "step": 40894 }, { "epoch": 0.85604538223227, "grad_norm": 0.3128455877304077, "learning_rate": 0.00016247550364900608, "loss": 11.6705, "step": 40895 }, { "epoch": 0.8560663149962321, "grad_norm": 0.34104102849960327, "learning_rate": 0.0001624737916586852, "loss": 11.6524, "step": 40896 }, { "epoch": 0.8560872477601943, "grad_norm": 0.24683628976345062, "learning_rate": 0.00016247207963833194, "loss": 11.6711, "step": 40897 }, { "epoch": 0.8561081805241564, "grad_norm": 0.40652668476104736, "learning_rate": 0.0001624703675879471, "loss": 11.6685, "step": 40898 }, { "epoch": 0.8561291132881186, "grad_norm": 0.42142102122306824, "learning_rate": 0.00016246865550753148, "loss": 11.6543, "step": 40899 }, { "epoch": 0.8561500460520807, "grad_norm": 0.2871125042438507, "learning_rate": 0.00016246694339708593, "loss": 11.6706, "step": 40900 }, { "epoch": 0.8561709788160429, "grad_norm": 0.3728397786617279, "learning_rate": 0.00016246523125661128, "loss": 11.68, "step": 40901 }, { "epoch": 0.8561919115800051, "grad_norm": 0.271062970161438, "learning_rate": 0.0001624635190861083, "loss": 11.668, "step": 40902 }, { "epoch": 0.8562128443439672, "grad_norm": 0.24617771804332733, "learning_rate": 0.00016246180688557794, "loss": 11.6797, "step": 40903 }, { "epoch": 0.8562337771079294, "grad_norm": 0.2620568871498108, "learning_rate": 0.00016246009465502087, "loss": 11.6637, "step": 40904 }, { "epoch": 0.8562547098718915, "grad_norm": 0.34992554783821106, "learning_rate": 0.00016245838239443798, "loss": 11.6615, "step": 40905 }, { "epoch": 0.8562756426358537, "grad_norm": 0.32127803564071655, "learning_rate": 0.00016245667010383012, "loss": 11.6702, "step": 40906 }, { "epoch": 0.8562965753998157, "grad_norm": 0.41656965017318726, "learning_rate": 0.00016245495778319808, "loss": 11.6675, "step": 40907 }, { "epoch": 0.8563175081637779, "grad_norm": 0.3143949508666992, "learning_rate": 0.0001624532454325427, "loss": 11.6845, "step": 40908 }, { "epoch": 0.8563384409277401, "grad_norm": 0.3154115378856659, "learning_rate": 0.00016245153305186478, "loss": 11.655, "step": 40909 }, { "epoch": 0.8563593736917022, "grad_norm": 0.31236955523490906, "learning_rate": 0.00016244982064116516, "loss": 11.6748, "step": 40910 }, { "epoch": 0.8563803064556644, "grad_norm": 0.3683456778526306, "learning_rate": 0.00016244810820044466, "loss": 11.6832, "step": 40911 }, { "epoch": 0.8564012392196265, "grad_norm": 0.27210482954978943, "learning_rate": 0.0001624463957297041, "loss": 11.6673, "step": 40912 }, { "epoch": 0.8564221719835887, "grad_norm": 0.3045133948326111, "learning_rate": 0.00016244468322894432, "loss": 11.6795, "step": 40913 }, { "epoch": 0.8564431047475509, "grad_norm": 0.3773040771484375, "learning_rate": 0.0001624429706981661, "loss": 11.6652, "step": 40914 }, { "epoch": 0.856464037511513, "grad_norm": 0.34904661774635315, "learning_rate": 0.0001624412581373703, "loss": 11.6576, "step": 40915 }, { "epoch": 0.8564849702754752, "grad_norm": 0.3354807496070862, "learning_rate": 0.00016243954554655775, "loss": 11.678, "step": 40916 }, { "epoch": 0.8565059030394373, "grad_norm": 0.32771894335746765, "learning_rate": 0.00016243783292572926, "loss": 11.6576, "step": 40917 }, { "epoch": 0.8565268358033995, "grad_norm": 0.3565520942211151, "learning_rate": 0.00016243612027488563, "loss": 11.6716, "step": 40918 }, { "epoch": 0.8565477685673616, "grad_norm": 0.29773128032684326, "learning_rate": 0.00016243440759402774, "loss": 11.675, "step": 40919 }, { "epoch": 0.8565687013313238, "grad_norm": 0.2928125858306885, "learning_rate": 0.00016243269488315637, "loss": 11.6604, "step": 40920 }, { "epoch": 0.856589634095286, "grad_norm": 0.2669776380062103, "learning_rate": 0.00016243098214227235, "loss": 11.6563, "step": 40921 }, { "epoch": 0.8566105668592481, "grad_norm": 0.39978885650634766, "learning_rate": 0.00016242926937137648, "loss": 11.6744, "step": 40922 }, { "epoch": 0.8566314996232103, "grad_norm": 0.42256006598472595, "learning_rate": 0.00016242755657046965, "loss": 11.6772, "step": 40923 }, { "epoch": 0.8566524323871724, "grad_norm": 0.28117215633392334, "learning_rate": 0.0001624258437395526, "loss": 11.6724, "step": 40924 }, { "epoch": 0.8566733651511346, "grad_norm": 0.3842521011829376, "learning_rate": 0.00016242413087862626, "loss": 11.6572, "step": 40925 }, { "epoch": 0.8566942979150967, "grad_norm": 0.37337926030158997, "learning_rate": 0.00016242241798769132, "loss": 11.6708, "step": 40926 }, { "epoch": 0.8567152306790589, "grad_norm": 0.35863688588142395, "learning_rate": 0.0001624207050667487, "loss": 11.6719, "step": 40927 }, { "epoch": 0.8567361634430211, "grad_norm": 0.3331589698791504, "learning_rate": 0.0001624189921157992, "loss": 11.6753, "step": 40928 }, { "epoch": 0.8567570962069831, "grad_norm": 0.27589118480682373, "learning_rate": 0.00016241727913484364, "loss": 11.6661, "step": 40929 }, { "epoch": 0.8567780289709453, "grad_norm": 0.29747387766838074, "learning_rate": 0.00016241556612388284, "loss": 11.685, "step": 40930 }, { "epoch": 0.8567989617349074, "grad_norm": 0.3596630394458771, "learning_rate": 0.0001624138530829176, "loss": 11.6585, "step": 40931 }, { "epoch": 0.8568198944988696, "grad_norm": 0.33772075176239014, "learning_rate": 0.0001624121400119488, "loss": 11.6789, "step": 40932 }, { "epoch": 0.8568408272628318, "grad_norm": 0.33985671401023865, "learning_rate": 0.00016241042691097723, "loss": 11.6758, "step": 40933 }, { "epoch": 0.8568617600267939, "grad_norm": 0.28793516755104065, "learning_rate": 0.0001624087137800037, "loss": 11.6653, "step": 40934 }, { "epoch": 0.8568826927907561, "grad_norm": 0.33226972818374634, "learning_rate": 0.0001624070006190291, "loss": 11.6706, "step": 40935 }, { "epoch": 0.8569036255547182, "grad_norm": 0.3088509440422058, "learning_rate": 0.00016240528742805416, "loss": 11.6725, "step": 40936 }, { "epoch": 0.8569245583186804, "grad_norm": 0.32984182238578796, "learning_rate": 0.00016240357420707976, "loss": 11.6668, "step": 40937 }, { "epoch": 0.8569454910826425, "grad_norm": 0.31896740198135376, "learning_rate": 0.0001624018609561067, "loss": 11.6588, "step": 40938 }, { "epoch": 0.8569664238466047, "grad_norm": 0.2806131839752197, "learning_rate": 0.00016240014767513584, "loss": 11.6684, "step": 40939 }, { "epoch": 0.8569873566105669, "grad_norm": 0.289529025554657, "learning_rate": 0.00016239843436416794, "loss": 11.6583, "step": 40940 }, { "epoch": 0.857008289374529, "grad_norm": 0.29711318016052246, "learning_rate": 0.00016239672102320388, "loss": 11.6776, "step": 40941 }, { "epoch": 0.8570292221384912, "grad_norm": 0.4212099313735962, "learning_rate": 0.0001623950076522445, "loss": 11.669, "step": 40942 }, { "epoch": 0.8570501549024533, "grad_norm": 0.31092968583106995, "learning_rate": 0.00016239329425129056, "loss": 11.6632, "step": 40943 }, { "epoch": 0.8570710876664155, "grad_norm": 0.2812863290309906, "learning_rate": 0.0001623915808203429, "loss": 11.6783, "step": 40944 }, { "epoch": 0.8570920204303776, "grad_norm": 0.3001057207584381, "learning_rate": 0.00016238986735940238, "loss": 11.6564, "step": 40945 }, { "epoch": 0.8571129531943398, "grad_norm": 0.29709920287132263, "learning_rate": 0.00016238815386846977, "loss": 11.6582, "step": 40946 }, { "epoch": 0.857133885958302, "grad_norm": 0.25816357135772705, "learning_rate": 0.00016238644034754596, "loss": 11.6677, "step": 40947 }, { "epoch": 0.8571548187222641, "grad_norm": 0.22925777733325958, "learning_rate": 0.00016238472679663173, "loss": 11.6622, "step": 40948 }, { "epoch": 0.8571757514862263, "grad_norm": 0.3292276859283447, "learning_rate": 0.00016238301321572788, "loss": 11.6739, "step": 40949 }, { "epoch": 0.8571966842501884, "grad_norm": 0.30470091104507446, "learning_rate": 0.00016238129960483531, "loss": 11.6663, "step": 40950 }, { "epoch": 0.8572176170141506, "grad_norm": 0.25718414783477783, "learning_rate": 0.00016237958596395478, "loss": 11.6735, "step": 40951 }, { "epoch": 0.8572385497781128, "grad_norm": 0.31686025857925415, "learning_rate": 0.00016237787229308713, "loss": 11.6711, "step": 40952 }, { "epoch": 0.8572594825420748, "grad_norm": 0.3872130215167999, "learning_rate": 0.0001623761585922332, "loss": 11.6674, "step": 40953 }, { "epoch": 0.857280415306037, "grad_norm": 0.3924373984336853, "learning_rate": 0.00016237444486139378, "loss": 11.6653, "step": 40954 }, { "epoch": 0.8573013480699991, "grad_norm": 0.27739113569259644, "learning_rate": 0.00016237273110056972, "loss": 11.6636, "step": 40955 }, { "epoch": 0.8573222808339613, "grad_norm": 0.2608489990234375, "learning_rate": 0.00016237101730976185, "loss": 11.6685, "step": 40956 }, { "epoch": 0.8573432135979234, "grad_norm": 0.2445397973060608, "learning_rate": 0.00016236930348897098, "loss": 11.654, "step": 40957 }, { "epoch": 0.8573641463618856, "grad_norm": 0.2728276550769806, "learning_rate": 0.00016236758963819793, "loss": 11.6503, "step": 40958 }, { "epoch": 0.8573850791258478, "grad_norm": 0.3826836347579956, "learning_rate": 0.00016236587575744351, "loss": 11.6658, "step": 40959 }, { "epoch": 0.8574060118898099, "grad_norm": 0.2998334765434265, "learning_rate": 0.00016236416184670858, "loss": 11.6769, "step": 40960 }, { "epoch": 0.8574269446537721, "grad_norm": 0.26523250341415405, "learning_rate": 0.00016236244790599394, "loss": 11.6823, "step": 40961 }, { "epoch": 0.8574478774177342, "grad_norm": 0.26085659861564636, "learning_rate": 0.00016236073393530046, "loss": 11.657, "step": 40962 }, { "epoch": 0.8574688101816964, "grad_norm": 0.27837297320365906, "learning_rate": 0.0001623590199346289, "loss": 11.6882, "step": 40963 }, { "epoch": 0.8574897429456585, "grad_norm": 0.21938461065292358, "learning_rate": 0.00016235730590398012, "loss": 11.6723, "step": 40964 }, { "epoch": 0.8575106757096207, "grad_norm": 0.2894669771194458, "learning_rate": 0.00016235559184335488, "loss": 11.67, "step": 40965 }, { "epoch": 0.8575316084735829, "grad_norm": 0.31171682476997375, "learning_rate": 0.0001623538777527541, "loss": 11.6621, "step": 40966 }, { "epoch": 0.857552541237545, "grad_norm": 0.30696818232536316, "learning_rate": 0.00016235216363217857, "loss": 11.6714, "step": 40967 }, { "epoch": 0.8575734740015072, "grad_norm": 0.30500614643096924, "learning_rate": 0.0001623504494816291, "loss": 11.6606, "step": 40968 }, { "epoch": 0.8575944067654693, "grad_norm": 0.26718348264694214, "learning_rate": 0.00016234873530110653, "loss": 11.6589, "step": 40969 }, { "epoch": 0.8576153395294315, "grad_norm": 0.4005269706249237, "learning_rate": 0.00016234702109061165, "loss": 11.6806, "step": 40970 }, { "epoch": 0.8576362722933936, "grad_norm": 0.2992355227470398, "learning_rate": 0.0001623453068501453, "loss": 11.6639, "step": 40971 }, { "epoch": 0.8576572050573558, "grad_norm": 0.34127604961395264, "learning_rate": 0.00016234359257970833, "loss": 11.6684, "step": 40972 }, { "epoch": 0.857678137821318, "grad_norm": 0.3142976462841034, "learning_rate": 0.00016234187827930154, "loss": 11.6664, "step": 40973 }, { "epoch": 0.85769907058528, "grad_norm": 0.29405224323272705, "learning_rate": 0.00016234016394892575, "loss": 11.6405, "step": 40974 }, { "epoch": 0.8577200033492423, "grad_norm": 0.32147371768951416, "learning_rate": 0.00016233844958858182, "loss": 11.6666, "step": 40975 }, { "epoch": 0.8577409361132043, "grad_norm": 0.37083539366722107, "learning_rate": 0.00016233673519827052, "loss": 11.6779, "step": 40976 }, { "epoch": 0.8577618688771665, "grad_norm": 0.3108684718608856, "learning_rate": 0.00016233502077799274, "loss": 11.6834, "step": 40977 }, { "epoch": 0.8577828016411287, "grad_norm": 0.3407093286514282, "learning_rate": 0.00016233330632774924, "loss": 11.6796, "step": 40978 }, { "epoch": 0.8578037344050908, "grad_norm": 0.2851909101009369, "learning_rate": 0.00016233159184754085, "loss": 11.6617, "step": 40979 }, { "epoch": 0.857824667169053, "grad_norm": 0.2958540916442871, "learning_rate": 0.00016232987733736843, "loss": 11.6629, "step": 40980 }, { "epoch": 0.8578455999330151, "grad_norm": 0.44406983256340027, "learning_rate": 0.00016232816279723283, "loss": 11.66, "step": 40981 }, { "epoch": 0.8578665326969773, "grad_norm": 0.2757442593574524, "learning_rate": 0.0001623264482271348, "loss": 11.6669, "step": 40982 }, { "epoch": 0.8578874654609394, "grad_norm": 0.3893777132034302, "learning_rate": 0.0001623247336270752, "loss": 11.6595, "step": 40983 }, { "epoch": 0.8579083982249016, "grad_norm": 0.3081628084182739, "learning_rate": 0.00016232301899705482, "loss": 11.6657, "step": 40984 }, { "epoch": 0.8579293309888638, "grad_norm": 0.24697256088256836, "learning_rate": 0.00016232130433707455, "loss": 11.6816, "step": 40985 }, { "epoch": 0.8579502637528259, "grad_norm": 0.3199857175350189, "learning_rate": 0.00016231958964713517, "loss": 11.6635, "step": 40986 }, { "epoch": 0.8579711965167881, "grad_norm": 0.3200927972793579, "learning_rate": 0.00016231787492723752, "loss": 11.6759, "step": 40987 }, { "epoch": 0.8579921292807502, "grad_norm": 0.2712458670139313, "learning_rate": 0.00016231616017738242, "loss": 11.666, "step": 40988 }, { "epoch": 0.8580130620447124, "grad_norm": 0.3199094235897064, "learning_rate": 0.0001623144453975707, "loss": 11.6569, "step": 40989 }, { "epoch": 0.8580339948086745, "grad_norm": 0.3768623471260071, "learning_rate": 0.00016231273058780316, "loss": 11.6817, "step": 40990 }, { "epoch": 0.8580549275726367, "grad_norm": 0.42475956678390503, "learning_rate": 0.00016231101574808068, "loss": 11.6761, "step": 40991 }, { "epoch": 0.8580758603365989, "grad_norm": 0.2834028899669647, "learning_rate": 0.000162309300878404, "loss": 11.6716, "step": 40992 }, { "epoch": 0.858096793100561, "grad_norm": 0.31413865089416504, "learning_rate": 0.00016230758597877398, "loss": 11.6484, "step": 40993 }, { "epoch": 0.8581177258645232, "grad_norm": 0.2804441452026367, "learning_rate": 0.0001623058710491915, "loss": 11.6763, "step": 40994 }, { "epoch": 0.8581386586284853, "grad_norm": 0.33080294728279114, "learning_rate": 0.0001623041560896573, "loss": 11.6667, "step": 40995 }, { "epoch": 0.8581595913924475, "grad_norm": 0.2914782464504242, "learning_rate": 0.00016230244110017228, "loss": 11.6818, "step": 40996 }, { "epoch": 0.8581805241564097, "grad_norm": 0.31533509492874146, "learning_rate": 0.00016230072608073722, "loss": 11.6589, "step": 40997 }, { "epoch": 0.8582014569203718, "grad_norm": 0.4127964973449707, "learning_rate": 0.0001622990110313529, "loss": 11.6971, "step": 40998 }, { "epoch": 0.858222389684334, "grad_norm": 0.3013831079006195, "learning_rate": 0.0001622972959520203, "loss": 11.664, "step": 40999 }, { "epoch": 0.858243322448296, "grad_norm": 0.33711007237434387, "learning_rate": 0.00016229558084274003, "loss": 11.6707, "step": 41000 }, { "epoch": 0.858243322448296, "eval_loss": 11.669546127319336, "eval_runtime": 34.3256, "eval_samples_per_second": 27.997, "eval_steps_per_second": 7.021, "step": 41000 }, { "epoch": 0.8582642552122582, "grad_norm": 0.3188372850418091, "learning_rate": 0.0001622938657035131, "loss": 11.6678, "step": 41001 }, { "epoch": 0.8582851879762203, "grad_norm": 0.264428973197937, "learning_rate": 0.00016229215053434023, "loss": 11.6661, "step": 41002 }, { "epoch": 0.8583061207401825, "grad_norm": 0.34162476658821106, "learning_rate": 0.0001622904353352223, "loss": 11.6606, "step": 41003 }, { "epoch": 0.8583270535041447, "grad_norm": 0.3365251421928406, "learning_rate": 0.00016228872010616007, "loss": 11.664, "step": 41004 }, { "epoch": 0.8583479862681068, "grad_norm": 0.31294679641723633, "learning_rate": 0.00016228700484715444, "loss": 11.6669, "step": 41005 }, { "epoch": 0.858368919032069, "grad_norm": 0.3071872293949127, "learning_rate": 0.00016228528955820618, "loss": 11.654, "step": 41006 }, { "epoch": 0.8583898517960311, "grad_norm": 0.278565913438797, "learning_rate": 0.00016228357423931614, "loss": 11.6685, "step": 41007 }, { "epoch": 0.8584107845599933, "grad_norm": 0.30654481053352356, "learning_rate": 0.00016228185889048511, "loss": 11.6622, "step": 41008 }, { "epoch": 0.8584317173239554, "grad_norm": 0.35772866010665894, "learning_rate": 0.00016228014351171399, "loss": 11.6584, "step": 41009 }, { "epoch": 0.8584526500879176, "grad_norm": 0.2834759056568146, "learning_rate": 0.00016227842810300353, "loss": 11.6815, "step": 41010 }, { "epoch": 0.8584735828518798, "grad_norm": 0.29158270359039307, "learning_rate": 0.0001622767126643546, "loss": 11.6781, "step": 41011 }, { "epoch": 0.8584945156158419, "grad_norm": 0.45476990938186646, "learning_rate": 0.000162274997195768, "loss": 11.6645, "step": 41012 }, { "epoch": 0.8585154483798041, "grad_norm": 0.2910623252391815, "learning_rate": 0.00016227328169724455, "loss": 11.6569, "step": 41013 }, { "epoch": 0.8585363811437662, "grad_norm": 0.2640611529350281, "learning_rate": 0.00016227156616878508, "loss": 11.667, "step": 41014 }, { "epoch": 0.8585573139077284, "grad_norm": 0.3669596314430237, "learning_rate": 0.0001622698506103904, "loss": 11.6824, "step": 41015 }, { "epoch": 0.8585782466716906, "grad_norm": 0.3230118751525879, "learning_rate": 0.0001622681350220614, "loss": 11.6737, "step": 41016 }, { "epoch": 0.8585991794356527, "grad_norm": 0.3129214942455292, "learning_rate": 0.00016226641940379885, "loss": 11.672, "step": 41017 }, { "epoch": 0.8586201121996149, "grad_norm": 0.5163232684135437, "learning_rate": 0.0001622647037556036, "loss": 11.644, "step": 41018 }, { "epoch": 0.858641044963577, "grad_norm": 0.3454103171825409, "learning_rate": 0.00016226298807747643, "loss": 11.6836, "step": 41019 }, { "epoch": 0.8586619777275392, "grad_norm": 0.3888654410839081, "learning_rate": 0.00016226127236941817, "loss": 11.6852, "step": 41020 }, { "epoch": 0.8586829104915013, "grad_norm": 0.353823184967041, "learning_rate": 0.00016225955663142974, "loss": 11.7034, "step": 41021 }, { "epoch": 0.8587038432554635, "grad_norm": 0.3273177444934845, "learning_rate": 0.0001622578408635118, "loss": 11.6686, "step": 41022 }, { "epoch": 0.8587247760194257, "grad_norm": 0.3064080774784088, "learning_rate": 0.00016225612506566534, "loss": 11.664, "step": 41023 }, { "epoch": 0.8587457087833877, "grad_norm": 0.39088088274002075, "learning_rate": 0.0001622544092378911, "loss": 11.674, "step": 41024 }, { "epoch": 0.8587666415473499, "grad_norm": 0.2715821862220764, "learning_rate": 0.00016225269338018993, "loss": 11.6672, "step": 41025 }, { "epoch": 0.858787574311312, "grad_norm": 0.3917746841907501, "learning_rate": 0.00016225097749256263, "loss": 11.6888, "step": 41026 }, { "epoch": 0.8588085070752742, "grad_norm": 0.29930803179740906, "learning_rate": 0.00016224926157501002, "loss": 11.6817, "step": 41027 }, { "epoch": 0.8588294398392363, "grad_norm": 0.31539034843444824, "learning_rate": 0.00016224754562753295, "loss": 11.6702, "step": 41028 }, { "epoch": 0.8588503726031985, "grad_norm": 0.3302721679210663, "learning_rate": 0.00016224582965013225, "loss": 11.6801, "step": 41029 }, { "epoch": 0.8588713053671607, "grad_norm": 0.3182383179664612, "learning_rate": 0.00016224411364280873, "loss": 11.6521, "step": 41030 }, { "epoch": 0.8588922381311228, "grad_norm": 0.3226138949394226, "learning_rate": 0.0001622423976055632, "loss": 11.6673, "step": 41031 }, { "epoch": 0.858913170895085, "grad_norm": 0.27932673692703247, "learning_rate": 0.0001622406815383965, "loss": 11.665, "step": 41032 }, { "epoch": 0.8589341036590471, "grad_norm": 0.2825419008731842, "learning_rate": 0.00016223896544130949, "loss": 11.6798, "step": 41033 }, { "epoch": 0.8589550364230093, "grad_norm": 0.33767274022102356, "learning_rate": 0.00016223724931430293, "loss": 11.6658, "step": 41034 }, { "epoch": 0.8589759691869715, "grad_norm": 0.32705312967300415, "learning_rate": 0.0001622355331573777, "loss": 11.6726, "step": 41035 }, { "epoch": 0.8589969019509336, "grad_norm": 0.3161049783229828, "learning_rate": 0.0001622338169705346, "loss": 11.6642, "step": 41036 }, { "epoch": 0.8590178347148958, "grad_norm": 0.2947133183479309, "learning_rate": 0.0001622321007537744, "loss": 11.6727, "step": 41037 }, { "epoch": 0.8590387674788579, "grad_norm": 0.3367173373699188, "learning_rate": 0.00016223038450709806, "loss": 11.675, "step": 41038 }, { "epoch": 0.8590597002428201, "grad_norm": 0.3295053541660309, "learning_rate": 0.00016222866823050632, "loss": 11.6739, "step": 41039 }, { "epoch": 0.8590806330067822, "grad_norm": 0.3817940354347229, "learning_rate": 0.00016222695192399998, "loss": 11.6587, "step": 41040 }, { "epoch": 0.8591015657707444, "grad_norm": 0.3287106454372406, "learning_rate": 0.0001622252355875799, "loss": 11.6735, "step": 41041 }, { "epoch": 0.8591224985347066, "grad_norm": 0.3649264872074127, "learning_rate": 0.0001622235192212469, "loss": 11.6571, "step": 41042 }, { "epoch": 0.8591434312986687, "grad_norm": 0.2599582076072693, "learning_rate": 0.0001622218028250018, "loss": 11.6825, "step": 41043 }, { "epoch": 0.8591643640626309, "grad_norm": 0.34425556659698486, "learning_rate": 0.00016222008639884546, "loss": 11.6737, "step": 41044 }, { "epoch": 0.859185296826593, "grad_norm": 0.26004841923713684, "learning_rate": 0.0001622183699427787, "loss": 11.6607, "step": 41045 }, { "epoch": 0.8592062295905551, "grad_norm": 0.3906795382499695, "learning_rate": 0.00016221665345680228, "loss": 11.6663, "step": 41046 }, { "epoch": 0.8592271623545172, "grad_norm": 0.3720709979534149, "learning_rate": 0.00016221493694091707, "loss": 11.6605, "step": 41047 }, { "epoch": 0.8592480951184794, "grad_norm": 0.3345790505409241, "learning_rate": 0.0001622132203951239, "loss": 11.6643, "step": 41048 }, { "epoch": 0.8592690278824416, "grad_norm": 0.3306848704814911, "learning_rate": 0.0001622115038194236, "loss": 11.681, "step": 41049 }, { "epoch": 0.8592899606464037, "grad_norm": 0.30152907967567444, "learning_rate": 0.00016220978721381697, "loss": 11.6748, "step": 41050 }, { "epoch": 0.8593108934103659, "grad_norm": 0.4344002902507782, "learning_rate": 0.00016220807057830484, "loss": 11.6728, "step": 41051 }, { "epoch": 0.859331826174328, "grad_norm": 0.30848461389541626, "learning_rate": 0.00016220635391288806, "loss": 11.6716, "step": 41052 }, { "epoch": 0.8593527589382902, "grad_norm": 0.4559133052825928, "learning_rate": 0.00016220463721756742, "loss": 11.6903, "step": 41053 }, { "epoch": 0.8593736917022524, "grad_norm": 0.32482898235321045, "learning_rate": 0.00016220292049234377, "loss": 11.6707, "step": 41054 }, { "epoch": 0.8593946244662145, "grad_norm": 0.3875803053379059, "learning_rate": 0.00016220120373721793, "loss": 11.6902, "step": 41055 }, { "epoch": 0.8594155572301767, "grad_norm": 0.29012995958328247, "learning_rate": 0.00016219948695219073, "loss": 11.6667, "step": 41056 }, { "epoch": 0.8594364899941388, "grad_norm": 0.31756263971328735, "learning_rate": 0.000162197770137263, "loss": 11.691, "step": 41057 }, { "epoch": 0.859457422758101, "grad_norm": 0.2965816557407379, "learning_rate": 0.00016219605329243555, "loss": 11.6695, "step": 41058 }, { "epoch": 0.8594783555220631, "grad_norm": 0.33017319440841675, "learning_rate": 0.00016219433641770923, "loss": 11.6704, "step": 41059 }, { "epoch": 0.8594992882860253, "grad_norm": 0.3082919120788574, "learning_rate": 0.00016219261951308482, "loss": 11.6613, "step": 41060 }, { "epoch": 0.8595202210499875, "grad_norm": 0.2943017780780792, "learning_rate": 0.00016219090257856315, "loss": 11.6736, "step": 41061 }, { "epoch": 0.8595411538139496, "grad_norm": 0.3221525251865387, "learning_rate": 0.00016218918561414512, "loss": 11.6528, "step": 41062 }, { "epoch": 0.8595620865779118, "grad_norm": 0.30727681517601013, "learning_rate": 0.00016218746861983147, "loss": 11.6704, "step": 41063 }, { "epoch": 0.8595830193418739, "grad_norm": 0.33712878823280334, "learning_rate": 0.00016218575159562305, "loss": 11.669, "step": 41064 }, { "epoch": 0.8596039521058361, "grad_norm": 0.30482566356658936, "learning_rate": 0.00016218403454152072, "loss": 11.6902, "step": 41065 }, { "epoch": 0.8596248848697982, "grad_norm": 0.31694328784942627, "learning_rate": 0.00016218231745752525, "loss": 11.6668, "step": 41066 }, { "epoch": 0.8596458176337604, "grad_norm": 0.31262901425361633, "learning_rate": 0.0001621806003436375, "loss": 11.6864, "step": 41067 }, { "epoch": 0.8596667503977226, "grad_norm": 0.34582948684692383, "learning_rate": 0.0001621788831998583, "loss": 11.6646, "step": 41068 }, { "epoch": 0.8596876831616846, "grad_norm": 0.28057965636253357, "learning_rate": 0.0001621771660261885, "loss": 11.6648, "step": 41069 }, { "epoch": 0.8597086159256468, "grad_norm": 0.32587194442749023, "learning_rate": 0.00016217544882262883, "loss": 11.6579, "step": 41070 }, { "epoch": 0.8597295486896089, "grad_norm": 0.28191882371902466, "learning_rate": 0.0001621737315891802, "loss": 11.6623, "step": 41071 }, { "epoch": 0.8597504814535711, "grad_norm": 0.395725280046463, "learning_rate": 0.0001621720143258434, "loss": 11.6584, "step": 41072 }, { "epoch": 0.8597714142175333, "grad_norm": 0.2653725743293762, "learning_rate": 0.00016217029703261929, "loss": 11.6841, "step": 41073 }, { "epoch": 0.8597923469814954, "grad_norm": 0.33635514974594116, "learning_rate": 0.00016216857970950864, "loss": 11.6533, "step": 41074 }, { "epoch": 0.8598132797454576, "grad_norm": 0.4301346242427826, "learning_rate": 0.00016216686235651234, "loss": 11.657, "step": 41075 }, { "epoch": 0.8598342125094197, "grad_norm": 0.23097285628318787, "learning_rate": 0.00016216514497363117, "loss": 11.6659, "step": 41076 }, { "epoch": 0.8598551452733819, "grad_norm": 0.4070819914340973, "learning_rate": 0.00016216342756086597, "loss": 11.6706, "step": 41077 }, { "epoch": 0.859876078037344, "grad_norm": 0.25786349177360535, "learning_rate": 0.00016216171011821756, "loss": 11.6573, "step": 41078 }, { "epoch": 0.8598970108013062, "grad_norm": 0.26934462785720825, "learning_rate": 0.00016215999264568678, "loss": 11.6724, "step": 41079 }, { "epoch": 0.8599179435652684, "grad_norm": 0.3263617157936096, "learning_rate": 0.00016215827514327443, "loss": 11.6775, "step": 41080 }, { "epoch": 0.8599388763292305, "grad_norm": 0.2926449179649353, "learning_rate": 0.00016215655761098137, "loss": 11.6721, "step": 41081 }, { "epoch": 0.8599598090931927, "grad_norm": 0.2791154980659485, "learning_rate": 0.0001621548400488084, "loss": 11.663, "step": 41082 }, { "epoch": 0.8599807418571548, "grad_norm": 0.3889442980289459, "learning_rate": 0.00016215312245675633, "loss": 11.6783, "step": 41083 }, { "epoch": 0.860001674621117, "grad_norm": 0.3383540213108063, "learning_rate": 0.00016215140483482604, "loss": 11.6954, "step": 41084 }, { "epoch": 0.8600226073850791, "grad_norm": 0.383095383644104, "learning_rate": 0.0001621496871830183, "loss": 11.6753, "step": 41085 }, { "epoch": 0.8600435401490413, "grad_norm": 0.33579882979393005, "learning_rate": 0.00016214796950133397, "loss": 11.6503, "step": 41086 }, { "epoch": 0.8600644729130035, "grad_norm": 0.30240321159362793, "learning_rate": 0.00016214625178977387, "loss": 11.6685, "step": 41087 }, { "epoch": 0.8600854056769656, "grad_norm": 0.3321729600429535, "learning_rate": 0.00016214453404833883, "loss": 11.6471, "step": 41088 }, { "epoch": 0.8601063384409278, "grad_norm": 0.31686681509017944, "learning_rate": 0.00016214281627702966, "loss": 11.6646, "step": 41089 }, { "epoch": 0.8601272712048899, "grad_norm": 0.25236669182777405, "learning_rate": 0.00016214109847584716, "loss": 11.6822, "step": 41090 }, { "epoch": 0.860148203968852, "grad_norm": 0.333909809589386, "learning_rate": 0.00016213938064479227, "loss": 11.6538, "step": 41091 }, { "epoch": 0.8601691367328143, "grad_norm": 0.31390348076820374, "learning_rate": 0.00016213766278386566, "loss": 11.6718, "step": 41092 }, { "epoch": 0.8601900694967763, "grad_norm": 0.35043853521347046, "learning_rate": 0.00016213594489306824, "loss": 11.6695, "step": 41093 }, { "epoch": 0.8602110022607385, "grad_norm": 0.3319718539714813, "learning_rate": 0.00016213422697240087, "loss": 11.676, "step": 41094 }, { "epoch": 0.8602319350247006, "grad_norm": 0.37921103835105896, "learning_rate": 0.00016213250902186426, "loss": 11.6806, "step": 41095 }, { "epoch": 0.8602528677886628, "grad_norm": 0.3264158368110657, "learning_rate": 0.00016213079104145938, "loss": 11.6775, "step": 41096 }, { "epoch": 0.8602738005526249, "grad_norm": 0.3485931158065796, "learning_rate": 0.00016212907303118695, "loss": 11.6614, "step": 41097 }, { "epoch": 0.8602947333165871, "grad_norm": 0.2674497663974762, "learning_rate": 0.00016212735499104782, "loss": 11.6782, "step": 41098 }, { "epoch": 0.8603156660805493, "grad_norm": 0.33933889865875244, "learning_rate": 0.00016212563692104284, "loss": 11.6633, "step": 41099 }, { "epoch": 0.8603365988445114, "grad_norm": 0.34734633564949036, "learning_rate": 0.0001621239188211728, "loss": 11.674, "step": 41100 }, { "epoch": 0.8603575316084736, "grad_norm": 0.2780756950378418, "learning_rate": 0.0001621222006914386, "loss": 11.6817, "step": 41101 }, { "epoch": 0.8603784643724357, "grad_norm": 0.31366896629333496, "learning_rate": 0.00016212048253184098, "loss": 11.6573, "step": 41102 }, { "epoch": 0.8603993971363979, "grad_norm": 0.27163878083229065, "learning_rate": 0.00016211876434238078, "loss": 11.6698, "step": 41103 }, { "epoch": 0.86042032990036, "grad_norm": 0.3669084310531616, "learning_rate": 0.00016211704612305886, "loss": 11.6984, "step": 41104 }, { "epoch": 0.8604412626643222, "grad_norm": 0.29720523953437805, "learning_rate": 0.00016211532787387603, "loss": 11.6547, "step": 41105 }, { "epoch": 0.8604621954282844, "grad_norm": 0.3453606367111206, "learning_rate": 0.00016211360959483316, "loss": 11.6594, "step": 41106 }, { "epoch": 0.8604831281922465, "grad_norm": 0.35128188133239746, "learning_rate": 0.00016211189128593096, "loss": 11.6622, "step": 41107 }, { "epoch": 0.8605040609562087, "grad_norm": 0.3225110173225403, "learning_rate": 0.00016211017294717037, "loss": 11.6657, "step": 41108 }, { "epoch": 0.8605249937201708, "grad_norm": 0.25790685415267944, "learning_rate": 0.00016210845457855218, "loss": 11.6713, "step": 41109 }, { "epoch": 0.860545926484133, "grad_norm": 0.26766476035118103, "learning_rate": 0.00016210673618007718, "loss": 11.6558, "step": 41110 }, { "epoch": 0.8605668592480952, "grad_norm": 0.3310876786708832, "learning_rate": 0.00016210501775174623, "loss": 11.6745, "step": 41111 }, { "epoch": 0.8605877920120573, "grad_norm": 0.3026551306247711, "learning_rate": 0.00016210329929356018, "loss": 11.6809, "step": 41112 }, { "epoch": 0.8606087247760195, "grad_norm": 0.34707626700401306, "learning_rate": 0.00016210158080551986, "loss": 11.6922, "step": 41113 }, { "epoch": 0.8606296575399816, "grad_norm": 0.30122190713882446, "learning_rate": 0.000162099862287626, "loss": 11.6583, "step": 41114 }, { "epoch": 0.8606505903039438, "grad_norm": 0.2293182909488678, "learning_rate": 0.00016209814373987952, "loss": 11.6691, "step": 41115 }, { "epoch": 0.8606715230679058, "grad_norm": 0.29965126514434814, "learning_rate": 0.00016209642516228122, "loss": 11.6769, "step": 41116 }, { "epoch": 0.860692455831868, "grad_norm": 0.302500456571579, "learning_rate": 0.0001620947065548319, "loss": 11.6613, "step": 41117 }, { "epoch": 0.8607133885958302, "grad_norm": 0.28166478872299194, "learning_rate": 0.00016209298791753244, "loss": 11.668, "step": 41118 }, { "epoch": 0.8607343213597923, "grad_norm": 0.2887320816516876, "learning_rate": 0.00016209126925038363, "loss": 11.663, "step": 41119 }, { "epoch": 0.8607552541237545, "grad_norm": 0.33148905634880066, "learning_rate": 0.00016208955055338627, "loss": 11.6623, "step": 41120 }, { "epoch": 0.8607761868877166, "grad_norm": 0.3205379545688629, "learning_rate": 0.0001620878318265413, "loss": 11.6765, "step": 41121 }, { "epoch": 0.8607971196516788, "grad_norm": 0.3754299581050873, "learning_rate": 0.0001620861130698494, "loss": 11.6724, "step": 41122 }, { "epoch": 0.8608180524156409, "grad_norm": 0.3877177834510803, "learning_rate": 0.00016208439428331146, "loss": 11.6794, "step": 41123 }, { "epoch": 0.8608389851796031, "grad_norm": 0.310311496257782, "learning_rate": 0.00016208267546692832, "loss": 11.6715, "step": 41124 }, { "epoch": 0.8608599179435653, "grad_norm": 0.32647642493247986, "learning_rate": 0.0001620809566207008, "loss": 11.6744, "step": 41125 }, { "epoch": 0.8608808507075274, "grad_norm": 0.2846166789531708, "learning_rate": 0.0001620792377446297, "loss": 11.6724, "step": 41126 }, { "epoch": 0.8609017834714896, "grad_norm": 0.3563547134399414, "learning_rate": 0.0001620775188387159, "loss": 11.6667, "step": 41127 }, { "epoch": 0.8609227162354517, "grad_norm": 0.33721309900283813, "learning_rate": 0.00016207579990296016, "loss": 11.6801, "step": 41128 }, { "epoch": 0.8609436489994139, "grad_norm": 0.298977792263031, "learning_rate": 0.00016207408093736337, "loss": 11.6619, "step": 41129 }, { "epoch": 0.8609645817633761, "grad_norm": 0.24241551756858826, "learning_rate": 0.0001620723619419263, "loss": 11.6553, "step": 41130 }, { "epoch": 0.8609855145273382, "grad_norm": 0.25271502137184143, "learning_rate": 0.00016207064291664982, "loss": 11.6733, "step": 41131 }, { "epoch": 0.8610064472913004, "grad_norm": 0.4681752324104309, "learning_rate": 0.0001620689238615347, "loss": 11.6526, "step": 41132 }, { "epoch": 0.8610273800552625, "grad_norm": 0.3036980628967285, "learning_rate": 0.00016206720477658185, "loss": 11.6631, "step": 41133 }, { "epoch": 0.8610483128192247, "grad_norm": 0.2729778289794922, "learning_rate": 0.00016206548566179203, "loss": 11.6886, "step": 41134 }, { "epoch": 0.8610692455831868, "grad_norm": 0.4305451214313507, "learning_rate": 0.0001620637665171661, "loss": 11.6859, "step": 41135 }, { "epoch": 0.861090178347149, "grad_norm": 0.3084641993045807, "learning_rate": 0.00016206204734270485, "loss": 11.6696, "step": 41136 }, { "epoch": 0.8611111111111112, "grad_norm": 0.33649513125419617, "learning_rate": 0.00016206032813840913, "loss": 11.6471, "step": 41137 }, { "epoch": 0.8611320438750732, "grad_norm": 0.3244655430316925, "learning_rate": 0.0001620586089042798, "loss": 11.6765, "step": 41138 }, { "epoch": 0.8611529766390355, "grad_norm": 0.2556741237640381, "learning_rate": 0.00016205688964031763, "loss": 11.647, "step": 41139 }, { "epoch": 0.8611739094029975, "grad_norm": 0.2774287760257721, "learning_rate": 0.0001620551703465235, "loss": 11.6574, "step": 41140 }, { "epoch": 0.8611948421669597, "grad_norm": 0.30436766147613525, "learning_rate": 0.00016205345102289815, "loss": 11.6737, "step": 41141 }, { "epoch": 0.8612157749309218, "grad_norm": 0.3037009537220001, "learning_rate": 0.0001620517316694425, "loss": 11.664, "step": 41142 }, { "epoch": 0.861236707694884, "grad_norm": 0.41421377658843994, "learning_rate": 0.00016205001228615732, "loss": 11.6786, "step": 41143 }, { "epoch": 0.8612576404588462, "grad_norm": 0.31495535373687744, "learning_rate": 0.00016204829287304345, "loss": 11.6808, "step": 41144 }, { "epoch": 0.8612785732228083, "grad_norm": 0.38816869258880615, "learning_rate": 0.00016204657343010177, "loss": 11.676, "step": 41145 }, { "epoch": 0.8612995059867705, "grad_norm": 0.34768661856651306, "learning_rate": 0.000162044853957333, "loss": 11.6757, "step": 41146 }, { "epoch": 0.8613204387507326, "grad_norm": 0.3730458617210388, "learning_rate": 0.00016204313445473804, "loss": 11.6686, "step": 41147 }, { "epoch": 0.8613413715146948, "grad_norm": 0.34138378500938416, "learning_rate": 0.00016204141492231775, "loss": 11.6751, "step": 41148 }, { "epoch": 0.8613623042786569, "grad_norm": 0.29230833053588867, "learning_rate": 0.00016203969536007285, "loss": 11.6645, "step": 41149 }, { "epoch": 0.8613832370426191, "grad_norm": 0.3026735484600067, "learning_rate": 0.00016203797576800426, "loss": 11.6764, "step": 41150 }, { "epoch": 0.8614041698065813, "grad_norm": 0.3139279782772064, "learning_rate": 0.00016203625614611276, "loss": 11.6742, "step": 41151 }, { "epoch": 0.8614251025705434, "grad_norm": 0.33529916405677795, "learning_rate": 0.0001620345364943992, "loss": 11.6571, "step": 41152 }, { "epoch": 0.8614460353345056, "grad_norm": 0.3231441080570221, "learning_rate": 0.00016203281681286439, "loss": 11.6883, "step": 41153 }, { "epoch": 0.8614669680984677, "grad_norm": 0.2991492450237274, "learning_rate": 0.00016203109710150913, "loss": 11.6525, "step": 41154 }, { "epoch": 0.8614879008624299, "grad_norm": 0.23520450294017792, "learning_rate": 0.0001620293773603343, "loss": 11.6473, "step": 41155 }, { "epoch": 0.8615088336263921, "grad_norm": 0.29410895705223083, "learning_rate": 0.00016202765758934073, "loss": 11.6928, "step": 41156 }, { "epoch": 0.8615297663903542, "grad_norm": 0.33283278346061707, "learning_rate": 0.0001620259377885292, "loss": 11.6669, "step": 41157 }, { "epoch": 0.8615506991543164, "grad_norm": 0.304402232170105, "learning_rate": 0.00016202421795790054, "loss": 11.6637, "step": 41158 }, { "epoch": 0.8615716319182785, "grad_norm": 0.28510555624961853, "learning_rate": 0.00016202249809745562, "loss": 11.6667, "step": 41159 }, { "epoch": 0.8615925646822407, "grad_norm": 0.33873194456100464, "learning_rate": 0.00016202077820719524, "loss": 11.6624, "step": 41160 }, { "epoch": 0.8616134974462027, "grad_norm": 0.2930690050125122, "learning_rate": 0.0001620190582871202, "loss": 11.6649, "step": 41161 }, { "epoch": 0.861634430210165, "grad_norm": 0.28103935718536377, "learning_rate": 0.00016201733833723142, "loss": 11.6724, "step": 41162 }, { "epoch": 0.8616553629741271, "grad_norm": 0.32203444838523865, "learning_rate": 0.0001620156183575296, "loss": 11.6924, "step": 41163 }, { "epoch": 0.8616762957380892, "grad_norm": 0.25758805871009827, "learning_rate": 0.00016201389834801566, "loss": 11.6686, "step": 41164 }, { "epoch": 0.8616972285020514, "grad_norm": 0.32359519600868225, "learning_rate": 0.0001620121783086904, "loss": 11.6717, "step": 41165 }, { "epoch": 0.8617181612660135, "grad_norm": 0.3764362931251526, "learning_rate": 0.00016201045823955462, "loss": 11.6698, "step": 41166 }, { "epoch": 0.8617390940299757, "grad_norm": 0.37075191736221313, "learning_rate": 0.0001620087381406092, "loss": 11.6787, "step": 41167 }, { "epoch": 0.8617600267939378, "grad_norm": 0.31370291113853455, "learning_rate": 0.00016200701801185493, "loss": 11.6745, "step": 41168 }, { "epoch": 0.8617809595579, "grad_norm": 0.251416951417923, "learning_rate": 0.00016200529785329263, "loss": 11.6613, "step": 41169 }, { "epoch": 0.8618018923218622, "grad_norm": 0.2748948037624359, "learning_rate": 0.00016200357766492314, "loss": 11.6778, "step": 41170 }, { "epoch": 0.8618228250858243, "grad_norm": 0.29078754782676697, "learning_rate": 0.00016200185744674728, "loss": 11.6534, "step": 41171 }, { "epoch": 0.8618437578497865, "grad_norm": 0.2827149033546448, "learning_rate": 0.00016200013719876592, "loss": 11.6738, "step": 41172 }, { "epoch": 0.8618646906137486, "grad_norm": 0.3044454753398895, "learning_rate": 0.0001619984169209798, "loss": 11.6608, "step": 41173 }, { "epoch": 0.8618856233777108, "grad_norm": 0.2965928912162781, "learning_rate": 0.00016199669661338983, "loss": 11.6565, "step": 41174 }, { "epoch": 0.861906556141673, "grad_norm": 0.2820483148097992, "learning_rate": 0.00016199497627599683, "loss": 11.6643, "step": 41175 }, { "epoch": 0.8619274889056351, "grad_norm": 0.3077366054058075, "learning_rate": 0.00016199325590880153, "loss": 11.6637, "step": 41176 }, { "epoch": 0.8619484216695973, "grad_norm": 0.33939918875694275, "learning_rate": 0.0001619915355118049, "loss": 11.6596, "step": 41177 }, { "epoch": 0.8619693544335594, "grad_norm": 0.3390738368034363, "learning_rate": 0.00016198981508500765, "loss": 11.6701, "step": 41178 }, { "epoch": 0.8619902871975216, "grad_norm": 0.33184751868247986, "learning_rate": 0.0001619880946284107, "loss": 11.6901, "step": 41179 }, { "epoch": 0.8620112199614837, "grad_norm": 0.34371182322502136, "learning_rate": 0.00016198637414201482, "loss": 11.6713, "step": 41180 }, { "epoch": 0.8620321527254459, "grad_norm": 0.31102973222732544, "learning_rate": 0.0001619846536258208, "loss": 11.6692, "step": 41181 }, { "epoch": 0.8620530854894081, "grad_norm": 0.3043740391731262, "learning_rate": 0.00016198293307982957, "loss": 11.6669, "step": 41182 }, { "epoch": 0.8620740182533702, "grad_norm": 0.43909019231796265, "learning_rate": 0.00016198121250404185, "loss": 11.6728, "step": 41183 }, { "epoch": 0.8620949510173324, "grad_norm": 0.3444395661354065, "learning_rate": 0.00016197949189845857, "loss": 11.672, "step": 41184 }, { "epoch": 0.8621158837812944, "grad_norm": 0.32751232385635376, "learning_rate": 0.00016197777126308047, "loss": 11.6579, "step": 41185 }, { "epoch": 0.8621368165452566, "grad_norm": 0.30817949771881104, "learning_rate": 0.00016197605059790844, "loss": 11.6669, "step": 41186 }, { "epoch": 0.8621577493092187, "grad_norm": 0.39749908447265625, "learning_rate": 0.00016197432990294327, "loss": 11.6732, "step": 41187 }, { "epoch": 0.8621786820731809, "grad_norm": 0.40175488591194153, "learning_rate": 0.0001619726091781858, "loss": 11.6729, "step": 41188 }, { "epoch": 0.8621996148371431, "grad_norm": 0.2863693833351135, "learning_rate": 0.00016197088842363685, "loss": 11.6652, "step": 41189 }, { "epoch": 0.8622205476011052, "grad_norm": 0.40773066878318787, "learning_rate": 0.00016196916763929723, "loss": 11.6635, "step": 41190 }, { "epoch": 0.8622414803650674, "grad_norm": 0.31148356199264526, "learning_rate": 0.00016196744682516782, "loss": 11.6583, "step": 41191 }, { "epoch": 0.8622624131290295, "grad_norm": 0.3401973843574524, "learning_rate": 0.00016196572598124943, "loss": 11.6714, "step": 41192 }, { "epoch": 0.8622833458929917, "grad_norm": 0.2741113603115082, "learning_rate": 0.00016196400510754284, "loss": 11.6651, "step": 41193 }, { "epoch": 0.8623042786569539, "grad_norm": 0.3098936378955841, "learning_rate": 0.00016196228420404892, "loss": 11.6709, "step": 41194 }, { "epoch": 0.862325211420916, "grad_norm": 0.31043580174446106, "learning_rate": 0.0001619605632707685, "loss": 11.6714, "step": 41195 }, { "epoch": 0.8623461441848782, "grad_norm": 0.27894923090934753, "learning_rate": 0.0001619588423077024, "loss": 11.6656, "step": 41196 }, { "epoch": 0.8623670769488403, "grad_norm": 0.279156357049942, "learning_rate": 0.0001619571213148514, "loss": 11.6519, "step": 41197 }, { "epoch": 0.8623880097128025, "grad_norm": 0.26873108744621277, "learning_rate": 0.00016195540029221642, "loss": 11.6737, "step": 41198 }, { "epoch": 0.8624089424767646, "grad_norm": 0.337540864944458, "learning_rate": 0.0001619536792397982, "loss": 11.6535, "step": 41199 }, { "epoch": 0.8624298752407268, "grad_norm": 0.2855257987976074, "learning_rate": 0.00016195195815759764, "loss": 11.6711, "step": 41200 }, { "epoch": 0.862450808004689, "grad_norm": 0.25211474299430847, "learning_rate": 0.0001619502370456155, "loss": 11.67, "step": 41201 }, { "epoch": 0.8624717407686511, "grad_norm": 0.32056254148483276, "learning_rate": 0.00016194851590385268, "loss": 11.6585, "step": 41202 }, { "epoch": 0.8624926735326133, "grad_norm": 0.2599952816963196, "learning_rate": 0.00016194679473230993, "loss": 11.6742, "step": 41203 }, { "epoch": 0.8625136062965754, "grad_norm": 0.2938421070575714, "learning_rate": 0.00016194507353098814, "loss": 11.6896, "step": 41204 }, { "epoch": 0.8625345390605376, "grad_norm": 0.3179870545864105, "learning_rate": 0.00016194335229988808, "loss": 11.6828, "step": 41205 }, { "epoch": 0.8625554718244997, "grad_norm": 0.3421188294887543, "learning_rate": 0.00016194163103901062, "loss": 11.6843, "step": 41206 }, { "epoch": 0.8625764045884619, "grad_norm": 0.2631263732910156, "learning_rate": 0.0001619399097483566, "loss": 11.6611, "step": 41207 }, { "epoch": 0.862597337352424, "grad_norm": 0.2890200912952423, "learning_rate": 0.0001619381884279268, "loss": 11.6812, "step": 41208 }, { "epoch": 0.8626182701163861, "grad_norm": 0.29492267966270447, "learning_rate": 0.0001619364670777221, "loss": 11.6709, "step": 41209 }, { "epoch": 0.8626392028803483, "grad_norm": 0.2763107717037201, "learning_rate": 0.00016193474569774327, "loss": 11.6742, "step": 41210 }, { "epoch": 0.8626601356443104, "grad_norm": 0.3826264441013336, "learning_rate": 0.00016193302428799117, "loss": 11.6535, "step": 41211 }, { "epoch": 0.8626810684082726, "grad_norm": 0.351330041885376, "learning_rate": 0.00016193130284846663, "loss": 11.6705, "step": 41212 }, { "epoch": 0.8627020011722348, "grad_norm": 0.2807389199733734, "learning_rate": 0.00016192958137917045, "loss": 11.65, "step": 41213 }, { "epoch": 0.8627229339361969, "grad_norm": 0.2855274975299835, "learning_rate": 0.00016192785988010353, "loss": 11.6723, "step": 41214 }, { "epoch": 0.8627438667001591, "grad_norm": 0.38291358947753906, "learning_rate": 0.0001619261383512666, "loss": 11.6697, "step": 41215 }, { "epoch": 0.8627647994641212, "grad_norm": 0.268410861492157, "learning_rate": 0.00016192441679266058, "loss": 11.6566, "step": 41216 }, { "epoch": 0.8627857322280834, "grad_norm": 0.3015710711479187, "learning_rate": 0.0001619226952042862, "loss": 11.6749, "step": 41217 }, { "epoch": 0.8628066649920455, "grad_norm": 0.4073185622692108, "learning_rate": 0.00016192097358614434, "loss": 11.6901, "step": 41218 }, { "epoch": 0.8628275977560077, "grad_norm": 0.3052608072757721, "learning_rate": 0.00016191925193823588, "loss": 11.6633, "step": 41219 }, { "epoch": 0.8628485305199699, "grad_norm": 0.30909162759780884, "learning_rate": 0.00016191753026056156, "loss": 11.6696, "step": 41220 }, { "epoch": 0.862869463283932, "grad_norm": 0.25747066736221313, "learning_rate": 0.00016191580855312224, "loss": 11.6749, "step": 41221 }, { "epoch": 0.8628903960478942, "grad_norm": 0.2565036118030548, "learning_rate": 0.00016191408681591875, "loss": 11.6472, "step": 41222 }, { "epoch": 0.8629113288118563, "grad_norm": 0.2595544457435608, "learning_rate": 0.00016191236504895193, "loss": 11.6799, "step": 41223 }, { "epoch": 0.8629322615758185, "grad_norm": 0.4387397766113281, "learning_rate": 0.0001619106432522226, "loss": 11.6561, "step": 41224 }, { "epoch": 0.8629531943397806, "grad_norm": 0.2688242197036743, "learning_rate": 0.00016190892142573155, "loss": 11.6776, "step": 41225 }, { "epoch": 0.8629741271037428, "grad_norm": 0.3434668183326721, "learning_rate": 0.00016190719956947968, "loss": 11.6594, "step": 41226 }, { "epoch": 0.862995059867705, "grad_norm": 0.2603737711906433, "learning_rate": 0.00016190547768346776, "loss": 11.6576, "step": 41227 }, { "epoch": 0.8630159926316671, "grad_norm": 0.2932853102684021, "learning_rate": 0.00016190375576769666, "loss": 11.6707, "step": 41228 }, { "epoch": 0.8630369253956293, "grad_norm": 0.2263471484184265, "learning_rate": 0.00016190203382216714, "loss": 11.6624, "step": 41229 }, { "epoch": 0.8630578581595914, "grad_norm": 0.28428202867507935, "learning_rate": 0.00016190031184688008, "loss": 11.6756, "step": 41230 }, { "epoch": 0.8630787909235536, "grad_norm": 0.3293589949607849, "learning_rate": 0.00016189858984183632, "loss": 11.6693, "step": 41231 }, { "epoch": 0.8630997236875158, "grad_norm": 0.31611165404319763, "learning_rate": 0.00016189686780703665, "loss": 11.6914, "step": 41232 }, { "epoch": 0.8631206564514778, "grad_norm": 0.26179635524749756, "learning_rate": 0.00016189514574248195, "loss": 11.6672, "step": 41233 }, { "epoch": 0.86314158921544, "grad_norm": 0.23942837119102478, "learning_rate": 0.00016189342364817297, "loss": 11.6678, "step": 41234 }, { "epoch": 0.8631625219794021, "grad_norm": 0.34600865840911865, "learning_rate": 0.00016189170152411058, "loss": 11.6499, "step": 41235 }, { "epoch": 0.8631834547433643, "grad_norm": 0.2751905918121338, "learning_rate": 0.00016188997937029566, "loss": 11.6929, "step": 41236 }, { "epoch": 0.8632043875073264, "grad_norm": 0.2986668050289154, "learning_rate": 0.00016188825718672892, "loss": 11.6531, "step": 41237 }, { "epoch": 0.8632253202712886, "grad_norm": 0.33962902426719666, "learning_rate": 0.00016188653497341133, "loss": 11.6569, "step": 41238 }, { "epoch": 0.8632462530352508, "grad_norm": 0.33342650532722473, "learning_rate": 0.00016188481273034358, "loss": 11.6876, "step": 41239 }, { "epoch": 0.8632671857992129, "grad_norm": 0.2982022166252136, "learning_rate": 0.0001618830904575266, "loss": 11.6827, "step": 41240 }, { "epoch": 0.8632881185631751, "grad_norm": 0.30944839119911194, "learning_rate": 0.00016188136815496116, "loss": 11.6666, "step": 41241 }, { "epoch": 0.8633090513271372, "grad_norm": 0.35568955540657043, "learning_rate": 0.0001618796458226481, "loss": 11.6707, "step": 41242 }, { "epoch": 0.8633299840910994, "grad_norm": 0.3122411370277405, "learning_rate": 0.00016187792346058827, "loss": 11.6483, "step": 41243 }, { "epoch": 0.8633509168550615, "grad_norm": 0.34408998489379883, "learning_rate": 0.00016187620106878248, "loss": 11.6567, "step": 41244 }, { "epoch": 0.8633718496190237, "grad_norm": 0.26570025086402893, "learning_rate": 0.00016187447864723153, "loss": 11.6842, "step": 41245 }, { "epoch": 0.8633927823829859, "grad_norm": 0.3359360694885254, "learning_rate": 0.0001618727561959363, "loss": 11.6652, "step": 41246 }, { "epoch": 0.863413715146948, "grad_norm": 0.25014612078666687, "learning_rate": 0.0001618710337148976, "loss": 11.6659, "step": 41247 }, { "epoch": 0.8634346479109102, "grad_norm": 0.3520568013191223, "learning_rate": 0.00016186931120411625, "loss": 11.6599, "step": 41248 }, { "epoch": 0.8634555806748723, "grad_norm": 0.2822279632091522, "learning_rate": 0.00016186758866359308, "loss": 11.6688, "step": 41249 }, { "epoch": 0.8634765134388345, "grad_norm": 0.3035741448402405, "learning_rate": 0.00016186586609332894, "loss": 11.6801, "step": 41250 }, { "epoch": 0.8634974462027967, "grad_norm": 0.28660479187965393, "learning_rate": 0.00016186414349332462, "loss": 11.6743, "step": 41251 }, { "epoch": 0.8635183789667588, "grad_norm": 0.27550560235977173, "learning_rate": 0.00016186242086358097, "loss": 11.6783, "step": 41252 }, { "epoch": 0.863539311730721, "grad_norm": 0.2743961215019226, "learning_rate": 0.0001618606982040988, "loss": 11.6649, "step": 41253 }, { "epoch": 0.863560244494683, "grad_norm": 0.32480424642562866, "learning_rate": 0.000161858975514879, "loss": 11.673, "step": 41254 }, { "epoch": 0.8635811772586452, "grad_norm": 0.2689140737056732, "learning_rate": 0.0001618572527959223, "loss": 11.6617, "step": 41255 }, { "epoch": 0.8636021100226073, "grad_norm": 0.29293093085289, "learning_rate": 0.00016185553004722958, "loss": 11.6812, "step": 41256 }, { "epoch": 0.8636230427865695, "grad_norm": 0.2746196389198303, "learning_rate": 0.0001618538072688017, "loss": 11.6592, "step": 41257 }, { "epoch": 0.8636439755505317, "grad_norm": 0.3191371262073517, "learning_rate": 0.00016185208446063945, "loss": 11.6573, "step": 41258 }, { "epoch": 0.8636649083144938, "grad_norm": 0.34859415888786316, "learning_rate": 0.00016185036162274364, "loss": 11.673, "step": 41259 }, { "epoch": 0.863685841078456, "grad_norm": 0.3334919214248657, "learning_rate": 0.00016184863875511513, "loss": 11.6712, "step": 41260 }, { "epoch": 0.8637067738424181, "grad_norm": 0.2841915786266327, "learning_rate": 0.00016184691585775473, "loss": 11.6688, "step": 41261 }, { "epoch": 0.8637277066063803, "grad_norm": 0.2826899290084839, "learning_rate": 0.0001618451929306633, "loss": 11.6771, "step": 41262 }, { "epoch": 0.8637486393703424, "grad_norm": 0.3557163178920746, "learning_rate": 0.00016184346997384164, "loss": 11.6764, "step": 41263 }, { "epoch": 0.8637695721343046, "grad_norm": 0.33325740694999695, "learning_rate": 0.00016184174698729057, "loss": 11.664, "step": 41264 }, { "epoch": 0.8637905048982668, "grad_norm": 0.3853236138820648, "learning_rate": 0.00016184002397101095, "loss": 11.6785, "step": 41265 }, { "epoch": 0.8638114376622289, "grad_norm": 0.31031250953674316, "learning_rate": 0.0001618383009250036, "loss": 11.6525, "step": 41266 }, { "epoch": 0.8638323704261911, "grad_norm": 0.2888024151325226, "learning_rate": 0.0001618365778492693, "loss": 11.6666, "step": 41267 }, { "epoch": 0.8638533031901532, "grad_norm": 0.2769889831542969, "learning_rate": 0.00016183485474380898, "loss": 11.6793, "step": 41268 }, { "epoch": 0.8638742359541154, "grad_norm": 0.3396814465522766, "learning_rate": 0.00016183313160862334, "loss": 11.6774, "step": 41269 }, { "epoch": 0.8638951687180776, "grad_norm": 0.3268686830997467, "learning_rate": 0.00016183140844371332, "loss": 11.6687, "step": 41270 }, { "epoch": 0.8639161014820397, "grad_norm": 0.3936644494533539, "learning_rate": 0.00016182968524907967, "loss": 11.6567, "step": 41271 }, { "epoch": 0.8639370342460019, "grad_norm": 0.332470566034317, "learning_rate": 0.0001618279620247233, "loss": 11.672, "step": 41272 }, { "epoch": 0.863957967009964, "grad_norm": 0.38034528493881226, "learning_rate": 0.00016182623877064494, "loss": 11.667, "step": 41273 }, { "epoch": 0.8639788997739262, "grad_norm": 0.3149417042732239, "learning_rate": 0.00016182451548684548, "loss": 11.6719, "step": 41274 }, { "epoch": 0.8639998325378883, "grad_norm": 0.3227684199810028, "learning_rate": 0.00016182279217332574, "loss": 11.6741, "step": 41275 }, { "epoch": 0.8640207653018505, "grad_norm": 0.27735164761543274, "learning_rate": 0.00016182106883008658, "loss": 11.6744, "step": 41276 }, { "epoch": 0.8640416980658127, "grad_norm": 0.31349843740463257, "learning_rate": 0.00016181934545712875, "loss": 11.6672, "step": 41277 }, { "epoch": 0.8640626308297747, "grad_norm": 0.26548489928245544, "learning_rate": 0.0001618176220544531, "loss": 11.6694, "step": 41278 }, { "epoch": 0.864083563593737, "grad_norm": 0.25352707505226135, "learning_rate": 0.00016181589862206052, "loss": 11.6726, "step": 41279 }, { "epoch": 0.864104496357699, "grad_norm": 0.3369984030723572, "learning_rate": 0.00016181417515995182, "loss": 11.659, "step": 41280 }, { "epoch": 0.8641254291216612, "grad_norm": 0.2764317989349365, "learning_rate": 0.00016181245166812776, "loss": 11.6588, "step": 41281 }, { "epoch": 0.8641463618856233, "grad_norm": 0.3022201359272003, "learning_rate": 0.00016181072814658925, "loss": 11.6687, "step": 41282 }, { "epoch": 0.8641672946495855, "grad_norm": 0.462504506111145, "learning_rate": 0.00016180900459533706, "loss": 11.6788, "step": 41283 }, { "epoch": 0.8641882274135477, "grad_norm": 0.35751911997795105, "learning_rate": 0.00016180728101437204, "loss": 11.6815, "step": 41284 }, { "epoch": 0.8642091601775098, "grad_norm": 0.3540484309196472, "learning_rate": 0.00016180555740369506, "loss": 11.6689, "step": 41285 }, { "epoch": 0.864230092941472, "grad_norm": 0.35314440727233887, "learning_rate": 0.00016180383376330685, "loss": 11.6713, "step": 41286 }, { "epoch": 0.8642510257054341, "grad_norm": 0.2773228585720062, "learning_rate": 0.00016180211009320835, "loss": 11.6643, "step": 41287 }, { "epoch": 0.8642719584693963, "grad_norm": 0.298673540353775, "learning_rate": 0.0001618003863934003, "loss": 11.6743, "step": 41288 }, { "epoch": 0.8642928912333585, "grad_norm": 0.2713399827480316, "learning_rate": 0.0001617986626638836, "loss": 11.6866, "step": 41289 }, { "epoch": 0.8643138239973206, "grad_norm": 0.3472873270511627, "learning_rate": 0.000161796938904659, "loss": 11.6453, "step": 41290 }, { "epoch": 0.8643347567612828, "grad_norm": 0.31360989809036255, "learning_rate": 0.00016179521511572743, "loss": 11.6825, "step": 41291 }, { "epoch": 0.8643556895252449, "grad_norm": 0.37209421396255493, "learning_rate": 0.00016179349129708963, "loss": 11.6754, "step": 41292 }, { "epoch": 0.8643766222892071, "grad_norm": 0.29925861954689026, "learning_rate": 0.00016179176744874645, "loss": 11.6644, "step": 41293 }, { "epoch": 0.8643975550531692, "grad_norm": 0.29954907298088074, "learning_rate": 0.00016179004357069876, "loss": 11.667, "step": 41294 }, { "epoch": 0.8644184878171314, "grad_norm": 0.349619597196579, "learning_rate": 0.00016178831966294735, "loss": 11.6898, "step": 41295 }, { "epoch": 0.8644394205810936, "grad_norm": 0.28263217210769653, "learning_rate": 0.00016178659572549305, "loss": 11.6669, "step": 41296 }, { "epoch": 0.8644603533450557, "grad_norm": 0.3134608268737793, "learning_rate": 0.00016178487175833668, "loss": 11.6696, "step": 41297 }, { "epoch": 0.8644812861090179, "grad_norm": 0.366440087556839, "learning_rate": 0.00016178314776147908, "loss": 11.6791, "step": 41298 }, { "epoch": 0.86450221887298, "grad_norm": 0.44419386982917786, "learning_rate": 0.00016178142373492112, "loss": 11.6785, "step": 41299 }, { "epoch": 0.8645231516369422, "grad_norm": 0.31916147470474243, "learning_rate": 0.00016177969967866358, "loss": 11.6417, "step": 41300 }, { "epoch": 0.8645440844009042, "grad_norm": 0.27423718571662903, "learning_rate": 0.00016177797559270725, "loss": 11.673, "step": 41301 }, { "epoch": 0.8645650171648664, "grad_norm": 0.4541437327861786, "learning_rate": 0.00016177625147705305, "loss": 11.645, "step": 41302 }, { "epoch": 0.8645859499288286, "grad_norm": 0.28043362498283386, "learning_rate": 0.0001617745273317018, "loss": 11.6673, "step": 41303 }, { "epoch": 0.8646068826927907, "grad_norm": 0.2651332914829254, "learning_rate": 0.00016177280315665427, "loss": 11.6895, "step": 41304 }, { "epoch": 0.8646278154567529, "grad_norm": 0.3255448639392853, "learning_rate": 0.0001617710789519113, "loss": 11.6706, "step": 41305 }, { "epoch": 0.864648748220715, "grad_norm": 0.259417861700058, "learning_rate": 0.00016176935471747374, "loss": 11.6828, "step": 41306 }, { "epoch": 0.8646696809846772, "grad_norm": 0.3073042333126068, "learning_rate": 0.00016176763045334242, "loss": 11.6588, "step": 41307 }, { "epoch": 0.8646906137486394, "grad_norm": 0.28989771008491516, "learning_rate": 0.00016176590615951816, "loss": 11.6716, "step": 41308 }, { "epoch": 0.8647115465126015, "grad_norm": 0.4985954165458679, "learning_rate": 0.0001617641818360018, "loss": 11.6678, "step": 41309 }, { "epoch": 0.8647324792765637, "grad_norm": 0.2735608220100403, "learning_rate": 0.00016176245748279416, "loss": 11.6664, "step": 41310 }, { "epoch": 0.8647534120405258, "grad_norm": 0.2817264795303345, "learning_rate": 0.00016176073309989602, "loss": 11.6756, "step": 41311 }, { "epoch": 0.864774344804488, "grad_norm": 0.3074282705783844, "learning_rate": 0.00016175900868730834, "loss": 11.6668, "step": 41312 }, { "epoch": 0.8647952775684501, "grad_norm": 0.3140886127948761, "learning_rate": 0.0001617572842450318, "loss": 11.6692, "step": 41313 }, { "epoch": 0.8648162103324123, "grad_norm": 0.38016197085380554, "learning_rate": 0.00016175555977306733, "loss": 11.6675, "step": 41314 }, { "epoch": 0.8648371430963745, "grad_norm": 0.3184560239315033, "learning_rate": 0.00016175383527141572, "loss": 11.6843, "step": 41315 }, { "epoch": 0.8648580758603366, "grad_norm": 0.41220682859420776, "learning_rate": 0.0001617521107400778, "loss": 11.6672, "step": 41316 }, { "epoch": 0.8648790086242988, "grad_norm": 0.4041725695133209, "learning_rate": 0.0001617503861790544, "loss": 11.6786, "step": 41317 }, { "epoch": 0.8648999413882609, "grad_norm": 0.35779935121536255, "learning_rate": 0.00016174866158834633, "loss": 11.6734, "step": 41318 }, { "epoch": 0.8649208741522231, "grad_norm": 0.3247112035751343, "learning_rate": 0.0001617469369679545, "loss": 11.6495, "step": 41319 }, { "epoch": 0.8649418069161852, "grad_norm": 0.28021925687789917, "learning_rate": 0.00016174521231787962, "loss": 11.6746, "step": 41320 }, { "epoch": 0.8649627396801474, "grad_norm": 0.31382086873054504, "learning_rate": 0.00016174348763812262, "loss": 11.673, "step": 41321 }, { "epoch": 0.8649836724441096, "grad_norm": 0.34549662470817566, "learning_rate": 0.00016174176292868428, "loss": 11.6755, "step": 41322 }, { "epoch": 0.8650046052080717, "grad_norm": 0.4485345780849457, "learning_rate": 0.00016174003818956542, "loss": 11.6656, "step": 41323 }, { "epoch": 0.8650255379720339, "grad_norm": 0.36941176652908325, "learning_rate": 0.00016173831342076688, "loss": 11.6824, "step": 41324 }, { "epoch": 0.8650464707359959, "grad_norm": 0.3814138174057007, "learning_rate": 0.0001617365886222895, "loss": 11.6735, "step": 41325 }, { "epoch": 0.8650674034999581, "grad_norm": 0.3123644292354584, "learning_rate": 0.00016173486379413414, "loss": 11.6644, "step": 41326 }, { "epoch": 0.8650883362639203, "grad_norm": 0.3893129825592041, "learning_rate": 0.00016173313893630157, "loss": 11.6687, "step": 41327 }, { "epoch": 0.8651092690278824, "grad_norm": 0.31761598587036133, "learning_rate": 0.00016173141404879265, "loss": 11.6808, "step": 41328 }, { "epoch": 0.8651302017918446, "grad_norm": 0.2674470543861389, "learning_rate": 0.0001617296891316082, "loss": 11.6582, "step": 41329 }, { "epoch": 0.8651511345558067, "grad_norm": 0.2731409966945648, "learning_rate": 0.00016172796418474901, "loss": 11.6553, "step": 41330 }, { "epoch": 0.8651720673197689, "grad_norm": 0.4740928113460541, "learning_rate": 0.000161726239208216, "loss": 11.6781, "step": 41331 }, { "epoch": 0.865193000083731, "grad_norm": 0.2621143162250519, "learning_rate": 0.00016172451420200994, "loss": 11.6625, "step": 41332 }, { "epoch": 0.8652139328476932, "grad_norm": 0.31003254652023315, "learning_rate": 0.00016172278916613166, "loss": 11.6603, "step": 41333 }, { "epoch": 0.8652348656116554, "grad_norm": 0.3332294523715973, "learning_rate": 0.000161721064100582, "loss": 11.6727, "step": 41334 }, { "epoch": 0.8652557983756175, "grad_norm": 0.3341565728187561, "learning_rate": 0.00016171933900536182, "loss": 11.6617, "step": 41335 }, { "epoch": 0.8652767311395797, "grad_norm": 0.3699752390384674, "learning_rate": 0.0001617176138804719, "loss": 11.6448, "step": 41336 }, { "epoch": 0.8652976639035418, "grad_norm": 0.279805988073349, "learning_rate": 0.00016171588872591306, "loss": 11.6693, "step": 41337 }, { "epoch": 0.865318596667504, "grad_norm": 0.2888354957103729, "learning_rate": 0.00016171416354168617, "loss": 11.6845, "step": 41338 }, { "epoch": 0.8653395294314661, "grad_norm": 0.3345554769039154, "learning_rate": 0.00016171243832779203, "loss": 11.694, "step": 41339 }, { "epoch": 0.8653604621954283, "grad_norm": 0.31711405515670776, "learning_rate": 0.00016171071308423152, "loss": 11.6731, "step": 41340 }, { "epoch": 0.8653813949593905, "grad_norm": 0.3657270669937134, "learning_rate": 0.0001617089878110054, "loss": 11.6829, "step": 41341 }, { "epoch": 0.8654023277233526, "grad_norm": 0.38347455859184265, "learning_rate": 0.00016170726250811453, "loss": 11.6707, "step": 41342 }, { "epoch": 0.8654232604873148, "grad_norm": 0.3564295172691345, "learning_rate": 0.00016170553717555976, "loss": 11.6598, "step": 41343 }, { "epoch": 0.8654441932512769, "grad_norm": 0.35347920656204224, "learning_rate": 0.0001617038118133419, "loss": 11.6526, "step": 41344 }, { "epoch": 0.8654651260152391, "grad_norm": 0.3548678159713745, "learning_rate": 0.0001617020864214618, "loss": 11.6564, "step": 41345 }, { "epoch": 0.8654860587792012, "grad_norm": 0.3083398938179016, "learning_rate": 0.00016170036099992022, "loss": 11.6695, "step": 41346 }, { "epoch": 0.8655069915431634, "grad_norm": 0.37447768449783325, "learning_rate": 0.0001616986355487181, "loss": 11.6739, "step": 41347 }, { "epoch": 0.8655279243071256, "grad_norm": 0.3113659620285034, "learning_rate": 0.00016169691006785616, "loss": 11.6582, "step": 41348 }, { "epoch": 0.8655488570710876, "grad_norm": 0.2750166356563568, "learning_rate": 0.0001616951845573353, "loss": 11.646, "step": 41349 }, { "epoch": 0.8655697898350498, "grad_norm": 0.2966017425060272, "learning_rate": 0.00016169345901715634, "loss": 11.6833, "step": 41350 }, { "epoch": 0.8655907225990119, "grad_norm": 0.34050077199935913, "learning_rate": 0.0001616917334473201, "loss": 11.6857, "step": 41351 }, { "epoch": 0.8656116553629741, "grad_norm": 0.2943972051143646, "learning_rate": 0.00016169000784782737, "loss": 11.6777, "step": 41352 }, { "epoch": 0.8656325881269363, "grad_norm": 0.34894248843193054, "learning_rate": 0.00016168828221867906, "loss": 11.6679, "step": 41353 }, { "epoch": 0.8656535208908984, "grad_norm": 0.2803359627723694, "learning_rate": 0.00016168655655987593, "loss": 11.6628, "step": 41354 }, { "epoch": 0.8656744536548606, "grad_norm": 0.3449608385562897, "learning_rate": 0.0001616848308714188, "loss": 11.6694, "step": 41355 }, { "epoch": 0.8656953864188227, "grad_norm": 0.30320170521736145, "learning_rate": 0.00016168310515330862, "loss": 11.6797, "step": 41356 }, { "epoch": 0.8657163191827849, "grad_norm": 0.32449910044670105, "learning_rate": 0.0001616813794055461, "loss": 11.6668, "step": 41357 }, { "epoch": 0.865737251946747, "grad_norm": 0.3286232352256775, "learning_rate": 0.00016167965362813208, "loss": 11.6656, "step": 41358 }, { "epoch": 0.8657581847107092, "grad_norm": 0.2937687039375305, "learning_rate": 0.00016167792782106744, "loss": 11.6723, "step": 41359 }, { "epoch": 0.8657791174746714, "grad_norm": 0.29816943407058716, "learning_rate": 0.000161676201984353, "loss": 11.6669, "step": 41360 }, { "epoch": 0.8658000502386335, "grad_norm": 0.29048943519592285, "learning_rate": 0.00016167447611798954, "loss": 11.6607, "step": 41361 }, { "epoch": 0.8658209830025957, "grad_norm": 0.3117368817329407, "learning_rate": 0.00016167275022197793, "loss": 11.6684, "step": 41362 }, { "epoch": 0.8658419157665578, "grad_norm": 0.2960778772830963, "learning_rate": 0.000161671024296319, "loss": 11.6536, "step": 41363 }, { "epoch": 0.86586284853052, "grad_norm": 0.29276517033576965, "learning_rate": 0.00016166929834101357, "loss": 11.6634, "step": 41364 }, { "epoch": 0.8658837812944821, "grad_norm": 0.28282642364501953, "learning_rate": 0.00016166757235606247, "loss": 11.6721, "step": 41365 }, { "epoch": 0.8659047140584443, "grad_norm": 0.41885194182395935, "learning_rate": 0.00016166584634146652, "loss": 11.6613, "step": 41366 }, { "epoch": 0.8659256468224065, "grad_norm": 0.27043646574020386, "learning_rate": 0.00016166412029722658, "loss": 11.6675, "step": 41367 }, { "epoch": 0.8659465795863686, "grad_norm": 0.2996695935726166, "learning_rate": 0.00016166239422334345, "loss": 11.6744, "step": 41368 }, { "epoch": 0.8659675123503308, "grad_norm": 0.3509141802787781, "learning_rate": 0.00016166066811981798, "loss": 11.6629, "step": 41369 }, { "epoch": 0.8659884451142928, "grad_norm": 0.26267150044441223, "learning_rate": 0.000161658941986651, "loss": 11.665, "step": 41370 }, { "epoch": 0.866009377878255, "grad_norm": 0.28163525462150574, "learning_rate": 0.00016165721582384334, "loss": 11.6716, "step": 41371 }, { "epoch": 0.8660303106422172, "grad_norm": 0.3435388207435608, "learning_rate": 0.00016165548963139575, "loss": 11.6677, "step": 41372 }, { "epoch": 0.8660512434061793, "grad_norm": 0.31218770146369934, "learning_rate": 0.0001616537634093092, "loss": 11.6641, "step": 41373 }, { "epoch": 0.8660721761701415, "grad_norm": 0.5709731578826904, "learning_rate": 0.00016165203715758444, "loss": 11.6935, "step": 41374 }, { "epoch": 0.8660931089341036, "grad_norm": 0.4933667778968811, "learning_rate": 0.0001616503108762223, "loss": 11.6939, "step": 41375 }, { "epoch": 0.8661140416980658, "grad_norm": 0.26251858472824097, "learning_rate": 0.00016164858456522361, "loss": 11.6645, "step": 41376 }, { "epoch": 0.8661349744620279, "grad_norm": 0.2964506447315216, "learning_rate": 0.00016164685822458923, "loss": 11.6618, "step": 41377 }, { "epoch": 0.8661559072259901, "grad_norm": 0.2296447604894638, "learning_rate": 0.00016164513185431997, "loss": 11.6731, "step": 41378 }, { "epoch": 0.8661768399899523, "grad_norm": 0.39543047547340393, "learning_rate": 0.00016164340545441665, "loss": 11.6705, "step": 41379 }, { "epoch": 0.8661977727539144, "grad_norm": 0.37743183970451355, "learning_rate": 0.00016164167902488013, "loss": 11.6787, "step": 41380 }, { "epoch": 0.8662187055178766, "grad_norm": 0.2955789864063263, "learning_rate": 0.0001616399525657112, "loss": 11.6536, "step": 41381 }, { "epoch": 0.8662396382818387, "grad_norm": 0.28106725215911865, "learning_rate": 0.0001616382260769107, "loss": 11.6557, "step": 41382 }, { "epoch": 0.8662605710458009, "grad_norm": 0.3328813910484314, "learning_rate": 0.00016163649955847949, "loss": 11.6625, "step": 41383 }, { "epoch": 0.866281503809763, "grad_norm": 0.3278651535511017, "learning_rate": 0.00016163477301041836, "loss": 11.661, "step": 41384 }, { "epoch": 0.8663024365737252, "grad_norm": 0.2929397225379944, "learning_rate": 0.00016163304643272816, "loss": 11.6643, "step": 41385 }, { "epoch": 0.8663233693376874, "grad_norm": 0.28930824995040894, "learning_rate": 0.00016163131982540972, "loss": 11.6877, "step": 41386 }, { "epoch": 0.8663443021016495, "grad_norm": 0.3580032289028168, "learning_rate": 0.0001616295931884639, "loss": 11.6879, "step": 41387 }, { "epoch": 0.8663652348656117, "grad_norm": 0.3976117968559265, "learning_rate": 0.00016162786652189145, "loss": 11.6736, "step": 41388 }, { "epoch": 0.8663861676295738, "grad_norm": 0.30706506967544556, "learning_rate": 0.0001616261398256933, "loss": 11.6795, "step": 41389 }, { "epoch": 0.866407100393536, "grad_norm": 0.3733194172382355, "learning_rate": 0.00016162441309987023, "loss": 11.6525, "step": 41390 }, { "epoch": 0.8664280331574982, "grad_norm": 0.29479914903640747, "learning_rate": 0.00016162268634442302, "loss": 11.6548, "step": 41391 }, { "epoch": 0.8664489659214603, "grad_norm": 0.2945954501628876, "learning_rate": 0.00016162095955935256, "loss": 11.6579, "step": 41392 }, { "epoch": 0.8664698986854225, "grad_norm": 0.33778345584869385, "learning_rate": 0.0001616192327446597, "loss": 11.6699, "step": 41393 }, { "epoch": 0.8664908314493845, "grad_norm": 0.317091166973114, "learning_rate": 0.0001616175059003452, "loss": 11.661, "step": 41394 }, { "epoch": 0.8665117642133467, "grad_norm": 0.33334237337112427, "learning_rate": 0.00016161577902641, "loss": 11.6721, "step": 41395 }, { "epoch": 0.8665326969773088, "grad_norm": 0.2973174452781677, "learning_rate": 0.0001616140521228548, "loss": 11.6703, "step": 41396 }, { "epoch": 0.866553629741271, "grad_norm": 0.2863679528236389, "learning_rate": 0.00016161232518968052, "loss": 11.6484, "step": 41397 }, { "epoch": 0.8665745625052332, "grad_norm": 0.32724037766456604, "learning_rate": 0.00016161059822688794, "loss": 11.6719, "step": 41398 }, { "epoch": 0.8665954952691953, "grad_norm": 0.3018515408039093, "learning_rate": 0.00016160887123447791, "loss": 11.6609, "step": 41399 }, { "epoch": 0.8666164280331575, "grad_norm": 0.32928672432899475, "learning_rate": 0.00016160714421245128, "loss": 11.6604, "step": 41400 }, { "epoch": 0.8666373607971196, "grad_norm": 0.27159541845321655, "learning_rate": 0.00016160541716080885, "loss": 11.652, "step": 41401 }, { "epoch": 0.8666582935610818, "grad_norm": 0.31078559160232544, "learning_rate": 0.00016160369007955146, "loss": 11.6637, "step": 41402 }, { "epoch": 0.8666792263250439, "grad_norm": 0.31439587473869324, "learning_rate": 0.00016160196296867993, "loss": 11.6762, "step": 41403 }, { "epoch": 0.8667001590890061, "grad_norm": 0.33439305424690247, "learning_rate": 0.00016160023582819513, "loss": 11.656, "step": 41404 }, { "epoch": 0.8667210918529683, "grad_norm": 0.3476206958293915, "learning_rate": 0.00016159850865809784, "loss": 11.684, "step": 41405 }, { "epoch": 0.8667420246169304, "grad_norm": 0.3016293942928314, "learning_rate": 0.00016159678145838893, "loss": 11.6654, "step": 41406 }, { "epoch": 0.8667629573808926, "grad_norm": 0.4250672459602356, "learning_rate": 0.0001615950542290692, "loss": 11.6496, "step": 41407 }, { "epoch": 0.8667838901448547, "grad_norm": 0.38163331151008606, "learning_rate": 0.0001615933269701395, "loss": 11.6644, "step": 41408 }, { "epoch": 0.8668048229088169, "grad_norm": 0.3241581320762634, "learning_rate": 0.00016159159968160065, "loss": 11.6661, "step": 41409 }, { "epoch": 0.8668257556727791, "grad_norm": 0.2783453166484833, "learning_rate": 0.00016158987236345346, "loss": 11.672, "step": 41410 }, { "epoch": 0.8668466884367412, "grad_norm": 0.2903721034526825, "learning_rate": 0.0001615881450156988, "loss": 11.6931, "step": 41411 }, { "epoch": 0.8668676212007034, "grad_norm": 0.36737340688705444, "learning_rate": 0.00016158641763833754, "loss": 11.671, "step": 41412 }, { "epoch": 0.8668885539646655, "grad_norm": 0.32250410318374634, "learning_rate": 0.00016158469023137037, "loss": 11.6746, "step": 41413 }, { "epoch": 0.8669094867286277, "grad_norm": 0.37114304304122925, "learning_rate": 0.00016158296279479825, "loss": 11.6757, "step": 41414 }, { "epoch": 0.8669304194925898, "grad_norm": 0.3391039967536926, "learning_rate": 0.00016158123532862196, "loss": 11.6815, "step": 41415 }, { "epoch": 0.866951352256552, "grad_norm": 0.3377164304256439, "learning_rate": 0.0001615795078328423, "loss": 11.677, "step": 41416 }, { "epoch": 0.8669722850205142, "grad_norm": 0.3287242650985718, "learning_rate": 0.0001615777803074602, "loss": 11.6794, "step": 41417 }, { "epoch": 0.8669932177844762, "grad_norm": 0.39371150732040405, "learning_rate": 0.00016157605275247638, "loss": 11.6631, "step": 41418 }, { "epoch": 0.8670141505484384, "grad_norm": 0.3272590637207031, "learning_rate": 0.00016157432516789174, "loss": 11.6558, "step": 41419 }, { "epoch": 0.8670350833124005, "grad_norm": 0.331348717212677, "learning_rate": 0.0001615725975537071, "loss": 11.6593, "step": 41420 }, { "epoch": 0.8670560160763627, "grad_norm": 0.34651440382003784, "learning_rate": 0.00016157086990992324, "loss": 11.6707, "step": 41421 }, { "epoch": 0.8670769488403248, "grad_norm": 0.27973681688308716, "learning_rate": 0.00016156914223654106, "loss": 11.6461, "step": 41422 }, { "epoch": 0.867097881604287, "grad_norm": 0.359145849943161, "learning_rate": 0.00016156741453356133, "loss": 11.6673, "step": 41423 }, { "epoch": 0.8671188143682492, "grad_norm": 0.3080633878707886, "learning_rate": 0.00016156568680098494, "loss": 11.6503, "step": 41424 }, { "epoch": 0.8671397471322113, "grad_norm": 0.3087426722049713, "learning_rate": 0.00016156395903881268, "loss": 11.6716, "step": 41425 }, { "epoch": 0.8671606798961735, "grad_norm": 0.3121113181114197, "learning_rate": 0.0001615622312470454, "loss": 11.685, "step": 41426 }, { "epoch": 0.8671816126601356, "grad_norm": 0.4258309304714203, "learning_rate": 0.00016156050342568392, "loss": 11.6567, "step": 41427 }, { "epoch": 0.8672025454240978, "grad_norm": 0.2678104639053345, "learning_rate": 0.00016155877557472905, "loss": 11.6715, "step": 41428 }, { "epoch": 0.86722347818806, "grad_norm": 0.2705956697463989, "learning_rate": 0.00016155704769418166, "loss": 11.6781, "step": 41429 }, { "epoch": 0.8672444109520221, "grad_norm": 0.26367032527923584, "learning_rate": 0.00016155531978404256, "loss": 11.6765, "step": 41430 }, { "epoch": 0.8672653437159843, "grad_norm": 0.2913586497306824, "learning_rate": 0.0001615535918443126, "loss": 11.661, "step": 41431 }, { "epoch": 0.8672862764799464, "grad_norm": 0.27000510692596436, "learning_rate": 0.00016155186387499258, "loss": 11.6836, "step": 41432 }, { "epoch": 0.8673072092439086, "grad_norm": 0.3117022216320038, "learning_rate": 0.00016155013587608333, "loss": 11.6763, "step": 41433 }, { "epoch": 0.8673281420078707, "grad_norm": 0.3137430250644684, "learning_rate": 0.00016154840784758572, "loss": 11.6675, "step": 41434 }, { "epoch": 0.8673490747718329, "grad_norm": 0.2693456709384918, "learning_rate": 0.00016154667978950053, "loss": 11.6474, "step": 41435 }, { "epoch": 0.8673700075357951, "grad_norm": 0.3196012079715729, "learning_rate": 0.00016154495170182863, "loss": 11.6497, "step": 41436 }, { "epoch": 0.8673909402997572, "grad_norm": 0.3293150067329407, "learning_rate": 0.00016154322358457086, "loss": 11.6669, "step": 41437 }, { "epoch": 0.8674118730637194, "grad_norm": 0.3425438106060028, "learning_rate": 0.000161541495437728, "loss": 11.6729, "step": 41438 }, { "epoch": 0.8674328058276815, "grad_norm": 0.2548070549964905, "learning_rate": 0.00016153976726130095, "loss": 11.6632, "step": 41439 }, { "epoch": 0.8674537385916437, "grad_norm": 0.3795400559902191, "learning_rate": 0.00016153803905529047, "loss": 11.668, "step": 41440 }, { "epoch": 0.8674746713556057, "grad_norm": 0.26593053340911865, "learning_rate": 0.00016153631081969742, "loss": 11.6608, "step": 41441 }, { "epoch": 0.8674956041195679, "grad_norm": 0.2488672286272049, "learning_rate": 0.00016153458255452265, "loss": 11.6626, "step": 41442 }, { "epoch": 0.8675165368835301, "grad_norm": 0.29655563831329346, "learning_rate": 0.00016153285425976694, "loss": 11.6472, "step": 41443 }, { "epoch": 0.8675374696474922, "grad_norm": 0.29274511337280273, "learning_rate": 0.0001615311259354312, "loss": 11.6624, "step": 41444 }, { "epoch": 0.8675584024114544, "grad_norm": 0.32657384872436523, "learning_rate": 0.00016152939758151615, "loss": 11.6755, "step": 41445 }, { "epoch": 0.8675793351754165, "grad_norm": 0.4060591459274292, "learning_rate": 0.00016152766919802275, "loss": 11.67, "step": 41446 }, { "epoch": 0.8676002679393787, "grad_norm": 0.2616925835609436, "learning_rate": 0.00016152594078495174, "loss": 11.662, "step": 41447 }, { "epoch": 0.8676212007033409, "grad_norm": 0.7945376038551331, "learning_rate": 0.00016152421234230397, "loss": 11.6662, "step": 41448 }, { "epoch": 0.867642133467303, "grad_norm": 0.32092881202697754, "learning_rate": 0.00016152248387008028, "loss": 11.679, "step": 41449 }, { "epoch": 0.8676630662312652, "grad_norm": 0.434101939201355, "learning_rate": 0.0001615207553682815, "loss": 11.6634, "step": 41450 }, { "epoch": 0.8676839989952273, "grad_norm": 0.348898708820343, "learning_rate": 0.0001615190268369085, "loss": 11.6729, "step": 41451 }, { "epoch": 0.8677049317591895, "grad_norm": 0.3590110242366791, "learning_rate": 0.00016151729827596203, "loss": 11.6758, "step": 41452 }, { "epoch": 0.8677258645231516, "grad_norm": 0.33565089106559753, "learning_rate": 0.00016151556968544296, "loss": 11.6733, "step": 41453 }, { "epoch": 0.8677467972871138, "grad_norm": 0.4316437840461731, "learning_rate": 0.0001615138410653521, "loss": 11.6746, "step": 41454 }, { "epoch": 0.867767730051076, "grad_norm": 0.3930342197418213, "learning_rate": 0.00016151211241569036, "loss": 11.6558, "step": 41455 }, { "epoch": 0.8677886628150381, "grad_norm": 0.23907741904258728, "learning_rate": 0.00016151038373645848, "loss": 11.6697, "step": 41456 }, { "epoch": 0.8678095955790003, "grad_norm": 0.2925017476081848, "learning_rate": 0.00016150865502765734, "loss": 11.6557, "step": 41457 }, { "epoch": 0.8678305283429624, "grad_norm": 0.31238192319869995, "learning_rate": 0.00016150692628928777, "loss": 11.6769, "step": 41458 }, { "epoch": 0.8678514611069246, "grad_norm": 0.319037526845932, "learning_rate": 0.00016150519752135056, "loss": 11.6577, "step": 41459 }, { "epoch": 0.8678723938708867, "grad_norm": 0.3813316226005554, "learning_rate": 0.0001615034687238466, "loss": 11.6744, "step": 41460 }, { "epoch": 0.8678933266348489, "grad_norm": 0.3097968101501465, "learning_rate": 0.00016150173989677664, "loss": 11.6804, "step": 41461 }, { "epoch": 0.8679142593988111, "grad_norm": 0.28468066453933716, "learning_rate": 0.0001615000110401416, "loss": 11.6701, "step": 41462 }, { "epoch": 0.8679351921627732, "grad_norm": 0.31696945428848267, "learning_rate": 0.00016149828215394226, "loss": 11.6689, "step": 41463 }, { "epoch": 0.8679561249267354, "grad_norm": 0.24867738783359528, "learning_rate": 0.00016149655323817948, "loss": 11.6614, "step": 41464 }, { "epoch": 0.8679770576906974, "grad_norm": 0.3486833870410919, "learning_rate": 0.00016149482429285406, "loss": 11.6534, "step": 41465 }, { "epoch": 0.8679979904546596, "grad_norm": 0.41042855381965637, "learning_rate": 0.00016149309531796684, "loss": 11.6605, "step": 41466 }, { "epoch": 0.8680189232186218, "grad_norm": 0.2915569245815277, "learning_rate": 0.00016149136631351865, "loss": 11.6589, "step": 41467 }, { "epoch": 0.8680398559825839, "grad_norm": 0.2714070677757263, "learning_rate": 0.00016148963727951037, "loss": 11.6627, "step": 41468 }, { "epoch": 0.8680607887465461, "grad_norm": 0.403701514005661, "learning_rate": 0.00016148790821594275, "loss": 11.6603, "step": 41469 }, { "epoch": 0.8680817215105082, "grad_norm": 0.4343242943286896, "learning_rate": 0.00016148617912281664, "loss": 11.683, "step": 41470 }, { "epoch": 0.8681026542744704, "grad_norm": 0.3476804792881012, "learning_rate": 0.00016148445000013296, "loss": 11.6608, "step": 41471 }, { "epoch": 0.8681235870384325, "grad_norm": 0.33034640550613403, "learning_rate": 0.0001614827208478924, "loss": 11.677, "step": 41472 }, { "epoch": 0.8681445198023947, "grad_norm": 0.3040459156036377, "learning_rate": 0.00016148099166609593, "loss": 11.6625, "step": 41473 }, { "epoch": 0.8681654525663569, "grad_norm": 0.44908276200294495, "learning_rate": 0.00016147926245474426, "loss": 11.6596, "step": 41474 }, { "epoch": 0.868186385330319, "grad_norm": 0.29201027750968933, "learning_rate": 0.00016147753321383833, "loss": 11.662, "step": 41475 }, { "epoch": 0.8682073180942812, "grad_norm": 0.25806131958961487, "learning_rate": 0.00016147580394337886, "loss": 11.6691, "step": 41476 }, { "epoch": 0.8682282508582433, "grad_norm": 0.32027456164360046, "learning_rate": 0.00016147407464336676, "loss": 11.6527, "step": 41477 }, { "epoch": 0.8682491836222055, "grad_norm": 0.23529386520385742, "learning_rate": 0.00016147234531380288, "loss": 11.6707, "step": 41478 }, { "epoch": 0.8682701163861676, "grad_norm": 0.36503130197525024, "learning_rate": 0.00016147061595468795, "loss": 11.6723, "step": 41479 }, { "epoch": 0.8682910491501298, "grad_norm": 0.29418614506721497, "learning_rate": 0.0001614688865660229, "loss": 11.683, "step": 41480 }, { "epoch": 0.868311981914092, "grad_norm": 0.2893274426460266, "learning_rate": 0.0001614671571478085, "loss": 11.669, "step": 41481 }, { "epoch": 0.8683329146780541, "grad_norm": 0.328191876411438, "learning_rate": 0.00016146542770004561, "loss": 11.6766, "step": 41482 }, { "epoch": 0.8683538474420163, "grad_norm": 0.3197733759880066, "learning_rate": 0.00016146369822273512, "loss": 11.6462, "step": 41483 }, { "epoch": 0.8683747802059784, "grad_norm": 0.3862401843070984, "learning_rate": 0.00016146196871587773, "loss": 11.675, "step": 41484 }, { "epoch": 0.8683957129699406, "grad_norm": 0.4127243459224701, "learning_rate": 0.00016146023917947434, "loss": 11.6552, "step": 41485 }, { "epoch": 0.8684166457339028, "grad_norm": 0.3188483417034149, "learning_rate": 0.0001614585096135258, "loss": 11.672, "step": 41486 }, { "epoch": 0.8684375784978648, "grad_norm": 0.24133864045143127, "learning_rate": 0.00016145678001803292, "loss": 11.6632, "step": 41487 }, { "epoch": 0.868458511261827, "grad_norm": 0.2817917466163635, "learning_rate": 0.00016145505039299654, "loss": 11.6723, "step": 41488 }, { "epoch": 0.8684794440257891, "grad_norm": 0.3608851432800293, "learning_rate": 0.00016145332073841747, "loss": 11.6653, "step": 41489 }, { "epoch": 0.8685003767897513, "grad_norm": 0.303073525428772, "learning_rate": 0.00016145159105429658, "loss": 11.6659, "step": 41490 }, { "epoch": 0.8685213095537134, "grad_norm": 0.29686689376831055, "learning_rate": 0.0001614498613406347, "loss": 11.6866, "step": 41491 }, { "epoch": 0.8685422423176756, "grad_norm": 0.29902541637420654, "learning_rate": 0.00016144813159743257, "loss": 11.6681, "step": 41492 }, { "epoch": 0.8685631750816378, "grad_norm": 0.3256054222583771, "learning_rate": 0.00016144640182469115, "loss": 11.6875, "step": 41493 }, { "epoch": 0.8685841078455999, "grad_norm": 0.27176785469055176, "learning_rate": 0.00016144467202241117, "loss": 11.6822, "step": 41494 }, { "epoch": 0.8686050406095621, "grad_norm": 0.2493286281824112, "learning_rate": 0.00016144294219059356, "loss": 11.6816, "step": 41495 }, { "epoch": 0.8686259733735242, "grad_norm": 0.2970162034034729, "learning_rate": 0.00016144121232923903, "loss": 11.6715, "step": 41496 }, { "epoch": 0.8686469061374864, "grad_norm": 0.3521029055118561, "learning_rate": 0.00016143948243834853, "loss": 11.6712, "step": 41497 }, { "epoch": 0.8686678389014485, "grad_norm": 0.3267803490161896, "learning_rate": 0.00016143775251792285, "loss": 11.6639, "step": 41498 }, { "epoch": 0.8686887716654107, "grad_norm": 0.33362486958503723, "learning_rate": 0.00016143602256796276, "loss": 11.6582, "step": 41499 }, { "epoch": 0.8687097044293729, "grad_norm": 0.3663404881954193, "learning_rate": 0.00016143429258846917, "loss": 11.6875, "step": 41500 }, { "epoch": 0.868730637193335, "grad_norm": 0.2848415970802307, "learning_rate": 0.00016143256257944287, "loss": 11.6589, "step": 41501 }, { "epoch": 0.8687515699572972, "grad_norm": 0.2643759846687317, "learning_rate": 0.0001614308325408847, "loss": 11.6802, "step": 41502 }, { "epoch": 0.8687725027212593, "grad_norm": 0.3181513249874115, "learning_rate": 0.00016142910247279554, "loss": 11.6736, "step": 41503 }, { "epoch": 0.8687934354852215, "grad_norm": 0.3247978687286377, "learning_rate": 0.00016142737237517614, "loss": 11.6761, "step": 41504 }, { "epoch": 0.8688143682491837, "grad_norm": 0.3437802791595459, "learning_rate": 0.0001614256422480274, "loss": 11.6736, "step": 41505 }, { "epoch": 0.8688353010131458, "grad_norm": 0.3391469120979309, "learning_rate": 0.0001614239120913501, "loss": 11.6805, "step": 41506 }, { "epoch": 0.868856233777108, "grad_norm": 0.3029596209526062, "learning_rate": 0.0001614221819051451, "loss": 11.6788, "step": 41507 }, { "epoch": 0.8688771665410701, "grad_norm": 0.3438583314418793, "learning_rate": 0.0001614204516894132, "loss": 11.679, "step": 41508 }, { "epoch": 0.8688980993050323, "grad_norm": 0.2938776910305023, "learning_rate": 0.0001614187214441553, "loss": 11.6646, "step": 41509 }, { "epoch": 0.8689190320689943, "grad_norm": 0.33099231123924255, "learning_rate": 0.0001614169911693722, "loss": 11.6627, "step": 41510 }, { "epoch": 0.8689399648329565, "grad_norm": 0.43891656398773193, "learning_rate": 0.00016141526086506467, "loss": 11.6946, "step": 41511 }, { "epoch": 0.8689608975969187, "grad_norm": 0.23163777589797974, "learning_rate": 0.00016141353053123362, "loss": 11.6724, "step": 41512 }, { "epoch": 0.8689818303608808, "grad_norm": 0.26076045632362366, "learning_rate": 0.00016141180016787985, "loss": 11.6734, "step": 41513 }, { "epoch": 0.869002763124843, "grad_norm": 0.2630177140235901, "learning_rate": 0.0001614100697750042, "loss": 11.6713, "step": 41514 }, { "epoch": 0.8690236958888051, "grad_norm": 0.3436269760131836, "learning_rate": 0.0001614083393526075, "loss": 11.6781, "step": 41515 }, { "epoch": 0.8690446286527673, "grad_norm": 0.4876256287097931, "learning_rate": 0.00016140660890069056, "loss": 11.6567, "step": 41516 }, { "epoch": 0.8690655614167294, "grad_norm": 0.3130044937133789, "learning_rate": 0.0001614048784192543, "loss": 11.6668, "step": 41517 }, { "epoch": 0.8690864941806916, "grad_norm": 0.3114302158355713, "learning_rate": 0.00016140314790829942, "loss": 11.677, "step": 41518 }, { "epoch": 0.8691074269446538, "grad_norm": 0.2895015776157379, "learning_rate": 0.00016140141736782683, "loss": 11.67, "step": 41519 }, { "epoch": 0.8691283597086159, "grad_norm": 0.4243984818458557, "learning_rate": 0.00016139968679783736, "loss": 11.7006, "step": 41520 }, { "epoch": 0.8691492924725781, "grad_norm": 0.3206726014614105, "learning_rate": 0.00016139795619833183, "loss": 11.6568, "step": 41521 }, { "epoch": 0.8691702252365402, "grad_norm": 0.29186850786209106, "learning_rate": 0.00016139622556931107, "loss": 11.6739, "step": 41522 }, { "epoch": 0.8691911580005024, "grad_norm": 0.427033931016922, "learning_rate": 0.0001613944949107759, "loss": 11.676, "step": 41523 }, { "epoch": 0.8692120907644645, "grad_norm": 0.31378185749053955, "learning_rate": 0.00016139276422272718, "loss": 11.6699, "step": 41524 }, { "epoch": 0.8692330235284267, "grad_norm": 0.24703164398670197, "learning_rate": 0.00016139103350516572, "loss": 11.6561, "step": 41525 }, { "epoch": 0.8692539562923889, "grad_norm": 0.3042919337749481, "learning_rate": 0.00016138930275809234, "loss": 11.6685, "step": 41526 }, { "epoch": 0.869274889056351, "grad_norm": 0.34096676111221313, "learning_rate": 0.00016138757198150793, "loss": 11.6617, "step": 41527 }, { "epoch": 0.8692958218203132, "grad_norm": 0.30268964171409607, "learning_rate": 0.00016138584117541326, "loss": 11.671, "step": 41528 }, { "epoch": 0.8693167545842753, "grad_norm": 0.3097028136253357, "learning_rate": 0.00016138411033980921, "loss": 11.6752, "step": 41529 }, { "epoch": 0.8693376873482375, "grad_norm": 0.29717588424682617, "learning_rate": 0.00016138237947469657, "loss": 11.6505, "step": 41530 }, { "epoch": 0.8693586201121997, "grad_norm": 0.28255823254585266, "learning_rate": 0.00016138064858007618, "loss": 11.6613, "step": 41531 }, { "epoch": 0.8693795528761618, "grad_norm": 0.3499869406223297, "learning_rate": 0.0001613789176559489, "loss": 11.6752, "step": 41532 }, { "epoch": 0.869400485640124, "grad_norm": 0.3701668977737427, "learning_rate": 0.00016137718670231554, "loss": 11.665, "step": 41533 }, { "epoch": 0.869421418404086, "grad_norm": 0.32956215739250183, "learning_rate": 0.00016137545571917695, "loss": 11.6869, "step": 41534 }, { "epoch": 0.8694423511680482, "grad_norm": 0.38620907068252563, "learning_rate": 0.00016137372470653393, "loss": 11.6683, "step": 41535 }, { "epoch": 0.8694632839320103, "grad_norm": 0.28618305921554565, "learning_rate": 0.00016137199366438734, "loss": 11.6679, "step": 41536 }, { "epoch": 0.8694842166959725, "grad_norm": 0.3080163598060608, "learning_rate": 0.00016137026259273798, "loss": 11.6845, "step": 41537 }, { "epoch": 0.8695051494599347, "grad_norm": 0.3041781783103943, "learning_rate": 0.00016136853149158672, "loss": 11.6433, "step": 41538 }, { "epoch": 0.8695260822238968, "grad_norm": 0.3745245933532715, "learning_rate": 0.0001613668003609344, "loss": 11.6654, "step": 41539 }, { "epoch": 0.869547014987859, "grad_norm": 0.25953197479248047, "learning_rate": 0.0001613650692007818, "loss": 11.6563, "step": 41540 }, { "epoch": 0.8695679477518211, "grad_norm": 0.31507834792137146, "learning_rate": 0.00016136333801112978, "loss": 11.6735, "step": 41541 }, { "epoch": 0.8695888805157833, "grad_norm": 0.2659655213356018, "learning_rate": 0.00016136160679197921, "loss": 11.6605, "step": 41542 }, { "epoch": 0.8696098132797454, "grad_norm": 0.3020716905593872, "learning_rate": 0.00016135987554333084, "loss": 11.674, "step": 41543 }, { "epoch": 0.8696307460437076, "grad_norm": 0.35593080520629883, "learning_rate": 0.00016135814426518555, "loss": 11.6608, "step": 41544 }, { "epoch": 0.8696516788076698, "grad_norm": 0.3910183012485504, "learning_rate": 0.0001613564129575442, "loss": 11.6546, "step": 41545 }, { "epoch": 0.8696726115716319, "grad_norm": 0.3121766448020935, "learning_rate": 0.00016135468162040758, "loss": 11.6719, "step": 41546 }, { "epoch": 0.8696935443355941, "grad_norm": 0.31138065457344055, "learning_rate": 0.00016135295025377654, "loss": 11.6503, "step": 41547 }, { "epoch": 0.8697144770995562, "grad_norm": 0.3188267648220062, "learning_rate": 0.0001613512188576519, "loss": 11.6721, "step": 41548 }, { "epoch": 0.8697354098635184, "grad_norm": 0.30248701572418213, "learning_rate": 0.00016134948743203452, "loss": 11.6596, "step": 41549 }, { "epoch": 0.8697563426274806, "grad_norm": 0.35272178053855896, "learning_rate": 0.00016134775597692518, "loss": 11.6658, "step": 41550 }, { "epoch": 0.8697772753914427, "grad_norm": 0.3459095358848572, "learning_rate": 0.00016134602449232474, "loss": 11.6781, "step": 41551 }, { "epoch": 0.8697982081554049, "grad_norm": 0.23297958076000214, "learning_rate": 0.00016134429297823405, "loss": 11.6611, "step": 41552 }, { "epoch": 0.869819140919367, "grad_norm": 0.30559325218200684, "learning_rate": 0.00016134256143465395, "loss": 11.6756, "step": 41553 }, { "epoch": 0.8698400736833292, "grad_norm": 0.3017711639404297, "learning_rate": 0.00016134082986158524, "loss": 11.6586, "step": 41554 }, { "epoch": 0.8698610064472913, "grad_norm": 0.2914983034133911, "learning_rate": 0.00016133909825902874, "loss": 11.6772, "step": 41555 }, { "epoch": 0.8698819392112535, "grad_norm": 0.2332545965909958, "learning_rate": 0.00016133736662698537, "loss": 11.6741, "step": 41556 }, { "epoch": 0.8699028719752157, "grad_norm": 0.29393574595451355, "learning_rate": 0.00016133563496545584, "loss": 11.6622, "step": 41557 }, { "epoch": 0.8699238047391777, "grad_norm": 0.32295170426368713, "learning_rate": 0.00016133390327444102, "loss": 11.6768, "step": 41558 }, { "epoch": 0.8699447375031399, "grad_norm": 0.3596092760562897, "learning_rate": 0.00016133217155394182, "loss": 11.6848, "step": 41559 }, { "epoch": 0.869965670267102, "grad_norm": 0.29875439405441284, "learning_rate": 0.00016133043980395901, "loss": 11.6529, "step": 41560 }, { "epoch": 0.8699866030310642, "grad_norm": 0.2786765396595001, "learning_rate": 0.00016132870802449342, "loss": 11.6655, "step": 41561 }, { "epoch": 0.8700075357950263, "grad_norm": 0.3700396418571472, "learning_rate": 0.00016132697621554587, "loss": 11.6804, "step": 41562 }, { "epoch": 0.8700284685589885, "grad_norm": 0.27975407242774963, "learning_rate": 0.00016132524437711723, "loss": 11.6568, "step": 41563 }, { "epoch": 0.8700494013229507, "grad_norm": 0.29016953706741333, "learning_rate": 0.0001613235125092083, "loss": 11.65, "step": 41564 }, { "epoch": 0.8700703340869128, "grad_norm": 0.32392486929893494, "learning_rate": 0.00016132178061181994, "loss": 11.6616, "step": 41565 }, { "epoch": 0.870091266850875, "grad_norm": 0.28889381885528564, "learning_rate": 0.000161320048684953, "loss": 11.6871, "step": 41566 }, { "epoch": 0.8701121996148371, "grad_norm": 0.2579106390476227, "learning_rate": 0.00016131831672860824, "loss": 11.6726, "step": 41567 }, { "epoch": 0.8701331323787993, "grad_norm": 0.3657260239124298, "learning_rate": 0.00016131658474278658, "loss": 11.6761, "step": 41568 }, { "epoch": 0.8701540651427615, "grad_norm": 0.2552708089351654, "learning_rate": 0.00016131485272748877, "loss": 11.6544, "step": 41569 }, { "epoch": 0.8701749979067236, "grad_norm": 0.4157559871673584, "learning_rate": 0.0001613131206827157, "loss": 11.6767, "step": 41570 }, { "epoch": 0.8701959306706858, "grad_norm": 0.29196834564208984, "learning_rate": 0.00016131138860846816, "loss": 11.6583, "step": 41571 }, { "epoch": 0.8702168634346479, "grad_norm": 0.2790775001049042, "learning_rate": 0.00016130965650474702, "loss": 11.6683, "step": 41572 }, { "epoch": 0.8702377961986101, "grad_norm": 0.2827301621437073, "learning_rate": 0.00016130792437155314, "loss": 11.6645, "step": 41573 }, { "epoch": 0.8702587289625722, "grad_norm": 0.30887123942375183, "learning_rate": 0.00016130619220888726, "loss": 11.6686, "step": 41574 }, { "epoch": 0.8702796617265344, "grad_norm": 0.35792234539985657, "learning_rate": 0.00016130446001675025, "loss": 11.6687, "step": 41575 }, { "epoch": 0.8703005944904966, "grad_norm": 0.33739611506462097, "learning_rate": 0.00016130272779514302, "loss": 11.6686, "step": 41576 }, { "epoch": 0.8703215272544587, "grad_norm": 0.2388240098953247, "learning_rate": 0.0001613009955440663, "loss": 11.6535, "step": 41577 }, { "epoch": 0.8703424600184209, "grad_norm": 0.35718604922294617, "learning_rate": 0.00016129926326352098, "loss": 11.6655, "step": 41578 }, { "epoch": 0.870363392782383, "grad_norm": 0.3811154067516327, "learning_rate": 0.00016129753095350786, "loss": 11.6887, "step": 41579 }, { "epoch": 0.8703843255463452, "grad_norm": 0.26992693543434143, "learning_rate": 0.00016129579861402777, "loss": 11.6647, "step": 41580 }, { "epoch": 0.8704052583103072, "grad_norm": 0.3610212802886963, "learning_rate": 0.0001612940662450816, "loss": 11.6682, "step": 41581 }, { "epoch": 0.8704261910742694, "grad_norm": 0.31851646304130554, "learning_rate": 0.00016129233384667012, "loss": 11.6577, "step": 41582 }, { "epoch": 0.8704471238382316, "grad_norm": 0.35601136088371277, "learning_rate": 0.0001612906014187942, "loss": 11.6733, "step": 41583 }, { "epoch": 0.8704680566021937, "grad_norm": 0.3631073236465454, "learning_rate": 0.00016128886896145468, "loss": 11.679, "step": 41584 }, { "epoch": 0.8704889893661559, "grad_norm": 0.2730477452278137, "learning_rate": 0.00016128713647465236, "loss": 11.6758, "step": 41585 }, { "epoch": 0.870509922130118, "grad_norm": 0.2792711853981018, "learning_rate": 0.00016128540395838806, "loss": 11.6727, "step": 41586 }, { "epoch": 0.8705308548940802, "grad_norm": 0.29875972867012024, "learning_rate": 0.00016128367141266266, "loss": 11.6565, "step": 41587 }, { "epoch": 0.8705517876580424, "grad_norm": 0.3656333386898041, "learning_rate": 0.000161281938837477, "loss": 11.6678, "step": 41588 }, { "epoch": 0.8705727204220045, "grad_norm": 0.27831169962882996, "learning_rate": 0.0001612802062328318, "loss": 11.6755, "step": 41589 }, { "epoch": 0.8705936531859667, "grad_norm": 0.2674487829208374, "learning_rate": 0.00016127847359872805, "loss": 11.6663, "step": 41590 }, { "epoch": 0.8706145859499288, "grad_norm": 0.2869369089603424, "learning_rate": 0.00016127674093516646, "loss": 11.6719, "step": 41591 }, { "epoch": 0.870635518713891, "grad_norm": 0.33078640699386597, "learning_rate": 0.00016127500824214796, "loss": 11.6676, "step": 41592 }, { "epoch": 0.8706564514778531, "grad_norm": 0.30321261286735535, "learning_rate": 0.00016127327551967332, "loss": 11.6672, "step": 41593 }, { "epoch": 0.8706773842418153, "grad_norm": 0.3037579655647278, "learning_rate": 0.00016127154276774337, "loss": 11.6576, "step": 41594 }, { "epoch": 0.8706983170057775, "grad_norm": 0.33651936054229736, "learning_rate": 0.00016126980998635898, "loss": 11.6705, "step": 41595 }, { "epoch": 0.8707192497697396, "grad_norm": 0.3303879201412201, "learning_rate": 0.00016126807717552094, "loss": 11.6527, "step": 41596 }, { "epoch": 0.8707401825337018, "grad_norm": 0.28696197271347046, "learning_rate": 0.0001612663443352301, "loss": 11.6632, "step": 41597 }, { "epoch": 0.8707611152976639, "grad_norm": 0.2608688175678253, "learning_rate": 0.00016126461146548737, "loss": 11.6573, "step": 41598 }, { "epoch": 0.8707820480616261, "grad_norm": 0.3687589764595032, "learning_rate": 0.00016126287856629346, "loss": 11.6626, "step": 41599 }, { "epoch": 0.8708029808255882, "grad_norm": 0.3020657002925873, "learning_rate": 0.00016126114563764927, "loss": 11.6807, "step": 41600 }, { "epoch": 0.8708239135895504, "grad_norm": 0.44925814867019653, "learning_rate": 0.0001612594126795556, "loss": 11.6548, "step": 41601 }, { "epoch": 0.8708448463535126, "grad_norm": 0.26925304532051086, "learning_rate": 0.00016125767969201333, "loss": 11.6742, "step": 41602 }, { "epoch": 0.8708657791174746, "grad_norm": 0.2897508144378662, "learning_rate": 0.00016125594667502327, "loss": 11.6735, "step": 41603 }, { "epoch": 0.8708867118814368, "grad_norm": 0.23880918323993683, "learning_rate": 0.00016125421362858623, "loss": 11.6802, "step": 41604 }, { "epoch": 0.8709076446453989, "grad_norm": 0.4357987642288208, "learning_rate": 0.00016125248055270307, "loss": 11.6946, "step": 41605 }, { "epoch": 0.8709285774093611, "grad_norm": 0.29751673340797424, "learning_rate": 0.0001612507474473746, "loss": 11.6833, "step": 41606 }, { "epoch": 0.8709495101733233, "grad_norm": 0.36800962686538696, "learning_rate": 0.00016124901431260167, "loss": 11.6819, "step": 41607 }, { "epoch": 0.8709704429372854, "grad_norm": 0.29922518134117126, "learning_rate": 0.00016124728114838511, "loss": 11.6769, "step": 41608 }, { "epoch": 0.8709913757012476, "grad_norm": 0.3170180916786194, "learning_rate": 0.00016124554795472577, "loss": 11.6601, "step": 41609 }, { "epoch": 0.8710123084652097, "grad_norm": 0.303254097700119, "learning_rate": 0.00016124381473162447, "loss": 11.6654, "step": 41610 }, { "epoch": 0.8710332412291719, "grad_norm": 0.2930491864681244, "learning_rate": 0.00016124208147908205, "loss": 11.6587, "step": 41611 }, { "epoch": 0.871054173993134, "grad_norm": 0.31312939524650574, "learning_rate": 0.0001612403481970993, "loss": 11.6724, "step": 41612 }, { "epoch": 0.8710751067570962, "grad_norm": 0.34902817010879517, "learning_rate": 0.0001612386148856771, "loss": 11.6557, "step": 41613 }, { "epoch": 0.8710960395210584, "grad_norm": 0.39601171016693115, "learning_rate": 0.00016123688154481627, "loss": 11.6744, "step": 41614 }, { "epoch": 0.8711169722850205, "grad_norm": 0.37453556060791016, "learning_rate": 0.00016123514817451766, "loss": 11.6703, "step": 41615 }, { "epoch": 0.8711379050489827, "grad_norm": 0.33591416478157043, "learning_rate": 0.00016123341477478206, "loss": 11.6704, "step": 41616 }, { "epoch": 0.8711588378129448, "grad_norm": 0.29099443554878235, "learning_rate": 0.00016123168134561037, "loss": 11.6784, "step": 41617 }, { "epoch": 0.871179770576907, "grad_norm": 0.32274964451789856, "learning_rate": 0.00016122994788700333, "loss": 11.6699, "step": 41618 }, { "epoch": 0.8712007033408691, "grad_norm": 0.42145803570747375, "learning_rate": 0.00016122821439896185, "loss": 11.7066, "step": 41619 }, { "epoch": 0.8712216361048313, "grad_norm": 0.26305723190307617, "learning_rate": 0.00016122648088148678, "loss": 11.6664, "step": 41620 }, { "epoch": 0.8712425688687935, "grad_norm": 0.3464835584163666, "learning_rate": 0.00016122474733457886, "loss": 11.6729, "step": 41621 }, { "epoch": 0.8712635016327556, "grad_norm": 0.26149311661720276, "learning_rate": 0.00016122301375823904, "loss": 11.6747, "step": 41622 }, { "epoch": 0.8712844343967178, "grad_norm": 0.3944241404533386, "learning_rate": 0.000161221280152468, "loss": 11.6697, "step": 41623 }, { "epoch": 0.8713053671606799, "grad_norm": 0.2570081651210785, "learning_rate": 0.0001612195465172667, "loss": 11.6425, "step": 41624 }, { "epoch": 0.8713262999246421, "grad_norm": 0.36394384503364563, "learning_rate": 0.00016121781285263596, "loss": 11.6536, "step": 41625 }, { "epoch": 0.8713472326886043, "grad_norm": 0.2635335624217987, "learning_rate": 0.00016121607915857658, "loss": 11.6595, "step": 41626 }, { "epoch": 0.8713681654525663, "grad_norm": 0.2623918950557709, "learning_rate": 0.00016121434543508943, "loss": 11.6627, "step": 41627 }, { "epoch": 0.8713890982165285, "grad_norm": 0.3203413188457489, "learning_rate": 0.00016121261168217528, "loss": 11.6808, "step": 41628 }, { "epoch": 0.8714100309804906, "grad_norm": 0.27675122022628784, "learning_rate": 0.00016121087789983503, "loss": 11.6799, "step": 41629 }, { "epoch": 0.8714309637444528, "grad_norm": 0.27620118856430054, "learning_rate": 0.00016120914408806946, "loss": 11.6768, "step": 41630 }, { "epoch": 0.8714518965084149, "grad_norm": 0.30238616466522217, "learning_rate": 0.00016120741024687943, "loss": 11.6675, "step": 41631 }, { "epoch": 0.8714728292723771, "grad_norm": 0.42116227746009827, "learning_rate": 0.00016120567637626575, "loss": 11.6739, "step": 41632 }, { "epoch": 0.8714937620363393, "grad_norm": 0.2495170533657074, "learning_rate": 0.0001612039424762293, "loss": 11.6573, "step": 41633 }, { "epoch": 0.8715146948003014, "grad_norm": 0.38026610016822815, "learning_rate": 0.0001612022085467709, "loss": 11.6611, "step": 41634 }, { "epoch": 0.8715356275642636, "grad_norm": 0.3154759705066681, "learning_rate": 0.00016120047458789135, "loss": 11.6608, "step": 41635 }, { "epoch": 0.8715565603282257, "grad_norm": 0.35186269879341125, "learning_rate": 0.0001611987405995915, "loss": 11.6577, "step": 41636 }, { "epoch": 0.8715774930921879, "grad_norm": 0.3400593400001526, "learning_rate": 0.00016119700658187225, "loss": 11.6665, "step": 41637 }, { "epoch": 0.87159842585615, "grad_norm": 0.297410786151886, "learning_rate": 0.00016119527253473433, "loss": 11.6632, "step": 41638 }, { "epoch": 0.8716193586201122, "grad_norm": 0.2861230969429016, "learning_rate": 0.00016119353845817858, "loss": 11.6819, "step": 41639 }, { "epoch": 0.8716402913840744, "grad_norm": 0.2872699201107025, "learning_rate": 0.00016119180435220592, "loss": 11.6719, "step": 41640 }, { "epoch": 0.8716612241480365, "grad_norm": 0.46683573722839355, "learning_rate": 0.0001611900702168171, "loss": 11.6689, "step": 41641 }, { "epoch": 0.8716821569119987, "grad_norm": 0.26040634512901306, "learning_rate": 0.00016118833605201303, "loss": 11.665, "step": 41642 }, { "epoch": 0.8717030896759608, "grad_norm": 0.3034731149673462, "learning_rate": 0.00016118660185779446, "loss": 11.6558, "step": 41643 }, { "epoch": 0.871724022439923, "grad_norm": 0.26342958211898804, "learning_rate": 0.0001611848676341623, "loss": 11.6559, "step": 41644 }, { "epoch": 0.8717449552038852, "grad_norm": 0.33095020055770874, "learning_rate": 0.0001611831333811173, "loss": 11.6645, "step": 41645 }, { "epoch": 0.8717658879678473, "grad_norm": 0.3606835901737213, "learning_rate": 0.00016118139909866036, "loss": 11.6702, "step": 41646 }, { "epoch": 0.8717868207318095, "grad_norm": 0.5527911186218262, "learning_rate": 0.00016117966478679233, "loss": 11.6639, "step": 41647 }, { "epoch": 0.8718077534957716, "grad_norm": 0.2567732036113739, "learning_rate": 0.00016117793044551396, "loss": 11.6699, "step": 41648 }, { "epoch": 0.8718286862597338, "grad_norm": 0.3636413514614105, "learning_rate": 0.00016117619607482618, "loss": 11.686, "step": 41649 }, { "epoch": 0.8718496190236958, "grad_norm": 0.3279118835926056, "learning_rate": 0.00016117446167472974, "loss": 11.6729, "step": 41650 }, { "epoch": 0.871870551787658, "grad_norm": 0.35594579577445984, "learning_rate": 0.00016117272724522553, "loss": 11.6637, "step": 41651 }, { "epoch": 0.8718914845516202, "grad_norm": 0.28541019558906555, "learning_rate": 0.00016117099278631434, "loss": 11.6608, "step": 41652 }, { "epoch": 0.8719124173155823, "grad_norm": 0.25298672914505005, "learning_rate": 0.00016116925829799705, "loss": 11.6605, "step": 41653 }, { "epoch": 0.8719333500795445, "grad_norm": 0.2841677665710449, "learning_rate": 0.00016116752378027445, "loss": 11.6731, "step": 41654 }, { "epoch": 0.8719542828435066, "grad_norm": 0.33298584818840027, "learning_rate": 0.00016116578923314743, "loss": 11.6686, "step": 41655 }, { "epoch": 0.8719752156074688, "grad_norm": 0.2834276854991913, "learning_rate": 0.00016116405465661676, "loss": 11.6682, "step": 41656 }, { "epoch": 0.8719961483714309, "grad_norm": 0.6351554989814758, "learning_rate": 0.00016116232005068332, "loss": 11.6566, "step": 41657 }, { "epoch": 0.8720170811353931, "grad_norm": 0.3132832944393158, "learning_rate": 0.0001611605854153479, "loss": 11.6498, "step": 41658 }, { "epoch": 0.8720380138993553, "grad_norm": 0.29865744709968567, "learning_rate": 0.0001611588507506114, "loss": 11.6679, "step": 41659 }, { "epoch": 0.8720589466633174, "grad_norm": 0.3019966781139374, "learning_rate": 0.0001611571160564746, "loss": 11.6657, "step": 41660 }, { "epoch": 0.8720798794272796, "grad_norm": 0.3987233340740204, "learning_rate": 0.00016115538133293835, "loss": 11.6742, "step": 41661 }, { "epoch": 0.8721008121912417, "grad_norm": 0.29025202989578247, "learning_rate": 0.00016115364658000347, "loss": 11.6854, "step": 41662 }, { "epoch": 0.8721217449552039, "grad_norm": 0.29655545949935913, "learning_rate": 0.0001611519117976708, "loss": 11.6561, "step": 41663 }, { "epoch": 0.8721426777191661, "grad_norm": 0.34995272755622864, "learning_rate": 0.00016115017698594122, "loss": 11.6623, "step": 41664 }, { "epoch": 0.8721636104831282, "grad_norm": 0.28792667388916016, "learning_rate": 0.00016114844214481548, "loss": 11.6733, "step": 41665 }, { "epoch": 0.8721845432470904, "grad_norm": 0.3364354372024536, "learning_rate": 0.00016114670727429448, "loss": 11.6848, "step": 41666 }, { "epoch": 0.8722054760110525, "grad_norm": 0.3181370496749878, "learning_rate": 0.00016114497237437901, "loss": 11.6825, "step": 41667 }, { "epoch": 0.8722264087750147, "grad_norm": 0.24462780356407166, "learning_rate": 0.00016114323744506997, "loss": 11.6701, "step": 41668 }, { "epoch": 0.8722473415389768, "grad_norm": 0.31700387597084045, "learning_rate": 0.00016114150248636814, "loss": 11.6768, "step": 41669 }, { "epoch": 0.872268274302939, "grad_norm": 0.32229092717170715, "learning_rate": 0.00016113976749827433, "loss": 11.6703, "step": 41670 }, { "epoch": 0.8722892070669012, "grad_norm": 0.333374947309494, "learning_rate": 0.00016113803248078945, "loss": 11.6724, "step": 41671 }, { "epoch": 0.8723101398308633, "grad_norm": 0.3122044503688812, "learning_rate": 0.00016113629743391426, "loss": 11.6524, "step": 41672 }, { "epoch": 0.8723310725948255, "grad_norm": 0.4015822112560272, "learning_rate": 0.00016113456235764963, "loss": 11.6862, "step": 41673 }, { "epoch": 0.8723520053587875, "grad_norm": 0.3449178636074066, "learning_rate": 0.0001611328272519964, "loss": 11.6823, "step": 41674 }, { "epoch": 0.8723729381227497, "grad_norm": 0.3804072439670563, "learning_rate": 0.0001611310921169554, "loss": 11.6747, "step": 41675 }, { "epoch": 0.8723938708867118, "grad_norm": 0.3925292193889618, "learning_rate": 0.00016112935695252747, "loss": 11.6516, "step": 41676 }, { "epoch": 0.872414803650674, "grad_norm": 0.28468766808509827, "learning_rate": 0.00016112762175871342, "loss": 11.6819, "step": 41677 }, { "epoch": 0.8724357364146362, "grad_norm": 0.3587043881416321, "learning_rate": 0.00016112588653551408, "loss": 11.6881, "step": 41678 }, { "epoch": 0.8724566691785983, "grad_norm": 0.418923556804657, "learning_rate": 0.0001611241512829303, "loss": 11.6759, "step": 41679 }, { "epoch": 0.8724776019425605, "grad_norm": 0.30273494124412537, "learning_rate": 0.00016112241600096293, "loss": 11.6786, "step": 41680 }, { "epoch": 0.8724985347065226, "grad_norm": 0.27734893560409546, "learning_rate": 0.0001611206806896128, "loss": 11.6746, "step": 41681 }, { "epoch": 0.8725194674704848, "grad_norm": 0.5724149942398071, "learning_rate": 0.00016111894534888072, "loss": 11.6609, "step": 41682 }, { "epoch": 0.872540400234447, "grad_norm": 0.3232153654098511, "learning_rate": 0.00016111720997876756, "loss": 11.669, "step": 41683 }, { "epoch": 0.8725613329984091, "grad_norm": 0.3132134675979614, "learning_rate": 0.0001611154745792741, "loss": 11.6717, "step": 41684 }, { "epoch": 0.8725822657623713, "grad_norm": 0.2755168378353119, "learning_rate": 0.00016111373915040123, "loss": 11.6514, "step": 41685 }, { "epoch": 0.8726031985263334, "grad_norm": 0.4063432812690735, "learning_rate": 0.00016111200369214976, "loss": 11.6728, "step": 41686 }, { "epoch": 0.8726241312902956, "grad_norm": 1.7221697568893433, "learning_rate": 0.0001611102682045205, "loss": 11.6459, "step": 41687 }, { "epoch": 0.8726450640542577, "grad_norm": 0.31147250533103943, "learning_rate": 0.00016110853268751435, "loss": 11.672, "step": 41688 }, { "epoch": 0.8726659968182199, "grad_norm": 0.27448415756225586, "learning_rate": 0.00016110679714113207, "loss": 11.6633, "step": 41689 }, { "epoch": 0.8726869295821821, "grad_norm": 0.2760554552078247, "learning_rate": 0.00016110506156537453, "loss": 11.6603, "step": 41690 }, { "epoch": 0.8727078623461442, "grad_norm": 0.310386061668396, "learning_rate": 0.00016110332596024258, "loss": 11.6655, "step": 41691 }, { "epoch": 0.8727287951101064, "grad_norm": 0.3118649125099182, "learning_rate": 0.00016110159032573703, "loss": 11.6535, "step": 41692 }, { "epoch": 0.8727497278740685, "grad_norm": 0.27959543466567993, "learning_rate": 0.00016109985466185872, "loss": 11.6614, "step": 41693 }, { "epoch": 0.8727706606380307, "grad_norm": 0.33430081605911255, "learning_rate": 0.00016109811896860846, "loss": 11.664, "step": 41694 }, { "epoch": 0.8727915934019927, "grad_norm": 0.2752947509288788, "learning_rate": 0.00016109638324598717, "loss": 11.6909, "step": 41695 }, { "epoch": 0.872812526165955, "grad_norm": 0.35221225023269653, "learning_rate": 0.00016109464749399557, "loss": 11.6625, "step": 41696 }, { "epoch": 0.8728334589299171, "grad_norm": 0.30546829104423523, "learning_rate": 0.00016109291171263456, "loss": 11.6727, "step": 41697 }, { "epoch": 0.8728543916938792, "grad_norm": 0.3118620216846466, "learning_rate": 0.00016109117590190497, "loss": 11.6665, "step": 41698 }, { "epoch": 0.8728753244578414, "grad_norm": 0.34776151180267334, "learning_rate": 0.0001610894400618076, "loss": 11.6734, "step": 41699 }, { "epoch": 0.8728962572218035, "grad_norm": 0.3641725778579712, "learning_rate": 0.00016108770419234338, "loss": 11.6591, "step": 41700 }, { "epoch": 0.8729171899857657, "grad_norm": 0.33862558007240295, "learning_rate": 0.00016108596829351305, "loss": 11.6889, "step": 41701 }, { "epoch": 0.8729381227497279, "grad_norm": 0.2824077904224396, "learning_rate": 0.00016108423236531742, "loss": 11.6632, "step": 41702 }, { "epoch": 0.87295905551369, "grad_norm": 0.28038105368614197, "learning_rate": 0.00016108249640775743, "loss": 11.6734, "step": 41703 }, { "epoch": 0.8729799882776522, "grad_norm": 0.29206666350364685, "learning_rate": 0.00016108076042083385, "loss": 11.6685, "step": 41704 }, { "epoch": 0.8730009210416143, "grad_norm": 0.43907949328422546, "learning_rate": 0.0001610790244045475, "loss": 11.6582, "step": 41705 }, { "epoch": 0.8730218538055765, "grad_norm": 0.5272208452224731, "learning_rate": 0.00016107728835889926, "loss": 11.664, "step": 41706 }, { "epoch": 0.8730427865695386, "grad_norm": 0.34615153074264526, "learning_rate": 0.0001610755522838899, "loss": 11.6648, "step": 41707 }, { "epoch": 0.8730637193335008, "grad_norm": 0.2523624897003174, "learning_rate": 0.00016107381617952037, "loss": 11.6606, "step": 41708 }, { "epoch": 0.873084652097463, "grad_norm": 0.44341567158699036, "learning_rate": 0.0001610720800457914, "loss": 11.6658, "step": 41709 }, { "epoch": 0.8731055848614251, "grad_norm": 0.4375672936439514, "learning_rate": 0.00016107034388270386, "loss": 11.6829, "step": 41710 }, { "epoch": 0.8731265176253873, "grad_norm": 0.30337971448898315, "learning_rate": 0.00016106860769025854, "loss": 11.6679, "step": 41711 }, { "epoch": 0.8731474503893494, "grad_norm": 0.2803330719470978, "learning_rate": 0.00016106687146845636, "loss": 11.664, "step": 41712 }, { "epoch": 0.8731683831533116, "grad_norm": 0.39934858679771423, "learning_rate": 0.00016106513521729814, "loss": 11.6774, "step": 41713 }, { "epoch": 0.8731893159172737, "grad_norm": 0.34355974197387695, "learning_rate": 0.00016106339893678465, "loss": 11.6797, "step": 41714 }, { "epoch": 0.8732102486812359, "grad_norm": 0.3636171221733093, "learning_rate": 0.00016106166262691676, "loss": 11.6707, "step": 41715 }, { "epoch": 0.8732311814451981, "grad_norm": 0.28603625297546387, "learning_rate": 0.00016105992628769527, "loss": 11.6616, "step": 41716 }, { "epoch": 0.8732521142091602, "grad_norm": 0.31399786472320557, "learning_rate": 0.00016105818991912112, "loss": 11.6543, "step": 41717 }, { "epoch": 0.8732730469731224, "grad_norm": 0.27485671639442444, "learning_rate": 0.00016105645352119504, "loss": 11.6795, "step": 41718 }, { "epoch": 0.8732939797370844, "grad_norm": 0.3105294108390808, "learning_rate": 0.00016105471709391788, "loss": 11.6611, "step": 41719 }, { "epoch": 0.8733149125010466, "grad_norm": 0.35457244515419006, "learning_rate": 0.00016105298063729052, "loss": 11.6744, "step": 41720 }, { "epoch": 0.8733358452650087, "grad_norm": 0.26445287466049194, "learning_rate": 0.00016105124415131375, "loss": 11.662, "step": 41721 }, { "epoch": 0.8733567780289709, "grad_norm": 0.3007584512233734, "learning_rate": 0.00016104950763598844, "loss": 11.6406, "step": 41722 }, { "epoch": 0.8733777107929331, "grad_norm": 0.2647814452648163, "learning_rate": 0.0001610477710913154, "loss": 11.6744, "step": 41723 }, { "epoch": 0.8733986435568952, "grad_norm": 0.3234645426273346, "learning_rate": 0.0001610460345172955, "loss": 11.6871, "step": 41724 }, { "epoch": 0.8734195763208574, "grad_norm": 0.3090822994709015, "learning_rate": 0.00016104429791392955, "loss": 11.6779, "step": 41725 }, { "epoch": 0.8734405090848195, "grad_norm": 0.37200307846069336, "learning_rate": 0.00016104256128121833, "loss": 11.6676, "step": 41726 }, { "epoch": 0.8734614418487817, "grad_norm": 0.30464136600494385, "learning_rate": 0.00016104082461916277, "loss": 11.6683, "step": 41727 }, { "epoch": 0.8734823746127439, "grad_norm": 0.3055204153060913, "learning_rate": 0.00016103908792776362, "loss": 11.6776, "step": 41728 }, { "epoch": 0.873503307376706, "grad_norm": 0.30461135506629944, "learning_rate": 0.0001610373512070218, "loss": 11.6668, "step": 41729 }, { "epoch": 0.8735242401406682, "grad_norm": 0.3181900084018707, "learning_rate": 0.0001610356144569381, "loss": 11.6573, "step": 41730 }, { "epoch": 0.8735451729046303, "grad_norm": 0.28355085849761963, "learning_rate": 0.0001610338776775133, "loss": 11.6724, "step": 41731 }, { "epoch": 0.8735661056685925, "grad_norm": 0.32019951939582825, "learning_rate": 0.0001610321408687484, "loss": 11.6832, "step": 41732 }, { "epoch": 0.8735870384325546, "grad_norm": 0.33347296714782715, "learning_rate": 0.00016103040403064403, "loss": 11.6797, "step": 41733 }, { "epoch": 0.8736079711965168, "grad_norm": 0.2986264228820801, "learning_rate": 0.00016102866716320114, "loss": 11.6695, "step": 41734 }, { "epoch": 0.873628903960479, "grad_norm": 0.3044898509979248, "learning_rate": 0.0001610269302664206, "loss": 11.6795, "step": 41735 }, { "epoch": 0.8736498367244411, "grad_norm": 0.3421670198440552, "learning_rate": 0.00016102519334030313, "loss": 11.6912, "step": 41736 }, { "epoch": 0.8736707694884033, "grad_norm": 0.29612085223197937, "learning_rate": 0.00016102345638484966, "loss": 11.67, "step": 41737 }, { "epoch": 0.8736917022523654, "grad_norm": 0.2696302831172943, "learning_rate": 0.00016102171940006097, "loss": 11.6671, "step": 41738 }, { "epoch": 0.8737126350163276, "grad_norm": 0.3374011516571045, "learning_rate": 0.0001610199823859379, "loss": 11.6755, "step": 41739 }, { "epoch": 0.8737335677802897, "grad_norm": 0.2689286768436432, "learning_rate": 0.00016101824534248135, "loss": 11.6701, "step": 41740 }, { "epoch": 0.8737545005442519, "grad_norm": 0.25269344449043274, "learning_rate": 0.00016101650826969209, "loss": 11.6541, "step": 41741 }, { "epoch": 0.8737754333082141, "grad_norm": 0.3110867142677307, "learning_rate": 0.00016101477116757098, "loss": 11.6645, "step": 41742 }, { "epoch": 0.8737963660721761, "grad_norm": 0.31417930126190186, "learning_rate": 0.0001610130340361188, "loss": 11.6608, "step": 41743 }, { "epoch": 0.8738172988361383, "grad_norm": 0.3254128098487854, "learning_rate": 0.00016101129687533648, "loss": 11.6718, "step": 41744 }, { "epoch": 0.8738382316001004, "grad_norm": 0.3619362711906433, "learning_rate": 0.0001610095596852248, "loss": 11.678, "step": 41745 }, { "epoch": 0.8738591643640626, "grad_norm": 0.2962490916252136, "learning_rate": 0.0001610078224657846, "loss": 11.6511, "step": 41746 }, { "epoch": 0.8738800971280248, "grad_norm": 0.26785194873809814, "learning_rate": 0.00016100608521701673, "loss": 11.6567, "step": 41747 }, { "epoch": 0.8739010298919869, "grad_norm": 0.3390181064605713, "learning_rate": 0.00016100434793892197, "loss": 11.673, "step": 41748 }, { "epoch": 0.8739219626559491, "grad_norm": 0.32238563895225525, "learning_rate": 0.00016100261063150124, "loss": 11.6694, "step": 41749 }, { "epoch": 0.8739428954199112, "grad_norm": 0.33505406975746155, "learning_rate": 0.00016100087329475531, "loss": 11.6691, "step": 41750 }, { "epoch": 0.8739638281838734, "grad_norm": 0.29908525943756104, "learning_rate": 0.00016099913592868502, "loss": 11.6653, "step": 41751 }, { "epoch": 0.8739847609478355, "grad_norm": 0.3149355351924896, "learning_rate": 0.00016099739853329126, "loss": 11.6666, "step": 41752 }, { "epoch": 0.8740056937117977, "grad_norm": 0.2778174877166748, "learning_rate": 0.0001609956611085748, "loss": 11.6661, "step": 41753 }, { "epoch": 0.8740266264757599, "grad_norm": 0.34494712948799133, "learning_rate": 0.00016099392365453654, "loss": 11.6704, "step": 41754 }, { "epoch": 0.874047559239722, "grad_norm": 0.31904521584510803, "learning_rate": 0.00016099218617117724, "loss": 11.6776, "step": 41755 }, { "epoch": 0.8740684920036842, "grad_norm": 0.3130086064338684, "learning_rate": 0.0001609904486584978, "loss": 11.6717, "step": 41756 }, { "epoch": 0.8740894247676463, "grad_norm": 0.40155988931655884, "learning_rate": 0.000160988711116499, "loss": 11.6651, "step": 41757 }, { "epoch": 0.8741103575316085, "grad_norm": 0.3445049226284027, "learning_rate": 0.00016098697354518173, "loss": 11.6497, "step": 41758 }, { "epoch": 0.8741312902955706, "grad_norm": 0.3358789086341858, "learning_rate": 0.0001609852359445468, "loss": 11.6732, "step": 41759 }, { "epoch": 0.8741522230595328, "grad_norm": 0.3224950134754181, "learning_rate": 0.00016098349831459503, "loss": 11.6655, "step": 41760 }, { "epoch": 0.874173155823495, "grad_norm": 0.3049129247665405, "learning_rate": 0.00016098176065532725, "loss": 11.669, "step": 41761 }, { "epoch": 0.8741940885874571, "grad_norm": 0.2819889187812805, "learning_rate": 0.00016098002296674436, "loss": 11.6484, "step": 41762 }, { "epoch": 0.8742150213514193, "grad_norm": 0.2545839250087738, "learning_rate": 0.00016097828524884716, "loss": 11.6644, "step": 41763 }, { "epoch": 0.8742359541153814, "grad_norm": 0.22857676446437836, "learning_rate": 0.00016097654750163643, "loss": 11.6573, "step": 41764 }, { "epoch": 0.8742568868793436, "grad_norm": 0.2243538796901703, "learning_rate": 0.00016097480972511309, "loss": 11.6684, "step": 41765 }, { "epoch": 0.8742778196433058, "grad_norm": 0.31374797224998474, "learning_rate": 0.0001609730719192779, "loss": 11.6737, "step": 41766 }, { "epoch": 0.8742987524072678, "grad_norm": 0.3586825728416443, "learning_rate": 0.00016097133408413177, "loss": 11.6666, "step": 41767 }, { "epoch": 0.87431968517123, "grad_norm": 0.310435950756073, "learning_rate": 0.00016096959621967547, "loss": 11.6766, "step": 41768 }, { "epoch": 0.8743406179351921, "grad_norm": 0.2944791913032532, "learning_rate": 0.00016096785832590986, "loss": 11.6801, "step": 41769 }, { "epoch": 0.8743615506991543, "grad_norm": 0.2983865737915039, "learning_rate": 0.00016096612040283578, "loss": 11.6619, "step": 41770 }, { "epoch": 0.8743824834631164, "grad_norm": 0.3387683629989624, "learning_rate": 0.00016096438245045408, "loss": 11.6776, "step": 41771 }, { "epoch": 0.8744034162270786, "grad_norm": 0.25395166873931885, "learning_rate": 0.00016096264446876558, "loss": 11.6636, "step": 41772 }, { "epoch": 0.8744243489910408, "grad_norm": 0.31665709614753723, "learning_rate": 0.0001609609064577711, "loss": 11.6771, "step": 41773 }, { "epoch": 0.8744452817550029, "grad_norm": 0.2942143976688385, "learning_rate": 0.00016095916841747153, "loss": 11.6663, "step": 41774 }, { "epoch": 0.8744662145189651, "grad_norm": 0.3306092321872711, "learning_rate": 0.00016095743034786762, "loss": 11.6733, "step": 41775 }, { "epoch": 0.8744871472829272, "grad_norm": 0.3338819742202759, "learning_rate": 0.00016095569224896026, "loss": 11.6693, "step": 41776 }, { "epoch": 0.8745080800468894, "grad_norm": 0.28402256965637207, "learning_rate": 0.0001609539541207503, "loss": 11.6752, "step": 41777 }, { "epoch": 0.8745290128108515, "grad_norm": 0.2765067517757416, "learning_rate": 0.00016095221596323852, "loss": 11.6735, "step": 41778 }, { "epoch": 0.8745499455748137, "grad_norm": 0.2828713655471802, "learning_rate": 0.00016095047777642583, "loss": 11.6778, "step": 41779 }, { "epoch": 0.8745708783387759, "grad_norm": 0.32279694080352783, "learning_rate": 0.00016094873956031298, "loss": 11.6745, "step": 41780 }, { "epoch": 0.874591811102738, "grad_norm": 0.3095073401927948, "learning_rate": 0.00016094700131490087, "loss": 11.6662, "step": 41781 }, { "epoch": 0.8746127438667002, "grad_norm": 0.3095453083515167, "learning_rate": 0.00016094526304019034, "loss": 11.6612, "step": 41782 }, { "epoch": 0.8746336766306623, "grad_norm": 0.31603720784187317, "learning_rate": 0.00016094352473618214, "loss": 11.6647, "step": 41783 }, { "epoch": 0.8746546093946245, "grad_norm": 0.2557670772075653, "learning_rate": 0.00016094178640287723, "loss": 11.6534, "step": 41784 }, { "epoch": 0.8746755421585867, "grad_norm": 0.338276743888855, "learning_rate": 0.00016094004804027633, "loss": 11.6853, "step": 41785 }, { "epoch": 0.8746964749225488, "grad_norm": 0.3156139850616455, "learning_rate": 0.00016093830964838035, "loss": 11.6678, "step": 41786 }, { "epoch": 0.874717407686511, "grad_norm": 0.40043187141418457, "learning_rate": 0.0001609365712271901, "loss": 11.674, "step": 41787 }, { "epoch": 0.874738340450473, "grad_norm": 0.2618582248687744, "learning_rate": 0.00016093483277670645, "loss": 11.6595, "step": 41788 }, { "epoch": 0.8747592732144353, "grad_norm": 0.29238641262054443, "learning_rate": 0.00016093309429693017, "loss": 11.6609, "step": 41789 }, { "epoch": 0.8747802059783973, "grad_norm": 0.3659440577030182, "learning_rate": 0.00016093135578786212, "loss": 11.6864, "step": 41790 }, { "epoch": 0.8748011387423595, "grad_norm": 0.269514799118042, "learning_rate": 0.00016092961724950317, "loss": 11.6622, "step": 41791 }, { "epoch": 0.8748220715063217, "grad_norm": 0.29745668172836304, "learning_rate": 0.00016092787868185414, "loss": 11.6777, "step": 41792 }, { "epoch": 0.8748430042702838, "grad_norm": 0.32362818717956543, "learning_rate": 0.00016092614008491585, "loss": 11.6816, "step": 41793 }, { "epoch": 0.874863937034246, "grad_norm": 0.3025921881198883, "learning_rate": 0.00016092440145868913, "loss": 11.6669, "step": 41794 }, { "epoch": 0.8748848697982081, "grad_norm": 0.32100343704223633, "learning_rate": 0.00016092266280317484, "loss": 11.6791, "step": 41795 }, { "epoch": 0.8749058025621703, "grad_norm": 0.33142295479774475, "learning_rate": 0.00016092092411837378, "loss": 11.6832, "step": 41796 }, { "epoch": 0.8749267353261324, "grad_norm": 0.3414056897163391, "learning_rate": 0.00016091918540428685, "loss": 11.6764, "step": 41797 }, { "epoch": 0.8749476680900946, "grad_norm": 0.3515624403953552, "learning_rate": 0.00016091744666091483, "loss": 11.6866, "step": 41798 }, { "epoch": 0.8749686008540568, "grad_norm": 0.30749833583831787, "learning_rate": 0.00016091570788825856, "loss": 11.6665, "step": 41799 }, { "epoch": 0.8749895336180189, "grad_norm": 0.31514227390289307, "learning_rate": 0.00016091396908631892, "loss": 11.6659, "step": 41800 }, { "epoch": 0.8750104663819811, "grad_norm": 0.24579229950904846, "learning_rate": 0.00016091223025509668, "loss": 11.6598, "step": 41801 }, { "epoch": 0.8750313991459432, "grad_norm": 0.2989250719547272, "learning_rate": 0.00016091049139459274, "loss": 11.6677, "step": 41802 }, { "epoch": 0.8750523319099054, "grad_norm": 0.43561238050460815, "learning_rate": 0.0001609087525048079, "loss": 11.6837, "step": 41803 }, { "epoch": 0.8750732646738676, "grad_norm": 0.3273785412311554, "learning_rate": 0.00016090701358574297, "loss": 11.6418, "step": 41804 }, { "epoch": 0.8750941974378297, "grad_norm": 0.25858274102211, "learning_rate": 0.00016090527463739885, "loss": 11.6584, "step": 41805 }, { "epoch": 0.8751151302017919, "grad_norm": 0.30405193567276, "learning_rate": 0.00016090353565977633, "loss": 11.6377, "step": 41806 }, { "epoch": 0.875136062965754, "grad_norm": 0.26379817724227905, "learning_rate": 0.00016090179665287626, "loss": 11.668, "step": 41807 }, { "epoch": 0.8751569957297162, "grad_norm": 0.30175039172172546, "learning_rate": 0.0001609000576166995, "loss": 11.6597, "step": 41808 }, { "epoch": 0.8751779284936783, "grad_norm": 0.2905322015285492, "learning_rate": 0.00016089831855124685, "loss": 11.6546, "step": 41809 }, { "epoch": 0.8751988612576405, "grad_norm": 0.3045593202114105, "learning_rate": 0.00016089657945651914, "loss": 11.6622, "step": 41810 }, { "epoch": 0.8752197940216027, "grad_norm": 0.354505330324173, "learning_rate": 0.00016089484033251722, "loss": 11.6554, "step": 41811 }, { "epoch": 0.8752407267855647, "grad_norm": 0.30840596556663513, "learning_rate": 0.00016089310117924194, "loss": 11.6737, "step": 41812 }, { "epoch": 0.875261659549527, "grad_norm": 0.2821980118751526, "learning_rate": 0.00016089136199669415, "loss": 11.6704, "step": 41813 }, { "epoch": 0.875282592313489, "grad_norm": 0.3123352527618408, "learning_rate": 0.00016088962278487464, "loss": 11.6654, "step": 41814 }, { "epoch": 0.8753035250774512, "grad_norm": 0.29461097717285156, "learning_rate": 0.00016088788354378427, "loss": 11.6722, "step": 41815 }, { "epoch": 0.8753244578414133, "grad_norm": 0.32116374373435974, "learning_rate": 0.0001608861442734239, "loss": 11.6588, "step": 41816 }, { "epoch": 0.8753453906053755, "grad_norm": 0.4021415710449219, "learning_rate": 0.00016088440497379427, "loss": 11.6651, "step": 41817 }, { "epoch": 0.8753663233693377, "grad_norm": 0.3068093955516815, "learning_rate": 0.00016088266564489635, "loss": 11.6698, "step": 41818 }, { "epoch": 0.8753872561332998, "grad_norm": 0.30570533871650696, "learning_rate": 0.00016088092628673088, "loss": 11.6604, "step": 41819 }, { "epoch": 0.875408188897262, "grad_norm": 0.37142255902290344, "learning_rate": 0.00016087918689929875, "loss": 11.6744, "step": 41820 }, { "epoch": 0.8754291216612241, "grad_norm": 0.31895768642425537, "learning_rate": 0.00016087744748260078, "loss": 11.6791, "step": 41821 }, { "epoch": 0.8754500544251863, "grad_norm": 0.3360322415828705, "learning_rate": 0.00016087570803663778, "loss": 11.6581, "step": 41822 }, { "epoch": 0.8754709871891485, "grad_norm": 0.2601318955421448, "learning_rate": 0.00016087396856141062, "loss": 11.6661, "step": 41823 }, { "epoch": 0.8754919199531106, "grad_norm": 0.2953692376613617, "learning_rate": 0.0001608722290569201, "loss": 11.6791, "step": 41824 }, { "epoch": 0.8755128527170728, "grad_norm": 0.3602292835712433, "learning_rate": 0.0001608704895231671, "loss": 11.6667, "step": 41825 }, { "epoch": 0.8755337854810349, "grad_norm": 0.3101348578929901, "learning_rate": 0.00016086874996015243, "loss": 11.6774, "step": 41826 }, { "epoch": 0.8755547182449971, "grad_norm": 0.33100810647010803, "learning_rate": 0.00016086701036787694, "loss": 11.6739, "step": 41827 }, { "epoch": 0.8755756510089592, "grad_norm": 0.3350456655025482, "learning_rate": 0.00016086527074634147, "loss": 11.674, "step": 41828 }, { "epoch": 0.8755965837729214, "grad_norm": 0.3328118920326233, "learning_rate": 0.00016086353109554682, "loss": 11.6741, "step": 41829 }, { "epoch": 0.8756175165368836, "grad_norm": 0.2807255983352661, "learning_rate": 0.00016086179141549387, "loss": 11.6592, "step": 41830 }, { "epoch": 0.8756384493008457, "grad_norm": 0.3908962309360504, "learning_rate": 0.00016086005170618344, "loss": 11.6803, "step": 41831 }, { "epoch": 0.8756593820648079, "grad_norm": 0.34864625334739685, "learning_rate": 0.00016085831196761637, "loss": 11.6653, "step": 41832 }, { "epoch": 0.87568031482877, "grad_norm": 0.3321298658847809, "learning_rate": 0.00016085657219979345, "loss": 11.6651, "step": 41833 }, { "epoch": 0.8757012475927322, "grad_norm": 0.3185032606124878, "learning_rate": 0.00016085483240271556, "loss": 11.6645, "step": 41834 }, { "epoch": 0.8757221803566942, "grad_norm": 0.3044901490211487, "learning_rate": 0.00016085309257638354, "loss": 11.6722, "step": 41835 }, { "epoch": 0.8757431131206564, "grad_norm": 0.40500205755233765, "learning_rate": 0.00016085135272079826, "loss": 11.6828, "step": 41836 }, { "epoch": 0.8757640458846186, "grad_norm": 0.32392656803131104, "learning_rate": 0.00016084961283596046, "loss": 11.6627, "step": 41837 }, { "epoch": 0.8757849786485807, "grad_norm": 0.3103273808956146, "learning_rate": 0.00016084787292187106, "loss": 11.6666, "step": 41838 }, { "epoch": 0.8758059114125429, "grad_norm": 0.30665624141693115, "learning_rate": 0.00016084613297853085, "loss": 11.6727, "step": 41839 }, { "epoch": 0.875826844176505, "grad_norm": 0.248083233833313, "learning_rate": 0.00016084439300594072, "loss": 11.6686, "step": 41840 }, { "epoch": 0.8758477769404672, "grad_norm": 0.2948307394981384, "learning_rate": 0.00016084265300410144, "loss": 11.6479, "step": 41841 }, { "epoch": 0.8758687097044294, "grad_norm": 0.3162928521633148, "learning_rate": 0.00016084091297301388, "loss": 11.6614, "step": 41842 }, { "epoch": 0.8758896424683915, "grad_norm": 0.27436381578445435, "learning_rate": 0.00016083917291267888, "loss": 11.669, "step": 41843 }, { "epoch": 0.8759105752323537, "grad_norm": 0.31549587845802307, "learning_rate": 0.00016083743282309726, "loss": 11.6571, "step": 41844 }, { "epoch": 0.8759315079963158, "grad_norm": 0.2800041139125824, "learning_rate": 0.00016083569270426988, "loss": 11.6617, "step": 41845 }, { "epoch": 0.875952440760278, "grad_norm": 0.292021781206131, "learning_rate": 0.00016083395255619759, "loss": 11.6643, "step": 41846 }, { "epoch": 0.8759733735242401, "grad_norm": 0.2660096287727356, "learning_rate": 0.00016083221237888116, "loss": 11.6665, "step": 41847 }, { "epoch": 0.8759943062882023, "grad_norm": 0.31915053725242615, "learning_rate": 0.00016083047217232147, "loss": 11.6778, "step": 41848 }, { "epoch": 0.8760152390521645, "grad_norm": 0.29268595576286316, "learning_rate": 0.00016082873193651935, "loss": 11.6695, "step": 41849 }, { "epoch": 0.8760361718161266, "grad_norm": 0.4114895164966583, "learning_rate": 0.00016082699167147566, "loss": 11.667, "step": 41850 }, { "epoch": 0.8760571045800888, "grad_norm": 0.3982568085193634, "learning_rate": 0.0001608252513771912, "loss": 11.6519, "step": 41851 }, { "epoch": 0.8760780373440509, "grad_norm": 0.25298070907592773, "learning_rate": 0.00016082351105366685, "loss": 11.6629, "step": 41852 }, { "epoch": 0.8760989701080131, "grad_norm": 0.3163021504878998, "learning_rate": 0.0001608217707009034, "loss": 11.6849, "step": 41853 }, { "epoch": 0.8761199028719752, "grad_norm": 0.4829370677471161, "learning_rate": 0.00016082003031890168, "loss": 11.6847, "step": 41854 }, { "epoch": 0.8761408356359374, "grad_norm": 0.3648805320262909, "learning_rate": 0.0001608182899076626, "loss": 11.6785, "step": 41855 }, { "epoch": 0.8761617683998996, "grad_norm": 0.3003794252872467, "learning_rate": 0.0001608165494671869, "loss": 11.6768, "step": 41856 }, { "epoch": 0.8761827011638617, "grad_norm": 0.29634782671928406, "learning_rate": 0.00016081480899747552, "loss": 11.6834, "step": 41857 }, { "epoch": 0.8762036339278239, "grad_norm": 0.3019481301307678, "learning_rate": 0.00016081306849852916, "loss": 11.6684, "step": 41858 }, { "epoch": 0.8762245666917859, "grad_norm": 0.3540928363800049, "learning_rate": 0.00016081132797034884, "loss": 11.6758, "step": 41859 }, { "epoch": 0.8762454994557481, "grad_norm": 0.25388914346694946, "learning_rate": 0.00016080958741293523, "loss": 11.638, "step": 41860 }, { "epoch": 0.8762664322197103, "grad_norm": 0.3156912326812744, "learning_rate": 0.00016080784682628925, "loss": 11.663, "step": 41861 }, { "epoch": 0.8762873649836724, "grad_norm": 0.33444443345069885, "learning_rate": 0.0001608061062104117, "loss": 11.6698, "step": 41862 }, { "epoch": 0.8763082977476346, "grad_norm": 0.32087552547454834, "learning_rate": 0.0001608043655653035, "loss": 11.6745, "step": 41863 }, { "epoch": 0.8763292305115967, "grad_norm": 0.3126615583896637, "learning_rate": 0.00016080262489096537, "loss": 11.6665, "step": 41864 }, { "epoch": 0.8763501632755589, "grad_norm": 0.2854512929916382, "learning_rate": 0.00016080088418739822, "loss": 11.6774, "step": 41865 }, { "epoch": 0.876371096039521, "grad_norm": 0.32629477977752686, "learning_rate": 0.00016079914345460284, "loss": 11.6725, "step": 41866 }, { "epoch": 0.8763920288034832, "grad_norm": 0.33451640605926514, "learning_rate": 0.0001607974026925801, "loss": 11.6744, "step": 41867 }, { "epoch": 0.8764129615674454, "grad_norm": 0.2409786581993103, "learning_rate": 0.00016079566190133085, "loss": 11.6626, "step": 41868 }, { "epoch": 0.8764338943314075, "grad_norm": 0.2495376020669937, "learning_rate": 0.0001607939210808559, "loss": 11.6841, "step": 41869 }, { "epoch": 0.8764548270953697, "grad_norm": 0.31688496470451355, "learning_rate": 0.0001607921802311561, "loss": 11.6791, "step": 41870 }, { "epoch": 0.8764757598593318, "grad_norm": 0.2834179997444153, "learning_rate": 0.00016079043935223226, "loss": 11.6781, "step": 41871 }, { "epoch": 0.876496692623294, "grad_norm": 0.28566280007362366, "learning_rate": 0.00016078869844408526, "loss": 11.6771, "step": 41872 }, { "epoch": 0.8765176253872561, "grad_norm": 0.2453376203775406, "learning_rate": 0.0001607869575067159, "loss": 11.6823, "step": 41873 }, { "epoch": 0.8765385581512183, "grad_norm": 0.31280508637428284, "learning_rate": 0.00016078521654012504, "loss": 11.6717, "step": 41874 }, { "epoch": 0.8765594909151805, "grad_norm": 0.27096322178840637, "learning_rate": 0.0001607834755443135, "loss": 11.6569, "step": 41875 }, { "epoch": 0.8765804236791426, "grad_norm": 0.29910480976104736, "learning_rate": 0.00016078173451928213, "loss": 11.6744, "step": 41876 }, { "epoch": 0.8766013564431048, "grad_norm": 0.31340521574020386, "learning_rate": 0.00016077999346503176, "loss": 11.6718, "step": 41877 }, { "epoch": 0.8766222892070669, "grad_norm": 0.43458685278892517, "learning_rate": 0.00016077825238156322, "loss": 11.669, "step": 41878 }, { "epoch": 0.8766432219710291, "grad_norm": 0.2640064060688019, "learning_rate": 0.00016077651126887737, "loss": 11.6583, "step": 41879 }, { "epoch": 0.8766641547349913, "grad_norm": 0.31501877307891846, "learning_rate": 0.00016077477012697503, "loss": 11.6717, "step": 41880 }, { "epoch": 0.8766850874989534, "grad_norm": 0.2818431258201599, "learning_rate": 0.00016077302895585704, "loss": 11.6669, "step": 41881 }, { "epoch": 0.8767060202629156, "grad_norm": 0.37836140394210815, "learning_rate": 0.00016077128775552423, "loss": 11.6633, "step": 41882 }, { "epoch": 0.8767269530268776, "grad_norm": 0.2916770577430725, "learning_rate": 0.00016076954652597745, "loss": 11.6719, "step": 41883 }, { "epoch": 0.8767478857908398, "grad_norm": 0.3636396825313568, "learning_rate": 0.00016076780526721752, "loss": 11.6611, "step": 41884 }, { "epoch": 0.8767688185548019, "grad_norm": 0.28963136672973633, "learning_rate": 0.0001607660639792453, "loss": 11.6798, "step": 41885 }, { "epoch": 0.8767897513187641, "grad_norm": 0.3164179027080536, "learning_rate": 0.00016076432266206163, "loss": 11.6638, "step": 41886 }, { "epoch": 0.8768106840827263, "grad_norm": 0.2648915946483612, "learning_rate": 0.00016076258131566732, "loss": 11.6518, "step": 41887 }, { "epoch": 0.8768316168466884, "grad_norm": 0.3387623131275177, "learning_rate": 0.00016076083994006322, "loss": 11.6837, "step": 41888 }, { "epoch": 0.8768525496106506, "grad_norm": 0.3089986741542816, "learning_rate": 0.00016075909853525015, "loss": 11.6771, "step": 41889 }, { "epoch": 0.8768734823746127, "grad_norm": 0.26677337288856506, "learning_rate": 0.000160757357101229, "loss": 11.6712, "step": 41890 }, { "epoch": 0.8768944151385749, "grad_norm": 0.3506271243095398, "learning_rate": 0.00016075561563800051, "loss": 11.6875, "step": 41891 }, { "epoch": 0.876915347902537, "grad_norm": 0.2754450738430023, "learning_rate": 0.00016075387414556564, "loss": 11.6761, "step": 41892 }, { "epoch": 0.8769362806664992, "grad_norm": 0.2987152934074402, "learning_rate": 0.00016075213262392512, "loss": 11.6709, "step": 41893 }, { "epoch": 0.8769572134304614, "grad_norm": 0.3389612138271332, "learning_rate": 0.00016075039107307988, "loss": 11.6779, "step": 41894 }, { "epoch": 0.8769781461944235, "grad_norm": 0.2914970815181732, "learning_rate": 0.00016074864949303067, "loss": 11.6675, "step": 41895 }, { "epoch": 0.8769990789583857, "grad_norm": 0.2955024242401123, "learning_rate": 0.0001607469078837784, "loss": 11.6609, "step": 41896 }, { "epoch": 0.8770200117223478, "grad_norm": 0.24552102386951447, "learning_rate": 0.00016074516624532383, "loss": 11.6571, "step": 41897 }, { "epoch": 0.87704094448631, "grad_norm": 0.28551456332206726, "learning_rate": 0.00016074342457766787, "loss": 11.6736, "step": 41898 }, { "epoch": 0.8770618772502721, "grad_norm": 0.3063744306564331, "learning_rate": 0.0001607416828808113, "loss": 11.6606, "step": 41899 }, { "epoch": 0.8770828100142343, "grad_norm": 0.32753247022628784, "learning_rate": 0.00016073994115475503, "loss": 11.6459, "step": 41900 }, { "epoch": 0.8771037427781965, "grad_norm": 0.29444822669029236, "learning_rate": 0.00016073819939949985, "loss": 11.6491, "step": 41901 }, { "epoch": 0.8771246755421586, "grad_norm": 0.24339373409748077, "learning_rate": 0.00016073645761504658, "loss": 11.6633, "step": 41902 }, { "epoch": 0.8771456083061208, "grad_norm": 0.2841392159461975, "learning_rate": 0.00016073471580139607, "loss": 11.6663, "step": 41903 }, { "epoch": 0.8771665410700829, "grad_norm": 0.27759596705436707, "learning_rate": 0.00016073297395854917, "loss": 11.6849, "step": 41904 }, { "epoch": 0.877187473834045, "grad_norm": 0.29817700386047363, "learning_rate": 0.0001607312320865067, "loss": 11.6622, "step": 41905 }, { "epoch": 0.8772084065980073, "grad_norm": 0.32448458671569824, "learning_rate": 0.00016072949018526952, "loss": 11.6673, "step": 41906 }, { "epoch": 0.8772293393619693, "grad_norm": 0.2671351432800293, "learning_rate": 0.00016072774825483848, "loss": 11.6669, "step": 41907 }, { "epoch": 0.8772502721259315, "grad_norm": 0.3786424398422241, "learning_rate": 0.00016072600629521438, "loss": 11.645, "step": 41908 }, { "epoch": 0.8772712048898936, "grad_norm": 0.2942032217979431, "learning_rate": 0.00016072426430639806, "loss": 11.6639, "step": 41909 }, { "epoch": 0.8772921376538558, "grad_norm": 0.2803254723548889, "learning_rate": 0.0001607225222883904, "loss": 11.6708, "step": 41910 }, { "epoch": 0.8773130704178179, "grad_norm": 0.2812544107437134, "learning_rate": 0.00016072078024119218, "loss": 11.6712, "step": 41911 }, { "epoch": 0.8773340031817801, "grad_norm": 0.38045480847358704, "learning_rate": 0.00016071903816480427, "loss": 11.6828, "step": 41912 }, { "epoch": 0.8773549359457423, "grad_norm": 0.26344630122184753, "learning_rate": 0.0001607172960592275, "loss": 11.6644, "step": 41913 }, { "epoch": 0.8773758687097044, "grad_norm": 0.31338486075401306, "learning_rate": 0.0001607155539244627, "loss": 11.6715, "step": 41914 }, { "epoch": 0.8773968014736666, "grad_norm": 0.3038193881511688, "learning_rate": 0.00016071381176051072, "loss": 11.6556, "step": 41915 }, { "epoch": 0.8774177342376287, "grad_norm": 0.2834101617336273, "learning_rate": 0.00016071206956737243, "loss": 11.6796, "step": 41916 }, { "epoch": 0.8774386670015909, "grad_norm": 0.3105391263961792, "learning_rate": 0.00016071032734504858, "loss": 11.6641, "step": 41917 }, { "epoch": 0.877459599765553, "grad_norm": 0.3246617913246155, "learning_rate": 0.0001607085850935401, "loss": 11.6679, "step": 41918 }, { "epoch": 0.8774805325295152, "grad_norm": 0.30944696068763733, "learning_rate": 0.00016070684281284777, "loss": 11.6544, "step": 41919 }, { "epoch": 0.8775014652934774, "grad_norm": 0.3163813650608063, "learning_rate": 0.00016070510050297245, "loss": 11.674, "step": 41920 }, { "epoch": 0.8775223980574395, "grad_norm": 0.2652249038219452, "learning_rate": 0.00016070335816391492, "loss": 11.6782, "step": 41921 }, { "epoch": 0.8775433308214017, "grad_norm": 0.25710320472717285, "learning_rate": 0.00016070161579567613, "loss": 11.659, "step": 41922 }, { "epoch": 0.8775642635853638, "grad_norm": 0.36845433712005615, "learning_rate": 0.00016069987339825686, "loss": 11.6625, "step": 41923 }, { "epoch": 0.877585196349326, "grad_norm": 0.31197527050971985, "learning_rate": 0.0001606981309716579, "loss": 11.6591, "step": 41924 }, { "epoch": 0.8776061291132882, "grad_norm": 0.36903128027915955, "learning_rate": 0.0001606963885158802, "loss": 11.6799, "step": 41925 }, { "epoch": 0.8776270618772503, "grad_norm": 0.27495014667510986, "learning_rate": 0.00016069464603092444, "loss": 11.6574, "step": 41926 }, { "epoch": 0.8776479946412125, "grad_norm": 0.3138444423675537, "learning_rate": 0.0001606929035167916, "loss": 11.6606, "step": 41927 }, { "epoch": 0.8776689274051745, "grad_norm": 0.2839592397212982, "learning_rate": 0.00016069116097348248, "loss": 11.6604, "step": 41928 }, { "epoch": 0.8776898601691367, "grad_norm": 0.48211055994033813, "learning_rate": 0.00016068941840099784, "loss": 11.6741, "step": 41929 }, { "epoch": 0.8777107929330988, "grad_norm": 0.28349557518959045, "learning_rate": 0.00016068767579933865, "loss": 11.6482, "step": 41930 }, { "epoch": 0.877731725697061, "grad_norm": 0.2868039309978485, "learning_rate": 0.00016068593316850564, "loss": 11.6804, "step": 41931 }, { "epoch": 0.8777526584610232, "grad_norm": 0.40370288491249084, "learning_rate": 0.0001606841905084997, "loss": 11.6659, "step": 41932 }, { "epoch": 0.8777735912249853, "grad_norm": 0.36497023701667786, "learning_rate": 0.00016068244781932165, "loss": 11.6742, "step": 41933 }, { "epoch": 0.8777945239889475, "grad_norm": 0.3483050763607025, "learning_rate": 0.0001606807051009723, "loss": 11.6721, "step": 41934 }, { "epoch": 0.8778154567529096, "grad_norm": 0.42289504408836365, "learning_rate": 0.00016067896235345258, "loss": 11.666, "step": 41935 }, { "epoch": 0.8778363895168718, "grad_norm": 0.2537093758583069, "learning_rate": 0.00016067721957676323, "loss": 11.6732, "step": 41936 }, { "epoch": 0.8778573222808339, "grad_norm": 0.32233303785324097, "learning_rate": 0.0001606754767709051, "loss": 11.6613, "step": 41937 }, { "epoch": 0.8778782550447961, "grad_norm": 0.32701554894447327, "learning_rate": 0.00016067373393587912, "loss": 11.6852, "step": 41938 }, { "epoch": 0.8778991878087583, "grad_norm": 0.3290606439113617, "learning_rate": 0.00016067199107168602, "loss": 11.6615, "step": 41939 }, { "epoch": 0.8779201205727204, "grad_norm": 0.395865261554718, "learning_rate": 0.00016067024817832668, "loss": 11.6535, "step": 41940 }, { "epoch": 0.8779410533366826, "grad_norm": 0.2695843577384949, "learning_rate": 0.00016066850525580192, "loss": 11.6605, "step": 41941 }, { "epoch": 0.8779619861006447, "grad_norm": 0.27009087800979614, "learning_rate": 0.0001606667623041126, "loss": 11.6785, "step": 41942 }, { "epoch": 0.8779829188646069, "grad_norm": 0.25542545318603516, "learning_rate": 0.0001606650193232596, "loss": 11.6576, "step": 41943 }, { "epoch": 0.8780038516285691, "grad_norm": 0.34607699513435364, "learning_rate": 0.00016066327631324365, "loss": 11.6665, "step": 41944 }, { "epoch": 0.8780247843925312, "grad_norm": 0.2865305542945862, "learning_rate": 0.00016066153327406568, "loss": 11.6758, "step": 41945 }, { "epoch": 0.8780457171564934, "grad_norm": 0.3026356101036072, "learning_rate": 0.0001606597902057265, "loss": 11.6763, "step": 41946 }, { "epoch": 0.8780666499204555, "grad_norm": 0.3619260787963867, "learning_rate": 0.0001606580471082269, "loss": 11.6874, "step": 41947 }, { "epoch": 0.8780875826844177, "grad_norm": 0.3322680592536926, "learning_rate": 0.0001606563039815678, "loss": 11.6635, "step": 41948 }, { "epoch": 0.8781085154483798, "grad_norm": 0.3195892572402954, "learning_rate": 0.00016065456082574997, "loss": 11.6714, "step": 41949 }, { "epoch": 0.878129448212342, "grad_norm": 0.30828291177749634, "learning_rate": 0.00016065281764077432, "loss": 11.6655, "step": 41950 }, { "epoch": 0.8781503809763042, "grad_norm": 0.2630203366279602, "learning_rate": 0.00016065107442664157, "loss": 11.6785, "step": 41951 }, { "epoch": 0.8781713137402662, "grad_norm": 0.28205856680870056, "learning_rate": 0.0001606493311833527, "loss": 11.6819, "step": 41952 }, { "epoch": 0.8781922465042284, "grad_norm": 0.319873183965683, "learning_rate": 0.00016064758791090847, "loss": 11.6829, "step": 41953 }, { "epoch": 0.8782131792681905, "grad_norm": 0.274212121963501, "learning_rate": 0.00016064584460930971, "loss": 11.6811, "step": 41954 }, { "epoch": 0.8782341120321527, "grad_norm": 0.24006161093711853, "learning_rate": 0.0001606441012785573, "loss": 11.6606, "step": 41955 }, { "epoch": 0.8782550447961148, "grad_norm": 0.328315794467926, "learning_rate": 0.00016064235791865206, "loss": 11.676, "step": 41956 }, { "epoch": 0.878275977560077, "grad_norm": 0.33403828740119934, "learning_rate": 0.0001606406145295948, "loss": 11.6832, "step": 41957 }, { "epoch": 0.8782969103240392, "grad_norm": 0.2577173113822937, "learning_rate": 0.00016063887111138637, "loss": 11.6782, "step": 41958 }, { "epoch": 0.8783178430880013, "grad_norm": 0.30782049894332886, "learning_rate": 0.00016063712766402767, "loss": 11.668, "step": 41959 }, { "epoch": 0.8783387758519635, "grad_norm": 0.293329119682312, "learning_rate": 0.00016063538418751945, "loss": 11.6642, "step": 41960 }, { "epoch": 0.8783597086159256, "grad_norm": 0.4009096026420593, "learning_rate": 0.00016063364068186258, "loss": 11.6756, "step": 41961 }, { "epoch": 0.8783806413798878, "grad_norm": 0.38240063190460205, "learning_rate": 0.00016063189714705792, "loss": 11.6811, "step": 41962 }, { "epoch": 0.87840157414385, "grad_norm": 0.3509654700756073, "learning_rate": 0.0001606301535831063, "loss": 11.6721, "step": 41963 }, { "epoch": 0.8784225069078121, "grad_norm": 0.3465152084827423, "learning_rate": 0.00016062840999000853, "loss": 11.6745, "step": 41964 }, { "epoch": 0.8784434396717743, "grad_norm": 0.27273204922676086, "learning_rate": 0.0001606266663677655, "loss": 11.6623, "step": 41965 }, { "epoch": 0.8784643724357364, "grad_norm": 0.22157703340053558, "learning_rate": 0.00016062492271637794, "loss": 11.6627, "step": 41966 }, { "epoch": 0.8784853051996986, "grad_norm": 0.34164637327194214, "learning_rate": 0.00016062317903584684, "loss": 11.6739, "step": 41967 }, { "epoch": 0.8785062379636607, "grad_norm": 0.263534277677536, "learning_rate": 0.00016062143532617293, "loss": 11.6704, "step": 41968 }, { "epoch": 0.8785271707276229, "grad_norm": 0.2521350085735321, "learning_rate": 0.0001606196915873571, "loss": 11.6618, "step": 41969 }, { "epoch": 0.8785481034915851, "grad_norm": 0.41088810563087463, "learning_rate": 0.0001606179478194002, "loss": 11.6825, "step": 41970 }, { "epoch": 0.8785690362555472, "grad_norm": 0.2802416682243347, "learning_rate": 0.00016061620402230297, "loss": 11.6586, "step": 41971 }, { "epoch": 0.8785899690195094, "grad_norm": 0.31535014510154724, "learning_rate": 0.00016061446019606634, "loss": 11.6569, "step": 41972 }, { "epoch": 0.8786109017834715, "grad_norm": 0.31557223200798035, "learning_rate": 0.00016061271634069114, "loss": 11.6599, "step": 41973 }, { "epoch": 0.8786318345474337, "grad_norm": 0.2990714907646179, "learning_rate": 0.0001606109724561782, "loss": 11.6713, "step": 41974 }, { "epoch": 0.8786527673113957, "grad_norm": 0.30398261547088623, "learning_rate": 0.00016060922854252828, "loss": 11.6733, "step": 41975 }, { "epoch": 0.8786737000753579, "grad_norm": 0.38195639848709106, "learning_rate": 0.00016060748459974236, "loss": 11.676, "step": 41976 }, { "epoch": 0.8786946328393201, "grad_norm": 0.3126888573169708, "learning_rate": 0.0001606057406278212, "loss": 11.6649, "step": 41977 }, { "epoch": 0.8787155656032822, "grad_norm": 0.26643046736717224, "learning_rate": 0.00016060399662676562, "loss": 11.6646, "step": 41978 }, { "epoch": 0.8787364983672444, "grad_norm": 0.26322147250175476, "learning_rate": 0.0001606022525965765, "loss": 11.6647, "step": 41979 }, { "epoch": 0.8787574311312065, "grad_norm": 0.2762647569179535, "learning_rate": 0.00016060050853725465, "loss": 11.6755, "step": 41980 }, { "epoch": 0.8787783638951687, "grad_norm": 0.2950901985168457, "learning_rate": 0.00016059876444880094, "loss": 11.6635, "step": 41981 }, { "epoch": 0.8787992966591309, "grad_norm": 0.29215455055236816, "learning_rate": 0.0001605970203312162, "loss": 11.6721, "step": 41982 }, { "epoch": 0.878820229423093, "grad_norm": 0.38072487711906433, "learning_rate": 0.00016059527618450124, "loss": 11.67, "step": 41983 }, { "epoch": 0.8788411621870552, "grad_norm": 0.3827943503856659, "learning_rate": 0.00016059353200865693, "loss": 11.6804, "step": 41984 }, { "epoch": 0.8788620949510173, "grad_norm": 0.2652137577533722, "learning_rate": 0.00016059178780368407, "loss": 11.6683, "step": 41985 }, { "epoch": 0.8788830277149795, "grad_norm": 0.3515375554561615, "learning_rate": 0.00016059004356958355, "loss": 11.6664, "step": 41986 }, { "epoch": 0.8789039604789416, "grad_norm": 0.3547048568725586, "learning_rate": 0.00016058829930635615, "loss": 11.6697, "step": 41987 }, { "epoch": 0.8789248932429038, "grad_norm": 0.2966335713863373, "learning_rate": 0.00016058655501400274, "loss": 11.653, "step": 41988 }, { "epoch": 0.878945826006866, "grad_norm": 0.2962391972541809, "learning_rate": 0.0001605848106925242, "loss": 11.6786, "step": 41989 }, { "epoch": 0.8789667587708281, "grad_norm": 0.2994135022163391, "learning_rate": 0.0001605830663419213, "loss": 11.6621, "step": 41990 }, { "epoch": 0.8789876915347903, "grad_norm": 0.27670541405677795, "learning_rate": 0.00016058132196219491, "loss": 11.6652, "step": 41991 }, { "epoch": 0.8790086242987524, "grad_norm": 0.37609028816223145, "learning_rate": 0.00016057957755334586, "loss": 11.6788, "step": 41992 }, { "epoch": 0.8790295570627146, "grad_norm": 0.3109000027179718, "learning_rate": 0.000160577833115375, "loss": 11.6741, "step": 41993 }, { "epoch": 0.8790504898266767, "grad_norm": 0.2805554270744324, "learning_rate": 0.00016057608864828316, "loss": 11.6709, "step": 41994 }, { "epoch": 0.8790714225906389, "grad_norm": 0.34677475690841675, "learning_rate": 0.00016057434415207118, "loss": 11.6642, "step": 41995 }, { "epoch": 0.8790923553546011, "grad_norm": 0.2817400395870209, "learning_rate": 0.00016057259962673988, "loss": 11.6627, "step": 41996 }, { "epoch": 0.8791132881185632, "grad_norm": 0.36383968591690063, "learning_rate": 0.00016057085507229014, "loss": 11.6728, "step": 41997 }, { "epoch": 0.8791342208825254, "grad_norm": 0.30736830830574036, "learning_rate": 0.00016056911048872277, "loss": 11.67, "step": 41998 }, { "epoch": 0.8791551536464874, "grad_norm": 0.3553277254104614, "learning_rate": 0.0001605673658760386, "loss": 11.6726, "step": 41999 }, { "epoch": 0.8791760864104496, "grad_norm": 0.26524510979652405, "learning_rate": 0.00016056562123423852, "loss": 11.655, "step": 42000 }, { "epoch": 0.8791760864104496, "eval_loss": 11.669397354125977, "eval_runtime": 34.3659, "eval_samples_per_second": 27.964, "eval_steps_per_second": 7.013, "step": 42000 }, { "epoch": 0.8791970191744118, "grad_norm": 0.3392725884914398, "learning_rate": 0.00016056387656332332, "loss": 11.6805, "step": 42001 }, { "epoch": 0.8792179519383739, "grad_norm": 0.40831655263900757, "learning_rate": 0.00016056213186329386, "loss": 11.6824, "step": 42002 }, { "epoch": 0.8792388847023361, "grad_norm": 0.25924843549728394, "learning_rate": 0.0001605603871341509, "loss": 11.6674, "step": 42003 }, { "epoch": 0.8792598174662982, "grad_norm": 0.3152253329753876, "learning_rate": 0.00016055864237589544, "loss": 11.665, "step": 42004 }, { "epoch": 0.8792807502302604, "grad_norm": 0.29103460907936096, "learning_rate": 0.00016055689758852817, "loss": 11.685, "step": 42005 }, { "epoch": 0.8793016829942225, "grad_norm": 0.2835133969783783, "learning_rate": 0.00016055515277205, "loss": 11.6704, "step": 42006 }, { "epoch": 0.8793226157581847, "grad_norm": 0.9196601510047913, "learning_rate": 0.00016055340792646175, "loss": 11.6909, "step": 42007 }, { "epoch": 0.8793435485221469, "grad_norm": 0.2996886968612671, "learning_rate": 0.00016055166305176428, "loss": 11.6544, "step": 42008 }, { "epoch": 0.879364481286109, "grad_norm": 0.31699687242507935, "learning_rate": 0.00016054991814795844, "loss": 11.6703, "step": 42009 }, { "epoch": 0.8793854140500712, "grad_norm": 0.29737719893455505, "learning_rate": 0.00016054817321504498, "loss": 11.653, "step": 42010 }, { "epoch": 0.8794063468140333, "grad_norm": 0.3119979202747345, "learning_rate": 0.00016054642825302483, "loss": 11.655, "step": 42011 }, { "epoch": 0.8794272795779955, "grad_norm": 0.38917276263237, "learning_rate": 0.0001605446832618988, "loss": 11.6778, "step": 42012 }, { "epoch": 0.8794482123419576, "grad_norm": 0.30338501930236816, "learning_rate": 0.00016054293824166772, "loss": 11.6856, "step": 42013 }, { "epoch": 0.8794691451059198, "grad_norm": 0.29370221495628357, "learning_rate": 0.00016054119319233243, "loss": 11.6736, "step": 42014 }, { "epoch": 0.879490077869882, "grad_norm": 0.26475706696510315, "learning_rate": 0.00016053944811389377, "loss": 11.6587, "step": 42015 }, { "epoch": 0.8795110106338441, "grad_norm": 0.23619066178798676, "learning_rate": 0.0001605377030063526, "loss": 11.6544, "step": 42016 }, { "epoch": 0.8795319433978063, "grad_norm": 0.2804078161716461, "learning_rate": 0.00016053595786970975, "loss": 11.6627, "step": 42017 }, { "epoch": 0.8795528761617684, "grad_norm": 0.3300285041332245, "learning_rate": 0.00016053421270396606, "loss": 11.6547, "step": 42018 }, { "epoch": 0.8795738089257306, "grad_norm": 0.3617243766784668, "learning_rate": 0.0001605324675091223, "loss": 11.6777, "step": 42019 }, { "epoch": 0.8795947416896928, "grad_norm": 0.25354623794555664, "learning_rate": 0.00016053072228517943, "loss": 11.6685, "step": 42020 }, { "epoch": 0.8796156744536548, "grad_norm": 0.3106859028339386, "learning_rate": 0.0001605289770321382, "loss": 11.6449, "step": 42021 }, { "epoch": 0.879636607217617, "grad_norm": 0.2478911578655243, "learning_rate": 0.0001605272317499995, "loss": 11.6705, "step": 42022 }, { "epoch": 0.8796575399815791, "grad_norm": 0.3188765347003937, "learning_rate": 0.00016052548643876412, "loss": 11.6759, "step": 42023 }, { "epoch": 0.8796784727455413, "grad_norm": 0.23760265111923218, "learning_rate": 0.00016052374109843293, "loss": 11.6723, "step": 42024 }, { "epoch": 0.8796994055095034, "grad_norm": 0.2770445644855499, "learning_rate": 0.00016052199572900677, "loss": 11.6671, "step": 42025 }, { "epoch": 0.8797203382734656, "grad_norm": 0.31677576899528503, "learning_rate": 0.00016052025033048647, "loss": 11.666, "step": 42026 }, { "epoch": 0.8797412710374278, "grad_norm": 0.36985069513320923, "learning_rate": 0.00016051850490287289, "loss": 11.6941, "step": 42027 }, { "epoch": 0.8797622038013899, "grad_norm": 0.27450478076934814, "learning_rate": 0.00016051675944616688, "loss": 11.6647, "step": 42028 }, { "epoch": 0.8797831365653521, "grad_norm": 0.36734721064567566, "learning_rate": 0.00016051501396036917, "loss": 11.6664, "step": 42029 }, { "epoch": 0.8798040693293142, "grad_norm": 0.2805638015270233, "learning_rate": 0.00016051326844548073, "loss": 11.6786, "step": 42030 }, { "epoch": 0.8798250020932764, "grad_norm": 0.3178422749042511, "learning_rate": 0.00016051152290150235, "loss": 11.6814, "step": 42031 }, { "epoch": 0.8798459348572385, "grad_norm": 0.32404571771621704, "learning_rate": 0.00016050977732843486, "loss": 11.672, "step": 42032 }, { "epoch": 0.8798668676212007, "grad_norm": 0.33619293570518494, "learning_rate": 0.00016050803172627912, "loss": 11.6623, "step": 42033 }, { "epoch": 0.8798878003851629, "grad_norm": 0.32311293482780457, "learning_rate": 0.00016050628609503594, "loss": 11.6815, "step": 42034 }, { "epoch": 0.879908733149125, "grad_norm": 0.2955191731452942, "learning_rate": 0.00016050454043470616, "loss": 11.6768, "step": 42035 }, { "epoch": 0.8799296659130872, "grad_norm": 0.2645190954208374, "learning_rate": 0.00016050279474529066, "loss": 11.6662, "step": 42036 }, { "epoch": 0.8799505986770493, "grad_norm": 0.3733888864517212, "learning_rate": 0.00016050104902679026, "loss": 11.6851, "step": 42037 }, { "epoch": 0.8799715314410115, "grad_norm": 0.29364803433418274, "learning_rate": 0.0001604993032792058, "loss": 11.6708, "step": 42038 }, { "epoch": 0.8799924642049737, "grad_norm": 0.2780166268348694, "learning_rate": 0.00016049755750253806, "loss": 11.6536, "step": 42039 }, { "epoch": 0.8800133969689358, "grad_norm": 0.3180443048477173, "learning_rate": 0.000160495811696788, "loss": 11.6756, "step": 42040 }, { "epoch": 0.880034329732898, "grad_norm": 0.3263711929321289, "learning_rate": 0.00016049406586195634, "loss": 11.674, "step": 42041 }, { "epoch": 0.8800552624968601, "grad_norm": 0.24805527925491333, "learning_rate": 0.000160492319998044, "loss": 11.66, "step": 42042 }, { "epoch": 0.8800761952608223, "grad_norm": 0.2869216799736023, "learning_rate": 0.00016049057410505178, "loss": 11.66, "step": 42043 }, { "epoch": 0.8800971280247843, "grad_norm": 0.31256505846977234, "learning_rate": 0.00016048882818298053, "loss": 11.6753, "step": 42044 }, { "epoch": 0.8801180607887465, "grad_norm": 0.3057457506656647, "learning_rate": 0.00016048708223183108, "loss": 11.6795, "step": 42045 }, { "epoch": 0.8801389935527087, "grad_norm": 0.39133980870246887, "learning_rate": 0.00016048533625160428, "loss": 11.6467, "step": 42046 }, { "epoch": 0.8801599263166708, "grad_norm": 0.2990063428878784, "learning_rate": 0.00016048359024230097, "loss": 11.6658, "step": 42047 }, { "epoch": 0.880180859080633, "grad_norm": 0.38696593046188354, "learning_rate": 0.000160481844203922, "loss": 11.6766, "step": 42048 }, { "epoch": 0.8802017918445951, "grad_norm": 0.29272139072418213, "learning_rate": 0.00016048009813646817, "loss": 11.6523, "step": 42049 }, { "epoch": 0.8802227246085573, "grad_norm": 0.3769274950027466, "learning_rate": 0.00016047835203994036, "loss": 11.6627, "step": 42050 }, { "epoch": 0.8802436573725194, "grad_norm": 0.2610762417316437, "learning_rate": 0.00016047660591433937, "loss": 11.6757, "step": 42051 }, { "epoch": 0.8802645901364816, "grad_norm": 0.3196718394756317, "learning_rate": 0.0001604748597596661, "loss": 11.6848, "step": 42052 }, { "epoch": 0.8802855229004438, "grad_norm": 0.3387361764907837, "learning_rate": 0.00016047311357592135, "loss": 11.6749, "step": 42053 }, { "epoch": 0.8803064556644059, "grad_norm": 0.2538515031337738, "learning_rate": 0.00016047136736310592, "loss": 11.6648, "step": 42054 }, { "epoch": 0.8803273884283681, "grad_norm": 0.28178900480270386, "learning_rate": 0.00016046962112122072, "loss": 11.6823, "step": 42055 }, { "epoch": 0.8803483211923302, "grad_norm": 0.2886340320110321, "learning_rate": 0.00016046787485026655, "loss": 11.665, "step": 42056 }, { "epoch": 0.8803692539562924, "grad_norm": 0.398529589176178, "learning_rate": 0.0001604661285502443, "loss": 11.6818, "step": 42057 }, { "epoch": 0.8803901867202546, "grad_norm": 0.31864017248153687, "learning_rate": 0.0001604643822211547, "loss": 11.6775, "step": 42058 }, { "epoch": 0.8804111194842167, "grad_norm": 0.356808602809906, "learning_rate": 0.00016046263586299873, "loss": 11.6779, "step": 42059 }, { "epoch": 0.8804320522481789, "grad_norm": 0.2732256054878235, "learning_rate": 0.00016046088947577712, "loss": 11.6789, "step": 42060 }, { "epoch": 0.880452985012141, "grad_norm": 0.4090367555618286, "learning_rate": 0.00016045914305949077, "loss": 11.6689, "step": 42061 }, { "epoch": 0.8804739177761032, "grad_norm": 0.26335570216178894, "learning_rate": 0.0001604573966141405, "loss": 11.6696, "step": 42062 }, { "epoch": 0.8804948505400653, "grad_norm": 0.3542848229408264, "learning_rate": 0.00016045565013972715, "loss": 11.6679, "step": 42063 }, { "epoch": 0.8805157833040275, "grad_norm": 0.27568134665489197, "learning_rate": 0.00016045390363625154, "loss": 11.6696, "step": 42064 }, { "epoch": 0.8805367160679897, "grad_norm": 0.28374093770980835, "learning_rate": 0.00016045215710371453, "loss": 11.6705, "step": 42065 }, { "epoch": 0.8805576488319518, "grad_norm": 0.29843106865882874, "learning_rate": 0.00016045041054211693, "loss": 11.664, "step": 42066 }, { "epoch": 0.880578581595914, "grad_norm": 0.3158014416694641, "learning_rate": 0.00016044866395145967, "loss": 11.6661, "step": 42067 }, { "epoch": 0.880599514359876, "grad_norm": 0.3030645251274109, "learning_rate": 0.00016044691733174346, "loss": 11.6646, "step": 42068 }, { "epoch": 0.8806204471238382, "grad_norm": 0.3739195466041565, "learning_rate": 0.00016044517068296922, "loss": 11.6723, "step": 42069 }, { "epoch": 0.8806413798878003, "grad_norm": 0.294821560382843, "learning_rate": 0.0001604434240051378, "loss": 11.6621, "step": 42070 }, { "epoch": 0.8806623126517625, "grad_norm": 0.3526928126811981, "learning_rate": 0.00016044167729825, "loss": 11.6737, "step": 42071 }, { "epoch": 0.8806832454157247, "grad_norm": 0.30501484870910645, "learning_rate": 0.0001604399305623067, "loss": 11.666, "step": 42072 }, { "epoch": 0.8807041781796868, "grad_norm": 0.26501691341400146, "learning_rate": 0.0001604381837973087, "loss": 11.6473, "step": 42073 }, { "epoch": 0.880725110943649, "grad_norm": 0.3183709383010864, "learning_rate": 0.00016043643700325682, "loss": 11.6714, "step": 42074 }, { "epoch": 0.8807460437076111, "grad_norm": 0.32058587670326233, "learning_rate": 0.00016043469018015198, "loss": 11.6644, "step": 42075 }, { "epoch": 0.8807669764715733, "grad_norm": 0.34965845942497253, "learning_rate": 0.00016043294332799494, "loss": 11.6783, "step": 42076 }, { "epoch": 0.8807879092355355, "grad_norm": 0.34904950857162476, "learning_rate": 0.00016043119644678662, "loss": 11.6541, "step": 42077 }, { "epoch": 0.8808088419994976, "grad_norm": 0.2504878342151642, "learning_rate": 0.00016042944953652776, "loss": 11.6645, "step": 42078 }, { "epoch": 0.8808297747634598, "grad_norm": 0.41397330164909363, "learning_rate": 0.00016042770259721927, "loss": 11.6653, "step": 42079 }, { "epoch": 0.8808507075274219, "grad_norm": 0.30752575397491455, "learning_rate": 0.00016042595562886197, "loss": 11.6577, "step": 42080 }, { "epoch": 0.8808716402913841, "grad_norm": 0.2899611294269562, "learning_rate": 0.0001604242086314567, "loss": 11.6627, "step": 42081 }, { "epoch": 0.8808925730553462, "grad_norm": 0.26500219106674194, "learning_rate": 0.00016042246160500435, "loss": 11.6762, "step": 42082 }, { "epoch": 0.8809135058193084, "grad_norm": 0.31591796875, "learning_rate": 0.00016042071454950562, "loss": 11.6806, "step": 42083 }, { "epoch": 0.8809344385832706, "grad_norm": 0.2636338472366333, "learning_rate": 0.00016041896746496153, "loss": 11.6768, "step": 42084 }, { "epoch": 0.8809553713472327, "grad_norm": 0.2990235686302185, "learning_rate": 0.00016041722035137277, "loss": 11.6717, "step": 42085 }, { "epoch": 0.8809763041111949, "grad_norm": 0.3287983536720276, "learning_rate": 0.00016041547320874027, "loss": 11.6666, "step": 42086 }, { "epoch": 0.880997236875157, "grad_norm": 0.33610039949417114, "learning_rate": 0.00016041372603706484, "loss": 11.6934, "step": 42087 }, { "epoch": 0.8810181696391192, "grad_norm": 0.24800220131874084, "learning_rate": 0.00016041197883634732, "loss": 11.6659, "step": 42088 }, { "epoch": 0.8810391024030813, "grad_norm": 0.33666738867759705, "learning_rate": 0.00016041023160658858, "loss": 11.655, "step": 42089 }, { "epoch": 0.8810600351670435, "grad_norm": 0.368909627199173, "learning_rate": 0.00016040848434778937, "loss": 11.6813, "step": 42090 }, { "epoch": 0.8810809679310057, "grad_norm": 0.3785144090652466, "learning_rate": 0.00016040673705995063, "loss": 11.6681, "step": 42091 }, { "epoch": 0.8811019006949677, "grad_norm": 0.2973540723323822, "learning_rate": 0.00016040498974307315, "loss": 11.6607, "step": 42092 }, { "epoch": 0.8811228334589299, "grad_norm": 0.269212931394577, "learning_rate": 0.00016040324239715775, "loss": 11.6774, "step": 42093 }, { "epoch": 0.881143766222892, "grad_norm": 0.2678264379501343, "learning_rate": 0.00016040149502220537, "loss": 11.6703, "step": 42094 }, { "epoch": 0.8811646989868542, "grad_norm": 0.2732486128807068, "learning_rate": 0.00016039974761821672, "loss": 11.6739, "step": 42095 }, { "epoch": 0.8811856317508163, "grad_norm": 0.28495335578918457, "learning_rate": 0.00016039800018519273, "loss": 11.6753, "step": 42096 }, { "epoch": 0.8812065645147785, "grad_norm": 0.43730631470680237, "learning_rate": 0.0001603962527231342, "loss": 11.6808, "step": 42097 }, { "epoch": 0.8812274972787407, "grad_norm": 0.3597085773944855, "learning_rate": 0.000160394505232042, "loss": 11.6579, "step": 42098 }, { "epoch": 0.8812484300427028, "grad_norm": 0.3443435728549957, "learning_rate": 0.00016039275771191695, "loss": 11.6705, "step": 42099 }, { "epoch": 0.881269362806665, "grad_norm": 0.31565558910369873, "learning_rate": 0.0001603910101627599, "loss": 11.6811, "step": 42100 }, { "epoch": 0.8812902955706271, "grad_norm": 0.29156211018562317, "learning_rate": 0.00016038926258457164, "loss": 11.675, "step": 42101 }, { "epoch": 0.8813112283345893, "grad_norm": 0.40534186363220215, "learning_rate": 0.00016038751497735306, "loss": 11.6789, "step": 42102 }, { "epoch": 0.8813321610985515, "grad_norm": 0.3972615599632263, "learning_rate": 0.00016038576734110502, "loss": 11.6606, "step": 42103 }, { "epoch": 0.8813530938625136, "grad_norm": 0.30610391497612, "learning_rate": 0.0001603840196758283, "loss": 11.6818, "step": 42104 }, { "epoch": 0.8813740266264758, "grad_norm": 0.28434398770332336, "learning_rate": 0.00016038227198152377, "loss": 11.6692, "step": 42105 }, { "epoch": 0.8813949593904379, "grad_norm": 0.2779625654220581, "learning_rate": 0.0001603805242581923, "loss": 11.691, "step": 42106 }, { "epoch": 0.8814158921544001, "grad_norm": 0.2810108959674835, "learning_rate": 0.0001603787765058347, "loss": 11.6602, "step": 42107 }, { "epoch": 0.8814368249183622, "grad_norm": 0.4386070668697357, "learning_rate": 0.00016037702872445179, "loss": 11.658, "step": 42108 }, { "epoch": 0.8814577576823244, "grad_norm": 0.2824589014053345, "learning_rate": 0.00016037528091404445, "loss": 11.6837, "step": 42109 }, { "epoch": 0.8814786904462866, "grad_norm": 0.2544029653072357, "learning_rate": 0.00016037353307461348, "loss": 11.6602, "step": 42110 }, { "epoch": 0.8814996232102487, "grad_norm": 0.3036133050918579, "learning_rate": 0.00016037178520615978, "loss": 11.6642, "step": 42111 }, { "epoch": 0.8815205559742109, "grad_norm": 0.34089672565460205, "learning_rate": 0.00016037003730868415, "loss": 11.674, "step": 42112 }, { "epoch": 0.881541488738173, "grad_norm": 0.25284796953201294, "learning_rate": 0.0001603682893821874, "loss": 11.6716, "step": 42113 }, { "epoch": 0.8815624215021352, "grad_norm": 0.379738450050354, "learning_rate": 0.00016036654142667043, "loss": 11.6497, "step": 42114 }, { "epoch": 0.8815833542660972, "grad_norm": 0.4114341735839844, "learning_rate": 0.00016036479344213404, "loss": 11.6842, "step": 42115 }, { "epoch": 0.8816042870300594, "grad_norm": 0.3888779580593109, "learning_rate": 0.00016036304542857907, "loss": 11.6684, "step": 42116 }, { "epoch": 0.8816252197940216, "grad_norm": 0.32033950090408325, "learning_rate": 0.0001603612973860064, "loss": 11.6881, "step": 42117 }, { "epoch": 0.8816461525579837, "grad_norm": 0.3747483491897583, "learning_rate": 0.00016035954931441684, "loss": 11.6845, "step": 42118 }, { "epoch": 0.8816670853219459, "grad_norm": 0.26518914103507996, "learning_rate": 0.00016035780121381125, "loss": 11.6742, "step": 42119 }, { "epoch": 0.881688018085908, "grad_norm": 0.3010357916355133, "learning_rate": 0.00016035605308419043, "loss": 11.6761, "step": 42120 }, { "epoch": 0.8817089508498702, "grad_norm": 0.3139316439628601, "learning_rate": 0.00016035430492555524, "loss": 11.6837, "step": 42121 }, { "epoch": 0.8817298836138324, "grad_norm": 0.27820688486099243, "learning_rate": 0.00016035255673790653, "loss": 11.6715, "step": 42122 }, { "epoch": 0.8817508163777945, "grad_norm": 0.27921387553215027, "learning_rate": 0.00016035080852124515, "loss": 11.6668, "step": 42123 }, { "epoch": 0.8817717491417567, "grad_norm": 0.3492536246776581, "learning_rate": 0.00016034906027557195, "loss": 11.6802, "step": 42124 }, { "epoch": 0.8817926819057188, "grad_norm": 0.3511081635951996, "learning_rate": 0.0001603473120008877, "loss": 11.6547, "step": 42125 }, { "epoch": 0.881813614669681, "grad_norm": 0.29123830795288086, "learning_rate": 0.0001603455636971933, "loss": 11.6622, "step": 42126 }, { "epoch": 0.8818345474336431, "grad_norm": 0.3378157317638397, "learning_rate": 0.0001603438153644896, "loss": 11.6545, "step": 42127 }, { "epoch": 0.8818554801976053, "grad_norm": 0.3706226050853729, "learning_rate": 0.0001603420670027774, "loss": 11.664, "step": 42128 }, { "epoch": 0.8818764129615675, "grad_norm": 0.25266382098197937, "learning_rate": 0.00016034031861205755, "loss": 11.6696, "step": 42129 }, { "epoch": 0.8818973457255296, "grad_norm": 0.3625740706920624, "learning_rate": 0.00016033857019233093, "loss": 11.6811, "step": 42130 }, { "epoch": 0.8819182784894918, "grad_norm": 0.38108760118484497, "learning_rate": 0.00016033682174359832, "loss": 11.6582, "step": 42131 }, { "epoch": 0.8819392112534539, "grad_norm": 0.3197648227214813, "learning_rate": 0.0001603350732658606, "loss": 11.669, "step": 42132 }, { "epoch": 0.8819601440174161, "grad_norm": 0.29286113381385803, "learning_rate": 0.00016033332475911863, "loss": 11.6851, "step": 42133 }, { "epoch": 0.8819810767813782, "grad_norm": 0.28132012486457825, "learning_rate": 0.00016033157622337318, "loss": 11.6751, "step": 42134 }, { "epoch": 0.8820020095453404, "grad_norm": 0.3932543992996216, "learning_rate": 0.00016032982765862514, "loss": 11.6657, "step": 42135 }, { "epoch": 0.8820229423093026, "grad_norm": 0.32788795232772827, "learning_rate": 0.00016032807906487536, "loss": 11.6762, "step": 42136 }, { "epoch": 0.8820438750732646, "grad_norm": 0.3399496376514435, "learning_rate": 0.00016032633044212467, "loss": 11.6665, "step": 42137 }, { "epoch": 0.8820648078372268, "grad_norm": 0.31237998604774475, "learning_rate": 0.00016032458179037387, "loss": 11.685, "step": 42138 }, { "epoch": 0.8820857406011889, "grad_norm": 0.39322513341903687, "learning_rate": 0.00016032283310962386, "loss": 11.6579, "step": 42139 }, { "epoch": 0.8821066733651511, "grad_norm": 0.3168843984603882, "learning_rate": 0.00016032108439987543, "loss": 11.6541, "step": 42140 }, { "epoch": 0.8821276061291133, "grad_norm": 0.2838263511657715, "learning_rate": 0.0001603193356611295, "loss": 11.6761, "step": 42141 }, { "epoch": 0.8821485388930754, "grad_norm": 0.32438889145851135, "learning_rate": 0.00016031758689338677, "loss": 11.6645, "step": 42142 }, { "epoch": 0.8821694716570376, "grad_norm": 0.28719502687454224, "learning_rate": 0.00016031583809664825, "loss": 11.6703, "step": 42143 }, { "epoch": 0.8821904044209997, "grad_norm": 0.32871031761169434, "learning_rate": 0.00016031408927091466, "loss": 11.6607, "step": 42144 }, { "epoch": 0.8822113371849619, "grad_norm": 0.28183484077453613, "learning_rate": 0.0001603123404161869, "loss": 11.6578, "step": 42145 }, { "epoch": 0.882232269948924, "grad_norm": 0.2684788107872009, "learning_rate": 0.00016031059153246577, "loss": 11.6686, "step": 42146 }, { "epoch": 0.8822532027128862, "grad_norm": 0.30871427059173584, "learning_rate": 0.00016030884261975214, "loss": 11.6858, "step": 42147 }, { "epoch": 0.8822741354768484, "grad_norm": 0.3839309811592102, "learning_rate": 0.00016030709367804684, "loss": 11.6826, "step": 42148 }, { "epoch": 0.8822950682408105, "grad_norm": 0.3568270802497864, "learning_rate": 0.00016030534470735068, "loss": 11.6728, "step": 42149 }, { "epoch": 0.8823160010047727, "grad_norm": 0.27822771668434143, "learning_rate": 0.00016030359570766458, "loss": 11.6587, "step": 42150 }, { "epoch": 0.8823369337687348, "grad_norm": 0.2921748459339142, "learning_rate": 0.0001603018466789893, "loss": 11.6753, "step": 42151 }, { "epoch": 0.882357866532697, "grad_norm": 0.2703126072883606, "learning_rate": 0.00016030009762132573, "loss": 11.6671, "step": 42152 }, { "epoch": 0.8823787992966591, "grad_norm": 0.30820783972740173, "learning_rate": 0.0001602983485346747, "loss": 11.6736, "step": 42153 }, { "epoch": 0.8823997320606213, "grad_norm": 0.2508734464645386, "learning_rate": 0.00016029659941903704, "loss": 11.6736, "step": 42154 }, { "epoch": 0.8824206648245835, "grad_norm": 0.32034337520599365, "learning_rate": 0.00016029485027441357, "loss": 11.6872, "step": 42155 }, { "epoch": 0.8824415975885456, "grad_norm": 0.3247435688972473, "learning_rate": 0.00016029310110080518, "loss": 11.6792, "step": 42156 }, { "epoch": 0.8824625303525078, "grad_norm": 0.3381737470626831, "learning_rate": 0.00016029135189821268, "loss": 11.6526, "step": 42157 }, { "epoch": 0.8824834631164699, "grad_norm": 0.26809266209602356, "learning_rate": 0.00016028960266663696, "loss": 11.658, "step": 42158 }, { "epoch": 0.8825043958804321, "grad_norm": 0.2782619893550873, "learning_rate": 0.00016028785340607877, "loss": 11.6737, "step": 42159 }, { "epoch": 0.8825253286443943, "grad_norm": 0.26485446095466614, "learning_rate": 0.000160286104116539, "loss": 11.6641, "step": 42160 }, { "epoch": 0.8825462614083563, "grad_norm": 0.36860954761505127, "learning_rate": 0.00016028435479801853, "loss": 11.6855, "step": 42161 }, { "epoch": 0.8825671941723185, "grad_norm": 0.3581257164478302, "learning_rate": 0.00016028260545051813, "loss": 11.6562, "step": 42162 }, { "epoch": 0.8825881269362806, "grad_norm": 0.36410045623779297, "learning_rate": 0.0001602808560740387, "loss": 11.6703, "step": 42163 }, { "epoch": 0.8826090597002428, "grad_norm": 0.3257822096347809, "learning_rate": 0.00016027910666858102, "loss": 11.6726, "step": 42164 }, { "epoch": 0.8826299924642049, "grad_norm": 0.48374536633491516, "learning_rate": 0.000160277357234146, "loss": 11.6722, "step": 42165 }, { "epoch": 0.8826509252281671, "grad_norm": 0.3431119918823242, "learning_rate": 0.0001602756077707344, "loss": 11.6745, "step": 42166 }, { "epoch": 0.8826718579921293, "grad_norm": 0.3591051697731018, "learning_rate": 0.00016027385827834719, "loss": 11.6878, "step": 42167 }, { "epoch": 0.8826927907560914, "grad_norm": 0.26755934953689575, "learning_rate": 0.00016027210875698504, "loss": 11.6757, "step": 42168 }, { "epoch": 0.8827137235200536, "grad_norm": 0.29707691073417664, "learning_rate": 0.00016027035920664892, "loss": 11.6652, "step": 42169 }, { "epoch": 0.8827346562840157, "grad_norm": 0.2976718544960022, "learning_rate": 0.00016026860962733963, "loss": 11.6778, "step": 42170 }, { "epoch": 0.8827555890479779, "grad_norm": 0.3242807984352112, "learning_rate": 0.000160266860019058, "loss": 11.6585, "step": 42171 }, { "epoch": 0.88277652181194, "grad_norm": 0.2859058976173401, "learning_rate": 0.0001602651103818049, "loss": 11.6519, "step": 42172 }, { "epoch": 0.8827974545759022, "grad_norm": 0.3690873384475708, "learning_rate": 0.00016026336071558114, "loss": 11.6658, "step": 42173 }, { "epoch": 0.8828183873398644, "grad_norm": 0.4027274549007416, "learning_rate": 0.00016026161102038758, "loss": 11.6587, "step": 42174 }, { "epoch": 0.8828393201038265, "grad_norm": 0.3030133545398712, "learning_rate": 0.00016025986129622507, "loss": 11.6665, "step": 42175 }, { "epoch": 0.8828602528677887, "grad_norm": 0.33771803975105286, "learning_rate": 0.00016025811154309438, "loss": 11.6602, "step": 42176 }, { "epoch": 0.8828811856317508, "grad_norm": 0.2763645052909851, "learning_rate": 0.0001602563617609965, "loss": 11.658, "step": 42177 }, { "epoch": 0.882902118395713, "grad_norm": 0.36025792360305786, "learning_rate": 0.0001602546119499321, "loss": 11.6442, "step": 42178 }, { "epoch": 0.8829230511596752, "grad_norm": 0.29694148898124695, "learning_rate": 0.00016025286210990213, "loss": 11.6697, "step": 42179 }, { "epoch": 0.8829439839236373, "grad_norm": 0.37435367703437805, "learning_rate": 0.0001602511122409074, "loss": 11.6777, "step": 42180 }, { "epoch": 0.8829649166875995, "grad_norm": 0.35954853892326355, "learning_rate": 0.00016024936234294875, "loss": 11.6722, "step": 42181 }, { "epoch": 0.8829858494515616, "grad_norm": 0.353220134973526, "learning_rate": 0.00016024761241602704, "loss": 11.6803, "step": 42182 }, { "epoch": 0.8830067822155238, "grad_norm": 0.36476004123687744, "learning_rate": 0.0001602458624601431, "loss": 11.6675, "step": 42183 }, { "epoch": 0.8830277149794858, "grad_norm": 0.2682257890701294, "learning_rate": 0.00016024411247529772, "loss": 11.669, "step": 42184 }, { "epoch": 0.883048647743448, "grad_norm": 0.35002994537353516, "learning_rate": 0.0001602423624614918, "loss": 11.659, "step": 42185 }, { "epoch": 0.8830695805074102, "grad_norm": 0.348256379365921, "learning_rate": 0.00016024061241872622, "loss": 11.666, "step": 42186 }, { "epoch": 0.8830905132713723, "grad_norm": 0.24777619540691376, "learning_rate": 0.00016023886234700174, "loss": 11.6686, "step": 42187 }, { "epoch": 0.8831114460353345, "grad_norm": 0.25565293431282043, "learning_rate": 0.00016023711224631923, "loss": 11.6806, "step": 42188 }, { "epoch": 0.8831323787992966, "grad_norm": 0.34337157011032104, "learning_rate": 0.00016023536211667952, "loss": 11.6664, "step": 42189 }, { "epoch": 0.8831533115632588, "grad_norm": 0.405349999666214, "learning_rate": 0.0001602336119580835, "loss": 11.6711, "step": 42190 }, { "epoch": 0.8831742443272209, "grad_norm": 0.2566385269165039, "learning_rate": 0.00016023186177053193, "loss": 11.6644, "step": 42191 }, { "epoch": 0.8831951770911831, "grad_norm": 0.2727670669555664, "learning_rate": 0.00016023011155402573, "loss": 11.6515, "step": 42192 }, { "epoch": 0.8832161098551453, "grad_norm": 0.285428911447525, "learning_rate": 0.0001602283613085657, "loss": 11.6643, "step": 42193 }, { "epoch": 0.8832370426191074, "grad_norm": 0.3799222707748413, "learning_rate": 0.00016022661103415269, "loss": 11.6609, "step": 42194 }, { "epoch": 0.8832579753830696, "grad_norm": 0.3597728908061981, "learning_rate": 0.00016022486073078752, "loss": 11.6837, "step": 42195 }, { "epoch": 0.8832789081470317, "grad_norm": 0.26734426617622375, "learning_rate": 0.00016022311039847107, "loss": 11.6725, "step": 42196 }, { "epoch": 0.8832998409109939, "grad_norm": 0.3491881191730499, "learning_rate": 0.00016022136003720418, "loss": 11.6716, "step": 42197 }, { "epoch": 0.8833207736749561, "grad_norm": 0.36381879448890686, "learning_rate": 0.00016021960964698764, "loss": 11.6789, "step": 42198 }, { "epoch": 0.8833417064389182, "grad_norm": 0.27467721700668335, "learning_rate": 0.00016021785922782237, "loss": 11.6693, "step": 42199 }, { "epoch": 0.8833626392028804, "grad_norm": 0.29276642203330994, "learning_rate": 0.00016021610877970912, "loss": 11.662, "step": 42200 }, { "epoch": 0.8833835719668425, "grad_norm": 0.29961955547332764, "learning_rate": 0.0001602143583026488, "loss": 11.6697, "step": 42201 }, { "epoch": 0.8834045047308047, "grad_norm": 0.29744479060173035, "learning_rate": 0.00016021260779664224, "loss": 11.6622, "step": 42202 }, { "epoch": 0.8834254374947668, "grad_norm": 0.3376673758029938, "learning_rate": 0.00016021085726169028, "loss": 11.6955, "step": 42203 }, { "epoch": 0.883446370258729, "grad_norm": 0.27252131700515747, "learning_rate": 0.00016020910669779377, "loss": 11.679, "step": 42204 }, { "epoch": 0.8834673030226912, "grad_norm": 0.30242764949798584, "learning_rate": 0.0001602073561049535, "loss": 11.6736, "step": 42205 }, { "epoch": 0.8834882357866533, "grad_norm": 0.36619800329208374, "learning_rate": 0.00016020560548317034, "loss": 11.6576, "step": 42206 }, { "epoch": 0.8835091685506155, "grad_norm": 0.2650165557861328, "learning_rate": 0.00016020385483244516, "loss": 11.6672, "step": 42207 }, { "epoch": 0.8835301013145775, "grad_norm": 0.382479727268219, "learning_rate": 0.00016020210415277876, "loss": 11.654, "step": 42208 }, { "epoch": 0.8835510340785397, "grad_norm": 0.28127479553222656, "learning_rate": 0.00016020035344417203, "loss": 11.6554, "step": 42209 }, { "epoch": 0.8835719668425018, "grad_norm": 0.2783868610858917, "learning_rate": 0.00016019860270662578, "loss": 11.6574, "step": 42210 }, { "epoch": 0.883592899606464, "grad_norm": 0.36482980847358704, "learning_rate": 0.00016019685194014084, "loss": 11.6644, "step": 42211 }, { "epoch": 0.8836138323704262, "grad_norm": 0.2857396602630615, "learning_rate": 0.00016019510114471808, "loss": 11.6663, "step": 42212 }, { "epoch": 0.8836347651343883, "grad_norm": 0.3209630250930786, "learning_rate": 0.00016019335032035832, "loss": 11.6664, "step": 42213 }, { "epoch": 0.8836556978983505, "grad_norm": 0.35279321670532227, "learning_rate": 0.00016019159946706243, "loss": 11.6688, "step": 42214 }, { "epoch": 0.8836766306623126, "grad_norm": 0.2817082107067108, "learning_rate": 0.00016018984858483118, "loss": 11.6756, "step": 42215 }, { "epoch": 0.8836975634262748, "grad_norm": 0.28317660093307495, "learning_rate": 0.0001601880976736655, "loss": 11.6679, "step": 42216 }, { "epoch": 0.883718496190237, "grad_norm": 0.2667638659477234, "learning_rate": 0.00016018634673356622, "loss": 11.669, "step": 42217 }, { "epoch": 0.8837394289541991, "grad_norm": 0.3039962947368622, "learning_rate": 0.00016018459576453412, "loss": 11.6789, "step": 42218 }, { "epoch": 0.8837603617181613, "grad_norm": 0.3042329251766205, "learning_rate": 0.0001601828447665701, "loss": 11.6735, "step": 42219 }, { "epoch": 0.8837812944821234, "grad_norm": 0.3778664469718933, "learning_rate": 0.00016018109373967495, "loss": 11.6795, "step": 42220 }, { "epoch": 0.8838022272460856, "grad_norm": 0.4436868131160736, "learning_rate": 0.00016017934268384957, "loss": 11.6804, "step": 42221 }, { "epoch": 0.8838231600100477, "grad_norm": 0.3004530668258667, "learning_rate": 0.00016017759159909475, "loss": 11.6652, "step": 42222 }, { "epoch": 0.8838440927740099, "grad_norm": 0.3948303461074829, "learning_rate": 0.0001601758404854114, "loss": 11.6694, "step": 42223 }, { "epoch": 0.8838650255379721, "grad_norm": 0.4668336510658264, "learning_rate": 0.00016017408934280026, "loss": 11.6506, "step": 42224 }, { "epoch": 0.8838859583019342, "grad_norm": 0.3040045499801636, "learning_rate": 0.00016017233817126227, "loss": 11.643, "step": 42225 }, { "epoch": 0.8839068910658964, "grad_norm": 0.3817802965641022, "learning_rate": 0.00016017058697079823, "loss": 11.678, "step": 42226 }, { "epoch": 0.8839278238298585, "grad_norm": 0.3629734218120575, "learning_rate": 0.00016016883574140897, "loss": 11.6749, "step": 42227 }, { "epoch": 0.8839487565938207, "grad_norm": 0.3281264305114746, "learning_rate": 0.00016016708448309534, "loss": 11.6695, "step": 42228 }, { "epoch": 0.8839696893577828, "grad_norm": 0.3499103784561157, "learning_rate": 0.0001601653331958582, "loss": 11.6606, "step": 42229 }, { "epoch": 0.883990622121745, "grad_norm": 0.26588863134384155, "learning_rate": 0.00016016358187969837, "loss": 11.6713, "step": 42230 }, { "epoch": 0.8840115548857072, "grad_norm": 0.3471323549747467, "learning_rate": 0.00016016183053461672, "loss": 11.6525, "step": 42231 }, { "epoch": 0.8840324876496692, "grad_norm": 0.37313514947891235, "learning_rate": 0.00016016007916061403, "loss": 11.6628, "step": 42232 }, { "epoch": 0.8840534204136314, "grad_norm": 0.2898087501525879, "learning_rate": 0.00016015832775769125, "loss": 11.6698, "step": 42233 }, { "epoch": 0.8840743531775935, "grad_norm": 0.347795307636261, "learning_rate": 0.0001601565763258491, "loss": 11.6855, "step": 42234 }, { "epoch": 0.8840952859415557, "grad_norm": 0.32436203956604004, "learning_rate": 0.00016015482486508848, "loss": 11.6607, "step": 42235 }, { "epoch": 0.8841162187055179, "grad_norm": 0.3022027313709259, "learning_rate": 0.00016015307337541022, "loss": 11.6728, "step": 42236 }, { "epoch": 0.88413715146948, "grad_norm": 0.2779942452907562, "learning_rate": 0.00016015132185681524, "loss": 11.6488, "step": 42237 }, { "epoch": 0.8841580842334422, "grad_norm": 0.33872154355049133, "learning_rate": 0.00016014957030930423, "loss": 11.6579, "step": 42238 }, { "epoch": 0.8841790169974043, "grad_norm": 0.4133250415325165, "learning_rate": 0.00016014781873287818, "loss": 11.6791, "step": 42239 }, { "epoch": 0.8841999497613665, "grad_norm": 0.24757303297519684, "learning_rate": 0.00016014606712753783, "loss": 11.6576, "step": 42240 }, { "epoch": 0.8842208825253286, "grad_norm": 0.29324740171432495, "learning_rate": 0.00016014431549328412, "loss": 11.6727, "step": 42241 }, { "epoch": 0.8842418152892908, "grad_norm": 0.31472349166870117, "learning_rate": 0.00016014256383011777, "loss": 11.6497, "step": 42242 }, { "epoch": 0.884262748053253, "grad_norm": 0.31708824634552, "learning_rate": 0.0001601408121380397, "loss": 11.6611, "step": 42243 }, { "epoch": 0.8842836808172151, "grad_norm": 0.2568705081939697, "learning_rate": 0.00016013906041705074, "loss": 11.6794, "step": 42244 }, { "epoch": 0.8843046135811773, "grad_norm": 0.32062122225761414, "learning_rate": 0.0001601373086671517, "loss": 11.6694, "step": 42245 }, { "epoch": 0.8843255463451394, "grad_norm": 0.2824982702732086, "learning_rate": 0.0001601355568883435, "loss": 11.6735, "step": 42246 }, { "epoch": 0.8843464791091016, "grad_norm": 0.3008892238140106, "learning_rate": 0.00016013380508062689, "loss": 11.6756, "step": 42247 }, { "epoch": 0.8843674118730637, "grad_norm": 0.2689487636089325, "learning_rate": 0.0001601320532440028, "loss": 11.6547, "step": 42248 }, { "epoch": 0.8843883446370259, "grad_norm": 0.3466840386390686, "learning_rate": 0.00016013030137847198, "loss": 11.6598, "step": 42249 }, { "epoch": 0.8844092774009881, "grad_norm": 0.324598491191864, "learning_rate": 0.00016012854948403536, "loss": 11.6777, "step": 42250 }, { "epoch": 0.8844302101649502, "grad_norm": 0.4275309145450592, "learning_rate": 0.00016012679756069372, "loss": 11.6705, "step": 42251 }, { "epoch": 0.8844511429289124, "grad_norm": 0.2891021966934204, "learning_rate": 0.00016012504560844793, "loss": 11.6894, "step": 42252 }, { "epoch": 0.8844720756928744, "grad_norm": 0.3100816607475281, "learning_rate": 0.0001601232936272988, "loss": 11.6841, "step": 42253 }, { "epoch": 0.8844930084568366, "grad_norm": 0.35270437598228455, "learning_rate": 0.00016012154161724725, "loss": 11.6681, "step": 42254 }, { "epoch": 0.8845139412207988, "grad_norm": 0.2854290008544922, "learning_rate": 0.000160119789578294, "loss": 11.6703, "step": 42255 }, { "epoch": 0.8845348739847609, "grad_norm": 0.2916627824306488, "learning_rate": 0.00016011803751044004, "loss": 11.659, "step": 42256 }, { "epoch": 0.8845558067487231, "grad_norm": 0.3498707413673401, "learning_rate": 0.0001601162854136861, "loss": 11.6681, "step": 42257 }, { "epoch": 0.8845767395126852, "grad_norm": 0.2629568576812744, "learning_rate": 0.00016011453328803303, "loss": 11.6435, "step": 42258 }, { "epoch": 0.8845976722766474, "grad_norm": 0.259225994348526, "learning_rate": 0.00016011278113348172, "loss": 11.6828, "step": 42259 }, { "epoch": 0.8846186050406095, "grad_norm": 0.2803839445114136, "learning_rate": 0.00016011102895003302, "loss": 11.6618, "step": 42260 }, { "epoch": 0.8846395378045717, "grad_norm": 0.38058918714523315, "learning_rate": 0.0001601092767376877, "loss": 11.6575, "step": 42261 }, { "epoch": 0.8846604705685339, "grad_norm": 0.4006722569465637, "learning_rate": 0.00016010752449644664, "loss": 11.6813, "step": 42262 }, { "epoch": 0.884681403332496, "grad_norm": 0.2934236526489258, "learning_rate": 0.00016010577222631074, "loss": 11.672, "step": 42263 }, { "epoch": 0.8847023360964582, "grad_norm": 0.27784475684165955, "learning_rate": 0.00016010401992728074, "loss": 11.6584, "step": 42264 }, { "epoch": 0.8847232688604203, "grad_norm": 0.2814127802848816, "learning_rate": 0.00016010226759935757, "loss": 11.6575, "step": 42265 }, { "epoch": 0.8847442016243825, "grad_norm": 0.37356850504875183, "learning_rate": 0.000160100515242542, "loss": 11.6699, "step": 42266 }, { "epoch": 0.8847651343883446, "grad_norm": 0.38493770360946655, "learning_rate": 0.00016009876285683495, "loss": 11.6898, "step": 42267 }, { "epoch": 0.8847860671523068, "grad_norm": 0.27669450640678406, "learning_rate": 0.00016009701044223719, "loss": 11.659, "step": 42268 }, { "epoch": 0.884806999916269, "grad_norm": 0.2776430547237396, "learning_rate": 0.0001600952579987496, "loss": 11.6692, "step": 42269 }, { "epoch": 0.8848279326802311, "grad_norm": 0.3109874129295349, "learning_rate": 0.00016009350552637303, "loss": 11.6686, "step": 42270 }, { "epoch": 0.8848488654441933, "grad_norm": 0.3448595106601715, "learning_rate": 0.00016009175302510832, "loss": 11.6655, "step": 42271 }, { "epoch": 0.8848697982081554, "grad_norm": 0.2891189754009247, "learning_rate": 0.00016009000049495623, "loss": 11.67, "step": 42272 }, { "epoch": 0.8848907309721176, "grad_norm": 0.24458074569702148, "learning_rate": 0.00016008824793591772, "loss": 11.6489, "step": 42273 }, { "epoch": 0.8849116637360797, "grad_norm": 0.381755530834198, "learning_rate": 0.0001600864953479936, "loss": 11.6831, "step": 42274 }, { "epoch": 0.8849325965000419, "grad_norm": 0.25732845067977905, "learning_rate": 0.00016008474273118467, "loss": 11.6695, "step": 42275 }, { "epoch": 0.8849535292640041, "grad_norm": 0.30033522844314575, "learning_rate": 0.0001600829900854918, "loss": 11.6751, "step": 42276 }, { "epoch": 0.8849744620279661, "grad_norm": 0.34391888976097107, "learning_rate": 0.00016008123741091587, "loss": 11.6755, "step": 42277 }, { "epoch": 0.8849953947919283, "grad_norm": 0.34691116213798523, "learning_rate": 0.00016007948470745764, "loss": 11.6767, "step": 42278 }, { "epoch": 0.8850163275558904, "grad_norm": 0.35609015822410583, "learning_rate": 0.00016007773197511803, "loss": 11.6536, "step": 42279 }, { "epoch": 0.8850372603198526, "grad_norm": 0.362346351146698, "learning_rate": 0.00016007597921389782, "loss": 11.6593, "step": 42280 }, { "epoch": 0.8850581930838148, "grad_norm": 0.2994009256362915, "learning_rate": 0.0001600742264237979, "loss": 11.6585, "step": 42281 }, { "epoch": 0.8850791258477769, "grad_norm": 0.355889230966568, "learning_rate": 0.0001600724736048191, "loss": 11.6686, "step": 42282 }, { "epoch": 0.8851000586117391, "grad_norm": 0.2833573818206787, "learning_rate": 0.00016007072075696223, "loss": 11.6435, "step": 42283 }, { "epoch": 0.8851209913757012, "grad_norm": 0.31002292037010193, "learning_rate": 0.0001600689678802282, "loss": 11.6609, "step": 42284 }, { "epoch": 0.8851419241396634, "grad_norm": 0.2900174856185913, "learning_rate": 0.00016006721497461782, "loss": 11.6527, "step": 42285 }, { "epoch": 0.8851628569036255, "grad_norm": 0.3713349997997284, "learning_rate": 0.00016006546204013188, "loss": 11.6834, "step": 42286 }, { "epoch": 0.8851837896675877, "grad_norm": 0.2760840356349945, "learning_rate": 0.0001600637090767713, "loss": 11.6736, "step": 42287 }, { "epoch": 0.8852047224315499, "grad_norm": 0.26865848898887634, "learning_rate": 0.00016006195608453687, "loss": 11.6502, "step": 42288 }, { "epoch": 0.885225655195512, "grad_norm": 0.345101535320282, "learning_rate": 0.00016006020306342947, "loss": 11.6874, "step": 42289 }, { "epoch": 0.8852465879594742, "grad_norm": 0.26490160822868347, "learning_rate": 0.00016005845001344993, "loss": 11.6778, "step": 42290 }, { "epoch": 0.8852675207234363, "grad_norm": 0.2741613984107971, "learning_rate": 0.0001600566969345991, "loss": 11.6742, "step": 42291 }, { "epoch": 0.8852884534873985, "grad_norm": 0.33702340722084045, "learning_rate": 0.0001600549438268778, "loss": 11.6722, "step": 42292 }, { "epoch": 0.8853093862513606, "grad_norm": 0.33581313490867615, "learning_rate": 0.00016005319069028686, "loss": 11.6941, "step": 42293 }, { "epoch": 0.8853303190153228, "grad_norm": 0.3094775676727295, "learning_rate": 0.00016005143752482714, "loss": 11.6655, "step": 42294 }, { "epoch": 0.885351251779285, "grad_norm": 0.38716286420822144, "learning_rate": 0.00016004968433049955, "loss": 11.679, "step": 42295 }, { "epoch": 0.8853721845432471, "grad_norm": 0.42991527915000916, "learning_rate": 0.00016004793110730483, "loss": 11.6509, "step": 42296 }, { "epoch": 0.8853931173072093, "grad_norm": 0.2853865325450897, "learning_rate": 0.00016004617785524388, "loss": 11.667, "step": 42297 }, { "epoch": 0.8854140500711714, "grad_norm": 0.33549293875694275, "learning_rate": 0.00016004442457431752, "loss": 11.6672, "step": 42298 }, { "epoch": 0.8854349828351336, "grad_norm": 0.41483059525489807, "learning_rate": 0.0001600426712645266, "loss": 11.6833, "step": 42299 }, { "epoch": 0.8854559155990958, "grad_norm": 0.30387216806411743, "learning_rate": 0.000160040917925872, "loss": 11.665, "step": 42300 }, { "epoch": 0.8854768483630578, "grad_norm": 0.48798617720603943, "learning_rate": 0.00016003916455835447, "loss": 11.6766, "step": 42301 }, { "epoch": 0.88549778112702, "grad_norm": 0.3063472509384155, "learning_rate": 0.00016003741116197494, "loss": 11.6821, "step": 42302 }, { "epoch": 0.8855187138909821, "grad_norm": 0.27909019589424133, "learning_rate": 0.00016003565773673424, "loss": 11.6439, "step": 42303 }, { "epoch": 0.8855396466549443, "grad_norm": 0.28404539823532104, "learning_rate": 0.0001600339042826332, "loss": 11.6851, "step": 42304 }, { "epoch": 0.8855605794189064, "grad_norm": 0.28637775778770447, "learning_rate": 0.0001600321507996726, "loss": 11.6703, "step": 42305 }, { "epoch": 0.8855815121828686, "grad_norm": 0.35176029801368713, "learning_rate": 0.00016003039728785338, "loss": 11.658, "step": 42306 }, { "epoch": 0.8856024449468308, "grad_norm": 0.2940652668476105, "learning_rate": 0.00016002864374717635, "loss": 11.6794, "step": 42307 }, { "epoch": 0.8856233777107929, "grad_norm": 0.3298811614513397, "learning_rate": 0.00016002689017764236, "loss": 11.6715, "step": 42308 }, { "epoch": 0.8856443104747551, "grad_norm": 0.33947470784187317, "learning_rate": 0.00016002513657925222, "loss": 11.671, "step": 42309 }, { "epoch": 0.8856652432387172, "grad_norm": 0.2817697823047638, "learning_rate": 0.0001600233829520068, "loss": 11.681, "step": 42310 }, { "epoch": 0.8856861760026794, "grad_norm": 0.35832157731056213, "learning_rate": 0.0001600216292959069, "loss": 11.6634, "step": 42311 }, { "epoch": 0.8857071087666415, "grad_norm": 0.2524200677871704, "learning_rate": 0.0001600198756109535, "loss": 11.6527, "step": 42312 }, { "epoch": 0.8857280415306037, "grad_norm": 0.23409418761730194, "learning_rate": 0.00016001812189714724, "loss": 11.6713, "step": 42313 }, { "epoch": 0.8857489742945659, "grad_norm": 0.38384634256362915, "learning_rate": 0.0001600163681544891, "loss": 11.6662, "step": 42314 }, { "epoch": 0.885769907058528, "grad_norm": 0.325187087059021, "learning_rate": 0.00016001461438297992, "loss": 11.6731, "step": 42315 }, { "epoch": 0.8857908398224902, "grad_norm": 0.2760956883430481, "learning_rate": 0.00016001286058262045, "loss": 11.6625, "step": 42316 }, { "epoch": 0.8858117725864523, "grad_norm": 0.33016619086265564, "learning_rate": 0.00016001110675341166, "loss": 11.6765, "step": 42317 }, { "epoch": 0.8858327053504145, "grad_norm": 0.4551398754119873, "learning_rate": 0.00016000935289535432, "loss": 11.6757, "step": 42318 }, { "epoch": 0.8858536381143767, "grad_norm": 0.3225739598274231, "learning_rate": 0.00016000759900844925, "loss": 11.6577, "step": 42319 }, { "epoch": 0.8858745708783388, "grad_norm": 0.2804611921310425, "learning_rate": 0.00016000584509269733, "loss": 11.6592, "step": 42320 }, { "epoch": 0.885895503642301, "grad_norm": 0.38492849469184875, "learning_rate": 0.0001600040911480994, "loss": 11.6934, "step": 42321 }, { "epoch": 0.885916436406263, "grad_norm": 0.3594683110713959, "learning_rate": 0.00016000233717465632, "loss": 11.6729, "step": 42322 }, { "epoch": 0.8859373691702253, "grad_norm": 0.2988319993019104, "learning_rate": 0.0001600005831723689, "loss": 11.671, "step": 42323 }, { "epoch": 0.8859583019341873, "grad_norm": 0.29513779282569885, "learning_rate": 0.000159998829141238, "loss": 11.6799, "step": 42324 }, { "epoch": 0.8859792346981495, "grad_norm": 0.2848796248435974, "learning_rate": 0.00015999707508126441, "loss": 11.6651, "step": 42325 }, { "epoch": 0.8860001674621117, "grad_norm": 0.367052286863327, "learning_rate": 0.00015999532099244908, "loss": 11.6642, "step": 42326 }, { "epoch": 0.8860211002260738, "grad_norm": 0.29682064056396484, "learning_rate": 0.0001599935668747928, "loss": 11.6638, "step": 42327 }, { "epoch": 0.886042032990036, "grad_norm": 0.3117021322250366, "learning_rate": 0.0001599918127282964, "loss": 11.6552, "step": 42328 }, { "epoch": 0.8860629657539981, "grad_norm": 0.28485533595085144, "learning_rate": 0.00015999005855296073, "loss": 11.6604, "step": 42329 }, { "epoch": 0.8860838985179603, "grad_norm": 0.3497246503829956, "learning_rate": 0.00015998830434878664, "loss": 11.6622, "step": 42330 }, { "epoch": 0.8861048312819224, "grad_norm": 0.31627213954925537, "learning_rate": 0.00015998655011577496, "loss": 11.6817, "step": 42331 }, { "epoch": 0.8861257640458846, "grad_norm": 0.3225245177745819, "learning_rate": 0.00015998479585392655, "loss": 11.6585, "step": 42332 }, { "epoch": 0.8861466968098468, "grad_norm": 0.28597530722618103, "learning_rate": 0.00015998304156324226, "loss": 11.6741, "step": 42333 }, { "epoch": 0.8861676295738089, "grad_norm": 0.2868739366531372, "learning_rate": 0.0001599812872437229, "loss": 11.6535, "step": 42334 }, { "epoch": 0.8861885623377711, "grad_norm": 0.30955907702445984, "learning_rate": 0.00015997953289536933, "loss": 11.6561, "step": 42335 }, { "epoch": 0.8862094951017332, "grad_norm": 0.31093570590019226, "learning_rate": 0.0001599777785181824, "loss": 11.6728, "step": 42336 }, { "epoch": 0.8862304278656954, "grad_norm": 0.3100398778915405, "learning_rate": 0.00015997602411216296, "loss": 11.6682, "step": 42337 }, { "epoch": 0.8862513606296576, "grad_norm": 0.2813679277896881, "learning_rate": 0.00015997426967731185, "loss": 11.6662, "step": 42338 }, { "epoch": 0.8862722933936197, "grad_norm": 0.2918586730957031, "learning_rate": 0.0001599725152136299, "loss": 11.6539, "step": 42339 }, { "epoch": 0.8862932261575819, "grad_norm": 0.2802736461162567, "learning_rate": 0.00015997076072111796, "loss": 11.6666, "step": 42340 }, { "epoch": 0.886314158921544, "grad_norm": 0.3539721667766571, "learning_rate": 0.00015996900619977687, "loss": 11.6723, "step": 42341 }, { "epoch": 0.8863350916855062, "grad_norm": 0.3329099118709564, "learning_rate": 0.00015996725164960747, "loss": 11.6652, "step": 42342 }, { "epoch": 0.8863560244494683, "grad_norm": 0.2911486327648163, "learning_rate": 0.0001599654970706106, "loss": 11.6827, "step": 42343 }, { "epoch": 0.8863769572134305, "grad_norm": 0.36504554748535156, "learning_rate": 0.00015996374246278716, "loss": 11.6973, "step": 42344 }, { "epoch": 0.8863978899773927, "grad_norm": 0.34253981709480286, "learning_rate": 0.0001599619878261379, "loss": 11.6717, "step": 42345 }, { "epoch": 0.8864188227413548, "grad_norm": 0.3249973952770233, "learning_rate": 0.00015996023316066374, "loss": 11.6816, "step": 42346 }, { "epoch": 0.886439755505317, "grad_norm": 0.30736273527145386, "learning_rate": 0.00015995847846636546, "loss": 11.67, "step": 42347 }, { "epoch": 0.886460688269279, "grad_norm": 0.2953934073448181, "learning_rate": 0.00015995672374324396, "loss": 11.648, "step": 42348 }, { "epoch": 0.8864816210332412, "grad_norm": 0.2767072021961212, "learning_rate": 0.00015995496899130006, "loss": 11.6855, "step": 42349 }, { "epoch": 0.8865025537972033, "grad_norm": 0.3391656279563904, "learning_rate": 0.00015995321421053462, "loss": 11.6529, "step": 42350 }, { "epoch": 0.8865234865611655, "grad_norm": 0.33439892530441284, "learning_rate": 0.00015995145940094846, "loss": 11.6781, "step": 42351 }, { "epoch": 0.8865444193251277, "grad_norm": 0.3535902798175812, "learning_rate": 0.00015994970456254245, "loss": 11.6737, "step": 42352 }, { "epoch": 0.8865653520890898, "grad_norm": 0.2678808569908142, "learning_rate": 0.0001599479496953174, "loss": 11.6507, "step": 42353 }, { "epoch": 0.886586284853052, "grad_norm": 0.2753027379512787, "learning_rate": 0.00015994619479927415, "loss": 11.6715, "step": 42354 }, { "epoch": 0.8866072176170141, "grad_norm": 0.2955201268196106, "learning_rate": 0.00015994443987441356, "loss": 11.6672, "step": 42355 }, { "epoch": 0.8866281503809763, "grad_norm": 0.26702770590782166, "learning_rate": 0.0001599426849207365, "loss": 11.677, "step": 42356 }, { "epoch": 0.8866490831449385, "grad_norm": 0.3399888873100281, "learning_rate": 0.00015994092993824378, "loss": 11.6646, "step": 42357 }, { "epoch": 0.8866700159089006, "grad_norm": 0.329888254404068, "learning_rate": 0.00015993917492693625, "loss": 11.6662, "step": 42358 }, { "epoch": 0.8866909486728628, "grad_norm": 0.28754913806915283, "learning_rate": 0.00015993741988681477, "loss": 11.6739, "step": 42359 }, { "epoch": 0.8867118814368249, "grad_norm": 0.30206960439682007, "learning_rate": 0.00015993566481788016, "loss": 11.6638, "step": 42360 }, { "epoch": 0.8867328142007871, "grad_norm": 0.2759559154510498, "learning_rate": 0.00015993390972013328, "loss": 11.6775, "step": 42361 }, { "epoch": 0.8867537469647492, "grad_norm": 0.3098381757736206, "learning_rate": 0.00015993215459357495, "loss": 11.6699, "step": 42362 }, { "epoch": 0.8867746797287114, "grad_norm": 0.32572999596595764, "learning_rate": 0.00015993039943820606, "loss": 11.6732, "step": 42363 }, { "epoch": 0.8867956124926736, "grad_norm": 0.3056541085243225, "learning_rate": 0.0001599286442540274, "loss": 11.6668, "step": 42364 }, { "epoch": 0.8868165452566357, "grad_norm": 0.31074413657188416, "learning_rate": 0.00015992688904103988, "loss": 11.6603, "step": 42365 }, { "epoch": 0.8868374780205979, "grad_norm": 0.33193784952163696, "learning_rate": 0.00015992513379924428, "loss": 11.6613, "step": 42366 }, { "epoch": 0.88685841078456, "grad_norm": 0.30377909541130066, "learning_rate": 0.00015992337852864145, "loss": 11.6651, "step": 42367 }, { "epoch": 0.8868793435485222, "grad_norm": 0.3468880355358124, "learning_rate": 0.0001599216232292323, "loss": 11.679, "step": 42368 }, { "epoch": 0.8869002763124842, "grad_norm": 0.3410710394382477, "learning_rate": 0.00015991986790101756, "loss": 11.687, "step": 42369 }, { "epoch": 0.8869212090764464, "grad_norm": 0.30660179257392883, "learning_rate": 0.00015991811254399818, "loss": 11.6771, "step": 42370 }, { "epoch": 0.8869421418404086, "grad_norm": 0.2933928072452545, "learning_rate": 0.00015991635715817493, "loss": 11.6671, "step": 42371 }, { "epoch": 0.8869630746043707, "grad_norm": 0.41602587699890137, "learning_rate": 0.00015991460174354872, "loss": 11.6693, "step": 42372 }, { "epoch": 0.8869840073683329, "grad_norm": 0.32462576031684875, "learning_rate": 0.00015991284630012035, "loss": 11.6785, "step": 42373 }, { "epoch": 0.887004940132295, "grad_norm": 0.2881447970867157, "learning_rate": 0.0001599110908278907, "loss": 11.6733, "step": 42374 }, { "epoch": 0.8870258728962572, "grad_norm": 0.40705063939094543, "learning_rate": 0.00015990933532686053, "loss": 11.6442, "step": 42375 }, { "epoch": 0.8870468056602194, "grad_norm": 0.4213666021823883, "learning_rate": 0.0001599075797970308, "loss": 11.6733, "step": 42376 }, { "epoch": 0.8870677384241815, "grad_norm": 0.2785756587982178, "learning_rate": 0.00015990582423840226, "loss": 11.6822, "step": 42377 }, { "epoch": 0.8870886711881437, "grad_norm": 0.3396351933479309, "learning_rate": 0.0001599040686509758, "loss": 11.6753, "step": 42378 }, { "epoch": 0.8871096039521058, "grad_norm": 0.3814968168735504, "learning_rate": 0.00015990231303475224, "loss": 11.6702, "step": 42379 }, { "epoch": 0.887130536716068, "grad_norm": 0.2677001953125, "learning_rate": 0.0001599005573897325, "loss": 11.6502, "step": 42380 }, { "epoch": 0.8871514694800301, "grad_norm": 0.377106636762619, "learning_rate": 0.00015989880171591727, "loss": 11.6818, "step": 42381 }, { "epoch": 0.8871724022439923, "grad_norm": 0.30071815848350525, "learning_rate": 0.00015989704601330752, "loss": 11.6563, "step": 42382 }, { "epoch": 0.8871933350079545, "grad_norm": 0.22586242854595184, "learning_rate": 0.00015989529028190408, "loss": 11.6706, "step": 42383 }, { "epoch": 0.8872142677719166, "grad_norm": 0.3037024736404419, "learning_rate": 0.00015989353452170777, "loss": 11.6784, "step": 42384 }, { "epoch": 0.8872352005358788, "grad_norm": 0.34840893745422363, "learning_rate": 0.00015989177873271943, "loss": 11.6679, "step": 42385 }, { "epoch": 0.8872561332998409, "grad_norm": 0.28099027276039124, "learning_rate": 0.0001598900229149399, "loss": 11.6462, "step": 42386 }, { "epoch": 0.8872770660638031, "grad_norm": 0.35747647285461426, "learning_rate": 0.00015988826706837003, "loss": 11.6708, "step": 42387 }, { "epoch": 0.8872979988277652, "grad_norm": 0.3049188554286957, "learning_rate": 0.0001598865111930107, "loss": 11.6742, "step": 42388 }, { "epoch": 0.8873189315917274, "grad_norm": 0.33333906531333923, "learning_rate": 0.0001598847552888627, "loss": 11.6539, "step": 42389 }, { "epoch": 0.8873398643556896, "grad_norm": 0.3501106798648834, "learning_rate": 0.00015988299935592695, "loss": 11.6737, "step": 42390 }, { "epoch": 0.8873607971196517, "grad_norm": 0.3254586160182953, "learning_rate": 0.00015988124339420416, "loss": 11.664, "step": 42391 }, { "epoch": 0.8873817298836139, "grad_norm": 0.4180474877357483, "learning_rate": 0.00015987948740369532, "loss": 11.6576, "step": 42392 }, { "epoch": 0.887402662647576, "grad_norm": 0.36455702781677246, "learning_rate": 0.00015987773138440118, "loss": 11.6715, "step": 42393 }, { "epoch": 0.8874235954115381, "grad_norm": 0.29515159130096436, "learning_rate": 0.0001598759753363226, "loss": 11.6647, "step": 42394 }, { "epoch": 0.8874445281755003, "grad_norm": 0.3315224051475525, "learning_rate": 0.00015987421925946048, "loss": 11.6853, "step": 42395 }, { "epoch": 0.8874654609394624, "grad_norm": 0.3639940321445465, "learning_rate": 0.0001598724631538156, "loss": 11.6608, "step": 42396 }, { "epoch": 0.8874863937034246, "grad_norm": 0.3181955814361572, "learning_rate": 0.00015987070701938883, "loss": 11.6702, "step": 42397 }, { "epoch": 0.8875073264673867, "grad_norm": 0.283936470746994, "learning_rate": 0.00015986895085618103, "loss": 11.6673, "step": 42398 }, { "epoch": 0.8875282592313489, "grad_norm": 0.28940895199775696, "learning_rate": 0.00015986719466419297, "loss": 11.6654, "step": 42399 }, { "epoch": 0.887549191995311, "grad_norm": 0.4217894375324249, "learning_rate": 0.0001598654384434256, "loss": 11.6586, "step": 42400 }, { "epoch": 0.8875701247592732, "grad_norm": 0.3217822313308716, "learning_rate": 0.0001598636821938797, "loss": 11.6785, "step": 42401 }, { "epoch": 0.8875910575232354, "grad_norm": 0.27916982769966125, "learning_rate": 0.00015986192591555613, "loss": 11.6774, "step": 42402 }, { "epoch": 0.8876119902871975, "grad_norm": 0.3480215072631836, "learning_rate": 0.00015986016960845576, "loss": 11.6908, "step": 42403 }, { "epoch": 0.8876329230511597, "grad_norm": 0.35013818740844727, "learning_rate": 0.00015985841327257935, "loss": 11.6793, "step": 42404 }, { "epoch": 0.8876538558151218, "grad_norm": 0.3142088055610657, "learning_rate": 0.00015985665690792784, "loss": 11.6582, "step": 42405 }, { "epoch": 0.887674788579084, "grad_norm": 0.28012457489967346, "learning_rate": 0.00015985490051450202, "loss": 11.6577, "step": 42406 }, { "epoch": 0.8876957213430461, "grad_norm": 0.27488940954208374, "learning_rate": 0.00015985314409230276, "loss": 11.6545, "step": 42407 }, { "epoch": 0.8877166541070083, "grad_norm": 0.340498685836792, "learning_rate": 0.00015985138764133087, "loss": 11.6863, "step": 42408 }, { "epoch": 0.8877375868709705, "grad_norm": 0.48869386315345764, "learning_rate": 0.00015984963116158726, "loss": 11.64, "step": 42409 }, { "epoch": 0.8877585196349326, "grad_norm": 0.33425387740135193, "learning_rate": 0.00015984787465307272, "loss": 11.669, "step": 42410 }, { "epoch": 0.8877794523988948, "grad_norm": 0.3536413311958313, "learning_rate": 0.00015984611811578807, "loss": 11.6664, "step": 42411 }, { "epoch": 0.8878003851628569, "grad_norm": 0.2923223376274109, "learning_rate": 0.00015984436154973427, "loss": 11.6771, "step": 42412 }, { "epoch": 0.8878213179268191, "grad_norm": 0.29636818170547485, "learning_rate": 0.000159842604954912, "loss": 11.6667, "step": 42413 }, { "epoch": 0.8878422506907813, "grad_norm": 0.3418813943862915, "learning_rate": 0.00015984084833132225, "loss": 11.67, "step": 42414 }, { "epoch": 0.8878631834547434, "grad_norm": 0.3341364562511444, "learning_rate": 0.00015983909167896577, "loss": 11.6812, "step": 42415 }, { "epoch": 0.8878841162187056, "grad_norm": 0.3195858895778656, "learning_rate": 0.00015983733499784342, "loss": 11.6813, "step": 42416 }, { "epoch": 0.8879050489826676, "grad_norm": 0.3145981431007385, "learning_rate": 0.0001598355782879561, "loss": 11.6748, "step": 42417 }, { "epoch": 0.8879259817466298, "grad_norm": 0.34356334805488586, "learning_rate": 0.00015983382154930462, "loss": 11.682, "step": 42418 }, { "epoch": 0.8879469145105919, "grad_norm": 0.4620996415615082, "learning_rate": 0.00015983206478188984, "loss": 11.6832, "step": 42419 }, { "epoch": 0.8879678472745541, "grad_norm": 0.3640258014202118, "learning_rate": 0.00015983030798571256, "loss": 11.6712, "step": 42420 }, { "epoch": 0.8879887800385163, "grad_norm": 0.39923763275146484, "learning_rate": 0.00015982855116077364, "loss": 11.6793, "step": 42421 }, { "epoch": 0.8880097128024784, "grad_norm": 0.3759700059890747, "learning_rate": 0.00015982679430707396, "loss": 11.6817, "step": 42422 }, { "epoch": 0.8880306455664406, "grad_norm": 0.4010407030582428, "learning_rate": 0.00015982503742461432, "loss": 11.6727, "step": 42423 }, { "epoch": 0.8880515783304027, "grad_norm": 0.2669910192489624, "learning_rate": 0.00015982328051339562, "loss": 11.6793, "step": 42424 }, { "epoch": 0.8880725110943649, "grad_norm": 0.3500312864780426, "learning_rate": 0.00015982152357341863, "loss": 11.6654, "step": 42425 }, { "epoch": 0.888093443858327, "grad_norm": 0.3404151499271393, "learning_rate": 0.00015981976660468426, "loss": 11.6659, "step": 42426 }, { "epoch": 0.8881143766222892, "grad_norm": 0.26692327857017517, "learning_rate": 0.00015981800960719334, "loss": 11.6642, "step": 42427 }, { "epoch": 0.8881353093862514, "grad_norm": 0.4141881763935089, "learning_rate": 0.00015981625258094665, "loss": 11.6883, "step": 42428 }, { "epoch": 0.8881562421502135, "grad_norm": 0.3335327208042145, "learning_rate": 0.00015981449552594516, "loss": 11.6796, "step": 42429 }, { "epoch": 0.8881771749141757, "grad_norm": 0.3712586760520935, "learning_rate": 0.0001598127384421896, "loss": 11.6647, "step": 42430 }, { "epoch": 0.8881981076781378, "grad_norm": 0.2599101662635803, "learning_rate": 0.00015981098132968088, "loss": 11.6498, "step": 42431 }, { "epoch": 0.8882190404421, "grad_norm": 0.263412207365036, "learning_rate": 0.0001598092241884198, "loss": 11.6606, "step": 42432 }, { "epoch": 0.8882399732060622, "grad_norm": 0.32553645968437195, "learning_rate": 0.00015980746701840722, "loss": 11.6555, "step": 42433 }, { "epoch": 0.8882609059700243, "grad_norm": 0.3810827434062958, "learning_rate": 0.000159805709819644, "loss": 11.662, "step": 42434 }, { "epoch": 0.8882818387339865, "grad_norm": 0.2559218108654022, "learning_rate": 0.00015980395259213102, "loss": 11.674, "step": 42435 }, { "epoch": 0.8883027714979486, "grad_norm": 0.35226839780807495, "learning_rate": 0.00015980219533586905, "loss": 11.6769, "step": 42436 }, { "epoch": 0.8883237042619108, "grad_norm": 0.3716288208961487, "learning_rate": 0.00015980043805085894, "loss": 11.6763, "step": 42437 }, { "epoch": 0.8883446370258729, "grad_norm": 0.3334673345088959, "learning_rate": 0.0001597986807371016, "loss": 11.6586, "step": 42438 }, { "epoch": 0.888365569789835, "grad_norm": 0.3101648688316345, "learning_rate": 0.00015979692339459782, "loss": 11.6722, "step": 42439 }, { "epoch": 0.8883865025537973, "grad_norm": 0.31431758403778076, "learning_rate": 0.00015979516602334845, "loss": 11.6761, "step": 42440 }, { "epoch": 0.8884074353177593, "grad_norm": 0.32355377078056335, "learning_rate": 0.00015979340862335434, "loss": 11.6504, "step": 42441 }, { "epoch": 0.8884283680817215, "grad_norm": 0.3351548910140991, "learning_rate": 0.00015979165119461635, "loss": 11.6674, "step": 42442 }, { "epoch": 0.8884493008456836, "grad_norm": 0.3229614496231079, "learning_rate": 0.00015978989373713532, "loss": 11.6687, "step": 42443 }, { "epoch": 0.8884702336096458, "grad_norm": 0.2867116332054138, "learning_rate": 0.00015978813625091212, "loss": 11.6667, "step": 42444 }, { "epoch": 0.8884911663736079, "grad_norm": 0.32860973477363586, "learning_rate": 0.00015978637873594753, "loss": 11.6918, "step": 42445 }, { "epoch": 0.8885120991375701, "grad_norm": 0.29417532682418823, "learning_rate": 0.00015978462119224242, "loss": 11.6621, "step": 42446 }, { "epoch": 0.8885330319015323, "grad_norm": 0.28859439492225647, "learning_rate": 0.0001597828636197977, "loss": 11.6659, "step": 42447 }, { "epoch": 0.8885539646654944, "grad_norm": 0.3770142197608948, "learning_rate": 0.0001597811060186141, "loss": 11.6767, "step": 42448 }, { "epoch": 0.8885748974294566, "grad_norm": 0.3226976692676544, "learning_rate": 0.00015977934838869255, "loss": 11.6541, "step": 42449 }, { "epoch": 0.8885958301934187, "grad_norm": 0.2382972687482834, "learning_rate": 0.00015977759073003384, "loss": 11.6732, "step": 42450 }, { "epoch": 0.8886167629573809, "grad_norm": 0.3193766474723816, "learning_rate": 0.00015977583304263888, "loss": 11.6728, "step": 42451 }, { "epoch": 0.8886376957213431, "grad_norm": 0.33064964413642883, "learning_rate": 0.00015977407532650846, "loss": 11.6773, "step": 42452 }, { "epoch": 0.8886586284853052, "grad_norm": 0.32426124811172485, "learning_rate": 0.00015977231758164347, "loss": 11.6783, "step": 42453 }, { "epoch": 0.8886795612492674, "grad_norm": 0.3224082887172699, "learning_rate": 0.0001597705598080447, "loss": 11.6742, "step": 42454 }, { "epoch": 0.8887004940132295, "grad_norm": 0.3175310790538788, "learning_rate": 0.00015976880200571303, "loss": 11.6512, "step": 42455 }, { "epoch": 0.8887214267771917, "grad_norm": 0.2786644995212555, "learning_rate": 0.00015976704417464932, "loss": 11.6789, "step": 42456 }, { "epoch": 0.8887423595411538, "grad_norm": 0.2978697419166565, "learning_rate": 0.00015976528631485434, "loss": 11.6621, "step": 42457 }, { "epoch": 0.888763292305116, "grad_norm": 0.33051297068595886, "learning_rate": 0.00015976352842632906, "loss": 11.6567, "step": 42458 }, { "epoch": 0.8887842250690782, "grad_norm": 0.38316503167152405, "learning_rate": 0.0001597617705090742, "loss": 11.6738, "step": 42459 }, { "epoch": 0.8888051578330403, "grad_norm": 0.2901427447795868, "learning_rate": 0.00015976001256309068, "loss": 11.6588, "step": 42460 }, { "epoch": 0.8888260905970025, "grad_norm": 0.2986246943473816, "learning_rate": 0.00015975825458837935, "loss": 11.6492, "step": 42461 }, { "epoch": 0.8888470233609645, "grad_norm": 0.3038817346096039, "learning_rate": 0.000159756496584941, "loss": 11.6754, "step": 42462 }, { "epoch": 0.8888679561249268, "grad_norm": 0.31143563985824585, "learning_rate": 0.0001597547385527765, "loss": 11.6634, "step": 42463 }, { "epoch": 0.8888888888888888, "grad_norm": 0.2848186194896698, "learning_rate": 0.0001597529804918867, "loss": 11.6631, "step": 42464 }, { "epoch": 0.888909821652851, "grad_norm": 0.31623828411102295, "learning_rate": 0.00015975122240227246, "loss": 11.6599, "step": 42465 }, { "epoch": 0.8889307544168132, "grad_norm": 0.40203291177749634, "learning_rate": 0.0001597494642839346, "loss": 11.6728, "step": 42466 }, { "epoch": 0.8889516871807753, "grad_norm": 0.2789837121963501, "learning_rate": 0.00015974770613687395, "loss": 11.6746, "step": 42467 }, { "epoch": 0.8889726199447375, "grad_norm": 0.2658753991127014, "learning_rate": 0.00015974594796109144, "loss": 11.6707, "step": 42468 }, { "epoch": 0.8889935527086996, "grad_norm": 0.3306282162666321, "learning_rate": 0.0001597441897565878, "loss": 11.677, "step": 42469 }, { "epoch": 0.8890144854726618, "grad_norm": 0.24627219140529633, "learning_rate": 0.00015974243152336393, "loss": 11.6634, "step": 42470 }, { "epoch": 0.8890354182366239, "grad_norm": 0.3464285135269165, "learning_rate": 0.00015974067326142075, "loss": 11.679, "step": 42471 }, { "epoch": 0.8890563510005861, "grad_norm": 0.30204975605010986, "learning_rate": 0.00015973891497075897, "loss": 11.6681, "step": 42472 }, { "epoch": 0.8890772837645483, "grad_norm": 0.40584298968315125, "learning_rate": 0.0001597371566513795, "loss": 11.6661, "step": 42473 }, { "epoch": 0.8890982165285104, "grad_norm": 0.31039586663246155, "learning_rate": 0.0001597353983032832, "loss": 11.6735, "step": 42474 }, { "epoch": 0.8891191492924726, "grad_norm": 0.3429683744907379, "learning_rate": 0.00015973363992647087, "loss": 11.6568, "step": 42475 }, { "epoch": 0.8891400820564347, "grad_norm": 0.2942798435688019, "learning_rate": 0.0001597318815209434, "loss": 11.6559, "step": 42476 }, { "epoch": 0.8891610148203969, "grad_norm": 0.25965616106987, "learning_rate": 0.0001597301230867016, "loss": 11.684, "step": 42477 }, { "epoch": 0.8891819475843591, "grad_norm": 0.30632761120796204, "learning_rate": 0.00015972836462374636, "loss": 11.6755, "step": 42478 }, { "epoch": 0.8892028803483212, "grad_norm": 0.23732908070087433, "learning_rate": 0.00015972660613207848, "loss": 11.6489, "step": 42479 }, { "epoch": 0.8892238131122834, "grad_norm": 0.291422575712204, "learning_rate": 0.00015972484761169885, "loss": 11.6562, "step": 42480 }, { "epoch": 0.8892447458762455, "grad_norm": 0.34405750036239624, "learning_rate": 0.00015972308906260827, "loss": 11.6673, "step": 42481 }, { "epoch": 0.8892656786402077, "grad_norm": 0.3064914643764496, "learning_rate": 0.0001597213304848076, "loss": 11.6556, "step": 42482 }, { "epoch": 0.8892866114041698, "grad_norm": 0.2674451172351837, "learning_rate": 0.00015971957187829768, "loss": 11.6584, "step": 42483 }, { "epoch": 0.889307544168132, "grad_norm": 0.3637392520904541, "learning_rate": 0.0001597178132430794, "loss": 11.6765, "step": 42484 }, { "epoch": 0.8893284769320942, "grad_norm": 0.26758310198783875, "learning_rate": 0.00015971605457915352, "loss": 11.6563, "step": 42485 }, { "epoch": 0.8893494096960562, "grad_norm": 0.31554466485977173, "learning_rate": 0.00015971429588652097, "loss": 11.6614, "step": 42486 }, { "epoch": 0.8893703424600184, "grad_norm": 0.3051382303237915, "learning_rate": 0.00015971253716518257, "loss": 11.6601, "step": 42487 }, { "epoch": 0.8893912752239805, "grad_norm": 0.2893187701702118, "learning_rate": 0.00015971077841513912, "loss": 11.6681, "step": 42488 }, { "epoch": 0.8894122079879427, "grad_norm": 0.2932512164115906, "learning_rate": 0.00015970901963639154, "loss": 11.6761, "step": 42489 }, { "epoch": 0.8894331407519048, "grad_norm": 0.4910019338130951, "learning_rate": 0.00015970726082894062, "loss": 11.675, "step": 42490 }, { "epoch": 0.889454073515867, "grad_norm": 0.3512064218521118, "learning_rate": 0.00015970550199278724, "loss": 11.6654, "step": 42491 }, { "epoch": 0.8894750062798292, "grad_norm": 0.3570026457309723, "learning_rate": 0.00015970374312793218, "loss": 11.6677, "step": 42492 }, { "epoch": 0.8894959390437913, "grad_norm": 0.4075886905193329, "learning_rate": 0.0001597019842343764, "loss": 11.6521, "step": 42493 }, { "epoch": 0.8895168718077535, "grad_norm": 0.3674662411212921, "learning_rate": 0.00015970022531212064, "loss": 11.6753, "step": 42494 }, { "epoch": 0.8895378045717156, "grad_norm": 0.28905341029167175, "learning_rate": 0.0001596984663611658, "loss": 11.681, "step": 42495 }, { "epoch": 0.8895587373356778, "grad_norm": 0.30654728412628174, "learning_rate": 0.00015969670738151268, "loss": 11.6732, "step": 42496 }, { "epoch": 0.88957967009964, "grad_norm": 0.24841473996639252, "learning_rate": 0.0001596949483731622, "loss": 11.6618, "step": 42497 }, { "epoch": 0.8896006028636021, "grad_norm": 0.2937736511230469, "learning_rate": 0.00015969318933611515, "loss": 11.6608, "step": 42498 }, { "epoch": 0.8896215356275643, "grad_norm": 0.27369263768196106, "learning_rate": 0.00015969143027037234, "loss": 11.6715, "step": 42499 }, { "epoch": 0.8896424683915264, "grad_norm": 0.33685728907585144, "learning_rate": 0.00015968967117593473, "loss": 11.6562, "step": 42500 }, { "epoch": 0.8896634011554886, "grad_norm": 0.2599555552005768, "learning_rate": 0.00015968791205280306, "loss": 11.6651, "step": 42501 }, { "epoch": 0.8896843339194507, "grad_norm": 0.33366039395332336, "learning_rate": 0.00015968615290097823, "loss": 11.6613, "step": 42502 }, { "epoch": 0.8897052666834129, "grad_norm": 0.25593528151512146, "learning_rate": 0.00015968439372046107, "loss": 11.6766, "step": 42503 }, { "epoch": 0.8897261994473751, "grad_norm": 0.27870020270347595, "learning_rate": 0.0001596826345112524, "loss": 11.6652, "step": 42504 }, { "epoch": 0.8897471322113372, "grad_norm": 0.3103085458278656, "learning_rate": 0.00015968087527335313, "loss": 11.6669, "step": 42505 }, { "epoch": 0.8897680649752994, "grad_norm": 0.41742411255836487, "learning_rate": 0.00015967911600676406, "loss": 11.6732, "step": 42506 }, { "epoch": 0.8897889977392615, "grad_norm": 0.310639888048172, "learning_rate": 0.00015967735671148606, "loss": 11.6807, "step": 42507 }, { "epoch": 0.8898099305032237, "grad_norm": 0.27536457777023315, "learning_rate": 0.00015967559738751992, "loss": 11.6758, "step": 42508 }, { "epoch": 0.8898308632671857, "grad_norm": 0.3340592682361603, "learning_rate": 0.00015967383803486653, "loss": 11.6873, "step": 42509 }, { "epoch": 0.889851796031148, "grad_norm": 0.35343194007873535, "learning_rate": 0.00015967207865352672, "loss": 11.662, "step": 42510 }, { "epoch": 0.8898727287951101, "grad_norm": 0.3499900698661804, "learning_rate": 0.00015967031924350137, "loss": 11.6637, "step": 42511 }, { "epoch": 0.8898936615590722, "grad_norm": 0.31951838731765747, "learning_rate": 0.0001596685598047913, "loss": 11.6787, "step": 42512 }, { "epoch": 0.8899145943230344, "grad_norm": 0.3992701470851898, "learning_rate": 0.00015966680033739733, "loss": 11.6716, "step": 42513 }, { "epoch": 0.8899355270869965, "grad_norm": 0.34445008635520935, "learning_rate": 0.00015966504084132037, "loss": 11.6837, "step": 42514 }, { "epoch": 0.8899564598509587, "grad_norm": 0.2644634544849396, "learning_rate": 0.00015966328131656124, "loss": 11.6589, "step": 42515 }, { "epoch": 0.8899773926149209, "grad_norm": 0.3052677512168884, "learning_rate": 0.00015966152176312073, "loss": 11.6585, "step": 42516 }, { "epoch": 0.889998325378883, "grad_norm": 0.30210942029953003, "learning_rate": 0.00015965976218099975, "loss": 11.6605, "step": 42517 }, { "epoch": 0.8900192581428452, "grad_norm": 0.41760313510894775, "learning_rate": 0.00015965800257019912, "loss": 11.6857, "step": 42518 }, { "epoch": 0.8900401909068073, "grad_norm": 0.2407306432723999, "learning_rate": 0.0001596562429307197, "loss": 11.6733, "step": 42519 }, { "epoch": 0.8900611236707695, "grad_norm": 0.28794580698013306, "learning_rate": 0.00015965448326256233, "loss": 11.6624, "step": 42520 }, { "epoch": 0.8900820564347316, "grad_norm": 0.30255910754203796, "learning_rate": 0.00015965272356572788, "loss": 11.6603, "step": 42521 }, { "epoch": 0.8901029891986938, "grad_norm": 0.409547358751297, "learning_rate": 0.00015965096384021714, "loss": 11.6695, "step": 42522 }, { "epoch": 0.890123921962656, "grad_norm": 0.28888779878616333, "learning_rate": 0.000159649204086031, "loss": 11.6695, "step": 42523 }, { "epoch": 0.8901448547266181, "grad_norm": 0.32288163900375366, "learning_rate": 0.00015964744430317028, "loss": 11.6917, "step": 42524 }, { "epoch": 0.8901657874905803, "grad_norm": 0.3430628776550293, "learning_rate": 0.00015964568449163582, "loss": 11.6684, "step": 42525 }, { "epoch": 0.8901867202545424, "grad_norm": 0.31259000301361084, "learning_rate": 0.0001596439246514285, "loss": 11.6732, "step": 42526 }, { "epoch": 0.8902076530185046, "grad_norm": 0.35544371604919434, "learning_rate": 0.00015964216478254917, "loss": 11.6659, "step": 42527 }, { "epoch": 0.8902285857824667, "grad_norm": 0.3710440397262573, "learning_rate": 0.00015964040488499864, "loss": 11.6663, "step": 42528 }, { "epoch": 0.8902495185464289, "grad_norm": 0.2679177522659302, "learning_rate": 0.00015963864495877777, "loss": 11.6558, "step": 42529 }, { "epoch": 0.8902704513103911, "grad_norm": 0.27257153391838074, "learning_rate": 0.0001596368850038874, "loss": 11.6781, "step": 42530 }, { "epoch": 0.8902913840743532, "grad_norm": 0.3006438612937927, "learning_rate": 0.00015963512502032838, "loss": 11.6657, "step": 42531 }, { "epoch": 0.8903123168383154, "grad_norm": 0.34454646706581116, "learning_rate": 0.0001596333650081016, "loss": 11.6473, "step": 42532 }, { "epoch": 0.8903332496022774, "grad_norm": 0.41604354977607727, "learning_rate": 0.00015963160496720784, "loss": 11.6836, "step": 42533 }, { "epoch": 0.8903541823662396, "grad_norm": 0.31208446621894836, "learning_rate": 0.00015962984489764796, "loss": 11.6663, "step": 42534 }, { "epoch": 0.8903751151302018, "grad_norm": 0.3239724934101105, "learning_rate": 0.00015962808479942284, "loss": 11.6571, "step": 42535 }, { "epoch": 0.8903960478941639, "grad_norm": 0.3926127851009369, "learning_rate": 0.00015962632467253328, "loss": 11.6622, "step": 42536 }, { "epoch": 0.8904169806581261, "grad_norm": 0.34492531418800354, "learning_rate": 0.00015962456451698017, "loss": 11.6738, "step": 42537 }, { "epoch": 0.8904379134220882, "grad_norm": 0.3383818566799164, "learning_rate": 0.00015962280433276434, "loss": 11.6607, "step": 42538 }, { "epoch": 0.8904588461860504, "grad_norm": 0.25783345103263855, "learning_rate": 0.00015962104411988665, "loss": 11.6584, "step": 42539 }, { "epoch": 0.8904797789500125, "grad_norm": 0.28580284118652344, "learning_rate": 0.0001596192838783479, "loss": 11.661, "step": 42540 }, { "epoch": 0.8905007117139747, "grad_norm": 0.3014243543148041, "learning_rate": 0.00015961752360814897, "loss": 11.6707, "step": 42541 }, { "epoch": 0.8905216444779369, "grad_norm": 0.28081411123275757, "learning_rate": 0.00015961576330929072, "loss": 11.6808, "step": 42542 }, { "epoch": 0.890542577241899, "grad_norm": 0.3158896863460541, "learning_rate": 0.00015961400298177396, "loss": 11.6741, "step": 42543 }, { "epoch": 0.8905635100058612, "grad_norm": 0.33278369903564453, "learning_rate": 0.00015961224262559955, "loss": 11.6862, "step": 42544 }, { "epoch": 0.8905844427698233, "grad_norm": 0.26611050963401794, "learning_rate": 0.00015961048224076834, "loss": 11.6689, "step": 42545 }, { "epoch": 0.8906053755337855, "grad_norm": 0.2562503516674042, "learning_rate": 0.00015960872182728118, "loss": 11.661, "step": 42546 }, { "epoch": 0.8906263082977476, "grad_norm": 0.321127325296402, "learning_rate": 0.00015960696138513894, "loss": 11.6674, "step": 42547 }, { "epoch": 0.8906472410617098, "grad_norm": 0.3622608780860901, "learning_rate": 0.0001596052009143424, "loss": 11.6653, "step": 42548 }, { "epoch": 0.890668173825672, "grad_norm": 0.29313722252845764, "learning_rate": 0.00015960344041489246, "loss": 11.6596, "step": 42549 }, { "epoch": 0.8906891065896341, "grad_norm": 0.30930522084236145, "learning_rate": 0.00015960167988678995, "loss": 11.663, "step": 42550 }, { "epoch": 0.8907100393535963, "grad_norm": 0.3521932363510132, "learning_rate": 0.00015959991933003572, "loss": 11.6711, "step": 42551 }, { "epoch": 0.8907309721175584, "grad_norm": 0.363680362701416, "learning_rate": 0.0001595981587446306, "loss": 11.6615, "step": 42552 }, { "epoch": 0.8907519048815206, "grad_norm": 0.4059275984764099, "learning_rate": 0.00015959639813057549, "loss": 11.6745, "step": 42553 }, { "epoch": 0.8907728376454828, "grad_norm": 0.3447102904319763, "learning_rate": 0.00015959463748787118, "loss": 11.6678, "step": 42554 }, { "epoch": 0.8907937704094449, "grad_norm": 0.3242432475090027, "learning_rate": 0.00015959287681651852, "loss": 11.6672, "step": 42555 }, { "epoch": 0.890814703173407, "grad_norm": 0.31129708886146545, "learning_rate": 0.0001595911161165184, "loss": 11.6732, "step": 42556 }, { "epoch": 0.8908356359373691, "grad_norm": 0.39052820205688477, "learning_rate": 0.00015958935538787158, "loss": 11.6531, "step": 42557 }, { "epoch": 0.8908565687013313, "grad_norm": 0.322465717792511, "learning_rate": 0.00015958759463057903, "loss": 11.6815, "step": 42558 }, { "epoch": 0.8908775014652934, "grad_norm": 0.29889845848083496, "learning_rate": 0.00015958583384464148, "loss": 11.6796, "step": 42559 }, { "epoch": 0.8908984342292556, "grad_norm": 0.32032591104507446, "learning_rate": 0.00015958407303005983, "loss": 11.6597, "step": 42560 }, { "epoch": 0.8909193669932178, "grad_norm": 0.32090139389038086, "learning_rate": 0.00015958231218683494, "loss": 11.6855, "step": 42561 }, { "epoch": 0.8909402997571799, "grad_norm": 0.2939845323562622, "learning_rate": 0.0001595805513149676, "loss": 11.6647, "step": 42562 }, { "epoch": 0.8909612325211421, "grad_norm": 0.3825523853302002, "learning_rate": 0.00015957879041445875, "loss": 11.6644, "step": 42563 }, { "epoch": 0.8909821652851042, "grad_norm": 0.2578670084476471, "learning_rate": 0.00015957702948530915, "loss": 11.6601, "step": 42564 }, { "epoch": 0.8910030980490664, "grad_norm": 0.2854200601577759, "learning_rate": 0.0001595752685275197, "loss": 11.6735, "step": 42565 }, { "epoch": 0.8910240308130285, "grad_norm": 0.3403259813785553, "learning_rate": 0.00015957350754109123, "loss": 11.6705, "step": 42566 }, { "epoch": 0.8910449635769907, "grad_norm": 0.2756156027317047, "learning_rate": 0.00015957174652602454, "loss": 11.6723, "step": 42567 }, { "epoch": 0.8910658963409529, "grad_norm": 0.264645516872406, "learning_rate": 0.00015956998548232053, "loss": 11.6784, "step": 42568 }, { "epoch": 0.891086829104915, "grad_norm": 0.31128406524658203, "learning_rate": 0.00015956822440998006, "loss": 11.6793, "step": 42569 }, { "epoch": 0.8911077618688772, "grad_norm": 0.25796598196029663, "learning_rate": 0.0001595664633090039, "loss": 11.6602, "step": 42570 }, { "epoch": 0.8911286946328393, "grad_norm": 0.4070076644420624, "learning_rate": 0.00015956470217939298, "loss": 11.653, "step": 42571 }, { "epoch": 0.8911496273968015, "grad_norm": 0.30717897415161133, "learning_rate": 0.00015956294102114813, "loss": 11.6831, "step": 42572 }, { "epoch": 0.8911705601607637, "grad_norm": 0.34945228695869446, "learning_rate": 0.00015956117983427018, "loss": 11.6722, "step": 42573 }, { "epoch": 0.8911914929247258, "grad_norm": 0.2805214524269104, "learning_rate": 0.00015955941861875994, "loss": 11.6636, "step": 42574 }, { "epoch": 0.891212425688688, "grad_norm": 0.2962501645088196, "learning_rate": 0.0001595576573746183, "loss": 11.6728, "step": 42575 }, { "epoch": 0.8912333584526501, "grad_norm": 0.2596419155597687, "learning_rate": 0.00015955589610184613, "loss": 11.6707, "step": 42576 }, { "epoch": 0.8912542912166123, "grad_norm": 0.3886100947856903, "learning_rate": 0.00015955413480044426, "loss": 11.6801, "step": 42577 }, { "epoch": 0.8912752239805743, "grad_norm": 0.31106850504875183, "learning_rate": 0.0001595523734704135, "loss": 11.6871, "step": 42578 }, { "epoch": 0.8912961567445365, "grad_norm": 0.2887093126773834, "learning_rate": 0.0001595506121117547, "loss": 11.6749, "step": 42579 }, { "epoch": 0.8913170895084987, "grad_norm": 0.7356005907058716, "learning_rate": 0.00015954885072446873, "loss": 11.6295, "step": 42580 }, { "epoch": 0.8913380222724608, "grad_norm": 0.30373653769493103, "learning_rate": 0.0001595470893085565, "loss": 11.6805, "step": 42581 }, { "epoch": 0.891358955036423, "grad_norm": 0.2795315086841583, "learning_rate": 0.0001595453278640187, "loss": 11.664, "step": 42582 }, { "epoch": 0.8913798878003851, "grad_norm": 0.3097928762435913, "learning_rate": 0.0001595435663908563, "loss": 11.6799, "step": 42583 }, { "epoch": 0.8914008205643473, "grad_norm": 0.28896617889404297, "learning_rate": 0.00015954180488907013, "loss": 11.6523, "step": 42584 }, { "epoch": 0.8914217533283094, "grad_norm": 0.3210109770298004, "learning_rate": 0.00015954004335866102, "loss": 11.6836, "step": 42585 }, { "epoch": 0.8914426860922716, "grad_norm": 0.28214871883392334, "learning_rate": 0.00015953828179962983, "loss": 11.6787, "step": 42586 }, { "epoch": 0.8914636188562338, "grad_norm": 0.2980288565158844, "learning_rate": 0.00015953652021197736, "loss": 11.6688, "step": 42587 }, { "epoch": 0.8914845516201959, "grad_norm": 0.28558021783828735, "learning_rate": 0.0001595347585957045, "loss": 11.6706, "step": 42588 }, { "epoch": 0.8915054843841581, "grad_norm": 0.2808058559894562, "learning_rate": 0.0001595329969508121, "loss": 11.6517, "step": 42589 }, { "epoch": 0.8915264171481202, "grad_norm": 0.2943267524242401, "learning_rate": 0.00015953123527730098, "loss": 11.6571, "step": 42590 }, { "epoch": 0.8915473499120824, "grad_norm": 0.24781911075115204, "learning_rate": 0.00015952947357517202, "loss": 11.6865, "step": 42591 }, { "epoch": 0.8915682826760446, "grad_norm": 0.2926732301712036, "learning_rate": 0.00015952771184442606, "loss": 11.6855, "step": 42592 }, { "epoch": 0.8915892154400067, "grad_norm": 0.3344879746437073, "learning_rate": 0.00015952595008506391, "loss": 11.6741, "step": 42593 }, { "epoch": 0.8916101482039689, "grad_norm": 0.3407593369483948, "learning_rate": 0.00015952418829708647, "loss": 11.6738, "step": 42594 }, { "epoch": 0.891631080967931, "grad_norm": 0.275656133890152, "learning_rate": 0.00015952242648049452, "loss": 11.6759, "step": 42595 }, { "epoch": 0.8916520137318932, "grad_norm": 0.2521640658378601, "learning_rate": 0.00015952066463528896, "loss": 11.6603, "step": 42596 }, { "epoch": 0.8916729464958553, "grad_norm": 0.43573155999183655, "learning_rate": 0.00015951890276147065, "loss": 11.6606, "step": 42597 }, { "epoch": 0.8916938792598175, "grad_norm": 0.27656227350234985, "learning_rate": 0.00015951714085904039, "loss": 11.6687, "step": 42598 }, { "epoch": 0.8917148120237797, "grad_norm": 0.30422475934028625, "learning_rate": 0.00015951537892799905, "loss": 11.654, "step": 42599 }, { "epoch": 0.8917357447877418, "grad_norm": 0.3870892822742462, "learning_rate": 0.00015951361696834747, "loss": 11.6574, "step": 42600 }, { "epoch": 0.891756677551704, "grad_norm": 0.4083144962787628, "learning_rate": 0.00015951185498008652, "loss": 11.6628, "step": 42601 }, { "epoch": 0.891777610315666, "grad_norm": 0.3013903498649597, "learning_rate": 0.000159510092963217, "loss": 11.6645, "step": 42602 }, { "epoch": 0.8917985430796282, "grad_norm": 0.29541394114494324, "learning_rate": 0.00015950833091773982, "loss": 11.6747, "step": 42603 }, { "epoch": 0.8918194758435903, "grad_norm": 0.2943742275238037, "learning_rate": 0.00015950656884365576, "loss": 11.6708, "step": 42604 }, { "epoch": 0.8918404086075525, "grad_norm": 0.31179529428482056, "learning_rate": 0.00015950480674096573, "loss": 11.6791, "step": 42605 }, { "epoch": 0.8918613413715147, "grad_norm": 0.30603110790252686, "learning_rate": 0.00015950304460967053, "loss": 11.6708, "step": 42606 }, { "epoch": 0.8918822741354768, "grad_norm": 0.29301097989082336, "learning_rate": 0.000159501282449771, "loss": 11.655, "step": 42607 }, { "epoch": 0.891903206899439, "grad_norm": 0.32745447754859924, "learning_rate": 0.00015949952026126805, "loss": 11.6866, "step": 42608 }, { "epoch": 0.8919241396634011, "grad_norm": 0.38033780455589294, "learning_rate": 0.00015949775804416247, "loss": 11.6881, "step": 42609 }, { "epoch": 0.8919450724273633, "grad_norm": 0.2503020763397217, "learning_rate": 0.00015949599579845515, "loss": 11.6662, "step": 42610 }, { "epoch": 0.8919660051913255, "grad_norm": 0.2911904454231262, "learning_rate": 0.00015949423352414687, "loss": 11.6532, "step": 42611 }, { "epoch": 0.8919869379552876, "grad_norm": 0.3204619586467743, "learning_rate": 0.00015949247122123853, "loss": 11.6585, "step": 42612 }, { "epoch": 0.8920078707192498, "grad_norm": 0.41073137521743774, "learning_rate": 0.00015949070888973102, "loss": 11.6744, "step": 42613 }, { "epoch": 0.8920288034832119, "grad_norm": 0.317772775888443, "learning_rate": 0.00015948894652962508, "loss": 11.6442, "step": 42614 }, { "epoch": 0.8920497362471741, "grad_norm": 0.3630022704601288, "learning_rate": 0.0001594871841409216, "loss": 11.6522, "step": 42615 }, { "epoch": 0.8920706690111362, "grad_norm": 0.3161190450191498, "learning_rate": 0.00015948542172362147, "loss": 11.6638, "step": 42616 }, { "epoch": 0.8920916017750984, "grad_norm": 0.34877607226371765, "learning_rate": 0.00015948365927772552, "loss": 11.6739, "step": 42617 }, { "epoch": 0.8921125345390606, "grad_norm": 0.27273303270339966, "learning_rate": 0.00015948189680323457, "loss": 11.6603, "step": 42618 }, { "epoch": 0.8921334673030227, "grad_norm": 0.24842199683189392, "learning_rate": 0.00015948013430014944, "loss": 11.6696, "step": 42619 }, { "epoch": 0.8921544000669849, "grad_norm": 0.3102636933326721, "learning_rate": 0.00015947837176847105, "loss": 11.6683, "step": 42620 }, { "epoch": 0.892175332830947, "grad_norm": 0.3713718056678772, "learning_rate": 0.0001594766092082002, "loss": 11.674, "step": 42621 }, { "epoch": 0.8921962655949092, "grad_norm": 0.3305005431175232, "learning_rate": 0.00015947484661933778, "loss": 11.6559, "step": 42622 }, { "epoch": 0.8922171983588713, "grad_norm": 0.3012774586677551, "learning_rate": 0.00015947308400188459, "loss": 11.6714, "step": 42623 }, { "epoch": 0.8922381311228335, "grad_norm": 0.27900803089141846, "learning_rate": 0.00015947132135584152, "loss": 11.6724, "step": 42624 }, { "epoch": 0.8922590638867957, "grad_norm": 0.3214973211288452, "learning_rate": 0.00015946955868120937, "loss": 11.6633, "step": 42625 }, { "epoch": 0.8922799966507577, "grad_norm": 0.4065462350845337, "learning_rate": 0.000159467795977989, "loss": 11.6755, "step": 42626 }, { "epoch": 0.89230092941472, "grad_norm": 0.3078945577144623, "learning_rate": 0.0001594660332461813, "loss": 11.6838, "step": 42627 }, { "epoch": 0.892321862178682, "grad_norm": 0.2767099142074585, "learning_rate": 0.00015946427048578706, "loss": 11.656, "step": 42628 }, { "epoch": 0.8923427949426442, "grad_norm": 0.31144681572914124, "learning_rate": 0.00015946250769680717, "loss": 11.6743, "step": 42629 }, { "epoch": 0.8923637277066064, "grad_norm": 0.3140397369861603, "learning_rate": 0.00015946074487924246, "loss": 11.6797, "step": 42630 }, { "epoch": 0.8923846604705685, "grad_norm": 0.33691132068634033, "learning_rate": 0.00015945898203309375, "loss": 11.6744, "step": 42631 }, { "epoch": 0.8924055932345307, "grad_norm": 0.32561010122299194, "learning_rate": 0.00015945721915836195, "loss": 11.661, "step": 42632 }, { "epoch": 0.8924265259984928, "grad_norm": 0.3790479898452759, "learning_rate": 0.00015945545625504787, "loss": 11.6616, "step": 42633 }, { "epoch": 0.892447458762455, "grad_norm": 0.35510411858558655, "learning_rate": 0.00015945369332315235, "loss": 11.6774, "step": 42634 }, { "epoch": 0.8924683915264171, "grad_norm": 0.27902624011039734, "learning_rate": 0.00015945193036267624, "loss": 11.6703, "step": 42635 }, { "epoch": 0.8924893242903793, "grad_norm": 0.31683364510536194, "learning_rate": 0.00015945016737362042, "loss": 11.6651, "step": 42636 }, { "epoch": 0.8925102570543415, "grad_norm": 0.3179527521133423, "learning_rate": 0.0001594484043559857, "loss": 11.6817, "step": 42637 }, { "epoch": 0.8925311898183036, "grad_norm": 0.30260226130485535, "learning_rate": 0.00015944664130977294, "loss": 11.6562, "step": 42638 }, { "epoch": 0.8925521225822658, "grad_norm": 0.2895900011062622, "learning_rate": 0.000159444878234983, "loss": 11.6714, "step": 42639 }, { "epoch": 0.8925730553462279, "grad_norm": 0.29625415802001953, "learning_rate": 0.0001594431151316167, "loss": 11.6851, "step": 42640 }, { "epoch": 0.8925939881101901, "grad_norm": 0.3331298232078552, "learning_rate": 0.00015944135199967493, "loss": 11.6684, "step": 42641 }, { "epoch": 0.8926149208741522, "grad_norm": 0.277295857667923, "learning_rate": 0.0001594395888391585, "loss": 11.6605, "step": 42642 }, { "epoch": 0.8926358536381144, "grad_norm": 0.2948295772075653, "learning_rate": 0.0001594378256500682, "loss": 11.6675, "step": 42643 }, { "epoch": 0.8926567864020766, "grad_norm": 0.3684539496898651, "learning_rate": 0.00015943606243240503, "loss": 11.6648, "step": 42644 }, { "epoch": 0.8926777191660387, "grad_norm": 0.3509501814842224, "learning_rate": 0.00015943429918616973, "loss": 11.6663, "step": 42645 }, { "epoch": 0.8926986519300009, "grad_norm": 0.33359065651893616, "learning_rate": 0.00015943253591136315, "loss": 11.6827, "step": 42646 }, { "epoch": 0.892719584693963, "grad_norm": 0.3209956884384155, "learning_rate": 0.00015943077260798617, "loss": 11.6581, "step": 42647 }, { "epoch": 0.8927405174579252, "grad_norm": 0.2806743085384369, "learning_rate": 0.00015942900927603965, "loss": 11.6572, "step": 42648 }, { "epoch": 0.8927614502218872, "grad_norm": 0.28520599007606506, "learning_rate": 0.00015942724591552442, "loss": 11.6739, "step": 42649 }, { "epoch": 0.8927823829858494, "grad_norm": 0.3179619610309601, "learning_rate": 0.00015942548252644126, "loss": 11.6517, "step": 42650 }, { "epoch": 0.8928033157498116, "grad_norm": 0.27250993251800537, "learning_rate": 0.00015942371910879116, "loss": 11.6524, "step": 42651 }, { "epoch": 0.8928242485137737, "grad_norm": 0.31126803159713745, "learning_rate": 0.00015942195566257482, "loss": 11.6684, "step": 42652 }, { "epoch": 0.8928451812777359, "grad_norm": 0.3243180513381958, "learning_rate": 0.00015942019218779317, "loss": 11.6679, "step": 42653 }, { "epoch": 0.892866114041698, "grad_norm": 0.3536723852157593, "learning_rate": 0.00015941842868444708, "loss": 11.6616, "step": 42654 }, { "epoch": 0.8928870468056602, "grad_norm": 0.3237757384777069, "learning_rate": 0.00015941666515253733, "loss": 11.6491, "step": 42655 }, { "epoch": 0.8929079795696224, "grad_norm": 0.38203203678131104, "learning_rate": 0.00015941490159206482, "loss": 11.6738, "step": 42656 }, { "epoch": 0.8929289123335845, "grad_norm": 0.2791083753108978, "learning_rate": 0.00015941313800303035, "loss": 11.668, "step": 42657 }, { "epoch": 0.8929498450975467, "grad_norm": 0.3235009014606476, "learning_rate": 0.0001594113743854348, "loss": 11.6552, "step": 42658 }, { "epoch": 0.8929707778615088, "grad_norm": 0.34740787744522095, "learning_rate": 0.00015940961073927903, "loss": 11.6705, "step": 42659 }, { "epoch": 0.892991710625471, "grad_norm": 0.2852475643157959, "learning_rate": 0.00015940784706456384, "loss": 11.6476, "step": 42660 }, { "epoch": 0.8930126433894331, "grad_norm": 0.3115772306919098, "learning_rate": 0.00015940608336129013, "loss": 11.6586, "step": 42661 }, { "epoch": 0.8930335761533953, "grad_norm": 0.29386430978775024, "learning_rate": 0.00015940431962945873, "loss": 11.6672, "step": 42662 }, { "epoch": 0.8930545089173575, "grad_norm": 0.34923022985458374, "learning_rate": 0.00015940255586907048, "loss": 11.6837, "step": 42663 }, { "epoch": 0.8930754416813196, "grad_norm": 0.5346822142601013, "learning_rate": 0.0001594007920801262, "loss": 11.6693, "step": 42664 }, { "epoch": 0.8930963744452818, "grad_norm": 0.41038644313812256, "learning_rate": 0.00015939902826262681, "loss": 11.684, "step": 42665 }, { "epoch": 0.8931173072092439, "grad_norm": 0.27659451961517334, "learning_rate": 0.0001593972644165731, "loss": 11.6618, "step": 42666 }, { "epoch": 0.8931382399732061, "grad_norm": 0.3439423143863678, "learning_rate": 0.00015939550054196592, "loss": 11.6582, "step": 42667 }, { "epoch": 0.8931591727371682, "grad_norm": 0.32286521792411804, "learning_rate": 0.00015939373663880616, "loss": 11.6548, "step": 42668 }, { "epoch": 0.8931801055011304, "grad_norm": 0.33215299248695374, "learning_rate": 0.0001593919727070946, "loss": 11.672, "step": 42669 }, { "epoch": 0.8932010382650926, "grad_norm": 0.30985376238822937, "learning_rate": 0.00015939020874683216, "loss": 11.6674, "step": 42670 }, { "epoch": 0.8932219710290547, "grad_norm": 0.30405324697494507, "learning_rate": 0.00015938844475801966, "loss": 11.6708, "step": 42671 }, { "epoch": 0.8932429037930169, "grad_norm": 0.3212146461009979, "learning_rate": 0.00015938668074065793, "loss": 11.6791, "step": 42672 }, { "epoch": 0.8932638365569789, "grad_norm": 0.2433003932237625, "learning_rate": 0.00015938491669474782, "loss": 11.6751, "step": 42673 }, { "epoch": 0.8932847693209411, "grad_norm": 0.25223952531814575, "learning_rate": 0.00015938315262029021, "loss": 11.666, "step": 42674 }, { "epoch": 0.8933057020849033, "grad_norm": 0.40187087655067444, "learning_rate": 0.00015938138851728594, "loss": 11.6815, "step": 42675 }, { "epoch": 0.8933266348488654, "grad_norm": 0.3093782365322113, "learning_rate": 0.00015937962438573584, "loss": 11.6556, "step": 42676 }, { "epoch": 0.8933475676128276, "grad_norm": 0.5318964719772339, "learning_rate": 0.00015937786022564074, "loss": 11.662, "step": 42677 }, { "epoch": 0.8933685003767897, "grad_norm": 0.2927986681461334, "learning_rate": 0.0001593760960370015, "loss": 11.6803, "step": 42678 }, { "epoch": 0.8933894331407519, "grad_norm": 0.24669697880744934, "learning_rate": 0.00015937433181981904, "loss": 11.6731, "step": 42679 }, { "epoch": 0.893410365904714, "grad_norm": 0.3429569602012634, "learning_rate": 0.00015937256757409413, "loss": 11.6597, "step": 42680 }, { "epoch": 0.8934312986686762, "grad_norm": 0.2725781500339508, "learning_rate": 0.00015937080329982763, "loss": 11.6833, "step": 42681 }, { "epoch": 0.8934522314326384, "grad_norm": 0.32094770669937134, "learning_rate": 0.00015936903899702037, "loss": 11.6685, "step": 42682 }, { "epoch": 0.8934731641966005, "grad_norm": 0.33940619230270386, "learning_rate": 0.00015936727466567328, "loss": 11.6686, "step": 42683 }, { "epoch": 0.8934940969605627, "grad_norm": 0.40456143021583557, "learning_rate": 0.0001593655103057871, "loss": 11.6746, "step": 42684 }, { "epoch": 0.8935150297245248, "grad_norm": 0.2706643342971802, "learning_rate": 0.00015936374591736275, "loss": 11.6756, "step": 42685 }, { "epoch": 0.893535962488487, "grad_norm": 0.27233102917671204, "learning_rate": 0.00015936198150040104, "loss": 11.6596, "step": 42686 }, { "epoch": 0.8935568952524491, "grad_norm": 0.25099095702171326, "learning_rate": 0.00015936021705490285, "loss": 11.668, "step": 42687 }, { "epoch": 0.8935778280164113, "grad_norm": 0.33247554302215576, "learning_rate": 0.00015935845258086904, "loss": 11.6721, "step": 42688 }, { "epoch": 0.8935987607803735, "grad_norm": 0.36891719698905945, "learning_rate": 0.0001593566880783004, "loss": 11.6732, "step": 42689 }, { "epoch": 0.8936196935443356, "grad_norm": 0.2507898211479187, "learning_rate": 0.0001593549235471978, "loss": 11.6708, "step": 42690 }, { "epoch": 0.8936406263082978, "grad_norm": 0.32785025238990784, "learning_rate": 0.00015935315898756215, "loss": 11.6631, "step": 42691 }, { "epoch": 0.8936615590722599, "grad_norm": 0.3439721465110779, "learning_rate": 0.0001593513943993942, "loss": 11.6706, "step": 42692 }, { "epoch": 0.8936824918362221, "grad_norm": 0.44161340594291687, "learning_rate": 0.00015934962978269486, "loss": 11.689, "step": 42693 }, { "epoch": 0.8937034246001843, "grad_norm": 0.3354402184486389, "learning_rate": 0.00015934786513746495, "loss": 11.6607, "step": 42694 }, { "epoch": 0.8937243573641463, "grad_norm": 0.32677432894706726, "learning_rate": 0.00015934610046370532, "loss": 11.6724, "step": 42695 }, { "epoch": 0.8937452901281085, "grad_norm": 0.29451948404312134, "learning_rate": 0.00015934433576141686, "loss": 11.6727, "step": 42696 }, { "epoch": 0.8937662228920706, "grad_norm": 0.2726023495197296, "learning_rate": 0.00015934257103060036, "loss": 11.6655, "step": 42697 }, { "epoch": 0.8937871556560328, "grad_norm": 0.31986016035079956, "learning_rate": 0.00015934080627125674, "loss": 11.6766, "step": 42698 }, { "epoch": 0.8938080884199949, "grad_norm": 0.22982940077781677, "learning_rate": 0.00015933904148338678, "loss": 11.6844, "step": 42699 }, { "epoch": 0.8938290211839571, "grad_norm": 0.25102248787879944, "learning_rate": 0.00015933727666699136, "loss": 11.6734, "step": 42700 }, { "epoch": 0.8938499539479193, "grad_norm": 0.306102991104126, "learning_rate": 0.0001593355118220713, "loss": 11.6675, "step": 42701 }, { "epoch": 0.8938708867118814, "grad_norm": 0.3055650591850281, "learning_rate": 0.00015933374694862752, "loss": 11.6525, "step": 42702 }, { "epoch": 0.8938918194758436, "grad_norm": 0.29302096366882324, "learning_rate": 0.00015933198204666078, "loss": 11.6721, "step": 42703 }, { "epoch": 0.8939127522398057, "grad_norm": 0.3994215130805969, "learning_rate": 0.00015933021711617197, "loss": 11.6797, "step": 42704 }, { "epoch": 0.8939336850037679, "grad_norm": 0.2967834770679474, "learning_rate": 0.00015932845215716195, "loss": 11.67, "step": 42705 }, { "epoch": 0.89395461776773, "grad_norm": 0.4920945465564728, "learning_rate": 0.00015932668716963155, "loss": 11.6786, "step": 42706 }, { "epoch": 0.8939755505316922, "grad_norm": 0.27611786127090454, "learning_rate": 0.00015932492215358163, "loss": 11.6745, "step": 42707 }, { "epoch": 0.8939964832956544, "grad_norm": 0.30764472484588623, "learning_rate": 0.00015932315710901303, "loss": 11.6635, "step": 42708 }, { "epoch": 0.8940174160596165, "grad_norm": 0.41750091314315796, "learning_rate": 0.00015932139203592656, "loss": 11.682, "step": 42709 }, { "epoch": 0.8940383488235787, "grad_norm": 0.22521191835403442, "learning_rate": 0.00015931962693432314, "loss": 11.6681, "step": 42710 }, { "epoch": 0.8940592815875408, "grad_norm": 0.2903309762477875, "learning_rate": 0.0001593178618042036, "loss": 11.6509, "step": 42711 }, { "epoch": 0.894080214351503, "grad_norm": 0.3563849925994873, "learning_rate": 0.00015931609664556872, "loss": 11.6777, "step": 42712 }, { "epoch": 0.8941011471154652, "grad_norm": 0.40190204977989197, "learning_rate": 0.00015931433145841947, "loss": 11.6777, "step": 42713 }, { "epoch": 0.8941220798794273, "grad_norm": 0.2777068018913269, "learning_rate": 0.0001593125662427566, "loss": 11.6687, "step": 42714 }, { "epoch": 0.8941430126433895, "grad_norm": 0.3721765875816345, "learning_rate": 0.000159310800998581, "loss": 11.6761, "step": 42715 }, { "epoch": 0.8941639454073516, "grad_norm": 0.2744125425815582, "learning_rate": 0.00015930903572589352, "loss": 11.6708, "step": 42716 }, { "epoch": 0.8941848781713138, "grad_norm": 0.3410884141921997, "learning_rate": 0.00015930727042469495, "loss": 11.6547, "step": 42717 }, { "epoch": 0.8942058109352758, "grad_norm": 0.31704506278038025, "learning_rate": 0.00015930550509498623, "loss": 11.6655, "step": 42718 }, { "epoch": 0.894226743699238, "grad_norm": 0.2983931005001068, "learning_rate": 0.00015930373973676814, "loss": 11.666, "step": 42719 }, { "epoch": 0.8942476764632002, "grad_norm": 0.28292757272720337, "learning_rate": 0.0001593019743500416, "loss": 11.6691, "step": 42720 }, { "epoch": 0.8942686092271623, "grad_norm": 0.3448883295059204, "learning_rate": 0.00015930020893480736, "loss": 11.6623, "step": 42721 }, { "epoch": 0.8942895419911245, "grad_norm": 0.34027132391929626, "learning_rate": 0.00015929844349106632, "loss": 11.6778, "step": 42722 }, { "epoch": 0.8943104747550866, "grad_norm": 0.27292218804359436, "learning_rate": 0.00015929667801881938, "loss": 11.6587, "step": 42723 }, { "epoch": 0.8943314075190488, "grad_norm": 0.3083263039588928, "learning_rate": 0.0001592949125180673, "loss": 11.6785, "step": 42724 }, { "epoch": 0.8943523402830109, "grad_norm": 0.27483683824539185, "learning_rate": 0.00015929314698881098, "loss": 11.6627, "step": 42725 }, { "epoch": 0.8943732730469731, "grad_norm": 0.3179738223552704, "learning_rate": 0.00015929138143105126, "loss": 11.656, "step": 42726 }, { "epoch": 0.8943942058109353, "grad_norm": 0.33663061261177063, "learning_rate": 0.00015928961584478897, "loss": 11.6771, "step": 42727 }, { "epoch": 0.8944151385748974, "grad_norm": 0.36998236179351807, "learning_rate": 0.00015928785023002498, "loss": 11.6677, "step": 42728 }, { "epoch": 0.8944360713388596, "grad_norm": 0.33329951763153076, "learning_rate": 0.00015928608458676014, "loss": 11.6782, "step": 42729 }, { "epoch": 0.8944570041028217, "grad_norm": 0.25040045380592346, "learning_rate": 0.00015928431891499528, "loss": 11.6603, "step": 42730 }, { "epoch": 0.8944779368667839, "grad_norm": 0.28765350580215454, "learning_rate": 0.00015928255321473125, "loss": 11.6706, "step": 42731 }, { "epoch": 0.8944988696307461, "grad_norm": 0.518327534198761, "learning_rate": 0.00015928078748596894, "loss": 11.6868, "step": 42732 }, { "epoch": 0.8945198023947082, "grad_norm": 0.3370855450630188, "learning_rate": 0.00015927902172870915, "loss": 11.6725, "step": 42733 }, { "epoch": 0.8945407351586704, "grad_norm": 0.35004109144210815, "learning_rate": 0.00015927725594295272, "loss": 11.6737, "step": 42734 }, { "epoch": 0.8945616679226325, "grad_norm": 0.3253951072692871, "learning_rate": 0.00015927549012870057, "loss": 11.6704, "step": 42735 }, { "epoch": 0.8945826006865947, "grad_norm": 0.2908967435359955, "learning_rate": 0.00015927372428595348, "loss": 11.6588, "step": 42736 }, { "epoch": 0.8946035334505568, "grad_norm": 0.2537193298339844, "learning_rate": 0.00015927195841471234, "loss": 11.6731, "step": 42737 }, { "epoch": 0.894624466214519, "grad_norm": 0.31477490067481995, "learning_rate": 0.00015927019251497796, "loss": 11.6778, "step": 42738 }, { "epoch": 0.8946453989784812, "grad_norm": 0.41600003838539124, "learning_rate": 0.0001592684265867512, "loss": 11.6636, "step": 42739 }, { "epoch": 0.8946663317424433, "grad_norm": 0.2721916735172272, "learning_rate": 0.00015926666063003294, "loss": 11.6705, "step": 42740 }, { "epoch": 0.8946872645064055, "grad_norm": 0.33648815751075745, "learning_rate": 0.00015926489464482404, "loss": 11.645, "step": 42741 }, { "epoch": 0.8947081972703675, "grad_norm": 0.2738501727581024, "learning_rate": 0.0001592631286311253, "loss": 11.6643, "step": 42742 }, { "epoch": 0.8947291300343297, "grad_norm": 0.46857982873916626, "learning_rate": 0.00015926136258893756, "loss": 11.6672, "step": 42743 }, { "epoch": 0.8947500627982918, "grad_norm": 0.28312820196151733, "learning_rate": 0.00015925959651826168, "loss": 11.673, "step": 42744 }, { "epoch": 0.894770995562254, "grad_norm": 0.35308000445365906, "learning_rate": 0.00015925783041909858, "loss": 11.6584, "step": 42745 }, { "epoch": 0.8947919283262162, "grad_norm": 0.31372299790382385, "learning_rate": 0.00015925606429144902, "loss": 11.6769, "step": 42746 }, { "epoch": 0.8948128610901783, "grad_norm": 0.3721465766429901, "learning_rate": 0.00015925429813531388, "loss": 11.67, "step": 42747 }, { "epoch": 0.8948337938541405, "grad_norm": 0.3403419852256775, "learning_rate": 0.00015925253195069402, "loss": 11.652, "step": 42748 }, { "epoch": 0.8948547266181026, "grad_norm": 0.9565567970275879, "learning_rate": 0.0001592507657375903, "loss": 11.5894, "step": 42749 }, { "epoch": 0.8948756593820648, "grad_norm": 0.2755599915981293, "learning_rate": 0.0001592489994960035, "loss": 11.6606, "step": 42750 }, { "epoch": 0.894896592146027, "grad_norm": 0.37852829694747925, "learning_rate": 0.00015924723322593455, "loss": 11.6657, "step": 42751 }, { "epoch": 0.8949175249099891, "grad_norm": 0.30165335536003113, "learning_rate": 0.00015924546692738426, "loss": 11.6761, "step": 42752 }, { "epoch": 0.8949384576739513, "grad_norm": 0.25033557415008545, "learning_rate": 0.0001592437006003535, "loss": 11.666, "step": 42753 }, { "epoch": 0.8949593904379134, "grad_norm": 0.3044746518135071, "learning_rate": 0.0001592419342448431, "loss": 11.6559, "step": 42754 }, { "epoch": 0.8949803232018756, "grad_norm": 0.23691916465759277, "learning_rate": 0.00015924016786085393, "loss": 11.6609, "step": 42755 }, { "epoch": 0.8950012559658377, "grad_norm": 0.404813677072525, "learning_rate": 0.0001592384014483868, "loss": 11.6654, "step": 42756 }, { "epoch": 0.8950221887297999, "grad_norm": 0.28026679158210754, "learning_rate": 0.0001592366350074426, "loss": 11.6515, "step": 42757 }, { "epoch": 0.8950431214937621, "grad_norm": 0.2920229434967041, "learning_rate": 0.00015923486853802214, "loss": 11.6865, "step": 42758 }, { "epoch": 0.8950640542577242, "grad_norm": 0.3340038061141968, "learning_rate": 0.00015923310204012632, "loss": 11.6527, "step": 42759 }, { "epoch": 0.8950849870216864, "grad_norm": 0.2530581057071686, "learning_rate": 0.00015923133551375593, "loss": 11.6596, "step": 42760 }, { "epoch": 0.8951059197856485, "grad_norm": 0.3448720872402191, "learning_rate": 0.00015922956895891187, "loss": 11.6571, "step": 42761 }, { "epoch": 0.8951268525496107, "grad_norm": 0.32078656554222107, "learning_rate": 0.00015922780237559496, "loss": 11.6518, "step": 42762 }, { "epoch": 0.8951477853135728, "grad_norm": 0.34901443123817444, "learning_rate": 0.00015922603576380606, "loss": 11.6455, "step": 42763 }, { "epoch": 0.895168718077535, "grad_norm": 0.2968454658985138, "learning_rate": 0.00015922426912354603, "loss": 11.6674, "step": 42764 }, { "epoch": 0.8951896508414972, "grad_norm": 0.45575278997421265, "learning_rate": 0.0001592225024548157, "loss": 11.6857, "step": 42765 }, { "epoch": 0.8952105836054592, "grad_norm": 0.34870001673698425, "learning_rate": 0.00015922073575761592, "loss": 11.6771, "step": 42766 }, { "epoch": 0.8952315163694214, "grad_norm": 0.30341318249702454, "learning_rate": 0.00015921896903194754, "loss": 11.6499, "step": 42767 }, { "epoch": 0.8952524491333835, "grad_norm": 0.30132317543029785, "learning_rate": 0.00015921720227781143, "loss": 11.6569, "step": 42768 }, { "epoch": 0.8952733818973457, "grad_norm": 0.3105502724647522, "learning_rate": 0.00015921543549520843, "loss": 11.667, "step": 42769 }, { "epoch": 0.8952943146613079, "grad_norm": 0.2628900110721588, "learning_rate": 0.00015921366868413937, "loss": 11.6502, "step": 42770 }, { "epoch": 0.89531524742527, "grad_norm": 0.4322647750377655, "learning_rate": 0.0001592119018446051, "loss": 11.677, "step": 42771 }, { "epoch": 0.8953361801892322, "grad_norm": 0.3703600764274597, "learning_rate": 0.0001592101349766065, "loss": 11.6701, "step": 42772 }, { "epoch": 0.8953571129531943, "grad_norm": 0.35618558526039124, "learning_rate": 0.00015920836808014442, "loss": 11.6753, "step": 42773 }, { "epoch": 0.8953780457171565, "grad_norm": 0.3410325348377228, "learning_rate": 0.00015920660115521966, "loss": 11.6665, "step": 42774 }, { "epoch": 0.8953989784811186, "grad_norm": 0.2596162259578705, "learning_rate": 0.00015920483420183313, "loss": 11.6593, "step": 42775 }, { "epoch": 0.8954199112450808, "grad_norm": 0.2976694107055664, "learning_rate": 0.00015920306721998561, "loss": 11.6531, "step": 42776 }, { "epoch": 0.895440844009043, "grad_norm": 0.3636378347873688, "learning_rate": 0.000159201300209678, "loss": 11.6669, "step": 42777 }, { "epoch": 0.8954617767730051, "grad_norm": 0.38464027643203735, "learning_rate": 0.00015919953317091115, "loss": 11.6574, "step": 42778 }, { "epoch": 0.8954827095369673, "grad_norm": 0.4039284586906433, "learning_rate": 0.00015919776610368592, "loss": 11.6627, "step": 42779 }, { "epoch": 0.8955036423009294, "grad_norm": 0.39875590801239014, "learning_rate": 0.0001591959990080031, "loss": 11.6629, "step": 42780 }, { "epoch": 0.8955245750648916, "grad_norm": 0.3545205891132355, "learning_rate": 0.0001591942318838636, "loss": 11.6844, "step": 42781 }, { "epoch": 0.8955455078288537, "grad_norm": 0.3146789073944092, "learning_rate": 0.00015919246473126823, "loss": 11.6729, "step": 42782 }, { "epoch": 0.8955664405928159, "grad_norm": 0.2788640260696411, "learning_rate": 0.00015919069755021784, "loss": 11.6634, "step": 42783 }, { "epoch": 0.8955873733567781, "grad_norm": 0.2984737455844879, "learning_rate": 0.00015918893034071334, "loss": 11.6734, "step": 42784 }, { "epoch": 0.8956083061207402, "grad_norm": 0.39241665601730347, "learning_rate": 0.0001591871631027555, "loss": 11.6852, "step": 42785 }, { "epoch": 0.8956292388847024, "grad_norm": 0.3394955098628998, "learning_rate": 0.00015918539583634523, "loss": 11.6622, "step": 42786 }, { "epoch": 0.8956501716486645, "grad_norm": 0.2858045995235443, "learning_rate": 0.00015918362854148334, "loss": 11.667, "step": 42787 }, { "epoch": 0.8956711044126267, "grad_norm": 0.3766179084777832, "learning_rate": 0.0001591818612181707, "loss": 11.6703, "step": 42788 }, { "epoch": 0.8956920371765889, "grad_norm": 0.3110087811946869, "learning_rate": 0.00015918009386640814, "loss": 11.6808, "step": 42789 }, { "epoch": 0.8957129699405509, "grad_norm": 0.36061134934425354, "learning_rate": 0.00015917832648619653, "loss": 11.6753, "step": 42790 }, { "epoch": 0.8957339027045131, "grad_norm": 0.2897372841835022, "learning_rate": 0.00015917655907753673, "loss": 11.6623, "step": 42791 }, { "epoch": 0.8957548354684752, "grad_norm": 0.3107621371746063, "learning_rate": 0.00015917479164042955, "loss": 11.6581, "step": 42792 }, { "epoch": 0.8957757682324374, "grad_norm": 0.33873510360717773, "learning_rate": 0.00015917302417487586, "loss": 11.6739, "step": 42793 }, { "epoch": 0.8957967009963995, "grad_norm": 0.34816327691078186, "learning_rate": 0.0001591712566808765, "loss": 11.6659, "step": 42794 }, { "epoch": 0.8958176337603617, "grad_norm": 0.35353314876556396, "learning_rate": 0.00015916948915843237, "loss": 11.6689, "step": 42795 }, { "epoch": 0.8958385665243239, "grad_norm": 0.26706188917160034, "learning_rate": 0.00015916772160754427, "loss": 11.6529, "step": 42796 }, { "epoch": 0.895859499288286, "grad_norm": 0.3426342308521271, "learning_rate": 0.00015916595402821305, "loss": 11.6734, "step": 42797 }, { "epoch": 0.8958804320522482, "grad_norm": 0.29772642254829407, "learning_rate": 0.00015916418642043957, "loss": 11.6738, "step": 42798 }, { "epoch": 0.8959013648162103, "grad_norm": 0.3034721612930298, "learning_rate": 0.00015916241878422466, "loss": 11.6731, "step": 42799 }, { "epoch": 0.8959222975801725, "grad_norm": 0.3234468698501587, "learning_rate": 0.00015916065111956922, "loss": 11.6687, "step": 42800 }, { "epoch": 0.8959432303441346, "grad_norm": 0.31719303131103516, "learning_rate": 0.00015915888342647407, "loss": 11.6919, "step": 42801 }, { "epoch": 0.8959641631080968, "grad_norm": 0.3359937071800232, "learning_rate": 0.00015915711570494006, "loss": 11.6763, "step": 42802 }, { "epoch": 0.895985095872059, "grad_norm": 0.2620926797389984, "learning_rate": 0.00015915534795496802, "loss": 11.6854, "step": 42803 }, { "epoch": 0.8960060286360211, "grad_norm": 0.30488234758377075, "learning_rate": 0.00015915358017655883, "loss": 11.6701, "step": 42804 }, { "epoch": 0.8960269613999833, "grad_norm": 0.3741981089115143, "learning_rate": 0.00015915181236971334, "loss": 11.6688, "step": 42805 }, { "epoch": 0.8960478941639454, "grad_norm": 0.3664136528968811, "learning_rate": 0.00015915004453443237, "loss": 11.6847, "step": 42806 }, { "epoch": 0.8960688269279076, "grad_norm": 0.37057358026504517, "learning_rate": 0.0001591482766707168, "loss": 11.6698, "step": 42807 }, { "epoch": 0.8960897596918698, "grad_norm": 0.267877995967865, "learning_rate": 0.00015914650877856744, "loss": 11.68, "step": 42808 }, { "epoch": 0.8961106924558319, "grad_norm": 0.2909516990184784, "learning_rate": 0.00015914474085798518, "loss": 11.6702, "step": 42809 }, { "epoch": 0.8961316252197941, "grad_norm": 0.4932716190814972, "learning_rate": 0.0001591429729089709, "loss": 11.6676, "step": 42810 }, { "epoch": 0.8961525579837561, "grad_norm": 0.35661789774894714, "learning_rate": 0.00015914120493152538, "loss": 11.6755, "step": 42811 }, { "epoch": 0.8961734907477183, "grad_norm": 0.2894938886165619, "learning_rate": 0.00015913943692564945, "loss": 11.6799, "step": 42812 }, { "epoch": 0.8961944235116804, "grad_norm": 0.30420488119125366, "learning_rate": 0.00015913766889134406, "loss": 11.6614, "step": 42813 }, { "epoch": 0.8962153562756426, "grad_norm": 0.3041507303714752, "learning_rate": 0.00015913590082861, "loss": 11.6798, "step": 42814 }, { "epoch": 0.8962362890396048, "grad_norm": 0.3178754448890686, "learning_rate": 0.00015913413273744813, "loss": 11.6621, "step": 42815 }, { "epoch": 0.8962572218035669, "grad_norm": 0.29872533679008484, "learning_rate": 0.0001591323646178593, "loss": 11.6807, "step": 42816 }, { "epoch": 0.8962781545675291, "grad_norm": 0.2574421465396881, "learning_rate": 0.00015913059646984433, "loss": 11.6612, "step": 42817 }, { "epoch": 0.8962990873314912, "grad_norm": 0.330403208732605, "learning_rate": 0.00015912882829340413, "loss": 11.67, "step": 42818 }, { "epoch": 0.8963200200954534, "grad_norm": 0.3177258372306824, "learning_rate": 0.00015912706008853951, "loss": 11.6707, "step": 42819 }, { "epoch": 0.8963409528594155, "grad_norm": 0.32107892632484436, "learning_rate": 0.00015912529185525132, "loss": 11.6602, "step": 42820 }, { "epoch": 0.8963618856233777, "grad_norm": 0.291786789894104, "learning_rate": 0.0001591235235935404, "loss": 11.6516, "step": 42821 }, { "epoch": 0.8963828183873399, "grad_norm": 0.35036700963974, "learning_rate": 0.00015912175530340762, "loss": 11.6723, "step": 42822 }, { "epoch": 0.896403751151302, "grad_norm": 0.3545790910720825, "learning_rate": 0.00015911998698485386, "loss": 11.666, "step": 42823 }, { "epoch": 0.8964246839152642, "grad_norm": 0.27832603454589844, "learning_rate": 0.00015911821863787994, "loss": 11.6522, "step": 42824 }, { "epoch": 0.8964456166792263, "grad_norm": 0.281951904296875, "learning_rate": 0.00015911645026248666, "loss": 11.6809, "step": 42825 }, { "epoch": 0.8964665494431885, "grad_norm": 0.3456314504146576, "learning_rate": 0.00015911468185867499, "loss": 11.6745, "step": 42826 }, { "epoch": 0.8964874822071506, "grad_norm": 0.2790725529193878, "learning_rate": 0.00015911291342644564, "loss": 11.6938, "step": 42827 }, { "epoch": 0.8965084149711128, "grad_norm": 0.33817923069000244, "learning_rate": 0.00015911114496579954, "loss": 11.6717, "step": 42828 }, { "epoch": 0.896529347735075, "grad_norm": 0.26566073298454285, "learning_rate": 0.00015910937647673756, "loss": 11.6595, "step": 42829 }, { "epoch": 0.8965502804990371, "grad_norm": 0.3340957462787628, "learning_rate": 0.00015910760795926048, "loss": 11.689, "step": 42830 }, { "epoch": 0.8965712132629993, "grad_norm": 0.29350990056991577, "learning_rate": 0.00015910583941336923, "loss": 11.6617, "step": 42831 }, { "epoch": 0.8965921460269614, "grad_norm": 0.2530081868171692, "learning_rate": 0.00015910407083906457, "loss": 11.6684, "step": 42832 }, { "epoch": 0.8966130787909236, "grad_norm": 0.30023491382598877, "learning_rate": 0.00015910230223634744, "loss": 11.6764, "step": 42833 }, { "epoch": 0.8966340115548858, "grad_norm": 0.2558087110519409, "learning_rate": 0.00015910053360521862, "loss": 11.6759, "step": 42834 }, { "epoch": 0.8966549443188478, "grad_norm": 0.2882556617259979, "learning_rate": 0.00015909876494567902, "loss": 11.6436, "step": 42835 }, { "epoch": 0.89667587708281, "grad_norm": 0.2595576345920563, "learning_rate": 0.00015909699625772943, "loss": 11.6665, "step": 42836 }, { "epoch": 0.8966968098467721, "grad_norm": 0.3069404363632202, "learning_rate": 0.00015909522754137072, "loss": 11.6883, "step": 42837 }, { "epoch": 0.8967177426107343, "grad_norm": 0.29851776361465454, "learning_rate": 0.0001590934587966038, "loss": 11.6732, "step": 42838 }, { "epoch": 0.8967386753746964, "grad_norm": 0.27730634808540344, "learning_rate": 0.00015909169002342944, "loss": 11.6794, "step": 42839 }, { "epoch": 0.8967596081386586, "grad_norm": 0.28457024693489075, "learning_rate": 0.0001590899212218485, "loss": 11.6512, "step": 42840 }, { "epoch": 0.8967805409026208, "grad_norm": 0.30042627453804016, "learning_rate": 0.00015908815239186187, "loss": 11.676, "step": 42841 }, { "epoch": 0.8968014736665829, "grad_norm": 0.29455268383026123, "learning_rate": 0.00015908638353347038, "loss": 11.6732, "step": 42842 }, { "epoch": 0.8968224064305451, "grad_norm": 0.2974441647529602, "learning_rate": 0.0001590846146466749, "loss": 11.6794, "step": 42843 }, { "epoch": 0.8968433391945072, "grad_norm": 0.2761704623699188, "learning_rate": 0.00015908284573147623, "loss": 11.6658, "step": 42844 }, { "epoch": 0.8968642719584694, "grad_norm": 0.2679971754550934, "learning_rate": 0.00015908107678787528, "loss": 11.6641, "step": 42845 }, { "epoch": 0.8968852047224315, "grad_norm": 0.3458254933357239, "learning_rate": 0.00015907930781587285, "loss": 11.6805, "step": 42846 }, { "epoch": 0.8969061374863937, "grad_norm": 0.26079070568084717, "learning_rate": 0.00015907753881546982, "loss": 11.6658, "step": 42847 }, { "epoch": 0.8969270702503559, "grad_norm": 0.3156726360321045, "learning_rate": 0.00015907576978666703, "loss": 11.6792, "step": 42848 }, { "epoch": 0.896948003014318, "grad_norm": 0.4106091558933258, "learning_rate": 0.00015907400072946533, "loss": 11.6502, "step": 42849 }, { "epoch": 0.8969689357782802, "grad_norm": 0.26877036690711975, "learning_rate": 0.00015907223164386559, "loss": 11.6966, "step": 42850 }, { "epoch": 0.8969898685422423, "grad_norm": 0.29338791966438293, "learning_rate": 0.00015907046252986863, "loss": 11.6673, "step": 42851 }, { "epoch": 0.8970108013062045, "grad_norm": 0.3048464357852936, "learning_rate": 0.00015906869338747534, "loss": 11.6602, "step": 42852 }, { "epoch": 0.8970317340701667, "grad_norm": 0.33718571066856384, "learning_rate": 0.00015906692421668655, "loss": 11.6566, "step": 42853 }, { "epoch": 0.8970526668341288, "grad_norm": 0.31218937039375305, "learning_rate": 0.00015906515501750305, "loss": 11.6747, "step": 42854 }, { "epoch": 0.897073599598091, "grad_norm": 0.30965662002563477, "learning_rate": 0.00015906338578992578, "loss": 11.6784, "step": 42855 }, { "epoch": 0.897094532362053, "grad_norm": 0.2068859487771988, "learning_rate": 0.00015906161653395556, "loss": 11.6523, "step": 42856 }, { "epoch": 0.8971154651260153, "grad_norm": 0.27571916580200195, "learning_rate": 0.00015905984724959323, "loss": 11.6618, "step": 42857 }, { "epoch": 0.8971363978899773, "grad_norm": 0.32078877091407776, "learning_rate": 0.00015905807793683966, "loss": 11.6666, "step": 42858 }, { "epoch": 0.8971573306539395, "grad_norm": 0.2755894958972931, "learning_rate": 0.00015905630859569566, "loss": 11.6612, "step": 42859 }, { "epoch": 0.8971782634179017, "grad_norm": 0.3359493017196655, "learning_rate": 0.00015905453922616215, "loss": 11.6782, "step": 42860 }, { "epoch": 0.8971991961818638, "grad_norm": 0.43159985542297363, "learning_rate": 0.0001590527698282399, "loss": 11.647, "step": 42861 }, { "epoch": 0.897220128945826, "grad_norm": 0.34067392349243164, "learning_rate": 0.00015905100040192985, "loss": 11.6632, "step": 42862 }, { "epoch": 0.8972410617097881, "grad_norm": 0.36600005626678467, "learning_rate": 0.00015904923094723277, "loss": 11.6713, "step": 42863 }, { "epoch": 0.8972619944737503, "grad_norm": 0.3214929401874542, "learning_rate": 0.00015904746146414955, "loss": 11.6796, "step": 42864 }, { "epoch": 0.8972829272377124, "grad_norm": 0.23846988379955292, "learning_rate": 0.000159045691952681, "loss": 11.6536, "step": 42865 }, { "epoch": 0.8973038600016746, "grad_norm": 0.43354618549346924, "learning_rate": 0.00015904392241282802, "loss": 11.6562, "step": 42866 }, { "epoch": 0.8973247927656368, "grad_norm": 0.23698197305202484, "learning_rate": 0.00015904215284459145, "loss": 11.6404, "step": 42867 }, { "epoch": 0.8973457255295989, "grad_norm": 0.28103405237197876, "learning_rate": 0.00015904038324797214, "loss": 11.6658, "step": 42868 }, { "epoch": 0.8973666582935611, "grad_norm": 0.2438843548297882, "learning_rate": 0.00015903861362297095, "loss": 11.6575, "step": 42869 }, { "epoch": 0.8973875910575232, "grad_norm": 0.3200390934944153, "learning_rate": 0.00015903684396958867, "loss": 11.6864, "step": 42870 }, { "epoch": 0.8974085238214854, "grad_norm": 0.30732300877571106, "learning_rate": 0.00015903507428782621, "loss": 11.6631, "step": 42871 }, { "epoch": 0.8974294565854476, "grad_norm": 0.31407642364501953, "learning_rate": 0.0001590333045776844, "loss": 11.6794, "step": 42872 }, { "epoch": 0.8974503893494097, "grad_norm": 0.2856162488460541, "learning_rate": 0.00015903153483916413, "loss": 11.676, "step": 42873 }, { "epoch": 0.8974713221133719, "grad_norm": 0.3487932085990906, "learning_rate": 0.0001590297650722662, "loss": 11.6925, "step": 42874 }, { "epoch": 0.897492254877334, "grad_norm": 0.26486897468566895, "learning_rate": 0.00015902799527699147, "loss": 11.6655, "step": 42875 }, { "epoch": 0.8975131876412962, "grad_norm": 0.293578177690506, "learning_rate": 0.0001590262254533408, "loss": 11.6674, "step": 42876 }, { "epoch": 0.8975341204052583, "grad_norm": 0.2784161865711212, "learning_rate": 0.00015902445560131507, "loss": 11.6501, "step": 42877 }, { "epoch": 0.8975550531692205, "grad_norm": 0.30271637439727783, "learning_rate": 0.00015902268572091506, "loss": 11.6676, "step": 42878 }, { "epoch": 0.8975759859331827, "grad_norm": 0.30378684401512146, "learning_rate": 0.0001590209158121417, "loss": 11.6705, "step": 42879 }, { "epoch": 0.8975969186971448, "grad_norm": 0.34250307083129883, "learning_rate": 0.0001590191458749958, "loss": 11.6676, "step": 42880 }, { "epoch": 0.897617851461107, "grad_norm": 0.35371285676956177, "learning_rate": 0.00015901737590947817, "loss": 11.6722, "step": 42881 }, { "epoch": 0.897638784225069, "grad_norm": 0.3609612286090851, "learning_rate": 0.00015901560591558974, "loss": 11.6643, "step": 42882 }, { "epoch": 0.8976597169890312, "grad_norm": 0.3602308928966522, "learning_rate": 0.00015901383589333134, "loss": 11.6783, "step": 42883 }, { "epoch": 0.8976806497529933, "grad_norm": 0.5391528606414795, "learning_rate": 0.00015901206584270378, "loss": 11.6641, "step": 42884 }, { "epoch": 0.8977015825169555, "grad_norm": 0.3069484531879425, "learning_rate": 0.00015901029576370794, "loss": 11.6498, "step": 42885 }, { "epoch": 0.8977225152809177, "grad_norm": 0.30011436343193054, "learning_rate": 0.00015900852565634468, "loss": 11.6728, "step": 42886 }, { "epoch": 0.8977434480448798, "grad_norm": 0.2942827045917511, "learning_rate": 0.00015900675552061482, "loss": 11.6708, "step": 42887 }, { "epoch": 0.897764380808842, "grad_norm": 0.2826820909976959, "learning_rate": 0.00015900498535651925, "loss": 11.6566, "step": 42888 }, { "epoch": 0.8977853135728041, "grad_norm": 0.29062139987945557, "learning_rate": 0.0001590032151640588, "loss": 11.6588, "step": 42889 }, { "epoch": 0.8978062463367663, "grad_norm": 0.30497226119041443, "learning_rate": 0.0001590014449432343, "loss": 11.6837, "step": 42890 }, { "epoch": 0.8978271791007285, "grad_norm": 0.5032128691673279, "learning_rate": 0.00015899967469404667, "loss": 11.6908, "step": 42891 }, { "epoch": 0.8978481118646906, "grad_norm": 0.362217515707016, "learning_rate": 0.00015899790441649663, "loss": 11.6905, "step": 42892 }, { "epoch": 0.8978690446286528, "grad_norm": 0.3612109124660492, "learning_rate": 0.0001589961341105852, "loss": 11.6692, "step": 42893 }, { "epoch": 0.8978899773926149, "grad_norm": 0.32733890414237976, "learning_rate": 0.0001589943637763131, "loss": 11.6677, "step": 42894 }, { "epoch": 0.8979109101565771, "grad_norm": 0.2721709609031677, "learning_rate": 0.00015899259341368126, "loss": 11.6732, "step": 42895 }, { "epoch": 0.8979318429205392, "grad_norm": 0.3052407503128052, "learning_rate": 0.00015899082302269047, "loss": 11.6688, "step": 42896 }, { "epoch": 0.8979527756845014, "grad_norm": 0.2742316424846649, "learning_rate": 0.0001589890526033416, "loss": 11.6697, "step": 42897 }, { "epoch": 0.8979737084484636, "grad_norm": 0.35485634207725525, "learning_rate": 0.00015898728215563553, "loss": 11.6652, "step": 42898 }, { "epoch": 0.8979946412124257, "grad_norm": 0.26690593361854553, "learning_rate": 0.00015898551167957308, "loss": 11.6518, "step": 42899 }, { "epoch": 0.8980155739763879, "grad_norm": 0.28637024760246277, "learning_rate": 0.00015898374117515512, "loss": 11.6745, "step": 42900 }, { "epoch": 0.89803650674035, "grad_norm": 0.2941166162490845, "learning_rate": 0.0001589819706423825, "loss": 11.6591, "step": 42901 }, { "epoch": 0.8980574395043122, "grad_norm": 0.37148433923721313, "learning_rate": 0.00015898020008125606, "loss": 11.6968, "step": 42902 }, { "epoch": 0.8980783722682742, "grad_norm": 0.4345003366470337, "learning_rate": 0.00015897842949177667, "loss": 11.6294, "step": 42903 }, { "epoch": 0.8980993050322365, "grad_norm": 0.31851136684417725, "learning_rate": 0.00015897665887394514, "loss": 11.6617, "step": 42904 }, { "epoch": 0.8981202377961987, "grad_norm": 0.3601120710372925, "learning_rate": 0.00015897488822776238, "loss": 11.6657, "step": 42905 }, { "epoch": 0.8981411705601607, "grad_norm": 0.2929290533065796, "learning_rate": 0.0001589731175532292, "loss": 11.6759, "step": 42906 }, { "epoch": 0.8981621033241229, "grad_norm": 0.33520564436912537, "learning_rate": 0.00015897134685034642, "loss": 11.6705, "step": 42907 }, { "epoch": 0.898183036088085, "grad_norm": 0.2971991300582886, "learning_rate": 0.00015896957611911497, "loss": 11.657, "step": 42908 }, { "epoch": 0.8982039688520472, "grad_norm": 0.335854172706604, "learning_rate": 0.00015896780535953564, "loss": 11.6689, "step": 42909 }, { "epoch": 0.8982249016160094, "grad_norm": 0.39770621061325073, "learning_rate": 0.00015896603457160935, "loss": 11.6816, "step": 42910 }, { "epoch": 0.8982458343799715, "grad_norm": 0.32507672905921936, "learning_rate": 0.00015896426375533687, "loss": 11.6896, "step": 42911 }, { "epoch": 0.8982667671439337, "grad_norm": 0.29594847559928894, "learning_rate": 0.0001589624929107191, "loss": 11.676, "step": 42912 }, { "epoch": 0.8982876999078958, "grad_norm": 0.28461387753486633, "learning_rate": 0.00015896072203775685, "loss": 11.6802, "step": 42913 }, { "epoch": 0.898308632671858, "grad_norm": 0.2793348431587219, "learning_rate": 0.00015895895113645102, "loss": 11.6633, "step": 42914 }, { "epoch": 0.8983295654358201, "grad_norm": 0.2670280635356903, "learning_rate": 0.00015895718020680243, "loss": 11.6573, "step": 42915 }, { "epoch": 0.8983504981997823, "grad_norm": 0.29336416721343994, "learning_rate": 0.00015895540924881196, "loss": 11.6647, "step": 42916 }, { "epoch": 0.8983714309637445, "grad_norm": 0.29306212067604065, "learning_rate": 0.00015895363826248042, "loss": 11.6658, "step": 42917 }, { "epoch": 0.8983923637277066, "grad_norm": 0.2971486747264862, "learning_rate": 0.0001589518672478087, "loss": 11.6663, "step": 42918 }, { "epoch": 0.8984132964916688, "grad_norm": 0.2782466411590576, "learning_rate": 0.00015895009620479767, "loss": 11.6722, "step": 42919 }, { "epoch": 0.8984342292556309, "grad_norm": 0.29055488109588623, "learning_rate": 0.0001589483251334481, "loss": 11.6696, "step": 42920 }, { "epoch": 0.8984551620195931, "grad_norm": 0.29742318391799927, "learning_rate": 0.0001589465540337609, "loss": 11.6897, "step": 42921 }, { "epoch": 0.8984760947835552, "grad_norm": 0.32359030842781067, "learning_rate": 0.0001589447829057369, "loss": 11.6805, "step": 42922 }, { "epoch": 0.8984970275475174, "grad_norm": 0.30895158648490906, "learning_rate": 0.00015894301174937698, "loss": 11.6666, "step": 42923 }, { "epoch": 0.8985179603114796, "grad_norm": 0.34612834453582764, "learning_rate": 0.00015894124056468195, "loss": 11.6708, "step": 42924 }, { "epoch": 0.8985388930754417, "grad_norm": 0.38693806529045105, "learning_rate": 0.0001589394693516527, "loss": 11.6832, "step": 42925 }, { "epoch": 0.8985598258394039, "grad_norm": 0.3819095492362976, "learning_rate": 0.0001589376981102901, "loss": 11.6604, "step": 42926 }, { "epoch": 0.898580758603366, "grad_norm": 0.4048415720462799, "learning_rate": 0.0001589359268405949, "loss": 11.6615, "step": 42927 }, { "epoch": 0.8986016913673281, "grad_norm": 0.30027705430984497, "learning_rate": 0.00015893415554256806, "loss": 11.6619, "step": 42928 }, { "epoch": 0.8986226241312903, "grad_norm": 0.41728124022483826, "learning_rate": 0.0001589323842162104, "loss": 11.68, "step": 42929 }, { "epoch": 0.8986435568952524, "grad_norm": 0.3355856239795685, "learning_rate": 0.00015893061286152276, "loss": 11.6701, "step": 42930 }, { "epoch": 0.8986644896592146, "grad_norm": 0.2730790674686432, "learning_rate": 0.00015892884147850596, "loss": 11.6624, "step": 42931 }, { "epoch": 0.8986854224231767, "grad_norm": 0.312382310628891, "learning_rate": 0.0001589270700671609, "loss": 11.669, "step": 42932 }, { "epoch": 0.8987063551871389, "grad_norm": 0.37031710147857666, "learning_rate": 0.0001589252986274884, "loss": 11.6898, "step": 42933 }, { "epoch": 0.898727287951101, "grad_norm": 0.3500698506832123, "learning_rate": 0.00015892352715948937, "loss": 11.6688, "step": 42934 }, { "epoch": 0.8987482207150632, "grad_norm": 0.2994866371154785, "learning_rate": 0.0001589217556631646, "loss": 11.6711, "step": 42935 }, { "epoch": 0.8987691534790254, "grad_norm": 0.27486664056777954, "learning_rate": 0.00015891998413851494, "loss": 11.6696, "step": 42936 }, { "epoch": 0.8987900862429875, "grad_norm": 0.3914303779602051, "learning_rate": 0.0001589182125855413, "loss": 11.6721, "step": 42937 }, { "epoch": 0.8988110190069497, "grad_norm": 0.2805018126964569, "learning_rate": 0.00015891644100424448, "loss": 11.663, "step": 42938 }, { "epoch": 0.8988319517709118, "grad_norm": 0.3158378303050995, "learning_rate": 0.00015891466939462533, "loss": 11.6732, "step": 42939 }, { "epoch": 0.898852884534874, "grad_norm": 0.2997204661369324, "learning_rate": 0.00015891289775668473, "loss": 11.6699, "step": 42940 }, { "epoch": 0.8988738172988361, "grad_norm": 0.2766023278236389, "learning_rate": 0.00015891112609042353, "loss": 11.6637, "step": 42941 }, { "epoch": 0.8988947500627983, "grad_norm": 0.37344661355018616, "learning_rate": 0.00015890935439584255, "loss": 11.6628, "step": 42942 }, { "epoch": 0.8989156828267605, "grad_norm": 0.3111315965652466, "learning_rate": 0.0001589075826729427, "loss": 11.6635, "step": 42943 }, { "epoch": 0.8989366155907226, "grad_norm": 0.4066263437271118, "learning_rate": 0.00015890581092172475, "loss": 11.6805, "step": 42944 }, { "epoch": 0.8989575483546848, "grad_norm": 0.3543893098831177, "learning_rate": 0.00015890403914218962, "loss": 11.6708, "step": 42945 }, { "epoch": 0.8989784811186469, "grad_norm": 0.3371325433254242, "learning_rate": 0.00015890226733433812, "loss": 11.6625, "step": 42946 }, { "epoch": 0.8989994138826091, "grad_norm": 0.363446980714798, "learning_rate": 0.00015890049549817113, "loss": 11.6742, "step": 42947 }, { "epoch": 0.8990203466465713, "grad_norm": 0.29800546169281006, "learning_rate": 0.0001588987236336895, "loss": 11.6651, "step": 42948 }, { "epoch": 0.8990412794105334, "grad_norm": 0.3012842833995819, "learning_rate": 0.00015889695174089408, "loss": 11.6562, "step": 42949 }, { "epoch": 0.8990622121744956, "grad_norm": 0.29291635751724243, "learning_rate": 0.0001588951798197857, "loss": 11.6518, "step": 42950 }, { "epoch": 0.8990831449384576, "grad_norm": 0.2451855093240738, "learning_rate": 0.0001588934078703652, "loss": 11.6423, "step": 42951 }, { "epoch": 0.8991040777024198, "grad_norm": 0.3780209720134735, "learning_rate": 0.0001588916358926335, "loss": 11.6863, "step": 42952 }, { "epoch": 0.8991250104663819, "grad_norm": 0.3034366965293884, "learning_rate": 0.0001588898638865914, "loss": 11.6554, "step": 42953 }, { "epoch": 0.8991459432303441, "grad_norm": 0.30942490696907043, "learning_rate": 0.00015888809185223975, "loss": 11.6713, "step": 42954 }, { "epoch": 0.8991668759943063, "grad_norm": 0.2975815236568451, "learning_rate": 0.00015888631978957943, "loss": 11.6707, "step": 42955 }, { "epoch": 0.8991878087582684, "grad_norm": 0.2950820028781891, "learning_rate": 0.00015888454769861127, "loss": 11.6639, "step": 42956 }, { "epoch": 0.8992087415222306, "grad_norm": 0.3193666338920593, "learning_rate": 0.00015888277557933616, "loss": 11.6517, "step": 42957 }, { "epoch": 0.8992296742861927, "grad_norm": 0.35235852003097534, "learning_rate": 0.00015888100343175484, "loss": 11.6719, "step": 42958 }, { "epoch": 0.8992506070501549, "grad_norm": 0.29480013251304626, "learning_rate": 0.0001588792312558683, "loss": 11.682, "step": 42959 }, { "epoch": 0.899271539814117, "grad_norm": 0.3293943405151367, "learning_rate": 0.00015887745905167732, "loss": 11.6822, "step": 42960 }, { "epoch": 0.8992924725780792, "grad_norm": 0.2543279528617859, "learning_rate": 0.0001588756868191828, "loss": 11.6631, "step": 42961 }, { "epoch": 0.8993134053420414, "grad_norm": 0.2505030035972595, "learning_rate": 0.0001588739145583855, "loss": 11.668, "step": 42962 }, { "epoch": 0.8993343381060035, "grad_norm": 0.3263258635997772, "learning_rate": 0.00015887214226928634, "loss": 11.669, "step": 42963 }, { "epoch": 0.8993552708699657, "grad_norm": 0.33664649724960327, "learning_rate": 0.00015887036995188618, "loss": 11.6629, "step": 42964 }, { "epoch": 0.8993762036339278, "grad_norm": 0.4189959168434143, "learning_rate": 0.00015886859760618586, "loss": 11.6692, "step": 42965 }, { "epoch": 0.89939713639789, "grad_norm": 0.31169965863227844, "learning_rate": 0.0001588668252321862, "loss": 11.6772, "step": 42966 }, { "epoch": 0.8994180691618522, "grad_norm": 0.32646533846855164, "learning_rate": 0.0001588650528298881, "loss": 11.6752, "step": 42967 }, { "epoch": 0.8994390019258143, "grad_norm": 0.2898939251899719, "learning_rate": 0.00015886328039929238, "loss": 11.6585, "step": 42968 }, { "epoch": 0.8994599346897765, "grad_norm": 0.31475743651390076, "learning_rate": 0.00015886150794039991, "loss": 11.6907, "step": 42969 }, { "epoch": 0.8994808674537386, "grad_norm": 0.306344211101532, "learning_rate": 0.00015885973545321153, "loss": 11.6741, "step": 42970 }, { "epoch": 0.8995018002177008, "grad_norm": 0.312822163105011, "learning_rate": 0.00015885796293772808, "loss": 11.6693, "step": 42971 }, { "epoch": 0.8995227329816629, "grad_norm": 0.3169357180595398, "learning_rate": 0.00015885619039395047, "loss": 11.6687, "step": 42972 }, { "epoch": 0.899543665745625, "grad_norm": 0.33364611864089966, "learning_rate": 0.00015885441782187947, "loss": 11.6703, "step": 42973 }, { "epoch": 0.8995645985095873, "grad_norm": 0.3086967170238495, "learning_rate": 0.000158852645221516, "loss": 11.6678, "step": 42974 }, { "epoch": 0.8995855312735493, "grad_norm": 0.2647934556007385, "learning_rate": 0.00015885087259286085, "loss": 11.6743, "step": 42975 }, { "epoch": 0.8996064640375115, "grad_norm": 0.4303978979587555, "learning_rate": 0.0001588490999359149, "loss": 11.6616, "step": 42976 }, { "epoch": 0.8996273968014736, "grad_norm": 0.33103081583976746, "learning_rate": 0.00015884732725067905, "loss": 11.6595, "step": 42977 }, { "epoch": 0.8996483295654358, "grad_norm": 0.3574047088623047, "learning_rate": 0.00015884555453715409, "loss": 11.6601, "step": 42978 }, { "epoch": 0.8996692623293979, "grad_norm": 0.2768685519695282, "learning_rate": 0.0001588437817953409, "loss": 11.669, "step": 42979 }, { "epoch": 0.8996901950933601, "grad_norm": 0.25819122791290283, "learning_rate": 0.00015884200902524032, "loss": 11.6693, "step": 42980 }, { "epoch": 0.8997111278573223, "grad_norm": 0.29295045137405396, "learning_rate": 0.0001588402362268532, "loss": 11.6775, "step": 42981 }, { "epoch": 0.8997320606212844, "grad_norm": 0.2951871156692505, "learning_rate": 0.0001588384634001804, "loss": 11.6719, "step": 42982 }, { "epoch": 0.8997529933852466, "grad_norm": 0.2998408377170563, "learning_rate": 0.00015883669054522276, "loss": 11.6703, "step": 42983 }, { "epoch": 0.8997739261492087, "grad_norm": 0.2897014915943146, "learning_rate": 0.00015883491766198118, "loss": 11.6912, "step": 42984 }, { "epoch": 0.8997948589131709, "grad_norm": 0.32115525007247925, "learning_rate": 0.00015883314475045646, "loss": 11.6734, "step": 42985 }, { "epoch": 0.8998157916771331, "grad_norm": 0.26617804169654846, "learning_rate": 0.00015883137181064947, "loss": 11.6515, "step": 42986 }, { "epoch": 0.8998367244410952, "grad_norm": 0.2962108850479126, "learning_rate": 0.00015882959884256102, "loss": 11.6593, "step": 42987 }, { "epoch": 0.8998576572050574, "grad_norm": 0.24082863330841064, "learning_rate": 0.00015882782584619208, "loss": 11.6504, "step": 42988 }, { "epoch": 0.8998785899690195, "grad_norm": 0.2674519121646881, "learning_rate": 0.00015882605282154338, "loss": 11.6561, "step": 42989 }, { "epoch": 0.8998995227329817, "grad_norm": 0.35625776648521423, "learning_rate": 0.0001588242797686158, "loss": 11.6506, "step": 42990 }, { "epoch": 0.8999204554969438, "grad_norm": 0.2626383602619171, "learning_rate": 0.00015882250668741024, "loss": 11.6541, "step": 42991 }, { "epoch": 0.899941388260906, "grad_norm": 0.36808010935783386, "learning_rate": 0.00015882073357792752, "loss": 11.6514, "step": 42992 }, { "epoch": 0.8999623210248682, "grad_norm": 0.30730506777763367, "learning_rate": 0.00015881896044016847, "loss": 11.6673, "step": 42993 }, { "epoch": 0.8999832537888303, "grad_norm": 0.255160927772522, "learning_rate": 0.000158817187274134, "loss": 11.6654, "step": 42994 }, { "epoch": 0.9000041865527925, "grad_norm": 0.321186363697052, "learning_rate": 0.0001588154140798249, "loss": 11.6656, "step": 42995 }, { "epoch": 0.9000251193167546, "grad_norm": 0.3204724192619324, "learning_rate": 0.00015881364085724208, "loss": 11.6685, "step": 42996 }, { "epoch": 0.9000460520807168, "grad_norm": 0.3348761796951294, "learning_rate": 0.00015881186760638633, "loss": 11.6718, "step": 42997 }, { "epoch": 0.9000669848446788, "grad_norm": 0.38933536410331726, "learning_rate": 0.00015881009432725855, "loss": 11.6896, "step": 42998 }, { "epoch": 0.900087917608641, "grad_norm": 0.3758862316608429, "learning_rate": 0.0001588083210198596, "loss": 11.6855, "step": 42999 }, { "epoch": 0.9001088503726032, "grad_norm": 0.29921895265579224, "learning_rate": 0.0001588065476841903, "loss": 11.6811, "step": 43000 }, { "epoch": 0.9001088503726032, "eval_loss": 11.669366836547852, "eval_runtime": 34.3559, "eval_samples_per_second": 27.972, "eval_steps_per_second": 7.015, "step": 43000 }, { "epoch": 0.9001297831365653, "grad_norm": 0.26787427067756653, "learning_rate": 0.00015880477432025152, "loss": 11.6711, "step": 43001 }, { "epoch": 0.9001507159005275, "grad_norm": 0.3031543791294098, "learning_rate": 0.00015880300092804408, "loss": 11.6752, "step": 43002 }, { "epoch": 0.9001716486644896, "grad_norm": 0.434181809425354, "learning_rate": 0.00015880122750756888, "loss": 11.6703, "step": 43003 }, { "epoch": 0.9001925814284518, "grad_norm": 0.2874484062194824, "learning_rate": 0.00015879945405882675, "loss": 11.6749, "step": 43004 }, { "epoch": 0.900213514192414, "grad_norm": 0.36803439259529114, "learning_rate": 0.00015879768058181854, "loss": 11.6656, "step": 43005 }, { "epoch": 0.9002344469563761, "grad_norm": 0.3642181158065796, "learning_rate": 0.0001587959070765451, "loss": 11.6572, "step": 43006 }, { "epoch": 0.9002553797203383, "grad_norm": 0.5766654014587402, "learning_rate": 0.0001587941335430073, "loss": 11.6163, "step": 43007 }, { "epoch": 0.9002763124843004, "grad_norm": 0.2868765890598297, "learning_rate": 0.000158792359981206, "loss": 11.6861, "step": 43008 }, { "epoch": 0.9002972452482626, "grad_norm": 0.25314608216285706, "learning_rate": 0.00015879058639114199, "loss": 11.6664, "step": 43009 }, { "epoch": 0.9003181780122247, "grad_norm": 0.30175861716270447, "learning_rate": 0.00015878881277281618, "loss": 11.6749, "step": 43010 }, { "epoch": 0.9003391107761869, "grad_norm": 0.3371259570121765, "learning_rate": 0.00015878703912622943, "loss": 11.6926, "step": 43011 }, { "epoch": 0.9003600435401491, "grad_norm": 0.31645286083221436, "learning_rate": 0.00015878526545138257, "loss": 11.6827, "step": 43012 }, { "epoch": 0.9003809763041112, "grad_norm": 0.23216776549816132, "learning_rate": 0.00015878349174827644, "loss": 11.675, "step": 43013 }, { "epoch": 0.9004019090680734, "grad_norm": 0.31022366881370544, "learning_rate": 0.00015878171801691191, "loss": 11.6815, "step": 43014 }, { "epoch": 0.9004228418320355, "grad_norm": 0.306567907333374, "learning_rate": 0.00015877994425728982, "loss": 11.679, "step": 43015 }, { "epoch": 0.9004437745959977, "grad_norm": 0.32368895411491394, "learning_rate": 0.0001587781704694111, "loss": 11.6673, "step": 43016 }, { "epoch": 0.9004647073599598, "grad_norm": 0.3032514154911041, "learning_rate": 0.00015877639665327643, "loss": 11.6598, "step": 43017 }, { "epoch": 0.900485640123922, "grad_norm": 0.4566997289657593, "learning_rate": 0.00015877462280888683, "loss": 11.6641, "step": 43018 }, { "epoch": 0.9005065728878842, "grad_norm": 0.31590497493743896, "learning_rate": 0.00015877284893624308, "loss": 11.6661, "step": 43019 }, { "epoch": 0.9005275056518462, "grad_norm": 0.315685510635376, "learning_rate": 0.00015877107503534606, "loss": 11.6626, "step": 43020 }, { "epoch": 0.9005484384158084, "grad_norm": 0.30445045232772827, "learning_rate": 0.0001587693011061966, "loss": 11.6721, "step": 43021 }, { "epoch": 0.9005693711797705, "grad_norm": 0.3236493468284607, "learning_rate": 0.00015876752714879557, "loss": 11.6894, "step": 43022 }, { "epoch": 0.9005903039437327, "grad_norm": 0.38685891032218933, "learning_rate": 0.00015876575316314378, "loss": 11.6673, "step": 43023 }, { "epoch": 0.9006112367076948, "grad_norm": 0.27581268548965454, "learning_rate": 0.00015876397914924213, "loss": 11.6596, "step": 43024 }, { "epoch": 0.900632169471657, "grad_norm": 0.25960293412208557, "learning_rate": 0.00015876220510709142, "loss": 11.6792, "step": 43025 }, { "epoch": 0.9006531022356192, "grad_norm": 0.337321937084198, "learning_rate": 0.0001587604310366926, "loss": 11.6795, "step": 43026 }, { "epoch": 0.9006740349995813, "grad_norm": 0.330615371465683, "learning_rate": 0.00015875865693804647, "loss": 11.6721, "step": 43027 }, { "epoch": 0.9006949677635435, "grad_norm": 0.3557097911834717, "learning_rate": 0.00015875688281115382, "loss": 11.66, "step": 43028 }, { "epoch": 0.9007159005275056, "grad_norm": 0.36007165908813477, "learning_rate": 0.0001587551086560156, "loss": 11.6664, "step": 43029 }, { "epoch": 0.9007368332914678, "grad_norm": 0.2795952260494232, "learning_rate": 0.0001587533344726326, "loss": 11.6514, "step": 43030 }, { "epoch": 0.90075776605543, "grad_norm": 0.3505457043647766, "learning_rate": 0.0001587515602610057, "loss": 11.6655, "step": 43031 }, { "epoch": 0.9007786988193921, "grad_norm": 0.29954949021339417, "learning_rate": 0.00015874978602113576, "loss": 11.6765, "step": 43032 }, { "epoch": 0.9007996315833543, "grad_norm": 0.30846497416496277, "learning_rate": 0.0001587480117530236, "loss": 11.6565, "step": 43033 }, { "epoch": 0.9008205643473164, "grad_norm": 0.3424602448940277, "learning_rate": 0.0001587462374566701, "loss": 11.665, "step": 43034 }, { "epoch": 0.9008414971112786, "grad_norm": 0.2879946529865265, "learning_rate": 0.00015874446313207612, "loss": 11.6707, "step": 43035 }, { "epoch": 0.9008624298752407, "grad_norm": 0.2952583432197571, "learning_rate": 0.00015874268877924248, "loss": 11.6929, "step": 43036 }, { "epoch": 0.9008833626392029, "grad_norm": 0.3914988040924072, "learning_rate": 0.00015874091439817004, "loss": 11.6653, "step": 43037 }, { "epoch": 0.9009042954031651, "grad_norm": 0.33231788873672485, "learning_rate": 0.0001587391399888597, "loss": 11.6843, "step": 43038 }, { "epoch": 0.9009252281671272, "grad_norm": 0.2663217782974243, "learning_rate": 0.00015873736555131228, "loss": 11.66, "step": 43039 }, { "epoch": 0.9009461609310894, "grad_norm": 0.26812809705734253, "learning_rate": 0.0001587355910855286, "loss": 11.6685, "step": 43040 }, { "epoch": 0.9009670936950515, "grad_norm": 0.37095722556114197, "learning_rate": 0.00015873381659150957, "loss": 11.6939, "step": 43041 }, { "epoch": 0.9009880264590137, "grad_norm": 0.3153611421585083, "learning_rate": 0.000158732042069256, "loss": 11.6567, "step": 43042 }, { "epoch": 0.9010089592229757, "grad_norm": 0.3450479805469513, "learning_rate": 0.0001587302675187688, "loss": 11.6712, "step": 43043 }, { "epoch": 0.901029891986938, "grad_norm": 0.30066466331481934, "learning_rate": 0.0001587284929400487, "loss": 11.6655, "step": 43044 }, { "epoch": 0.9010508247509001, "grad_norm": 0.3165276348590851, "learning_rate": 0.0001587267183330967, "loss": 11.6568, "step": 43045 }, { "epoch": 0.9010717575148622, "grad_norm": 0.2684590220451355, "learning_rate": 0.0001587249436979136, "loss": 11.6476, "step": 43046 }, { "epoch": 0.9010926902788244, "grad_norm": 0.24172581732273102, "learning_rate": 0.0001587231690345002, "loss": 11.6683, "step": 43047 }, { "epoch": 0.9011136230427865, "grad_norm": 0.33584970235824585, "learning_rate": 0.00015872139434285743, "loss": 11.6682, "step": 43048 }, { "epoch": 0.9011345558067487, "grad_norm": 0.2964869737625122, "learning_rate": 0.0001587196196229861, "loss": 11.6543, "step": 43049 }, { "epoch": 0.9011554885707109, "grad_norm": 0.34743455052375793, "learning_rate": 0.00015871784487488704, "loss": 11.6805, "step": 43050 }, { "epoch": 0.901176421334673, "grad_norm": 0.5682017803192139, "learning_rate": 0.00015871607009856117, "loss": 11.6479, "step": 43051 }, { "epoch": 0.9011973540986352, "grad_norm": 0.2855835556983948, "learning_rate": 0.0001587142952940093, "loss": 11.6644, "step": 43052 }, { "epoch": 0.9012182868625973, "grad_norm": 0.35967159271240234, "learning_rate": 0.0001587125204612323, "loss": 11.6743, "step": 43053 }, { "epoch": 0.9012392196265595, "grad_norm": 0.2627122700214386, "learning_rate": 0.00015871074560023097, "loss": 11.6823, "step": 43054 }, { "epoch": 0.9012601523905216, "grad_norm": 0.322530597448349, "learning_rate": 0.00015870897071100629, "loss": 11.66, "step": 43055 }, { "epoch": 0.9012810851544838, "grad_norm": 0.30060985684394836, "learning_rate": 0.00015870719579355897, "loss": 11.6729, "step": 43056 }, { "epoch": 0.901302017918446, "grad_norm": 0.2601970136165619, "learning_rate": 0.00015870542084788994, "loss": 11.6402, "step": 43057 }, { "epoch": 0.9013229506824081, "grad_norm": 0.3281349241733551, "learning_rate": 0.00015870364587400004, "loss": 11.6777, "step": 43058 }, { "epoch": 0.9013438834463703, "grad_norm": 0.3002360761165619, "learning_rate": 0.0001587018708718901, "loss": 11.6709, "step": 43059 }, { "epoch": 0.9013648162103324, "grad_norm": 0.3759864568710327, "learning_rate": 0.00015870009584156101, "loss": 11.6842, "step": 43060 }, { "epoch": 0.9013857489742946, "grad_norm": 0.29363903403282166, "learning_rate": 0.00015869832078301363, "loss": 11.6728, "step": 43061 }, { "epoch": 0.9014066817382567, "grad_norm": 0.3113829791545868, "learning_rate": 0.00015869654569624877, "loss": 11.6556, "step": 43062 }, { "epoch": 0.9014276145022189, "grad_norm": 0.2915043532848358, "learning_rate": 0.00015869477058126733, "loss": 11.6542, "step": 43063 }, { "epoch": 0.9014485472661811, "grad_norm": 0.3443947732448578, "learning_rate": 0.00015869299543807007, "loss": 11.6796, "step": 43064 }, { "epoch": 0.9014694800301432, "grad_norm": 0.33757397532463074, "learning_rate": 0.000158691220266658, "loss": 11.6703, "step": 43065 }, { "epoch": 0.9014904127941054, "grad_norm": 0.4235913157463074, "learning_rate": 0.0001586894450670318, "loss": 11.6846, "step": 43066 }, { "epoch": 0.9015113455580674, "grad_norm": 0.4136250913143158, "learning_rate": 0.00015868766983919246, "loss": 11.6738, "step": 43067 }, { "epoch": 0.9015322783220296, "grad_norm": 0.4525207579135895, "learning_rate": 0.0001586858945831408, "loss": 11.6675, "step": 43068 }, { "epoch": 0.9015532110859918, "grad_norm": 0.39198413491249084, "learning_rate": 0.00015868411929887758, "loss": 11.6728, "step": 43069 }, { "epoch": 0.9015741438499539, "grad_norm": 0.2550238072872162, "learning_rate": 0.00015868234398640378, "loss": 11.6585, "step": 43070 }, { "epoch": 0.9015950766139161, "grad_norm": 0.34258028864860535, "learning_rate": 0.00015868056864572018, "loss": 11.6772, "step": 43071 }, { "epoch": 0.9016160093778782, "grad_norm": 0.25944244861602783, "learning_rate": 0.00015867879327682768, "loss": 11.6641, "step": 43072 }, { "epoch": 0.9016369421418404, "grad_norm": 0.32024914026260376, "learning_rate": 0.00015867701787972708, "loss": 11.6608, "step": 43073 }, { "epoch": 0.9016578749058025, "grad_norm": 0.3039511740207672, "learning_rate": 0.00015867524245441927, "loss": 11.6597, "step": 43074 }, { "epoch": 0.9016788076697647, "grad_norm": 0.29562878608703613, "learning_rate": 0.00015867346700090514, "loss": 11.6546, "step": 43075 }, { "epoch": 0.9016997404337269, "grad_norm": 0.32699233293533325, "learning_rate": 0.00015867169151918543, "loss": 11.6632, "step": 43076 }, { "epoch": 0.901720673197689, "grad_norm": 0.36252549290657043, "learning_rate": 0.0001586699160092611, "loss": 11.6579, "step": 43077 }, { "epoch": 0.9017416059616512, "grad_norm": 0.30290183424949646, "learning_rate": 0.00015866814047113295, "loss": 11.6825, "step": 43078 }, { "epoch": 0.9017625387256133, "grad_norm": 0.3296325206756592, "learning_rate": 0.00015866636490480184, "loss": 11.6606, "step": 43079 }, { "epoch": 0.9017834714895755, "grad_norm": 0.32269030809402466, "learning_rate": 0.00015866458931026867, "loss": 11.6811, "step": 43080 }, { "epoch": 0.9018044042535376, "grad_norm": 0.39919817447662354, "learning_rate": 0.00015866281368753421, "loss": 11.6734, "step": 43081 }, { "epoch": 0.9018253370174998, "grad_norm": 0.3557130992412567, "learning_rate": 0.00015866103803659941, "loss": 11.6536, "step": 43082 }, { "epoch": 0.901846269781462, "grad_norm": 0.27241218090057373, "learning_rate": 0.00015865926235746503, "loss": 11.669, "step": 43083 }, { "epoch": 0.9018672025454241, "grad_norm": 0.3014434278011322, "learning_rate": 0.00015865748665013199, "loss": 11.6746, "step": 43084 }, { "epoch": 0.9018881353093863, "grad_norm": 0.3141587972640991, "learning_rate": 0.00015865571091460112, "loss": 11.6517, "step": 43085 }, { "epoch": 0.9019090680733484, "grad_norm": 0.3178281784057617, "learning_rate": 0.00015865393515087327, "loss": 11.6775, "step": 43086 }, { "epoch": 0.9019300008373106, "grad_norm": 0.3235960900783539, "learning_rate": 0.0001586521593589493, "loss": 11.6873, "step": 43087 }, { "epoch": 0.9019509336012728, "grad_norm": 0.2979569733142853, "learning_rate": 0.00015865038353883004, "loss": 11.6845, "step": 43088 }, { "epoch": 0.9019718663652349, "grad_norm": 0.3655044138431549, "learning_rate": 0.0001586486076905164, "loss": 11.662, "step": 43089 }, { "epoch": 0.901992799129197, "grad_norm": 0.38444074988365173, "learning_rate": 0.0001586468318140092, "loss": 11.6837, "step": 43090 }, { "epoch": 0.9020137318931591, "grad_norm": 0.30734384059906006, "learning_rate": 0.00015864505590930926, "loss": 11.6532, "step": 43091 }, { "epoch": 0.9020346646571213, "grad_norm": 0.3151533305644989, "learning_rate": 0.0001586432799764175, "loss": 11.6625, "step": 43092 }, { "epoch": 0.9020555974210834, "grad_norm": 0.359770804643631, "learning_rate": 0.0001586415040153347, "loss": 11.6815, "step": 43093 }, { "epoch": 0.9020765301850456, "grad_norm": 0.25490450859069824, "learning_rate": 0.00015863972802606177, "loss": 11.6564, "step": 43094 }, { "epoch": 0.9020974629490078, "grad_norm": 0.33681514859199524, "learning_rate": 0.00015863795200859958, "loss": 11.6703, "step": 43095 }, { "epoch": 0.9021183957129699, "grad_norm": 0.3961499333381653, "learning_rate": 0.00015863617596294891, "loss": 11.6803, "step": 43096 }, { "epoch": 0.9021393284769321, "grad_norm": 0.46835386753082275, "learning_rate": 0.00015863439988911068, "loss": 11.6592, "step": 43097 }, { "epoch": 0.9021602612408942, "grad_norm": 0.3000149428844452, "learning_rate": 0.0001586326237870857, "loss": 11.6461, "step": 43098 }, { "epoch": 0.9021811940048564, "grad_norm": 0.3529490828514099, "learning_rate": 0.00015863084765687486, "loss": 11.6753, "step": 43099 }, { "epoch": 0.9022021267688185, "grad_norm": 0.2939223349094391, "learning_rate": 0.00015862907149847896, "loss": 11.6791, "step": 43100 }, { "epoch": 0.9022230595327807, "grad_norm": 0.3221966326236725, "learning_rate": 0.00015862729531189893, "loss": 11.6633, "step": 43101 }, { "epoch": 0.9022439922967429, "grad_norm": 0.3175378143787384, "learning_rate": 0.00015862551909713558, "loss": 11.6812, "step": 43102 }, { "epoch": 0.902264925060705, "grad_norm": 0.3799766004085541, "learning_rate": 0.00015862374285418977, "loss": 11.6656, "step": 43103 }, { "epoch": 0.9022858578246672, "grad_norm": 0.31695616245269775, "learning_rate": 0.00015862196658306235, "loss": 11.658, "step": 43104 }, { "epoch": 0.9023067905886293, "grad_norm": 0.38397273421287537, "learning_rate": 0.00015862019028375416, "loss": 11.6882, "step": 43105 }, { "epoch": 0.9023277233525915, "grad_norm": 3.3135910034179688, "learning_rate": 0.00015861841395626607, "loss": 11.6599, "step": 43106 }, { "epoch": 0.9023486561165537, "grad_norm": 0.28448182344436646, "learning_rate": 0.00015861663760059896, "loss": 11.6825, "step": 43107 }, { "epoch": 0.9023695888805158, "grad_norm": 0.3044486939907074, "learning_rate": 0.00015861486121675363, "loss": 11.66, "step": 43108 }, { "epoch": 0.902390521644478, "grad_norm": 0.275956928730011, "learning_rate": 0.00015861308480473097, "loss": 11.6839, "step": 43109 }, { "epoch": 0.9024114544084401, "grad_norm": 0.3982935845851898, "learning_rate": 0.00015861130836453184, "loss": 11.6981, "step": 43110 }, { "epoch": 0.9024323871724023, "grad_norm": 0.2826576828956604, "learning_rate": 0.00015860953189615707, "loss": 11.6624, "step": 43111 }, { "epoch": 0.9024533199363644, "grad_norm": 0.4031607508659363, "learning_rate": 0.0001586077553996075, "loss": 11.6857, "step": 43112 }, { "epoch": 0.9024742527003266, "grad_norm": 0.33319389820098877, "learning_rate": 0.000158605978874884, "loss": 11.6783, "step": 43113 }, { "epoch": 0.9024951854642888, "grad_norm": 0.2668972313404083, "learning_rate": 0.0001586042023219875, "loss": 11.6765, "step": 43114 }, { "epoch": 0.9025161182282508, "grad_norm": 0.24749763309955597, "learning_rate": 0.00015860242574091874, "loss": 11.6696, "step": 43115 }, { "epoch": 0.902537050992213, "grad_norm": 0.3022176921367645, "learning_rate": 0.00015860064913167862, "loss": 11.6722, "step": 43116 }, { "epoch": 0.9025579837561751, "grad_norm": 0.2815481722354889, "learning_rate": 0.00015859887249426802, "loss": 11.6537, "step": 43117 }, { "epoch": 0.9025789165201373, "grad_norm": 0.32974424958229065, "learning_rate": 0.00015859709582868772, "loss": 11.6804, "step": 43118 }, { "epoch": 0.9025998492840994, "grad_norm": 0.39325544238090515, "learning_rate": 0.00015859531913493864, "loss": 11.6866, "step": 43119 }, { "epoch": 0.9026207820480616, "grad_norm": 0.3226124048233032, "learning_rate": 0.00015859354241302165, "loss": 11.6775, "step": 43120 }, { "epoch": 0.9026417148120238, "grad_norm": 0.47610458731651306, "learning_rate": 0.0001585917656629375, "loss": 11.6767, "step": 43121 }, { "epoch": 0.9026626475759859, "grad_norm": 0.28726691007614136, "learning_rate": 0.00015858998888468719, "loss": 11.6794, "step": 43122 }, { "epoch": 0.9026835803399481, "grad_norm": 0.25851377844810486, "learning_rate": 0.00015858821207827146, "loss": 11.6842, "step": 43123 }, { "epoch": 0.9027045131039102, "grad_norm": 0.3300892412662506, "learning_rate": 0.00015858643524369122, "loss": 11.6851, "step": 43124 }, { "epoch": 0.9027254458678724, "grad_norm": 0.28616175055503845, "learning_rate": 0.0001585846583809473, "loss": 11.6838, "step": 43125 }, { "epoch": 0.9027463786318346, "grad_norm": 0.3941107988357544, "learning_rate": 0.00015858288149004054, "loss": 11.6923, "step": 43126 }, { "epoch": 0.9027673113957967, "grad_norm": 0.35095396637916565, "learning_rate": 0.00015858110457097184, "loss": 11.6718, "step": 43127 }, { "epoch": 0.9027882441597589, "grad_norm": 0.5251399874687195, "learning_rate": 0.000158579327623742, "loss": 11.6482, "step": 43128 }, { "epoch": 0.902809176923721, "grad_norm": 0.3292465806007385, "learning_rate": 0.00015857755064835192, "loss": 11.6746, "step": 43129 }, { "epoch": 0.9028301096876832, "grad_norm": 0.3594089448451996, "learning_rate": 0.00015857577364480246, "loss": 11.6644, "step": 43130 }, { "epoch": 0.9028510424516453, "grad_norm": 0.3291962742805481, "learning_rate": 0.00015857399661309442, "loss": 11.6633, "step": 43131 }, { "epoch": 0.9028719752156075, "grad_norm": 0.31739911437034607, "learning_rate": 0.0001585722195532287, "loss": 11.673, "step": 43132 }, { "epoch": 0.9028929079795697, "grad_norm": 0.27353450655937195, "learning_rate": 0.00015857044246520612, "loss": 11.6559, "step": 43133 }, { "epoch": 0.9029138407435318, "grad_norm": 0.2997760474681854, "learning_rate": 0.0001585686653490276, "loss": 11.6774, "step": 43134 }, { "epoch": 0.902934773507494, "grad_norm": 0.35058659315109253, "learning_rate": 0.00015856688820469392, "loss": 11.6702, "step": 43135 }, { "epoch": 0.902955706271456, "grad_norm": 0.2982107698917389, "learning_rate": 0.00015856511103220594, "loss": 11.6798, "step": 43136 }, { "epoch": 0.9029766390354182, "grad_norm": 0.37258997559547424, "learning_rate": 0.0001585633338315646, "loss": 11.6628, "step": 43137 }, { "epoch": 0.9029975717993803, "grad_norm": 0.3413236737251282, "learning_rate": 0.00015856155660277064, "loss": 11.6763, "step": 43138 }, { "epoch": 0.9030185045633425, "grad_norm": 0.3165160119533539, "learning_rate": 0.00015855977934582497, "loss": 11.6743, "step": 43139 }, { "epoch": 0.9030394373273047, "grad_norm": 0.30325743556022644, "learning_rate": 0.00015855800206072846, "loss": 11.675, "step": 43140 }, { "epoch": 0.9030603700912668, "grad_norm": 0.3095998167991638, "learning_rate": 0.00015855622474748193, "loss": 11.6565, "step": 43141 }, { "epoch": 0.903081302855229, "grad_norm": 0.2740717828273773, "learning_rate": 0.00015855444740608626, "loss": 11.678, "step": 43142 }, { "epoch": 0.9031022356191911, "grad_norm": 0.33501243591308594, "learning_rate": 0.0001585526700365423, "loss": 11.6625, "step": 43143 }, { "epoch": 0.9031231683831533, "grad_norm": 0.2855799198150635, "learning_rate": 0.00015855089263885086, "loss": 11.6665, "step": 43144 }, { "epoch": 0.9031441011471155, "grad_norm": 0.2779453992843628, "learning_rate": 0.00015854911521301287, "loss": 11.6756, "step": 43145 }, { "epoch": 0.9031650339110776, "grad_norm": 0.3815445005893707, "learning_rate": 0.00015854733775902912, "loss": 11.6566, "step": 43146 }, { "epoch": 0.9031859666750398, "grad_norm": 0.3245026171207428, "learning_rate": 0.0001585455602769005, "loss": 11.6516, "step": 43147 }, { "epoch": 0.9032068994390019, "grad_norm": 0.3413768708705902, "learning_rate": 0.00015854378276662789, "loss": 11.6698, "step": 43148 }, { "epoch": 0.9032278322029641, "grad_norm": 0.2607811987400055, "learning_rate": 0.00015854200522821206, "loss": 11.6623, "step": 43149 }, { "epoch": 0.9032487649669262, "grad_norm": 0.2955178916454315, "learning_rate": 0.00015854022766165392, "loss": 11.6766, "step": 43150 }, { "epoch": 0.9032696977308884, "grad_norm": 0.30136048793792725, "learning_rate": 0.00015853845006695436, "loss": 11.6625, "step": 43151 }, { "epoch": 0.9032906304948506, "grad_norm": 0.3107112944126129, "learning_rate": 0.0001585366724441142, "loss": 11.6788, "step": 43152 }, { "epoch": 0.9033115632588127, "grad_norm": 0.25868234038352966, "learning_rate": 0.00015853489479313422, "loss": 11.6666, "step": 43153 }, { "epoch": 0.9033324960227749, "grad_norm": 0.34927666187286377, "learning_rate": 0.0001585331171140154, "loss": 11.6688, "step": 43154 }, { "epoch": 0.903353428786737, "grad_norm": 0.3579149544239044, "learning_rate": 0.0001585313394067585, "loss": 11.6581, "step": 43155 }, { "epoch": 0.9033743615506992, "grad_norm": 0.2590177655220032, "learning_rate": 0.00015852956167136445, "loss": 11.673, "step": 43156 }, { "epoch": 0.9033952943146613, "grad_norm": 0.2935592532157898, "learning_rate": 0.00015852778390783404, "loss": 11.6673, "step": 43157 }, { "epoch": 0.9034162270786235, "grad_norm": 0.31910502910614014, "learning_rate": 0.00015852600611616818, "loss": 11.6718, "step": 43158 }, { "epoch": 0.9034371598425857, "grad_norm": 0.3006818890571594, "learning_rate": 0.00015852422829636765, "loss": 11.6613, "step": 43159 }, { "epoch": 0.9034580926065477, "grad_norm": 0.303528368473053, "learning_rate": 0.00015852245044843338, "loss": 11.6722, "step": 43160 }, { "epoch": 0.90347902537051, "grad_norm": 0.28405770659446716, "learning_rate": 0.0001585206725723662, "loss": 11.6762, "step": 43161 }, { "epoch": 0.903499958134472, "grad_norm": 0.3033456802368164, "learning_rate": 0.00015851889466816696, "loss": 11.6694, "step": 43162 }, { "epoch": 0.9035208908984342, "grad_norm": 0.36545202136039734, "learning_rate": 0.00015851711673583652, "loss": 11.6726, "step": 43163 }, { "epoch": 0.9035418236623964, "grad_norm": 0.3103824853897095, "learning_rate": 0.0001585153387753757, "loss": 11.6618, "step": 43164 }, { "epoch": 0.9035627564263585, "grad_norm": 0.29336535930633545, "learning_rate": 0.0001585135607867854, "loss": 11.6604, "step": 43165 }, { "epoch": 0.9035836891903207, "grad_norm": 0.38515546917915344, "learning_rate": 0.00015851178277006647, "loss": 11.6736, "step": 43166 }, { "epoch": 0.9036046219542828, "grad_norm": 0.28967994451522827, "learning_rate": 0.00015851000472521973, "loss": 11.6748, "step": 43167 }, { "epoch": 0.903625554718245, "grad_norm": 0.3147515654563904, "learning_rate": 0.0001585082266522461, "loss": 11.6587, "step": 43168 }, { "epoch": 0.9036464874822071, "grad_norm": 0.267148494720459, "learning_rate": 0.00015850644855114633, "loss": 11.6624, "step": 43169 }, { "epoch": 0.9036674202461693, "grad_norm": 0.3178045153617859, "learning_rate": 0.0001585046704219214, "loss": 11.6622, "step": 43170 }, { "epoch": 0.9036883530101315, "grad_norm": 0.25208762288093567, "learning_rate": 0.00015850289226457208, "loss": 11.6497, "step": 43171 }, { "epoch": 0.9037092857740936, "grad_norm": 0.2892199456691742, "learning_rate": 0.0001585011140790992, "loss": 11.6608, "step": 43172 }, { "epoch": 0.9037302185380558, "grad_norm": 0.41070321202278137, "learning_rate": 0.00015849933586550372, "loss": 11.6545, "step": 43173 }, { "epoch": 0.9037511513020179, "grad_norm": 0.30555495619773865, "learning_rate": 0.00015849755762378643, "loss": 11.6532, "step": 43174 }, { "epoch": 0.9037720840659801, "grad_norm": 0.276619553565979, "learning_rate": 0.00015849577935394816, "loss": 11.6652, "step": 43175 }, { "epoch": 0.9037930168299422, "grad_norm": 0.32822468876838684, "learning_rate": 0.00015849400105598984, "loss": 11.6649, "step": 43176 }, { "epoch": 0.9038139495939044, "grad_norm": 0.3529682755470276, "learning_rate": 0.00015849222272991225, "loss": 11.6748, "step": 43177 }, { "epoch": 0.9038348823578666, "grad_norm": 0.31245681643486023, "learning_rate": 0.0001584904443757163, "loss": 11.6667, "step": 43178 }, { "epoch": 0.9038558151218287, "grad_norm": 0.2624601125717163, "learning_rate": 0.0001584886659934028, "loss": 11.6802, "step": 43179 }, { "epoch": 0.9038767478857909, "grad_norm": 0.3429892659187317, "learning_rate": 0.00015848688758297264, "loss": 11.6744, "step": 43180 }, { "epoch": 0.903897680649753, "grad_norm": 0.34474673867225647, "learning_rate": 0.00015848510914442664, "loss": 11.6553, "step": 43181 }, { "epoch": 0.9039186134137152, "grad_norm": 0.30178454518318176, "learning_rate": 0.0001584833306777657, "loss": 11.6727, "step": 43182 }, { "epoch": 0.9039395461776774, "grad_norm": 0.29311782121658325, "learning_rate": 0.00015848155218299063, "loss": 11.6622, "step": 43183 }, { "epoch": 0.9039604789416394, "grad_norm": 0.333963543176651, "learning_rate": 0.00015847977366010232, "loss": 11.6697, "step": 43184 }, { "epoch": 0.9039814117056016, "grad_norm": 0.2632341980934143, "learning_rate": 0.0001584779951091016, "loss": 11.6727, "step": 43185 }, { "epoch": 0.9040023444695637, "grad_norm": 0.3638286888599396, "learning_rate": 0.00015847621652998934, "loss": 11.675, "step": 43186 }, { "epoch": 0.9040232772335259, "grad_norm": 0.28500860929489136, "learning_rate": 0.0001584744379227664, "loss": 11.6515, "step": 43187 }, { "epoch": 0.904044209997488, "grad_norm": 0.2763601541519165, "learning_rate": 0.00015847265928743362, "loss": 11.6693, "step": 43188 }, { "epoch": 0.9040651427614502, "grad_norm": 0.28296661376953125, "learning_rate": 0.00015847088062399185, "loss": 11.6639, "step": 43189 }, { "epoch": 0.9040860755254124, "grad_norm": 0.33054792881011963, "learning_rate": 0.00015846910193244198, "loss": 11.6638, "step": 43190 }, { "epoch": 0.9041070082893745, "grad_norm": 0.3697070777416229, "learning_rate": 0.00015846732321278482, "loss": 11.6619, "step": 43191 }, { "epoch": 0.9041279410533367, "grad_norm": 0.30005335807800293, "learning_rate": 0.0001584655444650213, "loss": 11.6721, "step": 43192 }, { "epoch": 0.9041488738172988, "grad_norm": 0.2504357099533081, "learning_rate": 0.00015846376568915216, "loss": 11.6572, "step": 43193 }, { "epoch": 0.904169806581261, "grad_norm": 0.2739078998565674, "learning_rate": 0.0001584619868851783, "loss": 11.6609, "step": 43194 }, { "epoch": 0.9041907393452231, "grad_norm": 0.30834195017814636, "learning_rate": 0.00015846020805310065, "loss": 11.6561, "step": 43195 }, { "epoch": 0.9042116721091853, "grad_norm": 0.3180604577064514, "learning_rate": 0.00015845842919291997, "loss": 11.6668, "step": 43196 }, { "epoch": 0.9042326048731475, "grad_norm": 0.28460294008255005, "learning_rate": 0.00015845665030463717, "loss": 11.6675, "step": 43197 }, { "epoch": 0.9042535376371096, "grad_norm": 0.3753930330276489, "learning_rate": 0.00015845487138825307, "loss": 11.6831, "step": 43198 }, { "epoch": 0.9042744704010718, "grad_norm": 0.2539580762386322, "learning_rate": 0.00015845309244376856, "loss": 11.6838, "step": 43199 }, { "epoch": 0.9042954031650339, "grad_norm": 0.44331642985343933, "learning_rate": 0.00015845131347118447, "loss": 11.6894, "step": 43200 }, { "epoch": 0.9043163359289961, "grad_norm": 0.24027039110660553, "learning_rate": 0.00015844953447050164, "loss": 11.6716, "step": 43201 }, { "epoch": 0.9043372686929582, "grad_norm": 0.3368558883666992, "learning_rate": 0.00015844775544172096, "loss": 11.6693, "step": 43202 }, { "epoch": 0.9043582014569204, "grad_norm": 0.3715645968914032, "learning_rate": 0.0001584459763848433, "loss": 11.6347, "step": 43203 }, { "epoch": 0.9043791342208826, "grad_norm": 0.3125216066837311, "learning_rate": 0.00015844419729986947, "loss": 11.6677, "step": 43204 }, { "epoch": 0.9044000669848447, "grad_norm": 0.3492240011692047, "learning_rate": 0.00015844241818680033, "loss": 11.6688, "step": 43205 }, { "epoch": 0.9044209997488069, "grad_norm": 0.3329169750213623, "learning_rate": 0.00015844063904563676, "loss": 11.6588, "step": 43206 }, { "epoch": 0.9044419325127689, "grad_norm": 0.31067925691604614, "learning_rate": 0.0001584388598763796, "loss": 11.6716, "step": 43207 }, { "epoch": 0.9044628652767311, "grad_norm": 0.32554003596305847, "learning_rate": 0.00015843708067902972, "loss": 11.676, "step": 43208 }, { "epoch": 0.9044837980406933, "grad_norm": 0.30476322770118713, "learning_rate": 0.00015843530145358793, "loss": 11.6495, "step": 43209 }, { "epoch": 0.9045047308046554, "grad_norm": 0.29206737875938416, "learning_rate": 0.00015843352220005515, "loss": 11.667, "step": 43210 }, { "epoch": 0.9045256635686176, "grad_norm": 0.2724830210208893, "learning_rate": 0.00015843174291843218, "loss": 11.6669, "step": 43211 }, { "epoch": 0.9045465963325797, "grad_norm": 0.2862175703048706, "learning_rate": 0.00015842996360871997, "loss": 11.6605, "step": 43212 }, { "epoch": 0.9045675290965419, "grad_norm": 0.2712564170360565, "learning_rate": 0.00015842818427091922, "loss": 11.6781, "step": 43213 }, { "epoch": 0.904588461860504, "grad_norm": 0.34390905499458313, "learning_rate": 0.00015842640490503092, "loss": 11.6651, "step": 43214 }, { "epoch": 0.9046093946244662, "grad_norm": 0.24484457075595856, "learning_rate": 0.00015842462551105585, "loss": 11.6558, "step": 43215 }, { "epoch": 0.9046303273884284, "grad_norm": 0.33161628246307373, "learning_rate": 0.0001584228460889949, "loss": 11.6591, "step": 43216 }, { "epoch": 0.9046512601523905, "grad_norm": 0.33317533135414124, "learning_rate": 0.00015842106663884896, "loss": 11.6754, "step": 43217 }, { "epoch": 0.9046721929163527, "grad_norm": 0.2615850865840912, "learning_rate": 0.00015841928716061875, "loss": 11.6662, "step": 43218 }, { "epoch": 0.9046931256803148, "grad_norm": 0.3191591501235962, "learning_rate": 0.0001584175076543053, "loss": 11.6563, "step": 43219 }, { "epoch": 0.904714058444277, "grad_norm": 0.2953648269176483, "learning_rate": 0.00015841572811990936, "loss": 11.6851, "step": 43220 }, { "epoch": 0.9047349912082391, "grad_norm": 0.4248623847961426, "learning_rate": 0.0001584139485574318, "loss": 11.661, "step": 43221 }, { "epoch": 0.9047559239722013, "grad_norm": 0.2986622154712677, "learning_rate": 0.00015841216896687348, "loss": 11.6754, "step": 43222 }, { "epoch": 0.9047768567361635, "grad_norm": 0.3521212637424469, "learning_rate": 0.00015841038934823526, "loss": 11.6786, "step": 43223 }, { "epoch": 0.9047977895001256, "grad_norm": 0.301300585269928, "learning_rate": 0.000158408609701518, "loss": 11.679, "step": 43224 }, { "epoch": 0.9048187222640878, "grad_norm": 0.9811218976974487, "learning_rate": 0.00015840683002672255, "loss": 11.6634, "step": 43225 }, { "epoch": 0.9048396550280499, "grad_norm": 0.30000361800193787, "learning_rate": 0.0001584050503238498, "loss": 11.6687, "step": 43226 }, { "epoch": 0.9048605877920121, "grad_norm": 0.3599352538585663, "learning_rate": 0.0001584032705929005, "loss": 11.6568, "step": 43227 }, { "epoch": 0.9048815205559743, "grad_norm": 0.3974059224128723, "learning_rate": 0.00015840149083387565, "loss": 11.6631, "step": 43228 }, { "epoch": 0.9049024533199364, "grad_norm": 0.29867711663246155, "learning_rate": 0.000158399711046776, "loss": 11.679, "step": 43229 }, { "epoch": 0.9049233860838986, "grad_norm": 0.302317351102829, "learning_rate": 0.00015839793123160244, "loss": 11.6627, "step": 43230 }, { "epoch": 0.9049443188478606, "grad_norm": 0.3419921100139618, "learning_rate": 0.00015839615138835584, "loss": 11.6791, "step": 43231 }, { "epoch": 0.9049652516118228, "grad_norm": 0.31937941908836365, "learning_rate": 0.000158394371517037, "loss": 11.665, "step": 43232 }, { "epoch": 0.9049861843757849, "grad_norm": 0.3851069509983063, "learning_rate": 0.00015839259161764684, "loss": 11.6673, "step": 43233 }, { "epoch": 0.9050071171397471, "grad_norm": 0.41899481415748596, "learning_rate": 0.0001583908116901862, "loss": 11.6794, "step": 43234 }, { "epoch": 0.9050280499037093, "grad_norm": 0.2805517315864563, "learning_rate": 0.0001583890317346559, "loss": 11.675, "step": 43235 }, { "epoch": 0.9050489826676714, "grad_norm": 0.31045427918434143, "learning_rate": 0.00015838725175105686, "loss": 11.6651, "step": 43236 }, { "epoch": 0.9050699154316336, "grad_norm": 0.4060206711292267, "learning_rate": 0.00015838547173938988, "loss": 11.6541, "step": 43237 }, { "epoch": 0.9050908481955957, "grad_norm": 0.3476192355155945, "learning_rate": 0.00015838369169965584, "loss": 11.6724, "step": 43238 }, { "epoch": 0.9051117809595579, "grad_norm": 0.35261252522468567, "learning_rate": 0.0001583819116318556, "loss": 11.6632, "step": 43239 }, { "epoch": 0.90513271372352, "grad_norm": 0.3931097090244293, "learning_rate": 0.00015838013153598996, "loss": 11.6796, "step": 43240 }, { "epoch": 0.9051536464874822, "grad_norm": 0.33691534399986267, "learning_rate": 0.00015837835141205986, "loss": 11.6767, "step": 43241 }, { "epoch": 0.9051745792514444, "grad_norm": 0.33075058460235596, "learning_rate": 0.00015837657126006608, "loss": 11.6644, "step": 43242 }, { "epoch": 0.9051955120154065, "grad_norm": 0.3578922748565674, "learning_rate": 0.00015837479108000958, "loss": 11.6745, "step": 43243 }, { "epoch": 0.9052164447793687, "grad_norm": 0.290123850107193, "learning_rate": 0.0001583730108718911, "loss": 11.6679, "step": 43244 }, { "epoch": 0.9052373775433308, "grad_norm": 0.33155882358551025, "learning_rate": 0.00015837123063571154, "loss": 11.6809, "step": 43245 }, { "epoch": 0.905258310307293, "grad_norm": 0.3272763192653656, "learning_rate": 0.00015836945037147175, "loss": 11.6686, "step": 43246 }, { "epoch": 0.9052792430712552, "grad_norm": 0.3046555519104004, "learning_rate": 0.00015836767007917262, "loss": 11.6741, "step": 43247 }, { "epoch": 0.9053001758352173, "grad_norm": 0.2525078058242798, "learning_rate": 0.00015836588975881499, "loss": 11.6583, "step": 43248 }, { "epoch": 0.9053211085991795, "grad_norm": 0.33008742332458496, "learning_rate": 0.00015836410941039966, "loss": 11.6572, "step": 43249 }, { "epoch": 0.9053420413631416, "grad_norm": 0.30051156878471375, "learning_rate": 0.00015836232903392757, "loss": 11.6647, "step": 43250 }, { "epoch": 0.9053629741271038, "grad_norm": 0.27003875374794006, "learning_rate": 0.00015836054862939953, "loss": 11.652, "step": 43251 }, { "epoch": 0.9053839068910658, "grad_norm": 0.28806066513061523, "learning_rate": 0.00015835876819681642, "loss": 11.6662, "step": 43252 }, { "epoch": 0.905404839655028, "grad_norm": 0.3437804579734802, "learning_rate": 0.00015835698773617908, "loss": 11.6805, "step": 43253 }, { "epoch": 0.9054257724189902, "grad_norm": 0.3498261570930481, "learning_rate": 0.00015835520724748835, "loss": 11.6857, "step": 43254 }, { "epoch": 0.9054467051829523, "grad_norm": 0.33515429496765137, "learning_rate": 0.0001583534267307451, "loss": 11.6829, "step": 43255 }, { "epoch": 0.9054676379469145, "grad_norm": 0.2915237247943878, "learning_rate": 0.0001583516461859502, "loss": 11.6717, "step": 43256 }, { "epoch": 0.9054885707108766, "grad_norm": 0.43947407603263855, "learning_rate": 0.0001583498656131045, "loss": 11.6694, "step": 43257 }, { "epoch": 0.9055095034748388, "grad_norm": 0.2494341880083084, "learning_rate": 0.00015834808501220883, "loss": 11.673, "step": 43258 }, { "epoch": 0.9055304362388009, "grad_norm": 0.23203887045383453, "learning_rate": 0.00015834630438326408, "loss": 11.695, "step": 43259 }, { "epoch": 0.9055513690027631, "grad_norm": 0.3982263505458832, "learning_rate": 0.00015834452372627108, "loss": 11.6924, "step": 43260 }, { "epoch": 0.9055723017667253, "grad_norm": 0.256156861782074, "learning_rate": 0.0001583427430412307, "loss": 11.6756, "step": 43261 }, { "epoch": 0.9055932345306874, "grad_norm": 0.26181289553642273, "learning_rate": 0.00015834096232814383, "loss": 11.6748, "step": 43262 }, { "epoch": 0.9056141672946496, "grad_norm": 0.33120957016944885, "learning_rate": 0.00015833918158701126, "loss": 11.6694, "step": 43263 }, { "epoch": 0.9056351000586117, "grad_norm": 0.3128099739551544, "learning_rate": 0.00015833740081783385, "loss": 11.6619, "step": 43264 }, { "epoch": 0.9056560328225739, "grad_norm": 0.30009761452674866, "learning_rate": 0.00015833562002061252, "loss": 11.6897, "step": 43265 }, { "epoch": 0.9056769655865361, "grad_norm": 0.3273361027240753, "learning_rate": 0.0001583338391953481, "loss": 11.6952, "step": 43266 }, { "epoch": 0.9056978983504982, "grad_norm": 0.4337814152240753, "learning_rate": 0.0001583320583420414, "loss": 11.6521, "step": 43267 }, { "epoch": 0.9057188311144604, "grad_norm": 0.2583570182323456, "learning_rate": 0.00015833027746069332, "loss": 11.6674, "step": 43268 }, { "epoch": 0.9057397638784225, "grad_norm": 0.35149484872817993, "learning_rate": 0.00015832849655130472, "loss": 11.6586, "step": 43269 }, { "epoch": 0.9057606966423847, "grad_norm": 0.29028961062431335, "learning_rate": 0.00015832671561387645, "loss": 11.6782, "step": 43270 }, { "epoch": 0.9057816294063468, "grad_norm": 0.2917845845222473, "learning_rate": 0.00015832493464840934, "loss": 11.6659, "step": 43271 }, { "epoch": 0.905802562170309, "grad_norm": 0.2814798355102539, "learning_rate": 0.00015832315365490424, "loss": 11.6788, "step": 43272 }, { "epoch": 0.9058234949342712, "grad_norm": 0.31989455223083496, "learning_rate": 0.00015832137263336205, "loss": 11.675, "step": 43273 }, { "epoch": 0.9058444276982333, "grad_norm": 0.3166753649711609, "learning_rate": 0.00015831959158378361, "loss": 11.6701, "step": 43274 }, { "epoch": 0.9058653604621955, "grad_norm": 0.43406492471694946, "learning_rate": 0.0001583178105061698, "loss": 11.6738, "step": 43275 }, { "epoch": 0.9058862932261575, "grad_norm": 0.3081721067428589, "learning_rate": 0.00015831602940052144, "loss": 11.67, "step": 43276 }, { "epoch": 0.9059072259901197, "grad_norm": 0.32962268590927124, "learning_rate": 0.00015831424826683938, "loss": 11.6668, "step": 43277 }, { "epoch": 0.9059281587540818, "grad_norm": 0.30587640404701233, "learning_rate": 0.00015831246710512452, "loss": 11.6574, "step": 43278 }, { "epoch": 0.905949091518044, "grad_norm": 0.2581382393836975, "learning_rate": 0.00015831068591537766, "loss": 11.6745, "step": 43279 }, { "epoch": 0.9059700242820062, "grad_norm": 0.3207783102989197, "learning_rate": 0.00015830890469759967, "loss": 11.6614, "step": 43280 }, { "epoch": 0.9059909570459683, "grad_norm": 0.33561086654663086, "learning_rate": 0.00015830712345179143, "loss": 11.6701, "step": 43281 }, { "epoch": 0.9060118898099305, "grad_norm": 0.4213184714317322, "learning_rate": 0.0001583053421779538, "loss": 11.675, "step": 43282 }, { "epoch": 0.9060328225738926, "grad_norm": 0.29522255063056946, "learning_rate": 0.00015830356087608764, "loss": 11.6713, "step": 43283 }, { "epoch": 0.9060537553378548, "grad_norm": 0.2931918501853943, "learning_rate": 0.00015830177954619375, "loss": 11.6714, "step": 43284 }, { "epoch": 0.906074688101817, "grad_norm": 0.3224424123764038, "learning_rate": 0.00015829999818827307, "loss": 11.6811, "step": 43285 }, { "epoch": 0.9060956208657791, "grad_norm": 0.3209574520587921, "learning_rate": 0.00015829821680232638, "loss": 11.6706, "step": 43286 }, { "epoch": 0.9061165536297413, "grad_norm": 0.2578122913837433, "learning_rate": 0.00015829643538835456, "loss": 11.6719, "step": 43287 }, { "epoch": 0.9061374863937034, "grad_norm": 0.5747664570808411, "learning_rate": 0.00015829465394635853, "loss": 11.6852, "step": 43288 }, { "epoch": 0.9061584191576656, "grad_norm": 0.2739187180995941, "learning_rate": 0.00015829287247633907, "loss": 11.6727, "step": 43289 }, { "epoch": 0.9061793519216277, "grad_norm": 0.29418009519577026, "learning_rate": 0.00015829109097829705, "loss": 11.6781, "step": 43290 }, { "epoch": 0.9062002846855899, "grad_norm": 0.35181158781051636, "learning_rate": 0.0001582893094522333, "loss": 11.6801, "step": 43291 }, { "epoch": 0.9062212174495521, "grad_norm": 0.28383854031562805, "learning_rate": 0.00015828752789814874, "loss": 11.6743, "step": 43292 }, { "epoch": 0.9062421502135142, "grad_norm": 0.294468492269516, "learning_rate": 0.0001582857463160442, "loss": 11.6601, "step": 43293 }, { "epoch": 0.9062630829774764, "grad_norm": 0.2780486047267914, "learning_rate": 0.00015828396470592054, "loss": 11.6712, "step": 43294 }, { "epoch": 0.9062840157414385, "grad_norm": 0.32696211338043213, "learning_rate": 0.0001582821830677786, "loss": 11.6696, "step": 43295 }, { "epoch": 0.9063049485054007, "grad_norm": 0.37064430117607117, "learning_rate": 0.00015828040140161923, "loss": 11.6497, "step": 43296 }, { "epoch": 0.9063258812693628, "grad_norm": 0.3046661913394928, "learning_rate": 0.00015827861970744333, "loss": 11.6725, "step": 43297 }, { "epoch": 0.906346814033325, "grad_norm": 0.3711928129196167, "learning_rate": 0.00015827683798525172, "loss": 11.6529, "step": 43298 }, { "epoch": 0.9063677467972872, "grad_norm": 0.25371742248535156, "learning_rate": 0.00015827505623504525, "loss": 11.6677, "step": 43299 }, { "epoch": 0.9063886795612492, "grad_norm": 0.34403303265571594, "learning_rate": 0.00015827327445682481, "loss": 11.6725, "step": 43300 }, { "epoch": 0.9064096123252114, "grad_norm": 0.3278067708015442, "learning_rate": 0.00015827149265059125, "loss": 11.6691, "step": 43301 }, { "epoch": 0.9064305450891735, "grad_norm": 0.34420496225357056, "learning_rate": 0.00015826971081634543, "loss": 11.6785, "step": 43302 }, { "epoch": 0.9064514778531357, "grad_norm": 0.2854181230068207, "learning_rate": 0.00015826792895408813, "loss": 11.6819, "step": 43303 }, { "epoch": 0.9064724106170979, "grad_norm": 0.30789732933044434, "learning_rate": 0.0001582661470638203, "loss": 11.6524, "step": 43304 }, { "epoch": 0.90649334338106, "grad_norm": 0.3067631423473358, "learning_rate": 0.0001582643651455428, "loss": 11.6598, "step": 43305 }, { "epoch": 0.9065142761450222, "grad_norm": 0.3236839771270752, "learning_rate": 0.0001582625831992564, "loss": 11.6623, "step": 43306 }, { "epoch": 0.9065352089089843, "grad_norm": 0.2674328684806824, "learning_rate": 0.00015826080122496205, "loss": 11.6615, "step": 43307 }, { "epoch": 0.9065561416729465, "grad_norm": 0.4044097065925598, "learning_rate": 0.00015825901922266055, "loss": 11.6795, "step": 43308 }, { "epoch": 0.9065770744369086, "grad_norm": 0.33591189980506897, "learning_rate": 0.00015825723719235275, "loss": 11.6721, "step": 43309 }, { "epoch": 0.9065980072008708, "grad_norm": 0.3181750178337097, "learning_rate": 0.00015825545513403956, "loss": 11.6715, "step": 43310 }, { "epoch": 0.906618939964833, "grad_norm": 0.32129204273223877, "learning_rate": 0.0001582536730477218, "loss": 11.6766, "step": 43311 }, { "epoch": 0.9066398727287951, "grad_norm": 0.25171858072280884, "learning_rate": 0.0001582518909334003, "loss": 11.6607, "step": 43312 }, { "epoch": 0.9066608054927573, "grad_norm": 0.2700372040271759, "learning_rate": 0.00015825010879107597, "loss": 11.6745, "step": 43313 }, { "epoch": 0.9066817382567194, "grad_norm": 0.2725033462047577, "learning_rate": 0.00015824832662074967, "loss": 11.6545, "step": 43314 }, { "epoch": 0.9067026710206816, "grad_norm": 0.32858604192733765, "learning_rate": 0.0001582465444224222, "loss": 11.6588, "step": 43315 }, { "epoch": 0.9067236037846437, "grad_norm": 0.2917746901512146, "learning_rate": 0.00015824476219609445, "loss": 11.6719, "step": 43316 }, { "epoch": 0.9067445365486059, "grad_norm": 0.29941827058792114, "learning_rate": 0.00015824297994176728, "loss": 11.6829, "step": 43317 }, { "epoch": 0.9067654693125681, "grad_norm": 0.3555566072463989, "learning_rate": 0.00015824119765944157, "loss": 11.6654, "step": 43318 }, { "epoch": 0.9067864020765302, "grad_norm": 0.30518826842308044, "learning_rate": 0.00015823941534911812, "loss": 11.6666, "step": 43319 }, { "epoch": 0.9068073348404924, "grad_norm": 0.36415132880210876, "learning_rate": 0.00015823763301079784, "loss": 11.6706, "step": 43320 }, { "epoch": 0.9068282676044545, "grad_norm": 0.38361015915870667, "learning_rate": 0.0001582358506444815, "loss": 11.6666, "step": 43321 }, { "epoch": 0.9068492003684167, "grad_norm": 0.3034123480319977, "learning_rate": 0.0001582340682501701, "loss": 11.6718, "step": 43322 }, { "epoch": 0.9068701331323789, "grad_norm": 0.33226850628852844, "learning_rate": 0.00015823228582786437, "loss": 11.6591, "step": 43323 }, { "epoch": 0.9068910658963409, "grad_norm": 0.33172842860221863, "learning_rate": 0.00015823050337756525, "loss": 11.6825, "step": 43324 }, { "epoch": 0.9069119986603031, "grad_norm": 0.3133459985256195, "learning_rate": 0.00015822872089927352, "loss": 11.6672, "step": 43325 }, { "epoch": 0.9069329314242652, "grad_norm": 0.31664344668388367, "learning_rate": 0.00015822693839299008, "loss": 11.6686, "step": 43326 }, { "epoch": 0.9069538641882274, "grad_norm": 0.40192437171936035, "learning_rate": 0.0001582251558587158, "loss": 11.6596, "step": 43327 }, { "epoch": 0.9069747969521895, "grad_norm": 0.2668447494506836, "learning_rate": 0.00015822337329645148, "loss": 11.6657, "step": 43328 }, { "epoch": 0.9069957297161517, "grad_norm": 0.3006962537765503, "learning_rate": 0.0001582215907061981, "loss": 11.6726, "step": 43329 }, { "epoch": 0.9070166624801139, "grad_norm": 0.3108607232570648, "learning_rate": 0.00015821980808795636, "loss": 11.6762, "step": 43330 }, { "epoch": 0.907037595244076, "grad_norm": 0.34661227464675903, "learning_rate": 0.00015821802544172724, "loss": 11.6599, "step": 43331 }, { "epoch": 0.9070585280080382, "grad_norm": 0.2852190434932709, "learning_rate": 0.00015821624276751153, "loss": 11.6747, "step": 43332 }, { "epoch": 0.9070794607720003, "grad_norm": 0.28678998351097107, "learning_rate": 0.0001582144600653101, "loss": 11.6627, "step": 43333 }, { "epoch": 0.9071003935359625, "grad_norm": 0.3781501352787018, "learning_rate": 0.0001582126773351238, "loss": 11.6859, "step": 43334 }, { "epoch": 0.9071213262999246, "grad_norm": 0.34115853905677795, "learning_rate": 0.0001582108945769535, "loss": 11.6833, "step": 43335 }, { "epoch": 0.9071422590638868, "grad_norm": 0.3031524121761322, "learning_rate": 0.0001582091117908001, "loss": 11.6706, "step": 43336 }, { "epoch": 0.907163191827849, "grad_norm": 0.3108784556388855, "learning_rate": 0.0001582073289766644, "loss": 11.6789, "step": 43337 }, { "epoch": 0.9071841245918111, "grad_norm": 0.38265514373779297, "learning_rate": 0.00015820554613454722, "loss": 11.67, "step": 43338 }, { "epoch": 0.9072050573557733, "grad_norm": 0.3037407696247101, "learning_rate": 0.00015820376326444952, "loss": 11.661, "step": 43339 }, { "epoch": 0.9072259901197354, "grad_norm": 0.35203731060028076, "learning_rate": 0.00015820198036637208, "loss": 11.669, "step": 43340 }, { "epoch": 0.9072469228836976, "grad_norm": 0.28807294368743896, "learning_rate": 0.0001582001974403158, "loss": 11.6721, "step": 43341 }, { "epoch": 0.9072678556476598, "grad_norm": 0.2735525667667389, "learning_rate": 0.0001581984144862815, "loss": 11.6523, "step": 43342 }, { "epoch": 0.9072887884116219, "grad_norm": 0.4214884042739868, "learning_rate": 0.00015819663150427003, "loss": 11.6926, "step": 43343 }, { "epoch": 0.9073097211755841, "grad_norm": 0.4239043891429901, "learning_rate": 0.00015819484849428233, "loss": 11.6584, "step": 43344 }, { "epoch": 0.9073306539395462, "grad_norm": 0.27528154850006104, "learning_rate": 0.00015819306545631918, "loss": 11.6696, "step": 43345 }, { "epoch": 0.9073515867035084, "grad_norm": 0.5263230800628662, "learning_rate": 0.00015819128239038147, "loss": 11.6854, "step": 43346 }, { "epoch": 0.9073725194674704, "grad_norm": 0.29404255747795105, "learning_rate": 0.00015818949929647, "loss": 11.6674, "step": 43347 }, { "epoch": 0.9073934522314326, "grad_norm": 0.311972439289093, "learning_rate": 0.0001581877161745857, "loss": 11.6701, "step": 43348 }, { "epoch": 0.9074143849953948, "grad_norm": 0.39635926485061646, "learning_rate": 0.00015818593302472942, "loss": 11.659, "step": 43349 }, { "epoch": 0.9074353177593569, "grad_norm": 0.28463664650917053, "learning_rate": 0.00015818414984690197, "loss": 11.6739, "step": 43350 }, { "epoch": 0.9074562505233191, "grad_norm": 0.3804028630256653, "learning_rate": 0.00015818236664110423, "loss": 11.6662, "step": 43351 }, { "epoch": 0.9074771832872812, "grad_norm": 0.3130177855491638, "learning_rate": 0.00015818058340733707, "loss": 11.6808, "step": 43352 }, { "epoch": 0.9074981160512434, "grad_norm": 0.3497775197029114, "learning_rate": 0.00015817880014560134, "loss": 11.6755, "step": 43353 }, { "epoch": 0.9075190488152055, "grad_norm": 0.25420743227005005, "learning_rate": 0.0001581770168558979, "loss": 11.6714, "step": 43354 }, { "epoch": 0.9075399815791677, "grad_norm": 0.27871066331863403, "learning_rate": 0.00015817523353822756, "loss": 11.6951, "step": 43355 }, { "epoch": 0.9075609143431299, "grad_norm": 0.3869751989841461, "learning_rate": 0.0001581734501925913, "loss": 11.6661, "step": 43356 }, { "epoch": 0.907581847107092, "grad_norm": 0.29662182927131653, "learning_rate": 0.00015817166681898984, "loss": 11.6608, "step": 43357 }, { "epoch": 0.9076027798710542, "grad_norm": 0.32212623953819275, "learning_rate": 0.0001581698834174241, "loss": 11.6881, "step": 43358 }, { "epoch": 0.9076237126350163, "grad_norm": 0.31817102432250977, "learning_rate": 0.00015816809998789492, "loss": 11.6734, "step": 43359 }, { "epoch": 0.9076446453989785, "grad_norm": 0.2798905074596405, "learning_rate": 0.00015816631653040316, "loss": 11.6682, "step": 43360 }, { "epoch": 0.9076655781629407, "grad_norm": 0.38258177042007446, "learning_rate": 0.00015816453304494973, "loss": 11.6735, "step": 43361 }, { "epoch": 0.9076865109269028, "grad_norm": 0.5350638031959534, "learning_rate": 0.00015816274953153544, "loss": 11.6722, "step": 43362 }, { "epoch": 0.907707443690865, "grad_norm": 0.30116572976112366, "learning_rate": 0.00015816096599016112, "loss": 11.6655, "step": 43363 }, { "epoch": 0.9077283764548271, "grad_norm": 0.36964938044548035, "learning_rate": 0.00015815918242082769, "loss": 11.6726, "step": 43364 }, { "epoch": 0.9077493092187893, "grad_norm": 0.24360577762126923, "learning_rate": 0.00015815739882353593, "loss": 11.6546, "step": 43365 }, { "epoch": 0.9077702419827514, "grad_norm": 0.38252145051956177, "learning_rate": 0.00015815561519828676, "loss": 11.6585, "step": 43366 }, { "epoch": 0.9077911747467136, "grad_norm": 0.26452791690826416, "learning_rate": 0.00015815383154508104, "loss": 11.6605, "step": 43367 }, { "epoch": 0.9078121075106758, "grad_norm": 0.2778446674346924, "learning_rate": 0.00015815204786391961, "loss": 11.6858, "step": 43368 }, { "epoch": 0.9078330402746378, "grad_norm": 0.3160082995891571, "learning_rate": 0.0001581502641548033, "loss": 11.6935, "step": 43369 }, { "epoch": 0.9078539730386, "grad_norm": 0.32161402702331543, "learning_rate": 0.000158148480417733, "loss": 11.6497, "step": 43370 }, { "epoch": 0.9078749058025621, "grad_norm": 0.2886876165866852, "learning_rate": 0.00015814669665270955, "loss": 11.6768, "step": 43371 }, { "epoch": 0.9078958385665243, "grad_norm": 0.27196818590164185, "learning_rate": 0.00015814491285973386, "loss": 11.6758, "step": 43372 }, { "epoch": 0.9079167713304864, "grad_norm": 0.2807685136795044, "learning_rate": 0.00015814312903880673, "loss": 11.6702, "step": 43373 }, { "epoch": 0.9079377040944486, "grad_norm": 0.23782917857170105, "learning_rate": 0.000158141345189929, "loss": 11.6667, "step": 43374 }, { "epoch": 0.9079586368584108, "grad_norm": 0.4881154000759125, "learning_rate": 0.0001581395613131016, "loss": 11.6539, "step": 43375 }, { "epoch": 0.9079795696223729, "grad_norm": 0.31031540036201477, "learning_rate": 0.0001581377774083253, "loss": 11.6788, "step": 43376 }, { "epoch": 0.9080005023863351, "grad_norm": 0.3014286160469055, "learning_rate": 0.00015813599347560105, "loss": 11.6755, "step": 43377 }, { "epoch": 0.9080214351502972, "grad_norm": 0.3073737621307373, "learning_rate": 0.00015813420951492964, "loss": 11.6468, "step": 43378 }, { "epoch": 0.9080423679142594, "grad_norm": 0.33193692564964294, "learning_rate": 0.00015813242552631195, "loss": 11.6566, "step": 43379 }, { "epoch": 0.9080633006782216, "grad_norm": 0.30204424262046814, "learning_rate": 0.00015813064150974887, "loss": 11.654, "step": 43380 }, { "epoch": 0.9080842334421837, "grad_norm": 0.29326820373535156, "learning_rate": 0.00015812885746524117, "loss": 11.6675, "step": 43381 }, { "epoch": 0.9081051662061459, "grad_norm": 0.29173916578292847, "learning_rate": 0.0001581270733927898, "loss": 11.6726, "step": 43382 }, { "epoch": 0.908126098970108, "grad_norm": 0.2688264846801758, "learning_rate": 0.00015812528929239558, "loss": 11.6609, "step": 43383 }, { "epoch": 0.9081470317340702, "grad_norm": 0.3434450030326843, "learning_rate": 0.00015812350516405935, "loss": 11.6619, "step": 43384 }, { "epoch": 0.9081679644980323, "grad_norm": 0.29297882318496704, "learning_rate": 0.00015812172100778202, "loss": 11.6779, "step": 43385 }, { "epoch": 0.9081888972619945, "grad_norm": 0.26433083415031433, "learning_rate": 0.0001581199368235644, "loss": 11.6864, "step": 43386 }, { "epoch": 0.9082098300259567, "grad_norm": 0.308651864528656, "learning_rate": 0.00015811815261140735, "loss": 11.6592, "step": 43387 }, { "epoch": 0.9082307627899188, "grad_norm": 0.35074904561042786, "learning_rate": 0.00015811636837131173, "loss": 11.6746, "step": 43388 }, { "epoch": 0.908251695553881, "grad_norm": 0.34033480286598206, "learning_rate": 0.00015811458410327844, "loss": 11.6709, "step": 43389 }, { "epoch": 0.9082726283178431, "grad_norm": 0.3421621322631836, "learning_rate": 0.0001581127998073083, "loss": 11.6697, "step": 43390 }, { "epoch": 0.9082935610818053, "grad_norm": 0.30601873993873596, "learning_rate": 0.00015811101548340214, "loss": 11.6603, "step": 43391 }, { "epoch": 0.9083144938457673, "grad_norm": 0.33121395111083984, "learning_rate": 0.00015810923113156086, "loss": 11.6609, "step": 43392 }, { "epoch": 0.9083354266097295, "grad_norm": 0.34294578433036804, "learning_rate": 0.00015810744675178536, "loss": 11.6819, "step": 43393 }, { "epoch": 0.9083563593736917, "grad_norm": 0.3128425180912018, "learning_rate": 0.0001581056623440764, "loss": 11.6633, "step": 43394 }, { "epoch": 0.9083772921376538, "grad_norm": 0.33531221747398376, "learning_rate": 0.00015810387790843487, "loss": 11.6705, "step": 43395 }, { "epoch": 0.908398224901616, "grad_norm": 0.3142588436603546, "learning_rate": 0.00015810209344486166, "loss": 11.6851, "step": 43396 }, { "epoch": 0.9084191576655781, "grad_norm": 0.30771517753601074, "learning_rate": 0.0001581003089533576, "loss": 11.6617, "step": 43397 }, { "epoch": 0.9084400904295403, "grad_norm": 0.3370717763900757, "learning_rate": 0.00015809852443392358, "loss": 11.6648, "step": 43398 }, { "epoch": 0.9084610231935024, "grad_norm": 0.27300193905830383, "learning_rate": 0.0001580967398865604, "loss": 11.6647, "step": 43399 }, { "epoch": 0.9084819559574646, "grad_norm": 0.32323169708251953, "learning_rate": 0.00015809495531126898, "loss": 11.6747, "step": 43400 }, { "epoch": 0.9085028887214268, "grad_norm": 0.2783326804637909, "learning_rate": 0.00015809317070805015, "loss": 11.6722, "step": 43401 }, { "epoch": 0.9085238214853889, "grad_norm": 0.3145958185195923, "learning_rate": 0.00015809138607690475, "loss": 11.6656, "step": 43402 }, { "epoch": 0.9085447542493511, "grad_norm": 0.27358153462409973, "learning_rate": 0.00015808960141783368, "loss": 11.6689, "step": 43403 }, { "epoch": 0.9085656870133132, "grad_norm": 0.2614051103591919, "learning_rate": 0.00015808781673083775, "loss": 11.6832, "step": 43404 }, { "epoch": 0.9085866197772754, "grad_norm": 0.2834535539150238, "learning_rate": 0.00015808603201591787, "loss": 11.6706, "step": 43405 }, { "epoch": 0.9086075525412376, "grad_norm": 0.3063521385192871, "learning_rate": 0.00015808424727307486, "loss": 11.6664, "step": 43406 }, { "epoch": 0.9086284853051997, "grad_norm": 0.3006851375102997, "learning_rate": 0.00015808246250230957, "loss": 11.6673, "step": 43407 }, { "epoch": 0.9086494180691619, "grad_norm": 0.2813118100166321, "learning_rate": 0.00015808067770362288, "loss": 11.6698, "step": 43408 }, { "epoch": 0.908670350833124, "grad_norm": 0.24491403996944427, "learning_rate": 0.00015807889287701565, "loss": 11.6572, "step": 43409 }, { "epoch": 0.9086912835970862, "grad_norm": 0.28258216381073, "learning_rate": 0.00015807710802248875, "loss": 11.669, "step": 43410 }, { "epoch": 0.9087122163610483, "grad_norm": 0.36551880836486816, "learning_rate": 0.000158075323140043, "loss": 11.677, "step": 43411 }, { "epoch": 0.9087331491250105, "grad_norm": 0.33534500002861023, "learning_rate": 0.0001580735382296793, "loss": 11.6714, "step": 43412 }, { "epoch": 0.9087540818889727, "grad_norm": 0.29795774817466736, "learning_rate": 0.00015807175329139847, "loss": 11.6741, "step": 43413 }, { "epoch": 0.9087750146529348, "grad_norm": 0.3168084919452667, "learning_rate": 0.00015806996832520137, "loss": 11.6682, "step": 43414 }, { "epoch": 0.908795947416897, "grad_norm": 0.2687673270702362, "learning_rate": 0.0001580681833310889, "loss": 11.6723, "step": 43415 }, { "epoch": 0.908816880180859, "grad_norm": 0.3461684286594391, "learning_rate": 0.00015806639830906187, "loss": 11.6699, "step": 43416 }, { "epoch": 0.9088378129448212, "grad_norm": 0.248471200466156, "learning_rate": 0.0001580646132591212, "loss": 11.6734, "step": 43417 }, { "epoch": 0.9088587457087833, "grad_norm": 0.2596660256385803, "learning_rate": 0.00015806282818126764, "loss": 11.6733, "step": 43418 }, { "epoch": 0.9088796784727455, "grad_norm": 0.3076117932796478, "learning_rate": 0.00015806104307550215, "loss": 11.6677, "step": 43419 }, { "epoch": 0.9089006112367077, "grad_norm": 0.2739178538322449, "learning_rate": 0.00015805925794182559, "loss": 11.6514, "step": 43420 }, { "epoch": 0.9089215440006698, "grad_norm": 0.288624107837677, "learning_rate": 0.00015805747278023873, "loss": 11.6678, "step": 43421 }, { "epoch": 0.908942476764632, "grad_norm": 0.2395784556865692, "learning_rate": 0.0001580556875907425, "loss": 11.6544, "step": 43422 }, { "epoch": 0.9089634095285941, "grad_norm": 0.29667502641677856, "learning_rate": 0.0001580539023733377, "loss": 11.6769, "step": 43423 }, { "epoch": 0.9089843422925563, "grad_norm": 0.2878524661064148, "learning_rate": 0.00015805211712802527, "loss": 11.6644, "step": 43424 }, { "epoch": 0.9090052750565185, "grad_norm": 0.30694782733917236, "learning_rate": 0.00015805033185480602, "loss": 11.6898, "step": 43425 }, { "epoch": 0.9090262078204806, "grad_norm": 0.29674020409584045, "learning_rate": 0.0001580485465536808, "loss": 11.6675, "step": 43426 }, { "epoch": 0.9090471405844428, "grad_norm": 0.3304233253002167, "learning_rate": 0.00015804676122465046, "loss": 11.6857, "step": 43427 }, { "epoch": 0.9090680733484049, "grad_norm": 0.3083244562149048, "learning_rate": 0.0001580449758677159, "loss": 11.6515, "step": 43428 }, { "epoch": 0.9090890061123671, "grad_norm": 0.254072904586792, "learning_rate": 0.000158043190482878, "loss": 11.6638, "step": 43429 }, { "epoch": 0.9091099388763292, "grad_norm": 0.4158591330051422, "learning_rate": 0.00015804140507013752, "loss": 11.6666, "step": 43430 }, { "epoch": 0.9091308716402914, "grad_norm": 0.41005581617355347, "learning_rate": 0.00015803961962949537, "loss": 11.6673, "step": 43431 }, { "epoch": 0.9091518044042536, "grad_norm": 0.3397809863090515, "learning_rate": 0.00015803783416095245, "loss": 11.6671, "step": 43432 }, { "epoch": 0.9091727371682157, "grad_norm": 0.32799822092056274, "learning_rate": 0.00015803604866450956, "loss": 11.6784, "step": 43433 }, { "epoch": 0.9091936699321779, "grad_norm": 0.35753750801086426, "learning_rate": 0.00015803426314016757, "loss": 11.6769, "step": 43434 }, { "epoch": 0.90921460269614, "grad_norm": 0.30037614703178406, "learning_rate": 0.00015803247758792733, "loss": 11.6611, "step": 43435 }, { "epoch": 0.9092355354601022, "grad_norm": 0.3159421682357788, "learning_rate": 0.00015803069200778978, "loss": 11.6765, "step": 43436 }, { "epoch": 0.9092564682240643, "grad_norm": 0.2716262638568878, "learning_rate": 0.00015802890639975568, "loss": 11.6752, "step": 43437 }, { "epoch": 0.9092774009880265, "grad_norm": 0.274844229221344, "learning_rate": 0.0001580271207638259, "loss": 11.6744, "step": 43438 }, { "epoch": 0.9092983337519887, "grad_norm": 0.3404693603515625, "learning_rate": 0.00015802533510000133, "loss": 11.6863, "step": 43439 }, { "epoch": 0.9093192665159507, "grad_norm": 0.2642669379711151, "learning_rate": 0.0001580235494082828, "loss": 11.6538, "step": 43440 }, { "epoch": 0.9093401992799129, "grad_norm": 0.3375180661678314, "learning_rate": 0.00015802176368867122, "loss": 11.6794, "step": 43441 }, { "epoch": 0.909361132043875, "grad_norm": 0.3042330741882324, "learning_rate": 0.00015801997794116744, "loss": 11.6551, "step": 43442 }, { "epoch": 0.9093820648078372, "grad_norm": 0.3406248390674591, "learning_rate": 0.00015801819216577223, "loss": 11.67, "step": 43443 }, { "epoch": 0.9094029975717994, "grad_norm": 0.3641761839389801, "learning_rate": 0.00015801640636248655, "loss": 11.6631, "step": 43444 }, { "epoch": 0.9094239303357615, "grad_norm": 0.2851963937282562, "learning_rate": 0.0001580146205313112, "loss": 11.6848, "step": 43445 }, { "epoch": 0.9094448630997237, "grad_norm": 0.3167511224746704, "learning_rate": 0.0001580128346722471, "loss": 11.6679, "step": 43446 }, { "epoch": 0.9094657958636858, "grad_norm": 0.2885151207447052, "learning_rate": 0.000158011048785295, "loss": 11.676, "step": 43447 }, { "epoch": 0.909486728627648, "grad_norm": 0.3711124360561371, "learning_rate": 0.00015800926287045587, "loss": 11.6671, "step": 43448 }, { "epoch": 0.9095076613916101, "grad_norm": 0.3142905533313751, "learning_rate": 0.00015800747692773054, "loss": 11.6563, "step": 43449 }, { "epoch": 0.9095285941555723, "grad_norm": 0.29392269253730774, "learning_rate": 0.00015800569095711982, "loss": 11.6965, "step": 43450 }, { "epoch": 0.9095495269195345, "grad_norm": 0.31594139337539673, "learning_rate": 0.00015800390495862462, "loss": 11.6661, "step": 43451 }, { "epoch": 0.9095704596834966, "grad_norm": 0.26045140624046326, "learning_rate": 0.00015800211893224578, "loss": 11.6553, "step": 43452 }, { "epoch": 0.9095913924474588, "grad_norm": 0.3356103301048279, "learning_rate": 0.00015800033287798415, "loss": 11.6689, "step": 43453 }, { "epoch": 0.9096123252114209, "grad_norm": 0.31589871644973755, "learning_rate": 0.0001579985467958406, "loss": 11.6653, "step": 43454 }, { "epoch": 0.9096332579753831, "grad_norm": 0.2859342694282532, "learning_rate": 0.000157996760685816, "loss": 11.6753, "step": 43455 }, { "epoch": 0.9096541907393452, "grad_norm": 0.28346025943756104, "learning_rate": 0.00015799497454791118, "loss": 11.6549, "step": 43456 }, { "epoch": 0.9096751235033074, "grad_norm": 0.2915479838848114, "learning_rate": 0.00015799318838212703, "loss": 11.6637, "step": 43457 }, { "epoch": 0.9096960562672696, "grad_norm": 0.2799774706363678, "learning_rate": 0.00015799140218846435, "loss": 11.6724, "step": 43458 }, { "epoch": 0.9097169890312317, "grad_norm": 0.2596319019794464, "learning_rate": 0.00015798961596692407, "loss": 11.674, "step": 43459 }, { "epoch": 0.9097379217951939, "grad_norm": 0.3745073676109314, "learning_rate": 0.00015798782971750703, "loss": 11.6618, "step": 43460 }, { "epoch": 0.909758854559156, "grad_norm": 0.429577112197876, "learning_rate": 0.0001579860434402141, "loss": 11.6687, "step": 43461 }, { "epoch": 0.9097797873231181, "grad_norm": 0.36463090777397156, "learning_rate": 0.00015798425713504604, "loss": 11.67, "step": 43462 }, { "epoch": 0.9098007200870804, "grad_norm": 0.322401225566864, "learning_rate": 0.00015798247080200386, "loss": 11.6818, "step": 43463 }, { "epoch": 0.9098216528510424, "grad_norm": 0.28372740745544434, "learning_rate": 0.0001579806844410883, "loss": 11.6569, "step": 43464 }, { "epoch": 0.9098425856150046, "grad_norm": 0.31947222352027893, "learning_rate": 0.00015797889805230027, "loss": 11.6639, "step": 43465 }, { "epoch": 0.9098635183789667, "grad_norm": 0.3655543327331543, "learning_rate": 0.00015797711163564063, "loss": 11.6689, "step": 43466 }, { "epoch": 0.9098844511429289, "grad_norm": 0.26401573419570923, "learning_rate": 0.00015797532519111024, "loss": 11.6754, "step": 43467 }, { "epoch": 0.909905383906891, "grad_norm": 0.2839658260345459, "learning_rate": 0.00015797353871870997, "loss": 11.6746, "step": 43468 }, { "epoch": 0.9099263166708532, "grad_norm": 0.2799621820449829, "learning_rate": 0.0001579717522184406, "loss": 11.6684, "step": 43469 }, { "epoch": 0.9099472494348154, "grad_norm": 0.29157647490501404, "learning_rate": 0.00015796996569030308, "loss": 11.6725, "step": 43470 }, { "epoch": 0.9099681821987775, "grad_norm": 0.331510990858078, "learning_rate": 0.00015796817913429822, "loss": 11.676, "step": 43471 }, { "epoch": 0.9099891149627397, "grad_norm": 0.3093520998954773, "learning_rate": 0.00015796639255042692, "loss": 11.6665, "step": 43472 }, { "epoch": 0.9100100477267018, "grad_norm": 0.2846301198005676, "learning_rate": 0.00015796460593869, "loss": 11.6667, "step": 43473 }, { "epoch": 0.910030980490664, "grad_norm": 0.2699134647846222, "learning_rate": 0.00015796281929908833, "loss": 11.6805, "step": 43474 }, { "epoch": 0.9100519132546261, "grad_norm": 0.3107813894748688, "learning_rate": 0.00015796103263162277, "loss": 11.6794, "step": 43475 }, { "epoch": 0.9100728460185883, "grad_norm": 0.39822667837142944, "learning_rate": 0.0001579592459362942, "loss": 11.6804, "step": 43476 }, { "epoch": 0.9100937787825505, "grad_norm": 0.2598588466644287, "learning_rate": 0.0001579574592131034, "loss": 11.661, "step": 43477 }, { "epoch": 0.9101147115465126, "grad_norm": 0.3385702073574066, "learning_rate": 0.00015795567246205136, "loss": 11.6692, "step": 43478 }, { "epoch": 0.9101356443104748, "grad_norm": 0.26953014731407166, "learning_rate": 0.00015795388568313882, "loss": 11.6766, "step": 43479 }, { "epoch": 0.9101565770744369, "grad_norm": 0.2600756287574768, "learning_rate": 0.0001579520988763667, "loss": 11.666, "step": 43480 }, { "epoch": 0.9101775098383991, "grad_norm": 0.2915040850639343, "learning_rate": 0.00015795031204173584, "loss": 11.6456, "step": 43481 }, { "epoch": 0.9101984426023613, "grad_norm": 0.31310808658599854, "learning_rate": 0.0001579485251792471, "loss": 11.6648, "step": 43482 }, { "epoch": 0.9102193753663234, "grad_norm": 0.3158806264400482, "learning_rate": 0.00015794673828890138, "loss": 11.6608, "step": 43483 }, { "epoch": 0.9102403081302856, "grad_norm": 0.31180518865585327, "learning_rate": 0.00015794495137069946, "loss": 11.6835, "step": 43484 }, { "epoch": 0.9102612408942476, "grad_norm": 0.38902488350868225, "learning_rate": 0.00015794316442464227, "loss": 11.6683, "step": 43485 }, { "epoch": 0.9102821736582098, "grad_norm": 0.3807738721370697, "learning_rate": 0.00015794137745073062, "loss": 11.6766, "step": 43486 }, { "epoch": 0.9103031064221719, "grad_norm": 0.2528848946094513, "learning_rate": 0.0001579395904489654, "loss": 11.6769, "step": 43487 }, { "epoch": 0.9103240391861341, "grad_norm": 0.2788781225681305, "learning_rate": 0.00015793780341934746, "loss": 11.6645, "step": 43488 }, { "epoch": 0.9103449719500963, "grad_norm": 0.3313087522983551, "learning_rate": 0.00015793601636187763, "loss": 11.6715, "step": 43489 }, { "epoch": 0.9103659047140584, "grad_norm": 0.37145400047302246, "learning_rate": 0.00015793422927655684, "loss": 11.6804, "step": 43490 }, { "epoch": 0.9103868374780206, "grad_norm": 0.28304168581962585, "learning_rate": 0.00015793244216338588, "loss": 11.6756, "step": 43491 }, { "epoch": 0.9104077702419827, "grad_norm": 0.3002341091632843, "learning_rate": 0.00015793065502236559, "loss": 11.6663, "step": 43492 }, { "epoch": 0.9104287030059449, "grad_norm": 0.38012588024139404, "learning_rate": 0.0001579288678534969, "loss": 11.6767, "step": 43493 }, { "epoch": 0.910449635769907, "grad_norm": 0.2488023191690445, "learning_rate": 0.00015792708065678071, "loss": 11.6694, "step": 43494 }, { "epoch": 0.9104705685338692, "grad_norm": 0.3088107109069824, "learning_rate": 0.00015792529343221774, "loss": 11.658, "step": 43495 }, { "epoch": 0.9104915012978314, "grad_norm": 0.2883904278278351, "learning_rate": 0.00015792350617980895, "loss": 11.6753, "step": 43496 }, { "epoch": 0.9105124340617935, "grad_norm": 0.31210505962371826, "learning_rate": 0.00015792171889955517, "loss": 11.6766, "step": 43497 }, { "epoch": 0.9105333668257557, "grad_norm": 0.33392706513404846, "learning_rate": 0.00015791993159145723, "loss": 11.6735, "step": 43498 }, { "epoch": 0.9105542995897178, "grad_norm": 0.3520670235157013, "learning_rate": 0.00015791814425551604, "loss": 11.6678, "step": 43499 }, { "epoch": 0.91057523235368, "grad_norm": 0.2701626121997833, "learning_rate": 0.00015791635689173242, "loss": 11.6608, "step": 43500 }, { "epoch": 0.9105961651176422, "grad_norm": 0.3578745722770691, "learning_rate": 0.00015791456950010728, "loss": 11.6721, "step": 43501 }, { "epoch": 0.9106170978816043, "grad_norm": 0.34890779852867126, "learning_rate": 0.00015791278208064142, "loss": 11.6772, "step": 43502 }, { "epoch": 0.9106380306455665, "grad_norm": 0.3113819658756256, "learning_rate": 0.00015791099463333575, "loss": 11.6562, "step": 43503 }, { "epoch": 0.9106589634095286, "grad_norm": 0.3651391863822937, "learning_rate": 0.00015790920715819107, "loss": 11.671, "step": 43504 }, { "epoch": 0.9106798961734908, "grad_norm": 0.29823675751686096, "learning_rate": 0.0001579074196552083, "loss": 11.6661, "step": 43505 }, { "epoch": 0.9107008289374529, "grad_norm": 0.3313584327697754, "learning_rate": 0.00015790563212438825, "loss": 11.6521, "step": 43506 }, { "epoch": 0.9107217617014151, "grad_norm": 0.32214096188545227, "learning_rate": 0.00015790384456573179, "loss": 11.6551, "step": 43507 }, { "epoch": 0.9107426944653773, "grad_norm": 0.293179988861084, "learning_rate": 0.00015790205697923984, "loss": 11.6613, "step": 43508 }, { "epoch": 0.9107636272293393, "grad_norm": 0.3021405339241028, "learning_rate": 0.0001579002693649132, "loss": 11.7018, "step": 43509 }, { "epoch": 0.9107845599933015, "grad_norm": 0.36115729808807373, "learning_rate": 0.0001578984817227527, "loss": 11.6565, "step": 43510 }, { "epoch": 0.9108054927572636, "grad_norm": 0.30559366941452026, "learning_rate": 0.00015789669405275928, "loss": 11.6802, "step": 43511 }, { "epoch": 0.9108264255212258, "grad_norm": 0.3776233196258545, "learning_rate": 0.00015789490635493376, "loss": 11.6815, "step": 43512 }, { "epoch": 0.9108473582851879, "grad_norm": 0.29157769680023193, "learning_rate": 0.00015789311862927696, "loss": 11.6705, "step": 43513 }, { "epoch": 0.9108682910491501, "grad_norm": 0.22795702517032623, "learning_rate": 0.0001578913308757898, "loss": 11.6594, "step": 43514 }, { "epoch": 0.9108892238131123, "grad_norm": 0.2749611437320709, "learning_rate": 0.00015788954309447313, "loss": 11.6602, "step": 43515 }, { "epoch": 0.9109101565770744, "grad_norm": 0.2816835045814514, "learning_rate": 0.00015788775528532777, "loss": 11.6727, "step": 43516 }, { "epoch": 0.9109310893410366, "grad_norm": 0.4098558723926544, "learning_rate": 0.00015788596744835463, "loss": 11.6556, "step": 43517 }, { "epoch": 0.9109520221049987, "grad_norm": 0.3302668333053589, "learning_rate": 0.00015788417958355455, "loss": 11.678, "step": 43518 }, { "epoch": 0.9109729548689609, "grad_norm": 0.3470490276813507, "learning_rate": 0.00015788239169092837, "loss": 11.6891, "step": 43519 }, { "epoch": 0.9109938876329231, "grad_norm": 0.2859945595264435, "learning_rate": 0.00015788060377047697, "loss": 11.6591, "step": 43520 }, { "epoch": 0.9110148203968852, "grad_norm": 0.31240367889404297, "learning_rate": 0.00015787881582220118, "loss": 11.6608, "step": 43521 }, { "epoch": 0.9110357531608474, "grad_norm": 0.27591148018836975, "learning_rate": 0.00015787702784610192, "loss": 11.6693, "step": 43522 }, { "epoch": 0.9110566859248095, "grad_norm": 0.3881208598613739, "learning_rate": 0.00015787523984217997, "loss": 11.6629, "step": 43523 }, { "epoch": 0.9110776186887717, "grad_norm": 0.31980568170547485, "learning_rate": 0.00015787345181043628, "loss": 11.6704, "step": 43524 }, { "epoch": 0.9110985514527338, "grad_norm": 0.2918780446052551, "learning_rate": 0.00015787166375087167, "loss": 11.6754, "step": 43525 }, { "epoch": 0.911119484216696, "grad_norm": 0.28253039717674255, "learning_rate": 0.00015786987566348691, "loss": 11.6751, "step": 43526 }, { "epoch": 0.9111404169806582, "grad_norm": 0.4045335352420807, "learning_rate": 0.00015786808754828302, "loss": 11.6698, "step": 43527 }, { "epoch": 0.9111613497446203, "grad_norm": 0.3293638229370117, "learning_rate": 0.00015786629940526075, "loss": 11.6709, "step": 43528 }, { "epoch": 0.9111822825085825, "grad_norm": 0.3501231074333191, "learning_rate": 0.000157864511234421, "loss": 11.6648, "step": 43529 }, { "epoch": 0.9112032152725446, "grad_norm": 0.3601694703102112, "learning_rate": 0.0001578627230357646, "loss": 11.676, "step": 43530 }, { "epoch": 0.9112241480365068, "grad_norm": 0.3690967857837677, "learning_rate": 0.00015786093480929246, "loss": 11.6608, "step": 43531 }, { "epoch": 0.9112450808004688, "grad_norm": 0.2695414125919342, "learning_rate": 0.0001578591465550054, "loss": 11.6643, "step": 43532 }, { "epoch": 0.911266013564431, "grad_norm": 0.3724025785923004, "learning_rate": 0.00015785735827290425, "loss": 11.6828, "step": 43533 }, { "epoch": 0.9112869463283932, "grad_norm": 0.26832813024520874, "learning_rate": 0.00015785556996298996, "loss": 11.6472, "step": 43534 }, { "epoch": 0.9113078790923553, "grad_norm": 0.32607942819595337, "learning_rate": 0.0001578537816252633, "loss": 11.6819, "step": 43535 }, { "epoch": 0.9113288118563175, "grad_norm": 0.2591119408607483, "learning_rate": 0.0001578519932597252, "loss": 11.6689, "step": 43536 }, { "epoch": 0.9113497446202796, "grad_norm": 0.3316587507724762, "learning_rate": 0.00015785020486637648, "loss": 11.6809, "step": 43537 }, { "epoch": 0.9113706773842418, "grad_norm": 0.318684458732605, "learning_rate": 0.000157848416445218, "loss": 11.6689, "step": 43538 }, { "epoch": 0.911391610148204, "grad_norm": 0.27714696526527405, "learning_rate": 0.00015784662799625063, "loss": 11.6563, "step": 43539 }, { "epoch": 0.9114125429121661, "grad_norm": 0.27088475227355957, "learning_rate": 0.0001578448395194752, "loss": 11.6493, "step": 43540 }, { "epoch": 0.9114334756761283, "grad_norm": 0.3666572868824005, "learning_rate": 0.00015784305101489263, "loss": 11.6748, "step": 43541 }, { "epoch": 0.9114544084400904, "grad_norm": 0.31757938861846924, "learning_rate": 0.00015784126248250373, "loss": 11.66, "step": 43542 }, { "epoch": 0.9114753412040526, "grad_norm": 0.251783549785614, "learning_rate": 0.0001578394739223094, "loss": 11.6672, "step": 43543 }, { "epoch": 0.9114962739680147, "grad_norm": 0.34520143270492554, "learning_rate": 0.00015783768533431046, "loss": 11.6702, "step": 43544 }, { "epoch": 0.9115172067319769, "grad_norm": 0.3297836184501648, "learning_rate": 0.00015783589671850777, "loss": 11.6499, "step": 43545 }, { "epoch": 0.9115381394959391, "grad_norm": 0.32382893562316895, "learning_rate": 0.00015783410807490222, "loss": 11.6474, "step": 43546 }, { "epoch": 0.9115590722599012, "grad_norm": 0.28246766328811646, "learning_rate": 0.00015783231940349465, "loss": 11.6746, "step": 43547 }, { "epoch": 0.9115800050238634, "grad_norm": 0.37368571758270264, "learning_rate": 0.0001578305307042859, "loss": 11.6603, "step": 43548 }, { "epoch": 0.9116009377878255, "grad_norm": 0.2553551495075226, "learning_rate": 0.0001578287419772769, "loss": 11.6575, "step": 43549 }, { "epoch": 0.9116218705517877, "grad_norm": 0.34904977679252625, "learning_rate": 0.00015782695322246845, "loss": 11.6813, "step": 43550 }, { "epoch": 0.9116428033157498, "grad_norm": 0.2652440667152405, "learning_rate": 0.00015782516443986141, "loss": 11.6592, "step": 43551 }, { "epoch": 0.911663736079712, "grad_norm": 0.3989601731300354, "learning_rate": 0.0001578233756294567, "loss": 11.6718, "step": 43552 }, { "epoch": 0.9116846688436742, "grad_norm": 0.3236140012741089, "learning_rate": 0.0001578215867912551, "loss": 11.6803, "step": 43553 }, { "epoch": 0.9117056016076363, "grad_norm": 0.2920709550380707, "learning_rate": 0.00015781979792525748, "loss": 11.6705, "step": 43554 }, { "epoch": 0.9117265343715985, "grad_norm": 0.3031892776489258, "learning_rate": 0.00015781800903146476, "loss": 11.6507, "step": 43555 }, { "epoch": 0.9117474671355605, "grad_norm": 0.3704359531402588, "learning_rate": 0.00015781622010987775, "loss": 11.675, "step": 43556 }, { "epoch": 0.9117683998995227, "grad_norm": 0.3929123878479004, "learning_rate": 0.00015781443116049735, "loss": 11.6822, "step": 43557 }, { "epoch": 0.9117893326634849, "grad_norm": 0.34495311975479126, "learning_rate": 0.00015781264218332435, "loss": 11.6776, "step": 43558 }, { "epoch": 0.911810265427447, "grad_norm": 0.27471932768821716, "learning_rate": 0.0001578108531783597, "loss": 11.668, "step": 43559 }, { "epoch": 0.9118311981914092, "grad_norm": 0.3032268285751343, "learning_rate": 0.00015780906414560419, "loss": 11.6492, "step": 43560 }, { "epoch": 0.9118521309553713, "grad_norm": 0.3560693562030792, "learning_rate": 0.00015780727508505872, "loss": 11.6705, "step": 43561 }, { "epoch": 0.9118730637193335, "grad_norm": 0.21630822122097015, "learning_rate": 0.0001578054859967241, "loss": 11.6508, "step": 43562 }, { "epoch": 0.9118939964832956, "grad_norm": 0.2848592698574066, "learning_rate": 0.00015780369688060125, "loss": 11.6575, "step": 43563 }, { "epoch": 0.9119149292472578, "grad_norm": 0.2684900462627411, "learning_rate": 0.00015780190773669102, "loss": 11.6698, "step": 43564 }, { "epoch": 0.91193586201122, "grad_norm": 0.32149937748908997, "learning_rate": 0.0001578001185649942, "loss": 11.6699, "step": 43565 }, { "epoch": 0.9119567947751821, "grad_norm": 0.36180657148361206, "learning_rate": 0.0001577983293655118, "loss": 11.6549, "step": 43566 }, { "epoch": 0.9119777275391443, "grad_norm": 0.31212669610977173, "learning_rate": 0.0001577965401382445, "loss": 11.6655, "step": 43567 }, { "epoch": 0.9119986603031064, "grad_norm": 0.4423016905784607, "learning_rate": 0.00015779475088319327, "loss": 11.6608, "step": 43568 }, { "epoch": 0.9120195930670686, "grad_norm": 0.350384920835495, "learning_rate": 0.00015779296160035893, "loss": 11.6636, "step": 43569 }, { "epoch": 0.9120405258310307, "grad_norm": 0.28050124645233154, "learning_rate": 0.00015779117228974235, "loss": 11.6623, "step": 43570 }, { "epoch": 0.9120614585949929, "grad_norm": 0.2810541093349457, "learning_rate": 0.00015778938295134444, "loss": 11.6749, "step": 43571 }, { "epoch": 0.9120823913589551, "grad_norm": 0.28352218866348267, "learning_rate": 0.000157787593585166, "loss": 11.6685, "step": 43572 }, { "epoch": 0.9121033241229172, "grad_norm": 0.36348217725753784, "learning_rate": 0.00015778580419120787, "loss": 11.6826, "step": 43573 }, { "epoch": 0.9121242568868794, "grad_norm": 0.3507115840911865, "learning_rate": 0.000157784014769471, "loss": 11.6494, "step": 43574 }, { "epoch": 0.9121451896508415, "grad_norm": 0.29748106002807617, "learning_rate": 0.00015778222531995614, "loss": 11.6583, "step": 43575 }, { "epoch": 0.9121661224148037, "grad_norm": 0.3225696086883545, "learning_rate": 0.00015778043584266426, "loss": 11.6713, "step": 43576 }, { "epoch": 0.9121870551787657, "grad_norm": 0.29350733757019043, "learning_rate": 0.00015777864633759615, "loss": 11.6544, "step": 43577 }, { "epoch": 0.912207987942728, "grad_norm": 0.3052155375480652, "learning_rate": 0.00015777685680475268, "loss": 11.665, "step": 43578 }, { "epoch": 0.9122289207066901, "grad_norm": 0.4751565456390381, "learning_rate": 0.0001577750672441347, "loss": 11.6584, "step": 43579 }, { "epoch": 0.9122498534706522, "grad_norm": 0.2679167687892914, "learning_rate": 0.0001577732776557431, "loss": 11.6615, "step": 43580 }, { "epoch": 0.9122707862346144, "grad_norm": 0.3522874116897583, "learning_rate": 0.00015777148803957874, "loss": 11.6678, "step": 43581 }, { "epoch": 0.9122917189985765, "grad_norm": 0.3247417211532593, "learning_rate": 0.00015776969839564244, "loss": 11.6708, "step": 43582 }, { "epoch": 0.9123126517625387, "grad_norm": 0.3287411630153656, "learning_rate": 0.0001577679087239351, "loss": 11.6669, "step": 43583 }, { "epoch": 0.9123335845265009, "grad_norm": 0.2854788899421692, "learning_rate": 0.00015776611902445757, "loss": 11.6669, "step": 43584 }, { "epoch": 0.912354517290463, "grad_norm": 0.3137321472167969, "learning_rate": 0.00015776432929721072, "loss": 11.6638, "step": 43585 }, { "epoch": 0.9123754500544252, "grad_norm": 0.3408927321434021, "learning_rate": 0.0001577625395421954, "loss": 11.6584, "step": 43586 }, { "epoch": 0.9123963828183873, "grad_norm": 0.2691967189311981, "learning_rate": 0.00015776074975941247, "loss": 11.6549, "step": 43587 }, { "epoch": 0.9124173155823495, "grad_norm": 0.2985885739326477, "learning_rate": 0.00015775895994886278, "loss": 11.6965, "step": 43588 }, { "epoch": 0.9124382483463116, "grad_norm": 0.27208763360977173, "learning_rate": 0.0001577571701105472, "loss": 11.6781, "step": 43589 }, { "epoch": 0.9124591811102738, "grad_norm": 0.3069908320903778, "learning_rate": 0.0001577553802444666, "loss": 11.6616, "step": 43590 }, { "epoch": 0.912480113874236, "grad_norm": 0.36850395798683167, "learning_rate": 0.00015775359035062183, "loss": 11.6752, "step": 43591 }, { "epoch": 0.9125010466381981, "grad_norm": 0.3086339831352234, "learning_rate": 0.00015775180042901373, "loss": 11.6725, "step": 43592 }, { "epoch": 0.9125219794021603, "grad_norm": 0.3080576956272125, "learning_rate": 0.0001577500104796432, "loss": 11.6527, "step": 43593 }, { "epoch": 0.9125429121661224, "grad_norm": 0.3149469494819641, "learning_rate": 0.0001577482205025111, "loss": 11.6604, "step": 43594 }, { "epoch": 0.9125638449300846, "grad_norm": 0.3236905038356781, "learning_rate": 0.00015774643049761827, "loss": 11.6608, "step": 43595 }, { "epoch": 0.9125847776940467, "grad_norm": 0.3170008957386017, "learning_rate": 0.00015774464046496554, "loss": 11.6633, "step": 43596 }, { "epoch": 0.9126057104580089, "grad_norm": 0.30489182472229004, "learning_rate": 0.00015774285040455382, "loss": 11.6833, "step": 43597 }, { "epoch": 0.9126266432219711, "grad_norm": 0.374979168176651, "learning_rate": 0.00015774106031638395, "loss": 11.6775, "step": 43598 }, { "epoch": 0.9126475759859332, "grad_norm": 0.38686659932136536, "learning_rate": 0.0001577392702004568, "loss": 11.6822, "step": 43599 }, { "epoch": 0.9126685087498954, "grad_norm": 0.2643148601055145, "learning_rate": 0.00015773748005677325, "loss": 11.6695, "step": 43600 }, { "epoch": 0.9126894415138574, "grad_norm": 0.34670794010162354, "learning_rate": 0.0001577356898853341, "loss": 11.6741, "step": 43601 }, { "epoch": 0.9127103742778196, "grad_norm": 0.3268032371997833, "learning_rate": 0.00015773389968614025, "loss": 11.6743, "step": 43602 }, { "epoch": 0.9127313070417818, "grad_norm": 0.3463371694087982, "learning_rate": 0.00015773210945919258, "loss": 11.6649, "step": 43603 }, { "epoch": 0.9127522398057439, "grad_norm": 0.23958168923854828, "learning_rate": 0.00015773031920449194, "loss": 11.6591, "step": 43604 }, { "epoch": 0.9127731725697061, "grad_norm": 0.2736458480358124, "learning_rate": 0.00015772852892203916, "loss": 11.6747, "step": 43605 }, { "epoch": 0.9127941053336682, "grad_norm": 0.32020851969718933, "learning_rate": 0.00015772673861183512, "loss": 11.6717, "step": 43606 }, { "epoch": 0.9128150380976304, "grad_norm": 0.2655104398727417, "learning_rate": 0.00015772494827388065, "loss": 11.6652, "step": 43607 }, { "epoch": 0.9128359708615925, "grad_norm": 0.484022319316864, "learning_rate": 0.00015772315790817665, "loss": 11.6682, "step": 43608 }, { "epoch": 0.9128569036255547, "grad_norm": 0.33066844940185547, "learning_rate": 0.000157721367514724, "loss": 11.6665, "step": 43609 }, { "epoch": 0.9128778363895169, "grad_norm": 0.2776983082294464, "learning_rate": 0.0001577195770935235, "loss": 11.6704, "step": 43610 }, { "epoch": 0.912898769153479, "grad_norm": 0.264278769493103, "learning_rate": 0.0001577177866445761, "loss": 11.6719, "step": 43611 }, { "epoch": 0.9129197019174412, "grad_norm": 0.3426366150379181, "learning_rate": 0.00015771599616788256, "loss": 11.6647, "step": 43612 }, { "epoch": 0.9129406346814033, "grad_norm": 0.3405935764312744, "learning_rate": 0.00015771420566344378, "loss": 11.6654, "step": 43613 }, { "epoch": 0.9129615674453655, "grad_norm": 0.3437144160270691, "learning_rate": 0.0001577124151312606, "loss": 11.6765, "step": 43614 }, { "epoch": 0.9129825002093276, "grad_norm": 0.406792551279068, "learning_rate": 0.00015771062457133398, "loss": 11.6915, "step": 43615 }, { "epoch": 0.9130034329732898, "grad_norm": 0.3429487347602844, "learning_rate": 0.00015770883398366465, "loss": 11.6671, "step": 43616 }, { "epoch": 0.913024365737252, "grad_norm": 0.30871090292930603, "learning_rate": 0.00015770704336825353, "loss": 11.678, "step": 43617 }, { "epoch": 0.9130452985012141, "grad_norm": 0.36832112073898315, "learning_rate": 0.00015770525272510152, "loss": 11.6643, "step": 43618 }, { "epoch": 0.9130662312651763, "grad_norm": 0.3883631229400635, "learning_rate": 0.00015770346205420938, "loss": 11.6751, "step": 43619 }, { "epoch": 0.9130871640291384, "grad_norm": 0.2707176208496094, "learning_rate": 0.00015770167135557806, "loss": 11.6711, "step": 43620 }, { "epoch": 0.9131080967931006, "grad_norm": 0.29979830980300903, "learning_rate": 0.00015769988062920839, "loss": 11.6844, "step": 43621 }, { "epoch": 0.9131290295570628, "grad_norm": 0.3001900911331177, "learning_rate": 0.0001576980898751012, "loss": 11.6603, "step": 43622 }, { "epoch": 0.9131499623210249, "grad_norm": 0.3385394811630249, "learning_rate": 0.00015769629909325743, "loss": 11.6816, "step": 43623 }, { "epoch": 0.913170895084987, "grad_norm": 0.3142526149749756, "learning_rate": 0.00015769450828367787, "loss": 11.6739, "step": 43624 }, { "epoch": 0.9131918278489491, "grad_norm": 0.3764304220676422, "learning_rate": 0.00015769271744636343, "loss": 11.675, "step": 43625 }, { "epoch": 0.9132127606129113, "grad_norm": 0.2515937089920044, "learning_rate": 0.0001576909265813149, "loss": 11.6698, "step": 43626 }, { "epoch": 0.9132336933768734, "grad_norm": 0.3652522563934326, "learning_rate": 0.00015768913568853322, "loss": 11.6608, "step": 43627 }, { "epoch": 0.9132546261408356, "grad_norm": 0.3851458728313446, "learning_rate": 0.0001576873447680192, "loss": 11.6751, "step": 43628 }, { "epoch": 0.9132755589047978, "grad_norm": 0.32551875710487366, "learning_rate": 0.00015768555381977372, "loss": 11.6774, "step": 43629 }, { "epoch": 0.9132964916687599, "grad_norm": 0.34405383467674255, "learning_rate": 0.00015768376284379764, "loss": 11.6765, "step": 43630 }, { "epoch": 0.9133174244327221, "grad_norm": 0.29103541374206543, "learning_rate": 0.0001576819718400918, "loss": 11.6735, "step": 43631 }, { "epoch": 0.9133383571966842, "grad_norm": 0.3383607864379883, "learning_rate": 0.00015768018080865713, "loss": 11.6718, "step": 43632 }, { "epoch": 0.9133592899606464, "grad_norm": 0.34463050961494446, "learning_rate": 0.00015767838974949441, "loss": 11.6608, "step": 43633 }, { "epoch": 0.9133802227246085, "grad_norm": 0.39238041639328003, "learning_rate": 0.0001576765986626045, "loss": 11.6807, "step": 43634 }, { "epoch": 0.9134011554885707, "grad_norm": 0.32781854271888733, "learning_rate": 0.00015767480754798837, "loss": 11.6768, "step": 43635 }, { "epoch": 0.9134220882525329, "grad_norm": 0.32016754150390625, "learning_rate": 0.00015767301640564674, "loss": 11.6739, "step": 43636 }, { "epoch": 0.913443021016495, "grad_norm": 0.3422195315361023, "learning_rate": 0.00015767122523558055, "loss": 11.6772, "step": 43637 }, { "epoch": 0.9134639537804572, "grad_norm": 0.2487921416759491, "learning_rate": 0.00015766943403779064, "loss": 11.6652, "step": 43638 }, { "epoch": 0.9134848865444193, "grad_norm": 0.3544681966304779, "learning_rate": 0.0001576676428122779, "loss": 11.6662, "step": 43639 }, { "epoch": 0.9135058193083815, "grad_norm": 0.36220216751098633, "learning_rate": 0.00015766585155904314, "loss": 11.6573, "step": 43640 }, { "epoch": 0.9135267520723437, "grad_norm": 0.3301977515220642, "learning_rate": 0.00015766406027808728, "loss": 11.6753, "step": 43641 }, { "epoch": 0.9135476848363058, "grad_norm": 0.31057479977607727, "learning_rate": 0.00015766226896941115, "loss": 11.6658, "step": 43642 }, { "epoch": 0.913568617600268, "grad_norm": 0.34596070647239685, "learning_rate": 0.00015766047763301558, "loss": 11.6707, "step": 43643 }, { "epoch": 0.9135895503642301, "grad_norm": 0.2880198061466217, "learning_rate": 0.0001576586862689015, "loss": 11.6534, "step": 43644 }, { "epoch": 0.9136104831281923, "grad_norm": 0.28226616978645325, "learning_rate": 0.00015765689487706969, "loss": 11.6656, "step": 43645 }, { "epoch": 0.9136314158921544, "grad_norm": 0.3471505343914032, "learning_rate": 0.00015765510345752107, "loss": 11.6848, "step": 43646 }, { "epoch": 0.9136523486561166, "grad_norm": 0.34661340713500977, "learning_rate": 0.00015765331201025653, "loss": 11.6546, "step": 43647 }, { "epoch": 0.9136732814200788, "grad_norm": 0.28174543380737305, "learning_rate": 0.00015765152053527683, "loss": 11.6798, "step": 43648 }, { "epoch": 0.9136942141840408, "grad_norm": 0.29107457399368286, "learning_rate": 0.0001576497290325829, "loss": 11.6598, "step": 43649 }, { "epoch": 0.913715146948003, "grad_norm": 0.2597367763519287, "learning_rate": 0.00015764793750217563, "loss": 11.6705, "step": 43650 }, { "epoch": 0.9137360797119651, "grad_norm": 0.296417236328125, "learning_rate": 0.0001576461459440558, "loss": 11.6466, "step": 43651 }, { "epoch": 0.9137570124759273, "grad_norm": 0.3203657567501068, "learning_rate": 0.00015764435435822432, "loss": 11.6713, "step": 43652 }, { "epoch": 0.9137779452398894, "grad_norm": 0.2637949287891388, "learning_rate": 0.00015764256274468206, "loss": 11.6742, "step": 43653 }, { "epoch": 0.9137988780038516, "grad_norm": 0.37549322843551636, "learning_rate": 0.00015764077110342988, "loss": 11.6548, "step": 43654 }, { "epoch": 0.9138198107678138, "grad_norm": 0.26115214824676514, "learning_rate": 0.0001576389794344686, "loss": 11.675, "step": 43655 }, { "epoch": 0.9138407435317759, "grad_norm": 0.390300452709198, "learning_rate": 0.00015763718773779907, "loss": 11.6831, "step": 43656 }, { "epoch": 0.9138616762957381, "grad_norm": 0.26067936420440674, "learning_rate": 0.00015763539601342224, "loss": 11.6695, "step": 43657 }, { "epoch": 0.9138826090597002, "grad_norm": 0.2485819309949875, "learning_rate": 0.0001576336042613389, "loss": 11.6775, "step": 43658 }, { "epoch": 0.9139035418236624, "grad_norm": 0.331000417470932, "learning_rate": 0.00015763181248154996, "loss": 11.6693, "step": 43659 }, { "epoch": 0.9139244745876246, "grad_norm": 0.32043352723121643, "learning_rate": 0.00015763002067405624, "loss": 11.6643, "step": 43660 }, { "epoch": 0.9139454073515867, "grad_norm": 0.31275463104248047, "learning_rate": 0.0001576282288388586, "loss": 11.6644, "step": 43661 }, { "epoch": 0.9139663401155489, "grad_norm": 0.335716187953949, "learning_rate": 0.00015762643697595793, "loss": 11.6618, "step": 43662 }, { "epoch": 0.913987272879511, "grad_norm": 0.3657715916633606, "learning_rate": 0.00015762464508535504, "loss": 11.6648, "step": 43663 }, { "epoch": 0.9140082056434732, "grad_norm": 0.30818259716033936, "learning_rate": 0.00015762285316705084, "loss": 11.6509, "step": 43664 }, { "epoch": 0.9140291384074353, "grad_norm": 0.2892293632030487, "learning_rate": 0.0001576210612210462, "loss": 11.6786, "step": 43665 }, { "epoch": 0.9140500711713975, "grad_norm": 0.2793271541595459, "learning_rate": 0.000157619269247342, "loss": 11.6672, "step": 43666 }, { "epoch": 0.9140710039353597, "grad_norm": 0.29461032152175903, "learning_rate": 0.000157617477245939, "loss": 11.6762, "step": 43667 }, { "epoch": 0.9140919366993218, "grad_norm": 0.2605469524860382, "learning_rate": 0.00015761568521683813, "loss": 11.6879, "step": 43668 }, { "epoch": 0.914112869463284, "grad_norm": 0.2988441586494446, "learning_rate": 0.00015761389316004028, "loss": 11.6638, "step": 43669 }, { "epoch": 0.914133802227246, "grad_norm": 0.306924045085907, "learning_rate": 0.00015761210107554624, "loss": 11.6712, "step": 43670 }, { "epoch": 0.9141547349912083, "grad_norm": 0.34502220153808594, "learning_rate": 0.00015761030896335694, "loss": 11.6796, "step": 43671 }, { "epoch": 0.9141756677551703, "grad_norm": 0.37902066111564636, "learning_rate": 0.0001576085168234732, "loss": 11.6586, "step": 43672 }, { "epoch": 0.9141966005191325, "grad_norm": 0.26477769017219543, "learning_rate": 0.00015760672465589589, "loss": 11.6817, "step": 43673 }, { "epoch": 0.9142175332830947, "grad_norm": 0.26082950830459595, "learning_rate": 0.00015760493246062587, "loss": 11.6547, "step": 43674 }, { "epoch": 0.9142384660470568, "grad_norm": 0.2730691134929657, "learning_rate": 0.00015760314023766399, "loss": 11.6755, "step": 43675 }, { "epoch": 0.914259398811019, "grad_norm": 0.23757176101207733, "learning_rate": 0.00015760134798701114, "loss": 11.6516, "step": 43676 }, { "epoch": 0.9142803315749811, "grad_norm": 0.2968084514141083, "learning_rate": 0.00015759955570866815, "loss": 11.6621, "step": 43677 }, { "epoch": 0.9143012643389433, "grad_norm": 0.2868477404117584, "learning_rate": 0.00015759776340263594, "loss": 11.6656, "step": 43678 }, { "epoch": 0.9143221971029055, "grad_norm": 0.2515839636325836, "learning_rate": 0.0001575959710689153, "loss": 11.67, "step": 43679 }, { "epoch": 0.9143431298668676, "grad_norm": 0.2914751172065735, "learning_rate": 0.00015759417870750712, "loss": 11.67, "step": 43680 }, { "epoch": 0.9143640626308298, "grad_norm": 0.2663935422897339, "learning_rate": 0.00015759238631841228, "loss": 11.6704, "step": 43681 }, { "epoch": 0.9143849953947919, "grad_norm": 0.3006533086299896, "learning_rate": 0.0001575905939016316, "loss": 11.6631, "step": 43682 }, { "epoch": 0.9144059281587541, "grad_norm": 0.6809436678886414, "learning_rate": 0.000157588801457166, "loss": 11.676, "step": 43683 }, { "epoch": 0.9144268609227162, "grad_norm": 0.29949116706848145, "learning_rate": 0.0001575870089850163, "loss": 11.6736, "step": 43684 }, { "epoch": 0.9144477936866784, "grad_norm": 0.3606092035770416, "learning_rate": 0.00015758521648518337, "loss": 11.6732, "step": 43685 }, { "epoch": 0.9144687264506406, "grad_norm": 0.32387930154800415, "learning_rate": 0.00015758342395766808, "loss": 11.6636, "step": 43686 }, { "epoch": 0.9144896592146027, "grad_norm": 0.29870638251304626, "learning_rate": 0.00015758163140247128, "loss": 11.665, "step": 43687 }, { "epoch": 0.9145105919785649, "grad_norm": 0.43498873710632324, "learning_rate": 0.00015757983881959379, "loss": 11.6656, "step": 43688 }, { "epoch": 0.914531524742527, "grad_norm": 0.3295828700065613, "learning_rate": 0.00015757804620903658, "loss": 11.6669, "step": 43689 }, { "epoch": 0.9145524575064892, "grad_norm": 0.2809517979621887, "learning_rate": 0.00015757625357080044, "loss": 11.6639, "step": 43690 }, { "epoch": 0.9145733902704513, "grad_norm": 0.27659353613853455, "learning_rate": 0.00015757446090488623, "loss": 11.6456, "step": 43691 }, { "epoch": 0.9145943230344135, "grad_norm": 0.3438109755516052, "learning_rate": 0.00015757266821129478, "loss": 11.6858, "step": 43692 }, { "epoch": 0.9146152557983757, "grad_norm": 0.41503357887268066, "learning_rate": 0.00015757087549002707, "loss": 11.6518, "step": 43693 }, { "epoch": 0.9146361885623377, "grad_norm": 0.3231738805770874, "learning_rate": 0.00015756908274108385, "loss": 11.6709, "step": 43694 }, { "epoch": 0.9146571213263, "grad_norm": 0.37274250388145447, "learning_rate": 0.00015756728996446602, "loss": 11.6826, "step": 43695 }, { "epoch": 0.914678054090262, "grad_norm": 0.25454118847846985, "learning_rate": 0.00015756549716017444, "loss": 11.6704, "step": 43696 }, { "epoch": 0.9146989868542242, "grad_norm": 0.3489629328250885, "learning_rate": 0.00015756370432820998, "loss": 11.666, "step": 43697 }, { "epoch": 0.9147199196181864, "grad_norm": 0.30568569898605347, "learning_rate": 0.0001575619114685735, "loss": 11.6668, "step": 43698 }, { "epoch": 0.9147408523821485, "grad_norm": 0.3700512647628784, "learning_rate": 0.0001575601185812658, "loss": 11.6773, "step": 43699 }, { "epoch": 0.9147617851461107, "grad_norm": 0.35971659421920776, "learning_rate": 0.00015755832566628785, "loss": 11.6886, "step": 43700 }, { "epoch": 0.9147827179100728, "grad_norm": 0.3211808502674103, "learning_rate": 0.00015755653272364045, "loss": 11.6664, "step": 43701 }, { "epoch": 0.914803650674035, "grad_norm": 0.30240383744239807, "learning_rate": 0.00015755473975332445, "loss": 11.6702, "step": 43702 }, { "epoch": 0.9148245834379971, "grad_norm": 0.3347243666648865, "learning_rate": 0.00015755294675534075, "loss": 11.6706, "step": 43703 }, { "epoch": 0.9148455162019593, "grad_norm": 0.3194285035133362, "learning_rate": 0.00015755115372969022, "loss": 11.6819, "step": 43704 }, { "epoch": 0.9148664489659215, "grad_norm": 0.3020629286766052, "learning_rate": 0.00015754936067637366, "loss": 11.6676, "step": 43705 }, { "epoch": 0.9148873817298836, "grad_norm": 0.2898366153240204, "learning_rate": 0.000157547567595392, "loss": 11.6777, "step": 43706 }, { "epoch": 0.9149083144938458, "grad_norm": 0.2895156741142273, "learning_rate": 0.00015754577448674604, "loss": 11.6458, "step": 43707 }, { "epoch": 0.9149292472578079, "grad_norm": 0.36070239543914795, "learning_rate": 0.00015754398135043668, "loss": 11.6805, "step": 43708 }, { "epoch": 0.9149501800217701, "grad_norm": 0.41963064670562744, "learning_rate": 0.0001575421881864648, "loss": 11.6838, "step": 43709 }, { "epoch": 0.9149711127857322, "grad_norm": 0.40059807896614075, "learning_rate": 0.0001575403949948312, "loss": 11.6837, "step": 43710 }, { "epoch": 0.9149920455496944, "grad_norm": 0.27121251821517944, "learning_rate": 0.0001575386017755368, "loss": 11.6624, "step": 43711 }, { "epoch": 0.9150129783136566, "grad_norm": 0.326701819896698, "learning_rate": 0.00015753680852858247, "loss": 11.6775, "step": 43712 }, { "epoch": 0.9150339110776187, "grad_norm": 0.34781569242477417, "learning_rate": 0.000157535015253969, "loss": 11.6739, "step": 43713 }, { "epoch": 0.9150548438415809, "grad_norm": 0.31393590569496155, "learning_rate": 0.0001575332219516973, "loss": 11.6704, "step": 43714 }, { "epoch": 0.915075776605543, "grad_norm": 0.3038957417011261, "learning_rate": 0.00015753142862176824, "loss": 11.6808, "step": 43715 }, { "epoch": 0.9150967093695052, "grad_norm": 0.3101770579814911, "learning_rate": 0.00015752963526418268, "loss": 11.65, "step": 43716 }, { "epoch": 0.9151176421334674, "grad_norm": 0.2604246139526367, "learning_rate": 0.00015752784187894142, "loss": 11.6637, "step": 43717 }, { "epoch": 0.9151385748974294, "grad_norm": 0.3544633984565735, "learning_rate": 0.00015752604846604542, "loss": 11.6607, "step": 43718 }, { "epoch": 0.9151595076613916, "grad_norm": 0.32705870270729065, "learning_rate": 0.0001575242550254955, "loss": 11.6723, "step": 43719 }, { "epoch": 0.9151804404253537, "grad_norm": 0.30944809317588806, "learning_rate": 0.00015752246155729252, "loss": 11.6818, "step": 43720 }, { "epoch": 0.9152013731893159, "grad_norm": 0.28484877943992615, "learning_rate": 0.0001575206680614373, "loss": 11.6766, "step": 43721 }, { "epoch": 0.915222305953278, "grad_norm": 0.28178513050079346, "learning_rate": 0.00015751887453793075, "loss": 11.6547, "step": 43722 }, { "epoch": 0.9152432387172402, "grad_norm": 0.30431947112083435, "learning_rate": 0.00015751708098677374, "loss": 11.6651, "step": 43723 }, { "epoch": 0.9152641714812024, "grad_norm": 0.3258439898490906, "learning_rate": 0.00015751528740796713, "loss": 11.6597, "step": 43724 }, { "epoch": 0.9152851042451645, "grad_norm": 0.3329002261161804, "learning_rate": 0.00015751349380151175, "loss": 11.683, "step": 43725 }, { "epoch": 0.9153060370091267, "grad_norm": 0.2899596393108368, "learning_rate": 0.0001575117001674085, "loss": 11.6712, "step": 43726 }, { "epoch": 0.9153269697730888, "grad_norm": 0.3219950497150421, "learning_rate": 0.00015750990650565818, "loss": 11.6571, "step": 43727 }, { "epoch": 0.915347902537051, "grad_norm": 0.33705830574035645, "learning_rate": 0.00015750811281626175, "loss": 11.6727, "step": 43728 }, { "epoch": 0.9153688353010131, "grad_norm": 0.24476729333400726, "learning_rate": 0.00015750631909921997, "loss": 11.6806, "step": 43729 }, { "epoch": 0.9153897680649753, "grad_norm": 0.277103453874588, "learning_rate": 0.0001575045253545338, "loss": 11.6695, "step": 43730 }, { "epoch": 0.9154107008289375, "grad_norm": 0.2966976761817932, "learning_rate": 0.000157502731582204, "loss": 11.6702, "step": 43731 }, { "epoch": 0.9154316335928996, "grad_norm": 0.3075301945209503, "learning_rate": 0.0001575009377822315, "loss": 11.6724, "step": 43732 }, { "epoch": 0.9154525663568618, "grad_norm": 0.28173357248306274, "learning_rate": 0.00015749914395461718, "loss": 11.6595, "step": 43733 }, { "epoch": 0.9154734991208239, "grad_norm": 0.3029716908931732, "learning_rate": 0.00015749735009936181, "loss": 11.6608, "step": 43734 }, { "epoch": 0.9154944318847861, "grad_norm": 0.30143243074417114, "learning_rate": 0.0001574955562164664, "loss": 11.684, "step": 43735 }, { "epoch": 0.9155153646487483, "grad_norm": 0.3639257252216339, "learning_rate": 0.00015749376230593165, "loss": 11.6618, "step": 43736 }, { "epoch": 0.9155362974127104, "grad_norm": 0.34523969888687134, "learning_rate": 0.0001574919683677585, "loss": 11.6797, "step": 43737 }, { "epoch": 0.9155572301766726, "grad_norm": 0.26909226179122925, "learning_rate": 0.0001574901744019478, "loss": 11.6669, "step": 43738 }, { "epoch": 0.9155781629406347, "grad_norm": 0.38859203457832336, "learning_rate": 0.00015748838040850047, "loss": 11.6636, "step": 43739 }, { "epoch": 0.9155990957045969, "grad_norm": 0.2558569610118866, "learning_rate": 0.0001574865863874173, "loss": 11.656, "step": 43740 }, { "epoch": 0.9156200284685589, "grad_norm": 0.27405261993408203, "learning_rate": 0.00015748479233869918, "loss": 11.667, "step": 43741 }, { "epoch": 0.9156409612325211, "grad_norm": 0.36296743154525757, "learning_rate": 0.00015748299826234697, "loss": 11.6739, "step": 43742 }, { "epoch": 0.9156618939964833, "grad_norm": 0.31510767340660095, "learning_rate": 0.00015748120415836153, "loss": 11.6724, "step": 43743 }, { "epoch": 0.9156828267604454, "grad_norm": 0.2953372001647949, "learning_rate": 0.00015747941002674368, "loss": 11.6589, "step": 43744 }, { "epoch": 0.9157037595244076, "grad_norm": 0.31262776255607605, "learning_rate": 0.0001574776158674944, "loss": 11.6598, "step": 43745 }, { "epoch": 0.9157246922883697, "grad_norm": 0.2895883321762085, "learning_rate": 0.00015747582168061444, "loss": 11.6872, "step": 43746 }, { "epoch": 0.9157456250523319, "grad_norm": 0.388088196516037, "learning_rate": 0.00015747402746610473, "loss": 11.6864, "step": 43747 }, { "epoch": 0.915766557816294, "grad_norm": 0.3276898264884949, "learning_rate": 0.00015747223322396602, "loss": 11.6541, "step": 43748 }, { "epoch": 0.9157874905802562, "grad_norm": 0.3520232141017914, "learning_rate": 0.00015747043895419932, "loss": 11.6707, "step": 43749 }, { "epoch": 0.9158084233442184, "grad_norm": 0.313421368598938, "learning_rate": 0.00015746864465680544, "loss": 11.6469, "step": 43750 }, { "epoch": 0.9158293561081805, "grad_norm": 0.3524366021156311, "learning_rate": 0.0001574668503317852, "loss": 11.6534, "step": 43751 }, { "epoch": 0.9158502888721427, "grad_norm": 0.36548706889152527, "learning_rate": 0.0001574650559791395, "loss": 11.6612, "step": 43752 }, { "epoch": 0.9158712216361048, "grad_norm": 0.26228630542755127, "learning_rate": 0.0001574632615988692, "loss": 11.6638, "step": 43753 }, { "epoch": 0.915892154400067, "grad_norm": 0.3592624366283417, "learning_rate": 0.00015746146719097513, "loss": 11.665, "step": 43754 }, { "epoch": 0.9159130871640292, "grad_norm": 0.31824493408203125, "learning_rate": 0.00015745967275545823, "loss": 11.6505, "step": 43755 }, { "epoch": 0.9159340199279913, "grad_norm": 0.3429669141769409, "learning_rate": 0.00015745787829231928, "loss": 11.6694, "step": 43756 }, { "epoch": 0.9159549526919535, "grad_norm": 0.3364109396934509, "learning_rate": 0.00015745608380155922, "loss": 11.6793, "step": 43757 }, { "epoch": 0.9159758854559156, "grad_norm": 0.34389519691467285, "learning_rate": 0.00015745428928317883, "loss": 11.6806, "step": 43758 }, { "epoch": 0.9159968182198778, "grad_norm": 0.33537787199020386, "learning_rate": 0.00015745249473717902, "loss": 11.6721, "step": 43759 }, { "epoch": 0.9160177509838399, "grad_norm": 0.30864211916923523, "learning_rate": 0.00015745070016356065, "loss": 11.6633, "step": 43760 }, { "epoch": 0.9160386837478021, "grad_norm": 0.34296876192092896, "learning_rate": 0.00015744890556232454, "loss": 11.6754, "step": 43761 }, { "epoch": 0.9160596165117643, "grad_norm": 0.37972235679626465, "learning_rate": 0.00015744711093347165, "loss": 11.6743, "step": 43762 }, { "epoch": 0.9160805492757264, "grad_norm": 0.30664345622062683, "learning_rate": 0.00015744531627700275, "loss": 11.6606, "step": 43763 }, { "epoch": 0.9161014820396886, "grad_norm": 0.3198918402194977, "learning_rate": 0.00015744352159291874, "loss": 11.6661, "step": 43764 }, { "epoch": 0.9161224148036506, "grad_norm": 0.3382595181465149, "learning_rate": 0.00015744172688122047, "loss": 11.6702, "step": 43765 }, { "epoch": 0.9161433475676128, "grad_norm": 0.3997773230075836, "learning_rate": 0.0001574399321419088, "loss": 11.6637, "step": 43766 }, { "epoch": 0.9161642803315749, "grad_norm": 0.354976087808609, "learning_rate": 0.00015743813737498466, "loss": 11.6637, "step": 43767 }, { "epoch": 0.9161852130955371, "grad_norm": 0.32035598158836365, "learning_rate": 0.0001574363425804488, "loss": 11.6735, "step": 43768 }, { "epoch": 0.9162061458594993, "grad_norm": 0.2990048825740814, "learning_rate": 0.00015743454775830217, "loss": 11.6699, "step": 43769 }, { "epoch": 0.9162270786234614, "grad_norm": 0.270689457654953, "learning_rate": 0.0001574327529085456, "loss": 11.6669, "step": 43770 }, { "epoch": 0.9162480113874236, "grad_norm": 0.34049496054649353, "learning_rate": 0.0001574309580311799, "loss": 11.6569, "step": 43771 }, { "epoch": 0.9162689441513857, "grad_norm": 0.3094845116138458, "learning_rate": 0.00015742916312620607, "loss": 11.6586, "step": 43772 }, { "epoch": 0.9162898769153479, "grad_norm": 0.31576594710350037, "learning_rate": 0.00015742736819362484, "loss": 11.6661, "step": 43773 }, { "epoch": 0.91631080967931, "grad_norm": 0.2922132909297943, "learning_rate": 0.00015742557323343714, "loss": 11.6527, "step": 43774 }, { "epoch": 0.9163317424432722, "grad_norm": 0.3289233446121216, "learning_rate": 0.0001574237782456438, "loss": 11.6699, "step": 43775 }, { "epoch": 0.9163526752072344, "grad_norm": 0.2608480751514435, "learning_rate": 0.0001574219832302457, "loss": 11.6595, "step": 43776 }, { "epoch": 0.9163736079711965, "grad_norm": 0.3196831941604614, "learning_rate": 0.00015742018818724372, "loss": 11.6678, "step": 43777 }, { "epoch": 0.9163945407351587, "grad_norm": 0.2767147123813629, "learning_rate": 0.00015741839311663868, "loss": 11.6746, "step": 43778 }, { "epoch": 0.9164154734991208, "grad_norm": 0.4019545614719391, "learning_rate": 0.0001574165980184315, "loss": 11.6771, "step": 43779 }, { "epoch": 0.916436406263083, "grad_norm": 0.35912051796913147, "learning_rate": 0.000157414802892623, "loss": 11.674, "step": 43780 }, { "epoch": 0.9164573390270452, "grad_norm": 0.3231385052204132, "learning_rate": 0.00015741300773921405, "loss": 11.6648, "step": 43781 }, { "epoch": 0.9164782717910073, "grad_norm": 0.3539418578147888, "learning_rate": 0.0001574112125582055, "loss": 11.6819, "step": 43782 }, { "epoch": 0.9164992045549695, "grad_norm": 0.37365007400512695, "learning_rate": 0.00015740941734959825, "loss": 11.6693, "step": 43783 }, { "epoch": 0.9165201373189316, "grad_norm": 0.28698205947875977, "learning_rate": 0.00015740762211339314, "loss": 11.6568, "step": 43784 }, { "epoch": 0.9165410700828938, "grad_norm": 0.33283770084381104, "learning_rate": 0.00015740582684959102, "loss": 11.6794, "step": 43785 }, { "epoch": 0.9165620028468559, "grad_norm": 0.363348126411438, "learning_rate": 0.00015740403155819279, "loss": 11.6632, "step": 43786 }, { "epoch": 0.916582935610818, "grad_norm": 0.32044947147369385, "learning_rate": 0.00015740223623919927, "loss": 11.6678, "step": 43787 }, { "epoch": 0.9166038683747803, "grad_norm": 0.32581204175949097, "learning_rate": 0.00015740044089261138, "loss": 11.6619, "step": 43788 }, { "epoch": 0.9166248011387423, "grad_norm": 0.3280275762081146, "learning_rate": 0.00015739864551842994, "loss": 11.6773, "step": 43789 }, { "epoch": 0.9166457339027045, "grad_norm": 0.2678229510784149, "learning_rate": 0.0001573968501166558, "loss": 11.6655, "step": 43790 }, { "epoch": 0.9166666666666666, "grad_norm": 0.3405246436595917, "learning_rate": 0.00015739505468728986, "loss": 11.6728, "step": 43791 }, { "epoch": 0.9166875994306288, "grad_norm": 0.2628442645072937, "learning_rate": 0.00015739325923033292, "loss": 11.6701, "step": 43792 }, { "epoch": 0.9167085321945909, "grad_norm": 0.39443060755729675, "learning_rate": 0.00015739146374578594, "loss": 11.6728, "step": 43793 }, { "epoch": 0.9167294649585531, "grad_norm": 0.32111966609954834, "learning_rate": 0.00015738966823364972, "loss": 11.672, "step": 43794 }, { "epoch": 0.9167503977225153, "grad_norm": 0.3072632849216461, "learning_rate": 0.00015738787269392512, "loss": 11.6558, "step": 43795 }, { "epoch": 0.9167713304864774, "grad_norm": 0.25435322523117065, "learning_rate": 0.00015738607712661304, "loss": 11.642, "step": 43796 }, { "epoch": 0.9167922632504396, "grad_norm": 0.28671640157699585, "learning_rate": 0.0001573842815317143, "loss": 11.6859, "step": 43797 }, { "epoch": 0.9168131960144017, "grad_norm": 0.3063538074493408, "learning_rate": 0.00015738248590922984, "loss": 11.6662, "step": 43798 }, { "epoch": 0.9168341287783639, "grad_norm": 0.35159754753112793, "learning_rate": 0.00015738069025916045, "loss": 11.6805, "step": 43799 }, { "epoch": 0.9168550615423261, "grad_norm": 0.2845803201198578, "learning_rate": 0.000157378894581507, "loss": 11.6485, "step": 43800 }, { "epoch": 0.9168759943062882, "grad_norm": 0.2956168055534363, "learning_rate": 0.00015737709887627034, "loss": 11.6727, "step": 43801 }, { "epoch": 0.9168969270702504, "grad_norm": 0.29911473393440247, "learning_rate": 0.00015737530314345136, "loss": 11.6579, "step": 43802 }, { "epoch": 0.9169178598342125, "grad_norm": 0.30508795380592346, "learning_rate": 0.00015737350738305095, "loss": 11.6606, "step": 43803 }, { "epoch": 0.9169387925981747, "grad_norm": 0.2484099268913269, "learning_rate": 0.00015737171159506996, "loss": 11.6611, "step": 43804 }, { "epoch": 0.9169597253621368, "grad_norm": 0.2954757809638977, "learning_rate": 0.00015736991577950922, "loss": 11.675, "step": 43805 }, { "epoch": 0.916980658126099, "grad_norm": 0.3088184893131256, "learning_rate": 0.0001573681199363696, "loss": 11.6643, "step": 43806 }, { "epoch": 0.9170015908900612, "grad_norm": 0.3717586100101471, "learning_rate": 0.00015736632406565198, "loss": 11.6806, "step": 43807 }, { "epoch": 0.9170225236540233, "grad_norm": 0.41966238617897034, "learning_rate": 0.00015736452816735724, "loss": 11.6786, "step": 43808 }, { "epoch": 0.9170434564179855, "grad_norm": 0.3277982175350189, "learning_rate": 0.00015736273224148617, "loss": 11.6607, "step": 43809 }, { "epoch": 0.9170643891819475, "grad_norm": 0.34215158224105835, "learning_rate": 0.00015736093628803972, "loss": 11.6601, "step": 43810 }, { "epoch": 0.9170853219459097, "grad_norm": 0.28760236501693726, "learning_rate": 0.00015735914030701873, "loss": 11.6509, "step": 43811 }, { "epoch": 0.9171062547098718, "grad_norm": 0.28337544202804565, "learning_rate": 0.000157357344298424, "loss": 11.6686, "step": 43812 }, { "epoch": 0.917127187473834, "grad_norm": 0.355729341506958, "learning_rate": 0.00015735554826225653, "loss": 11.6592, "step": 43813 }, { "epoch": 0.9171481202377962, "grad_norm": 0.3784213364124298, "learning_rate": 0.000157353752198517, "loss": 11.6724, "step": 43814 }, { "epoch": 0.9171690530017583, "grad_norm": 0.2605559527873993, "learning_rate": 0.00015735195610720645, "loss": 11.6756, "step": 43815 }, { "epoch": 0.9171899857657205, "grad_norm": 0.3360183835029602, "learning_rate": 0.00015735015998832565, "loss": 11.6837, "step": 43816 }, { "epoch": 0.9172109185296826, "grad_norm": 0.27609115839004517, "learning_rate": 0.0001573483638418755, "loss": 11.6713, "step": 43817 }, { "epoch": 0.9172318512936448, "grad_norm": 0.2790692150592804, "learning_rate": 0.00015734656766785678, "loss": 11.6698, "step": 43818 }, { "epoch": 0.917252784057607, "grad_norm": 0.30180394649505615, "learning_rate": 0.00015734477146627044, "loss": 11.6699, "step": 43819 }, { "epoch": 0.9172737168215691, "grad_norm": 0.37583112716674805, "learning_rate": 0.00015734297523711734, "loss": 11.6806, "step": 43820 }, { "epoch": 0.9172946495855313, "grad_norm": 0.2994332015514374, "learning_rate": 0.0001573411789803983, "loss": 11.6704, "step": 43821 }, { "epoch": 0.9173155823494934, "grad_norm": 0.29085108637809753, "learning_rate": 0.0001573393826961142, "loss": 11.6741, "step": 43822 }, { "epoch": 0.9173365151134556, "grad_norm": 0.36683276295661926, "learning_rate": 0.00015733758638426595, "loss": 11.6668, "step": 43823 }, { "epoch": 0.9173574478774177, "grad_norm": 0.2781656086444855, "learning_rate": 0.00015733579004485434, "loss": 11.6681, "step": 43824 }, { "epoch": 0.9173783806413799, "grad_norm": 0.3099590837955475, "learning_rate": 0.00015733399367788027, "loss": 11.6796, "step": 43825 }, { "epoch": 0.9173993134053421, "grad_norm": 0.3036591708660126, "learning_rate": 0.0001573321972833446, "loss": 11.6726, "step": 43826 }, { "epoch": 0.9174202461693042, "grad_norm": 0.35663387179374695, "learning_rate": 0.0001573304008612482, "loss": 11.6576, "step": 43827 }, { "epoch": 0.9174411789332664, "grad_norm": 0.4059715270996094, "learning_rate": 0.00015732860441159194, "loss": 11.6654, "step": 43828 }, { "epoch": 0.9174621116972285, "grad_norm": 0.37434056401252747, "learning_rate": 0.00015732680793437663, "loss": 11.6693, "step": 43829 }, { "epoch": 0.9174830444611907, "grad_norm": 0.28369343280792236, "learning_rate": 0.0001573250114296032, "loss": 11.6887, "step": 43830 }, { "epoch": 0.9175039772251528, "grad_norm": 0.3036670684814453, "learning_rate": 0.0001573232148972725, "loss": 11.6648, "step": 43831 }, { "epoch": 0.917524909989115, "grad_norm": 0.36207371950149536, "learning_rate": 0.00015732141833738536, "loss": 11.6742, "step": 43832 }, { "epoch": 0.9175458427530772, "grad_norm": 0.37540337443351746, "learning_rate": 0.00015731962174994268, "loss": 11.6769, "step": 43833 }, { "epoch": 0.9175667755170392, "grad_norm": 0.32046717405319214, "learning_rate": 0.00015731782513494534, "loss": 11.6784, "step": 43834 }, { "epoch": 0.9175877082810014, "grad_norm": 0.3809785842895508, "learning_rate": 0.0001573160284923941, "loss": 11.6756, "step": 43835 }, { "epoch": 0.9176086410449635, "grad_norm": 0.29800647497177124, "learning_rate": 0.00015731423182228994, "loss": 11.6567, "step": 43836 }, { "epoch": 0.9176295738089257, "grad_norm": 0.3455052077770233, "learning_rate": 0.00015731243512463367, "loss": 11.6559, "step": 43837 }, { "epoch": 0.9176505065728879, "grad_norm": 0.276967316865921, "learning_rate": 0.0001573106383994262, "loss": 11.6853, "step": 43838 }, { "epoch": 0.91767143933685, "grad_norm": 0.342989057302475, "learning_rate": 0.0001573088416466683, "loss": 11.6879, "step": 43839 }, { "epoch": 0.9176923721008122, "grad_norm": 0.32744789123535156, "learning_rate": 0.00015730704486636093, "loss": 11.6642, "step": 43840 }, { "epoch": 0.9177133048647743, "grad_norm": 0.29814600944519043, "learning_rate": 0.0001573052480585049, "loss": 11.6681, "step": 43841 }, { "epoch": 0.9177342376287365, "grad_norm": 0.33713531494140625, "learning_rate": 0.0001573034512231011, "loss": 11.6626, "step": 43842 }, { "epoch": 0.9177551703926986, "grad_norm": 0.29398834705352783, "learning_rate": 0.00015730165436015037, "loss": 11.6506, "step": 43843 }, { "epoch": 0.9177761031566608, "grad_norm": 0.26704877614974976, "learning_rate": 0.0001572998574696536, "loss": 11.6777, "step": 43844 }, { "epoch": 0.917797035920623, "grad_norm": 0.29713204503059387, "learning_rate": 0.00015729806055161163, "loss": 11.6704, "step": 43845 }, { "epoch": 0.9178179686845851, "grad_norm": 0.30465641617774963, "learning_rate": 0.00015729626360602532, "loss": 11.6741, "step": 43846 }, { "epoch": 0.9178389014485473, "grad_norm": 0.30529096722602844, "learning_rate": 0.00015729446663289556, "loss": 11.6616, "step": 43847 }, { "epoch": 0.9178598342125094, "grad_norm": 0.3471345603466034, "learning_rate": 0.00015729266963222322, "loss": 11.6683, "step": 43848 }, { "epoch": 0.9178807669764716, "grad_norm": 0.5027078986167908, "learning_rate": 0.00015729087260400913, "loss": 11.6696, "step": 43849 }, { "epoch": 0.9179016997404337, "grad_norm": 0.29175153374671936, "learning_rate": 0.00015728907554825417, "loss": 11.669, "step": 43850 }, { "epoch": 0.9179226325043959, "grad_norm": 0.2945271134376526, "learning_rate": 0.0001572872784649592, "loss": 11.6485, "step": 43851 }, { "epoch": 0.9179435652683581, "grad_norm": 0.31103795766830444, "learning_rate": 0.00015728548135412508, "loss": 11.6684, "step": 43852 }, { "epoch": 0.9179644980323202, "grad_norm": 0.33376049995422363, "learning_rate": 0.0001572836842157527, "loss": 11.6533, "step": 43853 }, { "epoch": 0.9179854307962824, "grad_norm": 0.3257754147052765, "learning_rate": 0.00015728188704984287, "loss": 11.6808, "step": 43854 }, { "epoch": 0.9180063635602445, "grad_norm": 0.5414913296699524, "learning_rate": 0.00015728008985639653, "loss": 11.6709, "step": 43855 }, { "epoch": 0.9180272963242067, "grad_norm": 0.3763280212879181, "learning_rate": 0.00015727829263541447, "loss": 11.6618, "step": 43856 }, { "epoch": 0.9180482290881689, "grad_norm": 0.27971571683883667, "learning_rate": 0.0001572764953868976, "loss": 11.6772, "step": 43857 }, { "epoch": 0.9180691618521309, "grad_norm": 0.30948302149772644, "learning_rate": 0.0001572746981108468, "loss": 11.6664, "step": 43858 }, { "epoch": 0.9180900946160931, "grad_norm": 0.3190278112888336, "learning_rate": 0.00015727290080726287, "loss": 11.6585, "step": 43859 }, { "epoch": 0.9181110273800552, "grad_norm": 0.3411237597465515, "learning_rate": 0.00015727110347614674, "loss": 11.671, "step": 43860 }, { "epoch": 0.9181319601440174, "grad_norm": 0.42315173149108887, "learning_rate": 0.0001572693061174992, "loss": 11.6821, "step": 43861 }, { "epoch": 0.9181528929079795, "grad_norm": 0.34880882501602173, "learning_rate": 0.0001572675087313212, "loss": 11.6678, "step": 43862 }, { "epoch": 0.9181738256719417, "grad_norm": 0.31182897090911865, "learning_rate": 0.00015726571131761353, "loss": 11.6648, "step": 43863 }, { "epoch": 0.9181947584359039, "grad_norm": 0.2935999929904938, "learning_rate": 0.0001572639138763771, "loss": 11.6621, "step": 43864 }, { "epoch": 0.918215691199866, "grad_norm": 0.30716437101364136, "learning_rate": 0.00015726211640761274, "loss": 11.6775, "step": 43865 }, { "epoch": 0.9182366239638282, "grad_norm": 0.3312935531139374, "learning_rate": 0.00015726031891132134, "loss": 11.6695, "step": 43866 }, { "epoch": 0.9182575567277903, "grad_norm": 0.3729814291000366, "learning_rate": 0.0001572585213875038, "loss": 11.6646, "step": 43867 }, { "epoch": 0.9182784894917525, "grad_norm": 0.31784629821777344, "learning_rate": 0.00015725672383616086, "loss": 11.6722, "step": 43868 }, { "epoch": 0.9182994222557146, "grad_norm": 0.28422462940216064, "learning_rate": 0.0001572549262572935, "loss": 11.6638, "step": 43869 }, { "epoch": 0.9183203550196768, "grad_norm": 0.3694569170475006, "learning_rate": 0.00015725312865090261, "loss": 11.6415, "step": 43870 }, { "epoch": 0.918341287783639, "grad_norm": 0.3307916820049286, "learning_rate": 0.00015725133101698893, "loss": 11.6598, "step": 43871 }, { "epoch": 0.9183622205476011, "grad_norm": 0.33848366141319275, "learning_rate": 0.0001572495333555534, "loss": 11.6658, "step": 43872 }, { "epoch": 0.9183831533115633, "grad_norm": 0.4120078682899475, "learning_rate": 0.00015724773566659685, "loss": 11.652, "step": 43873 }, { "epoch": 0.9184040860755254, "grad_norm": 0.3045220971107483, "learning_rate": 0.00015724593795012022, "loss": 11.6707, "step": 43874 }, { "epoch": 0.9184250188394876, "grad_norm": 0.3222894072532654, "learning_rate": 0.00015724414020612429, "loss": 11.6716, "step": 43875 }, { "epoch": 0.9184459516034498, "grad_norm": 0.43658971786499023, "learning_rate": 0.00015724234243460995, "loss": 11.6706, "step": 43876 }, { "epoch": 0.9184668843674119, "grad_norm": 0.2857574224472046, "learning_rate": 0.00015724054463557807, "loss": 11.6768, "step": 43877 }, { "epoch": 0.9184878171313741, "grad_norm": 0.2618759870529175, "learning_rate": 0.0001572387468090295, "loss": 11.6709, "step": 43878 }, { "epoch": 0.9185087498953362, "grad_norm": 0.3600783050060272, "learning_rate": 0.00015723694895496514, "loss": 11.6838, "step": 43879 }, { "epoch": 0.9185296826592984, "grad_norm": 0.2945992946624756, "learning_rate": 0.00015723515107338582, "loss": 11.6784, "step": 43880 }, { "epoch": 0.9185506154232604, "grad_norm": 0.37358060479164124, "learning_rate": 0.0001572333531642924, "loss": 11.6652, "step": 43881 }, { "epoch": 0.9185715481872226, "grad_norm": 0.338516503572464, "learning_rate": 0.00015723155522768582, "loss": 11.651, "step": 43882 }, { "epoch": 0.9185924809511848, "grad_norm": 0.26914510130882263, "learning_rate": 0.00015722975726356684, "loss": 11.6706, "step": 43883 }, { "epoch": 0.9186134137151469, "grad_norm": 0.30828237533569336, "learning_rate": 0.0001572279592719364, "loss": 11.689, "step": 43884 }, { "epoch": 0.9186343464791091, "grad_norm": 0.2666729986667633, "learning_rate": 0.00015722616125279527, "loss": 11.6628, "step": 43885 }, { "epoch": 0.9186552792430712, "grad_norm": 0.3630691468715668, "learning_rate": 0.0001572243632061444, "loss": 11.6745, "step": 43886 }, { "epoch": 0.9186762120070334, "grad_norm": 0.3627360761165619, "learning_rate": 0.00015722256513198469, "loss": 11.6734, "step": 43887 }, { "epoch": 0.9186971447709955, "grad_norm": 0.3105489909648895, "learning_rate": 0.00015722076703031688, "loss": 11.6752, "step": 43888 }, { "epoch": 0.9187180775349577, "grad_norm": 0.2957979738712311, "learning_rate": 0.00015721896890114195, "loss": 11.6652, "step": 43889 }, { "epoch": 0.9187390102989199, "grad_norm": 0.3991003930568695, "learning_rate": 0.00015721717074446064, "loss": 11.6611, "step": 43890 }, { "epoch": 0.918759943062882, "grad_norm": 0.2802274525165558, "learning_rate": 0.00015721537256027394, "loss": 11.6598, "step": 43891 }, { "epoch": 0.9187808758268442, "grad_norm": 0.2657528817653656, "learning_rate": 0.0001572135743485827, "loss": 11.6698, "step": 43892 }, { "epoch": 0.9188018085908063, "grad_norm": 0.2641509473323822, "learning_rate": 0.00015721177610938768, "loss": 11.6455, "step": 43893 }, { "epoch": 0.9188227413547685, "grad_norm": 0.3357328772544861, "learning_rate": 0.00015720997784268988, "loss": 11.6597, "step": 43894 }, { "epoch": 0.9188436741187307, "grad_norm": 0.2988656461238861, "learning_rate": 0.00015720817954849005, "loss": 11.6788, "step": 43895 }, { "epoch": 0.9188646068826928, "grad_norm": 0.560501217842102, "learning_rate": 0.00015720638122678914, "loss": 11.6836, "step": 43896 }, { "epoch": 0.918885539646655, "grad_norm": 0.3416864275932312, "learning_rate": 0.00015720458287758792, "loss": 11.6794, "step": 43897 }, { "epoch": 0.9189064724106171, "grad_norm": 0.3401821553707123, "learning_rate": 0.00015720278450088732, "loss": 11.683, "step": 43898 }, { "epoch": 0.9189274051745793, "grad_norm": 0.2686355710029602, "learning_rate": 0.00015720098609668822, "loss": 11.6529, "step": 43899 }, { "epoch": 0.9189483379385414, "grad_norm": 0.3074344992637634, "learning_rate": 0.00015719918766499147, "loss": 11.675, "step": 43900 }, { "epoch": 0.9189692707025036, "grad_norm": 0.31304624676704407, "learning_rate": 0.00015719738920579789, "loss": 11.682, "step": 43901 }, { "epoch": 0.9189902034664658, "grad_norm": 0.3448471426963806, "learning_rate": 0.00015719559071910842, "loss": 11.6538, "step": 43902 }, { "epoch": 0.9190111362304278, "grad_norm": 0.27805182337760925, "learning_rate": 0.00015719379220492384, "loss": 11.6709, "step": 43903 }, { "epoch": 0.91903206899439, "grad_norm": 0.28883686661720276, "learning_rate": 0.00015719199366324508, "loss": 11.6553, "step": 43904 }, { "epoch": 0.9190530017583521, "grad_norm": 0.2817962169647217, "learning_rate": 0.00015719019509407298, "loss": 11.663, "step": 43905 }, { "epoch": 0.9190739345223143, "grad_norm": 0.3377228081226349, "learning_rate": 0.0001571883964974084, "loss": 11.6618, "step": 43906 }, { "epoch": 0.9190948672862764, "grad_norm": 0.331948846578598, "learning_rate": 0.00015718659787325223, "loss": 11.6751, "step": 43907 }, { "epoch": 0.9191158000502386, "grad_norm": 0.373369038105011, "learning_rate": 0.00015718479922160528, "loss": 11.6723, "step": 43908 }, { "epoch": 0.9191367328142008, "grad_norm": 0.2759546637535095, "learning_rate": 0.0001571830005424685, "loss": 11.6606, "step": 43909 }, { "epoch": 0.9191576655781629, "grad_norm": 0.2522091567516327, "learning_rate": 0.0001571812018358427, "loss": 11.6578, "step": 43910 }, { "epoch": 0.9191785983421251, "grad_norm": 0.2955876588821411, "learning_rate": 0.00015717940310172872, "loss": 11.6729, "step": 43911 }, { "epoch": 0.9191995311060872, "grad_norm": 0.3233311176300049, "learning_rate": 0.00015717760434012744, "loss": 11.6736, "step": 43912 }, { "epoch": 0.9192204638700494, "grad_norm": 0.36741065979003906, "learning_rate": 0.00015717580555103976, "loss": 11.6531, "step": 43913 }, { "epoch": 0.9192413966340116, "grad_norm": 0.2874838411808014, "learning_rate": 0.00015717400673446655, "loss": 11.6812, "step": 43914 }, { "epoch": 0.9192623293979737, "grad_norm": 0.34272754192352295, "learning_rate": 0.00015717220789040863, "loss": 11.6692, "step": 43915 }, { "epoch": 0.9192832621619359, "grad_norm": 0.3124142587184906, "learning_rate": 0.0001571704090188669, "loss": 11.6537, "step": 43916 }, { "epoch": 0.919304194925898, "grad_norm": 0.2951606512069702, "learning_rate": 0.0001571686101198422, "loss": 11.6853, "step": 43917 }, { "epoch": 0.9193251276898602, "grad_norm": 0.29023313522338867, "learning_rate": 0.00015716681119333538, "loss": 11.6842, "step": 43918 }, { "epoch": 0.9193460604538223, "grad_norm": 0.3387458324432373, "learning_rate": 0.00015716501223934733, "loss": 11.667, "step": 43919 }, { "epoch": 0.9193669932177845, "grad_norm": 0.34794384241104126, "learning_rate": 0.00015716321325787895, "loss": 11.6682, "step": 43920 }, { "epoch": 0.9193879259817467, "grad_norm": 0.3041151762008667, "learning_rate": 0.00015716141424893104, "loss": 11.6706, "step": 43921 }, { "epoch": 0.9194088587457088, "grad_norm": 0.26782122254371643, "learning_rate": 0.0001571596152125045, "loss": 11.6776, "step": 43922 }, { "epoch": 0.919429791509671, "grad_norm": 0.2695791721343994, "learning_rate": 0.0001571578161486002, "loss": 11.6592, "step": 43923 }, { "epoch": 0.9194507242736331, "grad_norm": 0.26033949851989746, "learning_rate": 0.00015715601705721896, "loss": 11.6766, "step": 43924 }, { "epoch": 0.9194716570375953, "grad_norm": 0.3135157525539398, "learning_rate": 0.0001571542179383617, "loss": 11.67, "step": 43925 }, { "epoch": 0.9194925898015573, "grad_norm": 0.31220191717147827, "learning_rate": 0.00015715241879202924, "loss": 11.6764, "step": 43926 }, { "epoch": 0.9195135225655195, "grad_norm": 0.30913132429122925, "learning_rate": 0.0001571506196182225, "loss": 11.6618, "step": 43927 }, { "epoch": 0.9195344553294817, "grad_norm": 0.2854368984699249, "learning_rate": 0.0001571488204169423, "loss": 11.6614, "step": 43928 }, { "epoch": 0.9195553880934438, "grad_norm": 0.31012919545173645, "learning_rate": 0.0001571470211881895, "loss": 11.6635, "step": 43929 }, { "epoch": 0.919576320857406, "grad_norm": 0.36215463280677795, "learning_rate": 0.000157145221931965, "loss": 11.6774, "step": 43930 }, { "epoch": 0.9195972536213681, "grad_norm": 0.3724539875984192, "learning_rate": 0.00015714342264826965, "loss": 11.669, "step": 43931 }, { "epoch": 0.9196181863853303, "grad_norm": 0.350228488445282, "learning_rate": 0.00015714162333710428, "loss": 11.6481, "step": 43932 }, { "epoch": 0.9196391191492925, "grad_norm": 0.4124068021774292, "learning_rate": 0.0001571398239984698, "loss": 11.6899, "step": 43933 }, { "epoch": 0.9196600519132546, "grad_norm": 0.4127526879310608, "learning_rate": 0.00015713802463236708, "loss": 11.6643, "step": 43934 }, { "epoch": 0.9196809846772168, "grad_norm": 0.31407076120376587, "learning_rate": 0.00015713622523879698, "loss": 11.6632, "step": 43935 }, { "epoch": 0.9197019174411789, "grad_norm": 0.2974019944667816, "learning_rate": 0.00015713442581776033, "loss": 11.6638, "step": 43936 }, { "epoch": 0.9197228502051411, "grad_norm": 0.273380309343338, "learning_rate": 0.00015713262636925801, "loss": 11.6608, "step": 43937 }, { "epoch": 0.9197437829691032, "grad_norm": 0.301712304353714, "learning_rate": 0.0001571308268932909, "loss": 11.6626, "step": 43938 }, { "epoch": 0.9197647157330654, "grad_norm": 0.2639198303222656, "learning_rate": 0.00015712902738985985, "loss": 11.6766, "step": 43939 }, { "epoch": 0.9197856484970276, "grad_norm": 0.2562299966812134, "learning_rate": 0.00015712722785896573, "loss": 11.6674, "step": 43940 }, { "epoch": 0.9198065812609897, "grad_norm": 0.3349805474281311, "learning_rate": 0.0001571254283006094, "loss": 11.6804, "step": 43941 }, { "epoch": 0.9198275140249519, "grad_norm": 0.784683108329773, "learning_rate": 0.00015712362871479177, "loss": 11.5835, "step": 43942 }, { "epoch": 0.919848446788914, "grad_norm": 0.3504433333873749, "learning_rate": 0.00015712182910151363, "loss": 11.6699, "step": 43943 }, { "epoch": 0.9198693795528762, "grad_norm": 0.3140551447868347, "learning_rate": 0.00015712002946077587, "loss": 11.6785, "step": 43944 }, { "epoch": 0.9198903123168383, "grad_norm": 0.33494725823402405, "learning_rate": 0.00015711822979257942, "loss": 11.6618, "step": 43945 }, { "epoch": 0.9199112450808005, "grad_norm": 0.3229641616344452, "learning_rate": 0.00015711643009692504, "loss": 11.6648, "step": 43946 }, { "epoch": 0.9199321778447627, "grad_norm": 0.32633382081985474, "learning_rate": 0.0001571146303738137, "loss": 11.6619, "step": 43947 }, { "epoch": 0.9199531106087248, "grad_norm": 0.2939213514328003, "learning_rate": 0.00015711283062324615, "loss": 11.6852, "step": 43948 }, { "epoch": 0.919974043372687, "grad_norm": 0.38355761766433716, "learning_rate": 0.00015711103084522336, "loss": 11.6843, "step": 43949 }, { "epoch": 0.919994976136649, "grad_norm": 0.31971925497055054, "learning_rate": 0.00015710923103974615, "loss": 11.6752, "step": 43950 }, { "epoch": 0.9200159089006112, "grad_norm": 0.25644171237945557, "learning_rate": 0.0001571074312068154, "loss": 11.6657, "step": 43951 }, { "epoch": 0.9200368416645733, "grad_norm": 0.3055095970630646, "learning_rate": 0.00015710563134643192, "loss": 11.6666, "step": 43952 }, { "epoch": 0.9200577744285355, "grad_norm": 0.31927135586738586, "learning_rate": 0.00015710383145859665, "loss": 11.6744, "step": 43953 }, { "epoch": 0.9200787071924977, "grad_norm": 0.32763364911079407, "learning_rate": 0.0001571020315433104, "loss": 11.6506, "step": 43954 }, { "epoch": 0.9200996399564598, "grad_norm": 0.27808207273483276, "learning_rate": 0.0001571002316005741, "loss": 11.6403, "step": 43955 }, { "epoch": 0.920120572720422, "grad_norm": 0.33737045526504517, "learning_rate": 0.00015709843163038854, "loss": 11.6702, "step": 43956 }, { "epoch": 0.9201415054843841, "grad_norm": 0.3501201868057251, "learning_rate": 0.00015709663163275463, "loss": 11.6803, "step": 43957 }, { "epoch": 0.9201624382483463, "grad_norm": 0.274367094039917, "learning_rate": 0.00015709483160767325, "loss": 11.6604, "step": 43958 }, { "epoch": 0.9201833710123085, "grad_norm": 0.29229244589805603, "learning_rate": 0.00015709303155514518, "loss": 11.6896, "step": 43959 }, { "epoch": 0.9202043037762706, "grad_norm": 0.2924267649650574, "learning_rate": 0.0001570912314751714, "loss": 11.6763, "step": 43960 }, { "epoch": 0.9202252365402328, "grad_norm": 0.3104337453842163, "learning_rate": 0.0001570894313677527, "loss": 11.6677, "step": 43961 }, { "epoch": 0.9202461693041949, "grad_norm": 0.3049481213092804, "learning_rate": 0.00015708763123288997, "loss": 11.6631, "step": 43962 }, { "epoch": 0.9202671020681571, "grad_norm": 0.3526439070701599, "learning_rate": 0.00015708583107058405, "loss": 11.6633, "step": 43963 }, { "epoch": 0.9202880348321192, "grad_norm": 0.353382408618927, "learning_rate": 0.00015708403088083585, "loss": 11.6869, "step": 43964 }, { "epoch": 0.9203089675960814, "grad_norm": 0.2973499298095703, "learning_rate": 0.00015708223066364624, "loss": 11.6722, "step": 43965 }, { "epoch": 0.9203299003600436, "grad_norm": 0.3469234108924866, "learning_rate": 0.000157080430419016, "loss": 11.6883, "step": 43966 }, { "epoch": 0.9203508331240057, "grad_norm": 0.3668590784072876, "learning_rate": 0.00015707863014694607, "loss": 11.6606, "step": 43967 }, { "epoch": 0.9203717658879679, "grad_norm": 0.29372867941856384, "learning_rate": 0.00015707682984743732, "loss": 11.6487, "step": 43968 }, { "epoch": 0.92039269865193, "grad_norm": 0.33915093541145325, "learning_rate": 0.00015707502952049058, "loss": 11.6519, "step": 43969 }, { "epoch": 0.9204136314158922, "grad_norm": 0.3194781243801117, "learning_rate": 0.00015707322916610675, "loss": 11.6842, "step": 43970 }, { "epoch": 0.9204345641798543, "grad_norm": 0.39302903413772583, "learning_rate": 0.00015707142878428664, "loss": 11.6728, "step": 43971 }, { "epoch": 0.9204554969438165, "grad_norm": 0.27598437666893005, "learning_rate": 0.00015706962837503118, "loss": 11.672, "step": 43972 }, { "epoch": 0.9204764297077787, "grad_norm": 0.32928964495658875, "learning_rate": 0.0001570678279383412, "loss": 11.6551, "step": 43973 }, { "epoch": 0.9204973624717407, "grad_norm": 0.28568723797798157, "learning_rate": 0.00015706602747421754, "loss": 11.6666, "step": 43974 }, { "epoch": 0.9205182952357029, "grad_norm": 0.22332078218460083, "learning_rate": 0.00015706422698266113, "loss": 11.6401, "step": 43975 }, { "epoch": 0.920539227999665, "grad_norm": 0.2945877015590668, "learning_rate": 0.0001570624264636728, "loss": 11.6515, "step": 43976 }, { "epoch": 0.9205601607636272, "grad_norm": 0.3271581828594208, "learning_rate": 0.0001570606259172534, "loss": 11.6754, "step": 43977 }, { "epoch": 0.9205810935275894, "grad_norm": 0.3909124732017517, "learning_rate": 0.00015705882534340385, "loss": 11.675, "step": 43978 }, { "epoch": 0.9206020262915515, "grad_norm": 0.2589664161205292, "learning_rate": 0.00015705702474212493, "loss": 11.6609, "step": 43979 }, { "epoch": 0.9206229590555137, "grad_norm": 0.3605753183364868, "learning_rate": 0.0001570552241134176, "loss": 11.6709, "step": 43980 }, { "epoch": 0.9206438918194758, "grad_norm": 0.4153813123703003, "learning_rate": 0.00015705342345728266, "loss": 11.6885, "step": 43981 }, { "epoch": 0.920664824583438, "grad_norm": 0.32427939772605896, "learning_rate": 0.000157051622773721, "loss": 11.6982, "step": 43982 }, { "epoch": 0.9206857573474001, "grad_norm": 0.2769758999347687, "learning_rate": 0.00015704982206273346, "loss": 11.6773, "step": 43983 }, { "epoch": 0.9207066901113623, "grad_norm": 0.38053542375564575, "learning_rate": 0.00015704802132432096, "loss": 11.6805, "step": 43984 }, { "epoch": 0.9207276228753245, "grad_norm": 0.32018527388572693, "learning_rate": 0.0001570462205584843, "loss": 11.6701, "step": 43985 }, { "epoch": 0.9207485556392866, "grad_norm": 0.4243864119052887, "learning_rate": 0.0001570444197652244, "loss": 11.6677, "step": 43986 }, { "epoch": 0.9207694884032488, "grad_norm": 0.4033975899219513, "learning_rate": 0.0001570426189445421, "loss": 11.6604, "step": 43987 }, { "epoch": 0.9207904211672109, "grad_norm": 0.31477057933807373, "learning_rate": 0.00015704081809643828, "loss": 11.659, "step": 43988 }, { "epoch": 0.9208113539311731, "grad_norm": 0.3107859790325165, "learning_rate": 0.00015703901722091378, "loss": 11.6707, "step": 43989 }, { "epoch": 0.9208322866951352, "grad_norm": 0.2901911735534668, "learning_rate": 0.0001570372163179695, "loss": 11.6829, "step": 43990 }, { "epoch": 0.9208532194590974, "grad_norm": 0.39394906163215637, "learning_rate": 0.00015703541538760626, "loss": 11.6754, "step": 43991 }, { "epoch": 0.9208741522230596, "grad_norm": 0.3767986297607422, "learning_rate": 0.00015703361442982497, "loss": 11.6668, "step": 43992 }, { "epoch": 0.9208950849870217, "grad_norm": 0.3325771391391754, "learning_rate": 0.00015703181344462648, "loss": 11.6651, "step": 43993 }, { "epoch": 0.9209160177509839, "grad_norm": 0.3641984164714813, "learning_rate": 0.00015703001243201164, "loss": 11.6809, "step": 43994 }, { "epoch": 0.920936950514946, "grad_norm": 0.3127959072589874, "learning_rate": 0.00015702821139198134, "loss": 11.6508, "step": 43995 }, { "epoch": 0.9209578832789082, "grad_norm": 0.28869888186454773, "learning_rate": 0.00015702641032453644, "loss": 11.6656, "step": 43996 }, { "epoch": 0.9209788160428704, "grad_norm": 0.3011930286884308, "learning_rate": 0.00015702460922967782, "loss": 11.6617, "step": 43997 }, { "epoch": 0.9209997488068324, "grad_norm": 0.3528185486793518, "learning_rate": 0.00015702280810740628, "loss": 11.6797, "step": 43998 }, { "epoch": 0.9210206815707946, "grad_norm": 0.25321757793426514, "learning_rate": 0.00015702100695772278, "loss": 11.6696, "step": 43999 }, { "epoch": 0.9210416143347567, "grad_norm": 0.4616515338420868, "learning_rate": 0.00015701920578062813, "loss": 11.6689, "step": 44000 }, { "epoch": 0.9210416143347567, "eval_loss": 11.669382095336914, "eval_runtime": 34.3835, "eval_samples_per_second": 27.949, "eval_steps_per_second": 7.009, "step": 44000 }, { "epoch": 0.9210625470987189, "grad_norm": 0.34906435012817383, "learning_rate": 0.00015701740457612316, "loss": 11.6572, "step": 44001 }, { "epoch": 0.921083479862681, "grad_norm": 0.26407358050346375, "learning_rate": 0.0001570156033442088, "loss": 11.6689, "step": 44002 }, { "epoch": 0.9211044126266432, "grad_norm": 0.33444744348526, "learning_rate": 0.00015701380208488593, "loss": 11.6725, "step": 44003 }, { "epoch": 0.9211253453906054, "grad_norm": 0.30905044078826904, "learning_rate": 0.0001570120007981554, "loss": 11.6749, "step": 44004 }, { "epoch": 0.9211462781545675, "grad_norm": 0.34216901659965515, "learning_rate": 0.00015701019948401797, "loss": 11.6839, "step": 44005 }, { "epoch": 0.9211672109185297, "grad_norm": 0.4149874746799469, "learning_rate": 0.00015700839814247468, "loss": 11.6922, "step": 44006 }, { "epoch": 0.9211881436824918, "grad_norm": 0.33064860105514526, "learning_rate": 0.00015700659677352627, "loss": 11.6658, "step": 44007 }, { "epoch": 0.921209076446454, "grad_norm": 0.352148175239563, "learning_rate": 0.00015700479537717366, "loss": 11.6414, "step": 44008 }, { "epoch": 0.9212300092104161, "grad_norm": 0.24222484230995178, "learning_rate": 0.0001570029939534177, "loss": 11.6755, "step": 44009 }, { "epoch": 0.9212509419743783, "grad_norm": 0.33000025153160095, "learning_rate": 0.00015700119250225923, "loss": 11.6811, "step": 44010 }, { "epoch": 0.9212718747383405, "grad_norm": 0.27588796615600586, "learning_rate": 0.00015699939102369918, "loss": 11.6592, "step": 44011 }, { "epoch": 0.9212928075023026, "grad_norm": 0.2938835024833679, "learning_rate": 0.00015699758951773836, "loss": 11.6747, "step": 44012 }, { "epoch": 0.9213137402662648, "grad_norm": 0.2769520878791809, "learning_rate": 0.00015699578798437767, "loss": 11.6659, "step": 44013 }, { "epoch": 0.9213346730302269, "grad_norm": 0.2445097714662552, "learning_rate": 0.00015699398642361797, "loss": 11.6592, "step": 44014 }, { "epoch": 0.9213556057941891, "grad_norm": 0.3007536828517914, "learning_rate": 0.00015699218483546012, "loss": 11.652, "step": 44015 }, { "epoch": 0.9213765385581513, "grad_norm": 0.44084402918815613, "learning_rate": 0.00015699038321990497, "loss": 11.6653, "step": 44016 }, { "epoch": 0.9213974713221134, "grad_norm": 0.37195420265197754, "learning_rate": 0.0001569885815769534, "loss": 11.6772, "step": 44017 }, { "epoch": 0.9214184040860756, "grad_norm": 0.39283356070518494, "learning_rate": 0.00015698677990660627, "loss": 11.6557, "step": 44018 }, { "epoch": 0.9214393368500376, "grad_norm": 0.28959745168685913, "learning_rate": 0.00015698497820886446, "loss": 11.6774, "step": 44019 }, { "epoch": 0.9214602696139998, "grad_norm": 0.34410208463668823, "learning_rate": 0.00015698317648372883, "loss": 11.6883, "step": 44020 }, { "epoch": 0.9214812023779619, "grad_norm": 0.4960981607437134, "learning_rate": 0.00015698137473120025, "loss": 11.6799, "step": 44021 }, { "epoch": 0.9215021351419241, "grad_norm": 0.275638610124588, "learning_rate": 0.0001569795729512796, "loss": 11.6629, "step": 44022 }, { "epoch": 0.9215230679058863, "grad_norm": 0.33395031094551086, "learning_rate": 0.00015697777114396768, "loss": 11.6578, "step": 44023 }, { "epoch": 0.9215440006698484, "grad_norm": 0.29767683148384094, "learning_rate": 0.00015697596930926543, "loss": 11.6673, "step": 44024 }, { "epoch": 0.9215649334338106, "grad_norm": 0.2841915190219879, "learning_rate": 0.00015697416744717369, "loss": 11.6701, "step": 44025 }, { "epoch": 0.9215858661977727, "grad_norm": 0.25984466075897217, "learning_rate": 0.00015697236555769334, "loss": 11.6682, "step": 44026 }, { "epoch": 0.9216067989617349, "grad_norm": 0.3324401080608368, "learning_rate": 0.0001569705636408252, "loss": 11.6751, "step": 44027 }, { "epoch": 0.921627731725697, "grad_norm": 0.2475070357322693, "learning_rate": 0.0001569687616965702, "loss": 11.6645, "step": 44028 }, { "epoch": 0.9216486644896592, "grad_norm": 0.27602508664131165, "learning_rate": 0.00015696695972492917, "loss": 11.655, "step": 44029 }, { "epoch": 0.9216695972536214, "grad_norm": 0.35103386640548706, "learning_rate": 0.00015696515772590293, "loss": 11.6707, "step": 44030 }, { "epoch": 0.9216905300175835, "grad_norm": 0.30950644612312317, "learning_rate": 0.00015696335569949246, "loss": 11.664, "step": 44031 }, { "epoch": 0.9217114627815457, "grad_norm": 0.2934751808643341, "learning_rate": 0.00015696155364569854, "loss": 11.675, "step": 44032 }, { "epoch": 0.9217323955455078, "grad_norm": 0.31055596470832825, "learning_rate": 0.00015695975156452207, "loss": 11.6747, "step": 44033 }, { "epoch": 0.92175332830947, "grad_norm": 0.32200121879577637, "learning_rate": 0.0001569579494559639, "loss": 11.6507, "step": 44034 }, { "epoch": 0.9217742610734322, "grad_norm": 0.2749410569667816, "learning_rate": 0.0001569561473200249, "loss": 11.6615, "step": 44035 }, { "epoch": 0.9217951938373943, "grad_norm": 0.2863479554653168, "learning_rate": 0.00015695434515670593, "loss": 11.6654, "step": 44036 }, { "epoch": 0.9218161266013565, "grad_norm": 0.42193493247032166, "learning_rate": 0.00015695254296600787, "loss": 11.6587, "step": 44037 }, { "epoch": 0.9218370593653186, "grad_norm": 0.356929212808609, "learning_rate": 0.0001569507407479316, "loss": 11.6931, "step": 44038 }, { "epoch": 0.9218579921292808, "grad_norm": 0.3014066815376282, "learning_rate": 0.00015694893850247795, "loss": 11.6696, "step": 44039 }, { "epoch": 0.9218789248932429, "grad_norm": 0.26196321845054626, "learning_rate": 0.0001569471362296478, "loss": 11.6764, "step": 44040 }, { "epoch": 0.9218998576572051, "grad_norm": 0.3541502356529236, "learning_rate": 0.000156945333929442, "loss": 11.6712, "step": 44041 }, { "epoch": 0.9219207904211673, "grad_norm": 0.2808982729911804, "learning_rate": 0.00015694353160186148, "loss": 11.6689, "step": 44042 }, { "epoch": 0.9219417231851293, "grad_norm": 0.31442224979400635, "learning_rate": 0.00015694172924690704, "loss": 11.6692, "step": 44043 }, { "epoch": 0.9219626559490915, "grad_norm": 0.25682273507118225, "learning_rate": 0.00015693992686457956, "loss": 11.6556, "step": 44044 }, { "epoch": 0.9219835887130536, "grad_norm": 0.30187153816223145, "learning_rate": 0.00015693812445487994, "loss": 11.6622, "step": 44045 }, { "epoch": 0.9220045214770158, "grad_norm": 0.3002670407295227, "learning_rate": 0.000156936322017809, "loss": 11.6727, "step": 44046 }, { "epoch": 0.9220254542409779, "grad_norm": 0.32150763273239136, "learning_rate": 0.00015693451955336765, "loss": 11.677, "step": 44047 }, { "epoch": 0.9220463870049401, "grad_norm": 0.3510052561759949, "learning_rate": 0.00015693271706155673, "loss": 11.6726, "step": 44048 }, { "epoch": 0.9220673197689023, "grad_norm": 0.2658315598964691, "learning_rate": 0.0001569309145423771, "loss": 11.678, "step": 44049 }, { "epoch": 0.9220882525328644, "grad_norm": 0.2850986421108246, "learning_rate": 0.0001569291119958297, "loss": 11.664, "step": 44050 }, { "epoch": 0.9221091852968266, "grad_norm": 0.34350332617759705, "learning_rate": 0.00015692730942191524, "loss": 11.6686, "step": 44051 }, { "epoch": 0.9221301180607887, "grad_norm": 0.3188154101371765, "learning_rate": 0.00015692550682063473, "loss": 11.6579, "step": 44052 }, { "epoch": 0.9221510508247509, "grad_norm": 0.29545778036117554, "learning_rate": 0.000156923704191989, "loss": 11.67, "step": 44053 }, { "epoch": 0.9221719835887131, "grad_norm": 0.22213682532310486, "learning_rate": 0.0001569219015359789, "loss": 11.6597, "step": 44054 }, { "epoch": 0.9221929163526752, "grad_norm": 0.32599377632141113, "learning_rate": 0.00015692009885260528, "loss": 11.6596, "step": 44055 }, { "epoch": 0.9222138491166374, "grad_norm": 0.3346536457538605, "learning_rate": 0.000156918296141869, "loss": 11.6757, "step": 44056 }, { "epoch": 0.9222347818805995, "grad_norm": 1.626591444015503, "learning_rate": 0.00015691649340377102, "loss": 11.6436, "step": 44057 }, { "epoch": 0.9222557146445617, "grad_norm": 0.3285478353500366, "learning_rate": 0.0001569146906383121, "loss": 11.6679, "step": 44058 }, { "epoch": 0.9222766474085238, "grad_norm": 0.27768266201019287, "learning_rate": 0.00015691288784549316, "loss": 11.6775, "step": 44059 }, { "epoch": 0.922297580172486, "grad_norm": 0.306852251291275, "learning_rate": 0.00015691108502531508, "loss": 11.6575, "step": 44060 }, { "epoch": 0.9223185129364482, "grad_norm": 0.4359630346298218, "learning_rate": 0.00015690928217777867, "loss": 11.6771, "step": 44061 }, { "epoch": 0.9223394457004103, "grad_norm": 0.27253642678260803, "learning_rate": 0.0001569074793028848, "loss": 11.6804, "step": 44062 }, { "epoch": 0.9223603784643725, "grad_norm": 0.40261030197143555, "learning_rate": 0.00015690567640063444, "loss": 11.675, "step": 44063 }, { "epoch": 0.9223813112283346, "grad_norm": 0.3178529143333435, "learning_rate": 0.00015690387347102832, "loss": 11.6851, "step": 44064 }, { "epoch": 0.9224022439922968, "grad_norm": 0.28467652201652527, "learning_rate": 0.0001569020705140674, "loss": 11.6842, "step": 44065 }, { "epoch": 0.9224231767562588, "grad_norm": 0.34254133701324463, "learning_rate": 0.0001569002675297525, "loss": 11.6847, "step": 44066 }, { "epoch": 0.922444109520221, "grad_norm": 0.3096683919429779, "learning_rate": 0.00015689846451808447, "loss": 11.6622, "step": 44067 }, { "epoch": 0.9224650422841832, "grad_norm": 0.271079957485199, "learning_rate": 0.00015689666147906424, "loss": 11.6736, "step": 44068 }, { "epoch": 0.9224859750481453, "grad_norm": 0.28564801812171936, "learning_rate": 0.00015689485841269266, "loss": 11.6661, "step": 44069 }, { "epoch": 0.9225069078121075, "grad_norm": 0.3567773103713989, "learning_rate": 0.00015689305531897056, "loss": 11.6835, "step": 44070 }, { "epoch": 0.9225278405760696, "grad_norm": 0.2928332984447479, "learning_rate": 0.00015689125219789884, "loss": 11.6715, "step": 44071 }, { "epoch": 0.9225487733400318, "grad_norm": 0.44483909010887146, "learning_rate": 0.00015688944904947833, "loss": 11.6751, "step": 44072 }, { "epoch": 0.922569706103994, "grad_norm": 0.3943156898021698, "learning_rate": 0.00015688764587370996, "loss": 11.683, "step": 44073 }, { "epoch": 0.9225906388679561, "grad_norm": 0.30592653155326843, "learning_rate": 0.00015688584267059454, "loss": 11.6644, "step": 44074 }, { "epoch": 0.9226115716319183, "grad_norm": 0.28415507078170776, "learning_rate": 0.00015688403944013297, "loss": 11.6682, "step": 44075 }, { "epoch": 0.9226325043958804, "grad_norm": 0.35384002327919006, "learning_rate": 0.00015688223618232606, "loss": 11.6691, "step": 44076 }, { "epoch": 0.9226534371598426, "grad_norm": 0.33241036534309387, "learning_rate": 0.00015688043289717476, "loss": 11.6844, "step": 44077 }, { "epoch": 0.9226743699238047, "grad_norm": 0.3094843924045563, "learning_rate": 0.00015687862958467988, "loss": 11.6659, "step": 44078 }, { "epoch": 0.9226953026877669, "grad_norm": 0.24571700394153595, "learning_rate": 0.00015687682624484231, "loss": 11.6835, "step": 44079 }, { "epoch": 0.9227162354517291, "grad_norm": 0.3249763548374176, "learning_rate": 0.0001568750228776629, "loss": 11.6742, "step": 44080 }, { "epoch": 0.9227371682156912, "grad_norm": 0.3003208041191101, "learning_rate": 0.00015687321948314252, "loss": 11.6755, "step": 44081 }, { "epoch": 0.9227581009796534, "grad_norm": 0.35946252942085266, "learning_rate": 0.00015687141606128205, "loss": 11.6643, "step": 44082 }, { "epoch": 0.9227790337436155, "grad_norm": 0.3186916708946228, "learning_rate": 0.00015686961261208236, "loss": 11.6743, "step": 44083 }, { "epoch": 0.9227999665075777, "grad_norm": 0.30856403708457947, "learning_rate": 0.0001568678091355443, "loss": 11.6809, "step": 44084 }, { "epoch": 0.9228208992715398, "grad_norm": 0.352690726518631, "learning_rate": 0.00015686600563166877, "loss": 11.6931, "step": 44085 }, { "epoch": 0.922841832035502, "grad_norm": 0.2985236346721649, "learning_rate": 0.00015686420210045656, "loss": 11.6565, "step": 44086 }, { "epoch": 0.9228627647994642, "grad_norm": 0.3421851098537445, "learning_rate": 0.00015686239854190864, "loss": 11.6733, "step": 44087 }, { "epoch": 0.9228836975634263, "grad_norm": 0.32752153277397156, "learning_rate": 0.00015686059495602578, "loss": 11.6811, "step": 44088 }, { "epoch": 0.9229046303273885, "grad_norm": 0.42487141489982605, "learning_rate": 0.00015685879134280892, "loss": 11.6852, "step": 44089 }, { "epoch": 0.9229255630913505, "grad_norm": 0.334839403629303, "learning_rate": 0.0001568569877022589, "loss": 11.6512, "step": 44090 }, { "epoch": 0.9229464958553127, "grad_norm": 0.3208150565624237, "learning_rate": 0.00015685518403437657, "loss": 11.6692, "step": 44091 }, { "epoch": 0.9229674286192749, "grad_norm": 0.32176488637924194, "learning_rate": 0.00015685338033916284, "loss": 11.6851, "step": 44092 }, { "epoch": 0.922988361383237, "grad_norm": 0.2778056561946869, "learning_rate": 0.00015685157661661853, "loss": 11.6678, "step": 44093 }, { "epoch": 0.9230092941471992, "grad_norm": 0.28846049308776855, "learning_rate": 0.0001568497728667445, "loss": 11.6644, "step": 44094 }, { "epoch": 0.9230302269111613, "grad_norm": 0.24893078207969666, "learning_rate": 0.00015684796908954168, "loss": 11.6603, "step": 44095 }, { "epoch": 0.9230511596751235, "grad_norm": 0.34469178318977356, "learning_rate": 0.00015684616528501092, "loss": 11.6683, "step": 44096 }, { "epoch": 0.9230720924390856, "grad_norm": 0.3064071238040924, "learning_rate": 0.00015684436145315303, "loss": 11.6776, "step": 44097 }, { "epoch": 0.9230930252030478, "grad_norm": 0.26696619391441345, "learning_rate": 0.00015684255759396896, "loss": 11.6832, "step": 44098 }, { "epoch": 0.92311395796701, "grad_norm": 0.4148605465888977, "learning_rate": 0.00015684075370745953, "loss": 11.6704, "step": 44099 }, { "epoch": 0.9231348907309721, "grad_norm": 0.35675451159477234, "learning_rate": 0.00015683894979362558, "loss": 11.6755, "step": 44100 }, { "epoch": 0.9231558234949343, "grad_norm": 0.30568259954452515, "learning_rate": 0.000156837145852468, "loss": 11.666, "step": 44101 }, { "epoch": 0.9231767562588964, "grad_norm": 0.29689618945121765, "learning_rate": 0.00015683534188398767, "loss": 11.6711, "step": 44102 }, { "epoch": 0.9231976890228586, "grad_norm": 0.26179325580596924, "learning_rate": 0.00015683353788818546, "loss": 11.6693, "step": 44103 }, { "epoch": 0.9232186217868207, "grad_norm": 0.31591475009918213, "learning_rate": 0.00015683173386506227, "loss": 11.6956, "step": 44104 }, { "epoch": 0.9232395545507829, "grad_norm": 0.3725108802318573, "learning_rate": 0.0001568299298146189, "loss": 11.6825, "step": 44105 }, { "epoch": 0.9232604873147451, "grad_norm": 0.29883715510368347, "learning_rate": 0.0001568281257368562, "loss": 11.6637, "step": 44106 }, { "epoch": 0.9232814200787072, "grad_norm": 0.27223658561706543, "learning_rate": 0.00015682632163177513, "loss": 11.6734, "step": 44107 }, { "epoch": 0.9233023528426694, "grad_norm": 0.22732290625572205, "learning_rate": 0.00015682451749937648, "loss": 11.6662, "step": 44108 }, { "epoch": 0.9233232856066315, "grad_norm": 0.3333984315395355, "learning_rate": 0.0001568227133396612, "loss": 11.6756, "step": 44109 }, { "epoch": 0.9233442183705937, "grad_norm": 0.2769971787929535, "learning_rate": 0.00015682090915263003, "loss": 11.6542, "step": 44110 }, { "epoch": 0.9233651511345559, "grad_norm": 0.35808128118515015, "learning_rate": 0.00015681910493828394, "loss": 11.6839, "step": 44111 }, { "epoch": 0.923386083898518, "grad_norm": 0.3121561110019684, "learning_rate": 0.00015681730069662378, "loss": 11.6685, "step": 44112 }, { "epoch": 0.9234070166624802, "grad_norm": 0.281848281621933, "learning_rate": 0.00015681549642765038, "loss": 11.6519, "step": 44113 }, { "epoch": 0.9234279494264422, "grad_norm": 0.3142133057117462, "learning_rate": 0.00015681369213136465, "loss": 11.6752, "step": 44114 }, { "epoch": 0.9234488821904044, "grad_norm": 0.23594923317432404, "learning_rate": 0.00015681188780776743, "loss": 11.6571, "step": 44115 }, { "epoch": 0.9234698149543665, "grad_norm": 0.3283517062664032, "learning_rate": 0.0001568100834568596, "loss": 11.6592, "step": 44116 }, { "epoch": 0.9234907477183287, "grad_norm": 0.2586197257041931, "learning_rate": 0.00015680827907864204, "loss": 11.6627, "step": 44117 }, { "epoch": 0.9235116804822909, "grad_norm": 0.26491308212280273, "learning_rate": 0.00015680647467311557, "loss": 11.6711, "step": 44118 }, { "epoch": 0.923532613246253, "grad_norm": 0.34525975584983826, "learning_rate": 0.00015680467024028113, "loss": 11.6728, "step": 44119 }, { "epoch": 0.9235535460102152, "grad_norm": 0.31201431155204773, "learning_rate": 0.00015680286578013952, "loss": 11.663, "step": 44120 }, { "epoch": 0.9235744787741773, "grad_norm": 0.3352384865283966, "learning_rate": 0.00015680106129269166, "loss": 11.6675, "step": 44121 }, { "epoch": 0.9235954115381395, "grad_norm": 0.33137667179107666, "learning_rate": 0.00015679925677793838, "loss": 11.6697, "step": 44122 }, { "epoch": 0.9236163443021016, "grad_norm": 0.267541766166687, "learning_rate": 0.00015679745223588052, "loss": 11.6561, "step": 44123 }, { "epoch": 0.9236372770660638, "grad_norm": 0.2704985439777374, "learning_rate": 0.00015679564766651901, "loss": 11.6735, "step": 44124 }, { "epoch": 0.923658209830026, "grad_norm": 0.25788214802742004, "learning_rate": 0.0001567938430698547, "loss": 11.6551, "step": 44125 }, { "epoch": 0.9236791425939881, "grad_norm": 0.25995996594429016, "learning_rate": 0.00015679203844588848, "loss": 11.66, "step": 44126 }, { "epoch": 0.9237000753579503, "grad_norm": 0.28383320569992065, "learning_rate": 0.00015679023379462114, "loss": 11.6715, "step": 44127 }, { "epoch": 0.9237210081219124, "grad_norm": 0.3914446532726288, "learning_rate": 0.0001567884291160536, "loss": 11.6904, "step": 44128 }, { "epoch": 0.9237419408858746, "grad_norm": 0.32090941071510315, "learning_rate": 0.00015678662441018676, "loss": 11.6712, "step": 44129 }, { "epoch": 0.9237628736498368, "grad_norm": 0.2754870355129242, "learning_rate": 0.00015678481967702144, "loss": 11.6686, "step": 44130 }, { "epoch": 0.9237838064137989, "grad_norm": 0.33185088634490967, "learning_rate": 0.00015678301491655851, "loss": 11.6559, "step": 44131 }, { "epoch": 0.9238047391777611, "grad_norm": 0.28052157163619995, "learning_rate": 0.00015678121012879884, "loss": 11.6648, "step": 44132 }, { "epoch": 0.9238256719417232, "grad_norm": 0.25919485092163086, "learning_rate": 0.00015677940531374333, "loss": 11.6625, "step": 44133 }, { "epoch": 0.9238466047056854, "grad_norm": 0.3101571500301361, "learning_rate": 0.0001567776004713928, "loss": 11.6716, "step": 44134 }, { "epoch": 0.9238675374696474, "grad_norm": 0.34089815616607666, "learning_rate": 0.00015677579560174817, "loss": 11.6811, "step": 44135 }, { "epoch": 0.9238884702336096, "grad_norm": 0.26095813512802124, "learning_rate": 0.00015677399070481026, "loss": 11.6568, "step": 44136 }, { "epoch": 0.9239094029975718, "grad_norm": 0.25593021512031555, "learning_rate": 0.0001567721857805799, "loss": 11.6587, "step": 44137 }, { "epoch": 0.9239303357615339, "grad_norm": 0.34035253524780273, "learning_rate": 0.00015677038082905807, "loss": 11.6616, "step": 44138 }, { "epoch": 0.9239512685254961, "grad_norm": 0.4372546672821045, "learning_rate": 0.00015676857585024558, "loss": 11.6668, "step": 44139 }, { "epoch": 0.9239722012894582, "grad_norm": 0.2832339406013489, "learning_rate": 0.0001567667708441433, "loss": 11.6677, "step": 44140 }, { "epoch": 0.9239931340534204, "grad_norm": 0.3910232186317444, "learning_rate": 0.00015676496581075209, "loss": 11.6703, "step": 44141 }, { "epoch": 0.9240140668173825, "grad_norm": 0.3148162066936493, "learning_rate": 0.0001567631607500728, "loss": 11.6678, "step": 44142 }, { "epoch": 0.9240349995813447, "grad_norm": 0.26428326964378357, "learning_rate": 0.00015676135566210635, "loss": 11.6654, "step": 44143 }, { "epoch": 0.9240559323453069, "grad_norm": 0.4457511603832245, "learning_rate": 0.00015675955054685357, "loss": 11.668, "step": 44144 }, { "epoch": 0.924076865109269, "grad_norm": 0.2533442974090576, "learning_rate": 0.0001567577454043153, "loss": 11.6354, "step": 44145 }, { "epoch": 0.9240977978732312, "grad_norm": 0.28000935912132263, "learning_rate": 0.00015675594023449252, "loss": 11.6464, "step": 44146 }, { "epoch": 0.9241187306371933, "grad_norm": 0.3318753242492676, "learning_rate": 0.00015675413503738597, "loss": 11.6505, "step": 44147 }, { "epoch": 0.9241396634011555, "grad_norm": 0.29472222924232483, "learning_rate": 0.00015675232981299657, "loss": 11.6522, "step": 44148 }, { "epoch": 0.9241605961651176, "grad_norm": 0.27237245440483093, "learning_rate": 0.0001567505245613252, "loss": 11.686, "step": 44149 }, { "epoch": 0.9241815289290798, "grad_norm": 0.28805282711982727, "learning_rate": 0.00015674871928237271, "loss": 11.6617, "step": 44150 }, { "epoch": 0.924202461693042, "grad_norm": 0.2634695768356323, "learning_rate": 0.00015674691397613998, "loss": 11.6689, "step": 44151 }, { "epoch": 0.9242233944570041, "grad_norm": 0.30274173617362976, "learning_rate": 0.00015674510864262787, "loss": 11.6778, "step": 44152 }, { "epoch": 0.9242443272209663, "grad_norm": 0.47766920924186707, "learning_rate": 0.00015674330328183724, "loss": 11.6581, "step": 44153 }, { "epoch": 0.9242652599849284, "grad_norm": 0.32134532928466797, "learning_rate": 0.00015674149789376897, "loss": 11.6843, "step": 44154 }, { "epoch": 0.9242861927488906, "grad_norm": 0.39831671118736267, "learning_rate": 0.00015673969247842395, "loss": 11.6717, "step": 44155 }, { "epoch": 0.9243071255128528, "grad_norm": 0.3463621139526367, "learning_rate": 0.000156737887035803, "loss": 11.6684, "step": 44156 }, { "epoch": 0.9243280582768149, "grad_norm": 0.3086106479167938, "learning_rate": 0.000156736081565907, "loss": 11.6798, "step": 44157 }, { "epoch": 0.9243489910407771, "grad_norm": 0.2799650728702545, "learning_rate": 0.00015673427606873686, "loss": 11.6843, "step": 44158 }, { "epoch": 0.9243699238047391, "grad_norm": 0.4801270365715027, "learning_rate": 0.00015673247054429337, "loss": 11.695, "step": 44159 }, { "epoch": 0.9243908565687013, "grad_norm": 0.258027046918869, "learning_rate": 0.0001567306649925775, "loss": 11.6563, "step": 44160 }, { "epoch": 0.9244117893326634, "grad_norm": 0.31972140073776245, "learning_rate": 0.00015672885941359, "loss": 11.6663, "step": 44161 }, { "epoch": 0.9244327220966256, "grad_norm": 0.32906413078308105, "learning_rate": 0.00015672705380733186, "loss": 11.6645, "step": 44162 }, { "epoch": 0.9244536548605878, "grad_norm": 0.3206632137298584, "learning_rate": 0.00015672524817380388, "loss": 11.6713, "step": 44163 }, { "epoch": 0.9244745876245499, "grad_norm": 0.3802807033061981, "learning_rate": 0.0001567234425130069, "loss": 11.6647, "step": 44164 }, { "epoch": 0.9244955203885121, "grad_norm": 0.29359152913093567, "learning_rate": 0.00015672163682494184, "loss": 11.6654, "step": 44165 }, { "epoch": 0.9245164531524742, "grad_norm": 0.3312274217605591, "learning_rate": 0.00015671983110960956, "loss": 11.6702, "step": 44166 }, { "epoch": 0.9245373859164364, "grad_norm": 0.31215009093284607, "learning_rate": 0.0001567180253670109, "loss": 11.664, "step": 44167 }, { "epoch": 0.9245583186803985, "grad_norm": 0.41926026344299316, "learning_rate": 0.00015671621959714678, "loss": 11.6617, "step": 44168 }, { "epoch": 0.9245792514443607, "grad_norm": 0.2865964472293854, "learning_rate": 0.000156714413800018, "loss": 11.6491, "step": 44169 }, { "epoch": 0.9246001842083229, "grad_norm": 0.2789888381958008, "learning_rate": 0.0001567126079756255, "loss": 11.6562, "step": 44170 }, { "epoch": 0.924621116972285, "grad_norm": 0.2766152024269104, "learning_rate": 0.0001567108021239701, "loss": 11.6634, "step": 44171 }, { "epoch": 0.9246420497362472, "grad_norm": 0.30597123503685, "learning_rate": 0.00015670899624505269, "loss": 11.6583, "step": 44172 }, { "epoch": 0.9246629825002093, "grad_norm": 0.3852574825286865, "learning_rate": 0.0001567071903388741, "loss": 11.681, "step": 44173 }, { "epoch": 0.9246839152641715, "grad_norm": 0.27154919505119324, "learning_rate": 0.00015670538440543528, "loss": 11.6754, "step": 44174 }, { "epoch": 0.9247048480281337, "grad_norm": 0.32625892758369446, "learning_rate": 0.000156703578444737, "loss": 11.6567, "step": 44175 }, { "epoch": 0.9247257807920958, "grad_norm": 0.31855401396751404, "learning_rate": 0.00015670177245678018, "loss": 11.6783, "step": 44176 }, { "epoch": 0.924746713556058, "grad_norm": 0.38782060146331787, "learning_rate": 0.0001566999664415657, "loss": 11.6517, "step": 44177 }, { "epoch": 0.9247676463200201, "grad_norm": 0.3187311589717865, "learning_rate": 0.00015669816039909438, "loss": 11.6563, "step": 44178 }, { "epoch": 0.9247885790839823, "grad_norm": 0.25719499588012695, "learning_rate": 0.00015669635432936712, "loss": 11.6722, "step": 44179 }, { "epoch": 0.9248095118479444, "grad_norm": 0.27550676465034485, "learning_rate": 0.00015669454823238484, "loss": 11.6544, "step": 44180 }, { "epoch": 0.9248304446119066, "grad_norm": 0.38308843970298767, "learning_rate": 0.0001566927421081483, "loss": 11.6741, "step": 44181 }, { "epoch": 0.9248513773758688, "grad_norm": 0.33339056372642517, "learning_rate": 0.00015669093595665844, "loss": 11.652, "step": 44182 }, { "epoch": 0.9248723101398308, "grad_norm": 0.2861212193965912, "learning_rate": 0.00015668912977791612, "loss": 11.6617, "step": 44183 }, { "epoch": 0.924893242903793, "grad_norm": 0.35975027084350586, "learning_rate": 0.00015668732357192219, "loss": 11.6742, "step": 44184 }, { "epoch": 0.9249141756677551, "grad_norm": 0.36438873410224915, "learning_rate": 0.0001566855173386775, "loss": 11.6561, "step": 44185 }, { "epoch": 0.9249351084317173, "grad_norm": 0.3472166359424591, "learning_rate": 0.000156683711078183, "loss": 11.6579, "step": 44186 }, { "epoch": 0.9249560411956794, "grad_norm": 0.31308507919311523, "learning_rate": 0.00015668190479043947, "loss": 11.6659, "step": 44187 }, { "epoch": 0.9249769739596416, "grad_norm": 0.3257826268672943, "learning_rate": 0.00015668009847544782, "loss": 11.7036, "step": 44188 }, { "epoch": 0.9249979067236038, "grad_norm": 0.3564029932022095, "learning_rate": 0.0001566782921332089, "loss": 11.6696, "step": 44189 }, { "epoch": 0.9250188394875659, "grad_norm": 0.3535609841346741, "learning_rate": 0.0001566764857637236, "loss": 11.6806, "step": 44190 }, { "epoch": 0.9250397722515281, "grad_norm": 0.3251197338104248, "learning_rate": 0.0001566746793669928, "loss": 11.6575, "step": 44191 }, { "epoch": 0.9250607050154902, "grad_norm": 0.3215789496898651, "learning_rate": 0.0001566728729430173, "loss": 11.6579, "step": 44192 }, { "epoch": 0.9250816377794524, "grad_norm": 0.3430045545101166, "learning_rate": 0.00015667106649179803, "loss": 11.6652, "step": 44193 }, { "epoch": 0.9251025705434146, "grad_norm": 0.39277905225753784, "learning_rate": 0.00015666926001333583, "loss": 11.6537, "step": 44194 }, { "epoch": 0.9251235033073767, "grad_norm": 0.32661741971969604, "learning_rate": 0.00015666745350763163, "loss": 11.6676, "step": 44195 }, { "epoch": 0.9251444360713389, "grad_norm": 0.3468235731124878, "learning_rate": 0.00015666564697468623, "loss": 11.6757, "step": 44196 }, { "epoch": 0.925165368835301, "grad_norm": 0.27073678374290466, "learning_rate": 0.0001566638404145005, "loss": 11.6591, "step": 44197 }, { "epoch": 0.9251863015992632, "grad_norm": 0.33907443284988403, "learning_rate": 0.00015666203382707532, "loss": 11.6757, "step": 44198 }, { "epoch": 0.9252072343632253, "grad_norm": 0.31073933839797974, "learning_rate": 0.00015666022721241157, "loss": 11.6581, "step": 44199 }, { "epoch": 0.9252281671271875, "grad_norm": 0.29287973046302795, "learning_rate": 0.00015665842057051014, "loss": 11.6671, "step": 44200 }, { "epoch": 0.9252490998911497, "grad_norm": 0.3510626554489136, "learning_rate": 0.00015665661390137185, "loss": 11.6642, "step": 44201 }, { "epoch": 0.9252700326551118, "grad_norm": 0.36143624782562256, "learning_rate": 0.0001566548072049976, "loss": 11.6817, "step": 44202 }, { "epoch": 0.925290965419074, "grad_norm": 0.37101873755455017, "learning_rate": 0.00015665300048138824, "loss": 11.6647, "step": 44203 }, { "epoch": 0.925311898183036, "grad_norm": 0.2591080069541931, "learning_rate": 0.00015665119373054466, "loss": 11.6693, "step": 44204 }, { "epoch": 0.9253328309469983, "grad_norm": 0.3839907646179199, "learning_rate": 0.0001566493869524677, "loss": 11.6601, "step": 44205 }, { "epoch": 0.9253537637109603, "grad_norm": 0.28146353363990784, "learning_rate": 0.00015664758014715825, "loss": 11.6637, "step": 44206 }, { "epoch": 0.9253746964749225, "grad_norm": 0.3513466417789459, "learning_rate": 0.0001566457733146172, "loss": 11.6713, "step": 44207 }, { "epoch": 0.9253956292388847, "grad_norm": 0.38504162430763245, "learning_rate": 0.00015664396645484534, "loss": 11.6659, "step": 44208 }, { "epoch": 0.9254165620028468, "grad_norm": 0.27938663959503174, "learning_rate": 0.00015664215956784363, "loss": 11.6706, "step": 44209 }, { "epoch": 0.925437494766809, "grad_norm": 0.38851773738861084, "learning_rate": 0.00015664035265361287, "loss": 11.6658, "step": 44210 }, { "epoch": 0.9254584275307711, "grad_norm": 0.30619898438453674, "learning_rate": 0.00015663854571215397, "loss": 11.6653, "step": 44211 }, { "epoch": 0.9254793602947333, "grad_norm": 0.28999605774879456, "learning_rate": 0.00015663673874346778, "loss": 11.6579, "step": 44212 }, { "epoch": 0.9255002930586955, "grad_norm": 0.287432461977005, "learning_rate": 0.0001566349317475552, "loss": 11.6648, "step": 44213 }, { "epoch": 0.9255212258226576, "grad_norm": 0.2858746647834778, "learning_rate": 0.00015663312472441704, "loss": 11.665, "step": 44214 }, { "epoch": 0.9255421585866198, "grad_norm": 0.3194518983364105, "learning_rate": 0.00015663131767405423, "loss": 11.6644, "step": 44215 }, { "epoch": 0.9255630913505819, "grad_norm": 0.349942684173584, "learning_rate": 0.00015662951059646757, "loss": 11.6674, "step": 44216 }, { "epoch": 0.9255840241145441, "grad_norm": 0.3734959065914154, "learning_rate": 0.00015662770349165802, "loss": 11.6587, "step": 44217 }, { "epoch": 0.9256049568785062, "grad_norm": 0.28795450925827026, "learning_rate": 0.00015662589635962636, "loss": 11.6595, "step": 44218 }, { "epoch": 0.9256258896424684, "grad_norm": 0.2686452269554138, "learning_rate": 0.00015662408920037355, "loss": 11.6814, "step": 44219 }, { "epoch": 0.9256468224064306, "grad_norm": 0.31355568766593933, "learning_rate": 0.00015662228201390036, "loss": 11.6557, "step": 44220 }, { "epoch": 0.9256677551703927, "grad_norm": 0.29773616790771484, "learning_rate": 0.00015662047480020767, "loss": 11.6663, "step": 44221 }, { "epoch": 0.9256886879343549, "grad_norm": 0.2933875322341919, "learning_rate": 0.00015661866755929644, "loss": 11.6696, "step": 44222 }, { "epoch": 0.925709620698317, "grad_norm": 0.3859960436820984, "learning_rate": 0.00015661686029116748, "loss": 11.6557, "step": 44223 }, { "epoch": 0.9257305534622792, "grad_norm": 0.3212297856807709, "learning_rate": 0.00015661505299582163, "loss": 11.6642, "step": 44224 }, { "epoch": 0.9257514862262413, "grad_norm": 0.30563244223594666, "learning_rate": 0.0001566132456732598, "loss": 11.649, "step": 44225 }, { "epoch": 0.9257724189902035, "grad_norm": 0.2796228528022766, "learning_rate": 0.00015661143832348287, "loss": 11.6572, "step": 44226 }, { "epoch": 0.9257933517541657, "grad_norm": 0.26596367359161377, "learning_rate": 0.00015660963094649169, "loss": 11.692, "step": 44227 }, { "epoch": 0.9258142845181278, "grad_norm": 0.3202051818370819, "learning_rate": 0.00015660782354228709, "loss": 11.6869, "step": 44228 }, { "epoch": 0.92583521728209, "grad_norm": 0.35917988419532776, "learning_rate": 0.00015660601611087001, "loss": 11.684, "step": 44229 }, { "epoch": 0.925856150046052, "grad_norm": 0.3467434346675873, "learning_rate": 0.00015660420865224126, "loss": 11.6825, "step": 44230 }, { "epoch": 0.9258770828100142, "grad_norm": 0.26854637265205383, "learning_rate": 0.00015660240116640174, "loss": 11.664, "step": 44231 }, { "epoch": 0.9258980155739764, "grad_norm": 0.35466641187667847, "learning_rate": 0.00015660059365335232, "loss": 11.6802, "step": 44232 }, { "epoch": 0.9259189483379385, "grad_norm": 0.311555951833725, "learning_rate": 0.00015659878611309384, "loss": 11.6601, "step": 44233 }, { "epoch": 0.9259398811019007, "grad_norm": 0.4538800120353699, "learning_rate": 0.0001565969785456272, "loss": 11.6853, "step": 44234 }, { "epoch": 0.9259608138658628, "grad_norm": 0.28212660551071167, "learning_rate": 0.00015659517095095325, "loss": 11.6668, "step": 44235 }, { "epoch": 0.925981746629825, "grad_norm": 0.21773584187030792, "learning_rate": 0.00015659336332907292, "loss": 11.6534, "step": 44236 }, { "epoch": 0.9260026793937871, "grad_norm": 0.33217301964759827, "learning_rate": 0.00015659155567998695, "loss": 11.6745, "step": 44237 }, { "epoch": 0.9260236121577493, "grad_norm": 0.36272484064102173, "learning_rate": 0.00015658974800369634, "loss": 11.6784, "step": 44238 }, { "epoch": 0.9260445449217115, "grad_norm": 0.404010534286499, "learning_rate": 0.00015658794030020187, "loss": 11.6723, "step": 44239 }, { "epoch": 0.9260654776856736, "grad_norm": 0.3615671694278717, "learning_rate": 0.00015658613256950443, "loss": 11.6886, "step": 44240 }, { "epoch": 0.9260864104496358, "grad_norm": 0.3016502261161804, "learning_rate": 0.00015658432481160494, "loss": 11.6586, "step": 44241 }, { "epoch": 0.9261073432135979, "grad_norm": 0.3203132450580597, "learning_rate": 0.0001565825170265042, "loss": 11.6751, "step": 44242 }, { "epoch": 0.9261282759775601, "grad_norm": 0.2934265732765198, "learning_rate": 0.00015658070921420315, "loss": 11.6858, "step": 44243 }, { "epoch": 0.9261492087415222, "grad_norm": 0.3958497643470764, "learning_rate": 0.0001565789013747026, "loss": 11.6746, "step": 44244 }, { "epoch": 0.9261701415054844, "grad_norm": 0.2528696656227112, "learning_rate": 0.0001565770935080034, "loss": 11.6786, "step": 44245 }, { "epoch": 0.9261910742694466, "grad_norm": 0.29506558179855347, "learning_rate": 0.00015657528561410653, "loss": 11.6688, "step": 44246 }, { "epoch": 0.9262120070334087, "grad_norm": 0.34509745240211487, "learning_rate": 0.00015657347769301274, "loss": 11.6904, "step": 44247 }, { "epoch": 0.9262329397973709, "grad_norm": 0.29142266511917114, "learning_rate": 0.00015657166974472294, "loss": 11.694, "step": 44248 }, { "epoch": 0.926253872561333, "grad_norm": 0.28254079818725586, "learning_rate": 0.000156569861769238, "loss": 11.6707, "step": 44249 }, { "epoch": 0.9262748053252952, "grad_norm": 0.3218870460987091, "learning_rate": 0.00015656805376655883, "loss": 11.6718, "step": 44250 }, { "epoch": 0.9262957380892574, "grad_norm": 0.3844239413738251, "learning_rate": 0.00015656624573668625, "loss": 11.6658, "step": 44251 }, { "epoch": 0.9263166708532194, "grad_norm": 0.2943357229232788, "learning_rate": 0.00015656443767962116, "loss": 11.681, "step": 44252 }, { "epoch": 0.9263376036171816, "grad_norm": 0.36786213517189026, "learning_rate": 0.00015656262959536438, "loss": 11.6703, "step": 44253 }, { "epoch": 0.9263585363811437, "grad_norm": 0.44440680742263794, "learning_rate": 0.00015656082148391678, "loss": 11.6735, "step": 44254 }, { "epoch": 0.9263794691451059, "grad_norm": 0.29323166608810425, "learning_rate": 0.00015655901334527933, "loss": 11.6742, "step": 44255 }, { "epoch": 0.926400401909068, "grad_norm": 0.27811285853385925, "learning_rate": 0.0001565572051794528, "loss": 11.6711, "step": 44256 }, { "epoch": 0.9264213346730302, "grad_norm": 0.3322250247001648, "learning_rate": 0.0001565553969864381, "loss": 11.6576, "step": 44257 }, { "epoch": 0.9264422674369924, "grad_norm": 0.2911777198314667, "learning_rate": 0.00015655358876623607, "loss": 11.6536, "step": 44258 }, { "epoch": 0.9264632002009545, "grad_norm": 0.31016552448272705, "learning_rate": 0.00015655178051884758, "loss": 11.6735, "step": 44259 }, { "epoch": 0.9264841329649167, "grad_norm": 0.2780207395553589, "learning_rate": 0.00015654997224427356, "loss": 11.6742, "step": 44260 }, { "epoch": 0.9265050657288788, "grad_norm": 0.2736286222934723, "learning_rate": 0.0001565481639425148, "loss": 11.6635, "step": 44261 }, { "epoch": 0.926525998492841, "grad_norm": 0.2904680669307709, "learning_rate": 0.0001565463556135722, "loss": 11.6705, "step": 44262 }, { "epoch": 0.9265469312568031, "grad_norm": 0.2839343547821045, "learning_rate": 0.00015654454725744666, "loss": 11.6864, "step": 44263 }, { "epoch": 0.9265678640207653, "grad_norm": 0.321893572807312, "learning_rate": 0.000156542738874139, "loss": 11.6531, "step": 44264 }, { "epoch": 0.9265887967847275, "grad_norm": 0.2959725558757782, "learning_rate": 0.00015654093046365013, "loss": 11.6653, "step": 44265 }, { "epoch": 0.9266097295486896, "grad_norm": 0.32960450649261475, "learning_rate": 0.0001565391220259809, "loss": 11.6565, "step": 44266 }, { "epoch": 0.9266306623126518, "grad_norm": 0.3654397130012512, "learning_rate": 0.00015653731356113216, "loss": 11.662, "step": 44267 }, { "epoch": 0.9266515950766139, "grad_norm": 0.2746078372001648, "learning_rate": 0.00015653550506910483, "loss": 11.6666, "step": 44268 }, { "epoch": 0.9266725278405761, "grad_norm": 0.2755185067653656, "learning_rate": 0.00015653369654989972, "loss": 11.6711, "step": 44269 }, { "epoch": 0.9266934606045383, "grad_norm": 0.2987179458141327, "learning_rate": 0.00015653188800351776, "loss": 11.6717, "step": 44270 }, { "epoch": 0.9267143933685004, "grad_norm": 0.3322546184062958, "learning_rate": 0.00015653007942995978, "loss": 11.6841, "step": 44271 }, { "epoch": 0.9267353261324626, "grad_norm": 0.3117985725402832, "learning_rate": 0.00015652827082922666, "loss": 11.6613, "step": 44272 }, { "epoch": 0.9267562588964247, "grad_norm": 0.27075833082199097, "learning_rate": 0.00015652646220131925, "loss": 11.669, "step": 44273 }, { "epoch": 0.9267771916603869, "grad_norm": 0.29064318537712097, "learning_rate": 0.00015652465354623843, "loss": 11.65, "step": 44274 }, { "epoch": 0.926798124424349, "grad_norm": 0.2685932219028473, "learning_rate": 0.0001565228448639851, "loss": 11.6569, "step": 44275 }, { "epoch": 0.9268190571883111, "grad_norm": 0.3350415825843811, "learning_rate": 0.0001565210361545601, "loss": 11.6725, "step": 44276 }, { "epoch": 0.9268399899522733, "grad_norm": 0.21387793123722076, "learning_rate": 0.00015651922741796433, "loss": 11.6663, "step": 44277 }, { "epoch": 0.9268609227162354, "grad_norm": 0.27231064438819885, "learning_rate": 0.00015651741865419858, "loss": 11.6662, "step": 44278 }, { "epoch": 0.9268818554801976, "grad_norm": 0.29531604051589966, "learning_rate": 0.00015651560986326381, "loss": 11.6778, "step": 44279 }, { "epoch": 0.9269027882441597, "grad_norm": 0.29089418053627014, "learning_rate": 0.00015651380104516084, "loss": 11.6891, "step": 44280 }, { "epoch": 0.9269237210081219, "grad_norm": 0.28582799434661865, "learning_rate": 0.00015651199219989057, "loss": 11.6632, "step": 44281 }, { "epoch": 0.926944653772084, "grad_norm": 0.2797465920448303, "learning_rate": 0.00015651018332745383, "loss": 11.6696, "step": 44282 }, { "epoch": 0.9269655865360462, "grad_norm": 0.3441106379032135, "learning_rate": 0.00015650837442785154, "loss": 11.6626, "step": 44283 }, { "epoch": 0.9269865193000084, "grad_norm": 0.34568604826927185, "learning_rate": 0.00015650656550108454, "loss": 11.6737, "step": 44284 }, { "epoch": 0.9270074520639705, "grad_norm": 0.2579672038555145, "learning_rate": 0.00015650475654715366, "loss": 11.6531, "step": 44285 }, { "epoch": 0.9270283848279327, "grad_norm": 0.2743752598762512, "learning_rate": 0.00015650294756605986, "loss": 11.6624, "step": 44286 }, { "epoch": 0.9270493175918948, "grad_norm": 0.27839916944503784, "learning_rate": 0.00015650113855780393, "loss": 11.6713, "step": 44287 }, { "epoch": 0.927070250355857, "grad_norm": 0.3567608892917633, "learning_rate": 0.00015649932952238677, "loss": 11.6818, "step": 44288 }, { "epoch": 0.9270911831198192, "grad_norm": 0.3325870633125305, "learning_rate": 0.00015649752045980927, "loss": 11.6632, "step": 44289 }, { "epoch": 0.9271121158837813, "grad_norm": 0.3233419358730316, "learning_rate": 0.0001564957113700723, "loss": 11.6626, "step": 44290 }, { "epoch": 0.9271330486477435, "grad_norm": 0.24506565928459167, "learning_rate": 0.00015649390225317667, "loss": 11.6683, "step": 44291 }, { "epoch": 0.9271539814117056, "grad_norm": 0.305865615606308, "learning_rate": 0.00015649209310912327, "loss": 11.6676, "step": 44292 }, { "epoch": 0.9271749141756678, "grad_norm": 0.3578222692012787, "learning_rate": 0.00015649028393791302, "loss": 11.6723, "step": 44293 }, { "epoch": 0.9271958469396299, "grad_norm": 0.274003803730011, "learning_rate": 0.00015648847473954676, "loss": 11.6766, "step": 44294 }, { "epoch": 0.9272167797035921, "grad_norm": 0.2962898015975952, "learning_rate": 0.00015648666551402535, "loss": 11.6661, "step": 44295 }, { "epoch": 0.9272377124675543, "grad_norm": 0.27585965394973755, "learning_rate": 0.00015648485626134967, "loss": 11.6726, "step": 44296 }, { "epoch": 0.9272586452315164, "grad_norm": 0.28709855675697327, "learning_rate": 0.0001564830469815206, "loss": 11.6554, "step": 44297 }, { "epoch": 0.9272795779954786, "grad_norm": 0.3805713355541229, "learning_rate": 0.000156481237674539, "loss": 11.6691, "step": 44298 }, { "epoch": 0.9273005107594406, "grad_norm": 0.2697571814060211, "learning_rate": 0.0001564794283404057, "loss": 11.6681, "step": 44299 }, { "epoch": 0.9273214435234028, "grad_norm": 0.3338337242603302, "learning_rate": 0.00015647761897912162, "loss": 11.6631, "step": 44300 }, { "epoch": 0.9273423762873649, "grad_norm": 0.2881486415863037, "learning_rate": 0.00015647580959068767, "loss": 11.6659, "step": 44301 }, { "epoch": 0.9273633090513271, "grad_norm": 0.3597889542579651, "learning_rate": 0.00015647400017510462, "loss": 11.6676, "step": 44302 }, { "epoch": 0.9273842418152893, "grad_norm": 0.2534291446208954, "learning_rate": 0.00015647219073237337, "loss": 11.6641, "step": 44303 }, { "epoch": 0.9274051745792514, "grad_norm": 0.2608741819858551, "learning_rate": 0.00015647038126249483, "loss": 11.6742, "step": 44304 }, { "epoch": 0.9274261073432136, "grad_norm": 0.26504242420196533, "learning_rate": 0.00015646857176546984, "loss": 11.6825, "step": 44305 }, { "epoch": 0.9274470401071757, "grad_norm": 0.30963388085365295, "learning_rate": 0.00015646676224129928, "loss": 11.6732, "step": 44306 }, { "epoch": 0.9274679728711379, "grad_norm": 0.33544856309890747, "learning_rate": 0.000156464952689984, "loss": 11.6649, "step": 44307 }, { "epoch": 0.9274889056351001, "grad_norm": 0.33117446303367615, "learning_rate": 0.00015646314311152493, "loss": 11.6625, "step": 44308 }, { "epoch": 0.9275098383990622, "grad_norm": 0.26515457034111023, "learning_rate": 0.00015646133350592285, "loss": 11.6852, "step": 44309 }, { "epoch": 0.9275307711630244, "grad_norm": 0.3241927921772003, "learning_rate": 0.0001564595238731787, "loss": 11.6538, "step": 44310 }, { "epoch": 0.9275517039269865, "grad_norm": 0.2606949210166931, "learning_rate": 0.00015645771421329328, "loss": 11.6638, "step": 44311 }, { "epoch": 0.9275726366909487, "grad_norm": 0.2887609899044037, "learning_rate": 0.00015645590452626756, "loss": 11.6619, "step": 44312 }, { "epoch": 0.9275935694549108, "grad_norm": 0.4100254774093628, "learning_rate": 0.00015645409481210236, "loss": 11.6828, "step": 44313 }, { "epoch": 0.927614502218873, "grad_norm": 0.29078394174575806, "learning_rate": 0.0001564522850707985, "loss": 11.6658, "step": 44314 }, { "epoch": 0.9276354349828352, "grad_norm": 0.32885152101516724, "learning_rate": 0.00015645047530235692, "loss": 11.6938, "step": 44315 }, { "epoch": 0.9276563677467973, "grad_norm": 0.3016411364078522, "learning_rate": 0.00015644866550677846, "loss": 11.6696, "step": 44316 }, { "epoch": 0.9276773005107595, "grad_norm": 0.2899935245513916, "learning_rate": 0.000156446855684064, "loss": 11.6835, "step": 44317 }, { "epoch": 0.9276982332747216, "grad_norm": 0.29589399695396423, "learning_rate": 0.00015644504583421442, "loss": 11.6705, "step": 44318 }, { "epoch": 0.9277191660386838, "grad_norm": 0.32766732573509216, "learning_rate": 0.00015644323595723054, "loss": 11.6664, "step": 44319 }, { "epoch": 0.9277400988026459, "grad_norm": 0.25342392921447754, "learning_rate": 0.00015644142605311328, "loss": 11.6617, "step": 44320 }, { "epoch": 0.927761031566608, "grad_norm": 0.346976101398468, "learning_rate": 0.00015643961612186351, "loss": 11.6853, "step": 44321 }, { "epoch": 0.9277819643305703, "grad_norm": 0.2730723023414612, "learning_rate": 0.0001564378061634821, "loss": 11.671, "step": 44322 }, { "epoch": 0.9278028970945323, "grad_norm": 0.2927006483078003, "learning_rate": 0.00015643599617796986, "loss": 11.6572, "step": 44323 }, { "epoch": 0.9278238298584945, "grad_norm": 0.3217451572418213, "learning_rate": 0.00015643418616532776, "loss": 11.6759, "step": 44324 }, { "epoch": 0.9278447626224566, "grad_norm": 0.28581932187080383, "learning_rate": 0.00015643237612555656, "loss": 11.6653, "step": 44325 }, { "epoch": 0.9278656953864188, "grad_norm": 0.38272997736930847, "learning_rate": 0.0001564305660586572, "loss": 11.6853, "step": 44326 }, { "epoch": 0.9278866281503809, "grad_norm": 0.3105188012123108, "learning_rate": 0.00015642875596463056, "loss": 11.6732, "step": 44327 }, { "epoch": 0.9279075609143431, "grad_norm": 0.28305402398109436, "learning_rate": 0.00015642694584347747, "loss": 11.6625, "step": 44328 }, { "epoch": 0.9279284936783053, "grad_norm": 0.3365149199962616, "learning_rate": 0.0001564251356951988, "loss": 11.6849, "step": 44329 }, { "epoch": 0.9279494264422674, "grad_norm": 0.3034903407096863, "learning_rate": 0.00015642332551979546, "loss": 11.6684, "step": 44330 }, { "epoch": 0.9279703592062296, "grad_norm": 0.32304099202156067, "learning_rate": 0.00015642151531726828, "loss": 11.6566, "step": 44331 }, { "epoch": 0.9279912919701917, "grad_norm": 0.2813173234462738, "learning_rate": 0.00015641970508761815, "loss": 11.6528, "step": 44332 }, { "epoch": 0.9280122247341539, "grad_norm": 0.3540897071361542, "learning_rate": 0.00015641789483084595, "loss": 11.674, "step": 44333 }, { "epoch": 0.9280331574981161, "grad_norm": 0.36908042430877686, "learning_rate": 0.00015641608454695254, "loss": 11.6727, "step": 44334 }, { "epoch": 0.9280540902620782, "grad_norm": 0.2666456401348114, "learning_rate": 0.00015641427423593876, "loss": 11.6646, "step": 44335 }, { "epoch": 0.9280750230260404, "grad_norm": 0.3012525141239166, "learning_rate": 0.00015641246389780554, "loss": 11.6829, "step": 44336 }, { "epoch": 0.9280959557900025, "grad_norm": 0.26598724722862244, "learning_rate": 0.0001564106535325537, "loss": 11.6726, "step": 44337 }, { "epoch": 0.9281168885539647, "grad_norm": 0.26423075795173645, "learning_rate": 0.00015640884314018413, "loss": 11.6711, "step": 44338 }, { "epoch": 0.9281378213179268, "grad_norm": 0.28527334332466125, "learning_rate": 0.00015640703272069771, "loss": 11.662, "step": 44339 }, { "epoch": 0.928158754081889, "grad_norm": 0.25691619515419006, "learning_rate": 0.00015640522227409527, "loss": 11.6704, "step": 44340 }, { "epoch": 0.9281796868458512, "grad_norm": 0.3267049491405487, "learning_rate": 0.00015640341180037773, "loss": 11.6624, "step": 44341 }, { "epoch": 0.9282006196098133, "grad_norm": 0.2932853102684021, "learning_rate": 0.00015640160129954596, "loss": 11.6563, "step": 44342 }, { "epoch": 0.9282215523737755, "grad_norm": 0.32028523087501526, "learning_rate": 0.00015639979077160078, "loss": 11.6845, "step": 44343 }, { "epoch": 0.9282424851377375, "grad_norm": 0.31558215618133545, "learning_rate": 0.0001563979802165431, "loss": 11.6607, "step": 44344 }, { "epoch": 0.9282634179016997, "grad_norm": 0.24755160510540009, "learning_rate": 0.00015639616963437377, "loss": 11.6528, "step": 44345 }, { "epoch": 0.9282843506656618, "grad_norm": 0.36130574345588684, "learning_rate": 0.0001563943590250937, "loss": 11.6664, "step": 44346 }, { "epoch": 0.928305283429624, "grad_norm": 0.30594924092292786, "learning_rate": 0.00015639254838870369, "loss": 11.6756, "step": 44347 }, { "epoch": 0.9283262161935862, "grad_norm": 0.3694455623626709, "learning_rate": 0.0001563907377252047, "loss": 11.6719, "step": 44348 }, { "epoch": 0.9283471489575483, "grad_norm": 0.29206159710884094, "learning_rate": 0.0001563889270345975, "loss": 11.6802, "step": 44349 }, { "epoch": 0.9283680817215105, "grad_norm": 0.3476254642009735, "learning_rate": 0.00015638711631688306, "loss": 11.6767, "step": 44350 }, { "epoch": 0.9283890144854726, "grad_norm": 0.38678061962127686, "learning_rate": 0.0001563853055720622, "loss": 11.6695, "step": 44351 }, { "epoch": 0.9284099472494348, "grad_norm": 0.35067659616470337, "learning_rate": 0.00015638349480013574, "loss": 11.6765, "step": 44352 }, { "epoch": 0.928430880013397, "grad_norm": 0.5248422026634216, "learning_rate": 0.00015638168400110465, "loss": 11.6761, "step": 44353 }, { "epoch": 0.9284518127773591, "grad_norm": 0.28894558548927307, "learning_rate": 0.00015637987317496975, "loss": 11.6815, "step": 44354 }, { "epoch": 0.9284727455413213, "grad_norm": 0.3293212354183197, "learning_rate": 0.0001563780623217319, "loss": 11.6539, "step": 44355 }, { "epoch": 0.9284936783052834, "grad_norm": 0.2855997681617737, "learning_rate": 0.000156376251441392, "loss": 11.6528, "step": 44356 }, { "epoch": 0.9285146110692456, "grad_norm": 0.32992276549339294, "learning_rate": 0.0001563744405339509, "loss": 11.6886, "step": 44357 }, { "epoch": 0.9285355438332077, "grad_norm": 0.49931684136390686, "learning_rate": 0.00015637262959940947, "loss": 11.6626, "step": 44358 }, { "epoch": 0.9285564765971699, "grad_norm": 0.3597486913204193, "learning_rate": 0.00015637081863776863, "loss": 11.6766, "step": 44359 }, { "epoch": 0.9285774093611321, "grad_norm": 0.25176623463630676, "learning_rate": 0.00015636900764902915, "loss": 11.6567, "step": 44360 }, { "epoch": 0.9285983421250942, "grad_norm": 0.38245144486427307, "learning_rate": 0.000156367196633192, "loss": 11.5999, "step": 44361 }, { "epoch": 0.9286192748890564, "grad_norm": 0.29136407375335693, "learning_rate": 0.00015636538559025796, "loss": 11.6684, "step": 44362 }, { "epoch": 0.9286402076530185, "grad_norm": 0.32101330161094666, "learning_rate": 0.000156363574520228, "loss": 11.6729, "step": 44363 }, { "epoch": 0.9286611404169807, "grad_norm": 0.3621915578842163, "learning_rate": 0.0001563617634231029, "loss": 11.6637, "step": 44364 }, { "epoch": 0.9286820731809428, "grad_norm": 0.3571178615093231, "learning_rate": 0.00015635995229888361, "loss": 11.6695, "step": 44365 }, { "epoch": 0.928703005944905, "grad_norm": 0.29405879974365234, "learning_rate": 0.00015635814114757095, "loss": 11.6588, "step": 44366 }, { "epoch": 0.9287239387088672, "grad_norm": 0.3314550518989563, "learning_rate": 0.00015635632996916578, "loss": 11.6676, "step": 44367 }, { "epoch": 0.9287448714728292, "grad_norm": 0.26408126950263977, "learning_rate": 0.00015635451876366903, "loss": 11.6809, "step": 44368 }, { "epoch": 0.9287658042367914, "grad_norm": 0.37833714485168457, "learning_rate": 0.0001563527075310815, "loss": 11.6743, "step": 44369 }, { "epoch": 0.9287867370007535, "grad_norm": 0.3467799127101898, "learning_rate": 0.00015635089627140408, "loss": 11.6686, "step": 44370 }, { "epoch": 0.9288076697647157, "grad_norm": 0.3502539396286011, "learning_rate": 0.00015634908498463773, "loss": 11.6675, "step": 44371 }, { "epoch": 0.9288286025286779, "grad_norm": 0.30462050437927246, "learning_rate": 0.00015634727367078318, "loss": 11.6589, "step": 44372 }, { "epoch": 0.92884953529264, "grad_norm": 0.3437632918357849, "learning_rate": 0.00015634546232984138, "loss": 11.6768, "step": 44373 }, { "epoch": 0.9288704680566022, "grad_norm": 0.2786891758441925, "learning_rate": 0.00015634365096181318, "loss": 11.6869, "step": 44374 }, { "epoch": 0.9288914008205643, "grad_norm": 0.2829936146736145, "learning_rate": 0.00015634183956669945, "loss": 11.6542, "step": 44375 }, { "epoch": 0.9289123335845265, "grad_norm": 0.369220495223999, "learning_rate": 0.00015634002814450112, "loss": 11.6669, "step": 44376 }, { "epoch": 0.9289332663484886, "grad_norm": 0.3222444951534271, "learning_rate": 0.00015633821669521896, "loss": 11.6761, "step": 44377 }, { "epoch": 0.9289541991124508, "grad_norm": 0.2857821583747864, "learning_rate": 0.00015633640521885392, "loss": 11.6738, "step": 44378 }, { "epoch": 0.928975131876413, "grad_norm": 0.37702620029449463, "learning_rate": 0.0001563345937154068, "loss": 11.6537, "step": 44379 }, { "epoch": 0.9289960646403751, "grad_norm": 0.36813104152679443, "learning_rate": 0.00015633278218487852, "loss": 11.6813, "step": 44380 }, { "epoch": 0.9290169974043373, "grad_norm": 0.3763153851032257, "learning_rate": 0.00015633097062726998, "loss": 11.6787, "step": 44381 }, { "epoch": 0.9290379301682994, "grad_norm": 0.3359719216823578, "learning_rate": 0.00015632915904258197, "loss": 11.6704, "step": 44382 }, { "epoch": 0.9290588629322616, "grad_norm": 0.32855916023254395, "learning_rate": 0.00015632734743081548, "loss": 11.6638, "step": 44383 }, { "epoch": 0.9290797956962237, "grad_norm": 0.33683088421821594, "learning_rate": 0.00015632553579197125, "loss": 11.666, "step": 44384 }, { "epoch": 0.9291007284601859, "grad_norm": 0.35298535227775574, "learning_rate": 0.00015632372412605018, "loss": 11.6654, "step": 44385 }, { "epoch": 0.9291216612241481, "grad_norm": 0.48244357109069824, "learning_rate": 0.00015632191243305322, "loss": 11.688, "step": 44386 }, { "epoch": 0.9291425939881102, "grad_norm": 0.345563679933548, "learning_rate": 0.00015632010071298116, "loss": 11.6805, "step": 44387 }, { "epoch": 0.9291635267520724, "grad_norm": 0.31932970881462097, "learning_rate": 0.00015631828896583492, "loss": 11.6629, "step": 44388 }, { "epoch": 0.9291844595160345, "grad_norm": 0.3544057011604309, "learning_rate": 0.00015631647719161534, "loss": 11.6533, "step": 44389 }, { "epoch": 0.9292053922799967, "grad_norm": 0.340553343296051, "learning_rate": 0.00015631466539032328, "loss": 11.6766, "step": 44390 }, { "epoch": 0.9292263250439589, "grad_norm": 0.25703081488609314, "learning_rate": 0.00015631285356195964, "loss": 11.6735, "step": 44391 }, { "epoch": 0.929247257807921, "grad_norm": 0.2682408392429352, "learning_rate": 0.0001563110417065253, "loss": 11.6723, "step": 44392 }, { "epoch": 0.9292681905718831, "grad_norm": 0.3283361792564392, "learning_rate": 0.0001563092298240211, "loss": 11.6691, "step": 44393 }, { "epoch": 0.9292891233358452, "grad_norm": 0.3228759765625, "learning_rate": 0.00015630741791444793, "loss": 11.6717, "step": 44394 }, { "epoch": 0.9293100560998074, "grad_norm": 0.27069100737571716, "learning_rate": 0.00015630560597780665, "loss": 11.6641, "step": 44395 }, { "epoch": 0.9293309888637695, "grad_norm": 0.36142754554748535, "learning_rate": 0.00015630379401409815, "loss": 11.6833, "step": 44396 }, { "epoch": 0.9293519216277317, "grad_norm": 0.30997925996780396, "learning_rate": 0.00015630198202332328, "loss": 11.6617, "step": 44397 }, { "epoch": 0.9293728543916939, "grad_norm": 0.2640393376350403, "learning_rate": 0.00015630017000548292, "loss": 11.6723, "step": 44398 }, { "epoch": 0.929393787155656, "grad_norm": 0.28199419379234314, "learning_rate": 0.00015629835796057793, "loss": 11.676, "step": 44399 }, { "epoch": 0.9294147199196182, "grad_norm": 0.2622981667518616, "learning_rate": 0.00015629654588860921, "loss": 11.6761, "step": 44400 }, { "epoch": 0.9294356526835803, "grad_norm": 0.32756197452545166, "learning_rate": 0.00015629473378957757, "loss": 11.6748, "step": 44401 }, { "epoch": 0.9294565854475425, "grad_norm": 0.35731038451194763, "learning_rate": 0.00015629292166348394, "loss": 11.6846, "step": 44402 }, { "epoch": 0.9294775182115046, "grad_norm": 0.24498608708381653, "learning_rate": 0.00015629110951032922, "loss": 11.6639, "step": 44403 }, { "epoch": 0.9294984509754668, "grad_norm": 0.3402044177055359, "learning_rate": 0.0001562892973301142, "loss": 11.6722, "step": 44404 }, { "epoch": 0.929519383739429, "grad_norm": 0.27434152364730835, "learning_rate": 0.00015628748512283978, "loss": 11.6651, "step": 44405 }, { "epoch": 0.9295403165033911, "grad_norm": 0.29219600558280945, "learning_rate": 0.00015628567288850686, "loss": 11.6613, "step": 44406 }, { "epoch": 0.9295612492673533, "grad_norm": 0.3108076751232147, "learning_rate": 0.0001562838606271163, "loss": 11.6558, "step": 44407 }, { "epoch": 0.9295821820313154, "grad_norm": 0.34844356775283813, "learning_rate": 0.00015628204833866892, "loss": 11.6862, "step": 44408 }, { "epoch": 0.9296031147952776, "grad_norm": 0.5998864769935608, "learning_rate": 0.00015628023602316563, "loss": 11.6573, "step": 44409 }, { "epoch": 0.9296240475592398, "grad_norm": 0.2921004593372345, "learning_rate": 0.00015627842368060732, "loss": 11.6581, "step": 44410 }, { "epoch": 0.9296449803232019, "grad_norm": 0.27458319067955017, "learning_rate": 0.00015627661131099486, "loss": 11.6632, "step": 44411 }, { "epoch": 0.9296659130871641, "grad_norm": 0.32334333658218384, "learning_rate": 0.0001562747989143291, "loss": 11.6573, "step": 44412 }, { "epoch": 0.9296868458511262, "grad_norm": 0.33932769298553467, "learning_rate": 0.0001562729864906109, "loss": 11.6669, "step": 44413 }, { "epoch": 0.9297077786150884, "grad_norm": 0.24663421511650085, "learning_rate": 0.00015627117403984114, "loss": 11.674, "step": 44414 }, { "epoch": 0.9297287113790504, "grad_norm": 0.4205668568611145, "learning_rate": 0.00015626936156202072, "loss": 11.6521, "step": 44415 }, { "epoch": 0.9297496441430126, "grad_norm": 0.31137382984161377, "learning_rate": 0.0001562675490571505, "loss": 11.6656, "step": 44416 }, { "epoch": 0.9297705769069748, "grad_norm": 0.33231887221336365, "learning_rate": 0.00015626573652523132, "loss": 11.6705, "step": 44417 }, { "epoch": 0.9297915096709369, "grad_norm": 0.3405073285102844, "learning_rate": 0.00015626392396626409, "loss": 11.6657, "step": 44418 }, { "epoch": 0.9298124424348991, "grad_norm": 0.29073891043663025, "learning_rate": 0.00015626211138024964, "loss": 11.6712, "step": 44419 }, { "epoch": 0.9298333751988612, "grad_norm": 0.24587014317512512, "learning_rate": 0.00015626029876718888, "loss": 11.6515, "step": 44420 }, { "epoch": 0.9298543079628234, "grad_norm": 0.27077993750572205, "learning_rate": 0.00015625848612708267, "loss": 11.671, "step": 44421 }, { "epoch": 0.9298752407267855, "grad_norm": 0.3973686993122101, "learning_rate": 0.00015625667345993188, "loss": 11.6731, "step": 44422 }, { "epoch": 0.9298961734907477, "grad_norm": 0.36873742938041687, "learning_rate": 0.00015625486076573737, "loss": 11.6767, "step": 44423 }, { "epoch": 0.9299171062547099, "grad_norm": 0.3579637110233307, "learning_rate": 0.00015625304804450002, "loss": 11.6797, "step": 44424 }, { "epoch": 0.929938039018672, "grad_norm": 0.33067235350608826, "learning_rate": 0.0001562512352962207, "loss": 11.6576, "step": 44425 }, { "epoch": 0.9299589717826342, "grad_norm": 0.3448628783226013, "learning_rate": 0.00015624942252090028, "loss": 11.6654, "step": 44426 }, { "epoch": 0.9299799045465963, "grad_norm": 0.28554579615592957, "learning_rate": 0.00015624760971853966, "loss": 11.684, "step": 44427 }, { "epoch": 0.9300008373105585, "grad_norm": 0.44458192586898804, "learning_rate": 0.00015624579688913967, "loss": 11.6833, "step": 44428 }, { "epoch": 0.9300217700745207, "grad_norm": 0.3484940528869629, "learning_rate": 0.0001562439840327012, "loss": 11.6683, "step": 44429 }, { "epoch": 0.9300427028384828, "grad_norm": 0.3274620473384857, "learning_rate": 0.00015624217114922515, "loss": 11.6703, "step": 44430 }, { "epoch": 0.930063635602445, "grad_norm": 0.2853965759277344, "learning_rate": 0.0001562403582387123, "loss": 11.6766, "step": 44431 }, { "epoch": 0.9300845683664071, "grad_norm": 0.31740087270736694, "learning_rate": 0.00015623854530116363, "loss": 11.6604, "step": 44432 }, { "epoch": 0.9301055011303693, "grad_norm": 0.366184800863266, "learning_rate": 0.00015623673233657997, "loss": 11.6563, "step": 44433 }, { "epoch": 0.9301264338943314, "grad_norm": 0.2931063175201416, "learning_rate": 0.00015623491934496216, "loss": 11.6737, "step": 44434 }, { "epoch": 0.9301473666582936, "grad_norm": 0.30800995230674744, "learning_rate": 0.00015623310632631107, "loss": 11.676, "step": 44435 }, { "epoch": 0.9301682994222558, "grad_norm": 0.37350693345069885, "learning_rate": 0.00015623129328062765, "loss": 11.6846, "step": 44436 }, { "epoch": 0.9301892321862179, "grad_norm": 0.2978704869747162, "learning_rate": 0.0001562294802079127, "loss": 11.6589, "step": 44437 }, { "epoch": 0.93021016495018, "grad_norm": 0.29728424549102783, "learning_rate": 0.0001562276671081671, "loss": 11.6664, "step": 44438 }, { "epoch": 0.9302310977141421, "grad_norm": 0.29024267196655273, "learning_rate": 0.00015622585398139175, "loss": 11.6748, "step": 44439 }, { "epoch": 0.9302520304781043, "grad_norm": 0.42899054288864136, "learning_rate": 0.0001562240408275875, "loss": 11.6409, "step": 44440 }, { "epoch": 0.9302729632420664, "grad_norm": 0.3388327956199646, "learning_rate": 0.00015622222764675523, "loss": 11.6704, "step": 44441 }, { "epoch": 0.9302938960060286, "grad_norm": 0.321062833070755, "learning_rate": 0.00015622041443889582, "loss": 11.6596, "step": 44442 }, { "epoch": 0.9303148287699908, "grad_norm": 0.3396211266517639, "learning_rate": 0.00015621860120401012, "loss": 11.6649, "step": 44443 }, { "epoch": 0.9303357615339529, "grad_norm": 0.30394458770751953, "learning_rate": 0.000156216787942099, "loss": 11.6714, "step": 44444 }, { "epoch": 0.9303566942979151, "grad_norm": 0.4025353193283081, "learning_rate": 0.00015621497465316336, "loss": 11.6779, "step": 44445 }, { "epoch": 0.9303776270618772, "grad_norm": 0.30789506435394287, "learning_rate": 0.00015621316133720403, "loss": 11.6487, "step": 44446 }, { "epoch": 0.9303985598258394, "grad_norm": 0.3641672432422638, "learning_rate": 0.00015621134799422194, "loss": 11.6805, "step": 44447 }, { "epoch": 0.9304194925898016, "grad_norm": 0.27010902762413025, "learning_rate": 0.00015620953462421792, "loss": 11.676, "step": 44448 }, { "epoch": 0.9304404253537637, "grad_norm": 0.30495989322662354, "learning_rate": 0.00015620772122719285, "loss": 11.6687, "step": 44449 }, { "epoch": 0.9304613581177259, "grad_norm": 0.43184706568717957, "learning_rate": 0.00015620590780314757, "loss": 11.6496, "step": 44450 }, { "epoch": 0.930482290881688, "grad_norm": 0.3073213994503021, "learning_rate": 0.00015620409435208302, "loss": 11.6867, "step": 44451 }, { "epoch": 0.9305032236456502, "grad_norm": 0.2667650282382965, "learning_rate": 0.000156202280874, "loss": 11.6703, "step": 44452 }, { "epoch": 0.9305241564096123, "grad_norm": 0.34880977869033813, "learning_rate": 0.00015620046736889947, "loss": 11.6717, "step": 44453 }, { "epoch": 0.9305450891735745, "grad_norm": 0.35544121265411377, "learning_rate": 0.00015619865383678224, "loss": 11.6833, "step": 44454 }, { "epoch": 0.9305660219375367, "grad_norm": 0.2809920012950897, "learning_rate": 0.00015619684027764916, "loss": 11.6606, "step": 44455 }, { "epoch": 0.9305869547014988, "grad_norm": 0.3415144085884094, "learning_rate": 0.00015619502669150115, "loss": 11.6651, "step": 44456 }, { "epoch": 0.930607887465461, "grad_norm": 0.33072707056999207, "learning_rate": 0.00015619321307833904, "loss": 11.6617, "step": 44457 }, { "epoch": 0.9306288202294231, "grad_norm": 0.3780045211315155, "learning_rate": 0.00015619139943816375, "loss": 11.6792, "step": 44458 }, { "epoch": 0.9306497529933853, "grad_norm": 0.31673911213874817, "learning_rate": 0.00015618958577097612, "loss": 11.6554, "step": 44459 }, { "epoch": 0.9306706857573473, "grad_norm": 0.3755404055118561, "learning_rate": 0.00015618777207677704, "loss": 11.6703, "step": 44460 }, { "epoch": 0.9306916185213095, "grad_norm": 0.3236028254032135, "learning_rate": 0.00015618595835556739, "loss": 11.6586, "step": 44461 }, { "epoch": 0.9307125512852717, "grad_norm": 0.3825327754020691, "learning_rate": 0.00015618414460734798, "loss": 11.6586, "step": 44462 }, { "epoch": 0.9307334840492338, "grad_norm": 0.27955329418182373, "learning_rate": 0.00015618233083211975, "loss": 11.6651, "step": 44463 }, { "epoch": 0.930754416813196, "grad_norm": 0.30958205461502075, "learning_rate": 0.00015618051702988355, "loss": 11.6583, "step": 44464 }, { "epoch": 0.9307753495771581, "grad_norm": 0.27242881059646606, "learning_rate": 0.00015617870320064022, "loss": 11.6722, "step": 44465 }, { "epoch": 0.9307962823411203, "grad_norm": 0.31911319494247437, "learning_rate": 0.0001561768893443907, "loss": 11.6762, "step": 44466 }, { "epoch": 0.9308172151050825, "grad_norm": 0.36903253197669983, "learning_rate": 0.00015617507546113578, "loss": 11.6686, "step": 44467 }, { "epoch": 0.9308381478690446, "grad_norm": 0.45275062322616577, "learning_rate": 0.00015617326155087643, "loss": 11.6556, "step": 44468 }, { "epoch": 0.9308590806330068, "grad_norm": 0.3804459571838379, "learning_rate": 0.00015617144761361344, "loss": 11.6818, "step": 44469 }, { "epoch": 0.9308800133969689, "grad_norm": 0.2993249297142029, "learning_rate": 0.0001561696336493477, "loss": 11.6698, "step": 44470 }, { "epoch": 0.9309009461609311, "grad_norm": 0.28906312584877014, "learning_rate": 0.00015616781965808012, "loss": 11.6728, "step": 44471 }, { "epoch": 0.9309218789248932, "grad_norm": 0.39820563793182373, "learning_rate": 0.0001561660056398115, "loss": 11.6647, "step": 44472 }, { "epoch": 0.9309428116888554, "grad_norm": 0.321850448846817, "learning_rate": 0.00015616419159454278, "loss": 11.6847, "step": 44473 }, { "epoch": 0.9309637444528176, "grad_norm": 0.4143184721469879, "learning_rate": 0.00015616237752227483, "loss": 11.6837, "step": 44474 }, { "epoch": 0.9309846772167797, "grad_norm": 0.2661067843437195, "learning_rate": 0.00015616056342300847, "loss": 11.6688, "step": 44475 }, { "epoch": 0.9310056099807419, "grad_norm": 0.3550623059272766, "learning_rate": 0.00015615874929674462, "loss": 11.689, "step": 44476 }, { "epoch": 0.931026542744704, "grad_norm": 0.2513887584209442, "learning_rate": 0.00015615693514348412, "loss": 11.6634, "step": 44477 }, { "epoch": 0.9310474755086662, "grad_norm": 0.29798150062561035, "learning_rate": 0.00015615512096322788, "loss": 11.6531, "step": 44478 }, { "epoch": 0.9310684082726283, "grad_norm": 0.32929179072380066, "learning_rate": 0.0001561533067559767, "loss": 11.6671, "step": 44479 }, { "epoch": 0.9310893410365905, "grad_norm": 0.27934107184410095, "learning_rate": 0.0001561514925217315, "loss": 11.6575, "step": 44480 }, { "epoch": 0.9311102738005527, "grad_norm": 0.326212078332901, "learning_rate": 0.0001561496782604932, "loss": 11.6784, "step": 44481 }, { "epoch": 0.9311312065645148, "grad_norm": 0.29458141326904297, "learning_rate": 0.0001561478639722626, "loss": 11.6603, "step": 44482 }, { "epoch": 0.931152139328477, "grad_norm": 0.31007301807403564, "learning_rate": 0.0001561460496570406, "loss": 11.671, "step": 44483 }, { "epoch": 0.931173072092439, "grad_norm": 0.4027002155780792, "learning_rate": 0.0001561442353148281, "loss": 11.657, "step": 44484 }, { "epoch": 0.9311940048564012, "grad_norm": 0.33185312151908875, "learning_rate": 0.0001561424209456259, "loss": 11.6654, "step": 44485 }, { "epoch": 0.9312149376203634, "grad_norm": 0.24901966750621796, "learning_rate": 0.0001561406065494349, "loss": 11.656, "step": 44486 }, { "epoch": 0.9312358703843255, "grad_norm": 0.31023991107940674, "learning_rate": 0.00015613879212625603, "loss": 11.6568, "step": 44487 }, { "epoch": 0.9312568031482877, "grad_norm": 0.2579200565814972, "learning_rate": 0.0001561369776760901, "loss": 11.6667, "step": 44488 }, { "epoch": 0.9312777359122498, "grad_norm": 0.3621441125869751, "learning_rate": 0.00015613516319893796, "loss": 11.6829, "step": 44489 }, { "epoch": 0.931298668676212, "grad_norm": 0.37719523906707764, "learning_rate": 0.00015613334869480058, "loss": 11.6422, "step": 44490 }, { "epoch": 0.9313196014401741, "grad_norm": 0.291585773229599, "learning_rate": 0.00015613153416367873, "loss": 11.6722, "step": 44491 }, { "epoch": 0.9313405342041363, "grad_norm": 0.28311848640441895, "learning_rate": 0.00015612971960557337, "loss": 11.6534, "step": 44492 }, { "epoch": 0.9313614669680985, "grad_norm": 0.2971193194389343, "learning_rate": 0.0001561279050204853, "loss": 11.6814, "step": 44493 }, { "epoch": 0.9313823997320606, "grad_norm": 0.3050805926322937, "learning_rate": 0.00015612609040841543, "loss": 11.6506, "step": 44494 }, { "epoch": 0.9314033324960228, "grad_norm": 0.36191999912261963, "learning_rate": 0.00015612427576936462, "loss": 11.6825, "step": 44495 }, { "epoch": 0.9314242652599849, "grad_norm": 0.34487807750701904, "learning_rate": 0.00015612246110333376, "loss": 11.6773, "step": 44496 }, { "epoch": 0.9314451980239471, "grad_norm": 0.2603713572025299, "learning_rate": 0.0001561206464103237, "loss": 11.6543, "step": 44497 }, { "epoch": 0.9314661307879092, "grad_norm": 0.30451494455337524, "learning_rate": 0.00015611883169033532, "loss": 11.6699, "step": 44498 }, { "epoch": 0.9314870635518714, "grad_norm": 0.3652629554271698, "learning_rate": 0.00015611701694336947, "loss": 11.6681, "step": 44499 }, { "epoch": 0.9315079963158336, "grad_norm": 0.34491199254989624, "learning_rate": 0.00015611520216942708, "loss": 11.6718, "step": 44500 }, { "epoch": 0.9315289290797957, "grad_norm": 0.27031269669532776, "learning_rate": 0.00015611338736850897, "loss": 11.6652, "step": 44501 }, { "epoch": 0.9315498618437579, "grad_norm": 0.28714391589164734, "learning_rate": 0.00015611157254061603, "loss": 11.667, "step": 44502 }, { "epoch": 0.93157079460772, "grad_norm": 0.27394407987594604, "learning_rate": 0.00015610975768574914, "loss": 11.663, "step": 44503 }, { "epoch": 0.9315917273716822, "grad_norm": 0.2675815522670746, "learning_rate": 0.00015610794280390916, "loss": 11.6686, "step": 44504 }, { "epoch": 0.9316126601356444, "grad_norm": 0.3303830027580261, "learning_rate": 0.000156106127895097, "loss": 11.685, "step": 44505 }, { "epoch": 0.9316335928996065, "grad_norm": 0.2784757912158966, "learning_rate": 0.00015610431295931345, "loss": 11.6523, "step": 44506 }, { "epoch": 0.9316545256635687, "grad_norm": 0.24543949961662292, "learning_rate": 0.00015610249799655944, "loss": 11.6577, "step": 44507 }, { "epoch": 0.9316754584275307, "grad_norm": 0.34291136264801025, "learning_rate": 0.00015610068300683585, "loss": 11.673, "step": 44508 }, { "epoch": 0.931696391191493, "grad_norm": 0.3035728335380554, "learning_rate": 0.00015609886799014351, "loss": 11.6861, "step": 44509 }, { "epoch": 0.931717323955455, "grad_norm": 0.581039547920227, "learning_rate": 0.00015609705294648339, "loss": 11.6678, "step": 44510 }, { "epoch": 0.9317382567194172, "grad_norm": 0.30163997411727905, "learning_rate": 0.00015609523787585622, "loss": 11.6725, "step": 44511 }, { "epoch": 0.9317591894833794, "grad_norm": 0.29955729842185974, "learning_rate": 0.00015609342277826297, "loss": 11.6666, "step": 44512 }, { "epoch": 0.9317801222473415, "grad_norm": 0.3559274673461914, "learning_rate": 0.0001560916076537045, "loss": 11.682, "step": 44513 }, { "epoch": 0.9318010550113037, "grad_norm": 0.3076950013637543, "learning_rate": 0.00015608979250218165, "loss": 11.6667, "step": 44514 }, { "epoch": 0.9318219877752658, "grad_norm": 0.2903061807155609, "learning_rate": 0.00015608797732369535, "loss": 11.67, "step": 44515 }, { "epoch": 0.931842920539228, "grad_norm": 0.3022304177284241, "learning_rate": 0.0001560861621182464, "loss": 11.6585, "step": 44516 }, { "epoch": 0.9318638533031901, "grad_norm": 0.3176294267177582, "learning_rate": 0.0001560843468858357, "loss": 11.6518, "step": 44517 }, { "epoch": 0.9318847860671523, "grad_norm": 0.26930761337280273, "learning_rate": 0.00015608253162646417, "loss": 11.6587, "step": 44518 }, { "epoch": 0.9319057188311145, "grad_norm": 0.36231815814971924, "learning_rate": 0.0001560807163401326, "loss": 11.6609, "step": 44519 }, { "epoch": 0.9319266515950766, "grad_norm": 0.32299527525901794, "learning_rate": 0.00015607890102684193, "loss": 11.6788, "step": 44520 }, { "epoch": 0.9319475843590388, "grad_norm": 0.3132176101207733, "learning_rate": 0.000156077085686593, "loss": 11.666, "step": 44521 }, { "epoch": 0.9319685171230009, "grad_norm": 0.2628220319747925, "learning_rate": 0.00015607527031938668, "loss": 11.6692, "step": 44522 }, { "epoch": 0.9319894498869631, "grad_norm": 0.3318937420845032, "learning_rate": 0.00015607345492522386, "loss": 11.6674, "step": 44523 }, { "epoch": 0.9320103826509252, "grad_norm": 0.24617011845111847, "learning_rate": 0.0001560716395041054, "loss": 11.6576, "step": 44524 }, { "epoch": 0.9320313154148874, "grad_norm": 0.3090130388736725, "learning_rate": 0.0001560698240560322, "loss": 11.6801, "step": 44525 }, { "epoch": 0.9320522481788496, "grad_norm": 0.29445570707321167, "learning_rate": 0.0001560680085810051, "loss": 11.6632, "step": 44526 }, { "epoch": 0.9320731809428117, "grad_norm": 0.32448849081993103, "learning_rate": 0.00015606619307902502, "loss": 11.6822, "step": 44527 }, { "epoch": 0.9320941137067739, "grad_norm": 0.26247695088386536, "learning_rate": 0.00015606437755009278, "loss": 11.679, "step": 44528 }, { "epoch": 0.932115046470736, "grad_norm": 0.3188287019729614, "learning_rate": 0.00015606256199420922, "loss": 11.6734, "step": 44529 }, { "epoch": 0.9321359792346982, "grad_norm": 0.3338563144207001, "learning_rate": 0.0001560607464113753, "loss": 11.6629, "step": 44530 }, { "epoch": 0.9321569119986604, "grad_norm": 0.2627127170562744, "learning_rate": 0.00015605893080159188, "loss": 11.6692, "step": 44531 }, { "epoch": 0.9321778447626224, "grad_norm": 0.2613513469696045, "learning_rate": 0.00015605711516485977, "loss": 11.675, "step": 44532 }, { "epoch": 0.9321987775265846, "grad_norm": 0.39259466528892517, "learning_rate": 0.00015605529950117993, "loss": 11.6559, "step": 44533 }, { "epoch": 0.9322197102905467, "grad_norm": 0.2694799304008484, "learning_rate": 0.0001560534838105531, "loss": 11.6681, "step": 44534 }, { "epoch": 0.9322406430545089, "grad_norm": 0.3447844684123993, "learning_rate": 0.0001560516680929803, "loss": 11.7053, "step": 44535 }, { "epoch": 0.932261575818471, "grad_norm": 0.3753300607204437, "learning_rate": 0.00015604985234846233, "loss": 11.668, "step": 44536 }, { "epoch": 0.9322825085824332, "grad_norm": 0.3025691509246826, "learning_rate": 0.00015604803657700008, "loss": 11.6791, "step": 44537 }, { "epoch": 0.9323034413463954, "grad_norm": 0.3262563645839691, "learning_rate": 0.0001560462207785944, "loss": 11.6803, "step": 44538 }, { "epoch": 0.9323243741103575, "grad_norm": 0.29766935110092163, "learning_rate": 0.00015604440495324616, "loss": 11.6631, "step": 44539 }, { "epoch": 0.9323453068743197, "grad_norm": 0.2753773629665375, "learning_rate": 0.0001560425891009563, "loss": 11.673, "step": 44540 }, { "epoch": 0.9323662396382818, "grad_norm": 0.3140243589878082, "learning_rate": 0.0001560407732217256, "loss": 11.6576, "step": 44541 }, { "epoch": 0.932387172402244, "grad_norm": 0.27186858654022217, "learning_rate": 0.00015603895731555503, "loss": 11.6633, "step": 44542 }, { "epoch": 0.9324081051662061, "grad_norm": 0.3071606457233429, "learning_rate": 0.00015603714138244537, "loss": 11.6544, "step": 44543 }, { "epoch": 0.9324290379301683, "grad_norm": 0.25858190655708313, "learning_rate": 0.00015603532542239753, "loss": 11.6638, "step": 44544 }, { "epoch": 0.9324499706941305, "grad_norm": 0.30392172932624817, "learning_rate": 0.0001560335094354124, "loss": 11.6596, "step": 44545 }, { "epoch": 0.9324709034580926, "grad_norm": 0.46835002303123474, "learning_rate": 0.00015603169342149082, "loss": 11.6661, "step": 44546 }, { "epoch": 0.9324918362220548, "grad_norm": 0.2918470799922943, "learning_rate": 0.00015602987738063372, "loss": 11.667, "step": 44547 }, { "epoch": 0.9325127689860169, "grad_norm": 0.37606480717658997, "learning_rate": 0.00015602806131284188, "loss": 11.6811, "step": 44548 }, { "epoch": 0.9325337017499791, "grad_norm": 0.28390032052993774, "learning_rate": 0.0001560262452181163, "loss": 11.6734, "step": 44549 }, { "epoch": 0.9325546345139413, "grad_norm": 0.2989097535610199, "learning_rate": 0.00015602442909645774, "loss": 11.6736, "step": 44550 }, { "epoch": 0.9325755672779034, "grad_norm": 0.32316967844963074, "learning_rate": 0.0001560226129478671, "loss": 11.6608, "step": 44551 }, { "epoch": 0.9325965000418656, "grad_norm": 0.2814289927482605, "learning_rate": 0.0001560207967723453, "loss": 11.6687, "step": 44552 }, { "epoch": 0.9326174328058277, "grad_norm": 0.2572364807128906, "learning_rate": 0.00015601898056989314, "loss": 11.666, "step": 44553 }, { "epoch": 0.9326383655697899, "grad_norm": 0.33381929993629456, "learning_rate": 0.00015601716434051157, "loss": 11.6665, "step": 44554 }, { "epoch": 0.9326592983337519, "grad_norm": 0.30248165130615234, "learning_rate": 0.0001560153480842014, "loss": 11.6643, "step": 44555 }, { "epoch": 0.9326802310977141, "grad_norm": 0.4020764231681824, "learning_rate": 0.00015601353180096355, "loss": 11.6612, "step": 44556 }, { "epoch": 0.9327011638616763, "grad_norm": 0.3543650805950165, "learning_rate": 0.00015601171549079886, "loss": 11.6631, "step": 44557 }, { "epoch": 0.9327220966256384, "grad_norm": 0.3500102758407593, "learning_rate": 0.00015600989915370823, "loss": 11.6596, "step": 44558 }, { "epoch": 0.9327430293896006, "grad_norm": 0.3333989679813385, "learning_rate": 0.0001560080827896925, "loss": 11.662, "step": 44559 }, { "epoch": 0.9327639621535627, "grad_norm": 0.28755393624305725, "learning_rate": 0.00015600626639875257, "loss": 11.6706, "step": 44560 }, { "epoch": 0.9327848949175249, "grad_norm": 0.28896209597587585, "learning_rate": 0.0001560044499808893, "loss": 11.6746, "step": 44561 }, { "epoch": 0.932805827681487, "grad_norm": 0.2665419280529022, "learning_rate": 0.0001560026335361036, "loss": 11.6541, "step": 44562 }, { "epoch": 0.9328267604454492, "grad_norm": 0.31209978461265564, "learning_rate": 0.00015600081706439628, "loss": 11.683, "step": 44563 }, { "epoch": 0.9328476932094114, "grad_norm": 0.2663859724998474, "learning_rate": 0.00015599900056576825, "loss": 11.6817, "step": 44564 }, { "epoch": 0.9328686259733735, "grad_norm": 0.3087840676307678, "learning_rate": 0.00015599718404022038, "loss": 11.6764, "step": 44565 }, { "epoch": 0.9328895587373357, "grad_norm": 0.3113802373409271, "learning_rate": 0.00015599536748775355, "loss": 11.6794, "step": 44566 }, { "epoch": 0.9329104915012978, "grad_norm": 0.29961344599723816, "learning_rate": 0.00015599355090836862, "loss": 11.6655, "step": 44567 }, { "epoch": 0.93293142426526, "grad_norm": 0.2705190181732178, "learning_rate": 0.00015599173430206644, "loss": 11.681, "step": 44568 }, { "epoch": 0.9329523570292222, "grad_norm": 0.3254932165145874, "learning_rate": 0.00015598991766884796, "loss": 11.67, "step": 44569 }, { "epoch": 0.9329732897931843, "grad_norm": 0.2998065948486328, "learning_rate": 0.00015598810100871398, "loss": 11.6874, "step": 44570 }, { "epoch": 0.9329942225571465, "grad_norm": 0.2439134567975998, "learning_rate": 0.0001559862843216654, "loss": 11.6703, "step": 44571 }, { "epoch": 0.9330151553211086, "grad_norm": 0.2649686336517334, "learning_rate": 0.0001559844676077031, "loss": 11.6486, "step": 44572 }, { "epoch": 0.9330360880850708, "grad_norm": 0.39721670746803284, "learning_rate": 0.0001559826508668279, "loss": 11.6792, "step": 44573 }, { "epoch": 0.9330570208490329, "grad_norm": 0.3288131058216095, "learning_rate": 0.0001559808340990408, "loss": 11.6554, "step": 44574 }, { "epoch": 0.9330779536129951, "grad_norm": 0.46937838196754456, "learning_rate": 0.0001559790173043425, "loss": 11.6649, "step": 44575 }, { "epoch": 0.9330988863769573, "grad_norm": 0.41242632269859314, "learning_rate": 0.00015597720048273403, "loss": 11.668, "step": 44576 }, { "epoch": 0.9331198191409193, "grad_norm": 0.3078334331512451, "learning_rate": 0.00015597538363421615, "loss": 11.676, "step": 44577 }, { "epoch": 0.9331407519048815, "grad_norm": 0.34719935059547424, "learning_rate": 0.00015597356675878983, "loss": 11.6699, "step": 44578 }, { "epoch": 0.9331616846688436, "grad_norm": 0.2642001509666443, "learning_rate": 0.00015597174985645587, "loss": 11.665, "step": 44579 }, { "epoch": 0.9331826174328058, "grad_norm": 0.2750195264816284, "learning_rate": 0.00015596993292721516, "loss": 11.6526, "step": 44580 }, { "epoch": 0.9332035501967679, "grad_norm": 0.2665823996067047, "learning_rate": 0.00015596811597106859, "loss": 11.6865, "step": 44581 }, { "epoch": 0.9332244829607301, "grad_norm": 0.28464066982269287, "learning_rate": 0.00015596629898801702, "loss": 11.6753, "step": 44582 }, { "epoch": 0.9332454157246923, "grad_norm": 0.3347328007221222, "learning_rate": 0.00015596448197806133, "loss": 11.6823, "step": 44583 }, { "epoch": 0.9332663484886544, "grad_norm": 0.28544509410858154, "learning_rate": 0.0001559626649412024, "loss": 11.6622, "step": 44584 }, { "epoch": 0.9332872812526166, "grad_norm": 0.3103989362716675, "learning_rate": 0.00015596084787744113, "loss": 11.6626, "step": 44585 }, { "epoch": 0.9333082140165787, "grad_norm": 0.31344345211982727, "learning_rate": 0.0001559590307867783, "loss": 11.666, "step": 44586 }, { "epoch": 0.9333291467805409, "grad_norm": 0.37316226959228516, "learning_rate": 0.00015595721366921486, "loss": 11.6685, "step": 44587 }, { "epoch": 0.9333500795445031, "grad_norm": 0.3142651319503784, "learning_rate": 0.0001559553965247517, "loss": 11.6693, "step": 44588 }, { "epoch": 0.9333710123084652, "grad_norm": 0.37337779998779297, "learning_rate": 0.0001559535793533896, "loss": 11.6858, "step": 44589 }, { "epoch": 0.9333919450724274, "grad_norm": 0.30443713068962097, "learning_rate": 0.00015595176215512952, "loss": 11.6776, "step": 44590 }, { "epoch": 0.9334128778363895, "grad_norm": 0.3063587546348572, "learning_rate": 0.00015594994492997233, "loss": 11.666, "step": 44591 }, { "epoch": 0.9334338106003517, "grad_norm": 0.4007907211780548, "learning_rate": 0.00015594812767791885, "loss": 11.6655, "step": 44592 }, { "epoch": 0.9334547433643138, "grad_norm": 0.31738635897636414, "learning_rate": 0.00015594631039897, "loss": 11.6737, "step": 44593 }, { "epoch": 0.933475676128276, "grad_norm": 0.292417049407959, "learning_rate": 0.00015594449309312662, "loss": 11.649, "step": 44594 }, { "epoch": 0.9334966088922382, "grad_norm": 0.2943512201309204, "learning_rate": 0.0001559426757603896, "loss": 11.6738, "step": 44595 }, { "epoch": 0.9335175416562003, "grad_norm": 0.3054587244987488, "learning_rate": 0.00015594085840075984, "loss": 11.6583, "step": 44596 }, { "epoch": 0.9335384744201625, "grad_norm": 0.37747615575790405, "learning_rate": 0.00015593904101423816, "loss": 11.6854, "step": 44597 }, { "epoch": 0.9335594071841246, "grad_norm": 0.36910781264305115, "learning_rate": 0.0001559372236008255, "loss": 11.6658, "step": 44598 }, { "epoch": 0.9335803399480868, "grad_norm": 0.3109627962112427, "learning_rate": 0.00015593540616052267, "loss": 11.6705, "step": 44599 }, { "epoch": 0.9336012727120488, "grad_norm": 0.2834753096103668, "learning_rate": 0.00015593358869333055, "loss": 11.6674, "step": 44600 }, { "epoch": 0.933622205476011, "grad_norm": 0.2750377357006073, "learning_rate": 0.00015593177119925007, "loss": 11.6534, "step": 44601 }, { "epoch": 0.9336431382399732, "grad_norm": 0.24468044936656952, "learning_rate": 0.00015592995367828206, "loss": 11.6548, "step": 44602 }, { "epoch": 0.9336640710039353, "grad_norm": 0.2540428340435028, "learning_rate": 0.0001559281361304274, "loss": 11.6664, "step": 44603 }, { "epoch": 0.9336850037678975, "grad_norm": 0.2928154468536377, "learning_rate": 0.00015592631855568695, "loss": 11.6695, "step": 44604 }, { "epoch": 0.9337059365318596, "grad_norm": 0.34668588638305664, "learning_rate": 0.0001559245009540616, "loss": 11.6711, "step": 44605 }, { "epoch": 0.9337268692958218, "grad_norm": 0.30564481019973755, "learning_rate": 0.00015592268332555222, "loss": 11.6517, "step": 44606 }, { "epoch": 0.933747802059784, "grad_norm": 0.3240101933479309, "learning_rate": 0.0001559208656701597, "loss": 11.6695, "step": 44607 }, { "epoch": 0.9337687348237461, "grad_norm": 0.2969193458557129, "learning_rate": 0.00015591904798788487, "loss": 11.6588, "step": 44608 }, { "epoch": 0.9337896675877083, "grad_norm": 0.3280600309371948, "learning_rate": 0.00015591723027872867, "loss": 11.6761, "step": 44609 }, { "epoch": 0.9338106003516704, "grad_norm": 0.29107916355133057, "learning_rate": 0.00015591541254269192, "loss": 11.6789, "step": 44610 }, { "epoch": 0.9338315331156326, "grad_norm": 0.36016759276390076, "learning_rate": 0.0001559135947797755, "loss": 11.6761, "step": 44611 }, { "epoch": 0.9338524658795947, "grad_norm": 0.3353305160999298, "learning_rate": 0.0001559117769899803, "loss": 11.6517, "step": 44612 }, { "epoch": 0.9338733986435569, "grad_norm": 0.3215510845184326, "learning_rate": 0.00015590995917330721, "loss": 11.6773, "step": 44613 }, { "epoch": 0.9338943314075191, "grad_norm": 0.33375808596611023, "learning_rate": 0.00015590814132975707, "loss": 11.6744, "step": 44614 }, { "epoch": 0.9339152641714812, "grad_norm": 0.2817111909389496, "learning_rate": 0.00015590632345933075, "loss": 11.6653, "step": 44615 }, { "epoch": 0.9339361969354434, "grad_norm": 0.5706562995910645, "learning_rate": 0.00015590450556202915, "loss": 11.6049, "step": 44616 }, { "epoch": 0.9339571296994055, "grad_norm": 0.31563764810562134, "learning_rate": 0.00015590268763785315, "loss": 11.6661, "step": 44617 }, { "epoch": 0.9339780624633677, "grad_norm": 0.2741084396839142, "learning_rate": 0.00015590086968680358, "loss": 11.6448, "step": 44618 }, { "epoch": 0.9339989952273298, "grad_norm": 0.3215956687927246, "learning_rate": 0.00015589905170888135, "loss": 11.6665, "step": 44619 }, { "epoch": 0.934019927991292, "grad_norm": 0.2959059774875641, "learning_rate": 0.00015589723370408733, "loss": 11.6474, "step": 44620 }, { "epoch": 0.9340408607552542, "grad_norm": 0.36802974343299866, "learning_rate": 0.0001558954156724224, "loss": 11.6565, "step": 44621 }, { "epoch": 0.9340617935192163, "grad_norm": 0.31922876834869385, "learning_rate": 0.00015589359761388743, "loss": 11.6668, "step": 44622 }, { "epoch": 0.9340827262831785, "grad_norm": 0.2970297932624817, "learning_rate": 0.00015589177952848323, "loss": 11.6576, "step": 44623 }, { "epoch": 0.9341036590471405, "grad_norm": 0.32084017992019653, "learning_rate": 0.0001558899614162108, "loss": 11.6705, "step": 44624 }, { "epoch": 0.9341245918111027, "grad_norm": 0.24911850690841675, "learning_rate": 0.00015588814327707092, "loss": 11.6704, "step": 44625 }, { "epoch": 0.934145524575065, "grad_norm": 0.24714791774749756, "learning_rate": 0.0001558863251110645, "loss": 11.6538, "step": 44626 }, { "epoch": 0.934166457339027, "grad_norm": 0.2740059792995453, "learning_rate": 0.00015588450691819236, "loss": 11.6783, "step": 44627 }, { "epoch": 0.9341873901029892, "grad_norm": 0.30619925260543823, "learning_rate": 0.00015588268869845545, "loss": 11.6774, "step": 44628 }, { "epoch": 0.9342083228669513, "grad_norm": 0.3003280460834503, "learning_rate": 0.00015588087045185462, "loss": 11.6615, "step": 44629 }, { "epoch": 0.9342292556309135, "grad_norm": 0.3228391706943512, "learning_rate": 0.00015587905217839075, "loss": 11.669, "step": 44630 }, { "epoch": 0.9342501883948756, "grad_norm": 0.30305734276771545, "learning_rate": 0.00015587723387806466, "loss": 11.6542, "step": 44631 }, { "epoch": 0.9342711211588378, "grad_norm": 0.30799296498298645, "learning_rate": 0.00015587541555087724, "loss": 11.6803, "step": 44632 }, { "epoch": 0.9342920539228, "grad_norm": 0.3326459527015686, "learning_rate": 0.00015587359719682945, "loss": 11.6551, "step": 44633 }, { "epoch": 0.9343129866867621, "grad_norm": 0.34705233573913574, "learning_rate": 0.00015587177881592207, "loss": 11.686, "step": 44634 }, { "epoch": 0.9343339194507243, "grad_norm": 0.269133597612381, "learning_rate": 0.00015586996040815602, "loss": 11.6728, "step": 44635 }, { "epoch": 0.9343548522146864, "grad_norm": 0.33027204871177673, "learning_rate": 0.00015586814197353217, "loss": 11.6988, "step": 44636 }, { "epoch": 0.9343757849786486, "grad_norm": 0.2813620865345001, "learning_rate": 0.0001558663235120514, "loss": 11.6796, "step": 44637 }, { "epoch": 0.9343967177426107, "grad_norm": 0.24183563888072968, "learning_rate": 0.00015586450502371452, "loss": 11.677, "step": 44638 }, { "epoch": 0.9344176505065729, "grad_norm": 0.28915441036224365, "learning_rate": 0.00015586268650852247, "loss": 11.6547, "step": 44639 }, { "epoch": 0.9344385832705351, "grad_norm": 0.360195130109787, "learning_rate": 0.0001558608679664761, "loss": 11.6755, "step": 44640 }, { "epoch": 0.9344595160344972, "grad_norm": 0.3369184732437134, "learning_rate": 0.0001558590493975763, "loss": 11.6794, "step": 44641 }, { "epoch": 0.9344804487984594, "grad_norm": 0.32285407185554504, "learning_rate": 0.00015585723080182394, "loss": 11.6727, "step": 44642 }, { "epoch": 0.9345013815624215, "grad_norm": 0.4116548001766205, "learning_rate": 0.00015585541217921992, "loss": 11.6909, "step": 44643 }, { "epoch": 0.9345223143263837, "grad_norm": 0.30895665287971497, "learning_rate": 0.000155853593529765, "loss": 11.6572, "step": 44644 }, { "epoch": 0.9345432470903459, "grad_norm": 0.2922095060348511, "learning_rate": 0.0001558517748534602, "loss": 11.6679, "step": 44645 }, { "epoch": 0.934564179854308, "grad_norm": 0.30195340514183044, "learning_rate": 0.00015584995615030634, "loss": 11.6771, "step": 44646 }, { "epoch": 0.9345851126182702, "grad_norm": 0.33878645300865173, "learning_rate": 0.00015584813742030426, "loss": 11.6697, "step": 44647 }, { "epoch": 0.9346060453822322, "grad_norm": 0.3405431807041168, "learning_rate": 0.00015584631866345487, "loss": 11.6545, "step": 44648 }, { "epoch": 0.9346269781461944, "grad_norm": 0.29235386848449707, "learning_rate": 0.000155844499879759, "loss": 11.6399, "step": 44649 }, { "epoch": 0.9346479109101565, "grad_norm": 0.30882248282432556, "learning_rate": 0.00015584268106921765, "loss": 11.6593, "step": 44650 }, { "epoch": 0.9346688436741187, "grad_norm": 0.3094817101955414, "learning_rate": 0.00015584086223183153, "loss": 11.6596, "step": 44651 }, { "epoch": 0.9346897764380809, "grad_norm": 0.29218992590904236, "learning_rate": 0.00015583904336760162, "loss": 11.6727, "step": 44652 }, { "epoch": 0.934710709202043, "grad_norm": 0.26843154430389404, "learning_rate": 0.00015583722447652873, "loss": 11.6735, "step": 44653 }, { "epoch": 0.9347316419660052, "grad_norm": 0.3325548768043518, "learning_rate": 0.00015583540555861377, "loss": 11.6878, "step": 44654 }, { "epoch": 0.9347525747299673, "grad_norm": 0.31095606088638306, "learning_rate": 0.00015583358661385764, "loss": 11.6756, "step": 44655 }, { "epoch": 0.9347735074939295, "grad_norm": 0.3071368932723999, "learning_rate": 0.0001558317676422612, "loss": 11.6493, "step": 44656 }, { "epoch": 0.9347944402578916, "grad_norm": 0.32000818848609924, "learning_rate": 0.00015582994864382527, "loss": 11.6577, "step": 44657 }, { "epoch": 0.9348153730218538, "grad_norm": 0.406162291765213, "learning_rate": 0.00015582812961855078, "loss": 11.6683, "step": 44658 }, { "epoch": 0.934836305785816, "grad_norm": 0.36374929547309875, "learning_rate": 0.00015582631056643858, "loss": 11.6769, "step": 44659 }, { "epoch": 0.9348572385497781, "grad_norm": 0.38872164487838745, "learning_rate": 0.00015582449148748956, "loss": 11.6744, "step": 44660 }, { "epoch": 0.9348781713137403, "grad_norm": 0.5034059882164001, "learning_rate": 0.00015582267238170458, "loss": 11.6698, "step": 44661 }, { "epoch": 0.9348991040777024, "grad_norm": 0.2770078182220459, "learning_rate": 0.00015582085324908457, "loss": 11.6678, "step": 44662 }, { "epoch": 0.9349200368416646, "grad_norm": 0.3554079532623291, "learning_rate": 0.0001558190340896303, "loss": 11.6464, "step": 44663 }, { "epoch": 0.9349409696056268, "grad_norm": 0.45049571990966797, "learning_rate": 0.00015581721490334272, "loss": 11.6636, "step": 44664 }, { "epoch": 0.9349619023695889, "grad_norm": 0.34170442819595337, "learning_rate": 0.0001558153956902227, "loss": 11.6422, "step": 44665 }, { "epoch": 0.9349828351335511, "grad_norm": 0.3435032367706299, "learning_rate": 0.00015581357645027107, "loss": 11.6771, "step": 44666 }, { "epoch": 0.9350037678975132, "grad_norm": 0.3313094973564148, "learning_rate": 0.00015581175718348876, "loss": 11.6741, "step": 44667 }, { "epoch": 0.9350247006614754, "grad_norm": 0.3082811236381531, "learning_rate": 0.0001558099378898766, "loss": 11.6762, "step": 44668 }, { "epoch": 0.9350456334254375, "grad_norm": 0.33028388023376465, "learning_rate": 0.00015580811856943553, "loss": 11.6739, "step": 44669 }, { "epoch": 0.9350665661893997, "grad_norm": 0.33988845348358154, "learning_rate": 0.00015580629922216634, "loss": 11.6683, "step": 44670 }, { "epoch": 0.9350874989533619, "grad_norm": 0.29524675011634827, "learning_rate": 0.00015580447984806997, "loss": 11.6687, "step": 44671 }, { "epoch": 0.9351084317173239, "grad_norm": 0.3156417906284332, "learning_rate": 0.00015580266044714724, "loss": 11.6844, "step": 44672 }, { "epoch": 0.9351293644812861, "grad_norm": 0.43238845467567444, "learning_rate": 0.00015580084101939905, "loss": 11.6678, "step": 44673 }, { "epoch": 0.9351502972452482, "grad_norm": 0.28419700264930725, "learning_rate": 0.00015579902156482632, "loss": 11.671, "step": 44674 }, { "epoch": 0.9351712300092104, "grad_norm": 0.33662131428718567, "learning_rate": 0.00015579720208342982, "loss": 11.6708, "step": 44675 }, { "epoch": 0.9351921627731725, "grad_norm": 0.4004618227481842, "learning_rate": 0.00015579538257521052, "loss": 11.6688, "step": 44676 }, { "epoch": 0.9352130955371347, "grad_norm": 0.34860682487487793, "learning_rate": 0.00015579356304016927, "loss": 11.6483, "step": 44677 }, { "epoch": 0.9352340283010969, "grad_norm": 0.3089517652988434, "learning_rate": 0.00015579174347830693, "loss": 11.6729, "step": 44678 }, { "epoch": 0.935254961065059, "grad_norm": 0.32973864674568176, "learning_rate": 0.00015578992388962437, "loss": 11.6782, "step": 44679 }, { "epoch": 0.9352758938290212, "grad_norm": 0.40159669518470764, "learning_rate": 0.0001557881042741225, "loss": 11.6655, "step": 44680 }, { "epoch": 0.9352968265929833, "grad_norm": 0.3649134337902069, "learning_rate": 0.00015578628463180217, "loss": 11.6803, "step": 44681 }, { "epoch": 0.9353177593569455, "grad_norm": 0.40117982029914856, "learning_rate": 0.00015578446496266422, "loss": 11.6972, "step": 44682 }, { "epoch": 0.9353386921209077, "grad_norm": 0.3557417392730713, "learning_rate": 0.00015578264526670956, "loss": 11.6885, "step": 44683 }, { "epoch": 0.9353596248848698, "grad_norm": 0.3243153393268585, "learning_rate": 0.0001557808255439391, "loss": 11.6888, "step": 44684 }, { "epoch": 0.935380557648832, "grad_norm": 0.3514012396335602, "learning_rate": 0.00015577900579435364, "loss": 11.6525, "step": 44685 }, { "epoch": 0.9354014904127941, "grad_norm": 0.2971702218055725, "learning_rate": 0.00015577718601795412, "loss": 11.6798, "step": 44686 }, { "epoch": 0.9354224231767563, "grad_norm": 0.3443690836429596, "learning_rate": 0.00015577536621474137, "loss": 11.6632, "step": 44687 }, { "epoch": 0.9354433559407184, "grad_norm": 0.2770416736602783, "learning_rate": 0.0001557735463847163, "loss": 11.6738, "step": 44688 }, { "epoch": 0.9354642887046806, "grad_norm": 0.3532631993293762, "learning_rate": 0.0001557717265278798, "loss": 11.6684, "step": 44689 }, { "epoch": 0.9354852214686428, "grad_norm": 0.34655439853668213, "learning_rate": 0.00015576990664423266, "loss": 11.6711, "step": 44690 }, { "epoch": 0.9355061542326049, "grad_norm": 0.2986670434474945, "learning_rate": 0.00015576808673377585, "loss": 11.6672, "step": 44691 }, { "epoch": 0.9355270869965671, "grad_norm": 0.30381760001182556, "learning_rate": 0.00015576626679651018, "loss": 11.656, "step": 44692 }, { "epoch": 0.9355480197605291, "grad_norm": 0.3137884736061096, "learning_rate": 0.00015576444683243655, "loss": 11.6806, "step": 44693 }, { "epoch": 0.9355689525244913, "grad_norm": 0.3151209056377411, "learning_rate": 0.00015576262684155584, "loss": 11.6736, "step": 44694 }, { "epoch": 0.9355898852884534, "grad_norm": 0.28620585799217224, "learning_rate": 0.00015576080682386888, "loss": 11.6602, "step": 44695 }, { "epoch": 0.9356108180524156, "grad_norm": 0.3142413794994354, "learning_rate": 0.00015575898677937664, "loss": 11.6579, "step": 44696 }, { "epoch": 0.9356317508163778, "grad_norm": 0.3371553122997284, "learning_rate": 0.0001557571667080799, "loss": 11.6105, "step": 44697 }, { "epoch": 0.9356526835803399, "grad_norm": 0.3040817677974701, "learning_rate": 0.00015575534660997956, "loss": 11.6851, "step": 44698 }, { "epoch": 0.9356736163443021, "grad_norm": 0.24975843727588654, "learning_rate": 0.00015575352648507655, "loss": 11.6605, "step": 44699 }, { "epoch": 0.9356945491082642, "grad_norm": 0.2731866240501404, "learning_rate": 0.00015575170633337168, "loss": 11.6773, "step": 44700 }, { "epoch": 0.9357154818722264, "grad_norm": 0.268862783908844, "learning_rate": 0.00015574988615486585, "loss": 11.6715, "step": 44701 }, { "epoch": 0.9357364146361885, "grad_norm": 0.3844965398311615, "learning_rate": 0.00015574806594955993, "loss": 11.6566, "step": 44702 }, { "epoch": 0.9357573474001507, "grad_norm": 0.3063200116157532, "learning_rate": 0.00015574624571745483, "loss": 11.6615, "step": 44703 }, { "epoch": 0.9357782801641129, "grad_norm": 0.3315352499485016, "learning_rate": 0.00015574442545855134, "loss": 11.6644, "step": 44704 }, { "epoch": 0.935799212928075, "grad_norm": 0.31852635741233826, "learning_rate": 0.0001557426051728504, "loss": 11.6632, "step": 44705 }, { "epoch": 0.9358201456920372, "grad_norm": 0.3179791271686554, "learning_rate": 0.0001557407848603529, "loss": 11.6564, "step": 44706 }, { "epoch": 0.9358410784559993, "grad_norm": 0.3114337623119354, "learning_rate": 0.00015573896452105964, "loss": 11.6746, "step": 44707 }, { "epoch": 0.9358620112199615, "grad_norm": 0.28758615255355835, "learning_rate": 0.00015573714415497157, "loss": 11.6787, "step": 44708 }, { "epoch": 0.9358829439839237, "grad_norm": 0.2978857159614563, "learning_rate": 0.00015573532376208954, "loss": 11.6632, "step": 44709 }, { "epoch": 0.9359038767478858, "grad_norm": 0.2724080979824066, "learning_rate": 0.00015573350334241443, "loss": 11.6588, "step": 44710 }, { "epoch": 0.935924809511848, "grad_norm": 0.3262111246585846, "learning_rate": 0.0001557316828959471, "loss": 11.6516, "step": 44711 }, { "epoch": 0.9359457422758101, "grad_norm": 0.3429722785949707, "learning_rate": 0.00015572986242268845, "loss": 11.6649, "step": 44712 }, { "epoch": 0.9359666750397723, "grad_norm": 0.2941863536834717, "learning_rate": 0.00015572804192263932, "loss": 11.6608, "step": 44713 }, { "epoch": 0.9359876078037344, "grad_norm": 0.3244975209236145, "learning_rate": 0.0001557262213958006, "loss": 11.6972, "step": 44714 }, { "epoch": 0.9360085405676966, "grad_norm": 0.33737364411354065, "learning_rate": 0.00015572440084217316, "loss": 11.6816, "step": 44715 }, { "epoch": 0.9360294733316588, "grad_norm": 0.3162612318992615, "learning_rate": 0.00015572258026175795, "loss": 11.6774, "step": 44716 }, { "epoch": 0.9360504060956208, "grad_norm": 0.4191398322582245, "learning_rate": 0.0001557207596545557, "loss": 11.6978, "step": 44717 }, { "epoch": 0.936071338859583, "grad_norm": 0.28533443808555603, "learning_rate": 0.00015571893902056741, "loss": 11.6722, "step": 44718 }, { "epoch": 0.9360922716235451, "grad_norm": 0.376665860414505, "learning_rate": 0.00015571711835979388, "loss": 11.6679, "step": 44719 }, { "epoch": 0.9361132043875073, "grad_norm": 0.31626003980636597, "learning_rate": 0.00015571529767223602, "loss": 11.6681, "step": 44720 }, { "epoch": 0.9361341371514694, "grad_norm": 0.3397190272808075, "learning_rate": 0.00015571347695789473, "loss": 11.6594, "step": 44721 }, { "epoch": 0.9361550699154316, "grad_norm": 0.26597362756729126, "learning_rate": 0.0001557116562167708, "loss": 11.672, "step": 44722 }, { "epoch": 0.9361760026793938, "grad_norm": 0.34252214431762695, "learning_rate": 0.00015570983544886522, "loss": 11.656, "step": 44723 }, { "epoch": 0.9361969354433559, "grad_norm": 0.3116808533668518, "learning_rate": 0.00015570801465417876, "loss": 11.689, "step": 44724 }, { "epoch": 0.9362178682073181, "grad_norm": 0.24090264737606049, "learning_rate": 0.00015570619383271236, "loss": 11.6583, "step": 44725 }, { "epoch": 0.9362388009712802, "grad_norm": 0.3134766221046448, "learning_rate": 0.0001557043729844669, "loss": 11.6579, "step": 44726 }, { "epoch": 0.9362597337352424, "grad_norm": 0.3384222090244293, "learning_rate": 0.00015570255210944317, "loss": 11.6654, "step": 44727 }, { "epoch": 0.9362806664992046, "grad_norm": 0.35824647545814514, "learning_rate": 0.00015570073120764217, "loss": 11.672, "step": 44728 }, { "epoch": 0.9363015992631667, "grad_norm": 0.4268687069416046, "learning_rate": 0.0001556989102790647, "loss": 11.6799, "step": 44729 }, { "epoch": 0.9363225320271289, "grad_norm": 0.3475707173347473, "learning_rate": 0.00015569708932371162, "loss": 11.6906, "step": 44730 }, { "epoch": 0.936343464791091, "grad_norm": 0.4095080494880676, "learning_rate": 0.00015569526834158386, "loss": 11.6777, "step": 44731 }, { "epoch": 0.9363643975550532, "grad_norm": 0.45405957102775574, "learning_rate": 0.00015569344733268222, "loss": 11.6685, "step": 44732 }, { "epoch": 0.9363853303190153, "grad_norm": 0.33250781893730164, "learning_rate": 0.00015569162629700767, "loss": 11.6713, "step": 44733 }, { "epoch": 0.9364062630829775, "grad_norm": 0.33844655752182007, "learning_rate": 0.00015568980523456103, "loss": 11.6607, "step": 44734 }, { "epoch": 0.9364271958469397, "grad_norm": 0.2973429262638092, "learning_rate": 0.00015568798414534318, "loss": 11.6623, "step": 44735 }, { "epoch": 0.9364481286109018, "grad_norm": 0.3129986822605133, "learning_rate": 0.000155686163029355, "loss": 11.6603, "step": 44736 }, { "epoch": 0.936469061374864, "grad_norm": 0.417011559009552, "learning_rate": 0.00015568434188659738, "loss": 11.668, "step": 44737 }, { "epoch": 0.936489994138826, "grad_norm": 0.30041998624801636, "learning_rate": 0.00015568252071707116, "loss": 11.6844, "step": 44738 }, { "epoch": 0.9365109269027883, "grad_norm": 0.324293851852417, "learning_rate": 0.00015568069952077727, "loss": 11.6808, "step": 44739 }, { "epoch": 0.9365318596667503, "grad_norm": 0.4607362151145935, "learning_rate": 0.0001556788782977165, "loss": 11.6739, "step": 44740 }, { "epoch": 0.9365527924307125, "grad_norm": 0.317196249961853, "learning_rate": 0.00015567705704788983, "loss": 11.6622, "step": 44741 }, { "epoch": 0.9365737251946747, "grad_norm": 0.2890436351299286, "learning_rate": 0.00015567523577129802, "loss": 11.6725, "step": 44742 }, { "epoch": 0.9365946579586368, "grad_norm": 0.31819847226142883, "learning_rate": 0.00015567341446794206, "loss": 11.6733, "step": 44743 }, { "epoch": 0.936615590722599, "grad_norm": 0.3495379686355591, "learning_rate": 0.00015567159313782276, "loss": 11.6796, "step": 44744 }, { "epoch": 0.9366365234865611, "grad_norm": 0.27823781967163086, "learning_rate": 0.000155669771780941, "loss": 11.6574, "step": 44745 }, { "epoch": 0.9366574562505233, "grad_norm": 0.33513206243515015, "learning_rate": 0.00015566795039729768, "loss": 11.6698, "step": 44746 }, { "epoch": 0.9366783890144855, "grad_norm": 0.2960030436515808, "learning_rate": 0.00015566612898689365, "loss": 11.6567, "step": 44747 }, { "epoch": 0.9366993217784476, "grad_norm": 0.39086249470710754, "learning_rate": 0.00015566430754972982, "loss": 11.6641, "step": 44748 }, { "epoch": 0.9367202545424098, "grad_norm": 0.30426180362701416, "learning_rate": 0.000155662486085807, "loss": 11.659, "step": 44749 }, { "epoch": 0.9367411873063719, "grad_norm": 0.3323206603527069, "learning_rate": 0.00015566066459512615, "loss": 11.6657, "step": 44750 }, { "epoch": 0.9367621200703341, "grad_norm": 0.421911358833313, "learning_rate": 0.00015565884307768807, "loss": 11.6587, "step": 44751 }, { "epoch": 0.9367830528342962, "grad_norm": 0.3059611916542053, "learning_rate": 0.00015565702153349367, "loss": 11.674, "step": 44752 }, { "epoch": 0.9368039855982584, "grad_norm": 0.2856152653694153, "learning_rate": 0.00015565519996254388, "loss": 11.6819, "step": 44753 }, { "epoch": 0.9368249183622206, "grad_norm": 0.3293454647064209, "learning_rate": 0.00015565337836483947, "loss": 11.659, "step": 44754 }, { "epoch": 0.9368458511261827, "grad_norm": 0.28513550758361816, "learning_rate": 0.00015565155674038134, "loss": 11.6634, "step": 44755 }, { "epoch": 0.9368667838901449, "grad_norm": 0.36288660764694214, "learning_rate": 0.00015564973508917044, "loss": 11.668, "step": 44756 }, { "epoch": 0.936887716654107, "grad_norm": 0.2914571762084961, "learning_rate": 0.00015564791341120756, "loss": 11.6779, "step": 44757 }, { "epoch": 0.9369086494180692, "grad_norm": 0.30814382433891296, "learning_rate": 0.00015564609170649364, "loss": 11.661, "step": 44758 }, { "epoch": 0.9369295821820313, "grad_norm": 0.3150939643383026, "learning_rate": 0.00015564426997502953, "loss": 11.6874, "step": 44759 }, { "epoch": 0.9369505149459935, "grad_norm": 0.3724097013473511, "learning_rate": 0.00015564244821681612, "loss": 11.6627, "step": 44760 }, { "epoch": 0.9369714477099557, "grad_norm": 0.27864769101142883, "learning_rate": 0.00015564062643185422, "loss": 11.6524, "step": 44761 }, { "epoch": 0.9369923804739178, "grad_norm": 0.3103804588317871, "learning_rate": 0.0001556388046201448, "loss": 11.676, "step": 44762 }, { "epoch": 0.93701331323788, "grad_norm": 0.36130279302597046, "learning_rate": 0.00015563698278168867, "loss": 11.6543, "step": 44763 }, { "epoch": 0.937034246001842, "grad_norm": 0.3704151511192322, "learning_rate": 0.00015563516091648673, "loss": 11.6661, "step": 44764 }, { "epoch": 0.9370551787658042, "grad_norm": 0.2764827311038971, "learning_rate": 0.00015563333902453988, "loss": 11.6765, "step": 44765 }, { "epoch": 0.9370761115297664, "grad_norm": 0.250681608915329, "learning_rate": 0.0001556315171058489, "loss": 11.6703, "step": 44766 }, { "epoch": 0.9370970442937285, "grad_norm": 0.28733453154563904, "learning_rate": 0.0001556296951604148, "loss": 11.6507, "step": 44767 }, { "epoch": 0.9371179770576907, "grad_norm": 0.29761946201324463, "learning_rate": 0.00015562787318823836, "loss": 11.6675, "step": 44768 }, { "epoch": 0.9371389098216528, "grad_norm": 0.27541080117225647, "learning_rate": 0.00015562605118932049, "loss": 11.6698, "step": 44769 }, { "epoch": 0.937159842585615, "grad_norm": 0.28083688020706177, "learning_rate": 0.00015562422916366207, "loss": 11.6796, "step": 44770 }, { "epoch": 0.9371807753495771, "grad_norm": 0.303710401058197, "learning_rate": 0.00015562240711126398, "loss": 11.6586, "step": 44771 }, { "epoch": 0.9372017081135393, "grad_norm": 0.3498048782348633, "learning_rate": 0.00015562058503212708, "loss": 11.6812, "step": 44772 }, { "epoch": 0.9372226408775015, "grad_norm": 0.2675638496875763, "learning_rate": 0.0001556187629262522, "loss": 11.6466, "step": 44773 }, { "epoch": 0.9372435736414636, "grad_norm": 0.3065217137336731, "learning_rate": 0.00015561694079364036, "loss": 11.6654, "step": 44774 }, { "epoch": 0.9372645064054258, "grad_norm": 0.2825160026550293, "learning_rate": 0.00015561511863429228, "loss": 11.6625, "step": 44775 }, { "epoch": 0.9372854391693879, "grad_norm": 0.23135365545749664, "learning_rate": 0.00015561329644820887, "loss": 11.6602, "step": 44776 }, { "epoch": 0.9373063719333501, "grad_norm": 0.38078588247299194, "learning_rate": 0.00015561147423539108, "loss": 11.6774, "step": 44777 }, { "epoch": 0.9373273046973122, "grad_norm": 0.27988019585609436, "learning_rate": 0.00015560965199583974, "loss": 11.674, "step": 44778 }, { "epoch": 0.9373482374612744, "grad_norm": 0.29404252767562866, "learning_rate": 0.00015560782972955572, "loss": 11.6551, "step": 44779 }, { "epoch": 0.9373691702252366, "grad_norm": 0.42342665791511536, "learning_rate": 0.00015560600743653988, "loss": 11.6711, "step": 44780 }, { "epoch": 0.9373901029891987, "grad_norm": 0.32960909605026245, "learning_rate": 0.00015560418511679312, "loss": 11.6565, "step": 44781 }, { "epoch": 0.9374110357531609, "grad_norm": 0.2923834025859833, "learning_rate": 0.00015560236277031636, "loss": 11.684, "step": 44782 }, { "epoch": 0.937431968517123, "grad_norm": 0.30464789271354675, "learning_rate": 0.0001556005403971104, "loss": 11.6712, "step": 44783 }, { "epoch": 0.9374529012810852, "grad_norm": 0.25015076994895935, "learning_rate": 0.00015559871799717616, "loss": 11.6685, "step": 44784 }, { "epoch": 0.9374738340450474, "grad_norm": 0.24943441152572632, "learning_rate": 0.00015559689557051448, "loss": 11.6678, "step": 44785 }, { "epoch": 0.9374947668090094, "grad_norm": 0.32388949394226074, "learning_rate": 0.00015559507311712625, "loss": 11.6776, "step": 44786 }, { "epoch": 0.9375156995729717, "grad_norm": 0.6229342818260193, "learning_rate": 0.0001555932506370124, "loss": 11.6752, "step": 44787 }, { "epoch": 0.9375366323369337, "grad_norm": 0.26840701699256897, "learning_rate": 0.0001555914281301737, "loss": 11.6763, "step": 44788 }, { "epoch": 0.9375575651008959, "grad_norm": 0.27266451716423035, "learning_rate": 0.00015558960559661114, "loss": 11.6705, "step": 44789 }, { "epoch": 0.937578497864858, "grad_norm": 0.2652342915534973, "learning_rate": 0.0001555877830363255, "loss": 11.6809, "step": 44790 }, { "epoch": 0.9375994306288202, "grad_norm": 0.36339470744132996, "learning_rate": 0.00015558596044931773, "loss": 11.6621, "step": 44791 }, { "epoch": 0.9376203633927824, "grad_norm": 0.28415951132774353, "learning_rate": 0.00015558413783558868, "loss": 11.6641, "step": 44792 }, { "epoch": 0.9376412961567445, "grad_norm": 0.2508251965045929, "learning_rate": 0.00015558231519513917, "loss": 11.6678, "step": 44793 }, { "epoch": 0.9376622289207067, "grad_norm": 0.34585103392601013, "learning_rate": 0.00015558049252797017, "loss": 11.6722, "step": 44794 }, { "epoch": 0.9376831616846688, "grad_norm": 0.2850405275821686, "learning_rate": 0.0001555786698340825, "loss": 11.6503, "step": 44795 }, { "epoch": 0.937704094448631, "grad_norm": 0.30305615067481995, "learning_rate": 0.0001555768471134771, "loss": 11.6766, "step": 44796 }, { "epoch": 0.9377250272125931, "grad_norm": 0.29050105810165405, "learning_rate": 0.0001555750243661547, "loss": 11.6799, "step": 44797 }, { "epoch": 0.9377459599765553, "grad_norm": 0.36395201086997986, "learning_rate": 0.00015557320159211631, "loss": 11.6621, "step": 44798 }, { "epoch": 0.9377668927405175, "grad_norm": 0.36948123574256897, "learning_rate": 0.0001555713787913628, "loss": 11.6495, "step": 44799 }, { "epoch": 0.9377878255044796, "grad_norm": 0.319011926651001, "learning_rate": 0.000155569555963895, "loss": 11.6684, "step": 44800 }, { "epoch": 0.9378087582684418, "grad_norm": 0.2924407720565796, "learning_rate": 0.0001555677331097138, "loss": 11.6591, "step": 44801 }, { "epoch": 0.9378296910324039, "grad_norm": 0.3202439248561859, "learning_rate": 0.00015556591022882006, "loss": 11.6596, "step": 44802 }, { "epoch": 0.9378506237963661, "grad_norm": 0.33427926898002625, "learning_rate": 0.0001555640873212147, "loss": 11.6718, "step": 44803 }, { "epoch": 0.9378715565603283, "grad_norm": 0.2909451127052307, "learning_rate": 0.00015556226438689856, "loss": 11.6722, "step": 44804 }, { "epoch": 0.9378924893242904, "grad_norm": 0.3377234637737274, "learning_rate": 0.0001555604414258725, "loss": 11.681, "step": 44805 }, { "epoch": 0.9379134220882526, "grad_norm": 0.2891139090061188, "learning_rate": 0.00015555861843813746, "loss": 11.6633, "step": 44806 }, { "epoch": 0.9379343548522147, "grad_norm": 0.34323886036872864, "learning_rate": 0.00015555679542369425, "loss": 11.6749, "step": 44807 }, { "epoch": 0.9379552876161769, "grad_norm": 0.3371299207210541, "learning_rate": 0.00015555497238254378, "loss": 11.6744, "step": 44808 }, { "epoch": 0.937976220380139, "grad_norm": 0.29582351446151733, "learning_rate": 0.00015555314931468695, "loss": 11.6788, "step": 44809 }, { "epoch": 0.9379971531441011, "grad_norm": 0.29723843932151794, "learning_rate": 0.0001555513262201246, "loss": 11.6547, "step": 44810 }, { "epoch": 0.9380180859080633, "grad_norm": 0.3009403347969055, "learning_rate": 0.00015554950309885757, "loss": 11.6744, "step": 44811 }, { "epoch": 0.9380390186720254, "grad_norm": 0.31776493787765503, "learning_rate": 0.00015554767995088682, "loss": 11.6535, "step": 44812 }, { "epoch": 0.9380599514359876, "grad_norm": 0.5205772519111633, "learning_rate": 0.00015554585677621317, "loss": 11.6551, "step": 44813 }, { "epoch": 0.9380808841999497, "grad_norm": 0.3465172052383423, "learning_rate": 0.00015554403357483754, "loss": 11.6605, "step": 44814 }, { "epoch": 0.9381018169639119, "grad_norm": 0.34495869278907776, "learning_rate": 0.00015554221034676076, "loss": 11.6594, "step": 44815 }, { "epoch": 0.938122749727874, "grad_norm": 0.35126495361328125, "learning_rate": 0.00015554038709198373, "loss": 11.6648, "step": 44816 }, { "epoch": 0.9381436824918362, "grad_norm": 0.3320741355419159, "learning_rate": 0.0001555385638105073, "loss": 11.6617, "step": 44817 }, { "epoch": 0.9381646152557984, "grad_norm": 0.33071404695510864, "learning_rate": 0.00015553674050233242, "loss": 11.6696, "step": 44818 }, { "epoch": 0.9381855480197605, "grad_norm": 0.26320841908454895, "learning_rate": 0.00015553491716745988, "loss": 11.6803, "step": 44819 }, { "epoch": 0.9382064807837227, "grad_norm": 0.3466232717037201, "learning_rate": 0.0001555330938058906, "loss": 11.6599, "step": 44820 }, { "epoch": 0.9382274135476848, "grad_norm": 0.30069413781166077, "learning_rate": 0.00015553127041762545, "loss": 11.6468, "step": 44821 }, { "epoch": 0.938248346311647, "grad_norm": 0.426370769739151, "learning_rate": 0.0001555294470026653, "loss": 11.6747, "step": 44822 }, { "epoch": 0.9382692790756092, "grad_norm": 0.41923803091049194, "learning_rate": 0.00015552762356101104, "loss": 11.6699, "step": 44823 }, { "epoch": 0.9382902118395713, "grad_norm": 0.3512057363986969, "learning_rate": 0.00015552580009266356, "loss": 11.664, "step": 44824 }, { "epoch": 0.9383111446035335, "grad_norm": 0.28892049193382263, "learning_rate": 0.00015552397659762365, "loss": 11.655, "step": 44825 }, { "epoch": 0.9383320773674956, "grad_norm": 0.34331318736076355, "learning_rate": 0.0001555221530758923, "loss": 11.6672, "step": 44826 }, { "epoch": 0.9383530101314578, "grad_norm": 0.25936537981033325, "learning_rate": 0.00015552032952747034, "loss": 11.6749, "step": 44827 }, { "epoch": 0.9383739428954199, "grad_norm": 0.3293992280960083, "learning_rate": 0.00015551850595235865, "loss": 11.6734, "step": 44828 }, { "epoch": 0.9383948756593821, "grad_norm": 0.4287358820438385, "learning_rate": 0.00015551668235055808, "loss": 11.6648, "step": 44829 }, { "epoch": 0.9384158084233443, "grad_norm": 0.32987093925476074, "learning_rate": 0.00015551485872206952, "loss": 11.6602, "step": 44830 }, { "epoch": 0.9384367411873064, "grad_norm": 0.38460206985473633, "learning_rate": 0.0001555130350668939, "loss": 11.6705, "step": 44831 }, { "epoch": 0.9384576739512686, "grad_norm": 0.33932799100875854, "learning_rate": 0.000155511211385032, "loss": 11.655, "step": 44832 }, { "epoch": 0.9384786067152306, "grad_norm": 0.3138447105884552, "learning_rate": 0.00015550938767648476, "loss": 11.674, "step": 44833 }, { "epoch": 0.9384995394791928, "grad_norm": 0.3706345856189728, "learning_rate": 0.00015550756394125306, "loss": 11.6625, "step": 44834 }, { "epoch": 0.9385204722431549, "grad_norm": 0.244670569896698, "learning_rate": 0.00015550574017933778, "loss": 11.6833, "step": 44835 }, { "epoch": 0.9385414050071171, "grad_norm": 0.28521400690078735, "learning_rate": 0.00015550391639073975, "loss": 11.6564, "step": 44836 }, { "epoch": 0.9385623377710793, "grad_norm": 0.27748870849609375, "learning_rate": 0.00015550209257545987, "loss": 11.6697, "step": 44837 }, { "epoch": 0.9385832705350414, "grad_norm": 0.4649653732776642, "learning_rate": 0.00015550026873349903, "loss": 11.6745, "step": 44838 }, { "epoch": 0.9386042032990036, "grad_norm": 0.24991147220134735, "learning_rate": 0.00015549844486485807, "loss": 11.6595, "step": 44839 }, { "epoch": 0.9386251360629657, "grad_norm": 0.2947768568992615, "learning_rate": 0.00015549662096953797, "loss": 11.6865, "step": 44840 }, { "epoch": 0.9386460688269279, "grad_norm": 0.36779919266700745, "learning_rate": 0.00015549479704753946, "loss": 11.6689, "step": 44841 }, { "epoch": 0.9386670015908901, "grad_norm": 0.47138088941574097, "learning_rate": 0.0001554929730988635, "loss": 11.6797, "step": 44842 }, { "epoch": 0.9386879343548522, "grad_norm": 0.28897902369499207, "learning_rate": 0.000155491149123511, "loss": 11.6674, "step": 44843 }, { "epoch": 0.9387088671188144, "grad_norm": 0.34897565841674805, "learning_rate": 0.00015548932512148274, "loss": 11.6601, "step": 44844 }, { "epoch": 0.9387297998827765, "grad_norm": 0.31262150406837463, "learning_rate": 0.0001554875010927797, "loss": 11.6692, "step": 44845 }, { "epoch": 0.9387507326467387, "grad_norm": 0.30724120140075684, "learning_rate": 0.00015548567703740267, "loss": 11.6781, "step": 44846 }, { "epoch": 0.9387716654107008, "grad_norm": 5.4948906898498535, "learning_rate": 0.00015548385295535258, "loss": 11.634, "step": 44847 }, { "epoch": 0.938792598174663, "grad_norm": 0.2910798490047455, "learning_rate": 0.00015548202884663027, "loss": 11.6723, "step": 44848 }, { "epoch": 0.9388135309386252, "grad_norm": 0.3042779266834259, "learning_rate": 0.00015548020471123665, "loss": 11.6681, "step": 44849 }, { "epoch": 0.9388344637025873, "grad_norm": 0.3536053001880646, "learning_rate": 0.0001554783805491726, "loss": 11.6779, "step": 44850 }, { "epoch": 0.9388553964665495, "grad_norm": 0.38576921820640564, "learning_rate": 0.00015547655636043896, "loss": 11.6743, "step": 44851 }, { "epoch": 0.9388763292305116, "grad_norm": 0.3242069184780121, "learning_rate": 0.00015547473214503664, "loss": 11.6575, "step": 44852 }, { "epoch": 0.9388972619944738, "grad_norm": 0.31438371539115906, "learning_rate": 0.00015547290790296646, "loss": 11.6672, "step": 44853 }, { "epoch": 0.9389181947584359, "grad_norm": 0.3687600791454315, "learning_rate": 0.0001554710836342294, "loss": 11.6733, "step": 44854 }, { "epoch": 0.938939127522398, "grad_norm": 0.3154340088367462, "learning_rate": 0.00015546925933882625, "loss": 11.6651, "step": 44855 }, { "epoch": 0.9389600602863603, "grad_norm": 0.2744114398956299, "learning_rate": 0.00015546743501675792, "loss": 11.6673, "step": 44856 }, { "epoch": 0.9389809930503223, "grad_norm": 0.39977794885635376, "learning_rate": 0.00015546561066802526, "loss": 11.6788, "step": 44857 }, { "epoch": 0.9390019258142845, "grad_norm": 0.3347207009792328, "learning_rate": 0.00015546378629262923, "loss": 11.6618, "step": 44858 }, { "epoch": 0.9390228585782466, "grad_norm": 0.34372299909591675, "learning_rate": 0.0001554619618905706, "loss": 11.6801, "step": 44859 }, { "epoch": 0.9390437913422088, "grad_norm": 0.3189282715320587, "learning_rate": 0.0001554601374618503, "loss": 11.6661, "step": 44860 }, { "epoch": 0.939064724106171, "grad_norm": 0.3427184224128723, "learning_rate": 0.00015545831300646922, "loss": 11.6644, "step": 44861 }, { "epoch": 0.9390856568701331, "grad_norm": 0.33030328154563904, "learning_rate": 0.0001554564885244282, "loss": 11.6594, "step": 44862 }, { "epoch": 0.9391065896340953, "grad_norm": 0.35593584179878235, "learning_rate": 0.00015545466401572814, "loss": 11.6673, "step": 44863 }, { "epoch": 0.9391275223980574, "grad_norm": 0.3651689887046814, "learning_rate": 0.00015545283948036992, "loss": 11.6676, "step": 44864 }, { "epoch": 0.9391484551620196, "grad_norm": 0.3697129786014557, "learning_rate": 0.0001554510149183544, "loss": 11.6656, "step": 44865 }, { "epoch": 0.9391693879259817, "grad_norm": 0.2798863649368286, "learning_rate": 0.00015544919032968243, "loss": 11.672, "step": 44866 }, { "epoch": 0.9391903206899439, "grad_norm": 0.2861054837703705, "learning_rate": 0.00015544736571435498, "loss": 11.6646, "step": 44867 }, { "epoch": 0.9392112534539061, "grad_norm": 0.4239513874053955, "learning_rate": 0.00015544554107237285, "loss": 11.6924, "step": 44868 }, { "epoch": 0.9392321862178682, "grad_norm": 0.28651663661003113, "learning_rate": 0.0001554437164037369, "loss": 11.6747, "step": 44869 }, { "epoch": 0.9392531189818304, "grad_norm": 0.24849548935890198, "learning_rate": 0.0001554418917084481, "loss": 11.6617, "step": 44870 }, { "epoch": 0.9392740517457925, "grad_norm": 0.31711721420288086, "learning_rate": 0.00015544006698650723, "loss": 11.6575, "step": 44871 }, { "epoch": 0.9392949845097547, "grad_norm": 0.2858714759349823, "learning_rate": 0.00015543824223791526, "loss": 11.6666, "step": 44872 }, { "epoch": 0.9393159172737168, "grad_norm": 0.30481794476509094, "learning_rate": 0.00015543641746267297, "loss": 11.6562, "step": 44873 }, { "epoch": 0.939336850037679, "grad_norm": 0.836076021194458, "learning_rate": 0.0001554345926607813, "loss": 11.6671, "step": 44874 }, { "epoch": 0.9393577828016412, "grad_norm": 0.29385101795196533, "learning_rate": 0.0001554327678322411, "loss": 11.669, "step": 44875 }, { "epoch": 0.9393787155656033, "grad_norm": 0.3021792471408844, "learning_rate": 0.00015543094297705326, "loss": 11.6765, "step": 44876 }, { "epoch": 0.9393996483295655, "grad_norm": 0.3297015130519867, "learning_rate": 0.00015542911809521867, "loss": 11.6565, "step": 44877 }, { "epoch": 0.9394205810935276, "grad_norm": 0.38936418294906616, "learning_rate": 0.00015542729318673815, "loss": 11.6872, "step": 44878 }, { "epoch": 0.9394415138574898, "grad_norm": 0.35058411955833435, "learning_rate": 0.00015542546825161267, "loss": 11.6649, "step": 44879 }, { "epoch": 0.9394624466214518, "grad_norm": 0.36105382442474365, "learning_rate": 0.00015542364328984304, "loss": 11.6419, "step": 44880 }, { "epoch": 0.939483379385414, "grad_norm": 0.39018359780311584, "learning_rate": 0.00015542181830143016, "loss": 11.6686, "step": 44881 }, { "epoch": 0.9395043121493762, "grad_norm": 0.3856898248195648, "learning_rate": 0.0001554199932863749, "loss": 11.689, "step": 44882 }, { "epoch": 0.9395252449133383, "grad_norm": 0.2658068835735321, "learning_rate": 0.0001554181682446781, "loss": 11.6614, "step": 44883 }, { "epoch": 0.9395461776773005, "grad_norm": 0.34447136521339417, "learning_rate": 0.00015541634317634072, "loss": 11.6587, "step": 44884 }, { "epoch": 0.9395671104412626, "grad_norm": 0.360714316368103, "learning_rate": 0.00015541451808136358, "loss": 11.6688, "step": 44885 }, { "epoch": 0.9395880432052248, "grad_norm": 0.27837562561035156, "learning_rate": 0.00015541269295974756, "loss": 11.6828, "step": 44886 }, { "epoch": 0.939608975969187, "grad_norm": 0.3598938584327698, "learning_rate": 0.0001554108678114936, "loss": 11.6676, "step": 44887 }, { "epoch": 0.9396299087331491, "grad_norm": 0.29046791791915894, "learning_rate": 0.00015540904263660246, "loss": 11.6718, "step": 44888 }, { "epoch": 0.9396508414971113, "grad_norm": 0.3511282503604889, "learning_rate": 0.00015540721743507514, "loss": 11.6663, "step": 44889 }, { "epoch": 0.9396717742610734, "grad_norm": 0.29934781789779663, "learning_rate": 0.00015540539220691238, "loss": 11.6667, "step": 44890 }, { "epoch": 0.9396927070250356, "grad_norm": 0.30398571491241455, "learning_rate": 0.0001554035669521152, "loss": 11.6547, "step": 44891 }, { "epoch": 0.9397136397889977, "grad_norm": 0.31730934977531433, "learning_rate": 0.00015540174167068442, "loss": 11.6669, "step": 44892 }, { "epoch": 0.9397345725529599, "grad_norm": 0.3292100727558136, "learning_rate": 0.00015539991636262087, "loss": 11.6657, "step": 44893 }, { "epoch": 0.9397555053169221, "grad_norm": 0.470558226108551, "learning_rate": 0.00015539809102792552, "loss": 11.658, "step": 44894 }, { "epoch": 0.9397764380808842, "grad_norm": 0.3036835193634033, "learning_rate": 0.00015539626566659915, "loss": 11.6745, "step": 44895 }, { "epoch": 0.9397973708448464, "grad_norm": 0.2732681930065155, "learning_rate": 0.0001553944402786427, "loss": 11.6785, "step": 44896 }, { "epoch": 0.9398183036088085, "grad_norm": 0.30899444222450256, "learning_rate": 0.00015539261486405704, "loss": 11.6632, "step": 44897 }, { "epoch": 0.9398392363727707, "grad_norm": 0.2646040618419647, "learning_rate": 0.00015539078942284305, "loss": 11.6633, "step": 44898 }, { "epoch": 0.9398601691367328, "grad_norm": 0.39152097702026367, "learning_rate": 0.0001553889639550016, "loss": 11.6795, "step": 44899 }, { "epoch": 0.939881101900695, "grad_norm": 0.3276134133338928, "learning_rate": 0.00015538713846053353, "loss": 11.6736, "step": 44900 }, { "epoch": 0.9399020346646572, "grad_norm": 0.30044034123420715, "learning_rate": 0.00015538531293943976, "loss": 11.6776, "step": 44901 }, { "epoch": 0.9399229674286192, "grad_norm": 0.423086017370224, "learning_rate": 0.00015538348739172118, "loss": 11.6811, "step": 44902 }, { "epoch": 0.9399439001925814, "grad_norm": 0.34175071120262146, "learning_rate": 0.00015538166181737863, "loss": 11.6674, "step": 44903 }, { "epoch": 0.9399648329565435, "grad_norm": 0.31065449118614197, "learning_rate": 0.00015537983621641304, "loss": 11.6755, "step": 44904 }, { "epoch": 0.9399857657205057, "grad_norm": 0.35049572587013245, "learning_rate": 0.00015537801058882522, "loss": 11.6813, "step": 44905 }, { "epoch": 0.9400066984844679, "grad_norm": 0.37077754735946655, "learning_rate": 0.0001553761849346161, "loss": 11.6606, "step": 44906 }, { "epoch": 0.94002763124843, "grad_norm": 0.2976948916912079, "learning_rate": 0.00015537435925378653, "loss": 11.6756, "step": 44907 }, { "epoch": 0.9400485640123922, "grad_norm": 0.24177822470664978, "learning_rate": 0.00015537253354633735, "loss": 11.6682, "step": 44908 }, { "epoch": 0.9400694967763543, "grad_norm": 0.3087327480316162, "learning_rate": 0.00015537070781226957, "loss": 11.672, "step": 44909 }, { "epoch": 0.9400904295403165, "grad_norm": 0.2993883788585663, "learning_rate": 0.00015536888205158392, "loss": 11.6783, "step": 44910 }, { "epoch": 0.9401113623042786, "grad_norm": 0.29593273997306824, "learning_rate": 0.00015536705626428135, "loss": 11.6592, "step": 44911 }, { "epoch": 0.9401322950682408, "grad_norm": 0.3581914007663727, "learning_rate": 0.00015536523045036273, "loss": 11.6382, "step": 44912 }, { "epoch": 0.940153227832203, "grad_norm": 0.3627329468727112, "learning_rate": 0.00015536340460982895, "loss": 11.6773, "step": 44913 }, { "epoch": 0.9401741605961651, "grad_norm": 0.3808515965938568, "learning_rate": 0.00015536157874268086, "loss": 11.6696, "step": 44914 }, { "epoch": 0.9401950933601273, "grad_norm": 0.288603276014328, "learning_rate": 0.00015535975284891934, "loss": 11.6557, "step": 44915 }, { "epoch": 0.9402160261240894, "grad_norm": 0.2562299370765686, "learning_rate": 0.00015535792692854532, "loss": 11.6781, "step": 44916 }, { "epoch": 0.9402369588880516, "grad_norm": 0.33830809593200684, "learning_rate": 0.00015535610098155956, "loss": 11.6736, "step": 44917 }, { "epoch": 0.9402578916520137, "grad_norm": 0.31325042247772217, "learning_rate": 0.00015535427500796306, "loss": 11.6767, "step": 44918 }, { "epoch": 0.9402788244159759, "grad_norm": 0.2977619469165802, "learning_rate": 0.00015535244900775666, "loss": 11.6506, "step": 44919 }, { "epoch": 0.9402997571799381, "grad_norm": 0.37933000922203064, "learning_rate": 0.0001553506229809412, "loss": 11.6766, "step": 44920 }, { "epoch": 0.9403206899439002, "grad_norm": 0.2814137935638428, "learning_rate": 0.00015534879692751763, "loss": 11.6758, "step": 44921 }, { "epoch": 0.9403416227078624, "grad_norm": 0.2946074306964874, "learning_rate": 0.00015534697084748674, "loss": 11.6784, "step": 44922 }, { "epoch": 0.9403625554718245, "grad_norm": 0.393667995929718, "learning_rate": 0.00015534514474084947, "loss": 11.6545, "step": 44923 }, { "epoch": 0.9403834882357867, "grad_norm": 0.3775791823863983, "learning_rate": 0.00015534331860760667, "loss": 11.6739, "step": 44924 }, { "epoch": 0.9404044209997489, "grad_norm": 0.34883034229278564, "learning_rate": 0.00015534149244775925, "loss": 11.676, "step": 44925 }, { "epoch": 0.940425353763711, "grad_norm": 0.2739578187465668, "learning_rate": 0.00015533966626130803, "loss": 11.6713, "step": 44926 }, { "epoch": 0.9404462865276731, "grad_norm": 0.3301326334476471, "learning_rate": 0.00015533784004825393, "loss": 11.6697, "step": 44927 }, { "epoch": 0.9404672192916352, "grad_norm": 0.3510074019432068, "learning_rate": 0.00015533601380859785, "loss": 11.6735, "step": 44928 }, { "epoch": 0.9404881520555974, "grad_norm": 0.27860087156295776, "learning_rate": 0.00015533418754234063, "loss": 11.6769, "step": 44929 }, { "epoch": 0.9405090848195595, "grad_norm": 0.3198563754558563, "learning_rate": 0.00015533236124948312, "loss": 11.6801, "step": 44930 }, { "epoch": 0.9405300175835217, "grad_norm": 0.28871676325798035, "learning_rate": 0.0001553305349300263, "loss": 11.6472, "step": 44931 }, { "epoch": 0.9405509503474839, "grad_norm": 0.36022016406059265, "learning_rate": 0.00015532870858397092, "loss": 11.6726, "step": 44932 }, { "epoch": 0.940571883111446, "grad_norm": 0.26932546496391296, "learning_rate": 0.00015532688221131798, "loss": 11.6616, "step": 44933 }, { "epoch": 0.9405928158754082, "grad_norm": 0.2800549566745758, "learning_rate": 0.00015532505581206823, "loss": 11.67, "step": 44934 }, { "epoch": 0.9406137486393703, "grad_norm": 0.27424177527427673, "learning_rate": 0.00015532322938622265, "loss": 11.6722, "step": 44935 }, { "epoch": 0.9406346814033325, "grad_norm": 0.34120965003967285, "learning_rate": 0.0001553214029337821, "loss": 11.677, "step": 44936 }, { "epoch": 0.9406556141672946, "grad_norm": 0.275124728679657, "learning_rate": 0.00015531957645474744, "loss": 11.6636, "step": 44937 }, { "epoch": 0.9406765469312568, "grad_norm": 0.28782254457473755, "learning_rate": 0.00015531774994911955, "loss": 11.691, "step": 44938 }, { "epoch": 0.940697479695219, "grad_norm": 0.31330934166908264, "learning_rate": 0.00015531592341689931, "loss": 11.6803, "step": 44939 }, { "epoch": 0.9407184124591811, "grad_norm": 0.28003403544425964, "learning_rate": 0.0001553140968580876, "loss": 11.6674, "step": 44940 }, { "epoch": 0.9407393452231433, "grad_norm": 0.2922268211841583, "learning_rate": 0.00015531227027268526, "loss": 11.6721, "step": 44941 }, { "epoch": 0.9407602779871054, "grad_norm": 0.39936530590057373, "learning_rate": 0.00015531044366069323, "loss": 11.6838, "step": 44942 }, { "epoch": 0.9407812107510676, "grad_norm": 0.3800238072872162, "learning_rate": 0.00015530861702211236, "loss": 11.6742, "step": 44943 }, { "epoch": 0.9408021435150298, "grad_norm": 0.264976441860199, "learning_rate": 0.00015530679035694354, "loss": 11.6796, "step": 44944 }, { "epoch": 0.9408230762789919, "grad_norm": 0.2734275460243225, "learning_rate": 0.00015530496366518762, "loss": 11.6658, "step": 44945 }, { "epoch": 0.9408440090429541, "grad_norm": 0.33663007616996765, "learning_rate": 0.00015530313694684552, "loss": 11.6669, "step": 44946 }, { "epoch": 0.9408649418069162, "grad_norm": 0.33335885405540466, "learning_rate": 0.00015530131020191805, "loss": 11.6662, "step": 44947 }, { "epoch": 0.9408858745708784, "grad_norm": 0.31039878726005554, "learning_rate": 0.00015529948343040616, "loss": 11.6796, "step": 44948 }, { "epoch": 0.9409068073348404, "grad_norm": 0.37109747529029846, "learning_rate": 0.0001552976566323107, "loss": 11.677, "step": 44949 }, { "epoch": 0.9409277400988026, "grad_norm": 0.26604872941970825, "learning_rate": 0.00015529582980763256, "loss": 11.6539, "step": 44950 }, { "epoch": 0.9409486728627648, "grad_norm": 0.31343403458595276, "learning_rate": 0.00015529400295637258, "loss": 11.6886, "step": 44951 }, { "epoch": 0.9409696056267269, "grad_norm": 0.7138993144035339, "learning_rate": 0.00015529217607853165, "loss": 11.6266, "step": 44952 }, { "epoch": 0.9409905383906891, "grad_norm": 0.3120955228805542, "learning_rate": 0.00015529034917411073, "loss": 11.6788, "step": 44953 }, { "epoch": 0.9410114711546512, "grad_norm": 0.2943323850631714, "learning_rate": 0.00015528852224311058, "loss": 11.6791, "step": 44954 }, { "epoch": 0.9410324039186134, "grad_norm": 0.3241741359233856, "learning_rate": 0.00015528669528553214, "loss": 11.6917, "step": 44955 }, { "epoch": 0.9410533366825755, "grad_norm": 0.3609175682067871, "learning_rate": 0.00015528486830137627, "loss": 11.6752, "step": 44956 }, { "epoch": 0.9410742694465377, "grad_norm": 0.2793439030647278, "learning_rate": 0.00015528304129064385, "loss": 11.6742, "step": 44957 }, { "epoch": 0.9410952022104999, "grad_norm": 0.2537583112716675, "learning_rate": 0.00015528121425333578, "loss": 11.6665, "step": 44958 }, { "epoch": 0.941116134974462, "grad_norm": 0.25917840003967285, "learning_rate": 0.00015527938718945295, "loss": 11.6495, "step": 44959 }, { "epoch": 0.9411370677384242, "grad_norm": 0.2977425158023834, "learning_rate": 0.00015527756009899618, "loss": 11.6697, "step": 44960 }, { "epoch": 0.9411580005023863, "grad_norm": 0.38563400506973267, "learning_rate": 0.00015527573298196638, "loss": 11.6755, "step": 44961 }, { "epoch": 0.9411789332663485, "grad_norm": 0.31941038370132446, "learning_rate": 0.0001552739058383644, "loss": 11.6652, "step": 44962 }, { "epoch": 0.9411998660303107, "grad_norm": 0.34644562005996704, "learning_rate": 0.0001552720786681912, "loss": 11.6707, "step": 44963 }, { "epoch": 0.9412207987942728, "grad_norm": 0.30014121532440186, "learning_rate": 0.00015527025147144756, "loss": 11.6592, "step": 44964 }, { "epoch": 0.941241731558235, "grad_norm": 0.29648178815841675, "learning_rate": 0.00015526842424813444, "loss": 11.6604, "step": 44965 }, { "epoch": 0.9412626643221971, "grad_norm": 0.3006111681461334, "learning_rate": 0.00015526659699825266, "loss": 11.6576, "step": 44966 }, { "epoch": 0.9412835970861593, "grad_norm": 0.3081264793872833, "learning_rate": 0.00015526476972180312, "loss": 11.6585, "step": 44967 }, { "epoch": 0.9413045298501214, "grad_norm": 0.32378527522087097, "learning_rate": 0.0001552629424187867, "loss": 11.6626, "step": 44968 }, { "epoch": 0.9413254626140836, "grad_norm": 0.2579689025878906, "learning_rate": 0.00015526111508920427, "loss": 11.6609, "step": 44969 }, { "epoch": 0.9413463953780458, "grad_norm": 0.37298494577407837, "learning_rate": 0.0001552592877330567, "loss": 11.679, "step": 44970 }, { "epoch": 0.9413673281420079, "grad_norm": 0.34429582953453064, "learning_rate": 0.00015525746035034493, "loss": 11.6733, "step": 44971 }, { "epoch": 0.94138826090597, "grad_norm": 0.42703142762184143, "learning_rate": 0.00015525563294106976, "loss": 11.6854, "step": 44972 }, { "epoch": 0.9414091936699321, "grad_norm": 0.24552278220653534, "learning_rate": 0.0001552538055052321, "loss": 11.6658, "step": 44973 }, { "epoch": 0.9414301264338943, "grad_norm": 0.3908988833427429, "learning_rate": 0.00015525197804283283, "loss": 11.6591, "step": 44974 }, { "epoch": 0.9414510591978564, "grad_norm": 0.28026387095451355, "learning_rate": 0.00015525015055387281, "loss": 11.6648, "step": 44975 }, { "epoch": 0.9414719919618186, "grad_norm": 0.28701964020729065, "learning_rate": 0.00015524832303835296, "loss": 11.6634, "step": 44976 }, { "epoch": 0.9414929247257808, "grad_norm": 0.3075724244117737, "learning_rate": 0.00015524649549627414, "loss": 11.6728, "step": 44977 }, { "epoch": 0.9415138574897429, "grad_norm": 0.38850998878479004, "learning_rate": 0.0001552446679276372, "loss": 11.688, "step": 44978 }, { "epoch": 0.9415347902537051, "grad_norm": 0.33296653628349304, "learning_rate": 0.00015524284033244308, "loss": 11.6576, "step": 44979 }, { "epoch": 0.9415557230176672, "grad_norm": 0.3561582863330841, "learning_rate": 0.00015524101271069255, "loss": 11.6863, "step": 44980 }, { "epoch": 0.9415766557816294, "grad_norm": 0.2698567807674408, "learning_rate": 0.0001552391850623866, "loss": 11.6862, "step": 44981 }, { "epoch": 0.9415975885455916, "grad_norm": 0.36677902936935425, "learning_rate": 0.00015523735738752609, "loss": 11.6702, "step": 44982 }, { "epoch": 0.9416185213095537, "grad_norm": 0.29977962374687195, "learning_rate": 0.00015523552968611185, "loss": 11.6498, "step": 44983 }, { "epoch": 0.9416394540735159, "grad_norm": 0.23070967197418213, "learning_rate": 0.00015523370195814478, "loss": 11.6577, "step": 44984 }, { "epoch": 0.941660386837478, "grad_norm": 0.33116281032562256, "learning_rate": 0.00015523187420362578, "loss": 11.6742, "step": 44985 }, { "epoch": 0.9416813196014402, "grad_norm": 0.4620661437511444, "learning_rate": 0.0001552300464225557, "loss": 11.6814, "step": 44986 }, { "epoch": 0.9417022523654023, "grad_norm": 0.3063333034515381, "learning_rate": 0.0001552282186149354, "loss": 11.6736, "step": 44987 }, { "epoch": 0.9417231851293645, "grad_norm": 0.32750841975212097, "learning_rate": 0.00015522639078076583, "loss": 11.6706, "step": 44988 }, { "epoch": 0.9417441178933267, "grad_norm": 0.28215572237968445, "learning_rate": 0.00015522456292004782, "loss": 11.6618, "step": 44989 }, { "epoch": 0.9417650506572888, "grad_norm": 0.2985745966434479, "learning_rate": 0.00015522273503278225, "loss": 11.6552, "step": 44990 }, { "epoch": 0.941785983421251, "grad_norm": 0.24122311174869537, "learning_rate": 0.00015522090711897, "loss": 11.6494, "step": 44991 }, { "epoch": 0.9418069161852131, "grad_norm": 0.2874991297721863, "learning_rate": 0.00015521907917861197, "loss": 11.6596, "step": 44992 }, { "epoch": 0.9418278489491753, "grad_norm": 0.3188684582710266, "learning_rate": 0.00015521725121170902, "loss": 11.6602, "step": 44993 }, { "epoch": 0.9418487817131374, "grad_norm": 0.31366807222366333, "learning_rate": 0.000155215423218262, "loss": 11.6718, "step": 44994 }, { "epoch": 0.9418697144770996, "grad_norm": 0.2771015763282776, "learning_rate": 0.00015521359519827185, "loss": 11.6613, "step": 44995 }, { "epoch": 0.9418906472410618, "grad_norm": 0.31096750497817993, "learning_rate": 0.00015521176715173942, "loss": 11.6762, "step": 44996 }, { "epoch": 0.9419115800050238, "grad_norm": 0.3060480058193207, "learning_rate": 0.00015520993907866556, "loss": 11.68, "step": 44997 }, { "epoch": 0.941932512768986, "grad_norm": 0.3069608509540558, "learning_rate": 0.0001552081109790512, "loss": 11.6758, "step": 44998 }, { "epoch": 0.9419534455329481, "grad_norm": 0.3190724849700928, "learning_rate": 0.00015520628285289718, "loss": 11.6435, "step": 44999 }, { "epoch": 0.9419743782969103, "grad_norm": 0.3153766691684723, "learning_rate": 0.00015520445470020442, "loss": 11.6857, "step": 45000 }, { "epoch": 0.9419743782969103, "eval_loss": 11.669048309326172, "eval_runtime": 34.3611, "eval_samples_per_second": 27.968, "eval_steps_per_second": 7.014, "step": 45000 }, { "epoch": 0.9419953110608725, "grad_norm": 0.3014361560344696, "learning_rate": 0.00015520262652097373, "loss": 11.6856, "step": 45001 }, { "epoch": 0.9420162438248346, "grad_norm": 0.2817513048648834, "learning_rate": 0.00015520079831520607, "loss": 11.6656, "step": 45002 }, { "epoch": 0.9420371765887968, "grad_norm": 0.34361815452575684, "learning_rate": 0.00015519897008290225, "loss": 11.6554, "step": 45003 }, { "epoch": 0.9420581093527589, "grad_norm": 0.30452650785446167, "learning_rate": 0.0001551971418240632, "loss": 11.6733, "step": 45004 }, { "epoch": 0.9420790421167211, "grad_norm": 0.3147137463092804, "learning_rate": 0.00015519531353868976, "loss": 11.66, "step": 45005 }, { "epoch": 0.9420999748806832, "grad_norm": 0.3020966053009033, "learning_rate": 0.00015519348522678283, "loss": 11.6533, "step": 45006 }, { "epoch": 0.9421209076446454, "grad_norm": 0.32824140787124634, "learning_rate": 0.0001551916568883433, "loss": 11.6779, "step": 45007 }, { "epoch": 0.9421418404086076, "grad_norm": 0.30874693393707275, "learning_rate": 0.00015518982852337203, "loss": 11.6742, "step": 45008 }, { "epoch": 0.9421627731725697, "grad_norm": 0.38200220465660095, "learning_rate": 0.00015518800013186992, "loss": 11.6525, "step": 45009 }, { "epoch": 0.9421837059365319, "grad_norm": 0.31766557693481445, "learning_rate": 0.00015518617171383778, "loss": 11.6667, "step": 45010 }, { "epoch": 0.942204638700494, "grad_norm": 0.30051055550575256, "learning_rate": 0.00015518434326927658, "loss": 11.6822, "step": 45011 }, { "epoch": 0.9422255714644562, "grad_norm": 0.33354413509368896, "learning_rate": 0.00015518251479818717, "loss": 11.6706, "step": 45012 }, { "epoch": 0.9422465042284183, "grad_norm": 0.28870442509651184, "learning_rate": 0.0001551806863005704, "loss": 11.665, "step": 45013 }, { "epoch": 0.9422674369923805, "grad_norm": 0.35361403226852417, "learning_rate": 0.00015517885777642718, "loss": 11.6674, "step": 45014 }, { "epoch": 0.9422883697563427, "grad_norm": 0.2735860347747803, "learning_rate": 0.00015517702922575834, "loss": 11.6667, "step": 45015 }, { "epoch": 0.9423093025203048, "grad_norm": 0.3760971128940582, "learning_rate": 0.00015517520064856487, "loss": 11.6565, "step": 45016 }, { "epoch": 0.942330235284267, "grad_norm": 0.3937792181968689, "learning_rate": 0.0001551733720448475, "loss": 11.6807, "step": 45017 }, { "epoch": 0.942351168048229, "grad_norm": 0.37725260853767395, "learning_rate": 0.0001551715434146072, "loss": 11.6658, "step": 45018 }, { "epoch": 0.9423721008121912, "grad_norm": 0.26970863342285156, "learning_rate": 0.00015516971475784485, "loss": 11.6605, "step": 45019 }, { "epoch": 0.9423930335761534, "grad_norm": 0.32045599818229675, "learning_rate": 0.00015516788607456133, "loss": 11.6879, "step": 45020 }, { "epoch": 0.9424139663401155, "grad_norm": 0.25493448972702026, "learning_rate": 0.0001551660573647575, "loss": 11.6888, "step": 45021 }, { "epoch": 0.9424348991040777, "grad_norm": 0.37350112199783325, "learning_rate": 0.00015516422862843417, "loss": 11.6635, "step": 45022 }, { "epoch": 0.9424558318680398, "grad_norm": 0.3980310261249542, "learning_rate": 0.00015516239986559235, "loss": 11.6791, "step": 45023 }, { "epoch": 0.942476764632002, "grad_norm": 0.2701699733734131, "learning_rate": 0.00015516057107623288, "loss": 11.6773, "step": 45024 }, { "epoch": 0.9424976973959641, "grad_norm": 0.31532666087150574, "learning_rate": 0.00015515874226035658, "loss": 11.669, "step": 45025 }, { "epoch": 0.9425186301599263, "grad_norm": 0.3068319857120514, "learning_rate": 0.00015515691341796438, "loss": 11.6819, "step": 45026 }, { "epoch": 0.9425395629238885, "grad_norm": 0.314818412065506, "learning_rate": 0.00015515508454905713, "loss": 11.6612, "step": 45027 }, { "epoch": 0.9425604956878506, "grad_norm": 0.3454033434391022, "learning_rate": 0.00015515325565363574, "loss": 11.6742, "step": 45028 }, { "epoch": 0.9425814284518128, "grad_norm": 0.24522332847118378, "learning_rate": 0.0001551514267317011, "loss": 11.6761, "step": 45029 }, { "epoch": 0.9426023612157749, "grad_norm": 0.2835737466812134, "learning_rate": 0.000155149597783254, "loss": 11.6645, "step": 45030 }, { "epoch": 0.9426232939797371, "grad_norm": 0.3733251690864563, "learning_rate": 0.00015514776880829544, "loss": 11.6646, "step": 45031 }, { "epoch": 0.9426442267436992, "grad_norm": 0.36327409744262695, "learning_rate": 0.00015514593980682623, "loss": 11.6813, "step": 45032 }, { "epoch": 0.9426651595076614, "grad_norm": 0.3447919487953186, "learning_rate": 0.00015514411077884724, "loss": 11.6563, "step": 45033 }, { "epoch": 0.9426860922716236, "grad_norm": 0.28401830792427063, "learning_rate": 0.00015514228172435938, "loss": 11.6865, "step": 45034 }, { "epoch": 0.9427070250355857, "grad_norm": 0.41414961218833923, "learning_rate": 0.00015514045264336353, "loss": 11.673, "step": 45035 }, { "epoch": 0.9427279577995479, "grad_norm": 0.31186920404434204, "learning_rate": 0.00015513862353586056, "loss": 11.6664, "step": 45036 }, { "epoch": 0.94274889056351, "grad_norm": 0.29676565527915955, "learning_rate": 0.00015513679440185135, "loss": 11.6627, "step": 45037 }, { "epoch": 0.9427698233274722, "grad_norm": 0.35316500067710876, "learning_rate": 0.0001551349652413368, "loss": 11.6677, "step": 45038 }, { "epoch": 0.9427907560914344, "grad_norm": 0.3114457428455353, "learning_rate": 0.0001551331360543177, "loss": 11.6449, "step": 45039 }, { "epoch": 0.9428116888553965, "grad_norm": 0.3003208339214325, "learning_rate": 0.00015513130684079504, "loss": 11.6569, "step": 45040 }, { "epoch": 0.9428326216193587, "grad_norm": 0.43106603622436523, "learning_rate": 0.00015512947760076967, "loss": 11.6779, "step": 45041 }, { "epoch": 0.9428535543833207, "grad_norm": 0.326621413230896, "learning_rate": 0.0001551276483342424, "loss": 11.6756, "step": 45042 }, { "epoch": 0.942874487147283, "grad_norm": 0.34110814332962036, "learning_rate": 0.00015512581904121423, "loss": 11.6709, "step": 45043 }, { "epoch": 0.942895419911245, "grad_norm": 0.29375022649765015, "learning_rate": 0.00015512398972168594, "loss": 11.666, "step": 45044 }, { "epoch": 0.9429163526752072, "grad_norm": 0.31032732129096985, "learning_rate": 0.00015512216037565844, "loss": 11.6669, "step": 45045 }, { "epoch": 0.9429372854391694, "grad_norm": 0.28095829486846924, "learning_rate": 0.00015512033100313264, "loss": 11.6728, "step": 45046 }, { "epoch": 0.9429582182031315, "grad_norm": 0.44272634387016296, "learning_rate": 0.00015511850160410936, "loss": 11.668, "step": 45047 }, { "epoch": 0.9429791509670937, "grad_norm": 0.3419184982776642, "learning_rate": 0.00015511667217858955, "loss": 11.6551, "step": 45048 }, { "epoch": 0.9430000837310558, "grad_norm": 0.3132547438144684, "learning_rate": 0.00015511484272657402, "loss": 11.6774, "step": 45049 }, { "epoch": 0.943021016495018, "grad_norm": 0.2964974045753479, "learning_rate": 0.00015511301324806366, "loss": 11.6706, "step": 45050 }, { "epoch": 0.9430419492589801, "grad_norm": 0.30505311489105225, "learning_rate": 0.0001551111837430594, "loss": 11.6734, "step": 45051 }, { "epoch": 0.9430628820229423, "grad_norm": 0.3321032226085663, "learning_rate": 0.0001551093542115621, "loss": 11.6827, "step": 45052 }, { "epoch": 0.9430838147869045, "grad_norm": 0.23331204056739807, "learning_rate": 0.0001551075246535726, "loss": 11.6632, "step": 45053 }, { "epoch": 0.9431047475508666, "grad_norm": 0.26237404346466064, "learning_rate": 0.00015510569506909186, "loss": 11.6765, "step": 45054 }, { "epoch": 0.9431256803148288, "grad_norm": 0.31067022681236267, "learning_rate": 0.00015510386545812064, "loss": 11.674, "step": 45055 }, { "epoch": 0.9431466130787909, "grad_norm": 0.37300246953964233, "learning_rate": 0.00015510203582065995, "loss": 11.6728, "step": 45056 }, { "epoch": 0.9431675458427531, "grad_norm": 0.32318606972694397, "learning_rate": 0.00015510020615671055, "loss": 11.6659, "step": 45057 }, { "epoch": 0.9431884786067153, "grad_norm": 0.35745930671691895, "learning_rate": 0.0001550983764662734, "loss": 11.6828, "step": 45058 }, { "epoch": 0.9432094113706774, "grad_norm": 0.2921367585659027, "learning_rate": 0.00015509654674934937, "loss": 11.6838, "step": 45059 }, { "epoch": 0.9432303441346396, "grad_norm": 0.355619877576828, "learning_rate": 0.0001550947170059393, "loss": 11.6699, "step": 45060 }, { "epoch": 0.9432512768986017, "grad_norm": 0.3039504289627075, "learning_rate": 0.0001550928872360441, "loss": 11.6706, "step": 45061 }, { "epoch": 0.9432722096625639, "grad_norm": 0.29185429215431213, "learning_rate": 0.00015509105743966467, "loss": 11.6653, "step": 45062 }, { "epoch": 0.943293142426526, "grad_norm": 0.2769756615161896, "learning_rate": 0.00015508922761680186, "loss": 11.6703, "step": 45063 }, { "epoch": 0.9433140751904882, "grad_norm": 0.31187519431114197, "learning_rate": 0.00015508739776745655, "loss": 11.6569, "step": 45064 }, { "epoch": 0.9433350079544504, "grad_norm": 0.44150426983833313, "learning_rate": 0.00015508556789162962, "loss": 11.6558, "step": 45065 }, { "epoch": 0.9433559407184124, "grad_norm": 0.33040544390678406, "learning_rate": 0.00015508373798932198, "loss": 11.6672, "step": 45066 }, { "epoch": 0.9433768734823746, "grad_norm": 0.2692968547344208, "learning_rate": 0.00015508190806053443, "loss": 11.6536, "step": 45067 }, { "epoch": 0.9433978062463367, "grad_norm": 0.3348347842693329, "learning_rate": 0.00015508007810526793, "loss": 11.6794, "step": 45068 }, { "epoch": 0.9434187390102989, "grad_norm": 0.31311312317848206, "learning_rate": 0.00015507824812352334, "loss": 11.6704, "step": 45069 }, { "epoch": 0.943439671774261, "grad_norm": 0.2660413980484009, "learning_rate": 0.00015507641811530153, "loss": 11.6588, "step": 45070 }, { "epoch": 0.9434606045382232, "grad_norm": 0.4338878393173218, "learning_rate": 0.00015507458808060337, "loss": 11.6928, "step": 45071 }, { "epoch": 0.9434815373021854, "grad_norm": 0.29299432039260864, "learning_rate": 0.00015507275801942972, "loss": 11.6723, "step": 45072 }, { "epoch": 0.9435024700661475, "grad_norm": 0.32164639234542847, "learning_rate": 0.00015507092793178156, "loss": 11.6664, "step": 45073 }, { "epoch": 0.9435234028301097, "grad_norm": 0.3306029438972473, "learning_rate": 0.00015506909781765965, "loss": 11.6746, "step": 45074 }, { "epoch": 0.9435443355940718, "grad_norm": 0.3122420012950897, "learning_rate": 0.00015506726767706497, "loss": 11.669, "step": 45075 }, { "epoch": 0.943565268358034, "grad_norm": 0.3578818738460541, "learning_rate": 0.0001550654375099983, "loss": 11.6799, "step": 45076 }, { "epoch": 0.9435862011219961, "grad_norm": 0.30908501148223877, "learning_rate": 0.00015506360731646062, "loss": 11.6735, "step": 45077 }, { "epoch": 0.9436071338859583, "grad_norm": 0.2539427876472473, "learning_rate": 0.00015506177709645274, "loss": 11.6542, "step": 45078 }, { "epoch": 0.9436280666499205, "grad_norm": 0.29258227348327637, "learning_rate": 0.00015505994684997555, "loss": 11.6613, "step": 45079 }, { "epoch": 0.9436489994138826, "grad_norm": 0.3297821581363678, "learning_rate": 0.00015505811657702992, "loss": 11.6733, "step": 45080 }, { "epoch": 0.9436699321778448, "grad_norm": 0.3684234619140625, "learning_rate": 0.00015505628627761678, "loss": 11.6646, "step": 45081 }, { "epoch": 0.9436908649418069, "grad_norm": 0.4098428189754486, "learning_rate": 0.000155054455951737, "loss": 11.6631, "step": 45082 }, { "epoch": 0.9437117977057691, "grad_norm": 0.2555219829082489, "learning_rate": 0.0001550526255993914, "loss": 11.6461, "step": 45083 }, { "epoch": 0.9437327304697313, "grad_norm": 0.39822906255722046, "learning_rate": 0.0001550507952205809, "loss": 11.6627, "step": 45084 }, { "epoch": 0.9437536632336934, "grad_norm": 0.27295199036598206, "learning_rate": 0.0001550489648153064, "loss": 11.6707, "step": 45085 }, { "epoch": 0.9437745959976556, "grad_norm": 0.37454748153686523, "learning_rate": 0.00015504713438356876, "loss": 11.6817, "step": 45086 }, { "epoch": 0.9437955287616177, "grad_norm": 0.4252207279205322, "learning_rate": 0.00015504530392536884, "loss": 11.672, "step": 45087 }, { "epoch": 0.9438164615255799, "grad_norm": 0.2751481533050537, "learning_rate": 0.00015504347344070756, "loss": 11.6557, "step": 45088 }, { "epoch": 0.9438373942895419, "grad_norm": 0.32753610610961914, "learning_rate": 0.00015504164292958576, "loss": 11.6634, "step": 45089 }, { "epoch": 0.9438583270535041, "grad_norm": 0.32354459166526794, "learning_rate": 0.00015503981239200435, "loss": 11.669, "step": 45090 }, { "epoch": 0.9438792598174663, "grad_norm": 0.26762062311172485, "learning_rate": 0.0001550379818279642, "loss": 11.666, "step": 45091 }, { "epoch": 0.9439001925814284, "grad_norm": 0.30997562408447266, "learning_rate": 0.0001550361512374662, "loss": 11.667, "step": 45092 }, { "epoch": 0.9439211253453906, "grad_norm": 0.3265068531036377, "learning_rate": 0.0001550343206205112, "loss": 11.6646, "step": 45093 }, { "epoch": 0.9439420581093527, "grad_norm": 0.30979588627815247, "learning_rate": 0.00015503248997710011, "loss": 11.6703, "step": 45094 }, { "epoch": 0.9439629908733149, "grad_norm": 0.2662612497806549, "learning_rate": 0.0001550306593072338, "loss": 11.6575, "step": 45095 }, { "epoch": 0.943983923637277, "grad_norm": 0.31634944677352905, "learning_rate": 0.00015502882861091313, "loss": 11.6677, "step": 45096 }, { "epoch": 0.9440048564012392, "grad_norm": 0.35371869802474976, "learning_rate": 0.00015502699788813904, "loss": 11.6779, "step": 45097 }, { "epoch": 0.9440257891652014, "grad_norm": 0.2333596646785736, "learning_rate": 0.00015502516713891234, "loss": 11.6734, "step": 45098 }, { "epoch": 0.9440467219291635, "grad_norm": 0.28418785333633423, "learning_rate": 0.00015502333636323393, "loss": 11.6771, "step": 45099 }, { "epoch": 0.9440676546931257, "grad_norm": 0.26872822642326355, "learning_rate": 0.0001550215055611047, "loss": 11.6694, "step": 45100 }, { "epoch": 0.9440885874570878, "grad_norm": 0.2732512652873993, "learning_rate": 0.00015501967473252558, "loss": 11.6614, "step": 45101 }, { "epoch": 0.94410952022105, "grad_norm": 0.27027109265327454, "learning_rate": 0.00015501784387749734, "loss": 11.6659, "step": 45102 }, { "epoch": 0.9441304529850122, "grad_norm": 0.31843408942222595, "learning_rate": 0.00015501601299602093, "loss": 11.6537, "step": 45103 }, { "epoch": 0.9441513857489743, "grad_norm": 0.34469181299209595, "learning_rate": 0.0001550141820880972, "loss": 11.6519, "step": 45104 }, { "epoch": 0.9441723185129365, "grad_norm": 0.30887502431869507, "learning_rate": 0.0001550123511537271, "loss": 11.6565, "step": 45105 }, { "epoch": 0.9441932512768986, "grad_norm": 0.3593682050704956, "learning_rate": 0.00015501052019291147, "loss": 11.6562, "step": 45106 }, { "epoch": 0.9442141840408608, "grad_norm": 0.2485501766204834, "learning_rate": 0.00015500868920565114, "loss": 11.6673, "step": 45107 }, { "epoch": 0.9442351168048229, "grad_norm": 0.3346966505050659, "learning_rate": 0.00015500685819194704, "loss": 11.68, "step": 45108 }, { "epoch": 0.9442560495687851, "grad_norm": 0.3131881356239319, "learning_rate": 0.00015500502715180007, "loss": 11.6841, "step": 45109 }, { "epoch": 0.9442769823327473, "grad_norm": 0.2564942240715027, "learning_rate": 0.00015500319608521105, "loss": 11.6786, "step": 45110 }, { "epoch": 0.9442979150967094, "grad_norm": 0.2683843970298767, "learning_rate": 0.00015500136499218086, "loss": 11.6754, "step": 45111 }, { "epoch": 0.9443188478606716, "grad_norm": 0.36309871077537537, "learning_rate": 0.00015499953387271046, "loss": 11.6845, "step": 45112 }, { "epoch": 0.9443397806246336, "grad_norm": 0.2844506502151489, "learning_rate": 0.00015499770272680067, "loss": 11.666, "step": 45113 }, { "epoch": 0.9443607133885958, "grad_norm": 0.29850053787231445, "learning_rate": 0.0001549958715544524, "loss": 11.6686, "step": 45114 }, { "epoch": 0.9443816461525579, "grad_norm": 0.2916393578052521, "learning_rate": 0.0001549940403556665, "loss": 11.653, "step": 45115 }, { "epoch": 0.9444025789165201, "grad_norm": 0.3748915493488312, "learning_rate": 0.00015499220913044382, "loss": 11.6707, "step": 45116 }, { "epoch": 0.9444235116804823, "grad_norm": 0.3113120496273041, "learning_rate": 0.00015499037787878534, "loss": 11.6697, "step": 45117 }, { "epoch": 0.9444444444444444, "grad_norm": 0.3432699739933014, "learning_rate": 0.00015498854660069187, "loss": 11.6512, "step": 45118 }, { "epoch": 0.9444653772084066, "grad_norm": 0.2890027165412903, "learning_rate": 0.0001549867152961643, "loss": 11.6655, "step": 45119 }, { "epoch": 0.9444863099723687, "grad_norm": 0.24610213935375214, "learning_rate": 0.0001549848839652035, "loss": 11.661, "step": 45120 }, { "epoch": 0.9445072427363309, "grad_norm": 0.31096935272216797, "learning_rate": 0.0001549830526078104, "loss": 11.6595, "step": 45121 }, { "epoch": 0.9445281755002931, "grad_norm": 0.3741363286972046, "learning_rate": 0.0001549812212239858, "loss": 11.69, "step": 45122 }, { "epoch": 0.9445491082642552, "grad_norm": 0.31062859296798706, "learning_rate": 0.00015497938981373066, "loss": 11.6747, "step": 45123 }, { "epoch": 0.9445700410282174, "grad_norm": 0.32069480419158936, "learning_rate": 0.00015497755837704582, "loss": 11.6544, "step": 45124 }, { "epoch": 0.9445909737921795, "grad_norm": 0.3214872181415558, "learning_rate": 0.00015497572691393214, "loss": 11.6703, "step": 45125 }, { "epoch": 0.9446119065561417, "grad_norm": 0.30309686064720154, "learning_rate": 0.00015497389542439054, "loss": 11.6591, "step": 45126 }, { "epoch": 0.9446328393201038, "grad_norm": 0.3691936135292053, "learning_rate": 0.0001549720639084219, "loss": 11.659, "step": 45127 }, { "epoch": 0.944653772084066, "grad_norm": 0.3150064945220947, "learning_rate": 0.00015497023236602707, "loss": 11.6749, "step": 45128 }, { "epoch": 0.9446747048480282, "grad_norm": 0.2909405529499054, "learning_rate": 0.00015496840079720696, "loss": 11.664, "step": 45129 }, { "epoch": 0.9446956376119903, "grad_norm": 0.335405170917511, "learning_rate": 0.00015496656920196245, "loss": 11.665, "step": 45130 }, { "epoch": 0.9447165703759525, "grad_norm": 0.29811227321624756, "learning_rate": 0.00015496473758029437, "loss": 11.6921, "step": 45131 }, { "epoch": 0.9447375031399146, "grad_norm": 0.3492342531681061, "learning_rate": 0.00015496290593220366, "loss": 11.6526, "step": 45132 }, { "epoch": 0.9447584359038768, "grad_norm": 0.35070255398750305, "learning_rate": 0.00015496107425769118, "loss": 11.6687, "step": 45133 }, { "epoch": 0.9447793686678388, "grad_norm": 0.2801068425178528, "learning_rate": 0.00015495924255675783, "loss": 11.6625, "step": 45134 }, { "epoch": 0.944800301431801, "grad_norm": 0.28356194496154785, "learning_rate": 0.00015495741082940443, "loss": 11.6599, "step": 45135 }, { "epoch": 0.9448212341957632, "grad_norm": 0.2940295338630676, "learning_rate": 0.00015495557907563195, "loss": 11.6549, "step": 45136 }, { "epoch": 0.9448421669597253, "grad_norm": 0.30596065521240234, "learning_rate": 0.00015495374729544118, "loss": 11.6722, "step": 45137 }, { "epoch": 0.9448630997236875, "grad_norm": 0.3133717179298401, "learning_rate": 0.00015495191548883303, "loss": 11.659, "step": 45138 }, { "epoch": 0.9448840324876496, "grad_norm": 0.35447874665260315, "learning_rate": 0.00015495008365580843, "loss": 11.6631, "step": 45139 }, { "epoch": 0.9449049652516118, "grad_norm": 0.31013044714927673, "learning_rate": 0.0001549482517963682, "loss": 11.6735, "step": 45140 }, { "epoch": 0.944925898015574, "grad_norm": 0.3055110573768616, "learning_rate": 0.00015494641991051328, "loss": 11.6715, "step": 45141 }, { "epoch": 0.9449468307795361, "grad_norm": 0.351847380399704, "learning_rate": 0.0001549445879982445, "loss": 11.6631, "step": 45142 }, { "epoch": 0.9449677635434983, "grad_norm": 0.3006707429885864, "learning_rate": 0.00015494275605956276, "loss": 11.6771, "step": 45143 }, { "epoch": 0.9449886963074604, "grad_norm": 0.29978638887405396, "learning_rate": 0.0001549409240944689, "loss": 11.666, "step": 45144 }, { "epoch": 0.9450096290714226, "grad_norm": 0.2780725359916687, "learning_rate": 0.00015493909210296386, "loss": 11.6525, "step": 45145 }, { "epoch": 0.9450305618353847, "grad_norm": 0.3017772436141968, "learning_rate": 0.0001549372600850485, "loss": 11.6568, "step": 45146 }, { "epoch": 0.9450514945993469, "grad_norm": 0.27289265394210815, "learning_rate": 0.0001549354280407237, "loss": 11.6693, "step": 45147 }, { "epoch": 0.9450724273633091, "grad_norm": 0.31361088156700134, "learning_rate": 0.00015493359596999032, "loss": 11.6774, "step": 45148 }, { "epoch": 0.9450933601272712, "grad_norm": 0.27970558404922485, "learning_rate": 0.00015493176387284928, "loss": 11.6746, "step": 45149 }, { "epoch": 0.9451142928912334, "grad_norm": 0.35634174942970276, "learning_rate": 0.00015492993174930143, "loss": 11.6749, "step": 45150 }, { "epoch": 0.9451352256551955, "grad_norm": 0.45389455556869507, "learning_rate": 0.00015492809959934768, "loss": 11.6664, "step": 45151 }, { "epoch": 0.9451561584191577, "grad_norm": 0.32752227783203125, "learning_rate": 0.0001549262674229889, "loss": 11.652, "step": 45152 }, { "epoch": 0.9451770911831198, "grad_norm": 0.31942108273506165, "learning_rate": 0.00015492443522022593, "loss": 11.6721, "step": 45153 }, { "epoch": 0.945198023947082, "grad_norm": 0.33546146750450134, "learning_rate": 0.00015492260299105966, "loss": 11.6638, "step": 45154 }, { "epoch": 0.9452189567110442, "grad_norm": 0.28404632210731506, "learning_rate": 0.00015492077073549101, "loss": 11.6637, "step": 45155 }, { "epoch": 0.9452398894750063, "grad_norm": 0.35788941383361816, "learning_rate": 0.0001549189384535209, "loss": 11.6511, "step": 45156 }, { "epoch": 0.9452608222389685, "grad_norm": 0.32703927159309387, "learning_rate": 0.00015491710614515012, "loss": 11.6464, "step": 45157 }, { "epoch": 0.9452817550029305, "grad_norm": 0.3583047688007355, "learning_rate": 0.00015491527381037958, "loss": 11.6709, "step": 45158 }, { "epoch": 0.9453026877668927, "grad_norm": 0.35220855474472046, "learning_rate": 0.00015491344144921015, "loss": 11.6718, "step": 45159 }, { "epoch": 0.945323620530855, "grad_norm": 0.2754271924495697, "learning_rate": 0.00015491160906164276, "loss": 11.6854, "step": 45160 }, { "epoch": 0.945344553294817, "grad_norm": 0.28124529123306274, "learning_rate": 0.00015490977664767827, "loss": 11.6436, "step": 45161 }, { "epoch": 0.9453654860587792, "grad_norm": 0.4130071699619293, "learning_rate": 0.00015490794420731753, "loss": 11.6668, "step": 45162 }, { "epoch": 0.9453864188227413, "grad_norm": 0.29744261503219604, "learning_rate": 0.00015490611174056145, "loss": 11.6569, "step": 45163 }, { "epoch": 0.9454073515867035, "grad_norm": 0.29222095012664795, "learning_rate": 0.00015490427924741087, "loss": 11.6597, "step": 45164 }, { "epoch": 0.9454282843506656, "grad_norm": 0.3479248583316803, "learning_rate": 0.00015490244672786672, "loss": 11.667, "step": 45165 }, { "epoch": 0.9454492171146278, "grad_norm": 0.3764607608318329, "learning_rate": 0.0001549006141819299, "loss": 11.6836, "step": 45166 }, { "epoch": 0.94547014987859, "grad_norm": 0.3005776107311249, "learning_rate": 0.0001548987816096012, "loss": 11.6711, "step": 45167 }, { "epoch": 0.9454910826425521, "grad_norm": 0.30927374958992004, "learning_rate": 0.0001548969490108816, "loss": 11.6711, "step": 45168 }, { "epoch": 0.9455120154065143, "grad_norm": 0.2961595058441162, "learning_rate": 0.0001548951163857719, "loss": 11.662, "step": 45169 }, { "epoch": 0.9455329481704764, "grad_norm": 0.3047136962413788, "learning_rate": 0.000154893283734273, "loss": 11.6655, "step": 45170 }, { "epoch": 0.9455538809344386, "grad_norm": 0.33077535033226013, "learning_rate": 0.00015489145105638586, "loss": 11.6686, "step": 45171 }, { "epoch": 0.9455748136984007, "grad_norm": 0.28200843930244446, "learning_rate": 0.00015488961835211126, "loss": 11.6758, "step": 45172 }, { "epoch": 0.9455957464623629, "grad_norm": 0.34667733311653137, "learning_rate": 0.00015488778562145015, "loss": 11.6568, "step": 45173 }, { "epoch": 0.9456166792263251, "grad_norm": 0.2827996611595154, "learning_rate": 0.00015488595286440335, "loss": 11.6665, "step": 45174 }, { "epoch": 0.9456376119902872, "grad_norm": 0.26776042580604553, "learning_rate": 0.0001548841200809718, "loss": 11.6461, "step": 45175 }, { "epoch": 0.9456585447542494, "grad_norm": 0.38650864362716675, "learning_rate": 0.0001548822872711563, "loss": 11.6721, "step": 45176 }, { "epoch": 0.9456794775182115, "grad_norm": 0.512614369392395, "learning_rate": 0.00015488045443495785, "loss": 11.6705, "step": 45177 }, { "epoch": 0.9457004102821737, "grad_norm": 0.3387369215488434, "learning_rate": 0.00015487862157237726, "loss": 11.6911, "step": 45178 }, { "epoch": 0.9457213430461359, "grad_norm": 0.3461630642414093, "learning_rate": 0.00015487678868341534, "loss": 11.6632, "step": 45179 }, { "epoch": 0.945742275810098, "grad_norm": 0.2988804280757904, "learning_rate": 0.00015487495576807313, "loss": 11.6626, "step": 45180 }, { "epoch": 0.9457632085740602, "grad_norm": 0.29186108708381653, "learning_rate": 0.0001548731228263514, "loss": 11.6703, "step": 45181 }, { "epoch": 0.9457841413380222, "grad_norm": 0.26168495416641235, "learning_rate": 0.00015487128985825106, "loss": 11.6686, "step": 45182 }, { "epoch": 0.9458050741019844, "grad_norm": 0.38271021842956543, "learning_rate": 0.00015486945686377302, "loss": 11.6581, "step": 45183 }, { "epoch": 0.9458260068659465, "grad_norm": 0.44610992074012756, "learning_rate": 0.0001548676238429181, "loss": 11.6803, "step": 45184 }, { "epoch": 0.9458469396299087, "grad_norm": 0.3685505986213684, "learning_rate": 0.00015486579079568722, "loss": 11.6541, "step": 45185 }, { "epoch": 0.9458678723938709, "grad_norm": 0.25126349925994873, "learning_rate": 0.00015486395772208123, "loss": 11.6782, "step": 45186 }, { "epoch": 0.945888805157833, "grad_norm": 0.23464511334896088, "learning_rate": 0.00015486212462210105, "loss": 11.6762, "step": 45187 }, { "epoch": 0.9459097379217952, "grad_norm": 0.3205869197845459, "learning_rate": 0.0001548602914957476, "loss": 11.6785, "step": 45188 }, { "epoch": 0.9459306706857573, "grad_norm": 0.29925188422203064, "learning_rate": 0.00015485845834302164, "loss": 11.6619, "step": 45189 }, { "epoch": 0.9459516034497195, "grad_norm": 0.3505563735961914, "learning_rate": 0.00015485662516392416, "loss": 11.6829, "step": 45190 }, { "epoch": 0.9459725362136816, "grad_norm": 0.2823258936405182, "learning_rate": 0.000154854791958456, "loss": 11.6608, "step": 45191 }, { "epoch": 0.9459934689776438, "grad_norm": 0.3361894190311432, "learning_rate": 0.000154852958726618, "loss": 11.6511, "step": 45192 }, { "epoch": 0.946014401741606, "grad_norm": 0.3044587969779968, "learning_rate": 0.00015485112546841112, "loss": 11.6735, "step": 45193 }, { "epoch": 0.9460353345055681, "grad_norm": 0.2938549518585205, "learning_rate": 0.00015484929218383622, "loss": 11.6862, "step": 45194 }, { "epoch": 0.9460562672695303, "grad_norm": 0.3422144949436188, "learning_rate": 0.00015484745887289414, "loss": 11.6736, "step": 45195 }, { "epoch": 0.9460772000334924, "grad_norm": 0.3213154971599579, "learning_rate": 0.00015484562553558576, "loss": 11.6783, "step": 45196 }, { "epoch": 0.9460981327974546, "grad_norm": 0.30191949009895325, "learning_rate": 0.00015484379217191205, "loss": 11.6764, "step": 45197 }, { "epoch": 0.9461190655614168, "grad_norm": 0.2894095182418823, "learning_rate": 0.00015484195878187376, "loss": 11.662, "step": 45198 }, { "epoch": 0.9461399983253789, "grad_norm": 0.3539791405200958, "learning_rate": 0.0001548401253654719, "loss": 11.6521, "step": 45199 }, { "epoch": 0.9461609310893411, "grad_norm": 0.29502424597740173, "learning_rate": 0.00015483829192270728, "loss": 11.6692, "step": 45200 }, { "epoch": 0.9461818638533032, "grad_norm": 0.36344999074935913, "learning_rate": 0.00015483645845358078, "loss": 11.6666, "step": 45201 }, { "epoch": 0.9462027966172654, "grad_norm": 0.26560211181640625, "learning_rate": 0.00015483462495809332, "loss": 11.6472, "step": 45202 }, { "epoch": 0.9462237293812275, "grad_norm": 0.31999364495277405, "learning_rate": 0.00015483279143624573, "loss": 11.6688, "step": 45203 }, { "epoch": 0.9462446621451897, "grad_norm": 0.264261931180954, "learning_rate": 0.00015483095788803891, "loss": 11.6649, "step": 45204 }, { "epoch": 0.9462655949091519, "grad_norm": 0.3145926594734192, "learning_rate": 0.0001548291243134738, "loss": 11.6721, "step": 45205 }, { "epoch": 0.9462865276731139, "grad_norm": 0.2784421741962433, "learning_rate": 0.0001548272907125512, "loss": 11.666, "step": 45206 }, { "epoch": 0.9463074604370761, "grad_norm": 0.2990899085998535, "learning_rate": 0.00015482545708527204, "loss": 11.6698, "step": 45207 }, { "epoch": 0.9463283932010382, "grad_norm": 0.4724232256412506, "learning_rate": 0.00015482362343163716, "loss": 11.683, "step": 45208 }, { "epoch": 0.9463493259650004, "grad_norm": 0.2965490520000458, "learning_rate": 0.00015482178975164746, "loss": 11.6632, "step": 45209 }, { "epoch": 0.9463702587289625, "grad_norm": 0.2978843152523041, "learning_rate": 0.00015481995604530382, "loss": 11.6664, "step": 45210 }, { "epoch": 0.9463911914929247, "grad_norm": 0.3749420642852783, "learning_rate": 0.00015481812231260715, "loss": 11.682, "step": 45211 }, { "epoch": 0.9464121242568869, "grad_norm": 0.31977367401123047, "learning_rate": 0.00015481628855355833, "loss": 11.6767, "step": 45212 }, { "epoch": 0.946433057020849, "grad_norm": 0.32993951439857483, "learning_rate": 0.0001548144547681582, "loss": 11.6727, "step": 45213 }, { "epoch": 0.9464539897848112, "grad_norm": 0.2722446024417877, "learning_rate": 0.00015481262095640763, "loss": 11.6638, "step": 45214 }, { "epoch": 0.9464749225487733, "grad_norm": 0.44796645641326904, "learning_rate": 0.0001548107871183076, "loss": 11.6811, "step": 45215 }, { "epoch": 0.9464958553127355, "grad_norm": 0.40257585048675537, "learning_rate": 0.00015480895325385886, "loss": 11.6864, "step": 45216 }, { "epoch": 0.9465167880766977, "grad_norm": 0.3789002001285553, "learning_rate": 0.0001548071193630624, "loss": 11.6804, "step": 45217 }, { "epoch": 0.9465377208406598, "grad_norm": 0.3336537480354309, "learning_rate": 0.00015480528544591906, "loss": 11.6617, "step": 45218 }, { "epoch": 0.946558653604622, "grad_norm": 0.3192877173423767, "learning_rate": 0.0001548034515024297, "loss": 11.6526, "step": 45219 }, { "epoch": 0.9465795863685841, "grad_norm": 0.32070839405059814, "learning_rate": 0.00015480161753259525, "loss": 11.6642, "step": 45220 }, { "epoch": 0.9466005191325463, "grad_norm": 0.31968793272972107, "learning_rate": 0.00015479978353641653, "loss": 11.6743, "step": 45221 }, { "epoch": 0.9466214518965084, "grad_norm": 0.3058417737483978, "learning_rate": 0.0001547979495138945, "loss": 11.6773, "step": 45222 }, { "epoch": 0.9466423846604706, "grad_norm": 0.28216156363487244, "learning_rate": 0.00015479611546502997, "loss": 11.6655, "step": 45223 }, { "epoch": 0.9466633174244328, "grad_norm": 0.3562712073326111, "learning_rate": 0.00015479428138982383, "loss": 11.6752, "step": 45224 }, { "epoch": 0.9466842501883949, "grad_norm": 0.2887580692768097, "learning_rate": 0.000154792447288277, "loss": 11.6691, "step": 45225 }, { "epoch": 0.9467051829523571, "grad_norm": 0.2977466583251953, "learning_rate": 0.00015479061316039033, "loss": 11.6628, "step": 45226 }, { "epoch": 0.9467261157163191, "grad_norm": 0.273014098405838, "learning_rate": 0.00015478877900616475, "loss": 11.6699, "step": 45227 }, { "epoch": 0.9467470484802814, "grad_norm": 0.28329482674598694, "learning_rate": 0.00015478694482560108, "loss": 11.6741, "step": 45228 }, { "epoch": 0.9467679812442434, "grad_norm": 0.3035798966884613, "learning_rate": 0.00015478511061870024, "loss": 11.6741, "step": 45229 }, { "epoch": 0.9467889140082056, "grad_norm": 0.3394145667552948, "learning_rate": 0.00015478327638546307, "loss": 11.6751, "step": 45230 }, { "epoch": 0.9468098467721678, "grad_norm": 0.28416889905929565, "learning_rate": 0.0001547814421258905, "loss": 11.6867, "step": 45231 }, { "epoch": 0.9468307795361299, "grad_norm": 0.31005826592445374, "learning_rate": 0.00015477960783998341, "loss": 11.6731, "step": 45232 }, { "epoch": 0.9468517123000921, "grad_norm": 0.28860706090927124, "learning_rate": 0.00015477777352774266, "loss": 11.6671, "step": 45233 }, { "epoch": 0.9468726450640542, "grad_norm": 0.31619879603385925, "learning_rate": 0.00015477593918916912, "loss": 11.6603, "step": 45234 }, { "epoch": 0.9468935778280164, "grad_norm": 0.3837311863899231, "learning_rate": 0.00015477410482426368, "loss": 11.6718, "step": 45235 }, { "epoch": 0.9469145105919786, "grad_norm": 0.2649022340774536, "learning_rate": 0.00015477227043302725, "loss": 11.6602, "step": 45236 }, { "epoch": 0.9469354433559407, "grad_norm": 0.36639752984046936, "learning_rate": 0.0001547704360154607, "loss": 11.6822, "step": 45237 }, { "epoch": 0.9469563761199029, "grad_norm": 0.3170658349990845, "learning_rate": 0.00015476860157156487, "loss": 11.6533, "step": 45238 }, { "epoch": 0.946977308883865, "grad_norm": 0.3050295114517212, "learning_rate": 0.00015476676710134073, "loss": 11.6672, "step": 45239 }, { "epoch": 0.9469982416478272, "grad_norm": 0.2744581997394562, "learning_rate": 0.00015476493260478905, "loss": 11.6819, "step": 45240 }, { "epoch": 0.9470191744117893, "grad_norm": 0.35678353905677795, "learning_rate": 0.0001547630980819108, "loss": 11.6741, "step": 45241 }, { "epoch": 0.9470401071757515, "grad_norm": 0.3918173313140869, "learning_rate": 0.00015476126353270684, "loss": 11.6702, "step": 45242 }, { "epoch": 0.9470610399397137, "grad_norm": 0.3331345021724701, "learning_rate": 0.00015475942895717802, "loss": 11.6621, "step": 45243 }, { "epoch": 0.9470819727036758, "grad_norm": 0.301481693983078, "learning_rate": 0.00015475759435532523, "loss": 11.6685, "step": 45244 }, { "epoch": 0.947102905467638, "grad_norm": 0.38347095251083374, "learning_rate": 0.0001547557597271494, "loss": 11.6577, "step": 45245 }, { "epoch": 0.9471238382316001, "grad_norm": 0.2820083796977997, "learning_rate": 0.0001547539250726514, "loss": 11.6802, "step": 45246 }, { "epoch": 0.9471447709955623, "grad_norm": 0.2893267273902893, "learning_rate": 0.00015475209039183204, "loss": 11.6697, "step": 45247 }, { "epoch": 0.9471657037595244, "grad_norm": 0.3179507851600647, "learning_rate": 0.00015475025568469227, "loss": 11.6645, "step": 45248 }, { "epoch": 0.9471866365234866, "grad_norm": 0.31198248267173767, "learning_rate": 0.00015474842095123295, "loss": 11.6777, "step": 45249 }, { "epoch": 0.9472075692874488, "grad_norm": 0.2707776725292206, "learning_rate": 0.00015474658619145498, "loss": 11.668, "step": 45250 }, { "epoch": 0.9472285020514108, "grad_norm": 0.38039129972457886, "learning_rate": 0.00015474475140535922, "loss": 11.6752, "step": 45251 }, { "epoch": 0.947249434815373, "grad_norm": 0.4351130723953247, "learning_rate": 0.00015474291659294654, "loss": 11.6857, "step": 45252 }, { "epoch": 0.9472703675793351, "grad_norm": 0.36750683188438416, "learning_rate": 0.00015474108175421786, "loss": 11.6822, "step": 45253 }, { "epoch": 0.9472913003432973, "grad_norm": 0.4520271122455597, "learning_rate": 0.00015473924688917407, "loss": 11.698, "step": 45254 }, { "epoch": 0.9473122331072594, "grad_norm": 0.4152151942253113, "learning_rate": 0.000154737411997816, "loss": 11.674, "step": 45255 }, { "epoch": 0.9473331658712216, "grad_norm": 0.29846054315567017, "learning_rate": 0.00015473557708014458, "loss": 11.6643, "step": 45256 }, { "epoch": 0.9473540986351838, "grad_norm": 0.3591552674770355, "learning_rate": 0.00015473374213616066, "loss": 11.6712, "step": 45257 }, { "epoch": 0.9473750313991459, "grad_norm": 0.3595615327358246, "learning_rate": 0.00015473190716586513, "loss": 11.6526, "step": 45258 }, { "epoch": 0.9473959641631081, "grad_norm": 0.29595932364463806, "learning_rate": 0.00015473007216925888, "loss": 11.6759, "step": 45259 }, { "epoch": 0.9474168969270702, "grad_norm": 0.28294578194618225, "learning_rate": 0.00015472823714634276, "loss": 11.6491, "step": 45260 }, { "epoch": 0.9474378296910324, "grad_norm": 0.30771276354789734, "learning_rate": 0.00015472640209711775, "loss": 11.6752, "step": 45261 }, { "epoch": 0.9474587624549946, "grad_norm": 0.30865615606307983, "learning_rate": 0.00015472456702158457, "loss": 11.678, "step": 45262 }, { "epoch": 0.9474796952189567, "grad_norm": 0.34841233491897583, "learning_rate": 0.00015472273191974428, "loss": 11.6833, "step": 45263 }, { "epoch": 0.9475006279829189, "grad_norm": 0.30599135160446167, "learning_rate": 0.00015472089679159762, "loss": 11.6739, "step": 45264 }, { "epoch": 0.947521560746881, "grad_norm": 0.34677445888519287, "learning_rate": 0.00015471906163714552, "loss": 11.6758, "step": 45265 }, { "epoch": 0.9475424935108432, "grad_norm": 0.2784862220287323, "learning_rate": 0.00015471722645638892, "loss": 11.6566, "step": 45266 }, { "epoch": 0.9475634262748053, "grad_norm": 0.33572569489479065, "learning_rate": 0.0001547153912493286, "loss": 11.6756, "step": 45267 }, { "epoch": 0.9475843590387675, "grad_norm": 0.3012774884700775, "learning_rate": 0.00015471355601596556, "loss": 11.6614, "step": 45268 }, { "epoch": 0.9476052918027297, "grad_norm": 0.3320872485637665, "learning_rate": 0.00015471172075630057, "loss": 11.6799, "step": 45269 }, { "epoch": 0.9476262245666918, "grad_norm": 0.34664443135261536, "learning_rate": 0.00015470988547033456, "loss": 11.6661, "step": 45270 }, { "epoch": 0.947647157330654, "grad_norm": 0.30119383335113525, "learning_rate": 0.00015470805015806844, "loss": 11.6717, "step": 45271 }, { "epoch": 0.9476680900946161, "grad_norm": 0.307177871465683, "learning_rate": 0.00015470621481950304, "loss": 11.6713, "step": 45272 }, { "epoch": 0.9476890228585783, "grad_norm": 0.29491204023361206, "learning_rate": 0.00015470437945463926, "loss": 11.6699, "step": 45273 }, { "epoch": 0.9477099556225403, "grad_norm": 0.3221483826637268, "learning_rate": 0.00015470254406347802, "loss": 11.6658, "step": 45274 }, { "epoch": 0.9477308883865025, "grad_norm": 0.2807679772377014, "learning_rate": 0.00015470070864602014, "loss": 11.6712, "step": 45275 }, { "epoch": 0.9477518211504647, "grad_norm": 0.32071414589881897, "learning_rate": 0.00015469887320226656, "loss": 11.6602, "step": 45276 }, { "epoch": 0.9477727539144268, "grad_norm": 0.4115895628929138, "learning_rate": 0.00015469703773221812, "loss": 11.6751, "step": 45277 }, { "epoch": 0.947793686678389, "grad_norm": 0.3301587998867035, "learning_rate": 0.00015469520223587572, "loss": 11.6682, "step": 45278 }, { "epoch": 0.9478146194423511, "grad_norm": 0.33983439207077026, "learning_rate": 0.00015469336671324022, "loss": 11.6687, "step": 45279 }, { "epoch": 0.9478355522063133, "grad_norm": 0.30849698185920715, "learning_rate": 0.00015469153116431255, "loss": 11.6534, "step": 45280 }, { "epoch": 0.9478564849702755, "grad_norm": 0.3201560378074646, "learning_rate": 0.00015468969558909355, "loss": 11.6512, "step": 45281 }, { "epoch": 0.9478774177342376, "grad_norm": 0.30736467242240906, "learning_rate": 0.0001546878599875841, "loss": 11.6603, "step": 45282 }, { "epoch": 0.9478983504981998, "grad_norm": 0.28867530822753906, "learning_rate": 0.00015468602435978515, "loss": 11.6579, "step": 45283 }, { "epoch": 0.9479192832621619, "grad_norm": 0.2464885264635086, "learning_rate": 0.00015468418870569748, "loss": 11.6585, "step": 45284 }, { "epoch": 0.9479402160261241, "grad_norm": 0.2679848074913025, "learning_rate": 0.0001546823530253221, "loss": 11.6726, "step": 45285 }, { "epoch": 0.9479611487900862, "grad_norm": 0.34912729263305664, "learning_rate": 0.0001546805173186597, "loss": 11.6741, "step": 45286 }, { "epoch": 0.9479820815540484, "grad_norm": 0.271510511636734, "learning_rate": 0.00015467868158571137, "loss": 11.6711, "step": 45287 }, { "epoch": 0.9480030143180106, "grad_norm": 0.3205476403236389, "learning_rate": 0.0001546768458264779, "loss": 11.6487, "step": 45288 }, { "epoch": 0.9480239470819727, "grad_norm": 0.31718960404396057, "learning_rate": 0.00015467501004096015, "loss": 11.6743, "step": 45289 }, { "epoch": 0.9480448798459349, "grad_norm": 0.23755094408988953, "learning_rate": 0.00015467317422915904, "loss": 11.667, "step": 45290 }, { "epoch": 0.948065812609897, "grad_norm": 0.36180129647254944, "learning_rate": 0.00015467133839107538, "loss": 11.6589, "step": 45291 }, { "epoch": 0.9480867453738592, "grad_norm": 0.29273125529289246, "learning_rate": 0.00015466950252671018, "loss": 11.6621, "step": 45292 }, { "epoch": 0.9481076781378213, "grad_norm": 0.2566587030887604, "learning_rate": 0.00015466766663606424, "loss": 11.6737, "step": 45293 }, { "epoch": 0.9481286109017835, "grad_norm": 0.27659428119659424, "learning_rate": 0.00015466583071913843, "loss": 11.662, "step": 45294 }, { "epoch": 0.9481495436657457, "grad_norm": 0.3779045045375824, "learning_rate": 0.00015466399477593373, "loss": 11.6867, "step": 45295 }, { "epoch": 0.9481704764297078, "grad_norm": 0.3161810636520386, "learning_rate": 0.00015466215880645086, "loss": 11.6676, "step": 45296 }, { "epoch": 0.94819140919367, "grad_norm": 0.2682233452796936, "learning_rate": 0.00015466032281069082, "loss": 11.6667, "step": 45297 }, { "epoch": 0.948212341957632, "grad_norm": 0.3251267969608307, "learning_rate": 0.0001546584867886545, "loss": 11.6694, "step": 45298 }, { "epoch": 0.9482332747215942, "grad_norm": 0.338153600692749, "learning_rate": 0.00015465665074034275, "loss": 11.6746, "step": 45299 }, { "epoch": 0.9482542074855564, "grad_norm": 0.3258356750011444, "learning_rate": 0.00015465481466575643, "loss": 11.6747, "step": 45300 }, { "epoch": 0.9482751402495185, "grad_norm": 0.45602288842201233, "learning_rate": 0.00015465297856489646, "loss": 11.6477, "step": 45301 }, { "epoch": 0.9482960730134807, "grad_norm": 0.2952374219894409, "learning_rate": 0.0001546511424377637, "loss": 11.6737, "step": 45302 }, { "epoch": 0.9483170057774428, "grad_norm": 0.28630849719047546, "learning_rate": 0.00015464930628435904, "loss": 11.6803, "step": 45303 }, { "epoch": 0.948337938541405, "grad_norm": 0.2693096697330475, "learning_rate": 0.00015464747010468337, "loss": 11.6614, "step": 45304 }, { "epoch": 0.9483588713053671, "grad_norm": 0.29847317934036255, "learning_rate": 0.00015464563389873756, "loss": 11.6628, "step": 45305 }, { "epoch": 0.9483798040693293, "grad_norm": 0.29233378171920776, "learning_rate": 0.0001546437976665225, "loss": 11.6559, "step": 45306 }, { "epoch": 0.9484007368332915, "grad_norm": 0.3140392303466797, "learning_rate": 0.00015464196140803908, "loss": 11.6691, "step": 45307 }, { "epoch": 0.9484216695972536, "grad_norm": 0.2806655466556549, "learning_rate": 0.00015464012512328816, "loss": 11.6815, "step": 45308 }, { "epoch": 0.9484426023612158, "grad_norm": 0.3995459973812103, "learning_rate": 0.00015463828881227064, "loss": 11.6823, "step": 45309 }, { "epoch": 0.9484635351251779, "grad_norm": 0.2805752158164978, "learning_rate": 0.00015463645247498742, "loss": 11.6568, "step": 45310 }, { "epoch": 0.9484844678891401, "grad_norm": 0.29346513748168945, "learning_rate": 0.00015463461611143933, "loss": 11.6587, "step": 45311 }, { "epoch": 0.9485054006531022, "grad_norm": 0.30144044756889343, "learning_rate": 0.0001546327797216273, "loss": 11.6812, "step": 45312 }, { "epoch": 0.9485263334170644, "grad_norm": 0.34715259075164795, "learning_rate": 0.00015463094330555222, "loss": 11.6685, "step": 45313 }, { "epoch": 0.9485472661810266, "grad_norm": 0.4513319432735443, "learning_rate": 0.0001546291068632149, "loss": 11.6849, "step": 45314 }, { "epoch": 0.9485681989449887, "grad_norm": 0.32342758774757385, "learning_rate": 0.00015462727039461632, "loss": 11.6664, "step": 45315 }, { "epoch": 0.9485891317089509, "grad_norm": 0.35001927614212036, "learning_rate": 0.0001546254338997573, "loss": 11.6642, "step": 45316 }, { "epoch": 0.948610064472913, "grad_norm": 0.30166155099868774, "learning_rate": 0.00015462359737863874, "loss": 11.6727, "step": 45317 }, { "epoch": 0.9486309972368752, "grad_norm": 0.33485954999923706, "learning_rate": 0.0001546217608312615, "loss": 11.6783, "step": 45318 }, { "epoch": 0.9486519300008374, "grad_norm": 0.33476656675338745, "learning_rate": 0.0001546199242576265, "loss": 11.6535, "step": 45319 }, { "epoch": 0.9486728627647995, "grad_norm": 0.4006713926792145, "learning_rate": 0.00015461808765773462, "loss": 11.6663, "step": 45320 }, { "epoch": 0.9486937955287617, "grad_norm": 0.30597051978111267, "learning_rate": 0.0001546162510315867, "loss": 11.6719, "step": 45321 }, { "epoch": 0.9487147282927237, "grad_norm": 0.2970403730869293, "learning_rate": 0.0001546144143791837, "loss": 11.6532, "step": 45322 }, { "epoch": 0.9487356610566859, "grad_norm": 0.27374017238616943, "learning_rate": 0.00015461257770052643, "loss": 11.6869, "step": 45323 }, { "epoch": 0.948756593820648, "grad_norm": 0.2994527816772461, "learning_rate": 0.00015461074099561582, "loss": 11.6777, "step": 45324 }, { "epoch": 0.9487775265846102, "grad_norm": 0.32758060097694397, "learning_rate": 0.00015460890426445269, "loss": 11.6773, "step": 45325 }, { "epoch": 0.9487984593485724, "grad_norm": 0.3118872344493866, "learning_rate": 0.00015460706750703798, "loss": 11.6548, "step": 45326 }, { "epoch": 0.9488193921125345, "grad_norm": 0.35709813237190247, "learning_rate": 0.0001546052307233726, "loss": 11.6712, "step": 45327 }, { "epoch": 0.9488403248764967, "grad_norm": 0.4288516640663147, "learning_rate": 0.00015460339391345736, "loss": 11.6583, "step": 45328 }, { "epoch": 0.9488612576404588, "grad_norm": 0.3243870139122009, "learning_rate": 0.00015460155707729318, "loss": 11.6655, "step": 45329 }, { "epoch": 0.948882190404421, "grad_norm": 0.37822550535202026, "learning_rate": 0.00015459972021488093, "loss": 11.6764, "step": 45330 }, { "epoch": 0.9489031231683831, "grad_norm": 0.2918426990509033, "learning_rate": 0.0001545978833262215, "loss": 11.6589, "step": 45331 }, { "epoch": 0.9489240559323453, "grad_norm": 0.2890082001686096, "learning_rate": 0.00015459604641131577, "loss": 11.657, "step": 45332 }, { "epoch": 0.9489449886963075, "grad_norm": 0.33299916982650757, "learning_rate": 0.00015459420947016463, "loss": 11.6601, "step": 45333 }, { "epoch": 0.9489659214602696, "grad_norm": 0.31161072850227356, "learning_rate": 0.00015459237250276897, "loss": 11.6787, "step": 45334 }, { "epoch": 0.9489868542242318, "grad_norm": 0.3332022428512573, "learning_rate": 0.00015459053550912966, "loss": 11.6818, "step": 45335 }, { "epoch": 0.9490077869881939, "grad_norm": 0.3532710373401642, "learning_rate": 0.00015458869848924755, "loss": 11.671, "step": 45336 }, { "epoch": 0.9490287197521561, "grad_norm": 0.2884640693664551, "learning_rate": 0.0001545868614431236, "loss": 11.679, "step": 45337 }, { "epoch": 0.9490496525161183, "grad_norm": 0.3456571698188782, "learning_rate": 0.00015458502437075864, "loss": 11.6508, "step": 45338 }, { "epoch": 0.9490705852800804, "grad_norm": 0.340005099773407, "learning_rate": 0.00015458318727215356, "loss": 11.6575, "step": 45339 }, { "epoch": 0.9490915180440426, "grad_norm": 0.3483594059944153, "learning_rate": 0.00015458135014730928, "loss": 11.6629, "step": 45340 }, { "epoch": 0.9491124508080047, "grad_norm": 0.2648508548736572, "learning_rate": 0.0001545795129962266, "loss": 11.6892, "step": 45341 }, { "epoch": 0.9491333835719669, "grad_norm": 0.2660735845565796, "learning_rate": 0.0001545776758189065, "loss": 11.6606, "step": 45342 }, { "epoch": 0.949154316335929, "grad_norm": 0.3434384763240814, "learning_rate": 0.0001545758386153498, "loss": 11.6779, "step": 45343 }, { "epoch": 0.9491752490998911, "grad_norm": 0.244332954287529, "learning_rate": 0.0001545740013855574, "loss": 11.6644, "step": 45344 }, { "epoch": 0.9491961818638533, "grad_norm": 0.3030044734477997, "learning_rate": 0.00015457216412953018, "loss": 11.6733, "step": 45345 }, { "epoch": 0.9492171146278154, "grad_norm": 0.2902573049068451, "learning_rate": 0.000154570326847269, "loss": 11.6742, "step": 45346 }, { "epoch": 0.9492380473917776, "grad_norm": 0.31093910336494446, "learning_rate": 0.0001545684895387748, "loss": 11.66, "step": 45347 }, { "epoch": 0.9492589801557397, "grad_norm": 0.2861299216747284, "learning_rate": 0.00015456665220404841, "loss": 11.6656, "step": 45348 }, { "epoch": 0.9492799129197019, "grad_norm": 0.3535223603248596, "learning_rate": 0.0001545648148430908, "loss": 11.6875, "step": 45349 }, { "epoch": 0.949300845683664, "grad_norm": 0.3453103005886078, "learning_rate": 0.00015456297745590272, "loss": 11.6601, "step": 45350 }, { "epoch": 0.9493217784476262, "grad_norm": 0.32240885496139526, "learning_rate": 0.00015456114004248518, "loss": 11.6631, "step": 45351 }, { "epoch": 0.9493427112115884, "grad_norm": 0.30793502926826477, "learning_rate": 0.00015455930260283894, "loss": 11.6544, "step": 45352 }, { "epoch": 0.9493636439755505, "grad_norm": 0.3745853900909424, "learning_rate": 0.00015455746513696501, "loss": 11.6766, "step": 45353 }, { "epoch": 0.9493845767395127, "grad_norm": 0.25842463970184326, "learning_rate": 0.00015455562764486418, "loss": 11.6934, "step": 45354 }, { "epoch": 0.9494055095034748, "grad_norm": 0.2740957736968994, "learning_rate": 0.0001545537901265374, "loss": 11.6727, "step": 45355 }, { "epoch": 0.949426442267437, "grad_norm": 0.30906689167022705, "learning_rate": 0.0001545519525819855, "loss": 11.6622, "step": 45356 }, { "epoch": 0.9494473750313992, "grad_norm": 0.3581767678260803, "learning_rate": 0.00015455011501120937, "loss": 11.6965, "step": 45357 }, { "epoch": 0.9494683077953613, "grad_norm": 0.2834796607494354, "learning_rate": 0.0001545482774142099, "loss": 11.6668, "step": 45358 }, { "epoch": 0.9494892405593235, "grad_norm": 0.6493806838989258, "learning_rate": 0.00015454643979098803, "loss": 11.6924, "step": 45359 }, { "epoch": 0.9495101733232856, "grad_norm": 0.34979408979415894, "learning_rate": 0.00015454460214154453, "loss": 11.6768, "step": 45360 }, { "epoch": 0.9495311060872478, "grad_norm": 0.25880420207977295, "learning_rate": 0.00015454276446588038, "loss": 11.6701, "step": 45361 }, { "epoch": 0.9495520388512099, "grad_norm": 0.31746557354927063, "learning_rate": 0.00015454092676399643, "loss": 11.6523, "step": 45362 }, { "epoch": 0.9495729716151721, "grad_norm": 0.34660086035728455, "learning_rate": 0.00015453908903589355, "loss": 11.6634, "step": 45363 }, { "epoch": 0.9495939043791343, "grad_norm": 0.3680959939956665, "learning_rate": 0.00015453725128157265, "loss": 11.6773, "step": 45364 }, { "epoch": 0.9496148371430964, "grad_norm": 0.25840944051742554, "learning_rate": 0.00015453541350103457, "loss": 11.6714, "step": 45365 }, { "epoch": 0.9496357699070586, "grad_norm": 0.29483193159103394, "learning_rate": 0.0001545335756942803, "loss": 11.6584, "step": 45366 }, { "epoch": 0.9496567026710206, "grad_norm": 0.2810232639312744, "learning_rate": 0.00015453173786131056, "loss": 11.6723, "step": 45367 }, { "epoch": 0.9496776354349828, "grad_norm": 0.3077647089958191, "learning_rate": 0.00015452990000212636, "loss": 11.665, "step": 45368 }, { "epoch": 0.9496985681989449, "grad_norm": 0.279469758272171, "learning_rate": 0.00015452806211672856, "loss": 11.6796, "step": 45369 }, { "epoch": 0.9497195009629071, "grad_norm": 0.28093740344047546, "learning_rate": 0.000154526224205118, "loss": 11.6601, "step": 45370 }, { "epoch": 0.9497404337268693, "grad_norm": 0.3213612139225006, "learning_rate": 0.00015452438626729561, "loss": 11.6727, "step": 45371 }, { "epoch": 0.9497613664908314, "grad_norm": 0.27862322330474854, "learning_rate": 0.00015452254830326226, "loss": 11.6673, "step": 45372 }, { "epoch": 0.9497822992547936, "grad_norm": 0.33997413516044617, "learning_rate": 0.0001545207103130188, "loss": 11.6614, "step": 45373 }, { "epoch": 0.9498032320187557, "grad_norm": 0.3178834021091461, "learning_rate": 0.00015451887229656617, "loss": 11.6577, "step": 45374 }, { "epoch": 0.9498241647827179, "grad_norm": 0.32297614216804504, "learning_rate": 0.0001545170342539052, "loss": 11.6618, "step": 45375 }, { "epoch": 0.9498450975466801, "grad_norm": 0.324491411447525, "learning_rate": 0.00015451519618503684, "loss": 11.6648, "step": 45376 }, { "epoch": 0.9498660303106422, "grad_norm": 0.2461702674627304, "learning_rate": 0.00015451335808996187, "loss": 11.6605, "step": 45377 }, { "epoch": 0.9498869630746044, "grad_norm": 0.2837553918361664, "learning_rate": 0.00015451151996868128, "loss": 11.6803, "step": 45378 }, { "epoch": 0.9499078958385665, "grad_norm": 0.38834625482559204, "learning_rate": 0.0001545096818211959, "loss": 11.6731, "step": 45379 }, { "epoch": 0.9499288286025287, "grad_norm": 0.3138774633407593, "learning_rate": 0.0001545078436475066, "loss": 11.6691, "step": 45380 }, { "epoch": 0.9499497613664908, "grad_norm": 0.3259275257587433, "learning_rate": 0.00015450600544761437, "loss": 11.6738, "step": 45381 }, { "epoch": 0.949970694130453, "grad_norm": 0.3161952793598175, "learning_rate": 0.0001545041672215199, "loss": 11.6751, "step": 45382 }, { "epoch": 0.9499916268944152, "grad_norm": 0.37466973066329956, "learning_rate": 0.00015450232896922428, "loss": 11.6802, "step": 45383 }, { "epoch": 0.9500125596583773, "grad_norm": 0.27163130044937134, "learning_rate": 0.00015450049069072822, "loss": 11.6724, "step": 45384 }, { "epoch": 0.9500334924223395, "grad_norm": 0.29115015268325806, "learning_rate": 0.00015449865238603274, "loss": 11.6556, "step": 45385 }, { "epoch": 0.9500544251863016, "grad_norm": 0.2724888324737549, "learning_rate": 0.00015449681405513865, "loss": 11.6575, "step": 45386 }, { "epoch": 0.9500753579502638, "grad_norm": 0.3207888603210449, "learning_rate": 0.0001544949756980468, "loss": 11.6755, "step": 45387 }, { "epoch": 0.9500962907142259, "grad_norm": 0.2568311095237732, "learning_rate": 0.00015449313731475818, "loss": 11.6752, "step": 45388 }, { "epoch": 0.950117223478188, "grad_norm": 0.2975950539112091, "learning_rate": 0.00015449129890527364, "loss": 11.6694, "step": 45389 }, { "epoch": 0.9501381562421503, "grad_norm": 0.28304848074913025, "learning_rate": 0.000154489460469594, "loss": 11.6722, "step": 45390 }, { "epoch": 0.9501590890061123, "grad_norm": 0.31602153182029724, "learning_rate": 0.00015448762200772018, "loss": 11.6562, "step": 45391 }, { "epoch": 0.9501800217700745, "grad_norm": 0.30363577604293823, "learning_rate": 0.00015448578351965307, "loss": 11.6715, "step": 45392 }, { "epoch": 0.9502009545340366, "grad_norm": 0.3525676131248474, "learning_rate": 0.00015448394500539356, "loss": 11.6752, "step": 45393 }, { "epoch": 0.9502218872979988, "grad_norm": 0.41286683082580566, "learning_rate": 0.00015448210646494252, "loss": 11.6738, "step": 45394 }, { "epoch": 0.950242820061961, "grad_norm": 0.26824042201042175, "learning_rate": 0.00015448026789830084, "loss": 11.6835, "step": 45395 }, { "epoch": 0.9502637528259231, "grad_norm": 0.2358720302581787, "learning_rate": 0.0001544784293054694, "loss": 11.6865, "step": 45396 }, { "epoch": 0.9502846855898853, "grad_norm": 0.3260488510131836, "learning_rate": 0.0001544765906864491, "loss": 11.6812, "step": 45397 }, { "epoch": 0.9503056183538474, "grad_norm": 0.4489339292049408, "learning_rate": 0.0001544747520412408, "loss": 11.6714, "step": 45398 }, { "epoch": 0.9503265511178096, "grad_norm": 0.2695198655128479, "learning_rate": 0.0001544729133698454, "loss": 11.6482, "step": 45399 }, { "epoch": 0.9503474838817717, "grad_norm": 0.27330562472343445, "learning_rate": 0.00015447107467226378, "loss": 11.6687, "step": 45400 }, { "epoch": 0.9503684166457339, "grad_norm": 0.2606164813041687, "learning_rate": 0.0001544692359484968, "loss": 11.6698, "step": 45401 }, { "epoch": 0.9503893494096961, "grad_norm": 0.30853715538978577, "learning_rate": 0.00015446739719854539, "loss": 11.6467, "step": 45402 }, { "epoch": 0.9504102821736582, "grad_norm": 0.295926034450531, "learning_rate": 0.00015446555842241045, "loss": 11.6771, "step": 45403 }, { "epoch": 0.9504312149376204, "grad_norm": 0.35600966215133667, "learning_rate": 0.00015446371962009274, "loss": 11.6833, "step": 45404 }, { "epoch": 0.9504521477015825, "grad_norm": 0.35890641808509827, "learning_rate": 0.0001544618807915933, "loss": 11.6725, "step": 45405 }, { "epoch": 0.9504730804655447, "grad_norm": 0.32030025124549866, "learning_rate": 0.0001544600419369129, "loss": 11.6576, "step": 45406 }, { "epoch": 0.9504940132295068, "grad_norm": 0.27309057116508484, "learning_rate": 0.00015445820305605247, "loss": 11.6772, "step": 45407 }, { "epoch": 0.950514945993469, "grad_norm": 0.4449009597301483, "learning_rate": 0.00015445636414901292, "loss": 11.6679, "step": 45408 }, { "epoch": 0.9505358787574312, "grad_norm": 0.34381383657455444, "learning_rate": 0.00015445452521579509, "loss": 11.6654, "step": 45409 }, { "epoch": 0.9505568115213933, "grad_norm": 0.28381121158599854, "learning_rate": 0.00015445268625639987, "loss": 11.6709, "step": 45410 }, { "epoch": 0.9505777442853555, "grad_norm": 0.2774636745452881, "learning_rate": 0.00015445084727082814, "loss": 11.6671, "step": 45411 }, { "epoch": 0.9505986770493176, "grad_norm": 0.33199986815452576, "learning_rate": 0.0001544490082590808, "loss": 11.6876, "step": 45412 }, { "epoch": 0.9506196098132798, "grad_norm": 0.31698352098464966, "learning_rate": 0.00015444716922115875, "loss": 11.6894, "step": 45413 }, { "epoch": 0.950640542577242, "grad_norm": 2.079110622406006, "learning_rate": 0.00015444533015706287, "loss": 11.6474, "step": 45414 }, { "epoch": 0.950661475341204, "grad_norm": 0.39348679780960083, "learning_rate": 0.000154443491066794, "loss": 11.6904, "step": 45415 }, { "epoch": 0.9506824081051662, "grad_norm": 0.2584545910358429, "learning_rate": 0.00015444165195035306, "loss": 11.6619, "step": 45416 }, { "epoch": 0.9507033408691283, "grad_norm": 0.31376075744628906, "learning_rate": 0.00015443981280774093, "loss": 11.6823, "step": 45417 }, { "epoch": 0.9507242736330905, "grad_norm": 0.2741886079311371, "learning_rate": 0.00015443797363895846, "loss": 11.6714, "step": 45418 }, { "epoch": 0.9507452063970526, "grad_norm": 0.27131161093711853, "learning_rate": 0.0001544361344440066, "loss": 11.6662, "step": 45419 }, { "epoch": 0.9507661391610148, "grad_norm": 0.27809280157089233, "learning_rate": 0.00015443429522288618, "loss": 11.6625, "step": 45420 }, { "epoch": 0.950787071924977, "grad_norm": 0.3061544597148895, "learning_rate": 0.00015443245597559813, "loss": 11.6705, "step": 45421 }, { "epoch": 0.9508080046889391, "grad_norm": 0.29245397448539734, "learning_rate": 0.00015443061670214332, "loss": 11.6682, "step": 45422 }, { "epoch": 0.9508289374529013, "grad_norm": 0.34874972701072693, "learning_rate": 0.00015442877740252258, "loss": 11.6864, "step": 45423 }, { "epoch": 0.9508498702168634, "grad_norm": 0.35682615637779236, "learning_rate": 0.00015442693807673686, "loss": 11.6849, "step": 45424 }, { "epoch": 0.9508708029808256, "grad_norm": 0.27244794368743896, "learning_rate": 0.000154425098724787, "loss": 11.663, "step": 45425 }, { "epoch": 0.9508917357447877, "grad_norm": 0.3232746422290802, "learning_rate": 0.0001544232593466739, "loss": 11.6754, "step": 45426 }, { "epoch": 0.9509126685087499, "grad_norm": 0.26041314005851746, "learning_rate": 0.00015442141994239847, "loss": 11.668, "step": 45427 }, { "epoch": 0.9509336012727121, "grad_norm": 0.3637191951274872, "learning_rate": 0.00015441958051196157, "loss": 11.6836, "step": 45428 }, { "epoch": 0.9509545340366742, "grad_norm": 0.44596067070961, "learning_rate": 0.00015441774105536408, "loss": 11.6857, "step": 45429 }, { "epoch": 0.9509754668006364, "grad_norm": 0.3975149691104889, "learning_rate": 0.0001544159015726069, "loss": 11.6975, "step": 45430 }, { "epoch": 0.9509963995645985, "grad_norm": 0.3347132205963135, "learning_rate": 0.0001544140620636909, "loss": 11.6832, "step": 45431 }, { "epoch": 0.9510173323285607, "grad_norm": 0.312200129032135, "learning_rate": 0.000154412222528617, "loss": 11.6763, "step": 45432 }, { "epoch": 0.9510382650925229, "grad_norm": 0.24553990364074707, "learning_rate": 0.000154410382967386, "loss": 11.6625, "step": 45433 }, { "epoch": 0.951059197856485, "grad_norm": 0.32894015312194824, "learning_rate": 0.00015440854337999885, "loss": 11.6593, "step": 45434 }, { "epoch": 0.9510801306204472, "grad_norm": 0.2810359299182892, "learning_rate": 0.00015440670376645646, "loss": 11.6752, "step": 45435 }, { "epoch": 0.9511010633844093, "grad_norm": 0.3257293105125427, "learning_rate": 0.00015440486412675964, "loss": 11.6639, "step": 45436 }, { "epoch": 0.9511219961483715, "grad_norm": 0.33148786425590515, "learning_rate": 0.00015440302446090932, "loss": 11.6577, "step": 45437 }, { "epoch": 0.9511429289123335, "grad_norm": 0.38155752420425415, "learning_rate": 0.0001544011847689064, "loss": 11.6558, "step": 45438 }, { "epoch": 0.9511638616762957, "grad_norm": 0.4188504219055176, "learning_rate": 0.00015439934505075172, "loss": 11.6567, "step": 45439 }, { "epoch": 0.9511847944402579, "grad_norm": 0.35769563913345337, "learning_rate": 0.00015439750530644617, "loss": 11.6637, "step": 45440 }, { "epoch": 0.95120572720422, "grad_norm": 0.33533385396003723, "learning_rate": 0.00015439566553599065, "loss": 11.6652, "step": 45441 }, { "epoch": 0.9512266599681822, "grad_norm": 0.2782195508480072, "learning_rate": 0.00015439382573938609, "loss": 11.667, "step": 45442 }, { "epoch": 0.9512475927321443, "grad_norm": 0.30862969160079956, "learning_rate": 0.00015439198591663328, "loss": 11.6587, "step": 45443 }, { "epoch": 0.9512685254961065, "grad_norm": 0.2921276390552521, "learning_rate": 0.00015439014606773318, "loss": 11.6535, "step": 45444 }, { "epoch": 0.9512894582600686, "grad_norm": 0.3051919937133789, "learning_rate": 0.00015438830619268662, "loss": 11.6852, "step": 45445 }, { "epoch": 0.9513103910240308, "grad_norm": 0.35456833243370056, "learning_rate": 0.00015438646629149455, "loss": 11.6494, "step": 45446 }, { "epoch": 0.951331323787993, "grad_norm": 0.33089113235473633, "learning_rate": 0.00015438462636415778, "loss": 11.681, "step": 45447 }, { "epoch": 0.9513522565519551, "grad_norm": 0.323009729385376, "learning_rate": 0.00015438278641067722, "loss": 11.6678, "step": 45448 }, { "epoch": 0.9513731893159173, "grad_norm": 0.3425597548484802, "learning_rate": 0.0001543809464310538, "loss": 11.6638, "step": 45449 }, { "epoch": 0.9513941220798794, "grad_norm": 0.4137619435787201, "learning_rate": 0.00015437910642528838, "loss": 11.6866, "step": 45450 }, { "epoch": 0.9514150548438416, "grad_norm": 0.25879228115081787, "learning_rate": 0.0001543772663933818, "loss": 11.6743, "step": 45451 }, { "epoch": 0.9514359876078037, "grad_norm": 0.3066892623901367, "learning_rate": 0.00015437542633533498, "loss": 11.6761, "step": 45452 }, { "epoch": 0.9514569203717659, "grad_norm": 0.3231036961078644, "learning_rate": 0.0001543735862511488, "loss": 11.67, "step": 45453 }, { "epoch": 0.9514778531357281, "grad_norm": 0.37690457701683044, "learning_rate": 0.00015437174614082416, "loss": 11.6517, "step": 45454 }, { "epoch": 0.9514987858996902, "grad_norm": 0.3171928822994232, "learning_rate": 0.00015436990600436193, "loss": 11.6634, "step": 45455 }, { "epoch": 0.9515197186636524, "grad_norm": 0.30223771929740906, "learning_rate": 0.000154368065841763, "loss": 11.6657, "step": 45456 }, { "epoch": 0.9515406514276145, "grad_norm": 0.35909000039100647, "learning_rate": 0.00015436622565302826, "loss": 11.6675, "step": 45457 }, { "epoch": 0.9515615841915767, "grad_norm": 0.3378402888774872, "learning_rate": 0.00015436438543815856, "loss": 11.6672, "step": 45458 }, { "epoch": 0.9515825169555389, "grad_norm": 0.2539981007575989, "learning_rate": 0.00015436254519715482, "loss": 11.6672, "step": 45459 }, { "epoch": 0.951603449719501, "grad_norm": 0.3811318576335907, "learning_rate": 0.00015436070493001793, "loss": 11.6782, "step": 45460 }, { "epoch": 0.9516243824834631, "grad_norm": 0.29472100734710693, "learning_rate": 0.00015435886463674873, "loss": 11.6706, "step": 45461 }, { "epoch": 0.9516453152474252, "grad_norm": 0.3246611952781677, "learning_rate": 0.00015435702431734817, "loss": 11.6544, "step": 45462 }, { "epoch": 0.9516662480113874, "grad_norm": 0.26488226652145386, "learning_rate": 0.00015435518397181706, "loss": 11.6616, "step": 45463 }, { "epoch": 0.9516871807753495, "grad_norm": 0.36483505368232727, "learning_rate": 0.0001543533436001564, "loss": 11.6705, "step": 45464 }, { "epoch": 0.9517081135393117, "grad_norm": 0.2859548330307007, "learning_rate": 0.0001543515032023669, "loss": 11.6813, "step": 45465 }, { "epoch": 0.9517290463032739, "grad_norm": 0.29503321647644043, "learning_rate": 0.00015434966277844963, "loss": 11.6859, "step": 45466 }, { "epoch": 0.951749979067236, "grad_norm": 0.3436427712440491, "learning_rate": 0.0001543478223284053, "loss": 11.6573, "step": 45467 }, { "epoch": 0.9517709118311982, "grad_norm": 0.2913805842399597, "learning_rate": 0.00015434598185223495, "loss": 11.6755, "step": 45468 }, { "epoch": 0.9517918445951603, "grad_norm": 0.2795902192592621, "learning_rate": 0.00015434414134993937, "loss": 11.6704, "step": 45469 }, { "epoch": 0.9518127773591225, "grad_norm": 0.27633386850357056, "learning_rate": 0.0001543423008215195, "loss": 11.6616, "step": 45470 }, { "epoch": 0.9518337101230846, "grad_norm": 0.2874174416065216, "learning_rate": 0.00015434046026697616, "loss": 11.6653, "step": 45471 }, { "epoch": 0.9518546428870468, "grad_norm": 0.3000246584415436, "learning_rate": 0.00015433861968631028, "loss": 11.6608, "step": 45472 }, { "epoch": 0.951875575651009, "grad_norm": 0.27452850341796875, "learning_rate": 0.00015433677907952274, "loss": 11.6818, "step": 45473 }, { "epoch": 0.9518965084149711, "grad_norm": 0.31451094150543213, "learning_rate": 0.00015433493844661445, "loss": 11.6709, "step": 45474 }, { "epoch": 0.9519174411789333, "grad_norm": 0.2857160270214081, "learning_rate": 0.00015433309778758623, "loss": 11.6625, "step": 45475 }, { "epoch": 0.9519383739428954, "grad_norm": 0.27020812034606934, "learning_rate": 0.00015433125710243904, "loss": 11.6583, "step": 45476 }, { "epoch": 0.9519593067068576, "grad_norm": 0.3136729598045349, "learning_rate": 0.00015432941639117366, "loss": 11.6786, "step": 45477 }, { "epoch": 0.9519802394708198, "grad_norm": 0.255382776260376, "learning_rate": 0.00015432757565379108, "loss": 11.6743, "step": 45478 }, { "epoch": 0.9520011722347819, "grad_norm": 0.26934805512428284, "learning_rate": 0.00015432573489029214, "loss": 11.6707, "step": 45479 }, { "epoch": 0.9520221049987441, "grad_norm": 0.35448405146598816, "learning_rate": 0.00015432389410067774, "loss": 11.6781, "step": 45480 }, { "epoch": 0.9520430377627062, "grad_norm": 0.37052610516548157, "learning_rate": 0.00015432205328494875, "loss": 11.6801, "step": 45481 }, { "epoch": 0.9520639705266684, "grad_norm": 0.37034159898757935, "learning_rate": 0.00015432021244310605, "loss": 11.66, "step": 45482 }, { "epoch": 0.9520849032906304, "grad_norm": 0.37065747380256653, "learning_rate": 0.00015431837157515055, "loss": 11.6562, "step": 45483 }, { "epoch": 0.9521058360545926, "grad_norm": 0.36219140887260437, "learning_rate": 0.0001543165306810831, "loss": 11.6832, "step": 45484 }, { "epoch": 0.9521267688185548, "grad_norm": 0.47474777698516846, "learning_rate": 0.00015431468976090463, "loss": 11.672, "step": 45485 }, { "epoch": 0.9521477015825169, "grad_norm": 0.26535195112228394, "learning_rate": 0.00015431284881461602, "loss": 11.6819, "step": 45486 }, { "epoch": 0.9521686343464791, "grad_norm": 0.327974796295166, "learning_rate": 0.00015431100784221808, "loss": 11.6727, "step": 45487 }, { "epoch": 0.9521895671104412, "grad_norm": 0.28819766640663147, "learning_rate": 0.00015430916684371182, "loss": 11.6683, "step": 45488 }, { "epoch": 0.9522104998744034, "grad_norm": 0.30669042468070984, "learning_rate": 0.000154307325819098, "loss": 11.6559, "step": 45489 }, { "epoch": 0.9522314326383655, "grad_norm": 0.4112478792667389, "learning_rate": 0.00015430548476837755, "loss": 11.6755, "step": 45490 }, { "epoch": 0.9522523654023277, "grad_norm": 0.33888962864875793, "learning_rate": 0.0001543036436915514, "loss": 11.6515, "step": 45491 }, { "epoch": 0.9522732981662899, "grad_norm": 0.30490320920944214, "learning_rate": 0.0001543018025886204, "loss": 11.671, "step": 45492 }, { "epoch": 0.952294230930252, "grad_norm": 0.35584864020347595, "learning_rate": 0.00015429996145958542, "loss": 11.6628, "step": 45493 }, { "epoch": 0.9523151636942142, "grad_norm": 0.2996394634246826, "learning_rate": 0.00015429812030444735, "loss": 11.6604, "step": 45494 }, { "epoch": 0.9523360964581763, "grad_norm": 0.36425137519836426, "learning_rate": 0.00015429627912320708, "loss": 11.6842, "step": 45495 }, { "epoch": 0.9523570292221385, "grad_norm": 0.35794126987457275, "learning_rate": 0.00015429443791586554, "loss": 11.6711, "step": 45496 }, { "epoch": 0.9523779619861007, "grad_norm": 0.3516862094402313, "learning_rate": 0.00015429259668242354, "loss": 11.6547, "step": 45497 }, { "epoch": 0.9523988947500628, "grad_norm": 0.3713262677192688, "learning_rate": 0.00015429075542288203, "loss": 11.672, "step": 45498 }, { "epoch": 0.952419827514025, "grad_norm": 0.30617788434028625, "learning_rate": 0.00015428891413724185, "loss": 11.6761, "step": 45499 }, { "epoch": 0.9524407602779871, "grad_norm": 0.26814866065979004, "learning_rate": 0.0001542870728255039, "loss": 11.6659, "step": 45500 }, { "epoch": 0.9524616930419493, "grad_norm": 0.3156832456588745, "learning_rate": 0.00015428523148766906, "loss": 11.6723, "step": 45501 }, { "epoch": 0.9524826258059114, "grad_norm": 0.33512547612190247, "learning_rate": 0.00015428339012373826, "loss": 11.6608, "step": 45502 }, { "epoch": 0.9525035585698736, "grad_norm": 0.3084980547428131, "learning_rate": 0.0001542815487337123, "loss": 11.6796, "step": 45503 }, { "epoch": 0.9525244913338358, "grad_norm": 0.41205865144729614, "learning_rate": 0.00015427970731759212, "loss": 11.6906, "step": 45504 }, { "epoch": 0.9525454240977979, "grad_norm": 0.4771546423435211, "learning_rate": 0.0001542778658753786, "loss": 11.6565, "step": 45505 }, { "epoch": 0.95256635686176, "grad_norm": 0.3440524935722351, "learning_rate": 0.00015427602440707262, "loss": 11.6749, "step": 45506 }, { "epoch": 0.9525872896257221, "grad_norm": 0.33640745282173157, "learning_rate": 0.00015427418291267509, "loss": 11.6813, "step": 45507 }, { "epoch": 0.9526082223896843, "grad_norm": 0.3182092308998108, "learning_rate": 0.00015427234139218688, "loss": 11.6779, "step": 45508 }, { "epoch": 0.9526291551536464, "grad_norm": 0.30089786648750305, "learning_rate": 0.00015427049984560883, "loss": 11.674, "step": 45509 }, { "epoch": 0.9526500879176086, "grad_norm": 0.3462368845939636, "learning_rate": 0.00015426865827294187, "loss": 11.6712, "step": 45510 }, { "epoch": 0.9526710206815708, "grad_norm": 0.29914674162864685, "learning_rate": 0.00015426681667418687, "loss": 11.6646, "step": 45511 }, { "epoch": 0.9526919534455329, "grad_norm": 0.36077284812927246, "learning_rate": 0.00015426497504934474, "loss": 11.6641, "step": 45512 }, { "epoch": 0.9527128862094951, "grad_norm": 0.35823673009872437, "learning_rate": 0.00015426313339841636, "loss": 11.6727, "step": 45513 }, { "epoch": 0.9527338189734572, "grad_norm": 0.2703952193260193, "learning_rate": 0.0001542612917214026, "loss": 11.6736, "step": 45514 }, { "epoch": 0.9527547517374194, "grad_norm": 0.2949756979942322, "learning_rate": 0.00015425945001830435, "loss": 11.6722, "step": 45515 }, { "epoch": 0.9527756845013816, "grad_norm": 0.34438785910606384, "learning_rate": 0.00015425760828912248, "loss": 11.6681, "step": 45516 }, { "epoch": 0.9527966172653437, "grad_norm": 0.2995656728744507, "learning_rate": 0.0001542557665338579, "loss": 11.6556, "step": 45517 }, { "epoch": 0.9528175500293059, "grad_norm": 0.28061026334762573, "learning_rate": 0.0001542539247525115, "loss": 11.6539, "step": 45518 }, { "epoch": 0.952838482793268, "grad_norm": 0.3816194534301758, "learning_rate": 0.0001542520829450841, "loss": 11.6613, "step": 45519 }, { "epoch": 0.9528594155572302, "grad_norm": 0.2897516191005707, "learning_rate": 0.0001542502411115767, "loss": 11.6638, "step": 45520 }, { "epoch": 0.9528803483211923, "grad_norm": 0.38036391139030457, "learning_rate": 0.0001542483992519901, "loss": 11.6824, "step": 45521 }, { "epoch": 0.9529012810851545, "grad_norm": 0.29861143231391907, "learning_rate": 0.00015424655736632519, "loss": 11.6734, "step": 45522 }, { "epoch": 0.9529222138491167, "grad_norm": 0.3058973252773285, "learning_rate": 0.0001542447154545829, "loss": 11.6724, "step": 45523 }, { "epoch": 0.9529431466130788, "grad_norm": 0.29588553309440613, "learning_rate": 0.00015424287351676404, "loss": 11.6657, "step": 45524 }, { "epoch": 0.952964079377041, "grad_norm": 0.30260932445526123, "learning_rate": 0.00015424103155286959, "loss": 11.6654, "step": 45525 }, { "epoch": 0.9529850121410031, "grad_norm": 0.3661139905452728, "learning_rate": 0.00015423918956290037, "loss": 11.6752, "step": 45526 }, { "epoch": 0.9530059449049653, "grad_norm": 0.30492427945137024, "learning_rate": 0.00015423734754685728, "loss": 11.6724, "step": 45527 }, { "epoch": 0.9530268776689274, "grad_norm": 0.2953735291957855, "learning_rate": 0.00015423550550474125, "loss": 11.6643, "step": 45528 }, { "epoch": 0.9530478104328896, "grad_norm": 0.2834728956222534, "learning_rate": 0.00015423366343655308, "loss": 11.6733, "step": 45529 }, { "epoch": 0.9530687431968518, "grad_norm": 0.41362521052360535, "learning_rate": 0.00015423182134229372, "loss": 11.6776, "step": 45530 }, { "epoch": 0.9530896759608138, "grad_norm": 0.30024266242980957, "learning_rate": 0.00015422997922196402, "loss": 11.6571, "step": 45531 }, { "epoch": 0.953110608724776, "grad_norm": 0.35301652550697327, "learning_rate": 0.00015422813707556492, "loss": 11.6789, "step": 45532 }, { "epoch": 0.9531315414887381, "grad_norm": 0.2959400713443756, "learning_rate": 0.0001542262949030972, "loss": 11.661, "step": 45533 }, { "epoch": 0.9531524742527003, "grad_norm": 0.3790542185306549, "learning_rate": 0.00015422445270456189, "loss": 11.6782, "step": 45534 }, { "epoch": 0.9531734070166625, "grad_norm": 0.2792775332927704, "learning_rate": 0.00015422261047995977, "loss": 11.6726, "step": 45535 }, { "epoch": 0.9531943397806246, "grad_norm": 0.34424662590026855, "learning_rate": 0.00015422076822929173, "loss": 11.6632, "step": 45536 }, { "epoch": 0.9532152725445868, "grad_norm": 0.2878519594669342, "learning_rate": 0.0001542189259525587, "loss": 11.6748, "step": 45537 }, { "epoch": 0.9532362053085489, "grad_norm": 0.32877546548843384, "learning_rate": 0.00015421708364976155, "loss": 11.65, "step": 45538 }, { "epoch": 0.9532571380725111, "grad_norm": 0.34515365958213806, "learning_rate": 0.00015421524132090117, "loss": 11.661, "step": 45539 }, { "epoch": 0.9532780708364732, "grad_norm": 0.2942392826080322, "learning_rate": 0.00015421339896597841, "loss": 11.6711, "step": 45540 }, { "epoch": 0.9532990036004354, "grad_norm": 0.30265161395072937, "learning_rate": 0.0001542115565849942, "loss": 11.6694, "step": 45541 }, { "epoch": 0.9533199363643976, "grad_norm": 0.31448912620544434, "learning_rate": 0.0001542097141779494, "loss": 11.6672, "step": 45542 }, { "epoch": 0.9533408691283597, "grad_norm": 0.320099413394928, "learning_rate": 0.0001542078717448449, "loss": 11.6608, "step": 45543 }, { "epoch": 0.9533618018923219, "grad_norm": 0.35538923740386963, "learning_rate": 0.00015420602928568162, "loss": 11.682, "step": 45544 }, { "epoch": 0.953382734656284, "grad_norm": 0.27441203594207764, "learning_rate": 0.0001542041868004604, "loss": 11.6608, "step": 45545 }, { "epoch": 0.9534036674202462, "grad_norm": 0.3517153263092041, "learning_rate": 0.00015420234428918214, "loss": 11.6559, "step": 45546 }, { "epoch": 0.9534246001842083, "grad_norm": 0.21993288397789001, "learning_rate": 0.0001542005017518477, "loss": 11.6615, "step": 45547 }, { "epoch": 0.9534455329481705, "grad_norm": 0.3095718324184418, "learning_rate": 0.00015419865918845803, "loss": 11.6713, "step": 45548 }, { "epoch": 0.9534664657121327, "grad_norm": 0.36876606941223145, "learning_rate": 0.00015419681659901394, "loss": 11.669, "step": 45549 }, { "epoch": 0.9534873984760948, "grad_norm": 0.32387325167655945, "learning_rate": 0.0001541949739835164, "loss": 11.6562, "step": 45550 }, { "epoch": 0.953508331240057, "grad_norm": 0.3189390003681183, "learning_rate": 0.00015419313134196623, "loss": 11.6556, "step": 45551 }, { "epoch": 0.953529264004019, "grad_norm": 0.2734440565109253, "learning_rate": 0.00015419128867436433, "loss": 11.6619, "step": 45552 }, { "epoch": 0.9535501967679813, "grad_norm": 0.3183128237724304, "learning_rate": 0.00015418944598071159, "loss": 11.6645, "step": 45553 }, { "epoch": 0.9535711295319435, "grad_norm": 0.33292844891548157, "learning_rate": 0.00015418760326100893, "loss": 11.6717, "step": 45554 }, { "epoch": 0.9535920622959055, "grad_norm": 0.2873111963272095, "learning_rate": 0.00015418576051525715, "loss": 11.6602, "step": 45555 }, { "epoch": 0.9536129950598677, "grad_norm": 0.38366368412971497, "learning_rate": 0.0001541839177434572, "loss": 11.6732, "step": 45556 }, { "epoch": 0.9536339278238298, "grad_norm": 0.31478017568588257, "learning_rate": 0.00015418207494561, "loss": 11.6425, "step": 45557 }, { "epoch": 0.953654860587792, "grad_norm": 0.3289630115032196, "learning_rate": 0.00015418023212171632, "loss": 11.6465, "step": 45558 }, { "epoch": 0.9536757933517541, "grad_norm": 0.4264657497406006, "learning_rate": 0.00015417838927177718, "loss": 11.6801, "step": 45559 }, { "epoch": 0.9536967261157163, "grad_norm": 0.32910725474357605, "learning_rate": 0.00015417654639579337, "loss": 11.68, "step": 45560 }, { "epoch": 0.9537176588796785, "grad_norm": 0.42459550499916077, "learning_rate": 0.00015417470349376582, "loss": 11.6594, "step": 45561 }, { "epoch": 0.9537385916436406, "grad_norm": 0.2832876741886139, "learning_rate": 0.0001541728605656954, "loss": 11.6535, "step": 45562 }, { "epoch": 0.9537595244076028, "grad_norm": 0.2859860062599182, "learning_rate": 0.00015417101761158302, "loss": 11.652, "step": 45563 }, { "epoch": 0.9537804571715649, "grad_norm": 0.2564634084701538, "learning_rate": 0.0001541691746314295, "loss": 11.6936, "step": 45564 }, { "epoch": 0.9538013899355271, "grad_norm": 0.29839465022087097, "learning_rate": 0.00015416733162523582, "loss": 11.6617, "step": 45565 }, { "epoch": 0.9538223226994892, "grad_norm": 0.32012563943862915, "learning_rate": 0.0001541654885930028, "loss": 11.6654, "step": 45566 }, { "epoch": 0.9538432554634514, "grad_norm": 0.32680657505989075, "learning_rate": 0.00015416364553473132, "loss": 11.6735, "step": 45567 }, { "epoch": 0.9538641882274136, "grad_norm": 0.31038859486579895, "learning_rate": 0.00015416180245042234, "loss": 11.6705, "step": 45568 }, { "epoch": 0.9538851209913757, "grad_norm": 0.3923815190792084, "learning_rate": 0.00015415995934007666, "loss": 11.6835, "step": 45569 }, { "epoch": 0.9539060537553379, "grad_norm": 0.30407118797302246, "learning_rate": 0.00015415811620369524, "loss": 11.6635, "step": 45570 }, { "epoch": 0.9539269865193, "grad_norm": 0.3067784309387207, "learning_rate": 0.00015415627304127887, "loss": 11.6588, "step": 45571 }, { "epoch": 0.9539479192832622, "grad_norm": 0.2997535467147827, "learning_rate": 0.00015415442985282853, "loss": 11.6563, "step": 45572 }, { "epoch": 0.9539688520472244, "grad_norm": 0.3262852132320404, "learning_rate": 0.00015415258663834504, "loss": 11.6549, "step": 45573 }, { "epoch": 0.9539897848111865, "grad_norm": 0.4307679831981659, "learning_rate": 0.00015415074339782936, "loss": 11.6844, "step": 45574 }, { "epoch": 0.9540107175751487, "grad_norm": 0.31579211354255676, "learning_rate": 0.0001541489001312823, "loss": 11.6815, "step": 45575 }, { "epoch": 0.9540316503391107, "grad_norm": 0.2883415222167969, "learning_rate": 0.00015414705683870482, "loss": 11.655, "step": 45576 }, { "epoch": 0.954052583103073, "grad_norm": 0.3844873309135437, "learning_rate": 0.00015414521352009772, "loss": 11.6669, "step": 45577 }, { "epoch": 0.954073515867035, "grad_norm": 0.40231066942214966, "learning_rate": 0.00015414337017546193, "loss": 11.6585, "step": 45578 }, { "epoch": 0.9540944486309972, "grad_norm": 0.328931599855423, "learning_rate": 0.00015414152680479835, "loss": 11.6513, "step": 45579 }, { "epoch": 0.9541153813949594, "grad_norm": 0.2449324131011963, "learning_rate": 0.00015413968340810788, "loss": 11.6693, "step": 45580 }, { "epoch": 0.9541363141589215, "grad_norm": 0.3626157343387604, "learning_rate": 0.00015413783998539135, "loss": 11.6659, "step": 45581 }, { "epoch": 0.9541572469228837, "grad_norm": 0.24562205374240875, "learning_rate": 0.0001541359965366497, "loss": 11.6665, "step": 45582 }, { "epoch": 0.9541781796868458, "grad_norm": 0.4995974898338318, "learning_rate": 0.00015413415306188375, "loss": 11.687, "step": 45583 }, { "epoch": 0.954199112450808, "grad_norm": 0.35870108008384705, "learning_rate": 0.00015413230956109444, "loss": 11.6799, "step": 45584 }, { "epoch": 0.9542200452147701, "grad_norm": 0.34650149941444397, "learning_rate": 0.00015413046603428266, "loss": 11.6585, "step": 45585 }, { "epoch": 0.9542409779787323, "grad_norm": 0.3692728579044342, "learning_rate": 0.0001541286224814493, "loss": 11.673, "step": 45586 }, { "epoch": 0.9542619107426945, "grad_norm": 0.370113730430603, "learning_rate": 0.00015412677890259518, "loss": 11.6689, "step": 45587 }, { "epoch": 0.9542828435066566, "grad_norm": 0.32119178771972656, "learning_rate": 0.00015412493529772125, "loss": 11.6514, "step": 45588 }, { "epoch": 0.9543037762706188, "grad_norm": 0.36065512895584106, "learning_rate": 0.00015412309166682839, "loss": 11.6857, "step": 45589 }, { "epoch": 0.9543247090345809, "grad_norm": 0.32607215642929077, "learning_rate": 0.00015412124800991746, "loss": 11.6929, "step": 45590 }, { "epoch": 0.9543456417985431, "grad_norm": 0.2935864329338074, "learning_rate": 0.0001541194043269894, "loss": 11.6564, "step": 45591 }, { "epoch": 0.9543665745625053, "grad_norm": 0.4046070873737335, "learning_rate": 0.000154117560618045, "loss": 11.6793, "step": 45592 }, { "epoch": 0.9543875073264674, "grad_norm": 0.3216933608055115, "learning_rate": 0.00015411571688308523, "loss": 11.6554, "step": 45593 }, { "epoch": 0.9544084400904296, "grad_norm": 0.36879459023475647, "learning_rate": 0.00015411387312211095, "loss": 11.6634, "step": 45594 }, { "epoch": 0.9544293728543917, "grad_norm": 0.3768177032470703, "learning_rate": 0.00015411202933512307, "loss": 11.6809, "step": 45595 }, { "epoch": 0.9544503056183539, "grad_norm": 0.3846767246723175, "learning_rate": 0.00015411018552212243, "loss": 11.6876, "step": 45596 }, { "epoch": 0.954471238382316, "grad_norm": 0.41426634788513184, "learning_rate": 0.00015410834168310992, "loss": 11.6615, "step": 45597 }, { "epoch": 0.9544921711462782, "grad_norm": 0.2766231596469879, "learning_rate": 0.0001541064978180865, "loss": 11.6565, "step": 45598 }, { "epoch": 0.9545131039102404, "grad_norm": 0.36407479643821716, "learning_rate": 0.00015410465392705293, "loss": 11.6651, "step": 45599 }, { "epoch": 0.9545340366742024, "grad_norm": 0.24403107166290283, "learning_rate": 0.00015410281001001022, "loss": 11.6595, "step": 45600 }, { "epoch": 0.9545549694381646, "grad_norm": 0.28352126479148865, "learning_rate": 0.00015410096606695918, "loss": 11.6638, "step": 45601 }, { "epoch": 0.9545759022021267, "grad_norm": 0.2707439064979553, "learning_rate": 0.00015409912209790074, "loss": 11.6748, "step": 45602 }, { "epoch": 0.9545968349660889, "grad_norm": 0.35326382517814636, "learning_rate": 0.00015409727810283578, "loss": 11.6688, "step": 45603 }, { "epoch": 0.954617767730051, "grad_norm": 0.2990148961544037, "learning_rate": 0.00015409543408176514, "loss": 11.6694, "step": 45604 }, { "epoch": 0.9546387004940132, "grad_norm": 0.3705885410308838, "learning_rate": 0.00015409359003468976, "loss": 11.6577, "step": 45605 }, { "epoch": 0.9546596332579754, "grad_norm": 0.29206159710884094, "learning_rate": 0.00015409174596161051, "loss": 11.6611, "step": 45606 }, { "epoch": 0.9546805660219375, "grad_norm": 0.3100696802139282, "learning_rate": 0.00015408990186252828, "loss": 11.6754, "step": 45607 }, { "epoch": 0.9547014987858997, "grad_norm": 0.30035799741744995, "learning_rate": 0.00015408805773744393, "loss": 11.6677, "step": 45608 }, { "epoch": 0.9547224315498618, "grad_norm": 0.2769358158111572, "learning_rate": 0.00015408621358635837, "loss": 11.6595, "step": 45609 }, { "epoch": 0.954743364313824, "grad_norm": 0.25363776087760925, "learning_rate": 0.00015408436940927248, "loss": 11.6738, "step": 45610 }, { "epoch": 0.9547642970777862, "grad_norm": 0.29150786995887756, "learning_rate": 0.00015408252520618718, "loss": 11.6483, "step": 45611 }, { "epoch": 0.9547852298417483, "grad_norm": 0.24571675062179565, "learning_rate": 0.00015408068097710328, "loss": 11.6761, "step": 45612 }, { "epoch": 0.9548061626057105, "grad_norm": 0.313639372587204, "learning_rate": 0.00015407883672202176, "loss": 11.6711, "step": 45613 }, { "epoch": 0.9548270953696726, "grad_norm": 0.24359425902366638, "learning_rate": 0.0001540769924409434, "loss": 11.6665, "step": 45614 }, { "epoch": 0.9548480281336348, "grad_norm": 0.3314143121242523, "learning_rate": 0.00015407514813386918, "loss": 11.6646, "step": 45615 }, { "epoch": 0.9548689608975969, "grad_norm": 0.3414566218852997, "learning_rate": 0.0001540733038008, "loss": 11.6565, "step": 45616 }, { "epoch": 0.9548898936615591, "grad_norm": 0.33250755071640015, "learning_rate": 0.00015407145944173662, "loss": 11.6635, "step": 45617 }, { "epoch": 0.9549108264255213, "grad_norm": 0.3217605650424957, "learning_rate": 0.00015406961505668004, "loss": 11.6636, "step": 45618 }, { "epoch": 0.9549317591894834, "grad_norm": 0.2993682026863098, "learning_rate": 0.0001540677706456311, "loss": 11.6674, "step": 45619 }, { "epoch": 0.9549526919534456, "grad_norm": 0.2688712179660797, "learning_rate": 0.00015406592620859075, "loss": 11.6535, "step": 45620 }, { "epoch": 0.9549736247174077, "grad_norm": 0.3001573085784912, "learning_rate": 0.00015406408174555976, "loss": 11.6592, "step": 45621 }, { "epoch": 0.9549945574813699, "grad_norm": 0.3109448552131653, "learning_rate": 0.0001540622372565391, "loss": 11.6699, "step": 45622 }, { "epoch": 0.9550154902453319, "grad_norm": 0.3317570984363556, "learning_rate": 0.00015406039274152964, "loss": 11.6847, "step": 45623 }, { "epoch": 0.9550364230092941, "grad_norm": 0.25760766863822937, "learning_rate": 0.0001540585482005323, "loss": 11.6645, "step": 45624 }, { "epoch": 0.9550573557732563, "grad_norm": 0.4189164340496063, "learning_rate": 0.00015405670363354793, "loss": 11.6712, "step": 45625 }, { "epoch": 0.9550782885372184, "grad_norm": 0.24167434871196747, "learning_rate": 0.00015405485904057737, "loss": 11.6522, "step": 45626 }, { "epoch": 0.9550992213011806, "grad_norm": 0.348384827375412, "learning_rate": 0.00015405301442162158, "loss": 11.6813, "step": 45627 }, { "epoch": 0.9551201540651427, "grad_norm": 0.35576537251472473, "learning_rate": 0.00015405116977668145, "loss": 11.6767, "step": 45628 }, { "epoch": 0.9551410868291049, "grad_norm": 0.37924373149871826, "learning_rate": 0.00015404932510575781, "loss": 11.6878, "step": 45629 }, { "epoch": 0.955162019593067, "grad_norm": 0.34535956382751465, "learning_rate": 0.00015404748040885157, "loss": 11.6566, "step": 45630 }, { "epoch": 0.9551829523570292, "grad_norm": 0.5158879160881042, "learning_rate": 0.00015404563568596365, "loss": 11.6814, "step": 45631 }, { "epoch": 0.9552038851209914, "grad_norm": 0.34726083278656006, "learning_rate": 0.00015404379093709487, "loss": 11.6647, "step": 45632 }, { "epoch": 0.9552248178849535, "grad_norm": 0.27846992015838623, "learning_rate": 0.00015404194616224618, "loss": 11.6536, "step": 45633 }, { "epoch": 0.9552457506489157, "grad_norm": 0.35803401470184326, "learning_rate": 0.00015404010136141845, "loss": 11.6618, "step": 45634 }, { "epoch": 0.9552666834128778, "grad_norm": 0.3927311897277832, "learning_rate": 0.00015403825653461257, "loss": 11.6757, "step": 45635 }, { "epoch": 0.95528761617684, "grad_norm": 0.27488553524017334, "learning_rate": 0.0001540364116818294, "loss": 11.6771, "step": 45636 }, { "epoch": 0.9553085489408022, "grad_norm": 0.29298046231269836, "learning_rate": 0.00015403456680306983, "loss": 11.668, "step": 45637 }, { "epoch": 0.9553294817047643, "grad_norm": 0.37023118138313293, "learning_rate": 0.00015403272189833478, "loss": 11.6789, "step": 45638 }, { "epoch": 0.9553504144687265, "grad_norm": 0.3178916871547699, "learning_rate": 0.0001540308769676251, "loss": 11.68, "step": 45639 }, { "epoch": 0.9553713472326886, "grad_norm": 0.329144686460495, "learning_rate": 0.00015402903201094175, "loss": 11.6615, "step": 45640 }, { "epoch": 0.9553922799966508, "grad_norm": 0.3361702263355255, "learning_rate": 0.00015402718702828549, "loss": 11.6683, "step": 45641 }, { "epoch": 0.9554132127606129, "grad_norm": 0.3018311560153961, "learning_rate": 0.0001540253420196573, "loss": 11.6638, "step": 45642 }, { "epoch": 0.9554341455245751, "grad_norm": 0.30408427119255066, "learning_rate": 0.00015402349698505807, "loss": 11.6728, "step": 45643 }, { "epoch": 0.9554550782885373, "grad_norm": 0.373399943113327, "learning_rate": 0.00015402165192448863, "loss": 11.6818, "step": 45644 }, { "epoch": 0.9554760110524994, "grad_norm": 0.4131720960140228, "learning_rate": 0.00015401980683794992, "loss": 11.6563, "step": 45645 }, { "epoch": 0.9554969438164616, "grad_norm": 0.30701473355293274, "learning_rate": 0.00015401796172544283, "loss": 11.6654, "step": 45646 }, { "epoch": 0.9555178765804236, "grad_norm": 0.35603705048561096, "learning_rate": 0.0001540161165869682, "loss": 11.6715, "step": 45647 }, { "epoch": 0.9555388093443858, "grad_norm": 0.3144420087337494, "learning_rate": 0.0001540142714225269, "loss": 11.666, "step": 45648 }, { "epoch": 0.9555597421083479, "grad_norm": 0.3057281970977783, "learning_rate": 0.0001540124262321199, "loss": 11.661, "step": 45649 }, { "epoch": 0.9555806748723101, "grad_norm": 0.38341981172561646, "learning_rate": 0.00015401058101574804, "loss": 11.6847, "step": 45650 }, { "epoch": 0.9556016076362723, "grad_norm": 0.3027109205722809, "learning_rate": 0.00015400873577341222, "loss": 11.6685, "step": 45651 }, { "epoch": 0.9556225404002344, "grad_norm": 0.294535368680954, "learning_rate": 0.0001540068905051133, "loss": 11.6625, "step": 45652 }, { "epoch": 0.9556434731641966, "grad_norm": 0.2976929247379303, "learning_rate": 0.0001540050452108522, "loss": 11.6766, "step": 45653 }, { "epoch": 0.9556644059281587, "grad_norm": 0.34777185320854187, "learning_rate": 0.00015400319989062976, "loss": 11.6814, "step": 45654 }, { "epoch": 0.9556853386921209, "grad_norm": 0.35383668541908264, "learning_rate": 0.00015400135454444693, "loss": 11.6792, "step": 45655 }, { "epoch": 0.9557062714560831, "grad_norm": 0.2959744334220886, "learning_rate": 0.00015399950917230457, "loss": 11.6687, "step": 45656 }, { "epoch": 0.9557272042200452, "grad_norm": 0.3333214223384857, "learning_rate": 0.00015399766377420355, "loss": 11.6554, "step": 45657 }, { "epoch": 0.9557481369840074, "grad_norm": 0.3574691414833069, "learning_rate": 0.0001539958183501448, "loss": 11.6676, "step": 45658 }, { "epoch": 0.9557690697479695, "grad_norm": 0.557198166847229, "learning_rate": 0.00015399397290012914, "loss": 11.6816, "step": 45659 }, { "epoch": 0.9557900025119317, "grad_norm": 0.315208375453949, "learning_rate": 0.0001539921274241575, "loss": 11.6686, "step": 45660 }, { "epoch": 0.9558109352758938, "grad_norm": 0.2893199920654297, "learning_rate": 0.00015399028192223077, "loss": 11.6645, "step": 45661 }, { "epoch": 0.955831868039856, "grad_norm": 0.27371102571487427, "learning_rate": 0.00015398843639434983, "loss": 11.6587, "step": 45662 }, { "epoch": 0.9558528008038182, "grad_norm": 0.4895228147506714, "learning_rate": 0.00015398659084051556, "loss": 11.6891, "step": 45663 }, { "epoch": 0.9558737335677803, "grad_norm": 0.2861308753490448, "learning_rate": 0.00015398474526072886, "loss": 11.6498, "step": 45664 }, { "epoch": 0.9558946663317425, "grad_norm": 0.2633463144302368, "learning_rate": 0.00015398289965499063, "loss": 11.6716, "step": 45665 }, { "epoch": 0.9559155990957046, "grad_norm": 0.2979396879673004, "learning_rate": 0.00015398105402330173, "loss": 11.6652, "step": 45666 }, { "epoch": 0.9559365318596668, "grad_norm": 0.42365896701812744, "learning_rate": 0.00015397920836566305, "loss": 11.6586, "step": 45667 }, { "epoch": 0.9559574646236288, "grad_norm": 0.26511576771736145, "learning_rate": 0.00015397736268207545, "loss": 11.6535, "step": 45668 }, { "epoch": 0.955978397387591, "grad_norm": 0.4423754811286926, "learning_rate": 0.0001539755169725399, "loss": 11.6406, "step": 45669 }, { "epoch": 0.9559993301515533, "grad_norm": 0.3259352445602417, "learning_rate": 0.0001539736712370572, "loss": 11.6692, "step": 45670 }, { "epoch": 0.9560202629155153, "grad_norm": 0.30287617444992065, "learning_rate": 0.0001539718254756283, "loss": 11.665, "step": 45671 }, { "epoch": 0.9560411956794775, "grad_norm": 0.264923095703125, "learning_rate": 0.00015396997968825405, "loss": 11.6696, "step": 45672 }, { "epoch": 0.9560621284434396, "grad_norm": 0.4009045958518982, "learning_rate": 0.00015396813387493535, "loss": 11.6745, "step": 45673 }, { "epoch": 0.9560830612074018, "grad_norm": 0.325876384973526, "learning_rate": 0.0001539662880356731, "loss": 11.6764, "step": 45674 }, { "epoch": 0.956103993971364, "grad_norm": 0.24729646742343903, "learning_rate": 0.00015396444217046816, "loss": 11.6643, "step": 45675 }, { "epoch": 0.9561249267353261, "grad_norm": 0.3682772219181061, "learning_rate": 0.0001539625962793214, "loss": 11.6677, "step": 45676 }, { "epoch": 0.9561458594992883, "grad_norm": 0.3575502932071686, "learning_rate": 0.0001539607503622338, "loss": 11.6745, "step": 45677 }, { "epoch": 0.9561667922632504, "grad_norm": 0.31045547127723694, "learning_rate": 0.00015395890441920616, "loss": 11.6689, "step": 45678 }, { "epoch": 0.9561877250272126, "grad_norm": 0.38782480359077454, "learning_rate": 0.0001539570584502394, "loss": 11.681, "step": 45679 }, { "epoch": 0.9562086577911747, "grad_norm": 0.24009598791599274, "learning_rate": 0.00015395521245533438, "loss": 11.662, "step": 45680 }, { "epoch": 0.9562295905551369, "grad_norm": 0.34899070858955383, "learning_rate": 0.000153953366434492, "loss": 11.6558, "step": 45681 }, { "epoch": 0.9562505233190991, "grad_norm": 0.3207831382751465, "learning_rate": 0.0001539515203877132, "loss": 11.6499, "step": 45682 }, { "epoch": 0.9562714560830612, "grad_norm": 0.28059297800064087, "learning_rate": 0.0001539496743149988, "loss": 11.6766, "step": 45683 }, { "epoch": 0.9562923888470234, "grad_norm": 0.3039357662200928, "learning_rate": 0.0001539478282163497, "loss": 11.6734, "step": 45684 }, { "epoch": 0.9563133216109855, "grad_norm": 0.5084259510040283, "learning_rate": 0.0001539459820917668, "loss": 11.6844, "step": 45685 }, { "epoch": 0.9563342543749477, "grad_norm": 0.28287452459335327, "learning_rate": 0.000153944135941251, "loss": 11.6747, "step": 45686 }, { "epoch": 0.9563551871389098, "grad_norm": 0.302324503660202, "learning_rate": 0.00015394228976480317, "loss": 11.6713, "step": 45687 }, { "epoch": 0.956376119902872, "grad_norm": 0.3034577965736389, "learning_rate": 0.00015394044356242418, "loss": 11.6597, "step": 45688 }, { "epoch": 0.9563970526668342, "grad_norm": 0.2942992150783539, "learning_rate": 0.00015393859733411498, "loss": 11.6598, "step": 45689 }, { "epoch": 0.9564179854307963, "grad_norm": 0.30058997869491577, "learning_rate": 0.00015393675107987638, "loss": 11.6504, "step": 45690 }, { "epoch": 0.9564389181947585, "grad_norm": 0.30736243724823, "learning_rate": 0.00015393490479970932, "loss": 11.6707, "step": 45691 }, { "epoch": 0.9564598509587205, "grad_norm": 0.3109930157661438, "learning_rate": 0.00015393305849361463, "loss": 11.6746, "step": 45692 }, { "epoch": 0.9564807837226827, "grad_norm": 0.31760528683662415, "learning_rate": 0.00015393121216159328, "loss": 11.6464, "step": 45693 }, { "epoch": 0.956501716486645, "grad_norm": 0.34072670340538025, "learning_rate": 0.00015392936580364613, "loss": 11.6741, "step": 45694 }, { "epoch": 0.956522649250607, "grad_norm": 0.3088885545730591, "learning_rate": 0.000153927519419774, "loss": 11.6582, "step": 45695 }, { "epoch": 0.9565435820145692, "grad_norm": 0.31195175647735596, "learning_rate": 0.00015392567300997787, "loss": 11.6541, "step": 45696 }, { "epoch": 0.9565645147785313, "grad_norm": 0.2625606954097748, "learning_rate": 0.00015392382657425857, "loss": 11.6823, "step": 45697 }, { "epoch": 0.9565854475424935, "grad_norm": 0.2835977375507355, "learning_rate": 0.000153921980112617, "loss": 11.6748, "step": 45698 }, { "epoch": 0.9566063803064556, "grad_norm": 0.44533655047416687, "learning_rate": 0.00015392013362505407, "loss": 11.6764, "step": 45699 }, { "epoch": 0.9566273130704178, "grad_norm": 0.32483628392219543, "learning_rate": 0.00015391828711157067, "loss": 11.6736, "step": 45700 }, { "epoch": 0.95664824583438, "grad_norm": 0.35138052701950073, "learning_rate": 0.00015391644057216762, "loss": 11.6703, "step": 45701 }, { "epoch": 0.9566691785983421, "grad_norm": 0.30049169063568115, "learning_rate": 0.00015391459400684589, "loss": 11.6811, "step": 45702 }, { "epoch": 0.9566901113623043, "grad_norm": 0.4251629114151001, "learning_rate": 0.00015391274741560632, "loss": 11.6814, "step": 45703 }, { "epoch": 0.9567110441262664, "grad_norm": 0.2615565061569214, "learning_rate": 0.00015391090079844982, "loss": 11.655, "step": 45704 }, { "epoch": 0.9567319768902286, "grad_norm": 0.3707275688648224, "learning_rate": 0.0001539090541553773, "loss": 11.6832, "step": 45705 }, { "epoch": 0.9567529096541907, "grad_norm": 0.28334906697273254, "learning_rate": 0.00015390720748638956, "loss": 11.6665, "step": 45706 }, { "epoch": 0.9567738424181529, "grad_norm": 0.2797450125217438, "learning_rate": 0.00015390536079148759, "loss": 11.6775, "step": 45707 }, { "epoch": 0.9567947751821151, "grad_norm": 0.24760210514068604, "learning_rate": 0.0001539035140706722, "loss": 11.6722, "step": 45708 }, { "epoch": 0.9568157079460772, "grad_norm": 0.3331829011440277, "learning_rate": 0.0001539016673239443, "loss": 11.6808, "step": 45709 }, { "epoch": 0.9568366407100394, "grad_norm": 0.29464712738990784, "learning_rate": 0.00015389982055130484, "loss": 11.6759, "step": 45710 }, { "epoch": 0.9568575734740015, "grad_norm": 0.41871532797813416, "learning_rate": 0.00015389797375275462, "loss": 11.6694, "step": 45711 }, { "epoch": 0.9568785062379637, "grad_norm": 0.27465856075286865, "learning_rate": 0.00015389612692829458, "loss": 11.6507, "step": 45712 }, { "epoch": 0.9568994390019259, "grad_norm": 0.31889671087265015, "learning_rate": 0.0001538942800779256, "loss": 11.6734, "step": 45713 }, { "epoch": 0.956920371765888, "grad_norm": 0.25460201501846313, "learning_rate": 0.00015389243320164853, "loss": 11.6637, "step": 45714 }, { "epoch": 0.9569413045298502, "grad_norm": 0.255456805229187, "learning_rate": 0.00015389058629946428, "loss": 11.6802, "step": 45715 }, { "epoch": 0.9569622372938122, "grad_norm": 0.315229207277298, "learning_rate": 0.0001538887393713738, "loss": 11.6552, "step": 45716 }, { "epoch": 0.9569831700577744, "grad_norm": 0.2590782344341278, "learning_rate": 0.0001538868924173779, "loss": 11.6664, "step": 45717 }, { "epoch": 0.9570041028217365, "grad_norm": 0.28851521015167236, "learning_rate": 0.00015388504543747746, "loss": 11.6641, "step": 45718 }, { "epoch": 0.9570250355856987, "grad_norm": 0.2949741780757904, "learning_rate": 0.0001538831984316734, "loss": 11.6698, "step": 45719 }, { "epoch": 0.9570459683496609, "grad_norm": 0.3240016996860504, "learning_rate": 0.00015388135139996663, "loss": 11.6754, "step": 45720 }, { "epoch": 0.957066901113623, "grad_norm": 0.3181532621383667, "learning_rate": 0.00015387950434235802, "loss": 11.6774, "step": 45721 }, { "epoch": 0.9570878338775852, "grad_norm": 0.31399643421173096, "learning_rate": 0.00015387765725884844, "loss": 11.6668, "step": 45722 }, { "epoch": 0.9571087666415473, "grad_norm": 0.2953000068664551, "learning_rate": 0.0001538758101494388, "loss": 11.6651, "step": 45723 }, { "epoch": 0.9571296994055095, "grad_norm": 0.3323461413383484, "learning_rate": 0.00015387396301412998, "loss": 11.6496, "step": 45724 }, { "epoch": 0.9571506321694716, "grad_norm": 0.4073496162891388, "learning_rate": 0.00015387211585292282, "loss": 11.678, "step": 45725 }, { "epoch": 0.9571715649334338, "grad_norm": 0.39240843057632446, "learning_rate": 0.00015387026866581831, "loss": 11.6781, "step": 45726 }, { "epoch": 0.957192497697396, "grad_norm": 0.30154433846473694, "learning_rate": 0.00015386842145281726, "loss": 11.67, "step": 45727 }, { "epoch": 0.9572134304613581, "grad_norm": 0.2787723243236542, "learning_rate": 0.00015386657421392061, "loss": 11.6652, "step": 45728 }, { "epoch": 0.9572343632253203, "grad_norm": 0.27107465267181396, "learning_rate": 0.00015386472694912918, "loss": 11.6796, "step": 45729 }, { "epoch": 0.9572552959892824, "grad_norm": 0.25678902864456177, "learning_rate": 0.0001538628796584439, "loss": 11.6664, "step": 45730 }, { "epoch": 0.9572762287532446, "grad_norm": 0.3176180422306061, "learning_rate": 0.00015386103234186566, "loss": 11.6767, "step": 45731 }, { "epoch": 0.9572971615172068, "grad_norm": 0.27931657433509827, "learning_rate": 0.00015385918499939537, "loss": 11.6702, "step": 45732 }, { "epoch": 0.9573180942811689, "grad_norm": 0.2906804382801056, "learning_rate": 0.00015385733763103384, "loss": 11.6672, "step": 45733 }, { "epoch": 0.9573390270451311, "grad_norm": 0.3486844003200531, "learning_rate": 0.00015385549023678205, "loss": 11.6713, "step": 45734 }, { "epoch": 0.9573599598090932, "grad_norm": 0.32581043243408203, "learning_rate": 0.00015385364281664084, "loss": 11.6613, "step": 45735 }, { "epoch": 0.9573808925730554, "grad_norm": 0.33661848306655884, "learning_rate": 0.00015385179537061108, "loss": 11.6685, "step": 45736 }, { "epoch": 0.9574018253370175, "grad_norm": 0.3181098401546478, "learning_rate": 0.0001538499478986937, "loss": 11.6466, "step": 45737 }, { "epoch": 0.9574227581009797, "grad_norm": 0.29385924339294434, "learning_rate": 0.00015384810040088958, "loss": 11.6523, "step": 45738 }, { "epoch": 0.9574436908649419, "grad_norm": 0.3101407289505005, "learning_rate": 0.00015384625287719959, "loss": 11.6667, "step": 45739 }, { "epoch": 0.9574646236289039, "grad_norm": 0.28579628467559814, "learning_rate": 0.00015384440532762464, "loss": 11.681, "step": 45740 }, { "epoch": 0.9574855563928661, "grad_norm": 0.27524125576019287, "learning_rate": 0.00015384255775216556, "loss": 11.6576, "step": 45741 }, { "epoch": 0.9575064891568282, "grad_norm": 0.36785417795181274, "learning_rate": 0.00015384071015082332, "loss": 11.654, "step": 45742 }, { "epoch": 0.9575274219207904, "grad_norm": 0.27759912610054016, "learning_rate": 0.0001538388625235988, "loss": 11.6619, "step": 45743 }, { "epoch": 0.9575483546847525, "grad_norm": 0.2976100742816925, "learning_rate": 0.00015383701487049281, "loss": 11.6684, "step": 45744 }, { "epoch": 0.9575692874487147, "grad_norm": 0.349570631980896, "learning_rate": 0.0001538351671915063, "loss": 11.687, "step": 45745 }, { "epoch": 0.9575902202126769, "grad_norm": 0.33717256784439087, "learning_rate": 0.00015383331948664014, "loss": 11.663, "step": 45746 }, { "epoch": 0.957611152976639, "grad_norm": 0.32371965050697327, "learning_rate": 0.00015383147175589522, "loss": 11.6492, "step": 45747 }, { "epoch": 0.9576320857406012, "grad_norm": 0.38960930705070496, "learning_rate": 0.00015382962399927247, "loss": 11.6786, "step": 45748 }, { "epoch": 0.9576530185045633, "grad_norm": 0.282726526260376, "learning_rate": 0.0001538277762167727, "loss": 11.6491, "step": 45749 }, { "epoch": 0.9576739512685255, "grad_norm": 0.28170904517173767, "learning_rate": 0.00015382592840839689, "loss": 11.6717, "step": 45750 }, { "epoch": 0.9576948840324877, "grad_norm": 0.3465290367603302, "learning_rate": 0.00015382408057414582, "loss": 11.6858, "step": 45751 }, { "epoch": 0.9577158167964498, "grad_norm": 0.43031826615333557, "learning_rate": 0.00015382223271402046, "loss": 11.6783, "step": 45752 }, { "epoch": 0.957736749560412, "grad_norm": 0.2848462164402008, "learning_rate": 0.0001538203848280217, "loss": 11.6837, "step": 45753 }, { "epoch": 0.9577576823243741, "grad_norm": 0.2367749661207199, "learning_rate": 0.00015381853691615036, "loss": 11.6534, "step": 45754 }, { "epoch": 0.9577786150883363, "grad_norm": 0.3324018120765686, "learning_rate": 0.0001538166889784074, "loss": 11.6823, "step": 45755 }, { "epoch": 0.9577995478522984, "grad_norm": 0.27425718307495117, "learning_rate": 0.00015381484101479367, "loss": 11.6675, "step": 45756 }, { "epoch": 0.9578204806162606, "grad_norm": 0.3347249925136566, "learning_rate": 0.00015381299302531006, "loss": 11.6656, "step": 45757 }, { "epoch": 0.9578414133802228, "grad_norm": 0.26044782996177673, "learning_rate": 0.00015381114500995747, "loss": 11.6785, "step": 45758 }, { "epoch": 0.9578623461441849, "grad_norm": 0.30534735321998596, "learning_rate": 0.00015380929696873677, "loss": 11.6892, "step": 45759 }, { "epoch": 0.9578832789081471, "grad_norm": 0.2524673342704773, "learning_rate": 0.0001538074489016489, "loss": 11.6785, "step": 45760 }, { "epoch": 0.9579042116721092, "grad_norm": 0.3744480013847351, "learning_rate": 0.00015380560080869467, "loss": 11.6773, "step": 45761 }, { "epoch": 0.9579251444360714, "grad_norm": 0.28590360283851624, "learning_rate": 0.00015380375268987505, "loss": 11.6731, "step": 45762 }, { "epoch": 0.9579460772000334, "grad_norm": 0.3256748914718628, "learning_rate": 0.00015380190454519085, "loss": 11.6686, "step": 45763 }, { "epoch": 0.9579670099639956, "grad_norm": 0.2824656367301941, "learning_rate": 0.00015380005637464302, "loss": 11.6587, "step": 45764 }, { "epoch": 0.9579879427279578, "grad_norm": 0.31147024035453796, "learning_rate": 0.0001537982081782324, "loss": 11.6663, "step": 45765 }, { "epoch": 0.9580088754919199, "grad_norm": 0.3690294325351715, "learning_rate": 0.00015379635995595994, "loss": 11.6736, "step": 45766 }, { "epoch": 0.9580298082558821, "grad_norm": 0.2910442352294922, "learning_rate": 0.00015379451170782646, "loss": 11.6762, "step": 45767 }, { "epoch": 0.9580507410198442, "grad_norm": 0.32968953251838684, "learning_rate": 0.0001537926634338329, "loss": 11.6809, "step": 45768 }, { "epoch": 0.9580716737838064, "grad_norm": 0.37792566418647766, "learning_rate": 0.0001537908151339801, "loss": 11.6533, "step": 45769 }, { "epoch": 0.9580926065477686, "grad_norm": 0.27046024799346924, "learning_rate": 0.00015378896680826902, "loss": 11.6691, "step": 45770 }, { "epoch": 0.9581135393117307, "grad_norm": 0.278801292181015, "learning_rate": 0.00015378711845670049, "loss": 11.6705, "step": 45771 }, { "epoch": 0.9581344720756929, "grad_norm": 0.3734089732170105, "learning_rate": 0.0001537852700792754, "loss": 11.6576, "step": 45772 }, { "epoch": 0.958155404839655, "grad_norm": 0.44935330748558044, "learning_rate": 0.00015378342167599466, "loss": 11.6883, "step": 45773 }, { "epoch": 0.9581763376036172, "grad_norm": 0.3184831142425537, "learning_rate": 0.00015378157324685913, "loss": 11.674, "step": 45774 }, { "epoch": 0.9581972703675793, "grad_norm": 0.3081359267234802, "learning_rate": 0.00015377972479186977, "loss": 11.666, "step": 45775 }, { "epoch": 0.9582182031315415, "grad_norm": 0.3466647267341614, "learning_rate": 0.0001537778763110274, "loss": 11.675, "step": 45776 }, { "epoch": 0.9582391358955037, "grad_norm": 0.33592134714126587, "learning_rate": 0.00015377602780433292, "loss": 11.6755, "step": 45777 }, { "epoch": 0.9582600686594658, "grad_norm": 0.41916903853416443, "learning_rate": 0.00015377417927178722, "loss": 11.6584, "step": 45778 }, { "epoch": 0.958281001423428, "grad_norm": 0.5287811756134033, "learning_rate": 0.00015377233071339118, "loss": 11.6747, "step": 45779 }, { "epoch": 0.9583019341873901, "grad_norm": 0.25457853078842163, "learning_rate": 0.00015377048212914573, "loss": 11.6894, "step": 45780 }, { "epoch": 0.9583228669513523, "grad_norm": 0.25734493136405945, "learning_rate": 0.0001537686335190517, "loss": 11.6722, "step": 45781 }, { "epoch": 0.9583437997153144, "grad_norm": 0.3960837721824646, "learning_rate": 0.00015376678488311006, "loss": 11.6697, "step": 45782 }, { "epoch": 0.9583647324792766, "grad_norm": 0.350699245929718, "learning_rate": 0.0001537649362213216, "loss": 11.6749, "step": 45783 }, { "epoch": 0.9583856652432388, "grad_norm": 0.3950478434562683, "learning_rate": 0.0001537630875336873, "loss": 11.6763, "step": 45784 }, { "epoch": 0.9584065980072008, "grad_norm": 0.39975467324256897, "learning_rate": 0.00015376123882020797, "loss": 11.6765, "step": 45785 }, { "epoch": 0.958427530771163, "grad_norm": 0.2864973247051239, "learning_rate": 0.00015375939008088456, "loss": 11.6837, "step": 45786 }, { "epoch": 0.9584484635351251, "grad_norm": 0.31292104721069336, "learning_rate": 0.00015375754131571794, "loss": 11.664, "step": 45787 }, { "epoch": 0.9584693962990873, "grad_norm": 0.3477552831172943, "learning_rate": 0.00015375569252470896, "loss": 11.6446, "step": 45788 }, { "epoch": 0.9584903290630495, "grad_norm": 0.3426627516746521, "learning_rate": 0.00015375384370785856, "loss": 11.6908, "step": 45789 }, { "epoch": 0.9585112618270116, "grad_norm": 0.3676573932170868, "learning_rate": 0.0001537519948651676, "loss": 11.6734, "step": 45790 }, { "epoch": 0.9585321945909738, "grad_norm": 0.35137856006622314, "learning_rate": 0.000153750145996637, "loss": 11.6774, "step": 45791 }, { "epoch": 0.9585531273549359, "grad_norm": 0.31393927335739136, "learning_rate": 0.0001537482971022676, "loss": 11.6744, "step": 45792 }, { "epoch": 0.9585740601188981, "grad_norm": 0.3415796756744385, "learning_rate": 0.00015374644818206035, "loss": 11.6741, "step": 45793 }, { "epoch": 0.9585949928828602, "grad_norm": 0.3312261700630188, "learning_rate": 0.0001537445992360161, "loss": 11.6703, "step": 45794 }, { "epoch": 0.9586159256468224, "grad_norm": 0.28872862458229065, "learning_rate": 0.0001537427502641357, "loss": 11.6791, "step": 45795 }, { "epoch": 0.9586368584107846, "grad_norm": 0.3025214374065399, "learning_rate": 0.0001537409012664201, "loss": 11.6591, "step": 45796 }, { "epoch": 0.9586577911747467, "grad_norm": 0.3091329038143158, "learning_rate": 0.00015373905224287023, "loss": 11.6786, "step": 45797 }, { "epoch": 0.9586787239387089, "grad_norm": 0.33152052760124207, "learning_rate": 0.00015373720319348687, "loss": 11.6725, "step": 45798 }, { "epoch": 0.958699656702671, "grad_norm": 0.35507485270500183, "learning_rate": 0.00015373535411827098, "loss": 11.6816, "step": 45799 }, { "epoch": 0.9587205894666332, "grad_norm": 0.6547839641571045, "learning_rate": 0.00015373350501722338, "loss": 11.571, "step": 45800 }, { "epoch": 0.9587415222305953, "grad_norm": 0.31097763776779175, "learning_rate": 0.00015373165589034507, "loss": 11.6481, "step": 45801 }, { "epoch": 0.9587624549945575, "grad_norm": 0.26508036255836487, "learning_rate": 0.00015372980673763684, "loss": 11.6618, "step": 45802 }, { "epoch": 0.9587833877585197, "grad_norm": 0.3340788781642914, "learning_rate": 0.0001537279575590996, "loss": 11.6691, "step": 45803 }, { "epoch": 0.9588043205224818, "grad_norm": 0.3844457268714905, "learning_rate": 0.00015372610835473428, "loss": 11.6803, "step": 45804 }, { "epoch": 0.958825253286444, "grad_norm": 0.3965802490711212, "learning_rate": 0.00015372425912454175, "loss": 11.659, "step": 45805 }, { "epoch": 0.9588461860504061, "grad_norm": 0.34806880354881287, "learning_rate": 0.0001537224098685229, "loss": 11.6639, "step": 45806 }, { "epoch": 0.9588671188143683, "grad_norm": 0.3314175307750702, "learning_rate": 0.0001537205605866786, "loss": 11.6657, "step": 45807 }, { "epoch": 0.9588880515783305, "grad_norm": 0.2792169153690338, "learning_rate": 0.0001537187112790097, "loss": 11.6867, "step": 45808 }, { "epoch": 0.9589089843422925, "grad_norm": 0.3057829737663269, "learning_rate": 0.0001537168619455172, "loss": 11.6716, "step": 45809 }, { "epoch": 0.9589299171062547, "grad_norm": 0.3178230822086334, "learning_rate": 0.00015371501258620193, "loss": 11.6629, "step": 45810 }, { "epoch": 0.9589508498702168, "grad_norm": 0.3561815321445465, "learning_rate": 0.00015371316320106476, "loss": 11.6565, "step": 45811 }, { "epoch": 0.958971782634179, "grad_norm": 0.36970898509025574, "learning_rate": 0.00015371131379010656, "loss": 11.657, "step": 45812 }, { "epoch": 0.9589927153981411, "grad_norm": 0.28892555832862854, "learning_rate": 0.00015370946435332828, "loss": 11.6749, "step": 45813 }, { "epoch": 0.9590136481621033, "grad_norm": 0.27778875827789307, "learning_rate": 0.00015370761489073083, "loss": 11.6858, "step": 45814 }, { "epoch": 0.9590345809260655, "grad_norm": 0.35445231199264526, "learning_rate": 0.00015370576540231498, "loss": 11.6597, "step": 45815 }, { "epoch": 0.9590555136900276, "grad_norm": 0.27070683240890503, "learning_rate": 0.00015370391588808175, "loss": 11.6628, "step": 45816 }, { "epoch": 0.9590764464539898, "grad_norm": 0.28020626306533813, "learning_rate": 0.00015370206634803194, "loss": 11.6764, "step": 45817 }, { "epoch": 0.9590973792179519, "grad_norm": 0.3081621825695038, "learning_rate": 0.00015370021678216649, "loss": 11.6675, "step": 45818 }, { "epoch": 0.9591183119819141, "grad_norm": 0.34486886858940125, "learning_rate": 0.00015369836719048626, "loss": 11.6742, "step": 45819 }, { "epoch": 0.9591392447458762, "grad_norm": 0.27469900250434875, "learning_rate": 0.00015369651757299213, "loss": 11.6614, "step": 45820 }, { "epoch": 0.9591601775098384, "grad_norm": 0.45000332593917847, "learning_rate": 0.00015369466792968504, "loss": 11.6616, "step": 45821 }, { "epoch": 0.9591811102738006, "grad_norm": 0.34764719009399414, "learning_rate": 0.00015369281826056584, "loss": 11.6692, "step": 45822 }, { "epoch": 0.9592020430377627, "grad_norm": 0.3675905466079712, "learning_rate": 0.00015369096856563544, "loss": 11.6825, "step": 45823 }, { "epoch": 0.9592229758017249, "grad_norm": 0.35833266377449036, "learning_rate": 0.00015368911884489469, "loss": 11.6579, "step": 45824 }, { "epoch": 0.959243908565687, "grad_norm": 0.27725547552108765, "learning_rate": 0.0001536872690983445, "loss": 11.6597, "step": 45825 }, { "epoch": 0.9592648413296492, "grad_norm": 0.3561766445636749, "learning_rate": 0.00015368541932598578, "loss": 11.6809, "step": 45826 }, { "epoch": 0.9592857740936113, "grad_norm": 0.2682541608810425, "learning_rate": 0.00015368356952781938, "loss": 11.6655, "step": 45827 }, { "epoch": 0.9593067068575735, "grad_norm": 0.44152411818504333, "learning_rate": 0.00015368171970384624, "loss": 11.677, "step": 45828 }, { "epoch": 0.9593276396215357, "grad_norm": 0.3084193170070648, "learning_rate": 0.00015367986985406718, "loss": 11.6725, "step": 45829 }, { "epoch": 0.9593485723854978, "grad_norm": 0.3624635934829712, "learning_rate": 0.0001536780199784832, "loss": 11.6663, "step": 45830 }, { "epoch": 0.95936950514946, "grad_norm": 0.31383559107780457, "learning_rate": 0.00015367617007709504, "loss": 11.66, "step": 45831 }, { "epoch": 0.959390437913422, "grad_norm": 0.3413058817386627, "learning_rate": 0.00015367432014990376, "loss": 11.6846, "step": 45832 }, { "epoch": 0.9594113706773842, "grad_norm": 0.353900283575058, "learning_rate": 0.0001536724701969101, "loss": 11.6547, "step": 45833 }, { "epoch": 0.9594323034413464, "grad_norm": 0.28611108660697937, "learning_rate": 0.00015367062021811497, "loss": 11.6655, "step": 45834 }, { "epoch": 0.9594532362053085, "grad_norm": 0.29763999581336975, "learning_rate": 0.00015366877021351938, "loss": 11.6588, "step": 45835 }, { "epoch": 0.9594741689692707, "grad_norm": 0.3545379042625427, "learning_rate": 0.0001536669201831241, "loss": 11.6602, "step": 45836 }, { "epoch": 0.9594951017332328, "grad_norm": 0.28868818283081055, "learning_rate": 0.00015366507012693007, "loss": 11.6745, "step": 45837 }, { "epoch": 0.959516034497195, "grad_norm": 0.31692832708358765, "learning_rate": 0.00015366322004493814, "loss": 11.6733, "step": 45838 }, { "epoch": 0.9595369672611571, "grad_norm": 0.273823082447052, "learning_rate": 0.00015366136993714922, "loss": 11.6657, "step": 45839 }, { "epoch": 0.9595579000251193, "grad_norm": 0.3542816936969757, "learning_rate": 0.00015365951980356423, "loss": 11.6759, "step": 45840 }, { "epoch": 0.9595788327890815, "grad_norm": 0.2936948835849762, "learning_rate": 0.000153657669644184, "loss": 11.6536, "step": 45841 }, { "epoch": 0.9595997655530436, "grad_norm": 0.24624405801296234, "learning_rate": 0.0001536558194590095, "loss": 11.6524, "step": 45842 }, { "epoch": 0.9596206983170058, "grad_norm": 0.32321327924728394, "learning_rate": 0.00015365396924804157, "loss": 11.6725, "step": 45843 }, { "epoch": 0.9596416310809679, "grad_norm": 0.29504647850990295, "learning_rate": 0.00015365211901128108, "loss": 11.6672, "step": 45844 }, { "epoch": 0.9596625638449301, "grad_norm": 0.258293479681015, "learning_rate": 0.00015365026874872894, "loss": 11.6552, "step": 45845 }, { "epoch": 0.9596834966088922, "grad_norm": 0.2706822156906128, "learning_rate": 0.00015364841846038604, "loss": 11.6655, "step": 45846 }, { "epoch": 0.9597044293728544, "grad_norm": 0.4120924770832062, "learning_rate": 0.00015364656814625328, "loss": 11.6954, "step": 45847 }, { "epoch": 0.9597253621368166, "grad_norm": 0.44740763306617737, "learning_rate": 0.00015364471780633153, "loss": 11.6756, "step": 45848 }, { "epoch": 0.9597462949007787, "grad_norm": 0.2756168246269226, "learning_rate": 0.0001536428674406217, "loss": 11.651, "step": 45849 }, { "epoch": 0.9597672276647409, "grad_norm": 0.31767815351486206, "learning_rate": 0.00015364101704912466, "loss": 11.6553, "step": 45850 }, { "epoch": 0.959788160428703, "grad_norm": 0.4121757745742798, "learning_rate": 0.0001536391666318413, "loss": 11.6784, "step": 45851 }, { "epoch": 0.9598090931926652, "grad_norm": 0.428061306476593, "learning_rate": 0.00015363731618877252, "loss": 11.6859, "step": 45852 }, { "epoch": 0.9598300259566274, "grad_norm": 0.25487491488456726, "learning_rate": 0.00015363546571991923, "loss": 11.6513, "step": 45853 }, { "epoch": 0.9598509587205895, "grad_norm": 0.3200797140598297, "learning_rate": 0.00015363361522528228, "loss": 11.6633, "step": 45854 }, { "epoch": 0.9598718914845517, "grad_norm": 0.27724963426589966, "learning_rate": 0.00015363176470486255, "loss": 11.6679, "step": 45855 }, { "epoch": 0.9598928242485137, "grad_norm": 0.3665260374546051, "learning_rate": 0.000153629914158661, "loss": 11.6481, "step": 45856 }, { "epoch": 0.9599137570124759, "grad_norm": 0.45540758967399597, "learning_rate": 0.00015362806358667846, "loss": 11.6865, "step": 45857 }, { "epoch": 0.959934689776438, "grad_norm": 0.3004680573940277, "learning_rate": 0.00015362621298891583, "loss": 11.6602, "step": 45858 }, { "epoch": 0.9599556225404002, "grad_norm": 0.301639199256897, "learning_rate": 0.000153624362365374, "loss": 11.656, "step": 45859 }, { "epoch": 0.9599765553043624, "grad_norm": 0.27011892199516296, "learning_rate": 0.0001536225117160539, "loss": 11.6541, "step": 45860 }, { "epoch": 0.9599974880683245, "grad_norm": 0.31380000710487366, "learning_rate": 0.00015362066104095633, "loss": 11.678, "step": 45861 }, { "epoch": 0.9600184208322867, "grad_norm": 0.3338259756565094, "learning_rate": 0.00015361881034008229, "loss": 11.6625, "step": 45862 }, { "epoch": 0.9600393535962488, "grad_norm": 0.3175789713859558, "learning_rate": 0.00015361695961343255, "loss": 11.6672, "step": 45863 }, { "epoch": 0.960060286360211, "grad_norm": 0.3014242947101593, "learning_rate": 0.00015361510886100808, "loss": 11.661, "step": 45864 }, { "epoch": 0.9600812191241731, "grad_norm": 0.2990833818912506, "learning_rate": 0.0001536132580828098, "loss": 11.6768, "step": 45865 }, { "epoch": 0.9601021518881353, "grad_norm": 0.31595659255981445, "learning_rate": 0.0001536114072788385, "loss": 11.6723, "step": 45866 }, { "epoch": 0.9601230846520975, "grad_norm": 0.31889671087265015, "learning_rate": 0.0001536095564490952, "loss": 11.6737, "step": 45867 }, { "epoch": 0.9601440174160596, "grad_norm": 0.26370325684547424, "learning_rate": 0.00015360770559358063, "loss": 11.6606, "step": 45868 }, { "epoch": 0.9601649501800218, "grad_norm": 0.3360725939273834, "learning_rate": 0.00015360585471229578, "loss": 11.6523, "step": 45869 }, { "epoch": 0.9601858829439839, "grad_norm": 0.4001435935497284, "learning_rate": 0.00015360400380524152, "loss": 11.6654, "step": 45870 }, { "epoch": 0.9602068157079461, "grad_norm": 0.2991921305656433, "learning_rate": 0.00015360215287241874, "loss": 11.6461, "step": 45871 }, { "epoch": 0.9602277484719083, "grad_norm": 0.37174493074417114, "learning_rate": 0.00015360030191382838, "loss": 11.6785, "step": 45872 }, { "epoch": 0.9602486812358704, "grad_norm": 0.3533962666988373, "learning_rate": 0.00015359845092947121, "loss": 11.654, "step": 45873 }, { "epoch": 0.9602696139998326, "grad_norm": 0.3870680630207062, "learning_rate": 0.00015359659991934823, "loss": 11.6661, "step": 45874 }, { "epoch": 0.9602905467637947, "grad_norm": 0.35894080996513367, "learning_rate": 0.00015359474888346027, "loss": 11.672, "step": 45875 }, { "epoch": 0.9603114795277569, "grad_norm": 0.325295627117157, "learning_rate": 0.00015359289782180828, "loss": 11.6714, "step": 45876 }, { "epoch": 0.960332412291719, "grad_norm": 0.32922300696372986, "learning_rate": 0.00015359104673439306, "loss": 11.6603, "step": 45877 }, { "epoch": 0.9603533450556812, "grad_norm": 0.2838783264160156, "learning_rate": 0.00015358919562121555, "loss": 11.6576, "step": 45878 }, { "epoch": 0.9603742778196434, "grad_norm": 0.2835922837257385, "learning_rate": 0.00015358734448227667, "loss": 11.6555, "step": 45879 }, { "epoch": 0.9603952105836054, "grad_norm": 0.3369218707084656, "learning_rate": 0.0001535854933175773, "loss": 11.6628, "step": 45880 }, { "epoch": 0.9604161433475676, "grad_norm": 0.3942439556121826, "learning_rate": 0.00015358364212711827, "loss": 11.63, "step": 45881 }, { "epoch": 0.9604370761115297, "grad_norm": 0.3348906338214874, "learning_rate": 0.0001535817909109005, "loss": 11.6779, "step": 45882 }, { "epoch": 0.9604580088754919, "grad_norm": 0.30571481585502625, "learning_rate": 0.00015357993966892493, "loss": 11.6713, "step": 45883 }, { "epoch": 0.960478941639454, "grad_norm": 0.34829702973365784, "learning_rate": 0.0001535780884011924, "loss": 11.6763, "step": 45884 }, { "epoch": 0.9604998744034162, "grad_norm": 0.25334975123405457, "learning_rate": 0.0001535762371077038, "loss": 11.6471, "step": 45885 }, { "epoch": 0.9605208071673784, "grad_norm": 0.3320772349834442, "learning_rate": 0.00015357438578846, "loss": 11.6895, "step": 45886 }, { "epoch": 0.9605417399313405, "grad_norm": 0.34402376413345337, "learning_rate": 0.000153572534443462, "loss": 11.6657, "step": 45887 }, { "epoch": 0.9605626726953027, "grad_norm": 0.29091450572013855, "learning_rate": 0.00015357068307271052, "loss": 11.6666, "step": 45888 }, { "epoch": 0.9605836054592648, "grad_norm": 0.3487594425678253, "learning_rate": 0.00015356883167620656, "loss": 11.667, "step": 45889 }, { "epoch": 0.960604538223227, "grad_norm": 0.2870478630065918, "learning_rate": 0.000153566980253951, "loss": 11.6621, "step": 45890 }, { "epoch": 0.9606254709871892, "grad_norm": 0.29026612639427185, "learning_rate": 0.00015356512880594474, "loss": 11.6683, "step": 45891 }, { "epoch": 0.9606464037511513, "grad_norm": 0.2949596643447876, "learning_rate": 0.00015356327733218863, "loss": 11.6741, "step": 45892 }, { "epoch": 0.9606673365151135, "grad_norm": 0.3497641682624817, "learning_rate": 0.00015356142583268357, "loss": 11.6814, "step": 45893 }, { "epoch": 0.9606882692790756, "grad_norm": 0.3072012960910797, "learning_rate": 0.00015355957430743052, "loss": 11.6678, "step": 45894 }, { "epoch": 0.9607092020430378, "grad_norm": 0.41558346152305603, "learning_rate": 0.00015355772275643022, "loss": 11.6952, "step": 45895 }, { "epoch": 0.9607301348069999, "grad_norm": 0.4148571789264679, "learning_rate": 0.0001535558711796837, "loss": 11.6834, "step": 45896 }, { "epoch": 0.9607510675709621, "grad_norm": 0.3479454219341278, "learning_rate": 0.0001535540195771918, "loss": 11.6593, "step": 45897 }, { "epoch": 0.9607720003349243, "grad_norm": 0.4141637980937958, "learning_rate": 0.00015355216794895538, "loss": 11.6848, "step": 45898 }, { "epoch": 0.9607929330988864, "grad_norm": 0.3451879620552063, "learning_rate": 0.0001535503162949754, "loss": 11.6653, "step": 45899 }, { "epoch": 0.9608138658628486, "grad_norm": 0.2937914729118347, "learning_rate": 0.00015354846461525264, "loss": 11.6718, "step": 45900 }, { "epoch": 0.9608347986268106, "grad_norm": 0.30477797985076904, "learning_rate": 0.00015354661290978815, "loss": 11.6752, "step": 45901 }, { "epoch": 0.9608557313907728, "grad_norm": 0.2781679034233093, "learning_rate": 0.00015354476117858268, "loss": 11.659, "step": 45902 }, { "epoch": 0.9608766641547349, "grad_norm": 0.26461634039878845, "learning_rate": 0.00015354290942163717, "loss": 11.6701, "step": 45903 }, { "epoch": 0.9608975969186971, "grad_norm": 0.41882315278053284, "learning_rate": 0.00015354105763895252, "loss": 11.6991, "step": 45904 }, { "epoch": 0.9609185296826593, "grad_norm": 0.3277832269668579, "learning_rate": 0.0001535392058305296, "loss": 11.6633, "step": 45905 }, { "epoch": 0.9609394624466214, "grad_norm": 0.30827122926712036, "learning_rate": 0.00015353735399636933, "loss": 11.6569, "step": 45906 }, { "epoch": 0.9609603952105836, "grad_norm": 0.39235472679138184, "learning_rate": 0.00015353550213647257, "loss": 11.68, "step": 45907 }, { "epoch": 0.9609813279745457, "grad_norm": 0.3236386775970459, "learning_rate": 0.0001535336502508402, "loss": 11.669, "step": 45908 }, { "epoch": 0.9610022607385079, "grad_norm": 0.27850207686424255, "learning_rate": 0.0001535317983394732, "loss": 11.6689, "step": 45909 }, { "epoch": 0.9610231935024701, "grad_norm": 0.34232062101364136, "learning_rate": 0.00015352994640237234, "loss": 11.6793, "step": 45910 }, { "epoch": 0.9610441262664322, "grad_norm": 0.30763858556747437, "learning_rate": 0.00015352809443953852, "loss": 11.667, "step": 45911 }, { "epoch": 0.9610650590303944, "grad_norm": 0.3279344141483307, "learning_rate": 0.00015352624245097273, "loss": 11.6648, "step": 45912 }, { "epoch": 0.9610859917943565, "grad_norm": 0.28666895627975464, "learning_rate": 0.0001535243904366758, "loss": 11.6711, "step": 45913 }, { "epoch": 0.9611069245583187, "grad_norm": 0.34563755989074707, "learning_rate": 0.00015352253839664862, "loss": 11.6816, "step": 45914 }, { "epoch": 0.9611278573222808, "grad_norm": 0.33123138546943665, "learning_rate": 0.00015352068633089208, "loss": 11.6564, "step": 45915 }, { "epoch": 0.961148790086243, "grad_norm": 0.4629569351673126, "learning_rate": 0.00015351883423940708, "loss": 11.6813, "step": 45916 }, { "epoch": 0.9611697228502052, "grad_norm": 0.3488124907016754, "learning_rate": 0.0001535169821221945, "loss": 11.6727, "step": 45917 }, { "epoch": 0.9611906556141673, "grad_norm": 0.3422006368637085, "learning_rate": 0.0001535151299792552, "loss": 11.6776, "step": 45918 }, { "epoch": 0.9612115883781295, "grad_norm": 0.297398179769516, "learning_rate": 0.00015351327781059016, "loss": 11.6729, "step": 45919 }, { "epoch": 0.9612325211420916, "grad_norm": 0.35809117555618286, "learning_rate": 0.00015351142561620016, "loss": 11.666, "step": 45920 }, { "epoch": 0.9612534539060538, "grad_norm": 0.2706533372402191, "learning_rate": 0.0001535095733960862, "loss": 11.6496, "step": 45921 }, { "epoch": 0.9612743866700159, "grad_norm": 0.4124881625175476, "learning_rate": 0.00015350772115024907, "loss": 11.6734, "step": 45922 }, { "epoch": 0.9612953194339781, "grad_norm": 0.3203539550304413, "learning_rate": 0.00015350586887868972, "loss": 11.6683, "step": 45923 }, { "epoch": 0.9613162521979403, "grad_norm": 0.32783621549606323, "learning_rate": 0.00015350401658140904, "loss": 11.6698, "step": 45924 }, { "epoch": 0.9613371849619023, "grad_norm": 0.3148503005504608, "learning_rate": 0.0001535021642584079, "loss": 11.6728, "step": 45925 }, { "epoch": 0.9613581177258645, "grad_norm": 0.30812159180641174, "learning_rate": 0.00015350031190968722, "loss": 11.6527, "step": 45926 }, { "epoch": 0.9613790504898266, "grad_norm": 0.33327457308769226, "learning_rate": 0.0001534984595352478, "loss": 11.6675, "step": 45927 }, { "epoch": 0.9613999832537888, "grad_norm": 0.29133912920951843, "learning_rate": 0.00015349660713509066, "loss": 11.676, "step": 45928 }, { "epoch": 0.961420916017751, "grad_norm": 0.317028284072876, "learning_rate": 0.00015349475470921662, "loss": 11.6766, "step": 45929 }, { "epoch": 0.9614418487817131, "grad_norm": 0.27013397216796875, "learning_rate": 0.00015349290225762658, "loss": 11.6546, "step": 45930 }, { "epoch": 0.9614627815456753, "grad_norm": 0.28844138979911804, "learning_rate": 0.00015349104978032142, "loss": 11.666, "step": 45931 }, { "epoch": 0.9614837143096374, "grad_norm": 0.3223290741443634, "learning_rate": 0.00015348919727730203, "loss": 11.6604, "step": 45932 }, { "epoch": 0.9615046470735996, "grad_norm": 0.26025694608688354, "learning_rate": 0.00015348734474856932, "loss": 11.6659, "step": 45933 }, { "epoch": 0.9615255798375617, "grad_norm": 0.2856837213039398, "learning_rate": 0.0001534854921941242, "loss": 11.6704, "step": 45934 }, { "epoch": 0.9615465126015239, "grad_norm": 0.3436054587364197, "learning_rate": 0.00015348363961396747, "loss": 11.6926, "step": 45935 }, { "epoch": 0.9615674453654861, "grad_norm": 0.3410687744617462, "learning_rate": 0.00015348178700810014, "loss": 11.6697, "step": 45936 }, { "epoch": 0.9615883781294482, "grad_norm": 0.29681745171546936, "learning_rate": 0.00015347993437652303, "loss": 11.6668, "step": 45937 }, { "epoch": 0.9616093108934104, "grad_norm": 0.27879682183265686, "learning_rate": 0.00015347808171923703, "loss": 11.6643, "step": 45938 }, { "epoch": 0.9616302436573725, "grad_norm": 0.315548300743103, "learning_rate": 0.00015347622903624303, "loss": 11.6635, "step": 45939 }, { "epoch": 0.9616511764213347, "grad_norm": 0.3280337154865265, "learning_rate": 0.00015347437632754195, "loss": 11.6668, "step": 45940 }, { "epoch": 0.9616721091852968, "grad_norm": 0.26797911524772644, "learning_rate": 0.00015347252359313468, "loss": 11.6513, "step": 45941 }, { "epoch": 0.961693041949259, "grad_norm": 0.3329677879810333, "learning_rate": 0.0001534706708330221, "loss": 11.6613, "step": 45942 }, { "epoch": 0.9617139747132212, "grad_norm": 0.382267028093338, "learning_rate": 0.0001534688180472051, "loss": 11.6652, "step": 45943 }, { "epoch": 0.9617349074771833, "grad_norm": 0.30618637800216675, "learning_rate": 0.00015346696523568456, "loss": 11.665, "step": 45944 }, { "epoch": 0.9617558402411455, "grad_norm": 0.2849540114402771, "learning_rate": 0.00015346511239846134, "loss": 11.6696, "step": 45945 }, { "epoch": 0.9617767730051076, "grad_norm": 0.3369750678539276, "learning_rate": 0.0001534632595355364, "loss": 11.6742, "step": 45946 }, { "epoch": 0.9617977057690698, "grad_norm": 0.284417986869812, "learning_rate": 0.0001534614066469106, "loss": 11.6665, "step": 45947 }, { "epoch": 0.961818638533032, "grad_norm": 0.3054497539997101, "learning_rate": 0.00015345955373258485, "loss": 11.6705, "step": 45948 }, { "epoch": 0.961839571296994, "grad_norm": 0.2921712100505829, "learning_rate": 0.00015345770079256, "loss": 11.6707, "step": 45949 }, { "epoch": 0.9618605040609562, "grad_norm": 0.42829179763793945, "learning_rate": 0.00015345584782683696, "loss": 11.691, "step": 45950 }, { "epoch": 0.9618814368249183, "grad_norm": 0.2870333790779114, "learning_rate": 0.0001534539948354166, "loss": 11.6694, "step": 45951 }, { "epoch": 0.9619023695888805, "grad_norm": 0.36078450083732605, "learning_rate": 0.00015345214181829988, "loss": 11.6671, "step": 45952 }, { "epoch": 0.9619233023528426, "grad_norm": 0.29555076360702515, "learning_rate": 0.00015345028877548762, "loss": 11.6544, "step": 45953 }, { "epoch": 0.9619442351168048, "grad_norm": 0.4265221357345581, "learning_rate": 0.00015344843570698073, "loss": 11.665, "step": 45954 }, { "epoch": 0.961965167880767, "grad_norm": 0.28867393732070923, "learning_rate": 0.0001534465826127801, "loss": 11.6496, "step": 45955 }, { "epoch": 0.9619861006447291, "grad_norm": 0.28880950808525085, "learning_rate": 0.00015344472949288667, "loss": 11.6772, "step": 45956 }, { "epoch": 0.9620070334086913, "grad_norm": 0.2960171699523926, "learning_rate": 0.00015344287634730126, "loss": 11.6641, "step": 45957 }, { "epoch": 0.9620279661726534, "grad_norm": 0.24786953628063202, "learning_rate": 0.00015344102317602478, "loss": 11.6653, "step": 45958 }, { "epoch": 0.9620488989366156, "grad_norm": 0.2886289358139038, "learning_rate": 0.00015343916997905815, "loss": 11.6579, "step": 45959 }, { "epoch": 0.9620698317005777, "grad_norm": 0.29295000433921814, "learning_rate": 0.00015343731675640222, "loss": 11.6731, "step": 45960 }, { "epoch": 0.9620907644645399, "grad_norm": 0.3655818998813629, "learning_rate": 0.0001534354635080579, "loss": 11.6802, "step": 45961 }, { "epoch": 0.9621116972285021, "grad_norm": 0.2950747013092041, "learning_rate": 0.00015343361023402606, "loss": 11.6673, "step": 45962 }, { "epoch": 0.9621326299924642, "grad_norm": 0.28567174077033997, "learning_rate": 0.00015343175693430767, "loss": 11.649, "step": 45963 }, { "epoch": 0.9621535627564264, "grad_norm": 0.2365431785583496, "learning_rate": 0.00015342990360890353, "loss": 11.6635, "step": 45964 }, { "epoch": 0.9621744955203885, "grad_norm": 0.4494869112968445, "learning_rate": 0.00015342805025781456, "loss": 11.6696, "step": 45965 }, { "epoch": 0.9621954282843507, "grad_norm": 0.32024267315864563, "learning_rate": 0.00015342619688104168, "loss": 11.6825, "step": 45966 }, { "epoch": 0.9622163610483129, "grad_norm": 0.3141592741012573, "learning_rate": 0.00015342434347858573, "loss": 11.6581, "step": 45967 }, { "epoch": 0.962237293812275, "grad_norm": 0.3841666281223297, "learning_rate": 0.00015342249005044763, "loss": 11.6796, "step": 45968 }, { "epoch": 0.9622582265762372, "grad_norm": 0.35685425996780396, "learning_rate": 0.00015342063659662828, "loss": 11.6681, "step": 45969 }, { "epoch": 0.9622791593401993, "grad_norm": 0.33411338925361633, "learning_rate": 0.00015341878311712857, "loss": 11.6421, "step": 45970 }, { "epoch": 0.9623000921041615, "grad_norm": 0.26013487577438354, "learning_rate": 0.00015341692961194934, "loss": 11.6633, "step": 45971 }, { "epoch": 0.9623210248681235, "grad_norm": 0.37729543447494507, "learning_rate": 0.00015341507608109155, "loss": 11.6833, "step": 45972 }, { "epoch": 0.9623419576320857, "grad_norm": 0.44378674030303955, "learning_rate": 0.0001534132225245561, "loss": 11.6666, "step": 45973 }, { "epoch": 0.9623628903960479, "grad_norm": 0.29961082339286804, "learning_rate": 0.00015341136894234377, "loss": 11.6722, "step": 45974 }, { "epoch": 0.96238382316001, "grad_norm": 0.2980223596096039, "learning_rate": 0.00015340951533445558, "loss": 11.6543, "step": 45975 }, { "epoch": 0.9624047559239722, "grad_norm": 0.36669403314590454, "learning_rate": 0.00015340766170089234, "loss": 11.6573, "step": 45976 }, { "epoch": 0.9624256886879343, "grad_norm": 0.4130088686943054, "learning_rate": 0.00015340580804165498, "loss": 11.6798, "step": 45977 }, { "epoch": 0.9624466214518965, "grad_norm": 0.31563881039619446, "learning_rate": 0.00015340395435674436, "loss": 11.6728, "step": 45978 }, { "epoch": 0.9624675542158586, "grad_norm": 0.29518067836761475, "learning_rate": 0.0001534021006461614, "loss": 11.6728, "step": 45979 }, { "epoch": 0.9624884869798208, "grad_norm": 0.3213217258453369, "learning_rate": 0.00015340024690990698, "loss": 11.672, "step": 45980 }, { "epoch": 0.962509419743783, "grad_norm": 0.3696715235710144, "learning_rate": 0.00015339839314798195, "loss": 11.6632, "step": 45981 }, { "epoch": 0.9625303525077451, "grad_norm": 0.4005368649959564, "learning_rate": 0.00015339653936038733, "loss": 11.6348, "step": 45982 }, { "epoch": 0.9625512852717073, "grad_norm": 0.3477195203304291, "learning_rate": 0.00015339468554712387, "loss": 11.6624, "step": 45983 }, { "epoch": 0.9625722180356694, "grad_norm": 0.3534471094608307, "learning_rate": 0.00015339283170819252, "loss": 11.6849, "step": 45984 }, { "epoch": 0.9625931507996316, "grad_norm": 0.3267075717449188, "learning_rate": 0.00015339097784359415, "loss": 11.6674, "step": 45985 }, { "epoch": 0.9626140835635938, "grad_norm": 0.4253201186656952, "learning_rate": 0.0001533891239533297, "loss": 11.6803, "step": 45986 }, { "epoch": 0.9626350163275559, "grad_norm": 0.429551362991333, "learning_rate": 0.00015338727003740002, "loss": 11.6623, "step": 45987 }, { "epoch": 0.9626559490915181, "grad_norm": 0.25910621881484985, "learning_rate": 0.00015338541609580597, "loss": 11.67, "step": 45988 }, { "epoch": 0.9626768818554802, "grad_norm": 0.32805848121643066, "learning_rate": 0.0001533835621285485, "loss": 11.6671, "step": 45989 }, { "epoch": 0.9626978146194424, "grad_norm": 0.21224145591259003, "learning_rate": 0.00015338170813562852, "loss": 11.6677, "step": 45990 }, { "epoch": 0.9627187473834045, "grad_norm": 0.43060731887817383, "learning_rate": 0.00015337985411704685, "loss": 11.6794, "step": 45991 }, { "epoch": 0.9627396801473667, "grad_norm": 0.4376983642578125, "learning_rate": 0.00015337800007280444, "loss": 11.6802, "step": 45992 }, { "epoch": 0.9627606129113289, "grad_norm": 0.310535192489624, "learning_rate": 0.00015337614600290211, "loss": 11.6654, "step": 45993 }, { "epoch": 0.962781545675291, "grad_norm": 0.3600914776325226, "learning_rate": 0.00015337429190734084, "loss": 11.6727, "step": 45994 }, { "epoch": 0.9628024784392532, "grad_norm": 0.32992804050445557, "learning_rate": 0.00015337243778612144, "loss": 11.6593, "step": 45995 }, { "epoch": 0.9628234112032152, "grad_norm": 0.28717419505119324, "learning_rate": 0.00015337058363924485, "loss": 11.6575, "step": 45996 }, { "epoch": 0.9628443439671774, "grad_norm": 0.3169891834259033, "learning_rate": 0.00015336872946671197, "loss": 11.6733, "step": 45997 }, { "epoch": 0.9628652767311395, "grad_norm": 0.299670547246933, "learning_rate": 0.00015336687526852367, "loss": 11.6616, "step": 45998 }, { "epoch": 0.9628862094951017, "grad_norm": 0.3872482180595398, "learning_rate": 0.00015336502104468082, "loss": 11.6698, "step": 45999 }, { "epoch": 0.9629071422590639, "grad_norm": 0.2632756233215332, "learning_rate": 0.00015336316679518438, "loss": 11.6774, "step": 46000 }, { "epoch": 0.9629071422590639, "eval_loss": 11.668991088867188, "eval_runtime": 34.3308, "eval_samples_per_second": 27.992, "eval_steps_per_second": 7.02, "step": 46000 }, { "epoch": 0.962928075023026, "grad_norm": 0.41022545099258423, "learning_rate": 0.00015336131252003513, "loss": 11.6716, "step": 46001 }, { "epoch": 0.9629490077869882, "grad_norm": 0.3250223696231842, "learning_rate": 0.0001533594582192341, "loss": 11.6667, "step": 46002 }, { "epoch": 0.9629699405509503, "grad_norm": 0.2843572795391083, "learning_rate": 0.00015335760389278207, "loss": 11.6768, "step": 46003 }, { "epoch": 0.9629908733149125, "grad_norm": 0.29917722940444946, "learning_rate": 0.00015335574954067997, "loss": 11.6475, "step": 46004 }, { "epoch": 0.9630118060788746, "grad_norm": 0.43510764837265015, "learning_rate": 0.0001533538951629287, "loss": 11.6637, "step": 46005 }, { "epoch": 0.9630327388428368, "grad_norm": 0.3012597858905792, "learning_rate": 0.00015335204075952913, "loss": 11.6745, "step": 46006 }, { "epoch": 0.963053671606799, "grad_norm": 0.2729717791080475, "learning_rate": 0.0001533501863304822, "loss": 11.652, "step": 46007 }, { "epoch": 0.9630746043707611, "grad_norm": 0.2782938778400421, "learning_rate": 0.00015334833187578871, "loss": 11.6823, "step": 46008 }, { "epoch": 0.9630955371347233, "grad_norm": 0.3545534908771515, "learning_rate": 0.00015334647739544967, "loss": 11.6607, "step": 46009 }, { "epoch": 0.9631164698986854, "grad_norm": 0.39490148425102234, "learning_rate": 0.00015334462288946586, "loss": 11.6594, "step": 46010 }, { "epoch": 0.9631374026626476, "grad_norm": 0.31278422474861145, "learning_rate": 0.00015334276835783826, "loss": 11.6648, "step": 46011 }, { "epoch": 0.9631583354266098, "grad_norm": 0.378118634223938, "learning_rate": 0.0001533409138005677, "loss": 11.6594, "step": 46012 }, { "epoch": 0.9631792681905719, "grad_norm": 0.4122444689273834, "learning_rate": 0.0001533390592176551, "loss": 11.6684, "step": 46013 }, { "epoch": 0.9632002009545341, "grad_norm": 0.2990434169769287, "learning_rate": 0.00015333720460910134, "loss": 11.6692, "step": 46014 }, { "epoch": 0.9632211337184962, "grad_norm": 0.2631238102912903, "learning_rate": 0.0001533353499749073, "loss": 11.6592, "step": 46015 }, { "epoch": 0.9632420664824584, "grad_norm": 0.328481525182724, "learning_rate": 0.0001533334953150739, "loss": 11.6753, "step": 46016 }, { "epoch": 0.9632629992464204, "grad_norm": 0.3696518540382385, "learning_rate": 0.00015333164062960206, "loss": 11.6687, "step": 46017 }, { "epoch": 0.9632839320103826, "grad_norm": 0.3776189386844635, "learning_rate": 0.00015332978591849257, "loss": 11.6481, "step": 46018 }, { "epoch": 0.9633048647743448, "grad_norm": 0.3071272671222687, "learning_rate": 0.0001533279311817464, "loss": 11.6547, "step": 46019 }, { "epoch": 0.9633257975383069, "grad_norm": 0.29916906356811523, "learning_rate": 0.00015332607641936445, "loss": 11.6656, "step": 46020 }, { "epoch": 0.9633467303022691, "grad_norm": 0.288603812456131, "learning_rate": 0.00015332422163134756, "loss": 11.6624, "step": 46021 }, { "epoch": 0.9633676630662312, "grad_norm": 0.5589339137077332, "learning_rate": 0.00015332236681769667, "loss": 11.6792, "step": 46022 }, { "epoch": 0.9633885958301934, "grad_norm": 0.3090682029724121, "learning_rate": 0.00015332051197841263, "loss": 11.6681, "step": 46023 }, { "epoch": 0.9634095285941555, "grad_norm": 0.35034874081611633, "learning_rate": 0.0001533186571134964, "loss": 11.6691, "step": 46024 }, { "epoch": 0.9634304613581177, "grad_norm": 0.2967472970485687, "learning_rate": 0.00015331680222294876, "loss": 11.6801, "step": 46025 }, { "epoch": 0.9634513941220799, "grad_norm": 0.2544511556625366, "learning_rate": 0.0001533149473067707, "loss": 11.6693, "step": 46026 }, { "epoch": 0.963472326886042, "grad_norm": 0.2821993827819824, "learning_rate": 0.00015331309236496306, "loss": 11.6639, "step": 46027 }, { "epoch": 0.9634932596500042, "grad_norm": 0.35767489671707153, "learning_rate": 0.00015331123739752674, "loss": 11.6357, "step": 46028 }, { "epoch": 0.9635141924139663, "grad_norm": 0.3035854697227478, "learning_rate": 0.00015330938240446266, "loss": 11.6643, "step": 46029 }, { "epoch": 0.9635351251779285, "grad_norm": 0.28682610392570496, "learning_rate": 0.00015330752738577168, "loss": 11.679, "step": 46030 }, { "epoch": 0.9635560579418907, "grad_norm": 0.37098297476768494, "learning_rate": 0.0001533056723414547, "loss": 11.6638, "step": 46031 }, { "epoch": 0.9635769907058528, "grad_norm": 0.37882936000823975, "learning_rate": 0.00015330381727151265, "loss": 11.6649, "step": 46032 }, { "epoch": 0.963597923469815, "grad_norm": 0.29874712228775024, "learning_rate": 0.00015330196217594634, "loss": 11.6801, "step": 46033 }, { "epoch": 0.9636188562337771, "grad_norm": 0.33002275228500366, "learning_rate": 0.00015330010705475676, "loss": 11.6878, "step": 46034 }, { "epoch": 0.9636397889977393, "grad_norm": 0.3285626173019409, "learning_rate": 0.00015329825190794468, "loss": 11.6658, "step": 46035 }, { "epoch": 0.9636607217617014, "grad_norm": 0.34707358479499817, "learning_rate": 0.00015329639673551112, "loss": 11.6618, "step": 46036 }, { "epoch": 0.9636816545256636, "grad_norm": 0.3059235215187073, "learning_rate": 0.00015329454153745689, "loss": 11.6629, "step": 46037 }, { "epoch": 0.9637025872896258, "grad_norm": 0.40950462222099304, "learning_rate": 0.0001532926863137829, "loss": 11.6681, "step": 46038 }, { "epoch": 0.9637235200535879, "grad_norm": 0.25400811433792114, "learning_rate": 0.00015329083106449006, "loss": 11.6674, "step": 46039 }, { "epoch": 0.9637444528175501, "grad_norm": 0.3756873607635498, "learning_rate": 0.00015328897578957925, "loss": 11.6682, "step": 46040 }, { "epoch": 0.9637653855815121, "grad_norm": 0.3281025290489197, "learning_rate": 0.00015328712048905135, "loss": 11.6535, "step": 46041 }, { "epoch": 0.9637863183454743, "grad_norm": 0.3217350244522095, "learning_rate": 0.00015328526516290728, "loss": 11.6709, "step": 46042 }, { "epoch": 0.9638072511094364, "grad_norm": 0.3185819089412689, "learning_rate": 0.00015328340981114792, "loss": 11.6657, "step": 46043 }, { "epoch": 0.9638281838733986, "grad_norm": 0.2747771739959717, "learning_rate": 0.00015328155443377413, "loss": 11.6627, "step": 46044 }, { "epoch": 0.9638491166373608, "grad_norm": 0.38484901189804077, "learning_rate": 0.00015327969903078685, "loss": 11.6532, "step": 46045 }, { "epoch": 0.9638700494013229, "grad_norm": 0.2882980406284332, "learning_rate": 0.00015327784360218697, "loss": 11.6692, "step": 46046 }, { "epoch": 0.9638909821652851, "grad_norm": 0.2757487893104553, "learning_rate": 0.00015327598814797534, "loss": 11.6681, "step": 46047 }, { "epoch": 0.9639119149292472, "grad_norm": 0.32165417075157166, "learning_rate": 0.00015327413266815287, "loss": 11.674, "step": 46048 }, { "epoch": 0.9639328476932094, "grad_norm": 0.2843781113624573, "learning_rate": 0.00015327227716272046, "loss": 11.6639, "step": 46049 }, { "epoch": 0.9639537804571716, "grad_norm": 0.35742032527923584, "learning_rate": 0.00015327042163167898, "loss": 11.6863, "step": 46050 }, { "epoch": 0.9639747132211337, "grad_norm": 0.30248552560806274, "learning_rate": 0.00015326856607502938, "loss": 11.648, "step": 46051 }, { "epoch": 0.9639956459850959, "grad_norm": 0.2830686867237091, "learning_rate": 0.0001532667104927725, "loss": 11.6851, "step": 46052 }, { "epoch": 0.964016578749058, "grad_norm": 0.3056851923465729, "learning_rate": 0.00015326485488490922, "loss": 11.6817, "step": 46053 }, { "epoch": 0.9640375115130202, "grad_norm": 0.3806132674217224, "learning_rate": 0.00015326299925144045, "loss": 11.6633, "step": 46054 }, { "epoch": 0.9640584442769823, "grad_norm": 0.32706719636917114, "learning_rate": 0.00015326114359236713, "loss": 11.6637, "step": 46055 }, { "epoch": 0.9640793770409445, "grad_norm": 0.3101213872432709, "learning_rate": 0.00015325928790769008, "loss": 11.6663, "step": 46056 }, { "epoch": 0.9641003098049067, "grad_norm": 0.45095446705818176, "learning_rate": 0.00015325743219741025, "loss": 11.652, "step": 46057 }, { "epoch": 0.9641212425688688, "grad_norm": 0.3137134909629822, "learning_rate": 0.00015325557646152848, "loss": 11.6665, "step": 46058 }, { "epoch": 0.964142175332831, "grad_norm": 0.26184967160224915, "learning_rate": 0.0001532537207000457, "loss": 11.6652, "step": 46059 }, { "epoch": 0.9641631080967931, "grad_norm": 0.3101608157157898, "learning_rate": 0.00015325186491296278, "loss": 11.6667, "step": 46060 }, { "epoch": 0.9641840408607553, "grad_norm": 0.29839619994163513, "learning_rate": 0.00015325000910028065, "loss": 11.6506, "step": 46061 }, { "epoch": 0.9642049736247174, "grad_norm": 0.32264935970306396, "learning_rate": 0.00015324815326200018, "loss": 11.6783, "step": 46062 }, { "epoch": 0.9642259063886796, "grad_norm": 0.2689746916294098, "learning_rate": 0.00015324629739812222, "loss": 11.6604, "step": 46063 }, { "epoch": 0.9642468391526418, "grad_norm": 0.4148464798927307, "learning_rate": 0.00015324444150864767, "loss": 11.6738, "step": 46064 }, { "epoch": 0.9642677719166038, "grad_norm": 0.283130407333374, "learning_rate": 0.0001532425855935775, "loss": 11.6726, "step": 46065 }, { "epoch": 0.964288704680566, "grad_norm": 0.337173193693161, "learning_rate": 0.00015324072965291254, "loss": 11.6595, "step": 46066 }, { "epoch": 0.9643096374445281, "grad_norm": 0.3760605752468109, "learning_rate": 0.00015323887368665372, "loss": 11.6798, "step": 46067 }, { "epoch": 0.9643305702084903, "grad_norm": 0.32003363966941833, "learning_rate": 0.00015323701769480187, "loss": 11.6664, "step": 46068 }, { "epoch": 0.9643515029724525, "grad_norm": 0.3590032756328583, "learning_rate": 0.0001532351616773579, "loss": 11.6712, "step": 46069 }, { "epoch": 0.9643724357364146, "grad_norm": 0.3091728389263153, "learning_rate": 0.00015323330563432277, "loss": 11.6755, "step": 46070 }, { "epoch": 0.9643933685003768, "grad_norm": 0.3347359001636505, "learning_rate": 0.0001532314495656973, "loss": 11.6636, "step": 46071 }, { "epoch": 0.9644143012643389, "grad_norm": 0.3162384331226349, "learning_rate": 0.0001532295934714824, "loss": 11.6801, "step": 46072 }, { "epoch": 0.9644352340283011, "grad_norm": 0.30298295617103577, "learning_rate": 0.00015322773735167896, "loss": 11.687, "step": 46073 }, { "epoch": 0.9644561667922632, "grad_norm": 0.30964767932891846, "learning_rate": 0.00015322588120628792, "loss": 11.6781, "step": 46074 }, { "epoch": 0.9644770995562254, "grad_norm": 0.2941391170024872, "learning_rate": 0.00015322402503531007, "loss": 11.656, "step": 46075 }, { "epoch": 0.9644980323201876, "grad_norm": 0.26667582988739014, "learning_rate": 0.00015322216883874643, "loss": 11.6498, "step": 46076 }, { "epoch": 0.9645189650841497, "grad_norm": 0.293189138174057, "learning_rate": 0.00015322031261659778, "loss": 11.6533, "step": 46077 }, { "epoch": 0.9645398978481119, "grad_norm": 0.3828161656856537, "learning_rate": 0.00015321845636886508, "loss": 11.6749, "step": 46078 }, { "epoch": 0.964560830612074, "grad_norm": 0.2868163585662842, "learning_rate": 0.00015321660009554923, "loss": 11.6746, "step": 46079 }, { "epoch": 0.9645817633760362, "grad_norm": 0.4017784297466278, "learning_rate": 0.00015321474379665105, "loss": 11.6807, "step": 46080 }, { "epoch": 0.9646026961399983, "grad_norm": 0.255612313747406, "learning_rate": 0.00015321288747217147, "loss": 11.6586, "step": 46081 }, { "epoch": 0.9646236289039605, "grad_norm": 0.3445078134536743, "learning_rate": 0.00015321103112211142, "loss": 11.6674, "step": 46082 }, { "epoch": 0.9646445616679227, "grad_norm": 0.4078635573387146, "learning_rate": 0.00015320917474647176, "loss": 11.6851, "step": 46083 }, { "epoch": 0.9646654944318848, "grad_norm": 0.3620360493659973, "learning_rate": 0.00015320731834525338, "loss": 11.6919, "step": 46084 }, { "epoch": 0.964686427195847, "grad_norm": 0.260309636592865, "learning_rate": 0.00015320546191845716, "loss": 11.6652, "step": 46085 }, { "epoch": 0.964707359959809, "grad_norm": 0.39259567856788635, "learning_rate": 0.000153203605466084, "loss": 11.6529, "step": 46086 }, { "epoch": 0.9647282927237713, "grad_norm": 0.3069480061531067, "learning_rate": 0.00015320174898813483, "loss": 11.6629, "step": 46087 }, { "epoch": 0.9647492254877335, "grad_norm": 0.3242837190628052, "learning_rate": 0.00015319989248461052, "loss": 11.6497, "step": 46088 }, { "epoch": 0.9647701582516955, "grad_norm": 0.301395982503891, "learning_rate": 0.0001531980359555119, "loss": 11.6539, "step": 46089 }, { "epoch": 0.9647910910156577, "grad_norm": 0.3772019147872925, "learning_rate": 0.00015319617940084, "loss": 11.6902, "step": 46090 }, { "epoch": 0.9648120237796198, "grad_norm": 0.32501694560050964, "learning_rate": 0.00015319432282059556, "loss": 11.6821, "step": 46091 }, { "epoch": 0.964832956543582, "grad_norm": 0.40950968861579895, "learning_rate": 0.00015319246621477953, "loss": 11.6508, "step": 46092 }, { "epoch": 0.9648538893075441, "grad_norm": 0.27914512157440186, "learning_rate": 0.0001531906095833929, "loss": 11.6699, "step": 46093 }, { "epoch": 0.9648748220715063, "grad_norm": 0.3001295030117035, "learning_rate": 0.0001531887529264364, "loss": 11.6729, "step": 46094 }, { "epoch": 0.9648957548354685, "grad_norm": 0.3097839653491974, "learning_rate": 0.00015318689624391104, "loss": 11.6643, "step": 46095 }, { "epoch": 0.9649166875994306, "grad_norm": 0.3717504143714905, "learning_rate": 0.00015318503953581767, "loss": 11.6716, "step": 46096 }, { "epoch": 0.9649376203633928, "grad_norm": 0.33291205763816833, "learning_rate": 0.0001531831828021572, "loss": 11.6669, "step": 46097 }, { "epoch": 0.9649585531273549, "grad_norm": 0.3106277287006378, "learning_rate": 0.00015318132604293048, "loss": 11.6682, "step": 46098 }, { "epoch": 0.9649794858913171, "grad_norm": 0.34206467866897583, "learning_rate": 0.00015317946925813843, "loss": 11.66, "step": 46099 }, { "epoch": 0.9650004186552792, "grad_norm": 0.4374544620513916, "learning_rate": 0.00015317761244778194, "loss": 11.6846, "step": 46100 }, { "epoch": 0.9650213514192414, "grad_norm": 0.27396437525749207, "learning_rate": 0.00015317575561186192, "loss": 11.6703, "step": 46101 }, { "epoch": 0.9650422841832036, "grad_norm": 0.2914566397666931, "learning_rate": 0.00015317389875037926, "loss": 11.665, "step": 46102 }, { "epoch": 0.9650632169471657, "grad_norm": 0.40123939514160156, "learning_rate": 0.00015317204186333482, "loss": 11.6593, "step": 46103 }, { "epoch": 0.9650841497111279, "grad_norm": 0.2931816279888153, "learning_rate": 0.00015317018495072954, "loss": 11.6678, "step": 46104 }, { "epoch": 0.96510508247509, "grad_norm": 0.2537984848022461, "learning_rate": 0.00015316832801256423, "loss": 11.6579, "step": 46105 }, { "epoch": 0.9651260152390522, "grad_norm": 0.27930381894111633, "learning_rate": 0.0001531664710488399, "loss": 11.6702, "step": 46106 }, { "epoch": 0.9651469480030144, "grad_norm": 0.3844588100910187, "learning_rate": 0.00015316461405955737, "loss": 11.6723, "step": 46107 }, { "epoch": 0.9651678807669765, "grad_norm": 0.2771715819835663, "learning_rate": 0.00015316275704471753, "loss": 11.6812, "step": 46108 }, { "epoch": 0.9651888135309387, "grad_norm": 0.33907783031463623, "learning_rate": 0.00015316090000432126, "loss": 11.6786, "step": 46109 }, { "epoch": 0.9652097462949008, "grad_norm": 0.28452932834625244, "learning_rate": 0.00015315904293836954, "loss": 11.6713, "step": 46110 }, { "epoch": 0.965230679058863, "grad_norm": 0.32076501846313477, "learning_rate": 0.00015315718584686316, "loss": 11.6583, "step": 46111 }, { "epoch": 0.965251611822825, "grad_norm": 0.25465089082717896, "learning_rate": 0.0001531553287298031, "loss": 11.6678, "step": 46112 }, { "epoch": 0.9652725445867872, "grad_norm": 0.3243717551231384, "learning_rate": 0.00015315347158719014, "loss": 11.6588, "step": 46113 }, { "epoch": 0.9652934773507494, "grad_norm": 0.25951912999153137, "learning_rate": 0.0001531516144190253, "loss": 11.6641, "step": 46114 }, { "epoch": 0.9653144101147115, "grad_norm": 0.3263271450996399, "learning_rate": 0.00015314975722530939, "loss": 11.67, "step": 46115 }, { "epoch": 0.9653353428786737, "grad_norm": 0.25408369302749634, "learning_rate": 0.00015314790000604331, "loss": 11.6912, "step": 46116 }, { "epoch": 0.9653562756426358, "grad_norm": 0.3279326558113098, "learning_rate": 0.00015314604276122802, "loss": 11.6913, "step": 46117 }, { "epoch": 0.965377208406598, "grad_norm": 0.274059534072876, "learning_rate": 0.0001531441854908643, "loss": 11.6642, "step": 46118 }, { "epoch": 0.9653981411705601, "grad_norm": 0.39366263151168823, "learning_rate": 0.00015314232819495317, "loss": 11.6842, "step": 46119 }, { "epoch": 0.9654190739345223, "grad_norm": 0.3295731544494629, "learning_rate": 0.0001531404708734954, "loss": 11.6852, "step": 46120 }, { "epoch": 0.9654400066984845, "grad_norm": 0.2681540548801422, "learning_rate": 0.00015313861352649196, "loss": 11.6594, "step": 46121 }, { "epoch": 0.9654609394624466, "grad_norm": 0.2922087609767914, "learning_rate": 0.00015313675615394375, "loss": 11.6756, "step": 46122 }, { "epoch": 0.9654818722264088, "grad_norm": 0.3711501955986023, "learning_rate": 0.0001531348987558516, "loss": 11.6657, "step": 46123 }, { "epoch": 0.9655028049903709, "grad_norm": 0.28350207209587097, "learning_rate": 0.00015313304133221647, "loss": 11.6759, "step": 46124 }, { "epoch": 0.9655237377543331, "grad_norm": 0.3078969717025757, "learning_rate": 0.0001531311838830392, "loss": 11.6678, "step": 46125 }, { "epoch": 0.9655446705182953, "grad_norm": 0.2784491777420044, "learning_rate": 0.0001531293264083207, "loss": 11.6665, "step": 46126 }, { "epoch": 0.9655656032822574, "grad_norm": 0.3223415017127991, "learning_rate": 0.0001531274689080619, "loss": 11.6689, "step": 46127 }, { "epoch": 0.9655865360462196, "grad_norm": 0.29542285203933716, "learning_rate": 0.00015312561138226363, "loss": 11.6617, "step": 46128 }, { "epoch": 0.9656074688101817, "grad_norm": 0.35200974345207214, "learning_rate": 0.00015312375383092684, "loss": 11.6629, "step": 46129 }, { "epoch": 0.9656284015741439, "grad_norm": 0.288126140832901, "learning_rate": 0.00015312189625405235, "loss": 11.6745, "step": 46130 }, { "epoch": 0.965649334338106, "grad_norm": 0.2834486663341522, "learning_rate": 0.00015312003865164113, "loss": 11.66, "step": 46131 }, { "epoch": 0.9656702671020682, "grad_norm": 0.29786238074302673, "learning_rate": 0.00015311818102369405, "loss": 11.6596, "step": 46132 }, { "epoch": 0.9656911998660304, "grad_norm": 0.3274131715297699, "learning_rate": 0.00015311632337021197, "loss": 11.6669, "step": 46133 }, { "epoch": 0.9657121326299924, "grad_norm": 0.3836681544780731, "learning_rate": 0.00015311446569119585, "loss": 11.6835, "step": 46134 }, { "epoch": 0.9657330653939546, "grad_norm": 0.3868199288845062, "learning_rate": 0.0001531126079866465, "loss": 11.664, "step": 46135 }, { "epoch": 0.9657539981579167, "grad_norm": 0.42099857330322266, "learning_rate": 0.0001531107502565649, "loss": 11.6648, "step": 46136 }, { "epoch": 0.9657749309218789, "grad_norm": 0.36609935760498047, "learning_rate": 0.00015310889250095189, "loss": 11.6731, "step": 46137 }, { "epoch": 0.965795863685841, "grad_norm": 0.3421635925769806, "learning_rate": 0.00015310703471980834, "loss": 11.6721, "step": 46138 }, { "epoch": 0.9658167964498032, "grad_norm": 0.35266923904418945, "learning_rate": 0.00015310517691313516, "loss": 11.6801, "step": 46139 }, { "epoch": 0.9658377292137654, "grad_norm": 0.44778820872306824, "learning_rate": 0.0001531033190809333, "loss": 11.6773, "step": 46140 }, { "epoch": 0.9658586619777275, "grad_norm": 0.32023563981056213, "learning_rate": 0.00015310146122320361, "loss": 11.6559, "step": 46141 }, { "epoch": 0.9658795947416897, "grad_norm": 0.27030837535858154, "learning_rate": 0.00015309960333994695, "loss": 11.6766, "step": 46142 }, { "epoch": 0.9659005275056518, "grad_norm": 0.28829747438430786, "learning_rate": 0.00015309774543116427, "loss": 11.6717, "step": 46143 }, { "epoch": 0.965921460269614, "grad_norm": 0.2687898576259613, "learning_rate": 0.00015309588749685643, "loss": 11.6788, "step": 46144 }, { "epoch": 0.9659423930335762, "grad_norm": 0.30887579917907715, "learning_rate": 0.00015309402953702435, "loss": 11.6677, "step": 46145 }, { "epoch": 0.9659633257975383, "grad_norm": 0.26788267493247986, "learning_rate": 0.0001530921715516689, "loss": 11.6726, "step": 46146 }, { "epoch": 0.9659842585615005, "grad_norm": 0.4144405722618103, "learning_rate": 0.00015309031354079097, "loss": 11.6866, "step": 46147 }, { "epoch": 0.9660051913254626, "grad_norm": 0.3693891763687134, "learning_rate": 0.00015308845550439146, "loss": 11.6703, "step": 46148 }, { "epoch": 0.9660261240894248, "grad_norm": 0.3028056025505066, "learning_rate": 0.00015308659744247129, "loss": 11.6779, "step": 46149 }, { "epoch": 0.9660470568533869, "grad_norm": 0.29290831089019775, "learning_rate": 0.0001530847393550313, "loss": 11.6539, "step": 46150 }, { "epoch": 0.9660679896173491, "grad_norm": 0.24769893288612366, "learning_rate": 0.00015308288124207242, "loss": 11.6655, "step": 46151 }, { "epoch": 0.9660889223813113, "grad_norm": 0.300269216299057, "learning_rate": 0.00015308102310359554, "loss": 11.6678, "step": 46152 }, { "epoch": 0.9661098551452734, "grad_norm": 0.306001216173172, "learning_rate": 0.00015307916493960156, "loss": 11.674, "step": 46153 }, { "epoch": 0.9661307879092356, "grad_norm": 0.369772344827652, "learning_rate": 0.00015307730675009134, "loss": 11.6722, "step": 46154 }, { "epoch": 0.9661517206731977, "grad_norm": 0.3340747058391571, "learning_rate": 0.0001530754485350658, "loss": 11.6711, "step": 46155 }, { "epoch": 0.9661726534371599, "grad_norm": 0.32452455163002014, "learning_rate": 0.00015307359029452581, "loss": 11.6671, "step": 46156 }, { "epoch": 0.966193586201122, "grad_norm": 0.320743590593338, "learning_rate": 0.0001530717320284723, "loss": 11.6706, "step": 46157 }, { "epoch": 0.9662145189650841, "grad_norm": 0.30125460028648376, "learning_rate": 0.00015306987373690616, "loss": 11.6715, "step": 46158 }, { "epoch": 0.9662354517290463, "grad_norm": 0.2795819044113159, "learning_rate": 0.00015306801541982825, "loss": 11.6602, "step": 46159 }, { "epoch": 0.9662563844930084, "grad_norm": 0.39622941613197327, "learning_rate": 0.0001530661570772395, "loss": 11.6674, "step": 46160 }, { "epoch": 0.9662773172569706, "grad_norm": 0.3258615732192993, "learning_rate": 0.00015306429870914077, "loss": 11.6847, "step": 46161 }, { "epoch": 0.9662982500209327, "grad_norm": 0.2820683419704437, "learning_rate": 0.00015306244031553296, "loss": 11.6632, "step": 46162 }, { "epoch": 0.9663191827848949, "grad_norm": 0.2407132089138031, "learning_rate": 0.00015306058189641698, "loss": 11.6742, "step": 46163 }, { "epoch": 0.9663401155488571, "grad_norm": 0.3222094774246216, "learning_rate": 0.00015305872345179372, "loss": 11.677, "step": 46164 }, { "epoch": 0.9663610483128192, "grad_norm": 0.28928643465042114, "learning_rate": 0.00015305686498166406, "loss": 11.6622, "step": 46165 }, { "epoch": 0.9663819810767814, "grad_norm": 0.33027899265289307, "learning_rate": 0.0001530550064860289, "loss": 11.6566, "step": 46166 }, { "epoch": 0.9664029138407435, "grad_norm": 0.3646310865879059, "learning_rate": 0.00015305314796488917, "loss": 11.657, "step": 46167 }, { "epoch": 0.9664238466047057, "grad_norm": 0.3308848738670349, "learning_rate": 0.00015305128941824569, "loss": 11.6785, "step": 46168 }, { "epoch": 0.9664447793686678, "grad_norm": 0.25377970933914185, "learning_rate": 0.0001530494308460994, "loss": 11.6597, "step": 46169 }, { "epoch": 0.96646571213263, "grad_norm": 0.3620997369289398, "learning_rate": 0.0001530475722484512, "loss": 11.6567, "step": 46170 }, { "epoch": 0.9664866448965922, "grad_norm": 0.33502888679504395, "learning_rate": 0.00015304571362530193, "loss": 11.6863, "step": 46171 }, { "epoch": 0.9665075776605543, "grad_norm": 0.3003799319267273, "learning_rate": 0.00015304385497665255, "loss": 11.6736, "step": 46172 }, { "epoch": 0.9665285104245165, "grad_norm": 0.37735992670059204, "learning_rate": 0.00015304199630250396, "loss": 11.6644, "step": 46173 }, { "epoch": 0.9665494431884786, "grad_norm": 0.3621537983417511, "learning_rate": 0.00015304013760285696, "loss": 11.6846, "step": 46174 }, { "epoch": 0.9665703759524408, "grad_norm": 0.2659449577331543, "learning_rate": 0.00015303827887771252, "loss": 11.6672, "step": 46175 }, { "epoch": 0.9665913087164029, "grad_norm": 0.25930485129356384, "learning_rate": 0.00015303642012707154, "loss": 11.6505, "step": 46176 }, { "epoch": 0.9666122414803651, "grad_norm": 0.28832578659057617, "learning_rate": 0.00015303456135093486, "loss": 11.6848, "step": 46177 }, { "epoch": 0.9666331742443273, "grad_norm": 0.38658758997917175, "learning_rate": 0.00015303270254930342, "loss": 11.6765, "step": 46178 }, { "epoch": 0.9666541070082894, "grad_norm": 0.3022553324699402, "learning_rate": 0.00015303084372217808, "loss": 11.6592, "step": 46179 }, { "epoch": 0.9666750397722516, "grad_norm": 0.3422356843948364, "learning_rate": 0.00015302898486955978, "loss": 11.6707, "step": 46180 }, { "epoch": 0.9666959725362136, "grad_norm": 0.26368874311447144, "learning_rate": 0.00015302712599144937, "loss": 11.6675, "step": 46181 }, { "epoch": 0.9667169053001758, "grad_norm": 0.33904096484184265, "learning_rate": 0.00015302526708784776, "loss": 11.6867, "step": 46182 }, { "epoch": 0.966737838064138, "grad_norm": 0.33133968710899353, "learning_rate": 0.00015302340815875583, "loss": 11.6793, "step": 46183 }, { "epoch": 0.9667587708281001, "grad_norm": 0.38265106081962585, "learning_rate": 0.0001530215492041745, "loss": 11.6782, "step": 46184 }, { "epoch": 0.9667797035920623, "grad_norm": 0.28305789828300476, "learning_rate": 0.00015301969022410466, "loss": 11.6679, "step": 46185 }, { "epoch": 0.9668006363560244, "grad_norm": 0.2972482740879059, "learning_rate": 0.00015301783121854715, "loss": 11.662, "step": 46186 }, { "epoch": 0.9668215691199866, "grad_norm": 0.2658110558986664, "learning_rate": 0.00015301597218750294, "loss": 11.6752, "step": 46187 }, { "epoch": 0.9668425018839487, "grad_norm": 0.2821478247642517, "learning_rate": 0.0001530141131309729, "loss": 11.669, "step": 46188 }, { "epoch": 0.9668634346479109, "grad_norm": 0.31060677766799927, "learning_rate": 0.00015301225404895787, "loss": 11.6761, "step": 46189 }, { "epoch": 0.9668843674118731, "grad_norm": 0.3828558921813965, "learning_rate": 0.00015301039494145882, "loss": 11.6782, "step": 46190 }, { "epoch": 0.9669053001758352, "grad_norm": 0.2665886878967285, "learning_rate": 0.0001530085358084766, "loss": 11.6624, "step": 46191 }, { "epoch": 0.9669262329397974, "grad_norm": 1.9685615301132202, "learning_rate": 0.00015300667665001208, "loss": 11.6227, "step": 46192 }, { "epoch": 0.9669471657037595, "grad_norm": 0.3353733420372009, "learning_rate": 0.00015300481746606626, "loss": 11.6692, "step": 46193 }, { "epoch": 0.9669680984677217, "grad_norm": 0.31380966305732727, "learning_rate": 0.0001530029582566399, "loss": 11.6683, "step": 46194 }, { "epoch": 0.9669890312316838, "grad_norm": 0.2378253936767578, "learning_rate": 0.000153001099021734, "loss": 11.6494, "step": 46195 }, { "epoch": 0.967009963995646, "grad_norm": 0.303011953830719, "learning_rate": 0.0001529992397613494, "loss": 11.6632, "step": 46196 }, { "epoch": 0.9670308967596082, "grad_norm": 0.46639159321784973, "learning_rate": 0.00015299738047548698, "loss": 11.6946, "step": 46197 }, { "epoch": 0.9670518295235703, "grad_norm": 0.28999972343444824, "learning_rate": 0.00015299552116414768, "loss": 11.6671, "step": 46198 }, { "epoch": 0.9670727622875325, "grad_norm": 0.44622230529785156, "learning_rate": 0.00015299366182733233, "loss": 11.6662, "step": 46199 }, { "epoch": 0.9670936950514946, "grad_norm": 0.27266520261764526, "learning_rate": 0.00015299180246504195, "loss": 11.6738, "step": 46200 }, { "epoch": 0.9671146278154568, "grad_norm": 0.2903074026107788, "learning_rate": 0.00015298994307727726, "loss": 11.6621, "step": 46201 }, { "epoch": 0.9671355605794189, "grad_norm": 0.33816203474998474, "learning_rate": 0.0001529880836640393, "loss": 11.6626, "step": 46202 }, { "epoch": 0.967156493343381, "grad_norm": 0.2951582074165344, "learning_rate": 0.0001529862242253289, "loss": 11.6675, "step": 46203 }, { "epoch": 0.9671774261073433, "grad_norm": 0.2691488265991211, "learning_rate": 0.00015298436476114693, "loss": 11.6395, "step": 46204 }, { "epoch": 0.9671983588713053, "grad_norm": 0.41952750086784363, "learning_rate": 0.00015298250527149434, "loss": 11.6865, "step": 46205 }, { "epoch": 0.9672192916352675, "grad_norm": 0.31258425116539, "learning_rate": 0.00015298064575637198, "loss": 11.6641, "step": 46206 }, { "epoch": 0.9672402243992296, "grad_norm": 0.2947213351726532, "learning_rate": 0.00015297878621578078, "loss": 11.6638, "step": 46207 }, { "epoch": 0.9672611571631918, "grad_norm": 0.32192546129226685, "learning_rate": 0.0001529769266497216, "loss": 11.6746, "step": 46208 }, { "epoch": 0.967282089927154, "grad_norm": 0.2859971821308136, "learning_rate": 0.00015297506705819536, "loss": 11.6689, "step": 46209 }, { "epoch": 0.9673030226911161, "grad_norm": 0.35318511724472046, "learning_rate": 0.00015297320744120293, "loss": 11.678, "step": 46210 }, { "epoch": 0.9673239554550783, "grad_norm": 0.35387349128723145, "learning_rate": 0.0001529713477987452, "loss": 11.6686, "step": 46211 }, { "epoch": 0.9673448882190404, "grad_norm": 0.5348169803619385, "learning_rate": 0.00015296948813082315, "loss": 11.6827, "step": 46212 }, { "epoch": 0.9673658209830026, "grad_norm": 0.2954516112804413, "learning_rate": 0.00015296762843743756, "loss": 11.6637, "step": 46213 }, { "epoch": 0.9673867537469647, "grad_norm": 0.27983978390693665, "learning_rate": 0.00015296576871858935, "loss": 11.6772, "step": 46214 }, { "epoch": 0.9674076865109269, "grad_norm": 0.2625471353530884, "learning_rate": 0.00015296390897427946, "loss": 11.6672, "step": 46215 }, { "epoch": 0.9674286192748891, "grad_norm": 0.3187777101993561, "learning_rate": 0.00015296204920450874, "loss": 11.6751, "step": 46216 }, { "epoch": 0.9674495520388512, "grad_norm": 0.28171923756599426, "learning_rate": 0.0001529601894092781, "loss": 11.671, "step": 46217 }, { "epoch": 0.9674704848028134, "grad_norm": 0.2555297613143921, "learning_rate": 0.00015295832958858849, "loss": 11.6675, "step": 46218 }, { "epoch": 0.9674914175667755, "grad_norm": 0.2920185923576355, "learning_rate": 0.00015295646974244068, "loss": 11.6759, "step": 46219 }, { "epoch": 0.9675123503307377, "grad_norm": 0.3166014850139618, "learning_rate": 0.0001529546098708357, "loss": 11.6615, "step": 46220 }, { "epoch": 0.9675332830946998, "grad_norm": 0.3136788010597229, "learning_rate": 0.0001529527499737743, "loss": 11.6603, "step": 46221 }, { "epoch": 0.967554215858662, "grad_norm": 0.3289880156517029, "learning_rate": 0.0001529508900512575, "loss": 11.6725, "step": 46222 }, { "epoch": 0.9675751486226242, "grad_norm": 0.36546051502227783, "learning_rate": 0.00015294903010328617, "loss": 11.6666, "step": 46223 }, { "epoch": 0.9675960813865863, "grad_norm": 0.25552961230278015, "learning_rate": 0.00015294717012986114, "loss": 11.6733, "step": 46224 }, { "epoch": 0.9676170141505485, "grad_norm": 0.2435470074415207, "learning_rate": 0.00015294531013098337, "loss": 11.6811, "step": 46225 }, { "epoch": 0.9676379469145105, "grad_norm": 0.25982969999313354, "learning_rate": 0.0001529434501066537, "loss": 11.6741, "step": 46226 }, { "epoch": 0.9676588796784727, "grad_norm": 0.43803972005844116, "learning_rate": 0.00015294159005687304, "loss": 11.6641, "step": 46227 }, { "epoch": 0.967679812442435, "grad_norm": 0.3394757807254791, "learning_rate": 0.00015293972998164235, "loss": 11.6653, "step": 46228 }, { "epoch": 0.967700745206397, "grad_norm": 0.46940338611602783, "learning_rate": 0.00015293786988096244, "loss": 11.675, "step": 46229 }, { "epoch": 0.9677216779703592, "grad_norm": 0.32167673110961914, "learning_rate": 0.00015293600975483423, "loss": 11.6823, "step": 46230 }, { "epoch": 0.9677426107343213, "grad_norm": 0.33640873432159424, "learning_rate": 0.00015293414960325862, "loss": 11.6742, "step": 46231 }, { "epoch": 0.9677635434982835, "grad_norm": 0.30358636379241943, "learning_rate": 0.00015293228942623652, "loss": 11.6739, "step": 46232 }, { "epoch": 0.9677844762622456, "grad_norm": 0.3614178001880646, "learning_rate": 0.00015293042922376882, "loss": 11.66, "step": 46233 }, { "epoch": 0.9678054090262078, "grad_norm": 0.2646377384662628, "learning_rate": 0.00015292856899585638, "loss": 11.6526, "step": 46234 }, { "epoch": 0.96782634179017, "grad_norm": 0.32977229356765747, "learning_rate": 0.0001529267087425001, "loss": 11.6591, "step": 46235 }, { "epoch": 0.9678472745541321, "grad_norm": 0.2849961221218109, "learning_rate": 0.0001529248484637009, "loss": 11.6733, "step": 46236 }, { "epoch": 0.9678682073180943, "grad_norm": 0.3192071318626404, "learning_rate": 0.0001529229881594597, "loss": 11.6762, "step": 46237 }, { "epoch": 0.9678891400820564, "grad_norm": 0.3025445342063904, "learning_rate": 0.0001529211278297773, "loss": 11.6637, "step": 46238 }, { "epoch": 0.9679100728460186, "grad_norm": 0.3270930051803589, "learning_rate": 0.00015291926747465468, "loss": 11.6755, "step": 46239 }, { "epoch": 0.9679310056099807, "grad_norm": 0.40180787444114685, "learning_rate": 0.00015291740709409273, "loss": 11.6604, "step": 46240 }, { "epoch": 0.9679519383739429, "grad_norm": 0.3038584291934967, "learning_rate": 0.00015291554668809228, "loss": 11.6657, "step": 46241 }, { "epoch": 0.9679728711379051, "grad_norm": 0.28850170969963074, "learning_rate": 0.0001529136862566543, "loss": 11.6655, "step": 46242 }, { "epoch": 0.9679938039018672, "grad_norm": 0.3134588301181793, "learning_rate": 0.00015291182579977965, "loss": 11.6664, "step": 46243 }, { "epoch": 0.9680147366658294, "grad_norm": 0.298281192779541, "learning_rate": 0.00015290996531746922, "loss": 11.6675, "step": 46244 }, { "epoch": 0.9680356694297915, "grad_norm": 0.26574209332466125, "learning_rate": 0.00015290810480972388, "loss": 11.6786, "step": 46245 }, { "epoch": 0.9680566021937537, "grad_norm": 0.36524948477745056, "learning_rate": 0.0001529062442765446, "loss": 11.6691, "step": 46246 }, { "epoch": 0.9680775349577159, "grad_norm": 0.2799779772758484, "learning_rate": 0.0001529043837179322, "loss": 11.6858, "step": 46247 }, { "epoch": 0.968098467721678, "grad_norm": 0.45218735933303833, "learning_rate": 0.0001529025231338876, "loss": 11.6649, "step": 46248 }, { "epoch": 0.9681194004856402, "grad_norm": 0.5141395926475525, "learning_rate": 0.00015290066252441172, "loss": 11.6688, "step": 46249 }, { "epoch": 0.9681403332496022, "grad_norm": 0.38711023330688477, "learning_rate": 0.0001528988018895054, "loss": 11.6731, "step": 46250 }, { "epoch": 0.9681612660135644, "grad_norm": 0.33964428305625916, "learning_rate": 0.0001528969412291696, "loss": 11.6617, "step": 46251 }, { "epoch": 0.9681821987775265, "grad_norm": 0.3387938439846039, "learning_rate": 0.00015289508054340515, "loss": 11.6877, "step": 46252 }, { "epoch": 0.9682031315414887, "grad_norm": 0.4009164869785309, "learning_rate": 0.00015289321983221298, "loss": 11.6772, "step": 46253 }, { "epoch": 0.9682240643054509, "grad_norm": 0.2828618884086609, "learning_rate": 0.000152891359095594, "loss": 11.6554, "step": 46254 }, { "epoch": 0.968244997069413, "grad_norm": 0.27999716997146606, "learning_rate": 0.00015288949833354906, "loss": 11.6841, "step": 46255 }, { "epoch": 0.9682659298333752, "grad_norm": 0.28478938341140747, "learning_rate": 0.00015288763754607912, "loss": 11.6766, "step": 46256 }, { "epoch": 0.9682868625973373, "grad_norm": 0.3023095726966858, "learning_rate": 0.00015288577673318497, "loss": 11.6743, "step": 46257 }, { "epoch": 0.9683077953612995, "grad_norm": 0.34290817379951477, "learning_rate": 0.0001528839158948676, "loss": 11.6776, "step": 46258 }, { "epoch": 0.9683287281252616, "grad_norm": 0.27148446440696716, "learning_rate": 0.0001528820550311279, "loss": 11.6768, "step": 46259 }, { "epoch": 0.9683496608892238, "grad_norm": 0.3973119258880615, "learning_rate": 0.00015288019414196668, "loss": 11.6731, "step": 46260 }, { "epoch": 0.968370593653186, "grad_norm": 0.3081820607185364, "learning_rate": 0.00015287833322738493, "loss": 11.675, "step": 46261 }, { "epoch": 0.9683915264171481, "grad_norm": 0.36036399006843567, "learning_rate": 0.0001528764722873835, "loss": 11.6967, "step": 46262 }, { "epoch": 0.9684124591811103, "grad_norm": 0.30631881952285767, "learning_rate": 0.00015287461132196327, "loss": 11.6803, "step": 46263 }, { "epoch": 0.9684333919450724, "grad_norm": 0.3083480894565582, "learning_rate": 0.0001528727503311252, "loss": 11.6557, "step": 46264 }, { "epoch": 0.9684543247090346, "grad_norm": 0.3540806174278259, "learning_rate": 0.00015287088931487008, "loss": 11.6743, "step": 46265 }, { "epoch": 0.9684752574729968, "grad_norm": 0.2654513716697693, "learning_rate": 0.00015286902827319892, "loss": 11.6607, "step": 46266 }, { "epoch": 0.9684961902369589, "grad_norm": 0.30481278896331787, "learning_rate": 0.0001528671672061125, "loss": 11.6782, "step": 46267 }, { "epoch": 0.9685171230009211, "grad_norm": 0.28626739978790283, "learning_rate": 0.0001528653061136118, "loss": 11.6546, "step": 46268 }, { "epoch": 0.9685380557648832, "grad_norm": 0.37209951877593994, "learning_rate": 0.0001528634449956977, "loss": 11.6735, "step": 46269 }, { "epoch": 0.9685589885288454, "grad_norm": 0.33847710490226746, "learning_rate": 0.00015286158385237108, "loss": 11.7051, "step": 46270 }, { "epoch": 0.9685799212928075, "grad_norm": 0.3067794144153595, "learning_rate": 0.00015285972268363283, "loss": 11.6604, "step": 46271 }, { "epoch": 0.9686008540567697, "grad_norm": 0.2850169837474823, "learning_rate": 0.00015285786148948387, "loss": 11.6749, "step": 46272 }, { "epoch": 0.9686217868207319, "grad_norm": 0.3523778021335602, "learning_rate": 0.00015285600026992505, "loss": 11.6796, "step": 46273 }, { "epoch": 0.968642719584694, "grad_norm": 0.3142007887363434, "learning_rate": 0.0001528541390249573, "loss": 11.6452, "step": 46274 }, { "epoch": 0.9686636523486561, "grad_norm": 0.3680524230003357, "learning_rate": 0.00015285227775458152, "loss": 11.6739, "step": 46275 }, { "epoch": 0.9686845851126182, "grad_norm": 0.30768051743507385, "learning_rate": 0.00015285041645879857, "loss": 11.6594, "step": 46276 }, { "epoch": 0.9687055178765804, "grad_norm": 0.32422682642936707, "learning_rate": 0.00015284855513760935, "loss": 11.6798, "step": 46277 }, { "epoch": 0.9687264506405425, "grad_norm": 0.27760833501815796, "learning_rate": 0.0001528466937910148, "loss": 11.665, "step": 46278 }, { "epoch": 0.9687473834045047, "grad_norm": 0.32386571168899536, "learning_rate": 0.00015284483241901579, "loss": 11.6745, "step": 46279 }, { "epoch": 0.9687683161684669, "grad_norm": 0.28137603402137756, "learning_rate": 0.0001528429710216132, "loss": 11.6696, "step": 46280 }, { "epoch": 0.968789248932429, "grad_norm": 0.3437386751174927, "learning_rate": 0.00015284110959880794, "loss": 11.6512, "step": 46281 }, { "epoch": 0.9688101816963912, "grad_norm": 0.4249044954776764, "learning_rate": 0.0001528392481506009, "loss": 11.6715, "step": 46282 }, { "epoch": 0.9688311144603533, "grad_norm": 0.423974871635437, "learning_rate": 0.00015283738667699296, "loss": 11.6628, "step": 46283 }, { "epoch": 0.9688520472243155, "grad_norm": 0.37463143467903137, "learning_rate": 0.00015283552517798504, "loss": 11.6869, "step": 46284 }, { "epoch": 0.9688729799882777, "grad_norm": 0.36448296904563904, "learning_rate": 0.00015283366365357806, "loss": 11.6794, "step": 46285 }, { "epoch": 0.9688939127522398, "grad_norm": 0.32021793723106384, "learning_rate": 0.0001528318021037728, "loss": 11.6727, "step": 46286 }, { "epoch": 0.968914845516202, "grad_norm": 0.2474207878112793, "learning_rate": 0.0001528299405285703, "loss": 11.6557, "step": 46287 }, { "epoch": 0.9689357782801641, "grad_norm": 0.23646745085716248, "learning_rate": 0.00015282807892797134, "loss": 11.6649, "step": 46288 }, { "epoch": 0.9689567110441263, "grad_norm": 0.27220606803894043, "learning_rate": 0.0001528262173019769, "loss": 11.6465, "step": 46289 }, { "epoch": 0.9689776438080884, "grad_norm": 0.32883381843566895, "learning_rate": 0.00015282435565058784, "loss": 11.6564, "step": 46290 }, { "epoch": 0.9689985765720506, "grad_norm": 0.34201446175575256, "learning_rate": 0.00015282249397380502, "loss": 11.668, "step": 46291 }, { "epoch": 0.9690195093360128, "grad_norm": 0.3029378652572632, "learning_rate": 0.0001528206322716294, "loss": 11.6677, "step": 46292 }, { "epoch": 0.9690404420999749, "grad_norm": 0.39971670508384705, "learning_rate": 0.00015281877054406184, "loss": 11.6519, "step": 46293 }, { "epoch": 0.9690613748639371, "grad_norm": 0.23748937249183655, "learning_rate": 0.00015281690879110323, "loss": 11.6659, "step": 46294 }, { "epoch": 0.9690823076278992, "grad_norm": 0.32201245427131653, "learning_rate": 0.00015281504701275447, "loss": 11.6703, "step": 46295 }, { "epoch": 0.9691032403918614, "grad_norm": 0.2953391969203949, "learning_rate": 0.00015281318520901646, "loss": 11.6712, "step": 46296 }, { "epoch": 0.9691241731558234, "grad_norm": 0.2948901653289795, "learning_rate": 0.0001528113233798901, "loss": 11.6729, "step": 46297 }, { "epoch": 0.9691451059197856, "grad_norm": 0.2858520448207855, "learning_rate": 0.00015280946152537627, "loss": 11.6704, "step": 46298 }, { "epoch": 0.9691660386837478, "grad_norm": 0.3440152108669281, "learning_rate": 0.0001528075996454759, "loss": 11.6616, "step": 46299 }, { "epoch": 0.9691869714477099, "grad_norm": 0.2762269079685211, "learning_rate": 0.00015280573774018982, "loss": 11.6672, "step": 46300 }, { "epoch": 0.9692079042116721, "grad_norm": 0.34872713685035706, "learning_rate": 0.00015280387580951898, "loss": 11.676, "step": 46301 }, { "epoch": 0.9692288369756342, "grad_norm": 0.36743199825286865, "learning_rate": 0.00015280201385346425, "loss": 11.6413, "step": 46302 }, { "epoch": 0.9692497697395964, "grad_norm": 0.37868767976760864, "learning_rate": 0.00015280015187202657, "loss": 11.6727, "step": 46303 }, { "epoch": 0.9692707025035586, "grad_norm": 0.3057452142238617, "learning_rate": 0.00015279828986520676, "loss": 11.6861, "step": 46304 }, { "epoch": 0.9692916352675207, "grad_norm": 0.33147984743118286, "learning_rate": 0.00015279642783300575, "loss": 11.6574, "step": 46305 }, { "epoch": 0.9693125680314829, "grad_norm": 0.4105008840560913, "learning_rate": 0.0001527945657754245, "loss": 11.6739, "step": 46306 }, { "epoch": 0.969333500795445, "grad_norm": 0.3540234863758087, "learning_rate": 0.00015279270369246377, "loss": 11.6653, "step": 46307 }, { "epoch": 0.9693544335594072, "grad_norm": 0.38077470660209656, "learning_rate": 0.00015279084158412458, "loss": 11.6757, "step": 46308 }, { "epoch": 0.9693753663233693, "grad_norm": 0.32013171911239624, "learning_rate": 0.00015278897945040774, "loss": 11.6602, "step": 46309 }, { "epoch": 0.9693962990873315, "grad_norm": 0.30969053506851196, "learning_rate": 0.00015278711729131418, "loss": 11.6635, "step": 46310 }, { "epoch": 0.9694172318512937, "grad_norm": 0.2958069443702698, "learning_rate": 0.00015278525510684484, "loss": 11.6653, "step": 46311 }, { "epoch": 0.9694381646152558, "grad_norm": 0.2577861547470093, "learning_rate": 0.00015278339289700053, "loss": 11.654, "step": 46312 }, { "epoch": 0.969459097379218, "grad_norm": 0.35245227813720703, "learning_rate": 0.00015278153066178218, "loss": 11.679, "step": 46313 }, { "epoch": 0.9694800301431801, "grad_norm": 0.3460237383842468, "learning_rate": 0.0001527796684011907, "loss": 11.6593, "step": 46314 }, { "epoch": 0.9695009629071423, "grad_norm": 0.3015361428260803, "learning_rate": 0.000152777806115227, "loss": 11.6812, "step": 46315 }, { "epoch": 0.9695218956711044, "grad_norm": 0.2968744933605194, "learning_rate": 0.0001527759438038919, "loss": 11.6625, "step": 46316 }, { "epoch": 0.9695428284350666, "grad_norm": 0.28656405210494995, "learning_rate": 0.0001527740814671864, "loss": 11.6672, "step": 46317 }, { "epoch": 0.9695637611990288, "grad_norm": 0.2859829068183899, "learning_rate": 0.0001527722191051113, "loss": 11.6693, "step": 46318 }, { "epoch": 0.9695846939629909, "grad_norm": 0.30067381262779236, "learning_rate": 0.00015277035671766757, "loss": 11.657, "step": 46319 }, { "epoch": 0.969605626726953, "grad_norm": 0.3189783990383148, "learning_rate": 0.00015276849430485606, "loss": 11.6575, "step": 46320 }, { "epoch": 0.9696265594909151, "grad_norm": 0.36524996161460876, "learning_rate": 0.00015276663186667767, "loss": 11.6715, "step": 46321 }, { "epoch": 0.9696474922548773, "grad_norm": 0.28235313296318054, "learning_rate": 0.00015276476940313333, "loss": 11.6513, "step": 46322 }, { "epoch": 0.9696684250188395, "grad_norm": 0.27283596992492676, "learning_rate": 0.00015276290691422387, "loss": 11.6774, "step": 46323 }, { "epoch": 0.9696893577828016, "grad_norm": 0.2882366478443146, "learning_rate": 0.00015276104439995022, "loss": 11.6796, "step": 46324 }, { "epoch": 0.9697102905467638, "grad_norm": 0.2862095236778259, "learning_rate": 0.0001527591818603133, "loss": 11.6619, "step": 46325 }, { "epoch": 0.9697312233107259, "grad_norm": 0.4317946434020996, "learning_rate": 0.00015275731929531399, "loss": 11.6764, "step": 46326 }, { "epoch": 0.9697521560746881, "grad_norm": 0.3695780336856842, "learning_rate": 0.00015275545670495318, "loss": 11.6614, "step": 46327 }, { "epoch": 0.9697730888386502, "grad_norm": 0.33483636379241943, "learning_rate": 0.00015275359408923174, "loss": 11.685, "step": 46328 }, { "epoch": 0.9697940216026124, "grad_norm": 0.3521924912929535, "learning_rate": 0.0001527517314481506, "loss": 11.6596, "step": 46329 }, { "epoch": 0.9698149543665746, "grad_norm": 0.3259478509426117, "learning_rate": 0.00015274986878171065, "loss": 11.6571, "step": 46330 }, { "epoch": 0.9698358871305367, "grad_norm": 0.3148746192455292, "learning_rate": 0.00015274800608991278, "loss": 11.6729, "step": 46331 }, { "epoch": 0.9698568198944989, "grad_norm": 0.2653687298297882, "learning_rate": 0.0001527461433727579, "loss": 11.662, "step": 46332 }, { "epoch": 0.969877752658461, "grad_norm": 0.2939542829990387, "learning_rate": 0.00015274428063024686, "loss": 11.6627, "step": 46333 }, { "epoch": 0.9698986854224232, "grad_norm": 0.29325324296951294, "learning_rate": 0.0001527424178623806, "loss": 11.6625, "step": 46334 }, { "epoch": 0.9699196181863853, "grad_norm": 0.3061622083187103, "learning_rate": 0.00015274055506916002, "loss": 11.6691, "step": 46335 }, { "epoch": 0.9699405509503475, "grad_norm": 0.346094012260437, "learning_rate": 0.00015273869225058598, "loss": 11.6757, "step": 46336 }, { "epoch": 0.9699614837143097, "grad_norm": 0.37654319405555725, "learning_rate": 0.00015273682940665942, "loss": 11.6754, "step": 46337 }, { "epoch": 0.9699824164782718, "grad_norm": 0.3056805729866028, "learning_rate": 0.00015273496653738118, "loss": 11.6694, "step": 46338 }, { "epoch": 0.970003349242234, "grad_norm": 0.35701125860214233, "learning_rate": 0.0001527331036427522, "loss": 11.6756, "step": 46339 }, { "epoch": 0.9700242820061961, "grad_norm": 0.30436378717422485, "learning_rate": 0.00015273124072277336, "loss": 11.6403, "step": 46340 }, { "epoch": 0.9700452147701583, "grad_norm": 0.29796281456947327, "learning_rate": 0.00015272937777744552, "loss": 11.6758, "step": 46341 }, { "epoch": 0.9700661475341205, "grad_norm": 0.3336816728115082, "learning_rate": 0.00015272751480676969, "loss": 11.676, "step": 46342 }, { "epoch": 0.9700870802980825, "grad_norm": 0.33903926610946655, "learning_rate": 0.00015272565181074664, "loss": 11.6629, "step": 46343 }, { "epoch": 0.9701080130620447, "grad_norm": 0.39170515537261963, "learning_rate": 0.00015272378878937733, "loss": 11.6686, "step": 46344 }, { "epoch": 0.9701289458260068, "grad_norm": 0.3440993130207062, "learning_rate": 0.0001527219257426626, "loss": 11.6856, "step": 46345 }, { "epoch": 0.970149878589969, "grad_norm": 0.3669748306274414, "learning_rate": 0.0001527200626706034, "loss": 11.6628, "step": 46346 }, { "epoch": 0.9701708113539311, "grad_norm": 0.35148608684539795, "learning_rate": 0.00015271819957320061, "loss": 11.664, "step": 46347 }, { "epoch": 0.9701917441178933, "grad_norm": 0.30951768159866333, "learning_rate": 0.00015271633645045512, "loss": 11.6868, "step": 46348 }, { "epoch": 0.9702126768818555, "grad_norm": 0.30935975909233093, "learning_rate": 0.00015271447330236787, "loss": 11.6743, "step": 46349 }, { "epoch": 0.9702336096458176, "grad_norm": 0.26461759209632874, "learning_rate": 0.00015271261012893964, "loss": 11.6749, "step": 46350 }, { "epoch": 0.9702545424097798, "grad_norm": 0.3742107152938843, "learning_rate": 0.00015271074693017146, "loss": 11.6638, "step": 46351 }, { "epoch": 0.9702754751737419, "grad_norm": 0.30805087089538574, "learning_rate": 0.00015270888370606418, "loss": 11.6601, "step": 46352 }, { "epoch": 0.9702964079377041, "grad_norm": 0.4043772518634796, "learning_rate": 0.00015270702045661863, "loss": 11.6626, "step": 46353 }, { "epoch": 0.9703173407016662, "grad_norm": 0.4848022758960724, "learning_rate": 0.00015270515718183582, "loss": 11.6719, "step": 46354 }, { "epoch": 0.9703382734656284, "grad_norm": 0.2768714427947998, "learning_rate": 0.00015270329388171653, "loss": 11.6679, "step": 46355 }, { "epoch": 0.9703592062295906, "grad_norm": 0.29476630687713623, "learning_rate": 0.00015270143055626175, "loss": 11.6595, "step": 46356 }, { "epoch": 0.9703801389935527, "grad_norm": 0.2579876482486725, "learning_rate": 0.0001526995672054723, "loss": 11.6765, "step": 46357 }, { "epoch": 0.9704010717575149, "grad_norm": 0.28180351853370667, "learning_rate": 0.0001526977038293491, "loss": 11.6724, "step": 46358 }, { "epoch": 0.970422004521477, "grad_norm": 0.3463684022426605, "learning_rate": 0.00015269584042789313, "loss": 11.675, "step": 46359 }, { "epoch": 0.9704429372854392, "grad_norm": 0.29781103134155273, "learning_rate": 0.00015269397700110515, "loss": 11.6714, "step": 46360 }, { "epoch": 0.9704638700494014, "grad_norm": 0.27946937084198, "learning_rate": 0.00015269211354898617, "loss": 11.6658, "step": 46361 }, { "epoch": 0.9704848028133635, "grad_norm": 0.3340300917625427, "learning_rate": 0.000152690250071537, "loss": 11.6806, "step": 46362 }, { "epoch": 0.9705057355773257, "grad_norm": 0.3527374267578125, "learning_rate": 0.00015268838656875856, "loss": 11.6864, "step": 46363 }, { "epoch": 0.9705266683412878, "grad_norm": 0.4195197522640228, "learning_rate": 0.00015268652304065177, "loss": 11.672, "step": 46364 }, { "epoch": 0.97054760110525, "grad_norm": 0.2680734694004059, "learning_rate": 0.00015268465948721754, "loss": 11.6428, "step": 46365 }, { "epoch": 0.970568533869212, "grad_norm": 0.31015339493751526, "learning_rate": 0.00015268279590845672, "loss": 11.6782, "step": 46366 }, { "epoch": 0.9705894666331742, "grad_norm": 0.26968827843666077, "learning_rate": 0.00015268093230437022, "loss": 11.6716, "step": 46367 }, { "epoch": 0.9706103993971364, "grad_norm": 0.36182957887649536, "learning_rate": 0.00015267906867495891, "loss": 11.673, "step": 46368 }, { "epoch": 0.9706313321610985, "grad_norm": 0.2963119447231293, "learning_rate": 0.00015267720502022377, "loss": 11.6512, "step": 46369 }, { "epoch": 0.9706522649250607, "grad_norm": 0.31842318177223206, "learning_rate": 0.0001526753413401656, "loss": 11.6821, "step": 46370 }, { "epoch": 0.9706731976890228, "grad_norm": 0.3081853687763214, "learning_rate": 0.00015267347763478535, "loss": 11.6668, "step": 46371 }, { "epoch": 0.970694130452985, "grad_norm": 0.28801438212394714, "learning_rate": 0.0001526716139040839, "loss": 11.6656, "step": 46372 }, { "epoch": 0.9707150632169471, "grad_norm": 0.32016870379447937, "learning_rate": 0.00015266975014806216, "loss": 11.6516, "step": 46373 }, { "epoch": 0.9707359959809093, "grad_norm": 0.314883828163147, "learning_rate": 0.00015266788636672103, "loss": 11.6627, "step": 46374 }, { "epoch": 0.9707569287448715, "grad_norm": 0.3157363831996918, "learning_rate": 0.00015266602256006135, "loss": 11.6851, "step": 46375 }, { "epoch": 0.9707778615088336, "grad_norm": 0.2620992958545685, "learning_rate": 0.00015266415872808412, "loss": 11.6732, "step": 46376 }, { "epoch": 0.9707987942727958, "grad_norm": 0.3139328360557556, "learning_rate": 0.00015266229487079013, "loss": 11.6722, "step": 46377 }, { "epoch": 0.9708197270367579, "grad_norm": 0.32471874356269836, "learning_rate": 0.0001526604309881803, "loss": 11.6822, "step": 46378 }, { "epoch": 0.9708406598007201, "grad_norm": 0.2788318395614624, "learning_rate": 0.0001526585670802556, "loss": 11.6543, "step": 46379 }, { "epoch": 0.9708615925646822, "grad_norm": 0.3542959988117218, "learning_rate": 0.00015265670314701683, "loss": 11.6855, "step": 46380 }, { "epoch": 0.9708825253286444, "grad_norm": 0.2825884521007538, "learning_rate": 0.00015265483918846495, "loss": 11.6678, "step": 46381 }, { "epoch": 0.9709034580926066, "grad_norm": 0.30458250641822815, "learning_rate": 0.00015265297520460083, "loss": 11.6593, "step": 46382 }, { "epoch": 0.9709243908565687, "grad_norm": 0.35855498909950256, "learning_rate": 0.00015265111119542536, "loss": 11.6594, "step": 46383 }, { "epoch": 0.9709453236205309, "grad_norm": 0.5077121257781982, "learning_rate": 0.00015264924716093946, "loss": 11.6662, "step": 46384 }, { "epoch": 0.970966256384493, "grad_norm": 0.33733493089675903, "learning_rate": 0.00015264738310114397, "loss": 11.6572, "step": 46385 }, { "epoch": 0.9709871891484552, "grad_norm": 0.3229767978191376, "learning_rate": 0.00015264551901603986, "loss": 11.6754, "step": 46386 }, { "epoch": 0.9710081219124174, "grad_norm": 0.3165680766105652, "learning_rate": 0.000152643654905628, "loss": 11.6635, "step": 46387 }, { "epoch": 0.9710290546763795, "grad_norm": 0.5695500373840332, "learning_rate": 0.0001526417907699093, "loss": 11.6785, "step": 46388 }, { "epoch": 0.9710499874403417, "grad_norm": 0.33658453822135925, "learning_rate": 0.00015263992660888458, "loss": 11.6838, "step": 46389 }, { "epoch": 0.9710709202043037, "grad_norm": 0.33779770135879517, "learning_rate": 0.00015263806242255483, "loss": 11.6589, "step": 46390 }, { "epoch": 0.971091852968266, "grad_norm": 0.2601385712623596, "learning_rate": 0.00015263619821092087, "loss": 11.669, "step": 46391 }, { "epoch": 0.971112785732228, "grad_norm": 0.45736566185951233, "learning_rate": 0.00015263433397398368, "loss": 11.6663, "step": 46392 }, { "epoch": 0.9711337184961902, "grad_norm": 0.30783411860466003, "learning_rate": 0.0001526324697117441, "loss": 11.6782, "step": 46393 }, { "epoch": 0.9711546512601524, "grad_norm": 0.3255925178527832, "learning_rate": 0.00015263060542420305, "loss": 11.6652, "step": 46394 }, { "epoch": 0.9711755840241145, "grad_norm": 0.3068295121192932, "learning_rate": 0.00015262874111136138, "loss": 11.6863, "step": 46395 }, { "epoch": 0.9711965167880767, "grad_norm": 0.29339799284935, "learning_rate": 0.00015262687677322005, "loss": 11.6504, "step": 46396 }, { "epoch": 0.9712174495520388, "grad_norm": 0.32517150044441223, "learning_rate": 0.0001526250124097799, "loss": 11.6649, "step": 46397 }, { "epoch": 0.971238382316001, "grad_norm": 0.3338770866394043, "learning_rate": 0.00015262314802104188, "loss": 11.6631, "step": 46398 }, { "epoch": 0.9712593150799631, "grad_norm": 0.32104066014289856, "learning_rate": 0.0001526212836070068, "loss": 11.6415, "step": 46399 }, { "epoch": 0.9712802478439253, "grad_norm": 0.2844860553741455, "learning_rate": 0.00015261941916767566, "loss": 11.6774, "step": 46400 }, { "epoch": 0.9713011806078875, "grad_norm": 0.34472233057022095, "learning_rate": 0.00015261755470304933, "loss": 11.6737, "step": 46401 }, { "epoch": 0.9713221133718496, "grad_norm": 0.3120599091053009, "learning_rate": 0.00015261569021312864, "loss": 11.6536, "step": 46402 }, { "epoch": 0.9713430461358118, "grad_norm": 0.28189489245414734, "learning_rate": 0.00015261382569791457, "loss": 11.6777, "step": 46403 }, { "epoch": 0.9713639788997739, "grad_norm": 0.2932327389717102, "learning_rate": 0.00015261196115740796, "loss": 11.6829, "step": 46404 }, { "epoch": 0.9713849116637361, "grad_norm": 0.3024355471134186, "learning_rate": 0.00015261009659160973, "loss": 11.6757, "step": 46405 }, { "epoch": 0.9714058444276983, "grad_norm": 0.3232749104499817, "learning_rate": 0.00015260823200052078, "loss": 11.6664, "step": 46406 }, { "epoch": 0.9714267771916604, "grad_norm": 0.38862451910972595, "learning_rate": 0.00015260636738414197, "loss": 11.6772, "step": 46407 }, { "epoch": 0.9714477099556226, "grad_norm": 0.2727512717247009, "learning_rate": 0.00015260450274247428, "loss": 11.68, "step": 46408 }, { "epoch": 0.9714686427195847, "grad_norm": 0.3713497221469879, "learning_rate": 0.0001526026380755185, "loss": 11.6785, "step": 46409 }, { "epoch": 0.9714895754835469, "grad_norm": 0.3156765103340149, "learning_rate": 0.0001526007733832756, "loss": 11.6567, "step": 46410 }, { "epoch": 0.971510508247509, "grad_norm": 0.2509700357913971, "learning_rate": 0.00015259890866574645, "loss": 11.6581, "step": 46411 }, { "epoch": 0.9715314410114712, "grad_norm": 0.3671419620513916, "learning_rate": 0.00015259704392293194, "loss": 11.6479, "step": 46412 }, { "epoch": 0.9715523737754334, "grad_norm": 0.2372605800628662, "learning_rate": 0.000152595179154833, "loss": 11.6709, "step": 46413 }, { "epoch": 0.9715733065393954, "grad_norm": 0.46718689799308777, "learning_rate": 0.0001525933143614505, "loss": 11.6871, "step": 46414 }, { "epoch": 0.9715942393033576, "grad_norm": 0.3115103244781494, "learning_rate": 0.00015259144954278534, "loss": 11.6497, "step": 46415 }, { "epoch": 0.9716151720673197, "grad_norm": 0.29799380898475647, "learning_rate": 0.0001525895846988384, "loss": 11.6817, "step": 46416 }, { "epoch": 0.9716361048312819, "grad_norm": 0.3326384127140045, "learning_rate": 0.00015258771982961059, "loss": 11.6666, "step": 46417 }, { "epoch": 0.971657037595244, "grad_norm": 0.37006881833076477, "learning_rate": 0.00015258585493510283, "loss": 11.6686, "step": 46418 }, { "epoch": 0.9716779703592062, "grad_norm": 0.34025686979293823, "learning_rate": 0.00015258399001531598, "loss": 11.6726, "step": 46419 }, { "epoch": 0.9716989031231684, "grad_norm": 0.37262681126594543, "learning_rate": 0.00015258212507025096, "loss": 11.6614, "step": 46420 }, { "epoch": 0.9717198358871305, "grad_norm": 0.3400440216064453, "learning_rate": 0.00015258026009990866, "loss": 11.667, "step": 46421 }, { "epoch": 0.9717407686510927, "grad_norm": 0.270366370677948, "learning_rate": 0.00015257839510428994, "loss": 11.6685, "step": 46422 }, { "epoch": 0.9717617014150548, "grad_norm": 0.23620258271694183, "learning_rate": 0.0001525765300833958, "loss": 11.6632, "step": 46423 }, { "epoch": 0.971782634179017, "grad_norm": 0.2577389180660248, "learning_rate": 0.00015257466503722702, "loss": 11.6929, "step": 46424 }, { "epoch": 0.9718035669429792, "grad_norm": 0.2895784378051758, "learning_rate": 0.00015257279996578458, "loss": 11.6457, "step": 46425 }, { "epoch": 0.9718244997069413, "grad_norm": 0.3603430390357971, "learning_rate": 0.0001525709348690693, "loss": 11.6812, "step": 46426 }, { "epoch": 0.9718454324709035, "grad_norm": 0.29459717869758606, "learning_rate": 0.00015256906974708213, "loss": 11.6694, "step": 46427 }, { "epoch": 0.9718663652348656, "grad_norm": 0.34348201751708984, "learning_rate": 0.00015256720459982397, "loss": 11.663, "step": 46428 }, { "epoch": 0.9718872979988278, "grad_norm": 0.3127497732639313, "learning_rate": 0.00015256533942729566, "loss": 11.6784, "step": 46429 }, { "epoch": 0.9719082307627899, "grad_norm": 0.28430578112602234, "learning_rate": 0.00015256347422949818, "loss": 11.6623, "step": 46430 }, { "epoch": 0.9719291635267521, "grad_norm": 0.3242965638637543, "learning_rate": 0.0001525616090064324, "loss": 11.6872, "step": 46431 }, { "epoch": 0.9719500962907143, "grad_norm": 0.3941689431667328, "learning_rate": 0.0001525597437580992, "loss": 11.6836, "step": 46432 }, { "epoch": 0.9719710290546764, "grad_norm": 0.31119322776794434, "learning_rate": 0.00015255787848449942, "loss": 11.6645, "step": 46433 }, { "epoch": 0.9719919618186386, "grad_norm": 0.2961147427558899, "learning_rate": 0.00015255601318563404, "loss": 11.6706, "step": 46434 }, { "epoch": 0.9720128945826007, "grad_norm": 0.2861592471599579, "learning_rate": 0.00015255414786150394, "loss": 11.6582, "step": 46435 }, { "epoch": 0.9720338273465629, "grad_norm": 0.28361135721206665, "learning_rate": 0.00015255228251211, "loss": 11.6449, "step": 46436 }, { "epoch": 0.9720547601105249, "grad_norm": 0.3701418340206146, "learning_rate": 0.00015255041713745315, "loss": 11.6577, "step": 46437 }, { "epoch": 0.9720756928744871, "grad_norm": 0.42930352687835693, "learning_rate": 0.00015254855173753423, "loss": 11.6632, "step": 46438 }, { "epoch": 0.9720966256384493, "grad_norm": 0.25142624974250793, "learning_rate": 0.00015254668631235418, "loss": 11.6737, "step": 46439 }, { "epoch": 0.9721175584024114, "grad_norm": 0.26367542147636414, "learning_rate": 0.00015254482086191388, "loss": 11.6547, "step": 46440 }, { "epoch": 0.9721384911663736, "grad_norm": 0.3737529516220093, "learning_rate": 0.00015254295538621424, "loss": 11.6653, "step": 46441 }, { "epoch": 0.9721594239303357, "grad_norm": 0.35161522030830383, "learning_rate": 0.00015254108988525618, "loss": 11.6687, "step": 46442 }, { "epoch": 0.9721803566942979, "grad_norm": 0.3032298684120178, "learning_rate": 0.0001525392243590405, "loss": 11.6747, "step": 46443 }, { "epoch": 0.9722012894582601, "grad_norm": 0.3051307797431946, "learning_rate": 0.0001525373588075682, "loss": 11.6515, "step": 46444 }, { "epoch": 0.9722222222222222, "grad_norm": 0.34344789385795593, "learning_rate": 0.00015253549323084012, "loss": 11.6779, "step": 46445 }, { "epoch": 0.9722431549861844, "grad_norm": 0.3203549385070801, "learning_rate": 0.0001525336276288572, "loss": 11.6518, "step": 46446 }, { "epoch": 0.9722640877501465, "grad_norm": 0.3114127516746521, "learning_rate": 0.00015253176200162032, "loss": 11.6759, "step": 46447 }, { "epoch": 0.9722850205141087, "grad_norm": 0.2680329382419586, "learning_rate": 0.00015252989634913033, "loss": 11.6624, "step": 46448 }, { "epoch": 0.9723059532780708, "grad_norm": 0.29781344532966614, "learning_rate": 0.0001525280306713882, "loss": 11.6745, "step": 46449 }, { "epoch": 0.972326886042033, "grad_norm": 0.43347877264022827, "learning_rate": 0.00015252616496839476, "loss": 11.6622, "step": 46450 }, { "epoch": 0.9723478188059952, "grad_norm": 0.32670772075653076, "learning_rate": 0.00015252429924015096, "loss": 11.6635, "step": 46451 }, { "epoch": 0.9723687515699573, "grad_norm": 0.32049497961997986, "learning_rate": 0.0001525224334866577, "loss": 11.664, "step": 46452 }, { "epoch": 0.9723896843339195, "grad_norm": 0.3437923192977905, "learning_rate": 0.0001525205677079158, "loss": 11.6748, "step": 46453 }, { "epoch": 0.9724106170978816, "grad_norm": 0.37688618898391724, "learning_rate": 0.00015251870190392626, "loss": 11.6614, "step": 46454 }, { "epoch": 0.9724315498618438, "grad_norm": 0.3783825933933258, "learning_rate": 0.0001525168360746899, "loss": 11.6589, "step": 46455 }, { "epoch": 0.9724524826258059, "grad_norm": 0.3572072982788086, "learning_rate": 0.00015251497022020764, "loss": 11.659, "step": 46456 }, { "epoch": 0.9724734153897681, "grad_norm": 0.31358811259269714, "learning_rate": 0.00015251310434048038, "loss": 11.6834, "step": 46457 }, { "epoch": 0.9724943481537303, "grad_norm": 0.2814389169216156, "learning_rate": 0.00015251123843550905, "loss": 11.6662, "step": 46458 }, { "epoch": 0.9725152809176923, "grad_norm": 0.3300488293170929, "learning_rate": 0.00015250937250529449, "loss": 11.6551, "step": 46459 }, { "epoch": 0.9725362136816545, "grad_norm": 0.2692868113517761, "learning_rate": 0.00015250750654983763, "loss": 11.6791, "step": 46460 }, { "epoch": 0.9725571464456166, "grad_norm": 0.30075716972351074, "learning_rate": 0.00015250564056913936, "loss": 11.6672, "step": 46461 }, { "epoch": 0.9725780792095788, "grad_norm": 0.24116481840610504, "learning_rate": 0.00015250377456320057, "loss": 11.6566, "step": 46462 }, { "epoch": 0.972599011973541, "grad_norm": 0.3072110712528229, "learning_rate": 0.00015250190853202217, "loss": 11.6814, "step": 46463 }, { "epoch": 0.9726199447375031, "grad_norm": 0.28964588046073914, "learning_rate": 0.00015250004247560506, "loss": 11.6608, "step": 46464 }, { "epoch": 0.9726408775014653, "grad_norm": 0.5419697761535645, "learning_rate": 0.0001524981763939501, "loss": 11.6846, "step": 46465 }, { "epoch": 0.9726618102654274, "grad_norm": 0.34696561098098755, "learning_rate": 0.0001524963102870582, "loss": 11.6625, "step": 46466 }, { "epoch": 0.9726827430293896, "grad_norm": 0.35505107045173645, "learning_rate": 0.00015249444415493034, "loss": 11.6775, "step": 46467 }, { "epoch": 0.9727036757933517, "grad_norm": 0.3247191905975342, "learning_rate": 0.00015249257799756732, "loss": 11.6695, "step": 46468 }, { "epoch": 0.9727246085573139, "grad_norm": 0.2724229693412781, "learning_rate": 0.00015249071181497006, "loss": 11.6594, "step": 46469 }, { "epoch": 0.9727455413212761, "grad_norm": 0.35739603638648987, "learning_rate": 0.00015248884560713943, "loss": 11.6682, "step": 46470 }, { "epoch": 0.9727664740852382, "grad_norm": 0.3261696398258209, "learning_rate": 0.0001524869793740764, "loss": 11.6522, "step": 46471 }, { "epoch": 0.9727874068492004, "grad_norm": 0.32513687014579773, "learning_rate": 0.00015248511311578182, "loss": 11.6652, "step": 46472 }, { "epoch": 0.9728083396131625, "grad_norm": 0.2923119366168976, "learning_rate": 0.00015248324683225656, "loss": 11.6767, "step": 46473 }, { "epoch": 0.9728292723771247, "grad_norm": 0.5158812999725342, "learning_rate": 0.00015248138052350162, "loss": 11.6729, "step": 46474 }, { "epoch": 0.9728502051410868, "grad_norm": 0.2952289283275604, "learning_rate": 0.00015247951418951775, "loss": 11.6571, "step": 46475 }, { "epoch": 0.972871137905049, "grad_norm": 0.2684095501899719, "learning_rate": 0.000152477647830306, "loss": 11.6702, "step": 46476 }, { "epoch": 0.9728920706690112, "grad_norm": 0.3703606128692627, "learning_rate": 0.00015247578144586714, "loss": 11.6792, "step": 46477 }, { "epoch": 0.9729130034329733, "grad_norm": 0.3069256842136383, "learning_rate": 0.00015247391503620214, "loss": 11.6526, "step": 46478 }, { "epoch": 0.9729339361969355, "grad_norm": 0.37007808685302734, "learning_rate": 0.0001524720486013119, "loss": 11.6593, "step": 46479 }, { "epoch": 0.9729548689608976, "grad_norm": 0.3087119460105896, "learning_rate": 0.00015247018214119726, "loss": 11.6524, "step": 46480 }, { "epoch": 0.9729758017248598, "grad_norm": 0.3135026693344116, "learning_rate": 0.00015246831565585918, "loss": 11.6778, "step": 46481 }, { "epoch": 0.972996734488822, "grad_norm": 0.27460840344429016, "learning_rate": 0.0001524664491452985, "loss": 11.6831, "step": 46482 }, { "epoch": 0.973017667252784, "grad_norm": 0.3349740505218506, "learning_rate": 0.00015246458260951616, "loss": 11.6609, "step": 46483 }, { "epoch": 0.9730386000167462, "grad_norm": 0.2703973054885864, "learning_rate": 0.00015246271604851304, "loss": 11.6683, "step": 46484 }, { "epoch": 0.9730595327807083, "grad_norm": 0.35826462507247925, "learning_rate": 0.00015246084946229002, "loss": 11.6882, "step": 46485 }, { "epoch": 0.9730804655446705, "grad_norm": 0.26639875769615173, "learning_rate": 0.00015245898285084808, "loss": 11.6731, "step": 46486 }, { "epoch": 0.9731013983086326, "grad_norm": 0.4279508590698242, "learning_rate": 0.00015245711621418798, "loss": 11.6644, "step": 46487 }, { "epoch": 0.9731223310725948, "grad_norm": 0.34341567754745483, "learning_rate": 0.00015245524955231074, "loss": 11.6783, "step": 46488 }, { "epoch": 0.973143263836557, "grad_norm": 0.3454054892063141, "learning_rate": 0.00015245338286521722, "loss": 11.6866, "step": 46489 }, { "epoch": 0.9731641966005191, "grad_norm": 0.33247607946395874, "learning_rate": 0.00015245151615290826, "loss": 11.6622, "step": 46490 }, { "epoch": 0.9731851293644813, "grad_norm": 0.3015282154083252, "learning_rate": 0.00015244964941538486, "loss": 11.6745, "step": 46491 }, { "epoch": 0.9732060621284434, "grad_norm": 0.3463168442249298, "learning_rate": 0.00015244778265264782, "loss": 11.6745, "step": 46492 }, { "epoch": 0.9732269948924056, "grad_norm": 0.30893948674201965, "learning_rate": 0.0001524459158646981, "loss": 11.6656, "step": 46493 }, { "epoch": 0.9732479276563677, "grad_norm": 0.33305275440216064, "learning_rate": 0.0001524440490515366, "loss": 11.6824, "step": 46494 }, { "epoch": 0.9732688604203299, "grad_norm": 0.34613531827926636, "learning_rate": 0.00015244218221316413, "loss": 11.6733, "step": 46495 }, { "epoch": 0.9732897931842921, "grad_norm": 0.3777758777141571, "learning_rate": 0.00015244031534958173, "loss": 11.6753, "step": 46496 }, { "epoch": 0.9733107259482542, "grad_norm": 0.6022762060165405, "learning_rate": 0.00015243844846079017, "loss": 11.6779, "step": 46497 }, { "epoch": 0.9733316587122164, "grad_norm": 0.30130475759506226, "learning_rate": 0.00015243658154679042, "loss": 11.6878, "step": 46498 }, { "epoch": 0.9733525914761785, "grad_norm": 0.23296257853507996, "learning_rate": 0.00015243471460758332, "loss": 11.6651, "step": 46499 }, { "epoch": 0.9733735242401407, "grad_norm": 0.39735132455825806, "learning_rate": 0.00015243284764316983, "loss": 11.655, "step": 46500 }, { "epoch": 0.9733944570041029, "grad_norm": 0.30352458357810974, "learning_rate": 0.00015243098065355083, "loss": 11.6769, "step": 46501 }, { "epoch": 0.973415389768065, "grad_norm": 0.3017444908618927, "learning_rate": 0.00015242911363872716, "loss": 11.6727, "step": 46502 }, { "epoch": 0.9734363225320272, "grad_norm": 0.37398049235343933, "learning_rate": 0.0001524272465986998, "loss": 11.6762, "step": 46503 }, { "epoch": 0.9734572552959893, "grad_norm": 0.2606630027294159, "learning_rate": 0.0001524253795334696, "loss": 11.6856, "step": 46504 }, { "epoch": 0.9734781880599515, "grad_norm": 0.2550336718559265, "learning_rate": 0.00015242351244303747, "loss": 11.6775, "step": 46505 }, { "epoch": 0.9734991208239135, "grad_norm": 0.3029671311378479, "learning_rate": 0.00015242164532740433, "loss": 11.6711, "step": 46506 }, { "epoch": 0.9735200535878757, "grad_norm": 0.3410792648792267, "learning_rate": 0.00015241977818657103, "loss": 11.6782, "step": 46507 }, { "epoch": 0.973540986351838, "grad_norm": 0.22459127008914948, "learning_rate": 0.0001524179110205385, "loss": 11.6602, "step": 46508 }, { "epoch": 0.9735619191158, "grad_norm": 0.34691256284713745, "learning_rate": 0.00015241604382930764, "loss": 11.6765, "step": 46509 }, { "epoch": 0.9735828518797622, "grad_norm": 0.2935899496078491, "learning_rate": 0.00015241417661287932, "loss": 11.6675, "step": 46510 }, { "epoch": 0.9736037846437243, "grad_norm": 0.26564499735832214, "learning_rate": 0.00015241230937125446, "loss": 11.6593, "step": 46511 }, { "epoch": 0.9736247174076865, "grad_norm": 0.2885434031486511, "learning_rate": 0.00015241044210443394, "loss": 11.6767, "step": 46512 }, { "epoch": 0.9736456501716486, "grad_norm": 0.30132177472114563, "learning_rate": 0.0001524085748124187, "loss": 11.6618, "step": 46513 }, { "epoch": 0.9736665829356108, "grad_norm": 0.30064940452575684, "learning_rate": 0.00015240670749520958, "loss": 11.6686, "step": 46514 }, { "epoch": 0.973687515699573, "grad_norm": 0.2923260033130646, "learning_rate": 0.00015240484015280754, "loss": 11.6584, "step": 46515 }, { "epoch": 0.9737084484635351, "grad_norm": 0.22966143488883972, "learning_rate": 0.00015240297278521338, "loss": 11.6648, "step": 46516 }, { "epoch": 0.9737293812274973, "grad_norm": 0.30700650811195374, "learning_rate": 0.00015240110539242814, "loss": 11.6563, "step": 46517 }, { "epoch": 0.9737503139914594, "grad_norm": 0.3477932810783386, "learning_rate": 0.0001523992379744526, "loss": 11.6514, "step": 46518 }, { "epoch": 0.9737712467554216, "grad_norm": 0.2615073621273041, "learning_rate": 0.00015239737053128767, "loss": 11.6499, "step": 46519 }, { "epoch": 0.9737921795193838, "grad_norm": 0.3901633620262146, "learning_rate": 0.00015239550306293432, "loss": 11.678, "step": 46520 }, { "epoch": 0.9738131122833459, "grad_norm": 0.3219183087348938, "learning_rate": 0.00015239363556939335, "loss": 11.6799, "step": 46521 }, { "epoch": 0.9738340450473081, "grad_norm": 0.262248158454895, "learning_rate": 0.00015239176805066578, "loss": 11.6685, "step": 46522 }, { "epoch": 0.9738549778112702, "grad_norm": 0.2864891290664673, "learning_rate": 0.00015238990050675237, "loss": 11.6793, "step": 46523 }, { "epoch": 0.9738759105752324, "grad_norm": 0.3487652540206909, "learning_rate": 0.00015238803293765414, "loss": 11.6632, "step": 46524 }, { "epoch": 0.9738968433391945, "grad_norm": 0.3921833336353302, "learning_rate": 0.0001523861653433719, "loss": 11.6723, "step": 46525 }, { "epoch": 0.9739177761031567, "grad_norm": 0.40959981083869934, "learning_rate": 0.00015238429772390655, "loss": 11.6811, "step": 46526 }, { "epoch": 0.9739387088671189, "grad_norm": 0.3157767355442047, "learning_rate": 0.00015238243007925908, "loss": 11.6608, "step": 46527 }, { "epoch": 0.973959641631081, "grad_norm": 0.316476434469223, "learning_rate": 0.00015238056240943028, "loss": 11.6636, "step": 46528 }, { "epoch": 0.9739805743950432, "grad_norm": 0.33746886253356934, "learning_rate": 0.00015237869471442113, "loss": 11.6547, "step": 46529 }, { "epoch": 0.9740015071590052, "grad_norm": 0.31619346141815186, "learning_rate": 0.00015237682699423245, "loss": 11.6575, "step": 46530 }, { "epoch": 0.9740224399229674, "grad_norm": 0.3201843202114105, "learning_rate": 0.00015237495924886525, "loss": 11.6595, "step": 46531 }, { "epoch": 0.9740433726869295, "grad_norm": 0.2677445113658905, "learning_rate": 0.00015237309147832028, "loss": 11.6547, "step": 46532 }, { "epoch": 0.9740643054508917, "grad_norm": 0.29260027408599854, "learning_rate": 0.00015237122368259856, "loss": 11.6809, "step": 46533 }, { "epoch": 0.9740852382148539, "grad_norm": 0.32671216130256653, "learning_rate": 0.00015236935586170094, "loss": 11.6549, "step": 46534 }, { "epoch": 0.974106170978816, "grad_norm": 0.27763885259628296, "learning_rate": 0.0001523674880156283, "loss": 11.669, "step": 46535 }, { "epoch": 0.9741271037427782, "grad_norm": 0.38139617443084717, "learning_rate": 0.00015236562014438158, "loss": 11.6589, "step": 46536 }, { "epoch": 0.9741480365067403, "grad_norm": 0.35556453466415405, "learning_rate": 0.00015236375224796165, "loss": 11.6708, "step": 46537 }, { "epoch": 0.9741689692707025, "grad_norm": 0.29842081665992737, "learning_rate": 0.0001523618843263694, "loss": 11.666, "step": 46538 }, { "epoch": 0.9741899020346647, "grad_norm": 0.2944002151489258, "learning_rate": 0.00015236001637960577, "loss": 11.666, "step": 46539 }, { "epoch": 0.9742108347986268, "grad_norm": 0.3397136628627777, "learning_rate": 0.0001523581484076716, "loss": 11.6733, "step": 46540 }, { "epoch": 0.974231767562589, "grad_norm": 0.3260304629802704, "learning_rate": 0.00015235628041056787, "loss": 11.6773, "step": 46541 }, { "epoch": 0.9742527003265511, "grad_norm": 0.3719640374183655, "learning_rate": 0.00015235441238829536, "loss": 11.6706, "step": 46542 }, { "epoch": 0.9742736330905133, "grad_norm": 0.27536067366600037, "learning_rate": 0.00015235254434085507, "loss": 11.675, "step": 46543 }, { "epoch": 0.9742945658544754, "grad_norm": 0.3400845527648926, "learning_rate": 0.00015235067626824785, "loss": 11.6697, "step": 46544 }, { "epoch": 0.9743154986184376, "grad_norm": 0.32166481018066406, "learning_rate": 0.00015234880817047465, "loss": 11.6799, "step": 46545 }, { "epoch": 0.9743364313823998, "grad_norm": 0.315168172121048, "learning_rate": 0.0001523469400475363, "loss": 11.6669, "step": 46546 }, { "epoch": 0.9743573641463619, "grad_norm": 0.3109472095966339, "learning_rate": 0.0001523450718994337, "loss": 11.6763, "step": 46547 }, { "epoch": 0.9743782969103241, "grad_norm": 0.2687886655330658, "learning_rate": 0.00015234320372616781, "loss": 11.6638, "step": 46548 }, { "epoch": 0.9743992296742862, "grad_norm": 0.30198195576667786, "learning_rate": 0.00015234133552773947, "loss": 11.6636, "step": 46549 }, { "epoch": 0.9744201624382484, "grad_norm": 0.2862834334373474, "learning_rate": 0.00015233946730414965, "loss": 11.6719, "step": 46550 }, { "epoch": 0.9744410952022104, "grad_norm": 0.3099268674850464, "learning_rate": 0.00015233759905539915, "loss": 11.6505, "step": 46551 }, { "epoch": 0.9744620279661727, "grad_norm": 0.4092341959476471, "learning_rate": 0.00015233573078148895, "loss": 11.661, "step": 46552 }, { "epoch": 0.9744829607301349, "grad_norm": 0.32042255997657776, "learning_rate": 0.0001523338624824199, "loss": 11.6656, "step": 46553 }, { "epoch": 0.9745038934940969, "grad_norm": 0.310026615858078, "learning_rate": 0.00015233199415819288, "loss": 11.6666, "step": 46554 }, { "epoch": 0.9745248262580591, "grad_norm": 0.2890414297580719, "learning_rate": 0.00015233012580880887, "loss": 11.671, "step": 46555 }, { "epoch": 0.9745457590220212, "grad_norm": 0.4033280909061432, "learning_rate": 0.0001523282574342687, "loss": 11.683, "step": 46556 }, { "epoch": 0.9745666917859834, "grad_norm": 0.2596469223499298, "learning_rate": 0.0001523263890345733, "loss": 11.6632, "step": 46557 }, { "epoch": 0.9745876245499456, "grad_norm": 0.27793407440185547, "learning_rate": 0.00015232452060972353, "loss": 11.6647, "step": 46558 }, { "epoch": 0.9746085573139077, "grad_norm": 0.28600937128067017, "learning_rate": 0.00015232265215972033, "loss": 11.6706, "step": 46559 }, { "epoch": 0.9746294900778699, "grad_norm": 0.2811439633369446, "learning_rate": 0.0001523207836845646, "loss": 11.674, "step": 46560 }, { "epoch": 0.974650422841832, "grad_norm": 0.3180931806564331, "learning_rate": 0.00015231891518425718, "loss": 11.6597, "step": 46561 }, { "epoch": 0.9746713556057942, "grad_norm": 0.3584991991519928, "learning_rate": 0.00015231704665879904, "loss": 11.6727, "step": 46562 }, { "epoch": 0.9746922883697563, "grad_norm": 0.3050779104232788, "learning_rate": 0.00015231517810819105, "loss": 11.6689, "step": 46563 }, { "epoch": 0.9747132211337185, "grad_norm": 0.2239956259727478, "learning_rate": 0.0001523133095324341, "loss": 11.6751, "step": 46564 }, { "epoch": 0.9747341538976807, "grad_norm": 0.4623609185218811, "learning_rate": 0.00015231144093152908, "loss": 11.6678, "step": 46565 }, { "epoch": 0.9747550866616428, "grad_norm": 0.3164673447608948, "learning_rate": 0.0001523095723054769, "loss": 11.6751, "step": 46566 }, { "epoch": 0.974776019425605, "grad_norm": 0.3514898419380188, "learning_rate": 0.0001523077036542785, "loss": 11.675, "step": 46567 }, { "epoch": 0.9747969521895671, "grad_norm": 0.2873101830482483, "learning_rate": 0.00015230583497793468, "loss": 11.6772, "step": 46568 }, { "epoch": 0.9748178849535293, "grad_norm": 0.3890964388847351, "learning_rate": 0.00015230396627644645, "loss": 11.6656, "step": 46569 }, { "epoch": 0.9748388177174914, "grad_norm": 0.31087273359298706, "learning_rate": 0.00015230209754981463, "loss": 11.6682, "step": 46570 }, { "epoch": 0.9748597504814536, "grad_norm": 0.40055060386657715, "learning_rate": 0.00015230022879804013, "loss": 11.6511, "step": 46571 }, { "epoch": 0.9748806832454158, "grad_norm": 0.3042412996292114, "learning_rate": 0.0001522983600211239, "loss": 11.6752, "step": 46572 }, { "epoch": 0.9749016160093779, "grad_norm": 0.348126620054245, "learning_rate": 0.00015229649121906676, "loss": 11.6601, "step": 46573 }, { "epoch": 0.9749225487733401, "grad_norm": 0.25771766901016235, "learning_rate": 0.00015229462239186968, "loss": 11.6738, "step": 46574 }, { "epoch": 0.9749434815373021, "grad_norm": 0.2967924475669861, "learning_rate": 0.0001522927535395335, "loss": 11.6704, "step": 46575 }, { "epoch": 0.9749644143012643, "grad_norm": 0.3354228138923645, "learning_rate": 0.00015229088466205915, "loss": 11.6802, "step": 46576 }, { "epoch": 0.9749853470652264, "grad_norm": 0.3870757818222046, "learning_rate": 0.00015228901575944757, "loss": 11.6472, "step": 46577 }, { "epoch": 0.9750062798291886, "grad_norm": 0.35933324694633484, "learning_rate": 0.00015228714683169955, "loss": 11.6887, "step": 46578 }, { "epoch": 0.9750272125931508, "grad_norm": 0.3117930293083191, "learning_rate": 0.0001522852778788161, "loss": 11.6553, "step": 46579 }, { "epoch": 0.9750481453571129, "grad_norm": 0.34395962953567505, "learning_rate": 0.000152283408900798, "loss": 11.6857, "step": 46580 }, { "epoch": 0.9750690781210751, "grad_norm": 0.20963187515735626, "learning_rate": 0.0001522815398976463, "loss": 11.6581, "step": 46581 }, { "epoch": 0.9750900108850372, "grad_norm": 0.3767756223678589, "learning_rate": 0.00015227967086936176, "loss": 11.6696, "step": 46582 }, { "epoch": 0.9751109436489994, "grad_norm": 0.3091091513633728, "learning_rate": 0.00015227780181594538, "loss": 11.6628, "step": 46583 }, { "epoch": 0.9751318764129616, "grad_norm": 0.3098178803920746, "learning_rate": 0.00015227593273739798, "loss": 11.6712, "step": 46584 }, { "epoch": 0.9751528091769237, "grad_norm": 0.26868945360183716, "learning_rate": 0.0001522740636337205, "loss": 11.6557, "step": 46585 }, { "epoch": 0.9751737419408859, "grad_norm": 0.28394532203674316, "learning_rate": 0.00015227219450491384, "loss": 11.6613, "step": 46586 }, { "epoch": 0.975194674704848, "grad_norm": 0.370152086019516, "learning_rate": 0.00015227032535097888, "loss": 11.6727, "step": 46587 }, { "epoch": 0.9752156074688102, "grad_norm": 0.24194930493831635, "learning_rate": 0.00015226845617191648, "loss": 11.6843, "step": 46588 }, { "epoch": 0.9752365402327723, "grad_norm": 0.2891991138458252, "learning_rate": 0.00015226658696772765, "loss": 11.6527, "step": 46589 }, { "epoch": 0.9752574729967345, "grad_norm": 0.3283652067184448, "learning_rate": 0.0001522647177384132, "loss": 11.6647, "step": 46590 }, { "epoch": 0.9752784057606967, "grad_norm": 0.28951919078826904, "learning_rate": 0.00015226284848397404, "loss": 11.6615, "step": 46591 }, { "epoch": 0.9752993385246588, "grad_norm": 0.3393386900424957, "learning_rate": 0.00015226097920441111, "loss": 11.6644, "step": 46592 }, { "epoch": 0.975320271288621, "grad_norm": 0.37471523880958557, "learning_rate": 0.00015225910989972525, "loss": 11.6589, "step": 46593 }, { "epoch": 0.9753412040525831, "grad_norm": 0.40749555826187134, "learning_rate": 0.0001522572405699174, "loss": 11.6735, "step": 46594 }, { "epoch": 0.9753621368165453, "grad_norm": 0.34037625789642334, "learning_rate": 0.00015225537121498847, "loss": 11.6659, "step": 46595 }, { "epoch": 0.9753830695805074, "grad_norm": 0.3374333381652832, "learning_rate": 0.00015225350183493933, "loss": 11.6874, "step": 46596 }, { "epoch": 0.9754040023444696, "grad_norm": 0.49598073959350586, "learning_rate": 0.00015225163242977088, "loss": 11.6822, "step": 46597 }, { "epoch": 0.9754249351084318, "grad_norm": 0.3031013011932373, "learning_rate": 0.000152249762999484, "loss": 11.6699, "step": 46598 }, { "epoch": 0.9754458678723938, "grad_norm": 0.2605406939983368, "learning_rate": 0.00015224789354407965, "loss": 11.6648, "step": 46599 }, { "epoch": 0.975466800636356, "grad_norm": 0.28428828716278076, "learning_rate": 0.00015224602406355866, "loss": 11.6729, "step": 46600 }, { "epoch": 0.9754877334003181, "grad_norm": 0.3404255509376526, "learning_rate": 0.000152244154557922, "loss": 11.6739, "step": 46601 }, { "epoch": 0.9755086661642803, "grad_norm": 0.341050386428833, "learning_rate": 0.00015224228502717046, "loss": 11.6699, "step": 46602 }, { "epoch": 0.9755295989282425, "grad_norm": 0.2571382522583008, "learning_rate": 0.00015224041547130503, "loss": 11.6677, "step": 46603 }, { "epoch": 0.9755505316922046, "grad_norm": 0.3216641843318939, "learning_rate": 0.0001522385458903266, "loss": 11.6849, "step": 46604 }, { "epoch": 0.9755714644561668, "grad_norm": 0.32504138350486755, "learning_rate": 0.00015223667628423606, "loss": 11.6563, "step": 46605 }, { "epoch": 0.9755923972201289, "grad_norm": 0.3614078462123871, "learning_rate": 0.0001522348066530343, "loss": 11.6808, "step": 46606 }, { "epoch": 0.9756133299840911, "grad_norm": 0.34467175602912903, "learning_rate": 0.00015223293699672224, "loss": 11.6506, "step": 46607 }, { "epoch": 0.9756342627480532, "grad_norm": 0.36012333631515503, "learning_rate": 0.00015223106731530073, "loss": 11.6748, "step": 46608 }, { "epoch": 0.9756551955120154, "grad_norm": 0.27408990263938904, "learning_rate": 0.0001522291976087707, "loss": 11.6791, "step": 46609 }, { "epoch": 0.9756761282759776, "grad_norm": 0.25765249133110046, "learning_rate": 0.00015222732787713303, "loss": 11.6716, "step": 46610 }, { "epoch": 0.9756970610399397, "grad_norm": 0.30726900696754456, "learning_rate": 0.00015222545812038868, "loss": 11.6664, "step": 46611 }, { "epoch": 0.9757179938039019, "grad_norm": 0.24184714257717133, "learning_rate": 0.00015222358833853847, "loss": 11.6736, "step": 46612 }, { "epoch": 0.975738926567864, "grad_norm": 0.46329623460769653, "learning_rate": 0.00015222171853158337, "loss": 11.6803, "step": 46613 }, { "epoch": 0.9757598593318262, "grad_norm": 0.26580822467803955, "learning_rate": 0.00015221984869952425, "loss": 11.6419, "step": 46614 }, { "epoch": 0.9757807920957883, "grad_norm": 0.3238963186740875, "learning_rate": 0.00015221797884236194, "loss": 11.6605, "step": 46615 }, { "epoch": 0.9758017248597505, "grad_norm": 0.3079971373081207, "learning_rate": 0.00015221610896009745, "loss": 11.6715, "step": 46616 }, { "epoch": 0.9758226576237127, "grad_norm": 0.2937064468860626, "learning_rate": 0.0001522142390527316, "loss": 11.6591, "step": 46617 }, { "epoch": 0.9758435903876748, "grad_norm": 0.2653334140777588, "learning_rate": 0.00015221236912026534, "loss": 11.6759, "step": 46618 }, { "epoch": 0.975864523151637, "grad_norm": 0.2909926474094391, "learning_rate": 0.00015221049916269957, "loss": 11.6661, "step": 46619 }, { "epoch": 0.975885455915599, "grad_norm": 0.3298153877258301, "learning_rate": 0.00015220862918003514, "loss": 11.6678, "step": 46620 }, { "epoch": 0.9759063886795613, "grad_norm": 0.32365089654922485, "learning_rate": 0.00015220675917227298, "loss": 11.6726, "step": 46621 }, { "epoch": 0.9759273214435235, "grad_norm": 0.2746606171131134, "learning_rate": 0.00015220488913941397, "loss": 11.6534, "step": 46622 }, { "epoch": 0.9759482542074855, "grad_norm": 0.3365807831287384, "learning_rate": 0.00015220301908145905, "loss": 11.6644, "step": 46623 }, { "epoch": 0.9759691869714477, "grad_norm": 0.3558206558227539, "learning_rate": 0.00015220114899840907, "loss": 11.6802, "step": 46624 }, { "epoch": 0.9759901197354098, "grad_norm": 0.4372502565383911, "learning_rate": 0.00015219927889026496, "loss": 11.6937, "step": 46625 }, { "epoch": 0.976011052499372, "grad_norm": 0.3062995672225952, "learning_rate": 0.00015219740875702762, "loss": 11.6596, "step": 46626 }, { "epoch": 0.9760319852633341, "grad_norm": 0.3124406933784485, "learning_rate": 0.0001521955385986979, "loss": 11.674, "step": 46627 }, { "epoch": 0.9760529180272963, "grad_norm": 0.3288173973560333, "learning_rate": 0.0001521936684152768, "loss": 11.6809, "step": 46628 }, { "epoch": 0.9760738507912585, "grad_norm": 0.2515609860420227, "learning_rate": 0.0001521917982067651, "loss": 11.6516, "step": 46629 }, { "epoch": 0.9760947835552206, "grad_norm": 0.34062862396240234, "learning_rate": 0.0001521899279731638, "loss": 11.692, "step": 46630 }, { "epoch": 0.9761157163191828, "grad_norm": 0.33533138036727905, "learning_rate": 0.00015218805771447373, "loss": 11.6733, "step": 46631 }, { "epoch": 0.9761366490831449, "grad_norm": 0.28172895312309265, "learning_rate": 0.0001521861874306958, "loss": 11.6644, "step": 46632 }, { "epoch": 0.9761575818471071, "grad_norm": 0.33419424295425415, "learning_rate": 0.000152184317121831, "loss": 11.6708, "step": 46633 }, { "epoch": 0.9761785146110692, "grad_norm": 0.3792068362236023, "learning_rate": 0.00015218244678788008, "loss": 11.672, "step": 46634 }, { "epoch": 0.9761994473750314, "grad_norm": 0.3463071286678314, "learning_rate": 0.00015218057642884403, "loss": 11.6753, "step": 46635 }, { "epoch": 0.9762203801389936, "grad_norm": 0.35111358761787415, "learning_rate": 0.00015217870604472374, "loss": 11.6697, "step": 46636 }, { "epoch": 0.9762413129029557, "grad_norm": 0.28140023350715637, "learning_rate": 0.00015217683563552008, "loss": 11.6759, "step": 46637 }, { "epoch": 0.9762622456669179, "grad_norm": 0.31106534600257874, "learning_rate": 0.000152174965201234, "loss": 11.6657, "step": 46638 }, { "epoch": 0.97628317843088, "grad_norm": 0.3333648145198822, "learning_rate": 0.00015217309474186634, "loss": 11.6699, "step": 46639 }, { "epoch": 0.9763041111948422, "grad_norm": 0.3832923471927643, "learning_rate": 0.00015217122425741805, "loss": 11.6767, "step": 46640 }, { "epoch": 0.9763250439588044, "grad_norm": 0.27607524394989014, "learning_rate": 0.00015216935374788996, "loss": 11.6635, "step": 46641 }, { "epoch": 0.9763459767227665, "grad_norm": 0.37968605756759644, "learning_rate": 0.00015216748321328306, "loss": 11.6506, "step": 46642 }, { "epoch": 0.9763669094867287, "grad_norm": 0.24900156259536743, "learning_rate": 0.0001521656126535982, "loss": 11.66, "step": 46643 }, { "epoch": 0.9763878422506908, "grad_norm": 0.340280681848526, "learning_rate": 0.0001521637420688363, "loss": 11.6402, "step": 46644 }, { "epoch": 0.976408775014653, "grad_norm": 0.31018760800361633, "learning_rate": 0.00015216187145899822, "loss": 11.6532, "step": 46645 }, { "epoch": 0.976429707778615, "grad_norm": 0.32745125889778137, "learning_rate": 0.0001521600008240849, "loss": 11.6905, "step": 46646 }, { "epoch": 0.9764506405425772, "grad_norm": 0.33871525526046753, "learning_rate": 0.0001521581301640972, "loss": 11.6749, "step": 46647 }, { "epoch": 0.9764715733065394, "grad_norm": 0.3206920325756073, "learning_rate": 0.00015215625947903605, "loss": 11.6729, "step": 46648 }, { "epoch": 0.9764925060705015, "grad_norm": 0.34017816185951233, "learning_rate": 0.00015215438876890235, "loss": 11.6672, "step": 46649 }, { "epoch": 0.9765134388344637, "grad_norm": 0.37802284955978394, "learning_rate": 0.00015215251803369698, "loss": 11.7052, "step": 46650 }, { "epoch": 0.9765343715984258, "grad_norm": 0.28592485189437866, "learning_rate": 0.00015215064727342085, "loss": 11.6741, "step": 46651 }, { "epoch": 0.976555304362388, "grad_norm": 0.3074706792831421, "learning_rate": 0.00015214877648807488, "loss": 11.6747, "step": 46652 }, { "epoch": 0.9765762371263501, "grad_norm": 0.30792510509490967, "learning_rate": 0.00015214690567765993, "loss": 11.653, "step": 46653 }, { "epoch": 0.9765971698903123, "grad_norm": 0.3011753559112549, "learning_rate": 0.0001521450348421769, "loss": 11.6632, "step": 46654 }, { "epoch": 0.9766181026542745, "grad_norm": 0.28481873869895935, "learning_rate": 0.00015214316398162674, "loss": 11.6828, "step": 46655 }, { "epoch": 0.9766390354182366, "grad_norm": 0.3859304189682007, "learning_rate": 0.0001521412930960103, "loss": 11.6667, "step": 46656 }, { "epoch": 0.9766599681821988, "grad_norm": 0.3009440302848816, "learning_rate": 0.0001521394221853285, "loss": 11.658, "step": 46657 }, { "epoch": 0.9766809009461609, "grad_norm": 0.22618752717971802, "learning_rate": 0.00015213755124958222, "loss": 11.6826, "step": 46658 }, { "epoch": 0.9767018337101231, "grad_norm": 0.30852481722831726, "learning_rate": 0.00015213568028877238, "loss": 11.6669, "step": 46659 }, { "epoch": 0.9767227664740853, "grad_norm": 0.2527966797351837, "learning_rate": 0.00015213380930289988, "loss": 11.6669, "step": 46660 }, { "epoch": 0.9767436992380474, "grad_norm": 0.39839404821395874, "learning_rate": 0.00015213193829196562, "loss": 11.6816, "step": 46661 }, { "epoch": 0.9767646320020096, "grad_norm": 0.3193838596343994, "learning_rate": 0.0001521300672559705, "loss": 11.6959, "step": 46662 }, { "epoch": 0.9767855647659717, "grad_norm": 0.2736559510231018, "learning_rate": 0.0001521281961949154, "loss": 11.6783, "step": 46663 }, { "epoch": 0.9768064975299339, "grad_norm": 0.3083673119544983, "learning_rate": 0.0001521263251088012, "loss": 11.6707, "step": 46664 }, { "epoch": 0.976827430293896, "grad_norm": 0.30067092180252075, "learning_rate": 0.00015212445399762886, "loss": 11.6685, "step": 46665 }, { "epoch": 0.9768483630578582, "grad_norm": 0.2816009521484375, "learning_rate": 0.00015212258286139923, "loss": 11.6597, "step": 46666 }, { "epoch": 0.9768692958218204, "grad_norm": 0.336078017950058, "learning_rate": 0.00015212071170011329, "loss": 11.6682, "step": 46667 }, { "epoch": 0.9768902285857824, "grad_norm": 0.29946041107177734, "learning_rate": 0.0001521188405137718, "loss": 11.66, "step": 46668 }, { "epoch": 0.9769111613497447, "grad_norm": 0.27478906512260437, "learning_rate": 0.0001521169693023758, "loss": 11.678, "step": 46669 }, { "epoch": 0.9769320941137067, "grad_norm": 0.35807591676712036, "learning_rate": 0.0001521150980659261, "loss": 11.6629, "step": 46670 }, { "epoch": 0.9769530268776689, "grad_norm": 0.26022103428840637, "learning_rate": 0.00015211322680442363, "loss": 11.6514, "step": 46671 }, { "epoch": 0.976973959641631, "grad_norm": 0.353481262922287, "learning_rate": 0.00015211135551786932, "loss": 11.6633, "step": 46672 }, { "epoch": 0.9769948924055932, "grad_norm": 0.322304904460907, "learning_rate": 0.000152109484206264, "loss": 11.6777, "step": 46673 }, { "epoch": 0.9770158251695554, "grad_norm": 0.3343968987464905, "learning_rate": 0.0001521076128696086, "loss": 11.6782, "step": 46674 }, { "epoch": 0.9770367579335175, "grad_norm": 0.38271579146385193, "learning_rate": 0.00015210574150790405, "loss": 11.6753, "step": 46675 }, { "epoch": 0.9770576906974797, "grad_norm": 0.3233407139778137, "learning_rate": 0.00015210387012115121, "loss": 11.6749, "step": 46676 }, { "epoch": 0.9770786234614418, "grad_norm": 0.3437774181365967, "learning_rate": 0.000152101998709351, "loss": 11.6864, "step": 46677 }, { "epoch": 0.977099556225404, "grad_norm": 0.3200949728488922, "learning_rate": 0.00015210012727250431, "loss": 11.663, "step": 46678 }, { "epoch": 0.9771204889893662, "grad_norm": 0.30383238196372986, "learning_rate": 0.00015209825581061207, "loss": 11.6757, "step": 46679 }, { "epoch": 0.9771414217533283, "grad_norm": 0.2412455976009369, "learning_rate": 0.00015209638432367513, "loss": 11.6716, "step": 46680 }, { "epoch": 0.9771623545172905, "grad_norm": 0.30430930852890015, "learning_rate": 0.00015209451281169442, "loss": 11.6667, "step": 46681 }, { "epoch": 0.9771832872812526, "grad_norm": 0.29265835881233215, "learning_rate": 0.00015209264127467084, "loss": 11.6664, "step": 46682 }, { "epoch": 0.9772042200452148, "grad_norm": 0.2833555042743683, "learning_rate": 0.00015209076971260527, "loss": 11.6837, "step": 46683 }, { "epoch": 0.9772251528091769, "grad_norm": 0.3132232427597046, "learning_rate": 0.00015208889812549865, "loss": 11.6579, "step": 46684 }, { "epoch": 0.9772460855731391, "grad_norm": 0.3198261260986328, "learning_rate": 0.00015208702651335184, "loss": 11.6556, "step": 46685 }, { "epoch": 0.9772670183371013, "grad_norm": 0.48982423543930054, "learning_rate": 0.00015208515487616573, "loss": 11.6758, "step": 46686 }, { "epoch": 0.9772879511010634, "grad_norm": 0.42477482557296753, "learning_rate": 0.00015208328321394125, "loss": 11.6822, "step": 46687 }, { "epoch": 0.9773088838650256, "grad_norm": 0.2925087511539459, "learning_rate": 0.0001520814115266793, "loss": 11.6878, "step": 46688 }, { "epoch": 0.9773298166289877, "grad_norm": 0.34988537430763245, "learning_rate": 0.00015207953981438078, "loss": 11.6723, "step": 46689 }, { "epoch": 0.9773507493929499, "grad_norm": 0.3080856502056122, "learning_rate": 0.00015207766807704657, "loss": 11.6731, "step": 46690 }, { "epoch": 0.977371682156912, "grad_norm": 0.3373104929924011, "learning_rate": 0.00015207579631467758, "loss": 11.6517, "step": 46691 }, { "epoch": 0.9773926149208741, "grad_norm": 0.25708144903182983, "learning_rate": 0.00015207392452727474, "loss": 11.661, "step": 46692 }, { "epoch": 0.9774135476848363, "grad_norm": 0.4531936049461365, "learning_rate": 0.0001520720527148389, "loss": 11.6826, "step": 46693 }, { "epoch": 0.9774344804487984, "grad_norm": 0.3273329734802246, "learning_rate": 0.00015207018087737097, "loss": 11.696, "step": 46694 }, { "epoch": 0.9774554132127606, "grad_norm": 0.3946114480495453, "learning_rate": 0.00015206830901487187, "loss": 11.6797, "step": 46695 }, { "epoch": 0.9774763459767227, "grad_norm": 0.3372124135494232, "learning_rate": 0.00015206643712734248, "loss": 11.6734, "step": 46696 }, { "epoch": 0.9774972787406849, "grad_norm": 0.28247106075286865, "learning_rate": 0.0001520645652147837, "loss": 11.6847, "step": 46697 }, { "epoch": 0.9775182115046471, "grad_norm": 0.2822740972042084, "learning_rate": 0.00015206269327719646, "loss": 11.671, "step": 46698 }, { "epoch": 0.9775391442686092, "grad_norm": 0.31513088941574097, "learning_rate": 0.00015206082131458167, "loss": 11.6678, "step": 46699 }, { "epoch": 0.9775600770325714, "grad_norm": 0.29173025488853455, "learning_rate": 0.00015205894932694016, "loss": 11.664, "step": 46700 }, { "epoch": 0.9775810097965335, "grad_norm": 0.2663027346134186, "learning_rate": 0.0001520570773142729, "loss": 11.6638, "step": 46701 }, { "epoch": 0.9776019425604957, "grad_norm": 0.29304784536361694, "learning_rate": 0.00015205520527658073, "loss": 11.66, "step": 46702 }, { "epoch": 0.9776228753244578, "grad_norm": 0.25312817096710205, "learning_rate": 0.00015205333321386457, "loss": 11.6771, "step": 46703 }, { "epoch": 0.97764380808842, "grad_norm": 0.31558600068092346, "learning_rate": 0.00015205146112612537, "loss": 11.6898, "step": 46704 }, { "epoch": 0.9776647408523822, "grad_norm": 0.31313183903694153, "learning_rate": 0.00015204958901336396, "loss": 11.6713, "step": 46705 }, { "epoch": 0.9776856736163443, "grad_norm": 0.3528752028942108, "learning_rate": 0.0001520477168755813, "loss": 11.659, "step": 46706 }, { "epoch": 0.9777066063803065, "grad_norm": 0.3301078975200653, "learning_rate": 0.0001520458447127782, "loss": 11.6617, "step": 46707 }, { "epoch": 0.9777275391442686, "grad_norm": 0.35748347640037537, "learning_rate": 0.00015204397252495565, "loss": 11.6614, "step": 46708 }, { "epoch": 0.9777484719082308, "grad_norm": 0.26671352982521057, "learning_rate": 0.00015204210031211454, "loss": 11.6671, "step": 46709 }, { "epoch": 0.9777694046721929, "grad_norm": 0.43769121170043945, "learning_rate": 0.00015204022807425573, "loss": 11.6831, "step": 46710 }, { "epoch": 0.9777903374361551, "grad_norm": 0.35542815923690796, "learning_rate": 0.00015203835581138015, "loss": 11.645, "step": 46711 }, { "epoch": 0.9778112702001173, "grad_norm": 0.34855106472969055, "learning_rate": 0.0001520364835234887, "loss": 11.6672, "step": 46712 }, { "epoch": 0.9778322029640794, "grad_norm": 0.31129804253578186, "learning_rate": 0.00015203461121058223, "loss": 11.684, "step": 46713 }, { "epoch": 0.9778531357280416, "grad_norm": 0.33077263832092285, "learning_rate": 0.00015203273887266173, "loss": 11.6743, "step": 46714 }, { "epoch": 0.9778740684920036, "grad_norm": 0.3075065314769745, "learning_rate": 0.000152030866509728, "loss": 11.6674, "step": 46715 }, { "epoch": 0.9778950012559658, "grad_norm": 0.40082547068595886, "learning_rate": 0.000152028994121782, "loss": 11.6734, "step": 46716 }, { "epoch": 0.977915934019928, "grad_norm": 0.2955330014228821, "learning_rate": 0.00015202712170882463, "loss": 11.6774, "step": 46717 }, { "epoch": 0.9779368667838901, "grad_norm": 0.35887596011161804, "learning_rate": 0.0001520252492708568, "loss": 11.6706, "step": 46718 }, { "epoch": 0.9779577995478523, "grad_norm": 0.26532697677612305, "learning_rate": 0.00015202337680787938, "loss": 11.6557, "step": 46719 }, { "epoch": 0.9779787323118144, "grad_norm": 0.34896305203437805, "learning_rate": 0.00015202150431989323, "loss": 11.664, "step": 46720 }, { "epoch": 0.9779996650757766, "grad_norm": 0.4059199392795563, "learning_rate": 0.00015201963180689937, "loss": 11.6767, "step": 46721 }, { "epoch": 0.9780205978397387, "grad_norm": 0.3311785161495209, "learning_rate": 0.0001520177592688986, "loss": 11.6582, "step": 46722 }, { "epoch": 0.9780415306037009, "grad_norm": 0.308869868516922, "learning_rate": 0.00015201588670589187, "loss": 11.6642, "step": 46723 }, { "epoch": 0.9780624633676631, "grad_norm": 0.3636700510978699, "learning_rate": 0.00015201401411788003, "loss": 11.6813, "step": 46724 }, { "epoch": 0.9780833961316252, "grad_norm": 0.28560712933540344, "learning_rate": 0.000152012141504864, "loss": 11.6783, "step": 46725 }, { "epoch": 0.9781043288955874, "grad_norm": 0.2740594744682312, "learning_rate": 0.00015201026886684475, "loss": 11.6596, "step": 46726 }, { "epoch": 0.9781252616595495, "grad_norm": 0.30188506841659546, "learning_rate": 0.00015200839620382309, "loss": 11.6627, "step": 46727 }, { "epoch": 0.9781461944235117, "grad_norm": 0.4202658236026764, "learning_rate": 0.00015200652351579996, "loss": 11.6782, "step": 46728 }, { "epoch": 0.9781671271874738, "grad_norm": 0.28558599948883057, "learning_rate": 0.00015200465080277624, "loss": 11.6735, "step": 46729 }, { "epoch": 0.978188059951436, "grad_norm": 0.26425009965896606, "learning_rate": 0.00015200277806475283, "loss": 11.6572, "step": 46730 }, { "epoch": 0.9782089927153982, "grad_norm": 0.268178790807724, "learning_rate": 0.00015200090530173064, "loss": 11.6654, "step": 46731 }, { "epoch": 0.9782299254793603, "grad_norm": 0.3101882040500641, "learning_rate": 0.0001519990325137106, "loss": 11.656, "step": 46732 }, { "epoch": 0.9782508582433225, "grad_norm": 0.24179509282112122, "learning_rate": 0.0001519971597006936, "loss": 11.6583, "step": 46733 }, { "epoch": 0.9782717910072846, "grad_norm": 0.31115928292274475, "learning_rate": 0.00015199528686268048, "loss": 11.6626, "step": 46734 }, { "epoch": 0.9782927237712468, "grad_norm": 0.39060935378074646, "learning_rate": 0.0001519934139996722, "loss": 11.6698, "step": 46735 }, { "epoch": 0.978313656535209, "grad_norm": 0.31206241250038147, "learning_rate": 0.00015199154111166963, "loss": 11.6634, "step": 46736 }, { "epoch": 0.978334589299171, "grad_norm": 0.5816313624382019, "learning_rate": 0.00015198966819867372, "loss": 11.6876, "step": 46737 }, { "epoch": 0.9783555220631333, "grad_norm": 0.28472593426704407, "learning_rate": 0.00015198779526068531, "loss": 11.6808, "step": 46738 }, { "epoch": 0.9783764548270953, "grad_norm": 0.3338189423084259, "learning_rate": 0.00015198592229770532, "loss": 11.6873, "step": 46739 }, { "epoch": 0.9783973875910575, "grad_norm": 0.38378751277923584, "learning_rate": 0.00015198404930973465, "loss": 11.6768, "step": 46740 }, { "epoch": 0.9784183203550196, "grad_norm": 0.3268812894821167, "learning_rate": 0.0001519821762967742, "loss": 11.6849, "step": 46741 }, { "epoch": 0.9784392531189818, "grad_norm": 0.33694323897361755, "learning_rate": 0.00015198030325882492, "loss": 11.6582, "step": 46742 }, { "epoch": 0.978460185882944, "grad_norm": 0.29016950726509094, "learning_rate": 0.00015197843019588763, "loss": 11.6645, "step": 46743 }, { "epoch": 0.9784811186469061, "grad_norm": 0.35384678840637207, "learning_rate": 0.0001519765571079633, "loss": 11.6736, "step": 46744 }, { "epoch": 0.9785020514108683, "grad_norm": 0.4196010231971741, "learning_rate": 0.00015197468399505277, "loss": 11.676, "step": 46745 }, { "epoch": 0.9785229841748304, "grad_norm": 0.3270359933376312, "learning_rate": 0.00015197281085715697, "loss": 11.6449, "step": 46746 }, { "epoch": 0.9785439169387926, "grad_norm": 0.37823477387428284, "learning_rate": 0.0001519709376942768, "loss": 11.6687, "step": 46747 }, { "epoch": 0.9785648497027547, "grad_norm": 0.3861490786075592, "learning_rate": 0.00015196906450641319, "loss": 11.6536, "step": 46748 }, { "epoch": 0.9785857824667169, "grad_norm": 0.3022296130657196, "learning_rate": 0.00015196719129356696, "loss": 11.6686, "step": 46749 }, { "epoch": 0.9786067152306791, "grad_norm": 0.39791160821914673, "learning_rate": 0.00015196531805573908, "loss": 11.6552, "step": 46750 }, { "epoch": 0.9786276479946412, "grad_norm": 0.3062218427658081, "learning_rate": 0.0001519634447929304, "loss": 11.6615, "step": 46751 }, { "epoch": 0.9786485807586034, "grad_norm": 0.331640362739563, "learning_rate": 0.0001519615715051419, "loss": 11.6724, "step": 46752 }, { "epoch": 0.9786695135225655, "grad_norm": 0.3190276026725769, "learning_rate": 0.0001519596981923744, "loss": 11.6834, "step": 46753 }, { "epoch": 0.9786904462865277, "grad_norm": 0.402850478887558, "learning_rate": 0.00015195782485462886, "loss": 11.6599, "step": 46754 }, { "epoch": 0.9787113790504898, "grad_norm": 0.2925888001918793, "learning_rate": 0.00015195595149190613, "loss": 11.676, "step": 46755 }, { "epoch": 0.978732311814452, "grad_norm": 0.30023837089538574, "learning_rate": 0.00015195407810420712, "loss": 11.6507, "step": 46756 }, { "epoch": 0.9787532445784142, "grad_norm": 0.3402196168899536, "learning_rate": 0.0001519522046915328, "loss": 11.6781, "step": 46757 }, { "epoch": 0.9787741773423763, "grad_norm": 0.332694411277771, "learning_rate": 0.00015195033125388395, "loss": 11.6837, "step": 46758 }, { "epoch": 0.9787951101063385, "grad_norm": 0.24137583374977112, "learning_rate": 0.00015194845779126158, "loss": 11.6685, "step": 46759 }, { "epoch": 0.9788160428703006, "grad_norm": 0.32096922397613525, "learning_rate": 0.0001519465843036665, "loss": 11.6903, "step": 46760 }, { "epoch": 0.9788369756342628, "grad_norm": 0.4016011655330658, "learning_rate": 0.00015194471079109972, "loss": 11.6732, "step": 46761 }, { "epoch": 0.978857908398225, "grad_norm": 0.3091438412666321, "learning_rate": 0.00015194283725356202, "loss": 11.6774, "step": 46762 }, { "epoch": 0.978878841162187, "grad_norm": 0.33471882343292236, "learning_rate": 0.00015194096369105435, "loss": 11.6675, "step": 46763 }, { "epoch": 0.9788997739261492, "grad_norm": 0.38559961318969727, "learning_rate": 0.00015193909010357767, "loss": 11.6591, "step": 46764 }, { "epoch": 0.9789207066901113, "grad_norm": 0.26455825567245483, "learning_rate": 0.0001519372164911328, "loss": 11.6627, "step": 46765 }, { "epoch": 0.9789416394540735, "grad_norm": 0.33444541692733765, "learning_rate": 0.00015193534285372066, "loss": 11.6746, "step": 46766 }, { "epoch": 0.9789625722180356, "grad_norm": 0.29780614376068115, "learning_rate": 0.00015193346919134217, "loss": 11.6716, "step": 46767 }, { "epoch": 0.9789835049819978, "grad_norm": 0.30889779329299927, "learning_rate": 0.00015193159550399825, "loss": 11.6707, "step": 46768 }, { "epoch": 0.97900443774596, "grad_norm": 0.3856408894062042, "learning_rate": 0.0001519297217916897, "loss": 11.6748, "step": 46769 }, { "epoch": 0.9790253705099221, "grad_norm": 0.2813909649848938, "learning_rate": 0.00015192784805441754, "loss": 11.6631, "step": 46770 }, { "epoch": 0.9790463032738843, "grad_norm": 0.2919476628303528, "learning_rate": 0.00015192597429218262, "loss": 11.6628, "step": 46771 }, { "epoch": 0.9790672360378464, "grad_norm": 0.2758536636829376, "learning_rate": 0.00015192410050498585, "loss": 11.6606, "step": 46772 }, { "epoch": 0.9790881688018086, "grad_norm": 0.32621416449546814, "learning_rate": 0.0001519222266928281, "loss": 11.6717, "step": 46773 }, { "epoch": 0.9791091015657707, "grad_norm": 0.29778990149497986, "learning_rate": 0.0001519203528557103, "loss": 11.6815, "step": 46774 }, { "epoch": 0.9791300343297329, "grad_norm": 0.3603340983390808, "learning_rate": 0.00015191847899363338, "loss": 11.672, "step": 46775 }, { "epoch": 0.9791509670936951, "grad_norm": 0.29823747277259827, "learning_rate": 0.0001519166051065982, "loss": 11.6695, "step": 46776 }, { "epoch": 0.9791718998576572, "grad_norm": 0.30942872166633606, "learning_rate": 0.00015191473119460563, "loss": 11.6713, "step": 46777 }, { "epoch": 0.9791928326216194, "grad_norm": 0.2712843716144562, "learning_rate": 0.00015191285725765666, "loss": 11.6748, "step": 46778 }, { "epoch": 0.9792137653855815, "grad_norm": 0.2790629267692566, "learning_rate": 0.0001519109832957521, "loss": 11.6795, "step": 46779 }, { "epoch": 0.9792346981495437, "grad_norm": 0.29920077323913574, "learning_rate": 0.0001519091093088929, "loss": 11.6606, "step": 46780 }, { "epoch": 0.9792556309135059, "grad_norm": 0.34249743819236755, "learning_rate": 0.00015190723529707992, "loss": 11.6647, "step": 46781 }, { "epoch": 0.979276563677468, "grad_norm": 0.32140234112739563, "learning_rate": 0.00015190536126031415, "loss": 11.6521, "step": 46782 }, { "epoch": 0.9792974964414302, "grad_norm": 0.27153024077415466, "learning_rate": 0.0001519034871985964, "loss": 11.6545, "step": 46783 }, { "epoch": 0.9793184292053922, "grad_norm": 0.32225021719932556, "learning_rate": 0.0001519016131119276, "loss": 11.6498, "step": 46784 }, { "epoch": 0.9793393619693544, "grad_norm": 0.33701178431510925, "learning_rate": 0.00015189973900030864, "loss": 11.6668, "step": 46785 }, { "epoch": 0.9793602947333165, "grad_norm": 0.3275355100631714, "learning_rate": 0.00015189786486374048, "loss": 11.6733, "step": 46786 }, { "epoch": 0.9793812274972787, "grad_norm": 0.49540820717811584, "learning_rate": 0.00015189599070222396, "loss": 11.6763, "step": 46787 }, { "epoch": 0.9794021602612409, "grad_norm": 0.3094286322593689, "learning_rate": 0.00015189411651575998, "loss": 11.6908, "step": 46788 }, { "epoch": 0.979423093025203, "grad_norm": 0.44264307618141174, "learning_rate": 0.00015189224230434948, "loss": 11.6577, "step": 46789 }, { "epoch": 0.9794440257891652, "grad_norm": 0.32962194085121155, "learning_rate": 0.00015189036806799333, "loss": 11.6681, "step": 46790 }, { "epoch": 0.9794649585531273, "grad_norm": 0.2663523852825165, "learning_rate": 0.0001518884938066924, "loss": 11.6475, "step": 46791 }, { "epoch": 0.9794858913170895, "grad_norm": 0.3522575795650482, "learning_rate": 0.0001518866195204477, "loss": 11.6725, "step": 46792 }, { "epoch": 0.9795068240810516, "grad_norm": 0.43352335691452026, "learning_rate": 0.00015188474520926002, "loss": 11.6694, "step": 46793 }, { "epoch": 0.9795277568450138, "grad_norm": 0.41937464475631714, "learning_rate": 0.0001518828708731303, "loss": 11.6764, "step": 46794 }, { "epoch": 0.979548689608976, "grad_norm": 0.32234078645706177, "learning_rate": 0.00015188099651205946, "loss": 11.6686, "step": 46795 }, { "epoch": 0.9795696223729381, "grad_norm": 0.2815394103527069, "learning_rate": 0.0001518791221260484, "loss": 11.6621, "step": 46796 }, { "epoch": 0.9795905551369003, "grad_norm": 0.3388986587524414, "learning_rate": 0.000151877247715098, "loss": 11.6728, "step": 46797 }, { "epoch": 0.9796114879008624, "grad_norm": 0.34480372071266174, "learning_rate": 0.00015187537327920913, "loss": 11.6671, "step": 46798 }, { "epoch": 0.9796324206648246, "grad_norm": 0.31342417001724243, "learning_rate": 0.00015187349881838277, "loss": 11.6554, "step": 46799 }, { "epoch": 0.9796533534287868, "grad_norm": 0.2807431221008301, "learning_rate": 0.00015187162433261977, "loss": 11.6567, "step": 46800 }, { "epoch": 0.9796742861927489, "grad_norm": 0.32949209213256836, "learning_rate": 0.00015186974982192103, "loss": 11.6658, "step": 46801 }, { "epoch": 0.9796952189567111, "grad_norm": 0.36716771125793457, "learning_rate": 0.0001518678752862875, "loss": 11.6852, "step": 46802 }, { "epoch": 0.9797161517206732, "grad_norm": 0.29688334465026855, "learning_rate": 0.00015186600072571998, "loss": 11.64, "step": 46803 }, { "epoch": 0.9797370844846354, "grad_norm": 0.2736639976501465, "learning_rate": 0.00015186412614021948, "loss": 11.6669, "step": 46804 }, { "epoch": 0.9797580172485975, "grad_norm": 0.315384179353714, "learning_rate": 0.0001518622515297868, "loss": 11.6635, "step": 46805 }, { "epoch": 0.9797789500125597, "grad_norm": 0.3210912048816681, "learning_rate": 0.00015186037689442295, "loss": 11.6654, "step": 46806 }, { "epoch": 0.9797998827765219, "grad_norm": 0.3848465085029602, "learning_rate": 0.0001518585022341288, "loss": 11.6898, "step": 46807 }, { "epoch": 0.979820815540484, "grad_norm": 0.34916457533836365, "learning_rate": 0.00015185662754890514, "loss": 11.6642, "step": 46808 }, { "epoch": 0.9798417483044461, "grad_norm": 0.3151469826698303, "learning_rate": 0.00015185475283875305, "loss": 11.6648, "step": 46809 }, { "epoch": 0.9798626810684082, "grad_norm": 0.3294038772583008, "learning_rate": 0.0001518528781036733, "loss": 11.6654, "step": 46810 }, { "epoch": 0.9798836138323704, "grad_norm": 0.3151484429836273, "learning_rate": 0.00015185100334366684, "loss": 11.657, "step": 46811 }, { "epoch": 0.9799045465963325, "grad_norm": 0.3591856062412262, "learning_rate": 0.00015184912855873456, "loss": 11.665, "step": 46812 }, { "epoch": 0.9799254793602947, "grad_norm": 0.4704344570636749, "learning_rate": 0.00015184725374887736, "loss": 11.6686, "step": 46813 }, { "epoch": 0.9799464121242569, "grad_norm": 0.3046317994594574, "learning_rate": 0.00015184537891409615, "loss": 11.6891, "step": 46814 }, { "epoch": 0.979967344888219, "grad_norm": 0.2458491176366806, "learning_rate": 0.00015184350405439182, "loss": 11.6477, "step": 46815 }, { "epoch": 0.9799882776521812, "grad_norm": 0.32447102665901184, "learning_rate": 0.0001518416291697653, "loss": 11.669, "step": 46816 }, { "epoch": 0.9800092104161433, "grad_norm": 0.3782075047492981, "learning_rate": 0.00015183975426021746, "loss": 11.6924, "step": 46817 }, { "epoch": 0.9800301431801055, "grad_norm": 0.3139958083629608, "learning_rate": 0.0001518378793257492, "loss": 11.6654, "step": 46818 }, { "epoch": 0.9800510759440677, "grad_norm": 0.42914941906929016, "learning_rate": 0.00015183600436636144, "loss": 11.673, "step": 46819 }, { "epoch": 0.9800720087080298, "grad_norm": 0.3457587957382202, "learning_rate": 0.00015183412938205506, "loss": 11.6566, "step": 46820 }, { "epoch": 0.980092941471992, "grad_norm": 0.33662474155426025, "learning_rate": 0.000151832254372831, "loss": 11.6705, "step": 46821 }, { "epoch": 0.9801138742359541, "grad_norm": 0.3610641360282898, "learning_rate": 0.0001518303793386901, "loss": 11.668, "step": 46822 }, { "epoch": 0.9801348069999163, "grad_norm": 0.3178044557571411, "learning_rate": 0.00015182850427963333, "loss": 11.674, "step": 46823 }, { "epoch": 0.9801557397638784, "grad_norm": 0.29772016406059265, "learning_rate": 0.00015182662919566156, "loss": 11.6725, "step": 46824 }, { "epoch": 0.9801766725278406, "grad_norm": 0.4183252155780792, "learning_rate": 0.0001518247540867757, "loss": 11.6802, "step": 46825 }, { "epoch": 0.9801976052918028, "grad_norm": 0.2705496847629547, "learning_rate": 0.00015182287895297662, "loss": 11.6703, "step": 46826 }, { "epoch": 0.9802185380557649, "grad_norm": 0.269437313079834, "learning_rate": 0.00015182100379426526, "loss": 11.6633, "step": 46827 }, { "epoch": 0.9802394708197271, "grad_norm": 0.3590205907821655, "learning_rate": 0.00015181912861064246, "loss": 11.6919, "step": 46828 }, { "epoch": 0.9802604035836892, "grad_norm": 0.29396963119506836, "learning_rate": 0.00015181725340210922, "loss": 11.6621, "step": 46829 }, { "epoch": 0.9802813363476514, "grad_norm": 0.4116467535495758, "learning_rate": 0.00015181537816866638, "loss": 11.6796, "step": 46830 }, { "epoch": 0.9803022691116134, "grad_norm": 0.26628684997558594, "learning_rate": 0.00015181350291031484, "loss": 11.6606, "step": 46831 }, { "epoch": 0.9803232018755756, "grad_norm": 0.34596142172813416, "learning_rate": 0.00015181162762705548, "loss": 11.6765, "step": 46832 }, { "epoch": 0.9803441346395378, "grad_norm": 0.2393152117729187, "learning_rate": 0.0001518097523188893, "loss": 11.6573, "step": 46833 }, { "epoch": 0.9803650674034999, "grad_norm": 0.3685363233089447, "learning_rate": 0.00015180787698581706, "loss": 11.6773, "step": 46834 }, { "epoch": 0.9803860001674621, "grad_norm": 0.2615686058998108, "learning_rate": 0.00015180600162783978, "loss": 11.675, "step": 46835 }, { "epoch": 0.9804069329314242, "grad_norm": 0.3209059238433838, "learning_rate": 0.00015180412624495832, "loss": 11.6659, "step": 46836 }, { "epoch": 0.9804278656953864, "grad_norm": 0.3432774245738983, "learning_rate": 0.00015180225083717358, "loss": 11.6712, "step": 46837 }, { "epoch": 0.9804487984593486, "grad_norm": 0.31513822078704834, "learning_rate": 0.00015180037540448644, "loss": 11.6708, "step": 46838 }, { "epoch": 0.9804697312233107, "grad_norm": 0.3098773956298828, "learning_rate": 0.00015179849994689782, "loss": 11.6701, "step": 46839 }, { "epoch": 0.9804906639872729, "grad_norm": 0.2961120307445526, "learning_rate": 0.00015179662446440864, "loss": 11.6657, "step": 46840 }, { "epoch": 0.980511596751235, "grad_norm": 0.33441922068595886, "learning_rate": 0.00015179474895701977, "loss": 11.6669, "step": 46841 }, { "epoch": 0.9805325295151972, "grad_norm": 0.2988244593143463, "learning_rate": 0.00015179287342473213, "loss": 11.6778, "step": 46842 }, { "epoch": 0.9805534622791593, "grad_norm": 0.2805400788784027, "learning_rate": 0.0001517909978675466, "loss": 11.6725, "step": 46843 }, { "epoch": 0.9805743950431215, "grad_norm": 0.3009880781173706, "learning_rate": 0.0001517891222854641, "loss": 11.6572, "step": 46844 }, { "epoch": 0.9805953278070837, "grad_norm": 0.358687162399292, "learning_rate": 0.0001517872466784856, "loss": 11.6877, "step": 46845 }, { "epoch": 0.9806162605710458, "grad_norm": 0.35832318663597107, "learning_rate": 0.00015178537104661185, "loss": 11.6685, "step": 46846 }, { "epoch": 0.980637193335008, "grad_norm": 0.3231082856655121, "learning_rate": 0.00015178349538984387, "loss": 11.6723, "step": 46847 }, { "epoch": 0.9806581260989701, "grad_norm": 0.3119544982910156, "learning_rate": 0.00015178161970818253, "loss": 11.6552, "step": 46848 }, { "epoch": 0.9806790588629323, "grad_norm": 0.6490662097930908, "learning_rate": 0.00015177974400162872, "loss": 11.6855, "step": 46849 }, { "epoch": 0.9806999916268944, "grad_norm": 0.3827390670776367, "learning_rate": 0.0001517778682701833, "loss": 11.6698, "step": 46850 }, { "epoch": 0.9807209243908566, "grad_norm": 0.2851056754589081, "learning_rate": 0.00015177599251384733, "loss": 11.6583, "step": 46851 }, { "epoch": 0.9807418571548188, "grad_norm": 0.38089868426322937, "learning_rate": 0.0001517741167326215, "loss": 11.6865, "step": 46852 }, { "epoch": 0.9807627899187809, "grad_norm": 0.3240712881088257, "learning_rate": 0.0001517722409265069, "loss": 11.6775, "step": 46853 }, { "epoch": 0.980783722682743, "grad_norm": 0.35931628942489624, "learning_rate": 0.00015177036509550428, "loss": 11.6744, "step": 46854 }, { "epoch": 0.9808046554467051, "grad_norm": 0.27861371636390686, "learning_rate": 0.00015176848923961463, "loss": 11.6696, "step": 46855 }, { "epoch": 0.9808255882106673, "grad_norm": 0.31121134757995605, "learning_rate": 0.00015176661335883885, "loss": 11.6836, "step": 46856 }, { "epoch": 0.9808465209746295, "grad_norm": 0.3378687798976898, "learning_rate": 0.0001517647374531778, "loss": 11.6506, "step": 46857 }, { "epoch": 0.9808674537385916, "grad_norm": 0.2949989140033722, "learning_rate": 0.0001517628615226324, "loss": 11.6548, "step": 46858 }, { "epoch": 0.9808883865025538, "grad_norm": 0.40271177887916565, "learning_rate": 0.00015176098556720358, "loss": 11.6832, "step": 46859 }, { "epoch": 0.9809093192665159, "grad_norm": 0.3945113718509674, "learning_rate": 0.0001517591095868922, "loss": 11.6541, "step": 46860 }, { "epoch": 0.9809302520304781, "grad_norm": 0.3775474727153778, "learning_rate": 0.00015175723358169915, "loss": 11.6643, "step": 46861 }, { "epoch": 0.9809511847944402, "grad_norm": 0.3142052888870239, "learning_rate": 0.00015175535755162538, "loss": 11.6747, "step": 46862 }, { "epoch": 0.9809721175584024, "grad_norm": 0.3353913724422455, "learning_rate": 0.00015175348149667182, "loss": 11.6689, "step": 46863 }, { "epoch": 0.9809930503223646, "grad_norm": 0.3010236620903015, "learning_rate": 0.00015175160541683926, "loss": 11.67, "step": 46864 }, { "epoch": 0.9810139830863267, "grad_norm": 0.4284607768058777, "learning_rate": 0.0001517497293121287, "loss": 11.6654, "step": 46865 }, { "epoch": 0.9810349158502889, "grad_norm": 0.2770425081253052, "learning_rate": 0.00015174785318254098, "loss": 11.6654, "step": 46866 }, { "epoch": 0.981055848614251, "grad_norm": 0.27817362546920776, "learning_rate": 0.00015174597702807705, "loss": 11.6667, "step": 46867 }, { "epoch": 0.9810767813782132, "grad_norm": 0.343214213848114, "learning_rate": 0.0001517441008487378, "loss": 11.6555, "step": 46868 }, { "epoch": 0.9810977141421753, "grad_norm": 0.3010293245315552, "learning_rate": 0.0001517422246445241, "loss": 11.6727, "step": 46869 }, { "epoch": 0.9811186469061375, "grad_norm": 0.3550082743167877, "learning_rate": 0.00015174034841543692, "loss": 11.652, "step": 46870 }, { "epoch": 0.9811395796700997, "grad_norm": 0.25670287013053894, "learning_rate": 0.00015173847216147707, "loss": 11.6755, "step": 46871 }, { "epoch": 0.9811605124340618, "grad_norm": 0.46299469470977783, "learning_rate": 0.0001517365958826455, "loss": 11.6794, "step": 46872 }, { "epoch": 0.981181445198024, "grad_norm": 0.3227660357952118, "learning_rate": 0.00015173471957894314, "loss": 11.6568, "step": 46873 }, { "epoch": 0.9812023779619861, "grad_norm": 0.3027424216270447, "learning_rate": 0.00015173284325037086, "loss": 11.6563, "step": 46874 }, { "epoch": 0.9812233107259483, "grad_norm": 0.3476620018482208, "learning_rate": 0.00015173096689692956, "loss": 11.6546, "step": 46875 }, { "epoch": 0.9812442434899105, "grad_norm": 0.369190514087677, "learning_rate": 0.00015172909051862013, "loss": 11.6862, "step": 46876 }, { "epoch": 0.9812651762538726, "grad_norm": 0.3331117331981659, "learning_rate": 0.00015172721411544352, "loss": 11.6662, "step": 46877 }, { "epoch": 0.9812861090178348, "grad_norm": 0.3301675617694855, "learning_rate": 0.00015172533768740057, "loss": 11.6578, "step": 46878 }, { "epoch": 0.9813070417817968, "grad_norm": 0.2359602004289627, "learning_rate": 0.00015172346123449223, "loss": 11.6642, "step": 46879 }, { "epoch": 0.981327974545759, "grad_norm": 0.35084375739097595, "learning_rate": 0.0001517215847567194, "loss": 11.6901, "step": 46880 }, { "epoch": 0.9813489073097211, "grad_norm": 0.38377484679222107, "learning_rate": 0.00015171970825408296, "loss": 11.6818, "step": 46881 }, { "epoch": 0.9813698400736833, "grad_norm": 0.47121354937553406, "learning_rate": 0.00015171783172658382, "loss": 11.6604, "step": 46882 }, { "epoch": 0.9813907728376455, "grad_norm": 0.3758198320865631, "learning_rate": 0.00015171595517422286, "loss": 11.6608, "step": 46883 }, { "epoch": 0.9814117056016076, "grad_norm": 0.26172447204589844, "learning_rate": 0.000151714078597001, "loss": 11.6617, "step": 46884 }, { "epoch": 0.9814326383655698, "grad_norm": 0.3704293966293335, "learning_rate": 0.0001517122019949192, "loss": 11.6691, "step": 46885 }, { "epoch": 0.9814535711295319, "grad_norm": 0.2656725347042084, "learning_rate": 0.00015171032536797827, "loss": 11.6669, "step": 46886 }, { "epoch": 0.9814745038934941, "grad_norm": 0.4949021637439728, "learning_rate": 0.00015170844871617914, "loss": 11.701, "step": 46887 }, { "epoch": 0.9814954366574562, "grad_norm": 0.33076348900794983, "learning_rate": 0.00015170657203952273, "loss": 11.6678, "step": 46888 }, { "epoch": 0.9815163694214184, "grad_norm": 0.281296968460083, "learning_rate": 0.00015170469533800995, "loss": 11.6681, "step": 46889 }, { "epoch": 0.9815373021853806, "grad_norm": 0.2984246015548706, "learning_rate": 0.0001517028186116417, "loss": 11.6565, "step": 46890 }, { "epoch": 0.9815582349493427, "grad_norm": 0.2883564233779907, "learning_rate": 0.00015170094186041881, "loss": 11.6859, "step": 46891 }, { "epoch": 0.9815791677133049, "grad_norm": 0.3451899290084839, "learning_rate": 0.0001516990650843423, "loss": 11.6736, "step": 46892 }, { "epoch": 0.981600100477267, "grad_norm": 0.35327595472335815, "learning_rate": 0.00015169718828341298, "loss": 11.6543, "step": 46893 }, { "epoch": 0.9816210332412292, "grad_norm": 0.2811730206012726, "learning_rate": 0.00015169531145763177, "loss": 11.6713, "step": 46894 }, { "epoch": 0.9816419660051914, "grad_norm": 0.2453717142343521, "learning_rate": 0.00015169343460699964, "loss": 11.6727, "step": 46895 }, { "epoch": 0.9816628987691535, "grad_norm": 0.3195898234844208, "learning_rate": 0.0001516915577315174, "loss": 11.6952, "step": 46896 }, { "epoch": 0.9816838315331157, "grad_norm": 0.2939510643482208, "learning_rate": 0.00015168968083118602, "loss": 11.662, "step": 46897 }, { "epoch": 0.9817047642970778, "grad_norm": 0.3136444687843323, "learning_rate": 0.00015168780390600634, "loss": 11.7044, "step": 46898 }, { "epoch": 0.98172569706104, "grad_norm": 0.27810680866241455, "learning_rate": 0.00015168592695597935, "loss": 11.6827, "step": 46899 }, { "epoch": 0.981746629825002, "grad_norm": 0.25713950395584106, "learning_rate": 0.00015168404998110586, "loss": 11.669, "step": 46900 }, { "epoch": 0.9817675625889642, "grad_norm": 0.3221057057380676, "learning_rate": 0.0001516821729813868, "loss": 11.6726, "step": 46901 }, { "epoch": 0.9817884953529264, "grad_norm": 0.29083943367004395, "learning_rate": 0.0001516802959568231, "loss": 11.6566, "step": 46902 }, { "epoch": 0.9818094281168885, "grad_norm": 0.43932560086250305, "learning_rate": 0.00015167841890741568, "loss": 11.67, "step": 46903 }, { "epoch": 0.9818303608808507, "grad_norm": 0.26800692081451416, "learning_rate": 0.0001516765418331654, "loss": 11.6666, "step": 46904 }, { "epoch": 0.9818512936448128, "grad_norm": 0.3525555431842804, "learning_rate": 0.00015167466473407312, "loss": 11.6695, "step": 46905 }, { "epoch": 0.981872226408775, "grad_norm": 0.36267730593681335, "learning_rate": 0.0001516727876101398, "loss": 11.6738, "step": 46906 }, { "epoch": 0.9818931591727371, "grad_norm": 0.29440921545028687, "learning_rate": 0.00015167091046136636, "loss": 11.6571, "step": 46907 }, { "epoch": 0.9819140919366993, "grad_norm": 0.3472066819667816, "learning_rate": 0.00015166903328775369, "loss": 11.6578, "step": 46908 }, { "epoch": 0.9819350247006615, "grad_norm": 0.30258703231811523, "learning_rate": 0.00015166715608930264, "loss": 11.6691, "step": 46909 }, { "epoch": 0.9819559574646236, "grad_norm": 0.35478538274765015, "learning_rate": 0.00015166527886601418, "loss": 11.6777, "step": 46910 }, { "epoch": 0.9819768902285858, "grad_norm": 0.36037251353263855, "learning_rate": 0.0001516634016178892, "loss": 11.6563, "step": 46911 }, { "epoch": 0.9819978229925479, "grad_norm": 0.30434828996658325, "learning_rate": 0.00015166152434492854, "loss": 11.6669, "step": 46912 }, { "epoch": 0.9820187557565101, "grad_norm": 0.2809794545173645, "learning_rate": 0.0001516596470471332, "loss": 11.6653, "step": 46913 }, { "epoch": 0.9820396885204723, "grad_norm": 0.38678503036499023, "learning_rate": 0.00015165776972450403, "loss": 11.6674, "step": 46914 }, { "epoch": 0.9820606212844344, "grad_norm": 0.33283868432044983, "learning_rate": 0.0001516558923770419, "loss": 11.6673, "step": 46915 }, { "epoch": 0.9820815540483966, "grad_norm": 0.3359665274620056, "learning_rate": 0.00015165401500474775, "loss": 11.6612, "step": 46916 }, { "epoch": 0.9821024868123587, "grad_norm": 0.3349323570728302, "learning_rate": 0.00015165213760762249, "loss": 11.6704, "step": 46917 }, { "epoch": 0.9821234195763209, "grad_norm": 0.27408406138420105, "learning_rate": 0.000151650260185667, "loss": 11.6807, "step": 46918 }, { "epoch": 0.982144352340283, "grad_norm": 0.28918978571891785, "learning_rate": 0.00015164838273888225, "loss": 11.6642, "step": 46919 }, { "epoch": 0.9821652851042452, "grad_norm": 0.2869303524494171, "learning_rate": 0.00015164650526726904, "loss": 11.669, "step": 46920 }, { "epoch": 0.9821862178682074, "grad_norm": 0.30765992403030396, "learning_rate": 0.00015164462777082834, "loss": 11.6598, "step": 46921 }, { "epoch": 0.9822071506321695, "grad_norm": 0.28831008076667786, "learning_rate": 0.000151642750249561, "loss": 11.6593, "step": 46922 }, { "epoch": 0.9822280833961317, "grad_norm": 0.35900482535362244, "learning_rate": 0.00015164087270346798, "loss": 11.6641, "step": 46923 }, { "epoch": 0.9822490161600937, "grad_norm": 0.3412421941757202, "learning_rate": 0.00015163899513255015, "loss": 11.6634, "step": 46924 }, { "epoch": 0.982269948924056, "grad_norm": 0.363994300365448, "learning_rate": 0.0001516371175368084, "loss": 11.678, "step": 46925 }, { "epoch": 0.982290881688018, "grad_norm": 0.278474360704422, "learning_rate": 0.0001516352399162437, "loss": 11.668, "step": 46926 }, { "epoch": 0.9823118144519802, "grad_norm": 0.30415409803390503, "learning_rate": 0.00015163336227085687, "loss": 11.6701, "step": 46927 }, { "epoch": 0.9823327472159424, "grad_norm": 0.47698983550071716, "learning_rate": 0.00015163148460064886, "loss": 11.6844, "step": 46928 }, { "epoch": 0.9823536799799045, "grad_norm": 0.31015363335609436, "learning_rate": 0.0001516296069056206, "loss": 11.6451, "step": 46929 }, { "epoch": 0.9823746127438667, "grad_norm": 0.2958810329437256, "learning_rate": 0.00015162772918577287, "loss": 11.6825, "step": 46930 }, { "epoch": 0.9823955455078288, "grad_norm": 0.4141170382499695, "learning_rate": 0.00015162585144110673, "loss": 11.6722, "step": 46931 }, { "epoch": 0.982416478271791, "grad_norm": 0.38925912976264954, "learning_rate": 0.00015162397367162296, "loss": 11.6733, "step": 46932 }, { "epoch": 0.9824374110357531, "grad_norm": 0.33778896927833557, "learning_rate": 0.0001516220958773225, "loss": 11.6758, "step": 46933 }, { "epoch": 0.9824583437997153, "grad_norm": 0.4150056838989258, "learning_rate": 0.00015162021805820633, "loss": 11.6775, "step": 46934 }, { "epoch": 0.9824792765636775, "grad_norm": 0.3342955708503723, "learning_rate": 0.00015161834021427525, "loss": 11.6525, "step": 46935 }, { "epoch": 0.9825002093276396, "grad_norm": 0.3102753162384033, "learning_rate": 0.0001516164623455302, "loss": 11.6534, "step": 46936 }, { "epoch": 0.9825211420916018, "grad_norm": 0.32836103439331055, "learning_rate": 0.00015161458445197206, "loss": 11.6718, "step": 46937 }, { "epoch": 0.9825420748555639, "grad_norm": 0.32254791259765625, "learning_rate": 0.00015161270653360178, "loss": 11.6538, "step": 46938 }, { "epoch": 0.9825630076195261, "grad_norm": 0.4323679208755493, "learning_rate": 0.00015161082859042023, "loss": 11.6801, "step": 46939 }, { "epoch": 0.9825839403834883, "grad_norm": 0.2535342574119568, "learning_rate": 0.0001516089506224283, "loss": 11.665, "step": 46940 }, { "epoch": 0.9826048731474504, "grad_norm": 0.4090619385242462, "learning_rate": 0.00015160707262962698, "loss": 11.6629, "step": 46941 }, { "epoch": 0.9826258059114126, "grad_norm": 0.2945719063282013, "learning_rate": 0.00015160519461201706, "loss": 11.671, "step": 46942 }, { "epoch": 0.9826467386753747, "grad_norm": 0.3450552225112915, "learning_rate": 0.00015160331656959946, "loss": 11.6759, "step": 46943 }, { "epoch": 0.9826676714393369, "grad_norm": 0.3325828015804291, "learning_rate": 0.00015160143850237519, "loss": 11.6582, "step": 46944 }, { "epoch": 0.982688604203299, "grad_norm": 0.3861605226993561, "learning_rate": 0.000151599560410345, "loss": 11.6722, "step": 46945 }, { "epoch": 0.9827095369672612, "grad_norm": 0.2709171175956726, "learning_rate": 0.0001515976822935099, "loss": 11.6547, "step": 46946 }, { "epoch": 0.9827304697312234, "grad_norm": 0.2656019628047943, "learning_rate": 0.00015159580415187074, "loss": 11.6606, "step": 46947 }, { "epoch": 0.9827514024951854, "grad_norm": 0.3679412007331848, "learning_rate": 0.00015159392598542847, "loss": 11.673, "step": 46948 }, { "epoch": 0.9827723352591476, "grad_norm": 0.35622331500053406, "learning_rate": 0.00015159204779418396, "loss": 11.6524, "step": 46949 }, { "epoch": 0.9827932680231097, "grad_norm": 0.3679611086845398, "learning_rate": 0.0001515901695781381, "loss": 11.6752, "step": 46950 }, { "epoch": 0.9828142007870719, "grad_norm": 0.301887571811676, "learning_rate": 0.00015158829133729186, "loss": 11.6596, "step": 46951 }, { "epoch": 0.982835133551034, "grad_norm": 0.2744687497615814, "learning_rate": 0.00015158641307164606, "loss": 11.6633, "step": 46952 }, { "epoch": 0.9828560663149962, "grad_norm": 0.32005977630615234, "learning_rate": 0.00015158453478120163, "loss": 11.6854, "step": 46953 }, { "epoch": 0.9828769990789584, "grad_norm": 0.3209759294986725, "learning_rate": 0.00015158265646595949, "loss": 11.6654, "step": 46954 }, { "epoch": 0.9828979318429205, "grad_norm": 0.27183476090431213, "learning_rate": 0.0001515807781259205, "loss": 11.6602, "step": 46955 }, { "epoch": 0.9829188646068827, "grad_norm": 0.28910398483276367, "learning_rate": 0.00015157889976108563, "loss": 11.6655, "step": 46956 }, { "epoch": 0.9829397973708448, "grad_norm": 0.3287149965763092, "learning_rate": 0.00015157702137145575, "loss": 11.6564, "step": 46957 }, { "epoch": 0.982960730134807, "grad_norm": 0.28695327043533325, "learning_rate": 0.00015157514295703178, "loss": 11.6724, "step": 46958 }, { "epoch": 0.9829816628987692, "grad_norm": 0.42933353781700134, "learning_rate": 0.00015157326451781456, "loss": 11.698, "step": 46959 }, { "epoch": 0.9830025956627313, "grad_norm": 0.31445616483688354, "learning_rate": 0.00015157138605380506, "loss": 11.6728, "step": 46960 }, { "epoch": 0.9830235284266935, "grad_norm": 0.3195720314979553, "learning_rate": 0.00015156950756500416, "loss": 11.6747, "step": 46961 }, { "epoch": 0.9830444611906556, "grad_norm": 0.3372057378292084, "learning_rate": 0.0001515676290514128, "loss": 11.6635, "step": 46962 }, { "epoch": 0.9830653939546178, "grad_norm": 0.32810160517692566, "learning_rate": 0.00015156575051303178, "loss": 11.6532, "step": 46963 }, { "epoch": 0.9830863267185799, "grad_norm": 0.3412047326564789, "learning_rate": 0.00015156387194986213, "loss": 11.6657, "step": 46964 }, { "epoch": 0.9831072594825421, "grad_norm": 0.2868165075778961, "learning_rate": 0.00015156199336190468, "loss": 11.6633, "step": 46965 }, { "epoch": 0.9831281922465043, "grad_norm": 0.2794405519962311, "learning_rate": 0.0001515601147491603, "loss": 11.6763, "step": 46966 }, { "epoch": 0.9831491250104664, "grad_norm": 0.35685980319976807, "learning_rate": 0.00015155823611163, "loss": 11.6606, "step": 46967 }, { "epoch": 0.9831700577744286, "grad_norm": 0.26606670022010803, "learning_rate": 0.00015155635744931457, "loss": 11.6601, "step": 46968 }, { "epoch": 0.9831909905383907, "grad_norm": 0.29618123173713684, "learning_rate": 0.000151554478762215, "loss": 11.662, "step": 46969 }, { "epoch": 0.9832119233023529, "grad_norm": 0.3288577198982239, "learning_rate": 0.00015155260005033215, "loss": 11.6653, "step": 46970 }, { "epoch": 0.9832328560663149, "grad_norm": 0.34873148798942566, "learning_rate": 0.00015155072131366692, "loss": 11.6682, "step": 46971 }, { "epoch": 0.9832537888302771, "grad_norm": 0.4014497995376587, "learning_rate": 0.00015154884255222026, "loss": 11.6671, "step": 46972 }, { "epoch": 0.9832747215942393, "grad_norm": 0.3434452414512634, "learning_rate": 0.00015154696376599303, "loss": 11.6756, "step": 46973 }, { "epoch": 0.9832956543582014, "grad_norm": 0.3409540057182312, "learning_rate": 0.0001515450849549861, "loss": 11.6697, "step": 46974 }, { "epoch": 0.9833165871221636, "grad_norm": 0.2589658200740814, "learning_rate": 0.00015154320611920045, "loss": 11.6856, "step": 46975 }, { "epoch": 0.9833375198861257, "grad_norm": 0.2532688081264496, "learning_rate": 0.00015154132725863693, "loss": 11.6731, "step": 46976 }, { "epoch": 0.9833584526500879, "grad_norm": 0.2876475751399994, "learning_rate": 0.0001515394483732965, "loss": 11.668, "step": 46977 }, { "epoch": 0.9833793854140501, "grad_norm": 0.2786288857460022, "learning_rate": 0.00015153756946318, "loss": 11.66, "step": 46978 }, { "epoch": 0.9834003181780122, "grad_norm": 0.28049829602241516, "learning_rate": 0.00015153569052828836, "loss": 11.6683, "step": 46979 }, { "epoch": 0.9834212509419744, "grad_norm": 0.3728516697883606, "learning_rate": 0.00015153381156862245, "loss": 11.6694, "step": 46980 }, { "epoch": 0.9834421837059365, "grad_norm": 0.2990334630012512, "learning_rate": 0.00015153193258418321, "loss": 11.6369, "step": 46981 }, { "epoch": 0.9834631164698987, "grad_norm": 0.3297295570373535, "learning_rate": 0.00015153005357497156, "loss": 11.676, "step": 46982 }, { "epoch": 0.9834840492338608, "grad_norm": 0.2928924560546875, "learning_rate": 0.0001515281745409884, "loss": 11.6674, "step": 46983 }, { "epoch": 0.983504981997823, "grad_norm": 0.3022795021533966, "learning_rate": 0.00015152629548223457, "loss": 11.6559, "step": 46984 }, { "epoch": 0.9835259147617852, "grad_norm": 0.31502997875213623, "learning_rate": 0.00015152441639871105, "loss": 11.6637, "step": 46985 }, { "epoch": 0.9835468475257473, "grad_norm": 0.32576438784599304, "learning_rate": 0.00015152253729041872, "loss": 11.6647, "step": 46986 }, { "epoch": 0.9835677802897095, "grad_norm": 0.2946960926055908, "learning_rate": 0.00015152065815735843, "loss": 11.6778, "step": 46987 }, { "epoch": 0.9835887130536716, "grad_norm": 0.3385241627693176, "learning_rate": 0.00015151877899953115, "loss": 11.6587, "step": 46988 }, { "epoch": 0.9836096458176338, "grad_norm": 0.2738697826862335, "learning_rate": 0.00015151689981693776, "loss": 11.6481, "step": 46989 }, { "epoch": 0.9836305785815959, "grad_norm": 0.28717976808547974, "learning_rate": 0.00015151502060957914, "loss": 11.6833, "step": 46990 }, { "epoch": 0.9836515113455581, "grad_norm": 0.3596303164958954, "learning_rate": 0.00015151314137745628, "loss": 11.6675, "step": 46991 }, { "epoch": 0.9836724441095203, "grad_norm": 0.30917444825172424, "learning_rate": 0.00015151126212056997, "loss": 11.6757, "step": 46992 }, { "epoch": 0.9836933768734824, "grad_norm": 0.3309011161327362, "learning_rate": 0.00015150938283892118, "loss": 11.6722, "step": 46993 }, { "epoch": 0.9837143096374446, "grad_norm": 0.2783033549785614, "learning_rate": 0.00015150750353251077, "loss": 11.6712, "step": 46994 }, { "epoch": 0.9837352424014066, "grad_norm": 0.31261134147644043, "learning_rate": 0.0001515056242013397, "loss": 11.6536, "step": 46995 }, { "epoch": 0.9837561751653688, "grad_norm": 0.2763948440551758, "learning_rate": 0.00015150374484540886, "loss": 11.6631, "step": 46996 }, { "epoch": 0.983777107929331, "grad_norm": 0.31116849184036255, "learning_rate": 0.00015150186546471908, "loss": 11.6696, "step": 46997 }, { "epoch": 0.9837980406932931, "grad_norm": 0.35056206583976746, "learning_rate": 0.00015149998605927138, "loss": 11.672, "step": 46998 }, { "epoch": 0.9838189734572553, "grad_norm": 0.39212051033973694, "learning_rate": 0.00015149810662906657, "loss": 11.686, "step": 46999 }, { "epoch": 0.9838399062212174, "grad_norm": 0.36067965626716614, "learning_rate": 0.0001514962271741056, "loss": 11.6604, "step": 47000 }, { "epoch": 0.9838399062212174, "eval_loss": 11.668822288513184, "eval_runtime": 34.2954, "eval_samples_per_second": 28.021, "eval_steps_per_second": 7.027, "step": 47000 }, { "epoch": 0.9838608389851796, "grad_norm": 0.4532982110977173, "learning_rate": 0.00015149434769438935, "loss": 11.6715, "step": 47001 }, { "epoch": 0.9838817717491417, "grad_norm": 0.3010355830192566, "learning_rate": 0.00015149246818991875, "loss": 11.6629, "step": 47002 }, { "epoch": 0.9839027045131039, "grad_norm": 0.34729838371276855, "learning_rate": 0.0001514905886606947, "loss": 11.6761, "step": 47003 }, { "epoch": 0.9839236372770661, "grad_norm": 0.2858382761478424, "learning_rate": 0.00015148870910671806, "loss": 11.6724, "step": 47004 }, { "epoch": 0.9839445700410282, "grad_norm": 0.36873647570610046, "learning_rate": 0.0001514868295279898, "loss": 11.6766, "step": 47005 }, { "epoch": 0.9839655028049904, "grad_norm": 0.29125797748565674, "learning_rate": 0.00015148494992451076, "loss": 11.6603, "step": 47006 }, { "epoch": 0.9839864355689525, "grad_norm": 0.3019621670246124, "learning_rate": 0.0001514830702962819, "loss": 11.6528, "step": 47007 }, { "epoch": 0.9840073683329147, "grad_norm": 0.2759266197681427, "learning_rate": 0.00015148119064330406, "loss": 11.6638, "step": 47008 }, { "epoch": 0.9840283010968768, "grad_norm": 0.2585017681121826, "learning_rate": 0.0001514793109655782, "loss": 11.6607, "step": 47009 }, { "epoch": 0.984049233860839, "grad_norm": 0.43069756031036377, "learning_rate": 0.00015147743126310524, "loss": 11.6748, "step": 47010 }, { "epoch": 0.9840701666248012, "grad_norm": 0.38842132687568665, "learning_rate": 0.00015147555153588598, "loss": 11.6912, "step": 47011 }, { "epoch": 0.9840910993887633, "grad_norm": 0.33085909485816956, "learning_rate": 0.00015147367178392144, "loss": 11.6468, "step": 47012 }, { "epoch": 0.9841120321527255, "grad_norm": 0.3309749960899353, "learning_rate": 0.00015147179200721245, "loss": 11.6705, "step": 47013 }, { "epoch": 0.9841329649166876, "grad_norm": 0.3774895966053009, "learning_rate": 0.00015146991220575995, "loss": 11.6538, "step": 47014 }, { "epoch": 0.9841538976806498, "grad_norm": 0.39457371830940247, "learning_rate": 0.00015146803237956483, "loss": 11.6625, "step": 47015 }, { "epoch": 0.984174830444612, "grad_norm": 0.30916187167167664, "learning_rate": 0.00015146615252862797, "loss": 11.6689, "step": 47016 }, { "epoch": 0.984195763208574, "grad_norm": 0.35104066133499146, "learning_rate": 0.00015146427265295035, "loss": 11.671, "step": 47017 }, { "epoch": 0.9842166959725362, "grad_norm": 0.3091711103916168, "learning_rate": 0.00015146239275253275, "loss": 11.6815, "step": 47018 }, { "epoch": 0.9842376287364983, "grad_norm": 0.24939647316932678, "learning_rate": 0.00015146051282737623, "loss": 11.6879, "step": 47019 }, { "epoch": 0.9842585615004605, "grad_norm": 0.310440331697464, "learning_rate": 0.00015145863287748155, "loss": 11.6727, "step": 47020 }, { "epoch": 0.9842794942644226, "grad_norm": 0.28046712279319763, "learning_rate": 0.0001514567529028497, "loss": 11.6479, "step": 47021 }, { "epoch": 0.9843004270283848, "grad_norm": 0.4079723656177521, "learning_rate": 0.00015145487290348154, "loss": 11.6709, "step": 47022 }, { "epoch": 0.984321359792347, "grad_norm": 0.3114965856075287, "learning_rate": 0.00015145299287937802, "loss": 11.6636, "step": 47023 }, { "epoch": 0.9843422925563091, "grad_norm": 0.3490067720413208, "learning_rate": 0.00015145111283053998, "loss": 11.6683, "step": 47024 }, { "epoch": 0.9843632253202713, "grad_norm": 0.31643784046173096, "learning_rate": 0.00015144923275696838, "loss": 11.6589, "step": 47025 }, { "epoch": 0.9843841580842334, "grad_norm": 0.27260491251945496, "learning_rate": 0.0001514473526586641, "loss": 11.6739, "step": 47026 }, { "epoch": 0.9844050908481956, "grad_norm": 0.39830702543258667, "learning_rate": 0.00015144547253562806, "loss": 11.6541, "step": 47027 }, { "epoch": 0.9844260236121577, "grad_norm": 0.3174303472042084, "learning_rate": 0.00015144359238786112, "loss": 11.6637, "step": 47028 }, { "epoch": 0.9844469563761199, "grad_norm": 0.3342718780040741, "learning_rate": 0.00015144171221536426, "loss": 11.6865, "step": 47029 }, { "epoch": 0.9844678891400821, "grad_norm": 0.2981208264827728, "learning_rate": 0.0001514398320181383, "loss": 11.6727, "step": 47030 }, { "epoch": 0.9844888219040442, "grad_norm": 0.2918553948402405, "learning_rate": 0.0001514379517961842, "loss": 11.6683, "step": 47031 }, { "epoch": 0.9845097546680064, "grad_norm": 0.2750079333782196, "learning_rate": 0.0001514360715495028, "loss": 11.6621, "step": 47032 }, { "epoch": 0.9845306874319685, "grad_norm": 0.3106633722782135, "learning_rate": 0.0001514341912780951, "loss": 11.6728, "step": 47033 }, { "epoch": 0.9845516201959307, "grad_norm": 0.24251767992973328, "learning_rate": 0.00015143231098196195, "loss": 11.6672, "step": 47034 }, { "epoch": 0.9845725529598929, "grad_norm": 0.317732572555542, "learning_rate": 0.00015143043066110425, "loss": 11.6641, "step": 47035 }, { "epoch": 0.984593485723855, "grad_norm": 0.3996475338935852, "learning_rate": 0.00015142855031552292, "loss": 11.6722, "step": 47036 }, { "epoch": 0.9846144184878172, "grad_norm": 0.25966086983680725, "learning_rate": 0.00015142666994521884, "loss": 11.6645, "step": 47037 }, { "epoch": 0.9846353512517793, "grad_norm": 0.2901648283004761, "learning_rate": 0.00015142478955019293, "loss": 11.6633, "step": 47038 }, { "epoch": 0.9846562840157415, "grad_norm": 0.3658188283443451, "learning_rate": 0.00015142290913044607, "loss": 11.682, "step": 47039 }, { "epoch": 0.9846772167797035, "grad_norm": 0.42990973591804504, "learning_rate": 0.00015142102868597924, "loss": 11.6811, "step": 47040 }, { "epoch": 0.9846981495436657, "grad_norm": 0.30675461888313293, "learning_rate": 0.00015141914821679327, "loss": 11.6723, "step": 47041 }, { "epoch": 0.984719082307628, "grad_norm": 0.3196881413459778, "learning_rate": 0.00015141726772288907, "loss": 11.6796, "step": 47042 }, { "epoch": 0.98474001507159, "grad_norm": 0.32909294962882996, "learning_rate": 0.00015141538720426755, "loss": 11.682, "step": 47043 }, { "epoch": 0.9847609478355522, "grad_norm": 0.28253844380378723, "learning_rate": 0.00015141350666092966, "loss": 11.6667, "step": 47044 }, { "epoch": 0.9847818805995143, "grad_norm": 0.289592444896698, "learning_rate": 0.00015141162609287622, "loss": 11.6639, "step": 47045 }, { "epoch": 0.9848028133634765, "grad_norm": 0.3379133939743042, "learning_rate": 0.00015140974550010823, "loss": 11.6627, "step": 47046 }, { "epoch": 0.9848237461274386, "grad_norm": 0.3064607083797455, "learning_rate": 0.00015140786488262653, "loss": 11.6697, "step": 47047 }, { "epoch": 0.9848446788914008, "grad_norm": 0.3290863037109375, "learning_rate": 0.000151405984240432, "loss": 11.6838, "step": 47048 }, { "epoch": 0.984865611655363, "grad_norm": 0.3365607261657715, "learning_rate": 0.00015140410357352563, "loss": 11.6737, "step": 47049 }, { "epoch": 0.9848865444193251, "grad_norm": 0.3112790584564209, "learning_rate": 0.00015140222288190827, "loss": 11.6699, "step": 47050 }, { "epoch": 0.9849074771832873, "grad_norm": 0.30154329538345337, "learning_rate": 0.00015140034216558083, "loss": 11.6891, "step": 47051 }, { "epoch": 0.9849284099472494, "grad_norm": 0.32429859042167664, "learning_rate": 0.0001513984614245442, "loss": 11.6593, "step": 47052 }, { "epoch": 0.9849493427112116, "grad_norm": 0.2627260982990265, "learning_rate": 0.00015139658065879932, "loss": 11.6644, "step": 47053 }, { "epoch": 0.9849702754751738, "grad_norm": 0.41611453890800476, "learning_rate": 0.00015139469986834707, "loss": 11.6845, "step": 47054 }, { "epoch": 0.9849912082391359, "grad_norm": 0.34677213430404663, "learning_rate": 0.00015139281905318834, "loss": 11.6636, "step": 47055 }, { "epoch": 0.9850121410030981, "grad_norm": 0.3594723343849182, "learning_rate": 0.00015139093821332406, "loss": 11.6665, "step": 47056 }, { "epoch": 0.9850330737670602, "grad_norm": 0.32173794507980347, "learning_rate": 0.00015138905734875512, "loss": 11.658, "step": 47057 }, { "epoch": 0.9850540065310224, "grad_norm": 0.33677220344543457, "learning_rate": 0.00015138717645948248, "loss": 11.6712, "step": 47058 }, { "epoch": 0.9850749392949845, "grad_norm": 0.4062868356704712, "learning_rate": 0.00015138529554550693, "loss": 11.6738, "step": 47059 }, { "epoch": 0.9850958720589467, "grad_norm": 0.2933361530303955, "learning_rate": 0.00015138341460682944, "loss": 11.6597, "step": 47060 }, { "epoch": 0.9851168048229089, "grad_norm": 0.27666816115379333, "learning_rate": 0.00015138153364345098, "loss": 11.6631, "step": 47061 }, { "epoch": 0.985137737586871, "grad_norm": 0.2771880030632019, "learning_rate": 0.00015137965265537233, "loss": 11.6811, "step": 47062 }, { "epoch": 0.9851586703508332, "grad_norm": 0.3617985248565674, "learning_rate": 0.00015137777164259445, "loss": 11.6724, "step": 47063 }, { "epoch": 0.9851796031147952, "grad_norm": 0.37521180510520935, "learning_rate": 0.00015137589060511828, "loss": 11.6676, "step": 47064 }, { "epoch": 0.9852005358787574, "grad_norm": 0.35966670513153076, "learning_rate": 0.00015137400954294466, "loss": 11.6614, "step": 47065 }, { "epoch": 0.9852214686427195, "grad_norm": 0.34901607036590576, "learning_rate": 0.00015137212845607454, "loss": 11.6838, "step": 47066 }, { "epoch": 0.9852424014066817, "grad_norm": 0.28424951434135437, "learning_rate": 0.0001513702473445088, "loss": 11.6609, "step": 47067 }, { "epoch": 0.9852633341706439, "grad_norm": 0.3163596987724304, "learning_rate": 0.00015136836620824836, "loss": 11.6461, "step": 47068 }, { "epoch": 0.985284266934606, "grad_norm": 0.3174915611743927, "learning_rate": 0.0001513664850472941, "loss": 11.6606, "step": 47069 }, { "epoch": 0.9853051996985682, "grad_norm": 0.2936232089996338, "learning_rate": 0.00015136460386164693, "loss": 11.6691, "step": 47070 }, { "epoch": 0.9853261324625303, "grad_norm": 0.31819725036621094, "learning_rate": 0.00015136272265130782, "loss": 11.6617, "step": 47071 }, { "epoch": 0.9853470652264925, "grad_norm": 0.2483680695295334, "learning_rate": 0.00015136084141627757, "loss": 11.6715, "step": 47072 }, { "epoch": 0.9853679979904547, "grad_norm": 0.367488831281662, "learning_rate": 0.00015135896015655716, "loss": 11.6716, "step": 47073 }, { "epoch": 0.9853889307544168, "grad_norm": 0.2887563705444336, "learning_rate": 0.00015135707887214747, "loss": 11.6752, "step": 47074 }, { "epoch": 0.985409863518379, "grad_norm": 0.32872578501701355, "learning_rate": 0.00015135519756304938, "loss": 11.6608, "step": 47075 }, { "epoch": 0.9854307962823411, "grad_norm": 0.30727294087409973, "learning_rate": 0.00015135331622926384, "loss": 11.6648, "step": 47076 }, { "epoch": 0.9854517290463033, "grad_norm": 0.2606472373008728, "learning_rate": 0.0001513514348707917, "loss": 11.663, "step": 47077 }, { "epoch": 0.9854726618102654, "grad_norm": 0.3019067049026489, "learning_rate": 0.00015134955348763396, "loss": 11.6601, "step": 47078 }, { "epoch": 0.9854935945742276, "grad_norm": 0.3359290063381195, "learning_rate": 0.00015134767207979142, "loss": 11.6741, "step": 47079 }, { "epoch": 0.9855145273381898, "grad_norm": 0.30903974175453186, "learning_rate": 0.00015134579064726502, "loss": 11.6688, "step": 47080 }, { "epoch": 0.9855354601021519, "grad_norm": 0.34433862566947937, "learning_rate": 0.00015134390919005564, "loss": 11.6726, "step": 47081 }, { "epoch": 0.9855563928661141, "grad_norm": 0.3879132866859436, "learning_rate": 0.00015134202770816427, "loss": 11.6824, "step": 47082 }, { "epoch": 0.9855773256300762, "grad_norm": 0.3294183015823364, "learning_rate": 0.00015134014620159177, "loss": 11.6806, "step": 47083 }, { "epoch": 0.9855982583940384, "grad_norm": 0.2673989534378052, "learning_rate": 0.00015133826467033897, "loss": 11.6707, "step": 47084 }, { "epoch": 0.9856191911580005, "grad_norm": 0.30863291025161743, "learning_rate": 0.0001513363831144069, "loss": 11.6668, "step": 47085 }, { "epoch": 0.9856401239219627, "grad_norm": 0.3604438602924347, "learning_rate": 0.00015133450153379633, "loss": 11.6525, "step": 47086 }, { "epoch": 0.9856610566859249, "grad_norm": 0.38121476769447327, "learning_rate": 0.0001513326199285083, "loss": 11.6817, "step": 47087 }, { "epoch": 0.9856819894498869, "grad_norm": 0.2740020155906677, "learning_rate": 0.00015133073829854361, "loss": 11.6888, "step": 47088 }, { "epoch": 0.9857029222138491, "grad_norm": 0.3600802421569824, "learning_rate": 0.00015132885664390322, "loss": 11.672, "step": 47089 }, { "epoch": 0.9857238549778112, "grad_norm": 0.33476537466049194, "learning_rate": 0.00015132697496458803, "loss": 11.6479, "step": 47090 }, { "epoch": 0.9857447877417734, "grad_norm": 0.29105693101882935, "learning_rate": 0.00015132509326059893, "loss": 11.678, "step": 47091 }, { "epoch": 0.9857657205057356, "grad_norm": 0.28188779950141907, "learning_rate": 0.0001513232115319368, "loss": 11.6702, "step": 47092 }, { "epoch": 0.9857866532696977, "grad_norm": 0.3898945450782776, "learning_rate": 0.00015132132977860262, "loss": 11.6617, "step": 47093 }, { "epoch": 0.9858075860336599, "grad_norm": 0.3645493686199188, "learning_rate": 0.00015131944800059722, "loss": 11.6604, "step": 47094 }, { "epoch": 0.985828518797622, "grad_norm": 0.3164028525352478, "learning_rate": 0.00015131756619792154, "loss": 11.6772, "step": 47095 }, { "epoch": 0.9858494515615842, "grad_norm": 0.31074684858322144, "learning_rate": 0.0001513156843705765, "loss": 11.6665, "step": 47096 }, { "epoch": 0.9858703843255463, "grad_norm": 0.3732392489910126, "learning_rate": 0.00015131380251856292, "loss": 11.6659, "step": 47097 }, { "epoch": 0.9858913170895085, "grad_norm": 0.285045862197876, "learning_rate": 0.00015131192064188182, "loss": 11.6713, "step": 47098 }, { "epoch": 0.9859122498534707, "grad_norm": 0.32852527499198914, "learning_rate": 0.00015131003874053404, "loss": 11.6681, "step": 47099 }, { "epoch": 0.9859331826174328, "grad_norm": 0.37332549691200256, "learning_rate": 0.0001513081568145205, "loss": 11.6732, "step": 47100 }, { "epoch": 0.985954115381395, "grad_norm": 0.2863476872444153, "learning_rate": 0.00015130627486384209, "loss": 11.6665, "step": 47101 }, { "epoch": 0.9859750481453571, "grad_norm": 0.3275243639945984, "learning_rate": 0.00015130439288849976, "loss": 11.6622, "step": 47102 }, { "epoch": 0.9859959809093193, "grad_norm": 0.3412020802497864, "learning_rate": 0.00015130251088849432, "loss": 11.6609, "step": 47103 }, { "epoch": 0.9860169136732814, "grad_norm": 0.46728286147117615, "learning_rate": 0.00015130062886382675, "loss": 11.6595, "step": 47104 }, { "epoch": 0.9860378464372436, "grad_norm": 0.2182399332523346, "learning_rate": 0.00015129874681449798, "loss": 11.6551, "step": 47105 }, { "epoch": 0.9860587792012058, "grad_norm": 0.44379720091819763, "learning_rate": 0.00015129686474050884, "loss": 11.6991, "step": 47106 }, { "epoch": 0.9860797119651679, "grad_norm": 0.2387172132730484, "learning_rate": 0.00015129498264186027, "loss": 11.6707, "step": 47107 }, { "epoch": 0.9861006447291301, "grad_norm": 0.3026258647441864, "learning_rate": 0.00015129310051855321, "loss": 11.6658, "step": 47108 }, { "epoch": 0.9861215774930921, "grad_norm": 0.4334753453731537, "learning_rate": 0.0001512912183705885, "loss": 11.6774, "step": 47109 }, { "epoch": 0.9861425102570543, "grad_norm": 0.32454681396484375, "learning_rate": 0.00015128933619796707, "loss": 11.6674, "step": 47110 }, { "epoch": 0.9861634430210166, "grad_norm": 0.3366044759750366, "learning_rate": 0.00015128745400068983, "loss": 11.6829, "step": 47111 }, { "epoch": 0.9861843757849786, "grad_norm": 0.37020283937454224, "learning_rate": 0.0001512855717787577, "loss": 11.6854, "step": 47112 }, { "epoch": 0.9862053085489408, "grad_norm": 0.28098952770233154, "learning_rate": 0.00015128368953217154, "loss": 11.66, "step": 47113 }, { "epoch": 0.9862262413129029, "grad_norm": 0.33879587054252625, "learning_rate": 0.00015128180726093227, "loss": 11.682, "step": 47114 }, { "epoch": 0.9862471740768651, "grad_norm": 0.27935999631881714, "learning_rate": 0.00015127992496504085, "loss": 11.6689, "step": 47115 }, { "epoch": 0.9862681068408272, "grad_norm": 0.6107009649276733, "learning_rate": 0.00015127804264449812, "loss": 11.6763, "step": 47116 }, { "epoch": 0.9862890396047894, "grad_norm": 0.3141126036643982, "learning_rate": 0.00015127616029930502, "loss": 11.6622, "step": 47117 }, { "epoch": 0.9863099723687516, "grad_norm": 0.43807724118232727, "learning_rate": 0.00015127427792946242, "loss": 11.665, "step": 47118 }, { "epoch": 0.9863309051327137, "grad_norm": 0.3313435912132263, "learning_rate": 0.00015127239553497127, "loss": 11.6967, "step": 47119 }, { "epoch": 0.9863518378966759, "grad_norm": 0.29234230518341064, "learning_rate": 0.00015127051311583246, "loss": 11.6502, "step": 47120 }, { "epoch": 0.986372770660638, "grad_norm": 0.4238007366657257, "learning_rate": 0.00015126863067204687, "loss": 11.6741, "step": 47121 }, { "epoch": 0.9863937034246002, "grad_norm": 0.3123866617679596, "learning_rate": 0.0001512667482036154, "loss": 11.6655, "step": 47122 }, { "epoch": 0.9864146361885623, "grad_norm": 0.2502952814102173, "learning_rate": 0.00015126486571053903, "loss": 11.666, "step": 47123 }, { "epoch": 0.9864355689525245, "grad_norm": 0.31538042426109314, "learning_rate": 0.00015126298319281857, "loss": 11.6672, "step": 47124 }, { "epoch": 0.9864565017164867, "grad_norm": 0.2916334271430969, "learning_rate": 0.00015126110065045499, "loss": 11.6595, "step": 47125 }, { "epoch": 0.9864774344804488, "grad_norm": 0.2900700271129608, "learning_rate": 0.00015125921808344914, "loss": 11.6737, "step": 47126 }, { "epoch": 0.986498367244411, "grad_norm": 0.3236858546733856, "learning_rate": 0.00015125733549180197, "loss": 11.6667, "step": 47127 }, { "epoch": 0.9865193000083731, "grad_norm": 0.32552990317344666, "learning_rate": 0.0001512554528755144, "loss": 11.6361, "step": 47128 }, { "epoch": 0.9865402327723353, "grad_norm": 0.41445040702819824, "learning_rate": 0.0001512535702345873, "loss": 11.655, "step": 47129 }, { "epoch": 0.9865611655362974, "grad_norm": 0.366908460855484, "learning_rate": 0.00015125168756902155, "loss": 11.6557, "step": 47130 }, { "epoch": 0.9865820983002596, "grad_norm": 0.325761079788208, "learning_rate": 0.0001512498048788181, "loss": 11.6688, "step": 47131 }, { "epoch": 0.9866030310642218, "grad_norm": 0.2949022054672241, "learning_rate": 0.00015124792216397785, "loss": 11.6761, "step": 47132 }, { "epoch": 0.9866239638281838, "grad_norm": 0.5414543747901917, "learning_rate": 0.00015124603942450168, "loss": 11.6755, "step": 47133 }, { "epoch": 0.986644896592146, "grad_norm": 0.373009592294693, "learning_rate": 0.00015124415666039053, "loss": 11.6556, "step": 47134 }, { "epoch": 0.9866658293561081, "grad_norm": 0.332912802696228, "learning_rate": 0.00015124227387164528, "loss": 11.6708, "step": 47135 }, { "epoch": 0.9866867621200703, "grad_norm": 0.37360405921936035, "learning_rate": 0.00015124039105826685, "loss": 11.6638, "step": 47136 }, { "epoch": 0.9867076948840325, "grad_norm": 0.27399858832359314, "learning_rate": 0.00015123850822025613, "loss": 11.6643, "step": 47137 }, { "epoch": 0.9867286276479946, "grad_norm": 0.35808083415031433, "learning_rate": 0.000151236625357614, "loss": 11.6867, "step": 47138 }, { "epoch": 0.9867495604119568, "grad_norm": 0.3002650737762451, "learning_rate": 0.00015123474247034146, "loss": 11.6668, "step": 47139 }, { "epoch": 0.9867704931759189, "grad_norm": 0.33299508690834045, "learning_rate": 0.00015123285955843932, "loss": 11.6698, "step": 47140 }, { "epoch": 0.9867914259398811, "grad_norm": 0.25998082756996155, "learning_rate": 0.0001512309766219085, "loss": 11.6674, "step": 47141 }, { "epoch": 0.9868123587038432, "grad_norm": 0.3530597388744354, "learning_rate": 0.00015122909366074998, "loss": 11.6842, "step": 47142 }, { "epoch": 0.9868332914678054, "grad_norm": 0.38119256496429443, "learning_rate": 0.00015122721067496454, "loss": 11.6629, "step": 47143 }, { "epoch": 0.9868542242317676, "grad_norm": 0.3180544674396515, "learning_rate": 0.0001512253276645532, "loss": 11.6881, "step": 47144 }, { "epoch": 0.9868751569957297, "grad_norm": 0.3607407212257385, "learning_rate": 0.0001512234446295168, "loss": 11.6872, "step": 47145 }, { "epoch": 0.9868960897596919, "grad_norm": 0.2790028154850006, "learning_rate": 0.00015122156156985628, "loss": 11.6498, "step": 47146 }, { "epoch": 0.986917022523654, "grad_norm": 0.2882533371448517, "learning_rate": 0.0001512196784855725, "loss": 11.6725, "step": 47147 }, { "epoch": 0.9869379552876162, "grad_norm": 0.3427809774875641, "learning_rate": 0.00015121779537666637, "loss": 11.6756, "step": 47148 }, { "epoch": 0.9869588880515783, "grad_norm": 0.28454476594924927, "learning_rate": 0.00015121591224313892, "loss": 11.6707, "step": 47149 }, { "epoch": 0.9869798208155405, "grad_norm": 0.2925605773925781, "learning_rate": 0.00015121402908499086, "loss": 11.6649, "step": 47150 }, { "epoch": 0.9870007535795027, "grad_norm": 0.37446919083595276, "learning_rate": 0.00015121214590222323, "loss": 11.675, "step": 47151 }, { "epoch": 0.9870216863434648, "grad_norm": 0.25358298420906067, "learning_rate": 0.0001512102626948369, "loss": 11.661, "step": 47152 }, { "epoch": 0.987042619107427, "grad_norm": 0.3333233892917633, "learning_rate": 0.00015120837946283272, "loss": 11.6791, "step": 47153 }, { "epoch": 0.987063551871389, "grad_norm": 0.4226768910884857, "learning_rate": 0.00015120649620621173, "loss": 11.6665, "step": 47154 }, { "epoch": 0.9870844846353513, "grad_norm": 0.24616505205631256, "learning_rate": 0.0001512046129249747, "loss": 11.6622, "step": 47155 }, { "epoch": 0.9871054173993135, "grad_norm": 0.30823737382888794, "learning_rate": 0.00015120272961912257, "loss": 11.6675, "step": 47156 }, { "epoch": 0.9871263501632755, "grad_norm": 0.30186015367507935, "learning_rate": 0.00015120084628865628, "loss": 11.663, "step": 47157 }, { "epoch": 0.9871472829272377, "grad_norm": 0.25537198781967163, "learning_rate": 0.00015119896293357675, "loss": 11.6618, "step": 47158 }, { "epoch": 0.9871682156911998, "grad_norm": 0.2931670844554901, "learning_rate": 0.0001511970795538848, "loss": 11.6621, "step": 47159 }, { "epoch": 0.987189148455162, "grad_norm": 0.34083712100982666, "learning_rate": 0.0001511951961495814, "loss": 11.6603, "step": 47160 }, { "epoch": 0.9872100812191241, "grad_norm": 0.28792470693588257, "learning_rate": 0.00015119331272066746, "loss": 11.6607, "step": 47161 }, { "epoch": 0.9872310139830863, "grad_norm": 0.2753767669200897, "learning_rate": 0.00015119142926714387, "loss": 11.6766, "step": 47162 }, { "epoch": 0.9872519467470485, "grad_norm": 0.2289455682039261, "learning_rate": 0.0001511895457890115, "loss": 11.6722, "step": 47163 }, { "epoch": 0.9872728795110106, "grad_norm": 0.3818983733654022, "learning_rate": 0.00015118766228627134, "loss": 11.6784, "step": 47164 }, { "epoch": 0.9872938122749728, "grad_norm": 0.29383936524391174, "learning_rate": 0.00015118577875892423, "loss": 11.6722, "step": 47165 }, { "epoch": 0.9873147450389349, "grad_norm": 0.365170955657959, "learning_rate": 0.00015118389520697107, "loss": 11.6766, "step": 47166 }, { "epoch": 0.9873356778028971, "grad_norm": 0.31569790840148926, "learning_rate": 0.00015118201163041278, "loss": 11.6606, "step": 47167 }, { "epoch": 0.9873566105668592, "grad_norm": 0.2902262806892395, "learning_rate": 0.00015118012802925031, "loss": 11.6738, "step": 47168 }, { "epoch": 0.9873775433308214, "grad_norm": 0.3510029911994934, "learning_rate": 0.0001511782444034845, "loss": 11.6842, "step": 47169 }, { "epoch": 0.9873984760947836, "grad_norm": 0.35627204179763794, "learning_rate": 0.0001511763607531163, "loss": 11.6853, "step": 47170 }, { "epoch": 0.9874194088587457, "grad_norm": 0.35091936588287354, "learning_rate": 0.0001511744770781466, "loss": 11.6779, "step": 47171 }, { "epoch": 0.9874403416227079, "grad_norm": 0.31661033630371094, "learning_rate": 0.00015117259337857626, "loss": 11.6714, "step": 47172 }, { "epoch": 0.98746127438667, "grad_norm": 0.30220866203308105, "learning_rate": 0.00015117070965440626, "loss": 11.6517, "step": 47173 }, { "epoch": 0.9874822071506322, "grad_norm": 0.2668580710887909, "learning_rate": 0.00015116882590563747, "loss": 11.6852, "step": 47174 }, { "epoch": 0.9875031399145944, "grad_norm": 0.3198683559894562, "learning_rate": 0.0001511669421322708, "loss": 11.6683, "step": 47175 }, { "epoch": 0.9875240726785565, "grad_norm": 0.2983175218105316, "learning_rate": 0.00015116505833430717, "loss": 11.6533, "step": 47176 }, { "epoch": 0.9875450054425187, "grad_norm": 0.2906759977340698, "learning_rate": 0.00015116317451174745, "loss": 11.6794, "step": 47177 }, { "epoch": 0.9875659382064808, "grad_norm": 0.31031695008277893, "learning_rate": 0.0001511612906645926, "loss": 11.6665, "step": 47178 }, { "epoch": 0.987586870970443, "grad_norm": 0.31622716784477234, "learning_rate": 0.00015115940679284346, "loss": 11.6728, "step": 47179 }, { "epoch": 0.987607803734405, "grad_norm": 0.4387686550617218, "learning_rate": 0.00015115752289650096, "loss": 11.6883, "step": 47180 }, { "epoch": 0.9876287364983672, "grad_norm": 0.28366973996162415, "learning_rate": 0.00015115563897556605, "loss": 11.6729, "step": 47181 }, { "epoch": 0.9876496692623294, "grad_norm": 0.31263428926467896, "learning_rate": 0.00015115375503003956, "loss": 11.6585, "step": 47182 }, { "epoch": 0.9876706020262915, "grad_norm": 0.3849138021469116, "learning_rate": 0.00015115187105992246, "loss": 11.674, "step": 47183 }, { "epoch": 0.9876915347902537, "grad_norm": 0.3140803575515747, "learning_rate": 0.0001511499870652156, "loss": 11.6764, "step": 47184 }, { "epoch": 0.9877124675542158, "grad_norm": 0.283279150724411, "learning_rate": 0.00015114810304591995, "loss": 11.6804, "step": 47185 }, { "epoch": 0.987733400318178, "grad_norm": 0.26456496119499207, "learning_rate": 0.00015114621900203637, "loss": 11.6636, "step": 47186 }, { "epoch": 0.9877543330821401, "grad_norm": 0.3385511040687561, "learning_rate": 0.00015114433493356577, "loss": 11.6674, "step": 47187 }, { "epoch": 0.9877752658461023, "grad_norm": 0.3151377737522125, "learning_rate": 0.00015114245084050907, "loss": 11.6476, "step": 47188 }, { "epoch": 0.9877961986100645, "grad_norm": 0.2766147255897522, "learning_rate": 0.0001511405667228672, "loss": 11.6759, "step": 47189 }, { "epoch": 0.9878171313740266, "grad_norm": 0.3078388571739197, "learning_rate": 0.00015113868258064098, "loss": 11.6613, "step": 47190 }, { "epoch": 0.9878380641379888, "grad_norm": 0.2643110156059265, "learning_rate": 0.00015113679841383143, "loss": 11.6755, "step": 47191 }, { "epoch": 0.9878589969019509, "grad_norm": 0.35428979992866516, "learning_rate": 0.00015113491422243932, "loss": 11.6811, "step": 47192 }, { "epoch": 0.9878799296659131, "grad_norm": 0.282368928194046, "learning_rate": 0.0001511330300064657, "loss": 11.6662, "step": 47193 }, { "epoch": 0.9879008624298753, "grad_norm": 0.3020457327365875, "learning_rate": 0.0001511311457659114, "loss": 11.6807, "step": 47194 }, { "epoch": 0.9879217951938374, "grad_norm": 0.21944007277488708, "learning_rate": 0.0001511292615007773, "loss": 11.6693, "step": 47195 }, { "epoch": 0.9879427279577996, "grad_norm": 0.30203041434288025, "learning_rate": 0.00015112737721106436, "loss": 11.6722, "step": 47196 }, { "epoch": 0.9879636607217617, "grad_norm": 0.3543573021888733, "learning_rate": 0.00015112549289677345, "loss": 11.6753, "step": 47197 }, { "epoch": 0.9879845934857239, "grad_norm": 0.2997908592224121, "learning_rate": 0.00015112360855790553, "loss": 11.6607, "step": 47198 }, { "epoch": 0.988005526249686, "grad_norm": 0.3561044931411743, "learning_rate": 0.0001511217241944614, "loss": 11.7029, "step": 47199 }, { "epoch": 0.9880264590136482, "grad_norm": 0.23220425844192505, "learning_rate": 0.0001511198398064421, "loss": 11.6709, "step": 47200 }, { "epoch": 0.9880473917776104, "grad_norm": 0.3125539720058441, "learning_rate": 0.00015111795539384844, "loss": 11.6796, "step": 47201 }, { "epoch": 0.9880683245415725, "grad_norm": 0.32868218421936035, "learning_rate": 0.00015111607095668136, "loss": 11.656, "step": 47202 }, { "epoch": 0.9880892573055347, "grad_norm": 0.2510702908039093, "learning_rate": 0.00015111418649494174, "loss": 11.659, "step": 47203 }, { "epoch": 0.9881101900694967, "grad_norm": 0.34302017092704773, "learning_rate": 0.00015111230200863052, "loss": 11.6749, "step": 47204 }, { "epoch": 0.9881311228334589, "grad_norm": 0.27771294116973877, "learning_rate": 0.0001511104174977486, "loss": 11.6544, "step": 47205 }, { "epoch": 0.988152055597421, "grad_norm": 0.3229764699935913, "learning_rate": 0.00015110853296229686, "loss": 11.6579, "step": 47206 }, { "epoch": 0.9881729883613832, "grad_norm": 0.2528987228870392, "learning_rate": 0.00015110664840227623, "loss": 11.6501, "step": 47207 }, { "epoch": 0.9881939211253454, "grad_norm": 0.27123475074768066, "learning_rate": 0.00015110476381768764, "loss": 11.6439, "step": 47208 }, { "epoch": 0.9882148538893075, "grad_norm": 0.31748321652412415, "learning_rate": 0.00015110287920853192, "loss": 11.6848, "step": 47209 }, { "epoch": 0.9882357866532697, "grad_norm": 0.40472230315208435, "learning_rate": 0.00015110099457481003, "loss": 11.6745, "step": 47210 }, { "epoch": 0.9882567194172318, "grad_norm": 0.2506332993507385, "learning_rate": 0.00015109910991652288, "loss": 11.6646, "step": 47211 }, { "epoch": 0.988277652181194, "grad_norm": 0.3026098906993866, "learning_rate": 0.0001510972252336714, "loss": 11.67, "step": 47212 }, { "epoch": 0.9882985849451562, "grad_norm": 0.3724362552165985, "learning_rate": 0.0001510953405262564, "loss": 11.6632, "step": 47213 }, { "epoch": 0.9883195177091183, "grad_norm": 0.29141563177108765, "learning_rate": 0.00015109345579427888, "loss": 11.6568, "step": 47214 }, { "epoch": 0.9883404504730805, "grad_norm": 0.40083619952201843, "learning_rate": 0.0001510915710377397, "loss": 11.6653, "step": 47215 }, { "epoch": 0.9883613832370426, "grad_norm": 0.26886722445487976, "learning_rate": 0.00015108968625663976, "loss": 11.6777, "step": 47216 }, { "epoch": 0.9883823160010048, "grad_norm": 0.322069376707077, "learning_rate": 0.00015108780145098002, "loss": 11.6658, "step": 47217 }, { "epoch": 0.9884032487649669, "grad_norm": 0.3283156752586365, "learning_rate": 0.00015108591662076133, "loss": 11.6685, "step": 47218 }, { "epoch": 0.9884241815289291, "grad_norm": 0.2889973819255829, "learning_rate": 0.00015108403176598462, "loss": 11.6754, "step": 47219 }, { "epoch": 0.9884451142928913, "grad_norm": 0.2914113402366638, "learning_rate": 0.0001510821468866508, "loss": 11.6726, "step": 47220 }, { "epoch": 0.9884660470568534, "grad_norm": 0.2772758901119232, "learning_rate": 0.00015108026198276076, "loss": 11.6863, "step": 47221 }, { "epoch": 0.9884869798208156, "grad_norm": 0.2720339596271515, "learning_rate": 0.00015107837705431542, "loss": 11.6614, "step": 47222 }, { "epoch": 0.9885079125847777, "grad_norm": 0.36498507857322693, "learning_rate": 0.00015107649210131569, "loss": 11.6756, "step": 47223 }, { "epoch": 0.9885288453487399, "grad_norm": 0.37730756402015686, "learning_rate": 0.00015107460712376243, "loss": 11.6695, "step": 47224 }, { "epoch": 0.988549778112702, "grad_norm": 0.33768555521965027, "learning_rate": 0.00015107272212165659, "loss": 11.667, "step": 47225 }, { "epoch": 0.9885707108766641, "grad_norm": 0.29449397325515747, "learning_rate": 0.0001510708370949991, "loss": 11.6714, "step": 47226 }, { "epoch": 0.9885916436406263, "grad_norm": 0.3290060758590698, "learning_rate": 0.0001510689520437908, "loss": 11.6541, "step": 47227 }, { "epoch": 0.9886125764045884, "grad_norm": 0.31692591309547424, "learning_rate": 0.0001510670669680327, "loss": 11.6539, "step": 47228 }, { "epoch": 0.9886335091685506, "grad_norm": 0.25760743021965027, "learning_rate": 0.00015106518186772556, "loss": 11.6505, "step": 47229 }, { "epoch": 0.9886544419325127, "grad_norm": 0.3141268491744995, "learning_rate": 0.0001510632967428704, "loss": 11.6649, "step": 47230 }, { "epoch": 0.9886753746964749, "grad_norm": 0.28723442554473877, "learning_rate": 0.00015106141159346807, "loss": 11.6711, "step": 47231 }, { "epoch": 0.9886963074604371, "grad_norm": 0.2555266320705414, "learning_rate": 0.0001510595264195195, "loss": 11.6761, "step": 47232 }, { "epoch": 0.9887172402243992, "grad_norm": 0.3837421238422394, "learning_rate": 0.00015105764122102562, "loss": 11.668, "step": 47233 }, { "epoch": 0.9887381729883614, "grad_norm": 0.3696341812610626, "learning_rate": 0.00015105575599798726, "loss": 11.6585, "step": 47234 }, { "epoch": 0.9887591057523235, "grad_norm": 0.4602827727794647, "learning_rate": 0.00015105387075040543, "loss": 11.6638, "step": 47235 }, { "epoch": 0.9887800385162857, "grad_norm": 0.2967345118522644, "learning_rate": 0.00015105198547828096, "loss": 11.6641, "step": 47236 }, { "epoch": 0.9888009712802478, "grad_norm": 0.5504088997840881, "learning_rate": 0.00015105010018161475, "loss": 11.6789, "step": 47237 }, { "epoch": 0.98882190404421, "grad_norm": 0.3679884374141693, "learning_rate": 0.00015104821486040777, "loss": 11.658, "step": 47238 }, { "epoch": 0.9888428368081722, "grad_norm": 0.3412894904613495, "learning_rate": 0.00015104632951466087, "loss": 11.6676, "step": 47239 }, { "epoch": 0.9888637695721343, "grad_norm": 0.3377029299736023, "learning_rate": 0.00015104444414437498, "loss": 11.6696, "step": 47240 }, { "epoch": 0.9888847023360965, "grad_norm": 0.2964823842048645, "learning_rate": 0.000151042558749551, "loss": 11.6833, "step": 47241 }, { "epoch": 0.9889056351000586, "grad_norm": 0.4816396236419678, "learning_rate": 0.00015104067333018985, "loss": 11.6785, "step": 47242 }, { "epoch": 0.9889265678640208, "grad_norm": 0.3219643831253052, "learning_rate": 0.0001510387878862924, "loss": 11.6615, "step": 47243 }, { "epoch": 0.9889475006279829, "grad_norm": 0.35347306728363037, "learning_rate": 0.00015103690241785963, "loss": 11.6894, "step": 47244 }, { "epoch": 0.9889684333919451, "grad_norm": 0.3111659288406372, "learning_rate": 0.00015103501692489236, "loss": 11.6615, "step": 47245 }, { "epoch": 0.9889893661559073, "grad_norm": 0.4007664620876312, "learning_rate": 0.00015103313140739152, "loss": 11.6651, "step": 47246 }, { "epoch": 0.9890102989198694, "grad_norm": 0.29926636815071106, "learning_rate": 0.00015103124586535809, "loss": 11.6767, "step": 47247 }, { "epoch": 0.9890312316838316, "grad_norm": 0.2467561662197113, "learning_rate": 0.00015102936029879287, "loss": 11.6626, "step": 47248 }, { "epoch": 0.9890521644477936, "grad_norm": 0.3007708787918091, "learning_rate": 0.00015102747470769682, "loss": 11.6592, "step": 47249 }, { "epoch": 0.9890730972117558, "grad_norm": 0.2903880178928375, "learning_rate": 0.00015102558909207086, "loss": 11.6668, "step": 47250 }, { "epoch": 0.989094029975718, "grad_norm": 0.30847790837287903, "learning_rate": 0.00015102370345191583, "loss": 11.6522, "step": 47251 }, { "epoch": 0.9891149627396801, "grad_norm": 0.32946571707725525, "learning_rate": 0.00015102181778723275, "loss": 11.6663, "step": 47252 }, { "epoch": 0.9891358955036423, "grad_norm": 0.36458542943000793, "learning_rate": 0.00015101993209802242, "loss": 11.6599, "step": 47253 }, { "epoch": 0.9891568282676044, "grad_norm": 0.27864280343055725, "learning_rate": 0.0001510180463842858, "loss": 11.676, "step": 47254 }, { "epoch": 0.9891777610315666, "grad_norm": 0.331416517496109, "learning_rate": 0.00015101616064602377, "loss": 11.6663, "step": 47255 }, { "epoch": 0.9891986937955287, "grad_norm": 0.3660613000392914, "learning_rate": 0.00015101427488323723, "loss": 11.6633, "step": 47256 }, { "epoch": 0.9892196265594909, "grad_norm": 0.3171856999397278, "learning_rate": 0.00015101238909592715, "loss": 11.6754, "step": 47257 }, { "epoch": 0.9892405593234531, "grad_norm": 0.3079180121421814, "learning_rate": 0.00015101050328409436, "loss": 11.6656, "step": 47258 }, { "epoch": 0.9892614920874152, "grad_norm": 0.32573428750038147, "learning_rate": 0.00015100861744773983, "loss": 11.6684, "step": 47259 }, { "epoch": 0.9892824248513774, "grad_norm": 0.3182704448699951, "learning_rate": 0.0001510067315868644, "loss": 11.6577, "step": 47260 }, { "epoch": 0.9893033576153395, "grad_norm": 0.3324611485004425, "learning_rate": 0.00015100484570146905, "loss": 11.6787, "step": 47261 }, { "epoch": 0.9893242903793017, "grad_norm": 0.2871643304824829, "learning_rate": 0.00015100295979155463, "loss": 11.6641, "step": 47262 }, { "epoch": 0.9893452231432638, "grad_norm": 0.31116628646850586, "learning_rate": 0.00015100107385712205, "loss": 11.6619, "step": 47263 }, { "epoch": 0.989366155907226, "grad_norm": 0.28045976161956787, "learning_rate": 0.00015099918789817225, "loss": 11.6673, "step": 47264 }, { "epoch": 0.9893870886711882, "grad_norm": 0.29643478989601135, "learning_rate": 0.0001509973019147061, "loss": 11.6595, "step": 47265 }, { "epoch": 0.9894080214351503, "grad_norm": 0.2995946705341339, "learning_rate": 0.00015099541590672455, "loss": 11.6626, "step": 47266 }, { "epoch": 0.9894289541991125, "grad_norm": 0.3086753785610199, "learning_rate": 0.0001509935298742285, "loss": 11.6638, "step": 47267 }, { "epoch": 0.9894498869630746, "grad_norm": 0.33423978090286255, "learning_rate": 0.00015099164381721875, "loss": 11.6923, "step": 47268 }, { "epoch": 0.9894708197270368, "grad_norm": 0.28977057337760925, "learning_rate": 0.00015098975773569637, "loss": 11.6716, "step": 47269 }, { "epoch": 0.989491752490999, "grad_norm": 0.3490285277366638, "learning_rate": 0.00015098787162966216, "loss": 11.6753, "step": 47270 }, { "epoch": 0.989512685254961, "grad_norm": 0.2510194480419159, "learning_rate": 0.0001509859854991171, "loss": 11.6584, "step": 47271 }, { "epoch": 0.9895336180189233, "grad_norm": 0.2621600031852722, "learning_rate": 0.000150984099344062, "loss": 11.6811, "step": 47272 }, { "epoch": 0.9895545507828853, "grad_norm": 0.36811164021492004, "learning_rate": 0.00015098221316449785, "loss": 11.6869, "step": 47273 }, { "epoch": 0.9895754835468475, "grad_norm": 0.2977622449398041, "learning_rate": 0.00015098032696042555, "loss": 11.6635, "step": 47274 }, { "epoch": 0.9895964163108096, "grad_norm": 0.3336324095726013, "learning_rate": 0.00015097844073184595, "loss": 11.6608, "step": 47275 }, { "epoch": 0.9896173490747718, "grad_norm": 0.25016123056411743, "learning_rate": 0.00015097655447876, "loss": 11.655, "step": 47276 }, { "epoch": 0.989638281838734, "grad_norm": 0.32316070795059204, "learning_rate": 0.0001509746682011686, "loss": 11.6667, "step": 47277 }, { "epoch": 0.9896592146026961, "grad_norm": 0.4300729036331177, "learning_rate": 0.00015097278189907267, "loss": 11.6688, "step": 47278 }, { "epoch": 0.9896801473666583, "grad_norm": 0.3253099322319031, "learning_rate": 0.0001509708955724731, "loss": 11.6574, "step": 47279 }, { "epoch": 0.9897010801306204, "grad_norm": 0.32261893153190613, "learning_rate": 0.00015096900922137078, "loss": 11.6734, "step": 47280 }, { "epoch": 0.9897220128945826, "grad_norm": 0.25640565156936646, "learning_rate": 0.00015096712284576666, "loss": 11.6501, "step": 47281 }, { "epoch": 0.9897429456585447, "grad_norm": 0.30738553404808044, "learning_rate": 0.00015096523644566158, "loss": 11.669, "step": 47282 }, { "epoch": 0.9897638784225069, "grad_norm": 0.34073349833488464, "learning_rate": 0.00015096335002105654, "loss": 11.6591, "step": 47283 }, { "epoch": 0.9897848111864691, "grad_norm": 0.35679978132247925, "learning_rate": 0.00015096146357195238, "loss": 11.6612, "step": 47284 }, { "epoch": 0.9898057439504312, "grad_norm": 0.358561247587204, "learning_rate": 0.00015095957709834997, "loss": 11.687, "step": 47285 }, { "epoch": 0.9898266767143934, "grad_norm": 0.28828322887420654, "learning_rate": 0.00015095769060025034, "loss": 11.6546, "step": 47286 }, { "epoch": 0.9898476094783555, "grad_norm": 0.2378734052181244, "learning_rate": 0.0001509558040776543, "loss": 11.6777, "step": 47287 }, { "epoch": 0.9898685422423177, "grad_norm": 0.3491577208042145, "learning_rate": 0.00015095391753056281, "loss": 11.6708, "step": 47288 }, { "epoch": 0.9898894750062799, "grad_norm": 0.2782228887081146, "learning_rate": 0.00015095203095897672, "loss": 11.666, "step": 47289 }, { "epoch": 0.989910407770242, "grad_norm": 0.29547247290611267, "learning_rate": 0.00015095014436289697, "loss": 11.6675, "step": 47290 }, { "epoch": 0.9899313405342042, "grad_norm": 0.2650690972805023, "learning_rate": 0.00015094825774232445, "loss": 11.6681, "step": 47291 }, { "epoch": 0.9899522732981663, "grad_norm": 0.35629308223724365, "learning_rate": 0.0001509463710972601, "loss": 11.6774, "step": 47292 }, { "epoch": 0.9899732060621285, "grad_norm": 0.32354220747947693, "learning_rate": 0.00015094448442770487, "loss": 11.6579, "step": 47293 }, { "epoch": 0.9899941388260906, "grad_norm": 0.330581933259964, "learning_rate": 0.00015094259773365952, "loss": 11.6695, "step": 47294 }, { "epoch": 0.9900150715900528, "grad_norm": 0.32068705558776855, "learning_rate": 0.00015094071101512505, "loss": 11.6699, "step": 47295 }, { "epoch": 0.990036004354015, "grad_norm": 0.34786492586135864, "learning_rate": 0.00015093882427210238, "loss": 11.6747, "step": 47296 }, { "epoch": 0.990056937117977, "grad_norm": 0.3264698088169098, "learning_rate": 0.0001509369375045924, "loss": 11.6799, "step": 47297 }, { "epoch": 0.9900778698819392, "grad_norm": 0.3109114468097687, "learning_rate": 0.000150935050712596, "loss": 11.6593, "step": 47298 }, { "epoch": 0.9900988026459013, "grad_norm": 0.3223639130592346, "learning_rate": 0.00015093316389611411, "loss": 11.6615, "step": 47299 }, { "epoch": 0.9901197354098635, "grad_norm": 0.422182559967041, "learning_rate": 0.00015093127705514763, "loss": 11.6688, "step": 47300 }, { "epoch": 0.9901406681738256, "grad_norm": 0.30158334970474243, "learning_rate": 0.00015092939018969745, "loss": 11.6422, "step": 47301 }, { "epoch": 0.9901616009377878, "grad_norm": 0.24097558856010437, "learning_rate": 0.0001509275032997645, "loss": 11.659, "step": 47302 }, { "epoch": 0.99018253370175, "grad_norm": 0.32761240005493164, "learning_rate": 0.0001509256163853497, "loss": 11.6502, "step": 47303 }, { "epoch": 0.9902034664657121, "grad_norm": 0.2925669252872467, "learning_rate": 0.00015092372944645391, "loss": 11.6755, "step": 47304 }, { "epoch": 0.9902243992296743, "grad_norm": 0.30852705240249634, "learning_rate": 0.00015092184248307808, "loss": 11.6787, "step": 47305 }, { "epoch": 0.9902453319936364, "grad_norm": 0.26030099391937256, "learning_rate": 0.00015091995549522307, "loss": 11.6686, "step": 47306 }, { "epoch": 0.9902662647575986, "grad_norm": 0.30720067024230957, "learning_rate": 0.00015091806848288982, "loss": 11.6696, "step": 47307 }, { "epoch": 0.9902871975215607, "grad_norm": 0.3809053897857666, "learning_rate": 0.0001509161814460793, "loss": 11.6705, "step": 47308 }, { "epoch": 0.9903081302855229, "grad_norm": 0.30521103739738464, "learning_rate": 0.00015091429438479227, "loss": 11.6746, "step": 47309 }, { "epoch": 0.9903290630494851, "grad_norm": 0.2638792395591736, "learning_rate": 0.00015091240729902974, "loss": 11.6718, "step": 47310 }, { "epoch": 0.9903499958134472, "grad_norm": 0.4842073619365692, "learning_rate": 0.00015091052018879263, "loss": 11.6727, "step": 47311 }, { "epoch": 0.9903709285774094, "grad_norm": 0.2750660181045532, "learning_rate": 0.00015090863305408176, "loss": 11.66, "step": 47312 }, { "epoch": 0.9903918613413715, "grad_norm": 0.3500760793685913, "learning_rate": 0.00015090674589489811, "loss": 11.6705, "step": 47313 }, { "epoch": 0.9904127941053337, "grad_norm": 0.35591891407966614, "learning_rate": 0.0001509048587112426, "loss": 11.6674, "step": 47314 }, { "epoch": 0.9904337268692959, "grad_norm": 0.3234848976135254, "learning_rate": 0.00015090297150311609, "loss": 11.6737, "step": 47315 }, { "epoch": 0.990454659633258, "grad_norm": 0.2976886034011841, "learning_rate": 0.00015090108427051946, "loss": 11.6534, "step": 47316 }, { "epoch": 0.9904755923972202, "grad_norm": 0.2970111668109894, "learning_rate": 0.00015089919701345367, "loss": 11.6706, "step": 47317 }, { "epoch": 0.9904965251611823, "grad_norm": 0.2891974151134491, "learning_rate": 0.00015089730973191965, "loss": 11.6635, "step": 47318 }, { "epoch": 0.9905174579251445, "grad_norm": 0.278441846370697, "learning_rate": 0.00015089542242591826, "loss": 11.6707, "step": 47319 }, { "epoch": 0.9905383906891065, "grad_norm": 0.3189952075481415, "learning_rate": 0.00015089353509545043, "loss": 11.6597, "step": 47320 }, { "epoch": 0.9905593234530687, "grad_norm": 0.28462257981300354, "learning_rate": 0.00015089164774051701, "loss": 11.6726, "step": 47321 }, { "epoch": 0.9905802562170309, "grad_norm": 0.40059125423431396, "learning_rate": 0.000150889760361119, "loss": 11.6648, "step": 47322 }, { "epoch": 0.990601188980993, "grad_norm": 0.3493097424507141, "learning_rate": 0.00015088787295725728, "loss": 11.6637, "step": 47323 }, { "epoch": 0.9906221217449552, "grad_norm": 0.3204115629196167, "learning_rate": 0.0001508859855289327, "loss": 11.6453, "step": 47324 }, { "epoch": 0.9906430545089173, "grad_norm": 0.2627984881401062, "learning_rate": 0.00015088409807614622, "loss": 11.6762, "step": 47325 }, { "epoch": 0.9906639872728795, "grad_norm": 0.2985425591468811, "learning_rate": 0.00015088221059889872, "loss": 11.6753, "step": 47326 }, { "epoch": 0.9906849200368416, "grad_norm": 0.26143720746040344, "learning_rate": 0.00015088032309719116, "loss": 11.6653, "step": 47327 }, { "epoch": 0.9907058528008038, "grad_norm": 0.3133891522884369, "learning_rate": 0.00015087843557102439, "loss": 11.6845, "step": 47328 }, { "epoch": 0.990726785564766, "grad_norm": 0.3760940134525299, "learning_rate": 0.0001508765480203993, "loss": 11.6759, "step": 47329 }, { "epoch": 0.9907477183287281, "grad_norm": 0.44649171829223633, "learning_rate": 0.00015087466044531688, "loss": 11.6853, "step": 47330 }, { "epoch": 0.9907686510926903, "grad_norm": 0.38086840510368347, "learning_rate": 0.000150872772845778, "loss": 11.6609, "step": 47331 }, { "epoch": 0.9907895838566524, "grad_norm": 0.3062137961387634, "learning_rate": 0.00015087088522178351, "loss": 11.6703, "step": 47332 }, { "epoch": 0.9908105166206146, "grad_norm": 0.2915051579475403, "learning_rate": 0.0001508689975733344, "loss": 11.6642, "step": 47333 }, { "epoch": 0.9908314493845768, "grad_norm": 0.274253249168396, "learning_rate": 0.0001508671099004315, "loss": 11.6578, "step": 47334 }, { "epoch": 0.9908523821485389, "grad_norm": 0.28488287329673767, "learning_rate": 0.00015086522220307584, "loss": 11.6849, "step": 47335 }, { "epoch": 0.9908733149125011, "grad_norm": 0.3419952392578125, "learning_rate": 0.00015086333448126817, "loss": 11.6671, "step": 47336 }, { "epoch": 0.9908942476764632, "grad_norm": 0.29669618606567383, "learning_rate": 0.00015086144673500952, "loss": 11.6649, "step": 47337 }, { "epoch": 0.9909151804404254, "grad_norm": 0.30650821328163147, "learning_rate": 0.00015085955896430075, "loss": 11.6638, "step": 47338 }, { "epoch": 0.9909361132043875, "grad_norm": 0.31881603598594666, "learning_rate": 0.00015085767116914276, "loss": 11.667, "step": 47339 }, { "epoch": 0.9909570459683497, "grad_norm": 0.26871633529663086, "learning_rate": 0.00015085578334953647, "loss": 11.6554, "step": 47340 }, { "epoch": 0.9909779787323119, "grad_norm": 0.41603171825408936, "learning_rate": 0.00015085389550548278, "loss": 11.6609, "step": 47341 }, { "epoch": 0.990998911496274, "grad_norm": 0.36157020926475525, "learning_rate": 0.00015085200763698264, "loss": 11.6748, "step": 47342 }, { "epoch": 0.9910198442602361, "grad_norm": 0.2916518747806549, "learning_rate": 0.00015085011974403688, "loss": 11.6582, "step": 47343 }, { "epoch": 0.9910407770241982, "grad_norm": 0.31811946630477905, "learning_rate": 0.00015084823182664645, "loss": 11.6693, "step": 47344 }, { "epoch": 0.9910617097881604, "grad_norm": 0.26568055152893066, "learning_rate": 0.00015084634388481228, "loss": 11.6687, "step": 47345 }, { "epoch": 0.9910826425521225, "grad_norm": 0.3091947138309479, "learning_rate": 0.00015084445591853523, "loss": 11.6535, "step": 47346 }, { "epoch": 0.9911035753160847, "grad_norm": 0.3023608922958374, "learning_rate": 0.00015084256792781623, "loss": 11.6641, "step": 47347 }, { "epoch": 0.9911245080800469, "grad_norm": 0.31628745794296265, "learning_rate": 0.0001508406799126562, "loss": 11.6702, "step": 47348 }, { "epoch": 0.991145440844009, "grad_norm": 0.2812255918979645, "learning_rate": 0.00015083879187305604, "loss": 11.6684, "step": 47349 }, { "epoch": 0.9911663736079712, "grad_norm": 0.2974787652492523, "learning_rate": 0.00015083690380901665, "loss": 11.6661, "step": 47350 }, { "epoch": 0.9911873063719333, "grad_norm": 0.32257595658302307, "learning_rate": 0.00015083501572053893, "loss": 11.6707, "step": 47351 }, { "epoch": 0.9912082391358955, "grad_norm": 0.292387992143631, "learning_rate": 0.00015083312760762383, "loss": 11.6644, "step": 47352 }, { "epoch": 0.9912291718998577, "grad_norm": 0.26886048913002014, "learning_rate": 0.00015083123947027218, "loss": 11.6669, "step": 47353 }, { "epoch": 0.9912501046638198, "grad_norm": 0.3040754199028015, "learning_rate": 0.000150829351308485, "loss": 11.6694, "step": 47354 }, { "epoch": 0.991271037427782, "grad_norm": 0.3025263249874115, "learning_rate": 0.00015082746312226308, "loss": 11.6798, "step": 47355 }, { "epoch": 0.9912919701917441, "grad_norm": 0.3804977238178253, "learning_rate": 0.00015082557491160738, "loss": 11.6577, "step": 47356 }, { "epoch": 0.9913129029557063, "grad_norm": 0.2879994511604309, "learning_rate": 0.00015082368667651883, "loss": 11.6536, "step": 47357 }, { "epoch": 0.9913338357196684, "grad_norm": 0.32855135202407837, "learning_rate": 0.0001508217984169983, "loss": 11.6655, "step": 47358 }, { "epoch": 0.9913547684836306, "grad_norm": 0.35752415657043457, "learning_rate": 0.00015081991013304673, "loss": 11.6714, "step": 47359 }, { "epoch": 0.9913757012475928, "grad_norm": 0.2726193964481354, "learning_rate": 0.000150818021824665, "loss": 11.6731, "step": 47360 }, { "epoch": 0.9913966340115549, "grad_norm": 0.25174498558044434, "learning_rate": 0.000150816133491854, "loss": 11.6717, "step": 47361 }, { "epoch": 0.9914175667755171, "grad_norm": 0.2840648293495178, "learning_rate": 0.0001508142451346147, "loss": 11.678, "step": 47362 }, { "epoch": 0.9914384995394792, "grad_norm": 0.7235770225524902, "learning_rate": 0.000150812356752948, "loss": 11.5687, "step": 47363 }, { "epoch": 0.9914594323034414, "grad_norm": 0.251396119594574, "learning_rate": 0.00015081046834685476, "loss": 11.6663, "step": 47364 }, { "epoch": 0.9914803650674034, "grad_norm": 0.33816081285476685, "learning_rate": 0.00015080857991633587, "loss": 11.6784, "step": 47365 }, { "epoch": 0.9915012978313656, "grad_norm": 0.33991414308547974, "learning_rate": 0.00015080669146139231, "loss": 11.664, "step": 47366 }, { "epoch": 0.9915222305953278, "grad_norm": 0.3633836507797241, "learning_rate": 0.00015080480298202496, "loss": 11.6658, "step": 47367 }, { "epoch": 0.9915431633592899, "grad_norm": 0.24854430556297302, "learning_rate": 0.0001508029144782347, "loss": 11.6669, "step": 47368 }, { "epoch": 0.9915640961232521, "grad_norm": 0.35479867458343506, "learning_rate": 0.0001508010259500225, "loss": 11.6796, "step": 47369 }, { "epoch": 0.9915850288872142, "grad_norm": 0.354591429233551, "learning_rate": 0.0001507991373973892, "loss": 11.6656, "step": 47370 }, { "epoch": 0.9916059616511764, "grad_norm": 0.37663233280181885, "learning_rate": 0.00015079724882033574, "loss": 11.6735, "step": 47371 }, { "epoch": 0.9916268944151386, "grad_norm": 0.37312158942222595, "learning_rate": 0.00015079536021886302, "loss": 11.6781, "step": 47372 }, { "epoch": 0.9916478271791007, "grad_norm": 0.2860562801361084, "learning_rate": 0.00015079347159297198, "loss": 11.6741, "step": 47373 }, { "epoch": 0.9916687599430629, "grad_norm": 1.3957324028015137, "learning_rate": 0.00015079158294266346, "loss": 11.6711, "step": 47374 }, { "epoch": 0.991689692707025, "grad_norm": 0.32555580139160156, "learning_rate": 0.0001507896942679384, "loss": 11.6707, "step": 47375 }, { "epoch": 0.9917106254709872, "grad_norm": 0.34161269664764404, "learning_rate": 0.00015078780556879778, "loss": 11.6623, "step": 47376 }, { "epoch": 0.9917315582349493, "grad_norm": 0.42470112442970276, "learning_rate": 0.0001507859168452424, "loss": 11.6701, "step": 47377 }, { "epoch": 0.9917524909989115, "grad_norm": 0.27439191937446594, "learning_rate": 0.0001507840280972732, "loss": 11.6479, "step": 47378 }, { "epoch": 0.9917734237628737, "grad_norm": 0.5087775588035583, "learning_rate": 0.00015078213932489116, "loss": 11.6951, "step": 47379 }, { "epoch": 0.9917943565268358, "grad_norm": 0.3603804409503937, "learning_rate": 0.0001507802505280971, "loss": 11.662, "step": 47380 }, { "epoch": 0.991815289290798, "grad_norm": 0.32044717669487, "learning_rate": 0.0001507783617068919, "loss": 11.6815, "step": 47381 }, { "epoch": 0.9918362220547601, "grad_norm": 0.3463073670864105, "learning_rate": 0.00015077647286127656, "loss": 11.6603, "step": 47382 }, { "epoch": 0.9918571548187223, "grad_norm": 0.3065758943557739, "learning_rate": 0.00015077458399125198, "loss": 11.6658, "step": 47383 }, { "epoch": 0.9918780875826844, "grad_norm": 0.3255027234554291, "learning_rate": 0.00015077269509681902, "loss": 11.6661, "step": 47384 }, { "epoch": 0.9918990203466466, "grad_norm": 0.30733367800712585, "learning_rate": 0.0001507708061779786, "loss": 11.6577, "step": 47385 }, { "epoch": 0.9919199531106088, "grad_norm": 0.30980104207992554, "learning_rate": 0.00015076891723473165, "loss": 11.6556, "step": 47386 }, { "epoch": 0.9919408858745709, "grad_norm": 0.31271690130233765, "learning_rate": 0.00015076702826707905, "loss": 11.6878, "step": 47387 }, { "epoch": 0.991961818638533, "grad_norm": 0.3102513551712036, "learning_rate": 0.00015076513927502174, "loss": 11.6681, "step": 47388 }, { "epoch": 0.9919827514024951, "grad_norm": 0.2598288953304291, "learning_rate": 0.00015076325025856059, "loss": 11.6543, "step": 47389 }, { "epoch": 0.9920036841664573, "grad_norm": 0.2976478934288025, "learning_rate": 0.00015076136121769653, "loss": 11.6761, "step": 47390 }, { "epoch": 0.9920246169304195, "grad_norm": 0.3590465188026428, "learning_rate": 0.0001507594721524305, "loss": 11.6689, "step": 47391 }, { "epoch": 0.9920455496943816, "grad_norm": 0.41040343046188354, "learning_rate": 0.00015075758306276333, "loss": 11.6691, "step": 47392 }, { "epoch": 0.9920664824583438, "grad_norm": 0.3022730350494385, "learning_rate": 0.000150755693948696, "loss": 11.6533, "step": 47393 }, { "epoch": 0.9920874152223059, "grad_norm": 0.316882848739624, "learning_rate": 0.0001507538048102294, "loss": 11.6634, "step": 47394 }, { "epoch": 0.9921083479862681, "grad_norm": 0.4711400866508484, "learning_rate": 0.00015075191564736437, "loss": 11.6648, "step": 47395 }, { "epoch": 0.9921292807502302, "grad_norm": 0.34184563159942627, "learning_rate": 0.00015075002646010194, "loss": 11.6643, "step": 47396 }, { "epoch": 0.9921502135141924, "grad_norm": 0.3102456331253052, "learning_rate": 0.00015074813724844294, "loss": 11.6647, "step": 47397 }, { "epoch": 0.9921711462781546, "grad_norm": 0.31291764974594116, "learning_rate": 0.0001507462480123883, "loss": 11.6664, "step": 47398 }, { "epoch": 0.9921920790421167, "grad_norm": 0.369829386472702, "learning_rate": 0.00015074435875193887, "loss": 11.6694, "step": 47399 }, { "epoch": 0.9922130118060789, "grad_norm": 0.2675577998161316, "learning_rate": 0.00015074246946709567, "loss": 11.6835, "step": 47400 }, { "epoch": 0.992233944570041, "grad_norm": 0.28279808163642883, "learning_rate": 0.00015074058015785953, "loss": 11.6614, "step": 47401 }, { "epoch": 0.9922548773340032, "grad_norm": 0.3021462559700012, "learning_rate": 0.00015073869082423138, "loss": 11.6638, "step": 47402 }, { "epoch": 0.9922758100979653, "grad_norm": 0.33488714694976807, "learning_rate": 0.00015073680146621208, "loss": 11.6533, "step": 47403 }, { "epoch": 0.9922967428619275, "grad_norm": 0.33281347155570984, "learning_rate": 0.00015073491208380263, "loss": 11.6915, "step": 47404 }, { "epoch": 0.9923176756258897, "grad_norm": 0.2997068464756012, "learning_rate": 0.0001507330226770039, "loss": 11.6582, "step": 47405 }, { "epoch": 0.9923386083898518, "grad_norm": 0.35854142904281616, "learning_rate": 0.00015073113324581678, "loss": 11.6715, "step": 47406 }, { "epoch": 0.992359541153814, "grad_norm": 0.32987162470817566, "learning_rate": 0.00015072924379024217, "loss": 11.6758, "step": 47407 }, { "epoch": 0.9923804739177761, "grad_norm": 0.35924771428108215, "learning_rate": 0.000150727354310281, "loss": 11.6664, "step": 47408 }, { "epoch": 0.9924014066817383, "grad_norm": 0.380441278219223, "learning_rate": 0.00015072546480593416, "loss": 11.6641, "step": 47409 }, { "epoch": 0.9924223394457005, "grad_norm": 0.27569204568862915, "learning_rate": 0.0001507235752772026, "loss": 11.6792, "step": 47410 }, { "epoch": 0.9924432722096626, "grad_norm": 0.33621349930763245, "learning_rate": 0.0001507216857240872, "loss": 11.6616, "step": 47411 }, { "epoch": 0.9924642049736248, "grad_norm": 0.30068767070770264, "learning_rate": 0.00015071979614658885, "loss": 11.6578, "step": 47412 }, { "epoch": 0.9924851377375868, "grad_norm": 0.4768693447113037, "learning_rate": 0.0001507179065447085, "loss": 11.669, "step": 47413 }, { "epoch": 0.992506070501549, "grad_norm": 0.31865280866622925, "learning_rate": 0.00015071601691844702, "loss": 11.6733, "step": 47414 }, { "epoch": 0.9925270032655111, "grad_norm": 0.37383216619491577, "learning_rate": 0.00015071412726780535, "loss": 11.6738, "step": 47415 }, { "epoch": 0.9925479360294733, "grad_norm": 0.2863283157348633, "learning_rate": 0.00015071223759278434, "loss": 11.6662, "step": 47416 }, { "epoch": 0.9925688687934355, "grad_norm": 0.2871403992176056, "learning_rate": 0.000150710347893385, "loss": 11.6574, "step": 47417 }, { "epoch": 0.9925898015573976, "grad_norm": 0.3410724997520447, "learning_rate": 0.00015070845816960815, "loss": 11.6607, "step": 47418 }, { "epoch": 0.9926107343213598, "grad_norm": 0.33098557591438293, "learning_rate": 0.00015070656842145472, "loss": 11.6748, "step": 47419 }, { "epoch": 0.9926316670853219, "grad_norm": 0.3116593360900879, "learning_rate": 0.00015070467864892564, "loss": 11.6692, "step": 47420 }, { "epoch": 0.9926525998492841, "grad_norm": 0.23923255503177643, "learning_rate": 0.00015070278885202177, "loss": 11.6638, "step": 47421 }, { "epoch": 0.9926735326132462, "grad_norm": 0.2682781517505646, "learning_rate": 0.0001507008990307441, "loss": 11.6743, "step": 47422 }, { "epoch": 0.9926944653772084, "grad_norm": 0.30336183309555054, "learning_rate": 0.0001506990091850935, "loss": 11.6726, "step": 47423 }, { "epoch": 0.9927153981411706, "grad_norm": 0.2882775664329529, "learning_rate": 0.00015069711931507086, "loss": 11.659, "step": 47424 }, { "epoch": 0.9927363309051327, "grad_norm": 0.271971195936203, "learning_rate": 0.00015069522942067707, "loss": 11.6671, "step": 47425 }, { "epoch": 0.9927572636690949, "grad_norm": 0.4068937301635742, "learning_rate": 0.00015069333950191308, "loss": 11.6786, "step": 47426 }, { "epoch": 0.992778196433057, "grad_norm": 0.24544355273246765, "learning_rate": 0.00015069144955877977, "loss": 11.6658, "step": 47427 }, { "epoch": 0.9927991291970192, "grad_norm": 0.333638072013855, "learning_rate": 0.0001506895595912781, "loss": 11.6768, "step": 47428 }, { "epoch": 0.9928200619609814, "grad_norm": 0.3148944079875946, "learning_rate": 0.00015068766959940892, "loss": 11.6797, "step": 47429 }, { "epoch": 0.9928409947249435, "grad_norm": 0.318873792886734, "learning_rate": 0.00015068577958317317, "loss": 11.6526, "step": 47430 }, { "epoch": 0.9928619274889057, "grad_norm": 0.3588234484195709, "learning_rate": 0.00015068388954257174, "loss": 11.6694, "step": 47431 }, { "epoch": 0.9928828602528678, "grad_norm": 0.324054479598999, "learning_rate": 0.00015068199947760555, "loss": 11.6739, "step": 47432 }, { "epoch": 0.99290379301683, "grad_norm": 0.36413708329200745, "learning_rate": 0.00015068010938827552, "loss": 11.6584, "step": 47433 }, { "epoch": 0.992924725780792, "grad_norm": 0.2683081030845642, "learning_rate": 0.00015067821927458255, "loss": 11.6784, "step": 47434 }, { "epoch": 0.9929456585447543, "grad_norm": 0.32676994800567627, "learning_rate": 0.00015067632913652753, "loss": 11.6751, "step": 47435 }, { "epoch": 0.9929665913087165, "grad_norm": 0.3365978002548218, "learning_rate": 0.00015067443897411135, "loss": 11.6768, "step": 47436 }, { "epoch": 0.9929875240726785, "grad_norm": 0.33102157711982727, "learning_rate": 0.00015067254878733502, "loss": 11.6709, "step": 47437 }, { "epoch": 0.9930084568366407, "grad_norm": 0.3211917579174042, "learning_rate": 0.00015067065857619935, "loss": 11.6776, "step": 47438 }, { "epoch": 0.9930293896006028, "grad_norm": 0.30758488178253174, "learning_rate": 0.00015066876834070526, "loss": 11.6676, "step": 47439 }, { "epoch": 0.993050322364565, "grad_norm": 0.31920987367630005, "learning_rate": 0.0001506668780808537, "loss": 11.6575, "step": 47440 }, { "epoch": 0.9930712551285271, "grad_norm": 0.31274446845054626, "learning_rate": 0.00015066498779664554, "loss": 11.6624, "step": 47441 }, { "epoch": 0.9930921878924893, "grad_norm": 0.30442482233047485, "learning_rate": 0.00015066309748808175, "loss": 11.6699, "step": 47442 }, { "epoch": 0.9931131206564515, "grad_norm": 0.3733829855918884, "learning_rate": 0.00015066120715516315, "loss": 11.6767, "step": 47443 }, { "epoch": 0.9931340534204136, "grad_norm": 0.36063042283058167, "learning_rate": 0.0001506593167978907, "loss": 11.6885, "step": 47444 }, { "epoch": 0.9931549861843758, "grad_norm": 0.31996050477027893, "learning_rate": 0.00015065742641626529, "loss": 11.6689, "step": 47445 }, { "epoch": 0.9931759189483379, "grad_norm": 0.25050413608551025, "learning_rate": 0.00015065553601028789, "loss": 11.6593, "step": 47446 }, { "epoch": 0.9931968517123001, "grad_norm": 0.2794734537601471, "learning_rate": 0.00015065364557995928, "loss": 11.6662, "step": 47447 }, { "epoch": 0.9932177844762623, "grad_norm": 0.28252267837524414, "learning_rate": 0.00015065175512528052, "loss": 11.6878, "step": 47448 }, { "epoch": 0.9932387172402244, "grad_norm": 0.2856743037700653, "learning_rate": 0.0001506498646462524, "loss": 11.6791, "step": 47449 }, { "epoch": 0.9932596500041866, "grad_norm": 0.3830830156803131, "learning_rate": 0.00015064797414287588, "loss": 11.6694, "step": 47450 }, { "epoch": 0.9932805827681487, "grad_norm": 0.3174511194229126, "learning_rate": 0.00015064608361515188, "loss": 11.679, "step": 47451 }, { "epoch": 0.9933015155321109, "grad_norm": 0.43537333607673645, "learning_rate": 0.00015064419306308127, "loss": 11.6665, "step": 47452 }, { "epoch": 0.993322448296073, "grad_norm": 0.30485624074935913, "learning_rate": 0.00015064230248666501, "loss": 11.6895, "step": 47453 }, { "epoch": 0.9933433810600352, "grad_norm": 0.2967148423194885, "learning_rate": 0.000150640411885904, "loss": 11.6665, "step": 47454 }, { "epoch": 0.9933643138239974, "grad_norm": 0.3461398184299469, "learning_rate": 0.00015063852126079907, "loss": 11.6685, "step": 47455 }, { "epoch": 0.9933852465879595, "grad_norm": 0.2858280837535858, "learning_rate": 0.00015063663061135122, "loss": 11.6594, "step": 47456 }, { "epoch": 0.9934061793519217, "grad_norm": 0.3008842468261719, "learning_rate": 0.0001506347399375613, "loss": 11.6646, "step": 47457 }, { "epoch": 0.9934271121158837, "grad_norm": 0.2969497740268707, "learning_rate": 0.00015063284923943031, "loss": 11.6699, "step": 47458 }, { "epoch": 0.993448044879846, "grad_norm": 0.3105810284614563, "learning_rate": 0.00015063095851695904, "loss": 11.6534, "step": 47459 }, { "epoch": 0.993468977643808, "grad_norm": 0.29049959778785706, "learning_rate": 0.00015062906777014846, "loss": 11.6732, "step": 47460 }, { "epoch": 0.9934899104077702, "grad_norm": 0.29331353306770325, "learning_rate": 0.0001506271769989995, "loss": 11.6742, "step": 47461 }, { "epoch": 0.9935108431717324, "grad_norm": 0.27889037132263184, "learning_rate": 0.000150625286203513, "loss": 11.6681, "step": 47462 }, { "epoch": 0.9935317759356945, "grad_norm": 0.27134275436401367, "learning_rate": 0.00015062339538368994, "loss": 11.6685, "step": 47463 }, { "epoch": 0.9935527086996567, "grad_norm": 0.3750760555267334, "learning_rate": 0.00015062150453953117, "loss": 11.6775, "step": 47464 }, { "epoch": 0.9935736414636188, "grad_norm": 0.32260745763778687, "learning_rate": 0.00015061961367103765, "loss": 11.679, "step": 47465 }, { "epoch": 0.993594574227581, "grad_norm": 0.3139367401599884, "learning_rate": 0.00015061772277821027, "loss": 11.6755, "step": 47466 }, { "epoch": 0.9936155069915432, "grad_norm": 0.30678170919418335, "learning_rate": 0.00015061583186104995, "loss": 11.6618, "step": 47467 }, { "epoch": 0.9936364397555053, "grad_norm": 0.3620067238807678, "learning_rate": 0.00015061394091955754, "loss": 11.6641, "step": 47468 }, { "epoch": 0.9936573725194675, "grad_norm": 0.34590160846710205, "learning_rate": 0.00015061204995373405, "loss": 11.6686, "step": 47469 }, { "epoch": 0.9936783052834296, "grad_norm": 0.3037716746330261, "learning_rate": 0.0001506101589635803, "loss": 11.6519, "step": 47470 }, { "epoch": 0.9936992380473918, "grad_norm": 0.34314948320388794, "learning_rate": 0.00015060826794909723, "loss": 11.6592, "step": 47471 }, { "epoch": 0.9937201708113539, "grad_norm": 0.28050026297569275, "learning_rate": 0.00015060637691028577, "loss": 11.6833, "step": 47472 }, { "epoch": 0.9937411035753161, "grad_norm": 0.2587583363056183, "learning_rate": 0.00015060448584714679, "loss": 11.6615, "step": 47473 }, { "epoch": 0.9937620363392783, "grad_norm": 0.3242233395576477, "learning_rate": 0.00015060259475968124, "loss": 11.6713, "step": 47474 }, { "epoch": 0.9937829691032404, "grad_norm": 0.3238169848918915, "learning_rate": 0.00015060070364788998, "loss": 11.6656, "step": 47475 }, { "epoch": 0.9938039018672026, "grad_norm": 0.2890632748603821, "learning_rate": 0.00015059881251177398, "loss": 11.6604, "step": 47476 }, { "epoch": 0.9938248346311647, "grad_norm": 0.4095117449760437, "learning_rate": 0.0001505969213513341, "loss": 11.669, "step": 47477 }, { "epoch": 0.9938457673951269, "grad_norm": 0.2950298488140106, "learning_rate": 0.00015059503016657126, "loss": 11.6881, "step": 47478 }, { "epoch": 0.993866700159089, "grad_norm": 0.38802793622016907, "learning_rate": 0.00015059313895748639, "loss": 11.66, "step": 47479 }, { "epoch": 0.9938876329230512, "grad_norm": 0.2909596264362335, "learning_rate": 0.00015059124772408037, "loss": 11.6511, "step": 47480 }, { "epoch": 0.9939085656870134, "grad_norm": 0.28186938166618347, "learning_rate": 0.00015058935646635412, "loss": 11.6756, "step": 47481 }, { "epoch": 0.9939294984509754, "grad_norm": 0.3393357992172241, "learning_rate": 0.00015058746518430857, "loss": 11.6751, "step": 47482 }, { "epoch": 0.9939504312149376, "grad_norm": 0.3477000296115875, "learning_rate": 0.0001505855738779446, "loss": 11.6767, "step": 47483 }, { "epoch": 0.9939713639788997, "grad_norm": 0.3941549062728882, "learning_rate": 0.00015058368254726315, "loss": 11.6776, "step": 47484 }, { "epoch": 0.9939922967428619, "grad_norm": 0.3259504437446594, "learning_rate": 0.00015058179119226508, "loss": 11.658, "step": 47485 }, { "epoch": 0.9940132295068241, "grad_norm": 0.3877564072608948, "learning_rate": 0.00015057989981295139, "loss": 11.6701, "step": 47486 }, { "epoch": 0.9940341622707862, "grad_norm": 0.3170534670352936, "learning_rate": 0.00015057800840932286, "loss": 11.6829, "step": 47487 }, { "epoch": 0.9940550950347484, "grad_norm": 0.2631208002567291, "learning_rate": 0.00015057611698138045, "loss": 11.674, "step": 47488 }, { "epoch": 0.9940760277987105, "grad_norm": 0.3314301073551178, "learning_rate": 0.00015057422552912516, "loss": 11.6494, "step": 47489 }, { "epoch": 0.9940969605626727, "grad_norm": 0.3190656304359436, "learning_rate": 0.00015057233405255777, "loss": 11.6655, "step": 47490 }, { "epoch": 0.9941178933266348, "grad_norm": 0.34038397669792175, "learning_rate": 0.00015057044255167932, "loss": 11.6522, "step": 47491 }, { "epoch": 0.994138826090597, "grad_norm": 0.3260634243488312, "learning_rate": 0.00015056855102649056, "loss": 11.6604, "step": 47492 }, { "epoch": 0.9941597588545592, "grad_norm": 0.3113117218017578, "learning_rate": 0.0001505666594769925, "loss": 11.6777, "step": 47493 }, { "epoch": 0.9941806916185213, "grad_norm": 0.3326660692691803, "learning_rate": 0.00015056476790318608, "loss": 11.6598, "step": 47494 }, { "epoch": 0.9942016243824835, "grad_norm": 0.3159778118133545, "learning_rate": 0.00015056287630507213, "loss": 11.6452, "step": 47495 }, { "epoch": 0.9942225571464456, "grad_norm": 0.4201238453388214, "learning_rate": 0.0001505609846826516, "loss": 11.6731, "step": 47496 }, { "epoch": 0.9942434899104078, "grad_norm": 0.35757043957710266, "learning_rate": 0.00015055909303592538, "loss": 11.6705, "step": 47497 }, { "epoch": 0.9942644226743699, "grad_norm": 0.3185589611530304, "learning_rate": 0.0001505572013648944, "loss": 11.6658, "step": 47498 }, { "epoch": 0.9942853554383321, "grad_norm": 0.413131445646286, "learning_rate": 0.00015055530966955955, "loss": 11.6823, "step": 47499 }, { "epoch": 0.9943062882022943, "grad_norm": 0.295723557472229, "learning_rate": 0.00015055341794992176, "loss": 11.6746, "step": 47500 }, { "epoch": 0.9943272209662564, "grad_norm": 0.39752477407455444, "learning_rate": 0.00015055152620598196, "loss": 11.6598, "step": 47501 }, { "epoch": 0.9943481537302186, "grad_norm": 0.336712509393692, "learning_rate": 0.00015054963443774096, "loss": 11.6708, "step": 47502 }, { "epoch": 0.9943690864941807, "grad_norm": 0.30286115407943726, "learning_rate": 0.00015054774264519976, "loss": 11.6774, "step": 47503 }, { "epoch": 0.9943900192581429, "grad_norm": 0.4054217040538788, "learning_rate": 0.0001505458508283593, "loss": 11.6601, "step": 47504 }, { "epoch": 0.9944109520221049, "grad_norm": 0.295806884765625, "learning_rate": 0.0001505439589872204, "loss": 11.6799, "step": 47505 }, { "epoch": 0.9944318847860671, "grad_norm": 0.287661612033844, "learning_rate": 0.000150542067121784, "loss": 11.6894, "step": 47506 }, { "epoch": 0.9944528175500293, "grad_norm": 0.2780219614505768, "learning_rate": 0.000150540175232051, "loss": 11.6619, "step": 47507 }, { "epoch": 0.9944737503139914, "grad_norm": 0.3930799663066864, "learning_rate": 0.00015053828331802236, "loss": 11.6658, "step": 47508 }, { "epoch": 0.9944946830779536, "grad_norm": 0.3363626301288605, "learning_rate": 0.00015053639137969894, "loss": 11.6656, "step": 47509 }, { "epoch": 0.9945156158419157, "grad_norm": 0.4125763177871704, "learning_rate": 0.00015053449941708163, "loss": 11.6734, "step": 47510 }, { "epoch": 0.9945365486058779, "grad_norm": 0.2329641729593277, "learning_rate": 0.00015053260743017144, "loss": 11.6682, "step": 47511 }, { "epoch": 0.9945574813698401, "grad_norm": 0.2820713222026825, "learning_rate": 0.00015053071541896917, "loss": 11.6816, "step": 47512 }, { "epoch": 0.9945784141338022, "grad_norm": 0.36803576350212097, "learning_rate": 0.0001505288233834758, "loss": 11.6787, "step": 47513 }, { "epoch": 0.9945993468977644, "grad_norm": 0.31065917015075684, "learning_rate": 0.00015052693132369219, "loss": 11.6634, "step": 47514 }, { "epoch": 0.9946202796617265, "grad_norm": 0.25128692388534546, "learning_rate": 0.00015052503923961924, "loss": 11.6773, "step": 47515 }, { "epoch": 0.9946412124256887, "grad_norm": 0.2783697247505188, "learning_rate": 0.00015052314713125795, "loss": 11.6649, "step": 47516 }, { "epoch": 0.9946621451896508, "grad_norm": 0.2402060627937317, "learning_rate": 0.00015052125499860915, "loss": 11.6606, "step": 47517 }, { "epoch": 0.994683077953613, "grad_norm": 0.29919737577438354, "learning_rate": 0.00015051936284167375, "loss": 11.6669, "step": 47518 }, { "epoch": 0.9947040107175752, "grad_norm": 0.33049654960632324, "learning_rate": 0.0001505174706604527, "loss": 11.6547, "step": 47519 }, { "epoch": 0.9947249434815373, "grad_norm": 0.36772239208221436, "learning_rate": 0.00015051557845494688, "loss": 11.6679, "step": 47520 }, { "epoch": 0.9947458762454995, "grad_norm": 0.3113758862018585, "learning_rate": 0.00015051368622515722, "loss": 11.6485, "step": 47521 }, { "epoch": 0.9947668090094616, "grad_norm": 0.25104427337646484, "learning_rate": 0.0001505117939710846, "loss": 11.6586, "step": 47522 }, { "epoch": 0.9947877417734238, "grad_norm": 0.29923704266548157, "learning_rate": 0.00015050990169272998, "loss": 11.6665, "step": 47523 }, { "epoch": 0.9948086745373859, "grad_norm": 0.47430419921875, "learning_rate": 0.00015050800939009421, "loss": 11.6592, "step": 47524 }, { "epoch": 0.9948296073013481, "grad_norm": 0.3301182687282562, "learning_rate": 0.00015050611706317825, "loss": 11.674, "step": 47525 }, { "epoch": 0.9948505400653103, "grad_norm": 0.3057065010070801, "learning_rate": 0.00015050422471198297, "loss": 11.6839, "step": 47526 }, { "epoch": 0.9948714728292724, "grad_norm": 0.38194671273231506, "learning_rate": 0.0001505023323365093, "loss": 11.6824, "step": 47527 }, { "epoch": 0.9948924055932346, "grad_norm": 0.3186725974082947, "learning_rate": 0.00015050043993675816, "loss": 11.6759, "step": 47528 }, { "epoch": 0.9949133383571966, "grad_norm": 0.2602250874042511, "learning_rate": 0.00015049854751273043, "loss": 11.6679, "step": 47529 }, { "epoch": 0.9949342711211588, "grad_norm": 0.3618098497390747, "learning_rate": 0.00015049665506442706, "loss": 11.6815, "step": 47530 }, { "epoch": 0.994955203885121, "grad_norm": 0.2982594072818756, "learning_rate": 0.00015049476259184892, "loss": 11.6613, "step": 47531 }, { "epoch": 0.9949761366490831, "grad_norm": 0.27359604835510254, "learning_rate": 0.0001504928700949969, "loss": 11.6699, "step": 47532 }, { "epoch": 0.9949970694130453, "grad_norm": 0.34966814517974854, "learning_rate": 0.000150490977573872, "loss": 11.6574, "step": 47533 }, { "epoch": 0.9950180021770074, "grad_norm": 0.3119368851184845, "learning_rate": 0.00015048908502847503, "loss": 11.6801, "step": 47534 }, { "epoch": 0.9950389349409696, "grad_norm": 0.3539679944515228, "learning_rate": 0.00015048719245880699, "loss": 11.6783, "step": 47535 }, { "epoch": 0.9950598677049317, "grad_norm": 0.3598470985889435, "learning_rate": 0.0001504852998648687, "loss": 11.6672, "step": 47536 }, { "epoch": 0.9950808004688939, "grad_norm": 0.31797873973846436, "learning_rate": 0.00015048340724666115, "loss": 11.6539, "step": 47537 }, { "epoch": 0.9951017332328561, "grad_norm": 0.24618467688560486, "learning_rate": 0.00015048151460418522, "loss": 11.6601, "step": 47538 }, { "epoch": 0.9951226659968182, "grad_norm": 0.28807786107063293, "learning_rate": 0.00015047962193744178, "loss": 11.6601, "step": 47539 }, { "epoch": 0.9951435987607804, "grad_norm": 0.305906742811203, "learning_rate": 0.00015047772924643178, "loss": 11.6628, "step": 47540 }, { "epoch": 0.9951645315247425, "grad_norm": 0.3153187930583954, "learning_rate": 0.00015047583653115612, "loss": 11.6737, "step": 47541 }, { "epoch": 0.9951854642887047, "grad_norm": 0.26756060123443604, "learning_rate": 0.00015047394379161575, "loss": 11.673, "step": 47542 }, { "epoch": 0.9952063970526668, "grad_norm": 0.3034105896949768, "learning_rate": 0.00015047205102781153, "loss": 11.677, "step": 47543 }, { "epoch": 0.995227329816629, "grad_norm": 0.2792666554450989, "learning_rate": 0.00015047015823974437, "loss": 11.662, "step": 47544 }, { "epoch": 0.9952482625805912, "grad_norm": 0.3534695506095886, "learning_rate": 0.00015046826542741522, "loss": 11.6779, "step": 47545 }, { "epoch": 0.9952691953445533, "grad_norm": 0.36577263474464417, "learning_rate": 0.00015046637259082495, "loss": 11.6748, "step": 47546 }, { "epoch": 0.9952901281085155, "grad_norm": 0.3297136425971985, "learning_rate": 0.00015046447972997444, "loss": 11.662, "step": 47547 }, { "epoch": 0.9953110608724776, "grad_norm": 0.268471360206604, "learning_rate": 0.0001504625868448647, "loss": 11.6673, "step": 47548 }, { "epoch": 0.9953319936364398, "grad_norm": 0.26940664649009705, "learning_rate": 0.00015046069393549654, "loss": 11.6465, "step": 47549 }, { "epoch": 0.995352926400402, "grad_norm": 0.37130674719810486, "learning_rate": 0.00015045880100187094, "loss": 11.6527, "step": 47550 }, { "epoch": 0.995373859164364, "grad_norm": 0.27506616711616516, "learning_rate": 0.0001504569080439888, "loss": 11.6659, "step": 47551 }, { "epoch": 0.9953947919283263, "grad_norm": 0.37581878900527954, "learning_rate": 0.000150455015061851, "loss": 11.6545, "step": 47552 }, { "epoch": 0.9954157246922883, "grad_norm": 0.3532956540584564, "learning_rate": 0.00015045312205545845, "loss": 11.6715, "step": 47553 }, { "epoch": 0.9954366574562505, "grad_norm": 0.3046102523803711, "learning_rate": 0.00015045122902481207, "loss": 11.658, "step": 47554 }, { "epoch": 0.9954575902202126, "grad_norm": 0.3227407932281494, "learning_rate": 0.0001504493359699128, "loss": 11.6681, "step": 47555 }, { "epoch": 0.9954785229841748, "grad_norm": 0.30584385991096497, "learning_rate": 0.0001504474428907615, "loss": 11.6559, "step": 47556 }, { "epoch": 0.995499455748137, "grad_norm": 0.28821611404418945, "learning_rate": 0.00015044554978735909, "loss": 11.6685, "step": 47557 }, { "epoch": 0.9955203885120991, "grad_norm": 0.29171815514564514, "learning_rate": 0.00015044365665970653, "loss": 11.6643, "step": 47558 }, { "epoch": 0.9955413212760613, "grad_norm": 0.3132327198982239, "learning_rate": 0.00015044176350780466, "loss": 11.6538, "step": 47559 }, { "epoch": 0.9955622540400234, "grad_norm": 0.3342633843421936, "learning_rate": 0.00015043987033165444, "loss": 11.6442, "step": 47560 }, { "epoch": 0.9955831868039856, "grad_norm": 0.32285600900650024, "learning_rate": 0.00015043797713125675, "loss": 11.675, "step": 47561 }, { "epoch": 0.9956041195679477, "grad_norm": 0.3226207196712494, "learning_rate": 0.00015043608390661255, "loss": 11.6725, "step": 47562 }, { "epoch": 0.9956250523319099, "grad_norm": 0.3432478606700897, "learning_rate": 0.00015043419065772267, "loss": 11.6733, "step": 47563 }, { "epoch": 0.9956459850958721, "grad_norm": 0.3915615379810333, "learning_rate": 0.00015043229738458808, "loss": 11.6792, "step": 47564 }, { "epoch": 0.9956669178598342, "grad_norm": 0.2763807475566864, "learning_rate": 0.00015043040408720967, "loss": 11.6631, "step": 47565 }, { "epoch": 0.9956878506237964, "grad_norm": 0.2907316982746124, "learning_rate": 0.00015042851076558838, "loss": 11.6627, "step": 47566 }, { "epoch": 0.9957087833877585, "grad_norm": 0.31675732135772705, "learning_rate": 0.00015042661741972507, "loss": 11.674, "step": 47567 }, { "epoch": 0.9957297161517207, "grad_norm": 0.40819764137268066, "learning_rate": 0.00015042472404962068, "loss": 11.6728, "step": 47568 }, { "epoch": 0.9957506489156829, "grad_norm": 0.30496713519096375, "learning_rate": 0.0001504228306552761, "loss": 11.6661, "step": 47569 }, { "epoch": 0.995771581679645, "grad_norm": 0.31484124064445496, "learning_rate": 0.0001504209372366923, "loss": 11.6615, "step": 47570 }, { "epoch": 0.9957925144436072, "grad_norm": 0.3777198791503906, "learning_rate": 0.00015041904379387009, "loss": 11.6729, "step": 47571 }, { "epoch": 0.9958134472075693, "grad_norm": 0.3381493091583252, "learning_rate": 0.00015041715032681048, "loss": 11.6612, "step": 47572 }, { "epoch": 0.9958343799715315, "grad_norm": 0.2969459295272827, "learning_rate": 0.0001504152568355143, "loss": 11.6522, "step": 47573 }, { "epoch": 0.9958553127354935, "grad_norm": 0.26847007870674133, "learning_rate": 0.00015041336331998254, "loss": 11.6746, "step": 47574 }, { "epoch": 0.9958762454994557, "grad_norm": 0.3384064733982086, "learning_rate": 0.000150411469780216, "loss": 11.6749, "step": 47575 }, { "epoch": 0.995897178263418, "grad_norm": 0.2788856625556946, "learning_rate": 0.0001504095762162157, "loss": 11.6754, "step": 47576 }, { "epoch": 0.99591811102738, "grad_norm": 0.33303624391555786, "learning_rate": 0.0001504076826279825, "loss": 11.6737, "step": 47577 }, { "epoch": 0.9959390437913422, "grad_norm": 0.3720906674861908, "learning_rate": 0.00015040578901551733, "loss": 11.6961, "step": 47578 }, { "epoch": 0.9959599765553043, "grad_norm": 0.2660725712776184, "learning_rate": 0.00015040389537882107, "loss": 11.652, "step": 47579 }, { "epoch": 0.9959809093192665, "grad_norm": 0.2798684537410736, "learning_rate": 0.00015040200171789465, "loss": 11.6762, "step": 47580 }, { "epoch": 0.9960018420832286, "grad_norm": 0.32475778460502625, "learning_rate": 0.00015040010803273897, "loss": 11.6738, "step": 47581 }, { "epoch": 0.9960227748471908, "grad_norm": 0.28991201519966125, "learning_rate": 0.00015039821432335498, "loss": 11.6812, "step": 47582 }, { "epoch": 0.996043707611153, "grad_norm": 0.3178133964538574, "learning_rate": 0.00015039632058974351, "loss": 11.6793, "step": 47583 }, { "epoch": 0.9960646403751151, "grad_norm": 0.3641713559627533, "learning_rate": 0.00015039442683190558, "loss": 11.6724, "step": 47584 }, { "epoch": 0.9960855731390773, "grad_norm": 0.28331148624420166, "learning_rate": 0.00015039253304984196, "loss": 11.6722, "step": 47585 }, { "epoch": 0.9961065059030394, "grad_norm": 0.3494437634944916, "learning_rate": 0.00015039063924355368, "loss": 11.6769, "step": 47586 }, { "epoch": 0.9961274386670016, "grad_norm": 0.2988967299461365, "learning_rate": 0.00015038874541304162, "loss": 11.6759, "step": 47587 }, { "epoch": 0.9961483714309638, "grad_norm": 0.3128568232059479, "learning_rate": 0.00015038685155830666, "loss": 11.6632, "step": 47588 }, { "epoch": 0.9961693041949259, "grad_norm": 0.3435019552707672, "learning_rate": 0.00015038495767934976, "loss": 11.6686, "step": 47589 }, { "epoch": 0.9961902369588881, "grad_norm": 0.29763662815093994, "learning_rate": 0.0001503830637761718, "loss": 11.6739, "step": 47590 }, { "epoch": 0.9962111697228502, "grad_norm": 0.3220977187156677, "learning_rate": 0.00015038116984877363, "loss": 11.6687, "step": 47591 }, { "epoch": 0.9962321024868124, "grad_norm": 0.3330109715461731, "learning_rate": 0.0001503792758971563, "loss": 11.6772, "step": 47592 }, { "epoch": 0.9962530352507745, "grad_norm": 0.2715506851673126, "learning_rate": 0.0001503773819213206, "loss": 11.65, "step": 47593 }, { "epoch": 0.9962739680147367, "grad_norm": 0.3721481263637543, "learning_rate": 0.0001503754879212675, "loss": 11.6786, "step": 47594 }, { "epoch": 0.9962949007786989, "grad_norm": 0.3438973128795624, "learning_rate": 0.00015037359389699788, "loss": 11.6672, "step": 47595 }, { "epoch": 0.996315833542661, "grad_norm": 0.2699965238571167, "learning_rate": 0.0001503716998485127, "loss": 11.6721, "step": 47596 }, { "epoch": 0.9963367663066232, "grad_norm": 0.26864129304885864, "learning_rate": 0.00015036980577581277, "loss": 11.6753, "step": 47597 }, { "epoch": 0.9963576990705852, "grad_norm": 0.25621330738067627, "learning_rate": 0.0001503679116788991, "loss": 11.6608, "step": 47598 }, { "epoch": 0.9963786318345474, "grad_norm": 0.29623281955718994, "learning_rate": 0.00015036601755777257, "loss": 11.6785, "step": 47599 }, { "epoch": 0.9963995645985095, "grad_norm": 0.30574509501457214, "learning_rate": 0.00015036412341243408, "loss": 11.6634, "step": 47600 }, { "epoch": 0.9964204973624717, "grad_norm": 0.3567272126674652, "learning_rate": 0.00015036222924288457, "loss": 11.6709, "step": 47601 }, { "epoch": 0.9964414301264339, "grad_norm": 0.3321945071220398, "learning_rate": 0.0001503603350491249, "loss": 11.673, "step": 47602 }, { "epoch": 0.996462362890396, "grad_norm": 0.2799852192401886, "learning_rate": 0.000150358440831156, "loss": 11.663, "step": 47603 }, { "epoch": 0.9964832956543582, "grad_norm": 0.2964969277381897, "learning_rate": 0.0001503565465889788, "loss": 11.6733, "step": 47604 }, { "epoch": 0.9965042284183203, "grad_norm": 0.35176602005958557, "learning_rate": 0.0001503546523225942, "loss": 11.6759, "step": 47605 }, { "epoch": 0.9965251611822825, "grad_norm": 0.35084396600723267, "learning_rate": 0.00015035275803200313, "loss": 11.6678, "step": 47606 }, { "epoch": 0.9965460939462447, "grad_norm": 0.3748505115509033, "learning_rate": 0.00015035086371720645, "loss": 11.6577, "step": 47607 }, { "epoch": 0.9965670267102068, "grad_norm": 0.3089958429336548, "learning_rate": 0.0001503489693782051, "loss": 11.6745, "step": 47608 }, { "epoch": 0.996587959474169, "grad_norm": 0.32327529788017273, "learning_rate": 0.000150347075015, "loss": 11.6874, "step": 47609 }, { "epoch": 0.9966088922381311, "grad_norm": 0.3725421130657196, "learning_rate": 0.00015034518062759204, "loss": 11.6624, "step": 47610 }, { "epoch": 0.9966298250020933, "grad_norm": 0.34663647413253784, "learning_rate": 0.0001503432862159822, "loss": 11.6634, "step": 47611 }, { "epoch": 0.9966507577660554, "grad_norm": 0.29938429594039917, "learning_rate": 0.00015034139178017126, "loss": 11.6762, "step": 47612 }, { "epoch": 0.9966716905300176, "grad_norm": 0.3674168884754181, "learning_rate": 0.00015033949732016022, "loss": 11.6754, "step": 47613 }, { "epoch": 0.9966926232939798, "grad_norm": 0.350139319896698, "learning_rate": 0.00015033760283595, "loss": 11.6604, "step": 47614 }, { "epoch": 0.9967135560579419, "grad_norm": 0.3516356348991394, "learning_rate": 0.00015033570832754148, "loss": 11.6588, "step": 47615 }, { "epoch": 0.9967344888219041, "grad_norm": 0.2760857343673706, "learning_rate": 0.00015033381379493557, "loss": 11.6493, "step": 47616 }, { "epoch": 0.9967554215858662, "grad_norm": 0.34418705105781555, "learning_rate": 0.00015033191923813317, "loss": 11.6619, "step": 47617 }, { "epoch": 0.9967763543498284, "grad_norm": 0.400302529335022, "learning_rate": 0.00015033002465713525, "loss": 11.6745, "step": 47618 }, { "epoch": 0.9967972871137905, "grad_norm": 0.29536172747612, "learning_rate": 0.00015032813005194266, "loss": 11.6678, "step": 47619 }, { "epoch": 0.9968182198777527, "grad_norm": 0.3508983254432678, "learning_rate": 0.0001503262354225563, "loss": 11.6648, "step": 47620 }, { "epoch": 0.9968391526417149, "grad_norm": 0.37116822600364685, "learning_rate": 0.00015032434076897716, "loss": 11.6608, "step": 47621 }, { "epoch": 0.9968600854056769, "grad_norm": 0.3105407953262329, "learning_rate": 0.00015032244609120604, "loss": 11.6548, "step": 47622 }, { "epoch": 0.9968810181696391, "grad_norm": 0.3305249810218811, "learning_rate": 0.00015032055138924398, "loss": 11.6739, "step": 47623 }, { "epoch": 0.9969019509336012, "grad_norm": 0.2950807511806488, "learning_rate": 0.00015031865666309177, "loss": 11.6758, "step": 47624 }, { "epoch": 0.9969228836975634, "grad_norm": 0.2555064558982849, "learning_rate": 0.0001503167619127504, "loss": 11.6418, "step": 47625 }, { "epoch": 0.9969438164615256, "grad_norm": 0.4320102035999298, "learning_rate": 0.00015031486713822074, "loss": 11.6755, "step": 47626 }, { "epoch": 0.9969647492254877, "grad_norm": 0.30874526500701904, "learning_rate": 0.00015031297233950372, "loss": 11.6728, "step": 47627 }, { "epoch": 0.9969856819894499, "grad_norm": 0.2893184423446655, "learning_rate": 0.00015031107751660024, "loss": 11.653, "step": 47628 }, { "epoch": 0.997006614753412, "grad_norm": 0.3257025182247162, "learning_rate": 0.00015030918266951125, "loss": 11.6685, "step": 47629 }, { "epoch": 0.9970275475173742, "grad_norm": 0.3093770444393158, "learning_rate": 0.00015030728779823758, "loss": 11.6722, "step": 47630 }, { "epoch": 0.9970484802813363, "grad_norm": 0.39745455980300903, "learning_rate": 0.0001503053929027802, "loss": 11.6743, "step": 47631 }, { "epoch": 0.9970694130452985, "grad_norm": 0.30568212270736694, "learning_rate": 0.00015030349798314, "loss": 11.6707, "step": 47632 }, { "epoch": 0.9970903458092607, "grad_norm": 0.37760648131370544, "learning_rate": 0.00015030160303931795, "loss": 11.6852, "step": 47633 }, { "epoch": 0.9971112785732228, "grad_norm": 0.3252002000808716, "learning_rate": 0.00015029970807131488, "loss": 11.6729, "step": 47634 }, { "epoch": 0.997132211337185, "grad_norm": 0.3211696445941925, "learning_rate": 0.00015029781307913172, "loss": 11.6928, "step": 47635 }, { "epoch": 0.9971531441011471, "grad_norm": 0.34298545122146606, "learning_rate": 0.0001502959180627694, "loss": 11.6595, "step": 47636 }, { "epoch": 0.9971740768651093, "grad_norm": 0.32965707778930664, "learning_rate": 0.00015029402302222885, "loss": 11.6673, "step": 47637 }, { "epoch": 0.9971950096290714, "grad_norm": 0.4646669030189514, "learning_rate": 0.00015029212795751096, "loss": 11.6758, "step": 47638 }, { "epoch": 0.9972159423930336, "grad_norm": 0.3200492560863495, "learning_rate": 0.0001502902328686166, "loss": 11.6678, "step": 47639 }, { "epoch": 0.9972368751569958, "grad_norm": 0.3629133701324463, "learning_rate": 0.00015028833775554672, "loss": 11.6672, "step": 47640 }, { "epoch": 0.9972578079209579, "grad_norm": 0.307832807302475, "learning_rate": 0.00015028644261830224, "loss": 11.6648, "step": 47641 }, { "epoch": 0.9972787406849201, "grad_norm": 0.3570432960987091, "learning_rate": 0.00015028454745688404, "loss": 11.6717, "step": 47642 }, { "epoch": 0.9972996734488822, "grad_norm": 0.27425312995910645, "learning_rate": 0.00015028265227129308, "loss": 11.6683, "step": 47643 }, { "epoch": 0.9973206062128444, "grad_norm": 0.2698647975921631, "learning_rate": 0.00015028075706153022, "loss": 11.6724, "step": 47644 }, { "epoch": 0.9973415389768066, "grad_norm": 0.37184596061706543, "learning_rate": 0.0001502788618275964, "loss": 11.6788, "step": 47645 }, { "epoch": 0.9973624717407686, "grad_norm": 0.39740249514579773, "learning_rate": 0.00015027696656949252, "loss": 11.6935, "step": 47646 }, { "epoch": 0.9973834045047308, "grad_norm": 0.3005988299846649, "learning_rate": 0.0001502750712872195, "loss": 11.676, "step": 47647 }, { "epoch": 0.9974043372686929, "grad_norm": 0.43544432520866394, "learning_rate": 0.00015027317598077827, "loss": 11.6515, "step": 47648 }, { "epoch": 0.9974252700326551, "grad_norm": 0.3447308838367462, "learning_rate": 0.0001502712806501697, "loss": 11.6645, "step": 47649 }, { "epoch": 0.9974462027966172, "grad_norm": 0.2813289165496826, "learning_rate": 0.0001502693852953947, "loss": 11.6605, "step": 47650 }, { "epoch": 0.9974671355605794, "grad_norm": 0.3688814043998718, "learning_rate": 0.00015026748991645418, "loss": 11.6566, "step": 47651 }, { "epoch": 0.9974880683245416, "grad_norm": 0.2570865750312805, "learning_rate": 0.0001502655945133491, "loss": 11.6709, "step": 47652 }, { "epoch": 0.9975090010885037, "grad_norm": 0.37500903010368347, "learning_rate": 0.00015026369908608035, "loss": 11.6703, "step": 47653 }, { "epoch": 0.9975299338524659, "grad_norm": 0.3315301835536957, "learning_rate": 0.00015026180363464882, "loss": 11.6675, "step": 47654 }, { "epoch": 0.997550866616428, "grad_norm": 0.2594601511955261, "learning_rate": 0.00015025990815905544, "loss": 11.6701, "step": 47655 }, { "epoch": 0.9975717993803902, "grad_norm": 0.31741949915885925, "learning_rate": 0.00015025801265930114, "loss": 11.6758, "step": 47656 }, { "epoch": 0.9975927321443523, "grad_norm": 0.31337404251098633, "learning_rate": 0.00015025611713538676, "loss": 11.6573, "step": 47657 }, { "epoch": 0.9976136649083145, "grad_norm": 0.24609561264514923, "learning_rate": 0.00015025422158731328, "loss": 11.671, "step": 47658 }, { "epoch": 0.9976345976722767, "grad_norm": 0.3512977063655853, "learning_rate": 0.00015025232601508157, "loss": 11.6706, "step": 47659 }, { "epoch": 0.9976555304362388, "grad_norm": 0.29861027002334595, "learning_rate": 0.0001502504304186926, "loss": 11.6619, "step": 47660 }, { "epoch": 0.997676463200201, "grad_norm": 0.3898495137691498, "learning_rate": 0.0001502485347981472, "loss": 11.6706, "step": 47661 }, { "epoch": 0.9976973959641631, "grad_norm": 0.27570515871047974, "learning_rate": 0.00015024663915344635, "loss": 11.6734, "step": 47662 }, { "epoch": 0.9977183287281253, "grad_norm": 0.3647395968437195, "learning_rate": 0.0001502447434845909, "loss": 11.668, "step": 47663 }, { "epoch": 0.9977392614920875, "grad_norm": 0.22808463871479034, "learning_rate": 0.0001502428477915818, "loss": 11.6657, "step": 47664 }, { "epoch": 0.9977601942560496, "grad_norm": 0.2751387059688568, "learning_rate": 0.00015024095207442, "loss": 11.6662, "step": 47665 }, { "epoch": 0.9977811270200118, "grad_norm": 0.3830588459968567, "learning_rate": 0.00015023905633310632, "loss": 11.6567, "step": 47666 }, { "epoch": 0.9978020597839738, "grad_norm": 0.2899569272994995, "learning_rate": 0.00015023716056764176, "loss": 11.6711, "step": 47667 }, { "epoch": 0.997822992547936, "grad_norm": 0.25903353095054626, "learning_rate": 0.00015023526477802714, "loss": 11.6705, "step": 47668 }, { "epoch": 0.9978439253118981, "grad_norm": 0.29508253931999207, "learning_rate": 0.0001502333689642635, "loss": 11.666, "step": 47669 }, { "epoch": 0.9978648580758603, "grad_norm": 0.40266281366348267, "learning_rate": 0.00015023147312635157, "loss": 11.6796, "step": 47670 }, { "epoch": 0.9978857908398225, "grad_norm": 0.28544676303863525, "learning_rate": 0.00015022957726429245, "loss": 11.6578, "step": 47671 }, { "epoch": 0.9979067236037846, "grad_norm": 0.8351986408233643, "learning_rate": 0.0001502276813780869, "loss": 11.6838, "step": 47672 }, { "epoch": 0.9979276563677468, "grad_norm": 0.3258395195007324, "learning_rate": 0.00015022578546773594, "loss": 11.6846, "step": 47673 }, { "epoch": 0.9979485891317089, "grad_norm": 0.3898732364177704, "learning_rate": 0.00015022388953324044, "loss": 11.6709, "step": 47674 }, { "epoch": 0.9979695218956711, "grad_norm": 0.28403815627098083, "learning_rate": 0.0001502219935746013, "loss": 11.669, "step": 47675 }, { "epoch": 0.9979904546596332, "grad_norm": 0.29830482602119446, "learning_rate": 0.00015022009759181945, "loss": 11.6582, "step": 47676 }, { "epoch": 0.9980113874235954, "grad_norm": 0.3012841045856476, "learning_rate": 0.00015021820158489578, "loss": 11.6775, "step": 47677 }, { "epoch": 0.9980323201875576, "grad_norm": 0.30434420704841614, "learning_rate": 0.0001502163055538312, "loss": 11.6675, "step": 47678 }, { "epoch": 0.9980532529515197, "grad_norm": 0.35730889439582825, "learning_rate": 0.00015021440949862665, "loss": 11.6773, "step": 47679 }, { "epoch": 0.9980741857154819, "grad_norm": 0.2584129273891449, "learning_rate": 0.00015021251341928302, "loss": 11.6755, "step": 47680 }, { "epoch": 0.998095118479444, "grad_norm": 0.388640820980072, "learning_rate": 0.00015021061731580125, "loss": 11.6559, "step": 47681 }, { "epoch": 0.9981160512434062, "grad_norm": 0.2960616648197174, "learning_rate": 0.0001502087211881822, "loss": 11.6714, "step": 47682 }, { "epoch": 0.9981369840073683, "grad_norm": 0.4008898437023163, "learning_rate": 0.00015020682503642685, "loss": 11.6693, "step": 47683 }, { "epoch": 0.9981579167713305, "grad_norm": 0.3032248914241791, "learning_rate": 0.00015020492886053604, "loss": 11.6667, "step": 47684 }, { "epoch": 0.9981788495352927, "grad_norm": 0.318972110748291, "learning_rate": 0.00015020303266051076, "loss": 11.6687, "step": 47685 }, { "epoch": 0.9981997822992548, "grad_norm": 0.34107232093811035, "learning_rate": 0.0001502011364363518, "loss": 11.6448, "step": 47686 }, { "epoch": 0.998220715063217, "grad_norm": 0.29150283336639404, "learning_rate": 0.0001501992401880602, "loss": 11.6636, "step": 47687 }, { "epoch": 0.9982416478271791, "grad_norm": 0.34020018577575684, "learning_rate": 0.00015019734391563683, "loss": 11.6552, "step": 47688 }, { "epoch": 0.9982625805911413, "grad_norm": 0.2990413308143616, "learning_rate": 0.00015019544761908255, "loss": 11.6646, "step": 47689 }, { "epoch": 0.9982835133551035, "grad_norm": 0.45674964785575867, "learning_rate": 0.00015019355129839833, "loss": 11.6633, "step": 47690 }, { "epoch": 0.9983044461190655, "grad_norm": 0.32687872648239136, "learning_rate": 0.00015019165495358506, "loss": 11.664, "step": 47691 }, { "epoch": 0.9983253788830277, "grad_norm": 0.34885576367378235, "learning_rate": 0.00015018975858464368, "loss": 11.6665, "step": 47692 }, { "epoch": 0.9983463116469898, "grad_norm": 0.28206390142440796, "learning_rate": 0.00015018786219157506, "loss": 11.6664, "step": 47693 }, { "epoch": 0.998367244410952, "grad_norm": 0.319043904542923, "learning_rate": 0.00015018596577438012, "loss": 11.6954, "step": 47694 }, { "epoch": 0.9983881771749141, "grad_norm": 0.26586243510246277, "learning_rate": 0.00015018406933305978, "loss": 11.6577, "step": 47695 }, { "epoch": 0.9984091099388763, "grad_norm": 0.2717728316783905, "learning_rate": 0.00015018217286761498, "loss": 11.6603, "step": 47696 }, { "epoch": 0.9984300427028385, "grad_norm": 0.26698634028434753, "learning_rate": 0.00015018027637804656, "loss": 11.6775, "step": 47697 }, { "epoch": 0.9984509754668006, "grad_norm": 0.24416708946228027, "learning_rate": 0.0001501783798643555, "loss": 11.6743, "step": 47698 }, { "epoch": 0.9984719082307628, "grad_norm": 0.2644237279891968, "learning_rate": 0.0001501764833265427, "loss": 11.6928, "step": 47699 }, { "epoch": 0.9984928409947249, "grad_norm": 0.34136202931404114, "learning_rate": 0.00015017458676460906, "loss": 11.6671, "step": 47700 }, { "epoch": 0.9985137737586871, "grad_norm": 0.240930438041687, "learning_rate": 0.00015017269017855543, "loss": 11.6679, "step": 47701 }, { "epoch": 0.9985347065226492, "grad_norm": 0.44993266463279724, "learning_rate": 0.0001501707935683829, "loss": 11.6658, "step": 47702 }, { "epoch": 0.9985556392866114, "grad_norm": 0.35407865047454834, "learning_rate": 0.00015016889693409216, "loss": 11.6651, "step": 47703 }, { "epoch": 0.9985765720505736, "grad_norm": 0.44683822989463806, "learning_rate": 0.00015016700027568427, "loss": 11.6676, "step": 47704 }, { "epoch": 0.9985975048145357, "grad_norm": 0.31492194533348083, "learning_rate": 0.00015016510359316007, "loss": 11.668, "step": 47705 }, { "epoch": 0.9986184375784979, "grad_norm": 0.32475394010543823, "learning_rate": 0.0001501632068865205, "loss": 11.6503, "step": 47706 }, { "epoch": 0.99863937034246, "grad_norm": 0.3212716281414032, "learning_rate": 0.00015016131015576652, "loss": 11.671, "step": 47707 }, { "epoch": 0.9986603031064222, "grad_norm": 0.3399595618247986, "learning_rate": 0.00015015941340089893, "loss": 11.6729, "step": 47708 }, { "epoch": 0.9986812358703844, "grad_norm": 0.40344497561454773, "learning_rate": 0.00015015751662191877, "loss": 11.6691, "step": 47709 }, { "epoch": 0.9987021686343465, "grad_norm": 0.36038216948509216, "learning_rate": 0.00015015561981882684, "loss": 11.6549, "step": 47710 }, { "epoch": 0.9987231013983087, "grad_norm": 0.2555214762687683, "learning_rate": 0.0001501537229916241, "loss": 11.674, "step": 47711 }, { "epoch": 0.9987440341622708, "grad_norm": 0.38723307847976685, "learning_rate": 0.00015015182614031147, "loss": 11.6788, "step": 47712 }, { "epoch": 0.998764966926233, "grad_norm": 0.41092389822006226, "learning_rate": 0.00015014992926488984, "loss": 11.6831, "step": 47713 }, { "epoch": 0.998785899690195, "grad_norm": 0.3425479829311371, "learning_rate": 0.00015014803236536016, "loss": 11.6569, "step": 47714 }, { "epoch": 0.9988068324541572, "grad_norm": 0.31538406014442444, "learning_rate": 0.0001501461354417233, "loss": 11.6778, "step": 47715 }, { "epoch": 0.9988277652181194, "grad_norm": 0.4060280919075012, "learning_rate": 0.00015014423849398018, "loss": 11.6699, "step": 47716 }, { "epoch": 0.9988486979820815, "grad_norm": 0.2756088972091675, "learning_rate": 0.00015014234152213173, "loss": 11.6696, "step": 47717 }, { "epoch": 0.9988696307460437, "grad_norm": 0.3028113543987274, "learning_rate": 0.00015014044452617886, "loss": 11.6534, "step": 47718 }, { "epoch": 0.9988905635100058, "grad_norm": 0.3519482910633087, "learning_rate": 0.00015013854750612246, "loss": 11.6813, "step": 47719 }, { "epoch": 0.998911496273968, "grad_norm": 0.3577982187271118, "learning_rate": 0.00015013665046196347, "loss": 11.6684, "step": 47720 }, { "epoch": 0.9989324290379301, "grad_norm": 0.35478413105010986, "learning_rate": 0.00015013475339370277, "loss": 11.6677, "step": 47721 }, { "epoch": 0.9989533618018923, "grad_norm": 0.3345816433429718, "learning_rate": 0.00015013285630134128, "loss": 11.6723, "step": 47722 }, { "epoch": 0.9989742945658545, "grad_norm": 0.2963768243789673, "learning_rate": 0.00015013095918487993, "loss": 11.6734, "step": 47723 }, { "epoch": 0.9989952273298166, "grad_norm": 0.34881579875946045, "learning_rate": 0.00015012906204431965, "loss": 11.6824, "step": 47724 }, { "epoch": 0.9990161600937788, "grad_norm": 0.24812166392803192, "learning_rate": 0.0001501271648796613, "loss": 11.6705, "step": 47725 }, { "epoch": 0.9990370928577409, "grad_norm": 0.36052995920181274, "learning_rate": 0.00015012526769090585, "loss": 11.6622, "step": 47726 }, { "epoch": 0.9990580256217031, "grad_norm": 0.37508881092071533, "learning_rate": 0.00015012337047805413, "loss": 11.6833, "step": 47727 }, { "epoch": 0.9990789583856653, "grad_norm": 0.3212891221046448, "learning_rate": 0.00015012147324110713, "loss": 11.6772, "step": 47728 }, { "epoch": 0.9990998911496274, "grad_norm": 0.2805667519569397, "learning_rate": 0.00015011957598006572, "loss": 11.6531, "step": 47729 }, { "epoch": 0.9991208239135896, "grad_norm": 0.31987494230270386, "learning_rate": 0.00015011767869493086, "loss": 11.6646, "step": 47730 }, { "epoch": 0.9991417566775517, "grad_norm": 0.3032274842262268, "learning_rate": 0.00015011578138570338, "loss": 11.6621, "step": 47731 }, { "epoch": 0.9991626894415139, "grad_norm": 0.4179098308086395, "learning_rate": 0.00015011388405238428, "loss": 11.6886, "step": 47732 }, { "epoch": 0.999183622205476, "grad_norm": 0.32686951756477356, "learning_rate": 0.00015011198669497442, "loss": 11.6704, "step": 47733 }, { "epoch": 0.9992045549694382, "grad_norm": 0.3463076651096344, "learning_rate": 0.0001501100893134747, "loss": 11.6725, "step": 47734 }, { "epoch": 0.9992254877334004, "grad_norm": 0.29184600710868835, "learning_rate": 0.00015010819190788605, "loss": 11.6518, "step": 47735 }, { "epoch": 0.9992464204973625, "grad_norm": 0.2850795090198517, "learning_rate": 0.00015010629447820942, "loss": 11.6595, "step": 47736 }, { "epoch": 0.9992673532613247, "grad_norm": 0.3707214593887329, "learning_rate": 0.00015010439702444568, "loss": 11.6507, "step": 47737 }, { "epoch": 0.9992882860252867, "grad_norm": 0.2866271138191223, "learning_rate": 0.00015010249954659576, "loss": 11.6679, "step": 47738 }, { "epoch": 0.9993092187892489, "grad_norm": 0.2763236463069916, "learning_rate": 0.00015010060204466056, "loss": 11.6422, "step": 47739 }, { "epoch": 0.999330151553211, "grad_norm": 0.332368940114975, "learning_rate": 0.00015009870451864097, "loss": 11.6639, "step": 47740 }, { "epoch": 0.9993510843171732, "grad_norm": 0.4314521253108978, "learning_rate": 0.00015009680696853795, "loss": 11.6631, "step": 47741 }, { "epoch": 0.9993720170811354, "grad_norm": 0.3079347014427185, "learning_rate": 0.00015009490939435241, "loss": 11.689, "step": 47742 }, { "epoch": 0.9993929498450975, "grad_norm": 0.2826005518436432, "learning_rate": 0.00015009301179608523, "loss": 11.6511, "step": 47743 }, { "epoch": 0.9994138826090597, "grad_norm": 0.33349356055259705, "learning_rate": 0.00015009111417373734, "loss": 11.6707, "step": 47744 }, { "epoch": 0.9994348153730218, "grad_norm": 0.3536680340766907, "learning_rate": 0.0001500892165273096, "loss": 11.6727, "step": 47745 }, { "epoch": 0.999455748136984, "grad_norm": 0.49691587686538696, "learning_rate": 0.00015008731885680302, "loss": 11.667, "step": 47746 }, { "epoch": 0.9994766809009462, "grad_norm": 0.40283000469207764, "learning_rate": 0.00015008542116221846, "loss": 11.6699, "step": 47747 }, { "epoch": 0.9994976136649083, "grad_norm": 0.35990121960639954, "learning_rate": 0.00015008352344355683, "loss": 11.677, "step": 47748 }, { "epoch": 0.9995185464288705, "grad_norm": 0.3866826295852661, "learning_rate": 0.00015008162570081903, "loss": 11.6589, "step": 47749 }, { "epoch": 0.9995394791928326, "grad_norm": 0.2770465314388275, "learning_rate": 0.000150079727934006, "loss": 11.6546, "step": 47750 }, { "epoch": 0.9995604119567948, "grad_norm": 0.30756616592407227, "learning_rate": 0.00015007783014311864, "loss": 11.6431, "step": 47751 }, { "epoch": 0.9995813447207569, "grad_norm": 0.3223624527454376, "learning_rate": 0.00015007593232815788, "loss": 11.6855, "step": 47752 }, { "epoch": 0.9996022774847191, "grad_norm": 0.34402167797088623, "learning_rate": 0.00015007403448912457, "loss": 11.68, "step": 47753 }, { "epoch": 0.9996232102486813, "grad_norm": 0.3246363699436188, "learning_rate": 0.00015007213662601972, "loss": 11.6827, "step": 47754 }, { "epoch": 0.9996441430126434, "grad_norm": 0.2934410572052002, "learning_rate": 0.00015007023873884418, "loss": 11.6657, "step": 47755 }, { "epoch": 0.9996650757766056, "grad_norm": 0.32959166169166565, "learning_rate": 0.00015006834082759886, "loss": 11.6576, "step": 47756 }, { "epoch": 0.9996860085405677, "grad_norm": 0.32277724146842957, "learning_rate": 0.00015006644289228466, "loss": 11.6533, "step": 47757 }, { "epoch": 0.9997069413045299, "grad_norm": 0.37785446643829346, "learning_rate": 0.00015006454493290256, "loss": 11.686, "step": 47758 }, { "epoch": 0.999727874068492, "grad_norm": 0.3096091151237488, "learning_rate": 0.0001500626469494534, "loss": 11.6731, "step": 47759 }, { "epoch": 0.9997488068324542, "grad_norm": 0.38937652111053467, "learning_rate": 0.00015006074894193815, "loss": 11.6693, "step": 47760 }, { "epoch": 0.9997697395964164, "grad_norm": 0.3858678340911865, "learning_rate": 0.00015005885091035767, "loss": 11.6698, "step": 47761 }, { "epoch": 0.9997906723603784, "grad_norm": 0.341552197933197, "learning_rate": 0.00015005695285471294, "loss": 11.6669, "step": 47762 }, { "epoch": 0.9998116051243406, "grad_norm": 0.41461294889450073, "learning_rate": 0.00015005505477500478, "loss": 11.6729, "step": 47763 }, { "epoch": 0.9998325378883027, "grad_norm": 0.33727601170539856, "learning_rate": 0.00015005315667123415, "loss": 11.6695, "step": 47764 }, { "epoch": 0.9998534706522649, "grad_norm": 0.29730159044265747, "learning_rate": 0.000150051258543402, "loss": 11.673, "step": 47765 }, { "epoch": 0.9998744034162271, "grad_norm": 0.33750900626182556, "learning_rate": 0.0001500493603915092, "loss": 11.6719, "step": 47766 }, { "epoch": 0.9998953361801892, "grad_norm": 0.2829771339893341, "learning_rate": 0.00015004746221555665, "loss": 11.6598, "step": 47767 }, { "epoch": 0.9999162689441514, "grad_norm": 0.2895073890686035, "learning_rate": 0.0001500455640155453, "loss": 11.6767, "step": 47768 }, { "epoch": 0.9999372017081135, "grad_norm": 0.33052706718444824, "learning_rate": 0.00015004366579147603, "loss": 11.675, "step": 47769 }, { "epoch": 0.9999581344720757, "grad_norm": 0.4579976201057434, "learning_rate": 0.0001500417675433498, "loss": 11.7, "step": 47770 }, { "epoch": 0.9999790672360378, "grad_norm": 0.3775452673435211, "learning_rate": 0.00015003986927116745, "loss": 11.6756, "step": 47771 }, { "epoch": 1.0, "grad_norm": 0.3671385943889618, "learning_rate": 0.00015003797097492996, "loss": 11.6686, "step": 47772 }, { "epoch": 1.0000209327639622, "grad_norm": 0.47831788659095764, "learning_rate": 0.0001500360726546382, "loss": 11.6699, "step": 47773 }, { "epoch": 1.0000418655279244, "grad_norm": 0.2788480818271637, "learning_rate": 0.0001500341743102931, "loss": 11.6749, "step": 47774 }, { "epoch": 1.0000627982918864, "grad_norm": 0.3178669214248657, "learning_rate": 0.00015003227594189556, "loss": 11.6838, "step": 47775 }, { "epoch": 1.0000837310558486, "grad_norm": 0.30527082085609436, "learning_rate": 0.00015003037754944648, "loss": 11.6695, "step": 47776 }, { "epoch": 1.0001046638198108, "grad_norm": 0.36676308512687683, "learning_rate": 0.00015002847913294685, "loss": 11.6534, "step": 47777 }, { "epoch": 1.000125596583773, "grad_norm": 0.38807061314582825, "learning_rate": 0.0001500265806923975, "loss": 11.6856, "step": 47778 }, { "epoch": 1.0001465293477352, "grad_norm": 0.33821746706962585, "learning_rate": 0.00015002468222779939, "loss": 11.6631, "step": 47779 }, { "epoch": 1.0001674621116972, "grad_norm": 0.3266105353832245, "learning_rate": 0.00015002278373915336, "loss": 11.662, "step": 47780 }, { "epoch": 1.0001883948756594, "grad_norm": 0.36574187874794006, "learning_rate": 0.00015002088522646042, "loss": 11.669, "step": 47781 }, { "epoch": 1.0002093276396216, "grad_norm": 0.3037831485271454, "learning_rate": 0.00015001898668972146, "loss": 11.6733, "step": 47782 }, { "epoch": 1.0002302604035838, "grad_norm": 0.26431769132614136, "learning_rate": 0.00015001708812893735, "loss": 11.6669, "step": 47783 }, { "epoch": 1.0002511931675457, "grad_norm": 0.34774714708328247, "learning_rate": 0.00015001518954410896, "loss": 11.6657, "step": 47784 }, { "epoch": 1.000272125931508, "grad_norm": 0.3919920027256012, "learning_rate": 0.00015001329093523736, "loss": 11.6692, "step": 47785 }, { "epoch": 1.0002930586954701, "grad_norm": 0.255203515291214, "learning_rate": 0.0001500113923023233, "loss": 11.6741, "step": 47786 }, { "epoch": 1.0003139914594323, "grad_norm": 0.32676273584365845, "learning_rate": 0.0001500094936453678, "loss": 11.6704, "step": 47787 }, { "epoch": 1.0003349242233945, "grad_norm": 0.35009559988975525, "learning_rate": 0.00015000759496437173, "loss": 11.6753, "step": 47788 }, { "epoch": 1.0003558569873565, "grad_norm": 0.37300676107406616, "learning_rate": 0.00015000569625933598, "loss": 11.6658, "step": 47789 }, { "epoch": 1.0003767897513187, "grad_norm": 0.25887516140937805, "learning_rate": 0.00015000379753026154, "loss": 11.667, "step": 47790 }, { "epoch": 1.000397722515281, "grad_norm": 0.3480033278465271, "learning_rate": 0.0001500018987771492, "loss": 11.6653, "step": 47791 }, { "epoch": 1.0004186552792431, "grad_norm": 0.33852046728134155, "learning_rate": 0.00015000000000000001, "loss": 11.6776, "step": 47792 }, { "epoch": 1.0004395880432053, "grad_norm": 0.27439090609550476, "learning_rate": 0.0001499981011988148, "loss": 11.6683, "step": 47793 }, { "epoch": 1.0004605208071673, "grad_norm": 0.3012184798717499, "learning_rate": 0.00014999620237359448, "loss": 11.6778, "step": 47794 }, { "epoch": 1.0004814535711295, "grad_norm": 0.4439748227596283, "learning_rate": 0.00014999430352434, "loss": 11.6888, "step": 47795 }, { "epoch": 1.0005023863350917, "grad_norm": 0.32946014404296875, "learning_rate": 0.00014999240465105227, "loss": 11.679, "step": 47796 }, { "epoch": 1.000523319099054, "grad_norm": 0.33494821190834045, "learning_rate": 0.00014999050575373218, "loss": 11.6756, "step": 47797 }, { "epoch": 1.000544251863016, "grad_norm": 0.2908206582069397, "learning_rate": 0.00014998860683238064, "loss": 11.6795, "step": 47798 }, { "epoch": 1.000565184626978, "grad_norm": 0.3025561273097992, "learning_rate": 0.0001499867078869986, "loss": 11.6585, "step": 47799 }, { "epoch": 1.0005861173909403, "grad_norm": 0.27209946513175964, "learning_rate": 0.00014998480891758692, "loss": 11.6643, "step": 47800 }, { "epoch": 1.0006070501549025, "grad_norm": 0.40462321043014526, "learning_rate": 0.00014998290992414653, "loss": 11.6664, "step": 47801 }, { "epoch": 1.0006279829188647, "grad_norm": 0.3695262670516968, "learning_rate": 0.0001499810109066784, "loss": 11.6627, "step": 47802 }, { "epoch": 1.0006489156828267, "grad_norm": 0.3231261372566223, "learning_rate": 0.00014997911186518336, "loss": 11.6611, "step": 47803 }, { "epoch": 1.0006698484467889, "grad_norm": 0.27563580870628357, "learning_rate": 0.00014997721279966236, "loss": 11.66, "step": 47804 }, { "epoch": 1.000690781210751, "grad_norm": 0.2935892641544342, "learning_rate": 0.00014997531371011631, "loss": 11.6775, "step": 47805 }, { "epoch": 1.0007117139747133, "grad_norm": 0.3221643269062042, "learning_rate": 0.00014997341459654614, "loss": 11.6655, "step": 47806 }, { "epoch": 1.0007326467386755, "grad_norm": 0.2916524112224579, "learning_rate": 0.00014997151545895275, "loss": 11.671, "step": 47807 }, { "epoch": 1.0007535795026374, "grad_norm": 0.32893574237823486, "learning_rate": 0.00014996961629733704, "loss": 11.6768, "step": 47808 }, { "epoch": 1.0007745122665996, "grad_norm": 0.3080238997936249, "learning_rate": 0.00014996771711169997, "loss": 11.6688, "step": 47809 }, { "epoch": 1.0007954450305618, "grad_norm": 0.3645001947879791, "learning_rate": 0.00014996581790204238, "loss": 11.6767, "step": 47810 }, { "epoch": 1.000816377794524, "grad_norm": 0.2786339521408081, "learning_rate": 0.00014996391866836523, "loss": 11.679, "step": 47811 }, { "epoch": 1.0008373105584862, "grad_norm": 0.3690556287765503, "learning_rate": 0.00014996201941066943, "loss": 11.6561, "step": 47812 }, { "epoch": 1.0008582433224482, "grad_norm": 0.3127692639827728, "learning_rate": 0.00014996012012895586, "loss": 11.6637, "step": 47813 }, { "epoch": 1.0008791760864104, "grad_norm": 0.3230784833431244, "learning_rate": 0.00014995822082322549, "loss": 11.659, "step": 47814 }, { "epoch": 1.0009001088503726, "grad_norm": 0.27817291021347046, "learning_rate": 0.0001499563214934792, "loss": 11.663, "step": 47815 }, { "epoch": 1.0009210416143348, "grad_norm": 0.3066636323928833, "learning_rate": 0.00014995442213971787, "loss": 11.6663, "step": 47816 }, { "epoch": 1.000941974378297, "grad_norm": 0.34154605865478516, "learning_rate": 0.00014995252276194248, "loss": 11.6622, "step": 47817 }, { "epoch": 1.000962907142259, "grad_norm": 0.31199339032173157, "learning_rate": 0.00014995062336015387, "loss": 11.658, "step": 47818 }, { "epoch": 1.0009838399062212, "grad_norm": 0.2995454668998718, "learning_rate": 0.00014994872393435306, "loss": 11.6807, "step": 47819 }, { "epoch": 1.0010047726701834, "grad_norm": 0.2796311676502228, "learning_rate": 0.00014994682448454087, "loss": 11.6646, "step": 47820 }, { "epoch": 1.0010257054341456, "grad_norm": 0.3725583553314209, "learning_rate": 0.00014994492501071825, "loss": 11.6804, "step": 47821 }, { "epoch": 1.0010466381981076, "grad_norm": 0.331670343875885, "learning_rate": 0.00014994302551288606, "loss": 11.6716, "step": 47822 }, { "epoch": 1.0010675709620698, "grad_norm": 0.26542481780052185, "learning_rate": 0.0001499411259910453, "loss": 11.6804, "step": 47823 }, { "epoch": 1.001088503726032, "grad_norm": 0.2950088083744049, "learning_rate": 0.00014993922644519682, "loss": 11.6552, "step": 47824 }, { "epoch": 1.0011094364899942, "grad_norm": 0.37607359886169434, "learning_rate": 0.00014993732687534154, "loss": 11.6605, "step": 47825 }, { "epoch": 1.0011303692539564, "grad_norm": 0.3113759756088257, "learning_rate": 0.00014993542728148043, "loss": 11.6615, "step": 47826 }, { "epoch": 1.0011513020179184, "grad_norm": 0.4787489175796509, "learning_rate": 0.00014993352766361432, "loss": 11.6669, "step": 47827 }, { "epoch": 1.0011722347818806, "grad_norm": 0.2951887845993042, "learning_rate": 0.00014993162802174416, "loss": 11.6546, "step": 47828 }, { "epoch": 1.0011931675458428, "grad_norm": 0.3389052152633667, "learning_rate": 0.0001499297283558709, "loss": 11.6712, "step": 47829 }, { "epoch": 1.001214100309805, "grad_norm": 0.28258442878723145, "learning_rate": 0.0001499278286659954, "loss": 11.6727, "step": 47830 }, { "epoch": 1.0012350330737672, "grad_norm": 0.304699569940567, "learning_rate": 0.00014992592895211862, "loss": 11.6796, "step": 47831 }, { "epoch": 1.0012559658377291, "grad_norm": 0.5020493865013123, "learning_rate": 0.0001499240292142414, "loss": 11.6847, "step": 47832 }, { "epoch": 1.0012768986016913, "grad_norm": 0.33969646692276, "learning_rate": 0.0001499221294523647, "loss": 11.683, "step": 47833 }, { "epoch": 1.0012978313656535, "grad_norm": 0.38056525588035583, "learning_rate": 0.00014992022966648947, "loss": 11.6587, "step": 47834 }, { "epoch": 1.0013187641296157, "grad_norm": 0.43510758876800537, "learning_rate": 0.00014991832985661655, "loss": 11.6818, "step": 47835 }, { "epoch": 1.001339696893578, "grad_norm": 0.3685793876647949, "learning_rate": 0.00014991643002274692, "loss": 11.6833, "step": 47836 }, { "epoch": 1.00136062965754, "grad_norm": 0.31638896465301514, "learning_rate": 0.00014991453016488144, "loss": 11.6679, "step": 47837 }, { "epoch": 1.0013815624215021, "grad_norm": 0.36728668212890625, "learning_rate": 0.00014991263028302102, "loss": 11.6787, "step": 47838 }, { "epoch": 1.0014024951854643, "grad_norm": 0.3213771879673004, "learning_rate": 0.00014991073037716666, "loss": 11.6673, "step": 47839 }, { "epoch": 1.0014234279494265, "grad_norm": 0.3951956331729889, "learning_rate": 0.00014990883044731915, "loss": 11.6721, "step": 47840 }, { "epoch": 1.0014443607133885, "grad_norm": 0.3264249563217163, "learning_rate": 0.00014990693049347952, "loss": 11.6607, "step": 47841 }, { "epoch": 1.0014652934773507, "grad_norm": 0.23622019588947296, "learning_rate": 0.00014990503051564858, "loss": 11.6609, "step": 47842 }, { "epoch": 1.001486226241313, "grad_norm": 0.3574105203151703, "learning_rate": 0.00014990313051382734, "loss": 11.6775, "step": 47843 }, { "epoch": 1.001507159005275, "grad_norm": 0.2886806130409241, "learning_rate": 0.00014990123048801664, "loss": 11.6676, "step": 47844 }, { "epoch": 1.0015280917692373, "grad_norm": 0.3172197639942169, "learning_rate": 0.0001498993304382174, "loss": 11.656, "step": 47845 }, { "epoch": 1.0015490245331993, "grad_norm": 0.37607908248901367, "learning_rate": 0.00014989743036443058, "loss": 11.6704, "step": 47846 }, { "epoch": 1.0015699572971615, "grad_norm": 0.3114069998264313, "learning_rate": 0.00014989553026665707, "loss": 11.6829, "step": 47847 }, { "epoch": 1.0015908900611237, "grad_norm": 0.33096206188201904, "learning_rate": 0.00014989363014489775, "loss": 11.6812, "step": 47848 }, { "epoch": 1.0016118228250859, "grad_norm": 0.28733107447624207, "learning_rate": 0.00014989172999915358, "loss": 11.6699, "step": 47849 }, { "epoch": 1.001632755589048, "grad_norm": 0.31549227237701416, "learning_rate": 0.00014988982982942545, "loss": 11.679, "step": 47850 }, { "epoch": 1.00165368835301, "grad_norm": 0.3477310240268707, "learning_rate": 0.0001498879296357143, "loss": 11.6732, "step": 47851 }, { "epoch": 1.0016746211169723, "grad_norm": 0.30358657240867615, "learning_rate": 0.00014988602941802098, "loss": 11.6753, "step": 47852 }, { "epoch": 1.0016955538809345, "grad_norm": 0.3154924213886261, "learning_rate": 0.0001498841291763465, "loss": 11.6743, "step": 47853 }, { "epoch": 1.0017164866448967, "grad_norm": 0.2347564399242401, "learning_rate": 0.00014988222891069167, "loss": 11.6671, "step": 47854 }, { "epoch": 1.0017374194088589, "grad_norm": 0.3369332551956177, "learning_rate": 0.00014988032862105744, "loss": 11.6554, "step": 47855 }, { "epoch": 1.0017583521728208, "grad_norm": 0.3289448320865631, "learning_rate": 0.0001498784283074448, "loss": 11.6634, "step": 47856 }, { "epoch": 1.001779284936783, "grad_norm": 0.25874063372612, "learning_rate": 0.00014987652796985454, "loss": 11.6675, "step": 47857 }, { "epoch": 1.0018002177007452, "grad_norm": 0.41307464241981506, "learning_rate": 0.00014987462760828767, "loss": 11.6652, "step": 47858 }, { "epoch": 1.0018211504647074, "grad_norm": 0.3213523328304291, "learning_rate": 0.00014987272722274505, "loss": 11.6759, "step": 47859 }, { "epoch": 1.0018420832286694, "grad_norm": 0.2656806707382202, "learning_rate": 0.0001498708268132276, "loss": 11.6608, "step": 47860 }, { "epoch": 1.0018630159926316, "grad_norm": 0.23999905586242676, "learning_rate": 0.00014986892637973632, "loss": 11.659, "step": 47861 }, { "epoch": 1.0018839487565938, "grad_norm": 0.31457534432411194, "learning_rate": 0.00014986702592227196, "loss": 11.6529, "step": 47862 }, { "epoch": 1.001904881520556, "grad_norm": 0.3105168342590332, "learning_rate": 0.00014986512544083556, "loss": 11.6651, "step": 47863 }, { "epoch": 1.0019258142845182, "grad_norm": 0.36947566270828247, "learning_rate": 0.000149863224935428, "loss": 11.6814, "step": 47864 }, { "epoch": 1.0019467470484802, "grad_norm": 0.28266641497612, "learning_rate": 0.0001498613244060502, "loss": 11.6863, "step": 47865 }, { "epoch": 1.0019676798124424, "grad_norm": 0.25816503167152405, "learning_rate": 0.000149859423852703, "loss": 11.6675, "step": 47866 }, { "epoch": 1.0019886125764046, "grad_norm": 0.3203119933605194, "learning_rate": 0.00014985752327538742, "loss": 11.6611, "step": 47867 }, { "epoch": 1.0020095453403668, "grad_norm": 0.29321473836898804, "learning_rate": 0.00014985562267410432, "loss": 11.6596, "step": 47868 }, { "epoch": 1.002030478104329, "grad_norm": 0.3310914933681488, "learning_rate": 0.00014985372204885463, "loss": 11.6751, "step": 47869 }, { "epoch": 1.002051410868291, "grad_norm": 0.3557167649269104, "learning_rate": 0.00014985182139963925, "loss": 11.6724, "step": 47870 }, { "epoch": 1.0020723436322532, "grad_norm": 0.33123111724853516, "learning_rate": 0.00014984992072645913, "loss": 11.6818, "step": 47871 }, { "epoch": 1.0020932763962154, "grad_norm": 0.3877013325691223, "learning_rate": 0.00014984802002931507, "loss": 11.6628, "step": 47872 }, { "epoch": 1.0021142091601776, "grad_norm": 0.40874168276786804, "learning_rate": 0.00014984611930820814, "loss": 11.6499, "step": 47873 }, { "epoch": 1.0021351419241398, "grad_norm": 0.413044273853302, "learning_rate": 0.00014984421856313918, "loss": 11.6858, "step": 47874 }, { "epoch": 1.0021560746881018, "grad_norm": 0.6271057724952698, "learning_rate": 0.0001498423177941091, "loss": 11.6662, "step": 47875 }, { "epoch": 1.002177007452064, "grad_norm": 0.4294653534889221, "learning_rate": 0.00014984041700111882, "loss": 11.6785, "step": 47876 }, { "epoch": 1.0021979402160262, "grad_norm": 0.34988850355148315, "learning_rate": 0.00014983851618416923, "loss": 11.6699, "step": 47877 }, { "epoch": 1.0022188729799884, "grad_norm": 0.3535490930080414, "learning_rate": 0.00014983661534326127, "loss": 11.6691, "step": 47878 }, { "epoch": 1.0022398057439503, "grad_norm": 0.34972894191741943, "learning_rate": 0.00014983471447839586, "loss": 11.6619, "step": 47879 }, { "epoch": 1.0022607385079125, "grad_norm": 0.2967461347579956, "learning_rate": 0.0001498328135895739, "loss": 11.6604, "step": 47880 }, { "epoch": 1.0022816712718747, "grad_norm": 0.399123877286911, "learning_rate": 0.00014983091267679633, "loss": 11.6628, "step": 47881 }, { "epoch": 1.002302604035837, "grad_norm": 0.2552810609340668, "learning_rate": 0.000149829011740064, "loss": 11.6699, "step": 47882 }, { "epoch": 1.0023235367997991, "grad_norm": 0.26611414551734924, "learning_rate": 0.0001498271107793779, "loss": 11.6497, "step": 47883 }, { "epoch": 1.002344469563761, "grad_norm": 0.3120045065879822, "learning_rate": 0.0001498252097947389, "loss": 11.6648, "step": 47884 }, { "epoch": 1.0023654023277233, "grad_norm": 0.3027772903442383, "learning_rate": 0.00014982330878614796, "loss": 11.668, "step": 47885 }, { "epoch": 1.0023863350916855, "grad_norm": 0.3025694191455841, "learning_rate": 0.00014982140775360588, "loss": 11.6593, "step": 47886 }, { "epoch": 1.0024072678556477, "grad_norm": 0.3160308301448822, "learning_rate": 0.00014981950669711372, "loss": 11.6648, "step": 47887 }, { "epoch": 1.00242820061961, "grad_norm": 0.26734229922294617, "learning_rate": 0.00014981760561667227, "loss": 11.668, "step": 47888 }, { "epoch": 1.0024491333835719, "grad_norm": 0.30226385593414307, "learning_rate": 0.0001498157045122825, "loss": 11.6527, "step": 47889 }, { "epoch": 1.002470066147534, "grad_norm": 0.31709185242652893, "learning_rate": 0.00014981380338394538, "loss": 11.6833, "step": 47890 }, { "epoch": 1.0024909989114963, "grad_norm": 0.4476372003555298, "learning_rate": 0.00014981190223166172, "loss": 11.6721, "step": 47891 }, { "epoch": 1.0025119316754585, "grad_norm": 0.3833792507648468, "learning_rate": 0.0001498100010554325, "loss": 11.6802, "step": 47892 }, { "epoch": 1.0025328644394207, "grad_norm": 0.3402596116065979, "learning_rate": 0.00014980809985525858, "loss": 11.6631, "step": 47893 }, { "epoch": 1.0025537972033827, "grad_norm": 0.3205347955226898, "learning_rate": 0.00014980619863114096, "loss": 11.67, "step": 47894 }, { "epoch": 1.0025747299673449, "grad_norm": 0.3288784325122833, "learning_rate": 0.00014980429738308047, "loss": 11.673, "step": 47895 }, { "epoch": 1.002595662731307, "grad_norm": 0.32382190227508545, "learning_rate": 0.00014980239611107806, "loss": 11.6596, "step": 47896 }, { "epoch": 1.0026165954952693, "grad_norm": 0.33923232555389404, "learning_rate": 0.00014980049481513465, "loss": 11.6533, "step": 47897 }, { "epoch": 1.0026375282592312, "grad_norm": 2.0561394691467285, "learning_rate": 0.00014979859349525113, "loss": 11.6509, "step": 47898 }, { "epoch": 1.0026584610231934, "grad_norm": 0.38803866505622864, "learning_rate": 0.00014979669215142843, "loss": 11.6763, "step": 47899 }, { "epoch": 1.0026793937871556, "grad_norm": 0.4091273248195648, "learning_rate": 0.00014979479078366747, "loss": 11.7011, "step": 47900 }, { "epoch": 1.0027003265511178, "grad_norm": 0.29244738817214966, "learning_rate": 0.00014979288939196916, "loss": 11.6927, "step": 47901 }, { "epoch": 1.00272125931508, "grad_norm": 0.3542897701263428, "learning_rate": 0.00014979098797633438, "loss": 11.6841, "step": 47902 }, { "epoch": 1.002742192079042, "grad_norm": 0.3012790381908417, "learning_rate": 0.0001497890865367641, "loss": 11.6642, "step": 47903 }, { "epoch": 1.0027631248430042, "grad_norm": 0.33538031578063965, "learning_rate": 0.00014978718507325923, "loss": 11.6628, "step": 47904 }, { "epoch": 1.0027840576069664, "grad_norm": 0.2831849455833435, "learning_rate": 0.00014978528358582064, "loss": 11.664, "step": 47905 }, { "epoch": 1.0028049903709286, "grad_norm": 0.3337165117263794, "learning_rate": 0.0001497833820744493, "loss": 11.6606, "step": 47906 }, { "epoch": 1.0028259231348908, "grad_norm": 0.36097291111946106, "learning_rate": 0.00014978148053914603, "loss": 11.6638, "step": 47907 }, { "epoch": 1.0028468558988528, "grad_norm": 0.315689355134964, "learning_rate": 0.00014977957897991184, "loss": 11.66, "step": 47908 }, { "epoch": 1.002867788662815, "grad_norm": 0.29704228043556213, "learning_rate": 0.0001497776773967476, "loss": 11.6668, "step": 47909 }, { "epoch": 1.0028887214267772, "grad_norm": 0.28218764066696167, "learning_rate": 0.0001497757757896542, "loss": 11.6608, "step": 47910 }, { "epoch": 1.0029096541907394, "grad_norm": 0.42087775468826294, "learning_rate": 0.00014977387415863265, "loss": 11.6869, "step": 47911 }, { "epoch": 1.0029305869547016, "grad_norm": 0.27869483828544617, "learning_rate": 0.00014977197250368374, "loss": 11.6786, "step": 47912 }, { "epoch": 1.0029515197186636, "grad_norm": 0.3698202967643738, "learning_rate": 0.0001497700708248085, "loss": 11.6571, "step": 47913 }, { "epoch": 1.0029724524826258, "grad_norm": 0.2939755320549011, "learning_rate": 0.00014976816912200776, "loss": 11.6442, "step": 47914 }, { "epoch": 1.002993385246588, "grad_norm": 0.30256178975105286, "learning_rate": 0.00014976626739528247, "loss": 11.6724, "step": 47915 }, { "epoch": 1.0030143180105502, "grad_norm": 0.2963304817676544, "learning_rate": 0.00014976436564463353, "loss": 11.6914, "step": 47916 }, { "epoch": 1.0030352507745122, "grad_norm": 0.31192708015441895, "learning_rate": 0.00014976246387006186, "loss": 11.6578, "step": 47917 }, { "epoch": 1.0030561835384744, "grad_norm": 0.2955809533596039, "learning_rate": 0.0001497605620715684, "loss": 11.6605, "step": 47918 }, { "epoch": 1.0030771163024366, "grad_norm": 0.3867505192756653, "learning_rate": 0.00014975866024915404, "loss": 11.6773, "step": 47919 }, { "epoch": 1.0030980490663988, "grad_norm": 0.3143126666545868, "learning_rate": 0.00014975675840281968, "loss": 11.6836, "step": 47920 }, { "epoch": 1.003118981830361, "grad_norm": 0.26447540521621704, "learning_rate": 0.00014975485653256624, "loss": 11.6792, "step": 47921 }, { "epoch": 1.003139914594323, "grad_norm": 0.26098939776420593, "learning_rate": 0.00014975295463839466, "loss": 11.6747, "step": 47922 }, { "epoch": 1.0031608473582851, "grad_norm": 0.3360181748867035, "learning_rate": 0.00014975105272030582, "loss": 11.6627, "step": 47923 }, { "epoch": 1.0031817801222473, "grad_norm": 0.3167092204093933, "learning_rate": 0.00014974915077830067, "loss": 11.6592, "step": 47924 }, { "epoch": 1.0032027128862095, "grad_norm": 0.28659775853157043, "learning_rate": 0.0001497472488123801, "loss": 11.6847, "step": 47925 }, { "epoch": 1.0032236456501717, "grad_norm": 0.32032421231269836, "learning_rate": 0.00014974534682254503, "loss": 11.6745, "step": 47926 }, { "epoch": 1.0032445784141337, "grad_norm": 0.3276856541633606, "learning_rate": 0.00014974344480879638, "loss": 11.6748, "step": 47927 }, { "epoch": 1.003265511178096, "grad_norm": 0.30712220072746277, "learning_rate": 0.00014974154277113506, "loss": 11.6687, "step": 47928 }, { "epoch": 1.0032864439420581, "grad_norm": 0.4090788960456848, "learning_rate": 0.00014973964070956198, "loss": 11.673, "step": 47929 }, { "epoch": 1.0033073767060203, "grad_norm": 0.3129101097583771, "learning_rate": 0.00014973773862407804, "loss": 11.6556, "step": 47930 }, { "epoch": 1.0033283094699825, "grad_norm": 0.317409485578537, "learning_rate": 0.0001497358365146842, "loss": 11.6719, "step": 47931 }, { "epoch": 1.0033492422339445, "grad_norm": 0.274502158164978, "learning_rate": 0.00014973393438138133, "loss": 11.6589, "step": 47932 }, { "epoch": 1.0033701749979067, "grad_norm": 0.28699731826782227, "learning_rate": 0.00014973203222417035, "loss": 11.6844, "step": 47933 }, { "epoch": 1.003391107761869, "grad_norm": 0.2567877769470215, "learning_rate": 0.00014973013004305222, "loss": 11.6616, "step": 47934 }, { "epoch": 1.003412040525831, "grad_norm": 0.29629939794540405, "learning_rate": 0.0001497282278380278, "loss": 11.6803, "step": 47935 }, { "epoch": 1.003432973289793, "grad_norm": 0.33856505155563354, "learning_rate": 0.00014972632560909802, "loss": 11.6588, "step": 47936 }, { "epoch": 1.0034539060537553, "grad_norm": 0.3398284614086151, "learning_rate": 0.0001497244233562638, "loss": 11.6682, "step": 47937 }, { "epoch": 1.0034748388177175, "grad_norm": 0.31749895215034485, "learning_rate": 0.00014972252107952607, "loss": 11.6739, "step": 47938 }, { "epoch": 1.0034957715816797, "grad_norm": 0.31933531165122986, "learning_rate": 0.0001497206187788857, "loss": 11.6661, "step": 47939 }, { "epoch": 1.0035167043456419, "grad_norm": 0.2820940613746643, "learning_rate": 0.00014971871645434365, "loss": 11.6631, "step": 47940 }, { "epoch": 1.0035376371096039, "grad_norm": 0.27862706780433655, "learning_rate": 0.00014971681410590084, "loss": 11.6668, "step": 47941 }, { "epoch": 1.003558569873566, "grad_norm": 0.34103715419769287, "learning_rate": 0.00014971491173355813, "loss": 11.6685, "step": 47942 }, { "epoch": 1.0035795026375283, "grad_norm": 0.4831484854221344, "learning_rate": 0.00014971300933731645, "loss": 11.6821, "step": 47943 }, { "epoch": 1.0036004354014905, "grad_norm": 0.3249437212944031, "learning_rate": 0.00014971110691717676, "loss": 11.6794, "step": 47944 }, { "epoch": 1.0036213681654527, "grad_norm": 0.2628132104873657, "learning_rate": 0.00014970920447313993, "loss": 11.667, "step": 47945 }, { "epoch": 1.0036423009294146, "grad_norm": 0.29732784628868103, "learning_rate": 0.0001497073020052069, "loss": 11.6523, "step": 47946 }, { "epoch": 1.0036632336933768, "grad_norm": 0.3016895651817322, "learning_rate": 0.00014970539951337857, "loss": 11.6732, "step": 47947 }, { "epoch": 1.003684166457339, "grad_norm": 0.2727731168270111, "learning_rate": 0.00014970349699765584, "loss": 11.6607, "step": 47948 }, { "epoch": 1.0037050992213012, "grad_norm": 0.3426871597766876, "learning_rate": 0.00014970159445803967, "loss": 11.6455, "step": 47949 }, { "epoch": 1.0037260319852634, "grad_norm": 0.2813645005226135, "learning_rate": 0.00014969969189453092, "loss": 11.6651, "step": 47950 }, { "epoch": 1.0037469647492254, "grad_norm": 0.33320119976997375, "learning_rate": 0.00014969778930713057, "loss": 11.6719, "step": 47951 }, { "epoch": 1.0037678975131876, "grad_norm": 0.34951692819595337, "learning_rate": 0.00014969588669583948, "loss": 11.6631, "step": 47952 }, { "epoch": 1.0037888302771498, "grad_norm": 0.29880860447883606, "learning_rate": 0.00014969398406065857, "loss": 11.6729, "step": 47953 }, { "epoch": 1.003809763041112, "grad_norm": 0.358743280172348, "learning_rate": 0.00014969208140158877, "loss": 11.6556, "step": 47954 }, { "epoch": 1.003830695805074, "grad_norm": 0.3484102189540863, "learning_rate": 0.00014969017871863097, "loss": 11.6903, "step": 47955 }, { "epoch": 1.0038516285690362, "grad_norm": 0.35908037424087524, "learning_rate": 0.00014968827601178616, "loss": 11.6508, "step": 47956 }, { "epoch": 1.0038725613329984, "grad_norm": 0.32627373933792114, "learning_rate": 0.00014968637328105516, "loss": 11.6957, "step": 47957 }, { "epoch": 1.0038934940969606, "grad_norm": 0.2951628863811493, "learning_rate": 0.00014968447052643894, "loss": 11.6731, "step": 47958 }, { "epoch": 1.0039144268609228, "grad_norm": 0.3089054822921753, "learning_rate": 0.0001496825677479384, "loss": 11.6657, "step": 47959 }, { "epoch": 1.0039353596248848, "grad_norm": 0.32103556394577026, "learning_rate": 0.00014968066494555445, "loss": 11.6697, "step": 47960 }, { "epoch": 1.003956292388847, "grad_norm": 0.2940405607223511, "learning_rate": 0.000149678762119288, "loss": 11.6655, "step": 47961 }, { "epoch": 1.0039772251528092, "grad_norm": 0.2626489996910095, "learning_rate": 0.00014967685926913998, "loss": 11.6626, "step": 47962 }, { "epoch": 1.0039981579167714, "grad_norm": 0.3321380317211151, "learning_rate": 0.00014967495639511128, "loss": 11.6691, "step": 47963 }, { "epoch": 1.0040190906807336, "grad_norm": 0.3305342495441437, "learning_rate": 0.00014967305349720286, "loss": 11.6615, "step": 47964 }, { "epoch": 1.0040400234446956, "grad_norm": 0.3301728665828705, "learning_rate": 0.0001496711505754156, "loss": 11.6739, "step": 47965 }, { "epoch": 1.0040609562086578, "grad_norm": 0.35531044006347656, "learning_rate": 0.0001496692476297504, "loss": 11.6663, "step": 47966 }, { "epoch": 1.00408188897262, "grad_norm": 0.32916250824928284, "learning_rate": 0.00014966734466020825, "loss": 11.6673, "step": 47967 }, { "epoch": 1.0041028217365822, "grad_norm": 0.3211762607097626, "learning_rate": 0.00014966544166678997, "loss": 11.6653, "step": 47968 }, { "epoch": 1.0041237545005441, "grad_norm": 0.2900291085243225, "learning_rate": 0.00014966353864949653, "loss": 11.6742, "step": 47969 }, { "epoch": 1.0041446872645063, "grad_norm": 0.3125765323638916, "learning_rate": 0.00014966163560832882, "loss": 11.6658, "step": 47970 }, { "epoch": 1.0041656200284685, "grad_norm": 0.32790037989616394, "learning_rate": 0.0001496597325432878, "loss": 11.6661, "step": 47971 }, { "epoch": 1.0041865527924307, "grad_norm": 0.3168388605117798, "learning_rate": 0.0001496578294543743, "loss": 11.6693, "step": 47972 }, { "epoch": 1.004207485556393, "grad_norm": 0.30334287881851196, "learning_rate": 0.00014965592634158932, "loss": 11.6822, "step": 47973 }, { "epoch": 1.004228418320355, "grad_norm": 0.3312588036060333, "learning_rate": 0.0001496540232049337, "loss": 11.6765, "step": 47974 }, { "epoch": 1.0042493510843171, "grad_norm": 0.2691473662853241, "learning_rate": 0.00014965212004440845, "loss": 11.6685, "step": 47975 }, { "epoch": 1.0042702838482793, "grad_norm": 0.5757750868797302, "learning_rate": 0.0001496502168600144, "loss": 11.6754, "step": 47976 }, { "epoch": 1.0042912166122415, "grad_norm": 0.30183130502700806, "learning_rate": 0.00014964831365175252, "loss": 11.6613, "step": 47977 }, { "epoch": 1.0043121493762037, "grad_norm": 0.37722811102867126, "learning_rate": 0.0001496464104196237, "loss": 11.6616, "step": 47978 }, { "epoch": 1.0043330821401657, "grad_norm": 0.313632994890213, "learning_rate": 0.00014964450716362882, "loss": 11.685, "step": 47979 }, { "epoch": 1.004354014904128, "grad_norm": 0.34668242931365967, "learning_rate": 0.00014964260388376888, "loss": 11.6561, "step": 47980 }, { "epoch": 1.00437494766809, "grad_norm": 0.28100669384002686, "learning_rate": 0.00014964070058004473, "loss": 11.6798, "step": 47981 }, { "epoch": 1.0043958804320523, "grad_norm": 0.4296101927757263, "learning_rate": 0.00014963879725245725, "loss": 11.6675, "step": 47982 }, { "epoch": 1.0044168131960145, "grad_norm": 0.3510846793651581, "learning_rate": 0.00014963689390100747, "loss": 11.6691, "step": 47983 }, { "epoch": 1.0044377459599765, "grad_norm": 0.3783458471298218, "learning_rate": 0.0001496349905256962, "loss": 11.6585, "step": 47984 }, { "epoch": 1.0044586787239387, "grad_norm": 0.387510746717453, "learning_rate": 0.00014963308712652443, "loss": 11.6387, "step": 47985 }, { "epoch": 1.0044796114879009, "grad_norm": 0.32002711296081543, "learning_rate": 0.00014963118370349301, "loss": 11.676, "step": 47986 }, { "epoch": 1.004500544251863, "grad_norm": 0.383354127407074, "learning_rate": 0.00014962928025660286, "loss": 11.6748, "step": 47987 }, { "epoch": 1.004521477015825, "grad_norm": 0.3647889196872711, "learning_rate": 0.00014962737678585498, "loss": 11.6443, "step": 47988 }, { "epoch": 1.0045424097797873, "grad_norm": 0.3226425051689148, "learning_rate": 0.0001496254732912502, "loss": 11.6462, "step": 47989 }, { "epoch": 1.0045633425437495, "grad_norm": 0.31856200098991394, "learning_rate": 0.00014962356977278946, "loss": 11.6572, "step": 47990 }, { "epoch": 1.0045842753077117, "grad_norm": 0.31997060775756836, "learning_rate": 0.00014962166623047369, "loss": 11.6799, "step": 47991 }, { "epoch": 1.0046052080716739, "grad_norm": 0.29247039556503296, "learning_rate": 0.00014961976266430376, "loss": 11.6797, "step": 47992 }, { "epoch": 1.0046261408356358, "grad_norm": 0.2842649221420288, "learning_rate": 0.00014961785907428065, "loss": 11.6631, "step": 47993 }, { "epoch": 1.004647073599598, "grad_norm": 0.3117382228374481, "learning_rate": 0.0001496159554604052, "loss": 11.6667, "step": 47994 }, { "epoch": 1.0046680063635602, "grad_norm": 0.2811167240142822, "learning_rate": 0.00014961405182267843, "loss": 11.6669, "step": 47995 }, { "epoch": 1.0046889391275224, "grad_norm": 0.34859955310821533, "learning_rate": 0.00014961214816110115, "loss": 11.6635, "step": 47996 }, { "epoch": 1.0047098718914846, "grad_norm": 0.30976539850234985, "learning_rate": 0.0001496102444756743, "loss": 11.6514, "step": 47997 }, { "epoch": 1.0047308046554466, "grad_norm": 0.40459558367729187, "learning_rate": 0.00014960834076639884, "loss": 11.6781, "step": 47998 }, { "epoch": 1.0047517374194088, "grad_norm": 0.5374356508255005, "learning_rate": 0.00014960643703327563, "loss": 11.6719, "step": 47999 }, { "epoch": 1.004772670183371, "grad_norm": 0.32718607783317566, "learning_rate": 0.00014960453327630564, "loss": 11.6795, "step": 48000 }, { "epoch": 1.004772670183371, "eval_loss": 11.668924331665039, "eval_runtime": 34.3087, "eval_samples_per_second": 28.01, "eval_steps_per_second": 7.024, "step": 48000 }, { "epoch": 1.0047936029473332, "grad_norm": 0.31455105543136597, "learning_rate": 0.00014960262949548973, "loss": 11.6699, "step": 48001 }, { "epoch": 1.0048145357112954, "grad_norm": 0.33631423115730286, "learning_rate": 0.00014960072569082888, "loss": 11.661, "step": 48002 }, { "epoch": 1.0048354684752574, "grad_norm": 0.2864868640899658, "learning_rate": 0.0001495988218623239, "loss": 11.6602, "step": 48003 }, { "epoch": 1.0048564012392196, "grad_norm": 0.4381164014339447, "learning_rate": 0.00014959691800997584, "loss": 11.674, "step": 48004 }, { "epoch": 1.0048773340031818, "grad_norm": 0.29538729786872864, "learning_rate": 0.00014959501413378554, "loss": 11.6828, "step": 48005 }, { "epoch": 1.004898266767144, "grad_norm": 0.34771040081977844, "learning_rate": 0.00014959311023375388, "loss": 11.6663, "step": 48006 }, { "epoch": 1.004919199531106, "grad_norm": 0.3199692368507385, "learning_rate": 0.00014959120630988186, "loss": 11.6574, "step": 48007 }, { "epoch": 1.0049401322950682, "grad_norm": 0.37143129110336304, "learning_rate": 0.00014958930236217032, "loss": 11.6608, "step": 48008 }, { "epoch": 1.0049610650590304, "grad_norm": 0.2595811188220978, "learning_rate": 0.00014958739839062023, "loss": 11.652, "step": 48009 }, { "epoch": 1.0049819978229926, "grad_norm": 0.31978660821914673, "learning_rate": 0.00014958549439523247, "loss": 11.6733, "step": 48010 }, { "epoch": 1.0050029305869548, "grad_norm": 0.3512382507324219, "learning_rate": 0.000149583590376008, "loss": 11.6564, "step": 48011 }, { "epoch": 1.0050238633509168, "grad_norm": 0.40383636951446533, "learning_rate": 0.0001495816863329477, "loss": 11.6676, "step": 48012 }, { "epoch": 1.005044796114879, "grad_norm": 0.32967349886894226, "learning_rate": 0.00014957978226605244, "loss": 11.6781, "step": 48013 }, { "epoch": 1.0050657288788412, "grad_norm": 0.35151374340057373, "learning_rate": 0.00014957787817532323, "loss": 11.6869, "step": 48014 }, { "epoch": 1.0050866616428034, "grad_norm": 0.3485264182090759, "learning_rate": 0.00014957597406076094, "loss": 11.6952, "step": 48015 }, { "epoch": 1.0051075944067656, "grad_norm": 0.29874134063720703, "learning_rate": 0.00014957406992236646, "loss": 11.6586, "step": 48016 }, { "epoch": 1.0051285271707275, "grad_norm": 0.41779887676239014, "learning_rate": 0.00014957216576014075, "loss": 11.6675, "step": 48017 }, { "epoch": 1.0051494599346897, "grad_norm": 0.3061356246471405, "learning_rate": 0.0001495702615740847, "loss": 11.685, "step": 48018 }, { "epoch": 1.005170392698652, "grad_norm": 0.2954040765762329, "learning_rate": 0.00014956835736419923, "loss": 11.6707, "step": 48019 }, { "epoch": 1.0051913254626141, "grad_norm": 0.38750311732292175, "learning_rate": 0.00014956645313048527, "loss": 11.6765, "step": 48020 }, { "epoch": 1.0052122582265763, "grad_norm": 0.36179032921791077, "learning_rate": 0.0001495645488729437, "loss": 11.667, "step": 48021 }, { "epoch": 1.0052331909905383, "grad_norm": 0.40479740500450134, "learning_rate": 0.00014956264459157549, "loss": 11.6676, "step": 48022 }, { "epoch": 1.0052541237545005, "grad_norm": 0.3304385542869568, "learning_rate": 0.00014956074028638148, "loss": 11.6711, "step": 48023 }, { "epoch": 1.0052750565184627, "grad_norm": 0.31614625453948975, "learning_rate": 0.00014955883595736265, "loss": 11.6591, "step": 48024 }, { "epoch": 1.005295989282425, "grad_norm": 0.3029560148715973, "learning_rate": 0.00014955693160451992, "loss": 11.6738, "step": 48025 }, { "epoch": 1.005316922046387, "grad_norm": 0.24403102695941925, "learning_rate": 0.00014955502722785414, "loss": 11.6691, "step": 48026 }, { "epoch": 1.005337854810349, "grad_norm": 0.2418264001607895, "learning_rate": 0.00014955312282736628, "loss": 11.6689, "step": 48027 }, { "epoch": 1.0053587875743113, "grad_norm": 0.3533565104007721, "learning_rate": 0.00014955121840305727, "loss": 11.6757, "step": 48028 }, { "epoch": 1.0053797203382735, "grad_norm": 0.32212916016578674, "learning_rate": 0.00014954931395492796, "loss": 11.6812, "step": 48029 }, { "epoch": 1.0054006531022357, "grad_norm": 0.32402244210243225, "learning_rate": 0.00014954740948297933, "loss": 11.6555, "step": 48030 }, { "epoch": 1.0054215858661977, "grad_norm": 0.33184781670570374, "learning_rate": 0.00014954550498721224, "loss": 11.6685, "step": 48031 }, { "epoch": 1.0054425186301599, "grad_norm": 0.3185027241706848, "learning_rate": 0.00014954360046762763, "loss": 11.6712, "step": 48032 }, { "epoch": 1.005463451394122, "grad_norm": 0.2872600257396698, "learning_rate": 0.00014954169592422645, "loss": 11.6588, "step": 48033 }, { "epoch": 1.0054843841580843, "grad_norm": 0.3017582297325134, "learning_rate": 0.00014953979135700956, "loss": 11.647, "step": 48034 }, { "epoch": 1.0055053169220465, "grad_norm": 0.31755292415618896, "learning_rate": 0.00014953788676597788, "loss": 11.6617, "step": 48035 }, { "epoch": 1.0055262496860085, "grad_norm": 0.30524420738220215, "learning_rate": 0.00014953598215113238, "loss": 11.6646, "step": 48036 }, { "epoch": 1.0055471824499707, "grad_norm": 0.3034513294696808, "learning_rate": 0.00014953407751247392, "loss": 11.6683, "step": 48037 }, { "epoch": 1.0055681152139329, "grad_norm": 0.30159661173820496, "learning_rate": 0.00014953217285000345, "loss": 11.6596, "step": 48038 }, { "epoch": 1.005589047977895, "grad_norm": 0.32623010873794556, "learning_rate": 0.0001495302681637219, "loss": 11.6698, "step": 48039 }, { "epoch": 1.0056099807418573, "grad_norm": 0.2974592447280884, "learning_rate": 0.0001495283634536301, "loss": 11.653, "step": 48040 }, { "epoch": 1.0056309135058192, "grad_norm": 0.2611064016819, "learning_rate": 0.00014952645871972906, "loss": 11.6606, "step": 48041 }, { "epoch": 1.0056518462697814, "grad_norm": 0.2968732416629791, "learning_rate": 0.00014952455396201962, "loss": 11.6493, "step": 48042 }, { "epoch": 1.0056727790337436, "grad_norm": 0.27901414036750793, "learning_rate": 0.00014952264918050278, "loss": 11.6741, "step": 48043 }, { "epoch": 1.0056937117977058, "grad_norm": 0.2891480326652527, "learning_rate": 0.0001495207443751794, "loss": 11.6829, "step": 48044 }, { "epoch": 1.0057146445616678, "grad_norm": 0.34180304408073425, "learning_rate": 0.0001495188395460504, "loss": 11.692, "step": 48045 }, { "epoch": 1.00573557732563, "grad_norm": 0.42738792300224304, "learning_rate": 0.0001495169346931167, "loss": 11.6657, "step": 48046 }, { "epoch": 1.0057565100895922, "grad_norm": 0.300176739692688, "learning_rate": 0.0001495150298163792, "loss": 11.653, "step": 48047 }, { "epoch": 1.0057774428535544, "grad_norm": 0.37081900238990784, "learning_rate": 0.00014951312491583884, "loss": 11.6609, "step": 48048 }, { "epoch": 1.0057983756175166, "grad_norm": 0.3314097225666046, "learning_rate": 0.00014951121999149655, "loss": 11.6815, "step": 48049 }, { "epoch": 1.0058193083814786, "grad_norm": 0.319692999124527, "learning_rate": 0.0001495093150433532, "loss": 11.6667, "step": 48050 }, { "epoch": 1.0058402411454408, "grad_norm": 0.44644439220428467, "learning_rate": 0.00014950741007140978, "loss": 11.6364, "step": 48051 }, { "epoch": 1.005861173909403, "grad_norm": 0.3464975655078888, "learning_rate": 0.0001495055050756671, "loss": 11.6672, "step": 48052 }, { "epoch": 1.0058821066733652, "grad_norm": 0.3052451014518738, "learning_rate": 0.00014950360005612615, "loss": 11.6704, "step": 48053 }, { "epoch": 1.0059030394373274, "grad_norm": 0.3023990988731384, "learning_rate": 0.00014950169501278784, "loss": 11.6544, "step": 48054 }, { "epoch": 1.0059239722012894, "grad_norm": 0.30330145359039307, "learning_rate": 0.00014949978994565304, "loss": 11.6799, "step": 48055 }, { "epoch": 1.0059449049652516, "grad_norm": 0.40695226192474365, "learning_rate": 0.00014949788485472273, "loss": 11.6716, "step": 48056 }, { "epoch": 1.0059658377292138, "grad_norm": 0.39170074462890625, "learning_rate": 0.0001494959797399978, "loss": 11.6592, "step": 48057 }, { "epoch": 1.005986770493176, "grad_norm": 0.30740711092948914, "learning_rate": 0.00014949407460147912, "loss": 11.6648, "step": 48058 }, { "epoch": 1.0060077032571382, "grad_norm": 0.3331899344921112, "learning_rate": 0.00014949216943916768, "loss": 11.687, "step": 48059 }, { "epoch": 1.0060286360211002, "grad_norm": 0.30296096205711365, "learning_rate": 0.00014949026425306434, "loss": 11.6682, "step": 48060 }, { "epoch": 1.0060495687850624, "grad_norm": 0.294821560382843, "learning_rate": 0.00014948835904317006, "loss": 11.6733, "step": 48061 }, { "epoch": 1.0060705015490246, "grad_norm": 0.30800652503967285, "learning_rate": 0.00014948645380948573, "loss": 11.6716, "step": 48062 }, { "epoch": 1.0060914343129868, "grad_norm": 0.3028776943683624, "learning_rate": 0.00014948454855201227, "loss": 11.6568, "step": 48063 }, { "epoch": 1.0061123670769487, "grad_norm": 0.3311276435852051, "learning_rate": 0.0001494826432707506, "loss": 11.6733, "step": 48064 }, { "epoch": 1.006133299840911, "grad_norm": 0.24667708575725555, "learning_rate": 0.00014948073796570158, "loss": 11.6538, "step": 48065 }, { "epoch": 1.0061542326048731, "grad_norm": 0.3059414327144623, "learning_rate": 0.00014947883263686626, "loss": 11.6772, "step": 48066 }, { "epoch": 1.0061751653688353, "grad_norm": 0.31600135564804077, "learning_rate": 0.00014947692728424543, "loss": 11.6594, "step": 48067 }, { "epoch": 1.0061960981327975, "grad_norm": 0.25156477093696594, "learning_rate": 0.00014947502190784006, "loss": 11.6622, "step": 48068 }, { "epoch": 1.0062170308967595, "grad_norm": 0.29334020614624023, "learning_rate": 0.00014947311650765103, "loss": 11.672, "step": 48069 }, { "epoch": 1.0062379636607217, "grad_norm": 0.2897684574127197, "learning_rate": 0.0001494712110836793, "loss": 11.6766, "step": 48070 }, { "epoch": 1.006258896424684, "grad_norm": 0.3053227663040161, "learning_rate": 0.00014946930563592578, "loss": 11.6822, "step": 48071 }, { "epoch": 1.0062798291886461, "grad_norm": 0.2818632125854492, "learning_rate": 0.00014946740016439137, "loss": 11.6647, "step": 48072 }, { "epoch": 1.0063007619526083, "grad_norm": 0.29093292355537415, "learning_rate": 0.000149465494669077, "loss": 11.6775, "step": 48073 }, { "epoch": 1.0063216947165703, "grad_norm": 0.3863399922847748, "learning_rate": 0.00014946358914998353, "loss": 11.675, "step": 48074 }, { "epoch": 1.0063426274805325, "grad_norm": 0.36179983615875244, "learning_rate": 0.00014946168360711193, "loss": 11.663, "step": 48075 }, { "epoch": 1.0063635602444947, "grad_norm": 0.30135825276374817, "learning_rate": 0.00014945977804046314, "loss": 11.6563, "step": 48076 }, { "epoch": 1.006384493008457, "grad_norm": 0.4413931965827942, "learning_rate": 0.00014945787245003802, "loss": 11.6766, "step": 48077 }, { "epoch": 1.006405425772419, "grad_norm": 0.26645180583000183, "learning_rate": 0.00014945596683583754, "loss": 11.6629, "step": 48078 }, { "epoch": 1.006426358536381, "grad_norm": 0.31564539670944214, "learning_rate": 0.00014945406119786254, "loss": 11.6774, "step": 48079 }, { "epoch": 1.0064472913003433, "grad_norm": 0.31509390473365784, "learning_rate": 0.000149452155536114, "loss": 11.6558, "step": 48080 }, { "epoch": 1.0064682240643055, "grad_norm": 0.30629962682724, "learning_rate": 0.00014945024985059283, "loss": 11.6633, "step": 48081 }, { "epoch": 1.0064891568282677, "grad_norm": 0.3204179108142853, "learning_rate": 0.00014944834414129993, "loss": 11.6767, "step": 48082 }, { "epoch": 1.0065100895922297, "grad_norm": 0.25522753596305847, "learning_rate": 0.00014944643840823622, "loss": 11.6657, "step": 48083 }, { "epoch": 1.0065310223561919, "grad_norm": 0.40623581409454346, "learning_rate": 0.0001494445326514026, "loss": 11.6678, "step": 48084 }, { "epoch": 1.006551955120154, "grad_norm": 0.3152351975440979, "learning_rate": 0.00014944262687080002, "loss": 11.6748, "step": 48085 }, { "epoch": 1.0065728878841163, "grad_norm": 0.3422177731990814, "learning_rate": 0.0001494407210664294, "loss": 11.6662, "step": 48086 }, { "epoch": 1.0065938206480785, "grad_norm": 0.4209105372428894, "learning_rate": 0.0001494388152382916, "loss": 11.6777, "step": 48087 }, { "epoch": 1.0066147534120404, "grad_norm": 0.32584741711616516, "learning_rate": 0.0001494369093863876, "loss": 11.6724, "step": 48088 }, { "epoch": 1.0066356861760026, "grad_norm": 0.24490965902805328, "learning_rate": 0.00014943500351071825, "loss": 11.6629, "step": 48089 }, { "epoch": 1.0066566189399648, "grad_norm": 0.434720516204834, "learning_rate": 0.0001494330976112845, "loss": 11.684, "step": 48090 }, { "epoch": 1.006677551703927, "grad_norm": 0.3374333083629608, "learning_rate": 0.0001494311916880873, "loss": 11.6555, "step": 48091 }, { "epoch": 1.0066984844678892, "grad_norm": 0.2707499861717224, "learning_rate": 0.0001494292857411275, "loss": 11.6473, "step": 48092 }, { "epoch": 1.0067194172318512, "grad_norm": 0.3249589800834656, "learning_rate": 0.0001494273797704061, "loss": 11.6585, "step": 48093 }, { "epoch": 1.0067403499958134, "grad_norm": 0.28165680170059204, "learning_rate": 0.00014942547377592394, "loss": 11.6584, "step": 48094 }, { "epoch": 1.0067612827597756, "grad_norm": 0.34385946393013, "learning_rate": 0.00014942356775768197, "loss": 11.6561, "step": 48095 }, { "epoch": 1.0067822155237378, "grad_norm": 0.29838937520980835, "learning_rate": 0.00014942166171568111, "loss": 11.6607, "step": 48096 }, { "epoch": 1.0068031482877, "grad_norm": 0.3308381140232086, "learning_rate": 0.00014941975564992225, "loss": 11.6696, "step": 48097 }, { "epoch": 1.006824081051662, "grad_norm": 0.2620764374732971, "learning_rate": 0.00014941784956040634, "loss": 11.676, "step": 48098 }, { "epoch": 1.0068450138156242, "grad_norm": 0.43597468733787537, "learning_rate": 0.00014941594344713426, "loss": 11.6641, "step": 48099 }, { "epoch": 1.0068659465795864, "grad_norm": 0.3259789049625397, "learning_rate": 0.00014941403731010697, "loss": 11.6669, "step": 48100 }, { "epoch": 1.0068868793435486, "grad_norm": 0.2851060926914215, "learning_rate": 0.00014941213114932534, "loss": 11.6761, "step": 48101 }, { "epoch": 1.0069078121075106, "grad_norm": 0.30197960138320923, "learning_rate": 0.00014941022496479028, "loss": 11.6572, "step": 48102 }, { "epoch": 1.0069287448714728, "grad_norm": 0.2868384122848511, "learning_rate": 0.0001494083187565028, "loss": 11.6761, "step": 48103 }, { "epoch": 1.006949677635435, "grad_norm": 0.3293275237083435, "learning_rate": 0.0001494064125244637, "loss": 11.6771, "step": 48104 }, { "epoch": 1.0069706103993972, "grad_norm": 0.299404114484787, "learning_rate": 0.000149404506268674, "loss": 11.6735, "step": 48105 }, { "epoch": 1.0069915431633594, "grad_norm": 0.28169047832489014, "learning_rate": 0.0001494025999891345, "loss": 11.6664, "step": 48106 }, { "epoch": 1.0070124759273213, "grad_norm": 0.30722343921661377, "learning_rate": 0.0001494006936858462, "loss": 11.6652, "step": 48107 }, { "epoch": 1.0070334086912835, "grad_norm": 0.342905193567276, "learning_rate": 0.00014939878735881004, "loss": 11.6451, "step": 48108 }, { "epoch": 1.0070543414552457, "grad_norm": 0.39841899275779724, "learning_rate": 0.00014939688100802686, "loss": 11.6706, "step": 48109 }, { "epoch": 1.007075274219208, "grad_norm": 0.30804792046546936, "learning_rate": 0.00014939497463349762, "loss": 11.6726, "step": 48110 }, { "epoch": 1.0070962069831702, "grad_norm": 0.3502272963523865, "learning_rate": 0.0001493930682352232, "loss": 11.6744, "step": 48111 }, { "epoch": 1.0071171397471321, "grad_norm": 0.31864187121391296, "learning_rate": 0.00014939116181320457, "loss": 11.6812, "step": 48112 }, { "epoch": 1.0071380725110943, "grad_norm": 0.29031845927238464, "learning_rate": 0.0001493892553674426, "loss": 11.6718, "step": 48113 }, { "epoch": 1.0071590052750565, "grad_norm": 0.4571883976459503, "learning_rate": 0.00014938734889793822, "loss": 11.6779, "step": 48114 }, { "epoch": 1.0071799380390187, "grad_norm": 0.3315984606742859, "learning_rate": 0.00014938544240469237, "loss": 11.6611, "step": 48115 }, { "epoch": 1.007200870802981, "grad_norm": 0.28665879368782043, "learning_rate": 0.00014938353588770596, "loss": 11.6729, "step": 48116 }, { "epoch": 1.007221803566943, "grad_norm": 0.30876877903938293, "learning_rate": 0.00014938162934697986, "loss": 11.6709, "step": 48117 }, { "epoch": 1.007242736330905, "grad_norm": 0.35640186071395874, "learning_rate": 0.00014937972278251504, "loss": 11.6792, "step": 48118 }, { "epoch": 1.0072636690948673, "grad_norm": 0.3981177508831024, "learning_rate": 0.0001493778161943124, "loss": 11.6721, "step": 48119 }, { "epoch": 1.0072846018588295, "grad_norm": 0.24780425429344177, "learning_rate": 0.00014937590958237284, "loss": 11.6666, "step": 48120 }, { "epoch": 1.0073055346227915, "grad_norm": 0.379774808883667, "learning_rate": 0.0001493740029466973, "loss": 11.6894, "step": 48121 }, { "epoch": 1.0073264673867537, "grad_norm": 0.28755295276641846, "learning_rate": 0.0001493720962872867, "loss": 11.6599, "step": 48122 }, { "epoch": 1.0073474001507159, "grad_norm": 0.2614452540874481, "learning_rate": 0.00014937018960414192, "loss": 11.6663, "step": 48123 }, { "epoch": 1.007368332914678, "grad_norm": 0.3170713782310486, "learning_rate": 0.00014936828289726392, "loss": 11.6604, "step": 48124 }, { "epoch": 1.0073892656786403, "grad_norm": 0.5182889699935913, "learning_rate": 0.00014936637616665358, "loss": 11.6729, "step": 48125 }, { "epoch": 1.0074101984426023, "grad_norm": 0.2879209518432617, "learning_rate": 0.00014936446941231187, "loss": 11.693, "step": 48126 }, { "epoch": 1.0074311312065645, "grad_norm": 0.2973143458366394, "learning_rate": 0.00014936256263423962, "loss": 11.6554, "step": 48127 }, { "epoch": 1.0074520639705267, "grad_norm": 0.2916281819343567, "learning_rate": 0.0001493606558324378, "loss": 11.6497, "step": 48128 }, { "epoch": 1.0074729967344889, "grad_norm": 0.25280073285102844, "learning_rate": 0.00014935874900690738, "loss": 11.6732, "step": 48129 }, { "epoch": 1.007493929498451, "grad_norm": 0.3022565245628357, "learning_rate": 0.00014935684215764918, "loss": 11.6683, "step": 48130 }, { "epoch": 1.007514862262413, "grad_norm": 0.3040316104888916, "learning_rate": 0.00014935493528466415, "loss": 11.6715, "step": 48131 }, { "epoch": 1.0075357950263752, "grad_norm": 0.30077216029167175, "learning_rate": 0.00014935302838795322, "loss": 11.6617, "step": 48132 }, { "epoch": 1.0075567277903374, "grad_norm": 0.2877342402935028, "learning_rate": 0.00014935112146751732, "loss": 11.6673, "step": 48133 }, { "epoch": 1.0075776605542996, "grad_norm": 0.28128963708877563, "learning_rate": 0.00014934921452335734, "loss": 11.6755, "step": 48134 }, { "epoch": 1.0075985933182618, "grad_norm": 0.3039815127849579, "learning_rate": 0.00014934730755547418, "loss": 11.6704, "step": 48135 }, { "epoch": 1.0076195260822238, "grad_norm": 0.26194068789482117, "learning_rate": 0.0001493454005638688, "loss": 11.6658, "step": 48136 }, { "epoch": 1.007640458846186, "grad_norm": 0.3416258990764618, "learning_rate": 0.0001493434935485421, "loss": 11.6511, "step": 48137 }, { "epoch": 1.0076613916101482, "grad_norm": 0.41565635800361633, "learning_rate": 0.000149341586509495, "loss": 11.6564, "step": 48138 }, { "epoch": 1.0076823243741104, "grad_norm": 0.2731565833091736, "learning_rate": 0.0001493396794467284, "loss": 11.6684, "step": 48139 }, { "epoch": 1.0077032571380724, "grad_norm": 0.3973855674266815, "learning_rate": 0.0001493377723602432, "loss": 11.6648, "step": 48140 }, { "epoch": 1.0077241899020346, "grad_norm": 0.25789549946784973, "learning_rate": 0.00014933586525004038, "loss": 11.6467, "step": 48141 }, { "epoch": 1.0077451226659968, "grad_norm": 0.4428535997867584, "learning_rate": 0.00014933395811612079, "loss": 11.6847, "step": 48142 }, { "epoch": 1.007766055429959, "grad_norm": 0.30072900652885437, "learning_rate": 0.0001493320509584854, "loss": 11.679, "step": 48143 }, { "epoch": 1.0077869881939212, "grad_norm": 0.35241976380348206, "learning_rate": 0.0001493301437771351, "loss": 11.6649, "step": 48144 }, { "epoch": 1.0078079209578832, "grad_norm": 0.3199657201766968, "learning_rate": 0.00014932823657207083, "loss": 11.6506, "step": 48145 }, { "epoch": 1.0078288537218454, "grad_norm": 0.3254508674144745, "learning_rate": 0.00014932632934329347, "loss": 11.6897, "step": 48146 }, { "epoch": 1.0078497864858076, "grad_norm": 0.36277449131011963, "learning_rate": 0.00014932442209080395, "loss": 11.6596, "step": 48147 }, { "epoch": 1.0078707192497698, "grad_norm": 0.3438989818096161, "learning_rate": 0.0001493225148146032, "loss": 11.6446, "step": 48148 }, { "epoch": 1.007891652013732, "grad_norm": 0.28842198848724365, "learning_rate": 0.0001493206075146921, "loss": 11.6729, "step": 48149 }, { "epoch": 1.007912584777694, "grad_norm": 0.30998170375823975, "learning_rate": 0.00014931870019107164, "loss": 11.6766, "step": 48150 }, { "epoch": 1.0079335175416562, "grad_norm": 0.28215962648391724, "learning_rate": 0.00014931679284374268, "loss": 11.6629, "step": 48151 }, { "epoch": 1.0079544503056184, "grad_norm": 0.3223327696323395, "learning_rate": 0.00014931488547270614, "loss": 11.6579, "step": 48152 }, { "epoch": 1.0079753830695806, "grad_norm": 0.35564371943473816, "learning_rate": 0.00014931297807796294, "loss": 11.6631, "step": 48153 }, { "epoch": 1.0079963158335428, "grad_norm": 0.2805817127227783, "learning_rate": 0.00014931107065951403, "loss": 11.6776, "step": 48154 }, { "epoch": 1.0080172485975047, "grad_norm": 0.4341003894805908, "learning_rate": 0.00014930916321736028, "loss": 11.6769, "step": 48155 }, { "epoch": 1.008038181361467, "grad_norm": 0.2851158678531647, "learning_rate": 0.00014930725575150263, "loss": 11.6692, "step": 48156 }, { "epoch": 1.0080591141254291, "grad_norm": 0.31934306025505066, "learning_rate": 0.00014930534826194198, "loss": 11.6826, "step": 48157 }, { "epoch": 1.0080800468893913, "grad_norm": 0.32953310012817383, "learning_rate": 0.00014930344074867927, "loss": 11.6771, "step": 48158 }, { "epoch": 1.0081009796533533, "grad_norm": 0.3491971790790558, "learning_rate": 0.00014930153321171544, "loss": 11.6742, "step": 48159 }, { "epoch": 1.0081219124173155, "grad_norm": 0.3357691466808319, "learning_rate": 0.00014929962565105132, "loss": 11.6693, "step": 48160 }, { "epoch": 1.0081428451812777, "grad_norm": 0.3027395009994507, "learning_rate": 0.00014929771806668793, "loss": 11.6567, "step": 48161 }, { "epoch": 1.00816377794524, "grad_norm": 0.45390424132347107, "learning_rate": 0.00014929581045862613, "loss": 11.6726, "step": 48162 }, { "epoch": 1.0081847107092021, "grad_norm": 0.2982567846775055, "learning_rate": 0.0001492939028268668, "loss": 11.6713, "step": 48163 }, { "epoch": 1.008205643473164, "grad_norm": 0.399931937456131, "learning_rate": 0.00014929199517141098, "loss": 11.6946, "step": 48164 }, { "epoch": 1.0082265762371263, "grad_norm": 0.30109235644340515, "learning_rate": 0.00014929008749225945, "loss": 11.6556, "step": 48165 }, { "epoch": 1.0082475090010885, "grad_norm": 0.285515159368515, "learning_rate": 0.0001492881797894132, "loss": 11.6895, "step": 48166 }, { "epoch": 1.0082684417650507, "grad_norm": 0.31038907170295715, "learning_rate": 0.00014928627206287315, "loss": 11.6774, "step": 48167 }, { "epoch": 1.008289374529013, "grad_norm": 0.3089180290699005, "learning_rate": 0.00014928436431264016, "loss": 11.6617, "step": 48168 }, { "epoch": 1.0083103072929749, "grad_norm": 0.2851899564266205, "learning_rate": 0.00014928245653871524, "loss": 11.6534, "step": 48169 }, { "epoch": 1.008331240056937, "grad_norm": 0.28078749775886536, "learning_rate": 0.00014928054874109923, "loss": 11.6622, "step": 48170 }, { "epoch": 1.0083521728208993, "grad_norm": 0.34756606817245483, "learning_rate": 0.0001492786409197931, "loss": 11.6744, "step": 48171 }, { "epoch": 1.0083731055848615, "grad_norm": 0.31639596819877625, "learning_rate": 0.0001492767330747977, "loss": 11.6701, "step": 48172 }, { "epoch": 1.0083940383488237, "grad_norm": 0.298406183719635, "learning_rate": 0.000149274825206114, "loss": 11.6704, "step": 48173 }, { "epoch": 1.0084149711127857, "grad_norm": 0.29159069061279297, "learning_rate": 0.0001492729173137429, "loss": 11.6778, "step": 48174 }, { "epoch": 1.0084359038767479, "grad_norm": 0.3329106271266937, "learning_rate": 0.00014927100939768532, "loss": 11.6765, "step": 48175 }, { "epoch": 1.00845683664071, "grad_norm": 0.3124600052833557, "learning_rate": 0.00014926910145794222, "loss": 11.6418, "step": 48176 }, { "epoch": 1.0084777694046723, "grad_norm": 0.3682461082935333, "learning_rate": 0.00014926719349451443, "loss": 11.6745, "step": 48177 }, { "epoch": 1.0084987021686342, "grad_norm": 0.31246158480644226, "learning_rate": 0.00014926528550740292, "loss": 11.6563, "step": 48178 }, { "epoch": 1.0085196349325964, "grad_norm": 0.3849380910396576, "learning_rate": 0.0001492633774966086, "loss": 11.6741, "step": 48179 }, { "epoch": 1.0085405676965586, "grad_norm": 0.26827558875083923, "learning_rate": 0.0001492614694621324, "loss": 11.6704, "step": 48180 }, { "epoch": 1.0085615004605208, "grad_norm": 0.3001933991909027, "learning_rate": 0.00014925956140397523, "loss": 11.6679, "step": 48181 }, { "epoch": 1.008582433224483, "grad_norm": 0.36583808064460754, "learning_rate": 0.00014925765332213798, "loss": 11.692, "step": 48182 }, { "epoch": 1.008603365988445, "grad_norm": 0.27667319774627686, "learning_rate": 0.0001492557452166216, "loss": 11.668, "step": 48183 }, { "epoch": 1.0086242987524072, "grad_norm": 0.3156839609146118, "learning_rate": 0.000149253837087427, "loss": 11.6662, "step": 48184 }, { "epoch": 1.0086452315163694, "grad_norm": 0.24620330333709717, "learning_rate": 0.00014925192893455508, "loss": 11.657, "step": 48185 }, { "epoch": 1.0086661642803316, "grad_norm": 0.3172380328178406, "learning_rate": 0.00014925002075800677, "loss": 11.6607, "step": 48186 }, { "epoch": 1.0086870970442938, "grad_norm": 0.42256709933280945, "learning_rate": 0.00014924811255778302, "loss": 11.6697, "step": 48187 }, { "epoch": 1.0087080298082558, "grad_norm": 0.28881147503852844, "learning_rate": 0.0001492462043338847, "loss": 11.6583, "step": 48188 }, { "epoch": 1.008728962572218, "grad_norm": 0.24906569719314575, "learning_rate": 0.00014924429608631273, "loss": 11.6649, "step": 48189 }, { "epoch": 1.0087498953361802, "grad_norm": 0.3138130009174347, "learning_rate": 0.00014924238781506804, "loss": 11.6722, "step": 48190 }, { "epoch": 1.0087708281001424, "grad_norm": 0.2802639603614807, "learning_rate": 0.00014924047952015153, "loss": 11.6661, "step": 48191 }, { "epoch": 1.0087917608641046, "grad_norm": 0.3552148640155792, "learning_rate": 0.00014923857120156418, "loss": 11.6658, "step": 48192 }, { "epoch": 1.0088126936280666, "grad_norm": 0.289914608001709, "learning_rate": 0.00014923666285930685, "loss": 11.6722, "step": 48193 }, { "epoch": 1.0088336263920288, "grad_norm": 0.2633269131183624, "learning_rate": 0.0001492347544933804, "loss": 11.6716, "step": 48194 }, { "epoch": 1.008854559155991, "grad_norm": 0.24464471638202667, "learning_rate": 0.0001492328461037859, "loss": 11.6743, "step": 48195 }, { "epoch": 1.0088754919199532, "grad_norm": 0.3095110058784485, "learning_rate": 0.00014923093769052416, "loss": 11.6895, "step": 48196 }, { "epoch": 1.0088964246839152, "grad_norm": 0.39400631189346313, "learning_rate": 0.00014922902925359612, "loss": 11.6778, "step": 48197 }, { "epoch": 1.0089173574478774, "grad_norm": 0.5117248892784119, "learning_rate": 0.00014922712079300273, "loss": 11.6732, "step": 48198 }, { "epoch": 1.0089382902118396, "grad_norm": 0.2527678310871124, "learning_rate": 0.0001492252123087448, "loss": 11.68, "step": 48199 }, { "epoch": 1.0089592229758018, "grad_norm": 0.3281274735927582, "learning_rate": 0.00014922330380082342, "loss": 11.6729, "step": 48200 }, { "epoch": 1.008980155739764, "grad_norm": 0.30553022027015686, "learning_rate": 0.00014922139526923935, "loss": 11.683, "step": 48201 }, { "epoch": 1.009001088503726, "grad_norm": 0.3258157968521118, "learning_rate": 0.00014921948671399354, "loss": 11.6483, "step": 48202 }, { "epoch": 1.0090220212676881, "grad_norm": 0.29853159189224243, "learning_rate": 0.00014921757813508702, "loss": 11.6507, "step": 48203 }, { "epoch": 1.0090429540316503, "grad_norm": 0.32136180996894836, "learning_rate": 0.00014921566953252056, "loss": 11.6702, "step": 48204 }, { "epoch": 1.0090638867956125, "grad_norm": 0.28215476870536804, "learning_rate": 0.00014921376090629518, "loss": 11.6535, "step": 48205 }, { "epoch": 1.0090848195595747, "grad_norm": 0.30387449264526367, "learning_rate": 0.00014921185225641172, "loss": 11.6826, "step": 48206 }, { "epoch": 1.0091057523235367, "grad_norm": 0.27383187413215637, "learning_rate": 0.00014920994358287115, "loss": 11.6708, "step": 48207 }, { "epoch": 1.009126685087499, "grad_norm": 0.2989014685153961, "learning_rate": 0.0001492080348856744, "loss": 11.6442, "step": 48208 }, { "epoch": 1.0091476178514611, "grad_norm": 0.32809439301490784, "learning_rate": 0.00014920612616482233, "loss": 11.6593, "step": 48209 }, { "epoch": 1.0091685506154233, "grad_norm": 0.2858801484107971, "learning_rate": 0.0001492042174203159, "loss": 11.6775, "step": 48210 }, { "epoch": 1.0091894833793855, "grad_norm": 0.3086724877357483, "learning_rate": 0.00014920230865215602, "loss": 11.6782, "step": 48211 }, { "epoch": 1.0092104161433475, "grad_norm": 0.26366978883743286, "learning_rate": 0.0001492003998603436, "loss": 11.6644, "step": 48212 }, { "epoch": 1.0092313489073097, "grad_norm": 0.307094007730484, "learning_rate": 0.00014919849104487958, "loss": 11.6683, "step": 48213 }, { "epoch": 1.009252281671272, "grad_norm": 0.30628249049186707, "learning_rate": 0.0001491965822057648, "loss": 11.6579, "step": 48214 }, { "epoch": 1.009273214435234, "grad_norm": 0.38428398966789246, "learning_rate": 0.00014919467334300032, "loss": 11.6567, "step": 48215 }, { "epoch": 1.009294147199196, "grad_norm": 0.32793790102005005, "learning_rate": 0.00014919276445658693, "loss": 11.6746, "step": 48216 }, { "epoch": 1.0093150799631583, "grad_norm": 0.2739148736000061, "learning_rate": 0.0001491908555465256, "loss": 11.6483, "step": 48217 }, { "epoch": 1.0093360127271205, "grad_norm": 0.28887122869491577, "learning_rate": 0.00014918894661281723, "loss": 11.668, "step": 48218 }, { "epoch": 1.0093569454910827, "grad_norm": 0.4340779483318329, "learning_rate": 0.00014918703765546277, "loss": 11.6625, "step": 48219 }, { "epoch": 1.0093778782550449, "grad_norm": 0.2638871669769287, "learning_rate": 0.0001491851286744631, "loss": 11.673, "step": 48220 }, { "epoch": 1.0093988110190069, "grad_norm": 0.34167689085006714, "learning_rate": 0.00014918321966981917, "loss": 11.6594, "step": 48221 }, { "epoch": 1.009419743782969, "grad_norm": 0.32429707050323486, "learning_rate": 0.00014918131064153186, "loss": 11.675, "step": 48222 }, { "epoch": 1.0094406765469313, "grad_norm": 0.305055171251297, "learning_rate": 0.0001491794015896021, "loss": 11.6633, "step": 48223 }, { "epoch": 1.0094616093108935, "grad_norm": 0.4231220781803131, "learning_rate": 0.00014917749251403082, "loss": 11.6664, "step": 48224 }, { "epoch": 1.0094825420748557, "grad_norm": 0.28773820400238037, "learning_rate": 0.00014917558341481896, "loss": 11.6571, "step": 48225 }, { "epoch": 1.0095034748388176, "grad_norm": 0.24018406867980957, "learning_rate": 0.00014917367429196737, "loss": 11.6629, "step": 48226 }, { "epoch": 1.0095244076027798, "grad_norm": 0.26893794536590576, "learning_rate": 0.00014917176514547706, "loss": 11.6625, "step": 48227 }, { "epoch": 1.009545340366742, "grad_norm": 0.3097073435783386, "learning_rate": 0.00014916985597534884, "loss": 11.6685, "step": 48228 }, { "epoch": 1.0095662731307042, "grad_norm": 0.3382987380027771, "learning_rate": 0.00014916794678158373, "loss": 11.6764, "step": 48229 }, { "epoch": 1.0095872058946664, "grad_norm": 0.34347617626190186, "learning_rate": 0.0001491660375641826, "loss": 11.6762, "step": 48230 }, { "epoch": 1.0096081386586284, "grad_norm": 0.31843098998069763, "learning_rate": 0.00014916412832314635, "loss": 11.669, "step": 48231 }, { "epoch": 1.0096290714225906, "grad_norm": 0.2910913825035095, "learning_rate": 0.00014916221905847594, "loss": 11.678, "step": 48232 }, { "epoch": 1.0096500041865528, "grad_norm": 0.3781166970729828, "learning_rate": 0.00014916030977017223, "loss": 11.6694, "step": 48233 }, { "epoch": 1.009670936950515, "grad_norm": 0.2515578866004944, "learning_rate": 0.0001491584004582362, "loss": 11.6752, "step": 48234 }, { "epoch": 1.009691869714477, "grad_norm": 0.2951200306415558, "learning_rate": 0.00014915649112266874, "loss": 11.6552, "step": 48235 }, { "epoch": 1.0097128024784392, "grad_norm": 0.3261643052101135, "learning_rate": 0.00014915458176347076, "loss": 11.6631, "step": 48236 }, { "epoch": 1.0097337352424014, "grad_norm": 0.26128724217414856, "learning_rate": 0.0001491526723806432, "loss": 11.6671, "step": 48237 }, { "epoch": 1.0097546680063636, "grad_norm": 0.3221980035305023, "learning_rate": 0.00014915076297418697, "loss": 11.6585, "step": 48238 }, { "epoch": 1.0097756007703258, "grad_norm": 0.265598863363266, "learning_rate": 0.00014914885354410298, "loss": 11.6762, "step": 48239 }, { "epoch": 1.0097965335342878, "grad_norm": 0.3355732858181, "learning_rate": 0.00014914694409039214, "loss": 11.6598, "step": 48240 }, { "epoch": 1.00981746629825, "grad_norm": 0.26116740703582764, "learning_rate": 0.0001491450346130554, "loss": 11.6725, "step": 48241 }, { "epoch": 1.0098383990622122, "grad_norm": 0.343567818403244, "learning_rate": 0.00014914312511209364, "loss": 11.6631, "step": 48242 }, { "epoch": 1.0098593318261744, "grad_norm": 0.2922966778278351, "learning_rate": 0.00014914121558750777, "loss": 11.6473, "step": 48243 }, { "epoch": 1.0098802645901366, "grad_norm": 0.2845982313156128, "learning_rate": 0.00014913930603929876, "loss": 11.6474, "step": 48244 }, { "epoch": 1.0099011973540986, "grad_norm": 0.29557880759239197, "learning_rate": 0.0001491373964674675, "loss": 11.6721, "step": 48245 }, { "epoch": 1.0099221301180608, "grad_norm": 0.2651595175266266, "learning_rate": 0.0001491354868720149, "loss": 11.6745, "step": 48246 }, { "epoch": 1.009943062882023, "grad_norm": 0.38388222455978394, "learning_rate": 0.00014913357725294192, "loss": 11.6786, "step": 48247 }, { "epoch": 1.0099639956459852, "grad_norm": 0.3312743604183197, "learning_rate": 0.0001491316676102494, "loss": 11.6619, "step": 48248 }, { "epoch": 1.0099849284099474, "grad_norm": 0.3733534514904022, "learning_rate": 0.00014912975794393834, "loss": 11.6951, "step": 48249 }, { "epoch": 1.0100058611739093, "grad_norm": 0.3258120119571686, "learning_rate": 0.0001491278482540096, "loss": 11.674, "step": 48250 }, { "epoch": 1.0100267939378715, "grad_norm": 0.35133886337280273, "learning_rate": 0.0001491259385404641, "loss": 11.6636, "step": 48251 }, { "epoch": 1.0100477267018337, "grad_norm": 0.37622037529945374, "learning_rate": 0.00014912402880330283, "loss": 11.6474, "step": 48252 }, { "epoch": 1.010068659465796, "grad_norm": 0.2741049826145172, "learning_rate": 0.0001491221190425266, "loss": 11.6658, "step": 48253 }, { "epoch": 1.010089592229758, "grad_norm": 0.3874531686306, "learning_rate": 0.00014912020925813644, "loss": 11.6695, "step": 48254 }, { "epoch": 1.0101105249937201, "grad_norm": 0.23863735795021057, "learning_rate": 0.00014911829945013315, "loss": 11.6614, "step": 48255 }, { "epoch": 1.0101314577576823, "grad_norm": 0.30645301938056946, "learning_rate": 0.00014911638961851776, "loss": 11.6717, "step": 48256 }, { "epoch": 1.0101523905216445, "grad_norm": 0.32530677318573, "learning_rate": 0.0001491144797632911, "loss": 11.6633, "step": 48257 }, { "epoch": 1.0101733232856067, "grad_norm": 0.376717209815979, "learning_rate": 0.00014911256988445415, "loss": 11.6795, "step": 48258 }, { "epoch": 1.0101942560495687, "grad_norm": 0.27640530467033386, "learning_rate": 0.0001491106599820078, "loss": 11.6746, "step": 48259 }, { "epoch": 1.010215188813531, "grad_norm": 0.26354289054870605, "learning_rate": 0.00014910875005595296, "loss": 11.6837, "step": 48260 }, { "epoch": 1.010236121577493, "grad_norm": 0.30330219864845276, "learning_rate": 0.00014910684010629056, "loss": 11.6732, "step": 48261 }, { "epoch": 1.0102570543414553, "grad_norm": 0.2960166335105896, "learning_rate": 0.00014910493013302155, "loss": 11.6825, "step": 48262 }, { "epoch": 1.0102779871054175, "grad_norm": 0.3438021242618561, "learning_rate": 0.00014910302013614678, "loss": 11.6731, "step": 48263 }, { "epoch": 1.0102989198693795, "grad_norm": 0.432085245847702, "learning_rate": 0.00014910111011566722, "loss": 11.6695, "step": 48264 }, { "epoch": 1.0103198526333417, "grad_norm": 0.3079075813293457, "learning_rate": 0.00014909920007158373, "loss": 11.6647, "step": 48265 }, { "epoch": 1.0103407853973039, "grad_norm": 0.39383000135421753, "learning_rate": 0.0001490972900038973, "loss": 11.6617, "step": 48266 }, { "epoch": 1.010361718161266, "grad_norm": 0.49106088280677795, "learning_rate": 0.00014909537991260885, "loss": 11.6674, "step": 48267 }, { "epoch": 1.0103826509252283, "grad_norm": 0.3844371438026428, "learning_rate": 0.00014909346979771924, "loss": 11.6826, "step": 48268 }, { "epoch": 1.0104035836891903, "grad_norm": 0.26361632347106934, "learning_rate": 0.0001490915596592294, "loss": 11.6531, "step": 48269 }, { "epoch": 1.0104245164531525, "grad_norm": 0.30733147263526917, "learning_rate": 0.00014908964949714027, "loss": 11.6541, "step": 48270 }, { "epoch": 1.0104454492171147, "grad_norm": 0.29776427149772644, "learning_rate": 0.0001490877393114528, "loss": 11.663, "step": 48271 }, { "epoch": 1.0104663819810769, "grad_norm": 0.304890513420105, "learning_rate": 0.00014908582910216783, "loss": 11.6672, "step": 48272 }, { "epoch": 1.0104873147450388, "grad_norm": 0.2897147238254547, "learning_rate": 0.00014908391886928632, "loss": 11.6611, "step": 48273 }, { "epoch": 1.010508247509001, "grad_norm": 0.3210277855396271, "learning_rate": 0.0001490820086128092, "loss": 11.6844, "step": 48274 }, { "epoch": 1.0105291802729632, "grad_norm": 0.48817211389541626, "learning_rate": 0.00014908009833273738, "loss": 11.6576, "step": 48275 }, { "epoch": 1.0105501130369254, "grad_norm": 0.33222684264183044, "learning_rate": 0.00014907818802907178, "loss": 11.6617, "step": 48276 }, { "epoch": 1.0105710458008876, "grad_norm": 0.2966146469116211, "learning_rate": 0.00014907627770181328, "loss": 11.6768, "step": 48277 }, { "epoch": 1.0105919785648496, "grad_norm": 0.2755984961986542, "learning_rate": 0.00014907436735096282, "loss": 11.6616, "step": 48278 }, { "epoch": 1.0106129113288118, "grad_norm": 0.298921674489975, "learning_rate": 0.0001490724569765214, "loss": 11.676, "step": 48279 }, { "epoch": 1.010633844092774, "grad_norm": 0.32955923676490784, "learning_rate": 0.0001490705465784898, "loss": 11.6774, "step": 48280 }, { "epoch": 1.0106547768567362, "grad_norm": 0.4103303849697113, "learning_rate": 0.00014906863615686905, "loss": 11.6757, "step": 48281 }, { "epoch": 1.0106757096206984, "grad_norm": 0.37003493309020996, "learning_rate": 0.00014906672571165998, "loss": 11.6669, "step": 48282 }, { "epoch": 1.0106966423846604, "grad_norm": 0.29221004247665405, "learning_rate": 0.00014906481524286355, "loss": 11.6731, "step": 48283 }, { "epoch": 1.0107175751486226, "grad_norm": 0.30557534098625183, "learning_rate": 0.00014906290475048072, "loss": 11.675, "step": 48284 }, { "epoch": 1.0107385079125848, "grad_norm": 0.45642179250717163, "learning_rate": 0.00014906099423451233, "loss": 11.6794, "step": 48285 }, { "epoch": 1.010759440676547, "grad_norm": 0.3168082535266876, "learning_rate": 0.00014905908369495938, "loss": 11.6857, "step": 48286 }, { "epoch": 1.0107803734405092, "grad_norm": 0.30962181091308594, "learning_rate": 0.0001490571731318227, "loss": 11.6691, "step": 48287 }, { "epoch": 1.0108013062044712, "grad_norm": 0.31379029154777527, "learning_rate": 0.00014905526254510327, "loss": 11.6507, "step": 48288 }, { "epoch": 1.0108222389684334, "grad_norm": 0.37132686376571655, "learning_rate": 0.000149053351934802, "loss": 11.6579, "step": 48289 }, { "epoch": 1.0108431717323956, "grad_norm": 0.3039085865020752, "learning_rate": 0.0001490514413009198, "loss": 11.6757, "step": 48290 }, { "epoch": 1.0108641044963578, "grad_norm": 0.33636701107025146, "learning_rate": 0.00014904953064345762, "loss": 11.658, "step": 48291 }, { "epoch": 1.0108850372603198, "grad_norm": 0.2503453493118286, "learning_rate": 0.0001490476199624163, "loss": 11.6684, "step": 48292 }, { "epoch": 1.010905970024282, "grad_norm": 0.44110947847366333, "learning_rate": 0.00014904570925779683, "loss": 11.6711, "step": 48293 }, { "epoch": 1.0109269027882442, "grad_norm": 0.309128999710083, "learning_rate": 0.0001490437985296001, "loss": 11.6597, "step": 48294 }, { "epoch": 1.0109478355522064, "grad_norm": 0.3078731596469879, "learning_rate": 0.00014904188777782699, "loss": 11.6738, "step": 48295 }, { "epoch": 1.0109687683161686, "grad_norm": 0.41158750653266907, "learning_rate": 0.00014903997700247852, "loss": 11.6519, "step": 48296 }, { "epoch": 1.0109897010801305, "grad_norm": 0.27476245164871216, "learning_rate": 0.00014903806620355554, "loss": 11.6695, "step": 48297 }, { "epoch": 1.0110106338440927, "grad_norm": 0.28227534890174866, "learning_rate": 0.00014903615538105897, "loss": 11.6753, "step": 48298 }, { "epoch": 1.011031566608055, "grad_norm": 0.2890430688858032, "learning_rate": 0.00014903424453498973, "loss": 11.6839, "step": 48299 }, { "epoch": 1.0110524993720171, "grad_norm": 0.28377994894981384, "learning_rate": 0.00014903233366534875, "loss": 11.6765, "step": 48300 }, { "epoch": 1.0110734321359793, "grad_norm": 0.3225569725036621, "learning_rate": 0.00014903042277213693, "loss": 11.6742, "step": 48301 }, { "epoch": 1.0110943648999413, "grad_norm": 0.3514726459980011, "learning_rate": 0.00014902851185535522, "loss": 11.6725, "step": 48302 }, { "epoch": 1.0111152976639035, "grad_norm": 0.3754497468471527, "learning_rate": 0.00014902660091500454, "loss": 11.6814, "step": 48303 }, { "epoch": 1.0111362304278657, "grad_norm": 0.31949499249458313, "learning_rate": 0.00014902468995108575, "loss": 11.6554, "step": 48304 }, { "epoch": 1.011157163191828, "grad_norm": 0.33091506361961365, "learning_rate": 0.00014902277896359985, "loss": 11.6468, "step": 48305 }, { "epoch": 1.0111780959557901, "grad_norm": 0.3098156154155731, "learning_rate": 0.0001490208679525477, "loss": 11.6633, "step": 48306 }, { "epoch": 1.011199028719752, "grad_norm": 0.3050624430179596, "learning_rate": 0.00014901895691793023, "loss": 11.687, "step": 48307 }, { "epoch": 1.0112199614837143, "grad_norm": 0.28415507078170776, "learning_rate": 0.00014901704585974839, "loss": 11.6346, "step": 48308 }, { "epoch": 1.0112408942476765, "grad_norm": 0.32898929715156555, "learning_rate": 0.00014901513477800303, "loss": 11.6633, "step": 48309 }, { "epoch": 1.0112618270116387, "grad_norm": 0.3403345048427582, "learning_rate": 0.00014901322367269514, "loss": 11.6669, "step": 48310 }, { "epoch": 1.0112827597756007, "grad_norm": 0.29561498761177063, "learning_rate": 0.00014901131254382562, "loss": 11.6586, "step": 48311 }, { "epoch": 1.0113036925395629, "grad_norm": 0.2910073399543762, "learning_rate": 0.00014900940139139538, "loss": 11.6653, "step": 48312 }, { "epoch": 1.011324625303525, "grad_norm": 0.3241049349308014, "learning_rate": 0.00014900749021540532, "loss": 11.6562, "step": 48313 }, { "epoch": 1.0113455580674873, "grad_norm": 0.38217154145240784, "learning_rate": 0.00014900557901585638, "loss": 11.659, "step": 48314 }, { "epoch": 1.0113664908314495, "grad_norm": 0.3898214101791382, "learning_rate": 0.00014900366779274947, "loss": 11.6825, "step": 48315 }, { "epoch": 1.0113874235954114, "grad_norm": 0.3090146780014038, "learning_rate": 0.00014900175654608555, "loss": 11.6624, "step": 48316 }, { "epoch": 1.0114083563593737, "grad_norm": 0.3149715065956116, "learning_rate": 0.00014899984527586547, "loss": 11.6758, "step": 48317 }, { "epoch": 1.0114292891233359, "grad_norm": 0.3482886850833893, "learning_rate": 0.00014899793398209018, "loss": 11.6711, "step": 48318 }, { "epoch": 1.011450221887298, "grad_norm": 0.2718914747238159, "learning_rate": 0.0001489960226647606, "loss": 11.6594, "step": 48319 }, { "epoch": 1.0114711546512603, "grad_norm": 0.32058584690093994, "learning_rate": 0.0001489941113238777, "loss": 11.6666, "step": 48320 }, { "epoch": 1.0114920874152222, "grad_norm": 0.28815793991088867, "learning_rate": 0.0001489921999594423, "loss": 11.6628, "step": 48321 }, { "epoch": 1.0115130201791844, "grad_norm": 0.2605311870574951, "learning_rate": 0.00014899028857145535, "loss": 11.6601, "step": 48322 }, { "epoch": 1.0115339529431466, "grad_norm": 0.375881552696228, "learning_rate": 0.00014898837715991782, "loss": 11.677, "step": 48323 }, { "epoch": 1.0115548857071088, "grad_norm": 0.26719367504119873, "learning_rate": 0.0001489864657248306, "loss": 11.6665, "step": 48324 }, { "epoch": 1.011575818471071, "grad_norm": 0.30755242705345154, "learning_rate": 0.0001489845542661946, "loss": 11.6635, "step": 48325 }, { "epoch": 1.011596751235033, "grad_norm": 0.32721254229545593, "learning_rate": 0.00014898264278401073, "loss": 11.6644, "step": 48326 }, { "epoch": 1.0116176839989952, "grad_norm": 0.24466592073440552, "learning_rate": 0.00014898073127827993, "loss": 11.6586, "step": 48327 }, { "epoch": 1.0116386167629574, "grad_norm": 0.3021933436393738, "learning_rate": 0.00014897881974900313, "loss": 11.6802, "step": 48328 }, { "epoch": 1.0116595495269196, "grad_norm": 0.295673131942749, "learning_rate": 0.0001489769081961812, "loss": 11.6615, "step": 48329 }, { "epoch": 1.0116804822908816, "grad_norm": 0.3498857319355011, "learning_rate": 0.00014897499661981512, "loss": 11.6624, "step": 48330 }, { "epoch": 1.0117014150548438, "grad_norm": 0.3851371109485626, "learning_rate": 0.00014897308501990572, "loss": 11.6801, "step": 48331 }, { "epoch": 1.011722347818806, "grad_norm": 0.3463045358657837, "learning_rate": 0.000148971173396454, "loss": 11.671, "step": 48332 }, { "epoch": 1.0117432805827682, "grad_norm": 0.33053985238075256, "learning_rate": 0.0001489692617494609, "loss": 11.6507, "step": 48333 }, { "epoch": 1.0117642133467304, "grad_norm": 0.2853558361530304, "learning_rate": 0.00014896735007892726, "loss": 11.6542, "step": 48334 }, { "epoch": 1.0117851461106924, "grad_norm": 0.3697744607925415, "learning_rate": 0.00014896543838485403, "loss": 11.6682, "step": 48335 }, { "epoch": 1.0118060788746546, "grad_norm": 0.2579372227191925, "learning_rate": 0.00014896352666724213, "loss": 11.6794, "step": 48336 }, { "epoch": 1.0118270116386168, "grad_norm": 0.32733583450317383, "learning_rate": 0.00014896161492609252, "loss": 11.6739, "step": 48337 }, { "epoch": 1.011847944402579, "grad_norm": 0.33881887793540955, "learning_rate": 0.00014895970316140605, "loss": 11.6538, "step": 48338 }, { "epoch": 1.0118688771665412, "grad_norm": 0.29246950149536133, "learning_rate": 0.00014895779137318366, "loss": 11.661, "step": 48339 }, { "epoch": 1.0118898099305031, "grad_norm": 0.2724473774433136, "learning_rate": 0.00014895587956142628, "loss": 11.6597, "step": 48340 }, { "epoch": 1.0119107426944653, "grad_norm": 0.32504525780677795, "learning_rate": 0.00014895396772613486, "loss": 11.6696, "step": 48341 }, { "epoch": 1.0119316754584275, "grad_norm": 0.2713765799999237, "learning_rate": 0.00014895205586731028, "loss": 11.683, "step": 48342 }, { "epoch": 1.0119526082223897, "grad_norm": 0.3068711757659912, "learning_rate": 0.00014895014398495342, "loss": 11.6618, "step": 48343 }, { "epoch": 1.011973540986352, "grad_norm": 0.33041542768478394, "learning_rate": 0.0001489482320790653, "loss": 11.6623, "step": 48344 }, { "epoch": 1.011994473750314, "grad_norm": 0.2849390506744385, "learning_rate": 0.00014894632014964674, "loss": 11.6595, "step": 48345 }, { "epoch": 1.0120154065142761, "grad_norm": 0.36607861518859863, "learning_rate": 0.00014894440819669872, "loss": 11.6607, "step": 48346 }, { "epoch": 1.0120363392782383, "grad_norm": 0.3148082494735718, "learning_rate": 0.00014894249622022216, "loss": 11.6773, "step": 48347 }, { "epoch": 1.0120572720422005, "grad_norm": 0.35069817304611206, "learning_rate": 0.00014894058422021793, "loss": 11.6748, "step": 48348 }, { "epoch": 1.0120782048061625, "grad_norm": 0.30473193526268005, "learning_rate": 0.000148938672196687, "loss": 11.6691, "step": 48349 }, { "epoch": 1.0120991375701247, "grad_norm": 0.3498062193393707, "learning_rate": 0.00014893676014963024, "loss": 11.6625, "step": 48350 }, { "epoch": 1.012120070334087, "grad_norm": 0.283616840839386, "learning_rate": 0.00014893484807904862, "loss": 11.6794, "step": 48351 }, { "epoch": 1.012141003098049, "grad_norm": 0.4174530506134033, "learning_rate": 0.00014893293598494306, "loss": 11.6716, "step": 48352 }, { "epoch": 1.0121619358620113, "grad_norm": 0.2977006435394287, "learning_rate": 0.00014893102386731444, "loss": 11.6587, "step": 48353 }, { "epoch": 1.0121828686259733, "grad_norm": 0.32957425713539124, "learning_rate": 0.0001489291117261637, "loss": 11.6684, "step": 48354 }, { "epoch": 1.0122038013899355, "grad_norm": 0.3783291280269623, "learning_rate": 0.00014892719956149174, "loss": 11.6832, "step": 48355 }, { "epoch": 1.0122247341538977, "grad_norm": 0.2727665901184082, "learning_rate": 0.00014892528737329953, "loss": 11.6601, "step": 48356 }, { "epoch": 1.0122456669178599, "grad_norm": 0.33275243639945984, "learning_rate": 0.0001489233751615879, "loss": 11.666, "step": 48357 }, { "epoch": 1.012266599681822, "grad_norm": 0.32388168573379517, "learning_rate": 0.00014892146292635786, "loss": 11.6608, "step": 48358 }, { "epoch": 1.012287532445784, "grad_norm": 0.4080384075641632, "learning_rate": 0.0001489195506676103, "loss": 11.6714, "step": 48359 }, { "epoch": 1.0123084652097463, "grad_norm": 0.2827562689781189, "learning_rate": 0.0001489176383853461, "loss": 11.6671, "step": 48360 }, { "epoch": 1.0123293979737085, "grad_norm": 0.48936161398887634, "learning_rate": 0.00014891572607956624, "loss": 11.6861, "step": 48361 }, { "epoch": 1.0123503307376707, "grad_norm": 0.3146372437477112, "learning_rate": 0.00014891381375027157, "loss": 11.6674, "step": 48362 }, { "epoch": 1.0123712635016326, "grad_norm": 0.2952226400375366, "learning_rate": 0.0001489119013974631, "loss": 11.6629, "step": 48363 }, { "epoch": 1.0123921962655948, "grad_norm": 0.2744031250476837, "learning_rate": 0.00014890998902114167, "loss": 11.6593, "step": 48364 }, { "epoch": 1.012413129029557, "grad_norm": 0.27652910351753235, "learning_rate": 0.00014890807662130823, "loss": 11.6687, "step": 48365 }, { "epoch": 1.0124340617935192, "grad_norm": 0.31621885299682617, "learning_rate": 0.0001489061641979637, "loss": 11.6732, "step": 48366 }, { "epoch": 1.0124549945574814, "grad_norm": 0.3200669586658478, "learning_rate": 0.000148904251751109, "loss": 11.6544, "step": 48367 }, { "epoch": 1.0124759273214434, "grad_norm": 0.29871004819869995, "learning_rate": 0.00014890233928074506, "loss": 11.6594, "step": 48368 }, { "epoch": 1.0124968600854056, "grad_norm": 0.2901622951030731, "learning_rate": 0.00014890042678687276, "loss": 11.6727, "step": 48369 }, { "epoch": 1.0125177928493678, "grad_norm": 0.37657439708709717, "learning_rate": 0.00014889851426949305, "loss": 11.6702, "step": 48370 }, { "epoch": 1.01253872561333, "grad_norm": 0.29625603556632996, "learning_rate": 0.00014889660172860683, "loss": 11.6555, "step": 48371 }, { "epoch": 1.0125596583772922, "grad_norm": 0.3154050409793854, "learning_rate": 0.00014889468916421506, "loss": 11.6663, "step": 48372 }, { "epoch": 1.0125805911412542, "grad_norm": 0.32787615060806274, "learning_rate": 0.00014889277657631863, "loss": 11.6881, "step": 48373 }, { "epoch": 1.0126015239052164, "grad_norm": 0.26666373014450073, "learning_rate": 0.00014889086396491848, "loss": 11.6666, "step": 48374 }, { "epoch": 1.0126224566691786, "grad_norm": 0.2988928258419037, "learning_rate": 0.00014888895133001548, "loss": 11.6691, "step": 48375 }, { "epoch": 1.0126433894331408, "grad_norm": 0.35180187225341797, "learning_rate": 0.0001488870386716106, "loss": 11.6735, "step": 48376 }, { "epoch": 1.012664322197103, "grad_norm": 0.2716693580150604, "learning_rate": 0.00014888512598970472, "loss": 11.6597, "step": 48377 }, { "epoch": 1.012685254961065, "grad_norm": 0.2870868146419525, "learning_rate": 0.0001488832132842988, "loss": 11.6835, "step": 48378 }, { "epoch": 1.0127061877250272, "grad_norm": 0.2893925607204437, "learning_rate": 0.0001488813005553937, "loss": 11.6726, "step": 48379 }, { "epoch": 1.0127271204889894, "grad_norm": 0.3119063079357147, "learning_rate": 0.0001488793878029904, "loss": 11.6765, "step": 48380 }, { "epoch": 1.0127480532529516, "grad_norm": 0.34110453724861145, "learning_rate": 0.00014887747502708985, "loss": 11.6702, "step": 48381 }, { "epoch": 1.0127689860169136, "grad_norm": 0.2669612169265747, "learning_rate": 0.00014887556222769287, "loss": 11.6583, "step": 48382 }, { "epoch": 1.0127899187808758, "grad_norm": 0.2768861651420593, "learning_rate": 0.0001488736494048004, "loss": 11.6622, "step": 48383 }, { "epoch": 1.012810851544838, "grad_norm": 0.35053423047065735, "learning_rate": 0.00014887173655841345, "loss": 11.6691, "step": 48384 }, { "epoch": 1.0128317843088002, "grad_norm": 0.3779655396938324, "learning_rate": 0.00014886982368853287, "loss": 11.6671, "step": 48385 }, { "epoch": 1.0128527170727624, "grad_norm": 0.27422812581062317, "learning_rate": 0.00014886791079515954, "loss": 11.6706, "step": 48386 }, { "epoch": 1.0128736498367243, "grad_norm": 0.3510739505290985, "learning_rate": 0.00014886599787829446, "loss": 11.6793, "step": 48387 }, { "epoch": 1.0128945826006865, "grad_norm": 0.3773084580898285, "learning_rate": 0.0001488640849379385, "loss": 11.6732, "step": 48388 }, { "epoch": 1.0129155153646487, "grad_norm": 0.3374858796596527, "learning_rate": 0.0001488621719740926, "loss": 11.6859, "step": 48389 }, { "epoch": 1.012936448128611, "grad_norm": 0.37036144733428955, "learning_rate": 0.0001488602589867577, "loss": 11.6566, "step": 48390 }, { "epoch": 1.0129573808925731, "grad_norm": 0.5850666165351868, "learning_rate": 0.0001488583459759347, "loss": 11.6758, "step": 48391 }, { "epoch": 1.0129783136565351, "grad_norm": 0.297541081905365, "learning_rate": 0.00014885643294162444, "loss": 11.6568, "step": 48392 }, { "epoch": 1.0129992464204973, "grad_norm": 0.3749132752418518, "learning_rate": 0.00014885451988382796, "loss": 11.6488, "step": 48393 }, { "epoch": 1.0130201791844595, "grad_norm": 0.328685462474823, "learning_rate": 0.00014885260680254616, "loss": 11.6618, "step": 48394 }, { "epoch": 1.0130411119484217, "grad_norm": 0.32948246598243713, "learning_rate": 0.0001488506936977799, "loss": 11.6658, "step": 48395 }, { "epoch": 1.013062044712384, "grad_norm": 0.35060766339302063, "learning_rate": 0.00014884878056953014, "loss": 11.6811, "step": 48396 }, { "epoch": 1.013082977476346, "grad_norm": 0.27123144268989563, "learning_rate": 0.00014884686741779778, "loss": 11.6594, "step": 48397 }, { "epoch": 1.013103910240308, "grad_norm": 0.36023107171058655, "learning_rate": 0.00014884495424258377, "loss": 11.6747, "step": 48398 }, { "epoch": 1.0131248430042703, "grad_norm": 0.33871081471443176, "learning_rate": 0.000148843041043889, "loss": 11.6633, "step": 48399 }, { "epoch": 1.0131457757682325, "grad_norm": 0.3238653540611267, "learning_rate": 0.0001488411278217144, "loss": 11.6863, "step": 48400 }, { "epoch": 1.0131667085321945, "grad_norm": 0.38069218397140503, "learning_rate": 0.0001488392145760609, "loss": 11.67, "step": 48401 }, { "epoch": 1.0131876412961567, "grad_norm": 0.31265342235565186, "learning_rate": 0.00014883730130692938, "loss": 11.659, "step": 48402 }, { "epoch": 1.0132085740601189, "grad_norm": 0.3333856761455536, "learning_rate": 0.00014883538801432084, "loss": 11.6709, "step": 48403 }, { "epoch": 1.013229506824081, "grad_norm": 0.3051224648952484, "learning_rate": 0.0001488334746982361, "loss": 11.6698, "step": 48404 }, { "epoch": 1.0132504395880433, "grad_norm": 0.3255153000354767, "learning_rate": 0.00014883156135867616, "loss": 11.6805, "step": 48405 }, { "epoch": 1.0132713723520053, "grad_norm": 0.3993832468986511, "learning_rate": 0.00014882964799564193, "loss": 11.6796, "step": 48406 }, { "epoch": 1.0132923051159675, "grad_norm": 0.26571619510650635, "learning_rate": 0.0001488277346091343, "loss": 11.6686, "step": 48407 }, { "epoch": 1.0133132378799297, "grad_norm": 0.43542447686195374, "learning_rate": 0.00014882582119915417, "loss": 11.6781, "step": 48408 }, { "epoch": 1.0133341706438919, "grad_norm": 0.2830163836479187, "learning_rate": 0.0001488239077657025, "loss": 11.669, "step": 48409 }, { "epoch": 1.013355103407854, "grad_norm": 0.3729563057422638, "learning_rate": 0.0001488219943087802, "loss": 11.6609, "step": 48410 }, { "epoch": 1.013376036171816, "grad_norm": 0.2798320949077606, "learning_rate": 0.0001488200808283882, "loss": 11.6767, "step": 48411 }, { "epoch": 1.0133969689357782, "grad_norm": 0.26766499876976013, "learning_rate": 0.0001488181673245274, "loss": 11.6833, "step": 48412 }, { "epoch": 1.0134179016997404, "grad_norm": 0.6383970975875854, "learning_rate": 0.00014881625379719874, "loss": 11.6897, "step": 48413 }, { "epoch": 1.0134388344637026, "grad_norm": 0.3275104761123657, "learning_rate": 0.00014881434024640312, "loss": 11.6553, "step": 48414 }, { "epoch": 1.0134597672276648, "grad_norm": 0.2675684094429016, "learning_rate": 0.00014881242667214145, "loss": 11.6709, "step": 48415 }, { "epoch": 1.0134806999916268, "grad_norm": 0.3050551414489746, "learning_rate": 0.00014881051307441472, "loss": 11.6763, "step": 48416 }, { "epoch": 1.013501632755589, "grad_norm": 0.42548611760139465, "learning_rate": 0.00014880859945322374, "loss": 11.662, "step": 48417 }, { "epoch": 1.0135225655195512, "grad_norm": 0.3171602189540863, "learning_rate": 0.00014880668580856955, "loss": 11.6727, "step": 48418 }, { "epoch": 1.0135434982835134, "grad_norm": 0.4028572738170624, "learning_rate": 0.00014880477214045294, "loss": 11.6754, "step": 48419 }, { "epoch": 1.0135644310474754, "grad_norm": 0.30440330505371094, "learning_rate": 0.00014880285844887494, "loss": 11.655, "step": 48420 }, { "epoch": 1.0135853638114376, "grad_norm": 0.28661563992500305, "learning_rate": 0.00014880094473383642, "loss": 11.6525, "step": 48421 }, { "epoch": 1.0136062965753998, "grad_norm": 0.2680494785308838, "learning_rate": 0.0001487990309953383, "loss": 11.6597, "step": 48422 }, { "epoch": 1.013627229339362, "grad_norm": 0.2828056812286377, "learning_rate": 0.00014879711723338152, "loss": 11.6665, "step": 48423 }, { "epoch": 1.0136481621033242, "grad_norm": 0.42957091331481934, "learning_rate": 0.00014879520344796698, "loss": 11.6917, "step": 48424 }, { "epoch": 1.0136690948672862, "grad_norm": 0.31336843967437744, "learning_rate": 0.0001487932896390956, "loss": 11.6683, "step": 48425 }, { "epoch": 1.0136900276312484, "grad_norm": 0.3603871762752533, "learning_rate": 0.0001487913758067683, "loss": 11.6698, "step": 48426 }, { "epoch": 1.0137109603952106, "grad_norm": 0.41418278217315674, "learning_rate": 0.000148789461950986, "loss": 11.6739, "step": 48427 }, { "epoch": 1.0137318931591728, "grad_norm": 0.3326740860939026, "learning_rate": 0.00014878754807174968, "loss": 11.6596, "step": 48428 }, { "epoch": 1.013752825923135, "grad_norm": 0.2973899245262146, "learning_rate": 0.0001487856341690602, "loss": 11.6654, "step": 48429 }, { "epoch": 1.013773758687097, "grad_norm": 0.33501046895980835, "learning_rate": 0.00014878372024291846, "loss": 11.6584, "step": 48430 }, { "epoch": 1.0137946914510592, "grad_norm": 0.2934941053390503, "learning_rate": 0.0001487818062933254, "loss": 11.659, "step": 48431 }, { "epoch": 1.0138156242150214, "grad_norm": 0.32395026087760925, "learning_rate": 0.000148779892320282, "loss": 11.6646, "step": 48432 }, { "epoch": 1.0138365569789836, "grad_norm": 0.3205837309360504, "learning_rate": 0.0001487779783237891, "loss": 11.6651, "step": 48433 }, { "epoch": 1.0138574897429458, "grad_norm": 0.33298948407173157, "learning_rate": 0.0001487760643038476, "loss": 11.6759, "step": 48434 }, { "epoch": 1.0138784225069077, "grad_norm": 0.3830105662345886, "learning_rate": 0.00014877415026045854, "loss": 11.6826, "step": 48435 }, { "epoch": 1.01389935527087, "grad_norm": 0.321580708026886, "learning_rate": 0.00014877223619362272, "loss": 11.6766, "step": 48436 }, { "epoch": 1.0139202880348321, "grad_norm": 0.2955392301082611, "learning_rate": 0.0001487703221033411, "loss": 11.6535, "step": 48437 }, { "epoch": 1.0139412207987943, "grad_norm": 0.3277566730976105, "learning_rate": 0.00014876840798961464, "loss": 11.6668, "step": 48438 }, { "epoch": 1.0139621535627563, "grad_norm": 0.30112916231155396, "learning_rate": 0.00014876649385244423, "loss": 11.6547, "step": 48439 }, { "epoch": 1.0139830863267185, "grad_norm": 0.42158597707748413, "learning_rate": 0.0001487645796918308, "loss": 11.673, "step": 48440 }, { "epoch": 1.0140040190906807, "grad_norm": 0.42228302359580994, "learning_rate": 0.00014876266550777523, "loss": 11.6809, "step": 48441 }, { "epoch": 1.014024951854643, "grad_norm": 0.26119521260261536, "learning_rate": 0.00014876075130027848, "loss": 11.6684, "step": 48442 }, { "epoch": 1.0140458846186051, "grad_norm": 0.2921508252620697, "learning_rate": 0.00014875883706934147, "loss": 11.6762, "step": 48443 }, { "epoch": 1.014066817382567, "grad_norm": 0.28335705399513245, "learning_rate": 0.0001487569228149651, "loss": 11.6669, "step": 48444 }, { "epoch": 1.0140877501465293, "grad_norm": 0.40720734000205994, "learning_rate": 0.0001487550085371503, "loss": 11.6865, "step": 48445 }, { "epoch": 1.0141086829104915, "grad_norm": 0.3211001753807068, "learning_rate": 0.00014875309423589798, "loss": 11.6678, "step": 48446 }, { "epoch": 1.0141296156744537, "grad_norm": 0.32644665241241455, "learning_rate": 0.0001487511799112091, "loss": 11.6575, "step": 48447 }, { "epoch": 1.014150548438416, "grad_norm": 0.3354087769985199, "learning_rate": 0.00014874926556308454, "loss": 11.6563, "step": 48448 }, { "epoch": 1.0141714812023779, "grad_norm": 0.3724779188632965, "learning_rate": 0.0001487473511915252, "loss": 11.6655, "step": 48449 }, { "epoch": 1.01419241396634, "grad_norm": 0.2833350598812103, "learning_rate": 0.00014874543679653206, "loss": 11.6669, "step": 48450 }, { "epoch": 1.0142133467303023, "grad_norm": 0.2932845652103424, "learning_rate": 0.000148743522378106, "loss": 11.6824, "step": 48451 }, { "epoch": 1.0142342794942645, "grad_norm": 0.2732655704021454, "learning_rate": 0.00014874160793624798, "loss": 11.6832, "step": 48452 }, { "epoch": 1.0142552122582267, "grad_norm": 0.32169508934020996, "learning_rate": 0.00014873969347095885, "loss": 11.6541, "step": 48453 }, { "epoch": 1.0142761450221887, "grad_norm": 0.28355011343955994, "learning_rate": 0.0001487377789822396, "loss": 11.6729, "step": 48454 }, { "epoch": 1.0142970777861509, "grad_norm": 0.36566850543022156, "learning_rate": 0.00014873586447009112, "loss": 11.6852, "step": 48455 }, { "epoch": 1.014318010550113, "grad_norm": 0.3096957504749298, "learning_rate": 0.00014873394993451434, "loss": 11.6717, "step": 48456 }, { "epoch": 1.0143389433140753, "grad_norm": 0.28837069869041443, "learning_rate": 0.00014873203537551016, "loss": 11.668, "step": 48457 }, { "epoch": 1.0143598760780372, "grad_norm": 0.32296282052993774, "learning_rate": 0.0001487301207930795, "loss": 11.6605, "step": 48458 }, { "epoch": 1.0143808088419994, "grad_norm": 0.26296791434288025, "learning_rate": 0.00014872820618722332, "loss": 11.6666, "step": 48459 }, { "epoch": 1.0144017416059616, "grad_norm": 0.44973617792129517, "learning_rate": 0.00014872629155794251, "loss": 11.6814, "step": 48460 }, { "epoch": 1.0144226743699238, "grad_norm": 0.33296677470207214, "learning_rate": 0.00014872437690523798, "loss": 11.6507, "step": 48461 }, { "epoch": 1.014443607133886, "grad_norm": 0.4451492130756378, "learning_rate": 0.0001487224622291107, "loss": 11.682, "step": 48462 }, { "epoch": 1.014464539897848, "grad_norm": 0.2735723555088043, "learning_rate": 0.00014872054752956154, "loss": 11.6682, "step": 48463 }, { "epoch": 1.0144854726618102, "grad_norm": 0.33292803168296814, "learning_rate": 0.00014871863280659138, "loss": 11.6795, "step": 48464 }, { "epoch": 1.0145064054257724, "grad_norm": 0.293469101190567, "learning_rate": 0.0001487167180602013, "loss": 11.6416, "step": 48465 }, { "epoch": 1.0145273381897346, "grad_norm": 0.33692121505737305, "learning_rate": 0.00014871480329039204, "loss": 11.6689, "step": 48466 }, { "epoch": 1.0145482709536968, "grad_norm": 0.4047199785709381, "learning_rate": 0.00014871288849716463, "loss": 11.6755, "step": 48467 }, { "epoch": 1.0145692037176588, "grad_norm": 0.3480460047721863, "learning_rate": 0.00014871097368051995, "loss": 11.6749, "step": 48468 }, { "epoch": 1.014590136481621, "grad_norm": 0.2850017845630646, "learning_rate": 0.00014870905884045894, "loss": 11.6828, "step": 48469 }, { "epoch": 1.0146110692455832, "grad_norm": 0.32718193531036377, "learning_rate": 0.0001487071439769825, "loss": 11.679, "step": 48470 }, { "epoch": 1.0146320020095454, "grad_norm": 0.33730548620224, "learning_rate": 0.00014870522909009152, "loss": 11.6668, "step": 48471 }, { "epoch": 1.0146529347735076, "grad_norm": 0.301559716463089, "learning_rate": 0.00014870331417978703, "loss": 11.6868, "step": 48472 }, { "epoch": 1.0146738675374696, "grad_norm": 0.29711753129959106, "learning_rate": 0.00014870139924606982, "loss": 11.6771, "step": 48473 }, { "epoch": 1.0146948003014318, "grad_norm": 0.3231567442417145, "learning_rate": 0.00014869948428894095, "loss": 11.6841, "step": 48474 }, { "epoch": 1.014715733065394, "grad_norm": 0.29559752345085144, "learning_rate": 0.00014869756930840116, "loss": 11.6569, "step": 48475 }, { "epoch": 1.0147366658293562, "grad_norm": 0.26429158449172974, "learning_rate": 0.00014869565430445153, "loss": 11.6756, "step": 48476 }, { "epoch": 1.0147575985933182, "grad_norm": 0.3649146258831024, "learning_rate": 0.00014869373927709295, "loss": 11.6561, "step": 48477 }, { "epoch": 1.0147785313572804, "grad_norm": 0.37013334035873413, "learning_rate": 0.00014869182422632627, "loss": 11.6674, "step": 48478 }, { "epoch": 1.0147994641212426, "grad_norm": 0.3273736834526062, "learning_rate": 0.0001486899091521525, "loss": 11.6796, "step": 48479 }, { "epoch": 1.0148203968852048, "grad_norm": 0.2975809574127197, "learning_rate": 0.00014868799405457246, "loss": 11.6612, "step": 48480 }, { "epoch": 1.014841329649167, "grad_norm": 0.33901485800743103, "learning_rate": 0.00014868607893358713, "loss": 11.6654, "step": 48481 }, { "epoch": 1.014862262413129, "grad_norm": 0.28935927152633667, "learning_rate": 0.00014868416378919746, "loss": 11.6632, "step": 48482 }, { "epoch": 1.0148831951770911, "grad_norm": 0.2616446316242218, "learning_rate": 0.0001486822486214043, "loss": 11.6648, "step": 48483 }, { "epoch": 1.0149041279410533, "grad_norm": 0.3376530408859253, "learning_rate": 0.00014868033343020865, "loss": 11.6737, "step": 48484 }, { "epoch": 1.0149250607050155, "grad_norm": 0.30702728033065796, "learning_rate": 0.00014867841821561135, "loss": 11.6583, "step": 48485 }, { "epoch": 1.0149459934689777, "grad_norm": 0.3377059996128082, "learning_rate": 0.00014867650297761337, "loss": 11.669, "step": 48486 }, { "epoch": 1.0149669262329397, "grad_norm": 0.27425870299339294, "learning_rate": 0.00014867458771621563, "loss": 11.6696, "step": 48487 }, { "epoch": 1.014987858996902, "grad_norm": 0.3616510033607483, "learning_rate": 0.00014867267243141903, "loss": 11.6723, "step": 48488 }, { "epoch": 1.0150087917608641, "grad_norm": 0.24200953543186188, "learning_rate": 0.00014867075712322452, "loss": 11.6707, "step": 48489 }, { "epoch": 1.0150297245248263, "grad_norm": 0.35290294885635376, "learning_rate": 0.00014866884179163298, "loss": 11.6598, "step": 48490 }, { "epoch": 1.0150506572887885, "grad_norm": 0.3185769021511078, "learning_rate": 0.00014866692643664532, "loss": 11.6596, "step": 48491 }, { "epoch": 1.0150715900527505, "grad_norm": 0.297709196805954, "learning_rate": 0.00014866501105826257, "loss": 11.6658, "step": 48492 }, { "epoch": 1.0150925228167127, "grad_norm": 0.2561257779598236, "learning_rate": 0.00014866309565648552, "loss": 11.6644, "step": 48493 }, { "epoch": 1.015113455580675, "grad_norm": 0.33023011684417725, "learning_rate": 0.00014866118023131516, "loss": 11.6576, "step": 48494 }, { "epoch": 1.015134388344637, "grad_norm": 0.5813239216804504, "learning_rate": 0.0001486592647827524, "loss": 11.6915, "step": 48495 }, { "epoch": 1.015155321108599, "grad_norm": 0.39354151487350464, "learning_rate": 0.00014865734931079815, "loss": 11.6862, "step": 48496 }, { "epoch": 1.0151762538725613, "grad_norm": 0.3051413893699646, "learning_rate": 0.00014865543381545335, "loss": 11.6683, "step": 48497 }, { "epoch": 1.0151971866365235, "grad_norm": 0.2902381420135498, "learning_rate": 0.00014865351829671889, "loss": 11.6741, "step": 48498 }, { "epoch": 1.0152181194004857, "grad_norm": 0.4321146011352539, "learning_rate": 0.00014865160275459574, "loss": 11.6635, "step": 48499 }, { "epoch": 1.0152390521644479, "grad_norm": 0.33548709750175476, "learning_rate": 0.00014864968718908474, "loss": 11.6732, "step": 48500 }, { "epoch": 1.0152599849284099, "grad_norm": 0.3053630292415619, "learning_rate": 0.0001486477716001869, "loss": 11.6686, "step": 48501 }, { "epoch": 1.015280917692372, "grad_norm": 0.34054049849510193, "learning_rate": 0.00014864585598790307, "loss": 11.6688, "step": 48502 }, { "epoch": 1.0153018504563343, "grad_norm": 0.30580922961235046, "learning_rate": 0.0001486439403522342, "loss": 11.6771, "step": 48503 }, { "epoch": 1.0153227832202965, "grad_norm": 0.28045690059661865, "learning_rate": 0.00014864202469318127, "loss": 11.6537, "step": 48504 }, { "epoch": 1.0153437159842587, "grad_norm": 0.31632664799690247, "learning_rate": 0.0001486401090107451, "loss": 11.6644, "step": 48505 }, { "epoch": 1.0153646487482206, "grad_norm": 0.33603036403656006, "learning_rate": 0.00014863819330492665, "loss": 11.6608, "step": 48506 }, { "epoch": 1.0153855815121828, "grad_norm": 0.33275356888771057, "learning_rate": 0.00014863627757572687, "loss": 11.6557, "step": 48507 }, { "epoch": 1.015406514276145, "grad_norm": 0.6865434646606445, "learning_rate": 0.00014863436182314664, "loss": 11.6391, "step": 48508 }, { "epoch": 1.0154274470401072, "grad_norm": 0.24812817573547363, "learning_rate": 0.0001486324460471869, "loss": 11.6653, "step": 48509 }, { "epoch": 1.0154483798040694, "grad_norm": 0.34715011715888977, "learning_rate": 0.00014863053024784858, "loss": 11.6674, "step": 48510 }, { "epoch": 1.0154693125680314, "grad_norm": 0.35364118218421936, "learning_rate": 0.00014862861442513259, "loss": 11.6653, "step": 48511 }, { "epoch": 1.0154902453319936, "grad_norm": 0.28501924872398376, "learning_rate": 0.0001486266985790398, "loss": 11.6662, "step": 48512 }, { "epoch": 1.0155111780959558, "grad_norm": 0.42003950476646423, "learning_rate": 0.00014862478270957123, "loss": 11.6545, "step": 48513 }, { "epoch": 1.015532110859918, "grad_norm": 0.34980833530426025, "learning_rate": 0.00014862286681672777, "loss": 11.6757, "step": 48514 }, { "epoch": 1.01555304362388, "grad_norm": 0.28358209133148193, "learning_rate": 0.00014862095090051027, "loss": 11.6615, "step": 48515 }, { "epoch": 1.0155739763878422, "grad_norm": 0.26339948177337646, "learning_rate": 0.00014861903496091975, "loss": 11.6606, "step": 48516 }, { "epoch": 1.0155949091518044, "grad_norm": 0.3333493769168854, "learning_rate": 0.00014861711899795705, "loss": 11.6642, "step": 48517 }, { "epoch": 1.0156158419157666, "grad_norm": 0.2494930475950241, "learning_rate": 0.00014861520301162316, "loss": 11.6587, "step": 48518 }, { "epoch": 1.0156367746797288, "grad_norm": 0.3468918800354004, "learning_rate": 0.00014861328700191894, "loss": 11.685, "step": 48519 }, { "epoch": 1.0156577074436908, "grad_norm": 0.3259759545326233, "learning_rate": 0.00014861137096884534, "loss": 11.6448, "step": 48520 }, { "epoch": 1.015678640207653, "grad_norm": 0.3109622895717621, "learning_rate": 0.00014860945491240333, "loss": 11.6698, "step": 48521 }, { "epoch": 1.0156995729716152, "grad_norm": 0.386344313621521, "learning_rate": 0.00014860753883259373, "loss": 11.6629, "step": 48522 }, { "epoch": 1.0157205057355774, "grad_norm": 0.35328882932662964, "learning_rate": 0.00014860562272941752, "loss": 11.6813, "step": 48523 }, { "epoch": 1.0157414384995396, "grad_norm": 0.28716379404067993, "learning_rate": 0.0001486037066028756, "loss": 11.6611, "step": 48524 }, { "epoch": 1.0157623712635016, "grad_norm": 0.31795552372932434, "learning_rate": 0.00014860179045296894, "loss": 11.669, "step": 48525 }, { "epoch": 1.0157833040274638, "grad_norm": 0.3302159309387207, "learning_rate": 0.00014859987427969838, "loss": 11.6557, "step": 48526 }, { "epoch": 1.015804236791426, "grad_norm": 0.3990330696105957, "learning_rate": 0.00014859795808306494, "loss": 11.6663, "step": 48527 }, { "epoch": 1.0158251695553882, "grad_norm": 0.26342350244522095, "learning_rate": 0.00014859604186306946, "loss": 11.6567, "step": 48528 }, { "epoch": 1.0158461023193504, "grad_norm": 0.40200677514076233, "learning_rate": 0.0001485941256197129, "loss": 11.6771, "step": 48529 }, { "epoch": 1.0158670350833123, "grad_norm": 0.41212037205696106, "learning_rate": 0.00014859220935299614, "loss": 11.6738, "step": 48530 }, { "epoch": 1.0158879678472745, "grad_norm": 0.31879574060440063, "learning_rate": 0.00014859029306292016, "loss": 11.6691, "step": 48531 }, { "epoch": 1.0159089006112367, "grad_norm": 0.27762675285339355, "learning_rate": 0.00014858837674948584, "loss": 11.6545, "step": 48532 }, { "epoch": 1.015929833375199, "grad_norm": 0.36368483304977417, "learning_rate": 0.0001485864604126941, "loss": 11.6672, "step": 48533 }, { "epoch": 1.015950766139161, "grad_norm": 0.30300673842430115, "learning_rate": 0.0001485845440525459, "loss": 11.6466, "step": 48534 }, { "epoch": 1.015971698903123, "grad_norm": 0.2771350145339966, "learning_rate": 0.00014858262766904212, "loss": 11.6789, "step": 48535 }, { "epoch": 1.0159926316670853, "grad_norm": 0.2969360649585724, "learning_rate": 0.00014858071126218372, "loss": 11.6734, "step": 48536 }, { "epoch": 1.0160135644310475, "grad_norm": 0.2745395302772522, "learning_rate": 0.00014857879483197157, "loss": 11.6584, "step": 48537 }, { "epoch": 1.0160344971950097, "grad_norm": 0.30773401260375977, "learning_rate": 0.00014857687837840664, "loss": 11.653, "step": 48538 }, { "epoch": 1.0160554299589717, "grad_norm": 0.29367655515670776, "learning_rate": 0.0001485749619014898, "loss": 11.6806, "step": 48539 }, { "epoch": 1.016076362722934, "grad_norm": 0.25620219111442566, "learning_rate": 0.00014857304540122205, "loss": 11.6675, "step": 48540 }, { "epoch": 1.016097295486896, "grad_norm": 0.3700109124183655, "learning_rate": 0.00014857112887760424, "loss": 11.6648, "step": 48541 }, { "epoch": 1.0161182282508583, "grad_norm": 0.28493285179138184, "learning_rate": 0.0001485692123306373, "loss": 11.6682, "step": 48542 }, { "epoch": 1.0161391610148205, "grad_norm": 0.27417898178100586, "learning_rate": 0.0001485672957603222, "loss": 11.6612, "step": 48543 }, { "epoch": 1.0161600937787825, "grad_norm": 0.2678561508655548, "learning_rate": 0.0001485653791666598, "loss": 11.6883, "step": 48544 }, { "epoch": 1.0161810265427447, "grad_norm": 0.3284026086330414, "learning_rate": 0.00014856346254965105, "loss": 11.6557, "step": 48545 }, { "epoch": 1.0162019593067069, "grad_norm": 0.38764727115631104, "learning_rate": 0.00014856154590929687, "loss": 11.6731, "step": 48546 }, { "epoch": 1.016222892070669, "grad_norm": 0.4265349507331848, "learning_rate": 0.00014855962924559818, "loss": 11.6811, "step": 48547 }, { "epoch": 1.0162438248346313, "grad_norm": 0.3632144331932068, "learning_rate": 0.00014855771255855592, "loss": 11.6624, "step": 48548 }, { "epoch": 1.0162647575985932, "grad_norm": 0.39164644479751587, "learning_rate": 0.00014855579584817098, "loss": 11.6757, "step": 48549 }, { "epoch": 1.0162856903625554, "grad_norm": 0.3440481722354889, "learning_rate": 0.00014855387911444433, "loss": 11.6629, "step": 48550 }, { "epoch": 1.0163066231265176, "grad_norm": 0.32297274470329285, "learning_rate": 0.00014855196235737683, "loss": 11.6663, "step": 48551 }, { "epoch": 1.0163275558904799, "grad_norm": 0.30131423473358154, "learning_rate": 0.00014855004557696942, "loss": 11.6755, "step": 48552 }, { "epoch": 1.0163484886544418, "grad_norm": 0.3345883786678314, "learning_rate": 0.00014854812877322304, "loss": 11.6797, "step": 48553 }, { "epoch": 1.016369421418404, "grad_norm": 0.39349663257598877, "learning_rate": 0.0001485462119461386, "loss": 11.6739, "step": 48554 }, { "epoch": 1.0163903541823662, "grad_norm": 0.32673364877700806, "learning_rate": 0.00014854429509571703, "loss": 11.6619, "step": 48555 }, { "epoch": 1.0164112869463284, "grad_norm": 0.2677559554576874, "learning_rate": 0.00014854237822195922, "loss": 11.6614, "step": 48556 }, { "epoch": 1.0164322197102906, "grad_norm": 0.3143807053565979, "learning_rate": 0.00014854046132486612, "loss": 11.6694, "step": 48557 }, { "epoch": 1.0164531524742526, "grad_norm": 0.33420515060424805, "learning_rate": 0.0001485385444044387, "loss": 11.6658, "step": 48558 }, { "epoch": 1.0164740852382148, "grad_norm": 0.26150476932525635, "learning_rate": 0.0001485366274606778, "loss": 11.6756, "step": 48559 }, { "epoch": 1.016495018002177, "grad_norm": 0.3017410635948181, "learning_rate": 0.00014853471049358435, "loss": 11.6748, "step": 48560 }, { "epoch": 1.0165159507661392, "grad_norm": 0.2889966070652008, "learning_rate": 0.0001485327935031593, "loss": 11.6658, "step": 48561 }, { "epoch": 1.0165368835301014, "grad_norm": 0.36685991287231445, "learning_rate": 0.00014853087648940356, "loss": 11.6609, "step": 48562 }, { "epoch": 1.0165578162940634, "grad_norm": 0.36091506481170654, "learning_rate": 0.00014852895945231806, "loss": 11.6664, "step": 48563 }, { "epoch": 1.0165787490580256, "grad_norm": 0.3625487685203552, "learning_rate": 0.0001485270423919037, "loss": 11.6703, "step": 48564 }, { "epoch": 1.0165996818219878, "grad_norm": 0.26634809374809265, "learning_rate": 0.00014852512530816146, "loss": 11.6742, "step": 48565 }, { "epoch": 1.01662061458595, "grad_norm": 0.3403410315513611, "learning_rate": 0.00014852320820109218, "loss": 11.6429, "step": 48566 }, { "epoch": 1.0166415473499122, "grad_norm": 0.2867613732814789, "learning_rate": 0.00014852129107069685, "loss": 11.6612, "step": 48567 }, { "epoch": 1.0166624801138742, "grad_norm": 0.2955450713634491, "learning_rate": 0.00014851937391697635, "loss": 11.6642, "step": 48568 }, { "epoch": 1.0166834128778364, "grad_norm": 0.3112655580043793, "learning_rate": 0.0001485174567399316, "loss": 11.6766, "step": 48569 }, { "epoch": 1.0167043456417986, "grad_norm": 0.3284023106098175, "learning_rate": 0.00014851553953956358, "loss": 11.6833, "step": 48570 }, { "epoch": 1.0167252784057608, "grad_norm": 0.31901368498802185, "learning_rate": 0.00014851362231587312, "loss": 11.6789, "step": 48571 }, { "epoch": 1.0167462111697227, "grad_norm": 0.31549665331840515, "learning_rate": 0.00014851170506886122, "loss": 11.6885, "step": 48572 }, { "epoch": 1.016767143933685, "grad_norm": 0.3248622417449951, "learning_rate": 0.00014850978779852876, "loss": 11.6706, "step": 48573 }, { "epoch": 1.0167880766976471, "grad_norm": 0.31626981496810913, "learning_rate": 0.00014850787050487666, "loss": 11.6793, "step": 48574 }, { "epoch": 1.0168090094616093, "grad_norm": 0.3310968279838562, "learning_rate": 0.00014850595318790589, "loss": 11.667, "step": 48575 }, { "epoch": 1.0168299422255715, "grad_norm": 0.2950645387172699, "learning_rate": 0.0001485040358476173, "loss": 11.6562, "step": 48576 }, { "epoch": 1.0168508749895335, "grad_norm": 0.3770310580730438, "learning_rate": 0.00014850211848401186, "loss": 11.6694, "step": 48577 }, { "epoch": 1.0168718077534957, "grad_norm": 0.3071921169757843, "learning_rate": 0.00014850020109709048, "loss": 11.6684, "step": 48578 }, { "epoch": 1.016892740517458, "grad_norm": 0.2858515977859497, "learning_rate": 0.0001484982836868541, "loss": 11.6712, "step": 48579 }, { "epoch": 1.0169136732814201, "grad_norm": 0.33515465259552, "learning_rate": 0.0001484963662533036, "loss": 11.6626, "step": 48580 }, { "epoch": 1.0169346060453823, "grad_norm": 0.35966888070106506, "learning_rate": 0.00014849444879643992, "loss": 11.6665, "step": 48581 }, { "epoch": 1.0169555388093443, "grad_norm": 0.3183380961418152, "learning_rate": 0.000148492531316264, "loss": 11.6612, "step": 48582 }, { "epoch": 1.0169764715733065, "grad_norm": 0.2678285241127014, "learning_rate": 0.00014849061381277674, "loss": 11.6568, "step": 48583 }, { "epoch": 1.0169974043372687, "grad_norm": 0.3122277557849884, "learning_rate": 0.00014848869628597907, "loss": 11.6763, "step": 48584 }, { "epoch": 1.017018337101231, "grad_norm": 0.32398897409439087, "learning_rate": 0.0001484867787358719, "loss": 11.6509, "step": 48585 }, { "epoch": 1.017039269865193, "grad_norm": 0.3278947174549103, "learning_rate": 0.0001484848611624562, "loss": 11.6725, "step": 48586 }, { "epoch": 1.017060202629155, "grad_norm": 0.3552868664264679, "learning_rate": 0.00014848294356573284, "loss": 11.6563, "step": 48587 }, { "epoch": 1.0170811353931173, "grad_norm": 0.3230583965778351, "learning_rate": 0.00014848102594570276, "loss": 11.6837, "step": 48588 }, { "epoch": 1.0171020681570795, "grad_norm": 0.4299251139163971, "learning_rate": 0.00014847910830236688, "loss": 11.6664, "step": 48589 }, { "epoch": 1.0171230009210417, "grad_norm": 0.3389170169830322, "learning_rate": 0.0001484771906357261, "loss": 11.6751, "step": 48590 }, { "epoch": 1.0171439336850037, "grad_norm": 0.27876222133636475, "learning_rate": 0.00014847527294578142, "loss": 11.6715, "step": 48591 }, { "epoch": 1.0171648664489659, "grad_norm": 0.301291823387146, "learning_rate": 0.00014847335523253364, "loss": 11.6507, "step": 48592 }, { "epoch": 1.017185799212928, "grad_norm": 0.3147284686565399, "learning_rate": 0.00014847143749598379, "loss": 11.664, "step": 48593 }, { "epoch": 1.0172067319768903, "grad_norm": 0.375161737203598, "learning_rate": 0.0001484695197361327, "loss": 11.6668, "step": 48594 }, { "epoch": 1.0172276647408525, "grad_norm": 0.28448811173439026, "learning_rate": 0.00014846760195298138, "loss": 11.6726, "step": 48595 }, { "epoch": 1.0172485975048144, "grad_norm": 0.2593804895877838, "learning_rate": 0.0001484656841465307, "loss": 11.6586, "step": 48596 }, { "epoch": 1.0172695302687766, "grad_norm": 0.2657108008861542, "learning_rate": 0.0001484637663167816, "loss": 11.652, "step": 48597 }, { "epoch": 1.0172904630327388, "grad_norm": 0.29913079738616943, "learning_rate": 0.000148461848463735, "loss": 11.6619, "step": 48598 }, { "epoch": 1.017311395796701, "grad_norm": 0.3277978301048279, "learning_rate": 0.0001484599305873918, "loss": 11.6733, "step": 48599 }, { "epoch": 1.0173323285606632, "grad_norm": 0.3839140236377716, "learning_rate": 0.00014845801268775294, "loss": 11.6707, "step": 48600 }, { "epoch": 1.0173532613246252, "grad_norm": 0.30156266689300537, "learning_rate": 0.00014845609476481935, "loss": 11.6722, "step": 48601 }, { "epoch": 1.0173741940885874, "grad_norm": 0.3202330470085144, "learning_rate": 0.00014845417681859195, "loss": 11.6818, "step": 48602 }, { "epoch": 1.0173951268525496, "grad_norm": 0.42608213424682617, "learning_rate": 0.00014845225884907166, "loss": 11.6688, "step": 48603 }, { "epoch": 1.0174160596165118, "grad_norm": 0.29125747084617615, "learning_rate": 0.00014845034085625937, "loss": 11.6705, "step": 48604 }, { "epoch": 1.017436992380474, "grad_norm": 0.27364617586135864, "learning_rate": 0.00014844842284015606, "loss": 11.6721, "step": 48605 }, { "epoch": 1.017457925144436, "grad_norm": 0.3562709093093872, "learning_rate": 0.0001484465048007626, "loss": 11.684, "step": 48606 }, { "epoch": 1.0174788579083982, "grad_norm": 0.31267493963241577, "learning_rate": 0.00014844458673807996, "loss": 11.6669, "step": 48607 }, { "epoch": 1.0174997906723604, "grad_norm": 0.3422512412071228, "learning_rate": 0.00014844266865210904, "loss": 11.6689, "step": 48608 }, { "epoch": 1.0175207234363226, "grad_norm": 0.4529802203178406, "learning_rate": 0.00014844075054285072, "loss": 11.6795, "step": 48609 }, { "epoch": 1.0175416562002846, "grad_norm": 0.3705497682094574, "learning_rate": 0.000148438832410306, "loss": 11.6806, "step": 48610 }, { "epoch": 1.0175625889642468, "grad_norm": 0.2691303491592407, "learning_rate": 0.00014843691425447576, "loss": 11.6854, "step": 48611 }, { "epoch": 1.017583521728209, "grad_norm": 0.24859267473220825, "learning_rate": 0.00014843499607536091, "loss": 11.6815, "step": 48612 }, { "epoch": 1.0176044544921712, "grad_norm": 0.3412027060985565, "learning_rate": 0.00014843307787296237, "loss": 11.6745, "step": 48613 }, { "epoch": 1.0176253872561334, "grad_norm": 0.32326605916023254, "learning_rate": 0.0001484311596472811, "loss": 11.6721, "step": 48614 }, { "epoch": 1.0176463200200954, "grad_norm": 0.33202725648880005, "learning_rate": 0.000148429241398318, "loss": 11.6697, "step": 48615 }, { "epoch": 1.0176672527840576, "grad_norm": 0.3254029154777527, "learning_rate": 0.000148427323126074, "loss": 11.6665, "step": 48616 }, { "epoch": 1.0176881855480198, "grad_norm": 0.3079219162464142, "learning_rate": 0.00014842540483054999, "loss": 11.6596, "step": 48617 }, { "epoch": 1.017709118311982, "grad_norm": 0.31847959756851196, "learning_rate": 0.00014842348651174692, "loss": 11.6575, "step": 48618 }, { "epoch": 1.0177300510759442, "grad_norm": 0.26695048809051514, "learning_rate": 0.00014842156816966573, "loss": 11.6563, "step": 48619 }, { "epoch": 1.0177509838399061, "grad_norm": 0.399056077003479, "learning_rate": 0.00014841964980430732, "loss": 11.6665, "step": 48620 }, { "epoch": 1.0177719166038683, "grad_norm": 0.28921204805374146, "learning_rate": 0.0001484177314156726, "loss": 11.649, "step": 48621 }, { "epoch": 1.0177928493678305, "grad_norm": 0.32105720043182373, "learning_rate": 0.00014841581300376253, "loss": 11.6749, "step": 48622 }, { "epoch": 1.0178137821317927, "grad_norm": 0.334643691778183, "learning_rate": 0.00014841389456857798, "loss": 11.6708, "step": 48623 }, { "epoch": 1.017834714895755, "grad_norm": 0.40072396397590637, "learning_rate": 0.0001484119761101199, "loss": 11.6808, "step": 48624 }, { "epoch": 1.017855647659717, "grad_norm": 0.3010352551937103, "learning_rate": 0.00014841005762838925, "loss": 11.6809, "step": 48625 }, { "epoch": 1.0178765804236791, "grad_norm": 0.31330615282058716, "learning_rate": 0.0001484081391233869, "loss": 11.6563, "step": 48626 }, { "epoch": 1.0178975131876413, "grad_norm": 0.3314913213253021, "learning_rate": 0.00014840622059511375, "loss": 11.6482, "step": 48627 }, { "epoch": 1.0179184459516035, "grad_norm": 0.31077417731285095, "learning_rate": 0.0001484043020435708, "loss": 11.6568, "step": 48628 }, { "epoch": 1.0179393787155655, "grad_norm": 0.3628939092159271, "learning_rate": 0.00014840238346875892, "loss": 11.6611, "step": 48629 }, { "epoch": 1.0179603114795277, "grad_norm": 0.37051570415496826, "learning_rate": 0.00014840046487067904, "loss": 11.6669, "step": 48630 }, { "epoch": 1.01798124424349, "grad_norm": 0.2886897921562195, "learning_rate": 0.0001483985462493321, "loss": 11.6663, "step": 48631 }, { "epoch": 1.018002177007452, "grad_norm": 0.2972396910190582, "learning_rate": 0.000148396627604719, "loss": 11.6805, "step": 48632 }, { "epoch": 1.0180231097714143, "grad_norm": 0.29620444774627686, "learning_rate": 0.00014839470893684067, "loss": 11.665, "step": 48633 }, { "epoch": 1.0180440425353763, "grad_norm": 0.24480308592319489, "learning_rate": 0.00014839279024569802, "loss": 11.6732, "step": 48634 }, { "epoch": 1.0180649752993385, "grad_norm": 0.3099781572818756, "learning_rate": 0.00014839087153129202, "loss": 11.6734, "step": 48635 }, { "epoch": 1.0180859080633007, "grad_norm": 0.34123116731643677, "learning_rate": 0.00014838895279362352, "loss": 11.6579, "step": 48636 }, { "epoch": 1.0181068408272629, "grad_norm": 0.3942033350467682, "learning_rate": 0.0001483870340326935, "loss": 11.6715, "step": 48637 }, { "epoch": 1.018127773591225, "grad_norm": 0.3316209614276886, "learning_rate": 0.00014838511524850286, "loss": 11.6723, "step": 48638 }, { "epoch": 1.018148706355187, "grad_norm": 0.31445276737213135, "learning_rate": 0.00014838319644105253, "loss": 11.6775, "step": 48639 }, { "epoch": 1.0181696391191493, "grad_norm": 0.45482057332992554, "learning_rate": 0.00014838127761034343, "loss": 11.6757, "step": 48640 }, { "epoch": 1.0181905718831115, "grad_norm": 0.3102456033229828, "learning_rate": 0.00014837935875637646, "loss": 11.68, "step": 48641 }, { "epoch": 1.0182115046470737, "grad_norm": 0.3251796066761017, "learning_rate": 0.0001483774398791526, "loss": 11.6782, "step": 48642 }, { "epoch": 1.0182324374110359, "grad_norm": 0.3641267716884613, "learning_rate": 0.0001483755209786727, "loss": 11.6548, "step": 48643 }, { "epoch": 1.0182533701749978, "grad_norm": 0.34047025442123413, "learning_rate": 0.00014837360205493773, "loss": 11.6682, "step": 48644 }, { "epoch": 1.01827430293896, "grad_norm": 0.21973051130771637, "learning_rate": 0.0001483716831079486, "loss": 11.6697, "step": 48645 }, { "epoch": 1.0182952357029222, "grad_norm": 0.2979859411716461, "learning_rate": 0.00014836976413770624, "loss": 11.67, "step": 48646 }, { "epoch": 1.0183161684668844, "grad_norm": 0.3617916405200958, "learning_rate": 0.00014836784514421156, "loss": 11.6609, "step": 48647 }, { "epoch": 1.0183371012308464, "grad_norm": 0.3721466660499573, "learning_rate": 0.0001483659261274655, "loss": 11.6721, "step": 48648 }, { "epoch": 1.0183580339948086, "grad_norm": 0.34302449226379395, "learning_rate": 0.00014836400708746896, "loss": 11.669, "step": 48649 }, { "epoch": 1.0183789667587708, "grad_norm": 0.366401344537735, "learning_rate": 0.00014836208802422287, "loss": 11.6575, "step": 48650 }, { "epoch": 1.018399899522733, "grad_norm": 0.33127591013908386, "learning_rate": 0.00014836016893772814, "loss": 11.6602, "step": 48651 }, { "epoch": 1.0184208322866952, "grad_norm": 0.26213669776916504, "learning_rate": 0.0001483582498279857, "loss": 11.6609, "step": 48652 }, { "epoch": 1.0184417650506572, "grad_norm": 0.26216667890548706, "learning_rate": 0.00014835633069499652, "loss": 11.6697, "step": 48653 }, { "epoch": 1.0184626978146194, "grad_norm": 0.312255322933197, "learning_rate": 0.0001483544115387615, "loss": 11.6739, "step": 48654 }, { "epoch": 1.0184836305785816, "grad_norm": 0.31930646300315857, "learning_rate": 0.0001483524923592815, "loss": 11.6653, "step": 48655 }, { "epoch": 1.0185045633425438, "grad_norm": 0.29481634497642517, "learning_rate": 0.00014835057315655748, "loss": 11.6545, "step": 48656 }, { "epoch": 1.018525496106506, "grad_norm": 0.3012768626213074, "learning_rate": 0.0001483486539305904, "loss": 11.6812, "step": 48657 }, { "epoch": 1.018546428870468, "grad_norm": 0.3135091960430145, "learning_rate": 0.00014834673468138113, "loss": 11.6681, "step": 48658 }, { "epoch": 1.0185673616344302, "grad_norm": 0.37930113077163696, "learning_rate": 0.00014834481540893063, "loss": 11.6639, "step": 48659 }, { "epoch": 1.0185882943983924, "grad_norm": 0.28025490045547485, "learning_rate": 0.0001483428961132398, "loss": 11.6611, "step": 48660 }, { "epoch": 1.0186092271623546, "grad_norm": 0.2843300402164459, "learning_rate": 0.0001483409767943096, "loss": 11.6809, "step": 48661 }, { "epoch": 1.0186301599263166, "grad_norm": 0.34082674980163574, "learning_rate": 0.00014833905745214088, "loss": 11.6654, "step": 48662 }, { "epoch": 1.0186510926902788, "grad_norm": 0.5543482899665833, "learning_rate": 0.00014833713808673463, "loss": 11.6731, "step": 48663 }, { "epoch": 1.018672025454241, "grad_norm": 0.2971247732639313, "learning_rate": 0.00014833521869809174, "loss": 11.6634, "step": 48664 }, { "epoch": 1.0186929582182032, "grad_norm": 0.5131649374961853, "learning_rate": 0.00014833329928621317, "loss": 11.6631, "step": 48665 }, { "epoch": 1.0187138909821654, "grad_norm": 0.40971139073371887, "learning_rate": 0.00014833137985109977, "loss": 11.6722, "step": 48666 }, { "epoch": 1.0187348237461273, "grad_norm": 0.31826844811439514, "learning_rate": 0.00014832946039275254, "loss": 11.6688, "step": 48667 }, { "epoch": 1.0187557565100895, "grad_norm": 0.40734636783599854, "learning_rate": 0.00014832754091117237, "loss": 11.6823, "step": 48668 }, { "epoch": 1.0187766892740517, "grad_norm": 0.3749217391014099, "learning_rate": 0.00014832562140636014, "loss": 11.6895, "step": 48669 }, { "epoch": 1.018797622038014, "grad_norm": 0.2984410226345062, "learning_rate": 0.00014832370187831687, "loss": 11.6546, "step": 48670 }, { "epoch": 1.0188185548019761, "grad_norm": 0.2932743728160858, "learning_rate": 0.0001483217823270434, "loss": 11.6733, "step": 48671 }, { "epoch": 1.0188394875659381, "grad_norm": 0.3545365035533905, "learning_rate": 0.00014831986275254072, "loss": 11.6668, "step": 48672 }, { "epoch": 1.0188604203299003, "grad_norm": 0.3252564072608948, "learning_rate": 0.00014831794315480967, "loss": 11.6561, "step": 48673 }, { "epoch": 1.0188813530938625, "grad_norm": 0.40301311016082764, "learning_rate": 0.0001483160235338512, "loss": 11.6607, "step": 48674 }, { "epoch": 1.0189022858578247, "grad_norm": 0.36094731092453003, "learning_rate": 0.0001483141038896663, "loss": 11.6718, "step": 48675 }, { "epoch": 1.018923218621787, "grad_norm": 0.37110066413879395, "learning_rate": 0.0001483121842222558, "loss": 11.687, "step": 48676 }, { "epoch": 1.018944151385749, "grad_norm": 0.44538968801498413, "learning_rate": 0.0001483102645316207, "loss": 11.66, "step": 48677 }, { "epoch": 1.018965084149711, "grad_norm": 0.25359034538269043, "learning_rate": 0.00014830834481776184, "loss": 11.6724, "step": 48678 }, { "epoch": 1.0189860169136733, "grad_norm": 0.2727978229522705, "learning_rate": 0.00014830642508068022, "loss": 11.6718, "step": 48679 }, { "epoch": 1.0190069496776355, "grad_norm": 0.31494957208633423, "learning_rate": 0.00014830450532037675, "loss": 11.6735, "step": 48680 }, { "epoch": 1.0190278824415975, "grad_norm": 0.3065771162509918, "learning_rate": 0.00014830258553685233, "loss": 11.6609, "step": 48681 }, { "epoch": 1.0190488152055597, "grad_norm": 0.2851351797580719, "learning_rate": 0.00014830066573010787, "loss": 11.6626, "step": 48682 }, { "epoch": 1.0190697479695219, "grad_norm": 0.28448155522346497, "learning_rate": 0.00014829874590014433, "loss": 11.663, "step": 48683 }, { "epoch": 1.019090680733484, "grad_norm": 0.27026039361953735, "learning_rate": 0.0001482968260469626, "loss": 11.6596, "step": 48684 }, { "epoch": 1.0191116134974463, "grad_norm": 0.33643534779548645, "learning_rate": 0.00014829490617056362, "loss": 11.6742, "step": 48685 }, { "epoch": 1.0191325462614083, "grad_norm": 0.26034876704216003, "learning_rate": 0.00014829298627094833, "loss": 11.6768, "step": 48686 }, { "epoch": 1.0191534790253705, "grad_norm": 0.46937838196754456, "learning_rate": 0.0001482910663481176, "loss": 11.6806, "step": 48687 }, { "epoch": 1.0191744117893327, "grad_norm": 0.2583456039428711, "learning_rate": 0.00014828914640207242, "loss": 11.6484, "step": 48688 }, { "epoch": 1.0191953445532949, "grad_norm": 0.33448418974876404, "learning_rate": 0.00014828722643281365, "loss": 11.6653, "step": 48689 }, { "epoch": 1.019216277317257, "grad_norm": 0.35687217116355896, "learning_rate": 0.00014828530644034227, "loss": 11.6703, "step": 48690 }, { "epoch": 1.019237210081219, "grad_norm": 0.3575538098812103, "learning_rate": 0.00014828338642465915, "loss": 11.6645, "step": 48691 }, { "epoch": 1.0192581428451812, "grad_norm": 0.33838126063346863, "learning_rate": 0.00014828146638576527, "loss": 11.6775, "step": 48692 }, { "epoch": 1.0192790756091434, "grad_norm": 0.4002613425254822, "learning_rate": 0.0001482795463236615, "loss": 11.6519, "step": 48693 }, { "epoch": 1.0193000083731056, "grad_norm": 0.30027303099632263, "learning_rate": 0.00014827762623834877, "loss": 11.666, "step": 48694 }, { "epoch": 1.0193209411370678, "grad_norm": 0.3233940601348877, "learning_rate": 0.00014827570612982804, "loss": 11.6695, "step": 48695 }, { "epoch": 1.0193418739010298, "grad_norm": 0.3018259108066559, "learning_rate": 0.0001482737859981002, "loss": 11.6756, "step": 48696 }, { "epoch": 1.019362806664992, "grad_norm": 0.2808917462825775, "learning_rate": 0.0001482718658431662, "loss": 11.6629, "step": 48697 }, { "epoch": 1.0193837394289542, "grad_norm": 0.32048070430755615, "learning_rate": 0.00014826994566502694, "loss": 11.6707, "step": 48698 }, { "epoch": 1.0194046721929164, "grad_norm": 0.3135462999343872, "learning_rate": 0.00014826802546368334, "loss": 11.6715, "step": 48699 }, { "epoch": 1.0194256049568784, "grad_norm": 0.25250443816185, "learning_rate": 0.00014826610523913637, "loss": 11.6716, "step": 48700 }, { "epoch": 1.0194465377208406, "grad_norm": 0.3555011451244354, "learning_rate": 0.00014826418499138685, "loss": 11.6625, "step": 48701 }, { "epoch": 1.0194674704848028, "grad_norm": 0.3194887638092041, "learning_rate": 0.00014826226472043584, "loss": 11.6944, "step": 48702 }, { "epoch": 1.019488403248765, "grad_norm": 0.9156790375709534, "learning_rate": 0.00014826034442628414, "loss": 11.5864, "step": 48703 }, { "epoch": 1.0195093360127272, "grad_norm": 0.28980782628059387, "learning_rate": 0.00014825842410893275, "loss": 11.6812, "step": 48704 }, { "epoch": 1.0195302687766892, "grad_norm": 0.354745477437973, "learning_rate": 0.00014825650376838254, "loss": 11.6653, "step": 48705 }, { "epoch": 1.0195512015406514, "grad_norm": 0.28954625129699707, "learning_rate": 0.00014825458340463448, "loss": 11.6797, "step": 48706 }, { "epoch": 1.0195721343046136, "grad_norm": 0.3524492084980011, "learning_rate": 0.00014825266301768947, "loss": 11.6815, "step": 48707 }, { "epoch": 1.0195930670685758, "grad_norm": 0.31996849179267883, "learning_rate": 0.00014825074260754842, "loss": 11.6615, "step": 48708 }, { "epoch": 1.019613999832538, "grad_norm": 0.3541804850101471, "learning_rate": 0.00014824882217421233, "loss": 11.6525, "step": 48709 }, { "epoch": 1.0196349325965, "grad_norm": 0.32592135667800903, "learning_rate": 0.00014824690171768204, "loss": 11.6753, "step": 48710 }, { "epoch": 1.0196558653604622, "grad_norm": 0.3177110254764557, "learning_rate": 0.00014824498123795847, "loss": 11.6802, "step": 48711 }, { "epoch": 1.0196767981244244, "grad_norm": 0.48055776953697205, "learning_rate": 0.0001482430607350426, "loss": 11.696, "step": 48712 }, { "epoch": 1.0196977308883866, "grad_norm": 0.3461054265499115, "learning_rate": 0.0001482411402089353, "loss": 11.6761, "step": 48713 }, { "epoch": 1.0197186636523488, "grad_norm": 0.3539066016674042, "learning_rate": 0.00014823921965963755, "loss": 11.6702, "step": 48714 }, { "epoch": 1.0197395964163107, "grad_norm": 0.3562423288822174, "learning_rate": 0.0001482372990871502, "loss": 11.6608, "step": 48715 }, { "epoch": 1.019760529180273, "grad_norm": 0.25207218527793884, "learning_rate": 0.00014823537849147425, "loss": 11.647, "step": 48716 }, { "epoch": 1.0197814619442351, "grad_norm": 0.2816852629184723, "learning_rate": 0.00014823345787261057, "loss": 11.6616, "step": 48717 }, { "epoch": 1.0198023947081973, "grad_norm": 0.2759389877319336, "learning_rate": 0.00014823153723056006, "loss": 11.6745, "step": 48718 }, { "epoch": 1.0198233274721593, "grad_norm": 0.32856467366218567, "learning_rate": 0.00014822961656532375, "loss": 11.6838, "step": 48719 }, { "epoch": 1.0198442602361215, "grad_norm": 0.32978424429893494, "learning_rate": 0.00014822769587690245, "loss": 11.6762, "step": 48720 }, { "epoch": 1.0198651930000837, "grad_norm": 0.2692468464374542, "learning_rate": 0.00014822577516529717, "loss": 11.6579, "step": 48721 }, { "epoch": 1.019886125764046, "grad_norm": 0.5422027707099915, "learning_rate": 0.00014822385443050877, "loss": 11.6649, "step": 48722 }, { "epoch": 1.0199070585280081, "grad_norm": 0.3509475290775299, "learning_rate": 0.00014822193367253819, "loss": 11.6629, "step": 48723 }, { "epoch": 1.01992799129197, "grad_norm": 0.2931118607521057, "learning_rate": 0.00014822001289138638, "loss": 11.641, "step": 48724 }, { "epoch": 1.0199489240559323, "grad_norm": 0.34063735604286194, "learning_rate": 0.00014821809208705422, "loss": 11.6698, "step": 48725 }, { "epoch": 1.0199698568198945, "grad_norm": 0.23947346210479736, "learning_rate": 0.00014821617125954269, "loss": 11.6709, "step": 48726 }, { "epoch": 1.0199907895838567, "grad_norm": 0.3224680721759796, "learning_rate": 0.00014821425040885264, "loss": 11.6891, "step": 48727 }, { "epoch": 1.020011722347819, "grad_norm": 0.34214267134666443, "learning_rate": 0.00014821232953498504, "loss": 11.6663, "step": 48728 }, { "epoch": 1.0200326551117809, "grad_norm": 0.3515869975090027, "learning_rate": 0.00014821040863794083, "loss": 11.6673, "step": 48729 }, { "epoch": 1.020053587875743, "grad_norm": 0.34964823722839355, "learning_rate": 0.0001482084877177209, "loss": 11.6676, "step": 48730 }, { "epoch": 1.0200745206397053, "grad_norm": 0.3214593529701233, "learning_rate": 0.0001482065667743262, "loss": 11.6741, "step": 48731 }, { "epoch": 1.0200954534036675, "grad_norm": 0.3606087863445282, "learning_rate": 0.00014820464580775765, "loss": 11.6508, "step": 48732 }, { "epoch": 1.0201163861676297, "grad_norm": 0.3246400058269501, "learning_rate": 0.0001482027248180161, "loss": 11.6638, "step": 48733 }, { "epoch": 1.0201373189315917, "grad_norm": 0.2755714952945709, "learning_rate": 0.00014820080380510258, "loss": 11.6548, "step": 48734 }, { "epoch": 1.0201582516955539, "grad_norm": 0.24761883914470673, "learning_rate": 0.00014819888276901795, "loss": 11.6634, "step": 48735 }, { "epoch": 1.020179184459516, "grad_norm": 0.34475889801979065, "learning_rate": 0.00014819696170976316, "loss": 11.6621, "step": 48736 }, { "epoch": 1.0202001172234783, "grad_norm": 0.3019571006298065, "learning_rate": 0.0001481950406273391, "loss": 11.6443, "step": 48737 }, { "epoch": 1.0202210499874402, "grad_norm": 0.34560951590538025, "learning_rate": 0.00014819311952174677, "loss": 11.6692, "step": 48738 }, { "epoch": 1.0202419827514024, "grad_norm": 0.24486631155014038, "learning_rate": 0.000148191198392987, "loss": 11.6667, "step": 48739 }, { "epoch": 1.0202629155153646, "grad_norm": 0.2852063477039337, "learning_rate": 0.00014818927724106074, "loss": 11.6694, "step": 48740 }, { "epoch": 1.0202838482793268, "grad_norm": 0.5288833975791931, "learning_rate": 0.00014818735606596898, "loss": 11.6777, "step": 48741 }, { "epoch": 1.020304781043289, "grad_norm": 0.29577144980430603, "learning_rate": 0.00014818543486771256, "loss": 11.6972, "step": 48742 }, { "epoch": 1.020325713807251, "grad_norm": 0.3432046175003052, "learning_rate": 0.00014818351364629247, "loss": 11.6816, "step": 48743 }, { "epoch": 1.0203466465712132, "grad_norm": 0.2787396013736725, "learning_rate": 0.00014818159240170955, "loss": 11.6684, "step": 48744 }, { "epoch": 1.0203675793351754, "grad_norm": 0.3067719340324402, "learning_rate": 0.00014817967113396477, "loss": 11.6702, "step": 48745 }, { "epoch": 1.0203885120991376, "grad_norm": 0.3024427592754364, "learning_rate": 0.00014817774984305913, "loss": 11.6675, "step": 48746 }, { "epoch": 1.0204094448630998, "grad_norm": 0.2952665686607361, "learning_rate": 0.00014817582852899342, "loss": 11.6718, "step": 48747 }, { "epoch": 1.0204303776270618, "grad_norm": 0.4714030623435974, "learning_rate": 0.00014817390719176863, "loss": 11.6694, "step": 48748 }, { "epoch": 1.020451310391024, "grad_norm": 0.2747134566307068, "learning_rate": 0.00014817198583138568, "loss": 11.6535, "step": 48749 }, { "epoch": 1.0204722431549862, "grad_norm": 0.45568743348121643, "learning_rate": 0.00014817006444784548, "loss": 11.683, "step": 48750 }, { "epoch": 1.0204931759189484, "grad_norm": 0.31994593143463135, "learning_rate": 0.00014816814304114898, "loss": 11.6844, "step": 48751 }, { "epoch": 1.0205141086829106, "grad_norm": 0.31663408875465393, "learning_rate": 0.0001481662216112971, "loss": 11.6846, "step": 48752 }, { "epoch": 1.0205350414468726, "grad_norm": 0.3057294189929962, "learning_rate": 0.00014816430015829075, "loss": 11.6521, "step": 48753 }, { "epoch": 1.0205559742108348, "grad_norm": 0.3068753778934479, "learning_rate": 0.00014816237868213084, "loss": 11.6719, "step": 48754 }, { "epoch": 1.020576906974797, "grad_norm": 0.36726823449134827, "learning_rate": 0.00014816045718281831, "loss": 11.681, "step": 48755 }, { "epoch": 1.0205978397387592, "grad_norm": 0.4370896816253662, "learning_rate": 0.00014815853566035409, "loss": 11.6727, "step": 48756 }, { "epoch": 1.0206187725027211, "grad_norm": 0.2700100243091583, "learning_rate": 0.00014815661411473908, "loss": 11.6634, "step": 48757 }, { "epoch": 1.0206397052666834, "grad_norm": 0.3037251830101013, "learning_rate": 0.00014815469254597424, "loss": 11.677, "step": 48758 }, { "epoch": 1.0206606380306456, "grad_norm": 0.2815316617488861, "learning_rate": 0.0001481527709540605, "loss": 11.6582, "step": 48759 }, { "epoch": 1.0206815707946078, "grad_norm": 0.507611870765686, "learning_rate": 0.0001481508493389987, "loss": 11.6738, "step": 48760 }, { "epoch": 1.02070250355857, "grad_norm": 0.33583658933639526, "learning_rate": 0.00014814892770078985, "loss": 11.6703, "step": 48761 }, { "epoch": 1.020723436322532, "grad_norm": 0.31572896242141724, "learning_rate": 0.00014814700603943482, "loss": 11.6623, "step": 48762 }, { "epoch": 1.0207443690864941, "grad_norm": 0.31393933296203613, "learning_rate": 0.0001481450843549346, "loss": 11.6677, "step": 48763 }, { "epoch": 1.0207653018504563, "grad_norm": 0.34566107392311096, "learning_rate": 0.00014814316264729005, "loss": 11.6758, "step": 48764 }, { "epoch": 1.0207862346144185, "grad_norm": 0.3301573097705841, "learning_rate": 0.00014814124091650215, "loss": 11.6779, "step": 48765 }, { "epoch": 1.0208071673783807, "grad_norm": 0.3064837157726288, "learning_rate": 0.00014813931916257176, "loss": 11.6777, "step": 48766 }, { "epoch": 1.0208281001423427, "grad_norm": 0.43604010343551636, "learning_rate": 0.0001481373973854998, "loss": 11.6636, "step": 48767 }, { "epoch": 1.020849032906305, "grad_norm": 0.2836233377456665, "learning_rate": 0.00014813547558528727, "loss": 11.6856, "step": 48768 }, { "epoch": 1.020869965670267, "grad_norm": 0.36858218908309937, "learning_rate": 0.00014813355376193507, "loss": 11.684, "step": 48769 }, { "epoch": 1.0208908984342293, "grad_norm": 0.31659018993377686, "learning_rate": 0.0001481316319154441, "loss": 11.6607, "step": 48770 }, { "epoch": 1.0209118311981915, "grad_norm": 0.31800609827041626, "learning_rate": 0.00014812971004581527, "loss": 11.6745, "step": 48771 }, { "epoch": 1.0209327639621535, "grad_norm": 0.28401055932044983, "learning_rate": 0.00014812778815304954, "loss": 11.6657, "step": 48772 }, { "epoch": 1.0209536967261157, "grad_norm": 0.34811580181121826, "learning_rate": 0.0001481258662371478, "loss": 11.692, "step": 48773 }, { "epoch": 1.020974629490078, "grad_norm": 0.3149688243865967, "learning_rate": 0.00014812394429811098, "loss": 11.6541, "step": 48774 }, { "epoch": 1.02099556225404, "grad_norm": 0.3160039782524109, "learning_rate": 0.00014812202233594005, "loss": 11.674, "step": 48775 }, { "epoch": 1.021016495018002, "grad_norm": 0.29152771830558777, "learning_rate": 0.00014812010035063588, "loss": 11.6704, "step": 48776 }, { "epoch": 1.0210374277819643, "grad_norm": 0.2770516872406006, "learning_rate": 0.00014811817834219941, "loss": 11.6694, "step": 48777 }, { "epoch": 1.0210583605459265, "grad_norm": 0.30250224471092224, "learning_rate": 0.00014811625631063158, "loss": 11.6793, "step": 48778 }, { "epoch": 1.0210792933098887, "grad_norm": 0.28209346532821655, "learning_rate": 0.0001481143342559333, "loss": 11.6712, "step": 48779 }, { "epoch": 1.0211002260738509, "grad_norm": 0.2866485118865967, "learning_rate": 0.0001481124121781055, "loss": 11.6589, "step": 48780 }, { "epoch": 1.0211211588378128, "grad_norm": 0.31599292159080505, "learning_rate": 0.00014811049007714907, "loss": 11.6676, "step": 48781 }, { "epoch": 1.021142091601775, "grad_norm": 0.4238395690917969, "learning_rate": 0.000148108567953065, "loss": 11.6773, "step": 48782 }, { "epoch": 1.0211630243657372, "grad_norm": 0.30172547698020935, "learning_rate": 0.00014810664580585417, "loss": 11.6616, "step": 48783 }, { "epoch": 1.0211839571296994, "grad_norm": 0.3533734679222107, "learning_rate": 0.00014810472363551748, "loss": 11.6498, "step": 48784 }, { "epoch": 1.0212048898936616, "grad_norm": 0.3545081913471222, "learning_rate": 0.00014810280144205592, "loss": 11.6824, "step": 48785 }, { "epoch": 1.0212258226576236, "grad_norm": 0.3120948076248169, "learning_rate": 0.00014810087922547036, "loss": 11.666, "step": 48786 }, { "epoch": 1.0212467554215858, "grad_norm": 0.40486693382263184, "learning_rate": 0.00014809895698576178, "loss": 11.6705, "step": 48787 }, { "epoch": 1.021267688185548, "grad_norm": 0.2630736231803894, "learning_rate": 0.00014809703472293102, "loss": 11.6916, "step": 48788 }, { "epoch": 1.0212886209495102, "grad_norm": 0.38410335779190063, "learning_rate": 0.00014809511243697905, "loss": 11.6782, "step": 48789 }, { "epoch": 1.0213095537134724, "grad_norm": 0.3499649465084076, "learning_rate": 0.00014809319012790685, "loss": 11.6731, "step": 48790 }, { "epoch": 1.0213304864774344, "grad_norm": 0.28859853744506836, "learning_rate": 0.00014809126779571523, "loss": 11.6715, "step": 48791 }, { "epoch": 1.0213514192413966, "grad_norm": 0.2606542706489563, "learning_rate": 0.00014808934544040521, "loss": 11.6711, "step": 48792 }, { "epoch": 1.0213723520053588, "grad_norm": 0.3180890679359436, "learning_rate": 0.00014808742306197766, "loss": 11.6795, "step": 48793 }, { "epoch": 1.021393284769321, "grad_norm": 0.3861205577850342, "learning_rate": 0.00014808550066043352, "loss": 11.6596, "step": 48794 }, { "epoch": 1.021414217533283, "grad_norm": 0.25911545753479004, "learning_rate": 0.00014808357823577371, "loss": 11.655, "step": 48795 }, { "epoch": 1.0214351502972452, "grad_norm": 0.2674405872821808, "learning_rate": 0.0001480816557879992, "loss": 11.6641, "step": 48796 }, { "epoch": 1.0214560830612074, "grad_norm": 0.3382643461227417, "learning_rate": 0.00014807973331711085, "loss": 11.6828, "step": 48797 }, { "epoch": 1.0214770158251696, "grad_norm": 0.3024902045726776, "learning_rate": 0.00014807781082310962, "loss": 11.6646, "step": 48798 }, { "epoch": 1.0214979485891318, "grad_norm": 0.31960636377334595, "learning_rate": 0.00014807588830599637, "loss": 11.6773, "step": 48799 }, { "epoch": 1.0215188813530938, "grad_norm": 0.28325435519218445, "learning_rate": 0.00014807396576577215, "loss": 11.6806, "step": 48800 }, { "epoch": 1.021539814117056, "grad_norm": 0.28925570845603943, "learning_rate": 0.00014807204320243774, "loss": 11.6602, "step": 48801 }, { "epoch": 1.0215607468810182, "grad_norm": 0.2454751580953598, "learning_rate": 0.0001480701206159942, "loss": 11.6612, "step": 48802 }, { "epoch": 1.0215816796449804, "grad_norm": 0.322706013917923, "learning_rate": 0.00014806819800644236, "loss": 11.6681, "step": 48803 }, { "epoch": 1.0216026124089426, "grad_norm": 0.4323039948940277, "learning_rate": 0.00014806627537378319, "loss": 11.69, "step": 48804 }, { "epoch": 1.0216235451729045, "grad_norm": 0.4841807782649994, "learning_rate": 0.00014806435271801758, "loss": 11.672, "step": 48805 }, { "epoch": 1.0216444779368667, "grad_norm": 0.2721709609031677, "learning_rate": 0.00014806243003914648, "loss": 11.6581, "step": 48806 }, { "epoch": 1.021665410700829, "grad_norm": 0.3847977817058563, "learning_rate": 0.0001480605073371708, "loss": 11.6621, "step": 48807 }, { "epoch": 1.0216863434647911, "grad_norm": 0.3117530643939972, "learning_rate": 0.00014805858461209147, "loss": 11.6676, "step": 48808 }, { "epoch": 1.0217072762287533, "grad_norm": 0.3187462091445923, "learning_rate": 0.00014805666186390942, "loss": 11.6736, "step": 48809 }, { "epoch": 1.0217282089927153, "grad_norm": 0.36406058073043823, "learning_rate": 0.0001480547390926256, "loss": 11.6771, "step": 48810 }, { "epoch": 1.0217491417566775, "grad_norm": 0.3083421587944031, "learning_rate": 0.00014805281629824086, "loss": 11.659, "step": 48811 }, { "epoch": 1.0217700745206397, "grad_norm": 0.38373127579689026, "learning_rate": 0.00014805089348075617, "loss": 11.6584, "step": 48812 }, { "epoch": 1.021791007284602, "grad_norm": 0.44613102078437805, "learning_rate": 0.00014804897064017247, "loss": 11.6671, "step": 48813 }, { "epoch": 1.021811940048564, "grad_norm": 0.3636546730995178, "learning_rate": 0.00014804704777649066, "loss": 11.6809, "step": 48814 }, { "epoch": 1.021832872812526, "grad_norm": 0.3819543719291687, "learning_rate": 0.00014804512488971166, "loss": 11.6724, "step": 48815 }, { "epoch": 1.0218538055764883, "grad_norm": 0.3060961961746216, "learning_rate": 0.00014804320197983647, "loss": 11.667, "step": 48816 }, { "epoch": 1.0218747383404505, "grad_norm": 0.3397122323513031, "learning_rate": 0.0001480412790468659, "loss": 11.6755, "step": 48817 }, { "epoch": 1.0218956711044127, "grad_norm": 0.3078465759754181, "learning_rate": 0.00014803935609080093, "loss": 11.6759, "step": 48818 }, { "epoch": 1.0219166038683747, "grad_norm": 0.3320538103580475, "learning_rate": 0.00014803743311164245, "loss": 11.6705, "step": 48819 }, { "epoch": 1.0219375366323369, "grad_norm": 0.3514699637889862, "learning_rate": 0.00014803551010939144, "loss": 11.6597, "step": 48820 }, { "epoch": 1.021958469396299, "grad_norm": 0.29987674951553345, "learning_rate": 0.0001480335870840488, "loss": 11.6585, "step": 48821 }, { "epoch": 1.0219794021602613, "grad_norm": 0.30951327085494995, "learning_rate": 0.00014803166403561544, "loss": 11.6701, "step": 48822 }, { "epoch": 1.0220003349242235, "grad_norm": 0.3288123309612274, "learning_rate": 0.0001480297409640923, "loss": 11.6587, "step": 48823 }, { "epoch": 1.0220212676881855, "grad_norm": 0.3634614646434784, "learning_rate": 0.0001480278178694803, "loss": 11.6757, "step": 48824 }, { "epoch": 1.0220422004521477, "grad_norm": 0.3327989876270294, "learning_rate": 0.0001480258947517804, "loss": 11.645, "step": 48825 }, { "epoch": 1.0220631332161099, "grad_norm": 0.39107048511505127, "learning_rate": 0.00014802397161099343, "loss": 11.6554, "step": 48826 }, { "epoch": 1.022084065980072, "grad_norm": 0.2715441882610321, "learning_rate": 0.0001480220484471204, "loss": 11.6462, "step": 48827 }, { "epoch": 1.0221049987440343, "grad_norm": 0.33528998494148254, "learning_rate": 0.00014802012526016222, "loss": 11.6654, "step": 48828 }, { "epoch": 1.0221259315079962, "grad_norm": 0.39118537306785583, "learning_rate": 0.00014801820205011978, "loss": 11.6735, "step": 48829 }, { "epoch": 1.0221468642719584, "grad_norm": 0.3532107472419739, "learning_rate": 0.00014801627881699407, "loss": 11.6711, "step": 48830 }, { "epoch": 1.0221677970359206, "grad_norm": 0.43650656938552856, "learning_rate": 0.00014801435556078594, "loss": 11.6603, "step": 48831 }, { "epoch": 1.0221887297998828, "grad_norm": 0.2681366503238678, "learning_rate": 0.00014801243228149637, "loss": 11.6474, "step": 48832 }, { "epoch": 1.0222096625638448, "grad_norm": 0.28256919980049133, "learning_rate": 0.00014801050897912622, "loss": 11.6759, "step": 48833 }, { "epoch": 1.022230595327807, "grad_norm": 0.30808329582214355, "learning_rate": 0.00014800858565367647, "loss": 11.6517, "step": 48834 }, { "epoch": 1.0222515280917692, "grad_norm": 0.2624608278274536, "learning_rate": 0.00014800666230514807, "loss": 11.6592, "step": 48835 }, { "epoch": 1.0222724608557314, "grad_norm": 0.4471190869808197, "learning_rate": 0.00014800473893354185, "loss": 11.6606, "step": 48836 }, { "epoch": 1.0222933936196936, "grad_norm": 0.29447105526924133, "learning_rate": 0.00014800281553885885, "loss": 11.6593, "step": 48837 }, { "epoch": 1.0223143263836556, "grad_norm": 0.301603764295578, "learning_rate": 0.00014800089212109989, "loss": 11.6642, "step": 48838 }, { "epoch": 1.0223352591476178, "grad_norm": 0.4527828097343445, "learning_rate": 0.00014799896868026595, "loss": 11.6643, "step": 48839 }, { "epoch": 1.02235619191158, "grad_norm": 0.2955368757247925, "learning_rate": 0.00014799704521635792, "loss": 11.666, "step": 48840 }, { "epoch": 1.0223771246755422, "grad_norm": 0.33791017532348633, "learning_rate": 0.0001479951217293768, "loss": 11.6467, "step": 48841 }, { "epoch": 1.0223980574395044, "grad_norm": 0.3529125452041626, "learning_rate": 0.00014799319821932346, "loss": 11.674, "step": 48842 }, { "epoch": 1.0224189902034664, "grad_norm": 0.3172961175441742, "learning_rate": 0.00014799127468619877, "loss": 11.6528, "step": 48843 }, { "epoch": 1.0224399229674286, "grad_norm": 0.2688244581222534, "learning_rate": 0.00014798935113000377, "loss": 11.666, "step": 48844 }, { "epoch": 1.0224608557313908, "grad_norm": 0.2558126747608185, "learning_rate": 0.00014798742755073928, "loss": 11.664, "step": 48845 }, { "epoch": 1.022481788495353, "grad_norm": 0.22214782238006592, "learning_rate": 0.0001479855039484063, "loss": 11.641, "step": 48846 }, { "epoch": 1.0225027212593152, "grad_norm": 0.4674070179462433, "learning_rate": 0.0001479835803230057, "loss": 11.6775, "step": 48847 }, { "epoch": 1.0225236540232772, "grad_norm": 0.36756524443626404, "learning_rate": 0.00014798165667453847, "loss": 11.6624, "step": 48848 }, { "epoch": 1.0225445867872394, "grad_norm": 0.30742147564888, "learning_rate": 0.0001479797330030055, "loss": 11.6674, "step": 48849 }, { "epoch": 1.0225655195512016, "grad_norm": 0.3552083671092987, "learning_rate": 0.00014797780930840768, "loss": 11.6783, "step": 48850 }, { "epoch": 1.0225864523151638, "grad_norm": 0.32494255900382996, "learning_rate": 0.000147975885590746, "loss": 11.6674, "step": 48851 }, { "epoch": 1.0226073850791257, "grad_norm": 0.2612847089767456, "learning_rate": 0.00014797396185002132, "loss": 11.6696, "step": 48852 }, { "epoch": 1.022628317843088, "grad_norm": 0.362267404794693, "learning_rate": 0.00014797203808623462, "loss": 11.6899, "step": 48853 }, { "epoch": 1.0226492506070501, "grad_norm": 0.28151679039001465, "learning_rate": 0.00014797011429938675, "loss": 11.672, "step": 48854 }, { "epoch": 1.0226701833710123, "grad_norm": 0.2918476462364197, "learning_rate": 0.00014796819048947871, "loss": 11.6849, "step": 48855 }, { "epoch": 1.0226911161349745, "grad_norm": 0.343497097492218, "learning_rate": 0.0001479662666565114, "loss": 11.6898, "step": 48856 }, { "epoch": 1.0227120488989365, "grad_norm": 0.32958346605300903, "learning_rate": 0.00014796434280048577, "loss": 11.6554, "step": 48857 }, { "epoch": 1.0227329816628987, "grad_norm": 0.3517356812953949, "learning_rate": 0.0001479624189214027, "loss": 11.6665, "step": 48858 }, { "epoch": 1.022753914426861, "grad_norm": 0.3047454357147217, "learning_rate": 0.00014796049501926312, "loss": 11.669, "step": 48859 }, { "epoch": 1.0227748471908231, "grad_norm": 0.31682831048965454, "learning_rate": 0.000147958571094068, "loss": 11.6801, "step": 48860 }, { "epoch": 1.0227957799547853, "grad_norm": 0.3324504494667053, "learning_rate": 0.0001479566471458182, "loss": 11.6698, "step": 48861 }, { "epoch": 1.0228167127187473, "grad_norm": 0.29830941557884216, "learning_rate": 0.00014795472317451468, "loss": 11.6669, "step": 48862 }, { "epoch": 1.0228376454827095, "grad_norm": 0.4263470470905304, "learning_rate": 0.0001479527991801584, "loss": 11.6615, "step": 48863 }, { "epoch": 1.0228585782466717, "grad_norm": 0.2969990074634552, "learning_rate": 0.00014795087516275023, "loss": 11.6739, "step": 48864 }, { "epoch": 1.022879511010634, "grad_norm": 0.27179619669914246, "learning_rate": 0.0001479489511222911, "loss": 11.6609, "step": 48865 }, { "epoch": 1.022900443774596, "grad_norm": 0.3402826488018036, "learning_rate": 0.00014794702705878197, "loss": 11.682, "step": 48866 }, { "epoch": 1.022921376538558, "grad_norm": 0.30444997549057007, "learning_rate": 0.00014794510297222373, "loss": 11.6704, "step": 48867 }, { "epoch": 1.0229423093025203, "grad_norm": 0.33183205127716064, "learning_rate": 0.00014794317886261733, "loss": 11.6658, "step": 48868 }, { "epoch": 1.0229632420664825, "grad_norm": 0.2503907382488251, "learning_rate": 0.00014794125472996367, "loss": 11.6647, "step": 48869 }, { "epoch": 1.0229841748304447, "grad_norm": 0.36697402596473694, "learning_rate": 0.0001479393305742637, "loss": 11.6619, "step": 48870 }, { "epoch": 1.0230051075944067, "grad_norm": 0.28829696774482727, "learning_rate": 0.0001479374063955183, "loss": 11.6755, "step": 48871 }, { "epoch": 1.0230260403583689, "grad_norm": 0.30062898993492126, "learning_rate": 0.00014793548219372847, "loss": 11.6819, "step": 48872 }, { "epoch": 1.023046973122331, "grad_norm": 0.44544103741645813, "learning_rate": 0.00014793355796889507, "loss": 11.6813, "step": 48873 }, { "epoch": 1.0230679058862933, "grad_norm": 0.2477131187915802, "learning_rate": 0.00014793163372101904, "loss": 11.6555, "step": 48874 }, { "epoch": 1.0230888386502555, "grad_norm": 0.30562612414360046, "learning_rate": 0.00014792970945010133, "loss": 11.663, "step": 48875 }, { "epoch": 1.0231097714142174, "grad_norm": 0.3347178101539612, "learning_rate": 0.00014792778515614286, "loss": 11.6637, "step": 48876 }, { "epoch": 1.0231307041781796, "grad_norm": 0.30096346139907837, "learning_rate": 0.0001479258608391445, "loss": 11.67, "step": 48877 }, { "epoch": 1.0231516369421418, "grad_norm": 0.3144984841346741, "learning_rate": 0.00014792393649910727, "loss": 11.6709, "step": 48878 }, { "epoch": 1.023172569706104, "grad_norm": 0.26436296105384827, "learning_rate": 0.00014792201213603202, "loss": 11.6671, "step": 48879 }, { "epoch": 1.0231935024700662, "grad_norm": 0.3219369649887085, "learning_rate": 0.0001479200877499197, "loss": 11.682, "step": 48880 }, { "epoch": 1.0232144352340282, "grad_norm": 0.3148011565208435, "learning_rate": 0.00014791816334077122, "loss": 11.6668, "step": 48881 }, { "epoch": 1.0232353679979904, "grad_norm": 0.34886255860328674, "learning_rate": 0.00014791623890858752, "loss": 11.6709, "step": 48882 }, { "epoch": 1.0232563007619526, "grad_norm": 0.6398556232452393, "learning_rate": 0.00014791431445336953, "loss": 11.6451, "step": 48883 }, { "epoch": 1.0232772335259148, "grad_norm": 0.3376452922821045, "learning_rate": 0.0001479123899751182, "loss": 11.6551, "step": 48884 }, { "epoch": 1.023298166289877, "grad_norm": 0.38348186016082764, "learning_rate": 0.0001479104654738344, "loss": 11.6793, "step": 48885 }, { "epoch": 1.023319099053839, "grad_norm": 0.2743622958660126, "learning_rate": 0.00014790854094951907, "loss": 11.6581, "step": 48886 }, { "epoch": 1.0233400318178012, "grad_norm": 0.2709747850894928, "learning_rate": 0.00014790661640217315, "loss": 11.6767, "step": 48887 }, { "epoch": 1.0233609645817634, "grad_norm": 0.3685871958732605, "learning_rate": 0.00014790469183179755, "loss": 11.6638, "step": 48888 }, { "epoch": 1.0233818973457256, "grad_norm": 0.2606832981109619, "learning_rate": 0.0001479027672383932, "loss": 11.6565, "step": 48889 }, { "epoch": 1.0234028301096876, "grad_norm": 0.29579678177833557, "learning_rate": 0.00014790084262196107, "loss": 11.6749, "step": 48890 }, { "epoch": 1.0234237628736498, "grad_norm": 0.3321681618690491, "learning_rate": 0.000147898917982502, "loss": 11.6557, "step": 48891 }, { "epoch": 1.023444695637612, "grad_norm": 0.38622990250587463, "learning_rate": 0.000147896993320017, "loss": 11.6765, "step": 48892 }, { "epoch": 1.0234656284015742, "grad_norm": 0.2863897681236267, "learning_rate": 0.00014789506863450692, "loss": 11.6733, "step": 48893 }, { "epoch": 1.0234865611655364, "grad_norm": 0.28902119398117065, "learning_rate": 0.00014789314392597274, "loss": 11.6637, "step": 48894 }, { "epoch": 1.0235074939294984, "grad_norm": 0.29923322796821594, "learning_rate": 0.00014789121919441537, "loss": 11.6775, "step": 48895 }, { "epoch": 1.0235284266934606, "grad_norm": 0.35601526498794556, "learning_rate": 0.0001478892944398357, "loss": 11.6703, "step": 48896 }, { "epoch": 1.0235493594574228, "grad_norm": 0.2643938958644867, "learning_rate": 0.00014788736966223474, "loss": 11.6508, "step": 48897 }, { "epoch": 1.023570292221385, "grad_norm": 0.3098095655441284, "learning_rate": 0.0001478854448616133, "loss": 11.6639, "step": 48898 }, { "epoch": 1.0235912249853472, "grad_norm": 0.27160823345184326, "learning_rate": 0.00014788352003797242, "loss": 11.645, "step": 48899 }, { "epoch": 1.0236121577493091, "grad_norm": 0.3558503985404968, "learning_rate": 0.00014788159519131292, "loss": 11.6576, "step": 48900 }, { "epoch": 1.0236330905132713, "grad_norm": 0.31258445978164673, "learning_rate": 0.0001478796703216358, "loss": 11.6686, "step": 48901 }, { "epoch": 1.0236540232772335, "grad_norm": 0.4389072358608246, "learning_rate": 0.000147877745428942, "loss": 11.6736, "step": 48902 }, { "epoch": 1.0236749560411957, "grad_norm": 0.2886735498905182, "learning_rate": 0.00014787582051323237, "loss": 11.671, "step": 48903 }, { "epoch": 1.023695888805158, "grad_norm": 0.28530317544937134, "learning_rate": 0.0001478738955745079, "loss": 11.6537, "step": 48904 }, { "epoch": 1.02371682156912, "grad_norm": 0.3493782579898834, "learning_rate": 0.00014787197061276946, "loss": 11.6634, "step": 48905 }, { "epoch": 1.0237377543330821, "grad_norm": 0.3777643144130707, "learning_rate": 0.000147870045628018, "loss": 11.6684, "step": 48906 }, { "epoch": 1.0237586870970443, "grad_norm": 0.27052849531173706, "learning_rate": 0.00014786812062025448, "loss": 11.6608, "step": 48907 }, { "epoch": 1.0237796198610065, "grad_norm": 0.3575303554534912, "learning_rate": 0.00014786619558947978, "loss": 11.6653, "step": 48908 }, { "epoch": 1.0238005526249685, "grad_norm": 0.42629268765449524, "learning_rate": 0.00014786427053569483, "loss": 11.675, "step": 48909 }, { "epoch": 1.0238214853889307, "grad_norm": 0.2928629219532013, "learning_rate": 0.0001478623454589006, "loss": 11.6802, "step": 48910 }, { "epoch": 1.023842418152893, "grad_norm": 0.3792991638183594, "learning_rate": 0.00014786042035909794, "loss": 11.6583, "step": 48911 }, { "epoch": 1.023863350916855, "grad_norm": 0.284239798784256, "learning_rate": 0.00014785849523628786, "loss": 11.6622, "step": 48912 }, { "epoch": 1.0238842836808173, "grad_norm": 0.2641654312610626, "learning_rate": 0.00014785657009047117, "loss": 11.6629, "step": 48913 }, { "epoch": 1.0239052164447793, "grad_norm": 0.2700962424278259, "learning_rate": 0.00014785464492164893, "loss": 11.6667, "step": 48914 }, { "epoch": 1.0239261492087415, "grad_norm": 0.3081619143486023, "learning_rate": 0.00014785271972982199, "loss": 11.6708, "step": 48915 }, { "epoch": 1.0239470819727037, "grad_norm": 0.332236111164093, "learning_rate": 0.00014785079451499128, "loss": 11.6735, "step": 48916 }, { "epoch": 1.0239680147366659, "grad_norm": 0.2969840168952942, "learning_rate": 0.00014784886927715775, "loss": 11.6726, "step": 48917 }, { "epoch": 1.023988947500628, "grad_norm": 0.38697317242622375, "learning_rate": 0.00014784694401632231, "loss": 11.665, "step": 48918 }, { "epoch": 1.02400988026459, "grad_norm": 0.33219122886657715, "learning_rate": 0.00014784501873248588, "loss": 11.6693, "step": 48919 }, { "epoch": 1.0240308130285523, "grad_norm": 0.3113895654678345, "learning_rate": 0.00014784309342564937, "loss": 11.6773, "step": 48920 }, { "epoch": 1.0240517457925145, "grad_norm": 0.2594340741634369, "learning_rate": 0.00014784116809581375, "loss": 11.656, "step": 48921 }, { "epoch": 1.0240726785564767, "grad_norm": 0.26081234216690063, "learning_rate": 0.0001478392427429799, "loss": 11.6411, "step": 48922 }, { "epoch": 1.0240936113204389, "grad_norm": 0.3021596074104309, "learning_rate": 0.00014783731736714878, "loss": 11.6623, "step": 48923 }, { "epoch": 1.0241145440844008, "grad_norm": 0.31370171904563904, "learning_rate": 0.00014783539196832132, "loss": 11.6696, "step": 48924 }, { "epoch": 1.024135476848363, "grad_norm": 0.3180319666862488, "learning_rate": 0.00014783346654649842, "loss": 11.677, "step": 48925 }, { "epoch": 1.0241564096123252, "grad_norm": 0.28337252140045166, "learning_rate": 0.00014783154110168097, "loss": 11.6575, "step": 48926 }, { "epoch": 1.0241773423762874, "grad_norm": 0.3368053138256073, "learning_rate": 0.00014782961563387, "loss": 11.6714, "step": 48927 }, { "epoch": 1.0241982751402494, "grad_norm": 0.26388654112815857, "learning_rate": 0.00014782769014306637, "loss": 11.643, "step": 48928 }, { "epoch": 1.0242192079042116, "grad_norm": 0.36591610312461853, "learning_rate": 0.00014782576462927098, "loss": 11.663, "step": 48929 }, { "epoch": 1.0242401406681738, "grad_norm": 0.25660282373428345, "learning_rate": 0.0001478238390924848, "loss": 11.6651, "step": 48930 }, { "epoch": 1.024261073432136, "grad_norm": 0.32034239172935486, "learning_rate": 0.00014782191353270873, "loss": 11.6455, "step": 48931 }, { "epoch": 1.0242820061960982, "grad_norm": 0.33527272939682007, "learning_rate": 0.00014781998794994374, "loss": 11.6737, "step": 48932 }, { "epoch": 1.0243029389600602, "grad_norm": 0.25969603657722473, "learning_rate": 0.00014781806234419068, "loss": 11.683, "step": 48933 }, { "epoch": 1.0243238717240224, "grad_norm": 0.27605417370796204, "learning_rate": 0.00014781613671545056, "loss": 11.6723, "step": 48934 }, { "epoch": 1.0243448044879846, "grad_norm": 0.30884480476379395, "learning_rate": 0.00014781421106372424, "loss": 11.6694, "step": 48935 }, { "epoch": 1.0243657372519468, "grad_norm": 0.29506683349609375, "learning_rate": 0.00014781228538901267, "loss": 11.68, "step": 48936 }, { "epoch": 1.024386670015909, "grad_norm": 0.2768270671367645, "learning_rate": 0.0001478103596913168, "loss": 11.6523, "step": 48937 }, { "epoch": 1.024407602779871, "grad_norm": 0.2963241934776306, "learning_rate": 0.0001478084339706375, "loss": 11.6692, "step": 48938 }, { "epoch": 1.0244285355438332, "grad_norm": 0.3625681698322296, "learning_rate": 0.00014780650822697578, "loss": 11.6635, "step": 48939 }, { "epoch": 1.0244494683077954, "grad_norm": 0.3453785181045532, "learning_rate": 0.00014780458246033243, "loss": 11.6836, "step": 48940 }, { "epoch": 1.0244704010717576, "grad_norm": 0.3096596598625183, "learning_rate": 0.00014780265667070855, "loss": 11.6639, "step": 48941 }, { "epoch": 1.0244913338357198, "grad_norm": 0.35983046889305115, "learning_rate": 0.0001478007308581049, "loss": 11.6615, "step": 48942 }, { "epoch": 1.0245122665996818, "grad_norm": 0.296077698469162, "learning_rate": 0.0001477988050225225, "loss": 11.6671, "step": 48943 }, { "epoch": 1.024533199363644, "grad_norm": 0.28659969568252563, "learning_rate": 0.00014779687916396229, "loss": 11.6571, "step": 48944 }, { "epoch": 1.0245541321276062, "grad_norm": 0.24851392209529877, "learning_rate": 0.0001477949532824251, "loss": 11.6602, "step": 48945 }, { "epoch": 1.0245750648915684, "grad_norm": 0.30594852566719055, "learning_rate": 0.00014779302737791202, "loss": 11.6599, "step": 48946 }, { "epoch": 1.0245959976555303, "grad_norm": 0.30242258310317993, "learning_rate": 0.00014779110145042378, "loss": 11.668, "step": 48947 }, { "epoch": 1.0246169304194925, "grad_norm": 0.32251420617103577, "learning_rate": 0.00014778917549996144, "loss": 11.6527, "step": 48948 }, { "epoch": 1.0246378631834547, "grad_norm": 0.3119865953922272, "learning_rate": 0.00014778724952652588, "loss": 11.6606, "step": 48949 }, { "epoch": 1.024658795947417, "grad_norm": 0.3379002511501312, "learning_rate": 0.00014778532353011803, "loss": 11.6797, "step": 48950 }, { "epoch": 1.0246797287113791, "grad_norm": 0.24021503329277039, "learning_rate": 0.0001477833975107388, "loss": 11.6587, "step": 48951 }, { "epoch": 1.0247006614753411, "grad_norm": 0.2684551775455475, "learning_rate": 0.00014778147146838916, "loss": 11.6661, "step": 48952 }, { "epoch": 1.0247215942393033, "grad_norm": 0.3187991976737976, "learning_rate": 0.00014777954540307, "loss": 11.6634, "step": 48953 }, { "epoch": 1.0247425270032655, "grad_norm": 0.26731449365615845, "learning_rate": 0.00014777761931478227, "loss": 11.6648, "step": 48954 }, { "epoch": 1.0247634597672277, "grad_norm": 0.4259834587574005, "learning_rate": 0.00014777569320352683, "loss": 11.6701, "step": 48955 }, { "epoch": 1.02478439253119, "grad_norm": 0.27154773473739624, "learning_rate": 0.00014777376706930472, "loss": 11.6795, "step": 48956 }, { "epoch": 1.024805325295152, "grad_norm": 0.2671014368534088, "learning_rate": 0.00014777184091211676, "loss": 11.651, "step": 48957 }, { "epoch": 1.024826258059114, "grad_norm": 0.27429062128067017, "learning_rate": 0.00014776991473196394, "loss": 11.6751, "step": 48958 }, { "epoch": 1.0248471908230763, "grad_norm": 0.26456230878829956, "learning_rate": 0.00014776798852884717, "loss": 11.6654, "step": 48959 }, { "epoch": 1.0248681235870385, "grad_norm": 0.2548424005508423, "learning_rate": 0.00014776606230276732, "loss": 11.6574, "step": 48960 }, { "epoch": 1.0248890563510007, "grad_norm": 0.34803131222724915, "learning_rate": 0.00014776413605372543, "loss": 11.6663, "step": 48961 }, { "epoch": 1.0249099891149627, "grad_norm": 0.29349285364151, "learning_rate": 0.00014776220978172233, "loss": 11.6454, "step": 48962 }, { "epoch": 1.0249309218789249, "grad_norm": 0.32069486379623413, "learning_rate": 0.00014776028348675898, "loss": 11.669, "step": 48963 }, { "epoch": 1.024951854642887, "grad_norm": 0.29452285170555115, "learning_rate": 0.0001477583571688363, "loss": 11.6543, "step": 48964 }, { "epoch": 1.0249727874068493, "grad_norm": 0.30569732189178467, "learning_rate": 0.00014775643082795526, "loss": 11.6785, "step": 48965 }, { "epoch": 1.0249937201708113, "grad_norm": 0.34610623121261597, "learning_rate": 0.00014775450446411671, "loss": 11.6602, "step": 48966 }, { "epoch": 1.0250146529347735, "grad_norm": 0.30484238266944885, "learning_rate": 0.00014775257807732162, "loss": 11.6735, "step": 48967 }, { "epoch": 1.0250355856987357, "grad_norm": 0.2734162509441376, "learning_rate": 0.0001477506516675709, "loss": 11.6725, "step": 48968 }, { "epoch": 1.0250565184626979, "grad_norm": 0.2790057361125946, "learning_rate": 0.00014774872523486552, "loss": 11.6581, "step": 48969 }, { "epoch": 1.02507745122666, "grad_norm": 0.3034501075744629, "learning_rate": 0.00014774679877920633, "loss": 11.6613, "step": 48970 }, { "epoch": 1.025098383990622, "grad_norm": 0.34054651856422424, "learning_rate": 0.0001477448723005943, "loss": 11.6728, "step": 48971 }, { "epoch": 1.0251193167545842, "grad_norm": 0.3298689126968384, "learning_rate": 0.0001477429457990304, "loss": 11.6589, "step": 48972 }, { "epoch": 1.0251402495185464, "grad_norm": 0.306254118680954, "learning_rate": 0.00014774101927451548, "loss": 11.6527, "step": 48973 }, { "epoch": 1.0251611822825086, "grad_norm": 0.3091866970062256, "learning_rate": 0.00014773909272705046, "loss": 11.674, "step": 48974 }, { "epoch": 1.0251821150464708, "grad_norm": 0.31699392199516296, "learning_rate": 0.00014773716615663633, "loss": 11.6643, "step": 48975 }, { "epoch": 1.0252030478104328, "grad_norm": 0.27581578493118286, "learning_rate": 0.000147735239563274, "loss": 11.6619, "step": 48976 }, { "epoch": 1.025223980574395, "grad_norm": 0.27341973781585693, "learning_rate": 0.00014773331294696437, "loss": 11.6677, "step": 48977 }, { "epoch": 1.0252449133383572, "grad_norm": 0.3074702322483063, "learning_rate": 0.00014773138630770838, "loss": 11.6599, "step": 48978 }, { "epoch": 1.0252658461023194, "grad_norm": 0.4532403349876404, "learning_rate": 0.00014772945964550694, "loss": 11.679, "step": 48979 }, { "epoch": 1.0252867788662816, "grad_norm": 0.3116261959075928, "learning_rate": 0.00014772753296036101, "loss": 11.6645, "step": 48980 }, { "epoch": 1.0253077116302436, "grad_norm": 0.3009563684463501, "learning_rate": 0.00014772560625227153, "loss": 11.6655, "step": 48981 }, { "epoch": 1.0253286443942058, "grad_norm": 0.3029018044471741, "learning_rate": 0.00014772367952123932, "loss": 11.6684, "step": 48982 }, { "epoch": 1.025349577158168, "grad_norm": 0.30499985814094543, "learning_rate": 0.00014772175276726545, "loss": 11.6786, "step": 48983 }, { "epoch": 1.0253705099221302, "grad_norm": 0.29694414138793945, "learning_rate": 0.00014771982599035076, "loss": 11.666, "step": 48984 }, { "epoch": 1.0253914426860922, "grad_norm": 0.28075990080833435, "learning_rate": 0.0001477178991904962, "loss": 11.6878, "step": 48985 }, { "epoch": 1.0254123754500544, "grad_norm": 0.3123909533023834, "learning_rate": 0.00014771597236770266, "loss": 11.6608, "step": 48986 }, { "epoch": 1.0254333082140166, "grad_norm": 0.3323093056678772, "learning_rate": 0.0001477140455219711, "loss": 11.6564, "step": 48987 }, { "epoch": 1.0254542409779788, "grad_norm": 0.3926394283771515, "learning_rate": 0.00014771211865330245, "loss": 11.6573, "step": 48988 }, { "epoch": 1.025475173741941, "grad_norm": 0.2881142199039459, "learning_rate": 0.00014771019176169765, "loss": 11.6675, "step": 48989 }, { "epoch": 1.025496106505903, "grad_norm": 0.32416608929634094, "learning_rate": 0.00014770826484715758, "loss": 11.6863, "step": 48990 }, { "epoch": 1.0255170392698651, "grad_norm": 0.31966862082481384, "learning_rate": 0.0001477063379096832, "loss": 11.6701, "step": 48991 }, { "epoch": 1.0255379720338273, "grad_norm": 0.2589620053768158, "learning_rate": 0.00014770441094927542, "loss": 11.6686, "step": 48992 }, { "epoch": 1.0255589047977896, "grad_norm": 0.9660559892654419, "learning_rate": 0.0001477024839659352, "loss": 11.6671, "step": 48993 }, { "epoch": 1.0255798375617518, "grad_norm": 0.3470337390899658, "learning_rate": 0.00014770055695966338, "loss": 11.6782, "step": 48994 }, { "epoch": 1.0256007703257137, "grad_norm": 0.3148214519023895, "learning_rate": 0.000147698629930461, "loss": 11.6724, "step": 48995 }, { "epoch": 1.025621703089676, "grad_norm": 0.32447513937950134, "learning_rate": 0.0001476967028783289, "loss": 11.6698, "step": 48996 }, { "epoch": 1.0256426358536381, "grad_norm": 0.2673865556716919, "learning_rate": 0.00014769477580326806, "loss": 11.6629, "step": 48997 }, { "epoch": 1.0256635686176003, "grad_norm": 0.27719125151634216, "learning_rate": 0.0001476928487052794, "loss": 11.6641, "step": 48998 }, { "epoch": 1.0256845013815625, "grad_norm": 0.2879163324832916, "learning_rate": 0.0001476909215843638, "loss": 11.6717, "step": 48999 }, { "epoch": 1.0257054341455245, "grad_norm": 0.2890225350856781, "learning_rate": 0.00014768899444052224, "loss": 11.6683, "step": 49000 }, { "epoch": 1.0257054341455245, "eval_loss": 11.668989181518555, "eval_runtime": 34.3777, "eval_samples_per_second": 27.954, "eval_steps_per_second": 7.01, "step": 49000 }, { "epoch": 1.0257263669094867, "grad_norm": 0.41846010088920593, "learning_rate": 0.0001476870672737556, "loss": 11.6765, "step": 49001 }, { "epoch": 1.025747299673449, "grad_norm": 0.32939258217811584, "learning_rate": 0.00014768514008406484, "loss": 11.6616, "step": 49002 }, { "epoch": 1.025768232437411, "grad_norm": 0.3159659504890442, "learning_rate": 0.0001476832128714509, "loss": 11.6673, "step": 49003 }, { "epoch": 1.025789165201373, "grad_norm": 0.33087772130966187, "learning_rate": 0.00014768128563591464, "loss": 11.6738, "step": 49004 }, { "epoch": 1.0258100979653353, "grad_norm": 0.33737313747406006, "learning_rate": 0.0001476793583774571, "loss": 11.6683, "step": 49005 }, { "epoch": 1.0258310307292975, "grad_norm": 0.289470911026001, "learning_rate": 0.00014767743109607908, "loss": 11.6634, "step": 49006 }, { "epoch": 1.0258519634932597, "grad_norm": 0.2491772621870041, "learning_rate": 0.0001476755037917816, "loss": 11.6678, "step": 49007 }, { "epoch": 1.025872896257222, "grad_norm": 0.31098490953445435, "learning_rate": 0.0001476735764645655, "loss": 11.6751, "step": 49008 }, { "epoch": 1.0258938290211839, "grad_norm": 0.24834299087524414, "learning_rate": 0.0001476716491144318, "loss": 11.6747, "step": 49009 }, { "epoch": 1.025914761785146, "grad_norm": 0.3224928081035614, "learning_rate": 0.00014766972174138138, "loss": 11.6704, "step": 49010 }, { "epoch": 1.0259356945491083, "grad_norm": 0.36536502838134766, "learning_rate": 0.00014766779434541514, "loss": 11.6677, "step": 49011 }, { "epoch": 1.0259566273130705, "grad_norm": 0.4549548923969269, "learning_rate": 0.00014766586692653406, "loss": 11.6864, "step": 49012 }, { "epoch": 1.0259775600770327, "grad_norm": 0.26069197058677673, "learning_rate": 0.00014766393948473905, "loss": 11.6717, "step": 49013 }, { "epoch": 1.0259984928409946, "grad_norm": 0.36343443393707275, "learning_rate": 0.00014766201202003098, "loss": 11.679, "step": 49014 }, { "epoch": 1.0260194256049568, "grad_norm": 0.3836527168750763, "learning_rate": 0.0001476600845324109, "loss": 11.6741, "step": 49015 }, { "epoch": 1.026040358368919, "grad_norm": 0.4150448739528656, "learning_rate": 0.0001476581570218796, "loss": 11.6726, "step": 49016 }, { "epoch": 1.0260612911328812, "grad_norm": 0.3157162368297577, "learning_rate": 0.0001476562294884381, "loss": 11.69, "step": 49017 }, { "epoch": 1.0260822238968434, "grad_norm": 0.2712142765522003, "learning_rate": 0.0001476543019320873, "loss": 11.6867, "step": 49018 }, { "epoch": 1.0261031566608054, "grad_norm": 0.3338065445423126, "learning_rate": 0.00014765237435282808, "loss": 11.6509, "step": 49019 }, { "epoch": 1.0261240894247676, "grad_norm": 0.3687075972557068, "learning_rate": 0.00014765044675066146, "loss": 11.6749, "step": 49020 }, { "epoch": 1.0261450221887298, "grad_norm": 0.3789931833744049, "learning_rate": 0.00014764851912558828, "loss": 11.6777, "step": 49021 }, { "epoch": 1.026165954952692, "grad_norm": 0.3808272182941437, "learning_rate": 0.00014764659147760954, "loss": 11.6424, "step": 49022 }, { "epoch": 1.026186887716654, "grad_norm": 0.30351340770721436, "learning_rate": 0.00014764466380672608, "loss": 11.6715, "step": 49023 }, { "epoch": 1.0262078204806162, "grad_norm": 0.9758446216583252, "learning_rate": 0.00014764273611293888, "loss": 11.7007, "step": 49024 }, { "epoch": 1.0262287532445784, "grad_norm": 0.3404902219772339, "learning_rate": 0.0001476408083962489, "loss": 11.6648, "step": 49025 }, { "epoch": 1.0262496860085406, "grad_norm": 0.3130250573158264, "learning_rate": 0.000147638880656657, "loss": 11.6739, "step": 49026 }, { "epoch": 1.0262706187725028, "grad_norm": 0.3195883333683014, "learning_rate": 0.00014763695289416416, "loss": 11.6818, "step": 49027 }, { "epoch": 1.0262915515364648, "grad_norm": 0.2636825144290924, "learning_rate": 0.00014763502510877128, "loss": 11.6692, "step": 49028 }, { "epoch": 1.026312484300427, "grad_norm": 0.29308241605758667, "learning_rate": 0.00014763309730047924, "loss": 11.6644, "step": 49029 }, { "epoch": 1.0263334170643892, "grad_norm": 0.3083333671092987, "learning_rate": 0.0001476311694692891, "loss": 11.6648, "step": 49030 }, { "epoch": 1.0263543498283514, "grad_norm": 0.3332751393318176, "learning_rate": 0.00014762924161520165, "loss": 11.6817, "step": 49031 }, { "epoch": 1.0263752825923136, "grad_norm": 0.3026070296764374, "learning_rate": 0.00014762731373821786, "loss": 11.6593, "step": 49032 }, { "epoch": 1.0263962153562756, "grad_norm": 0.29546597599983215, "learning_rate": 0.00014762538583833868, "loss": 11.6651, "step": 49033 }, { "epoch": 1.0264171481202378, "grad_norm": 0.31631606817245483, "learning_rate": 0.00014762345791556502, "loss": 11.6813, "step": 49034 }, { "epoch": 1.0264380808842, "grad_norm": 0.3494105041027069, "learning_rate": 0.0001476215299698978, "loss": 11.6756, "step": 49035 }, { "epoch": 1.0264590136481622, "grad_norm": 0.33903613686561584, "learning_rate": 0.00014761960200133798, "loss": 11.687, "step": 49036 }, { "epoch": 1.0264799464121244, "grad_norm": 0.32730773091316223, "learning_rate": 0.00014761767400988646, "loss": 11.6674, "step": 49037 }, { "epoch": 1.0265008791760863, "grad_norm": 0.30721035599708557, "learning_rate": 0.00014761574599554415, "loss": 11.6606, "step": 49038 }, { "epoch": 1.0265218119400485, "grad_norm": 0.3273542523384094, "learning_rate": 0.00014761381795831203, "loss": 11.6453, "step": 49039 }, { "epoch": 1.0265427447040107, "grad_norm": 0.2758883237838745, "learning_rate": 0.00014761188989819094, "loss": 11.6689, "step": 49040 }, { "epoch": 1.026563677467973, "grad_norm": 0.31643664836883545, "learning_rate": 0.00014760996181518193, "loss": 11.6744, "step": 49041 }, { "epoch": 1.026584610231935, "grad_norm": 0.34492558240890503, "learning_rate": 0.0001476080337092858, "loss": 11.6772, "step": 49042 }, { "epoch": 1.0266055429958971, "grad_norm": 0.3419836163520813, "learning_rate": 0.00014760610558050354, "loss": 11.6643, "step": 49043 }, { "epoch": 1.0266264757598593, "grad_norm": 0.30168721079826355, "learning_rate": 0.0001476041774288361, "loss": 11.6709, "step": 49044 }, { "epoch": 1.0266474085238215, "grad_norm": 0.2599448263645172, "learning_rate": 0.00014760224925428435, "loss": 11.6593, "step": 49045 }, { "epoch": 1.0266683412877837, "grad_norm": 0.37949222326278687, "learning_rate": 0.00014760032105684927, "loss": 11.6832, "step": 49046 }, { "epoch": 1.0266892740517457, "grad_norm": 0.3748973309993744, "learning_rate": 0.00014759839283653173, "loss": 11.6606, "step": 49047 }, { "epoch": 1.026710206815708, "grad_norm": 0.34358707070350647, "learning_rate": 0.00014759646459333272, "loss": 11.671, "step": 49048 }, { "epoch": 1.02673113957967, "grad_norm": 0.32088467478752136, "learning_rate": 0.0001475945363272531, "loss": 11.679, "step": 49049 }, { "epoch": 1.0267520723436323, "grad_norm": 0.28631123900413513, "learning_rate": 0.00014759260803829383, "loss": 11.6707, "step": 49050 }, { "epoch": 1.0267730051075945, "grad_norm": 0.2653529644012451, "learning_rate": 0.0001475906797264559, "loss": 11.6621, "step": 49051 }, { "epoch": 1.0267939378715565, "grad_norm": 1.070421814918518, "learning_rate": 0.00014758875139174013, "loss": 11.6706, "step": 49052 }, { "epoch": 1.0268148706355187, "grad_norm": 0.3178001344203949, "learning_rate": 0.00014758682303414751, "loss": 11.6647, "step": 49053 }, { "epoch": 1.0268358033994809, "grad_norm": 0.2951890528202057, "learning_rate": 0.00014758489465367894, "loss": 11.6854, "step": 49054 }, { "epoch": 1.026856736163443, "grad_norm": 0.32585108280181885, "learning_rate": 0.00014758296625033538, "loss": 11.6822, "step": 49055 }, { "epoch": 1.0268776689274053, "grad_norm": 0.326517254114151, "learning_rate": 0.0001475810378241177, "loss": 11.6647, "step": 49056 }, { "epoch": 1.0268986016913673, "grad_norm": 0.33910509943962097, "learning_rate": 0.0001475791093750269, "loss": 11.6818, "step": 49057 }, { "epoch": 1.0269195344553295, "grad_norm": 0.3014167547225952, "learning_rate": 0.00014757718090306385, "loss": 11.648, "step": 49058 }, { "epoch": 1.0269404672192917, "grad_norm": 0.31539425253868103, "learning_rate": 0.00014757525240822946, "loss": 11.6616, "step": 49059 }, { "epoch": 1.0269613999832539, "grad_norm": 0.3207584023475647, "learning_rate": 0.00014757332389052477, "loss": 11.6466, "step": 49060 }, { "epoch": 1.0269823327472158, "grad_norm": 0.3115156888961792, "learning_rate": 0.00014757139534995055, "loss": 11.6881, "step": 49061 }, { "epoch": 1.027003265511178, "grad_norm": 0.30863824486732483, "learning_rate": 0.00014756946678650787, "loss": 11.6611, "step": 49062 }, { "epoch": 1.0270241982751402, "grad_norm": 0.34392356872558594, "learning_rate": 0.00014756753820019755, "loss": 11.6867, "step": 49063 }, { "epoch": 1.0270451310391024, "grad_norm": 0.3201983571052551, "learning_rate": 0.00014756560959102057, "loss": 11.6553, "step": 49064 }, { "epoch": 1.0270660638030646, "grad_norm": 0.36230286955833435, "learning_rate": 0.00014756368095897787, "loss": 11.6711, "step": 49065 }, { "epoch": 1.0270869965670266, "grad_norm": 0.32423752546310425, "learning_rate": 0.00014756175230407035, "loss": 11.6553, "step": 49066 }, { "epoch": 1.0271079293309888, "grad_norm": 0.27695077657699585, "learning_rate": 0.00014755982362629893, "loss": 11.6694, "step": 49067 }, { "epoch": 1.027128862094951, "grad_norm": 0.3143896758556366, "learning_rate": 0.00014755789492566454, "loss": 11.6836, "step": 49068 }, { "epoch": 1.0271497948589132, "grad_norm": 0.3524632453918457, "learning_rate": 0.00014755596620216814, "loss": 11.6892, "step": 49069 }, { "epoch": 1.0271707276228754, "grad_norm": 0.336195707321167, "learning_rate": 0.00014755403745581063, "loss": 11.6551, "step": 49070 }, { "epoch": 1.0271916603868374, "grad_norm": 0.29262930154800415, "learning_rate": 0.00014755210868659291, "loss": 11.6539, "step": 49071 }, { "epoch": 1.0272125931507996, "grad_norm": 0.23292653262615204, "learning_rate": 0.00014755017989451599, "loss": 11.6533, "step": 49072 }, { "epoch": 1.0272335259147618, "grad_norm": 0.2926672399044037, "learning_rate": 0.00014754825107958071, "loss": 11.6667, "step": 49073 }, { "epoch": 1.027254458678724, "grad_norm": 0.2911788821220398, "learning_rate": 0.00014754632224178805, "loss": 11.6711, "step": 49074 }, { "epoch": 1.0272753914426862, "grad_norm": 0.3677026033401489, "learning_rate": 0.0001475443933811389, "loss": 11.6502, "step": 49075 }, { "epoch": 1.0272963242066482, "grad_norm": 0.3133823275566101, "learning_rate": 0.00014754246449763422, "loss": 11.6562, "step": 49076 }, { "epoch": 1.0273172569706104, "grad_norm": 0.30375033617019653, "learning_rate": 0.00014754053559127493, "loss": 11.673, "step": 49077 }, { "epoch": 1.0273381897345726, "grad_norm": 0.4284268915653229, "learning_rate": 0.00014753860666206196, "loss": 11.6504, "step": 49078 }, { "epoch": 1.0273591224985348, "grad_norm": 0.31162577867507935, "learning_rate": 0.0001475366777099962, "loss": 11.664, "step": 49079 }, { "epoch": 1.0273800552624968, "grad_norm": 0.3036390244960785, "learning_rate": 0.00014753474873507863, "loss": 11.661, "step": 49080 }, { "epoch": 1.027400988026459, "grad_norm": 0.4009547531604767, "learning_rate": 0.00014753281973731015, "loss": 11.6819, "step": 49081 }, { "epoch": 1.0274219207904212, "grad_norm": 0.3276086449623108, "learning_rate": 0.00014753089071669167, "loss": 11.6775, "step": 49082 }, { "epoch": 1.0274428535543834, "grad_norm": 0.2679273188114166, "learning_rate": 0.00014752896167322417, "loss": 11.6608, "step": 49083 }, { "epoch": 1.0274637863183456, "grad_norm": 0.3301284909248352, "learning_rate": 0.00014752703260690852, "loss": 11.6587, "step": 49084 }, { "epoch": 1.0274847190823075, "grad_norm": 0.40132904052734375, "learning_rate": 0.00014752510351774567, "loss": 11.6871, "step": 49085 }, { "epoch": 1.0275056518462697, "grad_norm": 0.36954745650291443, "learning_rate": 0.0001475231744057366, "loss": 11.6729, "step": 49086 }, { "epoch": 1.027526584610232, "grad_norm": 0.22934570908546448, "learning_rate": 0.00014752124527088212, "loss": 11.6638, "step": 49087 }, { "epoch": 1.0275475173741941, "grad_norm": 0.29519519209861755, "learning_rate": 0.00014751931611318327, "loss": 11.6709, "step": 49088 }, { "epoch": 1.0275684501381563, "grad_norm": 0.3133404850959778, "learning_rate": 0.0001475173869326409, "loss": 11.6709, "step": 49089 }, { "epoch": 1.0275893829021183, "grad_norm": 0.2831353545188904, "learning_rate": 0.000147515457729256, "loss": 11.6391, "step": 49090 }, { "epoch": 1.0276103156660805, "grad_norm": 0.2579191327095032, "learning_rate": 0.00014751352850302945, "loss": 11.6704, "step": 49091 }, { "epoch": 1.0276312484300427, "grad_norm": 0.2762608826160431, "learning_rate": 0.0001475115992539622, "loss": 11.6477, "step": 49092 }, { "epoch": 1.027652181194005, "grad_norm": 0.3013053238391876, "learning_rate": 0.00014750966998205517, "loss": 11.6743, "step": 49093 }, { "epoch": 1.0276731139579671, "grad_norm": 0.34619343280792236, "learning_rate": 0.0001475077406873093, "loss": 11.6817, "step": 49094 }, { "epoch": 1.027694046721929, "grad_norm": 0.25819531083106995, "learning_rate": 0.00014750581136972548, "loss": 11.6697, "step": 49095 }, { "epoch": 1.0277149794858913, "grad_norm": 0.30669981241226196, "learning_rate": 0.00014750388202930468, "loss": 11.6773, "step": 49096 }, { "epoch": 1.0277359122498535, "grad_norm": 0.2894505262374878, "learning_rate": 0.00014750195266604782, "loss": 11.6503, "step": 49097 }, { "epoch": 1.0277568450138157, "grad_norm": 0.3375547230243683, "learning_rate": 0.00014750002327995582, "loss": 11.6726, "step": 49098 }, { "epoch": 1.0277777777777777, "grad_norm": 0.32396018505096436, "learning_rate": 0.0001474980938710296, "loss": 11.6811, "step": 49099 }, { "epoch": 1.0277987105417399, "grad_norm": 0.3116930425167084, "learning_rate": 0.00014749616443927011, "loss": 11.6619, "step": 49100 }, { "epoch": 1.027819643305702, "grad_norm": 0.29539912939071655, "learning_rate": 0.00014749423498467825, "loss": 11.6598, "step": 49101 }, { "epoch": 1.0278405760696643, "grad_norm": 0.3573737144470215, "learning_rate": 0.00014749230550725495, "loss": 11.6537, "step": 49102 }, { "epoch": 1.0278615088336265, "grad_norm": 0.3395158350467682, "learning_rate": 0.00014749037600700116, "loss": 11.671, "step": 49103 }, { "epoch": 1.0278824415975885, "grad_norm": 0.3430061638355255, "learning_rate": 0.00014748844648391777, "loss": 11.6574, "step": 49104 }, { "epoch": 1.0279033743615507, "grad_norm": 0.26852014660835266, "learning_rate": 0.00014748651693800578, "loss": 11.6673, "step": 49105 }, { "epoch": 1.0279243071255129, "grad_norm": 0.30082008242607117, "learning_rate": 0.00014748458736926602, "loss": 11.6792, "step": 49106 }, { "epoch": 1.027945239889475, "grad_norm": 0.3352263867855072, "learning_rate": 0.00014748265777769946, "loss": 11.6723, "step": 49107 }, { "epoch": 1.0279661726534373, "grad_norm": 0.37175726890563965, "learning_rate": 0.0001474807281633071, "loss": 11.6616, "step": 49108 }, { "epoch": 1.0279871054173992, "grad_norm": 0.3304760456085205, "learning_rate": 0.00014747879852608975, "loss": 11.6568, "step": 49109 }, { "epoch": 1.0280080381813614, "grad_norm": 0.3231409788131714, "learning_rate": 0.0001474768688660484, "loss": 11.6548, "step": 49110 }, { "epoch": 1.0280289709453236, "grad_norm": 0.36421453952789307, "learning_rate": 0.00014747493918318397, "loss": 11.6762, "step": 49111 }, { "epoch": 1.0280499037092858, "grad_norm": 0.3796085715293884, "learning_rate": 0.0001474730094774974, "loss": 11.668, "step": 49112 }, { "epoch": 1.028070836473248, "grad_norm": 0.34850558638572693, "learning_rate": 0.0001474710797489896, "loss": 11.6654, "step": 49113 }, { "epoch": 1.02809176923721, "grad_norm": 0.24461767077445984, "learning_rate": 0.0001474691499976615, "loss": 11.6599, "step": 49114 }, { "epoch": 1.0281127020011722, "grad_norm": 0.28822189569473267, "learning_rate": 0.00014746722022351402, "loss": 11.6846, "step": 49115 }, { "epoch": 1.0281336347651344, "grad_norm": 0.27257850766181946, "learning_rate": 0.0001474652904265481, "loss": 11.6653, "step": 49116 }, { "epoch": 1.0281545675290966, "grad_norm": 0.37046271562576294, "learning_rate": 0.00014746336060676463, "loss": 11.664, "step": 49117 }, { "epoch": 1.0281755002930586, "grad_norm": 0.35455524921417236, "learning_rate": 0.00014746143076416462, "loss": 11.6754, "step": 49118 }, { "epoch": 1.0281964330570208, "grad_norm": 0.3223947584629059, "learning_rate": 0.00014745950089874892, "loss": 11.6652, "step": 49119 }, { "epoch": 1.028217365820983, "grad_norm": 0.2556423246860504, "learning_rate": 0.00014745757101051853, "loss": 11.6689, "step": 49120 }, { "epoch": 1.0282382985849452, "grad_norm": 0.3107931315898895, "learning_rate": 0.00014745564109947427, "loss": 11.6556, "step": 49121 }, { "epoch": 1.0282592313489074, "grad_norm": 0.3317622244358063, "learning_rate": 0.00014745371116561717, "loss": 11.6742, "step": 49122 }, { "epoch": 1.0282801641128694, "grad_norm": 0.27592554688453674, "learning_rate": 0.0001474517812089481, "loss": 11.6677, "step": 49123 }, { "epoch": 1.0283010968768316, "grad_norm": 0.28355684876441956, "learning_rate": 0.000147449851229468, "loss": 11.6788, "step": 49124 }, { "epoch": 1.0283220296407938, "grad_norm": 0.2881624400615692, "learning_rate": 0.00014744792122717786, "loss": 11.6936, "step": 49125 }, { "epoch": 1.028342962404756, "grad_norm": 0.28244510293006897, "learning_rate": 0.0001474459912020785, "loss": 11.6622, "step": 49126 }, { "epoch": 1.0283638951687182, "grad_norm": 0.3969607949256897, "learning_rate": 0.00014744406115417094, "loss": 11.6792, "step": 49127 }, { "epoch": 1.0283848279326802, "grad_norm": 0.405156672000885, "learning_rate": 0.00014744213108345604, "loss": 11.6906, "step": 49128 }, { "epoch": 1.0284057606966424, "grad_norm": 0.32844892144203186, "learning_rate": 0.00014744020098993475, "loss": 11.6643, "step": 49129 }, { "epoch": 1.0284266934606046, "grad_norm": 0.31380513310432434, "learning_rate": 0.00014743827087360803, "loss": 11.6571, "step": 49130 }, { "epoch": 1.0284476262245668, "grad_norm": 0.333990216255188, "learning_rate": 0.0001474363407344768, "loss": 11.6667, "step": 49131 }, { "epoch": 1.028468558988529, "grad_norm": 0.32030126452445984, "learning_rate": 0.00014743441057254195, "loss": 11.6661, "step": 49132 }, { "epoch": 1.028489491752491, "grad_norm": 0.3365364372730255, "learning_rate": 0.00014743248038780443, "loss": 11.6837, "step": 49133 }, { "epoch": 1.0285104245164531, "grad_norm": 1.6111807823181152, "learning_rate": 0.00014743055018026513, "loss": 11.7209, "step": 49134 }, { "epoch": 1.0285313572804153, "grad_norm": 0.34344977140426636, "learning_rate": 0.00014742861994992504, "loss": 11.6715, "step": 49135 }, { "epoch": 1.0285522900443775, "grad_norm": 0.26068753004074097, "learning_rate": 0.00014742668969678507, "loss": 11.6643, "step": 49136 }, { "epoch": 1.0285732228083395, "grad_norm": 0.3341332972049713, "learning_rate": 0.00014742475942084614, "loss": 11.6808, "step": 49137 }, { "epoch": 1.0285941555723017, "grad_norm": 0.29703882336616516, "learning_rate": 0.00014742282912210918, "loss": 11.6507, "step": 49138 }, { "epoch": 1.028615088336264, "grad_norm": 0.33073684573173523, "learning_rate": 0.0001474208988005751, "loss": 11.6561, "step": 49139 }, { "epoch": 1.0286360211002261, "grad_norm": 0.29418402910232544, "learning_rate": 0.00014741896845624485, "loss": 11.6558, "step": 49140 }, { "epoch": 1.0286569538641883, "grad_norm": 0.40098679065704346, "learning_rate": 0.00014741703808911934, "loss": 11.6572, "step": 49141 }, { "epoch": 1.0286778866281503, "grad_norm": 0.32339826226234436, "learning_rate": 0.00014741510769919954, "loss": 11.6535, "step": 49142 }, { "epoch": 1.0286988193921125, "grad_norm": 0.39650624990463257, "learning_rate": 0.00014741317728648635, "loss": 11.6782, "step": 49143 }, { "epoch": 1.0287197521560747, "grad_norm": 0.3019696772098541, "learning_rate": 0.00014741124685098067, "loss": 11.6698, "step": 49144 }, { "epoch": 1.028740684920037, "grad_norm": 0.2822806239128113, "learning_rate": 0.00014740931639268344, "loss": 11.6737, "step": 49145 }, { "epoch": 1.028761617683999, "grad_norm": 0.26810961961746216, "learning_rate": 0.00014740738591159563, "loss": 11.6692, "step": 49146 }, { "epoch": 1.028782550447961, "grad_norm": 0.2829062342643738, "learning_rate": 0.00014740545540771813, "loss": 11.6504, "step": 49147 }, { "epoch": 1.0288034832119233, "grad_norm": 0.2674415111541748, "learning_rate": 0.00014740352488105187, "loss": 11.6745, "step": 49148 }, { "epoch": 1.0288244159758855, "grad_norm": 0.27512848377227783, "learning_rate": 0.00014740159433159782, "loss": 11.6628, "step": 49149 }, { "epoch": 1.0288453487398477, "grad_norm": 0.26200371980667114, "learning_rate": 0.00014739966375935682, "loss": 11.6744, "step": 49150 }, { "epoch": 1.0288662815038097, "grad_norm": 0.4100917875766754, "learning_rate": 0.00014739773316432988, "loss": 11.6731, "step": 49151 }, { "epoch": 1.0288872142677719, "grad_norm": 0.4844086170196533, "learning_rate": 0.00014739580254651794, "loss": 11.677, "step": 49152 }, { "epoch": 1.028908147031734, "grad_norm": 0.36788567900657654, "learning_rate": 0.00014739387190592181, "loss": 11.6609, "step": 49153 }, { "epoch": 1.0289290797956963, "grad_norm": 0.26878854632377625, "learning_rate": 0.00014739194124254254, "loss": 11.652, "step": 49154 }, { "epoch": 1.0289500125596585, "grad_norm": 0.3317684531211853, "learning_rate": 0.00014739001055638103, "loss": 11.6551, "step": 49155 }, { "epoch": 1.0289709453236204, "grad_norm": 0.2837837040424347, "learning_rate": 0.00014738807984743816, "loss": 11.6707, "step": 49156 }, { "epoch": 1.0289918780875826, "grad_norm": 0.4769271910190582, "learning_rate": 0.0001473861491157149, "loss": 11.6878, "step": 49157 }, { "epoch": 1.0290128108515448, "grad_norm": 0.33058249950408936, "learning_rate": 0.00014738421836121214, "loss": 11.668, "step": 49158 }, { "epoch": 1.029033743615507, "grad_norm": 0.33637213706970215, "learning_rate": 0.0001473822875839309, "loss": 11.668, "step": 49159 }, { "epoch": 1.0290546763794692, "grad_norm": 0.2775980830192566, "learning_rate": 0.000147380356783872, "loss": 11.6648, "step": 49160 }, { "epoch": 1.0290756091434312, "grad_norm": 0.28058043122291565, "learning_rate": 0.00014737842596103642, "loss": 11.6664, "step": 49161 }, { "epoch": 1.0290965419073934, "grad_norm": 0.3692132532596588, "learning_rate": 0.0001473764951154251, "loss": 11.6788, "step": 49162 }, { "epoch": 1.0291174746713556, "grad_norm": 0.38701745867729187, "learning_rate": 0.0001473745642470389, "loss": 11.678, "step": 49163 }, { "epoch": 1.0291384074353178, "grad_norm": 0.38229888677597046, "learning_rate": 0.00014737263335587886, "loss": 11.6844, "step": 49164 }, { "epoch": 1.02915934019928, "grad_norm": 0.35211315751075745, "learning_rate": 0.0001473707024419458, "loss": 11.6838, "step": 49165 }, { "epoch": 1.029180272963242, "grad_norm": 0.28334203362464905, "learning_rate": 0.0001473687715052407, "loss": 11.6495, "step": 49166 }, { "epoch": 1.0292012057272042, "grad_norm": 0.2849676012992859, "learning_rate": 0.0001473668405457645, "loss": 11.6745, "step": 49167 }, { "epoch": 1.0292221384911664, "grad_norm": 0.28906506299972534, "learning_rate": 0.00014736490956351813, "loss": 11.676, "step": 49168 }, { "epoch": 1.0292430712551286, "grad_norm": 0.31735125184059143, "learning_rate": 0.00014736297855850246, "loss": 11.6661, "step": 49169 }, { "epoch": 1.0292640040190906, "grad_norm": 0.4115874171257019, "learning_rate": 0.00014736104753071847, "loss": 11.6693, "step": 49170 }, { "epoch": 1.0292849367830528, "grad_norm": 0.38093435764312744, "learning_rate": 0.00014735911648016707, "loss": 11.68, "step": 49171 }, { "epoch": 1.029305869547015, "grad_norm": 0.3372710645198822, "learning_rate": 0.0001473571854068492, "loss": 11.6562, "step": 49172 }, { "epoch": 1.0293268023109772, "grad_norm": 0.2986583113670349, "learning_rate": 0.00014735525431076578, "loss": 11.6667, "step": 49173 }, { "epoch": 1.0293477350749394, "grad_norm": 0.31797167658805847, "learning_rate": 0.00014735332319191775, "loss": 11.6634, "step": 49174 }, { "epoch": 1.0293686678389014, "grad_norm": 0.2755064368247986, "learning_rate": 0.000147351392050306, "loss": 11.6446, "step": 49175 }, { "epoch": 1.0293896006028636, "grad_norm": 0.3118498623371124, "learning_rate": 0.0001473494608859315, "loss": 11.66, "step": 49176 }, { "epoch": 1.0294105333668258, "grad_norm": 0.2920381426811218, "learning_rate": 0.00014734752969879517, "loss": 11.6758, "step": 49177 }, { "epoch": 1.029431466130788, "grad_norm": 0.3386279344558716, "learning_rate": 0.00014734559848889793, "loss": 11.6539, "step": 49178 }, { "epoch": 1.0294523988947502, "grad_norm": 0.25011390447616577, "learning_rate": 0.00014734366725624074, "loss": 11.6621, "step": 49179 }, { "epoch": 1.0294733316587121, "grad_norm": 0.3458452820777893, "learning_rate": 0.00014734173600082447, "loss": 11.6635, "step": 49180 }, { "epoch": 1.0294942644226743, "grad_norm": 0.6732353568077087, "learning_rate": 0.0001473398047226501, "loss": 11.6533, "step": 49181 }, { "epoch": 1.0295151971866365, "grad_norm": 0.3630248010158539, "learning_rate": 0.0001473378734217185, "loss": 11.6815, "step": 49182 }, { "epoch": 1.0295361299505987, "grad_norm": 0.2647557556629181, "learning_rate": 0.00014733594209803065, "loss": 11.6657, "step": 49183 }, { "epoch": 1.029557062714561, "grad_norm": 0.3050941526889801, "learning_rate": 0.00014733401075158747, "loss": 11.6692, "step": 49184 }, { "epoch": 1.029577995478523, "grad_norm": 0.3643225431442261, "learning_rate": 0.0001473320793823899, "loss": 11.6887, "step": 49185 }, { "epoch": 1.0295989282424851, "grad_norm": 0.3228309154510498, "learning_rate": 0.00014733014799043885, "loss": 11.6645, "step": 49186 }, { "epoch": 1.0296198610064473, "grad_norm": 0.35006171464920044, "learning_rate": 0.00014732821657573522, "loss": 11.6602, "step": 49187 }, { "epoch": 1.0296407937704095, "grad_norm": 0.3257457911968231, "learning_rate": 0.00014732628513828, "loss": 11.6737, "step": 49188 }, { "epoch": 1.0296617265343715, "grad_norm": 0.2745388150215149, "learning_rate": 0.00014732435367807408, "loss": 11.6747, "step": 49189 }, { "epoch": 1.0296826592983337, "grad_norm": 0.33977368474006653, "learning_rate": 0.00014732242219511835, "loss": 11.6564, "step": 49190 }, { "epoch": 1.029703592062296, "grad_norm": 0.29061660170555115, "learning_rate": 0.00014732049068941384, "loss": 11.6689, "step": 49191 }, { "epoch": 1.029724524826258, "grad_norm": 0.34603291749954224, "learning_rate": 0.0001473185591609614, "loss": 11.6702, "step": 49192 }, { "epoch": 1.0297454575902203, "grad_norm": 0.24529139697551727, "learning_rate": 0.000147316627609762, "loss": 11.6832, "step": 49193 }, { "epoch": 1.0297663903541823, "grad_norm": 0.2948162257671356, "learning_rate": 0.0001473146960358165, "loss": 11.6389, "step": 49194 }, { "epoch": 1.0297873231181445, "grad_norm": 0.28880906105041504, "learning_rate": 0.0001473127644391259, "loss": 11.6544, "step": 49195 }, { "epoch": 1.0298082558821067, "grad_norm": 0.36924082040786743, "learning_rate": 0.0001473108328196911, "loss": 11.672, "step": 49196 }, { "epoch": 1.0298291886460689, "grad_norm": 0.38392722606658936, "learning_rate": 0.00014730890117751306, "loss": 11.6669, "step": 49197 }, { "epoch": 1.029850121410031, "grad_norm": 0.41565844416618347, "learning_rate": 0.00014730696951259267, "loss": 11.7032, "step": 49198 }, { "epoch": 1.029871054173993, "grad_norm": 0.29708215594291687, "learning_rate": 0.00014730503782493088, "loss": 11.652, "step": 49199 }, { "epoch": 1.0298919869379553, "grad_norm": 0.39040669798851013, "learning_rate": 0.0001473031061145286, "loss": 11.6638, "step": 49200 }, { "epoch": 1.0299129197019175, "grad_norm": 0.2790123522281647, "learning_rate": 0.00014730117438138676, "loss": 11.6697, "step": 49201 }, { "epoch": 1.0299338524658797, "grad_norm": 0.3517736494541168, "learning_rate": 0.00014729924262550632, "loss": 11.6706, "step": 49202 }, { "epoch": 1.0299547852298419, "grad_norm": 0.27172622084617615, "learning_rate": 0.00014729731084688817, "loss": 11.68, "step": 49203 }, { "epoch": 1.0299757179938038, "grad_norm": 0.3819694221019745, "learning_rate": 0.00014729537904553325, "loss": 11.6616, "step": 49204 }, { "epoch": 1.029996650757766, "grad_norm": 0.30721282958984375, "learning_rate": 0.0001472934472214425, "loss": 11.6653, "step": 49205 }, { "epoch": 1.0300175835217282, "grad_norm": 0.3842202126979828, "learning_rate": 0.00014729151537461684, "loss": 11.6704, "step": 49206 }, { "epoch": 1.0300385162856904, "grad_norm": 0.3512802720069885, "learning_rate": 0.00014728958350505723, "loss": 11.6705, "step": 49207 }, { "epoch": 1.0300594490496524, "grad_norm": 0.28784680366516113, "learning_rate": 0.00014728765161276455, "loss": 11.6671, "step": 49208 }, { "epoch": 1.0300803818136146, "grad_norm": 0.3305681049823761, "learning_rate": 0.00014728571969773973, "loss": 11.6632, "step": 49209 }, { "epoch": 1.0301013145775768, "grad_norm": 0.3863464593887329, "learning_rate": 0.00014728378775998374, "loss": 11.6714, "step": 49210 }, { "epoch": 1.030122247341539, "grad_norm": 0.3432149291038513, "learning_rate": 0.00014728185579949746, "loss": 11.6752, "step": 49211 }, { "epoch": 1.0301431801055012, "grad_norm": 0.30382758378982544, "learning_rate": 0.00014727992381628185, "loss": 11.68, "step": 49212 }, { "epoch": 1.0301641128694632, "grad_norm": 0.3167038559913635, "learning_rate": 0.00014727799181033784, "loss": 11.6691, "step": 49213 }, { "epoch": 1.0301850456334254, "grad_norm": 0.31908366084098816, "learning_rate": 0.00014727605978166636, "loss": 11.6728, "step": 49214 }, { "epoch": 1.0302059783973876, "grad_norm": 0.27877548336982727, "learning_rate": 0.00014727412773026834, "loss": 11.6569, "step": 49215 }, { "epoch": 1.0302269111613498, "grad_norm": 0.3489520251750946, "learning_rate": 0.00014727219565614468, "loss": 11.657, "step": 49216 }, { "epoch": 1.030247843925312, "grad_norm": 0.2858494520187378, "learning_rate": 0.0001472702635592963, "loss": 11.6787, "step": 49217 }, { "epoch": 1.030268776689274, "grad_norm": 0.36023950576782227, "learning_rate": 0.00014726833143972423, "loss": 11.6856, "step": 49218 }, { "epoch": 1.0302897094532362, "grad_norm": 0.24776434898376465, "learning_rate": 0.0001472663992974293, "loss": 11.6782, "step": 49219 }, { "epoch": 1.0303106422171984, "grad_norm": 0.2850148677825928, "learning_rate": 0.00014726446713241245, "loss": 11.6698, "step": 49220 }, { "epoch": 1.0303315749811606, "grad_norm": 0.35470080375671387, "learning_rate": 0.0001472625349446746, "loss": 11.674, "step": 49221 }, { "epoch": 1.0303525077451228, "grad_norm": 0.3560296893119812, "learning_rate": 0.00014726060273421673, "loss": 11.6791, "step": 49222 }, { "epoch": 1.0303734405090847, "grad_norm": 0.3012397587299347, "learning_rate": 0.00014725867050103975, "loss": 11.6605, "step": 49223 }, { "epoch": 1.030394373273047, "grad_norm": 0.3261534869670868, "learning_rate": 0.00014725673824514456, "loss": 11.6548, "step": 49224 }, { "epoch": 1.0304153060370091, "grad_norm": 0.31298497319221497, "learning_rate": 0.00014725480596653213, "loss": 11.6637, "step": 49225 }, { "epoch": 1.0304362388009713, "grad_norm": 0.29802486300468445, "learning_rate": 0.00014725287366520334, "loss": 11.6619, "step": 49226 }, { "epoch": 1.0304571715649333, "grad_norm": 0.3604615032672882, "learning_rate": 0.00014725094134115916, "loss": 11.6734, "step": 49227 }, { "epoch": 1.0304781043288955, "grad_norm": 0.30325189232826233, "learning_rate": 0.00014724900899440053, "loss": 11.6825, "step": 49228 }, { "epoch": 1.0304990370928577, "grad_norm": 0.3160302937030792, "learning_rate": 0.0001472470766249283, "loss": 11.671, "step": 49229 }, { "epoch": 1.03051996985682, "grad_norm": 0.2693188488483429, "learning_rate": 0.0001472451442327435, "loss": 11.6687, "step": 49230 }, { "epoch": 1.0305409026207821, "grad_norm": 0.3228417932987213, "learning_rate": 0.00014724321181784698, "loss": 11.6741, "step": 49231 }, { "epoch": 1.030561835384744, "grad_norm": 0.31423962116241455, "learning_rate": 0.00014724127938023975, "loss": 11.661, "step": 49232 }, { "epoch": 1.0305827681487063, "grad_norm": 0.38078999519348145, "learning_rate": 0.00014723934691992266, "loss": 11.674, "step": 49233 }, { "epoch": 1.0306037009126685, "grad_norm": 0.28326019644737244, "learning_rate": 0.00014723741443689665, "loss": 11.6597, "step": 49234 }, { "epoch": 1.0306246336766307, "grad_norm": 0.33165809512138367, "learning_rate": 0.00014723548193116268, "loss": 11.6625, "step": 49235 }, { "epoch": 1.030645566440593, "grad_norm": 0.32934844493865967, "learning_rate": 0.00014723354940272167, "loss": 11.6516, "step": 49236 }, { "epoch": 1.0306664992045549, "grad_norm": 0.27796047925949097, "learning_rate": 0.00014723161685157457, "loss": 11.6683, "step": 49237 }, { "epoch": 1.030687431968517, "grad_norm": 0.49842801690101624, "learning_rate": 0.00014722968427772226, "loss": 11.6743, "step": 49238 }, { "epoch": 1.0307083647324793, "grad_norm": 0.28604206442832947, "learning_rate": 0.00014722775168116568, "loss": 11.6755, "step": 49239 }, { "epoch": 1.0307292974964415, "grad_norm": 0.396225243806839, "learning_rate": 0.0001472258190619058, "loss": 11.6603, "step": 49240 }, { "epoch": 1.0307502302604037, "grad_norm": 0.3259536027908325, "learning_rate": 0.0001472238864199435, "loss": 11.6671, "step": 49241 }, { "epoch": 1.0307711630243657, "grad_norm": 0.3321553170681, "learning_rate": 0.00014722195375527978, "loss": 11.6702, "step": 49242 }, { "epoch": 1.0307920957883279, "grad_norm": 0.27937081456184387, "learning_rate": 0.00014722002106791547, "loss": 11.6549, "step": 49243 }, { "epoch": 1.03081302855229, "grad_norm": 0.2991113066673279, "learning_rate": 0.00014721808835785157, "loss": 11.6737, "step": 49244 }, { "epoch": 1.0308339613162523, "grad_norm": 0.27699434757232666, "learning_rate": 0.000147216155625089, "loss": 11.6685, "step": 49245 }, { "epoch": 1.0308548940802142, "grad_norm": 0.3325607478618622, "learning_rate": 0.00014721422286962865, "loss": 11.6715, "step": 49246 }, { "epoch": 1.0308758268441764, "grad_norm": 0.3319237530231476, "learning_rate": 0.00014721229009147151, "loss": 11.6754, "step": 49247 }, { "epoch": 1.0308967596081386, "grad_norm": 0.37929266691207886, "learning_rate": 0.00014721035729061842, "loss": 11.6708, "step": 49248 }, { "epoch": 1.0309176923721008, "grad_norm": 0.31794896721839905, "learning_rate": 0.0001472084244670704, "loss": 11.6712, "step": 49249 }, { "epoch": 1.030938625136063, "grad_norm": 0.3965214192867279, "learning_rate": 0.00014720649162082837, "loss": 11.6795, "step": 49250 }, { "epoch": 1.030959557900025, "grad_norm": 0.2737172245979309, "learning_rate": 0.00014720455875189318, "loss": 11.6657, "step": 49251 }, { "epoch": 1.0309804906639872, "grad_norm": 0.33724793791770935, "learning_rate": 0.00014720262586026585, "loss": 11.6522, "step": 49252 }, { "epoch": 1.0310014234279494, "grad_norm": 0.2941536605358124, "learning_rate": 0.00014720069294594726, "loss": 11.6649, "step": 49253 }, { "epoch": 1.0310223561919116, "grad_norm": 0.4131731390953064, "learning_rate": 0.00014719876000893837, "loss": 11.6665, "step": 49254 }, { "epoch": 1.0310432889558738, "grad_norm": 0.30990689992904663, "learning_rate": 0.00014719682704924007, "loss": 11.6696, "step": 49255 }, { "epoch": 1.0310642217198358, "grad_norm": 0.34889498353004456, "learning_rate": 0.0001471948940668533, "loss": 11.6678, "step": 49256 }, { "epoch": 1.031085154483798, "grad_norm": 0.36775442957878113, "learning_rate": 0.00014719296106177902, "loss": 11.6658, "step": 49257 }, { "epoch": 1.0311060872477602, "grad_norm": 0.2695089280605316, "learning_rate": 0.0001471910280340181, "loss": 11.6659, "step": 49258 }, { "epoch": 1.0311270200117224, "grad_norm": 0.27594396471977234, "learning_rate": 0.0001471890949835716, "loss": 11.665, "step": 49259 }, { "epoch": 1.0311479527756846, "grad_norm": 0.33228084444999695, "learning_rate": 0.00014718716191044025, "loss": 11.6766, "step": 49260 }, { "epoch": 1.0311688855396466, "grad_norm": 0.30840572714805603, "learning_rate": 0.00014718522881462513, "loss": 11.6602, "step": 49261 }, { "epoch": 1.0311898183036088, "grad_norm": 0.3102704882621765, "learning_rate": 0.00014718329569612715, "loss": 11.6759, "step": 49262 }, { "epoch": 1.031210751067571, "grad_norm": 0.2675573229789734, "learning_rate": 0.00014718136255494716, "loss": 11.6706, "step": 49263 }, { "epoch": 1.0312316838315332, "grad_norm": 0.28042861819267273, "learning_rate": 0.00014717942939108619, "loss": 11.6721, "step": 49264 }, { "epoch": 1.0312526165954952, "grad_norm": 0.2655065357685089, "learning_rate": 0.0001471774962045451, "loss": 11.6591, "step": 49265 }, { "epoch": 1.0312735493594574, "grad_norm": 0.2413954883813858, "learning_rate": 0.00014717556299532488, "loss": 11.6735, "step": 49266 }, { "epoch": 1.0312944821234196, "grad_norm": 0.319708913564682, "learning_rate": 0.00014717362976342634, "loss": 11.6511, "step": 49267 }, { "epoch": 1.0313154148873818, "grad_norm": 0.3380945026874542, "learning_rate": 0.00014717169650885055, "loss": 11.6782, "step": 49268 }, { "epoch": 1.031336347651344, "grad_norm": 0.32391199469566345, "learning_rate": 0.0001471697632315984, "loss": 11.6756, "step": 49269 }, { "epoch": 1.031357280415306, "grad_norm": 0.27656158804893494, "learning_rate": 0.00014716782993167072, "loss": 11.6672, "step": 49270 }, { "epoch": 1.0313782131792681, "grad_norm": 0.2648143768310547, "learning_rate": 0.00014716589660906858, "loss": 11.6618, "step": 49271 }, { "epoch": 1.0313991459432303, "grad_norm": 0.38204869627952576, "learning_rate": 0.00014716396326379285, "loss": 11.6822, "step": 49272 }, { "epoch": 1.0314200787071925, "grad_norm": 0.29945799708366394, "learning_rate": 0.00014716202989584445, "loss": 11.6652, "step": 49273 }, { "epoch": 1.0314410114711547, "grad_norm": 0.3469255864620209, "learning_rate": 0.0001471600965052243, "loss": 11.6641, "step": 49274 }, { "epoch": 1.0314619442351167, "grad_norm": 0.3493371605873108, "learning_rate": 0.00014715816309193337, "loss": 11.6643, "step": 49275 }, { "epoch": 1.031482876999079, "grad_norm": 0.31268033385276794, "learning_rate": 0.00014715622965597253, "loss": 11.6535, "step": 49276 }, { "epoch": 1.0315038097630411, "grad_norm": 0.4171954393386841, "learning_rate": 0.00014715429619734278, "loss": 11.6791, "step": 49277 }, { "epoch": 1.0315247425270033, "grad_norm": 0.31363362073898315, "learning_rate": 0.00014715236271604503, "loss": 11.6778, "step": 49278 }, { "epoch": 1.0315456752909655, "grad_norm": 0.3478853702545166, "learning_rate": 0.00014715042921208013, "loss": 11.6603, "step": 49279 }, { "epoch": 1.0315666080549275, "grad_norm": 0.2990257441997528, "learning_rate": 0.00014714849568544912, "loss": 11.6832, "step": 49280 }, { "epoch": 1.0315875408188897, "grad_norm": 0.2928575873374939, "learning_rate": 0.00014714656213615288, "loss": 11.6574, "step": 49281 }, { "epoch": 1.031608473582852, "grad_norm": 0.26982513070106506, "learning_rate": 0.00014714462856419233, "loss": 11.636, "step": 49282 }, { "epoch": 1.031629406346814, "grad_norm": 0.4387037754058838, "learning_rate": 0.00014714269496956842, "loss": 11.6649, "step": 49283 }, { "epoch": 1.031650339110776, "grad_norm": 0.2953886389732361, "learning_rate": 0.00014714076135228209, "loss": 11.6667, "step": 49284 }, { "epoch": 1.0316712718747383, "grad_norm": 0.2648402452468872, "learning_rate": 0.00014713882771233422, "loss": 11.6724, "step": 49285 }, { "epoch": 1.0316922046387005, "grad_norm": 0.2807089388370514, "learning_rate": 0.0001471368940497258, "loss": 11.6697, "step": 49286 }, { "epoch": 1.0317131374026627, "grad_norm": 0.3065231740474701, "learning_rate": 0.0001471349603644577, "loss": 11.6687, "step": 49287 }, { "epoch": 1.0317340701666249, "grad_norm": 0.3398083746433258, "learning_rate": 0.0001471330266565309, "loss": 11.6656, "step": 49288 }, { "epoch": 1.0317550029305869, "grad_norm": 0.362636923789978, "learning_rate": 0.0001471310929259463, "loss": 11.6713, "step": 49289 }, { "epoch": 1.031775935694549, "grad_norm": 0.30274122953414917, "learning_rate": 0.00014712915917270485, "loss": 11.6632, "step": 49290 }, { "epoch": 1.0317968684585113, "grad_norm": 0.2648429572582245, "learning_rate": 0.00014712722539680746, "loss": 11.6745, "step": 49291 }, { "epoch": 1.0318178012224735, "grad_norm": 0.3248515725135803, "learning_rate": 0.00014712529159825505, "loss": 11.6548, "step": 49292 }, { "epoch": 1.0318387339864357, "grad_norm": 0.3529735803604126, "learning_rate": 0.0001471233577770486, "loss": 11.6604, "step": 49293 }, { "epoch": 1.0318596667503976, "grad_norm": 0.38928738236427307, "learning_rate": 0.00014712142393318898, "loss": 11.6703, "step": 49294 }, { "epoch": 1.0318805995143598, "grad_norm": 0.31247761845588684, "learning_rate": 0.00014711949006667718, "loss": 11.6731, "step": 49295 }, { "epoch": 1.031901532278322, "grad_norm": 0.32933473587036133, "learning_rate": 0.00014711755617751406, "loss": 11.6811, "step": 49296 }, { "epoch": 1.0319224650422842, "grad_norm": 0.28021717071533203, "learning_rate": 0.00014711562226570063, "loss": 11.6726, "step": 49297 }, { "epoch": 1.0319433978062464, "grad_norm": 0.4536918103694916, "learning_rate": 0.00014711368833123772, "loss": 11.6846, "step": 49298 }, { "epoch": 1.0319643305702084, "grad_norm": 0.3160500228404999, "learning_rate": 0.00014711175437412635, "loss": 11.6694, "step": 49299 }, { "epoch": 1.0319852633341706, "grad_norm": 0.3229121267795563, "learning_rate": 0.00014710982039436743, "loss": 11.6841, "step": 49300 }, { "epoch": 1.0320061960981328, "grad_norm": 0.3227830231189728, "learning_rate": 0.00014710788639196186, "loss": 11.6547, "step": 49301 }, { "epoch": 1.032027128862095, "grad_norm": 0.3126611113548279, "learning_rate": 0.0001471059523669106, "loss": 11.6723, "step": 49302 }, { "epoch": 1.032048061626057, "grad_norm": 0.33210110664367676, "learning_rate": 0.00014710401831921453, "loss": 11.6625, "step": 49303 }, { "epoch": 1.0320689943900192, "grad_norm": 0.30956941843032837, "learning_rate": 0.00014710208424887464, "loss": 11.6766, "step": 49304 }, { "epoch": 1.0320899271539814, "grad_norm": 0.27736178040504456, "learning_rate": 0.00014710015015589181, "loss": 11.6634, "step": 49305 }, { "epoch": 1.0321108599179436, "grad_norm": 0.35046201944351196, "learning_rate": 0.00014709821604026702, "loss": 11.6692, "step": 49306 }, { "epoch": 1.0321317926819058, "grad_norm": 0.30346450209617615, "learning_rate": 0.00014709628190200116, "loss": 11.6725, "step": 49307 }, { "epoch": 1.0321527254458678, "grad_norm": 0.30039480328559875, "learning_rate": 0.00014709434774109518, "loss": 11.666, "step": 49308 }, { "epoch": 1.03217365820983, "grad_norm": 0.3829495310783386, "learning_rate": 0.00014709241355755, "loss": 11.6554, "step": 49309 }, { "epoch": 1.0321945909737922, "grad_norm": 0.2629208564758301, "learning_rate": 0.00014709047935136655, "loss": 11.6647, "step": 49310 }, { "epoch": 1.0322155237377544, "grad_norm": 0.3282245397567749, "learning_rate": 0.00014708854512254576, "loss": 11.6605, "step": 49311 }, { "epoch": 1.0322364565017166, "grad_norm": 0.2866866886615753, "learning_rate": 0.00014708661087108857, "loss": 11.6731, "step": 49312 }, { "epoch": 1.0322573892656786, "grad_norm": 0.3461875319480896, "learning_rate": 0.00014708467659699592, "loss": 11.6553, "step": 49313 }, { "epoch": 1.0322783220296408, "grad_norm": 0.3038496673107147, "learning_rate": 0.0001470827423002687, "loss": 11.6524, "step": 49314 }, { "epoch": 1.032299254793603, "grad_norm": 0.3534530699253082, "learning_rate": 0.00014708080798090786, "loss": 11.6621, "step": 49315 }, { "epoch": 1.0323201875575652, "grad_norm": 0.32137152552604675, "learning_rate": 0.00014707887363891436, "loss": 11.6602, "step": 49316 }, { "epoch": 1.0323411203215274, "grad_norm": 0.2684815227985382, "learning_rate": 0.00014707693927428904, "loss": 11.6803, "step": 49317 }, { "epoch": 1.0323620530854893, "grad_norm": 2.1829006671905518, "learning_rate": 0.00014707500488703295, "loss": 11.6532, "step": 49318 }, { "epoch": 1.0323829858494515, "grad_norm": 0.3681478500366211, "learning_rate": 0.00014707307047714697, "loss": 11.6716, "step": 49319 }, { "epoch": 1.0324039186134137, "grad_norm": 0.4188493490219116, "learning_rate": 0.00014707113604463197, "loss": 11.6657, "step": 49320 }, { "epoch": 1.032424851377376, "grad_norm": 0.2866337299346924, "learning_rate": 0.00014706920158948897, "loss": 11.653, "step": 49321 }, { "epoch": 1.032445784141338, "grad_norm": 0.34128424525260925, "learning_rate": 0.00014706726711171883, "loss": 11.6742, "step": 49322 }, { "epoch": 1.0324667169053001, "grad_norm": 0.2946780025959015, "learning_rate": 0.00014706533261132253, "loss": 11.6472, "step": 49323 }, { "epoch": 1.0324876496692623, "grad_norm": 0.3421957492828369, "learning_rate": 0.000147063398088301, "loss": 11.6615, "step": 49324 }, { "epoch": 1.0325085824332245, "grad_norm": 0.46340227127075195, "learning_rate": 0.00014706146354265514, "loss": 11.6866, "step": 49325 }, { "epoch": 1.0325295151971867, "grad_norm": 0.2626436948776245, "learning_rate": 0.00014705952897438586, "loss": 11.652, "step": 49326 }, { "epoch": 1.0325504479611487, "grad_norm": 0.2809584140777588, "learning_rate": 0.00014705759438349416, "loss": 11.662, "step": 49327 }, { "epoch": 1.032571380725111, "grad_norm": 0.2927493453025818, "learning_rate": 0.0001470556597699809, "loss": 11.6746, "step": 49328 }, { "epoch": 1.032592313489073, "grad_norm": 0.279097318649292, "learning_rate": 0.00014705372513384708, "loss": 11.6647, "step": 49329 }, { "epoch": 1.0326132462530353, "grad_norm": 0.2609914243221283, "learning_rate": 0.00014705179047509357, "loss": 11.6684, "step": 49330 }, { "epoch": 1.0326341790169975, "grad_norm": 0.37203842401504517, "learning_rate": 0.0001470498557937213, "loss": 11.6466, "step": 49331 }, { "epoch": 1.0326551117809595, "grad_norm": 0.3022324740886688, "learning_rate": 0.00014704792108973124, "loss": 11.6653, "step": 49332 }, { "epoch": 1.0326760445449217, "grad_norm": 0.3542182743549347, "learning_rate": 0.00014704598636312431, "loss": 11.6581, "step": 49333 }, { "epoch": 1.0326969773088839, "grad_norm": 0.3475359380245209, "learning_rate": 0.00014704405161390142, "loss": 11.6737, "step": 49334 }, { "epoch": 1.032717910072846, "grad_norm": 0.26101934909820557, "learning_rate": 0.00014704211684206354, "loss": 11.6657, "step": 49335 }, { "epoch": 1.0327388428368083, "grad_norm": 0.33980947732925415, "learning_rate": 0.00014704018204761155, "loss": 11.6728, "step": 49336 }, { "epoch": 1.0327597756007703, "grad_norm": 0.28475624322891235, "learning_rate": 0.0001470382472305464, "loss": 11.6634, "step": 49337 }, { "epoch": 1.0327807083647325, "grad_norm": 0.27017155289649963, "learning_rate": 0.00014703631239086903, "loss": 11.6815, "step": 49338 }, { "epoch": 1.0328016411286947, "grad_norm": 0.29290494322776794, "learning_rate": 0.00014703437752858033, "loss": 11.688, "step": 49339 }, { "epoch": 1.0328225738926569, "grad_norm": 0.2875538170337677, "learning_rate": 0.0001470324426436813, "loss": 11.6786, "step": 49340 }, { "epoch": 1.0328435066566188, "grad_norm": 0.29891347885131836, "learning_rate": 0.00014703050773617283, "loss": 11.6758, "step": 49341 }, { "epoch": 1.032864439420581, "grad_norm": 0.2683403193950653, "learning_rate": 0.00014702857280605582, "loss": 11.689, "step": 49342 }, { "epoch": 1.0328853721845432, "grad_norm": 0.37619155645370483, "learning_rate": 0.00014702663785333127, "loss": 11.68, "step": 49343 }, { "epoch": 1.0329063049485054, "grad_norm": 0.26658785343170166, "learning_rate": 0.00014702470287800002, "loss": 11.6561, "step": 49344 }, { "epoch": 1.0329272377124676, "grad_norm": 0.29607605934143066, "learning_rate": 0.00014702276788006313, "loss": 11.6559, "step": 49345 }, { "epoch": 1.0329481704764296, "grad_norm": 0.3545929193496704, "learning_rate": 0.0001470208328595214, "loss": 11.6818, "step": 49346 }, { "epoch": 1.0329691032403918, "grad_norm": 0.3092258870601654, "learning_rate": 0.00014701889781637583, "loss": 11.676, "step": 49347 }, { "epoch": 1.032990036004354, "grad_norm": 0.2967202067375183, "learning_rate": 0.00014701696275062732, "loss": 11.6617, "step": 49348 }, { "epoch": 1.0330109687683162, "grad_norm": 0.3308334946632385, "learning_rate": 0.0001470150276622768, "loss": 11.6706, "step": 49349 }, { "epoch": 1.0330319015322784, "grad_norm": 0.36641064286231995, "learning_rate": 0.00014701309255132528, "loss": 11.6712, "step": 49350 }, { "epoch": 1.0330528342962404, "grad_norm": 0.3845282793045044, "learning_rate": 0.0001470111574177736, "loss": 11.6813, "step": 49351 }, { "epoch": 1.0330737670602026, "grad_norm": 0.33261042833328247, "learning_rate": 0.00014700922226162268, "loss": 11.6545, "step": 49352 }, { "epoch": 1.0330946998241648, "grad_norm": 0.26830360293388367, "learning_rate": 0.0001470072870828735, "loss": 11.6769, "step": 49353 }, { "epoch": 1.033115632588127, "grad_norm": 0.3183620572090149, "learning_rate": 0.00014700535188152695, "loss": 11.681, "step": 49354 }, { "epoch": 1.033136565352089, "grad_norm": 0.38828206062316895, "learning_rate": 0.00014700341665758405, "loss": 11.6844, "step": 49355 }, { "epoch": 1.0331574981160512, "grad_norm": 0.2908799648284912, "learning_rate": 0.0001470014814110456, "loss": 11.6623, "step": 49356 }, { "epoch": 1.0331784308800134, "grad_norm": 0.3791106343269348, "learning_rate": 0.00014699954614191265, "loss": 11.6784, "step": 49357 }, { "epoch": 1.0331993636439756, "grad_norm": 0.36067965626716614, "learning_rate": 0.00014699761085018603, "loss": 11.6628, "step": 49358 }, { "epoch": 1.0332202964079378, "grad_norm": 0.32048726081848145, "learning_rate": 0.00014699567553586672, "loss": 11.6813, "step": 49359 }, { "epoch": 1.0332412291718998, "grad_norm": 0.36405542492866516, "learning_rate": 0.0001469937401989557, "loss": 11.6503, "step": 49360 }, { "epoch": 1.033262161935862, "grad_norm": 0.32508474588394165, "learning_rate": 0.00014699180483945378, "loss": 11.6591, "step": 49361 }, { "epoch": 1.0332830946998242, "grad_norm": 0.3550092577934265, "learning_rate": 0.000146989869457362, "loss": 11.6603, "step": 49362 }, { "epoch": 1.0333040274637864, "grad_norm": 0.32042649388313293, "learning_rate": 0.00014698793405268124, "loss": 11.6661, "step": 49363 }, { "epoch": 1.0333249602277486, "grad_norm": 0.35552966594696045, "learning_rate": 0.00014698599862541243, "loss": 11.6788, "step": 49364 }, { "epoch": 1.0333458929917105, "grad_norm": 0.3081638813018799, "learning_rate": 0.00014698406317555652, "loss": 11.6743, "step": 49365 }, { "epoch": 1.0333668257556727, "grad_norm": 1.0066437721252441, "learning_rate": 0.00014698212770311442, "loss": 11.5951, "step": 49366 }, { "epoch": 1.033387758519635, "grad_norm": 0.3184477388858795, "learning_rate": 0.00014698019220808707, "loss": 11.6692, "step": 49367 }, { "epoch": 1.0334086912835971, "grad_norm": 0.2855719327926636, "learning_rate": 0.0001469782566904754, "loss": 11.6645, "step": 49368 }, { "epoch": 1.0334296240475593, "grad_norm": 0.34548938274383545, "learning_rate": 0.00014697632115028036, "loss": 11.6649, "step": 49369 }, { "epoch": 1.0334505568115213, "grad_norm": 0.3276859223842621, "learning_rate": 0.00014697438558750284, "loss": 11.6623, "step": 49370 }, { "epoch": 1.0334714895754835, "grad_norm": 0.24726752936840057, "learning_rate": 0.00014697245000214377, "loss": 11.6723, "step": 49371 }, { "epoch": 1.0334924223394457, "grad_norm": 0.30732661485671997, "learning_rate": 0.00014697051439420414, "loss": 11.6761, "step": 49372 }, { "epoch": 1.033513355103408, "grad_norm": 0.726475715637207, "learning_rate": 0.00014696857876368482, "loss": 11.6433, "step": 49373 }, { "epoch": 1.03353428786737, "grad_norm": 0.3791213929653168, "learning_rate": 0.00014696664311058679, "loss": 11.689, "step": 49374 }, { "epoch": 1.033555220631332, "grad_norm": 0.3031546473503113, "learning_rate": 0.00014696470743491094, "loss": 11.6676, "step": 49375 }, { "epoch": 1.0335761533952943, "grad_norm": 0.28662756085395813, "learning_rate": 0.0001469627717366582, "loss": 11.6597, "step": 49376 }, { "epoch": 1.0335970861592565, "grad_norm": 0.30644673109054565, "learning_rate": 0.00014696083601582952, "loss": 11.6688, "step": 49377 }, { "epoch": 1.0336180189232187, "grad_norm": 0.30932384729385376, "learning_rate": 0.00014695890027242582, "loss": 11.6731, "step": 49378 }, { "epoch": 1.0336389516871807, "grad_norm": 0.3124479353427887, "learning_rate": 0.00014695696450644808, "loss": 11.6671, "step": 49379 }, { "epoch": 1.0336598844511429, "grad_norm": 0.3828691244125366, "learning_rate": 0.00014695502871789714, "loss": 11.6605, "step": 49380 }, { "epoch": 1.033680817215105, "grad_norm": 0.3723685145378113, "learning_rate": 0.00014695309290677398, "loss": 11.662, "step": 49381 }, { "epoch": 1.0337017499790673, "grad_norm": 0.35101571679115295, "learning_rate": 0.00014695115707307954, "loss": 11.6487, "step": 49382 }, { "epoch": 1.0337226827430295, "grad_norm": 0.3209717273712158, "learning_rate": 0.0001469492212168147, "loss": 11.6726, "step": 49383 }, { "epoch": 1.0337436155069915, "grad_norm": 0.38042986392974854, "learning_rate": 0.00014694728533798049, "loss": 11.6617, "step": 49384 }, { "epoch": 1.0337645482709537, "grad_norm": 0.32399311661720276, "learning_rate": 0.00014694534943657773, "loss": 11.6677, "step": 49385 }, { "epoch": 1.0337854810349159, "grad_norm": 0.4087192714214325, "learning_rate": 0.00014694341351260742, "loss": 11.6781, "step": 49386 }, { "epoch": 1.033806413798878, "grad_norm": 0.32186439633369446, "learning_rate": 0.00014694147756607047, "loss": 11.6622, "step": 49387 }, { "epoch": 1.0338273465628403, "grad_norm": 0.40958571434020996, "learning_rate": 0.00014693954159696783, "loss": 11.6567, "step": 49388 }, { "epoch": 1.0338482793268022, "grad_norm": 0.3151819407939911, "learning_rate": 0.0001469376056053004, "loss": 11.6621, "step": 49389 }, { "epoch": 1.0338692120907644, "grad_norm": 0.31038832664489746, "learning_rate": 0.0001469356695910691, "loss": 11.6697, "step": 49390 }, { "epoch": 1.0338901448547266, "grad_norm": 0.311123788356781, "learning_rate": 0.0001469337335542749, "loss": 11.6727, "step": 49391 }, { "epoch": 1.0339110776186888, "grad_norm": 0.3640754818916321, "learning_rate": 0.00014693179749491873, "loss": 11.6822, "step": 49392 }, { "epoch": 1.0339320103826508, "grad_norm": 0.287448525428772, "learning_rate": 0.00014692986141300146, "loss": 11.6621, "step": 49393 }, { "epoch": 1.033952943146613, "grad_norm": 0.3088547885417938, "learning_rate": 0.0001469279253085241, "loss": 11.6777, "step": 49394 }, { "epoch": 1.0339738759105752, "grad_norm": 0.2850187420845032, "learning_rate": 0.00014692598918148754, "loss": 11.6722, "step": 49395 }, { "epoch": 1.0339948086745374, "grad_norm": 0.37140002846717834, "learning_rate": 0.00014692405303189275, "loss": 11.6659, "step": 49396 }, { "epoch": 1.0340157414384996, "grad_norm": 0.37164437770843506, "learning_rate": 0.00014692211685974057, "loss": 11.6806, "step": 49397 }, { "epoch": 1.0340366742024616, "grad_norm": 0.30580422282218933, "learning_rate": 0.000146920180665032, "loss": 11.6668, "step": 49398 }, { "epoch": 1.0340576069664238, "grad_norm": 0.42000046372413635, "learning_rate": 0.000146918244447768, "loss": 11.6821, "step": 49399 }, { "epoch": 1.034078539730386, "grad_norm": 0.3199242353439331, "learning_rate": 0.0001469163082079494, "loss": 11.6761, "step": 49400 }, { "epoch": 1.0340994724943482, "grad_norm": 0.3677016496658325, "learning_rate": 0.00014691437194557723, "loss": 11.6648, "step": 49401 }, { "epoch": 1.0341204052583104, "grad_norm": 0.35637611150741577, "learning_rate": 0.00014691243566065235, "loss": 11.6783, "step": 49402 }, { "epoch": 1.0341413380222724, "grad_norm": 0.37332433462142944, "learning_rate": 0.00014691049935317577, "loss": 11.6706, "step": 49403 }, { "epoch": 1.0341622707862346, "grad_norm": 0.30907514691352844, "learning_rate": 0.00014690856302314832, "loss": 11.6577, "step": 49404 }, { "epoch": 1.0341832035501968, "grad_norm": 0.3509589433670044, "learning_rate": 0.00014690662667057102, "loss": 11.6633, "step": 49405 }, { "epoch": 1.034204136314159, "grad_norm": 0.39516451954841614, "learning_rate": 0.00014690469029544475, "loss": 11.6614, "step": 49406 }, { "epoch": 1.0342250690781212, "grad_norm": 0.38455745577812195, "learning_rate": 0.00014690275389777046, "loss": 11.6658, "step": 49407 }, { "epoch": 1.0342460018420832, "grad_norm": 0.44381406903266907, "learning_rate": 0.00014690081747754908, "loss": 11.6532, "step": 49408 }, { "epoch": 1.0342669346060454, "grad_norm": 0.3183080852031708, "learning_rate": 0.00014689888103478152, "loss": 11.6547, "step": 49409 }, { "epoch": 1.0342878673700076, "grad_norm": 0.4083603024482727, "learning_rate": 0.00014689694456946874, "loss": 11.6721, "step": 49410 }, { "epoch": 1.0343088001339698, "grad_norm": 0.36311468482017517, "learning_rate": 0.00014689500808161168, "loss": 11.6599, "step": 49411 }, { "epoch": 1.0343297328979317, "grad_norm": 0.33136212825775146, "learning_rate": 0.00014689307157121122, "loss": 11.6581, "step": 49412 }, { "epoch": 1.034350665661894, "grad_norm": 0.30616527795791626, "learning_rate": 0.00014689113503826831, "loss": 11.6742, "step": 49413 }, { "epoch": 1.0343715984258561, "grad_norm": 0.36399775743484497, "learning_rate": 0.00014688919848278393, "loss": 11.6616, "step": 49414 }, { "epoch": 1.0343925311898183, "grad_norm": 0.30774787068367004, "learning_rate": 0.00014688726190475895, "loss": 11.6734, "step": 49415 }, { "epoch": 1.0344134639537805, "grad_norm": 0.33176761865615845, "learning_rate": 0.0001468853253041943, "loss": 11.674, "step": 49416 }, { "epoch": 1.0344343967177425, "grad_norm": 0.3615012764930725, "learning_rate": 0.000146883388681091, "loss": 11.6686, "step": 49417 }, { "epoch": 1.0344553294817047, "grad_norm": 0.36651259660720825, "learning_rate": 0.00014688145203544986, "loss": 11.6596, "step": 49418 }, { "epoch": 1.034476262245667, "grad_norm": 0.4755418002605438, "learning_rate": 0.0001468795153672719, "loss": 11.6964, "step": 49419 }, { "epoch": 1.0344971950096291, "grad_norm": 0.43208029866218567, "learning_rate": 0.000146877578676558, "loss": 11.6906, "step": 49420 }, { "epoch": 1.0345181277735913, "grad_norm": 0.3619908094406128, "learning_rate": 0.0001468756419633091, "loss": 11.6674, "step": 49421 }, { "epoch": 1.0345390605375533, "grad_norm": 0.31018418073654175, "learning_rate": 0.00014687370522752614, "loss": 11.6704, "step": 49422 }, { "epoch": 1.0345599933015155, "grad_norm": 0.3189941942691803, "learning_rate": 0.00014687176846921006, "loss": 11.6813, "step": 49423 }, { "epoch": 1.0345809260654777, "grad_norm": 0.3511231243610382, "learning_rate": 0.00014686983168836178, "loss": 11.6596, "step": 49424 }, { "epoch": 1.03460185882944, "grad_norm": 0.36632487177848816, "learning_rate": 0.00014686789488498224, "loss": 11.681, "step": 49425 }, { "epoch": 1.034622791593402, "grad_norm": 0.3869478404521942, "learning_rate": 0.00014686595805907238, "loss": 11.6829, "step": 49426 }, { "epoch": 1.034643724357364, "grad_norm": 0.3623124063014984, "learning_rate": 0.00014686402121063307, "loss": 11.6636, "step": 49427 }, { "epoch": 1.0346646571213263, "grad_norm": 0.27609649300575256, "learning_rate": 0.00014686208433966534, "loss": 11.6672, "step": 49428 }, { "epoch": 1.0346855898852885, "grad_norm": 0.32245832681655884, "learning_rate": 0.00014686014744617003, "loss": 11.6824, "step": 49429 }, { "epoch": 1.0347065226492507, "grad_norm": 0.2809540927410126, "learning_rate": 0.0001468582105301481, "loss": 11.6746, "step": 49430 }, { "epoch": 1.0347274554132126, "grad_norm": 0.266005277633667, "learning_rate": 0.0001468562735916005, "loss": 11.6659, "step": 49431 }, { "epoch": 1.0347483881771748, "grad_norm": 0.3283085227012634, "learning_rate": 0.00014685433663052814, "loss": 11.6658, "step": 49432 }, { "epoch": 1.034769320941137, "grad_norm": 0.3211525082588196, "learning_rate": 0.00014685239964693197, "loss": 11.6811, "step": 49433 }, { "epoch": 1.0347902537050992, "grad_norm": 0.3485732674598694, "learning_rate": 0.0001468504626408129, "loss": 11.6581, "step": 49434 }, { "epoch": 1.0348111864690615, "grad_norm": 0.36314427852630615, "learning_rate": 0.00014684852561217194, "loss": 11.6581, "step": 49435 }, { "epoch": 1.0348321192330234, "grad_norm": 0.2908517122268677, "learning_rate": 0.0001468465885610099, "loss": 11.6625, "step": 49436 }, { "epoch": 1.0348530519969856, "grad_norm": 0.32266584038734436, "learning_rate": 0.00014684465148732777, "loss": 11.6578, "step": 49437 }, { "epoch": 1.0348739847609478, "grad_norm": 0.3174716532230377, "learning_rate": 0.0001468427143911265, "loss": 11.6797, "step": 49438 }, { "epoch": 1.03489491752491, "grad_norm": 0.2867923974990845, "learning_rate": 0.00014684077727240694, "loss": 11.6596, "step": 49439 }, { "epoch": 1.0349158502888722, "grad_norm": 0.3702811598777771, "learning_rate": 0.00014683884013117014, "loss": 11.6793, "step": 49440 }, { "epoch": 1.0349367830528342, "grad_norm": 0.24018441140651703, "learning_rate": 0.00014683690296741694, "loss": 11.6675, "step": 49441 }, { "epoch": 1.0349577158167964, "grad_norm": 0.3598953187465668, "learning_rate": 0.00014683496578114832, "loss": 11.6562, "step": 49442 }, { "epoch": 1.0349786485807586, "grad_norm": 0.3813015818595886, "learning_rate": 0.00014683302857236517, "loss": 11.6493, "step": 49443 }, { "epoch": 1.0349995813447208, "grad_norm": 0.3339166045188904, "learning_rate": 0.00014683109134106846, "loss": 11.69, "step": 49444 }, { "epoch": 1.035020514108683, "grad_norm": 0.29694560170173645, "learning_rate": 0.0001468291540872591, "loss": 11.673, "step": 49445 }, { "epoch": 1.035041446872645, "grad_norm": 0.3053343594074249, "learning_rate": 0.00014682721681093803, "loss": 11.6658, "step": 49446 }, { "epoch": 1.0350623796366072, "grad_norm": 0.33870846033096313, "learning_rate": 0.00014682527951210617, "loss": 11.6698, "step": 49447 }, { "epoch": 1.0350833124005694, "grad_norm": 0.3468701243400574, "learning_rate": 0.00014682334219076447, "loss": 11.6689, "step": 49448 }, { "epoch": 1.0351042451645316, "grad_norm": 0.27748915553092957, "learning_rate": 0.00014682140484691386, "loss": 11.6648, "step": 49449 }, { "epoch": 1.0351251779284936, "grad_norm": 0.3913208842277527, "learning_rate": 0.00014681946748055526, "loss": 11.6685, "step": 49450 }, { "epoch": 1.0351461106924558, "grad_norm": 0.3877754509449005, "learning_rate": 0.0001468175300916896, "loss": 11.6863, "step": 49451 }, { "epoch": 1.035167043456418, "grad_norm": 0.35920456051826477, "learning_rate": 0.00014681559268031778, "loss": 11.679, "step": 49452 }, { "epoch": 1.0351879762203802, "grad_norm": 0.280171275138855, "learning_rate": 0.00014681365524644082, "loss": 11.6608, "step": 49453 }, { "epoch": 1.0352089089843424, "grad_norm": 1.3541268110275269, "learning_rate": 0.00014681171779005957, "loss": 11.6315, "step": 49454 }, { "epoch": 1.0352298417483043, "grad_norm": 0.3090432286262512, "learning_rate": 0.000146809780311175, "loss": 11.6501, "step": 49455 }, { "epoch": 1.0352507745122665, "grad_norm": 0.26624158024787903, "learning_rate": 0.00014680784280978802, "loss": 11.6563, "step": 49456 }, { "epoch": 1.0352717072762287, "grad_norm": 0.2990012466907501, "learning_rate": 0.0001468059052858996, "loss": 11.6592, "step": 49457 }, { "epoch": 1.035292640040191, "grad_norm": 0.3032436966896057, "learning_rate": 0.0001468039677395106, "loss": 11.6486, "step": 49458 }, { "epoch": 1.0353135728041531, "grad_norm": 0.29688701033592224, "learning_rate": 0.00014680203017062202, "loss": 11.6825, "step": 49459 }, { "epoch": 1.0353345055681151, "grad_norm": 0.2771993577480316, "learning_rate": 0.00014680009257923477, "loss": 11.6755, "step": 49460 }, { "epoch": 1.0353554383320773, "grad_norm": 0.3321852684020996, "learning_rate": 0.00014679815496534976, "loss": 11.6782, "step": 49461 }, { "epoch": 1.0353763710960395, "grad_norm": 0.2963959574699402, "learning_rate": 0.00014679621732896795, "loss": 11.6558, "step": 49462 }, { "epoch": 1.0353973038600017, "grad_norm": 0.30341771245002747, "learning_rate": 0.00014679427967009026, "loss": 11.6563, "step": 49463 }, { "epoch": 1.035418236623964, "grad_norm": 0.3032572269439697, "learning_rate": 0.00014679234198871762, "loss": 11.6708, "step": 49464 }, { "epoch": 1.035439169387926, "grad_norm": 0.2989597022533417, "learning_rate": 0.00014679040428485095, "loss": 11.6863, "step": 49465 }, { "epoch": 1.035460102151888, "grad_norm": 0.3459618389606476, "learning_rate": 0.00014678846655849121, "loss": 11.6768, "step": 49466 }, { "epoch": 1.0354810349158503, "grad_norm": 0.37042492628097534, "learning_rate": 0.00014678652880963932, "loss": 11.6848, "step": 49467 }, { "epoch": 1.0355019676798125, "grad_norm": 0.3082961142063141, "learning_rate": 0.0001467845910382962, "loss": 11.6575, "step": 49468 }, { "epoch": 1.0355229004437745, "grad_norm": 0.39735785126686096, "learning_rate": 0.0001467826532444628, "loss": 11.6675, "step": 49469 }, { "epoch": 1.0355438332077367, "grad_norm": 0.2689169943332672, "learning_rate": 0.00014678071542814002, "loss": 11.6613, "step": 49470 }, { "epoch": 1.0355647659716989, "grad_norm": 0.32335495948791504, "learning_rate": 0.00014677877758932883, "loss": 11.674, "step": 49471 }, { "epoch": 1.035585698735661, "grad_norm": 0.28255876898765564, "learning_rate": 0.00014677683972803016, "loss": 11.6535, "step": 49472 }, { "epoch": 1.0356066314996233, "grad_norm": 0.28244826197624207, "learning_rate": 0.0001467749018442449, "loss": 11.6758, "step": 49473 }, { "epoch": 1.0356275642635853, "grad_norm": 0.27635490894317627, "learning_rate": 0.000146772963937974, "loss": 11.6953, "step": 49474 }, { "epoch": 1.0356484970275475, "grad_norm": 0.2854979634284973, "learning_rate": 0.00014677102600921843, "loss": 11.6932, "step": 49475 }, { "epoch": 1.0356694297915097, "grad_norm": 0.39126259088516235, "learning_rate": 0.00014676908805797906, "loss": 11.6668, "step": 49476 }, { "epoch": 1.0356903625554719, "grad_norm": 0.35447239875793457, "learning_rate": 0.00014676715008425688, "loss": 11.6765, "step": 49477 }, { "epoch": 1.035711295319434, "grad_norm": 0.30359089374542236, "learning_rate": 0.00014676521208805275, "loss": 11.6681, "step": 49478 }, { "epoch": 1.035732228083396, "grad_norm": 0.3183370530605316, "learning_rate": 0.00014676327406936768, "loss": 11.6681, "step": 49479 }, { "epoch": 1.0357531608473582, "grad_norm": 0.35495588183403015, "learning_rate": 0.00014676133602820258, "loss": 11.6837, "step": 49480 }, { "epoch": 1.0357740936113204, "grad_norm": 0.34695178270339966, "learning_rate": 0.00014675939796455833, "loss": 11.6643, "step": 49481 }, { "epoch": 1.0357950263752826, "grad_norm": 0.2580642104148865, "learning_rate": 0.00014675745987843594, "loss": 11.6615, "step": 49482 }, { "epoch": 1.0358159591392448, "grad_norm": 0.3521828353404999, "learning_rate": 0.00014675552176983628, "loss": 11.6841, "step": 49483 }, { "epoch": 1.0358368919032068, "grad_norm": 0.3917587697505951, "learning_rate": 0.0001467535836387603, "loss": 11.6698, "step": 49484 }, { "epoch": 1.035857824667169, "grad_norm": 0.40406563878059387, "learning_rate": 0.00014675164548520894, "loss": 11.6861, "step": 49485 }, { "epoch": 1.0358787574311312, "grad_norm": 0.33554840087890625, "learning_rate": 0.00014674970730918312, "loss": 11.6546, "step": 49486 }, { "epoch": 1.0358996901950934, "grad_norm": 0.3275749981403351, "learning_rate": 0.00014674776911068383, "loss": 11.6596, "step": 49487 }, { "epoch": 1.0359206229590554, "grad_norm": 0.3390425145626068, "learning_rate": 0.00014674583088971187, "loss": 11.6656, "step": 49488 }, { "epoch": 1.0359415557230176, "grad_norm": 0.2541833221912384, "learning_rate": 0.0001467438926462683, "loss": 11.6723, "step": 49489 }, { "epoch": 1.0359624884869798, "grad_norm": 0.5308831334114075, "learning_rate": 0.000146741954380354, "loss": 11.6758, "step": 49490 }, { "epoch": 1.035983421250942, "grad_norm": 0.3219588100910187, "learning_rate": 0.00014674001609196987, "loss": 11.667, "step": 49491 }, { "epoch": 1.0360043540149042, "grad_norm": 0.3040120601654053, "learning_rate": 0.00014673807778111693, "loss": 11.669, "step": 49492 }, { "epoch": 1.0360252867788662, "grad_norm": 0.39009445905685425, "learning_rate": 0.00014673613944779602, "loss": 11.6709, "step": 49493 }, { "epoch": 1.0360462195428284, "grad_norm": 0.321351557970047, "learning_rate": 0.00014673420109200815, "loss": 11.6728, "step": 49494 }, { "epoch": 1.0360671523067906, "grad_norm": 0.2691532075405121, "learning_rate": 0.00014673226271375418, "loss": 11.6575, "step": 49495 }, { "epoch": 1.0360880850707528, "grad_norm": 0.2974807620048523, "learning_rate": 0.0001467303243130351, "loss": 11.6763, "step": 49496 }, { "epoch": 1.036109017834715, "grad_norm": 0.3559155762195587, "learning_rate": 0.0001467283858898518, "loss": 11.6644, "step": 49497 }, { "epoch": 1.036129950598677, "grad_norm": 0.45557141304016113, "learning_rate": 0.00014672644744420524, "loss": 11.651, "step": 49498 }, { "epoch": 1.0361508833626392, "grad_norm": 0.31122273206710815, "learning_rate": 0.00014672450897609634, "loss": 11.6536, "step": 49499 }, { "epoch": 1.0361718161266014, "grad_norm": 0.3617315888404846, "learning_rate": 0.000146722570485526, "loss": 11.6664, "step": 49500 }, { "epoch": 1.0361927488905636, "grad_norm": 0.25228044390678406, "learning_rate": 0.00014672063197249523, "loss": 11.6853, "step": 49501 }, { "epoch": 1.0362136816545258, "grad_norm": 0.28638216853141785, "learning_rate": 0.0001467186934370049, "loss": 11.6643, "step": 49502 }, { "epoch": 1.0362346144184877, "grad_norm": 0.3558630347251892, "learning_rate": 0.00014671675487905593, "loss": 11.6862, "step": 49503 }, { "epoch": 1.03625554718245, "grad_norm": 0.3342204689979553, "learning_rate": 0.0001467148162986493, "loss": 11.6718, "step": 49504 }, { "epoch": 1.0362764799464121, "grad_norm": 0.3926060199737549, "learning_rate": 0.0001467128776957859, "loss": 11.6757, "step": 49505 }, { "epoch": 1.0362974127103743, "grad_norm": 0.27027618885040283, "learning_rate": 0.00014671093907046674, "loss": 11.6585, "step": 49506 }, { "epoch": 1.0363183454743363, "grad_norm": 0.292951762676239, "learning_rate": 0.00014670900042269264, "loss": 11.665, "step": 49507 }, { "epoch": 1.0363392782382985, "grad_norm": 0.2624019980430603, "learning_rate": 0.00014670706175246462, "loss": 11.6448, "step": 49508 }, { "epoch": 1.0363602110022607, "grad_norm": 0.30659034848213196, "learning_rate": 0.00014670512305978357, "loss": 11.6624, "step": 49509 }, { "epoch": 1.036381143766223, "grad_norm": 0.30919548869132996, "learning_rate": 0.00014670318434465045, "loss": 11.653, "step": 49510 }, { "epoch": 1.0364020765301851, "grad_norm": 0.3805807828903198, "learning_rate": 0.00014670124560706613, "loss": 11.6714, "step": 49511 }, { "epoch": 1.036423009294147, "grad_norm": 0.31467410922050476, "learning_rate": 0.00014669930684703162, "loss": 11.6491, "step": 49512 }, { "epoch": 1.0364439420581093, "grad_norm": 0.3771075904369354, "learning_rate": 0.0001466973680645478, "loss": 11.6659, "step": 49513 }, { "epoch": 1.0364648748220715, "grad_norm": 0.3219626545906067, "learning_rate": 0.0001466954292596156, "loss": 11.6706, "step": 49514 }, { "epoch": 1.0364858075860337, "grad_norm": 0.3021112382411957, "learning_rate": 0.00014669349043223604, "loss": 11.6873, "step": 49515 }, { "epoch": 1.036506740349996, "grad_norm": 0.31581833958625793, "learning_rate": 0.00014669155158240994, "loss": 11.664, "step": 49516 }, { "epoch": 1.0365276731139579, "grad_norm": 0.312639445066452, "learning_rate": 0.00014668961271013825, "loss": 11.6711, "step": 49517 }, { "epoch": 1.03654860587792, "grad_norm": 0.32558125257492065, "learning_rate": 0.00014668767381542197, "loss": 11.6749, "step": 49518 }, { "epoch": 1.0365695386418823, "grad_norm": 0.3884650468826294, "learning_rate": 0.00014668573489826195, "loss": 11.6515, "step": 49519 }, { "epoch": 1.0365904714058445, "grad_norm": 0.384196013212204, "learning_rate": 0.0001466837959586592, "loss": 11.6815, "step": 49520 }, { "epoch": 1.0366114041698067, "grad_norm": 0.3695357143878937, "learning_rate": 0.00014668185699661458, "loss": 11.6688, "step": 49521 }, { "epoch": 1.0366323369337687, "grad_norm": 0.2588357627391815, "learning_rate": 0.0001466799180121291, "loss": 11.6651, "step": 49522 }, { "epoch": 1.0366532696977309, "grad_norm": 0.3473602831363678, "learning_rate": 0.00014667797900520362, "loss": 11.6714, "step": 49523 }, { "epoch": 1.036674202461693, "grad_norm": 0.36719393730163574, "learning_rate": 0.0001466760399758391, "loss": 11.6779, "step": 49524 }, { "epoch": 1.0366951352256553, "grad_norm": 0.36660152673721313, "learning_rate": 0.00014667410092403647, "loss": 11.6672, "step": 49525 }, { "epoch": 1.0367160679896172, "grad_norm": 0.33866018056869507, "learning_rate": 0.00014667216184979667, "loss": 11.679, "step": 49526 }, { "epoch": 1.0367370007535794, "grad_norm": 0.3231615722179413, "learning_rate": 0.00014667022275312064, "loss": 11.6654, "step": 49527 }, { "epoch": 1.0367579335175416, "grad_norm": 0.32281190156936646, "learning_rate": 0.00014666828363400926, "loss": 11.6846, "step": 49528 }, { "epoch": 1.0367788662815038, "grad_norm": 0.34790632128715515, "learning_rate": 0.00014666634449246355, "loss": 11.6681, "step": 49529 }, { "epoch": 1.036799799045466, "grad_norm": 0.40816494822502136, "learning_rate": 0.00014666440532848434, "loss": 11.6521, "step": 49530 }, { "epoch": 1.036820731809428, "grad_norm": 0.3447984755039215, "learning_rate": 0.00014666246614207265, "loss": 11.6622, "step": 49531 }, { "epoch": 1.0368416645733902, "grad_norm": 0.31548261642456055, "learning_rate": 0.00014666052693322937, "loss": 11.6767, "step": 49532 }, { "epoch": 1.0368625973373524, "grad_norm": 0.3076154887676239, "learning_rate": 0.00014665858770195544, "loss": 11.6658, "step": 49533 }, { "epoch": 1.0368835301013146, "grad_norm": 0.28507232666015625, "learning_rate": 0.0001466566484482518, "loss": 11.6628, "step": 49534 }, { "epoch": 1.0369044628652768, "grad_norm": 0.36442381143569946, "learning_rate": 0.00014665470917211935, "loss": 11.6619, "step": 49535 }, { "epoch": 1.0369253956292388, "grad_norm": 0.25019922852516174, "learning_rate": 0.00014665276987355907, "loss": 11.6615, "step": 49536 }, { "epoch": 1.036946328393201, "grad_norm": 0.29677313566207886, "learning_rate": 0.00014665083055257188, "loss": 11.648, "step": 49537 }, { "epoch": 1.0369672611571632, "grad_norm": 0.3000931441783905, "learning_rate": 0.00014664889120915867, "loss": 11.6858, "step": 49538 }, { "epoch": 1.0369881939211254, "grad_norm": 0.2932811677455902, "learning_rate": 0.0001466469518433204, "loss": 11.6661, "step": 49539 }, { "epoch": 1.0370091266850876, "grad_norm": 0.31114351749420166, "learning_rate": 0.00014664501245505802, "loss": 11.647, "step": 49540 }, { "epoch": 1.0370300594490496, "grad_norm": 0.2629025876522064, "learning_rate": 0.00014664307304437246, "loss": 11.6494, "step": 49541 }, { "epoch": 1.0370509922130118, "grad_norm": 0.3435406982898712, "learning_rate": 0.0001466411336112646, "loss": 11.6682, "step": 49542 }, { "epoch": 1.037071924976974, "grad_norm": 0.4028362035751343, "learning_rate": 0.00014663919415573546, "loss": 11.6782, "step": 49543 }, { "epoch": 1.0370928577409362, "grad_norm": 0.27363264560699463, "learning_rate": 0.0001466372546777859, "loss": 11.6806, "step": 49544 }, { "epoch": 1.0371137905048982, "grad_norm": 0.30003878474235535, "learning_rate": 0.00014663531517741687, "loss": 11.6518, "step": 49545 }, { "epoch": 1.0371347232688604, "grad_norm": 0.3665379285812378, "learning_rate": 0.00014663337565462933, "loss": 11.6574, "step": 49546 }, { "epoch": 1.0371556560328226, "grad_norm": 0.3248295485973358, "learning_rate": 0.0001466314361094242, "loss": 11.6672, "step": 49547 }, { "epoch": 1.0371765887967848, "grad_norm": 0.34486472606658936, "learning_rate": 0.0001466294965418024, "loss": 11.6881, "step": 49548 }, { "epoch": 1.037197521560747, "grad_norm": 0.26801565289497375, "learning_rate": 0.00014662755695176482, "loss": 11.6688, "step": 49549 }, { "epoch": 1.037218454324709, "grad_norm": 0.27395474910736084, "learning_rate": 0.00014662561733931248, "loss": 11.6706, "step": 49550 }, { "epoch": 1.0372393870886711, "grad_norm": 0.2986033856868744, "learning_rate": 0.00014662367770444627, "loss": 11.6605, "step": 49551 }, { "epoch": 1.0372603198526333, "grad_norm": 0.32301896810531616, "learning_rate": 0.0001466217380471671, "loss": 11.6541, "step": 49552 }, { "epoch": 1.0372812526165955, "grad_norm": 0.35904648900032043, "learning_rate": 0.00014661979836747596, "loss": 11.6796, "step": 49553 }, { "epoch": 1.0373021853805577, "grad_norm": 0.327384889125824, "learning_rate": 0.00014661785866537373, "loss": 11.6742, "step": 49554 }, { "epoch": 1.0373231181445197, "grad_norm": 0.313750296831131, "learning_rate": 0.00014661591894086137, "loss": 11.6829, "step": 49555 }, { "epoch": 1.037344050908482, "grad_norm": 0.366075724363327, "learning_rate": 0.0001466139791939398, "loss": 11.6734, "step": 49556 }, { "epoch": 1.0373649836724441, "grad_norm": 0.2545691132545471, "learning_rate": 0.00014661203942460994, "loss": 11.6698, "step": 49557 }, { "epoch": 1.0373859164364063, "grad_norm": 0.3481226861476898, "learning_rate": 0.00014661009963287276, "loss": 11.6904, "step": 49558 }, { "epoch": 1.0374068492003685, "grad_norm": 0.30815351009368896, "learning_rate": 0.00014660815981872916, "loss": 11.6563, "step": 49559 }, { "epoch": 1.0374277819643305, "grad_norm": 0.4361001253128052, "learning_rate": 0.0001466062199821801, "loss": 11.6712, "step": 49560 }, { "epoch": 1.0374487147282927, "grad_norm": 0.30470672249794006, "learning_rate": 0.00014660428012322648, "loss": 11.682, "step": 49561 }, { "epoch": 1.037469647492255, "grad_norm": 0.347866415977478, "learning_rate": 0.00014660234024186923, "loss": 11.6863, "step": 49562 }, { "epoch": 1.037490580256217, "grad_norm": 0.23431167006492615, "learning_rate": 0.00014660040033810933, "loss": 11.6536, "step": 49563 }, { "epoch": 1.037511513020179, "grad_norm": 0.2955785095691681, "learning_rate": 0.00014659846041194767, "loss": 11.6727, "step": 49564 }, { "epoch": 1.0375324457841413, "grad_norm": 0.41136810183525085, "learning_rate": 0.0001465965204633852, "loss": 11.6859, "step": 49565 }, { "epoch": 1.0375533785481035, "grad_norm": 0.29267174005508423, "learning_rate": 0.00014659458049242285, "loss": 11.662, "step": 49566 }, { "epoch": 1.0375743113120657, "grad_norm": 0.2696413993835449, "learning_rate": 0.00014659264049906155, "loss": 11.6544, "step": 49567 }, { "epoch": 1.0375952440760279, "grad_norm": 0.3593820631504059, "learning_rate": 0.00014659070048330227, "loss": 11.6873, "step": 49568 }, { "epoch": 1.0376161768399899, "grad_norm": 0.3018990457057953, "learning_rate": 0.00014658876044514586, "loss": 11.6727, "step": 49569 }, { "epoch": 1.037637109603952, "grad_norm": 0.32383209466934204, "learning_rate": 0.0001465868203845933, "loss": 11.6707, "step": 49570 }, { "epoch": 1.0376580423679143, "grad_norm": 0.2503681182861328, "learning_rate": 0.00014658488030164552, "loss": 11.6575, "step": 49571 }, { "epoch": 1.0376789751318765, "grad_norm": 0.3286370635032654, "learning_rate": 0.0001465829401963035, "loss": 11.682, "step": 49572 }, { "epoch": 1.0376999078958387, "grad_norm": 0.28196561336517334, "learning_rate": 0.00014658100006856807, "loss": 11.6742, "step": 49573 }, { "epoch": 1.0377208406598006, "grad_norm": 0.3201757073402405, "learning_rate": 0.00014657905991844023, "loss": 11.6732, "step": 49574 }, { "epoch": 1.0377417734237628, "grad_norm": 0.27010664343833923, "learning_rate": 0.00014657711974592095, "loss": 11.6639, "step": 49575 }, { "epoch": 1.037762706187725, "grad_norm": 0.2951986789703369, "learning_rate": 0.00014657517955101107, "loss": 11.6682, "step": 49576 }, { "epoch": 1.0377836389516872, "grad_norm": 0.39063042402267456, "learning_rate": 0.0001465732393337116, "loss": 11.6798, "step": 49577 }, { "epoch": 1.0378045717156494, "grad_norm": 0.39403799176216125, "learning_rate": 0.0001465712990940234, "loss": 11.662, "step": 49578 }, { "epoch": 1.0378255044796114, "grad_norm": 0.3226760923862457, "learning_rate": 0.00014656935883194746, "loss": 11.665, "step": 49579 }, { "epoch": 1.0378464372435736, "grad_norm": 0.3549198806285858, "learning_rate": 0.0001465674185474847, "loss": 11.6436, "step": 49580 }, { "epoch": 1.0378673700075358, "grad_norm": 0.30201297998428345, "learning_rate": 0.00014656547824063603, "loss": 11.6742, "step": 49581 }, { "epoch": 1.037888302771498, "grad_norm": 0.32388049364089966, "learning_rate": 0.00014656353791140246, "loss": 11.674, "step": 49582 }, { "epoch": 1.03790923553546, "grad_norm": 0.32849499583244324, "learning_rate": 0.0001465615975597848, "loss": 11.649, "step": 49583 }, { "epoch": 1.0379301682994222, "grad_norm": 0.3989376723766327, "learning_rate": 0.00014655965718578408, "loss": 11.6727, "step": 49584 }, { "epoch": 1.0379511010633844, "grad_norm": 0.24797162413597107, "learning_rate": 0.0001465577167894012, "loss": 11.6549, "step": 49585 }, { "epoch": 1.0379720338273466, "grad_norm": 0.31283876299858093, "learning_rate": 0.00014655577637063706, "loss": 11.684, "step": 49586 }, { "epoch": 1.0379929665913088, "grad_norm": 0.36924147605895996, "learning_rate": 0.00014655383592949268, "loss": 11.6777, "step": 49587 }, { "epoch": 1.0380138993552708, "grad_norm": 0.2962549328804016, "learning_rate": 0.00014655189546596887, "loss": 11.6817, "step": 49588 }, { "epoch": 1.038034832119233, "grad_norm": 0.32352080941200256, "learning_rate": 0.00014654995498006667, "loss": 11.6887, "step": 49589 }, { "epoch": 1.0380557648831952, "grad_norm": 0.38651615381240845, "learning_rate": 0.00014654801447178698, "loss": 11.6818, "step": 49590 }, { "epoch": 1.0380766976471574, "grad_norm": 0.33399853110313416, "learning_rate": 0.0001465460739411307, "loss": 11.6724, "step": 49591 }, { "epoch": 1.0380976304111196, "grad_norm": 0.3087638318538666, "learning_rate": 0.00014654413338809885, "loss": 11.6769, "step": 49592 }, { "epoch": 1.0381185631750816, "grad_norm": 0.31469064950942993, "learning_rate": 0.00014654219281269224, "loss": 11.6667, "step": 49593 }, { "epoch": 1.0381394959390438, "grad_norm": 0.3699975311756134, "learning_rate": 0.0001465402522149119, "loss": 11.6645, "step": 49594 }, { "epoch": 1.038160428703006, "grad_norm": 0.2689770460128784, "learning_rate": 0.00014653831159475872, "loss": 11.6592, "step": 49595 }, { "epoch": 1.0381813614669682, "grad_norm": 0.22616992890834808, "learning_rate": 0.0001465363709522336, "loss": 11.6671, "step": 49596 }, { "epoch": 1.0382022942309304, "grad_norm": 0.2422860711812973, "learning_rate": 0.00014653443028733758, "loss": 11.6682, "step": 49597 }, { "epoch": 1.0382232269948923, "grad_norm": 0.3184589743614197, "learning_rate": 0.00014653248960007148, "loss": 11.6681, "step": 49598 }, { "epoch": 1.0382441597588545, "grad_norm": 0.33721673488616943, "learning_rate": 0.00014653054889043633, "loss": 11.6802, "step": 49599 }, { "epoch": 1.0382650925228167, "grad_norm": 0.424704909324646, "learning_rate": 0.00014652860815843295, "loss": 11.6548, "step": 49600 }, { "epoch": 1.038286025286779, "grad_norm": 3.0589962005615234, "learning_rate": 0.00014652666740406237, "loss": 11.7146, "step": 49601 }, { "epoch": 1.038306958050741, "grad_norm": 0.3680092692375183, "learning_rate": 0.00014652472662732548, "loss": 11.6876, "step": 49602 }, { "epoch": 1.0383278908147031, "grad_norm": 0.3503699004650116, "learning_rate": 0.00014652278582822322, "loss": 11.6653, "step": 49603 }, { "epoch": 1.0383488235786653, "grad_norm": 0.28859826922416687, "learning_rate": 0.00014652084500675652, "loss": 11.6737, "step": 49604 }, { "epoch": 1.0383697563426275, "grad_norm": 0.3459797501564026, "learning_rate": 0.00014651890416292633, "loss": 11.677, "step": 49605 }, { "epoch": 1.0383906891065897, "grad_norm": 0.2955072820186615, "learning_rate": 0.00014651696329673355, "loss": 11.6633, "step": 49606 }, { "epoch": 1.0384116218705517, "grad_norm": 0.36997273564338684, "learning_rate": 0.00014651502240817916, "loss": 11.6744, "step": 49607 }, { "epoch": 1.038432554634514, "grad_norm": 0.28682956099510193, "learning_rate": 0.00014651308149726404, "loss": 11.6808, "step": 49608 }, { "epoch": 1.038453487398476, "grad_norm": 0.24785244464874268, "learning_rate": 0.00014651114056398918, "loss": 11.6902, "step": 49609 }, { "epoch": 1.0384744201624383, "grad_norm": 0.30553343892097473, "learning_rate": 0.00014650919960835544, "loss": 11.6641, "step": 49610 }, { "epoch": 1.0384953529264005, "grad_norm": 0.33558109402656555, "learning_rate": 0.00014650725863036383, "loss": 11.662, "step": 49611 }, { "epoch": 1.0385162856903625, "grad_norm": 0.3119376301765442, "learning_rate": 0.00014650531763001526, "loss": 11.6567, "step": 49612 }, { "epoch": 1.0385372184543247, "grad_norm": 0.3398282825946808, "learning_rate": 0.00014650337660731063, "loss": 11.6621, "step": 49613 }, { "epoch": 1.0385581512182869, "grad_norm": 0.28539758920669556, "learning_rate": 0.0001465014355622509, "loss": 11.6596, "step": 49614 }, { "epoch": 1.038579083982249, "grad_norm": 0.3015984892845154, "learning_rate": 0.000146499494494837, "loss": 11.6567, "step": 49615 }, { "epoch": 1.0386000167462113, "grad_norm": 0.41872385144233704, "learning_rate": 0.0001464975534050699, "loss": 11.6577, "step": 49616 }, { "epoch": 1.0386209495101733, "grad_norm": 0.2854384183883667, "learning_rate": 0.0001464956122929504, "loss": 11.6634, "step": 49617 }, { "epoch": 1.0386418822741355, "grad_norm": 0.3513878285884857, "learning_rate": 0.00014649367115847958, "loss": 11.671, "step": 49618 }, { "epoch": 1.0386628150380977, "grad_norm": 0.3057411015033722, "learning_rate": 0.00014649173000165836, "loss": 11.6721, "step": 49619 }, { "epoch": 1.0386837478020599, "grad_norm": 0.2702484726905823, "learning_rate": 0.00014648978882248755, "loss": 11.6706, "step": 49620 }, { "epoch": 1.0387046805660218, "grad_norm": 0.304627925157547, "learning_rate": 0.00014648784762096823, "loss": 11.6543, "step": 49621 }, { "epoch": 1.038725613329984, "grad_norm": 0.36241114139556885, "learning_rate": 0.00014648590639710125, "loss": 11.6692, "step": 49622 }, { "epoch": 1.0387465460939462, "grad_norm": 0.3191704750061035, "learning_rate": 0.00014648396515088756, "loss": 11.6503, "step": 49623 }, { "epoch": 1.0387674788579084, "grad_norm": 0.3203267455101013, "learning_rate": 0.0001464820238823281, "loss": 11.6636, "step": 49624 }, { "epoch": 1.0387884116218706, "grad_norm": 0.28627920150756836, "learning_rate": 0.00014648008259142382, "loss": 11.6748, "step": 49625 }, { "epoch": 1.0388093443858326, "grad_norm": 0.3014686107635498, "learning_rate": 0.00014647814127817562, "loss": 11.6664, "step": 49626 }, { "epoch": 1.0388302771497948, "grad_norm": 0.3921706974506378, "learning_rate": 0.00014647619994258442, "loss": 11.6569, "step": 49627 }, { "epoch": 1.038851209913757, "grad_norm": 0.26897692680358887, "learning_rate": 0.00014647425858465118, "loss": 11.6617, "step": 49628 }, { "epoch": 1.0388721426777192, "grad_norm": 0.2676420211791992, "learning_rate": 0.00014647231720437686, "loss": 11.6743, "step": 49629 }, { "epoch": 1.0388930754416814, "grad_norm": 0.4302229881286621, "learning_rate": 0.00014647037580176235, "loss": 11.6673, "step": 49630 }, { "epoch": 1.0389140082056434, "grad_norm": 0.3956020176410675, "learning_rate": 0.0001464684343768086, "loss": 11.6904, "step": 49631 }, { "epoch": 1.0389349409696056, "grad_norm": 0.3338622748851776, "learning_rate": 0.00014646649292951658, "loss": 11.6594, "step": 49632 }, { "epoch": 1.0389558737335678, "grad_norm": 0.3422636091709137, "learning_rate": 0.00014646455145988713, "loss": 11.6714, "step": 49633 }, { "epoch": 1.03897680649753, "grad_norm": 0.28842800855636597, "learning_rate": 0.00014646260996792126, "loss": 11.6802, "step": 49634 }, { "epoch": 1.0389977392614922, "grad_norm": 0.4272688031196594, "learning_rate": 0.0001464606684536199, "loss": 11.6752, "step": 49635 }, { "epoch": 1.0390186720254542, "grad_norm": 0.29674476385116577, "learning_rate": 0.00014645872691698394, "loss": 11.6559, "step": 49636 }, { "epoch": 1.0390396047894164, "grad_norm": 0.30930638313293457, "learning_rate": 0.00014645678535801435, "loss": 11.6681, "step": 49637 }, { "epoch": 1.0390605375533786, "grad_norm": 0.273348867893219, "learning_rate": 0.00014645484377671207, "loss": 11.6602, "step": 49638 }, { "epoch": 1.0390814703173408, "grad_norm": 0.30085402727127075, "learning_rate": 0.00014645290217307798, "loss": 11.6685, "step": 49639 }, { "epoch": 1.0391024030813028, "grad_norm": 0.282391756772995, "learning_rate": 0.00014645096054711308, "loss": 11.6627, "step": 49640 }, { "epoch": 1.039123335845265, "grad_norm": 0.28241240978240967, "learning_rate": 0.00014644901889881827, "loss": 11.6651, "step": 49641 }, { "epoch": 1.0391442686092272, "grad_norm": 0.41202419996261597, "learning_rate": 0.00014644707722819445, "loss": 11.6726, "step": 49642 }, { "epoch": 1.0391652013731894, "grad_norm": 0.44415023922920227, "learning_rate": 0.00014644513553524263, "loss": 11.6575, "step": 49643 }, { "epoch": 1.0391861341371516, "grad_norm": 0.2965025305747986, "learning_rate": 0.00014644319381996367, "loss": 11.6674, "step": 49644 }, { "epoch": 1.0392070669011135, "grad_norm": 0.28092220425605774, "learning_rate": 0.00014644125208235854, "loss": 11.6522, "step": 49645 }, { "epoch": 1.0392279996650757, "grad_norm": 0.2934550940990448, "learning_rate": 0.00014643931032242818, "loss": 11.685, "step": 49646 }, { "epoch": 1.039248932429038, "grad_norm": 0.2948756515979767, "learning_rate": 0.00014643736854017352, "loss": 11.6841, "step": 49647 }, { "epoch": 1.0392698651930001, "grad_norm": 0.34696465730667114, "learning_rate": 0.0001464354267355955, "loss": 11.6397, "step": 49648 }, { "epoch": 1.0392907979569623, "grad_norm": 0.3450166881084442, "learning_rate": 0.00014643348490869502, "loss": 11.6664, "step": 49649 }, { "epoch": 1.0393117307209243, "grad_norm": 0.31547975540161133, "learning_rate": 0.00014643154305947302, "loss": 11.6731, "step": 49650 }, { "epoch": 1.0393326634848865, "grad_norm": 0.33326444029808044, "learning_rate": 0.00014642960118793047, "loss": 11.6795, "step": 49651 }, { "epoch": 1.0393535962488487, "grad_norm": 0.3474544882774353, "learning_rate": 0.00014642765929406829, "loss": 11.666, "step": 49652 }, { "epoch": 1.039374529012811, "grad_norm": 0.31548556685447693, "learning_rate": 0.00014642571737788737, "loss": 11.6536, "step": 49653 }, { "epoch": 1.039395461776773, "grad_norm": 0.38855746388435364, "learning_rate": 0.0001464237754393887, "loss": 11.6801, "step": 49654 }, { "epoch": 1.039416394540735, "grad_norm": 0.26184046268463135, "learning_rate": 0.00014642183347857317, "loss": 11.6673, "step": 49655 }, { "epoch": 1.0394373273046973, "grad_norm": 0.4682871103286743, "learning_rate": 0.00014641989149544174, "loss": 11.658, "step": 49656 }, { "epoch": 1.0394582600686595, "grad_norm": 0.2864449620246887, "learning_rate": 0.00014641794948999534, "loss": 11.6694, "step": 49657 }, { "epoch": 1.0394791928326217, "grad_norm": 0.3369215428829193, "learning_rate": 0.00014641600746223495, "loss": 11.6807, "step": 49658 }, { "epoch": 1.0395001255965837, "grad_norm": 0.29526934027671814, "learning_rate": 0.00014641406541216138, "loss": 11.6736, "step": 49659 }, { "epoch": 1.0395210583605459, "grad_norm": 0.30975306034088135, "learning_rate": 0.00014641212333977568, "loss": 11.6671, "step": 49660 }, { "epoch": 1.039541991124508, "grad_norm": 0.37722182273864746, "learning_rate": 0.00014641018124507874, "loss": 11.6734, "step": 49661 }, { "epoch": 1.0395629238884703, "grad_norm": 0.3437211811542511, "learning_rate": 0.00014640823912807145, "loss": 11.6535, "step": 49662 }, { "epoch": 1.0395838566524325, "grad_norm": 0.3892524540424347, "learning_rate": 0.00014640629698875484, "loss": 11.6675, "step": 49663 }, { "epoch": 1.0396047894163944, "grad_norm": 0.35790368914604187, "learning_rate": 0.0001464043548271298, "loss": 11.6658, "step": 49664 }, { "epoch": 1.0396257221803566, "grad_norm": 0.3186178207397461, "learning_rate": 0.00014640241264319724, "loss": 11.6732, "step": 49665 }, { "epoch": 1.0396466549443188, "grad_norm": 0.31486862897872925, "learning_rate": 0.00014640047043695811, "loss": 11.6709, "step": 49666 }, { "epoch": 1.039667587708281, "grad_norm": 0.32800790667533875, "learning_rate": 0.00014639852820841332, "loss": 11.6632, "step": 49667 }, { "epoch": 1.0396885204722432, "grad_norm": 0.38442620635032654, "learning_rate": 0.00014639658595756388, "loss": 11.6718, "step": 49668 }, { "epoch": 1.0397094532362052, "grad_norm": 0.29140952229499817, "learning_rate": 0.00014639464368441062, "loss": 11.6686, "step": 49669 }, { "epoch": 1.0397303860001674, "grad_norm": 0.35581260919570923, "learning_rate": 0.00014639270138895456, "loss": 11.6631, "step": 49670 }, { "epoch": 1.0397513187641296, "grad_norm": 0.2661780118942261, "learning_rate": 0.00014639075907119656, "loss": 11.6692, "step": 49671 }, { "epoch": 1.0397722515280918, "grad_norm": 0.32978472113609314, "learning_rate": 0.00014638881673113762, "loss": 11.6677, "step": 49672 }, { "epoch": 1.039793184292054, "grad_norm": 0.27523142099380493, "learning_rate": 0.00014638687436877864, "loss": 11.6703, "step": 49673 }, { "epoch": 1.039814117056016, "grad_norm": 0.3134543001651764, "learning_rate": 0.00014638493198412057, "loss": 11.6643, "step": 49674 }, { "epoch": 1.0398350498199782, "grad_norm": 0.27234891057014465, "learning_rate": 0.00014638298957716432, "loss": 11.6688, "step": 49675 }, { "epoch": 1.0398559825839404, "grad_norm": 0.4331859350204468, "learning_rate": 0.00014638104714791085, "loss": 11.6731, "step": 49676 }, { "epoch": 1.0398769153479026, "grad_norm": 0.30144476890563965, "learning_rate": 0.00014637910469636107, "loss": 11.6652, "step": 49677 }, { "epoch": 1.0398978481118646, "grad_norm": 0.314010888338089, "learning_rate": 0.00014637716222251594, "loss": 11.6663, "step": 49678 }, { "epoch": 1.0399187808758268, "grad_norm": 0.37314340472221375, "learning_rate": 0.00014637521972637636, "loss": 11.6729, "step": 49679 }, { "epoch": 1.039939713639789, "grad_norm": 0.2923157215118408, "learning_rate": 0.0001463732772079433, "loss": 11.6851, "step": 49680 }, { "epoch": 1.0399606464037512, "grad_norm": 0.3045809268951416, "learning_rate": 0.00014637133466721767, "loss": 11.671, "step": 49681 }, { "epoch": 1.0399815791677134, "grad_norm": 0.270183265209198, "learning_rate": 0.0001463693921042004, "loss": 11.6636, "step": 49682 }, { "epoch": 1.0400025119316754, "grad_norm": 0.34757158160209656, "learning_rate": 0.00014636744951889244, "loss": 11.6684, "step": 49683 }, { "epoch": 1.0400234446956376, "grad_norm": 0.3577862083911896, "learning_rate": 0.0001463655069112947, "loss": 11.6787, "step": 49684 }, { "epoch": 1.0400443774595998, "grad_norm": 0.34518301486968994, "learning_rate": 0.0001463635642814082, "loss": 11.6835, "step": 49685 }, { "epoch": 1.040065310223562, "grad_norm": 0.2692553699016571, "learning_rate": 0.00014636162162923373, "loss": 11.6783, "step": 49686 }, { "epoch": 1.0400862429875242, "grad_norm": 0.3105905055999756, "learning_rate": 0.00014635967895477233, "loss": 11.672, "step": 49687 }, { "epoch": 1.0401071757514861, "grad_norm": 0.31001201272010803, "learning_rate": 0.00014635773625802488, "loss": 11.6731, "step": 49688 }, { "epoch": 1.0401281085154483, "grad_norm": 0.28165295720100403, "learning_rate": 0.00014635579353899236, "loss": 11.6651, "step": 49689 }, { "epoch": 1.0401490412794105, "grad_norm": 0.3507937490940094, "learning_rate": 0.0001463538507976757, "loss": 11.6623, "step": 49690 }, { "epoch": 1.0401699740433727, "grad_norm": 0.3596077561378479, "learning_rate": 0.00014635190803407577, "loss": 11.677, "step": 49691 }, { "epoch": 1.040190906807335, "grad_norm": 0.28569895029067993, "learning_rate": 0.00014634996524819357, "loss": 11.6674, "step": 49692 }, { "epoch": 1.040211839571297, "grad_norm": 0.32541272044181824, "learning_rate": 0.00014634802244003003, "loss": 11.6735, "step": 49693 }, { "epoch": 1.0402327723352591, "grad_norm": 0.26879554986953735, "learning_rate": 0.00014634607960958604, "loss": 11.6632, "step": 49694 }, { "epoch": 1.0402537050992213, "grad_norm": 0.3087800145149231, "learning_rate": 0.00014634413675686258, "loss": 11.6599, "step": 49695 }, { "epoch": 1.0402746378631835, "grad_norm": 0.2925381660461426, "learning_rate": 0.00014634219388186053, "loss": 11.6644, "step": 49696 }, { "epoch": 1.0402955706271455, "grad_norm": 0.26664233207702637, "learning_rate": 0.0001463402509845809, "loss": 11.6701, "step": 49697 }, { "epoch": 1.0403165033911077, "grad_norm": 0.33014222979545593, "learning_rate": 0.0001463383080650246, "loss": 11.6596, "step": 49698 }, { "epoch": 1.04033743615507, "grad_norm": 0.4204416871070862, "learning_rate": 0.00014633636512319248, "loss": 11.6717, "step": 49699 }, { "epoch": 1.040358368919032, "grad_norm": 0.31320756673812866, "learning_rate": 0.0001463344221590856, "loss": 11.6739, "step": 49700 }, { "epoch": 1.0403793016829943, "grad_norm": 0.27889373898506165, "learning_rate": 0.0001463324791727048, "loss": 11.6608, "step": 49701 }, { "epoch": 1.0404002344469563, "grad_norm": 0.3319091498851776, "learning_rate": 0.00014633053616405105, "loss": 11.6446, "step": 49702 }, { "epoch": 1.0404211672109185, "grad_norm": 0.2838599681854248, "learning_rate": 0.0001463285931331253, "loss": 11.6637, "step": 49703 }, { "epoch": 1.0404420999748807, "grad_norm": 0.3017948567867279, "learning_rate": 0.00014632665007992848, "loss": 11.6679, "step": 49704 }, { "epoch": 1.0404630327388429, "grad_norm": 0.26139211654663086, "learning_rate": 0.00014632470700446146, "loss": 11.677, "step": 49705 }, { "epoch": 1.040483965502805, "grad_norm": 0.8730864524841309, "learning_rate": 0.00014632276390672523, "loss": 11.6337, "step": 49706 }, { "epoch": 1.040504898266767, "grad_norm": 0.39139431715011597, "learning_rate": 0.00014632082078672078, "loss": 11.674, "step": 49707 }, { "epoch": 1.0405258310307293, "grad_norm": 0.3004719913005829, "learning_rate": 0.00014631887764444894, "loss": 11.6612, "step": 49708 }, { "epoch": 1.0405467637946915, "grad_norm": 0.30002158880233765, "learning_rate": 0.00014631693447991068, "loss": 11.6817, "step": 49709 }, { "epoch": 1.0405676965586537, "grad_norm": 0.27068769931793213, "learning_rate": 0.00014631499129310696, "loss": 11.6723, "step": 49710 }, { "epoch": 1.0405886293226159, "grad_norm": 0.31784409284591675, "learning_rate": 0.00014631304808403867, "loss": 11.6684, "step": 49711 }, { "epoch": 1.0406095620865778, "grad_norm": 0.29474398493766785, "learning_rate": 0.00014631110485270683, "loss": 11.679, "step": 49712 }, { "epoch": 1.04063049485054, "grad_norm": 0.30068880319595337, "learning_rate": 0.00014630916159911225, "loss": 11.6726, "step": 49713 }, { "epoch": 1.0406514276145022, "grad_norm": 0.3699621558189392, "learning_rate": 0.000146307218323256, "loss": 11.6529, "step": 49714 }, { "epoch": 1.0406723603784644, "grad_norm": 0.2741628885269165, "learning_rate": 0.00014630527502513884, "loss": 11.6738, "step": 49715 }, { "epoch": 1.0406932931424264, "grad_norm": 0.3181312084197998, "learning_rate": 0.00014630333170476188, "loss": 11.6664, "step": 49716 }, { "epoch": 1.0407142259063886, "grad_norm": 0.34462496638298035, "learning_rate": 0.00014630138836212595, "loss": 11.6577, "step": 49717 }, { "epoch": 1.0407351586703508, "grad_norm": 0.25016486644744873, "learning_rate": 0.00014629944499723203, "loss": 11.6587, "step": 49718 }, { "epoch": 1.040756091434313, "grad_norm": 0.35671210289001465, "learning_rate": 0.00014629750161008104, "loss": 11.6696, "step": 49719 }, { "epoch": 1.0407770241982752, "grad_norm": 0.3368714153766632, "learning_rate": 0.0001462955582006739, "loss": 11.6683, "step": 49720 }, { "epoch": 1.0407979569622372, "grad_norm": 0.6245834231376648, "learning_rate": 0.00014629361476901158, "loss": 11.6524, "step": 49721 }, { "epoch": 1.0408188897261994, "grad_norm": 0.29815930128097534, "learning_rate": 0.00014629167131509496, "loss": 11.6609, "step": 49722 }, { "epoch": 1.0408398224901616, "grad_norm": 0.3407098948955536, "learning_rate": 0.00014628972783892503, "loss": 11.6804, "step": 49723 }, { "epoch": 1.0408607552541238, "grad_norm": 0.3492432236671448, "learning_rate": 0.0001462877843405027, "loss": 11.6603, "step": 49724 }, { "epoch": 1.040881688018086, "grad_norm": 0.2591036856174469, "learning_rate": 0.00014628584081982887, "loss": 11.6499, "step": 49725 }, { "epoch": 1.040902620782048, "grad_norm": 0.2981363832950592, "learning_rate": 0.00014628389727690456, "loss": 11.677, "step": 49726 }, { "epoch": 1.0409235535460102, "grad_norm": 0.4069727659225464, "learning_rate": 0.00014628195371173063, "loss": 11.6774, "step": 49727 }, { "epoch": 1.0409444863099724, "grad_norm": 0.3022017180919647, "learning_rate": 0.00014628001012430804, "loss": 11.6625, "step": 49728 }, { "epoch": 1.0409654190739346, "grad_norm": 0.40553319454193115, "learning_rate": 0.00014627806651463772, "loss": 11.6599, "step": 49729 }, { "epoch": 1.0409863518378968, "grad_norm": 0.3359912931919098, "learning_rate": 0.00014627612288272058, "loss": 11.6599, "step": 49730 }, { "epoch": 1.0410072846018588, "grad_norm": 0.3178601861000061, "learning_rate": 0.00014627417922855762, "loss": 11.6793, "step": 49731 }, { "epoch": 1.041028217365821, "grad_norm": 0.3413674533367157, "learning_rate": 0.0001462722355521497, "loss": 11.6492, "step": 49732 }, { "epoch": 1.0410491501297832, "grad_norm": 0.32441627979278564, "learning_rate": 0.00014627029185349782, "loss": 11.652, "step": 49733 }, { "epoch": 1.0410700828937454, "grad_norm": 0.3245493471622467, "learning_rate": 0.0001462683481326029, "loss": 11.6529, "step": 49734 }, { "epoch": 1.0410910156577073, "grad_norm": 0.3010786771774292, "learning_rate": 0.00014626640438946583, "loss": 11.6768, "step": 49735 }, { "epoch": 1.0411119484216695, "grad_norm": 0.27676188945770264, "learning_rate": 0.00014626446062408755, "loss": 11.6765, "step": 49736 }, { "epoch": 1.0411328811856317, "grad_norm": 0.3283717632293701, "learning_rate": 0.00014626251683646902, "loss": 11.6748, "step": 49737 }, { "epoch": 1.041153813949594, "grad_norm": 0.40621334314346313, "learning_rate": 0.0001462605730266112, "loss": 11.6939, "step": 49738 }, { "epoch": 1.0411747467135561, "grad_norm": 0.34686362743377686, "learning_rate": 0.00014625862919451498, "loss": 11.6685, "step": 49739 }, { "epoch": 1.0411956794775181, "grad_norm": 0.29902616143226624, "learning_rate": 0.00014625668534018132, "loss": 11.6748, "step": 49740 }, { "epoch": 1.0412166122414803, "grad_norm": 0.3890860676765442, "learning_rate": 0.00014625474146361114, "loss": 11.6724, "step": 49741 }, { "epoch": 1.0412375450054425, "grad_norm": 0.3388049602508545, "learning_rate": 0.00014625279756480539, "loss": 11.6735, "step": 49742 }, { "epoch": 1.0412584777694047, "grad_norm": 0.2708059847354889, "learning_rate": 0.00014625085364376497, "loss": 11.6682, "step": 49743 }, { "epoch": 1.041279410533367, "grad_norm": 0.3845210075378418, "learning_rate": 0.00014624890970049085, "loss": 11.6707, "step": 49744 }, { "epoch": 1.041300343297329, "grad_norm": 0.3429108262062073, "learning_rate": 0.00014624696573498395, "loss": 11.6657, "step": 49745 }, { "epoch": 1.041321276061291, "grad_norm": 0.29787498712539673, "learning_rate": 0.0001462450217472452, "loss": 11.6546, "step": 49746 }, { "epoch": 1.0413422088252533, "grad_norm": 0.2959366738796234, "learning_rate": 0.00014624307773727558, "loss": 11.6582, "step": 49747 }, { "epoch": 1.0413631415892155, "grad_norm": 0.4234386086463928, "learning_rate": 0.00014624113370507593, "loss": 11.6984, "step": 49748 }, { "epoch": 1.0413840743531777, "grad_norm": 0.3227395713329315, "learning_rate": 0.00014623918965064729, "loss": 11.6884, "step": 49749 }, { "epoch": 1.0414050071171397, "grad_norm": 0.23901188373565674, "learning_rate": 0.00014623724557399052, "loss": 11.6648, "step": 49750 }, { "epoch": 1.0414259398811019, "grad_norm": 0.3945143520832062, "learning_rate": 0.00014623530147510656, "loss": 11.6803, "step": 49751 }, { "epoch": 1.041446872645064, "grad_norm": 0.26432931423187256, "learning_rate": 0.0001462333573539964, "loss": 11.6641, "step": 49752 }, { "epoch": 1.0414678054090263, "grad_norm": 0.2982136011123657, "learning_rate": 0.00014623141321066092, "loss": 11.6722, "step": 49753 }, { "epoch": 1.0414887381729883, "grad_norm": 0.3382914960384369, "learning_rate": 0.00014622946904510107, "loss": 11.67, "step": 49754 }, { "epoch": 1.0415096709369505, "grad_norm": 0.27897047996520996, "learning_rate": 0.00014622752485731779, "loss": 11.6753, "step": 49755 }, { "epoch": 1.0415306037009127, "grad_norm": 0.3083512783050537, "learning_rate": 0.00014622558064731201, "loss": 11.6631, "step": 49756 }, { "epoch": 1.0415515364648749, "grad_norm": 0.24327993392944336, "learning_rate": 0.00014622363641508468, "loss": 11.6641, "step": 49757 }, { "epoch": 1.041572469228837, "grad_norm": 0.3030005097389221, "learning_rate": 0.0001462216921606367, "loss": 11.6544, "step": 49758 }, { "epoch": 1.041593401992799, "grad_norm": 0.2516510486602783, "learning_rate": 0.00014621974788396906, "loss": 11.6611, "step": 49759 }, { "epoch": 1.0416143347567612, "grad_norm": 0.333459734916687, "learning_rate": 0.00014621780358508264, "loss": 11.6695, "step": 49760 }, { "epoch": 1.0416352675207234, "grad_norm": 0.3178919851779938, "learning_rate": 0.0001462158592639784, "loss": 11.6732, "step": 49761 }, { "epoch": 1.0416562002846856, "grad_norm": 0.28960147500038147, "learning_rate": 0.00014621391492065727, "loss": 11.6668, "step": 49762 }, { "epoch": 1.0416771330486478, "grad_norm": 0.35277581214904785, "learning_rate": 0.0001462119705551202, "loss": 11.6413, "step": 49763 }, { "epoch": 1.0416980658126098, "grad_norm": 0.2822728753089905, "learning_rate": 0.00014621002616736807, "loss": 11.6823, "step": 49764 }, { "epoch": 1.041718998576572, "grad_norm": 0.2544799745082855, "learning_rate": 0.00014620808175740187, "loss": 11.6668, "step": 49765 }, { "epoch": 1.0417399313405342, "grad_norm": 0.3137955963611603, "learning_rate": 0.00014620613732522255, "loss": 11.6768, "step": 49766 }, { "epoch": 1.0417608641044964, "grad_norm": 0.39676329493522644, "learning_rate": 0.00014620419287083097, "loss": 11.6744, "step": 49767 }, { "epoch": 1.0417817968684586, "grad_norm": 0.38824474811553955, "learning_rate": 0.00014620224839422813, "loss": 11.6758, "step": 49768 }, { "epoch": 1.0418027296324206, "grad_norm": 0.34507569670677185, "learning_rate": 0.00014620030389541494, "loss": 11.6663, "step": 49769 }, { "epoch": 1.0418236623963828, "grad_norm": 0.33963721990585327, "learning_rate": 0.00014619835937439236, "loss": 11.6675, "step": 49770 }, { "epoch": 1.041844595160345, "grad_norm": 0.38266563415527344, "learning_rate": 0.00014619641483116128, "loss": 11.6766, "step": 49771 }, { "epoch": 1.0418655279243072, "grad_norm": 0.36974430084228516, "learning_rate": 0.00014619447026572265, "loss": 11.6514, "step": 49772 }, { "epoch": 1.0418864606882692, "grad_norm": 0.33228325843811035, "learning_rate": 0.00014619252567807741, "loss": 11.6784, "step": 49773 }, { "epoch": 1.0419073934522314, "grad_norm": 0.3851177394390106, "learning_rate": 0.00014619058106822653, "loss": 11.6565, "step": 49774 }, { "epoch": 1.0419283262161936, "grad_norm": 0.3024721145629883, "learning_rate": 0.00014618863643617089, "loss": 11.6758, "step": 49775 }, { "epoch": 1.0419492589801558, "grad_norm": 0.2648472189903259, "learning_rate": 0.00014618669178191146, "loss": 11.6828, "step": 49776 }, { "epoch": 1.041970191744118, "grad_norm": 0.3296024799346924, "learning_rate": 0.00014618474710544912, "loss": 11.674, "step": 49777 }, { "epoch": 1.04199112450808, "grad_norm": 0.3356958329677582, "learning_rate": 0.0001461828024067849, "loss": 11.6505, "step": 49778 }, { "epoch": 1.0420120572720422, "grad_norm": 0.3996966779232025, "learning_rate": 0.00014618085768591964, "loss": 11.678, "step": 49779 }, { "epoch": 1.0420329900360044, "grad_norm": 0.27862223982810974, "learning_rate": 0.00014617891294285435, "loss": 11.6552, "step": 49780 }, { "epoch": 1.0420539227999666, "grad_norm": 0.6802150011062622, "learning_rate": 0.0001461769681775899, "loss": 11.6764, "step": 49781 }, { "epoch": 1.0420748555639288, "grad_norm": 0.29358258843421936, "learning_rate": 0.00014617502339012725, "loss": 11.6518, "step": 49782 }, { "epoch": 1.0420957883278907, "grad_norm": 0.3169475793838501, "learning_rate": 0.0001461730785804674, "loss": 11.6579, "step": 49783 }, { "epoch": 1.042116721091853, "grad_norm": 0.2885286211967468, "learning_rate": 0.00014617113374861115, "loss": 11.6841, "step": 49784 }, { "epoch": 1.0421376538558151, "grad_norm": 0.29917192459106445, "learning_rate": 0.00014616918889455958, "loss": 11.6538, "step": 49785 }, { "epoch": 1.0421585866197773, "grad_norm": 0.3511894941329956, "learning_rate": 0.0001461672440183135, "loss": 11.6845, "step": 49786 }, { "epoch": 1.0421795193837395, "grad_norm": 0.3492104709148407, "learning_rate": 0.00014616529911987394, "loss": 11.6762, "step": 49787 }, { "epoch": 1.0422004521477015, "grad_norm": 0.3571774363517761, "learning_rate": 0.00014616335419924176, "loss": 11.6699, "step": 49788 }, { "epoch": 1.0422213849116637, "grad_norm": 0.33620306849479675, "learning_rate": 0.00014616140925641794, "loss": 11.6707, "step": 49789 }, { "epoch": 1.042242317675626, "grad_norm": 0.2864840030670166, "learning_rate": 0.00014615946429140343, "loss": 11.6771, "step": 49790 }, { "epoch": 1.0422632504395881, "grad_norm": 0.2599981129169464, "learning_rate": 0.00014615751930419915, "loss": 11.6777, "step": 49791 }, { "epoch": 1.04228418320355, "grad_norm": 0.33147743344306946, "learning_rate": 0.00014615557429480596, "loss": 11.6787, "step": 49792 }, { "epoch": 1.0423051159675123, "grad_norm": 0.3158515989780426, "learning_rate": 0.00014615362926322494, "loss": 11.6656, "step": 49793 }, { "epoch": 1.0423260487314745, "grad_norm": 0.3868262767791748, "learning_rate": 0.00014615168420945688, "loss": 11.6616, "step": 49794 }, { "epoch": 1.0423469814954367, "grad_norm": 0.342916876077652, "learning_rate": 0.00014614973913350282, "loss": 11.6835, "step": 49795 }, { "epoch": 1.042367914259399, "grad_norm": 0.29288455843925476, "learning_rate": 0.00014614779403536365, "loss": 11.6593, "step": 49796 }, { "epoch": 1.0423888470233609, "grad_norm": 0.2810027003288269, "learning_rate": 0.00014614584891504032, "loss": 11.644, "step": 49797 }, { "epoch": 1.042409779787323, "grad_norm": 0.5540253520011902, "learning_rate": 0.00014614390377253373, "loss": 11.6693, "step": 49798 }, { "epoch": 1.0424307125512853, "grad_norm": 0.36641132831573486, "learning_rate": 0.00014614195860784482, "loss": 11.6804, "step": 49799 }, { "epoch": 1.0424516453152475, "grad_norm": 0.3703690469264984, "learning_rate": 0.0001461400134209746, "loss": 11.6795, "step": 49800 }, { "epoch": 1.0424725780792097, "grad_norm": 0.2694239616394043, "learning_rate": 0.00014613806821192393, "loss": 11.6769, "step": 49801 }, { "epoch": 1.0424935108431717, "grad_norm": 0.34198108315467834, "learning_rate": 0.00014613612298069377, "loss": 11.6663, "step": 49802 }, { "epoch": 1.0425144436071339, "grad_norm": 0.3062179684638977, "learning_rate": 0.00014613417772728503, "loss": 11.6556, "step": 49803 }, { "epoch": 1.042535376371096, "grad_norm": 0.330398291349411, "learning_rate": 0.0001461322324516987, "loss": 11.6766, "step": 49804 }, { "epoch": 1.0425563091350583, "grad_norm": 0.27279287576675415, "learning_rate": 0.00014613028715393565, "loss": 11.6505, "step": 49805 }, { "epoch": 1.0425772418990205, "grad_norm": 0.293929785490036, "learning_rate": 0.00014612834183399685, "loss": 11.654, "step": 49806 }, { "epoch": 1.0425981746629824, "grad_norm": 0.326688289642334, "learning_rate": 0.00014612639649188327, "loss": 11.6821, "step": 49807 }, { "epoch": 1.0426191074269446, "grad_norm": 0.359811931848526, "learning_rate": 0.00014612445112759575, "loss": 11.6682, "step": 49808 }, { "epoch": 1.0426400401909068, "grad_norm": 0.30013301968574524, "learning_rate": 0.0001461225057411353, "loss": 11.6825, "step": 49809 }, { "epoch": 1.042660972954869, "grad_norm": 0.37993520498275757, "learning_rate": 0.00014612056033250288, "loss": 11.6622, "step": 49810 }, { "epoch": 1.042681905718831, "grad_norm": 0.24912890791893005, "learning_rate": 0.00014611861490169934, "loss": 11.6655, "step": 49811 }, { "epoch": 1.0427028384827932, "grad_norm": 0.2831631302833557, "learning_rate": 0.00014611666944872566, "loss": 11.6755, "step": 49812 }, { "epoch": 1.0427237712467554, "grad_norm": 0.39588403701782227, "learning_rate": 0.0001461147239735828, "loss": 11.6819, "step": 49813 }, { "epoch": 1.0427447040107176, "grad_norm": 0.30225181579589844, "learning_rate": 0.00014611277847627162, "loss": 11.6734, "step": 49814 }, { "epoch": 1.0427656367746798, "grad_norm": 0.33042508363723755, "learning_rate": 0.00014611083295679316, "loss": 11.6792, "step": 49815 }, { "epoch": 1.0427865695386418, "grad_norm": 0.3447333872318268, "learning_rate": 0.00014610888741514825, "loss": 11.688, "step": 49816 }, { "epoch": 1.042807502302604, "grad_norm": 0.2649880647659302, "learning_rate": 0.00014610694185133787, "loss": 11.6669, "step": 49817 }, { "epoch": 1.0428284350665662, "grad_norm": 0.2650410532951355, "learning_rate": 0.000146104996265363, "loss": 11.6634, "step": 49818 }, { "epoch": 1.0428493678305284, "grad_norm": 0.34054964780807495, "learning_rate": 0.00014610305065722453, "loss": 11.6599, "step": 49819 }, { "epoch": 1.0428703005944906, "grad_norm": 0.34066641330718994, "learning_rate": 0.00014610110502692337, "loss": 11.6659, "step": 49820 }, { "epoch": 1.0428912333584526, "grad_norm": 0.40690144896507263, "learning_rate": 0.0001460991593744605, "loss": 11.6899, "step": 49821 }, { "epoch": 1.0429121661224148, "grad_norm": 0.31428205966949463, "learning_rate": 0.00014609721369983686, "loss": 11.6312, "step": 49822 }, { "epoch": 1.042933098886377, "grad_norm": 0.28096911311149597, "learning_rate": 0.00014609526800305332, "loss": 11.6699, "step": 49823 }, { "epoch": 1.0429540316503392, "grad_norm": 0.336334228515625, "learning_rate": 0.0001460933222841109, "loss": 11.6787, "step": 49824 }, { "epoch": 1.0429749644143014, "grad_norm": 0.4534567892551422, "learning_rate": 0.00014609137654301047, "loss": 11.6751, "step": 49825 }, { "epoch": 1.0429958971782634, "grad_norm": 0.2896961569786072, "learning_rate": 0.000146089430779753, "loss": 11.6786, "step": 49826 }, { "epoch": 1.0430168299422256, "grad_norm": 0.35831964015960693, "learning_rate": 0.00014608748499433943, "loss": 11.6644, "step": 49827 }, { "epoch": 1.0430377627061878, "grad_norm": 0.3787614107131958, "learning_rate": 0.00014608553918677068, "loss": 11.6736, "step": 49828 }, { "epoch": 1.04305869547015, "grad_norm": 0.26229315996170044, "learning_rate": 0.00014608359335704772, "loss": 11.6756, "step": 49829 }, { "epoch": 1.043079628234112, "grad_norm": 0.34502771496772766, "learning_rate": 0.0001460816475051714, "loss": 11.6763, "step": 49830 }, { "epoch": 1.0431005609980741, "grad_norm": 0.35867074131965637, "learning_rate": 0.0001460797016311427, "loss": 11.667, "step": 49831 }, { "epoch": 1.0431214937620363, "grad_norm": 0.35886210203170776, "learning_rate": 0.0001460777557349626, "loss": 11.654, "step": 49832 }, { "epoch": 1.0431424265259985, "grad_norm": 0.2732369899749756, "learning_rate": 0.00014607580981663198, "loss": 11.6835, "step": 49833 }, { "epoch": 1.0431633592899607, "grad_norm": 0.3290922939777374, "learning_rate": 0.0001460738638761518, "loss": 11.6535, "step": 49834 }, { "epoch": 1.0431842920539227, "grad_norm": 0.356108695268631, "learning_rate": 0.000146071917913523, "loss": 11.668, "step": 49835 }, { "epoch": 1.043205224817885, "grad_norm": 0.3181917071342468, "learning_rate": 0.0001460699719287465, "loss": 11.6639, "step": 49836 }, { "epoch": 1.0432261575818471, "grad_norm": 0.3389797508716583, "learning_rate": 0.00014606802592182324, "loss": 11.6571, "step": 49837 }, { "epoch": 1.0432470903458093, "grad_norm": 0.32841426134109497, "learning_rate": 0.00014606607989275415, "loss": 11.6562, "step": 49838 }, { "epoch": 1.0432680231097715, "grad_norm": 0.3569857180118561, "learning_rate": 0.00014606413384154018, "loss": 11.6748, "step": 49839 }, { "epoch": 1.0432889558737335, "grad_norm": 0.3207003176212311, "learning_rate": 0.00014606218776818227, "loss": 11.6546, "step": 49840 }, { "epoch": 1.0433098886376957, "grad_norm": 0.32755011320114136, "learning_rate": 0.00014606024167268132, "loss": 11.6662, "step": 49841 }, { "epoch": 1.043330821401658, "grad_norm": 0.41075703501701355, "learning_rate": 0.0001460582955550383, "loss": 11.6711, "step": 49842 }, { "epoch": 1.04335175416562, "grad_norm": 0.44326695799827576, "learning_rate": 0.00014605634941525412, "loss": 11.6899, "step": 49843 }, { "epoch": 1.0433726869295823, "grad_norm": 0.3189410865306854, "learning_rate": 0.00014605440325332974, "loss": 11.67, "step": 49844 }, { "epoch": 1.0433936196935443, "grad_norm": 0.40700823068618774, "learning_rate": 0.00014605245706926612, "loss": 11.6682, "step": 49845 }, { "epoch": 1.0434145524575065, "grad_norm": 0.32544752955436707, "learning_rate": 0.00014605051086306414, "loss": 11.6696, "step": 49846 }, { "epoch": 1.0434354852214687, "grad_norm": 0.31035682559013367, "learning_rate": 0.0001460485646347247, "loss": 11.6583, "step": 49847 }, { "epoch": 1.0434564179854309, "grad_norm": 0.355947881937027, "learning_rate": 0.00014604661838424885, "loss": 11.6378, "step": 49848 }, { "epoch": 1.0434773507493929, "grad_norm": 0.2888600826263428, "learning_rate": 0.0001460446721116375, "loss": 11.6733, "step": 49849 }, { "epoch": 1.043498283513355, "grad_norm": 0.32906511425971985, "learning_rate": 0.00014604272581689146, "loss": 11.6777, "step": 49850 }, { "epoch": 1.0435192162773173, "grad_norm": 0.43118950724601746, "learning_rate": 0.00014604077950001182, "loss": 11.6538, "step": 49851 }, { "epoch": 1.0435401490412795, "grad_norm": 0.2887721657752991, "learning_rate": 0.00014603883316099945, "loss": 11.6639, "step": 49852 }, { "epoch": 1.0435610818052417, "grad_norm": 0.29027217626571655, "learning_rate": 0.00014603688679985528, "loss": 11.6813, "step": 49853 }, { "epoch": 1.0435820145692036, "grad_norm": 0.33007103204727173, "learning_rate": 0.00014603494041658027, "loss": 11.666, "step": 49854 }, { "epoch": 1.0436029473331658, "grad_norm": 0.2714526951313019, "learning_rate": 0.0001460329940111753, "loss": 11.6768, "step": 49855 }, { "epoch": 1.043623880097128, "grad_norm": 0.32301339507102966, "learning_rate": 0.0001460310475836414, "loss": 11.673, "step": 49856 }, { "epoch": 1.0436448128610902, "grad_norm": 0.391985684633255, "learning_rate": 0.00014602910113397944, "loss": 11.6752, "step": 49857 }, { "epoch": 1.0436657456250524, "grad_norm": 0.3802676796913147, "learning_rate": 0.00014602715466219035, "loss": 11.6668, "step": 49858 }, { "epoch": 1.0436866783890144, "grad_norm": 0.38150036334991455, "learning_rate": 0.00014602520816827508, "loss": 11.6761, "step": 49859 }, { "epoch": 1.0437076111529766, "grad_norm": 0.31250327825546265, "learning_rate": 0.00014602326165223458, "loss": 11.6532, "step": 49860 }, { "epoch": 1.0437285439169388, "grad_norm": 0.3109665513038635, "learning_rate": 0.0001460213151140698, "loss": 11.6781, "step": 49861 }, { "epoch": 1.043749476680901, "grad_norm": 0.31651240587234497, "learning_rate": 0.00014601936855378162, "loss": 11.6771, "step": 49862 }, { "epoch": 1.0437704094448632, "grad_norm": 0.3656149208545685, "learning_rate": 0.00014601742197137103, "loss": 11.6822, "step": 49863 }, { "epoch": 1.0437913422088252, "grad_norm": 0.32318028807640076, "learning_rate": 0.00014601547536683893, "loss": 11.655, "step": 49864 }, { "epoch": 1.0438122749727874, "grad_norm": 0.40752601623535156, "learning_rate": 0.00014601352874018625, "loss": 11.6664, "step": 49865 }, { "epoch": 1.0438332077367496, "grad_norm": 0.33702313899993896, "learning_rate": 0.00014601158209141398, "loss": 11.6574, "step": 49866 }, { "epoch": 1.0438541405007118, "grad_norm": 0.3828718960285187, "learning_rate": 0.000146009635420523, "loss": 11.654, "step": 49867 }, { "epoch": 1.0438750732646738, "grad_norm": 0.32682955265045166, "learning_rate": 0.00014600768872751426, "loss": 11.6598, "step": 49868 }, { "epoch": 1.043896006028636, "grad_norm": 0.26467522978782654, "learning_rate": 0.00014600574201238873, "loss": 11.6771, "step": 49869 }, { "epoch": 1.0439169387925982, "grad_norm": 0.2764032483100891, "learning_rate": 0.0001460037952751473, "loss": 11.6584, "step": 49870 }, { "epoch": 1.0439378715565604, "grad_norm": 0.2693565785884857, "learning_rate": 0.00014600184851579093, "loss": 11.6899, "step": 49871 }, { "epoch": 1.0439588043205226, "grad_norm": 0.42017462849617004, "learning_rate": 0.00014599990173432055, "loss": 11.6721, "step": 49872 }, { "epoch": 1.0439797370844845, "grad_norm": 0.28651851415634155, "learning_rate": 0.00014599795493073708, "loss": 11.6608, "step": 49873 }, { "epoch": 1.0440006698484467, "grad_norm": 0.2389783412218094, "learning_rate": 0.00014599600810504148, "loss": 11.6635, "step": 49874 }, { "epoch": 1.044021602612409, "grad_norm": 0.3487119972705841, "learning_rate": 0.00014599406125723468, "loss": 11.6617, "step": 49875 }, { "epoch": 1.0440425353763712, "grad_norm": 0.3142159581184387, "learning_rate": 0.0001459921143873176, "loss": 11.65, "step": 49876 }, { "epoch": 1.0440634681403334, "grad_norm": 0.30077889561653137, "learning_rate": 0.00014599016749529119, "loss": 11.6531, "step": 49877 }, { "epoch": 1.0440844009042953, "grad_norm": 0.3313624858856201, "learning_rate": 0.00014598822058115642, "loss": 11.6672, "step": 49878 }, { "epoch": 1.0441053336682575, "grad_norm": 0.2908031940460205, "learning_rate": 0.00014598627364491415, "loss": 11.6645, "step": 49879 }, { "epoch": 1.0441262664322197, "grad_norm": 0.2978571653366089, "learning_rate": 0.00014598432668656538, "loss": 11.6828, "step": 49880 }, { "epoch": 1.044147199196182, "grad_norm": 0.3382582366466522, "learning_rate": 0.00014598237970611102, "loss": 11.6781, "step": 49881 }, { "epoch": 1.0441681319601441, "grad_norm": 0.40280386805534363, "learning_rate": 0.000145980432703552, "loss": 11.6695, "step": 49882 }, { "epoch": 1.044189064724106, "grad_norm": 0.23792067170143127, "learning_rate": 0.00014597848567888928, "loss": 11.6617, "step": 49883 }, { "epoch": 1.0442099974880683, "grad_norm": 0.3842942416667938, "learning_rate": 0.00014597653863212374, "loss": 11.658, "step": 49884 }, { "epoch": 1.0442309302520305, "grad_norm": 0.3339185118675232, "learning_rate": 0.0001459745915632564, "loss": 11.6794, "step": 49885 }, { "epoch": 1.0442518630159927, "grad_norm": 0.5358495116233826, "learning_rate": 0.00014597264447228814, "loss": 11.6523, "step": 49886 }, { "epoch": 1.0442727957799547, "grad_norm": 0.32634827494621277, "learning_rate": 0.00014597069735921992, "loss": 11.6601, "step": 49887 }, { "epoch": 1.0442937285439169, "grad_norm": 0.37390148639678955, "learning_rate": 0.00014596875022405262, "loss": 11.6675, "step": 49888 }, { "epoch": 1.044314661307879, "grad_norm": 0.29336434602737427, "learning_rate": 0.00014596680306678728, "loss": 11.6651, "step": 49889 }, { "epoch": 1.0443355940718413, "grad_norm": 0.2318957895040512, "learning_rate": 0.00014596485588742474, "loss": 11.6808, "step": 49890 }, { "epoch": 1.0443565268358035, "grad_norm": 0.4306335151195526, "learning_rate": 0.000145962908685966, "loss": 11.6734, "step": 49891 }, { "epoch": 1.0443774595997655, "grad_norm": 0.3772672712802887, "learning_rate": 0.00014596096146241193, "loss": 11.6713, "step": 49892 }, { "epoch": 1.0443983923637277, "grad_norm": 0.3126848042011261, "learning_rate": 0.0001459590142167635, "loss": 11.6639, "step": 49893 }, { "epoch": 1.0444193251276899, "grad_norm": 0.3844982981681824, "learning_rate": 0.00014595706694902168, "loss": 11.664, "step": 49894 }, { "epoch": 1.044440257891652, "grad_norm": 0.3251676857471466, "learning_rate": 0.0001459551196591874, "loss": 11.6455, "step": 49895 }, { "epoch": 1.0444611906556143, "grad_norm": 0.30611658096313477, "learning_rate": 0.00014595317234726153, "loss": 11.6663, "step": 49896 }, { "epoch": 1.0444821234195762, "grad_norm": 0.33659857511520386, "learning_rate": 0.00014595122501324506, "loss": 11.6787, "step": 49897 }, { "epoch": 1.0445030561835384, "grad_norm": 0.40448907017707825, "learning_rate": 0.00014594927765713893, "loss": 11.6786, "step": 49898 }, { "epoch": 1.0445239889475006, "grad_norm": 0.4308508336544037, "learning_rate": 0.00014594733027894404, "loss": 11.6652, "step": 49899 }, { "epoch": 1.0445449217114628, "grad_norm": 0.3028079569339752, "learning_rate": 0.00014594538287866137, "loss": 11.6469, "step": 49900 }, { "epoch": 1.044565854475425, "grad_norm": 0.3976978659629822, "learning_rate": 0.00014594343545629182, "loss": 11.6681, "step": 49901 }, { "epoch": 1.044586787239387, "grad_norm": 0.37036728858947754, "learning_rate": 0.00014594148801183634, "loss": 11.6628, "step": 49902 }, { "epoch": 1.0446077200033492, "grad_norm": 0.29283323884010315, "learning_rate": 0.00014593954054529585, "loss": 11.6618, "step": 49903 }, { "epoch": 1.0446286527673114, "grad_norm": 0.2855639159679413, "learning_rate": 0.00014593759305667133, "loss": 11.6664, "step": 49904 }, { "epoch": 1.0446495855312736, "grad_norm": 0.3657369613647461, "learning_rate": 0.0001459356455459637, "loss": 11.6651, "step": 49905 }, { "epoch": 1.0446705182952356, "grad_norm": 0.3402023911476135, "learning_rate": 0.00014593369801317385, "loss": 11.6727, "step": 49906 }, { "epoch": 1.0446914510591978, "grad_norm": 0.3338177800178528, "learning_rate": 0.00014593175045830276, "loss": 11.6633, "step": 49907 }, { "epoch": 1.04471238382316, "grad_norm": 0.2840823829174042, "learning_rate": 0.00014592980288135137, "loss": 11.6902, "step": 49908 }, { "epoch": 1.0447333165871222, "grad_norm": 0.3998192548751831, "learning_rate": 0.0001459278552823206, "loss": 11.6445, "step": 49909 }, { "epoch": 1.0447542493510844, "grad_norm": 0.34571194648742676, "learning_rate": 0.00014592590766121136, "loss": 11.664, "step": 49910 }, { "epoch": 1.0447751821150464, "grad_norm": 0.29274529218673706, "learning_rate": 0.00014592396001802464, "loss": 11.6852, "step": 49911 }, { "epoch": 1.0447961148790086, "grad_norm": 0.33901262283325195, "learning_rate": 0.00014592201235276138, "loss": 11.6662, "step": 49912 }, { "epoch": 1.0448170476429708, "grad_norm": 0.34383630752563477, "learning_rate": 0.00014592006466542244, "loss": 11.6626, "step": 49913 }, { "epoch": 1.044837980406933, "grad_norm": 0.26045697927474976, "learning_rate": 0.00014591811695600882, "loss": 11.6682, "step": 49914 }, { "epoch": 1.0448589131708952, "grad_norm": 0.38206279277801514, "learning_rate": 0.00014591616922452144, "loss": 11.6708, "step": 49915 }, { "epoch": 1.0448798459348572, "grad_norm": 0.34722912311553955, "learning_rate": 0.00014591422147096125, "loss": 11.6721, "step": 49916 }, { "epoch": 1.0449007786988194, "grad_norm": 0.36740124225616455, "learning_rate": 0.00014591227369532917, "loss": 11.6627, "step": 49917 }, { "epoch": 1.0449217114627816, "grad_norm": 0.25094932317733765, "learning_rate": 0.00014591032589762612, "loss": 11.6676, "step": 49918 }, { "epoch": 1.0449426442267438, "grad_norm": 0.29475733637809753, "learning_rate": 0.00014590837807785308, "loss": 11.6582, "step": 49919 }, { "epoch": 1.044963576990706, "grad_norm": 0.3303917348384857, "learning_rate": 0.00014590643023601097, "loss": 11.6566, "step": 49920 }, { "epoch": 1.044984509754668, "grad_norm": 0.36634647846221924, "learning_rate": 0.0001459044823721007, "loss": 11.6514, "step": 49921 }, { "epoch": 1.0450054425186301, "grad_norm": 0.335898756980896, "learning_rate": 0.00014590253448612322, "loss": 11.6658, "step": 49922 }, { "epoch": 1.0450263752825923, "grad_norm": 0.2785457372665405, "learning_rate": 0.00014590058657807948, "loss": 11.6797, "step": 49923 }, { "epoch": 1.0450473080465545, "grad_norm": 0.32262495160102844, "learning_rate": 0.0001458986386479704, "loss": 11.6653, "step": 49924 }, { "epoch": 1.0450682408105165, "grad_norm": 0.32759159803390503, "learning_rate": 0.00014589669069579696, "loss": 11.678, "step": 49925 }, { "epoch": 1.0450891735744787, "grad_norm": 0.39268985390663147, "learning_rate": 0.00014589474272156002, "loss": 11.6779, "step": 49926 }, { "epoch": 1.045110106338441, "grad_norm": 0.2788008451461792, "learning_rate": 0.0001458927947252606, "loss": 11.6705, "step": 49927 }, { "epoch": 1.0451310391024031, "grad_norm": 0.3363970220088959, "learning_rate": 0.00014589084670689955, "loss": 11.6691, "step": 49928 }, { "epoch": 1.0451519718663653, "grad_norm": 0.32408350706100464, "learning_rate": 0.00014588889866647787, "loss": 11.6625, "step": 49929 }, { "epoch": 1.0451729046303273, "grad_norm": 0.2971736192703247, "learning_rate": 0.00014588695060399647, "loss": 11.6667, "step": 49930 }, { "epoch": 1.0451938373942895, "grad_norm": 0.34126344323158264, "learning_rate": 0.0001458850025194563, "loss": 11.6814, "step": 49931 }, { "epoch": 1.0452147701582517, "grad_norm": 0.35313114523887634, "learning_rate": 0.00014588305441285828, "loss": 11.696, "step": 49932 }, { "epoch": 1.045235702922214, "grad_norm": 0.30272310972213745, "learning_rate": 0.00014588110628420336, "loss": 11.6476, "step": 49933 }, { "epoch": 1.045256635686176, "grad_norm": 0.3397652804851532, "learning_rate": 0.00014587915813349248, "loss": 11.6518, "step": 49934 }, { "epoch": 1.045277568450138, "grad_norm": 0.27208399772644043, "learning_rate": 0.00014587720996072655, "loss": 11.6631, "step": 49935 }, { "epoch": 1.0452985012141003, "grad_norm": 0.39699628949165344, "learning_rate": 0.00014587526176590654, "loss": 11.6673, "step": 49936 }, { "epoch": 1.0453194339780625, "grad_norm": 0.32089558243751526, "learning_rate": 0.0001458733135490334, "loss": 11.6777, "step": 49937 }, { "epoch": 1.0453403667420247, "grad_norm": 0.4462476670742035, "learning_rate": 0.00014587136531010798, "loss": 11.6724, "step": 49938 }, { "epoch": 1.0453612995059867, "grad_norm": 0.3580140471458435, "learning_rate": 0.00014586941704913134, "loss": 11.6634, "step": 49939 }, { "epoch": 1.0453822322699489, "grad_norm": 0.35501769185066223, "learning_rate": 0.00014586746876610432, "loss": 11.6734, "step": 49940 }, { "epoch": 1.045403165033911, "grad_norm": 0.339089959859848, "learning_rate": 0.00014586552046102786, "loss": 11.6872, "step": 49941 }, { "epoch": 1.0454240977978733, "grad_norm": 0.331068217754364, "learning_rate": 0.00014586357213390295, "loss": 11.6767, "step": 49942 }, { "epoch": 1.0454450305618355, "grad_norm": 0.2820778787136078, "learning_rate": 0.00014586162378473053, "loss": 11.6698, "step": 49943 }, { "epoch": 1.0454659633257974, "grad_norm": 0.3138435482978821, "learning_rate": 0.0001458596754135115, "loss": 11.6699, "step": 49944 }, { "epoch": 1.0454868960897596, "grad_norm": 0.27934935688972473, "learning_rate": 0.00014585772702024676, "loss": 11.6669, "step": 49945 }, { "epoch": 1.0455078288537218, "grad_norm": 0.2892245054244995, "learning_rate": 0.0001458557786049373, "loss": 11.6775, "step": 49946 }, { "epoch": 1.045528761617684, "grad_norm": 0.2926297187805176, "learning_rate": 0.0001458538301675841, "loss": 11.6693, "step": 49947 }, { "epoch": 1.0455496943816462, "grad_norm": 0.35025641322135925, "learning_rate": 0.000145851881708188, "loss": 11.6758, "step": 49948 }, { "epoch": 1.0455706271456082, "grad_norm": 0.27744418382644653, "learning_rate": 0.00014584993322675, "loss": 11.6617, "step": 49949 }, { "epoch": 1.0455915599095704, "grad_norm": 0.401741087436676, "learning_rate": 0.00014584798472327103, "loss": 11.6516, "step": 49950 }, { "epoch": 1.0456124926735326, "grad_norm": 0.33741751313209534, "learning_rate": 0.000145846036197752, "loss": 11.6797, "step": 49951 }, { "epoch": 1.0456334254374948, "grad_norm": 0.3253941535949707, "learning_rate": 0.00014584408765019383, "loss": 11.6671, "step": 49952 }, { "epoch": 1.045654358201457, "grad_norm": 0.36684632301330566, "learning_rate": 0.00014584213908059752, "loss": 11.6563, "step": 49953 }, { "epoch": 1.045675290965419, "grad_norm": 0.26288092136383057, "learning_rate": 0.00014584019048896398, "loss": 11.669, "step": 49954 }, { "epoch": 1.0456962237293812, "grad_norm": 0.2528631091117859, "learning_rate": 0.00014583824187529412, "loss": 11.652, "step": 49955 }, { "epoch": 1.0457171564933434, "grad_norm": 0.29826128482818604, "learning_rate": 0.00014583629323958893, "loss": 11.6801, "step": 49956 }, { "epoch": 1.0457380892573056, "grad_norm": 0.38116705417633057, "learning_rate": 0.00014583434458184931, "loss": 11.6625, "step": 49957 }, { "epoch": 1.0457590220212676, "grad_norm": 0.3560752868652344, "learning_rate": 0.0001458323959020762, "loss": 11.6647, "step": 49958 }, { "epoch": 1.0457799547852298, "grad_norm": 0.4493936002254486, "learning_rate": 0.0001458304472002705, "loss": 11.6724, "step": 49959 }, { "epoch": 1.045800887549192, "grad_norm": 0.290853887796402, "learning_rate": 0.0001458284984764332, "loss": 11.667, "step": 49960 }, { "epoch": 1.0458218203131542, "grad_norm": 0.30162879824638367, "learning_rate": 0.00014582654973056526, "loss": 11.6651, "step": 49961 }, { "epoch": 1.0458427530771164, "grad_norm": 0.31836411356925964, "learning_rate": 0.00014582460096266753, "loss": 11.6746, "step": 49962 }, { "epoch": 1.0458636858410784, "grad_norm": 0.34665921330451965, "learning_rate": 0.00014582265217274104, "loss": 11.6707, "step": 49963 }, { "epoch": 1.0458846186050406, "grad_norm": 0.36511990427970886, "learning_rate": 0.00014582070336078667, "loss": 11.6833, "step": 49964 }, { "epoch": 1.0459055513690028, "grad_norm": 0.326904833316803, "learning_rate": 0.00014581875452680538, "loss": 11.6662, "step": 49965 }, { "epoch": 1.045926484132965, "grad_norm": 0.33779585361480713, "learning_rate": 0.00014581680567079806, "loss": 11.6908, "step": 49966 }, { "epoch": 1.0459474168969272, "grad_norm": 0.6104700565338135, "learning_rate": 0.0001458148567927657, "loss": 11.6689, "step": 49967 }, { "epoch": 1.0459683496608891, "grad_norm": 0.2953360080718994, "learning_rate": 0.00014581290789270925, "loss": 11.6642, "step": 49968 }, { "epoch": 1.0459892824248513, "grad_norm": 0.7457946538925171, "learning_rate": 0.00014581095897062956, "loss": 11.5661, "step": 49969 }, { "epoch": 1.0460102151888135, "grad_norm": 0.2767218053340912, "learning_rate": 0.0001458090100265277, "loss": 11.6597, "step": 49970 }, { "epoch": 1.0460311479527757, "grad_norm": 0.3751099407672882, "learning_rate": 0.00014580706106040445, "loss": 11.6761, "step": 49971 }, { "epoch": 1.046052080716738, "grad_norm": 0.28403162956237793, "learning_rate": 0.00014580511207226089, "loss": 11.6603, "step": 49972 }, { "epoch": 1.0460730134807, "grad_norm": 0.40335509181022644, "learning_rate": 0.00014580316306209788, "loss": 11.6714, "step": 49973 }, { "epoch": 1.0460939462446621, "grad_norm": 0.25979986786842346, "learning_rate": 0.00014580121402991633, "loss": 11.6383, "step": 49974 }, { "epoch": 1.0461148790086243, "grad_norm": 0.3055492341518402, "learning_rate": 0.00014579926497571726, "loss": 11.676, "step": 49975 }, { "epoch": 1.0461358117725865, "grad_norm": 0.3117656111717224, "learning_rate": 0.00014579731589950154, "loss": 11.6674, "step": 49976 }, { "epoch": 1.0461567445365485, "grad_norm": 0.3379090130329132, "learning_rate": 0.00014579536680127018, "loss": 11.6622, "step": 49977 }, { "epoch": 1.0461776773005107, "grad_norm": 0.36563774943351746, "learning_rate": 0.000145793417681024, "loss": 11.6754, "step": 49978 }, { "epoch": 1.046198610064473, "grad_norm": 0.4412229061126709, "learning_rate": 0.00014579146853876406, "loss": 11.6747, "step": 49979 }, { "epoch": 1.046219542828435, "grad_norm": 0.3768965005874634, "learning_rate": 0.00014578951937449123, "loss": 11.6554, "step": 49980 }, { "epoch": 1.0462404755923973, "grad_norm": 0.32471686601638794, "learning_rate": 0.00014578757018820647, "loss": 11.661, "step": 49981 }, { "epoch": 1.0462614083563593, "grad_norm": 0.2960212826728821, "learning_rate": 0.0001457856209799107, "loss": 11.6712, "step": 49982 }, { "epoch": 1.0462823411203215, "grad_norm": 0.2757786512374878, "learning_rate": 0.00014578367174960483, "loss": 11.6762, "step": 49983 }, { "epoch": 1.0463032738842837, "grad_norm": 0.2843799889087677, "learning_rate": 0.00014578172249728988, "loss": 11.6659, "step": 49984 }, { "epoch": 1.0463242066482459, "grad_norm": 0.312298446893692, "learning_rate": 0.0001457797732229667, "loss": 11.6695, "step": 49985 }, { "epoch": 1.046345139412208, "grad_norm": 0.3406357765197754, "learning_rate": 0.0001457778239266363, "loss": 11.6607, "step": 49986 }, { "epoch": 1.04636607217617, "grad_norm": 0.31782999634742737, "learning_rate": 0.00014577587460829955, "loss": 11.6616, "step": 49987 }, { "epoch": 1.0463870049401323, "grad_norm": 0.36801785230636597, "learning_rate": 0.00014577392526795744, "loss": 11.6706, "step": 49988 }, { "epoch": 1.0464079377040945, "grad_norm": 0.31378400325775146, "learning_rate": 0.0001457719759056109, "loss": 11.6703, "step": 49989 }, { "epoch": 1.0464288704680567, "grad_norm": 0.3822784125804901, "learning_rate": 0.0001457700265212608, "loss": 11.66, "step": 49990 }, { "epoch": 1.0464498032320189, "grad_norm": 0.32531431317329407, "learning_rate": 0.0001457680771149082, "loss": 11.6572, "step": 49991 }, { "epoch": 1.0464707359959808, "grad_norm": 0.3299713134765625, "learning_rate": 0.00014576612768655393, "loss": 11.655, "step": 49992 }, { "epoch": 1.046491668759943, "grad_norm": 0.38023823499679565, "learning_rate": 0.000145764178236199, "loss": 11.6507, "step": 49993 }, { "epoch": 1.0465126015239052, "grad_norm": 0.3706871271133423, "learning_rate": 0.00014576222876384426, "loss": 11.684, "step": 49994 }, { "epoch": 1.0465335342878674, "grad_norm": 0.3171660006046295, "learning_rate": 0.0001457602792694907, "loss": 11.6664, "step": 49995 }, { "epoch": 1.0465544670518294, "grad_norm": 0.31681811809539795, "learning_rate": 0.0001457583297531393, "loss": 11.6497, "step": 49996 }, { "epoch": 1.0465753998157916, "grad_norm": 0.3090757727622986, "learning_rate": 0.00014575638021479094, "loss": 11.6546, "step": 49997 }, { "epoch": 1.0465963325797538, "grad_norm": 0.3161593973636627, "learning_rate": 0.00014575443065444657, "loss": 11.6769, "step": 49998 }, { "epoch": 1.046617265343716, "grad_norm": 0.3346511721611023, "learning_rate": 0.00014575248107210711, "loss": 11.6727, "step": 49999 }, { "epoch": 1.0466381981076782, "grad_norm": 0.2743215560913086, "learning_rate": 0.00014575053146777354, "loss": 11.6655, "step": 50000 }, { "epoch": 1.0466381981076782, "eval_loss": 11.668407440185547, "eval_runtime": 34.3016, "eval_samples_per_second": 28.016, "eval_steps_per_second": 7.026, "step": 50000 }, { "epoch": 1.0466591308716402, "grad_norm": 0.2427031695842743, "learning_rate": 0.00014574858184144676, "loss": 11.6738, "step": 50001 }, { "epoch": 1.0466800636356024, "grad_norm": 0.32475417852401733, "learning_rate": 0.00014574663219312772, "loss": 11.6842, "step": 50002 }, { "epoch": 1.0467009963995646, "grad_norm": 0.2542896866798401, "learning_rate": 0.00014574468252281738, "loss": 11.6652, "step": 50003 }, { "epoch": 1.0467219291635268, "grad_norm": 0.31793251633644104, "learning_rate": 0.0001457427328305166, "loss": 11.6761, "step": 50004 }, { "epoch": 1.046742861927489, "grad_norm": 0.29177266359329224, "learning_rate": 0.00014574078311622643, "loss": 11.6686, "step": 50005 }, { "epoch": 1.046763794691451, "grad_norm": 0.41000187397003174, "learning_rate": 0.00014573883337994773, "loss": 11.6865, "step": 50006 }, { "epoch": 1.0467847274554132, "grad_norm": 0.3712095022201538, "learning_rate": 0.00014573688362168143, "loss": 11.6583, "step": 50007 }, { "epoch": 1.0468056602193754, "grad_norm": 0.33563894033432007, "learning_rate": 0.0001457349338414285, "loss": 11.664, "step": 50008 }, { "epoch": 1.0468265929833376, "grad_norm": 0.2754948139190674, "learning_rate": 0.0001457329840391899, "loss": 11.6691, "step": 50009 }, { "epoch": 1.0468475257472998, "grad_norm": 0.2868203818798065, "learning_rate": 0.00014573103421496652, "loss": 11.6614, "step": 50010 }, { "epoch": 1.0468684585112618, "grad_norm": 0.42615458369255066, "learning_rate": 0.0001457290843687593, "loss": 11.6783, "step": 50011 }, { "epoch": 1.046889391275224, "grad_norm": 0.2783316969871521, "learning_rate": 0.00014572713450056922, "loss": 11.6545, "step": 50012 }, { "epoch": 1.0469103240391862, "grad_norm": 0.3134807050228119, "learning_rate": 0.0001457251846103972, "loss": 11.6535, "step": 50013 }, { "epoch": 1.0469312568031484, "grad_norm": 0.4149833023548126, "learning_rate": 0.00014572323469824413, "loss": 11.6524, "step": 50014 }, { "epoch": 1.0469521895671103, "grad_norm": 0.3273007571697235, "learning_rate": 0.000145721284764111, "loss": 11.6618, "step": 50015 }, { "epoch": 1.0469731223310725, "grad_norm": 0.25816991925239563, "learning_rate": 0.00014571933480799873, "loss": 11.6673, "step": 50016 }, { "epoch": 1.0469940550950347, "grad_norm": 0.2514196038246155, "learning_rate": 0.0001457173848299083, "loss": 11.6474, "step": 50017 }, { "epoch": 1.047014987858997, "grad_norm": 0.3356395363807678, "learning_rate": 0.00014571543482984055, "loss": 11.6685, "step": 50018 }, { "epoch": 1.0470359206229591, "grad_norm": 0.31508299708366394, "learning_rate": 0.00014571348480779648, "loss": 11.6547, "step": 50019 }, { "epoch": 1.0470568533869211, "grad_norm": 0.3511089086532593, "learning_rate": 0.00014571153476377707, "loss": 11.6514, "step": 50020 }, { "epoch": 1.0470777861508833, "grad_norm": 0.28240087628364563, "learning_rate": 0.00014570958469778316, "loss": 11.6818, "step": 50021 }, { "epoch": 1.0470987189148455, "grad_norm": 0.3417038917541504, "learning_rate": 0.00014570763460981575, "loss": 11.6711, "step": 50022 }, { "epoch": 1.0471196516788077, "grad_norm": 0.3771498501300812, "learning_rate": 0.00014570568449987577, "loss": 11.6687, "step": 50023 }, { "epoch": 1.04714058444277, "grad_norm": 0.27078476548194885, "learning_rate": 0.00014570373436796413, "loss": 11.657, "step": 50024 }, { "epoch": 1.047161517206732, "grad_norm": 1.6346397399902344, "learning_rate": 0.00014570178421408184, "loss": 11.5833, "step": 50025 }, { "epoch": 1.047182449970694, "grad_norm": 0.3841087520122528, "learning_rate": 0.00014569983403822973, "loss": 11.6913, "step": 50026 }, { "epoch": 1.0472033827346563, "grad_norm": 0.2972654402256012, "learning_rate": 0.00014569788384040884, "loss": 11.6701, "step": 50027 }, { "epoch": 1.0472243154986185, "grad_norm": 0.38648495078086853, "learning_rate": 0.00014569593362062002, "loss": 11.6611, "step": 50028 }, { "epoch": 1.0472452482625807, "grad_norm": 0.2965609133243561, "learning_rate": 0.00014569398337886428, "loss": 11.6853, "step": 50029 }, { "epoch": 1.0472661810265427, "grad_norm": 0.32571807503700256, "learning_rate": 0.0001456920331151425, "loss": 11.6537, "step": 50030 }, { "epoch": 1.0472871137905049, "grad_norm": 0.41582056879997253, "learning_rate": 0.00014569008282945567, "loss": 11.6769, "step": 50031 }, { "epoch": 1.047308046554467, "grad_norm": 0.31098005175590515, "learning_rate": 0.0001456881325218047, "loss": 11.6691, "step": 50032 }, { "epoch": 1.0473289793184293, "grad_norm": 0.31642991304397583, "learning_rate": 0.0001456861821921905, "loss": 11.6685, "step": 50033 }, { "epoch": 1.0473499120823913, "grad_norm": 0.33008691668510437, "learning_rate": 0.00014568423184061405, "loss": 11.6569, "step": 50034 }, { "epoch": 1.0473708448463535, "grad_norm": 0.3320562541484833, "learning_rate": 0.00014568228146707627, "loss": 11.6785, "step": 50035 }, { "epoch": 1.0473917776103157, "grad_norm": 0.317815363407135, "learning_rate": 0.0001456803310715781, "loss": 11.6649, "step": 50036 }, { "epoch": 1.0474127103742779, "grad_norm": 0.33596789836883545, "learning_rate": 0.00014567838065412053, "loss": 11.6706, "step": 50037 }, { "epoch": 1.04743364313824, "grad_norm": 0.3515806198120117, "learning_rate": 0.0001456764302147044, "loss": 11.6692, "step": 50038 }, { "epoch": 1.047454575902202, "grad_norm": 0.28841453790664673, "learning_rate": 0.00014567447975333072, "loss": 11.6639, "step": 50039 }, { "epoch": 1.0474755086661642, "grad_norm": 0.30943718552589417, "learning_rate": 0.00014567252927000037, "loss": 11.689, "step": 50040 }, { "epoch": 1.0474964414301264, "grad_norm": 0.3783358633518219, "learning_rate": 0.00014567057876471433, "loss": 11.6832, "step": 50041 }, { "epoch": 1.0475173741940886, "grad_norm": 0.2620985209941864, "learning_rate": 0.00014566862823747355, "loss": 11.6746, "step": 50042 }, { "epoch": 1.0475383069580508, "grad_norm": 0.40748023986816406, "learning_rate": 0.00014566667768827892, "loss": 11.6583, "step": 50043 }, { "epoch": 1.0475592397220128, "grad_norm": 0.3386993110179901, "learning_rate": 0.00014566472711713142, "loss": 11.6786, "step": 50044 }, { "epoch": 1.047580172485975, "grad_norm": 0.26509225368499756, "learning_rate": 0.00014566277652403194, "loss": 11.6633, "step": 50045 }, { "epoch": 1.0476011052499372, "grad_norm": 0.36437344551086426, "learning_rate": 0.00014566082590898146, "loss": 11.6582, "step": 50046 }, { "epoch": 1.0476220380138994, "grad_norm": 0.29698407649993896, "learning_rate": 0.00014565887527198093, "loss": 11.6536, "step": 50047 }, { "epoch": 1.0476429707778616, "grad_norm": 0.27401018142700195, "learning_rate": 0.00014565692461303124, "loss": 11.6691, "step": 50048 }, { "epoch": 1.0476639035418236, "grad_norm": 0.4075009226799011, "learning_rate": 0.0001456549739321334, "loss": 11.6624, "step": 50049 }, { "epoch": 1.0476848363057858, "grad_norm": 3.1458935737609863, "learning_rate": 0.0001456530232292882, "loss": 11.6384, "step": 50050 }, { "epoch": 1.047705769069748, "grad_norm": 0.32976675033569336, "learning_rate": 0.00014565107250449674, "loss": 11.6713, "step": 50051 }, { "epoch": 1.0477267018337102, "grad_norm": 0.30310118198394775, "learning_rate": 0.0001456491217577599, "loss": 11.6803, "step": 50052 }, { "epoch": 1.0477476345976722, "grad_norm": 0.26835179328918457, "learning_rate": 0.0001456471709890786, "loss": 11.6445, "step": 50053 }, { "epoch": 1.0477685673616344, "grad_norm": 0.28240126371383667, "learning_rate": 0.0001456452201984538, "loss": 11.6652, "step": 50054 }, { "epoch": 1.0477895001255966, "grad_norm": 0.3248216211795807, "learning_rate": 0.00014564326938588642, "loss": 11.6768, "step": 50055 }, { "epoch": 1.0478104328895588, "grad_norm": 0.29964330792427063, "learning_rate": 0.00014564131855137743, "loss": 11.6827, "step": 50056 }, { "epoch": 1.047831365653521, "grad_norm": 0.30621060729026794, "learning_rate": 0.0001456393676949277, "loss": 11.6502, "step": 50057 }, { "epoch": 1.047852298417483, "grad_norm": 0.37744373083114624, "learning_rate": 0.00014563741681653824, "loss": 11.6622, "step": 50058 }, { "epoch": 1.0478732311814452, "grad_norm": 0.3268764913082123, "learning_rate": 0.00014563546591620996, "loss": 11.666, "step": 50059 }, { "epoch": 1.0478941639454074, "grad_norm": 0.2928374111652374, "learning_rate": 0.00014563351499394378, "loss": 11.6757, "step": 50060 }, { "epoch": 1.0479150967093696, "grad_norm": 0.3281674385070801, "learning_rate": 0.00014563156404974066, "loss": 11.6514, "step": 50061 }, { "epoch": 1.0479360294733318, "grad_norm": 0.3461436927318573, "learning_rate": 0.00014562961308360156, "loss": 11.6816, "step": 50062 }, { "epoch": 1.0479569622372937, "grad_norm": 0.2984010577201843, "learning_rate": 0.00014562766209552734, "loss": 11.6734, "step": 50063 }, { "epoch": 1.047977895001256, "grad_norm": 0.3185100555419922, "learning_rate": 0.00014562571108551903, "loss": 11.6721, "step": 50064 }, { "epoch": 1.0479988277652181, "grad_norm": 0.3395312428474426, "learning_rate": 0.0001456237600535775, "loss": 11.6773, "step": 50065 }, { "epoch": 1.0480197605291803, "grad_norm": 0.3073470890522003, "learning_rate": 0.00014562180899970373, "loss": 11.6809, "step": 50066 }, { "epoch": 1.0480406932931425, "grad_norm": 0.36018064618110657, "learning_rate": 0.00014561985792389863, "loss": 11.6829, "step": 50067 }, { "epoch": 1.0480616260571045, "grad_norm": 0.38016003370285034, "learning_rate": 0.00014561790682616316, "loss": 11.6711, "step": 50068 }, { "epoch": 1.0480825588210667, "grad_norm": 0.3031463027000427, "learning_rate": 0.00014561595570649824, "loss": 11.6484, "step": 50069 }, { "epoch": 1.048103491585029, "grad_norm": 0.3577607572078705, "learning_rate": 0.00014561400456490482, "loss": 11.6612, "step": 50070 }, { "epoch": 1.0481244243489911, "grad_norm": 0.3674886226654053, "learning_rate": 0.00014561205340138383, "loss": 11.6453, "step": 50071 }, { "epoch": 1.048145357112953, "grad_norm": 0.4151547849178314, "learning_rate": 0.00014561010221593622, "loss": 11.6722, "step": 50072 }, { "epoch": 1.0481662898769153, "grad_norm": 0.4601759910583496, "learning_rate": 0.00014560815100856292, "loss": 11.6788, "step": 50073 }, { "epoch": 1.0481872226408775, "grad_norm": 0.26408079266548157, "learning_rate": 0.00014560619977926486, "loss": 11.6743, "step": 50074 }, { "epoch": 1.0482081554048397, "grad_norm": 0.2786116600036621, "learning_rate": 0.000145604248528043, "loss": 11.6816, "step": 50075 }, { "epoch": 1.048229088168802, "grad_norm": 0.32316991686820984, "learning_rate": 0.00014560229725489826, "loss": 11.6759, "step": 50076 }, { "epoch": 1.0482500209327639, "grad_norm": 0.3396928310394287, "learning_rate": 0.00014560034595983157, "loss": 11.6696, "step": 50077 }, { "epoch": 1.048270953696726, "grad_norm": 0.3084683120250702, "learning_rate": 0.0001455983946428439, "loss": 11.674, "step": 50078 }, { "epoch": 1.0482918864606883, "grad_norm": 0.29723650217056274, "learning_rate": 0.00014559644330393613, "loss": 11.6724, "step": 50079 }, { "epoch": 1.0483128192246505, "grad_norm": 0.31057843565940857, "learning_rate": 0.00014559449194310924, "loss": 11.6829, "step": 50080 }, { "epoch": 1.0483337519886127, "grad_norm": 0.3119651973247528, "learning_rate": 0.0001455925405603642, "loss": 11.6438, "step": 50081 }, { "epoch": 1.0483546847525747, "grad_norm": 0.36629483103752136, "learning_rate": 0.0001455905891557019, "loss": 11.682, "step": 50082 }, { "epoch": 1.0483756175165369, "grad_norm": 0.3226460814476013, "learning_rate": 0.00014558863772912327, "loss": 11.6671, "step": 50083 }, { "epoch": 1.048396550280499, "grad_norm": 0.3383127450942993, "learning_rate": 0.00014558668628062932, "loss": 11.654, "step": 50084 }, { "epoch": 1.0484174830444613, "grad_norm": 0.36850792169570923, "learning_rate": 0.00014558473481022087, "loss": 11.6836, "step": 50085 }, { "epoch": 1.0484384158084235, "grad_norm": 0.3422658145427704, "learning_rate": 0.000145582783317899, "loss": 11.6765, "step": 50086 }, { "epoch": 1.0484593485723854, "grad_norm": 0.34092283248901367, "learning_rate": 0.0001455808318036645, "loss": 11.6942, "step": 50087 }, { "epoch": 1.0484802813363476, "grad_norm": 0.37053388357162476, "learning_rate": 0.00014557888026751843, "loss": 11.6835, "step": 50088 }, { "epoch": 1.0485012141003098, "grad_norm": 0.24839918315410614, "learning_rate": 0.00014557692870946162, "loss": 11.661, "step": 50089 }, { "epoch": 1.048522146864272, "grad_norm": 0.40708795189857483, "learning_rate": 0.00014557497712949511, "loss": 11.6489, "step": 50090 }, { "epoch": 1.048543079628234, "grad_norm": 0.3303287625312805, "learning_rate": 0.0001455730255276198, "loss": 11.6924, "step": 50091 }, { "epoch": 1.0485640123921962, "grad_norm": 0.3058966100215912, "learning_rate": 0.0001455710739038366, "loss": 11.6503, "step": 50092 }, { "epoch": 1.0485849451561584, "grad_norm": 0.3076607584953308, "learning_rate": 0.0001455691222581465, "loss": 11.6545, "step": 50093 }, { "epoch": 1.0486058779201206, "grad_norm": 0.3436712920665741, "learning_rate": 0.00014556717059055038, "loss": 11.6663, "step": 50094 }, { "epoch": 1.0486268106840828, "grad_norm": 0.2676837146282196, "learning_rate": 0.0001455652189010492, "loss": 11.6587, "step": 50095 }, { "epoch": 1.0486477434480448, "grad_norm": 0.3137657940387726, "learning_rate": 0.00014556326718964395, "loss": 11.6763, "step": 50096 }, { "epoch": 1.048668676212007, "grad_norm": 0.34366726875305176, "learning_rate": 0.0001455613154563355, "loss": 11.6708, "step": 50097 }, { "epoch": 1.0486896089759692, "grad_norm": 0.29230543971061707, "learning_rate": 0.00014555936370112482, "loss": 11.6598, "step": 50098 }, { "epoch": 1.0487105417399314, "grad_norm": 0.2950240671634674, "learning_rate": 0.00014555741192401285, "loss": 11.6641, "step": 50099 }, { "epoch": 1.0487314745038936, "grad_norm": 0.3263448476791382, "learning_rate": 0.00014555546012500048, "loss": 11.6577, "step": 50100 }, { "epoch": 1.0487524072678556, "grad_norm": 0.6160165071487427, "learning_rate": 0.00014555350830408874, "loss": 11.6616, "step": 50101 }, { "epoch": 1.0487733400318178, "grad_norm": 0.23333624005317688, "learning_rate": 0.00014555155646127848, "loss": 11.6666, "step": 50102 }, { "epoch": 1.04879427279578, "grad_norm": 0.3753035068511963, "learning_rate": 0.00014554960459657067, "loss": 11.6503, "step": 50103 }, { "epoch": 1.0488152055597422, "grad_norm": 0.29000407457351685, "learning_rate": 0.00014554765270996626, "loss": 11.6509, "step": 50104 }, { "epoch": 1.0488361383237041, "grad_norm": 0.32128116488456726, "learning_rate": 0.0001455457008014662, "loss": 11.6698, "step": 50105 }, { "epoch": 1.0488570710876663, "grad_norm": 0.36515626311302185, "learning_rate": 0.0001455437488710714, "loss": 11.6656, "step": 50106 }, { "epoch": 1.0488780038516285, "grad_norm": 0.2997801601886749, "learning_rate": 0.0001455417969187828, "loss": 11.6588, "step": 50107 }, { "epoch": 1.0488989366155907, "grad_norm": 0.29677584767341614, "learning_rate": 0.00014553984494460132, "loss": 11.6626, "step": 50108 }, { "epoch": 1.048919869379553, "grad_norm": 0.3384518325328827, "learning_rate": 0.00014553789294852794, "loss": 11.6564, "step": 50109 }, { "epoch": 1.048940802143515, "grad_norm": 0.38029539585113525, "learning_rate": 0.00014553594093056363, "loss": 11.6538, "step": 50110 }, { "epoch": 1.0489617349074771, "grad_norm": 0.682772159576416, "learning_rate": 0.00014553398889070925, "loss": 11.5881, "step": 50111 }, { "epoch": 1.0489826676714393, "grad_norm": 0.271990567445755, "learning_rate": 0.00014553203682896574, "loss": 11.6759, "step": 50112 }, { "epoch": 1.0490036004354015, "grad_norm": 0.3089241087436676, "learning_rate": 0.0001455300847453341, "loss": 11.6666, "step": 50113 }, { "epoch": 1.0490245331993637, "grad_norm": 0.28560203313827515, "learning_rate": 0.00014552813263981522, "loss": 11.6674, "step": 50114 }, { "epoch": 1.0490454659633257, "grad_norm": 0.27727457880973816, "learning_rate": 0.00014552618051241008, "loss": 11.65, "step": 50115 }, { "epoch": 1.049066398727288, "grad_norm": 0.337424635887146, "learning_rate": 0.00014552422836311955, "loss": 11.6657, "step": 50116 }, { "epoch": 1.04908733149125, "grad_norm": 0.3398994207382202, "learning_rate": 0.00014552227619194462, "loss": 11.663, "step": 50117 }, { "epoch": 1.0491082642552123, "grad_norm": 0.2897290289402008, "learning_rate": 0.00014552032399888624, "loss": 11.6567, "step": 50118 }, { "epoch": 1.0491291970191745, "grad_norm": 0.39931753277778625, "learning_rate": 0.00014551837178394532, "loss": 11.6039, "step": 50119 }, { "epoch": 1.0491501297831365, "grad_norm": 0.27348053455352783, "learning_rate": 0.0001455164195471228, "loss": 11.6642, "step": 50120 }, { "epoch": 1.0491710625470987, "grad_norm": 0.37375637888908386, "learning_rate": 0.00014551446728841965, "loss": 11.6876, "step": 50121 }, { "epoch": 1.0491919953110609, "grad_norm": 0.35851889848709106, "learning_rate": 0.00014551251500783676, "loss": 11.6677, "step": 50122 }, { "epoch": 1.049212928075023, "grad_norm": 0.3740832209587097, "learning_rate": 0.00014551056270537511, "loss": 11.6653, "step": 50123 }, { "epoch": 1.049233860838985, "grad_norm": 0.28604063391685486, "learning_rate": 0.0001455086103810356, "loss": 11.6608, "step": 50124 }, { "epoch": 1.0492547936029473, "grad_norm": 0.30781885981559753, "learning_rate": 0.0001455066580348192, "loss": 11.6644, "step": 50125 }, { "epoch": 1.0492757263669095, "grad_norm": 0.28448620438575745, "learning_rate": 0.00014550470566672682, "loss": 11.6604, "step": 50126 }, { "epoch": 1.0492966591308717, "grad_norm": 0.3754928708076477, "learning_rate": 0.00014550275327675943, "loss": 11.6788, "step": 50127 }, { "epoch": 1.0493175918948339, "grad_norm": 0.2927374839782715, "learning_rate": 0.00014550080086491796, "loss": 11.6781, "step": 50128 }, { "epoch": 1.0493385246587958, "grad_norm": 0.3322397768497467, "learning_rate": 0.00014549884843120334, "loss": 11.6698, "step": 50129 }, { "epoch": 1.049359457422758, "grad_norm": 0.3153533637523651, "learning_rate": 0.0001454968959756165, "loss": 11.6771, "step": 50130 }, { "epoch": 1.0493803901867202, "grad_norm": 0.2725796401500702, "learning_rate": 0.00014549494349815843, "loss": 11.6561, "step": 50131 }, { "epoch": 1.0494013229506824, "grad_norm": 0.5301766991615295, "learning_rate": 0.00014549299099882998, "loss": 11.6855, "step": 50132 }, { "epoch": 1.0494222557146446, "grad_norm": 0.2751353681087494, "learning_rate": 0.00014549103847763214, "loss": 11.675, "step": 50133 }, { "epoch": 1.0494431884786066, "grad_norm": 0.3396689295768738, "learning_rate": 0.00014548908593456586, "loss": 11.6706, "step": 50134 }, { "epoch": 1.0494641212425688, "grad_norm": 0.284426748752594, "learning_rate": 0.0001454871333696321, "loss": 11.676, "step": 50135 }, { "epoch": 1.049485054006531, "grad_norm": 0.2838591933250427, "learning_rate": 0.00014548518078283173, "loss": 11.6374, "step": 50136 }, { "epoch": 1.0495059867704932, "grad_norm": 0.27784594893455505, "learning_rate": 0.00014548322817416572, "loss": 11.6486, "step": 50137 }, { "epoch": 1.0495269195344554, "grad_norm": 0.31342941522598267, "learning_rate": 0.00014548127554363499, "loss": 11.652, "step": 50138 }, { "epoch": 1.0495478522984174, "grad_norm": 0.27812114357948303, "learning_rate": 0.00014547932289124053, "loss": 11.6612, "step": 50139 }, { "epoch": 1.0495687850623796, "grad_norm": 0.2879522144794464, "learning_rate": 0.00014547737021698327, "loss": 11.667, "step": 50140 }, { "epoch": 1.0495897178263418, "grad_norm": 0.35456857085227966, "learning_rate": 0.0001454754175208641, "loss": 11.6567, "step": 50141 }, { "epoch": 1.049610650590304, "grad_norm": 0.34220319986343384, "learning_rate": 0.00014547346480288397, "loss": 11.6677, "step": 50142 }, { "epoch": 1.049631583354266, "grad_norm": 0.28928041458129883, "learning_rate": 0.00014547151206304384, "loss": 11.6732, "step": 50143 }, { "epoch": 1.0496525161182282, "grad_norm": 0.4142468273639679, "learning_rate": 0.00014546955930134464, "loss": 11.6766, "step": 50144 }, { "epoch": 1.0496734488821904, "grad_norm": 0.5388379693031311, "learning_rate": 0.00014546760651778734, "loss": 11.671, "step": 50145 }, { "epoch": 1.0496943816461526, "grad_norm": 0.3141493499279022, "learning_rate": 0.0001454656537123728, "loss": 11.6624, "step": 50146 }, { "epoch": 1.0497153144101148, "grad_norm": 0.41917237639427185, "learning_rate": 0.00014546370088510207, "loss": 11.676, "step": 50147 }, { "epoch": 1.0497362471740768, "grad_norm": 0.3750608265399933, "learning_rate": 0.000145461748035976, "loss": 11.6575, "step": 50148 }, { "epoch": 1.049757179938039, "grad_norm": 0.3013133704662323, "learning_rate": 0.00014545979516499555, "loss": 11.6662, "step": 50149 }, { "epoch": 1.0497781127020012, "grad_norm": 0.33711645007133484, "learning_rate": 0.00014545784227216167, "loss": 11.6643, "step": 50150 }, { "epoch": 1.0497990454659634, "grad_norm": 0.36792227625846863, "learning_rate": 0.0001454558893574753, "loss": 11.673, "step": 50151 }, { "epoch": 1.0498199782299256, "grad_norm": 0.29591280221939087, "learning_rate": 0.00014545393642093738, "loss": 11.6629, "step": 50152 }, { "epoch": 1.0498409109938875, "grad_norm": 0.28470802307128906, "learning_rate": 0.0001454519834625488, "loss": 11.6564, "step": 50153 }, { "epoch": 1.0498618437578497, "grad_norm": 0.3749038577079773, "learning_rate": 0.00014545003048231062, "loss": 11.6776, "step": 50154 }, { "epoch": 1.049882776521812, "grad_norm": 0.3295123279094696, "learning_rate": 0.00014544807748022363, "loss": 11.6745, "step": 50155 }, { "epoch": 1.0499037092857741, "grad_norm": 0.30323296785354614, "learning_rate": 0.00014544612445628884, "loss": 11.6658, "step": 50156 }, { "epoch": 1.0499246420497363, "grad_norm": 0.2892240583896637, "learning_rate": 0.00014544417141050723, "loss": 11.6701, "step": 50157 }, { "epoch": 1.0499455748136983, "grad_norm": 0.30245235562324524, "learning_rate": 0.0001454422183428797, "loss": 11.6539, "step": 50158 }, { "epoch": 1.0499665075776605, "grad_norm": 0.37936127185821533, "learning_rate": 0.00014544026525340712, "loss": 11.6622, "step": 50159 }, { "epoch": 1.0499874403416227, "grad_norm": 0.2930426299571991, "learning_rate": 0.00014543831214209054, "loss": 11.6898, "step": 50160 }, { "epoch": 1.050008373105585, "grad_norm": 0.4135455787181854, "learning_rate": 0.00014543635900893085, "loss": 11.6772, "step": 50161 }, { "epoch": 1.050029305869547, "grad_norm": 0.4115520119667053, "learning_rate": 0.00014543440585392897, "loss": 11.6514, "step": 50162 }, { "epoch": 1.050050238633509, "grad_norm": 0.3304366171360016, "learning_rate": 0.00014543245267708587, "loss": 11.6866, "step": 50163 }, { "epoch": 1.0500711713974713, "grad_norm": 0.306210458278656, "learning_rate": 0.0001454304994784025, "loss": 11.6714, "step": 50164 }, { "epoch": 1.0500921041614335, "grad_norm": 0.310185045003891, "learning_rate": 0.00014542854625787977, "loss": 11.653, "step": 50165 }, { "epoch": 1.0501130369253957, "grad_norm": 0.30106955766677856, "learning_rate": 0.00014542659301551862, "loss": 11.6796, "step": 50166 }, { "epoch": 1.0501339696893577, "grad_norm": 0.31036755442619324, "learning_rate": 0.00014542463975132002, "loss": 11.6838, "step": 50167 }, { "epoch": 1.0501549024533199, "grad_norm": 0.26559388637542725, "learning_rate": 0.00014542268646528482, "loss": 11.6542, "step": 50168 }, { "epoch": 1.050175835217282, "grad_norm": 0.39262479543685913, "learning_rate": 0.0001454207331574141, "loss": 11.6848, "step": 50169 }, { "epoch": 1.0501967679812443, "grad_norm": 0.28282687067985535, "learning_rate": 0.0001454187798277087, "loss": 11.6575, "step": 50170 }, { "epoch": 1.0502177007452065, "grad_norm": 0.37368783354759216, "learning_rate": 0.00014541682647616953, "loss": 11.6683, "step": 50171 }, { "epoch": 1.0502386335091685, "grad_norm": 0.28927600383758545, "learning_rate": 0.00014541487310279765, "loss": 11.6484, "step": 50172 }, { "epoch": 1.0502595662731307, "grad_norm": 0.29480743408203125, "learning_rate": 0.0001454129197075939, "loss": 11.6933, "step": 50173 }, { "epoch": 1.0502804990370929, "grad_norm": 0.42107951641082764, "learning_rate": 0.00014541096629055926, "loss": 11.6693, "step": 50174 }, { "epoch": 1.050301431801055, "grad_norm": 0.3872849941253662, "learning_rate": 0.00014540901285169466, "loss": 11.6789, "step": 50175 }, { "epoch": 1.0503223645650173, "grad_norm": 0.34259897470474243, "learning_rate": 0.00014540705939100103, "loss": 11.6542, "step": 50176 }, { "epoch": 1.0503432973289792, "grad_norm": 0.3480951189994812, "learning_rate": 0.00014540510590847931, "loss": 11.6867, "step": 50177 }, { "epoch": 1.0503642300929414, "grad_norm": 0.32671499252319336, "learning_rate": 0.00014540315240413044, "loss": 11.6794, "step": 50178 }, { "epoch": 1.0503851628569036, "grad_norm": 0.30775830149650574, "learning_rate": 0.0001454011988779554, "loss": 11.6703, "step": 50179 }, { "epoch": 1.0504060956208658, "grad_norm": 0.28191888332366943, "learning_rate": 0.00014539924532995505, "loss": 11.6647, "step": 50180 }, { "epoch": 1.0504270283848278, "grad_norm": 0.30678510665893555, "learning_rate": 0.00014539729176013042, "loss": 11.6604, "step": 50181 }, { "epoch": 1.05044796114879, "grad_norm": 0.29857999086380005, "learning_rate": 0.00014539533816848235, "loss": 11.6784, "step": 50182 }, { "epoch": 1.0504688939127522, "grad_norm": 0.4392072260379791, "learning_rate": 0.00014539338455501187, "loss": 11.656, "step": 50183 }, { "epoch": 1.0504898266767144, "grad_norm": 0.2928309738636017, "learning_rate": 0.00014539143091971986, "loss": 11.6731, "step": 50184 }, { "epoch": 1.0505107594406766, "grad_norm": 0.24260881543159485, "learning_rate": 0.0001453894772626073, "loss": 11.6631, "step": 50185 }, { "epoch": 1.0505316922046386, "grad_norm": 0.31338420510292053, "learning_rate": 0.0001453875235836751, "loss": 11.6707, "step": 50186 }, { "epoch": 1.0505526249686008, "grad_norm": 0.3272375166416168, "learning_rate": 0.0001453855698829242, "loss": 11.6766, "step": 50187 }, { "epoch": 1.050573557732563, "grad_norm": 0.3207338750362396, "learning_rate": 0.00014538361616035555, "loss": 11.663, "step": 50188 }, { "epoch": 1.0505944904965252, "grad_norm": 0.29105904698371887, "learning_rate": 0.00014538166241597012, "loss": 11.6397, "step": 50189 }, { "epoch": 1.0506154232604874, "grad_norm": 0.3109534978866577, "learning_rate": 0.00014537970864976875, "loss": 11.6685, "step": 50190 }, { "epoch": 1.0506363560244494, "grad_norm": 0.29855218529701233, "learning_rate": 0.00014537775486175248, "loss": 11.67, "step": 50191 }, { "epoch": 1.0506572887884116, "grad_norm": 0.29101675748825073, "learning_rate": 0.00014537580105192222, "loss": 11.6698, "step": 50192 }, { "epoch": 1.0506782215523738, "grad_norm": 0.3287281095981598, "learning_rate": 0.0001453738472202789, "loss": 11.6604, "step": 50193 }, { "epoch": 1.050699154316336, "grad_norm": 0.35455724596977234, "learning_rate": 0.00014537189336682346, "loss": 11.6775, "step": 50194 }, { "epoch": 1.0507200870802982, "grad_norm": 0.31997227668762207, "learning_rate": 0.00014536993949155683, "loss": 11.6679, "step": 50195 }, { "epoch": 1.0507410198442602, "grad_norm": 0.42994728684425354, "learning_rate": 0.00014536798559447998, "loss": 11.6784, "step": 50196 }, { "epoch": 1.0507619526082224, "grad_norm": 0.2271474003791809, "learning_rate": 0.0001453660316755938, "loss": 11.6734, "step": 50197 }, { "epoch": 1.0507828853721846, "grad_norm": 0.2909128665924072, "learning_rate": 0.0001453640777348993, "loss": 11.6584, "step": 50198 }, { "epoch": 1.0508038181361468, "grad_norm": 0.2830406427383423, "learning_rate": 0.00014536212377239734, "loss": 11.6525, "step": 50199 }, { "epoch": 1.0508247509001087, "grad_norm": 0.3525899350643158, "learning_rate": 0.00014536016978808892, "loss": 11.6671, "step": 50200 }, { "epoch": 1.050845683664071, "grad_norm": 0.28882238268852234, "learning_rate": 0.00014535821578197494, "loss": 11.6601, "step": 50201 }, { "epoch": 1.0508666164280331, "grad_norm": 0.340938925743103, "learning_rate": 0.00014535626175405639, "loss": 11.6712, "step": 50202 }, { "epoch": 1.0508875491919953, "grad_norm": 0.3700333833694458, "learning_rate": 0.00014535430770433417, "loss": 11.6619, "step": 50203 }, { "epoch": 1.0509084819559575, "grad_norm": 0.3314480483531952, "learning_rate": 0.0001453523536328092, "loss": 11.6618, "step": 50204 }, { "epoch": 1.0509294147199195, "grad_norm": 0.30704617500305176, "learning_rate": 0.00014535039953948245, "loss": 11.6765, "step": 50205 }, { "epoch": 1.0509503474838817, "grad_norm": 0.2860426902770996, "learning_rate": 0.00014534844542435485, "loss": 11.6789, "step": 50206 }, { "epoch": 1.050971280247844, "grad_norm": 0.30173569917678833, "learning_rate": 0.00014534649128742738, "loss": 11.6781, "step": 50207 }, { "epoch": 1.0509922130118061, "grad_norm": 0.3011721968650818, "learning_rate": 0.00014534453712870092, "loss": 11.6573, "step": 50208 }, { "epoch": 1.0510131457757683, "grad_norm": 0.29617875814437866, "learning_rate": 0.00014534258294817643, "loss": 11.6624, "step": 50209 }, { "epoch": 1.0510340785397303, "grad_norm": 0.28323420882225037, "learning_rate": 0.00014534062874585485, "loss": 11.674, "step": 50210 }, { "epoch": 1.0510550113036925, "grad_norm": 0.3042648732662201, "learning_rate": 0.0001453386745217371, "loss": 11.6732, "step": 50211 }, { "epoch": 1.0510759440676547, "grad_norm": 0.2930608093738556, "learning_rate": 0.00014533672027582418, "loss": 11.6695, "step": 50212 }, { "epoch": 1.051096876831617, "grad_norm": 0.25975501537323, "learning_rate": 0.00014533476600811696, "loss": 11.6674, "step": 50213 }, { "epoch": 1.051117809595579, "grad_norm": 0.27704229950904846, "learning_rate": 0.00014533281171861645, "loss": 11.6751, "step": 50214 }, { "epoch": 1.051138742359541, "grad_norm": 0.29025542736053467, "learning_rate": 0.00014533085740732352, "loss": 11.6582, "step": 50215 }, { "epoch": 1.0511596751235033, "grad_norm": 0.3294641971588135, "learning_rate": 0.00014532890307423913, "loss": 11.6699, "step": 50216 }, { "epoch": 1.0511806078874655, "grad_norm": 0.31314677000045776, "learning_rate": 0.0001453269487193642, "loss": 11.6672, "step": 50217 }, { "epoch": 1.0512015406514277, "grad_norm": 0.26503986120224, "learning_rate": 0.00014532499434269976, "loss": 11.6746, "step": 50218 }, { "epoch": 1.0512224734153897, "grad_norm": 0.3335948586463928, "learning_rate": 0.00014532303994424666, "loss": 11.6742, "step": 50219 }, { "epoch": 1.0512434061793519, "grad_norm": 0.29448068141937256, "learning_rate": 0.0001453210855240059, "loss": 11.6779, "step": 50220 }, { "epoch": 1.051264338943314, "grad_norm": 0.335225373506546, "learning_rate": 0.00014531913108197832, "loss": 11.6731, "step": 50221 }, { "epoch": 1.0512852717072763, "grad_norm": 0.2969124913215637, "learning_rate": 0.00014531717661816495, "loss": 11.6678, "step": 50222 }, { "epoch": 1.0513062044712385, "grad_norm": 0.31962651014328003, "learning_rate": 0.00014531522213256674, "loss": 11.679, "step": 50223 }, { "epoch": 1.0513271372352004, "grad_norm": 0.34930333495140076, "learning_rate": 0.00014531326762518457, "loss": 11.6646, "step": 50224 }, { "epoch": 1.0513480699991626, "grad_norm": 0.30326253175735474, "learning_rate": 0.0001453113130960194, "loss": 11.6669, "step": 50225 }, { "epoch": 1.0513690027631248, "grad_norm": 0.34136688709259033, "learning_rate": 0.00014530935854507217, "loss": 11.6663, "step": 50226 }, { "epoch": 1.051389935527087, "grad_norm": 0.40594711899757385, "learning_rate": 0.0001453074039723438, "loss": 11.6865, "step": 50227 }, { "epoch": 1.0514108682910492, "grad_norm": 0.26095378398895264, "learning_rate": 0.00014530544937783532, "loss": 11.6545, "step": 50228 }, { "epoch": 1.0514318010550112, "grad_norm": 0.2725748121738434, "learning_rate": 0.00014530349476154754, "loss": 11.6527, "step": 50229 }, { "epoch": 1.0514527338189734, "grad_norm": 0.3247598111629486, "learning_rate": 0.0001453015401234815, "loss": 11.6788, "step": 50230 }, { "epoch": 1.0514736665829356, "grad_norm": 0.30019012093544006, "learning_rate": 0.0001452995854636381, "loss": 11.6675, "step": 50231 }, { "epoch": 1.0514945993468978, "grad_norm": 0.42000898718833923, "learning_rate": 0.00014529763078201827, "loss": 11.6805, "step": 50232 }, { "epoch": 1.05151553211086, "grad_norm": 0.28864729404449463, "learning_rate": 0.00014529567607862295, "loss": 11.674, "step": 50233 }, { "epoch": 1.051536464874822, "grad_norm": 0.304979145526886, "learning_rate": 0.0001452937213534531, "loss": 11.6625, "step": 50234 }, { "epoch": 1.0515573976387842, "grad_norm": 0.2747744917869568, "learning_rate": 0.00014529176660650967, "loss": 11.6599, "step": 50235 }, { "epoch": 1.0515783304027464, "grad_norm": 0.29584163427352905, "learning_rate": 0.00014528981183779355, "loss": 11.6712, "step": 50236 }, { "epoch": 1.0515992631667086, "grad_norm": 0.4058527648448944, "learning_rate": 0.00014528785704730573, "loss": 11.6665, "step": 50237 }, { "epoch": 1.0516201959306706, "grad_norm": 0.51790452003479, "learning_rate": 0.00014528590223504713, "loss": 11.663, "step": 50238 }, { "epoch": 1.0516411286946328, "grad_norm": 0.2887476682662964, "learning_rate": 0.00014528394740101868, "loss": 11.6487, "step": 50239 }, { "epoch": 1.051662061458595, "grad_norm": 0.33714619278907776, "learning_rate": 0.0001452819925452213, "loss": 11.6844, "step": 50240 }, { "epoch": 1.0516829942225572, "grad_norm": 0.4149685204029083, "learning_rate": 0.00014528003766765601, "loss": 11.6676, "step": 50241 }, { "epoch": 1.0517039269865194, "grad_norm": 0.32257702946662903, "learning_rate": 0.00014527808276832368, "loss": 11.6649, "step": 50242 }, { "epoch": 1.0517248597504814, "grad_norm": 0.37348347902297974, "learning_rate": 0.00014527612784722527, "loss": 11.6654, "step": 50243 }, { "epoch": 1.0517457925144436, "grad_norm": 0.27440011501312256, "learning_rate": 0.0001452741729043617, "loss": 11.6635, "step": 50244 }, { "epoch": 1.0517667252784058, "grad_norm": 0.2869415283203125, "learning_rate": 0.00014527221793973397, "loss": 11.6709, "step": 50245 }, { "epoch": 1.051787658042368, "grad_norm": 0.34259167313575745, "learning_rate": 0.0001452702629533429, "loss": 11.6589, "step": 50246 }, { "epoch": 1.0518085908063302, "grad_norm": 0.3206322491168976, "learning_rate": 0.0001452683079451896, "loss": 11.663, "step": 50247 }, { "epoch": 1.0518295235702921, "grad_norm": 0.26429876685142517, "learning_rate": 0.00014526635291527487, "loss": 11.6844, "step": 50248 }, { "epoch": 1.0518504563342543, "grad_norm": 0.33377113938331604, "learning_rate": 0.0001452643978635997, "loss": 11.6872, "step": 50249 }, { "epoch": 1.0518713890982165, "grad_norm": 0.32808709144592285, "learning_rate": 0.00014526244279016503, "loss": 11.6839, "step": 50250 }, { "epoch": 1.0518923218621787, "grad_norm": 0.3686760663986206, "learning_rate": 0.00014526048769497178, "loss": 11.6691, "step": 50251 }, { "epoch": 1.051913254626141, "grad_norm": 0.2870241403579712, "learning_rate": 0.00014525853257802093, "loss": 11.6561, "step": 50252 }, { "epoch": 1.051934187390103, "grad_norm": 0.2976498007774353, "learning_rate": 0.00014525657743931337, "loss": 11.6647, "step": 50253 }, { "epoch": 1.0519551201540651, "grad_norm": 0.3348216712474823, "learning_rate": 0.0001452546222788501, "loss": 11.6742, "step": 50254 }, { "epoch": 1.0519760529180273, "grad_norm": 0.2759120464324951, "learning_rate": 0.00014525266709663201, "loss": 11.6556, "step": 50255 }, { "epoch": 1.0519969856819895, "grad_norm": 0.3855929970741272, "learning_rate": 0.00014525071189266004, "loss": 11.668, "step": 50256 }, { "epoch": 1.0520179184459515, "grad_norm": 0.24515098333358765, "learning_rate": 0.0001452487566669352, "loss": 11.655, "step": 50257 }, { "epoch": 1.0520388512099137, "grad_norm": 0.35041239857673645, "learning_rate": 0.0001452468014194583, "loss": 11.6786, "step": 50258 }, { "epoch": 1.052059783973876, "grad_norm": 0.2819615602493286, "learning_rate": 0.0001452448461502304, "loss": 11.6698, "step": 50259 }, { "epoch": 1.052080716737838, "grad_norm": 0.2905868887901306, "learning_rate": 0.0001452428908592524, "loss": 11.6746, "step": 50260 }, { "epoch": 1.0521016495018003, "grad_norm": 0.2825753688812256, "learning_rate": 0.00014524093554652522, "loss": 11.6761, "step": 50261 }, { "epoch": 1.0521225822657623, "grad_norm": 0.2662075161933899, "learning_rate": 0.00014523898021204983, "loss": 11.6856, "step": 50262 }, { "epoch": 1.0521435150297245, "grad_norm": 0.37854012846946716, "learning_rate": 0.00014523702485582713, "loss": 11.6622, "step": 50263 }, { "epoch": 1.0521644477936867, "grad_norm": 0.3601778745651245, "learning_rate": 0.00014523506947785813, "loss": 11.6744, "step": 50264 }, { "epoch": 1.0521853805576489, "grad_norm": 0.2555963397026062, "learning_rate": 0.00014523311407814367, "loss": 11.6689, "step": 50265 }, { "epoch": 1.052206313321611, "grad_norm": 0.39434877038002014, "learning_rate": 0.00014523115865668477, "loss": 11.673, "step": 50266 }, { "epoch": 1.052227246085573, "grad_norm": 0.33793139457702637, "learning_rate": 0.00014522920321348237, "loss": 11.669, "step": 50267 }, { "epoch": 1.0522481788495353, "grad_norm": 0.2934032678604126, "learning_rate": 0.00014522724774853737, "loss": 11.66, "step": 50268 }, { "epoch": 1.0522691116134975, "grad_norm": 0.27088865637779236, "learning_rate": 0.0001452252922618507, "loss": 11.6693, "step": 50269 }, { "epoch": 1.0522900443774597, "grad_norm": 0.31065261363983154, "learning_rate": 0.00014522333675342334, "loss": 11.669, "step": 50270 }, { "epoch": 1.0523109771414219, "grad_norm": 0.28358086943626404, "learning_rate": 0.0001452213812232562, "loss": 11.6573, "step": 50271 }, { "epoch": 1.0523319099053838, "grad_norm": 0.3026660084724426, "learning_rate": 0.0001452194256713503, "loss": 11.6736, "step": 50272 }, { "epoch": 1.052352842669346, "grad_norm": 0.4249682128429413, "learning_rate": 0.00014521747009770645, "loss": 11.6686, "step": 50273 }, { "epoch": 1.0523737754333082, "grad_norm": 0.3372260630130768, "learning_rate": 0.0001452155145023257, "loss": 11.6805, "step": 50274 }, { "epoch": 1.0523947081972704, "grad_norm": 0.2851909101009369, "learning_rate": 0.0001452135588852089, "loss": 11.6661, "step": 50275 }, { "epoch": 1.0524156409612324, "grad_norm": 0.37761324644088745, "learning_rate": 0.00014521160324635707, "loss": 11.6589, "step": 50276 }, { "epoch": 1.0524365737251946, "grad_norm": 0.3641226291656494, "learning_rate": 0.00014520964758577113, "loss": 11.6733, "step": 50277 }, { "epoch": 1.0524575064891568, "grad_norm": 0.34132593870162964, "learning_rate": 0.00014520769190345197, "loss": 11.6672, "step": 50278 }, { "epoch": 1.052478439253119, "grad_norm": 0.3294042646884918, "learning_rate": 0.0001452057361994006, "loss": 11.67, "step": 50279 }, { "epoch": 1.0524993720170812, "grad_norm": 0.42197099328041077, "learning_rate": 0.0001452037804736179, "loss": 11.6647, "step": 50280 }, { "epoch": 1.0525203047810432, "grad_norm": 0.35407736897468567, "learning_rate": 0.0001452018247261048, "loss": 11.6652, "step": 50281 }, { "epoch": 1.0525412375450054, "grad_norm": 0.30229470133781433, "learning_rate": 0.00014519986895686235, "loss": 11.6547, "step": 50282 }, { "epoch": 1.0525621703089676, "grad_norm": 0.32525935769081116, "learning_rate": 0.00014519791316589139, "loss": 11.6568, "step": 50283 }, { "epoch": 1.0525831030729298, "grad_norm": 0.4021500051021576, "learning_rate": 0.0001451959573531929, "loss": 11.6744, "step": 50284 }, { "epoch": 1.052604035836892, "grad_norm": 0.4100588858127594, "learning_rate": 0.00014519400151876777, "loss": 11.6661, "step": 50285 }, { "epoch": 1.052624968600854, "grad_norm": 0.2601839601993561, "learning_rate": 0.000145192045662617, "loss": 11.671, "step": 50286 }, { "epoch": 1.0526459013648162, "grad_norm": 0.3238644301891327, "learning_rate": 0.0001451900897847415, "loss": 11.6765, "step": 50287 }, { "epoch": 1.0526668341287784, "grad_norm": 0.36983558535575867, "learning_rate": 0.00014518813388514224, "loss": 11.6646, "step": 50288 }, { "epoch": 1.0526877668927406, "grad_norm": 0.30211684107780457, "learning_rate": 0.00014518617796382011, "loss": 11.6601, "step": 50289 }, { "epoch": 1.0527086996567028, "grad_norm": 0.355450302362442, "learning_rate": 0.0001451842220207761, "loss": 11.6612, "step": 50290 }, { "epoch": 1.0527296324206648, "grad_norm": 0.27110937237739563, "learning_rate": 0.00014518226605601115, "loss": 11.6669, "step": 50291 }, { "epoch": 1.052750565184627, "grad_norm": 0.33692672848701477, "learning_rate": 0.00014518031006952611, "loss": 11.6787, "step": 50292 }, { "epoch": 1.0527714979485892, "grad_norm": 0.29204457998275757, "learning_rate": 0.00014517835406132205, "loss": 11.6773, "step": 50293 }, { "epoch": 1.0527924307125514, "grad_norm": 0.41303882002830505, "learning_rate": 0.00014517639803139984, "loss": 11.659, "step": 50294 }, { "epoch": 1.0528133634765133, "grad_norm": 0.29721197485923767, "learning_rate": 0.0001451744419797604, "loss": 11.6514, "step": 50295 }, { "epoch": 1.0528342962404755, "grad_norm": 0.3957488536834717, "learning_rate": 0.00014517248590640476, "loss": 11.6558, "step": 50296 }, { "epoch": 1.0528552290044377, "grad_norm": 0.31861481070518494, "learning_rate": 0.00014517052981133374, "loss": 11.6561, "step": 50297 }, { "epoch": 1.0528761617684, "grad_norm": 0.3420741558074951, "learning_rate": 0.00014516857369454837, "loss": 11.665, "step": 50298 }, { "epoch": 1.0528970945323621, "grad_norm": 0.3063330352306366, "learning_rate": 0.00014516661755604955, "loss": 11.6746, "step": 50299 }, { "epoch": 1.052918027296324, "grad_norm": 0.3386160731315613, "learning_rate": 0.00014516466139583825, "loss": 11.6706, "step": 50300 }, { "epoch": 1.0529389600602863, "grad_norm": 0.34127384424209595, "learning_rate": 0.00014516270521391537, "loss": 11.6769, "step": 50301 }, { "epoch": 1.0529598928242485, "grad_norm": 0.3023264706134796, "learning_rate": 0.00014516074901028188, "loss": 11.6744, "step": 50302 }, { "epoch": 1.0529808255882107, "grad_norm": 0.3341386318206787, "learning_rate": 0.0001451587927849387, "loss": 11.6718, "step": 50303 }, { "epoch": 1.053001758352173, "grad_norm": 0.31439703702926636, "learning_rate": 0.0001451568365378868, "loss": 11.6698, "step": 50304 }, { "epoch": 1.053022691116135, "grad_norm": 0.3605176508426666, "learning_rate": 0.0001451548802691271, "loss": 11.6924, "step": 50305 }, { "epoch": 1.053043623880097, "grad_norm": 0.4487086236476898, "learning_rate": 0.00014515292397866057, "loss": 11.6815, "step": 50306 }, { "epoch": 1.0530645566440593, "grad_norm": 0.3318383991718292, "learning_rate": 0.00014515096766648807, "loss": 11.6548, "step": 50307 }, { "epoch": 1.0530854894080215, "grad_norm": 0.29409611225128174, "learning_rate": 0.00014514901133261065, "loss": 11.6616, "step": 50308 }, { "epoch": 1.0531064221719837, "grad_norm": 0.3399112820625305, "learning_rate": 0.00014514705497702915, "loss": 11.6519, "step": 50309 }, { "epoch": 1.0531273549359457, "grad_norm": 0.29529258608818054, "learning_rate": 0.0001451450985997446, "loss": 11.6611, "step": 50310 }, { "epoch": 1.0531482876999079, "grad_norm": 0.31410956382751465, "learning_rate": 0.00014514314220075787, "loss": 11.6761, "step": 50311 }, { "epoch": 1.05316922046387, "grad_norm": 0.2765512466430664, "learning_rate": 0.00014514118578006993, "loss": 11.6815, "step": 50312 }, { "epoch": 1.0531901532278323, "grad_norm": 0.31668543815612793, "learning_rate": 0.00014513922933768173, "loss": 11.6733, "step": 50313 }, { "epoch": 1.0532110859917942, "grad_norm": 0.3333049714565277, "learning_rate": 0.00014513727287359418, "loss": 11.6791, "step": 50314 }, { "epoch": 1.0532320187557564, "grad_norm": 0.27330905199050903, "learning_rate": 0.00014513531638780824, "loss": 11.6516, "step": 50315 }, { "epoch": 1.0532529515197186, "grad_norm": 0.28688061237335205, "learning_rate": 0.00014513335988032485, "loss": 11.6738, "step": 50316 }, { "epoch": 1.0532738842836809, "grad_norm": 0.2473137080669403, "learning_rate": 0.00014513140335114494, "loss": 11.6683, "step": 50317 }, { "epoch": 1.053294817047643, "grad_norm": 0.31775790452957153, "learning_rate": 0.0001451294468002695, "loss": 11.6778, "step": 50318 }, { "epoch": 1.053315749811605, "grad_norm": 0.2783356308937073, "learning_rate": 0.00014512749022769939, "loss": 11.6762, "step": 50319 }, { "epoch": 1.0533366825755672, "grad_norm": 0.38458356261253357, "learning_rate": 0.0001451255336334356, "loss": 11.6774, "step": 50320 }, { "epoch": 1.0533576153395294, "grad_norm": 0.30334874987602234, "learning_rate": 0.00014512357701747909, "loss": 11.6742, "step": 50321 }, { "epoch": 1.0533785481034916, "grad_norm": 0.33920612931251526, "learning_rate": 0.0001451216203798307, "loss": 11.6595, "step": 50322 }, { "epoch": 1.0533994808674538, "grad_norm": 0.29780444502830505, "learning_rate": 0.00014511966372049152, "loss": 11.6673, "step": 50323 }, { "epoch": 1.0534204136314158, "grad_norm": 0.3484609127044678, "learning_rate": 0.00014511770703946238, "loss": 11.6783, "step": 50324 }, { "epoch": 1.053441346395378, "grad_norm": 0.26879844069480896, "learning_rate": 0.00014511575033674426, "loss": 11.6593, "step": 50325 }, { "epoch": 1.0534622791593402, "grad_norm": 0.32477372884750366, "learning_rate": 0.00014511379361233808, "loss": 11.6817, "step": 50326 }, { "epoch": 1.0534832119233024, "grad_norm": 0.4032638370990753, "learning_rate": 0.0001451118368662448, "loss": 11.6604, "step": 50327 }, { "epoch": 1.0535041446872646, "grad_norm": 0.3267064094543457, "learning_rate": 0.0001451098800984654, "loss": 11.6775, "step": 50328 }, { "epoch": 1.0535250774512266, "grad_norm": 0.3298368752002716, "learning_rate": 0.0001451079233090007, "loss": 11.6659, "step": 50329 }, { "epoch": 1.0535460102151888, "grad_norm": 0.3048911392688751, "learning_rate": 0.00014510596649785178, "loss": 11.6709, "step": 50330 }, { "epoch": 1.053566942979151, "grad_norm": 0.3406418263912201, "learning_rate": 0.0001451040096650195, "loss": 11.6757, "step": 50331 }, { "epoch": 1.0535878757431132, "grad_norm": 0.346406489610672, "learning_rate": 0.00014510205281050478, "loss": 11.6766, "step": 50332 }, { "epoch": 1.0536088085070752, "grad_norm": 0.29647085070610046, "learning_rate": 0.00014510009593430868, "loss": 11.675, "step": 50333 }, { "epoch": 1.0536297412710374, "grad_norm": 0.3016699552536011, "learning_rate": 0.000145098139036432, "loss": 11.6569, "step": 50334 }, { "epoch": 1.0536506740349996, "grad_norm": 0.34644296765327454, "learning_rate": 0.00014509618211687575, "loss": 11.6704, "step": 50335 }, { "epoch": 1.0536716067989618, "grad_norm": 0.2695247232913971, "learning_rate": 0.00014509422517564087, "loss": 11.6489, "step": 50336 }, { "epoch": 1.053692539562924, "grad_norm": 0.2726382911205292, "learning_rate": 0.00014509226821272827, "loss": 11.6603, "step": 50337 }, { "epoch": 1.053713472326886, "grad_norm": 0.48744839429855347, "learning_rate": 0.00014509031122813895, "loss": 11.6668, "step": 50338 }, { "epoch": 1.0537344050908481, "grad_norm": 0.38762348890304565, "learning_rate": 0.0001450883542218738, "loss": 11.6636, "step": 50339 }, { "epoch": 1.0537553378548103, "grad_norm": 0.3179703652858734, "learning_rate": 0.00014508639719393378, "loss": 11.6708, "step": 50340 }, { "epoch": 1.0537762706187725, "grad_norm": 0.32572901248931885, "learning_rate": 0.00014508444014431978, "loss": 11.6876, "step": 50341 }, { "epoch": 1.0537972033827347, "grad_norm": 0.40431997179985046, "learning_rate": 0.00014508248307303282, "loss": 11.6863, "step": 50342 }, { "epoch": 1.0538181361466967, "grad_norm": 0.3272183835506439, "learning_rate": 0.00014508052598007383, "loss": 11.6518, "step": 50343 }, { "epoch": 1.053839068910659, "grad_norm": 0.398064523935318, "learning_rate": 0.0001450785688654437, "loss": 11.6719, "step": 50344 }, { "epoch": 1.0538600016746211, "grad_norm": 0.328720360994339, "learning_rate": 0.00014507661172914342, "loss": 11.6713, "step": 50345 }, { "epoch": 1.0538809344385833, "grad_norm": 0.31439509987831116, "learning_rate": 0.00014507465457117388, "loss": 11.6646, "step": 50346 }, { "epoch": 1.0539018672025455, "grad_norm": 0.262239933013916, "learning_rate": 0.00014507269739153606, "loss": 11.665, "step": 50347 }, { "epoch": 1.0539227999665075, "grad_norm": 0.29596516489982605, "learning_rate": 0.0001450707401902309, "loss": 11.6646, "step": 50348 }, { "epoch": 1.0539437327304697, "grad_norm": 0.3374568521976471, "learning_rate": 0.00014506878296725932, "loss": 11.6713, "step": 50349 }, { "epoch": 1.053964665494432, "grad_norm": 0.28740623593330383, "learning_rate": 0.00014506682572262229, "loss": 11.6589, "step": 50350 }, { "epoch": 1.053985598258394, "grad_norm": 0.2991179823875427, "learning_rate": 0.00014506486845632072, "loss": 11.6728, "step": 50351 }, { "epoch": 1.054006531022356, "grad_norm": 0.31721335649490356, "learning_rate": 0.00014506291116835556, "loss": 11.6477, "step": 50352 }, { "epoch": 1.0540274637863183, "grad_norm": 0.287819504737854, "learning_rate": 0.00014506095385872779, "loss": 11.6903, "step": 50353 }, { "epoch": 1.0540483965502805, "grad_norm": 0.2942402958869934, "learning_rate": 0.00014505899652743825, "loss": 11.6896, "step": 50354 }, { "epoch": 1.0540693293142427, "grad_norm": 0.33947086334228516, "learning_rate": 0.000145057039174488, "loss": 11.6827, "step": 50355 }, { "epoch": 1.0540902620782049, "grad_norm": 0.30311381816864014, "learning_rate": 0.00014505508179987793, "loss": 11.682, "step": 50356 }, { "epoch": 1.0541111948421669, "grad_norm": 0.2505238354206085, "learning_rate": 0.00014505312440360896, "loss": 11.6713, "step": 50357 }, { "epoch": 1.054132127606129, "grad_norm": 0.29158756136894226, "learning_rate": 0.00014505116698568205, "loss": 11.6596, "step": 50358 }, { "epoch": 1.0541530603700913, "grad_norm": 0.3539663255214691, "learning_rate": 0.00014504920954609815, "loss": 11.6625, "step": 50359 }, { "epoch": 1.0541739931340535, "grad_norm": 0.372428297996521, "learning_rate": 0.00014504725208485818, "loss": 11.6695, "step": 50360 }, { "epoch": 1.0541949258980157, "grad_norm": 0.28105559945106506, "learning_rate": 0.0001450452946019631, "loss": 11.6645, "step": 50361 }, { "epoch": 1.0542158586619776, "grad_norm": 0.32115936279296875, "learning_rate": 0.00014504333709741387, "loss": 11.664, "step": 50362 }, { "epoch": 1.0542367914259398, "grad_norm": 0.2697928547859192, "learning_rate": 0.00014504137957121137, "loss": 11.6627, "step": 50363 }, { "epoch": 1.054257724189902, "grad_norm": 0.27374476194381714, "learning_rate": 0.00014503942202335657, "loss": 11.6707, "step": 50364 }, { "epoch": 1.0542786569538642, "grad_norm": 0.3331509828567505, "learning_rate": 0.00014503746445385045, "loss": 11.6717, "step": 50365 }, { "epoch": 1.0542995897178264, "grad_norm": 0.3618376851081848, "learning_rate": 0.0001450355068626939, "loss": 11.6735, "step": 50366 }, { "epoch": 1.0543205224817884, "grad_norm": 0.3097953796386719, "learning_rate": 0.0001450335492498879, "loss": 11.6615, "step": 50367 }, { "epoch": 1.0543414552457506, "grad_norm": 0.27548947930336, "learning_rate": 0.00014503159161543335, "loss": 11.6708, "step": 50368 }, { "epoch": 1.0543623880097128, "grad_norm": 0.3896319270133972, "learning_rate": 0.0001450296339593312, "loss": 11.676, "step": 50369 }, { "epoch": 1.054383320773675, "grad_norm": 0.27364128828048706, "learning_rate": 0.00014502767628158243, "loss": 11.6551, "step": 50370 }, { "epoch": 1.054404253537637, "grad_norm": 0.3215790092945099, "learning_rate": 0.0001450257185821879, "loss": 11.6601, "step": 50371 }, { "epoch": 1.0544251863015992, "grad_norm": 0.3291596472263336, "learning_rate": 0.00014502376086114867, "loss": 11.6681, "step": 50372 }, { "epoch": 1.0544461190655614, "grad_norm": 0.27947014570236206, "learning_rate": 0.0001450218031184656, "loss": 11.6757, "step": 50373 }, { "epoch": 1.0544670518295236, "grad_norm": 0.25622981786727905, "learning_rate": 0.00014501984535413964, "loss": 11.6621, "step": 50374 }, { "epoch": 1.0544879845934858, "grad_norm": 0.3341885209083557, "learning_rate": 0.00014501788756817173, "loss": 11.668, "step": 50375 }, { "epoch": 1.0545089173574478, "grad_norm": 0.2726077139377594, "learning_rate": 0.00014501592976056283, "loss": 11.6648, "step": 50376 }, { "epoch": 1.05452985012141, "grad_norm": 0.3511873185634613, "learning_rate": 0.00014501397193131388, "loss": 11.664, "step": 50377 }, { "epoch": 1.0545507828853722, "grad_norm": 0.39507806301116943, "learning_rate": 0.00014501201408042576, "loss": 11.6459, "step": 50378 }, { "epoch": 1.0545717156493344, "grad_norm": 0.27119240164756775, "learning_rate": 0.00014501005620789952, "loss": 11.6655, "step": 50379 }, { "epoch": 1.0545926484132966, "grad_norm": 0.29974862933158875, "learning_rate": 0.00014500809831373602, "loss": 11.6687, "step": 50380 }, { "epoch": 1.0546135811772586, "grad_norm": 0.3715387284755707, "learning_rate": 0.00014500614039793624, "loss": 11.6666, "step": 50381 }, { "epoch": 1.0546345139412208, "grad_norm": 0.3033964931964874, "learning_rate": 0.0001450041824605011, "loss": 11.6797, "step": 50382 }, { "epoch": 1.054655446705183, "grad_norm": 0.3406476676464081, "learning_rate": 0.00014500222450143153, "loss": 11.6803, "step": 50383 }, { "epoch": 1.0546763794691452, "grad_norm": 0.3013605773448944, "learning_rate": 0.0001450002665207285, "loss": 11.6498, "step": 50384 }, { "epoch": 1.0546973122331074, "grad_norm": 0.2833416163921356, "learning_rate": 0.00014499830851839293, "loss": 11.6651, "step": 50385 }, { "epoch": 1.0547182449970693, "grad_norm": 0.32579779624938965, "learning_rate": 0.0001449963504944258, "loss": 11.6641, "step": 50386 }, { "epoch": 1.0547391777610315, "grad_norm": 0.31352847814559937, "learning_rate": 0.000144994392448828, "loss": 11.6664, "step": 50387 }, { "epoch": 1.0547601105249937, "grad_norm": 0.34819790720939636, "learning_rate": 0.0001449924343816005, "loss": 11.688, "step": 50388 }, { "epoch": 1.054781043288956, "grad_norm": 0.2925910949707031, "learning_rate": 0.00014499047629274424, "loss": 11.6769, "step": 50389 }, { "epoch": 1.054801976052918, "grad_norm": 0.27648186683654785, "learning_rate": 0.00014498851818226015, "loss": 11.6627, "step": 50390 }, { "epoch": 1.0548229088168801, "grad_norm": 0.3158608376979828, "learning_rate": 0.00014498656005014916, "loss": 11.6484, "step": 50391 }, { "epoch": 1.0548438415808423, "grad_norm": 0.4386301040649414, "learning_rate": 0.0001449846018964123, "loss": 11.6811, "step": 50392 }, { "epoch": 1.0548647743448045, "grad_norm": 0.32813167572021484, "learning_rate": 0.00014498264372105036, "loss": 11.6595, "step": 50393 }, { "epoch": 1.0548857071087667, "grad_norm": 0.2861435115337372, "learning_rate": 0.0001449806855240644, "loss": 11.6512, "step": 50394 }, { "epoch": 1.0549066398727287, "grad_norm": 0.298703670501709, "learning_rate": 0.0001449787273054553, "loss": 11.6646, "step": 50395 }, { "epoch": 1.054927572636691, "grad_norm": 0.3056044280529022, "learning_rate": 0.00014497676906522405, "loss": 11.657, "step": 50396 }, { "epoch": 1.054948505400653, "grad_norm": 0.28350162506103516, "learning_rate": 0.00014497481080337154, "loss": 11.6618, "step": 50397 }, { "epoch": 1.0549694381646153, "grad_norm": 0.28403201699256897, "learning_rate": 0.00014497285251989874, "loss": 11.6781, "step": 50398 }, { "epoch": 1.0549903709285775, "grad_norm": 0.3018493950366974, "learning_rate": 0.0001449708942148066, "loss": 11.6627, "step": 50399 }, { "epoch": 1.0550113036925395, "grad_norm": 0.2875949442386627, "learning_rate": 0.00014496893588809606, "loss": 11.6764, "step": 50400 }, { "epoch": 1.0550322364565017, "grad_norm": 0.34110260009765625, "learning_rate": 0.00014496697753976805, "loss": 11.6809, "step": 50401 }, { "epoch": 1.0550531692204639, "grad_norm": 0.2988907992839813, "learning_rate": 0.0001449650191698235, "loss": 11.6592, "step": 50402 }, { "epoch": 1.055074101984426, "grad_norm": 0.31897246837615967, "learning_rate": 0.00014496306077826335, "loss": 11.6615, "step": 50403 }, { "epoch": 1.0550950347483883, "grad_norm": 0.3657732605934143, "learning_rate": 0.00014496110236508858, "loss": 11.679, "step": 50404 }, { "epoch": 1.0551159675123503, "grad_norm": 0.3918939232826233, "learning_rate": 0.0001449591439303001, "loss": 11.6631, "step": 50405 }, { "epoch": 1.0551369002763125, "grad_norm": 0.3125758469104767, "learning_rate": 0.00014495718547389888, "loss": 11.6716, "step": 50406 }, { "epoch": 1.0551578330402747, "grad_norm": 0.27936482429504395, "learning_rate": 0.0001449552269958858, "loss": 11.6719, "step": 50407 }, { "epoch": 1.0551787658042369, "grad_norm": 0.37110915780067444, "learning_rate": 0.00014495326849626186, "loss": 11.681, "step": 50408 }, { "epoch": 1.0551996985681988, "grad_norm": 0.319662481546402, "learning_rate": 0.000144951309975028, "loss": 11.6719, "step": 50409 }, { "epoch": 1.055220631332161, "grad_norm": 0.27722370624542236, "learning_rate": 0.00014494935143218512, "loss": 11.6658, "step": 50410 }, { "epoch": 1.0552415640961232, "grad_norm": 0.28586238622665405, "learning_rate": 0.0001449473928677342, "loss": 11.6907, "step": 50411 }, { "epoch": 1.0552624968600854, "grad_norm": 0.3630937337875366, "learning_rate": 0.00014494543428167616, "loss": 11.6518, "step": 50412 }, { "epoch": 1.0552834296240476, "grad_norm": 0.33095085620880127, "learning_rate": 0.00014494347567401195, "loss": 11.6805, "step": 50413 }, { "epoch": 1.0553043623880096, "grad_norm": 0.40519005060195923, "learning_rate": 0.0001449415170447425, "loss": 11.6513, "step": 50414 }, { "epoch": 1.0553252951519718, "grad_norm": 0.339253306388855, "learning_rate": 0.00014493955839386876, "loss": 11.6768, "step": 50415 }, { "epoch": 1.055346227915934, "grad_norm": 0.3725467622280121, "learning_rate": 0.00014493759972139173, "loss": 11.68, "step": 50416 }, { "epoch": 1.0553671606798962, "grad_norm": 0.34715765714645386, "learning_rate": 0.0001449356410273122, "loss": 11.6703, "step": 50417 }, { "epoch": 1.0553880934438584, "grad_norm": 0.33584651350975037, "learning_rate": 0.0001449336823116313, "loss": 11.6404, "step": 50418 }, { "epoch": 1.0554090262078204, "grad_norm": 0.3514757454395294, "learning_rate": 0.00014493172357434982, "loss": 11.6653, "step": 50419 }, { "epoch": 1.0554299589717826, "grad_norm": 0.270609587430954, "learning_rate": 0.0001449297648154688, "loss": 11.6668, "step": 50420 }, { "epoch": 1.0554508917357448, "grad_norm": 0.2528872489929199, "learning_rate": 0.0001449278060349891, "loss": 11.6609, "step": 50421 }, { "epoch": 1.055471824499707, "grad_norm": 0.34183937311172485, "learning_rate": 0.00014492584723291172, "loss": 11.667, "step": 50422 }, { "epoch": 1.0554927572636692, "grad_norm": 0.3227371275424957, "learning_rate": 0.0001449238884092376, "loss": 11.6567, "step": 50423 }, { "epoch": 1.0555136900276312, "grad_norm": 0.32391804456710815, "learning_rate": 0.00014492192956396763, "loss": 11.6603, "step": 50424 }, { "epoch": 1.0555346227915934, "grad_norm": 0.3464697003364563, "learning_rate": 0.0001449199706971028, "loss": 11.6496, "step": 50425 }, { "epoch": 1.0555555555555556, "grad_norm": 0.28992417454719543, "learning_rate": 0.00014491801180864405, "loss": 11.6617, "step": 50426 }, { "epoch": 1.0555764883195178, "grad_norm": 0.4161597788333893, "learning_rate": 0.00014491605289859233, "loss": 11.6526, "step": 50427 }, { "epoch": 1.0555974210834798, "grad_norm": 0.3120155334472656, "learning_rate": 0.00014491409396694854, "loss": 11.662, "step": 50428 }, { "epoch": 1.055618353847442, "grad_norm": 0.2955085039138794, "learning_rate": 0.00014491213501371367, "loss": 11.6651, "step": 50429 }, { "epoch": 1.0556392866114042, "grad_norm": 0.28030675649642944, "learning_rate": 0.00014491017603888862, "loss": 11.6571, "step": 50430 }, { "epoch": 1.0556602193753664, "grad_norm": 0.2704039216041565, "learning_rate": 0.00014490821704247431, "loss": 11.6718, "step": 50431 }, { "epoch": 1.0556811521393286, "grad_norm": 0.27198153734207153, "learning_rate": 0.0001449062580244718, "loss": 11.6576, "step": 50432 }, { "epoch": 1.0557020849032905, "grad_norm": 0.3874472677707672, "learning_rate": 0.0001449042989848819, "loss": 11.6909, "step": 50433 }, { "epoch": 1.0557230176672527, "grad_norm": 0.2815616726875305, "learning_rate": 0.00014490233992370561, "loss": 11.6716, "step": 50434 }, { "epoch": 1.055743950431215, "grad_norm": 0.40627896785736084, "learning_rate": 0.00014490038084094385, "loss": 11.6654, "step": 50435 }, { "epoch": 1.0557648831951771, "grad_norm": 0.28827351331710815, "learning_rate": 0.0001448984217365976, "loss": 11.6679, "step": 50436 }, { "epoch": 1.0557858159591393, "grad_norm": 0.3838529586791992, "learning_rate": 0.00014489646261066778, "loss": 11.6682, "step": 50437 }, { "epoch": 1.0558067487231013, "grad_norm": 0.27603188157081604, "learning_rate": 0.00014489450346315532, "loss": 11.6659, "step": 50438 }, { "epoch": 1.0558276814870635, "grad_norm": 0.33116936683654785, "learning_rate": 0.0001448925442940612, "loss": 11.6727, "step": 50439 }, { "epoch": 1.0558486142510257, "grad_norm": 0.3783999979496002, "learning_rate": 0.0001448905851033863, "loss": 11.6026, "step": 50440 }, { "epoch": 1.055869547014988, "grad_norm": 0.3109859526157379, "learning_rate": 0.00014488862589113162, "loss": 11.6607, "step": 50441 }, { "epoch": 1.0558904797789501, "grad_norm": 0.2694989740848541, "learning_rate": 0.00014488666665729804, "loss": 11.68, "step": 50442 }, { "epoch": 1.055911412542912, "grad_norm": 0.5407742857933044, "learning_rate": 0.00014488470740188654, "loss": 11.6534, "step": 50443 }, { "epoch": 1.0559323453068743, "grad_norm": 0.3515750467777252, "learning_rate": 0.00014488274812489812, "loss": 11.6803, "step": 50444 }, { "epoch": 1.0559532780708365, "grad_norm": 0.3467860817909241, "learning_rate": 0.00014488078882633363, "loss": 11.6511, "step": 50445 }, { "epoch": 1.0559742108347987, "grad_norm": 0.29391154646873474, "learning_rate": 0.00014487882950619404, "loss": 11.6748, "step": 50446 }, { "epoch": 1.0559951435987607, "grad_norm": 0.3030998110771179, "learning_rate": 0.0001448768701644803, "loss": 11.6683, "step": 50447 }, { "epoch": 1.0560160763627229, "grad_norm": 0.2900260388851166, "learning_rate": 0.00014487491080119336, "loss": 11.6687, "step": 50448 }, { "epoch": 1.056037009126685, "grad_norm": 0.3615156412124634, "learning_rate": 0.00014487295141633414, "loss": 11.6784, "step": 50449 }, { "epoch": 1.0560579418906473, "grad_norm": 0.2776243984699249, "learning_rate": 0.00014487099200990357, "loss": 11.6638, "step": 50450 }, { "epoch": 1.0560788746546095, "grad_norm": 0.2813177704811096, "learning_rate": 0.00014486903258190264, "loss": 11.6828, "step": 50451 }, { "epoch": 1.0560998074185715, "grad_norm": 0.3358898162841797, "learning_rate": 0.00014486707313233227, "loss": 11.65, "step": 50452 }, { "epoch": 1.0561207401825337, "grad_norm": 0.3088824450969696, "learning_rate": 0.00014486511366119343, "loss": 11.6677, "step": 50453 }, { "epoch": 1.0561416729464959, "grad_norm": 0.265083372592926, "learning_rate": 0.00014486315416848696, "loss": 11.6643, "step": 50454 }, { "epoch": 1.056162605710458, "grad_norm": 0.3700501620769501, "learning_rate": 0.00014486119465421395, "loss": 11.6621, "step": 50455 }, { "epoch": 1.0561835384744203, "grad_norm": 0.28300976753234863, "learning_rate": 0.0001448592351183752, "loss": 11.6495, "step": 50456 }, { "epoch": 1.0562044712383822, "grad_norm": 0.29257893562316895, "learning_rate": 0.00014485727556097174, "loss": 11.67, "step": 50457 }, { "epoch": 1.0562254040023444, "grad_norm": 0.3284631669521332, "learning_rate": 0.0001448553159820045, "loss": 11.6638, "step": 50458 }, { "epoch": 1.0562463367663066, "grad_norm": 0.2735723853111267, "learning_rate": 0.0001448533563814744, "loss": 11.6507, "step": 50459 }, { "epoch": 1.0562672695302688, "grad_norm": 0.28037920594215393, "learning_rate": 0.0001448513967593824, "loss": 11.6733, "step": 50460 }, { "epoch": 1.056288202294231, "grad_norm": 0.2725062668323517, "learning_rate": 0.0001448494371157294, "loss": 11.6602, "step": 50461 }, { "epoch": 1.056309135058193, "grad_norm": 0.28260621428489685, "learning_rate": 0.0001448474774505164, "loss": 11.6734, "step": 50462 }, { "epoch": 1.0563300678221552, "grad_norm": 0.35338103771209717, "learning_rate": 0.00014484551776374434, "loss": 11.6708, "step": 50463 }, { "epoch": 1.0563510005861174, "grad_norm": 0.33549612760543823, "learning_rate": 0.00014484355805541413, "loss": 11.6558, "step": 50464 }, { "epoch": 1.0563719333500796, "grad_norm": 0.3163560628890991, "learning_rate": 0.00014484159832552673, "loss": 11.6717, "step": 50465 }, { "epoch": 1.0563928661140416, "grad_norm": 0.29064393043518066, "learning_rate": 0.00014483963857408306, "loss": 11.6576, "step": 50466 }, { "epoch": 1.0564137988780038, "grad_norm": 0.37014421820640564, "learning_rate": 0.00014483767880108406, "loss": 11.659, "step": 50467 }, { "epoch": 1.056434731641966, "grad_norm": 0.35328367352485657, "learning_rate": 0.00014483571900653072, "loss": 11.6623, "step": 50468 }, { "epoch": 1.0564556644059282, "grad_norm": 0.33485227823257446, "learning_rate": 0.00014483375919042393, "loss": 11.6821, "step": 50469 }, { "epoch": 1.0564765971698904, "grad_norm": 0.3011243939399719, "learning_rate": 0.00014483179935276467, "loss": 11.6774, "step": 50470 }, { "epoch": 1.0564975299338524, "grad_norm": 0.3268931210041046, "learning_rate": 0.00014482983949355385, "loss": 11.6546, "step": 50471 }, { "epoch": 1.0565184626978146, "grad_norm": 0.29203030467033386, "learning_rate": 0.00014482787961279245, "loss": 11.6795, "step": 50472 }, { "epoch": 1.0565393954617768, "grad_norm": 0.31885990500450134, "learning_rate": 0.00014482591971048137, "loss": 11.6603, "step": 50473 }, { "epoch": 1.056560328225739, "grad_norm": 0.3041571378707886, "learning_rate": 0.00014482395978662158, "loss": 11.6496, "step": 50474 }, { "epoch": 1.0565812609897012, "grad_norm": 0.3130682408809662, "learning_rate": 0.000144821999841214, "loss": 11.6705, "step": 50475 }, { "epoch": 1.0566021937536632, "grad_norm": 0.3338685929775238, "learning_rate": 0.00014482003987425961, "loss": 11.6767, "step": 50476 }, { "epoch": 1.0566231265176254, "grad_norm": 0.29560378193855286, "learning_rate": 0.00014481807988575934, "loss": 11.6807, "step": 50477 }, { "epoch": 1.0566440592815876, "grad_norm": 0.3164263367652893, "learning_rate": 0.0001448161198757141, "loss": 11.6682, "step": 50478 }, { "epoch": 1.0566649920455498, "grad_norm": 0.28601813316345215, "learning_rate": 0.00014481415984412483, "loss": 11.669, "step": 50479 }, { "epoch": 1.056685924809512, "grad_norm": 0.3265669345855713, "learning_rate": 0.00014481219979099253, "loss": 11.6706, "step": 50480 }, { "epoch": 1.056706857573474, "grad_norm": 0.341042697429657, "learning_rate": 0.00014481023971631808, "loss": 11.6721, "step": 50481 }, { "epoch": 1.0567277903374361, "grad_norm": 0.2758888602256775, "learning_rate": 0.00014480827962010245, "loss": 11.6839, "step": 50482 }, { "epoch": 1.0567487231013983, "grad_norm": 0.29860299825668335, "learning_rate": 0.00014480631950234663, "loss": 11.6648, "step": 50483 }, { "epoch": 1.0567696558653605, "grad_norm": 0.3467883765697479, "learning_rate": 0.00014480435936305145, "loss": 11.6723, "step": 50484 }, { "epoch": 1.0567905886293225, "grad_norm": 0.31179043650627136, "learning_rate": 0.00014480239920221797, "loss": 11.6595, "step": 50485 }, { "epoch": 1.0568115213932847, "grad_norm": 0.3044857680797577, "learning_rate": 0.00014480043901984706, "loss": 11.6709, "step": 50486 }, { "epoch": 1.056832454157247, "grad_norm": 0.2706378996372223, "learning_rate": 0.00014479847881593966, "loss": 11.677, "step": 50487 }, { "epoch": 1.0568533869212091, "grad_norm": 0.2828003466129303, "learning_rate": 0.00014479651859049673, "loss": 11.6744, "step": 50488 }, { "epoch": 1.0568743196851713, "grad_norm": 0.2878054082393646, "learning_rate": 0.00014479455834351927, "loss": 11.6646, "step": 50489 }, { "epoch": 1.0568952524491333, "grad_norm": 0.3313465118408203, "learning_rate": 0.00014479259807500814, "loss": 11.6709, "step": 50490 }, { "epoch": 1.0569161852130955, "grad_norm": 0.3744750916957855, "learning_rate": 0.0001447906377849643, "loss": 11.6589, "step": 50491 }, { "epoch": 1.0569371179770577, "grad_norm": 0.28694307804107666, "learning_rate": 0.00014478867747338872, "loss": 11.6792, "step": 50492 }, { "epoch": 1.05695805074102, "grad_norm": 0.2925259470939636, "learning_rate": 0.0001447867171402823, "loss": 11.6663, "step": 50493 }, { "epoch": 1.056978983504982, "grad_norm": 0.3324456214904785, "learning_rate": 0.00014478475678564602, "loss": 11.6531, "step": 50494 }, { "epoch": 1.056999916268944, "grad_norm": 0.3084881007671356, "learning_rate": 0.0001447827964094808, "loss": 11.6891, "step": 50495 }, { "epoch": 1.0570208490329063, "grad_norm": 0.340880423784256, "learning_rate": 0.0001447808360117876, "loss": 11.6612, "step": 50496 }, { "epoch": 1.0570417817968685, "grad_norm": 0.2908499836921692, "learning_rate": 0.00014477887559256737, "loss": 11.6613, "step": 50497 }, { "epoch": 1.0570627145608307, "grad_norm": 0.4482882022857666, "learning_rate": 0.00014477691515182102, "loss": 11.6719, "step": 50498 }, { "epoch": 1.0570836473247929, "grad_norm": 0.27672386169433594, "learning_rate": 0.00014477495468954952, "loss": 11.6685, "step": 50499 }, { "epoch": 1.0571045800887549, "grad_norm": 0.42779260873794556, "learning_rate": 0.0001447729942057538, "loss": 11.6948, "step": 50500 }, { "epoch": 1.057125512852717, "grad_norm": 0.2624865472316742, "learning_rate": 0.00014477103370043478, "loss": 11.6778, "step": 50501 }, { "epoch": 1.0571464456166793, "grad_norm": 0.4655524790287018, "learning_rate": 0.00014476907317359348, "loss": 11.6666, "step": 50502 }, { "epoch": 1.0571673783806415, "grad_norm": 0.2522641718387604, "learning_rate": 0.00014476711262523072, "loss": 11.6614, "step": 50503 }, { "epoch": 1.0571883111446034, "grad_norm": 0.3121509253978729, "learning_rate": 0.00014476515205534758, "loss": 11.6565, "step": 50504 }, { "epoch": 1.0572092439085656, "grad_norm": 0.45093899965286255, "learning_rate": 0.0001447631914639449, "loss": 11.6709, "step": 50505 }, { "epoch": 1.0572301766725278, "grad_norm": 0.3669288456439972, "learning_rate": 0.00014476123085102363, "loss": 11.6643, "step": 50506 }, { "epoch": 1.05725110943649, "grad_norm": 0.36594846844673157, "learning_rate": 0.00014475927021658479, "loss": 11.6689, "step": 50507 }, { "epoch": 1.0572720422004522, "grad_norm": 0.4037346839904785, "learning_rate": 0.00014475730956062924, "loss": 11.6685, "step": 50508 }, { "epoch": 1.0572929749644142, "grad_norm": 0.25371265411376953, "learning_rate": 0.00014475534888315798, "loss": 11.6769, "step": 50509 }, { "epoch": 1.0573139077283764, "grad_norm": 0.4287832975387573, "learning_rate": 0.0001447533881841719, "loss": 11.6679, "step": 50510 }, { "epoch": 1.0573348404923386, "grad_norm": 0.23624588549137115, "learning_rate": 0.000144751427463672, "loss": 11.6886, "step": 50511 }, { "epoch": 1.0573557732563008, "grad_norm": 0.26031991839408875, "learning_rate": 0.00014474946672165916, "loss": 11.6546, "step": 50512 }, { "epoch": 1.057376706020263, "grad_norm": 0.34597843885421753, "learning_rate": 0.00014474750595813436, "loss": 11.6663, "step": 50513 }, { "epoch": 1.057397638784225, "grad_norm": 0.27689507603645325, "learning_rate": 0.00014474554517309856, "loss": 11.6536, "step": 50514 }, { "epoch": 1.0574185715481872, "grad_norm": 0.4293835461139679, "learning_rate": 0.00014474358436655264, "loss": 11.6852, "step": 50515 }, { "epoch": 1.0574395043121494, "grad_norm": 0.9940298199653625, "learning_rate": 0.00014474162353849762, "loss": 11.6883, "step": 50516 }, { "epoch": 1.0574604370761116, "grad_norm": 0.3407323956489563, "learning_rate": 0.0001447396626889344, "loss": 11.6734, "step": 50517 }, { "epoch": 1.0574813698400738, "grad_norm": 0.3319079577922821, "learning_rate": 0.0001447377018178639, "loss": 11.6706, "step": 50518 }, { "epoch": 1.0575023026040358, "grad_norm": 0.3566280007362366, "learning_rate": 0.0001447357409252871, "loss": 11.6766, "step": 50519 }, { "epoch": 1.057523235367998, "grad_norm": 0.3945353925228119, "learning_rate": 0.00014473378001120496, "loss": 11.6926, "step": 50520 }, { "epoch": 1.0575441681319602, "grad_norm": 0.4101446270942688, "learning_rate": 0.00014473181907561839, "loss": 11.6715, "step": 50521 }, { "epoch": 1.0575651008959224, "grad_norm": 0.38162297010421753, "learning_rate": 0.00014472985811852832, "loss": 11.6766, "step": 50522 }, { "epoch": 1.0575860336598844, "grad_norm": 0.35971298813819885, "learning_rate": 0.0001447278971399357, "loss": 11.6858, "step": 50523 }, { "epoch": 1.0576069664238466, "grad_norm": 0.33369067311286926, "learning_rate": 0.00014472593613984152, "loss": 11.6628, "step": 50524 }, { "epoch": 1.0576278991878088, "grad_norm": 0.2867741286754608, "learning_rate": 0.00014472397511824668, "loss": 11.6786, "step": 50525 }, { "epoch": 1.057648831951771, "grad_norm": 0.28410348296165466, "learning_rate": 0.0001447220140751521, "loss": 11.6806, "step": 50526 }, { "epoch": 1.0576697647157332, "grad_norm": 0.349894255399704, "learning_rate": 0.0001447200530105588, "loss": 11.6662, "step": 50527 }, { "epoch": 1.0576906974796951, "grad_norm": 0.33403706550598145, "learning_rate": 0.00014471809192446758, "loss": 11.6564, "step": 50528 }, { "epoch": 1.0577116302436573, "grad_norm": 0.33851704001426697, "learning_rate": 0.00014471613081687957, "loss": 11.6784, "step": 50529 }, { "epoch": 1.0577325630076195, "grad_norm": 0.3265189528465271, "learning_rate": 0.00014471416968779558, "loss": 11.6825, "step": 50530 }, { "epoch": 1.0577534957715817, "grad_norm": 0.36961808800697327, "learning_rate": 0.00014471220853721662, "loss": 11.6743, "step": 50531 }, { "epoch": 1.057774428535544, "grad_norm": 0.3508683443069458, "learning_rate": 0.00014471024736514358, "loss": 11.6528, "step": 50532 }, { "epoch": 1.057795361299506, "grad_norm": 0.2720710039138794, "learning_rate": 0.00014470828617157744, "loss": 11.6782, "step": 50533 }, { "epoch": 1.057816294063468, "grad_norm": 0.3888613283634186, "learning_rate": 0.0001447063249565191, "loss": 11.6826, "step": 50534 }, { "epoch": 1.0578372268274303, "grad_norm": 0.28228408098220825, "learning_rate": 0.00014470436371996956, "loss": 11.6844, "step": 50535 }, { "epoch": 1.0578581595913925, "grad_norm": 0.4260936379432678, "learning_rate": 0.00014470240246192977, "loss": 11.6704, "step": 50536 }, { "epoch": 1.0578790923553547, "grad_norm": 0.2858143746852875, "learning_rate": 0.00014470044118240058, "loss": 11.6593, "step": 50537 }, { "epoch": 1.0579000251193167, "grad_norm": 0.34055185317993164, "learning_rate": 0.00014469847988138302, "loss": 11.6654, "step": 50538 }, { "epoch": 1.057920957883279, "grad_norm": 0.34483009576797485, "learning_rate": 0.000144696518558878, "loss": 11.6679, "step": 50539 }, { "epoch": 1.057941890647241, "grad_norm": 0.34091463685035706, "learning_rate": 0.00014469455721488646, "loss": 11.6602, "step": 50540 }, { "epoch": 1.0579628234112033, "grad_norm": 0.2979668378829956, "learning_rate": 0.00014469259584940937, "loss": 11.6677, "step": 50541 }, { "epoch": 1.0579837561751653, "grad_norm": 0.34267646074295044, "learning_rate": 0.00014469063446244764, "loss": 11.6857, "step": 50542 }, { "epoch": 1.0580046889391275, "grad_norm": 0.283256858587265, "learning_rate": 0.00014468867305400226, "loss": 11.6634, "step": 50543 }, { "epoch": 1.0580256217030897, "grad_norm": 0.26998308300971985, "learning_rate": 0.00014468671162407408, "loss": 11.6697, "step": 50544 }, { "epoch": 1.0580465544670519, "grad_norm": 0.36866825819015503, "learning_rate": 0.00014468475017266413, "loss": 11.6627, "step": 50545 }, { "epoch": 1.058067487231014, "grad_norm": 0.33271899819374084, "learning_rate": 0.00014468278869977333, "loss": 11.6707, "step": 50546 }, { "epoch": 1.058088419994976, "grad_norm": 0.3604007959365845, "learning_rate": 0.00014468082720540262, "loss": 11.6762, "step": 50547 }, { "epoch": 1.0581093527589382, "grad_norm": 0.3817827105522156, "learning_rate": 0.00014467886568955295, "loss": 11.6787, "step": 50548 }, { "epoch": 1.0581302855229004, "grad_norm": 0.3734782338142395, "learning_rate": 0.00014467690415222523, "loss": 11.6736, "step": 50549 }, { "epoch": 1.0581512182868626, "grad_norm": 0.347403883934021, "learning_rate": 0.00014467494259342044, "loss": 11.6673, "step": 50550 }, { "epoch": 1.0581721510508248, "grad_norm": 0.3053886294364929, "learning_rate": 0.0001446729810131395, "loss": 11.6546, "step": 50551 }, { "epoch": 1.0581930838147868, "grad_norm": 0.32249656319618225, "learning_rate": 0.00014467101941138333, "loss": 11.6754, "step": 50552 }, { "epoch": 1.058214016578749, "grad_norm": 0.3362913429737091, "learning_rate": 0.00014466905778815297, "loss": 11.6463, "step": 50553 }, { "epoch": 1.0582349493427112, "grad_norm": 0.25390204787254333, "learning_rate": 0.00014466709614344928, "loss": 11.6714, "step": 50554 }, { "epoch": 1.0582558821066734, "grad_norm": 0.3367820382118225, "learning_rate": 0.00014466513447727324, "loss": 11.6586, "step": 50555 }, { "epoch": 1.0582768148706356, "grad_norm": 0.26416558027267456, "learning_rate": 0.00014466317278962574, "loss": 11.6686, "step": 50556 }, { "epoch": 1.0582977476345976, "grad_norm": 0.28591233491897583, "learning_rate": 0.00014466121108050773, "loss": 11.6505, "step": 50557 }, { "epoch": 1.0583186803985598, "grad_norm": 0.2841835618019104, "learning_rate": 0.00014465924934992027, "loss": 11.6472, "step": 50558 }, { "epoch": 1.058339613162522, "grad_norm": 0.30109846591949463, "learning_rate": 0.00014465728759786414, "loss": 11.6664, "step": 50559 }, { "epoch": 1.0583605459264842, "grad_norm": 0.4437645673751831, "learning_rate": 0.00014465532582434037, "loss": 11.6852, "step": 50560 }, { "epoch": 1.0583814786904462, "grad_norm": 0.2911960780620575, "learning_rate": 0.0001446533640293499, "loss": 11.659, "step": 50561 }, { "epoch": 1.0584024114544084, "grad_norm": 0.36832940578460693, "learning_rate": 0.00014465140221289367, "loss": 11.6627, "step": 50562 }, { "epoch": 1.0584233442183706, "grad_norm": 0.36495131254196167, "learning_rate": 0.0001446494403749726, "loss": 11.6723, "step": 50563 }, { "epoch": 1.0584442769823328, "grad_norm": 0.31825652718544006, "learning_rate": 0.00014464747851558767, "loss": 11.6702, "step": 50564 }, { "epoch": 1.058465209746295, "grad_norm": 0.3427252471446991, "learning_rate": 0.00014464551663473979, "loss": 11.6707, "step": 50565 }, { "epoch": 1.058486142510257, "grad_norm": 0.29940518736839294, "learning_rate": 0.0001446435547324299, "loss": 11.6676, "step": 50566 }, { "epoch": 1.0585070752742192, "grad_norm": 0.28116869926452637, "learning_rate": 0.000144641592808659, "loss": 11.6676, "step": 50567 }, { "epoch": 1.0585280080381814, "grad_norm": 0.47698020935058594, "learning_rate": 0.00014463963086342797, "loss": 11.6452, "step": 50568 }, { "epoch": 1.0585489408021436, "grad_norm": 0.33696693181991577, "learning_rate": 0.00014463766889673773, "loss": 11.6784, "step": 50569 }, { "epoch": 1.0585698735661058, "grad_norm": 0.322468101978302, "learning_rate": 0.00014463570690858934, "loss": 11.6537, "step": 50570 }, { "epoch": 1.0585908063300677, "grad_norm": 0.38064903020858765, "learning_rate": 0.00014463374489898364, "loss": 11.6547, "step": 50571 }, { "epoch": 1.05861173909403, "grad_norm": 0.2683473825454712, "learning_rate": 0.00014463178286792158, "loss": 11.6632, "step": 50572 }, { "epoch": 1.0586326718579921, "grad_norm": 0.24999377131462097, "learning_rate": 0.00014462982081540416, "loss": 11.6517, "step": 50573 }, { "epoch": 1.0586536046219543, "grad_norm": 0.3112713396549225, "learning_rate": 0.00014462785874143228, "loss": 11.6615, "step": 50574 }, { "epoch": 1.0586745373859165, "grad_norm": 0.36250728368759155, "learning_rate": 0.00014462589664600693, "loss": 11.68, "step": 50575 }, { "epoch": 1.0586954701498785, "grad_norm": 0.31972071528434753, "learning_rate": 0.00014462393452912894, "loss": 11.6696, "step": 50576 }, { "epoch": 1.0587164029138407, "grad_norm": 0.2570520043373108, "learning_rate": 0.0001446219723907994, "loss": 11.6506, "step": 50577 }, { "epoch": 1.058737335677803, "grad_norm": 0.3862147629261017, "learning_rate": 0.00014462001023101914, "loss": 11.6541, "step": 50578 }, { "epoch": 1.0587582684417651, "grad_norm": 0.26945969462394714, "learning_rate": 0.00014461804804978917, "loss": 11.66, "step": 50579 }, { "epoch": 1.058779201205727, "grad_norm": 0.29777348041534424, "learning_rate": 0.00014461608584711043, "loss": 11.6842, "step": 50580 }, { "epoch": 1.0588001339696893, "grad_norm": 0.4570271670818329, "learning_rate": 0.00014461412362298378, "loss": 11.6788, "step": 50581 }, { "epoch": 1.0588210667336515, "grad_norm": 0.2710368037223816, "learning_rate": 0.00014461216137741029, "loss": 11.6786, "step": 50582 }, { "epoch": 1.0588419994976137, "grad_norm": 0.3618740141391754, "learning_rate": 0.00014461019911039079, "loss": 11.6918, "step": 50583 }, { "epoch": 1.058862932261576, "grad_norm": 0.3343166708946228, "learning_rate": 0.0001446082368219263, "loss": 11.6695, "step": 50584 }, { "epoch": 1.0588838650255379, "grad_norm": 0.35056743025779724, "learning_rate": 0.00014460627451201775, "loss": 11.6905, "step": 50585 }, { "epoch": 1.0589047977895, "grad_norm": 0.3127937614917755, "learning_rate": 0.00014460431218066603, "loss": 11.6698, "step": 50586 }, { "epoch": 1.0589257305534623, "grad_norm": 0.3427045941352844, "learning_rate": 0.00014460234982787218, "loss": 11.6547, "step": 50587 }, { "epoch": 1.0589466633174245, "grad_norm": 0.5096153616905212, "learning_rate": 0.00014460038745363704, "loss": 11.6716, "step": 50588 }, { "epoch": 1.0589675960813867, "grad_norm": 0.374332994222641, "learning_rate": 0.0001445984250579616, "loss": 11.6722, "step": 50589 }, { "epoch": 1.0589885288453487, "grad_norm": 0.27803540229797363, "learning_rate": 0.00014459646264084684, "loss": 11.6673, "step": 50590 }, { "epoch": 1.0590094616093109, "grad_norm": 0.3184093236923218, "learning_rate": 0.00014459450020229363, "loss": 11.6668, "step": 50591 }, { "epoch": 1.059030394373273, "grad_norm": 0.2923834025859833, "learning_rate": 0.00014459253774230297, "loss": 11.6508, "step": 50592 }, { "epoch": 1.0590513271372353, "grad_norm": 0.3930121660232544, "learning_rate": 0.00014459057526087577, "loss": 11.6826, "step": 50593 }, { "epoch": 1.0590722599011975, "grad_norm": 0.2737888693809509, "learning_rate": 0.000144588612758013, "loss": 11.6658, "step": 50594 }, { "epoch": 1.0590931926651594, "grad_norm": 0.31822147965431213, "learning_rate": 0.0001445866502337156, "loss": 11.6731, "step": 50595 }, { "epoch": 1.0591141254291216, "grad_norm": 0.2612706422805786, "learning_rate": 0.00014458468768798448, "loss": 11.6677, "step": 50596 }, { "epoch": 1.0591350581930838, "grad_norm": 0.3319993317127228, "learning_rate": 0.00014458272512082063, "loss": 11.6703, "step": 50597 }, { "epoch": 1.059155990957046, "grad_norm": 0.3966977298259735, "learning_rate": 0.00014458076253222495, "loss": 11.6631, "step": 50598 }, { "epoch": 1.059176923721008, "grad_norm": 0.3253273367881775, "learning_rate": 0.00014457879992219838, "loss": 11.6743, "step": 50599 }, { "epoch": 1.0591978564849702, "grad_norm": 0.3521905243396759, "learning_rate": 0.00014457683729074194, "loss": 11.658, "step": 50600 }, { "epoch": 1.0592187892489324, "grad_norm": 0.28079041838645935, "learning_rate": 0.0001445748746378565, "loss": 11.6722, "step": 50601 }, { "epoch": 1.0592397220128946, "grad_norm": 0.2884652316570282, "learning_rate": 0.00014457291196354302, "loss": 11.6679, "step": 50602 }, { "epoch": 1.0592606547768568, "grad_norm": 0.3556962311267853, "learning_rate": 0.00014457094926780245, "loss": 11.6768, "step": 50603 }, { "epoch": 1.0592815875408188, "grad_norm": 0.30022943019866943, "learning_rate": 0.00014456898655063576, "loss": 11.6569, "step": 50604 }, { "epoch": 1.059302520304781, "grad_norm": 0.3082343339920044, "learning_rate": 0.00014456702381204382, "loss": 11.6653, "step": 50605 }, { "epoch": 1.0593234530687432, "grad_norm": 0.3340674340724945, "learning_rate": 0.00014456506105202762, "loss": 11.6629, "step": 50606 }, { "epoch": 1.0593443858327054, "grad_norm": 0.3803129196166992, "learning_rate": 0.00014456309827058815, "loss": 11.6809, "step": 50607 }, { "epoch": 1.0593653185966676, "grad_norm": 0.34085384011268616, "learning_rate": 0.00014456113546772626, "loss": 11.6626, "step": 50608 }, { "epoch": 1.0593862513606296, "grad_norm": 0.29108795523643494, "learning_rate": 0.00014455917264344294, "loss": 11.6701, "step": 50609 }, { "epoch": 1.0594071841245918, "grad_norm": 0.33337438106536865, "learning_rate": 0.00014455720979773915, "loss": 11.666, "step": 50610 }, { "epoch": 1.059428116888554, "grad_norm": 0.2778509557247162, "learning_rate": 0.0001445552469306158, "loss": 11.6683, "step": 50611 }, { "epoch": 1.0594490496525162, "grad_norm": 0.2573159635066986, "learning_rate": 0.0001445532840420739, "loss": 11.6646, "step": 50612 }, { "epoch": 1.0594699824164784, "grad_norm": 0.27710503339767456, "learning_rate": 0.00014455132113211426, "loss": 11.6663, "step": 50613 }, { "epoch": 1.0594909151804404, "grad_norm": 0.2980513572692871, "learning_rate": 0.00014454935820073798, "loss": 11.6535, "step": 50614 }, { "epoch": 1.0595118479444026, "grad_norm": 0.286303848028183, "learning_rate": 0.0001445473952479459, "loss": 11.6728, "step": 50615 }, { "epoch": 1.0595327807083648, "grad_norm": 0.4191254675388336, "learning_rate": 0.00014454543227373899, "loss": 11.6547, "step": 50616 }, { "epoch": 1.059553713472327, "grad_norm": 0.3574497103691101, "learning_rate": 0.00014454346927811822, "loss": 11.6793, "step": 50617 }, { "epoch": 1.059574646236289, "grad_norm": 0.31869345903396606, "learning_rate": 0.00014454150626108447, "loss": 11.6561, "step": 50618 }, { "epoch": 1.0595955790002511, "grad_norm": 0.4150727093219757, "learning_rate": 0.00014453954322263875, "loss": 11.6764, "step": 50619 }, { "epoch": 1.0596165117642133, "grad_norm": 0.3936295807361603, "learning_rate": 0.000144537580162782, "loss": 11.6734, "step": 50620 }, { "epoch": 1.0596374445281755, "grad_norm": 0.33385059237480164, "learning_rate": 0.00014453561708151512, "loss": 11.6696, "step": 50621 }, { "epoch": 1.0596583772921377, "grad_norm": 0.5162642002105713, "learning_rate": 0.0001445336539788391, "loss": 11.6791, "step": 50622 }, { "epoch": 1.0596793100560997, "grad_norm": 0.278341144323349, "learning_rate": 0.00014453169085475482, "loss": 11.6727, "step": 50623 }, { "epoch": 1.059700242820062, "grad_norm": 0.32629406452178955, "learning_rate": 0.0001445297277092633, "loss": 11.6591, "step": 50624 }, { "epoch": 1.0597211755840241, "grad_norm": 0.34352803230285645, "learning_rate": 0.00014452776454236543, "loss": 11.6715, "step": 50625 }, { "epoch": 1.0597421083479863, "grad_norm": 0.4022357165813446, "learning_rate": 0.00014452580135406218, "loss": 11.6881, "step": 50626 }, { "epoch": 1.0597630411119485, "grad_norm": 0.26982173323631287, "learning_rate": 0.00014452383814435446, "loss": 11.6808, "step": 50627 }, { "epoch": 1.0597839738759105, "grad_norm": 0.2536810338497162, "learning_rate": 0.00014452187491324325, "loss": 11.6464, "step": 50628 }, { "epoch": 1.0598049066398727, "grad_norm": 0.4254714548587799, "learning_rate": 0.0001445199116607295, "loss": 11.667, "step": 50629 }, { "epoch": 1.059825839403835, "grad_norm": 0.34789741039276123, "learning_rate": 0.0001445179483868141, "loss": 11.6684, "step": 50630 }, { "epoch": 1.059846772167797, "grad_norm": 0.2985673248767853, "learning_rate": 0.00014451598509149807, "loss": 11.6654, "step": 50631 }, { "epoch": 1.0598677049317593, "grad_norm": 0.33141157031059265, "learning_rate": 0.00014451402177478233, "loss": 11.6565, "step": 50632 }, { "epoch": 1.0598886376957213, "grad_norm": 0.2527666687965393, "learning_rate": 0.00014451205843666776, "loss": 11.6581, "step": 50633 }, { "epoch": 1.0599095704596835, "grad_norm": 0.3206212520599365, "learning_rate": 0.00014451009507715536, "loss": 11.667, "step": 50634 }, { "epoch": 1.0599305032236457, "grad_norm": 0.3758145868778229, "learning_rate": 0.0001445081316962461, "loss": 11.6584, "step": 50635 }, { "epoch": 1.0599514359876079, "grad_norm": 0.3778079152107239, "learning_rate": 0.00014450616829394085, "loss": 11.6925, "step": 50636 }, { "epoch": 1.0599723687515699, "grad_norm": 0.3212054967880249, "learning_rate": 0.00014450420487024062, "loss": 11.6797, "step": 50637 }, { "epoch": 1.059993301515532, "grad_norm": 0.29677435755729675, "learning_rate": 0.00014450224142514628, "loss": 11.6788, "step": 50638 }, { "epoch": 1.0600142342794943, "grad_norm": 0.3365219831466675, "learning_rate": 0.00014450027795865887, "loss": 11.6858, "step": 50639 }, { "epoch": 1.0600351670434565, "grad_norm": 0.32396575808525085, "learning_rate": 0.00014449831447077926, "loss": 11.6721, "step": 50640 }, { "epoch": 1.0600560998074187, "grad_norm": 0.300326406955719, "learning_rate": 0.00014449635096150845, "loss": 11.6746, "step": 50641 }, { "epoch": 1.0600770325713806, "grad_norm": 0.28492429852485657, "learning_rate": 0.0001444943874308473, "loss": 11.6903, "step": 50642 }, { "epoch": 1.0600979653353428, "grad_norm": 0.29529473185539246, "learning_rate": 0.00014449242387879686, "loss": 11.6552, "step": 50643 }, { "epoch": 1.060118898099305, "grad_norm": 0.3215833306312561, "learning_rate": 0.00014449046030535798, "loss": 11.6764, "step": 50644 }, { "epoch": 1.0601398308632672, "grad_norm": 0.38538599014282227, "learning_rate": 0.00014448849671053166, "loss": 11.6714, "step": 50645 }, { "epoch": 1.0601607636272294, "grad_norm": 0.34587687253952026, "learning_rate": 0.00014448653309431886, "loss": 11.6578, "step": 50646 }, { "epoch": 1.0601816963911914, "grad_norm": 0.4641013443470001, "learning_rate": 0.00014448456945672042, "loss": 11.6795, "step": 50647 }, { "epoch": 1.0602026291551536, "grad_norm": 0.3025854527950287, "learning_rate": 0.00014448260579773742, "loss": 11.6622, "step": 50648 }, { "epoch": 1.0602235619191158, "grad_norm": 0.29060453176498413, "learning_rate": 0.00014448064211737072, "loss": 11.67, "step": 50649 }, { "epoch": 1.060244494683078, "grad_norm": 0.2675279974937439, "learning_rate": 0.00014447867841562128, "loss": 11.6807, "step": 50650 }, { "epoch": 1.0602654274470402, "grad_norm": 0.30476319789886475, "learning_rate": 0.00014447671469249005, "loss": 11.6683, "step": 50651 }, { "epoch": 1.0602863602110022, "grad_norm": 0.33748990297317505, "learning_rate": 0.00014447475094797797, "loss": 11.6742, "step": 50652 }, { "epoch": 1.0603072929749644, "grad_norm": 0.2811448872089386, "learning_rate": 0.000144472787182086, "loss": 11.66, "step": 50653 }, { "epoch": 1.0603282257389266, "grad_norm": 0.28683510422706604, "learning_rate": 0.000144470823394815, "loss": 11.6569, "step": 50654 }, { "epoch": 1.0603491585028888, "grad_norm": 0.35281530022621155, "learning_rate": 0.00014446885958616608, "loss": 11.672, "step": 50655 }, { "epoch": 1.0603700912668508, "grad_norm": 0.3053148686885834, "learning_rate": 0.00014446689575614002, "loss": 11.6461, "step": 50656 }, { "epoch": 1.060391024030813, "grad_norm": 0.26109829545021057, "learning_rate": 0.00014446493190473788, "loss": 11.6439, "step": 50657 }, { "epoch": 1.0604119567947752, "grad_norm": 0.3128221929073334, "learning_rate": 0.00014446296803196054, "loss": 11.6747, "step": 50658 }, { "epoch": 1.0604328895587374, "grad_norm": 0.25378549098968506, "learning_rate": 0.00014446100413780894, "loss": 11.6606, "step": 50659 }, { "epoch": 1.0604538223226996, "grad_norm": 0.3178195059299469, "learning_rate": 0.00014445904022228407, "loss": 11.6715, "step": 50660 }, { "epoch": 1.0604747550866616, "grad_norm": 0.3510105609893799, "learning_rate": 0.00014445707628538682, "loss": 11.6766, "step": 50661 }, { "epoch": 1.0604956878506238, "grad_norm": 0.27138757705688477, "learning_rate": 0.0001444551123271182, "loss": 11.672, "step": 50662 }, { "epoch": 1.060516620614586, "grad_norm": 0.3023621141910553, "learning_rate": 0.00014445314834747908, "loss": 11.659, "step": 50663 }, { "epoch": 1.0605375533785482, "grad_norm": 0.42850974202156067, "learning_rate": 0.0001444511843464705, "loss": 11.6628, "step": 50664 }, { "epoch": 1.0605584861425104, "grad_norm": 0.2899433374404907, "learning_rate": 0.0001444492203240933, "loss": 11.6656, "step": 50665 }, { "epoch": 1.0605794189064723, "grad_norm": 0.27549612522125244, "learning_rate": 0.00014444725628034845, "loss": 11.676, "step": 50666 }, { "epoch": 1.0606003516704345, "grad_norm": 0.27523836493492126, "learning_rate": 0.00014444529221523694, "loss": 11.6639, "step": 50667 }, { "epoch": 1.0606212844343967, "grad_norm": 0.2971232533454895, "learning_rate": 0.0001444433281287597, "loss": 11.6874, "step": 50668 }, { "epoch": 1.060642217198359, "grad_norm": 0.46414870023727417, "learning_rate": 0.00014444136402091764, "loss": 11.6532, "step": 50669 }, { "epoch": 1.0606631499623211, "grad_norm": 0.2846214771270752, "learning_rate": 0.0001444393998917117, "loss": 11.6969, "step": 50670 }, { "epoch": 1.0606840827262831, "grad_norm": 0.3433743417263031, "learning_rate": 0.00014443743574114293, "loss": 11.6738, "step": 50671 }, { "epoch": 1.0607050154902453, "grad_norm": 0.3371259868144989, "learning_rate": 0.00014443547156921214, "loss": 11.6725, "step": 50672 }, { "epoch": 1.0607259482542075, "grad_norm": 0.3510604202747345, "learning_rate": 0.0001444335073759203, "loss": 11.6642, "step": 50673 }, { "epoch": 1.0607468810181697, "grad_norm": 0.3214612901210785, "learning_rate": 0.00014443154316126844, "loss": 11.6617, "step": 50674 }, { "epoch": 1.0607678137821317, "grad_norm": 0.32584238052368164, "learning_rate": 0.00014442957892525738, "loss": 11.6866, "step": 50675 }, { "epoch": 1.060788746546094, "grad_norm": 0.30917859077453613, "learning_rate": 0.0001444276146678882, "loss": 11.6534, "step": 50676 }, { "epoch": 1.060809679310056, "grad_norm": 0.2940444350242615, "learning_rate": 0.00014442565038916177, "loss": 11.6805, "step": 50677 }, { "epoch": 1.0608306120740183, "grad_norm": 0.34608200192451477, "learning_rate": 0.00014442368608907902, "loss": 11.6569, "step": 50678 }, { "epoch": 1.0608515448379805, "grad_norm": 0.32823362946510315, "learning_rate": 0.0001444217217676409, "loss": 11.6784, "step": 50679 }, { "epoch": 1.0608724776019425, "grad_norm": 0.30438482761383057, "learning_rate": 0.00014441975742484836, "loss": 11.6644, "step": 50680 }, { "epoch": 1.0608934103659047, "grad_norm": 0.33374860882759094, "learning_rate": 0.00014441779306070238, "loss": 11.6531, "step": 50681 }, { "epoch": 1.0609143431298669, "grad_norm": 0.35785940289497375, "learning_rate": 0.00014441582867520387, "loss": 11.6823, "step": 50682 }, { "epoch": 1.060935275893829, "grad_norm": 0.3762570917606354, "learning_rate": 0.0001444138642683538, "loss": 11.667, "step": 50683 }, { "epoch": 1.0609562086577913, "grad_norm": 0.35823217034339905, "learning_rate": 0.00014441189984015307, "loss": 11.6657, "step": 50684 }, { "epoch": 1.0609771414217533, "grad_norm": 0.2842153310775757, "learning_rate": 0.00014440993539060267, "loss": 11.668, "step": 50685 }, { "epoch": 1.0609980741857155, "grad_norm": 0.25271445512771606, "learning_rate": 0.00014440797091970348, "loss": 11.6817, "step": 50686 }, { "epoch": 1.0610190069496777, "grad_norm": 0.31071507930755615, "learning_rate": 0.00014440600642745652, "loss": 11.6717, "step": 50687 }, { "epoch": 1.0610399397136399, "grad_norm": 0.3315245509147644, "learning_rate": 0.00014440404191386273, "loss": 11.6802, "step": 50688 }, { "epoch": 1.0610608724776018, "grad_norm": 0.2750471830368042, "learning_rate": 0.00014440207737892298, "loss": 11.6733, "step": 50689 }, { "epoch": 1.061081805241564, "grad_norm": 0.32727158069610596, "learning_rate": 0.0001444001128226383, "loss": 11.656, "step": 50690 }, { "epoch": 1.0611027380055262, "grad_norm": 0.36092835664749146, "learning_rate": 0.00014439814824500954, "loss": 11.6734, "step": 50691 }, { "epoch": 1.0611236707694884, "grad_norm": 0.3443707227706909, "learning_rate": 0.00014439618364603775, "loss": 11.6806, "step": 50692 }, { "epoch": 1.0611446035334506, "grad_norm": 0.3212216794490814, "learning_rate": 0.0001443942190257238, "loss": 11.6639, "step": 50693 }, { "epoch": 1.0611655362974126, "grad_norm": 0.3570304214954376, "learning_rate": 0.00014439225438406868, "loss": 11.6608, "step": 50694 }, { "epoch": 1.0611864690613748, "grad_norm": 0.35281649231910706, "learning_rate": 0.0001443902897210733, "loss": 11.6601, "step": 50695 }, { "epoch": 1.061207401825337, "grad_norm": 0.3515084981918335, "learning_rate": 0.00014438832503673861, "loss": 11.6759, "step": 50696 }, { "epoch": 1.0612283345892992, "grad_norm": 0.30563151836395264, "learning_rate": 0.0001443863603310656, "loss": 11.6584, "step": 50697 }, { "epoch": 1.0612492673532614, "grad_norm": 0.26863357424736023, "learning_rate": 0.00014438439560405514, "loss": 11.6627, "step": 50698 }, { "epoch": 1.0612702001172234, "grad_norm": 0.2866172790527344, "learning_rate": 0.0001443824308557082, "loss": 11.6806, "step": 50699 }, { "epoch": 1.0612911328811856, "grad_norm": 0.3838295638561249, "learning_rate": 0.00014438046608602578, "loss": 11.6715, "step": 50700 }, { "epoch": 1.0613120656451478, "grad_norm": 0.3730832040309906, "learning_rate": 0.00014437850129500875, "loss": 11.6639, "step": 50701 }, { "epoch": 1.06133299840911, "grad_norm": 0.37839582562446594, "learning_rate": 0.0001443765364826581, "loss": 11.6657, "step": 50702 }, { "epoch": 1.0613539311730722, "grad_norm": 0.24412770569324493, "learning_rate": 0.00014437457164897475, "loss": 11.6631, "step": 50703 }, { "epoch": 1.0613748639370342, "grad_norm": 0.40935876965522766, "learning_rate": 0.00014437260679395965, "loss": 11.6671, "step": 50704 }, { "epoch": 1.0613957967009964, "grad_norm": 0.24875770509243011, "learning_rate": 0.00014437064191761377, "loss": 11.6753, "step": 50705 }, { "epoch": 1.0614167294649586, "grad_norm": 0.2869669795036316, "learning_rate": 0.00014436867701993802, "loss": 11.6664, "step": 50706 }, { "epoch": 1.0614376622289208, "grad_norm": 0.3833417296409607, "learning_rate": 0.00014436671210093336, "loss": 11.6672, "step": 50707 }, { "epoch": 1.0614585949928828, "grad_norm": 0.3439514935016632, "learning_rate": 0.0001443647471606007, "loss": 11.6645, "step": 50708 }, { "epoch": 1.061479527756845, "grad_norm": 0.3293008804321289, "learning_rate": 0.00014436278219894107, "loss": 11.6721, "step": 50709 }, { "epoch": 1.0615004605208072, "grad_norm": 0.29659274220466614, "learning_rate": 0.00014436081721595534, "loss": 11.6539, "step": 50710 }, { "epoch": 1.0615213932847694, "grad_norm": 0.3165239095687866, "learning_rate": 0.00014435885221164446, "loss": 11.6704, "step": 50711 }, { "epoch": 1.0615423260487316, "grad_norm": 0.34426021575927734, "learning_rate": 0.00014435688718600942, "loss": 11.6468, "step": 50712 }, { "epoch": 1.0615632588126935, "grad_norm": 0.3429790735244751, "learning_rate": 0.00014435492213905112, "loss": 11.6764, "step": 50713 }, { "epoch": 1.0615841915766557, "grad_norm": 0.3283572196960449, "learning_rate": 0.00014435295707077053, "loss": 11.689, "step": 50714 }, { "epoch": 1.061605124340618, "grad_norm": 0.3250667154788971, "learning_rate": 0.00014435099198116858, "loss": 11.6658, "step": 50715 }, { "epoch": 1.0616260571045801, "grad_norm": 0.42309489846229553, "learning_rate": 0.00014434902687024618, "loss": 11.6774, "step": 50716 }, { "epoch": 1.0616469898685423, "grad_norm": 0.33150410652160645, "learning_rate": 0.00014434706173800438, "loss": 11.6634, "step": 50717 }, { "epoch": 1.0616679226325043, "grad_norm": 0.36606696248054504, "learning_rate": 0.00014434509658444402, "loss": 11.6774, "step": 50718 }, { "epoch": 1.0616888553964665, "grad_norm": 0.31194987893104553, "learning_rate": 0.0001443431314095661, "loss": 11.6741, "step": 50719 }, { "epoch": 1.0617097881604287, "grad_norm": 0.3226590156555176, "learning_rate": 0.00014434116621337152, "loss": 11.6868, "step": 50720 }, { "epoch": 1.061730720924391, "grad_norm": 0.2995162010192871, "learning_rate": 0.00014433920099586126, "loss": 11.651, "step": 50721 }, { "epoch": 1.0617516536883531, "grad_norm": 0.3338499665260315, "learning_rate": 0.00014433723575703629, "loss": 11.6615, "step": 50722 }, { "epoch": 1.061772586452315, "grad_norm": 0.30518460273742676, "learning_rate": 0.0001443352704968975, "loss": 11.6563, "step": 50723 }, { "epoch": 1.0617935192162773, "grad_norm": 0.36426034569740295, "learning_rate": 0.00014433330521544587, "loss": 11.6681, "step": 50724 }, { "epoch": 1.0618144519802395, "grad_norm": 0.3488014340400696, "learning_rate": 0.0001443313399126823, "loss": 11.6526, "step": 50725 }, { "epoch": 1.0618353847442017, "grad_norm": 0.3416332006454468, "learning_rate": 0.00014432937458860778, "loss": 11.6638, "step": 50726 }, { "epoch": 1.0618563175081637, "grad_norm": 0.3808370530605316, "learning_rate": 0.00014432740924322327, "loss": 11.6545, "step": 50727 }, { "epoch": 1.0618772502721259, "grad_norm": 0.3260391056537628, "learning_rate": 0.00014432544387652966, "loss": 11.6758, "step": 50728 }, { "epoch": 1.061898183036088, "grad_norm": 0.38070395588874817, "learning_rate": 0.00014432347848852795, "loss": 11.6764, "step": 50729 }, { "epoch": 1.0619191158000503, "grad_norm": 0.3309116065502167, "learning_rate": 0.000144321513079219, "loss": 11.6631, "step": 50730 }, { "epoch": 1.0619400485640125, "grad_norm": 0.421281099319458, "learning_rate": 0.00014431954764860386, "loss": 11.669, "step": 50731 }, { "epoch": 1.0619609813279745, "grad_norm": 0.28635451197624207, "learning_rate": 0.0001443175821966834, "loss": 11.6674, "step": 50732 }, { "epoch": 1.0619819140919367, "grad_norm": 0.30281680822372437, "learning_rate": 0.0001443156167234586, "loss": 11.6647, "step": 50733 }, { "epoch": 1.0620028468558989, "grad_norm": 0.2704886496067047, "learning_rate": 0.0001443136512289304, "loss": 11.6742, "step": 50734 }, { "epoch": 1.062023779619861, "grad_norm": 0.3178492784500122, "learning_rate": 0.0001443116857130997, "loss": 11.6596, "step": 50735 }, { "epoch": 1.0620447123838233, "grad_norm": 0.337486207485199, "learning_rate": 0.00014430972017596754, "loss": 11.67, "step": 50736 }, { "epoch": 1.0620656451477852, "grad_norm": 0.27149879932403564, "learning_rate": 0.00014430775461753479, "loss": 11.6591, "step": 50737 }, { "epoch": 1.0620865779117474, "grad_norm": 0.2996070384979248, "learning_rate": 0.0001443057890378024, "loss": 11.6875, "step": 50738 }, { "epoch": 1.0621075106757096, "grad_norm": 0.3749340772628784, "learning_rate": 0.00014430382343677133, "loss": 11.6506, "step": 50739 }, { "epoch": 1.0621284434396718, "grad_norm": 0.27545803785324097, "learning_rate": 0.00014430185781444253, "loss": 11.6852, "step": 50740 }, { "epoch": 1.062149376203634, "grad_norm": 0.2920593023300171, "learning_rate": 0.00014429989217081696, "loss": 11.6786, "step": 50741 }, { "epoch": 1.062170308967596, "grad_norm": 0.27837052941322327, "learning_rate": 0.00014429792650589551, "loss": 11.6557, "step": 50742 }, { "epoch": 1.0621912417315582, "grad_norm": 0.3494129776954651, "learning_rate": 0.00014429596081967918, "loss": 11.6628, "step": 50743 }, { "epoch": 1.0622121744955204, "grad_norm": 0.2999606430530548, "learning_rate": 0.0001442939951121689, "loss": 11.6731, "step": 50744 }, { "epoch": 1.0622331072594826, "grad_norm": 0.38947993516921997, "learning_rate": 0.00014429202938336558, "loss": 11.6829, "step": 50745 }, { "epoch": 1.0622540400234446, "grad_norm": 0.36820298433303833, "learning_rate": 0.00014429006363327022, "loss": 11.666, "step": 50746 }, { "epoch": 1.0622749727874068, "grad_norm": 0.3242325186729431, "learning_rate": 0.00014428809786188373, "loss": 11.6556, "step": 50747 }, { "epoch": 1.062295905551369, "grad_norm": 0.3602287173271179, "learning_rate": 0.00014428613206920706, "loss": 11.6566, "step": 50748 }, { "epoch": 1.0623168383153312, "grad_norm": 0.33324602246284485, "learning_rate": 0.00014428416625524116, "loss": 11.6532, "step": 50749 }, { "epoch": 1.0623377710792934, "grad_norm": 0.27836936712265015, "learning_rate": 0.000144282200419987, "loss": 11.6476, "step": 50750 }, { "epoch": 1.0623587038432554, "grad_norm": 0.3987341523170471, "learning_rate": 0.00014428023456344548, "loss": 11.6741, "step": 50751 }, { "epoch": 1.0623796366072176, "grad_norm": 0.33837202191352844, "learning_rate": 0.00014427826868561753, "loss": 11.6741, "step": 50752 }, { "epoch": 1.0624005693711798, "grad_norm": 0.344270259141922, "learning_rate": 0.00014427630278650416, "loss": 11.6737, "step": 50753 }, { "epoch": 1.062421502135142, "grad_norm": 0.3275684714317322, "learning_rate": 0.0001442743368661063, "loss": 11.6657, "step": 50754 }, { "epoch": 1.0624424348991042, "grad_norm": 0.39077550172805786, "learning_rate": 0.00014427237092442486, "loss": 11.6741, "step": 50755 }, { "epoch": 1.0624633676630661, "grad_norm": 0.33906376361846924, "learning_rate": 0.0001442704049614608, "loss": 11.685, "step": 50756 }, { "epoch": 1.0624843004270283, "grad_norm": 0.3239835500717163, "learning_rate": 0.00014426843897721506, "loss": 11.6616, "step": 50757 }, { "epoch": 1.0625052331909906, "grad_norm": 0.2870526611804962, "learning_rate": 0.00014426647297168863, "loss": 11.6795, "step": 50758 }, { "epoch": 1.0625261659549528, "grad_norm": 0.3011797368526459, "learning_rate": 0.0001442645069448824, "loss": 11.6772, "step": 50759 }, { "epoch": 1.0625470987189147, "grad_norm": 0.3486955463886261, "learning_rate": 0.0001442625408967973, "loss": 11.6757, "step": 50760 }, { "epoch": 1.062568031482877, "grad_norm": 0.3349263370037079, "learning_rate": 0.00014426057482743438, "loss": 11.6821, "step": 50761 }, { "epoch": 1.0625889642468391, "grad_norm": 0.38347727060317993, "learning_rate": 0.00014425860873679445, "loss": 11.6707, "step": 50762 }, { "epoch": 1.0626098970108013, "grad_norm": 0.35298481583595276, "learning_rate": 0.00014425664262487857, "loss": 11.6728, "step": 50763 }, { "epoch": 1.0626308297747635, "grad_norm": 0.3173094391822815, "learning_rate": 0.0001442546764916876, "loss": 11.6658, "step": 50764 }, { "epoch": 1.0626517625387257, "grad_norm": 0.39141303300857544, "learning_rate": 0.00014425271033722254, "loss": 11.6667, "step": 50765 }, { "epoch": 1.0626726953026877, "grad_norm": 0.27151036262512207, "learning_rate": 0.00014425074416148432, "loss": 11.6681, "step": 50766 }, { "epoch": 1.06269362806665, "grad_norm": 0.25845861434936523, "learning_rate": 0.00014424877796447385, "loss": 11.6705, "step": 50767 }, { "epoch": 1.062714560830612, "grad_norm": 0.3785288333892822, "learning_rate": 0.00014424681174619216, "loss": 11.6702, "step": 50768 }, { "epoch": 1.0627354935945743, "grad_norm": 0.43692824244499207, "learning_rate": 0.0001442448455066401, "loss": 11.6593, "step": 50769 }, { "epoch": 1.0627564263585363, "grad_norm": 0.27532851696014404, "learning_rate": 0.00014424287924581864, "loss": 11.6788, "step": 50770 }, { "epoch": 1.0627773591224985, "grad_norm": 0.32370930910110474, "learning_rate": 0.00014424091296372877, "loss": 11.6846, "step": 50771 }, { "epoch": 1.0627982918864607, "grad_norm": 0.30339959263801575, "learning_rate": 0.0001442389466603714, "loss": 11.6762, "step": 50772 }, { "epoch": 1.062819224650423, "grad_norm": 0.35432514548301697, "learning_rate": 0.00014423698033574746, "loss": 11.6546, "step": 50773 }, { "epoch": 1.062840157414385, "grad_norm": 0.33660873770713806, "learning_rate": 0.00014423501398985795, "loss": 11.6939, "step": 50774 }, { "epoch": 1.062861090178347, "grad_norm": 0.2877105474472046, "learning_rate": 0.00014423304762270375, "loss": 11.6577, "step": 50775 }, { "epoch": 1.0628820229423093, "grad_norm": 0.31730780005455017, "learning_rate": 0.00014423108123428588, "loss": 11.685, "step": 50776 }, { "epoch": 1.0629029557062715, "grad_norm": 0.3232952654361725, "learning_rate": 0.0001442291148246052, "loss": 11.6609, "step": 50777 }, { "epoch": 1.0629238884702337, "grad_norm": 0.30738070607185364, "learning_rate": 0.00014422714839366273, "loss": 11.6825, "step": 50778 }, { "epoch": 1.0629448212341956, "grad_norm": 0.30532312393188477, "learning_rate": 0.00014422518194145937, "loss": 11.6747, "step": 50779 }, { "epoch": 1.0629657539981578, "grad_norm": 0.5479170083999634, "learning_rate": 0.00014422321546799608, "loss": 11.6716, "step": 50780 }, { "epoch": 1.06298668676212, "grad_norm": 0.301357239484787, "learning_rate": 0.00014422124897327381, "loss": 11.664, "step": 50781 }, { "epoch": 1.0630076195260822, "grad_norm": 0.30733999609947205, "learning_rate": 0.00014421928245729349, "loss": 11.6717, "step": 50782 }, { "epoch": 1.0630285522900444, "grad_norm": 0.4659646451473236, "learning_rate": 0.00014421731592005607, "loss": 11.6769, "step": 50783 }, { "epoch": 1.0630494850540066, "grad_norm": 0.3369462192058563, "learning_rate": 0.00014421534936156252, "loss": 11.6796, "step": 50784 }, { "epoch": 1.0630704178179686, "grad_norm": 0.33545008301734924, "learning_rate": 0.00014421338278181376, "loss": 11.6661, "step": 50785 }, { "epoch": 1.0630913505819308, "grad_norm": 0.3266460597515106, "learning_rate": 0.00014421141618081073, "loss": 11.6715, "step": 50786 }, { "epoch": 1.063112283345893, "grad_norm": 0.31705594062805176, "learning_rate": 0.00014420944955855438, "loss": 11.666, "step": 50787 }, { "epoch": 1.0631332161098552, "grad_norm": 0.30885958671569824, "learning_rate": 0.0001442074829150457, "loss": 11.6697, "step": 50788 }, { "epoch": 1.0631541488738172, "grad_norm": 0.3979797661304474, "learning_rate": 0.00014420551625028555, "loss": 11.6695, "step": 50789 }, { "epoch": 1.0631750816377794, "grad_norm": 0.3823290765285492, "learning_rate": 0.00014420354956427495, "loss": 11.6755, "step": 50790 }, { "epoch": 1.0631960144017416, "grad_norm": 0.30442559719085693, "learning_rate": 0.00014420158285701482, "loss": 11.683, "step": 50791 }, { "epoch": 1.0632169471657038, "grad_norm": 0.33781108260154724, "learning_rate": 0.00014419961612850606, "loss": 11.6567, "step": 50792 }, { "epoch": 1.063237879929666, "grad_norm": 0.2816992402076721, "learning_rate": 0.0001441976493787497, "loss": 11.6744, "step": 50793 }, { "epoch": 1.063258812693628, "grad_norm": 0.3816487193107605, "learning_rate": 0.00014419568260774665, "loss": 11.6655, "step": 50794 }, { "epoch": 1.0632797454575902, "grad_norm": 0.3431878983974457, "learning_rate": 0.00014419371581549784, "loss": 11.6781, "step": 50795 }, { "epoch": 1.0633006782215524, "grad_norm": 0.2763661742210388, "learning_rate": 0.0001441917490020042, "loss": 11.6614, "step": 50796 }, { "epoch": 1.0633216109855146, "grad_norm": 0.31029659509658813, "learning_rate": 0.00014418978216726672, "loss": 11.6745, "step": 50797 }, { "epoch": 1.0633425437494766, "grad_norm": 0.3309674561023712, "learning_rate": 0.00014418781531128636, "loss": 11.6844, "step": 50798 }, { "epoch": 1.0633634765134388, "grad_norm": 0.3287740647792816, "learning_rate": 0.000144185848434064, "loss": 11.6529, "step": 50799 }, { "epoch": 1.063384409277401, "grad_norm": 0.29582729935646057, "learning_rate": 0.0001441838815356006, "loss": 11.6545, "step": 50800 }, { "epoch": 1.0634053420413632, "grad_norm": 0.3517519533634186, "learning_rate": 0.00014418191461589713, "loss": 11.667, "step": 50801 }, { "epoch": 1.0634262748053254, "grad_norm": 0.33521273732185364, "learning_rate": 0.00014417994767495457, "loss": 11.6563, "step": 50802 }, { "epoch": 1.0634472075692873, "grad_norm": 0.3426966369152069, "learning_rate": 0.00014417798071277377, "loss": 11.6609, "step": 50803 }, { "epoch": 1.0634681403332495, "grad_norm": 0.23666641116142273, "learning_rate": 0.00014417601372935574, "loss": 11.6757, "step": 50804 }, { "epoch": 1.0634890730972117, "grad_norm": 0.29133957624435425, "learning_rate": 0.00014417404672470145, "loss": 11.67, "step": 50805 }, { "epoch": 1.063510005861174, "grad_norm": 0.4336799085140228, "learning_rate": 0.00014417207969881176, "loss": 11.6813, "step": 50806 }, { "epoch": 1.0635309386251361, "grad_norm": 0.38930070400238037, "learning_rate": 0.00014417011265168773, "loss": 11.6841, "step": 50807 }, { "epoch": 1.0635518713890981, "grad_norm": 0.3521372675895691, "learning_rate": 0.0001441681455833302, "loss": 11.6507, "step": 50808 }, { "epoch": 1.0635728041530603, "grad_norm": 0.2993553876876831, "learning_rate": 0.00014416617849374015, "loss": 11.6686, "step": 50809 }, { "epoch": 1.0635937369170225, "grad_norm": 0.3413712680339813, "learning_rate": 0.00014416421138291856, "loss": 11.6736, "step": 50810 }, { "epoch": 1.0636146696809847, "grad_norm": 0.29195016622543335, "learning_rate": 0.0001441622442508663, "loss": 11.6618, "step": 50811 }, { "epoch": 1.063635602444947, "grad_norm": 0.4010825753211975, "learning_rate": 0.0001441602770975844, "loss": 11.6789, "step": 50812 }, { "epoch": 1.063656535208909, "grad_norm": 0.304149866104126, "learning_rate": 0.00014415830992307377, "loss": 11.6757, "step": 50813 }, { "epoch": 1.063677467972871, "grad_norm": 0.3157810568809509, "learning_rate": 0.00014415634272733536, "loss": 11.6761, "step": 50814 }, { "epoch": 1.0636984007368333, "grad_norm": 0.32980677485466003, "learning_rate": 0.00014415437551037013, "loss": 11.661, "step": 50815 }, { "epoch": 1.0637193335007955, "grad_norm": 0.26497042179107666, "learning_rate": 0.00014415240827217896, "loss": 11.6722, "step": 50816 }, { "epoch": 1.0637402662647575, "grad_norm": 0.27609652280807495, "learning_rate": 0.00014415044101276288, "loss": 11.6738, "step": 50817 }, { "epoch": 1.0637611990287197, "grad_norm": 0.32647669315338135, "learning_rate": 0.00014414847373212276, "loss": 11.6657, "step": 50818 }, { "epoch": 1.0637821317926819, "grad_norm": 0.35436931252479553, "learning_rate": 0.0001441465064302596, "loss": 11.6713, "step": 50819 }, { "epoch": 1.063803064556644, "grad_norm": 0.32022979855537415, "learning_rate": 0.00014414453910717434, "loss": 11.6682, "step": 50820 }, { "epoch": 1.0638239973206063, "grad_norm": 0.35236474871635437, "learning_rate": 0.0001441425717628679, "loss": 11.6502, "step": 50821 }, { "epoch": 1.0638449300845683, "grad_norm": 0.3922807574272156, "learning_rate": 0.00014414060439734127, "loss": 11.6799, "step": 50822 }, { "epoch": 1.0638658628485305, "grad_norm": 0.2520776093006134, "learning_rate": 0.00014413863701059532, "loss": 11.6677, "step": 50823 }, { "epoch": 1.0638867956124927, "grad_norm": 0.29898712038993835, "learning_rate": 0.0001441366696026311, "loss": 11.6654, "step": 50824 }, { "epoch": 1.0639077283764549, "grad_norm": 0.28844064474105835, "learning_rate": 0.00014413470217344946, "loss": 11.6613, "step": 50825 }, { "epoch": 1.063928661140417, "grad_norm": 0.25536438822746277, "learning_rate": 0.0001441327347230514, "loss": 11.658, "step": 50826 }, { "epoch": 1.063949593904379, "grad_norm": 0.26754891872406006, "learning_rate": 0.00014413076725143783, "loss": 11.6595, "step": 50827 }, { "epoch": 1.0639705266683412, "grad_norm": 0.3348679542541504, "learning_rate": 0.00014412879975860975, "loss": 11.6679, "step": 50828 }, { "epoch": 1.0639914594323034, "grad_norm": 0.3309279978275299, "learning_rate": 0.00014412683224456806, "loss": 11.6643, "step": 50829 }, { "epoch": 1.0640123921962656, "grad_norm": 0.29075586795806885, "learning_rate": 0.00014412486470931368, "loss": 11.6768, "step": 50830 }, { "epoch": 1.0640333249602278, "grad_norm": 0.2935255765914917, "learning_rate": 0.00014412289715284762, "loss": 11.6685, "step": 50831 }, { "epoch": 1.0640542577241898, "grad_norm": 0.2722143232822418, "learning_rate": 0.0001441209295751708, "loss": 11.6543, "step": 50832 }, { "epoch": 1.064075190488152, "grad_norm": 0.4370986223220825, "learning_rate": 0.00014411896197628416, "loss": 11.6535, "step": 50833 }, { "epoch": 1.0640961232521142, "grad_norm": 0.3393002152442932, "learning_rate": 0.00014411699435618868, "loss": 11.6593, "step": 50834 }, { "epoch": 1.0641170560160764, "grad_norm": 0.28995054960250854, "learning_rate": 0.00014411502671488524, "loss": 11.6688, "step": 50835 }, { "epoch": 1.0641379887800384, "grad_norm": 0.4340404272079468, "learning_rate": 0.00014411305905237483, "loss": 11.6726, "step": 50836 }, { "epoch": 1.0641589215440006, "grad_norm": 0.3255884349346161, "learning_rate": 0.0001441110913686584, "loss": 11.6494, "step": 50837 }, { "epoch": 1.0641798543079628, "grad_norm": 0.3275059759616852, "learning_rate": 0.00014410912366373687, "loss": 11.6682, "step": 50838 }, { "epoch": 1.064200787071925, "grad_norm": 0.3203273117542267, "learning_rate": 0.00014410715593761122, "loss": 11.6803, "step": 50839 }, { "epoch": 1.0642217198358872, "grad_norm": 0.3126276135444641, "learning_rate": 0.00014410518819028237, "loss": 11.6633, "step": 50840 }, { "epoch": 1.0642426525998492, "grad_norm": 0.31747421622276306, "learning_rate": 0.00014410322042175127, "loss": 11.6676, "step": 50841 }, { "epoch": 1.0642635853638114, "grad_norm": 0.28331848978996277, "learning_rate": 0.00014410125263201887, "loss": 11.6714, "step": 50842 }, { "epoch": 1.0642845181277736, "grad_norm": 0.3377513289451599, "learning_rate": 0.00014409928482108609, "loss": 11.6765, "step": 50843 }, { "epoch": 1.0643054508917358, "grad_norm": 0.24581031501293182, "learning_rate": 0.00014409731698895392, "loss": 11.6736, "step": 50844 }, { "epoch": 1.064326383655698, "grad_norm": 0.5008936524391174, "learning_rate": 0.00014409534913562328, "loss": 11.6625, "step": 50845 }, { "epoch": 1.06434731641966, "grad_norm": 0.2880397439002991, "learning_rate": 0.00014409338126109513, "loss": 11.6608, "step": 50846 }, { "epoch": 1.0643682491836222, "grad_norm": 0.32807299494743347, "learning_rate": 0.00014409141336537042, "loss": 11.6847, "step": 50847 }, { "epoch": 1.0643891819475844, "grad_norm": 0.3529287874698639, "learning_rate": 0.00014408944544845004, "loss": 11.6724, "step": 50848 }, { "epoch": 1.0644101147115466, "grad_norm": 0.39398783445358276, "learning_rate": 0.00014408747751033506, "loss": 11.6723, "step": 50849 }, { "epoch": 1.0644310474755088, "grad_norm": 0.2970348298549652, "learning_rate": 0.00014408550955102628, "loss": 11.6725, "step": 50850 }, { "epoch": 1.0644519802394707, "grad_norm": 0.27824339270591736, "learning_rate": 0.00014408354157052474, "loss": 11.6449, "step": 50851 }, { "epoch": 1.064472913003433, "grad_norm": 0.273209810256958, "learning_rate": 0.00014408157356883132, "loss": 11.6776, "step": 50852 }, { "epoch": 1.0644938457673951, "grad_norm": 0.2651243209838867, "learning_rate": 0.00014407960554594702, "loss": 11.6618, "step": 50853 }, { "epoch": 1.0645147785313573, "grad_norm": 0.3723733723163605, "learning_rate": 0.0001440776375018728, "loss": 11.658, "step": 50854 }, { "epoch": 1.0645357112953193, "grad_norm": 0.3605661988258362, "learning_rate": 0.00014407566943660952, "loss": 11.6807, "step": 50855 }, { "epoch": 1.0645566440592815, "grad_norm": 0.38013070821762085, "learning_rate": 0.00014407370135015825, "loss": 11.6749, "step": 50856 }, { "epoch": 1.0645775768232437, "grad_norm": 0.4540807604789734, "learning_rate": 0.00014407173324251983, "loss": 11.6763, "step": 50857 }, { "epoch": 1.064598509587206, "grad_norm": 0.30365225672721863, "learning_rate": 0.00014406976511369525, "loss": 11.6812, "step": 50858 }, { "epoch": 1.0646194423511681, "grad_norm": 0.33965909481048584, "learning_rate": 0.00014406779696368548, "loss": 11.6675, "step": 50859 }, { "epoch": 1.06464037511513, "grad_norm": 0.31612274050712585, "learning_rate": 0.00014406582879249137, "loss": 11.6724, "step": 50860 }, { "epoch": 1.0646613078790923, "grad_norm": 0.27199533581733704, "learning_rate": 0.00014406386060011398, "loss": 11.669, "step": 50861 }, { "epoch": 1.0646822406430545, "grad_norm": 0.25897008180618286, "learning_rate": 0.0001440618923865542, "loss": 11.686, "step": 50862 }, { "epoch": 1.0647031734070167, "grad_norm": 0.45496535301208496, "learning_rate": 0.00014405992415181297, "loss": 11.671, "step": 50863 }, { "epoch": 1.064724106170979, "grad_norm": 0.3867148160934448, "learning_rate": 0.0001440579558958913, "loss": 11.6641, "step": 50864 }, { "epoch": 1.0647450389349409, "grad_norm": 0.33623969554901123, "learning_rate": 0.00014405598761879006, "loss": 11.6675, "step": 50865 }, { "epoch": 1.064765971698903, "grad_norm": 0.2939416766166687, "learning_rate": 0.0001440540193205102, "loss": 11.6535, "step": 50866 }, { "epoch": 1.0647869044628653, "grad_norm": 0.31416070461273193, "learning_rate": 0.00014405205100105273, "loss": 11.6452, "step": 50867 }, { "epoch": 1.0648078372268275, "grad_norm": 0.30118685960769653, "learning_rate": 0.00014405008266041853, "loss": 11.6884, "step": 50868 }, { "epoch": 1.0648287699907897, "grad_norm": 0.38940268754959106, "learning_rate": 0.00014404811429860858, "loss": 11.6707, "step": 50869 }, { "epoch": 1.0648497027547517, "grad_norm": 0.29984915256500244, "learning_rate": 0.0001440461459156238, "loss": 11.6662, "step": 50870 }, { "epoch": 1.0648706355187139, "grad_norm": 0.3009713888168335, "learning_rate": 0.0001440441775114652, "loss": 11.6816, "step": 50871 }, { "epoch": 1.064891568282676, "grad_norm": 0.24993424117565155, "learning_rate": 0.00014404220908613365, "loss": 11.6659, "step": 50872 }, { "epoch": 1.0649125010466383, "grad_norm": 0.3319467306137085, "learning_rate": 0.00014404024063963014, "loss": 11.6668, "step": 50873 }, { "epoch": 1.0649334338106002, "grad_norm": 0.317694753408432, "learning_rate": 0.00014403827217195558, "loss": 11.6587, "step": 50874 }, { "epoch": 1.0649543665745624, "grad_norm": 0.2713727355003357, "learning_rate": 0.00014403630368311097, "loss": 11.656, "step": 50875 }, { "epoch": 1.0649752993385246, "grad_norm": 0.2842935621738434, "learning_rate": 0.0001440343351730972, "loss": 11.6676, "step": 50876 }, { "epoch": 1.0649962321024868, "grad_norm": 0.3310840129852295, "learning_rate": 0.00014403236664191525, "loss": 11.6805, "step": 50877 }, { "epoch": 1.065017164866449, "grad_norm": 0.27896472811698914, "learning_rate": 0.00014403039808956607, "loss": 11.6623, "step": 50878 }, { "epoch": 1.065038097630411, "grad_norm": 0.29933300614356995, "learning_rate": 0.00014402842951605059, "loss": 11.6617, "step": 50879 }, { "epoch": 1.0650590303943732, "grad_norm": 0.260379433631897, "learning_rate": 0.00014402646092136976, "loss": 11.6706, "step": 50880 }, { "epoch": 1.0650799631583354, "grad_norm": 0.32205358147621155, "learning_rate": 0.0001440244923055245, "loss": 11.6953, "step": 50881 }, { "epoch": 1.0651008959222976, "grad_norm": 0.305729478597641, "learning_rate": 0.00014402252366851586, "loss": 11.6429, "step": 50882 }, { "epoch": 1.0651218286862598, "grad_norm": 0.36814892292022705, "learning_rate": 0.00014402055501034464, "loss": 11.6654, "step": 50883 }, { "epoch": 1.0651427614502218, "grad_norm": 0.30471670627593994, "learning_rate": 0.00014401858633101186, "loss": 11.6898, "step": 50884 }, { "epoch": 1.065163694214184, "grad_norm": 0.27366793155670166, "learning_rate": 0.0001440166176305185, "loss": 11.6825, "step": 50885 }, { "epoch": 1.0651846269781462, "grad_norm": 0.25175940990448, "learning_rate": 0.00014401464890886545, "loss": 11.6689, "step": 50886 }, { "epoch": 1.0652055597421084, "grad_norm": 0.30034905672073364, "learning_rate": 0.0001440126801660537, "loss": 11.6721, "step": 50887 }, { "epoch": 1.0652264925060706, "grad_norm": 0.33807113766670227, "learning_rate": 0.00014401071140208413, "loss": 11.6709, "step": 50888 }, { "epoch": 1.0652474252700326, "grad_norm": 0.2922322154045105, "learning_rate": 0.00014400874261695775, "loss": 11.66, "step": 50889 }, { "epoch": 1.0652683580339948, "grad_norm": 0.36365047097206116, "learning_rate": 0.0001440067738106755, "loss": 11.6611, "step": 50890 }, { "epoch": 1.065289290797957, "grad_norm": 0.336485892534256, "learning_rate": 0.00014400480498323828, "loss": 11.6523, "step": 50891 }, { "epoch": 1.0653102235619192, "grad_norm": 0.2647876441478729, "learning_rate": 0.0001440028361346471, "loss": 11.6686, "step": 50892 }, { "epoch": 1.0653311563258812, "grad_norm": 0.37472957372665405, "learning_rate": 0.00014400086726490285, "loss": 11.6822, "step": 50893 }, { "epoch": 1.0653520890898434, "grad_norm": 0.2898786664009094, "learning_rate": 0.00014399889837400653, "loss": 11.6617, "step": 50894 }, { "epoch": 1.0653730218538056, "grad_norm": 0.32421329617500305, "learning_rate": 0.00014399692946195903, "loss": 11.6767, "step": 50895 }, { "epoch": 1.0653939546177678, "grad_norm": 0.4171726405620575, "learning_rate": 0.00014399496052876134, "loss": 11.6609, "step": 50896 }, { "epoch": 1.06541488738173, "grad_norm": 0.2775011956691742, "learning_rate": 0.00014399299157441436, "loss": 11.6748, "step": 50897 }, { "epoch": 1.065435820145692, "grad_norm": 0.4248361587524414, "learning_rate": 0.0001439910225989191, "loss": 11.6738, "step": 50898 }, { "epoch": 1.0654567529096541, "grad_norm": 0.3737165033817291, "learning_rate": 0.00014398905360227647, "loss": 11.6578, "step": 50899 }, { "epoch": 1.0654776856736163, "grad_norm": 0.3450741469860077, "learning_rate": 0.0001439870845844874, "loss": 11.6743, "step": 50900 }, { "epoch": 1.0654986184375785, "grad_norm": 0.36368224024772644, "learning_rate": 0.0001439851155455529, "loss": 11.6675, "step": 50901 }, { "epoch": 1.0655195512015407, "grad_norm": 0.25206735730171204, "learning_rate": 0.00014398314648547385, "loss": 11.6523, "step": 50902 }, { "epoch": 1.0655404839655027, "grad_norm": 0.3860641419887543, "learning_rate": 0.0001439811774042512, "loss": 11.6676, "step": 50903 }, { "epoch": 1.065561416729465, "grad_norm": 0.32686156034469604, "learning_rate": 0.00014397920830188597, "loss": 11.6542, "step": 50904 }, { "epoch": 1.0655823494934271, "grad_norm": 0.2984572947025299, "learning_rate": 0.000143977239178379, "loss": 11.6471, "step": 50905 }, { "epoch": 1.0656032822573893, "grad_norm": 0.38248416781425476, "learning_rate": 0.0001439752700337313, "loss": 11.662, "step": 50906 }, { "epoch": 1.0656242150213515, "grad_norm": 0.29808375239372253, "learning_rate": 0.0001439733008679438, "loss": 11.6824, "step": 50907 }, { "epoch": 1.0656451477853135, "grad_norm": 0.3243451714515686, "learning_rate": 0.00014397133168101748, "loss": 11.684, "step": 50908 }, { "epoch": 1.0656660805492757, "grad_norm": 0.33621716499328613, "learning_rate": 0.00014396936247295324, "loss": 11.6544, "step": 50909 }, { "epoch": 1.065687013313238, "grad_norm": 0.3533744513988495, "learning_rate": 0.00014396739324375207, "loss": 11.6445, "step": 50910 }, { "epoch": 1.0657079460772, "grad_norm": 0.31331196427345276, "learning_rate": 0.00014396542399341486, "loss": 11.6729, "step": 50911 }, { "epoch": 1.065728878841162, "grad_norm": 0.3334096670150757, "learning_rate": 0.00014396345472194263, "loss": 11.6629, "step": 50912 }, { "epoch": 1.0657498116051243, "grad_norm": 0.2868553400039673, "learning_rate": 0.00014396148542933625, "loss": 11.6603, "step": 50913 }, { "epoch": 1.0657707443690865, "grad_norm": 0.24626731872558594, "learning_rate": 0.0001439595161155967, "loss": 11.6745, "step": 50914 }, { "epoch": 1.0657916771330487, "grad_norm": 0.33379143476486206, "learning_rate": 0.00014395754678072495, "loss": 11.6444, "step": 50915 }, { "epoch": 1.0658126098970109, "grad_norm": 0.3221026659011841, "learning_rate": 0.00014395557742472191, "loss": 11.6592, "step": 50916 }, { "epoch": 1.0658335426609729, "grad_norm": 0.34391120076179504, "learning_rate": 0.00014395360804758857, "loss": 11.679, "step": 50917 }, { "epoch": 1.065854475424935, "grad_norm": 0.2674369812011719, "learning_rate": 0.00014395163864932582, "loss": 11.6752, "step": 50918 }, { "epoch": 1.0658754081888973, "grad_norm": 0.3809162974357605, "learning_rate": 0.00014394966922993464, "loss": 11.6669, "step": 50919 }, { "epoch": 1.0658963409528595, "grad_norm": 0.2925645112991333, "learning_rate": 0.000143947699789416, "loss": 11.6697, "step": 50920 }, { "epoch": 1.0659172737168217, "grad_norm": 0.32693609595298767, "learning_rate": 0.0001439457303277708, "loss": 11.6673, "step": 50921 }, { "epoch": 1.0659382064807836, "grad_norm": 0.2950668931007385, "learning_rate": 0.00014394376084500004, "loss": 11.6528, "step": 50922 }, { "epoch": 1.0659591392447458, "grad_norm": 0.2874796986579895, "learning_rate": 0.00014394179134110457, "loss": 11.6705, "step": 50923 }, { "epoch": 1.065980072008708, "grad_norm": 0.38735148310661316, "learning_rate": 0.00014393982181608544, "loss": 11.6634, "step": 50924 }, { "epoch": 1.0660010047726702, "grad_norm": 0.30642738938331604, "learning_rate": 0.00014393785226994357, "loss": 11.6647, "step": 50925 }, { "epoch": 1.0660219375366324, "grad_norm": 0.32765883207321167, "learning_rate": 0.00014393588270267987, "loss": 11.6635, "step": 50926 }, { "epoch": 1.0660428703005944, "grad_norm": 0.348676472902298, "learning_rate": 0.0001439339131142953, "loss": 11.6747, "step": 50927 }, { "epoch": 1.0660638030645566, "grad_norm": 0.34503525495529175, "learning_rate": 0.00014393194350479084, "loss": 11.672, "step": 50928 }, { "epoch": 1.0660847358285188, "grad_norm": 0.3455316722393036, "learning_rate": 0.0001439299738741674, "loss": 11.6768, "step": 50929 }, { "epoch": 1.066105668592481, "grad_norm": 0.3645559549331665, "learning_rate": 0.00014392800422242597, "loss": 11.6883, "step": 50930 }, { "epoch": 1.066126601356443, "grad_norm": 0.32055407762527466, "learning_rate": 0.00014392603454956742, "loss": 11.6622, "step": 50931 }, { "epoch": 1.0661475341204052, "grad_norm": 0.44925788044929504, "learning_rate": 0.0001439240648555928, "loss": 11.6627, "step": 50932 }, { "epoch": 1.0661684668843674, "grad_norm": 0.42598775029182434, "learning_rate": 0.00014392209514050295, "loss": 11.6894, "step": 50933 }, { "epoch": 1.0661893996483296, "grad_norm": 0.3386073708534241, "learning_rate": 0.0001439201254042989, "loss": 11.6599, "step": 50934 }, { "epoch": 1.0662103324122918, "grad_norm": 0.3013497591018677, "learning_rate": 0.00014391815564698156, "loss": 11.664, "step": 50935 }, { "epoch": 1.0662312651762538, "grad_norm": 0.2822628319263458, "learning_rate": 0.00014391618586855187, "loss": 11.6531, "step": 50936 }, { "epoch": 1.066252197940216, "grad_norm": 0.28144997358322144, "learning_rate": 0.0001439142160690108, "loss": 11.6735, "step": 50937 }, { "epoch": 1.0662731307041782, "grad_norm": 0.30612507462501526, "learning_rate": 0.0001439122462483593, "loss": 11.6396, "step": 50938 }, { "epoch": 1.0662940634681404, "grad_norm": 0.261737197637558, "learning_rate": 0.0001439102764065983, "loss": 11.6762, "step": 50939 }, { "epoch": 1.0663149962321026, "grad_norm": 0.459955632686615, "learning_rate": 0.0001439083065437287, "loss": 11.692, "step": 50940 }, { "epoch": 1.0663359289960646, "grad_norm": 0.35124215483665466, "learning_rate": 0.00014390633665975155, "loss": 11.6803, "step": 50941 }, { "epoch": 1.0663568617600268, "grad_norm": 0.31857654452323914, "learning_rate": 0.00014390436675466774, "loss": 11.6648, "step": 50942 }, { "epoch": 1.066377794523989, "grad_norm": 0.27850571274757385, "learning_rate": 0.0001439023968284782, "loss": 11.6523, "step": 50943 }, { "epoch": 1.0663987272879512, "grad_norm": 0.3629029393196106, "learning_rate": 0.0001439004268811839, "loss": 11.6838, "step": 50944 }, { "epoch": 1.0664196600519134, "grad_norm": 0.3300554156303406, "learning_rate": 0.00014389845691278582, "loss": 11.6797, "step": 50945 }, { "epoch": 1.0664405928158753, "grad_norm": 0.3059845566749573, "learning_rate": 0.0001438964869232848, "loss": 11.6878, "step": 50946 }, { "epoch": 1.0664615255798375, "grad_norm": 0.35417914390563965, "learning_rate": 0.00014389451691268194, "loss": 11.6659, "step": 50947 }, { "epoch": 1.0664824583437997, "grad_norm": 0.31059524416923523, "learning_rate": 0.00014389254688097807, "loss": 11.6565, "step": 50948 }, { "epoch": 1.066503391107762, "grad_norm": 0.3475832939147949, "learning_rate": 0.00014389057682817418, "loss": 11.6702, "step": 50949 }, { "epoch": 1.066524323871724, "grad_norm": 0.3421960771083832, "learning_rate": 0.00014388860675427118, "loss": 11.6726, "step": 50950 }, { "epoch": 1.0665452566356861, "grad_norm": 0.32738935947418213, "learning_rate": 0.00014388663665927006, "loss": 11.6748, "step": 50951 }, { "epoch": 1.0665661893996483, "grad_norm": 0.3132038414478302, "learning_rate": 0.00014388466654317178, "loss": 11.6582, "step": 50952 }, { "epoch": 1.0665871221636105, "grad_norm": 0.35043397545814514, "learning_rate": 0.00014388269640597725, "loss": 11.6703, "step": 50953 }, { "epoch": 1.0666080549275727, "grad_norm": 0.33879685401916504, "learning_rate": 0.00014388072624768743, "loss": 11.6758, "step": 50954 }, { "epoch": 1.0666289876915347, "grad_norm": 0.37253209948539734, "learning_rate": 0.00014387875606830327, "loss": 11.6737, "step": 50955 }, { "epoch": 1.066649920455497, "grad_norm": 0.3176962733268738, "learning_rate": 0.00014387678586782568, "loss": 11.6677, "step": 50956 }, { "epoch": 1.066670853219459, "grad_norm": 0.327990859746933, "learning_rate": 0.00014387481564625567, "loss": 11.6691, "step": 50957 }, { "epoch": 1.0666917859834213, "grad_norm": 0.36614036560058594, "learning_rate": 0.00014387284540359416, "loss": 11.681, "step": 50958 }, { "epoch": 1.0667127187473835, "grad_norm": 0.30962830781936646, "learning_rate": 0.0001438708751398421, "loss": 11.6637, "step": 50959 }, { "epoch": 1.0667336515113455, "grad_norm": 0.3338012099266052, "learning_rate": 0.0001438689048550004, "loss": 11.685, "step": 50960 }, { "epoch": 1.0667545842753077, "grad_norm": 0.28217944502830505, "learning_rate": 0.00014386693454907006, "loss": 11.6799, "step": 50961 }, { "epoch": 1.0667755170392699, "grad_norm": 0.3065880239009857, "learning_rate": 0.00014386496422205198, "loss": 11.6616, "step": 50962 }, { "epoch": 1.066796449803232, "grad_norm": 0.33690035343170166, "learning_rate": 0.00014386299387394714, "loss": 11.6695, "step": 50963 }, { "epoch": 1.0668173825671943, "grad_norm": 0.3467615842819214, "learning_rate": 0.00014386102350475653, "loss": 11.6671, "step": 50964 }, { "epoch": 1.0668383153311563, "grad_norm": 0.36011195182800293, "learning_rate": 0.00014385905311448097, "loss": 11.6882, "step": 50965 }, { "epoch": 1.0668592480951185, "grad_norm": 0.3585110604763031, "learning_rate": 0.00014385708270312156, "loss": 11.6825, "step": 50966 }, { "epoch": 1.0668801808590807, "grad_norm": 0.3622647821903229, "learning_rate": 0.0001438551122706791, "loss": 11.6527, "step": 50967 }, { "epoch": 1.0669011136230429, "grad_norm": 0.32382988929748535, "learning_rate": 0.00014385314181715467, "loss": 11.6839, "step": 50968 }, { "epoch": 1.0669220463870048, "grad_norm": 0.3264167904853821, "learning_rate": 0.00014385117134254913, "loss": 11.6786, "step": 50969 }, { "epoch": 1.066942979150967, "grad_norm": 0.31265464425086975, "learning_rate": 0.00014384920084686343, "loss": 11.676, "step": 50970 }, { "epoch": 1.0669639119149292, "grad_norm": 0.26941877603530884, "learning_rate": 0.00014384723033009856, "loss": 11.6677, "step": 50971 }, { "epoch": 1.0669848446788914, "grad_norm": 0.37756484746932983, "learning_rate": 0.00014384525979225545, "loss": 11.6788, "step": 50972 }, { "epoch": 1.0670057774428536, "grad_norm": 0.30964624881744385, "learning_rate": 0.00014384328923333506, "loss": 11.6812, "step": 50973 }, { "epoch": 1.0670267102068156, "grad_norm": 0.2867996394634247, "learning_rate": 0.00014384131865333833, "loss": 11.6751, "step": 50974 }, { "epoch": 1.0670476429707778, "grad_norm": 0.3351382911205292, "learning_rate": 0.00014383934805226614, "loss": 11.6448, "step": 50975 }, { "epoch": 1.06706857573474, "grad_norm": 0.34878048300743103, "learning_rate": 0.00014383737743011956, "loss": 11.6761, "step": 50976 }, { "epoch": 1.0670895084987022, "grad_norm": 0.2771661579608917, "learning_rate": 0.00014383540678689945, "loss": 11.6774, "step": 50977 }, { "epoch": 1.0671104412626644, "grad_norm": 0.32743507623672485, "learning_rate": 0.00014383343612260675, "loss": 11.6706, "step": 50978 }, { "epoch": 1.0671313740266264, "grad_norm": 0.2911730110645294, "learning_rate": 0.00014383146543724248, "loss": 11.6773, "step": 50979 }, { "epoch": 1.0671523067905886, "grad_norm": 0.32904714345932007, "learning_rate": 0.00014382949473080753, "loss": 11.6704, "step": 50980 }, { "epoch": 1.0671732395545508, "grad_norm": 0.43306443095207214, "learning_rate": 0.00014382752400330287, "loss": 11.6766, "step": 50981 }, { "epoch": 1.067194172318513, "grad_norm": 0.28476303815841675, "learning_rate": 0.00014382555325472945, "loss": 11.6642, "step": 50982 }, { "epoch": 1.0672151050824752, "grad_norm": 0.35903775691986084, "learning_rate": 0.0001438235824850882, "loss": 11.6718, "step": 50983 }, { "epoch": 1.0672360378464372, "grad_norm": 0.29145926237106323, "learning_rate": 0.00014382161169438008, "loss": 11.6754, "step": 50984 }, { "epoch": 1.0672569706103994, "grad_norm": 0.3153928816318512, "learning_rate": 0.000143819640882606, "loss": 11.6689, "step": 50985 }, { "epoch": 1.0672779033743616, "grad_norm": 0.3317263424396515, "learning_rate": 0.00014381767004976697, "loss": 11.6585, "step": 50986 }, { "epoch": 1.0672988361383238, "grad_norm": 0.3103431165218353, "learning_rate": 0.0001438156991958639, "loss": 11.6669, "step": 50987 }, { "epoch": 1.0673197689022857, "grad_norm": 0.3559909462928772, "learning_rate": 0.00014381372832089777, "loss": 11.654, "step": 50988 }, { "epoch": 1.067340701666248, "grad_norm": 0.3084633946418762, "learning_rate": 0.00014381175742486944, "loss": 11.6646, "step": 50989 }, { "epoch": 1.0673616344302101, "grad_norm": 0.3813285827636719, "learning_rate": 0.00014380978650777997, "loss": 11.6672, "step": 50990 }, { "epoch": 1.0673825671941723, "grad_norm": 0.2675401270389557, "learning_rate": 0.00014380781556963028, "loss": 11.6663, "step": 50991 }, { "epoch": 1.0674034999581345, "grad_norm": 0.29617786407470703, "learning_rate": 0.00014380584461042126, "loss": 11.663, "step": 50992 }, { "epoch": 1.0674244327220965, "grad_norm": 0.2914485037326813, "learning_rate": 0.00014380387363015392, "loss": 11.677, "step": 50993 }, { "epoch": 1.0674453654860587, "grad_norm": 0.2726005017757416, "learning_rate": 0.00014380190262882914, "loss": 11.6645, "step": 50994 }, { "epoch": 1.067466298250021, "grad_norm": 0.3454379737377167, "learning_rate": 0.00014379993160644792, "loss": 11.6604, "step": 50995 }, { "epoch": 1.0674872310139831, "grad_norm": 0.48175662755966187, "learning_rate": 0.0001437979605630112, "loss": 11.6484, "step": 50996 }, { "epoch": 1.0675081637779453, "grad_norm": 0.2570488750934601, "learning_rate": 0.0001437959894985199, "loss": 11.6679, "step": 50997 }, { "epoch": 1.0675290965419073, "grad_norm": 0.27611207962036133, "learning_rate": 0.00014379401841297504, "loss": 11.668, "step": 50998 }, { "epoch": 1.0675500293058695, "grad_norm": 0.32218626141548157, "learning_rate": 0.00014379204730637747, "loss": 11.6678, "step": 50999 }, { "epoch": 1.0675709620698317, "grad_norm": 0.26347997784614563, "learning_rate": 0.0001437900761787282, "loss": 11.6749, "step": 51000 }, { "epoch": 1.0675709620698317, "eval_loss": 11.668360710144043, "eval_runtime": 34.3161, "eval_samples_per_second": 28.004, "eval_steps_per_second": 7.023, "step": 51000 }, { "epoch": 1.067591894833794, "grad_norm": 0.2971205711364746, "learning_rate": 0.00014378810503002819, "loss": 11.6561, "step": 51001 }, { "epoch": 1.067612827597756, "grad_norm": 0.3169681429862976, "learning_rate": 0.00014378613386027833, "loss": 11.6583, "step": 51002 }, { "epoch": 1.067633760361718, "grad_norm": 0.3629035949707031, "learning_rate": 0.00014378416266947963, "loss": 11.6577, "step": 51003 }, { "epoch": 1.0676546931256803, "grad_norm": 0.3711574077606201, "learning_rate": 0.00014378219145763296, "loss": 11.6598, "step": 51004 }, { "epoch": 1.0676756258896425, "grad_norm": 0.32310646772384644, "learning_rate": 0.00014378022022473936, "loss": 11.66, "step": 51005 }, { "epoch": 1.0676965586536047, "grad_norm": 0.3287386894226074, "learning_rate": 0.00014377824897079968, "loss": 11.6654, "step": 51006 }, { "epoch": 1.0677174914175667, "grad_norm": 0.24952539801597595, "learning_rate": 0.00014377627769581494, "loss": 11.6702, "step": 51007 }, { "epoch": 1.0677384241815289, "grad_norm": 0.2652662396430969, "learning_rate": 0.00014377430639978607, "loss": 11.6709, "step": 51008 }, { "epoch": 1.067759356945491, "grad_norm": 0.4405362010002136, "learning_rate": 0.000143772335082714, "loss": 11.6882, "step": 51009 }, { "epoch": 1.0677802897094533, "grad_norm": 0.7958345413208008, "learning_rate": 0.00014377036374459972, "loss": 11.6484, "step": 51010 }, { "epoch": 1.0678012224734155, "grad_norm": 0.28286993503570557, "learning_rate": 0.00014376839238544416, "loss": 11.6558, "step": 51011 }, { "epoch": 1.0678221552373774, "grad_norm": 0.33956456184387207, "learning_rate": 0.00014376642100524822, "loss": 11.6766, "step": 51012 }, { "epoch": 1.0678430880013396, "grad_norm": 0.4196760058403015, "learning_rate": 0.0001437644496040129, "loss": 11.6702, "step": 51013 }, { "epoch": 1.0678640207653018, "grad_norm": 0.41587764024734497, "learning_rate": 0.0001437624781817391, "loss": 11.6836, "step": 51014 }, { "epoch": 1.067884953529264, "grad_norm": 0.33617103099823, "learning_rate": 0.00014376050673842788, "loss": 11.676, "step": 51015 }, { "epoch": 1.0679058862932262, "grad_norm": 0.34651657938957214, "learning_rate": 0.00014375853527408004, "loss": 11.6601, "step": 51016 }, { "epoch": 1.0679268190571882, "grad_norm": 0.38467082381248474, "learning_rate": 0.0001437565637886966, "loss": 11.664, "step": 51017 }, { "epoch": 1.0679477518211504, "grad_norm": 0.27525144815444946, "learning_rate": 0.0001437545922822785, "loss": 11.6796, "step": 51018 }, { "epoch": 1.0679686845851126, "grad_norm": 0.3526594638824463, "learning_rate": 0.00014375262075482672, "loss": 11.6588, "step": 51019 }, { "epoch": 1.0679896173490748, "grad_norm": 0.30804362893104553, "learning_rate": 0.00014375064920634217, "loss": 11.6575, "step": 51020 }, { "epoch": 1.068010550113037, "grad_norm": 0.322549968957901, "learning_rate": 0.00014374867763682577, "loss": 11.6644, "step": 51021 }, { "epoch": 1.068031482876999, "grad_norm": 0.3113725781440735, "learning_rate": 0.00014374670604627853, "loss": 11.6772, "step": 51022 }, { "epoch": 1.0680524156409612, "grad_norm": 0.4090556204319, "learning_rate": 0.0001437447344347014, "loss": 11.6702, "step": 51023 }, { "epoch": 1.0680733484049234, "grad_norm": 0.3146859407424927, "learning_rate": 0.00014374276280209526, "loss": 11.6719, "step": 51024 }, { "epoch": 1.0680942811688856, "grad_norm": 0.333391010761261, "learning_rate": 0.00014374079114846112, "loss": 11.6527, "step": 51025 }, { "epoch": 1.0681152139328476, "grad_norm": 0.36185136437416077, "learning_rate": 0.0001437388194737999, "loss": 11.6569, "step": 51026 }, { "epoch": 1.0681361466968098, "grad_norm": 0.29053351283073425, "learning_rate": 0.00014373684777811257, "loss": 11.6718, "step": 51027 }, { "epoch": 1.068157079460772, "grad_norm": 0.454350084066391, "learning_rate": 0.0001437348760614, "loss": 11.6694, "step": 51028 }, { "epoch": 1.0681780122247342, "grad_norm": 0.33541175723075867, "learning_rate": 0.00014373290432366325, "loss": 11.6655, "step": 51029 }, { "epoch": 1.0681989449886964, "grad_norm": 0.3052726686000824, "learning_rate": 0.0001437309325649032, "loss": 11.6832, "step": 51030 }, { "epoch": 1.0682198777526584, "grad_norm": 0.323012113571167, "learning_rate": 0.00014372896078512082, "loss": 11.6637, "step": 51031 }, { "epoch": 1.0682408105166206, "grad_norm": 0.40562182664871216, "learning_rate": 0.00014372698898431707, "loss": 11.6805, "step": 51032 }, { "epoch": 1.0682617432805828, "grad_norm": 0.3512836694717407, "learning_rate": 0.00014372501716249285, "loss": 11.6822, "step": 51033 }, { "epoch": 1.068282676044545, "grad_norm": 0.30480971932411194, "learning_rate": 0.00014372304531964916, "loss": 11.6669, "step": 51034 }, { "epoch": 1.0683036088085072, "grad_norm": 0.2645265758037567, "learning_rate": 0.00014372107345578692, "loss": 11.6592, "step": 51035 }, { "epoch": 1.0683245415724691, "grad_norm": 0.28803300857543945, "learning_rate": 0.00014371910157090706, "loss": 11.6298, "step": 51036 }, { "epoch": 1.0683454743364313, "grad_norm": 0.28609344363212585, "learning_rate": 0.00014371712966501057, "loss": 11.6749, "step": 51037 }, { "epoch": 1.0683664071003935, "grad_norm": 0.35228222608566284, "learning_rate": 0.00014371515773809837, "loss": 11.6974, "step": 51038 }, { "epoch": 1.0683873398643557, "grad_norm": 0.31932422518730164, "learning_rate": 0.00014371318579017143, "loss": 11.6628, "step": 51039 }, { "epoch": 1.068408272628318, "grad_norm": 0.2940802276134491, "learning_rate": 0.0001437112138212307, "loss": 11.6574, "step": 51040 }, { "epoch": 1.06842920539228, "grad_norm": 0.3346446752548218, "learning_rate": 0.0001437092418312771, "loss": 11.6581, "step": 51041 }, { "epoch": 1.0684501381562421, "grad_norm": 0.6177146434783936, "learning_rate": 0.0001437072698203116, "loss": 11.6569, "step": 51042 }, { "epoch": 1.0684710709202043, "grad_norm": 0.33364298939704895, "learning_rate": 0.0001437052977883351, "loss": 11.6682, "step": 51043 }, { "epoch": 1.0684920036841665, "grad_norm": 0.26434093713760376, "learning_rate": 0.00014370332573534862, "loss": 11.6569, "step": 51044 }, { "epoch": 1.0685129364481285, "grad_norm": 0.2967694103717804, "learning_rate": 0.0001437013536613531, "loss": 11.669, "step": 51045 }, { "epoch": 1.0685338692120907, "grad_norm": 0.3218253254890442, "learning_rate": 0.0001436993815663494, "loss": 11.6501, "step": 51046 }, { "epoch": 1.068554801976053, "grad_norm": 0.36279526352882385, "learning_rate": 0.00014369740945033857, "loss": 11.678, "step": 51047 }, { "epoch": 1.068575734740015, "grad_norm": 0.301786869764328, "learning_rate": 0.0001436954373133215, "loss": 11.6566, "step": 51048 }, { "epoch": 1.0685966675039773, "grad_norm": 0.29228660464286804, "learning_rate": 0.0001436934651552992, "loss": 11.6611, "step": 51049 }, { "epoch": 1.0686176002679393, "grad_norm": 0.3806324601173401, "learning_rate": 0.00014369149297627256, "loss": 11.6703, "step": 51050 }, { "epoch": 1.0686385330319015, "grad_norm": 0.3189949691295624, "learning_rate": 0.0001436895207762425, "loss": 11.6816, "step": 51051 }, { "epoch": 1.0686594657958637, "grad_norm": 0.3714752793312073, "learning_rate": 0.00014368754855521008, "loss": 11.6746, "step": 51052 }, { "epoch": 1.0686803985598259, "grad_norm": 0.35668718814849854, "learning_rate": 0.00014368557631317614, "loss": 11.671, "step": 51053 }, { "epoch": 1.068701331323788, "grad_norm": 0.28155726194381714, "learning_rate": 0.00014368360405014167, "loss": 11.6635, "step": 51054 }, { "epoch": 1.06872226408775, "grad_norm": 0.32922008633613586, "learning_rate": 0.0001436816317661076, "loss": 11.66, "step": 51055 }, { "epoch": 1.0687431968517123, "grad_norm": 0.3710119128227234, "learning_rate": 0.0001436796594610749, "loss": 11.6747, "step": 51056 }, { "epoch": 1.0687641296156745, "grad_norm": 0.2691316306591034, "learning_rate": 0.00014367768713504454, "loss": 11.6774, "step": 51057 }, { "epoch": 1.0687850623796367, "grad_norm": 0.26518768072128296, "learning_rate": 0.0001436757147880174, "loss": 11.6621, "step": 51058 }, { "epoch": 1.0688059951435989, "grad_norm": 0.36884355545043945, "learning_rate": 0.00014367374241999455, "loss": 11.6647, "step": 51059 }, { "epoch": 1.0688269279075608, "grad_norm": 0.3209666907787323, "learning_rate": 0.00014367177003097678, "loss": 11.6686, "step": 51060 }, { "epoch": 1.068847860671523, "grad_norm": 0.35779204964637756, "learning_rate": 0.00014366979762096512, "loss": 11.6571, "step": 51061 }, { "epoch": 1.0688687934354852, "grad_norm": 0.2846372127532959, "learning_rate": 0.00014366782518996054, "loss": 11.6547, "step": 51062 }, { "epoch": 1.0688897261994474, "grad_norm": 0.3251328766345978, "learning_rate": 0.00014366585273796397, "loss": 11.6632, "step": 51063 }, { "epoch": 1.0689106589634094, "grad_norm": 0.4289904236793518, "learning_rate": 0.00014366388026497632, "loss": 11.6865, "step": 51064 }, { "epoch": 1.0689315917273716, "grad_norm": 0.3129127621650696, "learning_rate": 0.00014366190777099857, "loss": 11.6605, "step": 51065 }, { "epoch": 1.0689525244913338, "grad_norm": 0.2595110535621643, "learning_rate": 0.00014365993525603167, "loss": 11.6634, "step": 51066 }, { "epoch": 1.068973457255296, "grad_norm": 0.3323199152946472, "learning_rate": 0.00014365796272007656, "loss": 11.6718, "step": 51067 }, { "epoch": 1.0689943900192582, "grad_norm": 0.2736472487449646, "learning_rate": 0.0001436559901631342, "loss": 11.6678, "step": 51068 }, { "epoch": 1.0690153227832202, "grad_norm": 0.2847502529621124, "learning_rate": 0.00014365401758520557, "loss": 11.6598, "step": 51069 }, { "epoch": 1.0690362555471824, "grad_norm": 0.30648624897003174, "learning_rate": 0.00014365204498629152, "loss": 11.6781, "step": 51070 }, { "epoch": 1.0690571883111446, "grad_norm": 0.38268762826919556, "learning_rate": 0.00014365007236639307, "loss": 11.6735, "step": 51071 }, { "epoch": 1.0690781210751068, "grad_norm": 0.32288363575935364, "learning_rate": 0.00014364809972551115, "loss": 11.6809, "step": 51072 }, { "epoch": 1.069099053839069, "grad_norm": 0.2950347661972046, "learning_rate": 0.00014364612706364675, "loss": 11.6653, "step": 51073 }, { "epoch": 1.069119986603031, "grad_norm": 0.3870028555393219, "learning_rate": 0.00014364415438080077, "loss": 11.6775, "step": 51074 }, { "epoch": 1.0691409193669932, "grad_norm": 0.2795429527759552, "learning_rate": 0.00014364218167697414, "loss": 11.654, "step": 51075 }, { "epoch": 1.0691618521309554, "grad_norm": 0.33562010526657104, "learning_rate": 0.00014364020895216788, "loss": 11.6479, "step": 51076 }, { "epoch": 1.0691827848949176, "grad_norm": 0.31301939487457275, "learning_rate": 0.00014363823620638287, "loss": 11.6676, "step": 51077 }, { "epoch": 1.0692037176588798, "grad_norm": 0.36095482110977173, "learning_rate": 0.00014363626343962007, "loss": 11.6761, "step": 51078 }, { "epoch": 1.0692246504228418, "grad_norm": 0.34155869483947754, "learning_rate": 0.00014363429065188048, "loss": 11.6621, "step": 51079 }, { "epoch": 1.069245583186804, "grad_norm": 0.311612993478775, "learning_rate": 0.00014363231784316496, "loss": 11.6758, "step": 51080 }, { "epoch": 1.0692665159507662, "grad_norm": 0.2953568696975708, "learning_rate": 0.00014363034501347456, "loss": 11.6743, "step": 51081 }, { "epoch": 1.0692874487147284, "grad_norm": 0.30069366097450256, "learning_rate": 0.00014362837216281017, "loss": 11.6678, "step": 51082 }, { "epoch": 1.0693083814786903, "grad_norm": 0.4460517466068268, "learning_rate": 0.00014362639929117274, "loss": 11.6694, "step": 51083 }, { "epoch": 1.0693293142426525, "grad_norm": 0.3347591459751129, "learning_rate": 0.00014362442639856323, "loss": 11.68, "step": 51084 }, { "epoch": 1.0693502470066147, "grad_norm": 0.27922314405441284, "learning_rate": 0.00014362245348498257, "loss": 11.6628, "step": 51085 }, { "epoch": 1.069371179770577, "grad_norm": 0.30932512879371643, "learning_rate": 0.00014362048055043176, "loss": 11.6661, "step": 51086 }, { "epoch": 1.0693921125345391, "grad_norm": 0.4259263873100281, "learning_rate": 0.00014361850759491167, "loss": 11.6677, "step": 51087 }, { "epoch": 1.0694130452985011, "grad_norm": 0.2878856062889099, "learning_rate": 0.0001436165346184233, "loss": 11.6776, "step": 51088 }, { "epoch": 1.0694339780624633, "grad_norm": 0.3949671983718872, "learning_rate": 0.0001436145616209676, "loss": 11.6617, "step": 51089 }, { "epoch": 1.0694549108264255, "grad_norm": 0.36414581537246704, "learning_rate": 0.0001436125886025455, "loss": 11.6684, "step": 51090 }, { "epoch": 1.0694758435903877, "grad_norm": 0.3506936728954315, "learning_rate": 0.00014361061556315797, "loss": 11.67, "step": 51091 }, { "epoch": 1.06949677635435, "grad_norm": 0.23960350453853607, "learning_rate": 0.0001436086425028059, "loss": 11.6812, "step": 51092 }, { "epoch": 1.069517709118312, "grad_norm": 0.28528398275375366, "learning_rate": 0.0001436066694214903, "loss": 11.6697, "step": 51093 }, { "epoch": 1.069538641882274, "grad_norm": 0.31367647647857666, "learning_rate": 0.00014360469631921212, "loss": 11.6802, "step": 51094 }, { "epoch": 1.0695595746462363, "grad_norm": 0.2410101592540741, "learning_rate": 0.0001436027231959723, "loss": 11.6691, "step": 51095 }, { "epoch": 1.0695805074101985, "grad_norm": 0.28554069995880127, "learning_rate": 0.00014360075005177174, "loss": 11.6588, "step": 51096 }, { "epoch": 1.0696014401741607, "grad_norm": 0.28877851366996765, "learning_rate": 0.00014359877688661143, "loss": 11.6628, "step": 51097 }, { "epoch": 1.0696223729381227, "grad_norm": 0.3162131607532501, "learning_rate": 0.00014359680370049235, "loss": 11.6673, "step": 51098 }, { "epoch": 1.0696433057020849, "grad_norm": 0.41649550199508667, "learning_rate": 0.00014359483049341537, "loss": 11.6663, "step": 51099 }, { "epoch": 1.069664238466047, "grad_norm": 0.38776782155036926, "learning_rate": 0.0001435928572653815, "loss": 11.6507, "step": 51100 }, { "epoch": 1.0696851712300093, "grad_norm": 0.3676548898220062, "learning_rate": 0.00014359088401639164, "loss": 11.6576, "step": 51101 }, { "epoch": 1.0697061039939713, "grad_norm": 0.39620015025138855, "learning_rate": 0.0001435889107464468, "loss": 11.6611, "step": 51102 }, { "epoch": 1.0697270367579335, "grad_norm": 0.27947095036506653, "learning_rate": 0.00014358693745554789, "loss": 11.6534, "step": 51103 }, { "epoch": 1.0697479695218957, "grad_norm": 0.3206680119037628, "learning_rate": 0.00014358496414369586, "loss": 11.6679, "step": 51104 }, { "epoch": 1.0697689022858579, "grad_norm": 0.29320332407951355, "learning_rate": 0.0001435829908108917, "loss": 11.6599, "step": 51105 }, { "epoch": 1.06978983504982, "grad_norm": 0.2877601981163025, "learning_rate": 0.0001435810174571363, "loss": 11.6886, "step": 51106 }, { "epoch": 1.069810767813782, "grad_norm": 0.2822532057762146, "learning_rate": 0.0001435790440824306, "loss": 11.6591, "step": 51107 }, { "epoch": 1.0698317005777442, "grad_norm": 0.3475971817970276, "learning_rate": 0.00014357707068677562, "loss": 11.6716, "step": 51108 }, { "epoch": 1.0698526333417064, "grad_norm": 0.2810264527797699, "learning_rate": 0.00014357509727017223, "loss": 11.656, "step": 51109 }, { "epoch": 1.0698735661056686, "grad_norm": 0.43413740396499634, "learning_rate": 0.00014357312383262146, "loss": 11.6541, "step": 51110 }, { "epoch": 1.0698944988696308, "grad_norm": 0.36880284547805786, "learning_rate": 0.0001435711503741242, "loss": 11.6646, "step": 51111 }, { "epoch": 1.0699154316335928, "grad_norm": 0.2948036193847656, "learning_rate": 0.00014356917689468143, "loss": 11.6767, "step": 51112 }, { "epoch": 1.069936364397555, "grad_norm": 0.30350279808044434, "learning_rate": 0.00014356720339429405, "loss": 11.6682, "step": 51113 }, { "epoch": 1.0699572971615172, "grad_norm": 0.32746678590774536, "learning_rate": 0.00014356522987296307, "loss": 11.6847, "step": 51114 }, { "epoch": 1.0699782299254794, "grad_norm": 0.3200685977935791, "learning_rate": 0.0001435632563306894, "loss": 11.6423, "step": 51115 }, { "epoch": 1.0699991626894416, "grad_norm": 0.33923599123954773, "learning_rate": 0.00014356128276747402, "loss": 11.6632, "step": 51116 }, { "epoch": 1.0700200954534036, "grad_norm": 0.30109819769859314, "learning_rate": 0.00014355930918331784, "loss": 11.6678, "step": 51117 }, { "epoch": 1.0700410282173658, "grad_norm": 0.29760149121284485, "learning_rate": 0.0001435573355782218, "loss": 11.6642, "step": 51118 }, { "epoch": 1.070061960981328, "grad_norm": 0.27476945519447327, "learning_rate": 0.00014355536195218694, "loss": 11.6592, "step": 51119 }, { "epoch": 1.0700828937452902, "grad_norm": 0.30911555886268616, "learning_rate": 0.00014355338830521409, "loss": 11.6504, "step": 51120 }, { "epoch": 1.0701038265092522, "grad_norm": 0.3868080675601959, "learning_rate": 0.00014355141463730426, "loss": 11.6776, "step": 51121 }, { "epoch": 1.0701247592732144, "grad_norm": 0.2947985529899597, "learning_rate": 0.00014354944094845841, "loss": 11.665, "step": 51122 }, { "epoch": 1.0701456920371766, "grad_norm": 0.30314546823501587, "learning_rate": 0.00014354746723867744, "loss": 11.6786, "step": 51123 }, { "epoch": 1.0701666248011388, "grad_norm": 0.3040059208869934, "learning_rate": 0.00014354549350796238, "loss": 11.6723, "step": 51124 }, { "epoch": 1.070187557565101, "grad_norm": 0.28970688581466675, "learning_rate": 0.0001435435197563141, "loss": 11.6706, "step": 51125 }, { "epoch": 1.070208490329063, "grad_norm": 0.30802589654922485, "learning_rate": 0.00014354154598373358, "loss": 11.6542, "step": 51126 }, { "epoch": 1.0702294230930252, "grad_norm": 0.300433486700058, "learning_rate": 0.00014353957219022175, "loss": 11.6628, "step": 51127 }, { "epoch": 1.0702503558569874, "grad_norm": 0.2999882698059082, "learning_rate": 0.00014353759837577957, "loss": 11.681, "step": 51128 }, { "epoch": 1.0702712886209496, "grad_norm": 0.3339501619338989, "learning_rate": 0.00014353562454040805, "loss": 11.6659, "step": 51129 }, { "epoch": 1.0702922213849118, "grad_norm": 0.2644074559211731, "learning_rate": 0.00014353365068410804, "loss": 11.6694, "step": 51130 }, { "epoch": 1.0703131541488737, "grad_norm": 0.4110409617424011, "learning_rate": 0.00014353167680688055, "loss": 11.6841, "step": 51131 }, { "epoch": 1.070334086912836, "grad_norm": 0.31467583775520325, "learning_rate": 0.0001435297029087265, "loss": 11.6729, "step": 51132 }, { "epoch": 1.0703550196767981, "grad_norm": 0.33148711919784546, "learning_rate": 0.00014352772898964687, "loss": 11.6709, "step": 51133 }, { "epoch": 1.0703759524407603, "grad_norm": 0.34722065925598145, "learning_rate": 0.00014352575504964255, "loss": 11.6673, "step": 51134 }, { "epoch": 1.0703968852047225, "grad_norm": 0.3285212814807892, "learning_rate": 0.00014352378108871454, "loss": 11.6687, "step": 51135 }, { "epoch": 1.0704178179686845, "grad_norm": 0.3681904971599579, "learning_rate": 0.0001435218071068638, "loss": 11.6822, "step": 51136 }, { "epoch": 1.0704387507326467, "grad_norm": 0.2842243015766144, "learning_rate": 0.0001435198331040912, "loss": 11.6692, "step": 51137 }, { "epoch": 1.070459683496609, "grad_norm": 0.28862348198890686, "learning_rate": 0.00014351785908039782, "loss": 11.6671, "step": 51138 }, { "epoch": 1.0704806162605711, "grad_norm": 0.27642154693603516, "learning_rate": 0.00014351588503578447, "loss": 11.675, "step": 51139 }, { "epoch": 1.070501549024533, "grad_norm": 0.3096533417701721, "learning_rate": 0.0001435139109702522, "loss": 11.678, "step": 51140 }, { "epoch": 1.0705224817884953, "grad_norm": 0.26575222611427307, "learning_rate": 0.00014351193688380188, "loss": 11.6696, "step": 51141 }, { "epoch": 1.0705434145524575, "grad_norm": 0.3495827615261078, "learning_rate": 0.00014350996277643453, "loss": 11.6667, "step": 51142 }, { "epoch": 1.0705643473164197, "grad_norm": 0.38796985149383545, "learning_rate": 0.00014350798864815108, "loss": 11.6653, "step": 51143 }, { "epoch": 1.070585280080382, "grad_norm": 0.3167247176170349, "learning_rate": 0.00014350601449895244, "loss": 11.6586, "step": 51144 }, { "epoch": 1.0706062128443439, "grad_norm": 0.4704541265964508, "learning_rate": 0.0001435040403288396, "loss": 11.6689, "step": 51145 }, { "epoch": 1.070627145608306, "grad_norm": 0.29679471254348755, "learning_rate": 0.00014350206613781348, "loss": 11.6665, "step": 51146 }, { "epoch": 1.0706480783722683, "grad_norm": 0.3026726245880127, "learning_rate": 0.0001435000919258751, "loss": 11.678, "step": 51147 }, { "epoch": 1.0706690111362305, "grad_norm": 0.32504722476005554, "learning_rate": 0.00014349811769302532, "loss": 11.6639, "step": 51148 }, { "epoch": 1.0706899439001927, "grad_norm": 0.29593393206596375, "learning_rate": 0.0001434961434392651, "loss": 11.6447, "step": 51149 }, { "epoch": 1.0707108766641547, "grad_norm": 0.3214572072029114, "learning_rate": 0.00014349416916459545, "loss": 11.6596, "step": 51150 }, { "epoch": 1.0707318094281169, "grad_norm": 0.33900073170661926, "learning_rate": 0.00014349219486901725, "loss": 11.6654, "step": 51151 }, { "epoch": 1.070752742192079, "grad_norm": 0.2500617802143097, "learning_rate": 0.0001434902205525315, "loss": 11.6579, "step": 51152 }, { "epoch": 1.0707736749560413, "grad_norm": 0.3711583912372589, "learning_rate": 0.0001434882462151391, "loss": 11.6652, "step": 51153 }, { "epoch": 1.0707946077200035, "grad_norm": 0.2757208049297333, "learning_rate": 0.00014348627185684104, "loss": 11.6634, "step": 51154 }, { "epoch": 1.0708155404839654, "grad_norm": 0.3647598624229431, "learning_rate": 0.0001434842974776383, "loss": 11.6819, "step": 51155 }, { "epoch": 1.0708364732479276, "grad_norm": 0.25827574729919434, "learning_rate": 0.00014348232307753172, "loss": 11.6613, "step": 51156 }, { "epoch": 1.0708574060118898, "grad_norm": 0.3276554346084595, "learning_rate": 0.00014348034865652238, "loss": 11.6634, "step": 51157 }, { "epoch": 1.070878338775852, "grad_norm": 0.2517111897468567, "learning_rate": 0.00014347837421461112, "loss": 11.665, "step": 51158 }, { "epoch": 1.070899271539814, "grad_norm": 0.32334399223327637, "learning_rate": 0.00014347639975179893, "loss": 11.6619, "step": 51159 }, { "epoch": 1.0709202043037762, "grad_norm": 0.31759902834892273, "learning_rate": 0.00014347442526808682, "loss": 11.6418, "step": 51160 }, { "epoch": 1.0709411370677384, "grad_norm": 0.3526005148887634, "learning_rate": 0.00014347245076347563, "loss": 11.6614, "step": 51161 }, { "epoch": 1.0709620698317006, "grad_norm": 0.28244492411613464, "learning_rate": 0.0001434704762379664, "loss": 11.6711, "step": 51162 }, { "epoch": 1.0709830025956628, "grad_norm": 0.29921483993530273, "learning_rate": 0.00014346850169156, "loss": 11.6549, "step": 51163 }, { "epoch": 1.0710039353596248, "grad_norm": 0.26258060336112976, "learning_rate": 0.00014346652712425746, "loss": 11.6754, "step": 51164 }, { "epoch": 1.071024868123587, "grad_norm": 0.26053160429000854, "learning_rate": 0.00014346455253605962, "loss": 11.6633, "step": 51165 }, { "epoch": 1.0710458008875492, "grad_norm": 0.3931850790977478, "learning_rate": 0.00014346257792696756, "loss": 11.6616, "step": 51166 }, { "epoch": 1.0710667336515114, "grad_norm": 0.37300801277160645, "learning_rate": 0.00014346060329698215, "loss": 11.6804, "step": 51167 }, { "epoch": 1.0710876664154736, "grad_norm": 0.30126699805259705, "learning_rate": 0.00014345862864610436, "loss": 11.6448, "step": 51168 }, { "epoch": 1.0711085991794356, "grad_norm": 0.25128471851348877, "learning_rate": 0.00014345665397433517, "loss": 11.6532, "step": 51169 }, { "epoch": 1.0711295319433978, "grad_norm": 0.3611181676387787, "learning_rate": 0.00014345467928167545, "loss": 11.6641, "step": 51170 }, { "epoch": 1.07115046470736, "grad_norm": 0.3559775650501251, "learning_rate": 0.0001434527045681262, "loss": 11.6781, "step": 51171 }, { "epoch": 1.0711713974713222, "grad_norm": 0.3100903630256653, "learning_rate": 0.00014345072983368837, "loss": 11.6786, "step": 51172 }, { "epoch": 1.0711923302352844, "grad_norm": 0.31372877955436707, "learning_rate": 0.0001434487550783629, "loss": 11.6787, "step": 51173 }, { "epoch": 1.0712132629992464, "grad_norm": 0.3423885107040405, "learning_rate": 0.00014344678030215076, "loss": 11.665, "step": 51174 }, { "epoch": 1.0712341957632086, "grad_norm": 0.26557657122612, "learning_rate": 0.00014344480550505285, "loss": 11.6751, "step": 51175 }, { "epoch": 1.0712551285271708, "grad_norm": 0.2920616865158081, "learning_rate": 0.00014344283068707018, "loss": 11.6727, "step": 51176 }, { "epoch": 1.071276061291133, "grad_norm": 0.2585829198360443, "learning_rate": 0.00014344085584820366, "loss": 11.6577, "step": 51177 }, { "epoch": 1.071296994055095, "grad_norm": 0.2861858010292053, "learning_rate": 0.00014343888098845424, "loss": 11.6628, "step": 51178 }, { "epoch": 1.0713179268190571, "grad_norm": 0.4044576585292816, "learning_rate": 0.0001434369061078229, "loss": 11.6661, "step": 51179 }, { "epoch": 1.0713388595830193, "grad_norm": 0.2946976125240326, "learning_rate": 0.00014343493120631058, "loss": 11.6591, "step": 51180 }, { "epoch": 1.0713597923469815, "grad_norm": 0.35517704486846924, "learning_rate": 0.0001434329562839182, "loss": 11.6678, "step": 51181 }, { "epoch": 1.0713807251109437, "grad_norm": 0.26881709694862366, "learning_rate": 0.0001434309813406467, "loss": 11.669, "step": 51182 }, { "epoch": 1.0714016578749057, "grad_norm": 0.3183257579803467, "learning_rate": 0.0001434290063764971, "loss": 11.6641, "step": 51183 }, { "epoch": 1.071422590638868, "grad_norm": 0.3442075848579407, "learning_rate": 0.0001434270313914703, "loss": 11.6759, "step": 51184 }, { "epoch": 1.0714435234028301, "grad_norm": 0.3591132164001465, "learning_rate": 0.00014342505638556723, "loss": 11.6768, "step": 51185 }, { "epoch": 1.0714644561667923, "grad_norm": 0.3307717740535736, "learning_rate": 0.00014342308135878887, "loss": 11.6682, "step": 51186 }, { "epoch": 1.0714853889307545, "grad_norm": 0.28524917364120483, "learning_rate": 0.00014342110631113619, "loss": 11.6788, "step": 51187 }, { "epoch": 1.0715063216947165, "grad_norm": 0.33276867866516113, "learning_rate": 0.00014341913124261009, "loss": 11.6662, "step": 51188 }, { "epoch": 1.0715272544586787, "grad_norm": 0.42202073335647583, "learning_rate": 0.00014341715615321155, "loss": 11.6637, "step": 51189 }, { "epoch": 1.071548187222641, "grad_norm": 0.26262640953063965, "learning_rate": 0.00014341518104294152, "loss": 11.6735, "step": 51190 }, { "epoch": 1.071569119986603, "grad_norm": 0.278482049703598, "learning_rate": 0.00014341320591180093, "loss": 11.6705, "step": 51191 }, { "epoch": 1.0715900527505653, "grad_norm": 0.29908525943756104, "learning_rate": 0.00014341123075979075, "loss": 11.6585, "step": 51192 }, { "epoch": 1.0716109855145273, "grad_norm": 0.3423933982849121, "learning_rate": 0.00014340925558691193, "loss": 11.6709, "step": 51193 }, { "epoch": 1.0716319182784895, "grad_norm": 0.35614609718322754, "learning_rate": 0.0001434072803931654, "loss": 11.6824, "step": 51194 }, { "epoch": 1.0716528510424517, "grad_norm": 0.2774319648742676, "learning_rate": 0.00014340530517855212, "loss": 11.6644, "step": 51195 }, { "epoch": 1.0716737838064139, "grad_norm": 0.2829224765300751, "learning_rate": 0.00014340332994307303, "loss": 11.6848, "step": 51196 }, { "epoch": 1.0716947165703758, "grad_norm": 0.36841723322868347, "learning_rate": 0.0001434013546867291, "loss": 11.6659, "step": 51197 }, { "epoch": 1.071715649334338, "grad_norm": 0.39664724469184875, "learning_rate": 0.00014339937940952127, "loss": 11.6712, "step": 51198 }, { "epoch": 1.0717365820983003, "grad_norm": 0.34989213943481445, "learning_rate": 0.0001433974041114505, "loss": 11.6767, "step": 51199 }, { "epoch": 1.0717575148622625, "grad_norm": 0.26961591839790344, "learning_rate": 0.0001433954287925177, "loss": 11.6765, "step": 51200 }, { "epoch": 1.0717784476262247, "grad_norm": 0.3096422851085663, "learning_rate": 0.00014339345345272387, "loss": 11.6544, "step": 51201 }, { "epoch": 1.0717993803901866, "grad_norm": 0.34482890367507935, "learning_rate": 0.00014339147809206993, "loss": 11.677, "step": 51202 }, { "epoch": 1.0718203131541488, "grad_norm": 0.30187690258026123, "learning_rate": 0.00014338950271055684, "loss": 11.6788, "step": 51203 }, { "epoch": 1.071841245918111, "grad_norm": 0.3469350337982178, "learning_rate": 0.00014338752730818553, "loss": 11.6814, "step": 51204 }, { "epoch": 1.0718621786820732, "grad_norm": 0.30221468210220337, "learning_rate": 0.000143385551884957, "loss": 11.6677, "step": 51205 }, { "epoch": 1.0718831114460354, "grad_norm": 0.2605780065059662, "learning_rate": 0.00014338357644087216, "loss": 11.6897, "step": 51206 }, { "epoch": 1.0719040442099974, "grad_norm": 0.32194963097572327, "learning_rate": 0.00014338160097593193, "loss": 11.6528, "step": 51207 }, { "epoch": 1.0719249769739596, "grad_norm": 0.5290716886520386, "learning_rate": 0.00014337962549013734, "loss": 11.6725, "step": 51208 }, { "epoch": 1.0719459097379218, "grad_norm": 0.36429911851882935, "learning_rate": 0.00014337764998348927, "loss": 11.6625, "step": 51209 }, { "epoch": 1.071966842501884, "grad_norm": 0.28373727202415466, "learning_rate": 0.0001433756744559887, "loss": 11.6545, "step": 51210 }, { "epoch": 1.0719877752658462, "grad_norm": 0.3457666337490082, "learning_rate": 0.00014337369890763657, "loss": 11.6585, "step": 51211 }, { "epoch": 1.0720087080298082, "grad_norm": 0.34382614493370056, "learning_rate": 0.00014337172333843382, "loss": 11.6749, "step": 51212 }, { "epoch": 1.0720296407937704, "grad_norm": 0.31938329339027405, "learning_rate": 0.00014336974774838143, "loss": 11.6583, "step": 51213 }, { "epoch": 1.0720505735577326, "grad_norm": 0.3418465256690979, "learning_rate": 0.00014336777213748035, "loss": 11.6615, "step": 51214 }, { "epoch": 1.0720715063216948, "grad_norm": 0.3189723491668701, "learning_rate": 0.00014336579650573147, "loss": 11.6641, "step": 51215 }, { "epoch": 1.0720924390856568, "grad_norm": 0.3610745370388031, "learning_rate": 0.00014336382085313582, "loss": 11.6745, "step": 51216 }, { "epoch": 1.072113371849619, "grad_norm": 0.34832921624183655, "learning_rate": 0.0001433618451796943, "loss": 11.6688, "step": 51217 }, { "epoch": 1.0721343046135812, "grad_norm": 0.33343178033828735, "learning_rate": 0.00014335986948540787, "loss": 11.6798, "step": 51218 }, { "epoch": 1.0721552373775434, "grad_norm": 0.30998164415359497, "learning_rate": 0.00014335789377027748, "loss": 11.667, "step": 51219 }, { "epoch": 1.0721761701415056, "grad_norm": 0.35924702882766724, "learning_rate": 0.00014335591803430408, "loss": 11.6488, "step": 51220 }, { "epoch": 1.0721971029054675, "grad_norm": 0.4394785165786743, "learning_rate": 0.00014335394227748863, "loss": 11.6558, "step": 51221 }, { "epoch": 1.0722180356694297, "grad_norm": 0.33347761631011963, "learning_rate": 0.00014335196649983205, "loss": 11.6704, "step": 51222 }, { "epoch": 1.072238968433392, "grad_norm": 0.32886144518852234, "learning_rate": 0.00014334999070133534, "loss": 11.6754, "step": 51223 }, { "epoch": 1.0722599011973541, "grad_norm": 0.3206217288970947, "learning_rate": 0.0001433480148819994, "loss": 11.668, "step": 51224 }, { "epoch": 1.0722808339613163, "grad_norm": 0.3075861930847168, "learning_rate": 0.0001433460390418252, "loss": 11.6665, "step": 51225 }, { "epoch": 1.0723017667252783, "grad_norm": 0.3298802971839905, "learning_rate": 0.0001433440631808137, "loss": 11.6628, "step": 51226 }, { "epoch": 1.0723226994892405, "grad_norm": 0.3575957119464874, "learning_rate": 0.00014334208729896585, "loss": 11.6818, "step": 51227 }, { "epoch": 1.0723436322532027, "grad_norm": 0.34966105222702026, "learning_rate": 0.00014334011139628257, "loss": 11.6473, "step": 51228 }, { "epoch": 1.072364565017165, "grad_norm": 0.3365279734134674, "learning_rate": 0.00014333813547276486, "loss": 11.6695, "step": 51229 }, { "epoch": 1.0723854977811271, "grad_norm": 0.40814343094825745, "learning_rate": 0.00014333615952841363, "loss": 11.6752, "step": 51230 }, { "epoch": 1.072406430545089, "grad_norm": 0.30186572670936584, "learning_rate": 0.0001433341835632298, "loss": 11.6589, "step": 51231 }, { "epoch": 1.0724273633090513, "grad_norm": 0.261289119720459, "learning_rate": 0.0001433322075772144, "loss": 11.6484, "step": 51232 }, { "epoch": 1.0724482960730135, "grad_norm": 0.30770179629325867, "learning_rate": 0.00014333023157036832, "loss": 11.6725, "step": 51233 }, { "epoch": 1.0724692288369757, "grad_norm": 0.31467705965042114, "learning_rate": 0.00014332825554269252, "loss": 11.6786, "step": 51234 }, { "epoch": 1.0724901616009377, "grad_norm": 0.36147087812423706, "learning_rate": 0.000143326279494188, "loss": 11.6611, "step": 51235 }, { "epoch": 1.0725110943648999, "grad_norm": 0.2944040298461914, "learning_rate": 0.00014332430342485565, "loss": 11.6463, "step": 51236 }, { "epoch": 1.072532027128862, "grad_norm": 0.28305697441101074, "learning_rate": 0.0001433223273346964, "loss": 11.6646, "step": 51237 }, { "epoch": 1.0725529598928243, "grad_norm": 0.35281357169151306, "learning_rate": 0.0001433203512237113, "loss": 11.6677, "step": 51238 }, { "epoch": 1.0725738926567865, "grad_norm": 0.3063628375530243, "learning_rate": 0.0001433183750919012, "loss": 11.6621, "step": 51239 }, { "epoch": 1.0725948254207485, "grad_norm": 0.3719456195831299, "learning_rate": 0.00014331639893926711, "loss": 11.6502, "step": 51240 }, { "epoch": 1.0726157581847107, "grad_norm": 0.44489535689353943, "learning_rate": 0.00014331442276580995, "loss": 11.6878, "step": 51241 }, { "epoch": 1.0726366909486729, "grad_norm": 0.28527480363845825, "learning_rate": 0.00014331244657153066, "loss": 11.668, "step": 51242 }, { "epoch": 1.072657623712635, "grad_norm": 0.35769039392471313, "learning_rate": 0.00014331047035643027, "loss": 11.6901, "step": 51243 }, { "epoch": 1.0726785564765973, "grad_norm": 0.3456767201423645, "learning_rate": 0.00014330849412050962, "loss": 11.6713, "step": 51244 }, { "epoch": 1.0726994892405592, "grad_norm": 0.30998340249061584, "learning_rate": 0.00014330651786376972, "loss": 11.6812, "step": 51245 }, { "epoch": 1.0727204220045214, "grad_norm": 0.3238389790058136, "learning_rate": 0.0001433045415862115, "loss": 11.6557, "step": 51246 }, { "epoch": 1.0727413547684836, "grad_norm": 0.29974183440208435, "learning_rate": 0.0001433025652878359, "loss": 11.6601, "step": 51247 }, { "epoch": 1.0727622875324458, "grad_norm": 0.314144104719162, "learning_rate": 0.00014330058896864394, "loss": 11.6838, "step": 51248 }, { "epoch": 1.072783220296408, "grad_norm": 0.5100511312484741, "learning_rate": 0.00014329861262863647, "loss": 11.6777, "step": 51249 }, { "epoch": 1.07280415306037, "grad_norm": 1.024469017982483, "learning_rate": 0.00014329663626781454, "loss": 11.6829, "step": 51250 }, { "epoch": 1.0728250858243322, "grad_norm": 0.31599879264831543, "learning_rate": 0.000143294659886179, "loss": 11.6705, "step": 51251 }, { "epoch": 1.0728460185882944, "grad_norm": 0.44478023052215576, "learning_rate": 0.00014329268348373087, "loss": 11.6633, "step": 51252 }, { "epoch": 1.0728669513522566, "grad_norm": 0.8508807420730591, "learning_rate": 0.00014329070706047106, "loss": 11.6906, "step": 51253 }, { "epoch": 1.0728878841162186, "grad_norm": 0.27662646770477295, "learning_rate": 0.0001432887306164006, "loss": 11.6765, "step": 51254 }, { "epoch": 1.0729088168801808, "grad_norm": 0.32501718401908875, "learning_rate": 0.00014328675415152032, "loss": 11.6791, "step": 51255 }, { "epoch": 1.072929749644143, "grad_norm": 0.2723185420036316, "learning_rate": 0.00014328477766583126, "loss": 11.6718, "step": 51256 }, { "epoch": 1.0729506824081052, "grad_norm": 0.35657569766044617, "learning_rate": 0.00014328280115933432, "loss": 11.6613, "step": 51257 }, { "epoch": 1.0729716151720674, "grad_norm": 0.35426434874534607, "learning_rate": 0.00014328082463203046, "loss": 11.6561, "step": 51258 }, { "epoch": 1.0729925479360294, "grad_norm": 0.31482261419296265, "learning_rate": 0.00014327884808392068, "loss": 11.6538, "step": 51259 }, { "epoch": 1.0730134806999916, "grad_norm": 0.4179348945617676, "learning_rate": 0.00014327687151500585, "loss": 11.6701, "step": 51260 }, { "epoch": 1.0730344134639538, "grad_norm": 0.34875038266181946, "learning_rate": 0.000143274894925287, "loss": 11.6695, "step": 51261 }, { "epoch": 1.073055346227916, "grad_norm": 0.29073792695999146, "learning_rate": 0.000143272918314765, "loss": 11.6638, "step": 51262 }, { "epoch": 1.0730762789918782, "grad_norm": 0.3826243281364441, "learning_rate": 0.0001432709416834409, "loss": 11.6716, "step": 51263 }, { "epoch": 1.0730972117558402, "grad_norm": 0.32610955834388733, "learning_rate": 0.00014326896503131552, "loss": 11.6752, "step": 51264 }, { "epoch": 1.0731181445198024, "grad_norm": 0.39163118600845337, "learning_rate": 0.00014326698835838992, "loss": 11.6735, "step": 51265 }, { "epoch": 1.0731390772837646, "grad_norm": 0.30309170484542847, "learning_rate": 0.000143265011664665, "loss": 11.6821, "step": 51266 }, { "epoch": 1.0731600100477268, "grad_norm": 0.25489845871925354, "learning_rate": 0.00014326303495014174, "loss": 11.664, "step": 51267 }, { "epoch": 1.073180942811689, "grad_norm": 0.27255281805992126, "learning_rate": 0.00014326105821482106, "loss": 11.6555, "step": 51268 }, { "epoch": 1.073201875575651, "grad_norm": 0.38128015398979187, "learning_rate": 0.0001432590814587039, "loss": 11.6606, "step": 51269 }, { "epoch": 1.0732228083396131, "grad_norm": 0.437094509601593, "learning_rate": 0.00014325710468179127, "loss": 11.6722, "step": 51270 }, { "epoch": 1.0732437411035753, "grad_norm": 0.3052038252353668, "learning_rate": 0.00014325512788408406, "loss": 11.6672, "step": 51271 }, { "epoch": 1.0732646738675375, "grad_norm": 0.2781028747558594, "learning_rate": 0.00014325315106558328, "loss": 11.6607, "step": 51272 }, { "epoch": 1.0732856066314995, "grad_norm": 0.33024346828460693, "learning_rate": 0.0001432511742262898, "loss": 11.6618, "step": 51273 }, { "epoch": 1.0733065393954617, "grad_norm": 0.31107547879219055, "learning_rate": 0.00014324919736620465, "loss": 11.6601, "step": 51274 }, { "epoch": 1.073327472159424, "grad_norm": 0.2826472520828247, "learning_rate": 0.0001432472204853287, "loss": 11.6608, "step": 51275 }, { "epoch": 1.0733484049233861, "grad_norm": 0.3018525540828705, "learning_rate": 0.00014324524358366297, "loss": 11.6585, "step": 51276 }, { "epoch": 1.0733693376873483, "grad_norm": 0.3125064969062805, "learning_rate": 0.00014324326666120837, "loss": 11.6619, "step": 51277 }, { "epoch": 1.0733902704513103, "grad_norm": 0.300620973110199, "learning_rate": 0.00014324128971796588, "loss": 11.671, "step": 51278 }, { "epoch": 1.0734112032152725, "grad_norm": 0.3098186254501343, "learning_rate": 0.00014323931275393644, "loss": 11.6761, "step": 51279 }, { "epoch": 1.0734321359792347, "grad_norm": 0.32913610339164734, "learning_rate": 0.00014323733576912097, "loss": 11.6704, "step": 51280 }, { "epoch": 1.073453068743197, "grad_norm": 0.3643815815448761, "learning_rate": 0.00014323535876352049, "loss": 11.6614, "step": 51281 }, { "epoch": 1.073474001507159, "grad_norm": 0.2932639718055725, "learning_rate": 0.00014323338173713587, "loss": 11.6849, "step": 51282 }, { "epoch": 1.073494934271121, "grad_norm": 0.2987174391746521, "learning_rate": 0.0001432314046899681, "loss": 11.6497, "step": 51283 }, { "epoch": 1.0735158670350833, "grad_norm": 0.3344670534133911, "learning_rate": 0.00014322942762201815, "loss": 11.6653, "step": 51284 }, { "epoch": 1.0735367997990455, "grad_norm": 0.34735968708992004, "learning_rate": 0.00014322745053328692, "loss": 11.6742, "step": 51285 }, { "epoch": 1.0735577325630077, "grad_norm": 0.23250102996826172, "learning_rate": 0.0001432254734237754, "loss": 11.65, "step": 51286 }, { "epoch": 1.0735786653269699, "grad_norm": 0.3107312321662903, "learning_rate": 0.00014322349629348455, "loss": 11.66, "step": 51287 }, { "epoch": 1.0735995980909319, "grad_norm": 0.35863348841667175, "learning_rate": 0.00014322151914241527, "loss": 11.6764, "step": 51288 }, { "epoch": 1.073620530854894, "grad_norm": 0.35198071599006653, "learning_rate": 0.00014321954197056856, "loss": 11.6596, "step": 51289 }, { "epoch": 1.0736414636188563, "grad_norm": 0.3174622058868408, "learning_rate": 0.0001432175647779453, "loss": 11.6665, "step": 51290 }, { "epoch": 1.0736623963828185, "grad_norm": 0.31136444211006165, "learning_rate": 0.00014321558756454653, "loss": 11.6783, "step": 51291 }, { "epoch": 1.0736833291467804, "grad_norm": 0.3107373118400574, "learning_rate": 0.00014321361033037318, "loss": 11.6629, "step": 51292 }, { "epoch": 1.0737042619107426, "grad_norm": 0.31471720337867737, "learning_rate": 0.00014321163307542614, "loss": 11.6782, "step": 51293 }, { "epoch": 1.0737251946747048, "grad_norm": 0.2731490731239319, "learning_rate": 0.00014320965579970647, "loss": 11.6595, "step": 51294 }, { "epoch": 1.073746127438667, "grad_norm": 0.2882726490497589, "learning_rate": 0.000143207678503215, "loss": 11.6748, "step": 51295 }, { "epoch": 1.0737670602026292, "grad_norm": 0.32046762108802795, "learning_rate": 0.00014320570118595273, "loss": 11.6629, "step": 51296 }, { "epoch": 1.0737879929665912, "grad_norm": 0.26147255301475525, "learning_rate": 0.00014320372384792063, "loss": 11.6709, "step": 51297 }, { "epoch": 1.0738089257305534, "grad_norm": 0.3147031366825104, "learning_rate": 0.0001432017464891196, "loss": 11.6632, "step": 51298 }, { "epoch": 1.0738298584945156, "grad_norm": 0.33426156640052795, "learning_rate": 0.00014319976910955067, "loss": 11.6658, "step": 51299 }, { "epoch": 1.0738507912584778, "grad_norm": 0.348676860332489, "learning_rate": 0.00014319779170921473, "loss": 11.6586, "step": 51300 }, { "epoch": 1.07387172402244, "grad_norm": 0.3107355535030365, "learning_rate": 0.00014319581428811273, "loss": 11.6668, "step": 51301 }, { "epoch": 1.073892656786402, "grad_norm": 0.3596739172935486, "learning_rate": 0.00014319383684624566, "loss": 11.689, "step": 51302 }, { "epoch": 1.0739135895503642, "grad_norm": 0.2514776587486267, "learning_rate": 0.0001431918593836144, "loss": 11.6599, "step": 51303 }, { "epoch": 1.0739345223143264, "grad_norm": 0.30878081917762756, "learning_rate": 0.00014318988190022003, "loss": 11.6745, "step": 51304 }, { "epoch": 1.0739554550782886, "grad_norm": 0.2621220648288727, "learning_rate": 0.00014318790439606335, "loss": 11.6652, "step": 51305 }, { "epoch": 1.0739763878422508, "grad_norm": 0.2886403799057007, "learning_rate": 0.0001431859268711454, "loss": 11.6784, "step": 51306 }, { "epoch": 1.0739973206062128, "grad_norm": 0.3826097548007965, "learning_rate": 0.0001431839493254671, "loss": 11.6767, "step": 51307 }, { "epoch": 1.074018253370175, "grad_norm": 0.3580378592014313, "learning_rate": 0.00014318197175902943, "loss": 11.6749, "step": 51308 }, { "epoch": 1.0740391861341372, "grad_norm": 0.6700727939605713, "learning_rate": 0.00014317999417183332, "loss": 11.5809, "step": 51309 }, { "epoch": 1.0740601188980994, "grad_norm": 0.547231912612915, "learning_rate": 0.0001431780165638797, "loss": 11.6772, "step": 51310 }, { "epoch": 1.0740810516620614, "grad_norm": 0.2801363468170166, "learning_rate": 0.00014317603893516958, "loss": 11.675, "step": 51311 }, { "epoch": 1.0741019844260236, "grad_norm": 0.30329298973083496, "learning_rate": 0.00014317406128570384, "loss": 11.6741, "step": 51312 }, { "epoch": 1.0741229171899858, "grad_norm": 0.27856752276420593, "learning_rate": 0.00014317208361548346, "loss": 11.6565, "step": 51313 }, { "epoch": 1.074143849953948, "grad_norm": 0.26358380913734436, "learning_rate": 0.00014317010592450942, "loss": 11.6604, "step": 51314 }, { "epoch": 1.0741647827179102, "grad_norm": 0.29283079504966736, "learning_rate": 0.00014316812821278262, "loss": 11.6694, "step": 51315 }, { "epoch": 1.0741857154818721, "grad_norm": 0.2777796983718872, "learning_rate": 0.00014316615048030404, "loss": 11.6703, "step": 51316 }, { "epoch": 1.0742066482458343, "grad_norm": 0.305280476808548, "learning_rate": 0.00014316417272707463, "loss": 11.6638, "step": 51317 }, { "epoch": 1.0742275810097965, "grad_norm": 0.33442938327789307, "learning_rate": 0.00014316219495309534, "loss": 11.6721, "step": 51318 }, { "epoch": 1.0742485137737587, "grad_norm": 0.27368155121803284, "learning_rate": 0.0001431602171583671, "loss": 11.6632, "step": 51319 }, { "epoch": 1.074269446537721, "grad_norm": 0.28795477747917175, "learning_rate": 0.0001431582393428909, "loss": 11.6742, "step": 51320 }, { "epoch": 1.074290379301683, "grad_norm": 0.3546592891216278, "learning_rate": 0.0001431562615066677, "loss": 11.6821, "step": 51321 }, { "epoch": 1.0743113120656451, "grad_norm": 0.42087316513061523, "learning_rate": 0.00014315428364969837, "loss": 11.6729, "step": 51322 }, { "epoch": 1.0743322448296073, "grad_norm": 0.32353657484054565, "learning_rate": 0.00014315230577198392, "loss": 11.6641, "step": 51323 }, { "epoch": 1.0743531775935695, "grad_norm": 0.2976631820201874, "learning_rate": 0.00014315032787352532, "loss": 11.6785, "step": 51324 }, { "epoch": 1.0743741103575317, "grad_norm": 0.3408612310886383, "learning_rate": 0.00014314834995432345, "loss": 11.6797, "step": 51325 }, { "epoch": 1.0743950431214937, "grad_norm": 0.3125417232513428, "learning_rate": 0.00014314637201437935, "loss": 11.6601, "step": 51326 }, { "epoch": 1.074415975885456, "grad_norm": 0.35040250420570374, "learning_rate": 0.0001431443940536939, "loss": 11.693, "step": 51327 }, { "epoch": 1.074436908649418, "grad_norm": 0.30752798914909363, "learning_rate": 0.00014314241607226805, "loss": 11.678, "step": 51328 }, { "epoch": 1.0744578414133803, "grad_norm": 0.40701454877853394, "learning_rate": 0.0001431404380701028, "loss": 11.6604, "step": 51329 }, { "epoch": 1.0744787741773423, "grad_norm": 0.27032750844955444, "learning_rate": 0.00014313846004719908, "loss": 11.6697, "step": 51330 }, { "epoch": 1.0744997069413045, "grad_norm": 0.29052749276161194, "learning_rate": 0.00014313648200355784, "loss": 11.6706, "step": 51331 }, { "epoch": 1.0745206397052667, "grad_norm": 0.27764689922332764, "learning_rate": 0.00014313450393918002, "loss": 11.6632, "step": 51332 }, { "epoch": 1.0745415724692289, "grad_norm": 0.3757462501525879, "learning_rate": 0.00014313252585406661, "loss": 11.6684, "step": 51333 }, { "epoch": 1.074562505233191, "grad_norm": 0.268612265586853, "learning_rate": 0.00014313054774821847, "loss": 11.6614, "step": 51334 }, { "epoch": 1.074583437997153, "grad_norm": 0.3047667443752289, "learning_rate": 0.00014312856962163669, "loss": 11.6859, "step": 51335 }, { "epoch": 1.0746043707611153, "grad_norm": 0.489498496055603, "learning_rate": 0.00014312659147432206, "loss": 11.682, "step": 51336 }, { "epoch": 1.0746253035250775, "grad_norm": 0.26086708903312683, "learning_rate": 0.00014312461330627565, "loss": 11.6632, "step": 51337 }, { "epoch": 1.0746462362890397, "grad_norm": 0.3573130965232849, "learning_rate": 0.0001431226351174984, "loss": 11.6853, "step": 51338 }, { "epoch": 1.0746671690530019, "grad_norm": 0.29385408759117126, "learning_rate": 0.00014312065690799116, "loss": 11.6675, "step": 51339 }, { "epoch": 1.0746881018169638, "grad_norm": 0.34661924839019775, "learning_rate": 0.00014311867867775505, "loss": 11.6536, "step": 51340 }, { "epoch": 1.074709034580926, "grad_norm": 0.43060964345932007, "learning_rate": 0.00014311670042679086, "loss": 11.6455, "step": 51341 }, { "epoch": 1.0747299673448882, "grad_norm": 0.3525904715061188, "learning_rate": 0.00014311472215509965, "loss": 11.6584, "step": 51342 }, { "epoch": 1.0747509001088504, "grad_norm": 0.25476062297821045, "learning_rate": 0.0001431127438626823, "loss": 11.671, "step": 51343 }, { "epoch": 1.0747718328728126, "grad_norm": 0.44620317220687866, "learning_rate": 0.0001431107655495398, "loss": 11.6734, "step": 51344 }, { "epoch": 1.0747927656367746, "grad_norm": 0.249973326921463, "learning_rate": 0.0001431087872156731, "loss": 11.6637, "step": 51345 }, { "epoch": 1.0748136984007368, "grad_norm": 0.32753896713256836, "learning_rate": 0.00014310680886108312, "loss": 11.6617, "step": 51346 }, { "epoch": 1.074834631164699, "grad_norm": 0.2803606390953064, "learning_rate": 0.00014310483048577086, "loss": 11.6598, "step": 51347 }, { "epoch": 1.0748555639286612, "grad_norm": 0.3045439124107361, "learning_rate": 0.00014310285208973722, "loss": 11.6608, "step": 51348 }, { "epoch": 1.0748764966926232, "grad_norm": 0.30348849296569824, "learning_rate": 0.0001431008736729832, "loss": 11.6729, "step": 51349 }, { "epoch": 1.0748974294565854, "grad_norm": 0.2759476900100708, "learning_rate": 0.0001430988952355097, "loss": 11.6616, "step": 51350 }, { "epoch": 1.0749183622205476, "grad_norm": 0.2752871811389923, "learning_rate": 0.0001430969167773177, "loss": 11.6622, "step": 51351 }, { "epoch": 1.0749392949845098, "grad_norm": 0.2966283857822418, "learning_rate": 0.00014309493829840818, "loss": 11.6468, "step": 51352 }, { "epoch": 1.074960227748472, "grad_norm": 0.5070610046386719, "learning_rate": 0.000143092959798782, "loss": 11.6704, "step": 51353 }, { "epoch": 1.074981160512434, "grad_norm": 0.3248269259929657, "learning_rate": 0.00014309098127844022, "loss": 11.6624, "step": 51354 }, { "epoch": 1.0750020932763962, "grad_norm": 0.34327882528305054, "learning_rate": 0.00014308900273738372, "loss": 11.672, "step": 51355 }, { "epoch": 1.0750230260403584, "grad_norm": 0.3895249366760254, "learning_rate": 0.00014308702417561352, "loss": 11.6531, "step": 51356 }, { "epoch": 1.0750439588043206, "grad_norm": 0.29044729471206665, "learning_rate": 0.00014308504559313047, "loss": 11.6528, "step": 51357 }, { "epoch": 1.0750648915682828, "grad_norm": 0.31334564089775085, "learning_rate": 0.0001430830669899356, "loss": 11.6471, "step": 51358 }, { "epoch": 1.0750858243322448, "grad_norm": 0.5319238901138306, "learning_rate": 0.00014308108836602983, "loss": 11.6631, "step": 51359 }, { "epoch": 1.075106757096207, "grad_norm": 0.3239382803440094, "learning_rate": 0.0001430791097214141, "loss": 11.6482, "step": 51360 }, { "epoch": 1.0751276898601692, "grad_norm": 0.37891635298728943, "learning_rate": 0.0001430771310560894, "loss": 11.6756, "step": 51361 }, { "epoch": 1.0751486226241314, "grad_norm": 0.3288159668445587, "learning_rate": 0.00014307515237005664, "loss": 11.6678, "step": 51362 }, { "epoch": 1.0751695553880936, "grad_norm": 0.36232680082321167, "learning_rate": 0.00014307317366331682, "loss": 11.6774, "step": 51363 }, { "epoch": 1.0751904881520555, "grad_norm": 0.24048757553100586, "learning_rate": 0.00014307119493587084, "loss": 11.6661, "step": 51364 }, { "epoch": 1.0752114209160177, "grad_norm": 0.33864694833755493, "learning_rate": 0.0001430692161877197, "loss": 11.6623, "step": 51365 }, { "epoch": 1.07523235367998, "grad_norm": 0.3284049928188324, "learning_rate": 0.0001430672374188643, "loss": 11.6744, "step": 51366 }, { "epoch": 1.0752532864439421, "grad_norm": 0.4117206931114197, "learning_rate": 0.0001430652586293056, "loss": 11.6711, "step": 51367 }, { "epoch": 1.0752742192079041, "grad_norm": 0.287828654050827, "learning_rate": 0.00014306327981904459, "loss": 11.6708, "step": 51368 }, { "epoch": 1.0752951519718663, "grad_norm": 0.31312474608421326, "learning_rate": 0.00014306130098808221, "loss": 11.6742, "step": 51369 }, { "epoch": 1.0753160847358285, "grad_norm": 0.3644753098487854, "learning_rate": 0.0001430593221364194, "loss": 11.7026, "step": 51370 }, { "epoch": 1.0753370174997907, "grad_norm": 0.3249388635158539, "learning_rate": 0.00014305734326405708, "loss": 11.6705, "step": 51371 }, { "epoch": 1.075357950263753, "grad_norm": 0.2668803930282593, "learning_rate": 0.00014305536437099624, "loss": 11.6302, "step": 51372 }, { "epoch": 1.075378883027715, "grad_norm": 0.4243665039539337, "learning_rate": 0.00014305338545723785, "loss": 11.6652, "step": 51373 }, { "epoch": 1.075399815791677, "grad_norm": 0.28489431738853455, "learning_rate": 0.00014305140652278283, "loss": 11.6639, "step": 51374 }, { "epoch": 1.0754207485556393, "grad_norm": 0.281780481338501, "learning_rate": 0.00014304942756763212, "loss": 11.6615, "step": 51375 }, { "epoch": 1.0754416813196015, "grad_norm": 0.32928693294525146, "learning_rate": 0.00014304744859178667, "loss": 11.652, "step": 51376 }, { "epoch": 1.0754626140835637, "grad_norm": 0.29355159401893616, "learning_rate": 0.0001430454695952475, "loss": 11.6674, "step": 51377 }, { "epoch": 1.0754835468475257, "grad_norm": 0.3153546452522278, "learning_rate": 0.00014304349057801545, "loss": 11.6746, "step": 51378 }, { "epoch": 1.0755044796114879, "grad_norm": 0.3856683075428009, "learning_rate": 0.00014304151154009156, "loss": 11.6642, "step": 51379 }, { "epoch": 1.07552541237545, "grad_norm": 0.28284770250320435, "learning_rate": 0.00014303953248147677, "loss": 11.6696, "step": 51380 }, { "epoch": 1.0755463451394123, "grad_norm": 0.3351917564868927, "learning_rate": 0.000143037553402172, "loss": 11.6678, "step": 51381 }, { "epoch": 1.0755672779033745, "grad_norm": 0.39662623405456543, "learning_rate": 0.0001430355743021782, "loss": 11.6721, "step": 51382 }, { "epoch": 1.0755882106673365, "grad_norm": 0.2833578586578369, "learning_rate": 0.00014303359518149636, "loss": 11.6791, "step": 51383 }, { "epoch": 1.0756091434312987, "grad_norm": 0.339219331741333, "learning_rate": 0.0001430316160401274, "loss": 11.6661, "step": 51384 }, { "epoch": 1.0756300761952609, "grad_norm": 0.2766600251197815, "learning_rate": 0.00014302963687807228, "loss": 11.6824, "step": 51385 }, { "epoch": 1.075651008959223, "grad_norm": 0.2597542107105255, "learning_rate": 0.00014302765769533195, "loss": 11.6713, "step": 51386 }, { "epoch": 1.075671941723185, "grad_norm": 0.2739492654800415, "learning_rate": 0.00014302567849190737, "loss": 11.6546, "step": 51387 }, { "epoch": 1.0756928744871472, "grad_norm": 0.3042783737182617, "learning_rate": 0.00014302369926779947, "loss": 11.6665, "step": 51388 }, { "epoch": 1.0757138072511094, "grad_norm": 0.3723834156990051, "learning_rate": 0.00014302172002300922, "loss": 11.6675, "step": 51389 }, { "epoch": 1.0757347400150716, "grad_norm": 0.32515662908554077, "learning_rate": 0.00014301974075753757, "loss": 11.6709, "step": 51390 }, { "epoch": 1.0757556727790338, "grad_norm": 0.3387944996356964, "learning_rate": 0.00014301776147138546, "loss": 11.6688, "step": 51391 }, { "epoch": 1.0757766055429958, "grad_norm": 0.3611189126968384, "learning_rate": 0.00014301578216455388, "loss": 11.6607, "step": 51392 }, { "epoch": 1.075797538306958, "grad_norm": 0.31143051385879517, "learning_rate": 0.00014301380283704372, "loss": 11.6799, "step": 51393 }, { "epoch": 1.0758184710709202, "grad_norm": 0.3067191243171692, "learning_rate": 0.00014301182348885598, "loss": 11.665, "step": 51394 }, { "epoch": 1.0758394038348824, "grad_norm": 0.3529112637042999, "learning_rate": 0.00014300984411999158, "loss": 11.6697, "step": 51395 }, { "epoch": 1.0758603365988446, "grad_norm": 0.4945336878299713, "learning_rate": 0.00014300786473045148, "loss": 11.6687, "step": 51396 }, { "epoch": 1.0758812693628066, "grad_norm": 0.26129671931266785, "learning_rate": 0.00014300588532023667, "loss": 11.6529, "step": 51397 }, { "epoch": 1.0759022021267688, "grad_norm": 0.3323972821235657, "learning_rate": 0.00014300390588934802, "loss": 11.6893, "step": 51398 }, { "epoch": 1.075923134890731, "grad_norm": 0.26689326763153076, "learning_rate": 0.0001430019264377866, "loss": 11.6649, "step": 51399 }, { "epoch": 1.0759440676546932, "grad_norm": 0.27174824476242065, "learning_rate": 0.00014299994696555324, "loss": 11.6556, "step": 51400 }, { "epoch": 1.0759650004186554, "grad_norm": 0.2897360026836395, "learning_rate": 0.00014299796747264894, "loss": 11.6548, "step": 51401 }, { "epoch": 1.0759859331826174, "grad_norm": 0.3590376675128937, "learning_rate": 0.00014299598795907467, "loss": 11.6797, "step": 51402 }, { "epoch": 1.0760068659465796, "grad_norm": 0.3234351575374603, "learning_rate": 0.00014299400842483138, "loss": 11.6841, "step": 51403 }, { "epoch": 1.0760277987105418, "grad_norm": 0.37631914019584656, "learning_rate": 0.00014299202886992, "loss": 11.6588, "step": 51404 }, { "epoch": 1.076048731474504, "grad_norm": 0.3586256802082062, "learning_rate": 0.0001429900492943415, "loss": 11.6671, "step": 51405 }, { "epoch": 1.076069664238466, "grad_norm": 0.35620078444480896, "learning_rate": 0.0001429880696980968, "loss": 11.6808, "step": 51406 }, { "epoch": 1.0760905970024282, "grad_norm": 0.2707682251930237, "learning_rate": 0.0001429860900811869, "loss": 11.6615, "step": 51407 }, { "epoch": 1.0761115297663904, "grad_norm": 0.27013999223709106, "learning_rate": 0.0001429841104436127, "loss": 11.6613, "step": 51408 }, { "epoch": 1.0761324625303526, "grad_norm": 0.37890851497650146, "learning_rate": 0.00014298213078537517, "loss": 11.679, "step": 51409 }, { "epoch": 1.0761533952943148, "grad_norm": 0.33244529366493225, "learning_rate": 0.00014298015110647528, "loss": 11.6638, "step": 51410 }, { "epoch": 1.0761743280582767, "grad_norm": 0.33459028601646423, "learning_rate": 0.00014297817140691398, "loss": 11.6498, "step": 51411 }, { "epoch": 1.076195260822239, "grad_norm": 0.2838328778743744, "learning_rate": 0.00014297619168669222, "loss": 11.6682, "step": 51412 }, { "epoch": 1.0762161935862011, "grad_norm": 0.32950741052627563, "learning_rate": 0.0001429742119458109, "loss": 11.6832, "step": 51413 }, { "epoch": 1.0762371263501633, "grad_norm": 0.319108247756958, "learning_rate": 0.00014297223218427105, "loss": 11.6579, "step": 51414 }, { "epoch": 1.0762580591141255, "grad_norm": 0.2915847897529602, "learning_rate": 0.00014297025240207357, "loss": 11.6831, "step": 51415 }, { "epoch": 1.0762789918780875, "grad_norm": 0.26908349990844727, "learning_rate": 0.00014296827259921945, "loss": 11.6702, "step": 51416 }, { "epoch": 1.0762999246420497, "grad_norm": 0.3552773892879486, "learning_rate": 0.00014296629277570961, "loss": 11.6923, "step": 51417 }, { "epoch": 1.076320857406012, "grad_norm": 0.38832464814186096, "learning_rate": 0.000142964312931545, "loss": 11.6745, "step": 51418 }, { "epoch": 1.076341790169974, "grad_norm": 0.32951727509498596, "learning_rate": 0.0001429623330667266, "loss": 11.6656, "step": 51419 }, { "epoch": 1.0763627229339363, "grad_norm": 0.4557802677154541, "learning_rate": 0.00014296035318125535, "loss": 11.6609, "step": 51420 }, { "epoch": 1.0763836556978983, "grad_norm": 0.38703522086143494, "learning_rate": 0.0001429583732751322, "loss": 11.6689, "step": 51421 }, { "epoch": 1.0764045884618605, "grad_norm": 0.3243679404258728, "learning_rate": 0.00014295639334835805, "loss": 11.6604, "step": 51422 }, { "epoch": 1.0764255212258227, "grad_norm": 0.34900400042533875, "learning_rate": 0.00014295441340093394, "loss": 11.6556, "step": 51423 }, { "epoch": 1.076446453989785, "grad_norm": 0.3238537609577179, "learning_rate": 0.00014295243343286077, "loss": 11.6761, "step": 51424 }, { "epoch": 1.0764673867537469, "grad_norm": 0.3445480167865753, "learning_rate": 0.00014295045344413953, "loss": 11.675, "step": 51425 }, { "epoch": 1.076488319517709, "grad_norm": 0.3210914731025696, "learning_rate": 0.00014294847343477112, "loss": 11.6667, "step": 51426 }, { "epoch": 1.0765092522816713, "grad_norm": 0.30751168727874756, "learning_rate": 0.00014294649340475656, "loss": 11.6715, "step": 51427 }, { "epoch": 1.0765301850456335, "grad_norm": 0.34723198413848877, "learning_rate": 0.00014294451335409672, "loss": 11.6749, "step": 51428 }, { "epoch": 1.0765511178095957, "grad_norm": 0.4580143392086029, "learning_rate": 0.0001429425332827926, "loss": 11.6779, "step": 51429 }, { "epoch": 1.0765720505735576, "grad_norm": 0.34598761796951294, "learning_rate": 0.0001429405531908451, "loss": 11.6679, "step": 51430 }, { "epoch": 1.0765929833375198, "grad_norm": 0.27618080377578735, "learning_rate": 0.00014293857307825532, "loss": 11.6651, "step": 51431 }, { "epoch": 1.076613916101482, "grad_norm": 0.2815082371234894, "learning_rate": 0.00014293659294502403, "loss": 11.6698, "step": 51432 }, { "epoch": 1.0766348488654442, "grad_norm": 0.31806525588035583, "learning_rate": 0.0001429346127911523, "loss": 11.6753, "step": 51433 }, { "epoch": 1.0766557816294064, "grad_norm": 0.29346615076065063, "learning_rate": 0.00014293263261664098, "loss": 11.6693, "step": 51434 }, { "epoch": 1.0766767143933684, "grad_norm": 0.3465752899646759, "learning_rate": 0.00014293065242149112, "loss": 11.6653, "step": 51435 }, { "epoch": 1.0766976471573306, "grad_norm": 0.33727777004241943, "learning_rate": 0.00014292867220570365, "loss": 11.6562, "step": 51436 }, { "epoch": 1.0767185799212928, "grad_norm": 0.435449481010437, "learning_rate": 0.0001429266919692795, "loss": 11.6826, "step": 51437 }, { "epoch": 1.076739512685255, "grad_norm": 0.4401415288448334, "learning_rate": 0.00014292471171221962, "loss": 11.6786, "step": 51438 }, { "epoch": 1.0767604454492172, "grad_norm": 0.4171542525291443, "learning_rate": 0.00014292273143452495, "loss": 11.6742, "step": 51439 }, { "epoch": 1.0767813782131792, "grad_norm": 0.30033427476882935, "learning_rate": 0.00014292075113619648, "loss": 11.6789, "step": 51440 }, { "epoch": 1.0768023109771414, "grad_norm": 0.3207502067089081, "learning_rate": 0.00014291877081723517, "loss": 11.6811, "step": 51441 }, { "epoch": 1.0768232437411036, "grad_norm": 0.2945220172405243, "learning_rate": 0.00014291679047764193, "loss": 11.6762, "step": 51442 }, { "epoch": 1.0768441765050658, "grad_norm": 0.2818816304206848, "learning_rate": 0.0001429148101174177, "loss": 11.6563, "step": 51443 }, { "epoch": 1.0768651092690278, "grad_norm": 0.3092506527900696, "learning_rate": 0.0001429128297365635, "loss": 11.6661, "step": 51444 }, { "epoch": 1.07688604203299, "grad_norm": 0.3089633285999298, "learning_rate": 0.00014291084933508024, "loss": 11.6763, "step": 51445 }, { "epoch": 1.0769069747969522, "grad_norm": 0.42738646268844604, "learning_rate": 0.00014290886891296886, "loss": 11.6753, "step": 51446 }, { "epoch": 1.0769279075609144, "grad_norm": 0.31192535161972046, "learning_rate": 0.00014290688847023032, "loss": 11.6644, "step": 51447 }, { "epoch": 1.0769488403248766, "grad_norm": 0.2932461202144623, "learning_rate": 0.0001429049080068656, "loss": 11.6667, "step": 51448 }, { "epoch": 1.0769697730888386, "grad_norm": 0.27000993490219116, "learning_rate": 0.00014290292752287562, "loss": 11.6774, "step": 51449 }, { "epoch": 1.0769907058528008, "grad_norm": 0.34470242261886597, "learning_rate": 0.00014290094701826134, "loss": 11.6593, "step": 51450 }, { "epoch": 1.077011638616763, "grad_norm": 0.3318556547164917, "learning_rate": 0.00014289896649302371, "loss": 11.6704, "step": 51451 }, { "epoch": 1.0770325713807252, "grad_norm": 0.33695080876350403, "learning_rate": 0.0001428969859471637, "loss": 11.6728, "step": 51452 }, { "epoch": 1.0770535041446874, "grad_norm": 0.2719443738460541, "learning_rate": 0.00014289500538068226, "loss": 11.641, "step": 51453 }, { "epoch": 1.0770744369086493, "grad_norm": 0.3120124638080597, "learning_rate": 0.00014289302479358028, "loss": 11.6619, "step": 51454 }, { "epoch": 1.0770953696726115, "grad_norm": 0.32315975427627563, "learning_rate": 0.00014289104418585883, "loss": 11.6982, "step": 51455 }, { "epoch": 1.0771163024365737, "grad_norm": 0.3143322467803955, "learning_rate": 0.00014288906355751875, "loss": 11.6802, "step": 51456 }, { "epoch": 1.077137235200536, "grad_norm": 0.3279217481613159, "learning_rate": 0.00014288708290856105, "loss": 11.682, "step": 51457 }, { "epoch": 1.0771581679644981, "grad_norm": 0.3101317882537842, "learning_rate": 0.00014288510223898668, "loss": 11.6712, "step": 51458 }, { "epoch": 1.0771791007284601, "grad_norm": 0.2818624675273895, "learning_rate": 0.00014288312154879656, "loss": 11.6757, "step": 51459 }, { "epoch": 1.0772000334924223, "grad_norm": 0.3166011869907379, "learning_rate": 0.00014288114083799172, "loss": 11.6599, "step": 51460 }, { "epoch": 1.0772209662563845, "grad_norm": 0.30158987641334534, "learning_rate": 0.000142879160106573, "loss": 11.6902, "step": 51461 }, { "epoch": 1.0772418990203467, "grad_norm": 0.2826579809188843, "learning_rate": 0.0001428771793545414, "loss": 11.6564, "step": 51462 }, { "epoch": 1.0772628317843087, "grad_norm": 0.49978819489479065, "learning_rate": 0.0001428751985818979, "loss": 11.6738, "step": 51463 }, { "epoch": 1.077283764548271, "grad_norm": 0.2967913746833801, "learning_rate": 0.00014287321778864344, "loss": 11.6678, "step": 51464 }, { "epoch": 1.077304697312233, "grad_norm": 0.37043145298957825, "learning_rate": 0.00014287123697477897, "loss": 11.682, "step": 51465 }, { "epoch": 1.0773256300761953, "grad_norm": 0.2770231366157532, "learning_rate": 0.00014286925614030542, "loss": 11.6739, "step": 51466 }, { "epoch": 1.0773465628401575, "grad_norm": 0.4019743502140045, "learning_rate": 0.00014286727528522376, "loss": 11.6708, "step": 51467 }, { "epoch": 1.0773674956041195, "grad_norm": 0.3297286927700043, "learning_rate": 0.00014286529440953497, "loss": 11.6533, "step": 51468 }, { "epoch": 1.0773884283680817, "grad_norm": 0.26644593477249146, "learning_rate": 0.00014286331351323994, "loss": 11.669, "step": 51469 }, { "epoch": 1.0774093611320439, "grad_norm": 0.32715001702308655, "learning_rate": 0.0001428613325963397, "loss": 11.6834, "step": 51470 }, { "epoch": 1.077430293896006, "grad_norm": 0.2796514928340912, "learning_rate": 0.00014285935165883512, "loss": 11.6562, "step": 51471 }, { "epoch": 1.077451226659968, "grad_norm": 0.26683562994003296, "learning_rate": 0.00014285737070072717, "loss": 11.6629, "step": 51472 }, { "epoch": 1.0774721594239303, "grad_norm": 0.34273773431777954, "learning_rate": 0.0001428553897220169, "loss": 11.671, "step": 51473 }, { "epoch": 1.0774930921878925, "grad_norm": 0.309639036655426, "learning_rate": 0.0001428534087227051, "loss": 11.6872, "step": 51474 }, { "epoch": 1.0775140249518547, "grad_norm": 0.3267482817173004, "learning_rate": 0.00014285142770279285, "loss": 11.6664, "step": 51475 }, { "epoch": 1.0775349577158169, "grad_norm": 0.28335192799568176, "learning_rate": 0.00014284944666228107, "loss": 11.6759, "step": 51476 }, { "epoch": 1.077555890479779, "grad_norm": 0.4186066687107086, "learning_rate": 0.0001428474656011707, "loss": 11.6614, "step": 51477 }, { "epoch": 1.077576823243741, "grad_norm": 0.2860829532146454, "learning_rate": 0.00014284548451946267, "loss": 11.6674, "step": 51478 }, { "epoch": 1.0775977560077032, "grad_norm": 0.2738339304924011, "learning_rate": 0.00014284350341715795, "loss": 11.6722, "step": 51479 }, { "epoch": 1.0776186887716654, "grad_norm": 0.40042930841445923, "learning_rate": 0.00014284152229425754, "loss": 11.6672, "step": 51480 }, { "epoch": 1.0776396215356276, "grad_norm": 0.33616718649864197, "learning_rate": 0.00014283954115076232, "loss": 11.6559, "step": 51481 }, { "epoch": 1.0776605542995896, "grad_norm": 0.28231972455978394, "learning_rate": 0.00014283755998667328, "loss": 11.6667, "step": 51482 }, { "epoch": 1.0776814870635518, "grad_norm": 0.2823246419429779, "learning_rate": 0.00014283557880199137, "loss": 11.6786, "step": 51483 }, { "epoch": 1.077702419827514, "grad_norm": 0.35908931493759155, "learning_rate": 0.00014283359759671753, "loss": 11.6648, "step": 51484 }, { "epoch": 1.0777233525914762, "grad_norm": 0.2782886326313019, "learning_rate": 0.00014283161637085274, "loss": 11.6626, "step": 51485 }, { "epoch": 1.0777442853554384, "grad_norm": 0.4183913469314575, "learning_rate": 0.00014282963512439792, "loss": 11.6688, "step": 51486 }, { "epoch": 1.0777652181194004, "grad_norm": 0.40023213624954224, "learning_rate": 0.00014282765385735405, "loss": 11.6557, "step": 51487 }, { "epoch": 1.0777861508833626, "grad_norm": 0.3220685124397278, "learning_rate": 0.00014282567256972205, "loss": 11.6731, "step": 51488 }, { "epoch": 1.0778070836473248, "grad_norm": 0.2096347063779831, "learning_rate": 0.0001428236912615029, "loss": 11.6592, "step": 51489 }, { "epoch": 1.077828016411287, "grad_norm": 0.3061547875404358, "learning_rate": 0.00014282170993269754, "loss": 11.6596, "step": 51490 }, { "epoch": 1.077848949175249, "grad_norm": 0.2526840269565582, "learning_rate": 0.00014281972858330692, "loss": 11.6663, "step": 51491 }, { "epoch": 1.0778698819392112, "grad_norm": 0.2992420196533203, "learning_rate": 0.00014281774721333203, "loss": 11.6842, "step": 51492 }, { "epoch": 1.0778908147031734, "grad_norm": 0.2820541262626648, "learning_rate": 0.00014281576582277376, "loss": 11.6647, "step": 51493 }, { "epoch": 1.0779117474671356, "grad_norm": 0.30983468890190125, "learning_rate": 0.00014281378441163308, "loss": 11.6467, "step": 51494 }, { "epoch": 1.0779326802310978, "grad_norm": 0.48084163665771484, "learning_rate": 0.00014281180297991098, "loss": 11.6688, "step": 51495 }, { "epoch": 1.07795361299506, "grad_norm": 0.2808259129524231, "learning_rate": 0.0001428098215276084, "loss": 11.6703, "step": 51496 }, { "epoch": 1.077974545759022, "grad_norm": 0.29812026023864746, "learning_rate": 0.00014280784005472628, "loss": 11.6561, "step": 51497 }, { "epoch": 1.0779954785229842, "grad_norm": 0.2865215837955475, "learning_rate": 0.00014280585856126554, "loss": 11.6416, "step": 51498 }, { "epoch": 1.0780164112869464, "grad_norm": 0.26361340284347534, "learning_rate": 0.0001428038770472272, "loss": 11.6721, "step": 51499 }, { "epoch": 1.0780373440509086, "grad_norm": 0.2995276153087616, "learning_rate": 0.00014280189551261216, "loss": 11.663, "step": 51500 }, { "epoch": 1.0780582768148705, "grad_norm": 0.35640108585357666, "learning_rate": 0.0001427999139574214, "loss": 11.6766, "step": 51501 }, { "epoch": 1.0780792095788327, "grad_norm": 0.2945871651172638, "learning_rate": 0.00014279793238165586, "loss": 11.6583, "step": 51502 }, { "epoch": 1.078100142342795, "grad_norm": 0.35558944940567017, "learning_rate": 0.0001427959507853165, "loss": 11.6549, "step": 51503 }, { "epoch": 1.0781210751067571, "grad_norm": 0.40177837014198303, "learning_rate": 0.00014279396916840428, "loss": 11.6704, "step": 51504 }, { "epoch": 1.0781420078707193, "grad_norm": 0.37364718317985535, "learning_rate": 0.0001427919875309201, "loss": 11.6623, "step": 51505 }, { "epoch": 1.0781629406346813, "grad_norm": 0.35848483443260193, "learning_rate": 0.00014279000587286497, "loss": 11.6843, "step": 51506 }, { "epoch": 1.0781838733986435, "grad_norm": 0.3689289093017578, "learning_rate": 0.00014278802419423985, "loss": 11.6778, "step": 51507 }, { "epoch": 1.0782048061626057, "grad_norm": 0.3527343273162842, "learning_rate": 0.00014278604249504565, "loss": 11.667, "step": 51508 }, { "epoch": 1.078225738926568, "grad_norm": 0.28237566351890564, "learning_rate": 0.00014278406077528337, "loss": 11.6542, "step": 51509 }, { "epoch": 1.07824667169053, "grad_norm": 0.39586734771728516, "learning_rate": 0.0001427820790349539, "loss": 11.665, "step": 51510 }, { "epoch": 1.078267604454492, "grad_norm": 0.3163038492202759, "learning_rate": 0.0001427800972740582, "loss": 11.6804, "step": 51511 }, { "epoch": 1.0782885372184543, "grad_norm": 0.27830588817596436, "learning_rate": 0.00014277811549259732, "loss": 11.6741, "step": 51512 }, { "epoch": 1.0783094699824165, "grad_norm": 0.30986353754997253, "learning_rate": 0.0001427761336905721, "loss": 11.6684, "step": 51513 }, { "epoch": 1.0783304027463787, "grad_norm": 0.2818240225315094, "learning_rate": 0.00014277415186798357, "loss": 11.653, "step": 51514 }, { "epoch": 1.078351335510341, "grad_norm": 0.27208226919174194, "learning_rate": 0.0001427721700248326, "loss": 11.6644, "step": 51515 }, { "epoch": 1.0783722682743029, "grad_norm": 0.2974545657634735, "learning_rate": 0.00014277018816112022, "loss": 11.664, "step": 51516 }, { "epoch": 1.078393201038265, "grad_norm": 0.3105555474758148, "learning_rate": 0.00014276820627684734, "loss": 11.6713, "step": 51517 }, { "epoch": 1.0784141338022273, "grad_norm": 0.31418606638908386, "learning_rate": 0.00014276622437201495, "loss": 11.6697, "step": 51518 }, { "epoch": 1.0784350665661895, "grad_norm": 0.32780298590660095, "learning_rate": 0.00014276424244662395, "loss": 11.6458, "step": 51519 }, { "epoch": 1.0784559993301515, "grad_norm": 1.6315585374832153, "learning_rate": 0.00014276226050067537, "loss": 11.6538, "step": 51520 }, { "epoch": 1.0784769320941137, "grad_norm": 0.345474511384964, "learning_rate": 0.00014276027853417006, "loss": 11.6586, "step": 51521 }, { "epoch": 1.0784978648580759, "grad_norm": 0.3616492748260498, "learning_rate": 0.00014275829654710904, "loss": 11.6889, "step": 51522 }, { "epoch": 1.078518797622038, "grad_norm": 0.25559285283088684, "learning_rate": 0.00014275631453949326, "loss": 11.667, "step": 51523 }, { "epoch": 1.0785397303860003, "grad_norm": 0.31343600153923035, "learning_rate": 0.0001427543325113237, "loss": 11.6721, "step": 51524 }, { "epoch": 1.0785606631499622, "grad_norm": 0.33253347873687744, "learning_rate": 0.0001427523504626012, "loss": 11.675, "step": 51525 }, { "epoch": 1.0785815959139244, "grad_norm": 0.3121798038482666, "learning_rate": 0.00014275036839332683, "loss": 11.6558, "step": 51526 }, { "epoch": 1.0786025286778866, "grad_norm": 0.2976279854774475, "learning_rate": 0.0001427483863035015, "loss": 11.6721, "step": 51527 }, { "epoch": 1.0786234614418488, "grad_norm": 0.4988545775413513, "learning_rate": 0.00014274640419312614, "loss": 11.649, "step": 51528 }, { "epoch": 1.0786443942058108, "grad_norm": 0.2879565954208374, "learning_rate": 0.00014274442206220174, "loss": 11.6576, "step": 51529 }, { "epoch": 1.078665326969773, "grad_norm": 0.2502431273460388, "learning_rate": 0.00014274243991072927, "loss": 11.657, "step": 51530 }, { "epoch": 1.0786862597337352, "grad_norm": 0.32662877440452576, "learning_rate": 0.00014274045773870963, "loss": 11.6709, "step": 51531 }, { "epoch": 1.0787071924976974, "grad_norm": 0.40717023611068726, "learning_rate": 0.00014273847554614378, "loss": 11.6804, "step": 51532 }, { "epoch": 1.0787281252616596, "grad_norm": 0.4029940068721771, "learning_rate": 0.0001427364933330327, "loss": 11.6747, "step": 51533 }, { "epoch": 1.0787490580256218, "grad_norm": 0.31840914487838745, "learning_rate": 0.00014273451109937733, "loss": 11.6677, "step": 51534 }, { "epoch": 1.0787699907895838, "grad_norm": 0.3355352580547333, "learning_rate": 0.00014273252884517863, "loss": 11.6739, "step": 51535 }, { "epoch": 1.078790923553546, "grad_norm": 0.31875261664390564, "learning_rate": 0.00014273054657043757, "loss": 11.6636, "step": 51536 }, { "epoch": 1.0788118563175082, "grad_norm": 0.2996440827846527, "learning_rate": 0.00014272856427515504, "loss": 11.6801, "step": 51537 }, { "epoch": 1.0788327890814704, "grad_norm": 0.32235169410705566, "learning_rate": 0.00014272658195933204, "loss": 11.6578, "step": 51538 }, { "epoch": 1.0788537218454324, "grad_norm": 0.45092660188674927, "learning_rate": 0.00014272459962296955, "loss": 11.684, "step": 51539 }, { "epoch": 1.0788746546093946, "grad_norm": 0.3423188030719757, "learning_rate": 0.00014272261726606844, "loss": 11.6418, "step": 51540 }, { "epoch": 1.0788955873733568, "grad_norm": 0.3379227817058563, "learning_rate": 0.00014272063488862975, "loss": 11.6569, "step": 51541 }, { "epoch": 1.078916520137319, "grad_norm": 0.4043850004673004, "learning_rate": 0.00014271865249065435, "loss": 11.6699, "step": 51542 }, { "epoch": 1.0789374529012812, "grad_norm": 0.321377694606781, "learning_rate": 0.0001427166700721433, "loss": 11.6629, "step": 51543 }, { "epoch": 1.0789583856652432, "grad_norm": 0.3015257716178894, "learning_rate": 0.00014271468763309744, "loss": 11.668, "step": 51544 }, { "epoch": 1.0789793184292054, "grad_norm": 0.3436912000179291, "learning_rate": 0.00014271270517351778, "loss": 11.6692, "step": 51545 }, { "epoch": 1.0790002511931676, "grad_norm": 0.26569679379463196, "learning_rate": 0.00014271072269340528, "loss": 11.6673, "step": 51546 }, { "epoch": 1.0790211839571298, "grad_norm": 0.3237282931804657, "learning_rate": 0.0001427087401927609, "loss": 11.6679, "step": 51547 }, { "epoch": 1.0790421167210917, "grad_norm": 0.270356148481369, "learning_rate": 0.00014270675767158553, "loss": 11.6636, "step": 51548 }, { "epoch": 1.079063049485054, "grad_norm": 0.2710011899471283, "learning_rate": 0.00014270477512988018, "loss": 11.6574, "step": 51549 }, { "epoch": 1.0790839822490161, "grad_norm": 0.3248177468776703, "learning_rate": 0.0001427027925676458, "loss": 11.6663, "step": 51550 }, { "epoch": 1.0791049150129783, "grad_norm": 0.33089956641197205, "learning_rate": 0.00014270080998488334, "loss": 11.6627, "step": 51551 }, { "epoch": 1.0791258477769405, "grad_norm": 0.34236660599708557, "learning_rate": 0.0001426988273815937, "loss": 11.6648, "step": 51552 }, { "epoch": 1.0791467805409025, "grad_norm": 0.35601919889450073, "learning_rate": 0.00014269684475777793, "loss": 11.6643, "step": 51553 }, { "epoch": 1.0791677133048647, "grad_norm": 0.4659527540206909, "learning_rate": 0.0001426948621134369, "loss": 11.6793, "step": 51554 }, { "epoch": 1.079188646068827, "grad_norm": 0.2714771628379822, "learning_rate": 0.0001426928794485716, "loss": 11.6707, "step": 51555 }, { "epoch": 1.0792095788327891, "grad_norm": 0.3153645694255829, "learning_rate": 0.000142690896763183, "loss": 11.6662, "step": 51556 }, { "epoch": 1.0792305115967513, "grad_norm": 0.3010771572589874, "learning_rate": 0.00014268891405727198, "loss": 11.6583, "step": 51557 }, { "epoch": 1.0792514443607133, "grad_norm": 0.3386945128440857, "learning_rate": 0.0001426869313308396, "loss": 11.68, "step": 51558 }, { "epoch": 1.0792723771246755, "grad_norm": 0.3654249310493469, "learning_rate": 0.00014268494858388672, "loss": 11.6636, "step": 51559 }, { "epoch": 1.0792933098886377, "grad_norm": 0.4157240688800812, "learning_rate": 0.00014268296581641435, "loss": 11.682, "step": 51560 }, { "epoch": 1.0793142426526, "grad_norm": 0.2666463255882263, "learning_rate": 0.0001426809830284234, "loss": 11.6679, "step": 51561 }, { "epoch": 1.079335175416562, "grad_norm": 0.34438997507095337, "learning_rate": 0.00014267900021991485, "loss": 11.6688, "step": 51562 }, { "epoch": 1.079356108180524, "grad_norm": 0.2971176505088806, "learning_rate": 0.0001426770173908897, "loss": 11.6714, "step": 51563 }, { "epoch": 1.0793770409444863, "grad_norm": 0.3348013460636139, "learning_rate": 0.0001426750345413488, "loss": 11.6731, "step": 51564 }, { "epoch": 1.0793979737084485, "grad_norm": 0.2652672231197357, "learning_rate": 0.00014267305167129317, "loss": 11.665, "step": 51565 }, { "epoch": 1.0794189064724107, "grad_norm": 0.2847992777824402, "learning_rate": 0.00014267106878072374, "loss": 11.66, "step": 51566 }, { "epoch": 1.0794398392363727, "grad_norm": 0.28261932730674744, "learning_rate": 0.00014266908586964147, "loss": 11.676, "step": 51567 }, { "epoch": 1.0794607720003349, "grad_norm": 0.3601878583431244, "learning_rate": 0.00014266710293804732, "loss": 11.6798, "step": 51568 }, { "epoch": 1.079481704764297, "grad_norm": 0.33547547459602356, "learning_rate": 0.00014266511998594226, "loss": 11.666, "step": 51569 }, { "epoch": 1.0795026375282593, "grad_norm": 0.32784056663513184, "learning_rate": 0.0001426631370133272, "loss": 11.6772, "step": 51570 }, { "epoch": 1.0795235702922215, "grad_norm": 0.29508471488952637, "learning_rate": 0.0001426611540202031, "loss": 11.6635, "step": 51571 }, { "epoch": 1.0795445030561834, "grad_norm": 0.3131202757358551, "learning_rate": 0.00014265917100657097, "loss": 11.6595, "step": 51572 }, { "epoch": 1.0795654358201456, "grad_norm": 0.3505532443523407, "learning_rate": 0.0001426571879724317, "loss": 11.6757, "step": 51573 }, { "epoch": 1.0795863685841078, "grad_norm": 0.3920213580131531, "learning_rate": 0.00014265520491778627, "loss": 11.6643, "step": 51574 }, { "epoch": 1.07960730134807, "grad_norm": 0.2943076491355896, "learning_rate": 0.00014265322184263562, "loss": 11.6575, "step": 51575 }, { "epoch": 1.0796282341120322, "grad_norm": 0.287835031747818, "learning_rate": 0.0001426512387469807, "loss": 11.6615, "step": 51576 }, { "epoch": 1.0796491668759942, "grad_norm": 0.2458874136209488, "learning_rate": 0.0001426492556308225, "loss": 11.6631, "step": 51577 }, { "epoch": 1.0796700996399564, "grad_norm": 0.31037741899490356, "learning_rate": 0.00014264727249416192, "loss": 11.6692, "step": 51578 }, { "epoch": 1.0796910324039186, "grad_norm": 0.2851144075393677, "learning_rate": 0.000142645289337, "loss": 11.6738, "step": 51579 }, { "epoch": 1.0797119651678808, "grad_norm": 0.301613986492157, "learning_rate": 0.00014264330615933757, "loss": 11.6458, "step": 51580 }, { "epoch": 1.079732897931843, "grad_norm": 0.3078761696815491, "learning_rate": 0.00014264132296117567, "loss": 11.642, "step": 51581 }, { "epoch": 1.079753830695805, "grad_norm": 0.33420515060424805, "learning_rate": 0.00014263933974251525, "loss": 11.6521, "step": 51582 }, { "epoch": 1.0797747634597672, "grad_norm": 0.35051026940345764, "learning_rate": 0.0001426373565033572, "loss": 11.6781, "step": 51583 }, { "epoch": 1.0797956962237294, "grad_norm": 0.35537850856781006, "learning_rate": 0.00014263537324370255, "loss": 11.6646, "step": 51584 }, { "epoch": 1.0798166289876916, "grad_norm": 0.3382551372051239, "learning_rate": 0.00014263338996355223, "loss": 11.672, "step": 51585 }, { "epoch": 1.0798375617516536, "grad_norm": 0.2980833351612091, "learning_rate": 0.00014263140666290718, "loss": 11.6621, "step": 51586 }, { "epoch": 1.0798584945156158, "grad_norm": 0.21843577921390533, "learning_rate": 0.00014262942334176834, "loss": 11.6695, "step": 51587 }, { "epoch": 1.079879427279578, "grad_norm": 0.28095486760139465, "learning_rate": 0.0001426274400001367, "loss": 11.653, "step": 51588 }, { "epoch": 1.0799003600435402, "grad_norm": 0.28061726689338684, "learning_rate": 0.0001426254566380132, "loss": 11.6703, "step": 51589 }, { "epoch": 1.0799212928075024, "grad_norm": 0.34342873096466064, "learning_rate": 0.00014262347325539874, "loss": 11.6767, "step": 51590 }, { "epoch": 1.0799422255714644, "grad_norm": 0.3242277503013611, "learning_rate": 0.00014262148985229436, "loss": 11.6543, "step": 51591 }, { "epoch": 1.0799631583354266, "grad_norm": 0.3598879873752594, "learning_rate": 0.000142619506428701, "loss": 11.6733, "step": 51592 }, { "epoch": 1.0799840910993888, "grad_norm": 0.313293993473053, "learning_rate": 0.00014261752298461958, "loss": 11.6627, "step": 51593 }, { "epoch": 1.080005023863351, "grad_norm": 0.34707051515579224, "learning_rate": 0.00014261553952005102, "loss": 11.6845, "step": 51594 }, { "epoch": 1.0800259566273132, "grad_norm": 0.3548019230365753, "learning_rate": 0.00014261355603499635, "loss": 11.6459, "step": 51595 }, { "epoch": 1.0800468893912751, "grad_norm": 0.32048505544662476, "learning_rate": 0.00014261157252945647, "loss": 11.6779, "step": 51596 }, { "epoch": 1.0800678221552373, "grad_norm": 0.30842724442481995, "learning_rate": 0.00014260958900343237, "loss": 11.6662, "step": 51597 }, { "epoch": 1.0800887549191995, "grad_norm": 0.39553871750831604, "learning_rate": 0.000142607605456925, "loss": 11.6722, "step": 51598 }, { "epoch": 1.0801096876831617, "grad_norm": 0.28892385959625244, "learning_rate": 0.00014260562188993527, "loss": 11.6677, "step": 51599 }, { "epoch": 1.080130620447124, "grad_norm": 0.4160832464694977, "learning_rate": 0.0001426036383024642, "loss": 11.6797, "step": 51600 }, { "epoch": 1.080151553211086, "grad_norm": 0.31095603108406067, "learning_rate": 0.00014260165469451265, "loss": 11.6709, "step": 51601 }, { "epoch": 1.0801724859750481, "grad_norm": 0.3400299847126007, "learning_rate": 0.0001425996710660817, "loss": 11.6729, "step": 51602 }, { "epoch": 1.0801934187390103, "grad_norm": 0.3075723350048065, "learning_rate": 0.00014259768741717215, "loss": 11.6614, "step": 51603 }, { "epoch": 1.0802143515029725, "grad_norm": 0.265752911567688, "learning_rate": 0.00014259570374778512, "loss": 11.6625, "step": 51604 }, { "epoch": 1.0802352842669345, "grad_norm": 0.35946130752563477, "learning_rate": 0.00014259372005792143, "loss": 11.6795, "step": 51605 }, { "epoch": 1.0802562170308967, "grad_norm": 0.30945611000061035, "learning_rate": 0.00014259173634758213, "loss": 11.67, "step": 51606 }, { "epoch": 1.080277149794859, "grad_norm": 0.3043968677520752, "learning_rate": 0.0001425897526167681, "loss": 11.658, "step": 51607 }, { "epoch": 1.080298082558821, "grad_norm": 0.31951847672462463, "learning_rate": 0.0001425877688654803, "loss": 11.6822, "step": 51608 }, { "epoch": 1.0803190153227833, "grad_norm": 0.2689003348350525, "learning_rate": 0.00014258578509371975, "loss": 11.6715, "step": 51609 }, { "epoch": 1.0803399480867453, "grad_norm": 0.31342101097106934, "learning_rate": 0.00014258380130148735, "loss": 11.6493, "step": 51610 }, { "epoch": 1.0803608808507075, "grad_norm": 0.3351760804653168, "learning_rate": 0.00014258181748878402, "loss": 11.6584, "step": 51611 }, { "epoch": 1.0803818136146697, "grad_norm": 0.3502930700778961, "learning_rate": 0.00014257983365561082, "loss": 11.6767, "step": 51612 }, { "epoch": 1.0804027463786319, "grad_norm": 0.26329851150512695, "learning_rate": 0.0001425778498019686, "loss": 11.6542, "step": 51613 }, { "epoch": 1.080423679142594, "grad_norm": 0.3783467411994934, "learning_rate": 0.0001425758659278584, "loss": 11.6588, "step": 51614 }, { "epoch": 1.080444611906556, "grad_norm": 0.40295881032943726, "learning_rate": 0.00014257388203328105, "loss": 11.6696, "step": 51615 }, { "epoch": 1.0804655446705183, "grad_norm": 0.2649019956588745, "learning_rate": 0.00014257189811823763, "loss": 11.6768, "step": 51616 }, { "epoch": 1.0804864774344805, "grad_norm": 0.29318901896476746, "learning_rate": 0.00014256991418272905, "loss": 11.6605, "step": 51617 }, { "epoch": 1.0805074101984427, "grad_norm": 0.41681209206581116, "learning_rate": 0.00014256793022675624, "loss": 11.6793, "step": 51618 }, { "epoch": 1.0805283429624049, "grad_norm": 0.31671974062919617, "learning_rate": 0.0001425659462503202, "loss": 11.651, "step": 51619 }, { "epoch": 1.0805492757263668, "grad_norm": 0.34777382016181946, "learning_rate": 0.00014256396225342184, "loss": 11.6765, "step": 51620 }, { "epoch": 1.080570208490329, "grad_norm": 0.36931902170181274, "learning_rate": 0.0001425619782360621, "loss": 11.6856, "step": 51621 }, { "epoch": 1.0805911412542912, "grad_norm": 0.305965393781662, "learning_rate": 0.00014255999419824202, "loss": 11.6538, "step": 51622 }, { "epoch": 1.0806120740182534, "grad_norm": 0.28919512033462524, "learning_rate": 0.00014255801013996245, "loss": 11.6545, "step": 51623 }, { "epoch": 1.0806330067822154, "grad_norm": 0.3302317261695862, "learning_rate": 0.00014255602606122442, "loss": 11.6746, "step": 51624 }, { "epoch": 1.0806539395461776, "grad_norm": 0.3926917612552643, "learning_rate": 0.00014255404196202885, "loss": 11.6734, "step": 51625 }, { "epoch": 1.0806748723101398, "grad_norm": 0.2916625738143921, "learning_rate": 0.0001425520578423767, "loss": 11.6728, "step": 51626 }, { "epoch": 1.080695805074102, "grad_norm": 0.30056247115135193, "learning_rate": 0.00014255007370226895, "loss": 11.662, "step": 51627 }, { "epoch": 1.0807167378380642, "grad_norm": 0.27130070328712463, "learning_rate": 0.00014254808954170645, "loss": 11.6647, "step": 51628 }, { "epoch": 1.0807376706020262, "grad_norm": 0.29706287384033203, "learning_rate": 0.00014254610536069032, "loss": 11.662, "step": 51629 }, { "epoch": 1.0807586033659884, "grad_norm": 0.25864115357398987, "learning_rate": 0.00014254412115922136, "loss": 11.6596, "step": 51630 }, { "epoch": 1.0807795361299506, "grad_norm": 0.3119766414165497, "learning_rate": 0.00014254213693730062, "loss": 11.6726, "step": 51631 }, { "epoch": 1.0808004688939128, "grad_norm": 0.29445958137512207, "learning_rate": 0.00014254015269492902, "loss": 11.6598, "step": 51632 }, { "epoch": 1.080821401657875, "grad_norm": 0.2603219449520111, "learning_rate": 0.0001425381684321075, "loss": 11.6653, "step": 51633 }, { "epoch": 1.080842334421837, "grad_norm": 0.2945795953273773, "learning_rate": 0.00014253618414883704, "loss": 11.6741, "step": 51634 }, { "epoch": 1.0808632671857992, "grad_norm": 0.3001725375652313, "learning_rate": 0.00014253419984511858, "loss": 11.6769, "step": 51635 }, { "epoch": 1.0808841999497614, "grad_norm": 0.2686239778995514, "learning_rate": 0.00014253221552095312, "loss": 11.6906, "step": 51636 }, { "epoch": 1.0809051327137236, "grad_norm": 0.26738879084587097, "learning_rate": 0.0001425302311763415, "loss": 11.6686, "step": 51637 }, { "epoch": 1.0809260654776858, "grad_norm": 0.2640385925769806, "learning_rate": 0.0001425282468112848, "loss": 11.6614, "step": 51638 }, { "epoch": 1.0809469982416477, "grad_norm": 0.2647286355495453, "learning_rate": 0.00014252626242578387, "loss": 11.6631, "step": 51639 }, { "epoch": 1.08096793100561, "grad_norm": 0.30682000517845154, "learning_rate": 0.00014252427801983973, "loss": 11.6736, "step": 51640 }, { "epoch": 1.0809888637695722, "grad_norm": 0.3167745769023895, "learning_rate": 0.00014252229359345338, "loss": 11.6728, "step": 51641 }, { "epoch": 1.0810097965335344, "grad_norm": 0.6597554683685303, "learning_rate": 0.00014252030914662563, "loss": 11.6724, "step": 51642 }, { "epoch": 1.0810307292974963, "grad_norm": 0.31808507442474365, "learning_rate": 0.00014251832467935752, "loss": 11.6751, "step": 51643 }, { "epoch": 1.0810516620614585, "grad_norm": 0.3722224831581116, "learning_rate": 0.00014251634019165005, "loss": 11.657, "step": 51644 }, { "epoch": 1.0810725948254207, "grad_norm": 0.3205603361129761, "learning_rate": 0.00014251435568350408, "loss": 11.6698, "step": 51645 }, { "epoch": 1.081093527589383, "grad_norm": 0.3113740384578705, "learning_rate": 0.00014251237115492065, "loss": 11.6679, "step": 51646 }, { "epoch": 1.0811144603533451, "grad_norm": 0.26797324419021606, "learning_rate": 0.00014251038660590063, "loss": 11.6746, "step": 51647 }, { "epoch": 1.081135393117307, "grad_norm": 0.32019978761672974, "learning_rate": 0.00014250840203644503, "loss": 11.6682, "step": 51648 }, { "epoch": 1.0811563258812693, "grad_norm": 0.35451146960258484, "learning_rate": 0.0001425064174465548, "loss": 11.6686, "step": 51649 }, { "epoch": 1.0811772586452315, "grad_norm": 0.3276662826538086, "learning_rate": 0.00014250443283623086, "loss": 11.6503, "step": 51650 }, { "epoch": 1.0811981914091937, "grad_norm": 0.3550017178058624, "learning_rate": 0.00014250244820547423, "loss": 11.642, "step": 51651 }, { "epoch": 1.081219124173156, "grad_norm": 0.3427547216415405, "learning_rate": 0.0001425004635542858, "loss": 11.6633, "step": 51652 }, { "epoch": 1.0812400569371179, "grad_norm": 0.29240795969963074, "learning_rate": 0.00014249847888266653, "loss": 11.67, "step": 51653 }, { "epoch": 1.08126098970108, "grad_norm": 0.3412639796733856, "learning_rate": 0.0001424964941906174, "loss": 11.6618, "step": 51654 }, { "epoch": 1.0812819224650423, "grad_norm": 0.2743845582008362, "learning_rate": 0.00014249450947813935, "loss": 11.6703, "step": 51655 }, { "epoch": 1.0813028552290045, "grad_norm": 0.29730719327926636, "learning_rate": 0.00014249252474523333, "loss": 11.6825, "step": 51656 }, { "epoch": 1.0813237879929667, "grad_norm": 0.33295148611068726, "learning_rate": 0.00014249053999190032, "loss": 11.6752, "step": 51657 }, { "epoch": 1.0813447207569287, "grad_norm": 0.30518004298210144, "learning_rate": 0.00014248855521814123, "loss": 11.6628, "step": 51658 }, { "epoch": 1.0813656535208909, "grad_norm": 0.35124024748802185, "learning_rate": 0.00014248657042395706, "loss": 11.6709, "step": 51659 }, { "epoch": 1.081386586284853, "grad_norm": 0.28307127952575684, "learning_rate": 0.00014248458560934873, "loss": 11.6602, "step": 51660 }, { "epoch": 1.0814075190488153, "grad_norm": 0.2799661159515381, "learning_rate": 0.00014248260077431724, "loss": 11.6735, "step": 51661 }, { "epoch": 1.0814284518127772, "grad_norm": 0.33950406312942505, "learning_rate": 0.00014248061591886347, "loss": 11.6683, "step": 51662 }, { "epoch": 1.0814493845767394, "grad_norm": 0.3068712651729584, "learning_rate": 0.00014247863104298845, "loss": 11.674, "step": 51663 }, { "epoch": 1.0814703173407016, "grad_norm": 0.2810554504394531, "learning_rate": 0.00014247664614669309, "loss": 11.6688, "step": 51664 }, { "epoch": 1.0814912501046638, "grad_norm": 0.34356772899627686, "learning_rate": 0.00014247466122997834, "loss": 11.6789, "step": 51665 }, { "epoch": 1.081512182868626, "grad_norm": 0.3188877999782562, "learning_rate": 0.0001424726762928452, "loss": 11.6759, "step": 51666 }, { "epoch": 1.081533115632588, "grad_norm": 0.4110625684261322, "learning_rate": 0.00014247069133529458, "loss": 11.6904, "step": 51667 }, { "epoch": 1.0815540483965502, "grad_norm": 0.2630334496498108, "learning_rate": 0.00014246870635732745, "loss": 11.6603, "step": 51668 }, { "epoch": 1.0815749811605124, "grad_norm": 0.2269870936870575, "learning_rate": 0.00014246672135894475, "loss": 11.6707, "step": 51669 }, { "epoch": 1.0815959139244746, "grad_norm": 0.4821716845035553, "learning_rate": 0.00014246473634014746, "loss": 11.6525, "step": 51670 }, { "epoch": 1.0816168466884368, "grad_norm": 0.3071482181549072, "learning_rate": 0.0001424627513009365, "loss": 11.6632, "step": 51671 }, { "epoch": 1.0816377794523988, "grad_norm": 0.48073720932006836, "learning_rate": 0.00014246076624131285, "loss": 11.6772, "step": 51672 }, { "epoch": 1.081658712216361, "grad_norm": 0.3677091598510742, "learning_rate": 0.00014245878116127747, "loss": 11.6646, "step": 51673 }, { "epoch": 1.0816796449803232, "grad_norm": 0.26982569694519043, "learning_rate": 0.0001424567960608313, "loss": 11.6806, "step": 51674 }, { "epoch": 1.0817005777442854, "grad_norm": 0.3989124000072479, "learning_rate": 0.00014245481093997527, "loss": 11.6799, "step": 51675 }, { "epoch": 1.0817215105082476, "grad_norm": 0.34603434801101685, "learning_rate": 0.00014245282579871042, "loss": 11.6586, "step": 51676 }, { "epoch": 1.0817424432722096, "grad_norm": 0.37935253977775574, "learning_rate": 0.0001424508406370376, "loss": 11.6671, "step": 51677 }, { "epoch": 1.0817633760361718, "grad_norm": 0.3874841034412384, "learning_rate": 0.00014244885545495784, "loss": 11.6538, "step": 51678 }, { "epoch": 1.081784308800134, "grad_norm": 0.30696263909339905, "learning_rate": 0.00014244687025247204, "loss": 11.6661, "step": 51679 }, { "epoch": 1.0818052415640962, "grad_norm": 0.2928315997123718, "learning_rate": 0.0001424448850295812, "loss": 11.6715, "step": 51680 }, { "epoch": 1.0818261743280582, "grad_norm": 0.34591829776763916, "learning_rate": 0.00014244289978628623, "loss": 11.6597, "step": 51681 }, { "epoch": 1.0818471070920204, "grad_norm": 0.33002495765686035, "learning_rate": 0.0001424409145225881, "loss": 11.6606, "step": 51682 }, { "epoch": 1.0818680398559826, "grad_norm": 0.30176153779029846, "learning_rate": 0.0001424389292384878, "loss": 11.6584, "step": 51683 }, { "epoch": 1.0818889726199448, "grad_norm": 0.46285489201545715, "learning_rate": 0.00014243694393398622, "loss": 11.664, "step": 51684 }, { "epoch": 1.081909905383907, "grad_norm": 0.3023926317691803, "learning_rate": 0.00014243495860908443, "loss": 11.6456, "step": 51685 }, { "epoch": 1.081930838147869, "grad_norm": 0.37499040365219116, "learning_rate": 0.0001424329732637832, "loss": 11.6729, "step": 51686 }, { "epoch": 1.0819517709118311, "grad_norm": 0.2952832281589508, "learning_rate": 0.00014243098789808368, "loss": 11.6598, "step": 51687 }, { "epoch": 1.0819727036757933, "grad_norm": 0.27601414918899536, "learning_rate": 0.00014242900251198667, "loss": 11.6738, "step": 51688 }, { "epoch": 1.0819936364397555, "grad_norm": 0.43468788266181946, "learning_rate": 0.00014242701710549322, "loss": 11.6666, "step": 51689 }, { "epoch": 1.0820145692037177, "grad_norm": 0.26988351345062256, "learning_rate": 0.00014242503167860427, "loss": 11.6744, "step": 51690 }, { "epoch": 1.0820355019676797, "grad_norm": 0.3809848129749298, "learning_rate": 0.0001424230462313207, "loss": 11.6615, "step": 51691 }, { "epoch": 1.082056434731642, "grad_norm": 0.40449127554893494, "learning_rate": 0.00014242106076364354, "loss": 11.6684, "step": 51692 }, { "epoch": 1.0820773674956041, "grad_norm": 0.2909628450870514, "learning_rate": 0.00014241907527557377, "loss": 11.6677, "step": 51693 }, { "epoch": 1.0820983002595663, "grad_norm": 0.3374834358692169, "learning_rate": 0.00014241708976711224, "loss": 11.6581, "step": 51694 }, { "epoch": 1.0821192330235285, "grad_norm": 0.34074512124061584, "learning_rate": 0.00014241510423826004, "loss": 11.6598, "step": 51695 }, { "epoch": 1.0821401657874905, "grad_norm": 0.2999667227268219, "learning_rate": 0.00014241311868901798, "loss": 11.6778, "step": 51696 }, { "epoch": 1.0821610985514527, "grad_norm": 0.32795000076293945, "learning_rate": 0.0001424111331193871, "loss": 11.6594, "step": 51697 }, { "epoch": 1.082182031315415, "grad_norm": 0.2593657076358795, "learning_rate": 0.00014240914752936836, "loss": 11.6698, "step": 51698 }, { "epoch": 1.082202964079377, "grad_norm": 0.3124633729457855, "learning_rate": 0.00014240716191896266, "loss": 11.6794, "step": 51699 }, { "epoch": 1.082223896843339, "grad_norm": 0.2823161780834198, "learning_rate": 0.00014240517628817103, "loss": 11.6716, "step": 51700 }, { "epoch": 1.0822448296073013, "grad_norm": 0.3770834505558014, "learning_rate": 0.00014240319063699435, "loss": 11.6659, "step": 51701 }, { "epoch": 1.0822657623712635, "grad_norm": 0.36840420961380005, "learning_rate": 0.00014240120496543363, "loss": 11.675, "step": 51702 }, { "epoch": 1.0822866951352257, "grad_norm": 0.29497018456459045, "learning_rate": 0.00014239921927348978, "loss": 11.6759, "step": 51703 }, { "epoch": 1.0823076278991879, "grad_norm": 0.2457205057144165, "learning_rate": 0.00014239723356116376, "loss": 11.6635, "step": 51704 }, { "epoch": 1.0823285606631499, "grad_norm": 0.3468376398086548, "learning_rate": 0.00014239524782845656, "loss": 11.6854, "step": 51705 }, { "epoch": 1.082349493427112, "grad_norm": 0.31425681710243225, "learning_rate": 0.00014239326207536913, "loss": 11.6921, "step": 51706 }, { "epoch": 1.0823704261910743, "grad_norm": 0.32596662640571594, "learning_rate": 0.0001423912763019024, "loss": 11.6594, "step": 51707 }, { "epoch": 1.0823913589550365, "grad_norm": 0.2984055280685425, "learning_rate": 0.00014238929050805734, "loss": 11.6597, "step": 51708 }, { "epoch": 1.0824122917189987, "grad_norm": 0.31014177203178406, "learning_rate": 0.00014238730469383486, "loss": 11.6761, "step": 51709 }, { "epoch": 1.0824332244829606, "grad_norm": 0.2812556028366089, "learning_rate": 0.000142385318859236, "loss": 11.662, "step": 51710 }, { "epoch": 1.0824541572469228, "grad_norm": 0.3136754035949707, "learning_rate": 0.00014238333300426167, "loss": 11.6575, "step": 51711 }, { "epoch": 1.082475090010885, "grad_norm": 0.3213261067867279, "learning_rate": 0.0001423813471289128, "loss": 11.6646, "step": 51712 }, { "epoch": 1.0824960227748472, "grad_norm": 0.3539997935295105, "learning_rate": 0.00014237936123319035, "loss": 11.6634, "step": 51713 }, { "epoch": 1.0825169555388094, "grad_norm": 0.36787885427474976, "learning_rate": 0.00014237737531709528, "loss": 11.6687, "step": 51714 }, { "epoch": 1.0825378883027714, "grad_norm": 0.3151775598526001, "learning_rate": 0.00014237538938062863, "loss": 11.6622, "step": 51715 }, { "epoch": 1.0825588210667336, "grad_norm": 0.34889137744903564, "learning_rate": 0.00014237340342379123, "loss": 11.6622, "step": 51716 }, { "epoch": 1.0825797538306958, "grad_norm": 0.2875279188156128, "learning_rate": 0.00014237141744658408, "loss": 11.6476, "step": 51717 }, { "epoch": 1.082600686594658, "grad_norm": 0.3240335285663605, "learning_rate": 0.00014236943144900816, "loss": 11.6682, "step": 51718 }, { "epoch": 1.08262161935862, "grad_norm": 0.34584176540374756, "learning_rate": 0.0001423674454310644, "loss": 11.6936, "step": 51719 }, { "epoch": 1.0826425521225822, "grad_norm": 0.3480267822742462, "learning_rate": 0.00014236545939275377, "loss": 11.6826, "step": 51720 }, { "epoch": 1.0826634848865444, "grad_norm": 0.35237714648246765, "learning_rate": 0.0001423634733340772, "loss": 11.6694, "step": 51721 }, { "epoch": 1.0826844176505066, "grad_norm": 0.3461029529571533, "learning_rate": 0.00014236148725503566, "loss": 11.6617, "step": 51722 }, { "epoch": 1.0827053504144688, "grad_norm": 0.33081236481666565, "learning_rate": 0.0001423595011556301, "loss": 11.6645, "step": 51723 }, { "epoch": 1.0827262831784308, "grad_norm": 0.3116038739681244, "learning_rate": 0.0001423575150358615, "loss": 11.6679, "step": 51724 }, { "epoch": 1.082747215942393, "grad_norm": 0.34831804037094116, "learning_rate": 0.00014235552889573077, "loss": 11.6816, "step": 51725 }, { "epoch": 1.0827681487063552, "grad_norm": 0.40384286642074585, "learning_rate": 0.0001423535427352389, "loss": 11.6708, "step": 51726 }, { "epoch": 1.0827890814703174, "grad_norm": 0.2811548709869385, "learning_rate": 0.00014235155655438687, "loss": 11.6594, "step": 51727 }, { "epoch": 1.0828100142342796, "grad_norm": 0.2864886224269867, "learning_rate": 0.00014234957035317552, "loss": 11.6705, "step": 51728 }, { "epoch": 1.0828309469982416, "grad_norm": 0.4479214549064636, "learning_rate": 0.00014234758413160593, "loss": 11.6589, "step": 51729 }, { "epoch": 1.0828518797622038, "grad_norm": 0.4027967154979706, "learning_rate": 0.000142345597889679, "loss": 11.652, "step": 51730 }, { "epoch": 1.082872812526166, "grad_norm": 0.3266633152961731, "learning_rate": 0.0001423436116273957, "loss": 11.6507, "step": 51731 }, { "epoch": 1.0828937452901282, "grad_norm": 0.3121817409992218, "learning_rate": 0.00014234162534475697, "loss": 11.6729, "step": 51732 }, { "epoch": 1.0829146780540904, "grad_norm": 0.3249455690383911, "learning_rate": 0.00014233963904176377, "loss": 11.6805, "step": 51733 }, { "epoch": 1.0829356108180523, "grad_norm": 0.30853599309921265, "learning_rate": 0.00014233765271841708, "loss": 11.6711, "step": 51734 }, { "epoch": 1.0829565435820145, "grad_norm": 0.33706769347190857, "learning_rate": 0.0001423356663747178, "loss": 11.6611, "step": 51735 }, { "epoch": 1.0829774763459767, "grad_norm": 0.279497355222702, "learning_rate": 0.0001423336800106669, "loss": 11.6959, "step": 51736 }, { "epoch": 1.082998409109939, "grad_norm": 0.2619515061378479, "learning_rate": 0.0001423316936262654, "loss": 11.6701, "step": 51737 }, { "epoch": 1.083019341873901, "grad_norm": 0.3163474202156067, "learning_rate": 0.00014232970722151417, "loss": 11.6925, "step": 51738 }, { "epoch": 1.0830402746378631, "grad_norm": 0.2997380197048187, "learning_rate": 0.00014232772079641426, "loss": 11.6526, "step": 51739 }, { "epoch": 1.0830612074018253, "grad_norm": 0.26770055294036865, "learning_rate": 0.0001423257343509665, "loss": 11.673, "step": 51740 }, { "epoch": 1.0830821401657875, "grad_norm": 0.2596597969532013, "learning_rate": 0.00014232374788517192, "loss": 11.667, "step": 51741 }, { "epoch": 1.0831030729297497, "grad_norm": 0.32904189825057983, "learning_rate": 0.00014232176139903152, "loss": 11.6586, "step": 51742 }, { "epoch": 1.0831240056937117, "grad_norm": 0.3142487704753876, "learning_rate": 0.00014231977489254614, "loss": 11.6591, "step": 51743 }, { "epoch": 1.083144938457674, "grad_norm": 0.3301205039024353, "learning_rate": 0.00014231778836571682, "loss": 11.672, "step": 51744 }, { "epoch": 1.083165871221636, "grad_norm": 0.2913855314254761, "learning_rate": 0.00014231580181854448, "loss": 11.6401, "step": 51745 }, { "epoch": 1.0831868039855983, "grad_norm": 0.27461469173431396, "learning_rate": 0.00014231381525103013, "loss": 11.6728, "step": 51746 }, { "epoch": 1.0832077367495605, "grad_norm": 0.3041253387928009, "learning_rate": 0.00014231182866317462, "loss": 11.6619, "step": 51747 }, { "epoch": 1.0832286695135225, "grad_norm": 0.3755250871181488, "learning_rate": 0.000142309842054979, "loss": 11.6645, "step": 51748 }, { "epoch": 1.0832496022774847, "grad_norm": 0.28272557258605957, "learning_rate": 0.0001423078554264442, "loss": 11.681, "step": 51749 }, { "epoch": 1.0832705350414469, "grad_norm": 0.3329911530017853, "learning_rate": 0.00014230586877757114, "loss": 11.672, "step": 51750 }, { "epoch": 1.083291467805409, "grad_norm": 0.43772032856941223, "learning_rate": 0.00014230388210836084, "loss": 11.6509, "step": 51751 }, { "epoch": 1.0833124005693713, "grad_norm": 0.3702424466609955, "learning_rate": 0.00014230189541881416, "loss": 11.6743, "step": 51752 }, { "epoch": 1.0833333333333333, "grad_norm": 0.2808554768562317, "learning_rate": 0.00014229990870893213, "loss": 11.6679, "step": 51753 }, { "epoch": 1.0833542660972955, "grad_norm": 0.257968544960022, "learning_rate": 0.0001422979219787157, "loss": 11.6697, "step": 51754 }, { "epoch": 1.0833751988612577, "grad_norm": 0.24708829820156097, "learning_rate": 0.0001422959352281658, "loss": 11.6664, "step": 51755 }, { "epoch": 1.0833961316252199, "grad_norm": 0.28808799386024475, "learning_rate": 0.00014229394845728343, "loss": 11.6655, "step": 51756 }, { "epoch": 1.0834170643891818, "grad_norm": 0.42348921298980713, "learning_rate": 0.00014229196166606946, "loss": 11.6475, "step": 51757 }, { "epoch": 1.083437997153144, "grad_norm": 0.34132206439971924, "learning_rate": 0.0001422899748545249, "loss": 11.6682, "step": 51758 }, { "epoch": 1.0834589299171062, "grad_norm": 0.39853185415267944, "learning_rate": 0.00014228798802265076, "loss": 11.6765, "step": 51759 }, { "epoch": 1.0834798626810684, "grad_norm": 0.3348749876022339, "learning_rate": 0.0001422860011704479, "loss": 11.6733, "step": 51760 }, { "epoch": 1.0835007954450306, "grad_norm": 0.333111435174942, "learning_rate": 0.0001422840142979173, "loss": 11.666, "step": 51761 }, { "epoch": 1.0835217282089926, "grad_norm": 0.3315281271934509, "learning_rate": 0.00014228202740505994, "loss": 11.6882, "step": 51762 }, { "epoch": 1.0835426609729548, "grad_norm": 0.38388895988464355, "learning_rate": 0.00014228004049187675, "loss": 11.6626, "step": 51763 }, { "epoch": 1.083563593736917, "grad_norm": 0.2835715115070343, "learning_rate": 0.00014227805355836873, "loss": 11.6672, "step": 51764 }, { "epoch": 1.0835845265008792, "grad_norm": 0.36023104190826416, "learning_rate": 0.00014227606660453676, "loss": 11.6731, "step": 51765 }, { "epoch": 1.0836054592648414, "grad_norm": 0.33122575283050537, "learning_rate": 0.00014227407963038186, "loss": 11.6781, "step": 51766 }, { "epoch": 1.0836263920288034, "grad_norm": 0.40803062915802, "learning_rate": 0.00014227209263590495, "loss": 11.6642, "step": 51767 }, { "epoch": 1.0836473247927656, "grad_norm": 0.34072527289390564, "learning_rate": 0.00014227010562110703, "loss": 11.6726, "step": 51768 }, { "epoch": 1.0836682575567278, "grad_norm": 0.3461781442165375, "learning_rate": 0.00014226811858598898, "loss": 11.667, "step": 51769 }, { "epoch": 1.08368919032069, "grad_norm": 0.2764236629009247, "learning_rate": 0.00014226613153055183, "loss": 11.6544, "step": 51770 }, { "epoch": 1.0837101230846522, "grad_norm": 0.37321987748146057, "learning_rate": 0.00014226414445479648, "loss": 11.6536, "step": 51771 }, { "epoch": 1.0837310558486142, "grad_norm": 0.2963855564594269, "learning_rate": 0.0001422621573587239, "loss": 11.6703, "step": 51772 }, { "epoch": 1.0837519886125764, "grad_norm": 0.34095093607902527, "learning_rate": 0.0001422601702423351, "loss": 11.6648, "step": 51773 }, { "epoch": 1.0837729213765386, "grad_norm": 0.31619617342948914, "learning_rate": 0.00014225818310563098, "loss": 11.6594, "step": 51774 }, { "epoch": 1.0837938541405008, "grad_norm": 0.3309461772441864, "learning_rate": 0.00014225619594861245, "loss": 11.6712, "step": 51775 }, { "epoch": 1.0838147869044628, "grad_norm": 4.270306587219238, "learning_rate": 0.0001422542087712806, "loss": 11.6253, "step": 51776 }, { "epoch": 1.083835719668425, "grad_norm": 0.3340175151824951, "learning_rate": 0.00014225222157363625, "loss": 11.6753, "step": 51777 }, { "epoch": 1.0838566524323872, "grad_norm": 0.357360303401947, "learning_rate": 0.00014225023435568044, "loss": 11.6782, "step": 51778 }, { "epoch": 1.0838775851963494, "grad_norm": 0.3293329179286957, "learning_rate": 0.00014224824711741408, "loss": 11.6329, "step": 51779 }, { "epoch": 1.0838985179603116, "grad_norm": 0.30578652024269104, "learning_rate": 0.0001422462598588381, "loss": 11.6556, "step": 51780 }, { "epoch": 1.0839194507242735, "grad_norm": 0.2840234339237213, "learning_rate": 0.00014224427257995357, "loss": 11.6804, "step": 51781 }, { "epoch": 1.0839403834882357, "grad_norm": 0.45346611738204956, "learning_rate": 0.0001422422852807613, "loss": 11.6675, "step": 51782 }, { "epoch": 1.083961316252198, "grad_norm": 0.3534446358680725, "learning_rate": 0.0001422402979612624, "loss": 11.6752, "step": 51783 }, { "epoch": 1.0839822490161601, "grad_norm": 0.34366053342819214, "learning_rate": 0.00014223831062145766, "loss": 11.6787, "step": 51784 }, { "epoch": 1.0840031817801223, "grad_norm": 0.3523203730583191, "learning_rate": 0.00014223632326134818, "loss": 11.6822, "step": 51785 }, { "epoch": 1.0840241145440843, "grad_norm": 0.41957855224609375, "learning_rate": 0.00014223433588093483, "loss": 11.6871, "step": 51786 }, { "epoch": 1.0840450473080465, "grad_norm": 0.38401392102241516, "learning_rate": 0.0001422323484802186, "loss": 11.6779, "step": 51787 }, { "epoch": 1.0840659800720087, "grad_norm": 0.4066104590892792, "learning_rate": 0.0001422303610592004, "loss": 11.6577, "step": 51788 }, { "epoch": 1.084086912835971, "grad_norm": 0.2706637978553772, "learning_rate": 0.00014222837361788123, "loss": 11.6639, "step": 51789 }, { "epoch": 1.0841078455999331, "grad_norm": 0.28670573234558105, "learning_rate": 0.00014222638615626207, "loss": 11.6655, "step": 51790 }, { "epoch": 1.084128778363895, "grad_norm": 0.2872897684574127, "learning_rate": 0.0001422243986743438, "loss": 11.6568, "step": 51791 }, { "epoch": 1.0841497111278573, "grad_norm": 0.3211600184440613, "learning_rate": 0.00014222241117212741, "loss": 11.6721, "step": 51792 }, { "epoch": 1.0841706438918195, "grad_norm": 0.32043203711509705, "learning_rate": 0.0001422204236496139, "loss": 11.6682, "step": 51793 }, { "epoch": 1.0841915766557817, "grad_norm": 0.3194993734359741, "learning_rate": 0.00014221843610680415, "loss": 11.6814, "step": 51794 }, { "epoch": 1.0842125094197437, "grad_norm": 0.28147462010383606, "learning_rate": 0.0001422164485436992, "loss": 11.6772, "step": 51795 }, { "epoch": 1.0842334421837059, "grad_norm": 0.3207087516784668, "learning_rate": 0.00014221446096029992, "loss": 11.6591, "step": 51796 }, { "epoch": 1.084254374947668, "grad_norm": 0.36690837144851685, "learning_rate": 0.0001422124733566073, "loss": 11.6584, "step": 51797 }, { "epoch": 1.0842753077116303, "grad_norm": 0.26818910241127014, "learning_rate": 0.0001422104857326223, "loss": 11.671, "step": 51798 }, { "epoch": 1.0842962404755925, "grad_norm": 0.3339231014251709, "learning_rate": 0.00014220849808834588, "loss": 11.6586, "step": 51799 }, { "epoch": 1.0843171732395545, "grad_norm": 0.3605172336101532, "learning_rate": 0.00014220651042377898, "loss": 11.6484, "step": 51800 }, { "epoch": 1.0843381060035167, "grad_norm": 0.3277149796485901, "learning_rate": 0.0001422045227389226, "loss": 11.6626, "step": 51801 }, { "epoch": 1.0843590387674789, "grad_norm": 0.3565472364425659, "learning_rate": 0.00014220253503377764, "loss": 11.6779, "step": 51802 }, { "epoch": 1.084379971531441, "grad_norm": 0.29334017634391785, "learning_rate": 0.00014220054730834503, "loss": 11.6782, "step": 51803 }, { "epoch": 1.0844009042954033, "grad_norm": 0.3889886736869812, "learning_rate": 0.00014219855956262585, "loss": 11.6634, "step": 51804 }, { "epoch": 1.0844218370593652, "grad_norm": 0.46136051416397095, "learning_rate": 0.0001421965717966209, "loss": 11.6782, "step": 51805 }, { "epoch": 1.0844427698233274, "grad_norm": 0.3398987650871277, "learning_rate": 0.00014219458401033125, "loss": 11.6775, "step": 51806 }, { "epoch": 1.0844637025872896, "grad_norm": 0.3080849051475525, "learning_rate": 0.00014219259620375786, "loss": 11.6848, "step": 51807 }, { "epoch": 1.0844846353512518, "grad_norm": 0.3013603985309601, "learning_rate": 0.00014219060837690156, "loss": 11.6589, "step": 51808 }, { "epoch": 1.084505568115214, "grad_norm": 0.276469349861145, "learning_rate": 0.00014218862052976344, "loss": 11.6704, "step": 51809 }, { "epoch": 1.084526500879176, "grad_norm": 0.2992917001247406, "learning_rate": 0.0001421866326623444, "loss": 11.6482, "step": 51810 }, { "epoch": 1.0845474336431382, "grad_norm": 0.31403136253356934, "learning_rate": 0.0001421846447746454, "loss": 11.6641, "step": 51811 }, { "epoch": 1.0845683664071004, "grad_norm": 0.2958621382713318, "learning_rate": 0.0001421826568666674, "loss": 11.6735, "step": 51812 }, { "epoch": 1.0845892991710626, "grad_norm": 0.32471370697021484, "learning_rate": 0.00014218066893841133, "loss": 11.6756, "step": 51813 }, { "epoch": 1.0846102319350246, "grad_norm": 0.28782060742378235, "learning_rate": 0.0001421786809898782, "loss": 11.6714, "step": 51814 }, { "epoch": 1.0846311646989868, "grad_norm": 0.33204594254493713, "learning_rate": 0.00014217669302106888, "loss": 11.6636, "step": 51815 }, { "epoch": 1.084652097462949, "grad_norm": 0.26609501242637634, "learning_rate": 0.0001421747050319844, "loss": 11.6605, "step": 51816 }, { "epoch": 1.0846730302269112, "grad_norm": 0.2985897362232208, "learning_rate": 0.00014217271702262574, "loss": 11.6579, "step": 51817 }, { "epoch": 1.0846939629908734, "grad_norm": 0.30562925338745117, "learning_rate": 0.00014217072899299377, "loss": 11.682, "step": 51818 }, { "epoch": 1.0847148957548354, "grad_norm": 0.3349985182285309, "learning_rate": 0.0001421687409430895, "loss": 11.6725, "step": 51819 }, { "epoch": 1.0847358285187976, "grad_norm": 0.31220442056655884, "learning_rate": 0.00014216675287291383, "loss": 11.673, "step": 51820 }, { "epoch": 1.0847567612827598, "grad_norm": 0.4037680923938751, "learning_rate": 0.0001421647647824678, "loss": 11.6684, "step": 51821 }, { "epoch": 1.084777694046722, "grad_norm": 0.3317367732524872, "learning_rate": 0.00014216277667175231, "loss": 11.6867, "step": 51822 }, { "epoch": 1.0847986268106842, "grad_norm": 0.3231241703033447, "learning_rate": 0.00014216078854076836, "loss": 11.6436, "step": 51823 }, { "epoch": 1.0848195595746462, "grad_norm": 0.33703815937042236, "learning_rate": 0.00014215880038951682, "loss": 11.6672, "step": 51824 }, { "epoch": 1.0848404923386084, "grad_norm": 0.36995962262153625, "learning_rate": 0.00014215681221799872, "loss": 11.7069, "step": 51825 }, { "epoch": 1.0848614251025706, "grad_norm": 0.3288024067878723, "learning_rate": 0.00014215482402621501, "loss": 11.6763, "step": 51826 }, { "epoch": 1.0848823578665328, "grad_norm": 0.2908790707588196, "learning_rate": 0.00014215283581416662, "loss": 11.6607, "step": 51827 }, { "epoch": 1.084903290630495, "grad_norm": 0.31479692459106445, "learning_rate": 0.00014215084758185451, "loss": 11.6639, "step": 51828 }, { "epoch": 1.084924223394457, "grad_norm": 0.35037603974342346, "learning_rate": 0.00014214885932927965, "loss": 11.6599, "step": 51829 }, { "epoch": 1.0849451561584191, "grad_norm": 0.414048433303833, "learning_rate": 0.000142146871056443, "loss": 11.6722, "step": 51830 }, { "epoch": 1.0849660889223813, "grad_norm": 0.3410015404224396, "learning_rate": 0.00014214488276334547, "loss": 11.6673, "step": 51831 }, { "epoch": 1.0849870216863435, "grad_norm": 0.36766672134399414, "learning_rate": 0.00014214289444998808, "loss": 11.6715, "step": 51832 }, { "epoch": 1.0850079544503055, "grad_norm": 0.3445659577846527, "learning_rate": 0.00014214090611637173, "loss": 11.6614, "step": 51833 }, { "epoch": 1.0850288872142677, "grad_norm": 0.3286678194999695, "learning_rate": 0.00014213891776249744, "loss": 11.6638, "step": 51834 }, { "epoch": 1.08504981997823, "grad_norm": 0.43423449993133545, "learning_rate": 0.0001421369293883661, "loss": 11.6821, "step": 51835 }, { "epoch": 1.0850707527421921, "grad_norm": 0.28562936186790466, "learning_rate": 0.0001421349409939787, "loss": 11.6706, "step": 51836 }, { "epoch": 1.0850916855061543, "grad_norm": 0.43011948466300964, "learning_rate": 0.0001421329525793362, "loss": 11.6705, "step": 51837 }, { "epoch": 1.0851126182701163, "grad_norm": 0.39127394556999207, "learning_rate": 0.00014213096414443955, "loss": 11.6436, "step": 51838 }, { "epoch": 1.0851335510340785, "grad_norm": 0.3441259264945984, "learning_rate": 0.0001421289756892897, "loss": 11.6691, "step": 51839 }, { "epoch": 1.0851544837980407, "grad_norm": 0.3067079484462738, "learning_rate": 0.00014212698721388758, "loss": 11.6626, "step": 51840 }, { "epoch": 1.085175416562003, "grad_norm": 0.30287179350852966, "learning_rate": 0.00014212499871823418, "loss": 11.6576, "step": 51841 }, { "epoch": 1.085196349325965, "grad_norm": 0.29729166626930237, "learning_rate": 0.00014212301020233046, "loss": 11.6694, "step": 51842 }, { "epoch": 1.085217282089927, "grad_norm": 0.3442571461200714, "learning_rate": 0.00014212102166617736, "loss": 11.6802, "step": 51843 }, { "epoch": 1.0852382148538893, "grad_norm": 0.26319605112075806, "learning_rate": 0.00014211903310977587, "loss": 11.6576, "step": 51844 }, { "epoch": 1.0852591476178515, "grad_norm": 0.41498029232025146, "learning_rate": 0.00014211704453312687, "loss": 11.6708, "step": 51845 }, { "epoch": 1.0852800803818137, "grad_norm": 0.312631219625473, "learning_rate": 0.00014211505593623135, "loss": 11.6558, "step": 51846 }, { "epoch": 1.0853010131457759, "grad_norm": 0.32058659195899963, "learning_rate": 0.00014211306731909032, "loss": 11.6591, "step": 51847 }, { "epoch": 1.0853219459097379, "grad_norm": 0.32855626940727234, "learning_rate": 0.00014211107868170467, "loss": 11.6691, "step": 51848 }, { "epoch": 1.0853428786737, "grad_norm": 0.36389419436454773, "learning_rate": 0.00014210909002407545, "loss": 11.6796, "step": 51849 }, { "epoch": 1.0853638114376623, "grad_norm": 0.3677840828895569, "learning_rate": 0.00014210710134620347, "loss": 11.6824, "step": 51850 }, { "epoch": 1.0853847442016245, "grad_norm": 0.2995623052120209, "learning_rate": 0.00014210511264808976, "loss": 11.6735, "step": 51851 }, { "epoch": 1.0854056769655864, "grad_norm": 0.2992384433746338, "learning_rate": 0.00014210312392973532, "loss": 11.6792, "step": 51852 }, { "epoch": 1.0854266097295486, "grad_norm": 0.32687047123908997, "learning_rate": 0.00014210113519114102, "loss": 11.6666, "step": 51853 }, { "epoch": 1.0854475424935108, "grad_norm": 0.40260550379753113, "learning_rate": 0.00014209914643230793, "loss": 11.6799, "step": 51854 }, { "epoch": 1.085468475257473, "grad_norm": 0.43826791644096375, "learning_rate": 0.00014209715765323687, "loss": 11.6688, "step": 51855 }, { "epoch": 1.0854894080214352, "grad_norm": 0.3266518712043762, "learning_rate": 0.0001420951688539289, "loss": 11.6733, "step": 51856 }, { "epoch": 1.0855103407853972, "grad_norm": 0.2871922254562378, "learning_rate": 0.00014209318003438495, "loss": 11.6591, "step": 51857 }, { "epoch": 1.0855312735493594, "grad_norm": 0.31910839676856995, "learning_rate": 0.00014209119119460592, "loss": 11.6501, "step": 51858 }, { "epoch": 1.0855522063133216, "grad_norm": 0.3269549608230591, "learning_rate": 0.00014208920233459285, "loss": 11.6644, "step": 51859 }, { "epoch": 1.0855731390772838, "grad_norm": 0.39011144638061523, "learning_rate": 0.00014208721345434662, "loss": 11.6853, "step": 51860 }, { "epoch": 1.085594071841246, "grad_norm": 0.340353399515152, "learning_rate": 0.00014208522455386825, "loss": 11.6744, "step": 51861 }, { "epoch": 1.085615004605208, "grad_norm": 0.3855511546134949, "learning_rate": 0.00014208323563315867, "loss": 11.648, "step": 51862 }, { "epoch": 1.0856359373691702, "grad_norm": 0.27359700202941895, "learning_rate": 0.0001420812466922188, "loss": 11.6543, "step": 51863 }, { "epoch": 1.0856568701331324, "grad_norm": 0.3187158405780792, "learning_rate": 0.0001420792577310497, "loss": 11.6676, "step": 51864 }, { "epoch": 1.0856778028970946, "grad_norm": 0.3344012200832367, "learning_rate": 0.0001420772687496522, "loss": 11.6742, "step": 51865 }, { "epoch": 1.0856987356610568, "grad_norm": 0.3023076057434082, "learning_rate": 0.0001420752797480273, "loss": 11.6728, "step": 51866 }, { "epoch": 1.0857196684250188, "grad_norm": 0.3561055660247803, "learning_rate": 0.000142073290726176, "loss": 11.6757, "step": 51867 }, { "epoch": 1.085740601188981, "grad_norm": 0.2678273618221283, "learning_rate": 0.00014207130168409924, "loss": 11.6488, "step": 51868 }, { "epoch": 1.0857615339529432, "grad_norm": 0.262405127286911, "learning_rate": 0.00014206931262179795, "loss": 11.6752, "step": 51869 }, { "epoch": 1.0857824667169054, "grad_norm": 0.32309404015541077, "learning_rate": 0.0001420673235392731, "loss": 11.6729, "step": 51870 }, { "epoch": 1.0858033994808673, "grad_norm": 0.35377293825149536, "learning_rate": 0.00014206533443652562, "loss": 11.6816, "step": 51871 }, { "epoch": 1.0858243322448295, "grad_norm": 0.411867618560791, "learning_rate": 0.00014206334531355653, "loss": 11.6668, "step": 51872 }, { "epoch": 1.0858452650087917, "grad_norm": 0.40496575832366943, "learning_rate": 0.0001420613561703667, "loss": 11.6522, "step": 51873 }, { "epoch": 1.085866197772754, "grad_norm": 0.38778090476989746, "learning_rate": 0.00014205936700695718, "loss": 11.672, "step": 51874 }, { "epoch": 1.0858871305367161, "grad_norm": 0.38384637236595154, "learning_rate": 0.00014205737782332887, "loss": 11.668, "step": 51875 }, { "epoch": 1.0859080633006781, "grad_norm": 0.40932974219322205, "learning_rate": 0.00014205538861948274, "loss": 11.6723, "step": 51876 }, { "epoch": 1.0859289960646403, "grad_norm": 0.30707287788391113, "learning_rate": 0.0001420533993954197, "loss": 11.6701, "step": 51877 }, { "epoch": 1.0859499288286025, "grad_norm": 0.24191729724407196, "learning_rate": 0.00014205141015114075, "loss": 11.6638, "step": 51878 }, { "epoch": 1.0859708615925647, "grad_norm": 0.2923852801322937, "learning_rate": 0.0001420494208866469, "loss": 11.672, "step": 51879 }, { "epoch": 1.085991794356527, "grad_norm": 0.2869456708431244, "learning_rate": 0.00014204743160193902, "loss": 11.6808, "step": 51880 }, { "epoch": 1.086012727120489, "grad_norm": 0.3531031906604767, "learning_rate": 0.00014204544229701812, "loss": 11.662, "step": 51881 }, { "epoch": 1.086033659884451, "grad_norm": 0.3396296501159668, "learning_rate": 0.00014204345297188508, "loss": 11.6766, "step": 51882 }, { "epoch": 1.0860545926484133, "grad_norm": 0.33538806438446045, "learning_rate": 0.00014204146362654097, "loss": 11.6617, "step": 51883 }, { "epoch": 1.0860755254123755, "grad_norm": 0.2922171652317047, "learning_rate": 0.00014203947426098666, "loss": 11.6596, "step": 51884 }, { "epoch": 1.0860964581763377, "grad_norm": 0.35637620091438293, "learning_rate": 0.0001420374848752231, "loss": 11.6741, "step": 51885 }, { "epoch": 1.0861173909402997, "grad_norm": 0.37144017219543457, "learning_rate": 0.0001420354954692513, "loss": 11.676, "step": 51886 }, { "epoch": 1.0861383237042619, "grad_norm": 0.3268081545829773, "learning_rate": 0.0001420335060430722, "loss": 11.6745, "step": 51887 }, { "epoch": 1.086159256468224, "grad_norm": 0.32998135685920715, "learning_rate": 0.0001420315165966868, "loss": 11.6711, "step": 51888 }, { "epoch": 1.0861801892321863, "grad_norm": 0.2631848156452179, "learning_rate": 0.00014202952713009595, "loss": 11.6704, "step": 51889 }, { "epoch": 1.0862011219961483, "grad_norm": 0.29409411549568176, "learning_rate": 0.00014202753764330065, "loss": 11.6804, "step": 51890 }, { "epoch": 1.0862220547601105, "grad_norm": 0.3197113275527954, "learning_rate": 0.00014202554813630191, "loss": 11.6597, "step": 51891 }, { "epoch": 1.0862429875240727, "grad_norm": 0.31668516993522644, "learning_rate": 0.00014202355860910065, "loss": 11.6824, "step": 51892 }, { "epoch": 1.0862639202880349, "grad_norm": 0.2845900058746338, "learning_rate": 0.0001420215690616978, "loss": 11.6451, "step": 51893 }, { "epoch": 1.086284853051997, "grad_norm": 0.2761841416358948, "learning_rate": 0.00014201957949409432, "loss": 11.6744, "step": 51894 }, { "epoch": 1.086305785815959, "grad_norm": 0.28267785906791687, "learning_rate": 0.00014201758990629122, "loss": 11.6629, "step": 51895 }, { "epoch": 1.0863267185799212, "grad_norm": 0.3758961260318756, "learning_rate": 0.00014201560029828943, "loss": 11.6592, "step": 51896 }, { "epoch": 1.0863476513438834, "grad_norm": 0.32491764426231384, "learning_rate": 0.00014201361067008988, "loss": 11.6642, "step": 51897 }, { "epoch": 1.0863685841078456, "grad_norm": 0.291231632232666, "learning_rate": 0.00014201162102169354, "loss": 11.6844, "step": 51898 }, { "epoch": 1.0863895168718078, "grad_norm": 0.33545657992362976, "learning_rate": 0.00014200963135310138, "loss": 11.6711, "step": 51899 }, { "epoch": 1.0864104496357698, "grad_norm": 0.36867499351501465, "learning_rate": 0.00014200764166431436, "loss": 11.6753, "step": 51900 }, { "epoch": 1.086431382399732, "grad_norm": 0.3273043930530548, "learning_rate": 0.0001420056519553334, "loss": 11.6631, "step": 51901 }, { "epoch": 1.0864523151636942, "grad_norm": 0.3089902698993683, "learning_rate": 0.0001420036622261595, "loss": 11.6712, "step": 51902 }, { "epoch": 1.0864732479276564, "grad_norm": 0.27368372678756714, "learning_rate": 0.0001420016724767936, "loss": 11.6796, "step": 51903 }, { "epoch": 1.0864941806916186, "grad_norm": 0.33707395195961, "learning_rate": 0.00014199968270723666, "loss": 11.66, "step": 51904 }, { "epoch": 1.0865151134555806, "grad_norm": 0.3303958773612976, "learning_rate": 0.00014199769291748962, "loss": 11.6711, "step": 51905 }, { "epoch": 1.0865360462195428, "grad_norm": 0.33269575238227844, "learning_rate": 0.00014199570310755345, "loss": 11.6756, "step": 51906 }, { "epoch": 1.086556978983505, "grad_norm": 0.3651367723941803, "learning_rate": 0.00014199371327742907, "loss": 11.6682, "step": 51907 }, { "epoch": 1.0865779117474672, "grad_norm": 0.31280413269996643, "learning_rate": 0.0001419917234271175, "loss": 11.6483, "step": 51908 }, { "epoch": 1.0865988445114292, "grad_norm": 0.3236830234527588, "learning_rate": 0.00014198973355661966, "loss": 11.654, "step": 51909 }, { "epoch": 1.0866197772753914, "grad_norm": 0.3837129473686218, "learning_rate": 0.00014198774366593652, "loss": 11.6683, "step": 51910 }, { "epoch": 1.0866407100393536, "grad_norm": 0.25070053339004517, "learning_rate": 0.00014198575375506904, "loss": 11.6662, "step": 51911 }, { "epoch": 1.0866616428033158, "grad_norm": 0.29515472054481506, "learning_rate": 0.00014198376382401815, "loss": 11.6515, "step": 51912 }, { "epoch": 1.086682575567278, "grad_norm": 0.3357142508029938, "learning_rate": 0.00014198177387278485, "loss": 11.671, "step": 51913 }, { "epoch": 1.08670350833124, "grad_norm": 0.30348941683769226, "learning_rate": 0.00014197978390137004, "loss": 11.6775, "step": 51914 }, { "epoch": 1.0867244410952022, "grad_norm": 0.31898826360702515, "learning_rate": 0.00014197779390977473, "loss": 11.6623, "step": 51915 }, { "epoch": 1.0867453738591644, "grad_norm": 0.25028932094573975, "learning_rate": 0.00014197580389799984, "loss": 11.6512, "step": 51916 }, { "epoch": 1.0867663066231266, "grad_norm": 0.4301530718803406, "learning_rate": 0.00014197381386604632, "loss": 11.6534, "step": 51917 }, { "epoch": 1.0867872393870888, "grad_norm": 0.3092280328273773, "learning_rate": 0.00014197182381391516, "loss": 11.6687, "step": 51918 }, { "epoch": 1.0868081721510507, "grad_norm": 0.3009440004825592, "learning_rate": 0.0001419698337416073, "loss": 11.6755, "step": 51919 }, { "epoch": 1.086829104915013, "grad_norm": 0.9406782984733582, "learning_rate": 0.0001419678436491237, "loss": 11.6672, "step": 51920 }, { "epoch": 1.0868500376789751, "grad_norm": 0.4513165056705475, "learning_rate": 0.00014196585353646536, "loss": 11.682, "step": 51921 }, { "epoch": 1.0868709704429373, "grad_norm": 0.3067699670791626, "learning_rate": 0.00014196386340363314, "loss": 11.6621, "step": 51922 }, { "epoch": 1.0868919032068995, "grad_norm": 0.25098446011543274, "learning_rate": 0.00014196187325062808, "loss": 11.655, "step": 51923 }, { "epoch": 1.0869128359708615, "grad_norm": 0.27425122261047363, "learning_rate": 0.0001419598830774511, "loss": 11.6578, "step": 51924 }, { "epoch": 1.0869337687348237, "grad_norm": 0.37390848994255066, "learning_rate": 0.00014195789288410317, "loss": 11.6775, "step": 51925 }, { "epoch": 1.086954701498786, "grad_norm": 0.3327276408672333, "learning_rate": 0.0001419559026705852, "loss": 11.6729, "step": 51926 }, { "epoch": 1.0869756342627481, "grad_norm": 0.3004222810268402, "learning_rate": 0.00014195391243689823, "loss": 11.6609, "step": 51927 }, { "epoch": 1.08699656702671, "grad_norm": 0.26873114705085754, "learning_rate": 0.00014195192218304317, "loss": 11.66, "step": 51928 }, { "epoch": 1.0870174997906723, "grad_norm": 0.255550354719162, "learning_rate": 0.00014194993190902095, "loss": 11.6477, "step": 51929 }, { "epoch": 1.0870384325546345, "grad_norm": 0.27620023488998413, "learning_rate": 0.0001419479416148326, "loss": 11.6813, "step": 51930 }, { "epoch": 1.0870593653185967, "grad_norm": 0.3310941159725189, "learning_rate": 0.00014194595130047902, "loss": 11.6811, "step": 51931 }, { "epoch": 1.087080298082559, "grad_norm": 0.3693698048591614, "learning_rate": 0.00014194396096596116, "loss": 11.67, "step": 51932 }, { "epoch": 1.0871012308465209, "grad_norm": 0.31532251834869385, "learning_rate": 0.00014194197061128002, "loss": 11.6629, "step": 51933 }, { "epoch": 1.087122163610483, "grad_norm": 0.3399559259414673, "learning_rate": 0.00014193998023643648, "loss": 11.6673, "step": 51934 }, { "epoch": 1.0871430963744453, "grad_norm": 0.34423211216926575, "learning_rate": 0.0001419379898414316, "loss": 11.6817, "step": 51935 }, { "epoch": 1.0871640291384075, "grad_norm": 0.34030547738075256, "learning_rate": 0.0001419359994262663, "loss": 11.6717, "step": 51936 }, { "epoch": 1.0871849619023697, "grad_norm": 0.24905499815940857, "learning_rate": 0.0001419340089909415, "loss": 11.6693, "step": 51937 }, { "epoch": 1.0872058946663317, "grad_norm": 0.34247371554374695, "learning_rate": 0.00014193201853545818, "loss": 11.6657, "step": 51938 }, { "epoch": 1.0872268274302939, "grad_norm": 0.26891952753067017, "learning_rate": 0.0001419300280598173, "loss": 11.6729, "step": 51939 }, { "epoch": 1.087247760194256, "grad_norm": 0.3344014883041382, "learning_rate": 0.00014192803756401983, "loss": 11.6623, "step": 51940 }, { "epoch": 1.0872686929582183, "grad_norm": 0.2901407778263092, "learning_rate": 0.0001419260470480667, "loss": 11.6747, "step": 51941 }, { "epoch": 1.0872896257221805, "grad_norm": 0.27257901430130005, "learning_rate": 0.0001419240565119589, "loss": 11.6667, "step": 51942 }, { "epoch": 1.0873105584861424, "grad_norm": 0.3053772747516632, "learning_rate": 0.00014192206595569734, "loss": 11.6867, "step": 51943 }, { "epoch": 1.0873314912501046, "grad_norm": 0.40836092829704285, "learning_rate": 0.000141920075379283, "loss": 11.6814, "step": 51944 }, { "epoch": 1.0873524240140668, "grad_norm": 0.24110467731952667, "learning_rate": 0.00014191808478271686, "loss": 11.6592, "step": 51945 }, { "epoch": 1.087373356778029, "grad_norm": 0.3892180621623993, "learning_rate": 0.00014191609416599986, "loss": 11.6844, "step": 51946 }, { "epoch": 1.087394289541991, "grad_norm": 0.5227293372154236, "learning_rate": 0.00014191410352913295, "loss": 11.66, "step": 51947 }, { "epoch": 1.0874152223059532, "grad_norm": 0.2663852572441101, "learning_rate": 0.00014191211287211707, "loss": 11.6548, "step": 51948 }, { "epoch": 1.0874361550699154, "grad_norm": 0.27887988090515137, "learning_rate": 0.00014191012219495321, "loss": 11.6751, "step": 51949 }, { "epoch": 1.0874570878338776, "grad_norm": 0.3884586989879608, "learning_rate": 0.0001419081314976423, "loss": 11.6832, "step": 51950 }, { "epoch": 1.0874780205978398, "grad_norm": 0.3872574269771576, "learning_rate": 0.00014190614078018533, "loss": 11.6575, "step": 51951 }, { "epoch": 1.0874989533618018, "grad_norm": 0.3204297423362732, "learning_rate": 0.00014190415004258325, "loss": 11.6693, "step": 51952 }, { "epoch": 1.087519886125764, "grad_norm": 0.2398417592048645, "learning_rate": 0.00014190215928483696, "loss": 11.6696, "step": 51953 }, { "epoch": 1.0875408188897262, "grad_norm": 0.3001597821712494, "learning_rate": 0.00014190016850694752, "loss": 11.6692, "step": 51954 }, { "epoch": 1.0875617516536884, "grad_norm": 0.3299611508846283, "learning_rate": 0.00014189817770891578, "loss": 11.661, "step": 51955 }, { "epoch": 1.0875826844176506, "grad_norm": 0.3372960090637207, "learning_rate": 0.00014189618689074276, "loss": 11.675, "step": 51956 }, { "epoch": 1.0876036171816126, "grad_norm": 0.2635418474674225, "learning_rate": 0.0001418941960524294, "loss": 11.6633, "step": 51957 }, { "epoch": 1.0876245499455748, "grad_norm": 0.28945621848106384, "learning_rate": 0.00014189220519397668, "loss": 11.6671, "step": 51958 }, { "epoch": 1.087645482709537, "grad_norm": 0.3189285695552826, "learning_rate": 0.00014189021431538555, "loss": 11.6642, "step": 51959 }, { "epoch": 1.0876664154734992, "grad_norm": 0.3597371578216553, "learning_rate": 0.00014188822341665693, "loss": 11.6742, "step": 51960 }, { "epoch": 1.0876873482374614, "grad_norm": 0.30738961696624756, "learning_rate": 0.00014188623249779177, "loss": 11.665, "step": 51961 }, { "epoch": 1.0877082810014234, "grad_norm": 0.33529576659202576, "learning_rate": 0.00014188424155879112, "loss": 11.6662, "step": 51962 }, { "epoch": 1.0877292137653856, "grad_norm": 0.3683525621891022, "learning_rate": 0.00014188225059965582, "loss": 11.6714, "step": 51963 }, { "epoch": 1.0877501465293478, "grad_norm": 0.30743932723999023, "learning_rate": 0.00014188025962038695, "loss": 11.675, "step": 51964 }, { "epoch": 1.08777107929331, "grad_norm": 0.246322363615036, "learning_rate": 0.00014187826862098533, "loss": 11.6657, "step": 51965 }, { "epoch": 1.087792012057272, "grad_norm": 0.32436448335647583, "learning_rate": 0.000141876277601452, "loss": 11.67, "step": 51966 }, { "epoch": 1.0878129448212341, "grad_norm": 0.26778292655944824, "learning_rate": 0.00014187428656178797, "loss": 11.6867, "step": 51967 }, { "epoch": 1.0878338775851963, "grad_norm": 0.3212794065475464, "learning_rate": 0.00014187229550199405, "loss": 11.6795, "step": 51968 }, { "epoch": 1.0878548103491585, "grad_norm": 0.3476073443889618, "learning_rate": 0.00014187030442207131, "loss": 11.6767, "step": 51969 }, { "epoch": 1.0878757431131207, "grad_norm": 1.243166446685791, "learning_rate": 0.0001418683133220207, "loss": 11.6315, "step": 51970 }, { "epoch": 1.0878966758770827, "grad_norm": 0.35132697224617004, "learning_rate": 0.00014186632220184313, "loss": 11.6474, "step": 51971 }, { "epoch": 1.087917608641045, "grad_norm": 0.2936852276325226, "learning_rate": 0.00014186433106153955, "loss": 11.6564, "step": 51972 }, { "epoch": 1.0879385414050071, "grad_norm": 0.3664184510707855, "learning_rate": 0.000141862339901111, "loss": 11.6584, "step": 51973 }, { "epoch": 1.0879594741689693, "grad_norm": 0.27387240529060364, "learning_rate": 0.00014186034872055834, "loss": 11.6508, "step": 51974 }, { "epoch": 1.0879804069329315, "grad_norm": 0.28416359424591064, "learning_rate": 0.00014185835751988262, "loss": 11.6687, "step": 51975 }, { "epoch": 1.0880013396968935, "grad_norm": 0.3933357894420624, "learning_rate": 0.00014185636629908472, "loss": 11.6491, "step": 51976 }, { "epoch": 1.0880222724608557, "grad_norm": 0.34546542167663574, "learning_rate": 0.0001418543750581656, "loss": 11.6667, "step": 51977 }, { "epoch": 1.088043205224818, "grad_norm": 0.36202293634414673, "learning_rate": 0.00014185238379712625, "loss": 11.6567, "step": 51978 }, { "epoch": 1.08806413798878, "grad_norm": 0.31019940972328186, "learning_rate": 0.00014185039251596768, "loss": 11.657, "step": 51979 }, { "epoch": 1.0880850707527423, "grad_norm": 0.3541305959224701, "learning_rate": 0.00014184840121469073, "loss": 11.6717, "step": 51980 }, { "epoch": 1.0881060035167043, "grad_norm": 0.33455249667167664, "learning_rate": 0.00014184640989329644, "loss": 11.662, "step": 51981 }, { "epoch": 1.0881269362806665, "grad_norm": 0.3089103698730469, "learning_rate": 0.00014184441855178575, "loss": 11.6746, "step": 51982 }, { "epoch": 1.0881478690446287, "grad_norm": 0.32794466614723206, "learning_rate": 0.0001418424271901596, "loss": 11.6648, "step": 51983 }, { "epoch": 1.0881688018085909, "grad_norm": 0.27217423915863037, "learning_rate": 0.00014184043580841896, "loss": 11.6727, "step": 51984 }, { "epoch": 1.0881897345725529, "grad_norm": 0.3824211657047272, "learning_rate": 0.00014183844440656475, "loss": 11.6621, "step": 51985 }, { "epoch": 1.088210667336515, "grad_norm": 0.36733925342559814, "learning_rate": 0.000141836452984598, "loss": 11.6808, "step": 51986 }, { "epoch": 1.0882316001004773, "grad_norm": 0.2696993052959442, "learning_rate": 0.00014183446154251961, "loss": 11.6668, "step": 51987 }, { "epoch": 1.0882525328644395, "grad_norm": 0.25667619705200195, "learning_rate": 0.00014183247008033053, "loss": 11.6468, "step": 51988 }, { "epoch": 1.0882734656284017, "grad_norm": 0.3398340344429016, "learning_rate": 0.0001418304785980318, "loss": 11.6632, "step": 51989 }, { "epoch": 1.0882943983923636, "grad_norm": 0.31930336356163025, "learning_rate": 0.0001418284870956243, "loss": 11.6762, "step": 51990 }, { "epoch": 1.0883153311563258, "grad_norm": 0.27513229846954346, "learning_rate": 0.000141826495573109, "loss": 11.6731, "step": 51991 }, { "epoch": 1.088336263920288, "grad_norm": 0.3519584834575653, "learning_rate": 0.00014182450403048687, "loss": 11.6704, "step": 51992 }, { "epoch": 1.0883571966842502, "grad_norm": 0.5379563570022583, "learning_rate": 0.00014182251246775887, "loss": 11.6725, "step": 51993 }, { "epoch": 1.0883781294482124, "grad_norm": 0.3404785990715027, "learning_rate": 0.00014182052088492592, "loss": 11.6752, "step": 51994 }, { "epoch": 1.0883990622121744, "grad_norm": 0.3736746311187744, "learning_rate": 0.00014181852928198905, "loss": 11.6743, "step": 51995 }, { "epoch": 1.0884199949761366, "grad_norm": 0.2726307809352875, "learning_rate": 0.00014181653765894914, "loss": 11.6565, "step": 51996 }, { "epoch": 1.0884409277400988, "grad_norm": 0.23693443834781647, "learning_rate": 0.0001418145460158072, "loss": 11.6635, "step": 51997 }, { "epoch": 1.088461860504061, "grad_norm": 0.5433222055435181, "learning_rate": 0.00014181255435256418, "loss": 11.6694, "step": 51998 }, { "epoch": 1.0884827932680232, "grad_norm": 0.3702393174171448, "learning_rate": 0.000141810562669221, "loss": 11.6745, "step": 51999 }, { "epoch": 1.0885037260319852, "grad_norm": 0.276035875082016, "learning_rate": 0.00014180857096577864, "loss": 11.667, "step": 52000 }, { "epoch": 1.0885037260319852, "eval_loss": 11.66831111907959, "eval_runtime": 34.3734, "eval_samples_per_second": 27.958, "eval_steps_per_second": 7.011, "step": 52000 }, { "epoch": 1.0885246587959474, "grad_norm": 0.358717679977417, "learning_rate": 0.0001418065792422381, "loss": 11.6861, "step": 52001 }, { "epoch": 1.0885455915599096, "grad_norm": 0.27068692445755005, "learning_rate": 0.00014180458749860026, "loss": 11.6603, "step": 52002 }, { "epoch": 1.0885665243238718, "grad_norm": 0.27997392416000366, "learning_rate": 0.00014180259573486615, "loss": 11.6649, "step": 52003 }, { "epoch": 1.0885874570878338, "grad_norm": 0.26468512415885925, "learning_rate": 0.00014180060395103667, "loss": 11.6724, "step": 52004 }, { "epoch": 1.088608389851796, "grad_norm": 0.28376054763793945, "learning_rate": 0.0001417986121471128, "loss": 11.6806, "step": 52005 }, { "epoch": 1.0886293226157582, "grad_norm": 0.31298211216926575, "learning_rate": 0.00014179662032309557, "loss": 11.6572, "step": 52006 }, { "epoch": 1.0886502553797204, "grad_norm": 0.32053452730178833, "learning_rate": 0.00014179462847898578, "loss": 11.6712, "step": 52007 }, { "epoch": 1.0886711881436826, "grad_norm": 0.40089404582977295, "learning_rate": 0.0001417926366147845, "loss": 11.6779, "step": 52008 }, { "epoch": 1.0886921209076446, "grad_norm": 0.2549765110015869, "learning_rate": 0.00014179064473049266, "loss": 11.6621, "step": 52009 }, { "epoch": 1.0887130536716068, "grad_norm": 0.37051069736480713, "learning_rate": 0.00014178865282611124, "loss": 11.6643, "step": 52010 }, { "epoch": 1.088733986435569, "grad_norm": 0.27590787410736084, "learning_rate": 0.00014178666090164117, "loss": 11.6413, "step": 52011 }, { "epoch": 1.0887549191995312, "grad_norm": 0.3557482063770294, "learning_rate": 0.00014178466895708338, "loss": 11.6724, "step": 52012 }, { "epoch": 1.0887758519634934, "grad_norm": 0.4656694829463959, "learning_rate": 0.0001417826769924389, "loss": 11.6738, "step": 52013 }, { "epoch": 1.0887967847274553, "grad_norm": 0.2809898555278778, "learning_rate": 0.0001417806850077086, "loss": 11.6605, "step": 52014 }, { "epoch": 1.0888177174914175, "grad_norm": 0.2731553912162781, "learning_rate": 0.00014177869300289358, "loss": 11.6731, "step": 52015 }, { "epoch": 1.0888386502553797, "grad_norm": 0.30440378189086914, "learning_rate": 0.00014177670097799462, "loss": 11.6596, "step": 52016 }, { "epoch": 1.088859583019342, "grad_norm": 0.3294304609298706, "learning_rate": 0.0001417747089330128, "loss": 11.6586, "step": 52017 }, { "epoch": 1.0888805157833041, "grad_norm": 0.3862236738204956, "learning_rate": 0.00014177271686794906, "loss": 11.6669, "step": 52018 }, { "epoch": 1.0889014485472661, "grad_norm": 0.35108113288879395, "learning_rate": 0.0001417707247828043, "loss": 11.6793, "step": 52019 }, { "epoch": 1.0889223813112283, "grad_norm": 0.3162662982940674, "learning_rate": 0.00014176873267757954, "loss": 11.6823, "step": 52020 }, { "epoch": 1.0889433140751905, "grad_norm": 0.29279083013534546, "learning_rate": 0.0001417667405522757, "loss": 11.6656, "step": 52021 }, { "epoch": 1.0889642468391527, "grad_norm": 0.26920515298843384, "learning_rate": 0.00014176474840689376, "loss": 11.6748, "step": 52022 }, { "epoch": 1.0889851796031147, "grad_norm": 0.3608744144439697, "learning_rate": 0.00014176275624143466, "loss": 11.6694, "step": 52023 }, { "epoch": 1.089006112367077, "grad_norm": 0.39799192547798157, "learning_rate": 0.00014176076405589933, "loss": 11.6589, "step": 52024 }, { "epoch": 1.089027045131039, "grad_norm": 0.3327901363372803, "learning_rate": 0.00014175877185028884, "loss": 11.6674, "step": 52025 }, { "epoch": 1.0890479778950013, "grad_norm": 0.31211304664611816, "learning_rate": 0.000141756779624604, "loss": 11.6658, "step": 52026 }, { "epoch": 1.0890689106589635, "grad_norm": 0.34105077385902405, "learning_rate": 0.0001417547873788459, "loss": 11.6657, "step": 52027 }, { "epoch": 1.0890898434229255, "grad_norm": 0.41037851572036743, "learning_rate": 0.0001417527951130154, "loss": 11.6811, "step": 52028 }, { "epoch": 1.0891107761868877, "grad_norm": 0.3478597104549408, "learning_rate": 0.0001417508028271135, "loss": 11.6589, "step": 52029 }, { "epoch": 1.0891317089508499, "grad_norm": 0.276889443397522, "learning_rate": 0.00014174881052114114, "loss": 11.6854, "step": 52030 }, { "epoch": 1.089152641714812, "grad_norm": 0.3180646598339081, "learning_rate": 0.00014174681819509929, "loss": 11.6679, "step": 52031 }, { "epoch": 1.0891735744787743, "grad_norm": 0.28997743129730225, "learning_rate": 0.00014174482584898896, "loss": 11.6604, "step": 52032 }, { "epoch": 1.0891945072427363, "grad_norm": 0.3423316776752472, "learning_rate": 0.00014174283348281102, "loss": 11.6549, "step": 52033 }, { "epoch": 1.0892154400066985, "grad_norm": 0.49169230461120605, "learning_rate": 0.00014174084109656648, "loss": 11.6697, "step": 52034 }, { "epoch": 1.0892363727706607, "grad_norm": 0.34975844621658325, "learning_rate": 0.00014173884869025623, "loss": 11.6745, "step": 52035 }, { "epoch": 1.0892573055346229, "grad_norm": 0.2600134313106537, "learning_rate": 0.00014173685626388133, "loss": 11.6634, "step": 52036 }, { "epoch": 1.089278238298585, "grad_norm": 0.3176877796649933, "learning_rate": 0.00014173486381744265, "loss": 11.6693, "step": 52037 }, { "epoch": 1.089299171062547, "grad_norm": 0.4975571036338806, "learning_rate": 0.0001417328713509412, "loss": 11.6508, "step": 52038 }, { "epoch": 1.0893201038265092, "grad_norm": 0.3006420135498047, "learning_rate": 0.00014173087886437793, "loss": 11.6719, "step": 52039 }, { "epoch": 1.0893410365904714, "grad_norm": 0.297469824552536, "learning_rate": 0.00014172888635775378, "loss": 11.6663, "step": 52040 }, { "epoch": 1.0893619693544336, "grad_norm": 0.30254828929901123, "learning_rate": 0.00014172689383106974, "loss": 11.6651, "step": 52041 }, { "epoch": 1.0893829021183956, "grad_norm": 0.4152401089668274, "learning_rate": 0.00014172490128432674, "loss": 11.6695, "step": 52042 }, { "epoch": 1.0894038348823578, "grad_norm": 0.3140917122364044, "learning_rate": 0.00014172290871752573, "loss": 11.6755, "step": 52043 }, { "epoch": 1.08942476764632, "grad_norm": 0.4075198173522949, "learning_rate": 0.00014172091613066766, "loss": 11.6841, "step": 52044 }, { "epoch": 1.0894457004102822, "grad_norm": 0.32243964076042175, "learning_rate": 0.00014171892352375355, "loss": 11.67, "step": 52045 }, { "epoch": 1.0894666331742444, "grad_norm": 0.2742527425289154, "learning_rate": 0.0001417169308967843, "loss": 11.6699, "step": 52046 }, { "epoch": 1.0894875659382064, "grad_norm": 0.3463634252548218, "learning_rate": 0.0001417149382497609, "loss": 11.6659, "step": 52047 }, { "epoch": 1.0895084987021686, "grad_norm": 0.2427288144826889, "learning_rate": 0.00014171294558268428, "loss": 11.6827, "step": 52048 }, { "epoch": 1.0895294314661308, "grad_norm": 0.32601699233055115, "learning_rate": 0.0001417109528955554, "loss": 11.6628, "step": 52049 }, { "epoch": 1.089550364230093, "grad_norm": 0.30000680685043335, "learning_rate": 0.00014170896018837525, "loss": 11.6633, "step": 52050 }, { "epoch": 1.0895712969940552, "grad_norm": 0.36442917585372925, "learning_rate": 0.00014170696746114477, "loss": 11.66, "step": 52051 }, { "epoch": 1.0895922297580172, "grad_norm": 0.31549155712127686, "learning_rate": 0.0001417049747138649, "loss": 11.6566, "step": 52052 }, { "epoch": 1.0896131625219794, "grad_norm": 0.40038642287254333, "learning_rate": 0.00014170298194653662, "loss": 11.6736, "step": 52053 }, { "epoch": 1.0896340952859416, "grad_norm": 0.317544549703598, "learning_rate": 0.00014170098915916086, "loss": 11.668, "step": 52054 }, { "epoch": 1.0896550280499038, "grad_norm": 0.2926163673400879, "learning_rate": 0.00014169899635173863, "loss": 11.6841, "step": 52055 }, { "epoch": 1.089675960813866, "grad_norm": 0.4700528383255005, "learning_rate": 0.00014169700352427084, "loss": 11.6858, "step": 52056 }, { "epoch": 1.089696893577828, "grad_norm": 0.3124120831489563, "learning_rate": 0.00014169501067675847, "loss": 11.6687, "step": 52057 }, { "epoch": 1.0897178263417902, "grad_norm": 0.36349010467529297, "learning_rate": 0.00014169301780920246, "loss": 11.6628, "step": 52058 }, { "epoch": 1.0897387591057524, "grad_norm": 0.2952549159526825, "learning_rate": 0.00014169102492160378, "loss": 11.6648, "step": 52059 }, { "epoch": 1.0897596918697146, "grad_norm": 0.3627564013004303, "learning_rate": 0.0001416890320139634, "loss": 11.6748, "step": 52060 }, { "epoch": 1.0897806246336765, "grad_norm": 0.3345988988876343, "learning_rate": 0.00014168703908628227, "loss": 11.6665, "step": 52061 }, { "epoch": 1.0898015573976387, "grad_norm": 0.3425358831882477, "learning_rate": 0.00014168504613856135, "loss": 11.6769, "step": 52062 }, { "epoch": 1.089822490161601, "grad_norm": 0.40005120635032654, "learning_rate": 0.0001416830531708016, "loss": 11.6772, "step": 52063 }, { "epoch": 1.0898434229255631, "grad_norm": 0.33098503947257996, "learning_rate": 0.00014168106018300395, "loss": 11.6653, "step": 52064 }, { "epoch": 1.0898643556895253, "grad_norm": 0.26810359954833984, "learning_rate": 0.00014167906717516937, "loss": 11.6652, "step": 52065 }, { "epoch": 1.0898852884534873, "grad_norm": 0.2751122713088989, "learning_rate": 0.00014167707414729886, "loss": 11.6814, "step": 52066 }, { "epoch": 1.0899062212174495, "grad_norm": 0.36906352639198303, "learning_rate": 0.0001416750810993933, "loss": 11.6674, "step": 52067 }, { "epoch": 1.0899271539814117, "grad_norm": 0.30276644229888916, "learning_rate": 0.00014167308803145375, "loss": 11.6829, "step": 52068 }, { "epoch": 1.089948086745374, "grad_norm": 0.3475067913532257, "learning_rate": 0.00014167109494348106, "loss": 11.659, "step": 52069 }, { "epoch": 1.0899690195093361, "grad_norm": 0.3060820698738098, "learning_rate": 0.00014166910183547628, "loss": 11.6649, "step": 52070 }, { "epoch": 1.089989952273298, "grad_norm": 0.28308287262916565, "learning_rate": 0.0001416671087074403, "loss": 11.6472, "step": 52071 }, { "epoch": 1.0900108850372603, "grad_norm": 0.31599295139312744, "learning_rate": 0.00014166511555937413, "loss": 11.6591, "step": 52072 }, { "epoch": 1.0900318178012225, "grad_norm": 0.3930457532405853, "learning_rate": 0.0001416631223912787, "loss": 11.6623, "step": 52073 }, { "epoch": 1.0900527505651847, "grad_norm": 0.26682335138320923, "learning_rate": 0.00014166112920315494, "loss": 11.662, "step": 52074 }, { "epoch": 1.090073683329147, "grad_norm": 0.27275946736335754, "learning_rate": 0.00014165913599500387, "loss": 11.6804, "step": 52075 }, { "epoch": 1.0900946160931089, "grad_norm": 0.3431377410888672, "learning_rate": 0.00014165714276682639, "loss": 11.6711, "step": 52076 }, { "epoch": 1.090115548857071, "grad_norm": 0.41930466890335083, "learning_rate": 0.0001416551495186235, "loss": 11.6648, "step": 52077 }, { "epoch": 1.0901364816210333, "grad_norm": 0.3463379442691803, "learning_rate": 0.00014165315625039616, "loss": 11.6725, "step": 52078 }, { "epoch": 1.0901574143849955, "grad_norm": 0.33870643377304077, "learning_rate": 0.00014165116296214531, "loss": 11.6688, "step": 52079 }, { "epoch": 1.0901783471489574, "grad_norm": 0.3901359438896179, "learning_rate": 0.00014164916965387188, "loss": 11.6811, "step": 52080 }, { "epoch": 1.0901992799129196, "grad_norm": 0.3201506733894348, "learning_rate": 0.00014164717632557692, "loss": 11.6803, "step": 52081 }, { "epoch": 1.0902202126768819, "grad_norm": 0.3525221347808838, "learning_rate": 0.00014164518297726128, "loss": 11.6679, "step": 52082 }, { "epoch": 1.090241145440844, "grad_norm": 0.3405453562736511, "learning_rate": 0.00014164318960892597, "loss": 11.6794, "step": 52083 }, { "epoch": 1.0902620782048063, "grad_norm": 0.3133101165294647, "learning_rate": 0.00014164119622057196, "loss": 11.6675, "step": 52084 }, { "epoch": 1.0902830109687682, "grad_norm": 0.3033009171485901, "learning_rate": 0.00014163920281220018, "loss": 11.6829, "step": 52085 }, { "epoch": 1.0903039437327304, "grad_norm": 0.3169894814491272, "learning_rate": 0.00014163720938381163, "loss": 11.6615, "step": 52086 }, { "epoch": 1.0903248764966926, "grad_norm": 0.34590211510658264, "learning_rate": 0.0001416352159354072, "loss": 11.662, "step": 52087 }, { "epoch": 1.0903458092606548, "grad_norm": 0.3299982249736786, "learning_rate": 0.0001416332224669879, "loss": 11.6641, "step": 52088 }, { "epoch": 1.090366742024617, "grad_norm": 0.3341004252433777, "learning_rate": 0.00014163122897855468, "loss": 11.6658, "step": 52089 }, { "epoch": 1.090387674788579, "grad_norm": 0.2813175618648529, "learning_rate": 0.0001416292354701085, "loss": 11.6522, "step": 52090 }, { "epoch": 1.0904086075525412, "grad_norm": 0.3698742091655731, "learning_rate": 0.00014162724194165033, "loss": 11.6831, "step": 52091 }, { "epoch": 1.0904295403165034, "grad_norm": 0.3800172805786133, "learning_rate": 0.00014162524839318106, "loss": 11.6827, "step": 52092 }, { "epoch": 1.0904504730804656, "grad_norm": 0.2953844666481018, "learning_rate": 0.00014162325482470171, "loss": 11.6834, "step": 52093 }, { "epoch": 1.0904714058444278, "grad_norm": 0.27681097388267517, "learning_rate": 0.00014162126123621325, "loss": 11.6752, "step": 52094 }, { "epoch": 1.0904923386083898, "grad_norm": 0.29674702882766724, "learning_rate": 0.0001416192676277166, "loss": 11.6714, "step": 52095 }, { "epoch": 1.090513271372352, "grad_norm": 0.41850733757019043, "learning_rate": 0.00014161727399921275, "loss": 11.6777, "step": 52096 }, { "epoch": 1.0905342041363142, "grad_norm": 0.29398012161254883, "learning_rate": 0.0001416152803507026, "loss": 11.6598, "step": 52097 }, { "epoch": 1.0905551369002764, "grad_norm": 0.3266378939151764, "learning_rate": 0.00014161328668218717, "loss": 11.6725, "step": 52098 }, { "epoch": 1.0905760696642384, "grad_norm": 0.28006860613822937, "learning_rate": 0.00014161129299366745, "loss": 11.6688, "step": 52099 }, { "epoch": 1.0905970024282006, "grad_norm": 0.3827207684516907, "learning_rate": 0.0001416092992851443, "loss": 11.6961, "step": 52100 }, { "epoch": 1.0906179351921628, "grad_norm": 0.27375152707099915, "learning_rate": 0.00014160730555661875, "loss": 11.6677, "step": 52101 }, { "epoch": 1.090638867956125, "grad_norm": 0.31882619857788086, "learning_rate": 0.0001416053118080917, "loss": 11.6813, "step": 52102 }, { "epoch": 1.0906598007200872, "grad_norm": 0.2850281298160553, "learning_rate": 0.0001416033180395642, "loss": 11.661, "step": 52103 }, { "epoch": 1.0906807334840491, "grad_norm": 0.2596043050289154, "learning_rate": 0.0001416013242510371, "loss": 11.664, "step": 52104 }, { "epoch": 1.0907016662480113, "grad_norm": 0.37153711915016174, "learning_rate": 0.00014159933044251142, "loss": 11.6777, "step": 52105 }, { "epoch": 1.0907225990119735, "grad_norm": 0.30317020416259766, "learning_rate": 0.0001415973366139881, "loss": 11.666, "step": 52106 }, { "epoch": 1.0907435317759357, "grad_norm": 0.39231932163238525, "learning_rate": 0.00014159534276546814, "loss": 11.6542, "step": 52107 }, { "epoch": 1.090764464539898, "grad_norm": 0.3442192077636719, "learning_rate": 0.00014159334889695245, "loss": 11.6795, "step": 52108 }, { "epoch": 1.09078539730386, "grad_norm": 0.3983941078186035, "learning_rate": 0.000141591355008442, "loss": 11.6843, "step": 52109 }, { "epoch": 1.0908063300678221, "grad_norm": 0.33671438694000244, "learning_rate": 0.00014158936109993773, "loss": 11.6731, "step": 52110 }, { "epoch": 1.0908272628317843, "grad_norm": 0.31645023822784424, "learning_rate": 0.00014158736717144064, "loss": 11.6578, "step": 52111 }, { "epoch": 1.0908481955957465, "grad_norm": 0.3389087915420532, "learning_rate": 0.00014158537322295166, "loss": 11.6796, "step": 52112 }, { "epoch": 1.0908691283597087, "grad_norm": 0.3081187605857849, "learning_rate": 0.00014158337925447177, "loss": 11.6635, "step": 52113 }, { "epoch": 1.0908900611236707, "grad_norm": 0.31805288791656494, "learning_rate": 0.00014158138526600193, "loss": 11.6469, "step": 52114 }, { "epoch": 1.090910993887633, "grad_norm": 0.3132377564907074, "learning_rate": 0.00014157939125754304, "loss": 11.6905, "step": 52115 }, { "epoch": 1.090931926651595, "grad_norm": 0.3097098767757416, "learning_rate": 0.00014157739722909615, "loss": 11.6666, "step": 52116 }, { "epoch": 1.0909528594155573, "grad_norm": 0.261886864900589, "learning_rate": 0.00014157540318066215, "loss": 11.6695, "step": 52117 }, { "epoch": 1.0909737921795193, "grad_norm": 0.32941797375679016, "learning_rate": 0.00014157340911224203, "loss": 11.6607, "step": 52118 }, { "epoch": 1.0909947249434815, "grad_norm": 0.3061908483505249, "learning_rate": 0.00014157141502383673, "loss": 11.6637, "step": 52119 }, { "epoch": 1.0910156577074437, "grad_norm": 1.1644508838653564, "learning_rate": 0.00014156942091544722, "loss": 11.702, "step": 52120 }, { "epoch": 1.0910365904714059, "grad_norm": 0.30799394845962524, "learning_rate": 0.00014156742678707445, "loss": 11.6504, "step": 52121 }, { "epoch": 1.091057523235368, "grad_norm": 0.45299002528190613, "learning_rate": 0.0001415654326387194, "loss": 11.6805, "step": 52122 }, { "epoch": 1.09107845599933, "grad_norm": 0.31453201174736023, "learning_rate": 0.00014156343847038303, "loss": 11.6583, "step": 52123 }, { "epoch": 1.0910993887632923, "grad_norm": 0.2761707901954651, "learning_rate": 0.00014156144428206622, "loss": 11.6492, "step": 52124 }, { "epoch": 1.0911203215272545, "grad_norm": 0.32848456501960754, "learning_rate": 0.00014155945007377006, "loss": 11.6706, "step": 52125 }, { "epoch": 1.0911412542912167, "grad_norm": 0.3897354304790497, "learning_rate": 0.0001415574558454954, "loss": 11.6745, "step": 52126 }, { "epoch": 1.0911621870551789, "grad_norm": 0.3294509947299957, "learning_rate": 0.00014155546159724325, "loss": 11.6757, "step": 52127 }, { "epoch": 1.0911831198191408, "grad_norm": 0.3921404480934143, "learning_rate": 0.00014155346732901456, "loss": 11.6645, "step": 52128 }, { "epoch": 1.091204052583103, "grad_norm": 0.2925902009010315, "learning_rate": 0.00014155147304081026, "loss": 11.6669, "step": 52129 }, { "epoch": 1.0912249853470652, "grad_norm": 0.4212919771671295, "learning_rate": 0.00014154947873263137, "loss": 11.6745, "step": 52130 }, { "epoch": 1.0912459181110274, "grad_norm": 0.29402920603752136, "learning_rate": 0.0001415474844044788, "loss": 11.6755, "step": 52131 }, { "epoch": 1.0912668508749896, "grad_norm": 0.28681236505508423, "learning_rate": 0.0001415454900563535, "loss": 11.6835, "step": 52132 }, { "epoch": 1.0912877836389516, "grad_norm": 0.31376200914382935, "learning_rate": 0.0001415434956882565, "loss": 11.6731, "step": 52133 }, { "epoch": 1.0913087164029138, "grad_norm": 0.32722318172454834, "learning_rate": 0.00014154150130018866, "loss": 11.6657, "step": 52134 }, { "epoch": 1.091329649166876, "grad_norm": 0.36611849069595337, "learning_rate": 0.00014153950689215102, "loss": 11.6796, "step": 52135 }, { "epoch": 1.0913505819308382, "grad_norm": 0.32264405488967896, "learning_rate": 0.00014153751246414448, "loss": 11.6855, "step": 52136 }, { "epoch": 1.0913715146948002, "grad_norm": 0.2850409150123596, "learning_rate": 0.00014153551801617003, "loss": 11.6727, "step": 52137 }, { "epoch": 1.0913924474587624, "grad_norm": 0.29898184537887573, "learning_rate": 0.00014153352354822864, "loss": 11.6747, "step": 52138 }, { "epoch": 1.0914133802227246, "grad_norm": 0.34725067019462585, "learning_rate": 0.00014153152906032124, "loss": 11.6682, "step": 52139 }, { "epoch": 1.0914343129866868, "grad_norm": 0.3355867266654968, "learning_rate": 0.00014152953455244882, "loss": 11.6579, "step": 52140 }, { "epoch": 1.091455245750649, "grad_norm": 0.31456106901168823, "learning_rate": 0.00014152754002461228, "loss": 11.6697, "step": 52141 }, { "epoch": 1.091476178514611, "grad_norm": 0.3190224766731262, "learning_rate": 0.00014152554547681266, "loss": 11.6618, "step": 52142 }, { "epoch": 1.0914971112785732, "grad_norm": 0.3815092444419861, "learning_rate": 0.00014152355090905086, "loss": 11.6759, "step": 52143 }, { "epoch": 1.0915180440425354, "grad_norm": 0.33652687072753906, "learning_rate": 0.00014152155632132787, "loss": 11.6755, "step": 52144 }, { "epoch": 1.0915389768064976, "grad_norm": 0.3254269063472748, "learning_rate": 0.00014151956171364462, "loss": 11.6718, "step": 52145 }, { "epoch": 1.0915599095704598, "grad_norm": 0.3602723777294159, "learning_rate": 0.00014151756708600207, "loss": 11.6688, "step": 52146 }, { "epoch": 1.0915808423344218, "grad_norm": 0.35348761081695557, "learning_rate": 0.00014151557243840122, "loss": 11.6651, "step": 52147 }, { "epoch": 1.091601775098384, "grad_norm": 0.36080673336982727, "learning_rate": 0.000141513577770843, "loss": 11.674, "step": 52148 }, { "epoch": 1.0916227078623462, "grad_norm": 0.3109133243560791, "learning_rate": 0.00014151158308332837, "loss": 11.6705, "step": 52149 }, { "epoch": 1.0916436406263084, "grad_norm": 0.2493726909160614, "learning_rate": 0.0001415095883758583, "loss": 11.6608, "step": 52150 }, { "epoch": 1.0916645733902706, "grad_norm": 0.41336530447006226, "learning_rate": 0.00014150759364843371, "loss": 11.6757, "step": 52151 }, { "epoch": 1.0916855061542325, "grad_norm": 0.4069133996963501, "learning_rate": 0.00014150559890105564, "loss": 11.6662, "step": 52152 }, { "epoch": 1.0917064389181947, "grad_norm": 0.2976357042789459, "learning_rate": 0.00014150360413372494, "loss": 11.6753, "step": 52153 }, { "epoch": 1.091727371682157, "grad_norm": 0.35811617970466614, "learning_rate": 0.00014150160934644262, "loss": 11.6595, "step": 52154 }, { "epoch": 1.0917483044461191, "grad_norm": 0.4196389317512512, "learning_rate": 0.0001414996145392097, "loss": 11.6749, "step": 52155 }, { "epoch": 1.0917692372100811, "grad_norm": 0.2814870774745941, "learning_rate": 0.00014149761971202703, "loss": 11.6457, "step": 52156 }, { "epoch": 1.0917901699740433, "grad_norm": 0.35237497091293335, "learning_rate": 0.00014149562486489567, "loss": 11.6799, "step": 52157 }, { "epoch": 1.0918111027380055, "grad_norm": 0.31766289472579956, "learning_rate": 0.0001414936299978165, "loss": 11.6546, "step": 52158 }, { "epoch": 1.0918320355019677, "grad_norm": 0.30148226022720337, "learning_rate": 0.00014149163511079053, "loss": 11.6777, "step": 52159 }, { "epoch": 1.09185296826593, "grad_norm": 0.4749862849712372, "learning_rate": 0.0001414896402038187, "loss": 11.6655, "step": 52160 }, { "epoch": 1.091873901029892, "grad_norm": 0.3991972804069519, "learning_rate": 0.00014148764527690193, "loss": 11.6767, "step": 52161 }, { "epoch": 1.091894833793854, "grad_norm": 0.3316822350025177, "learning_rate": 0.0001414856503300413, "loss": 11.6558, "step": 52162 }, { "epoch": 1.0919157665578163, "grad_norm": 0.38148564100265503, "learning_rate": 0.0001414836553632376, "loss": 11.6471, "step": 52163 }, { "epoch": 1.0919366993217785, "grad_norm": 0.34056660532951355, "learning_rate": 0.0001414816603764919, "loss": 11.6822, "step": 52164 }, { "epoch": 1.0919576320857407, "grad_norm": 0.3634072542190552, "learning_rate": 0.00014147966536980514, "loss": 11.66, "step": 52165 }, { "epoch": 1.0919785648497027, "grad_norm": 0.34508854150772095, "learning_rate": 0.0001414776703431783, "loss": 11.6594, "step": 52166 }, { "epoch": 1.0919994976136649, "grad_norm": 0.29478690028190613, "learning_rate": 0.00014147567529661229, "loss": 11.6799, "step": 52167 }, { "epoch": 1.092020430377627, "grad_norm": 0.30764317512512207, "learning_rate": 0.0001414736802301081, "loss": 11.6664, "step": 52168 }, { "epoch": 1.0920413631415893, "grad_norm": 0.35884562134742737, "learning_rate": 0.00014147168514366666, "loss": 11.6625, "step": 52169 }, { "epoch": 1.0920622959055515, "grad_norm": 0.3045653700828552, "learning_rate": 0.00014146969003728894, "loss": 11.6679, "step": 52170 }, { "epoch": 1.0920832286695135, "grad_norm": 0.2635936439037323, "learning_rate": 0.00014146769491097592, "loss": 11.6637, "step": 52171 }, { "epoch": 1.0921041614334757, "grad_norm": 0.43215644359588623, "learning_rate": 0.0001414656997647286, "loss": 11.6605, "step": 52172 }, { "epoch": 1.0921250941974379, "grad_norm": 0.2944328188896179, "learning_rate": 0.0001414637045985478, "loss": 11.6528, "step": 52173 }, { "epoch": 1.0921460269614, "grad_norm": 0.2689601480960846, "learning_rate": 0.00014146170941243465, "loss": 11.6613, "step": 52174 }, { "epoch": 1.092166959725362, "grad_norm": 0.3611680269241333, "learning_rate": 0.00014145971420638997, "loss": 11.6741, "step": 52175 }, { "epoch": 1.0921878924893242, "grad_norm": 0.3269331455230713, "learning_rate": 0.00014145771898041478, "loss": 11.6808, "step": 52176 }, { "epoch": 1.0922088252532864, "grad_norm": 0.37308230996131897, "learning_rate": 0.00014145572373451006, "loss": 11.6685, "step": 52177 }, { "epoch": 1.0922297580172486, "grad_norm": 0.3370104432106018, "learning_rate": 0.00014145372846867673, "loss": 11.6973, "step": 52178 }, { "epoch": 1.0922506907812108, "grad_norm": 0.3807287812232971, "learning_rate": 0.00014145173318291576, "loss": 11.6736, "step": 52179 }, { "epoch": 1.0922716235451728, "grad_norm": 0.3137054145336151, "learning_rate": 0.0001414497378772281, "loss": 11.6811, "step": 52180 }, { "epoch": 1.092292556309135, "grad_norm": 0.3187716007232666, "learning_rate": 0.00014144774255161474, "loss": 11.6594, "step": 52181 }, { "epoch": 1.0923134890730972, "grad_norm": 0.3366082012653351, "learning_rate": 0.0001414457472060766, "loss": 11.6546, "step": 52182 }, { "epoch": 1.0923344218370594, "grad_norm": 0.3014705777168274, "learning_rate": 0.00014144375184061466, "loss": 11.6481, "step": 52183 }, { "epoch": 1.0923553546010216, "grad_norm": 0.33248963952064514, "learning_rate": 0.0001414417564552299, "loss": 11.6595, "step": 52184 }, { "epoch": 1.0923762873649836, "grad_norm": 0.34518393874168396, "learning_rate": 0.00014143976104992323, "loss": 11.6559, "step": 52185 }, { "epoch": 1.0923972201289458, "grad_norm": 0.33514514565467834, "learning_rate": 0.00014143776562469565, "loss": 11.6724, "step": 52186 }, { "epoch": 1.092418152892908, "grad_norm": 0.3735755383968353, "learning_rate": 0.00014143577017954813, "loss": 11.6555, "step": 52187 }, { "epoch": 1.0924390856568702, "grad_norm": 0.31477123498916626, "learning_rate": 0.00014143377471448158, "loss": 11.6655, "step": 52188 }, { "epoch": 1.0924600184208324, "grad_norm": 0.304503470659256, "learning_rate": 0.000141431779229497, "loss": 11.6674, "step": 52189 }, { "epoch": 1.0924809511847944, "grad_norm": 0.32953721284866333, "learning_rate": 0.0001414297837245953, "loss": 11.6743, "step": 52190 }, { "epoch": 1.0925018839487566, "grad_norm": 0.3027762174606323, "learning_rate": 0.00014142778819977749, "loss": 11.665, "step": 52191 }, { "epoch": 1.0925228167127188, "grad_norm": 0.36854538321495056, "learning_rate": 0.00014142579265504453, "loss": 11.6863, "step": 52192 }, { "epoch": 1.092543749476681, "grad_norm": 0.37981075048446655, "learning_rate": 0.00014142379709039733, "loss": 11.6747, "step": 52193 }, { "epoch": 1.092564682240643, "grad_norm": 0.271632581949234, "learning_rate": 0.0001414218015058369, "loss": 11.6723, "step": 52194 }, { "epoch": 1.0925856150046052, "grad_norm": 0.26297709345817566, "learning_rate": 0.0001414198059013642, "loss": 11.6641, "step": 52195 }, { "epoch": 1.0926065477685674, "grad_norm": 0.3188309669494629, "learning_rate": 0.00014141781027698013, "loss": 11.6633, "step": 52196 }, { "epoch": 1.0926274805325296, "grad_norm": 0.3201879858970642, "learning_rate": 0.0001414158146326857, "loss": 11.6672, "step": 52197 }, { "epoch": 1.0926484132964918, "grad_norm": 0.36908936500549316, "learning_rate": 0.00014141381896848185, "loss": 11.6711, "step": 52198 }, { "epoch": 1.0926693460604537, "grad_norm": 0.2547687590122223, "learning_rate": 0.00014141182328436958, "loss": 11.6561, "step": 52199 }, { "epoch": 1.092690278824416, "grad_norm": 0.3299175202846527, "learning_rate": 0.00014140982758034976, "loss": 11.6733, "step": 52200 }, { "epoch": 1.0927112115883781, "grad_norm": 0.3040294349193573, "learning_rate": 0.00014140783185642348, "loss": 11.6684, "step": 52201 }, { "epoch": 1.0927321443523403, "grad_norm": 0.37371891736984253, "learning_rate": 0.00014140583611259156, "loss": 11.6842, "step": 52202 }, { "epoch": 1.0927530771163025, "grad_norm": 0.31190502643585205, "learning_rate": 0.00014140384034885505, "loss": 11.6739, "step": 52203 }, { "epoch": 1.0927740098802645, "grad_norm": 0.3375908434391022, "learning_rate": 0.0001414018445652149, "loss": 11.6736, "step": 52204 }, { "epoch": 1.0927949426442267, "grad_norm": 0.4015475809574127, "learning_rate": 0.00014139984876167204, "loss": 11.6724, "step": 52205 }, { "epoch": 1.092815875408189, "grad_norm": 0.33160898089408875, "learning_rate": 0.00014139785293822745, "loss": 11.6821, "step": 52206 }, { "epoch": 1.0928368081721511, "grad_norm": 0.29336363077163696, "learning_rate": 0.00014139585709488207, "loss": 11.6739, "step": 52207 }, { "epoch": 1.0928577409361133, "grad_norm": 0.34150153398513794, "learning_rate": 0.00014139386123163686, "loss": 11.6666, "step": 52208 }, { "epoch": 1.0928786737000753, "grad_norm": 0.2965497076511383, "learning_rate": 0.00014139186534849283, "loss": 11.6617, "step": 52209 }, { "epoch": 1.0928996064640375, "grad_norm": 0.34496572613716125, "learning_rate": 0.00014138986944545087, "loss": 11.6578, "step": 52210 }, { "epoch": 1.0929205392279997, "grad_norm": 0.34481605887413025, "learning_rate": 0.00014138787352251198, "loss": 11.6792, "step": 52211 }, { "epoch": 1.092941471991962, "grad_norm": 0.5102416276931763, "learning_rate": 0.00014138587757967712, "loss": 11.6709, "step": 52212 }, { "epoch": 1.0929624047559239, "grad_norm": 0.3226785957813263, "learning_rate": 0.0001413838816169472, "loss": 11.6615, "step": 52213 }, { "epoch": 1.092983337519886, "grad_norm": 0.31403452157974243, "learning_rate": 0.00014138188563432325, "loss": 11.6518, "step": 52214 }, { "epoch": 1.0930042702838483, "grad_norm": 0.267190158367157, "learning_rate": 0.0001413798896318062, "loss": 11.6758, "step": 52215 }, { "epoch": 1.0930252030478105, "grad_norm": 0.3462558388710022, "learning_rate": 0.00014137789360939702, "loss": 11.6638, "step": 52216 }, { "epoch": 1.0930461358117727, "grad_norm": 0.3025858998298645, "learning_rate": 0.00014137589756709664, "loss": 11.6769, "step": 52217 }, { "epoch": 1.0930670685757347, "grad_norm": 0.4016719460487366, "learning_rate": 0.00014137390150490605, "loss": 11.6794, "step": 52218 }, { "epoch": 1.0930880013396969, "grad_norm": 0.32525238394737244, "learning_rate": 0.00014137190542282618, "loss": 11.6586, "step": 52219 }, { "epoch": 1.093108934103659, "grad_norm": 0.36464589834213257, "learning_rate": 0.00014136990932085797, "loss": 11.6579, "step": 52220 }, { "epoch": 1.0931298668676213, "grad_norm": 0.2798767685890198, "learning_rate": 0.0001413679131990025, "loss": 11.6751, "step": 52221 }, { "epoch": 1.0931507996315832, "grad_norm": 0.3324265480041504, "learning_rate": 0.0001413659170572606, "loss": 11.6748, "step": 52222 }, { "epoch": 1.0931717323955454, "grad_norm": 0.4193044900894165, "learning_rate": 0.00014136392089563325, "loss": 11.6679, "step": 52223 }, { "epoch": 1.0931926651595076, "grad_norm": 0.3199496865272522, "learning_rate": 0.00014136192471412147, "loss": 11.6648, "step": 52224 }, { "epoch": 1.0932135979234698, "grad_norm": 0.373436838388443, "learning_rate": 0.00014135992851272615, "loss": 11.6776, "step": 52225 }, { "epoch": 1.093234530687432, "grad_norm": 0.31280627846717834, "learning_rate": 0.00014135793229144833, "loss": 11.6444, "step": 52226 }, { "epoch": 1.0932554634513942, "grad_norm": 0.29223576188087463, "learning_rate": 0.0001413559360502889, "loss": 11.6593, "step": 52227 }, { "epoch": 1.0932763962153562, "grad_norm": 0.35741153359413147, "learning_rate": 0.00014135393978924885, "loss": 11.6631, "step": 52228 }, { "epoch": 1.0932973289793184, "grad_norm": 0.2222328633069992, "learning_rate": 0.00014135194350832912, "loss": 11.666, "step": 52229 }, { "epoch": 1.0933182617432806, "grad_norm": 0.3361150026321411, "learning_rate": 0.00014134994720753068, "loss": 11.6734, "step": 52230 }, { "epoch": 1.0933391945072428, "grad_norm": 0.30914562940597534, "learning_rate": 0.0001413479508868545, "loss": 11.6713, "step": 52231 }, { "epoch": 1.0933601272712048, "grad_norm": 0.325182169675827, "learning_rate": 0.00014134595454630154, "loss": 11.6493, "step": 52232 }, { "epoch": 1.093381060035167, "grad_norm": 0.3210635185241699, "learning_rate": 0.00014134395818587273, "loss": 11.6468, "step": 52233 }, { "epoch": 1.0934019927991292, "grad_norm": 0.3386164605617523, "learning_rate": 0.00014134196180556907, "loss": 11.6806, "step": 52234 }, { "epoch": 1.0934229255630914, "grad_norm": 0.34808361530303955, "learning_rate": 0.00014133996540539146, "loss": 11.6769, "step": 52235 }, { "epoch": 1.0934438583270536, "grad_norm": 0.2700306177139282, "learning_rate": 0.00014133796898534095, "loss": 11.6715, "step": 52236 }, { "epoch": 1.0934647910910156, "grad_norm": 0.33966389298439026, "learning_rate": 0.00014133597254541844, "loss": 11.6583, "step": 52237 }, { "epoch": 1.0934857238549778, "grad_norm": 0.4517526626586914, "learning_rate": 0.0001413339760856249, "loss": 11.6654, "step": 52238 }, { "epoch": 1.09350665661894, "grad_norm": 0.2802134156227112, "learning_rate": 0.00014133197960596128, "loss": 11.6703, "step": 52239 }, { "epoch": 1.0935275893829022, "grad_norm": 0.31947240233421326, "learning_rate": 0.00014132998310642856, "loss": 11.6773, "step": 52240 }, { "epoch": 1.0935485221468642, "grad_norm": 0.3070647120475769, "learning_rate": 0.00014132798658702766, "loss": 11.6776, "step": 52241 }, { "epoch": 1.0935694549108264, "grad_norm": 0.26595327258110046, "learning_rate": 0.0001413259900477596, "loss": 11.659, "step": 52242 }, { "epoch": 1.0935903876747886, "grad_norm": 0.3111853301525116, "learning_rate": 0.00014132399348862527, "loss": 11.6685, "step": 52243 }, { "epoch": 1.0936113204387508, "grad_norm": 0.30343097448349, "learning_rate": 0.0001413219969096257, "loss": 11.6736, "step": 52244 }, { "epoch": 1.093632253202713, "grad_norm": 0.3083803653717041, "learning_rate": 0.00014132000031076184, "loss": 11.662, "step": 52245 }, { "epoch": 1.0936531859666752, "grad_norm": 0.41884753108024597, "learning_rate": 0.00014131800369203458, "loss": 11.6649, "step": 52246 }, { "epoch": 1.0936741187306371, "grad_norm": 0.2698664963245392, "learning_rate": 0.00014131600705344493, "loss": 11.6582, "step": 52247 }, { "epoch": 1.0936950514945993, "grad_norm": 0.30640295147895813, "learning_rate": 0.0001413140103949939, "loss": 11.6672, "step": 52248 }, { "epoch": 1.0937159842585615, "grad_norm": 0.31219783425331116, "learning_rate": 0.00014131201371668235, "loss": 11.69, "step": 52249 }, { "epoch": 1.0937369170225237, "grad_norm": 0.32183384895324707, "learning_rate": 0.00014131001701851132, "loss": 11.6764, "step": 52250 }, { "epoch": 1.0937578497864857, "grad_norm": 0.35372307896614075, "learning_rate": 0.00014130802030048168, "loss": 11.6705, "step": 52251 }, { "epoch": 1.093778782550448, "grad_norm": 0.2876197397708893, "learning_rate": 0.00014130602356259453, "loss": 11.6564, "step": 52252 }, { "epoch": 1.0937997153144101, "grad_norm": 0.3756128251552582, "learning_rate": 0.00014130402680485066, "loss": 11.6751, "step": 52253 }, { "epoch": 1.0938206480783723, "grad_norm": 0.3367408514022827, "learning_rate": 0.00014130203002725116, "loss": 11.672, "step": 52254 }, { "epoch": 1.0938415808423345, "grad_norm": 0.28118783235549927, "learning_rate": 0.00014130003322979697, "loss": 11.6998, "step": 52255 }, { "epoch": 1.0938625136062965, "grad_norm": 0.2970122992992401, "learning_rate": 0.00014129803641248898, "loss": 11.6708, "step": 52256 }, { "epoch": 1.0938834463702587, "grad_norm": 0.3216017782688141, "learning_rate": 0.00014129603957532821, "loss": 11.6765, "step": 52257 }, { "epoch": 1.093904379134221, "grad_norm": 0.3299272656440735, "learning_rate": 0.00014129404271831563, "loss": 11.6662, "step": 52258 }, { "epoch": 1.093925311898183, "grad_norm": 0.3138924539089203, "learning_rate": 0.00014129204584145216, "loss": 11.6568, "step": 52259 }, { "epoch": 1.093946244662145, "grad_norm": 0.32911941409111023, "learning_rate": 0.00014129004894473875, "loss": 11.6631, "step": 52260 }, { "epoch": 1.0939671774261073, "grad_norm": 0.258357435464859, "learning_rate": 0.00014128805202817644, "loss": 11.6705, "step": 52261 }, { "epoch": 1.0939881101900695, "grad_norm": 0.2795656621456146, "learning_rate": 0.0001412860550917661, "loss": 11.6641, "step": 52262 }, { "epoch": 1.0940090429540317, "grad_norm": 0.2914866805076599, "learning_rate": 0.00014128405813550873, "loss": 11.6652, "step": 52263 }, { "epoch": 1.0940299757179939, "grad_norm": 0.3486935794353485, "learning_rate": 0.0001412820611594053, "loss": 11.668, "step": 52264 }, { "epoch": 1.094050908481956, "grad_norm": 0.3973860740661621, "learning_rate": 0.00014128006416345675, "loss": 11.6673, "step": 52265 }, { "epoch": 1.094071841245918, "grad_norm": 0.2934013605117798, "learning_rate": 0.00014127806714766403, "loss": 11.6838, "step": 52266 }, { "epoch": 1.0940927740098803, "grad_norm": 0.3263631761074066, "learning_rate": 0.00014127607011202813, "loss": 11.6595, "step": 52267 }, { "epoch": 1.0941137067738425, "grad_norm": 0.32521283626556396, "learning_rate": 0.00014127407305654995, "loss": 11.685, "step": 52268 }, { "epoch": 1.0941346395378047, "grad_norm": 0.3033548593521118, "learning_rate": 0.00014127207598123057, "loss": 11.6783, "step": 52269 }, { "epoch": 1.0941555723017666, "grad_norm": 0.32710158824920654, "learning_rate": 0.00014127007888607083, "loss": 11.6642, "step": 52270 }, { "epoch": 1.0941765050657288, "grad_norm": 0.32589051127433777, "learning_rate": 0.00014126808177107177, "loss": 11.6755, "step": 52271 }, { "epoch": 1.094197437829691, "grad_norm": 0.27937471866607666, "learning_rate": 0.00014126608463623427, "loss": 11.6721, "step": 52272 }, { "epoch": 1.0942183705936532, "grad_norm": 0.3346746265888214, "learning_rate": 0.0001412640874815594, "loss": 11.6665, "step": 52273 }, { "epoch": 1.0942393033576154, "grad_norm": 0.30267712473869324, "learning_rate": 0.000141262090307048, "loss": 11.66, "step": 52274 }, { "epoch": 1.0942602361215774, "grad_norm": 0.28569260239601135, "learning_rate": 0.0001412600931127011, "loss": 11.6602, "step": 52275 }, { "epoch": 1.0942811688855396, "grad_norm": 0.3147619366645813, "learning_rate": 0.00014125809589851966, "loss": 11.67, "step": 52276 }, { "epoch": 1.0943021016495018, "grad_norm": 0.47357457876205444, "learning_rate": 0.00014125609866450463, "loss": 11.6634, "step": 52277 }, { "epoch": 1.094323034413464, "grad_norm": 0.459052711725235, "learning_rate": 0.00014125410141065694, "loss": 11.6765, "step": 52278 }, { "epoch": 1.094343967177426, "grad_norm": 0.28264138102531433, "learning_rate": 0.00014125210413697757, "loss": 11.6602, "step": 52279 }, { "epoch": 1.0943648999413882, "grad_norm": 0.3726513087749481, "learning_rate": 0.00014125010684346752, "loss": 11.662, "step": 52280 }, { "epoch": 1.0943858327053504, "grad_norm": 0.32217079401016235, "learning_rate": 0.00014124810953012774, "loss": 11.653, "step": 52281 }, { "epoch": 1.0944067654693126, "grad_norm": 0.2951590120792389, "learning_rate": 0.0001412461121969591, "loss": 11.6751, "step": 52282 }, { "epoch": 1.0944276982332748, "grad_norm": 0.3167911767959595, "learning_rate": 0.00014124411484396266, "loss": 11.6635, "step": 52283 }, { "epoch": 1.094448630997237, "grad_norm": 0.31272923946380615, "learning_rate": 0.00014124211747113937, "loss": 11.6675, "step": 52284 }, { "epoch": 1.094469563761199, "grad_norm": 0.41418299078941345, "learning_rate": 0.00014124012007849012, "loss": 11.6662, "step": 52285 }, { "epoch": 1.0944904965251612, "grad_norm": 0.31553757190704346, "learning_rate": 0.00014123812266601595, "loss": 11.6815, "step": 52286 }, { "epoch": 1.0945114292891234, "grad_norm": 0.4186500012874603, "learning_rate": 0.00014123612523371778, "loss": 11.6526, "step": 52287 }, { "epoch": 1.0945323620530856, "grad_norm": 0.2993130683898926, "learning_rate": 0.00014123412778159657, "loss": 11.6805, "step": 52288 }, { "epoch": 1.0945532948170476, "grad_norm": 0.3609069883823395, "learning_rate": 0.0001412321303096533, "loss": 11.6621, "step": 52289 }, { "epoch": 1.0945742275810098, "grad_norm": 0.34999221563339233, "learning_rate": 0.00014123013281788893, "loss": 11.6829, "step": 52290 }, { "epoch": 1.094595160344972, "grad_norm": 0.31085917353630066, "learning_rate": 0.00014122813530630438, "loss": 11.6595, "step": 52291 }, { "epoch": 1.0946160931089342, "grad_norm": 0.33693090081214905, "learning_rate": 0.00014122613777490067, "loss": 11.6532, "step": 52292 }, { "epoch": 1.0946370258728964, "grad_norm": 0.31330084800720215, "learning_rate": 0.00014122414022367869, "loss": 11.6651, "step": 52293 }, { "epoch": 1.0946579586368583, "grad_norm": 0.4245103597640991, "learning_rate": 0.0001412221426526395, "loss": 11.6539, "step": 52294 }, { "epoch": 1.0946788914008205, "grad_norm": 0.38183873891830444, "learning_rate": 0.00014122014506178393, "loss": 11.6609, "step": 52295 }, { "epoch": 1.0946998241647827, "grad_norm": 0.35876211524009705, "learning_rate": 0.00014121814745111303, "loss": 11.6622, "step": 52296 }, { "epoch": 1.094720756928745, "grad_norm": 0.2744423747062683, "learning_rate": 0.00014121614982062777, "loss": 11.669, "step": 52297 }, { "epoch": 1.094741689692707, "grad_norm": 0.3325634002685547, "learning_rate": 0.00014121415217032904, "loss": 11.6721, "step": 52298 }, { "epoch": 1.094762622456669, "grad_norm": 0.38299140334129333, "learning_rate": 0.0001412121545002179, "loss": 11.6687, "step": 52299 }, { "epoch": 1.0947835552206313, "grad_norm": 0.296193927526474, "learning_rate": 0.0001412101568102952, "loss": 11.6931, "step": 52300 }, { "epoch": 1.0948044879845935, "grad_norm": 0.3187456429004669, "learning_rate": 0.00014120815910056194, "loss": 11.6996, "step": 52301 }, { "epoch": 1.0948254207485557, "grad_norm": 0.28445690870285034, "learning_rate": 0.00014120616137101917, "loss": 11.6448, "step": 52302 }, { "epoch": 1.0948463535125177, "grad_norm": 0.31499385833740234, "learning_rate": 0.0001412041636216677, "loss": 11.6741, "step": 52303 }, { "epoch": 1.09486728627648, "grad_norm": 0.3068365454673767, "learning_rate": 0.0001412021658525086, "loss": 11.6562, "step": 52304 }, { "epoch": 1.094888219040442, "grad_norm": 0.37020960450172424, "learning_rate": 0.00014120016806354278, "loss": 11.6588, "step": 52305 }, { "epoch": 1.0949091518044043, "grad_norm": 0.32408446073532104, "learning_rate": 0.0001411981702547712, "loss": 11.6703, "step": 52306 }, { "epoch": 1.0949300845683665, "grad_norm": 0.32158103585243225, "learning_rate": 0.00014119617242619486, "loss": 11.6606, "step": 52307 }, { "epoch": 1.0949510173323285, "grad_norm": 0.31232964992523193, "learning_rate": 0.00014119417457781466, "loss": 11.6767, "step": 52308 }, { "epoch": 1.0949719500962907, "grad_norm": 0.29732629656791687, "learning_rate": 0.00014119217670963163, "loss": 11.6618, "step": 52309 }, { "epoch": 1.0949928828602529, "grad_norm": 0.3656449019908905, "learning_rate": 0.00014119017882164667, "loss": 11.6509, "step": 52310 }, { "epoch": 1.095013815624215, "grad_norm": 0.37941330671310425, "learning_rate": 0.0001411881809138608, "loss": 11.6693, "step": 52311 }, { "epoch": 1.0950347483881773, "grad_norm": 0.32220059633255005, "learning_rate": 0.00014118618298627488, "loss": 11.6836, "step": 52312 }, { "epoch": 1.0950556811521392, "grad_norm": 0.26751869916915894, "learning_rate": 0.00014118418503888998, "loss": 11.6742, "step": 52313 }, { "epoch": 1.0950766139161014, "grad_norm": 0.32119718194007874, "learning_rate": 0.00014118218707170703, "loss": 11.6646, "step": 52314 }, { "epoch": 1.0950975466800636, "grad_norm": 0.2957553565502167, "learning_rate": 0.00014118018908472693, "loss": 11.6551, "step": 52315 }, { "epoch": 1.0951184794440258, "grad_norm": 0.3558785617351532, "learning_rate": 0.00014117819107795077, "loss": 11.6765, "step": 52316 }, { "epoch": 1.0951394122079878, "grad_norm": 0.3021395802497864, "learning_rate": 0.00014117619305137934, "loss": 11.6842, "step": 52317 }, { "epoch": 1.09516034497195, "grad_norm": 0.3450033962726593, "learning_rate": 0.00014117419500501375, "loss": 11.6722, "step": 52318 }, { "epoch": 1.0951812777359122, "grad_norm": 0.30554813146591187, "learning_rate": 0.00014117219693885488, "loss": 11.6599, "step": 52319 }, { "epoch": 1.0952022104998744, "grad_norm": 0.44703713059425354, "learning_rate": 0.0001411701988529037, "loss": 11.6683, "step": 52320 }, { "epoch": 1.0952231432638366, "grad_norm": 0.2975962162017822, "learning_rate": 0.0001411682007471612, "loss": 11.6454, "step": 52321 }, { "epoch": 1.0952440760277986, "grad_norm": 0.3053842782974243, "learning_rate": 0.0001411662026216283, "loss": 11.6615, "step": 52322 }, { "epoch": 1.0952650087917608, "grad_norm": 0.24652566015720367, "learning_rate": 0.00014116420447630597, "loss": 11.6571, "step": 52323 }, { "epoch": 1.095285941555723, "grad_norm": 0.365520179271698, "learning_rate": 0.00014116220631119522, "loss": 11.6697, "step": 52324 }, { "epoch": 1.0953068743196852, "grad_norm": 0.31796982884407043, "learning_rate": 0.00014116020812629697, "loss": 11.6577, "step": 52325 }, { "epoch": 1.0953278070836474, "grad_norm": 0.403469055891037, "learning_rate": 0.00014115820992161216, "loss": 11.6901, "step": 52326 }, { "epoch": 1.0953487398476094, "grad_norm": 0.2910648286342621, "learning_rate": 0.00014115621169714175, "loss": 11.6655, "step": 52327 }, { "epoch": 1.0953696726115716, "grad_norm": 0.38189980387687683, "learning_rate": 0.00014115421345288677, "loss": 11.6762, "step": 52328 }, { "epoch": 1.0953906053755338, "grad_norm": 0.37347492575645447, "learning_rate": 0.00014115221518884812, "loss": 11.6674, "step": 52329 }, { "epoch": 1.095411538139496, "grad_norm": 0.3875175416469574, "learning_rate": 0.00014115021690502678, "loss": 11.6802, "step": 52330 }, { "epoch": 1.0954324709034582, "grad_norm": 0.34437206387519836, "learning_rate": 0.00014114821860142368, "loss": 11.6603, "step": 52331 }, { "epoch": 1.0954534036674202, "grad_norm": 0.28881970047950745, "learning_rate": 0.00014114622027803985, "loss": 11.6769, "step": 52332 }, { "epoch": 1.0954743364313824, "grad_norm": 0.29104164242744446, "learning_rate": 0.0001411442219348762, "loss": 11.6671, "step": 52333 }, { "epoch": 1.0954952691953446, "grad_norm": 0.26636937260627747, "learning_rate": 0.00014114222357193364, "loss": 11.6689, "step": 52334 }, { "epoch": 1.0955162019593068, "grad_norm": 0.3613766133785248, "learning_rate": 0.00014114022518921324, "loss": 11.6541, "step": 52335 }, { "epoch": 1.0955371347232687, "grad_norm": 0.27718183398246765, "learning_rate": 0.00014113822678671592, "loss": 11.68, "step": 52336 }, { "epoch": 1.095558067487231, "grad_norm": 0.28452199697494507, "learning_rate": 0.0001411362283644426, "loss": 11.641, "step": 52337 }, { "epoch": 1.0955790002511931, "grad_norm": 0.291533887386322, "learning_rate": 0.00014113422992239428, "loss": 11.6585, "step": 52338 }, { "epoch": 1.0955999330151553, "grad_norm": 0.35763972997665405, "learning_rate": 0.0001411322314605719, "loss": 11.6709, "step": 52339 }, { "epoch": 1.0956208657791175, "grad_norm": 0.3736197054386139, "learning_rate": 0.00014113023297897644, "loss": 11.6646, "step": 52340 }, { "epoch": 1.0956417985430795, "grad_norm": 0.4184621274471283, "learning_rate": 0.00014112823447760887, "loss": 11.6681, "step": 52341 }, { "epoch": 1.0956627313070417, "grad_norm": 0.4127635061740875, "learning_rate": 0.00014112623595647012, "loss": 11.6816, "step": 52342 }, { "epoch": 1.095683664071004, "grad_norm": 0.33212170004844666, "learning_rate": 0.00014112423741556118, "loss": 11.6608, "step": 52343 }, { "epoch": 1.0957045968349661, "grad_norm": 0.31893739104270935, "learning_rate": 0.00014112223885488298, "loss": 11.6789, "step": 52344 }, { "epoch": 1.0957255295989283, "grad_norm": 0.41976964473724365, "learning_rate": 0.0001411202402744365, "loss": 11.6729, "step": 52345 }, { "epoch": 1.0957464623628903, "grad_norm": 0.3394964337348938, "learning_rate": 0.0001411182416742227, "loss": 11.6699, "step": 52346 }, { "epoch": 1.0957673951268525, "grad_norm": 0.4128476679325104, "learning_rate": 0.00014111624305424253, "loss": 11.6725, "step": 52347 }, { "epoch": 1.0957883278908147, "grad_norm": 0.3621324896812439, "learning_rate": 0.00014111424441449693, "loss": 11.6732, "step": 52348 }, { "epoch": 1.095809260654777, "grad_norm": 0.26115188002586365, "learning_rate": 0.00014111224575498693, "loss": 11.6571, "step": 52349 }, { "epoch": 1.095830193418739, "grad_norm": 0.4064576029777527, "learning_rate": 0.00014111024707571344, "loss": 11.6944, "step": 52350 }, { "epoch": 1.095851126182701, "grad_norm": 0.41801050305366516, "learning_rate": 0.0001411082483766774, "loss": 11.6668, "step": 52351 }, { "epoch": 1.0958720589466633, "grad_norm": 0.323789119720459, "learning_rate": 0.00014110624965787985, "loss": 11.6685, "step": 52352 }, { "epoch": 1.0958929917106255, "grad_norm": 0.48611748218536377, "learning_rate": 0.0001411042509193217, "loss": 11.6534, "step": 52353 }, { "epoch": 1.0959139244745877, "grad_norm": 0.3018482029438019, "learning_rate": 0.00014110225216100387, "loss": 11.6645, "step": 52354 }, { "epoch": 1.0959348572385497, "grad_norm": 0.3649514317512512, "learning_rate": 0.0001411002533829274, "loss": 11.6585, "step": 52355 }, { "epoch": 1.0959557900025119, "grad_norm": 0.3363741338253021, "learning_rate": 0.00014109825458509317, "loss": 11.6503, "step": 52356 }, { "epoch": 1.095976722766474, "grad_norm": 0.3657000958919525, "learning_rate": 0.00014109625576750224, "loss": 11.6732, "step": 52357 }, { "epoch": 1.0959976555304363, "grad_norm": 0.28261974453926086, "learning_rate": 0.00014109425693015548, "loss": 11.6749, "step": 52358 }, { "epoch": 1.0960185882943985, "grad_norm": 0.24053336679935455, "learning_rate": 0.00014109225807305392, "loss": 11.6659, "step": 52359 }, { "epoch": 1.0960395210583604, "grad_norm": 0.32459843158721924, "learning_rate": 0.00014109025919619846, "loss": 11.6634, "step": 52360 }, { "epoch": 1.0960604538223226, "grad_norm": 0.34784555435180664, "learning_rate": 0.00014108826029959008, "loss": 11.6642, "step": 52361 }, { "epoch": 1.0960813865862848, "grad_norm": 0.3971646726131439, "learning_rate": 0.00014108626138322977, "loss": 11.6742, "step": 52362 }, { "epoch": 1.096102319350247, "grad_norm": 0.42696496844291687, "learning_rate": 0.0001410842624471185, "loss": 11.6702, "step": 52363 }, { "epoch": 1.0961232521142092, "grad_norm": 0.3561351001262665, "learning_rate": 0.00014108226349125716, "loss": 11.6543, "step": 52364 }, { "epoch": 1.0961441848781712, "grad_norm": 0.39891520142555237, "learning_rate": 0.00014108026451564674, "loss": 11.6493, "step": 52365 }, { "epoch": 1.0961651176421334, "grad_norm": 0.42619454860687256, "learning_rate": 0.00014107826552028823, "loss": 11.656, "step": 52366 }, { "epoch": 1.0961860504060956, "grad_norm": 0.3702543079853058, "learning_rate": 0.00014107626650518258, "loss": 11.6654, "step": 52367 }, { "epoch": 1.0962069831700578, "grad_norm": 0.3140246272087097, "learning_rate": 0.00014107426747033075, "loss": 11.6898, "step": 52368 }, { "epoch": 1.09622791593402, "grad_norm": 0.31925052404403687, "learning_rate": 0.00014107226841573368, "loss": 11.6621, "step": 52369 }, { "epoch": 1.096248848697982, "grad_norm": 0.3598040044307709, "learning_rate": 0.00014107026934139237, "loss": 11.6519, "step": 52370 }, { "epoch": 1.0962697814619442, "grad_norm": 0.27636709809303284, "learning_rate": 0.00014106827024730777, "loss": 11.662, "step": 52371 }, { "epoch": 1.0962907142259064, "grad_norm": 0.30259522795677185, "learning_rate": 0.0001410662711334808, "loss": 11.6812, "step": 52372 }, { "epoch": 1.0963116469898686, "grad_norm": 0.2689387798309326, "learning_rate": 0.00014106427199991246, "loss": 11.6674, "step": 52373 }, { "epoch": 1.0963325797538306, "grad_norm": 0.4010286331176758, "learning_rate": 0.00014106227284660367, "loss": 11.6692, "step": 52374 }, { "epoch": 1.0963535125177928, "grad_norm": 0.3575526773929596, "learning_rate": 0.00014106027367355548, "loss": 11.6765, "step": 52375 }, { "epoch": 1.096374445281755, "grad_norm": 0.4227631986141205, "learning_rate": 0.00014105827448076874, "loss": 11.6834, "step": 52376 }, { "epoch": 1.0963953780457172, "grad_norm": 0.2441658228635788, "learning_rate": 0.0001410562752682445, "loss": 11.6522, "step": 52377 }, { "epoch": 1.0964163108096794, "grad_norm": 0.3261001706123352, "learning_rate": 0.00014105427603598367, "loss": 11.6712, "step": 52378 }, { "epoch": 1.0964372435736414, "grad_norm": 0.27779340744018555, "learning_rate": 0.0001410522767839872, "loss": 11.6742, "step": 52379 }, { "epoch": 1.0964581763376036, "grad_norm": 0.35158565640449524, "learning_rate": 0.00014105027751225614, "loss": 11.6622, "step": 52380 }, { "epoch": 1.0964791091015658, "grad_norm": 0.3600252866744995, "learning_rate": 0.00014104827822079136, "loss": 11.6828, "step": 52381 }, { "epoch": 1.096500041865528, "grad_norm": 0.34367960691452026, "learning_rate": 0.00014104627890959388, "loss": 11.6596, "step": 52382 }, { "epoch": 1.0965209746294902, "grad_norm": 0.37540295720100403, "learning_rate": 0.00014104427957866458, "loss": 11.673, "step": 52383 }, { "epoch": 1.0965419073934521, "grad_norm": 0.2886085510253906, "learning_rate": 0.00014104228022800448, "loss": 11.6693, "step": 52384 }, { "epoch": 1.0965628401574143, "grad_norm": 0.35505375266075134, "learning_rate": 0.00014104028085761455, "loss": 11.676, "step": 52385 }, { "epoch": 1.0965837729213765, "grad_norm": 0.4086380898952484, "learning_rate": 0.00014103828146749573, "loss": 11.659, "step": 52386 }, { "epoch": 1.0966047056853387, "grad_norm": 0.27817657589912415, "learning_rate": 0.000141036282057649, "loss": 11.6758, "step": 52387 }, { "epoch": 1.096625638449301, "grad_norm": 0.34037044644355774, "learning_rate": 0.00014103428262807527, "loss": 11.6744, "step": 52388 }, { "epoch": 1.096646571213263, "grad_norm": 0.3226012885570526, "learning_rate": 0.00014103228317877557, "loss": 11.6693, "step": 52389 }, { "epoch": 1.0966675039772251, "grad_norm": 0.3591874837875366, "learning_rate": 0.0001410302837097508, "loss": 11.6664, "step": 52390 }, { "epoch": 1.0966884367411873, "grad_norm": 0.3138349652290344, "learning_rate": 0.00014102828422100198, "loss": 11.6731, "step": 52391 }, { "epoch": 1.0967093695051495, "grad_norm": 0.3106418251991272, "learning_rate": 0.00014102628471253004, "loss": 11.6636, "step": 52392 }, { "epoch": 1.0967303022691115, "grad_norm": 0.27650830149650574, "learning_rate": 0.00014102428518433593, "loss": 11.6609, "step": 52393 }, { "epoch": 1.0967512350330737, "grad_norm": 0.34369879961013794, "learning_rate": 0.00014102228563642064, "loss": 11.6689, "step": 52394 }, { "epoch": 1.096772167797036, "grad_norm": 0.36958083510398865, "learning_rate": 0.0001410202860687851, "loss": 11.6689, "step": 52395 }, { "epoch": 1.096793100560998, "grad_norm": 0.3774467408657074, "learning_rate": 0.00014101828648143026, "loss": 11.6806, "step": 52396 }, { "epoch": 1.0968140333249603, "grad_norm": 0.3396112024784088, "learning_rate": 0.00014101628687435716, "loss": 11.6898, "step": 52397 }, { "epoch": 1.0968349660889223, "grad_norm": 0.3601354956626892, "learning_rate": 0.00014101428724756668, "loss": 11.6892, "step": 52398 }, { "epoch": 1.0968558988528845, "grad_norm": 0.29172641038894653, "learning_rate": 0.00014101228760105984, "loss": 11.6648, "step": 52399 }, { "epoch": 1.0968768316168467, "grad_norm": 0.32262179255485535, "learning_rate": 0.00014101028793483755, "loss": 11.6802, "step": 52400 }, { "epoch": 1.0968977643808089, "grad_norm": 0.2948402464389801, "learning_rate": 0.00014100828824890077, "loss": 11.6799, "step": 52401 }, { "epoch": 1.096918697144771, "grad_norm": 0.27767959237098694, "learning_rate": 0.00014100628854325054, "loss": 11.6896, "step": 52402 }, { "epoch": 1.096939629908733, "grad_norm": 0.29386550188064575, "learning_rate": 0.0001410042888178877, "loss": 11.6724, "step": 52403 }, { "epoch": 1.0969605626726953, "grad_norm": 0.3146134316921234, "learning_rate": 0.0001410022890728133, "loss": 11.6654, "step": 52404 }, { "epoch": 1.0969814954366575, "grad_norm": 0.40116655826568604, "learning_rate": 0.0001410002893080283, "loss": 11.6682, "step": 52405 }, { "epoch": 1.0970024282006197, "grad_norm": 0.34189674258232117, "learning_rate": 0.00014099828952353363, "loss": 11.6572, "step": 52406 }, { "epoch": 1.0970233609645819, "grad_norm": 0.44717180728912354, "learning_rate": 0.00014099628971933026, "loss": 11.6759, "step": 52407 }, { "epoch": 1.0970442937285438, "grad_norm": 0.2886590361595154, "learning_rate": 0.00014099428989541914, "loss": 11.6616, "step": 52408 }, { "epoch": 1.097065226492506, "grad_norm": 0.2765456438064575, "learning_rate": 0.00014099229005180126, "loss": 11.6642, "step": 52409 }, { "epoch": 1.0970861592564682, "grad_norm": 0.37234148383140564, "learning_rate": 0.00014099029018847754, "loss": 11.6605, "step": 52410 }, { "epoch": 1.0971070920204304, "grad_norm": 0.36039891839027405, "learning_rate": 0.000140988290305449, "loss": 11.6592, "step": 52411 }, { "epoch": 1.0971280247843924, "grad_norm": 0.3655664622783661, "learning_rate": 0.00014098629040271653, "loss": 11.6693, "step": 52412 }, { "epoch": 1.0971489575483546, "grad_norm": 0.3312968313694, "learning_rate": 0.00014098429048028114, "loss": 11.6758, "step": 52413 }, { "epoch": 1.0971698903123168, "grad_norm": 0.3456908166408539, "learning_rate": 0.0001409822905381438, "loss": 11.665, "step": 52414 }, { "epoch": 1.097190823076279, "grad_norm": 0.24811220169067383, "learning_rate": 0.00014098029057630544, "loss": 11.6809, "step": 52415 }, { "epoch": 1.0972117558402412, "grad_norm": 0.30257606506347656, "learning_rate": 0.00014097829059476702, "loss": 11.6586, "step": 52416 }, { "epoch": 1.0972326886042032, "grad_norm": 0.3622710108757019, "learning_rate": 0.00014097629059352956, "loss": 11.6696, "step": 52417 }, { "epoch": 1.0972536213681654, "grad_norm": 0.5537059307098389, "learning_rate": 0.00014097429057259394, "loss": 11.6775, "step": 52418 }, { "epoch": 1.0972745541321276, "grad_norm": 0.24281799793243408, "learning_rate": 0.00014097229053196115, "loss": 11.6726, "step": 52419 }, { "epoch": 1.0972954868960898, "grad_norm": 0.3030593693256378, "learning_rate": 0.00014097029047163215, "loss": 11.6596, "step": 52420 }, { "epoch": 1.097316419660052, "grad_norm": 0.33605527877807617, "learning_rate": 0.00014096829039160795, "loss": 11.6512, "step": 52421 }, { "epoch": 1.097337352424014, "grad_norm": 0.30858737230300903, "learning_rate": 0.00014096629029188942, "loss": 11.6866, "step": 52422 }, { "epoch": 1.0973582851879762, "grad_norm": 0.28503498435020447, "learning_rate": 0.0001409642901724776, "loss": 11.6668, "step": 52423 }, { "epoch": 1.0973792179519384, "grad_norm": 0.4280915856361389, "learning_rate": 0.00014096229003337344, "loss": 11.6918, "step": 52424 }, { "epoch": 1.0974001507159006, "grad_norm": 0.439443439245224, "learning_rate": 0.00014096028987457788, "loss": 11.6561, "step": 52425 }, { "epoch": 1.0974210834798628, "grad_norm": 0.31607288122177124, "learning_rate": 0.0001409582896960919, "loss": 11.6736, "step": 52426 }, { "epoch": 1.0974420162438248, "grad_norm": 0.3352090120315552, "learning_rate": 0.0001409562894979164, "loss": 11.6698, "step": 52427 }, { "epoch": 1.097462949007787, "grad_norm": 0.32173842191696167, "learning_rate": 0.00014095428928005243, "loss": 11.6748, "step": 52428 }, { "epoch": 1.0974838817717492, "grad_norm": 0.37195971608161926, "learning_rate": 0.00014095228904250094, "loss": 11.6593, "step": 52429 }, { "epoch": 1.0975048145357114, "grad_norm": 0.34315359592437744, "learning_rate": 0.00014095028878526283, "loss": 11.6533, "step": 52430 }, { "epoch": 1.0975257472996733, "grad_norm": 0.28327125310897827, "learning_rate": 0.00014094828850833908, "loss": 11.673, "step": 52431 }, { "epoch": 1.0975466800636355, "grad_norm": 0.25759512186050415, "learning_rate": 0.0001409462882117307, "loss": 11.69, "step": 52432 }, { "epoch": 1.0975676128275977, "grad_norm": 0.3025801479816437, "learning_rate": 0.00014094428789543855, "loss": 11.6801, "step": 52433 }, { "epoch": 1.09758854559156, "grad_norm": 0.30481818318367004, "learning_rate": 0.00014094228755946376, "loss": 11.6808, "step": 52434 }, { "epoch": 1.0976094783555221, "grad_norm": 0.31866946816444397, "learning_rate": 0.0001409402872038071, "loss": 11.6652, "step": 52435 }, { "epoch": 1.0976304111194841, "grad_norm": 0.3974212408065796, "learning_rate": 0.0001409382868284697, "loss": 11.6596, "step": 52436 }, { "epoch": 1.0976513438834463, "grad_norm": 0.35058873891830444, "learning_rate": 0.0001409362864334524, "loss": 11.672, "step": 52437 }, { "epoch": 1.0976722766474085, "grad_norm": 0.29599136114120483, "learning_rate": 0.00014093428601875624, "loss": 11.6512, "step": 52438 }, { "epoch": 1.0976932094113707, "grad_norm": 0.40172263979911804, "learning_rate": 0.00014093228558438212, "loss": 11.6723, "step": 52439 }, { "epoch": 1.097714142175333, "grad_norm": 0.3446275293827057, "learning_rate": 0.00014093028513033102, "loss": 11.6618, "step": 52440 }, { "epoch": 1.097735074939295, "grad_norm": 0.3299974203109741, "learning_rate": 0.00014092828465660391, "loss": 11.6689, "step": 52441 }, { "epoch": 1.097756007703257, "grad_norm": 0.29739871621131897, "learning_rate": 0.0001409262841632018, "loss": 11.6699, "step": 52442 }, { "epoch": 1.0977769404672193, "grad_norm": 0.3235488831996918, "learning_rate": 0.00014092428365012559, "loss": 11.6749, "step": 52443 }, { "epoch": 1.0977978732311815, "grad_norm": 0.38617268204689026, "learning_rate": 0.00014092228311737623, "loss": 11.6634, "step": 52444 }, { "epoch": 1.0978188059951437, "grad_norm": 0.28555426001548767, "learning_rate": 0.0001409202825649547, "loss": 11.6572, "step": 52445 }, { "epoch": 1.0978397387591057, "grad_norm": 0.3880612850189209, "learning_rate": 0.000140918281992862, "loss": 11.6695, "step": 52446 }, { "epoch": 1.0978606715230679, "grad_norm": 0.3643273413181305, "learning_rate": 0.00014091628140109905, "loss": 11.6793, "step": 52447 }, { "epoch": 1.09788160428703, "grad_norm": 0.3105109930038452, "learning_rate": 0.00014091428078966684, "loss": 11.6721, "step": 52448 }, { "epoch": 1.0979025370509923, "grad_norm": 0.3047752380371094, "learning_rate": 0.0001409122801585663, "loss": 11.666, "step": 52449 }, { "epoch": 1.0979234698149543, "grad_norm": 0.29567044973373413, "learning_rate": 0.0001409102795077984, "loss": 11.6827, "step": 52450 }, { "epoch": 1.0979444025789165, "grad_norm": 0.3048322796821594, "learning_rate": 0.00014090827883736411, "loss": 11.6722, "step": 52451 }, { "epoch": 1.0979653353428787, "grad_norm": 0.2931336760520935, "learning_rate": 0.0001409062781472644, "loss": 11.6774, "step": 52452 }, { "epoch": 1.0979862681068409, "grad_norm": 0.2851294279098511, "learning_rate": 0.00014090427743750027, "loss": 11.6656, "step": 52453 }, { "epoch": 1.098007200870803, "grad_norm": 0.31955486536026, "learning_rate": 0.00014090227670807257, "loss": 11.6603, "step": 52454 }, { "epoch": 1.098028133634765, "grad_norm": 0.5606886148452759, "learning_rate": 0.00014090027595898232, "loss": 11.6873, "step": 52455 }, { "epoch": 1.0980490663987272, "grad_norm": 0.3453783690929413, "learning_rate": 0.00014089827519023053, "loss": 11.6746, "step": 52456 }, { "epoch": 1.0980699991626894, "grad_norm": 0.3534632921218872, "learning_rate": 0.00014089627440181808, "loss": 11.6617, "step": 52457 }, { "epoch": 1.0980909319266516, "grad_norm": 0.2965866029262543, "learning_rate": 0.00014089427359374602, "loss": 11.6583, "step": 52458 }, { "epoch": 1.0981118646906138, "grad_norm": 0.39832431077957153, "learning_rate": 0.00014089227276601522, "loss": 11.6522, "step": 52459 }, { "epoch": 1.0981327974545758, "grad_norm": 0.2423582375049591, "learning_rate": 0.00014089027191862668, "loss": 11.6571, "step": 52460 }, { "epoch": 1.098153730218538, "grad_norm": 0.2682838439941406, "learning_rate": 0.0001408882710515814, "loss": 11.6708, "step": 52461 }, { "epoch": 1.0981746629825002, "grad_norm": 0.4334479868412018, "learning_rate": 0.00014088627016488027, "loss": 11.6651, "step": 52462 }, { "epoch": 1.0981955957464624, "grad_norm": 0.2703031897544861, "learning_rate": 0.00014088426925852433, "loss": 11.6609, "step": 52463 }, { "epoch": 1.0982165285104246, "grad_norm": 0.3191189169883728, "learning_rate": 0.0001408822683325145, "loss": 11.6619, "step": 52464 }, { "epoch": 1.0982374612743866, "grad_norm": 0.3662974238395691, "learning_rate": 0.00014088026738685175, "loss": 11.6781, "step": 52465 }, { "epoch": 1.0982583940383488, "grad_norm": 0.2958443760871887, "learning_rate": 0.00014087826642153697, "loss": 11.6581, "step": 52466 }, { "epoch": 1.098279326802311, "grad_norm": 0.3647913634777069, "learning_rate": 0.00014087626543657125, "loss": 11.6711, "step": 52467 }, { "epoch": 1.0983002595662732, "grad_norm": 0.28342342376708984, "learning_rate": 0.00014087426443195548, "loss": 11.6775, "step": 52468 }, { "epoch": 1.0983211923302352, "grad_norm": 0.3056112825870514, "learning_rate": 0.00014087226340769063, "loss": 11.6679, "step": 52469 }, { "epoch": 1.0983421250941974, "grad_norm": 0.4803832769393921, "learning_rate": 0.00014087026236377767, "loss": 11.6882, "step": 52470 }, { "epoch": 1.0983630578581596, "grad_norm": 0.32922741770744324, "learning_rate": 0.00014086826130021754, "loss": 11.6513, "step": 52471 }, { "epoch": 1.0983839906221218, "grad_norm": 0.34653446078300476, "learning_rate": 0.0001408662602170112, "loss": 11.6772, "step": 52472 }, { "epoch": 1.098404923386084, "grad_norm": 0.38352319598197937, "learning_rate": 0.0001408642591141597, "loss": 11.6862, "step": 52473 }, { "epoch": 1.098425856150046, "grad_norm": 0.3305617570877075, "learning_rate": 0.00014086225799166385, "loss": 11.6773, "step": 52474 }, { "epoch": 1.0984467889140082, "grad_norm": 0.3279271125793457, "learning_rate": 0.00014086025684952475, "loss": 11.6766, "step": 52475 }, { "epoch": 1.0984677216779704, "grad_norm": 0.4679569602012634, "learning_rate": 0.00014085825568774328, "loss": 11.6755, "step": 52476 }, { "epoch": 1.0984886544419326, "grad_norm": 0.33047327399253845, "learning_rate": 0.00014085625450632042, "loss": 11.6562, "step": 52477 }, { "epoch": 1.0985095872058948, "grad_norm": 0.32863637804985046, "learning_rate": 0.00014085425330525716, "loss": 11.6763, "step": 52478 }, { "epoch": 1.0985305199698567, "grad_norm": 0.33705753087997437, "learning_rate": 0.00014085225208455444, "loss": 11.691, "step": 52479 }, { "epoch": 1.098551452733819, "grad_norm": 0.30686333775520325, "learning_rate": 0.00014085025084421325, "loss": 11.6897, "step": 52480 }, { "epoch": 1.0985723854977811, "grad_norm": 0.44436147809028625, "learning_rate": 0.00014084824958423447, "loss": 11.6567, "step": 52481 }, { "epoch": 1.0985933182617433, "grad_norm": 0.2631388306617737, "learning_rate": 0.00014084624830461913, "loss": 11.6907, "step": 52482 }, { "epoch": 1.0986142510257055, "grad_norm": 0.3158268332481384, "learning_rate": 0.0001408442470053682, "loss": 11.6668, "step": 52483 }, { "epoch": 1.0986351837896675, "grad_norm": 0.25689393281936646, "learning_rate": 0.0001408422456864826, "loss": 11.6695, "step": 52484 }, { "epoch": 1.0986561165536297, "grad_norm": 0.34287482500076294, "learning_rate": 0.00014084024434796334, "loss": 11.6677, "step": 52485 }, { "epoch": 1.098677049317592, "grad_norm": 0.3677521347999573, "learning_rate": 0.00014083824298981133, "loss": 11.6791, "step": 52486 }, { "epoch": 1.0986979820815541, "grad_norm": 0.4115993082523346, "learning_rate": 0.0001408362416120276, "loss": 11.6786, "step": 52487 }, { "epoch": 1.098718914845516, "grad_norm": 0.30472925305366516, "learning_rate": 0.000140834240214613, "loss": 11.6657, "step": 52488 }, { "epoch": 1.0987398476094783, "grad_norm": 0.26865243911743164, "learning_rate": 0.00014083223879756863, "loss": 11.6694, "step": 52489 }, { "epoch": 1.0987607803734405, "grad_norm": 0.3607175350189209, "learning_rate": 0.00014083023736089536, "loss": 11.6787, "step": 52490 }, { "epoch": 1.0987817131374027, "grad_norm": 0.31943419575691223, "learning_rate": 0.00014082823590459419, "loss": 11.6751, "step": 52491 }, { "epoch": 1.098802645901365, "grad_norm": 0.33678364753723145, "learning_rate": 0.00014082623442866605, "loss": 11.6709, "step": 52492 }, { "epoch": 1.0988235786653269, "grad_norm": 0.296845942735672, "learning_rate": 0.00014082423293311193, "loss": 11.6509, "step": 52493 }, { "epoch": 1.098844511429289, "grad_norm": 0.3321448266506195, "learning_rate": 0.00014082223141793278, "loss": 11.6604, "step": 52494 }, { "epoch": 1.0988654441932513, "grad_norm": 0.3869444727897644, "learning_rate": 0.00014082022988312954, "loss": 11.6815, "step": 52495 }, { "epoch": 1.0988863769572135, "grad_norm": 0.30117982625961304, "learning_rate": 0.00014081822832870327, "loss": 11.6657, "step": 52496 }, { "epoch": 1.0989073097211757, "grad_norm": 0.3416643440723419, "learning_rate": 0.00014081622675465478, "loss": 11.6581, "step": 52497 }, { "epoch": 1.0989282424851377, "grad_norm": 0.36909204721450806, "learning_rate": 0.00014081422516098514, "loss": 11.6671, "step": 52498 }, { "epoch": 1.0989491752490999, "grad_norm": 0.3606666028499603, "learning_rate": 0.0001408122235476953, "loss": 11.6795, "step": 52499 }, { "epoch": 1.098970108013062, "grad_norm": 0.3093110918998718, "learning_rate": 0.0001408102219147862, "loss": 11.6622, "step": 52500 }, { "epoch": 1.0989910407770243, "grad_norm": 0.3158862888813019, "learning_rate": 0.0001408082202622588, "loss": 11.6646, "step": 52501 }, { "epoch": 1.0990119735409865, "grad_norm": 0.33079880475997925, "learning_rate": 0.00014080621859011407, "loss": 11.6715, "step": 52502 }, { "epoch": 1.0990329063049484, "grad_norm": 0.3557397723197937, "learning_rate": 0.000140804216898353, "loss": 11.6639, "step": 52503 }, { "epoch": 1.0990538390689106, "grad_norm": 0.29615190625190735, "learning_rate": 0.00014080221518697648, "loss": 11.6723, "step": 52504 }, { "epoch": 1.0990747718328728, "grad_norm": 0.2518194317817688, "learning_rate": 0.00014080021345598556, "loss": 11.6596, "step": 52505 }, { "epoch": 1.099095704596835, "grad_norm": 0.36512118577957153, "learning_rate": 0.00014079821170538112, "loss": 11.6717, "step": 52506 }, { "epoch": 1.099116637360797, "grad_norm": 0.2924916744232178, "learning_rate": 0.00014079620993516417, "loss": 11.6657, "step": 52507 }, { "epoch": 1.0991375701247592, "grad_norm": 0.3323269486427307, "learning_rate": 0.00014079420814533568, "loss": 11.6651, "step": 52508 }, { "epoch": 1.0991585028887214, "grad_norm": 0.3155588209629059, "learning_rate": 0.00014079220633589657, "loss": 11.6831, "step": 52509 }, { "epoch": 1.0991794356526836, "grad_norm": 0.254279762506485, "learning_rate": 0.00014079020450684788, "loss": 11.667, "step": 52510 }, { "epoch": 1.0992003684166458, "grad_norm": 0.4366491138935089, "learning_rate": 0.00014078820265819048, "loss": 11.684, "step": 52511 }, { "epoch": 1.0992213011806078, "grad_norm": 0.3597916066646576, "learning_rate": 0.00014078620078992538, "loss": 11.6651, "step": 52512 }, { "epoch": 1.09924223394457, "grad_norm": 0.45943719148635864, "learning_rate": 0.00014078419890205355, "loss": 11.6602, "step": 52513 }, { "epoch": 1.0992631667085322, "grad_norm": 0.31476402282714844, "learning_rate": 0.00014078219699457592, "loss": 11.666, "step": 52514 }, { "epoch": 1.0992840994724944, "grad_norm": 0.3768252730369568, "learning_rate": 0.00014078019506749348, "loss": 11.6664, "step": 52515 }, { "epoch": 1.0993050322364566, "grad_norm": 0.38533514738082886, "learning_rate": 0.00014077819312080717, "loss": 11.6857, "step": 52516 }, { "epoch": 1.0993259650004186, "grad_norm": 0.4724845588207245, "learning_rate": 0.000140776191154518, "loss": 11.6488, "step": 52517 }, { "epoch": 1.0993468977643808, "grad_norm": 0.35070955753326416, "learning_rate": 0.00014077418916862687, "loss": 11.67, "step": 52518 }, { "epoch": 1.099367830528343, "grad_norm": 0.31659621000289917, "learning_rate": 0.0001407721871631348, "loss": 11.6614, "step": 52519 }, { "epoch": 1.0993887632923052, "grad_norm": 0.2956794202327728, "learning_rate": 0.00014077018513804267, "loss": 11.6651, "step": 52520 }, { "epoch": 1.0994096960562674, "grad_norm": 0.3124248683452606, "learning_rate": 0.0001407681830933515, "loss": 11.675, "step": 52521 }, { "epoch": 1.0994306288202293, "grad_norm": 0.330584317445755, "learning_rate": 0.0001407661810290623, "loss": 11.6492, "step": 52522 }, { "epoch": 1.0994515615841916, "grad_norm": 0.2988424003124237, "learning_rate": 0.00014076417894517596, "loss": 11.6641, "step": 52523 }, { "epoch": 1.0994724943481538, "grad_norm": 0.35054734349250793, "learning_rate": 0.00014076217684169345, "loss": 11.6626, "step": 52524 }, { "epoch": 1.099493427112116, "grad_norm": 0.2993816137313843, "learning_rate": 0.00014076017471861574, "loss": 11.6711, "step": 52525 }, { "epoch": 1.099514359876078, "grad_norm": 0.3253116011619568, "learning_rate": 0.0001407581725759438, "loss": 11.6726, "step": 52526 }, { "epoch": 1.0995352926400401, "grad_norm": 0.3823997378349304, "learning_rate": 0.0001407561704136786, "loss": 11.6663, "step": 52527 }, { "epoch": 1.0995562254040023, "grad_norm": 0.3676704168319702, "learning_rate": 0.0001407541682318211, "loss": 11.6709, "step": 52528 }, { "epoch": 1.0995771581679645, "grad_norm": 0.36227819323539734, "learning_rate": 0.00014075216603037224, "loss": 11.6772, "step": 52529 }, { "epoch": 1.0995980909319267, "grad_norm": 0.3068234920501709, "learning_rate": 0.000140750163809333, "loss": 11.6748, "step": 52530 }, { "epoch": 1.0996190236958887, "grad_norm": 0.3832653760910034, "learning_rate": 0.00014074816156870433, "loss": 11.6837, "step": 52531 }, { "epoch": 1.099639956459851, "grad_norm": 0.29836979508399963, "learning_rate": 0.0001407461593084872, "loss": 11.6817, "step": 52532 }, { "epoch": 1.099660889223813, "grad_norm": 0.3781358599662781, "learning_rate": 0.00014074415702868258, "loss": 11.6695, "step": 52533 }, { "epoch": 1.0996818219877753, "grad_norm": 0.32669758796691895, "learning_rate": 0.00014074215472929143, "loss": 11.6496, "step": 52534 }, { "epoch": 1.0997027547517375, "grad_norm": 0.3045887351036072, "learning_rate": 0.00014074015241031474, "loss": 11.6605, "step": 52535 }, { "epoch": 1.0997236875156995, "grad_norm": 0.33379414677619934, "learning_rate": 0.00014073815007175343, "loss": 11.6659, "step": 52536 }, { "epoch": 1.0997446202796617, "grad_norm": 0.4111012816429138, "learning_rate": 0.00014073614771360844, "loss": 11.6556, "step": 52537 }, { "epoch": 1.099765553043624, "grad_norm": 0.28042688965797424, "learning_rate": 0.00014073414533588075, "loss": 11.676, "step": 52538 }, { "epoch": 1.099786485807586, "grad_norm": 0.33384010195732117, "learning_rate": 0.0001407321429385714, "loss": 11.663, "step": 52539 }, { "epoch": 1.0998074185715483, "grad_norm": 0.3033640682697296, "learning_rate": 0.00014073014052168125, "loss": 11.6709, "step": 52540 }, { "epoch": 1.0998283513355103, "grad_norm": 0.3521239459514618, "learning_rate": 0.00014072813808521135, "loss": 11.6871, "step": 52541 }, { "epoch": 1.0998492840994725, "grad_norm": 0.3063054382801056, "learning_rate": 0.00014072613562916256, "loss": 11.674, "step": 52542 }, { "epoch": 1.0998702168634347, "grad_norm": 0.2828877866268158, "learning_rate": 0.00014072413315353594, "loss": 11.6644, "step": 52543 }, { "epoch": 1.0998911496273969, "grad_norm": 0.34839197993278503, "learning_rate": 0.0001407221306583324, "loss": 11.6587, "step": 52544 }, { "epoch": 1.0999120823913588, "grad_norm": 0.31124067306518555, "learning_rate": 0.00014072012814355292, "loss": 11.6461, "step": 52545 }, { "epoch": 1.099933015155321, "grad_norm": 0.2831852436065674, "learning_rate": 0.0001407181256091985, "loss": 11.6627, "step": 52546 }, { "epoch": 1.0999539479192832, "grad_norm": 0.2709706127643585, "learning_rate": 0.00014071612305527, "loss": 11.6409, "step": 52547 }, { "epoch": 1.0999748806832454, "grad_norm": 0.3024623990058899, "learning_rate": 0.00014071412048176845, "loss": 11.6585, "step": 52548 }, { "epoch": 1.0999958134472076, "grad_norm": 0.2978230118751526, "learning_rate": 0.00014071211788869482, "loss": 11.6758, "step": 52549 }, { "epoch": 1.1000167462111696, "grad_norm": 0.33152928948402405, "learning_rate": 0.00014071011527605007, "loss": 11.659, "step": 52550 }, { "epoch": 1.1000376789751318, "grad_norm": 0.2892315685749054, "learning_rate": 0.00014070811264383515, "loss": 11.6631, "step": 52551 }, { "epoch": 1.100058611739094, "grad_norm": 0.37985959649086, "learning_rate": 0.000140706109992051, "loss": 11.6857, "step": 52552 }, { "epoch": 1.1000795445030562, "grad_norm": 0.2986599802970886, "learning_rate": 0.00014070410732069862, "loss": 11.6625, "step": 52553 }, { "epoch": 1.1001004772670184, "grad_norm": 0.43178462982177734, "learning_rate": 0.00014070210462977897, "loss": 11.6685, "step": 52554 }, { "epoch": 1.1001214100309804, "grad_norm": 0.32838863134384155, "learning_rate": 0.000140700101919293, "loss": 11.6787, "step": 52555 }, { "epoch": 1.1001423427949426, "grad_norm": 0.3051813542842865, "learning_rate": 0.00014069809918924167, "loss": 11.6727, "step": 52556 }, { "epoch": 1.1001632755589048, "grad_norm": 0.3300638198852539, "learning_rate": 0.00014069609643962595, "loss": 11.6791, "step": 52557 }, { "epoch": 1.100184208322867, "grad_norm": 0.3211005628108978, "learning_rate": 0.0001406940936704468, "loss": 11.6717, "step": 52558 }, { "epoch": 1.1002051410868292, "grad_norm": 0.3296228051185608, "learning_rate": 0.0001406920908817052, "loss": 11.6502, "step": 52559 }, { "epoch": 1.1002260738507912, "grad_norm": 0.3558589816093445, "learning_rate": 0.00014069008807340204, "loss": 11.6726, "step": 52560 }, { "epoch": 1.1002470066147534, "grad_norm": 0.26782286167144775, "learning_rate": 0.00014068808524553842, "loss": 11.6708, "step": 52561 }, { "epoch": 1.1002679393787156, "grad_norm": 0.2671660780906677, "learning_rate": 0.00014068608239811516, "loss": 11.6577, "step": 52562 }, { "epoch": 1.1002888721426778, "grad_norm": 0.31101036071777344, "learning_rate": 0.00014068407953113333, "loss": 11.6608, "step": 52563 }, { "epoch": 1.1003098049066398, "grad_norm": 0.3350539207458496, "learning_rate": 0.0001406820766445938, "loss": 11.6796, "step": 52564 }, { "epoch": 1.100330737670602, "grad_norm": 0.3303821384906769, "learning_rate": 0.00014068007373849758, "loss": 11.6729, "step": 52565 }, { "epoch": 1.1003516704345642, "grad_norm": 0.33349111676216125, "learning_rate": 0.00014067807081284567, "loss": 11.6662, "step": 52566 }, { "epoch": 1.1003726031985264, "grad_norm": 0.3419094979763031, "learning_rate": 0.00014067606786763898, "loss": 11.6721, "step": 52567 }, { "epoch": 1.1003935359624886, "grad_norm": 0.29135990142822266, "learning_rate": 0.0001406740649028785, "loss": 11.6689, "step": 52568 }, { "epoch": 1.1004144687264505, "grad_norm": 0.26804155111312866, "learning_rate": 0.00014067206191856516, "loss": 11.6672, "step": 52569 }, { "epoch": 1.1004354014904127, "grad_norm": 0.37295374274253845, "learning_rate": 0.0001406700589147, "loss": 11.6664, "step": 52570 }, { "epoch": 1.100456334254375, "grad_norm": 0.4774271845817566, "learning_rate": 0.00014066805589128386, "loss": 11.6825, "step": 52571 }, { "epoch": 1.1004772670183371, "grad_norm": 0.28279298543930054, "learning_rate": 0.0001406660528483178, "loss": 11.6647, "step": 52572 }, { "epoch": 1.1004981997822993, "grad_norm": 0.28479650616645813, "learning_rate": 0.00014066404978580277, "loss": 11.6672, "step": 52573 }, { "epoch": 1.1005191325462613, "grad_norm": 0.3046548068523407, "learning_rate": 0.00014066204670373967, "loss": 11.6541, "step": 52574 }, { "epoch": 1.1005400653102235, "grad_norm": 0.2784503400325775, "learning_rate": 0.00014066004360212955, "loss": 11.6618, "step": 52575 }, { "epoch": 1.1005609980741857, "grad_norm": 0.3132913112640381, "learning_rate": 0.0001406580404809733, "loss": 11.6775, "step": 52576 }, { "epoch": 1.100581930838148, "grad_norm": 0.389264851808548, "learning_rate": 0.00014065603734027192, "loss": 11.6846, "step": 52577 }, { "epoch": 1.1006028636021101, "grad_norm": 0.39229193329811096, "learning_rate": 0.00014065403418002642, "loss": 11.6687, "step": 52578 }, { "epoch": 1.100623796366072, "grad_norm": 0.3063908517360687, "learning_rate": 0.00014065203100023768, "loss": 11.6705, "step": 52579 }, { "epoch": 1.1006447291300343, "grad_norm": 0.3383296728134155, "learning_rate": 0.00014065002780090668, "loss": 11.6584, "step": 52580 }, { "epoch": 1.1006656618939965, "grad_norm": 0.3943025469779968, "learning_rate": 0.00014064802458203438, "loss": 11.6785, "step": 52581 }, { "epoch": 1.1006865946579587, "grad_norm": 0.34112221002578735, "learning_rate": 0.0001406460213436218, "loss": 11.6827, "step": 52582 }, { "epoch": 1.1007075274219207, "grad_norm": 0.26468706130981445, "learning_rate": 0.00014064401808566986, "loss": 11.6709, "step": 52583 }, { "epoch": 1.1007284601858829, "grad_norm": 0.46157345175743103, "learning_rate": 0.0001406420148081795, "loss": 11.6751, "step": 52584 }, { "epoch": 1.100749392949845, "grad_norm": 0.30730119347572327, "learning_rate": 0.00014064001151115174, "loss": 11.6634, "step": 52585 }, { "epoch": 1.1007703257138073, "grad_norm": 0.2724372148513794, "learning_rate": 0.00014063800819458748, "loss": 11.6718, "step": 52586 }, { "epoch": 1.1007912584777695, "grad_norm": 0.3359106779098511, "learning_rate": 0.00014063600485848772, "loss": 11.6542, "step": 52587 }, { "epoch": 1.1008121912417315, "grad_norm": 0.37905552983283997, "learning_rate": 0.00014063400150285342, "loss": 11.6559, "step": 52588 }, { "epoch": 1.1008331240056937, "grad_norm": 0.31086185574531555, "learning_rate": 0.00014063199812768554, "loss": 11.6424, "step": 52589 }, { "epoch": 1.1008540567696559, "grad_norm": 0.32505348324775696, "learning_rate": 0.00014062999473298508, "loss": 11.6639, "step": 52590 }, { "epoch": 1.100874989533618, "grad_norm": 0.31693944334983826, "learning_rate": 0.00014062799131875294, "loss": 11.6786, "step": 52591 }, { "epoch": 1.1008959222975803, "grad_norm": 0.2762138545513153, "learning_rate": 0.00014062598788499008, "loss": 11.6508, "step": 52592 }, { "epoch": 1.1009168550615422, "grad_norm": 0.360443115234375, "learning_rate": 0.00014062398443169752, "loss": 11.6685, "step": 52593 }, { "epoch": 1.1009377878255044, "grad_norm": 1.5374164581298828, "learning_rate": 0.00014062198095887623, "loss": 11.6455, "step": 52594 }, { "epoch": 1.1009587205894666, "grad_norm": 0.3106764554977417, "learning_rate": 0.0001406199774665271, "loss": 11.6783, "step": 52595 }, { "epoch": 1.1009796533534288, "grad_norm": 0.3290315270423889, "learning_rate": 0.00014061797395465114, "loss": 11.6855, "step": 52596 }, { "epoch": 1.101000586117391, "grad_norm": 0.27764832973480225, "learning_rate": 0.00014061597042324932, "loss": 11.6727, "step": 52597 }, { "epoch": 1.101021518881353, "grad_norm": 0.3200094997882843, "learning_rate": 0.00014061396687232257, "loss": 11.6623, "step": 52598 }, { "epoch": 1.1010424516453152, "grad_norm": 0.47998470067977905, "learning_rate": 0.00014061196330187188, "loss": 11.6706, "step": 52599 }, { "epoch": 1.1010633844092774, "grad_norm": 0.30773067474365234, "learning_rate": 0.0001406099597118982, "loss": 11.658, "step": 52600 }, { "epoch": 1.1010843171732396, "grad_norm": 0.2683677077293396, "learning_rate": 0.00014060795610240252, "loss": 11.6628, "step": 52601 }, { "epoch": 1.1011052499372016, "grad_norm": 0.24546192586421967, "learning_rate": 0.00014060595247338578, "loss": 11.6619, "step": 52602 }, { "epoch": 1.1011261827011638, "grad_norm": 0.43315809965133667, "learning_rate": 0.0001406039488248489, "loss": 11.6595, "step": 52603 }, { "epoch": 1.101147115465126, "grad_norm": 0.34182894229888916, "learning_rate": 0.0001406019451567929, "loss": 11.6728, "step": 52604 }, { "epoch": 1.1011680482290882, "grad_norm": 0.2474936842918396, "learning_rate": 0.0001405999414692188, "loss": 11.6551, "step": 52605 }, { "epoch": 1.1011889809930504, "grad_norm": 0.4029678404331207, "learning_rate": 0.00014059793776212743, "loss": 11.6692, "step": 52606 }, { "epoch": 1.1012099137570124, "grad_norm": 0.3835316300392151, "learning_rate": 0.00014059593403551983, "loss": 11.6813, "step": 52607 }, { "epoch": 1.1012308465209746, "grad_norm": 0.311954140663147, "learning_rate": 0.00014059393028939692, "loss": 11.6701, "step": 52608 }, { "epoch": 1.1012517792849368, "grad_norm": 0.3262481093406677, "learning_rate": 0.00014059192652375976, "loss": 11.6656, "step": 52609 }, { "epoch": 1.101272712048899, "grad_norm": 0.29338181018829346, "learning_rate": 0.0001405899227386092, "loss": 11.6767, "step": 52610 }, { "epoch": 1.1012936448128612, "grad_norm": 0.28070899844169617, "learning_rate": 0.00014058791893394626, "loss": 11.6568, "step": 52611 }, { "epoch": 1.1013145775768232, "grad_norm": 0.4231064021587372, "learning_rate": 0.0001405859151097719, "loss": 11.6704, "step": 52612 }, { "epoch": 1.1013355103407854, "grad_norm": 0.30873584747314453, "learning_rate": 0.00014058391126608706, "loss": 11.66, "step": 52613 }, { "epoch": 1.1013564431047476, "grad_norm": 0.25081589818000793, "learning_rate": 0.00014058190740289273, "loss": 11.6832, "step": 52614 }, { "epoch": 1.1013773758687098, "grad_norm": 0.31262558698654175, "learning_rate": 0.0001405799035201899, "loss": 11.6862, "step": 52615 }, { "epoch": 1.101398308632672, "grad_norm": 0.2800607979297638, "learning_rate": 0.00014057789961797944, "loss": 11.6696, "step": 52616 }, { "epoch": 1.101419241396634, "grad_norm": 0.32183554768562317, "learning_rate": 0.00014057589569626243, "loss": 11.6647, "step": 52617 }, { "epoch": 1.1014401741605961, "grad_norm": 0.32436510920524597, "learning_rate": 0.00014057389175503975, "loss": 11.6635, "step": 52618 }, { "epoch": 1.1014611069245583, "grad_norm": 0.30002444982528687, "learning_rate": 0.00014057188779431238, "loss": 11.6765, "step": 52619 }, { "epoch": 1.1014820396885205, "grad_norm": 0.27023476362228394, "learning_rate": 0.00014056988381408128, "loss": 11.6592, "step": 52620 }, { "epoch": 1.1015029724524825, "grad_norm": 0.32322683930397034, "learning_rate": 0.00014056787981434743, "loss": 11.6642, "step": 52621 }, { "epoch": 1.1015239052164447, "grad_norm": 0.3711789846420288, "learning_rate": 0.0001405658757951118, "loss": 11.67, "step": 52622 }, { "epoch": 1.101544837980407, "grad_norm": 0.29366201162338257, "learning_rate": 0.0001405638717563753, "loss": 11.6708, "step": 52623 }, { "epoch": 1.1015657707443691, "grad_norm": 0.2623025178909302, "learning_rate": 0.000140561867698139, "loss": 11.6593, "step": 52624 }, { "epoch": 1.1015867035083313, "grad_norm": 0.3639819025993347, "learning_rate": 0.00014055986362040374, "loss": 11.6702, "step": 52625 }, { "epoch": 1.1016076362722933, "grad_norm": 0.3755655586719513, "learning_rate": 0.00014055785952317055, "loss": 11.651, "step": 52626 }, { "epoch": 1.1016285690362555, "grad_norm": 0.29658007621765137, "learning_rate": 0.00014055585540644042, "loss": 11.6549, "step": 52627 }, { "epoch": 1.1016495018002177, "grad_norm": 0.32345694303512573, "learning_rate": 0.00014055385127021425, "loss": 11.66, "step": 52628 }, { "epoch": 1.10167043456418, "grad_norm": 0.24295416474342346, "learning_rate": 0.00014055184711449304, "loss": 11.6725, "step": 52629 }, { "epoch": 1.101691367328142, "grad_norm": 0.45698240399360657, "learning_rate": 0.00014054984293927774, "loss": 11.682, "step": 52630 }, { "epoch": 1.101712300092104, "grad_norm": 0.3041190207004547, "learning_rate": 0.0001405478387445693, "loss": 11.6556, "step": 52631 }, { "epoch": 1.1017332328560663, "grad_norm": 0.2810647785663605, "learning_rate": 0.00014054583453036872, "loss": 11.6705, "step": 52632 }, { "epoch": 1.1017541656200285, "grad_norm": 0.2687321901321411, "learning_rate": 0.00014054383029667695, "loss": 11.6601, "step": 52633 }, { "epoch": 1.1017750983839907, "grad_norm": 0.3724203109741211, "learning_rate": 0.00014054182604349497, "loss": 11.6772, "step": 52634 }, { "epoch": 1.1017960311479529, "grad_norm": 0.3051639497280121, "learning_rate": 0.0001405398217708237, "loss": 11.6594, "step": 52635 }, { "epoch": 1.1018169639119149, "grad_norm": 0.34352990984916687, "learning_rate": 0.0001405378174786641, "loss": 11.6537, "step": 52636 }, { "epoch": 1.101837896675877, "grad_norm": 0.3372472822666168, "learning_rate": 0.00014053581316701718, "loss": 11.6759, "step": 52637 }, { "epoch": 1.1018588294398393, "grad_norm": 0.3114203214645386, "learning_rate": 0.00014053380883588388, "loss": 11.6559, "step": 52638 }, { "epoch": 1.1018797622038015, "grad_norm": 0.2920082211494446, "learning_rate": 0.00014053180448526516, "loss": 11.6688, "step": 52639 }, { "epoch": 1.1019006949677634, "grad_norm": 0.2233288735151291, "learning_rate": 0.000140529800115162, "loss": 11.6814, "step": 52640 }, { "epoch": 1.1019216277317256, "grad_norm": 0.31042611598968506, "learning_rate": 0.00014052779572557535, "loss": 11.6648, "step": 52641 }, { "epoch": 1.1019425604956878, "grad_norm": 0.3347119987010956, "learning_rate": 0.00014052579131650615, "loss": 11.6794, "step": 52642 }, { "epoch": 1.10196349325965, "grad_norm": 0.4004356861114502, "learning_rate": 0.0001405237868879554, "loss": 11.6603, "step": 52643 }, { "epoch": 1.1019844260236122, "grad_norm": 0.2937384247779846, "learning_rate": 0.0001405217824399241, "loss": 11.6594, "step": 52644 }, { "epoch": 1.1020053587875742, "grad_norm": 0.33791235089302063, "learning_rate": 0.00014051977797241314, "loss": 11.6617, "step": 52645 }, { "epoch": 1.1020262915515364, "grad_norm": 0.32224470376968384, "learning_rate": 0.00014051777348542352, "loss": 11.6526, "step": 52646 }, { "epoch": 1.1020472243154986, "grad_norm": 0.32438090443611145, "learning_rate": 0.0001405157689789562, "loss": 11.6629, "step": 52647 }, { "epoch": 1.1020681570794608, "grad_norm": 0.3085685968399048, "learning_rate": 0.00014051376445301208, "loss": 11.6766, "step": 52648 }, { "epoch": 1.102089089843423, "grad_norm": 0.3534521162509918, "learning_rate": 0.0001405117599075922, "loss": 11.6561, "step": 52649 }, { "epoch": 1.102110022607385, "grad_norm": 0.3600795865058899, "learning_rate": 0.00014050975534269755, "loss": 11.6687, "step": 52650 }, { "epoch": 1.1021309553713472, "grad_norm": 0.30706751346588135, "learning_rate": 0.00014050775075832901, "loss": 11.6605, "step": 52651 }, { "epoch": 1.1021518881353094, "grad_norm": 0.29271504282951355, "learning_rate": 0.0001405057461544876, "loss": 11.6778, "step": 52652 }, { "epoch": 1.1021728208992716, "grad_norm": 0.22676748037338257, "learning_rate": 0.00014050374153117427, "loss": 11.6529, "step": 52653 }, { "epoch": 1.1021937536632338, "grad_norm": 0.3319343030452728, "learning_rate": 0.00014050173688838998, "loss": 11.6796, "step": 52654 }, { "epoch": 1.1022146864271958, "grad_norm": 0.36437734961509705, "learning_rate": 0.0001404997322261357, "loss": 11.678, "step": 52655 }, { "epoch": 1.102235619191158, "grad_norm": 0.30314725637435913, "learning_rate": 0.00014049772754441236, "loss": 11.6562, "step": 52656 }, { "epoch": 1.1022565519551202, "grad_norm": 0.356670618057251, "learning_rate": 0.00014049572284322096, "loss": 11.6727, "step": 52657 }, { "epoch": 1.1022774847190824, "grad_norm": 0.3132304251194, "learning_rate": 0.00014049371812256244, "loss": 11.6712, "step": 52658 }, { "epoch": 1.1022984174830444, "grad_norm": 0.2663353979587555, "learning_rate": 0.0001404917133824378, "loss": 11.6568, "step": 52659 }, { "epoch": 1.1023193502470066, "grad_norm": 0.32586702704429626, "learning_rate": 0.00014048970862284797, "loss": 11.6728, "step": 52660 }, { "epoch": 1.1023402830109688, "grad_norm": 0.30668357014656067, "learning_rate": 0.00014048770384379394, "loss": 11.67, "step": 52661 }, { "epoch": 1.102361215774931, "grad_norm": 0.3425593972206116, "learning_rate": 0.00014048569904527667, "loss": 11.6657, "step": 52662 }, { "epoch": 1.1023821485388932, "grad_norm": 0.3943176865577698, "learning_rate": 0.00014048369422729707, "loss": 11.687, "step": 52663 }, { "epoch": 1.1024030813028551, "grad_norm": 0.35629284381866455, "learning_rate": 0.0001404816893898562, "loss": 11.6627, "step": 52664 }, { "epoch": 1.1024240140668173, "grad_norm": 0.3205012083053589, "learning_rate": 0.00014047968453295492, "loss": 11.6657, "step": 52665 }, { "epoch": 1.1024449468307795, "grad_norm": 0.29003995656967163, "learning_rate": 0.0001404776796565943, "loss": 11.6759, "step": 52666 }, { "epoch": 1.1024658795947417, "grad_norm": 0.36167681217193604, "learning_rate": 0.0001404756747607752, "loss": 11.6733, "step": 52667 }, { "epoch": 1.102486812358704, "grad_norm": 0.3247568607330322, "learning_rate": 0.00014047366984549866, "loss": 11.6665, "step": 52668 }, { "epoch": 1.102507745122666, "grad_norm": 0.2867002487182617, "learning_rate": 0.0001404716649107656, "loss": 11.6682, "step": 52669 }, { "epoch": 1.1025286778866281, "grad_norm": 0.33736467361450195, "learning_rate": 0.000140469659956577, "loss": 11.67, "step": 52670 }, { "epoch": 1.1025496106505903, "grad_norm": 0.30176839232444763, "learning_rate": 0.00014046765498293385, "loss": 11.6588, "step": 52671 }, { "epoch": 1.1025705434145525, "grad_norm": 0.38508617877960205, "learning_rate": 0.00014046564998983704, "loss": 11.6528, "step": 52672 }, { "epoch": 1.1025914761785147, "grad_norm": 0.32289689779281616, "learning_rate": 0.0001404636449772876, "loss": 11.6721, "step": 52673 }, { "epoch": 1.1026124089424767, "grad_norm": 0.2933870255947113, "learning_rate": 0.0001404616399452865, "loss": 11.647, "step": 52674 }, { "epoch": 1.102633341706439, "grad_norm": 0.2840676009654999, "learning_rate": 0.00014045963489383464, "loss": 11.6673, "step": 52675 }, { "epoch": 1.102654274470401, "grad_norm": 0.34674641489982605, "learning_rate": 0.00014045762982293304, "loss": 11.661, "step": 52676 }, { "epoch": 1.1026752072343633, "grad_norm": 0.32354190945625305, "learning_rate": 0.00014045562473258264, "loss": 11.6573, "step": 52677 }, { "epoch": 1.1026961399983253, "grad_norm": 0.38367927074432373, "learning_rate": 0.00014045361962278444, "loss": 11.6629, "step": 52678 }, { "epoch": 1.1027170727622875, "grad_norm": 0.3132339417934418, "learning_rate": 0.00014045161449353935, "loss": 11.6802, "step": 52679 }, { "epoch": 1.1027380055262497, "grad_norm": 0.3430088758468628, "learning_rate": 0.00014044960934484835, "loss": 11.6715, "step": 52680 }, { "epoch": 1.1027589382902119, "grad_norm": 0.27483102679252625, "learning_rate": 0.00014044760417671244, "loss": 11.689, "step": 52681 }, { "epoch": 1.102779871054174, "grad_norm": 0.32269197702407837, "learning_rate": 0.00014044559898913252, "loss": 11.6689, "step": 52682 }, { "epoch": 1.102800803818136, "grad_norm": 0.2876645624637604, "learning_rate": 0.00014044359378210963, "loss": 11.6679, "step": 52683 }, { "epoch": 1.1028217365820983, "grad_norm": 0.38082945346832275, "learning_rate": 0.00014044158855564467, "loss": 11.6791, "step": 52684 }, { "epoch": 1.1028426693460605, "grad_norm": 0.33317917585372925, "learning_rate": 0.00014043958330973863, "loss": 11.6656, "step": 52685 }, { "epoch": 1.1028636021100227, "grad_norm": 0.3069693148136139, "learning_rate": 0.00014043757804439247, "loss": 11.6648, "step": 52686 }, { "epoch": 1.1028845348739849, "grad_norm": 0.3399289548397064, "learning_rate": 0.00014043557275960716, "loss": 11.6751, "step": 52687 }, { "epoch": 1.1029054676379468, "grad_norm": 0.3959636986255646, "learning_rate": 0.00014043356745538368, "loss": 11.6912, "step": 52688 }, { "epoch": 1.102926400401909, "grad_norm": 0.2731887102127075, "learning_rate": 0.00014043156213172292, "loss": 11.6683, "step": 52689 }, { "epoch": 1.1029473331658712, "grad_norm": 0.3278341293334961, "learning_rate": 0.00014042955678862594, "loss": 11.6624, "step": 52690 }, { "epoch": 1.1029682659298334, "grad_norm": 0.34294191002845764, "learning_rate": 0.00014042755142609369, "loss": 11.6631, "step": 52691 }, { "epoch": 1.1029891986937956, "grad_norm": 0.2760562300682068, "learning_rate": 0.00014042554604412703, "loss": 11.6608, "step": 52692 }, { "epoch": 1.1030101314577576, "grad_norm": 0.39340707659721375, "learning_rate": 0.00014042354064272705, "loss": 11.6715, "step": 52693 }, { "epoch": 1.1030310642217198, "grad_norm": 0.30141180753707886, "learning_rate": 0.00014042153522189464, "loss": 11.665, "step": 52694 }, { "epoch": 1.103051996985682, "grad_norm": 0.3015844225883484, "learning_rate": 0.0001404195297816308, "loss": 11.6637, "step": 52695 }, { "epoch": 1.1030729297496442, "grad_norm": 0.3478715121746063, "learning_rate": 0.0001404175243219365, "loss": 11.6726, "step": 52696 }, { "epoch": 1.1030938625136062, "grad_norm": 0.3897581696510315, "learning_rate": 0.00014041551884281264, "loss": 11.6613, "step": 52697 }, { "epoch": 1.1031147952775684, "grad_norm": 0.35480669140815735, "learning_rate": 0.0001404135133442603, "loss": 11.669, "step": 52698 }, { "epoch": 1.1031357280415306, "grad_norm": 0.2994900047779083, "learning_rate": 0.0001404115078262803, "loss": 11.662, "step": 52699 }, { "epoch": 1.1031566608054928, "grad_norm": 0.2645837366580963, "learning_rate": 0.00014040950228887372, "loss": 11.6738, "step": 52700 }, { "epoch": 1.103177593569455, "grad_norm": 0.29317206144332886, "learning_rate": 0.00014040749673204149, "loss": 11.6644, "step": 52701 }, { "epoch": 1.103198526333417, "grad_norm": 0.2968899607658386, "learning_rate": 0.00014040549115578452, "loss": 11.6689, "step": 52702 }, { "epoch": 1.1032194590973792, "grad_norm": 0.41026362776756287, "learning_rate": 0.0001404034855601039, "loss": 11.6799, "step": 52703 }, { "epoch": 1.1032403918613414, "grad_norm": 0.3123888671398163, "learning_rate": 0.00014040147994500045, "loss": 11.6565, "step": 52704 }, { "epoch": 1.1032613246253036, "grad_norm": 0.30531156063079834, "learning_rate": 0.0001403994743104752, "loss": 11.6718, "step": 52705 }, { "epoch": 1.1032822573892658, "grad_norm": 0.36834052205085754, "learning_rate": 0.00014039746865652912, "loss": 11.6682, "step": 52706 }, { "epoch": 1.1033031901532278, "grad_norm": 0.28369760513305664, "learning_rate": 0.0001403954629831632, "loss": 11.6567, "step": 52707 }, { "epoch": 1.10332412291719, "grad_norm": 0.332781046628952, "learning_rate": 0.00014039345729037833, "loss": 11.6713, "step": 52708 }, { "epoch": 1.1033450556811522, "grad_norm": 0.3412027359008789, "learning_rate": 0.00014039145157817553, "loss": 11.6851, "step": 52709 }, { "epoch": 1.1033659884451144, "grad_norm": 0.29128971695899963, "learning_rate": 0.00014038944584655576, "loss": 11.6613, "step": 52710 }, { "epoch": 1.1033869212090766, "grad_norm": 0.2696268558502197, "learning_rate": 0.00014038744009551996, "loss": 11.6559, "step": 52711 }, { "epoch": 1.1034078539730385, "grad_norm": 0.37933704257011414, "learning_rate": 0.00014038543432506913, "loss": 11.6756, "step": 52712 }, { "epoch": 1.1034287867370007, "grad_norm": 0.38068917393684387, "learning_rate": 0.00014038342853520418, "loss": 11.6798, "step": 52713 }, { "epoch": 1.103449719500963, "grad_norm": 0.5577476620674133, "learning_rate": 0.00014038142272592615, "loss": 11.6894, "step": 52714 }, { "epoch": 1.1034706522649251, "grad_norm": 0.2968767583370209, "learning_rate": 0.0001403794168972359, "loss": 11.6685, "step": 52715 }, { "epoch": 1.1034915850288871, "grad_norm": 0.3500835597515106, "learning_rate": 0.0001403774110491345, "loss": 11.6612, "step": 52716 }, { "epoch": 1.1035125177928493, "grad_norm": 0.2848687469959259, "learning_rate": 0.00014037540518162286, "loss": 11.6628, "step": 52717 }, { "epoch": 1.1035334505568115, "grad_norm": 0.36061176657676697, "learning_rate": 0.00014037339929470194, "loss": 11.6722, "step": 52718 }, { "epoch": 1.1035543833207737, "grad_norm": 0.2984113395214081, "learning_rate": 0.00014037139338837274, "loss": 11.6696, "step": 52719 }, { "epoch": 1.103575316084736, "grad_norm": 0.28481096029281616, "learning_rate": 0.0001403693874626362, "loss": 11.6709, "step": 52720 }, { "epoch": 1.103596248848698, "grad_norm": 0.2858053147792816, "learning_rate": 0.0001403673815174933, "loss": 11.6896, "step": 52721 }, { "epoch": 1.10361718161266, "grad_norm": 0.37124690413475037, "learning_rate": 0.00014036537555294493, "loss": 11.6759, "step": 52722 }, { "epoch": 1.1036381143766223, "grad_norm": 0.2524345815181732, "learning_rate": 0.00014036336956899215, "loss": 11.6684, "step": 52723 }, { "epoch": 1.1036590471405845, "grad_norm": 0.3162146508693695, "learning_rate": 0.0001403613635656359, "loss": 11.6716, "step": 52724 }, { "epoch": 1.1036799799045467, "grad_norm": 0.36577776074409485, "learning_rate": 0.00014035935754287712, "loss": 11.67, "step": 52725 }, { "epoch": 1.1037009126685087, "grad_norm": 0.3104992210865021, "learning_rate": 0.00014035735150071682, "loss": 11.6493, "step": 52726 }, { "epoch": 1.1037218454324709, "grad_norm": 0.3085078001022339, "learning_rate": 0.0001403553454391559, "loss": 11.6554, "step": 52727 }, { "epoch": 1.103742778196433, "grad_norm": 0.3045879304409027, "learning_rate": 0.00014035333935819537, "loss": 11.6757, "step": 52728 }, { "epoch": 1.1037637109603953, "grad_norm": 0.392776221036911, "learning_rate": 0.00014035133325783616, "loss": 11.6936, "step": 52729 }, { "epoch": 1.1037846437243575, "grad_norm": 0.3909708559513092, "learning_rate": 0.00014034932713807927, "loss": 11.6793, "step": 52730 }, { "epoch": 1.1038055764883195, "grad_norm": 0.3621140122413635, "learning_rate": 0.00014034732099892566, "loss": 11.6584, "step": 52731 }, { "epoch": 1.1038265092522817, "grad_norm": 0.3151837885379791, "learning_rate": 0.00014034531484037626, "loss": 11.6721, "step": 52732 }, { "epoch": 1.1038474420162439, "grad_norm": 0.30214354395866394, "learning_rate": 0.0001403433086624321, "loss": 11.6768, "step": 52733 }, { "epoch": 1.103868374780206, "grad_norm": 0.3809814751148224, "learning_rate": 0.00014034130246509406, "loss": 11.6791, "step": 52734 }, { "epoch": 1.103889307544168, "grad_norm": 0.36352336406707764, "learning_rate": 0.00014033929624836317, "loss": 11.6714, "step": 52735 }, { "epoch": 1.1039102403081302, "grad_norm": 0.36538681387901306, "learning_rate": 0.00014033729001224036, "loss": 11.6663, "step": 52736 }, { "epoch": 1.1039311730720924, "grad_norm": 0.2886716425418854, "learning_rate": 0.00014033528375672662, "loss": 11.674, "step": 52737 }, { "epoch": 1.1039521058360546, "grad_norm": 0.2599571645259857, "learning_rate": 0.0001403332774818229, "loss": 11.6757, "step": 52738 }, { "epoch": 1.1039730386000168, "grad_norm": 0.29658186435699463, "learning_rate": 0.00014033127118753016, "loss": 11.6469, "step": 52739 }, { "epoch": 1.1039939713639788, "grad_norm": 0.3200394809246063, "learning_rate": 0.00014032926487384935, "loss": 11.6723, "step": 52740 }, { "epoch": 1.104014904127941, "grad_norm": 0.26018667221069336, "learning_rate": 0.0001403272585407815, "loss": 11.6629, "step": 52741 }, { "epoch": 1.1040358368919032, "grad_norm": 0.29503732919692993, "learning_rate": 0.0001403252521883275, "loss": 11.6579, "step": 52742 }, { "epoch": 1.1040567696558654, "grad_norm": 0.33036547899246216, "learning_rate": 0.00014032324581648832, "loss": 11.6701, "step": 52743 }, { "epoch": 1.1040777024198276, "grad_norm": 0.3632851541042328, "learning_rate": 0.00014032123942526496, "loss": 11.6646, "step": 52744 }, { "epoch": 1.1040986351837896, "grad_norm": 0.3159191310405731, "learning_rate": 0.0001403192330146584, "loss": 11.6539, "step": 52745 }, { "epoch": 1.1041195679477518, "grad_norm": 0.3423466384410858, "learning_rate": 0.00014031722658466956, "loss": 11.6833, "step": 52746 }, { "epoch": 1.104140500711714, "grad_norm": 0.4239169657230377, "learning_rate": 0.00014031522013529943, "loss": 11.6862, "step": 52747 }, { "epoch": 1.1041614334756762, "grad_norm": 0.3542385399341583, "learning_rate": 0.00014031321366654895, "loss": 11.6562, "step": 52748 }, { "epoch": 1.1041823662396384, "grad_norm": 0.31539350748062134, "learning_rate": 0.00014031120717841912, "loss": 11.6578, "step": 52749 }, { "epoch": 1.1042032990036004, "grad_norm": 0.2826935052871704, "learning_rate": 0.00014030920067091088, "loss": 11.6634, "step": 52750 }, { "epoch": 1.1042242317675626, "grad_norm": 0.27940550446510315, "learning_rate": 0.00014030719414402515, "loss": 11.6637, "step": 52751 }, { "epoch": 1.1042451645315248, "grad_norm": 0.3000566363334656, "learning_rate": 0.000140305187597763, "loss": 11.644, "step": 52752 }, { "epoch": 1.104266097295487, "grad_norm": 0.3749880790710449, "learning_rate": 0.00014030318103212531, "loss": 11.6696, "step": 52753 }, { "epoch": 1.104287030059449, "grad_norm": 0.42498597502708435, "learning_rate": 0.00014030117444711312, "loss": 11.682, "step": 52754 }, { "epoch": 1.1043079628234111, "grad_norm": 0.2686092257499695, "learning_rate": 0.0001402991678427273, "loss": 11.6663, "step": 52755 }, { "epoch": 1.1043288955873733, "grad_norm": 0.272840678691864, "learning_rate": 0.0001402971612189689, "loss": 11.6718, "step": 52756 }, { "epoch": 1.1043498283513355, "grad_norm": 0.5127471685409546, "learning_rate": 0.0001402951545758388, "loss": 11.6835, "step": 52757 }, { "epoch": 1.1043707611152977, "grad_norm": 0.27056387066841125, "learning_rate": 0.00014029314791333804, "loss": 11.6621, "step": 52758 }, { "epoch": 1.1043916938792597, "grad_norm": 0.3116213381290436, "learning_rate": 0.00014029114123146756, "loss": 11.6737, "step": 52759 }, { "epoch": 1.104412626643222, "grad_norm": 0.293730229139328, "learning_rate": 0.0001402891345302283, "loss": 11.6715, "step": 52760 }, { "epoch": 1.1044335594071841, "grad_norm": 0.2626979649066925, "learning_rate": 0.00014028712780962126, "loss": 11.6828, "step": 52761 }, { "epoch": 1.1044544921711463, "grad_norm": 0.3220568597316742, "learning_rate": 0.0001402851210696474, "loss": 11.6802, "step": 52762 }, { "epoch": 1.1044754249351085, "grad_norm": 0.3108178377151489, "learning_rate": 0.00014028311431030764, "loss": 11.6775, "step": 52763 }, { "epoch": 1.1044963576990705, "grad_norm": 0.2595142126083374, "learning_rate": 0.00014028110753160303, "loss": 11.6659, "step": 52764 }, { "epoch": 1.1045172904630327, "grad_norm": 0.4670110046863556, "learning_rate": 0.00014027910073353446, "loss": 11.6762, "step": 52765 }, { "epoch": 1.104538223226995, "grad_norm": 0.290020614862442, "learning_rate": 0.00014027709391610293, "loss": 11.6876, "step": 52766 }, { "epoch": 1.104559155990957, "grad_norm": 0.3136691749095917, "learning_rate": 0.00014027508707930937, "loss": 11.6708, "step": 52767 }, { "epoch": 1.1045800887549193, "grad_norm": 0.3947862982749939, "learning_rate": 0.00014027308022315478, "loss": 11.66, "step": 52768 }, { "epoch": 1.1046010215188813, "grad_norm": 0.3134635388851166, "learning_rate": 0.0001402710733476401, "loss": 11.6651, "step": 52769 }, { "epoch": 1.1046219542828435, "grad_norm": 0.2878073751926422, "learning_rate": 0.00014026906645276633, "loss": 11.6769, "step": 52770 }, { "epoch": 1.1046428870468057, "grad_norm": 0.2927496135234833, "learning_rate": 0.0001402670595385344, "loss": 11.6819, "step": 52771 }, { "epoch": 1.1046638198107679, "grad_norm": 0.4194124937057495, "learning_rate": 0.0001402650526049453, "loss": 11.6666, "step": 52772 }, { "epoch": 1.1046847525747299, "grad_norm": 0.29655903577804565, "learning_rate": 0.00014026304565199996, "loss": 11.6731, "step": 52773 }, { "epoch": 1.104705685338692, "grad_norm": 0.4115387797355652, "learning_rate": 0.0001402610386796994, "loss": 11.679, "step": 52774 }, { "epoch": 1.1047266181026543, "grad_norm": 0.43663594126701355, "learning_rate": 0.0001402590316880445, "loss": 11.6562, "step": 52775 }, { "epoch": 1.1047475508666165, "grad_norm": 0.3490334451198578, "learning_rate": 0.00014025702467703634, "loss": 11.6604, "step": 52776 }, { "epoch": 1.1047684836305787, "grad_norm": 0.3350905478000641, "learning_rate": 0.00014025501764667578, "loss": 11.6783, "step": 52777 }, { "epoch": 1.1047894163945406, "grad_norm": 0.22929048538208008, "learning_rate": 0.00014025301059696386, "loss": 11.6506, "step": 52778 }, { "epoch": 1.1048103491585028, "grad_norm": 0.366024374961853, "learning_rate": 0.00014025100352790148, "loss": 11.6689, "step": 52779 }, { "epoch": 1.104831281922465, "grad_norm": 0.29286569356918335, "learning_rate": 0.00014024899643948964, "loss": 11.6682, "step": 52780 }, { "epoch": 1.1048522146864272, "grad_norm": 0.33271291851997375, "learning_rate": 0.00014024698933172932, "loss": 11.6604, "step": 52781 }, { "epoch": 1.1048731474503894, "grad_norm": 0.38346177339553833, "learning_rate": 0.00014024498220462144, "loss": 11.6823, "step": 52782 }, { "epoch": 1.1048940802143514, "grad_norm": 0.29412296414375305, "learning_rate": 0.000140242975058167, "loss": 11.6635, "step": 52783 }, { "epoch": 1.1049150129783136, "grad_norm": 0.3128131628036499, "learning_rate": 0.00014024096789236695, "loss": 11.6827, "step": 52784 }, { "epoch": 1.1049359457422758, "grad_norm": 0.28624796867370605, "learning_rate": 0.00014023896070722227, "loss": 11.6701, "step": 52785 }, { "epoch": 1.104956878506238, "grad_norm": 0.4679386019706726, "learning_rate": 0.00014023695350273393, "loss": 11.6787, "step": 52786 }, { "epoch": 1.1049778112702002, "grad_norm": 0.3895041048526764, "learning_rate": 0.00014023494627890282, "loss": 11.6636, "step": 52787 }, { "epoch": 1.1049987440341622, "grad_norm": 0.3978346288204193, "learning_rate": 0.00014023293903573002, "loss": 11.6853, "step": 52788 }, { "epoch": 1.1050196767981244, "grad_norm": 0.336185485124588, "learning_rate": 0.00014023093177321645, "loss": 11.6691, "step": 52789 }, { "epoch": 1.1050406095620866, "grad_norm": 0.314315527677536, "learning_rate": 0.00014022892449136302, "loss": 11.6711, "step": 52790 }, { "epoch": 1.1050615423260488, "grad_norm": 0.36941099166870117, "learning_rate": 0.00014022691719017076, "loss": 11.6885, "step": 52791 }, { "epoch": 1.1050824750900108, "grad_norm": 0.2914755046367645, "learning_rate": 0.0001402249098696406, "loss": 11.6613, "step": 52792 }, { "epoch": 1.105103407853973, "grad_norm": 0.29676222801208496, "learning_rate": 0.00014022290252977356, "loss": 11.6742, "step": 52793 }, { "epoch": 1.1051243406179352, "grad_norm": 0.2953526973724365, "learning_rate": 0.00014022089517057052, "loss": 11.6693, "step": 52794 }, { "epoch": 1.1051452733818974, "grad_norm": 0.3456750810146332, "learning_rate": 0.00014021888779203252, "loss": 11.6823, "step": 52795 }, { "epoch": 1.1051662061458596, "grad_norm": 0.2987879812717438, "learning_rate": 0.0001402168803941605, "loss": 11.6748, "step": 52796 }, { "epoch": 1.1051871389098216, "grad_norm": 0.32579731941223145, "learning_rate": 0.0001402148729769554, "loss": 11.6716, "step": 52797 }, { "epoch": 1.1052080716737838, "grad_norm": 0.3769213855266571, "learning_rate": 0.00014021286554041822, "loss": 11.672, "step": 52798 }, { "epoch": 1.105229004437746, "grad_norm": 0.4083387851715088, "learning_rate": 0.0001402108580845499, "loss": 11.6659, "step": 52799 }, { "epoch": 1.1052499372017082, "grad_norm": 0.27819159626960754, "learning_rate": 0.00014020885060935144, "loss": 11.6726, "step": 52800 }, { "epoch": 1.1052708699656704, "grad_norm": 0.48860371112823486, "learning_rate": 0.00014020684311482373, "loss": 11.6573, "step": 52801 }, { "epoch": 1.1052918027296323, "grad_norm": 0.3224917948246002, "learning_rate": 0.0001402048356009678, "loss": 11.6625, "step": 52802 }, { "epoch": 1.1053127354935945, "grad_norm": 0.38871705532073975, "learning_rate": 0.00014020282806778459, "loss": 11.6649, "step": 52803 }, { "epoch": 1.1053336682575567, "grad_norm": 0.2903287708759308, "learning_rate": 0.0001402008205152751, "loss": 11.6582, "step": 52804 }, { "epoch": 1.105354601021519, "grad_norm": 0.44384944438934326, "learning_rate": 0.00014019881294344028, "loss": 11.6629, "step": 52805 }, { "epoch": 1.1053755337854811, "grad_norm": 0.40345892310142517, "learning_rate": 0.00014019680535228105, "loss": 11.6504, "step": 52806 }, { "epoch": 1.1053964665494431, "grad_norm": 0.33592453598976135, "learning_rate": 0.00014019479774179843, "loss": 11.6709, "step": 52807 }, { "epoch": 1.1054173993134053, "grad_norm": 0.3663105368614197, "learning_rate": 0.00014019279011199338, "loss": 11.6461, "step": 52808 }, { "epoch": 1.1054383320773675, "grad_norm": 0.2955307960510254, "learning_rate": 0.00014019078246286682, "loss": 11.6762, "step": 52809 }, { "epoch": 1.1054592648413297, "grad_norm": 0.3368545174598694, "learning_rate": 0.00014018877479441975, "loss": 11.6633, "step": 52810 }, { "epoch": 1.1054801976052917, "grad_norm": 0.3347463011741638, "learning_rate": 0.00014018676710665314, "loss": 11.6679, "step": 52811 }, { "epoch": 1.105501130369254, "grad_norm": 0.38288164138793945, "learning_rate": 0.00014018475939956795, "loss": 11.6715, "step": 52812 }, { "epoch": 1.105522063133216, "grad_norm": 0.34335097670555115, "learning_rate": 0.00014018275167316515, "loss": 11.6583, "step": 52813 }, { "epoch": 1.1055429958971783, "grad_norm": 0.2839457094669342, "learning_rate": 0.00014018074392744566, "loss": 11.6632, "step": 52814 }, { "epoch": 1.1055639286611405, "grad_norm": 0.29973745346069336, "learning_rate": 0.00014017873616241052, "loss": 11.6687, "step": 52815 }, { "epoch": 1.1055848614251025, "grad_norm": 0.3506973683834076, "learning_rate": 0.00014017672837806063, "loss": 11.6619, "step": 52816 }, { "epoch": 1.1056057941890647, "grad_norm": 0.3406185805797577, "learning_rate": 0.000140174720574397, "loss": 11.6617, "step": 52817 }, { "epoch": 1.1056267269530269, "grad_norm": 0.36191633343696594, "learning_rate": 0.00014017271275142056, "loss": 11.6703, "step": 52818 }, { "epoch": 1.105647659716989, "grad_norm": 0.25546973943710327, "learning_rate": 0.00014017070490913232, "loss": 11.6778, "step": 52819 }, { "epoch": 1.1056685924809513, "grad_norm": 0.2892046272754669, "learning_rate": 0.0001401686970475332, "loss": 11.6686, "step": 52820 }, { "epoch": 1.1056895252449133, "grad_norm": 0.3858703672885895, "learning_rate": 0.00014016668916662416, "loss": 11.6641, "step": 52821 }, { "epoch": 1.1057104580088755, "grad_norm": 0.3337877690792084, "learning_rate": 0.00014016468126640624, "loss": 11.6654, "step": 52822 }, { "epoch": 1.1057313907728377, "grad_norm": 0.2879011034965515, "learning_rate": 0.00014016267334688028, "loss": 11.6717, "step": 52823 }, { "epoch": 1.1057523235367999, "grad_norm": 0.311382532119751, "learning_rate": 0.00014016066540804736, "loss": 11.6642, "step": 52824 }, { "epoch": 1.105773256300762, "grad_norm": 0.2835595905780792, "learning_rate": 0.0001401586574499084, "loss": 11.6547, "step": 52825 }, { "epoch": 1.105794189064724, "grad_norm": 0.31688791513442993, "learning_rate": 0.0001401566494724644, "loss": 11.6705, "step": 52826 }, { "epoch": 1.1058151218286862, "grad_norm": 0.2788849174976349, "learning_rate": 0.00014015464147571627, "loss": 11.6694, "step": 52827 }, { "epoch": 1.1058360545926484, "grad_norm": 0.25640517473220825, "learning_rate": 0.00014015263345966495, "loss": 11.6682, "step": 52828 }, { "epoch": 1.1058569873566106, "grad_norm": 0.37004873156547546, "learning_rate": 0.0001401506254243115, "loss": 11.6727, "step": 52829 }, { "epoch": 1.1058779201205726, "grad_norm": 0.3069689869880676, "learning_rate": 0.00014014861736965685, "loss": 11.6669, "step": 52830 }, { "epoch": 1.1058988528845348, "grad_norm": 0.2918190062046051, "learning_rate": 0.00014014660929570193, "loss": 11.6772, "step": 52831 }, { "epoch": 1.105919785648497, "grad_norm": 0.33153221011161804, "learning_rate": 0.00014014460120244778, "loss": 11.6767, "step": 52832 }, { "epoch": 1.1059407184124592, "grad_norm": 0.26404550671577454, "learning_rate": 0.00014014259308989526, "loss": 11.6501, "step": 52833 }, { "epoch": 1.1059616511764214, "grad_norm": 0.27507540583610535, "learning_rate": 0.00014014058495804538, "loss": 11.6562, "step": 52834 }, { "epoch": 1.1059825839403834, "grad_norm": 0.33085599541664124, "learning_rate": 0.00014013857680689917, "loss": 11.6724, "step": 52835 }, { "epoch": 1.1060035167043456, "grad_norm": 0.38517656922340393, "learning_rate": 0.0001401365686364575, "loss": 11.6608, "step": 52836 }, { "epoch": 1.1060244494683078, "grad_norm": 0.38216057419776917, "learning_rate": 0.00014013456044672143, "loss": 11.6696, "step": 52837 }, { "epoch": 1.10604538223227, "grad_norm": 0.27637484669685364, "learning_rate": 0.0001401325522376918, "loss": 11.6551, "step": 52838 }, { "epoch": 1.1060663149962322, "grad_norm": 0.37334710359573364, "learning_rate": 0.0001401305440093697, "loss": 11.6816, "step": 52839 }, { "epoch": 1.1060872477601942, "grad_norm": 0.3397057354450226, "learning_rate": 0.00014012853576175603, "loss": 11.6786, "step": 52840 }, { "epoch": 1.1061081805241564, "grad_norm": 0.36828097701072693, "learning_rate": 0.00014012652749485175, "loss": 11.6745, "step": 52841 }, { "epoch": 1.1061291132881186, "grad_norm": 0.2988438606262207, "learning_rate": 0.00014012451920865788, "loss": 11.6697, "step": 52842 }, { "epoch": 1.1061500460520808, "grad_norm": 0.3054456412792206, "learning_rate": 0.00014012251090317533, "loss": 11.6704, "step": 52843 }, { "epoch": 1.106170978816043, "grad_norm": 0.2710445821285248, "learning_rate": 0.0001401205025784051, "loss": 11.6743, "step": 52844 }, { "epoch": 1.106191911580005, "grad_norm": 0.2750149965286255, "learning_rate": 0.0001401184942343481, "loss": 11.6608, "step": 52845 }, { "epoch": 1.1062128443439672, "grad_norm": 0.3049279451370239, "learning_rate": 0.00014011648587100535, "loss": 11.6543, "step": 52846 }, { "epoch": 1.1062337771079294, "grad_norm": 0.303354948759079, "learning_rate": 0.00014011447748837782, "loss": 11.6874, "step": 52847 }, { "epoch": 1.1062547098718916, "grad_norm": 0.3265414237976074, "learning_rate": 0.00014011246908646644, "loss": 11.6813, "step": 52848 }, { "epoch": 1.1062756426358535, "grad_norm": 0.30084899067878723, "learning_rate": 0.00014011046066527222, "loss": 11.6621, "step": 52849 }, { "epoch": 1.1062965753998157, "grad_norm": 0.29900696873664856, "learning_rate": 0.00014010845222479608, "loss": 11.6514, "step": 52850 }, { "epoch": 1.106317508163778, "grad_norm": 0.2889713943004608, "learning_rate": 0.000140106443765039, "loss": 11.6652, "step": 52851 }, { "epoch": 1.1063384409277401, "grad_norm": 0.29621732234954834, "learning_rate": 0.00014010443528600196, "loss": 11.6504, "step": 52852 }, { "epoch": 1.1063593736917023, "grad_norm": 0.35729560256004333, "learning_rate": 0.00014010242678768592, "loss": 11.6642, "step": 52853 }, { "epoch": 1.1063803064556643, "grad_norm": 0.3133454918861389, "learning_rate": 0.0001401004182700918, "loss": 11.6584, "step": 52854 }, { "epoch": 1.1064012392196265, "grad_norm": 0.317655473947525, "learning_rate": 0.00014009840973322065, "loss": 11.67, "step": 52855 }, { "epoch": 1.1064221719835887, "grad_norm": 0.3111875057220459, "learning_rate": 0.00014009640117707334, "loss": 11.6707, "step": 52856 }, { "epoch": 1.106443104747551, "grad_norm": 0.2546015977859497, "learning_rate": 0.00014009439260165093, "loss": 11.6747, "step": 52857 }, { "epoch": 1.1064640375115131, "grad_norm": 0.2988741993904114, "learning_rate": 0.00014009238400695433, "loss": 11.6551, "step": 52858 }, { "epoch": 1.106484970275475, "grad_norm": 0.34887176752090454, "learning_rate": 0.00014009037539298454, "loss": 11.661, "step": 52859 }, { "epoch": 1.1065059030394373, "grad_norm": 0.315372109413147, "learning_rate": 0.00014008836675974247, "loss": 11.6693, "step": 52860 }, { "epoch": 1.1065268358033995, "grad_norm": 0.2681266963481903, "learning_rate": 0.0001400863581072291, "loss": 11.6644, "step": 52861 }, { "epoch": 1.1065477685673617, "grad_norm": 0.29984140396118164, "learning_rate": 0.00014008434943544546, "loss": 11.6616, "step": 52862 }, { "epoch": 1.106568701331324, "grad_norm": 0.3193132281303406, "learning_rate": 0.00014008234074439245, "loss": 11.6748, "step": 52863 }, { "epoch": 1.1065896340952859, "grad_norm": 0.39504119753837585, "learning_rate": 0.00014008033203407108, "loss": 11.6778, "step": 52864 }, { "epoch": 1.106610566859248, "grad_norm": 0.32884714007377625, "learning_rate": 0.00014007832330448225, "loss": 11.6755, "step": 52865 }, { "epoch": 1.1066314996232103, "grad_norm": 0.31551966071128845, "learning_rate": 0.000140076314555627, "loss": 11.6638, "step": 52866 }, { "epoch": 1.1066524323871725, "grad_norm": 0.303882360458374, "learning_rate": 0.00014007430578750625, "loss": 11.6733, "step": 52867 }, { "epoch": 1.1066733651511345, "grad_norm": 0.37745052576065063, "learning_rate": 0.00014007229700012096, "loss": 11.6776, "step": 52868 }, { "epoch": 1.1066942979150967, "grad_norm": 0.3342004120349884, "learning_rate": 0.00014007028819347213, "loss": 11.6705, "step": 52869 }, { "epoch": 1.1067152306790589, "grad_norm": 0.2853834331035614, "learning_rate": 0.00014006827936756072, "loss": 11.6716, "step": 52870 }, { "epoch": 1.106736163443021, "grad_norm": 0.3096003234386444, "learning_rate": 0.00014006627052238766, "loss": 11.6546, "step": 52871 }, { "epoch": 1.1067570962069833, "grad_norm": 0.33295291662216187, "learning_rate": 0.00014006426165795398, "loss": 11.6662, "step": 52872 }, { "epoch": 1.1067780289709452, "grad_norm": 0.2873440682888031, "learning_rate": 0.00014006225277426058, "loss": 11.664, "step": 52873 }, { "epoch": 1.1067989617349074, "grad_norm": 0.2679832875728607, "learning_rate": 0.00014006024387130844, "loss": 11.6825, "step": 52874 }, { "epoch": 1.1068198944988696, "grad_norm": 0.3161298930644989, "learning_rate": 0.00014005823494909857, "loss": 11.6648, "step": 52875 }, { "epoch": 1.1068408272628318, "grad_norm": 0.37301117181777954, "learning_rate": 0.00014005622600763188, "loss": 11.6863, "step": 52876 }, { "epoch": 1.106861760026794, "grad_norm": 0.2759261429309845, "learning_rate": 0.0001400542170469094, "loss": 11.6535, "step": 52877 }, { "epoch": 1.106882692790756, "grad_norm": 0.2815215587615967, "learning_rate": 0.00014005220806693198, "loss": 11.6702, "step": 52878 }, { "epoch": 1.1069036255547182, "grad_norm": 0.28921815752983093, "learning_rate": 0.0001400501990677007, "loss": 11.6607, "step": 52879 }, { "epoch": 1.1069245583186804, "grad_norm": 0.43931761384010315, "learning_rate": 0.0001400481900492165, "loss": 11.6891, "step": 52880 }, { "epoch": 1.1069454910826426, "grad_norm": 0.37363073229789734, "learning_rate": 0.00014004618101148033, "loss": 11.6759, "step": 52881 }, { "epoch": 1.1069664238466048, "grad_norm": 0.34994250535964966, "learning_rate": 0.00014004417195449315, "loss": 11.6717, "step": 52882 }, { "epoch": 1.1069873566105668, "grad_norm": 0.3728226125240326, "learning_rate": 0.0001400421628782559, "loss": 11.67, "step": 52883 }, { "epoch": 1.107008289374529, "grad_norm": 0.36347731947898865, "learning_rate": 0.00014004015378276963, "loss": 11.66, "step": 52884 }, { "epoch": 1.1070292221384912, "grad_norm": 0.28380879759788513, "learning_rate": 0.00014003814466803524, "loss": 11.6526, "step": 52885 }, { "epoch": 1.1070501549024534, "grad_norm": 0.30661457777023315, "learning_rate": 0.00014003613553405372, "loss": 11.6352, "step": 52886 }, { "epoch": 1.1070710876664154, "grad_norm": 0.3263031840324402, "learning_rate": 0.00014003412638082603, "loss": 11.6649, "step": 52887 }, { "epoch": 1.1070920204303776, "grad_norm": 0.26318639516830444, "learning_rate": 0.0001400321172083531, "loss": 11.6747, "step": 52888 }, { "epoch": 1.1071129531943398, "grad_norm": 0.3217693269252777, "learning_rate": 0.00014003010801663597, "loss": 11.6728, "step": 52889 }, { "epoch": 1.107133885958302, "grad_norm": 0.3261887729167938, "learning_rate": 0.00014002809880567552, "loss": 11.6646, "step": 52890 }, { "epoch": 1.1071548187222642, "grad_norm": 0.3854534924030304, "learning_rate": 0.00014002608957547282, "loss": 11.6642, "step": 52891 }, { "epoch": 1.1071757514862262, "grad_norm": 0.31976020336151123, "learning_rate": 0.00014002408032602872, "loss": 11.6684, "step": 52892 }, { "epoch": 1.1071966842501884, "grad_norm": 0.2919620871543884, "learning_rate": 0.0001400220710573443, "loss": 11.6677, "step": 52893 }, { "epoch": 1.1072176170141506, "grad_norm": 0.28388965129852295, "learning_rate": 0.0001400200617694204, "loss": 11.6451, "step": 52894 }, { "epoch": 1.1072385497781128, "grad_norm": 0.4031214416027069, "learning_rate": 0.0001400180524622581, "loss": 11.6738, "step": 52895 }, { "epoch": 1.107259482542075, "grad_norm": 0.3318447470664978, "learning_rate": 0.0001400160431358583, "loss": 11.6625, "step": 52896 }, { "epoch": 1.107280415306037, "grad_norm": 0.37637269496917725, "learning_rate": 0.000140014033790222, "loss": 11.6798, "step": 52897 }, { "epoch": 1.1073013480699991, "grad_norm": 0.4070817828178406, "learning_rate": 0.00014001202442535016, "loss": 11.6746, "step": 52898 }, { "epoch": 1.1073222808339613, "grad_norm": 0.41659119725227356, "learning_rate": 0.0001400100150412437, "loss": 11.6635, "step": 52899 }, { "epoch": 1.1073432135979235, "grad_norm": 0.3533268868923187, "learning_rate": 0.00014000800563790364, "loss": 11.6673, "step": 52900 }, { "epoch": 1.1073641463618857, "grad_norm": 0.2855740189552307, "learning_rate": 0.00014000599621533094, "loss": 11.6711, "step": 52901 }, { "epoch": 1.1073850791258477, "grad_norm": 0.398846834897995, "learning_rate": 0.00014000398677352656, "loss": 11.6706, "step": 52902 }, { "epoch": 1.10740601188981, "grad_norm": 0.41086170077323914, "learning_rate": 0.00014000197731249146, "loss": 11.6725, "step": 52903 }, { "epoch": 1.1074269446537721, "grad_norm": 0.3009680509567261, "learning_rate": 0.0001399999678322266, "loss": 11.681, "step": 52904 }, { "epoch": 1.1074478774177343, "grad_norm": 0.2503781318664551, "learning_rate": 0.00013999795833273293, "loss": 11.667, "step": 52905 }, { "epoch": 1.1074688101816963, "grad_norm": 0.3236522376537323, "learning_rate": 0.0001399959488140115, "loss": 11.6523, "step": 52906 }, { "epoch": 1.1074897429456585, "grad_norm": 0.30046942830085754, "learning_rate": 0.00013999393927606316, "loss": 11.6601, "step": 52907 }, { "epoch": 1.1075106757096207, "grad_norm": 0.2900260388851166, "learning_rate": 0.00013999192971888896, "loss": 11.648, "step": 52908 }, { "epoch": 1.107531608473583, "grad_norm": 0.2856646776199341, "learning_rate": 0.00013998992014248985, "loss": 11.6622, "step": 52909 }, { "epoch": 1.107552541237545, "grad_norm": 0.29781001806259155, "learning_rate": 0.00013998791054686677, "loss": 11.6751, "step": 52910 }, { "epoch": 1.107573474001507, "grad_norm": 0.20138640701770782, "learning_rate": 0.00013998590093202071, "loss": 11.6669, "step": 52911 }, { "epoch": 1.1075944067654693, "grad_norm": 0.2895720601081848, "learning_rate": 0.0001399838912979526, "loss": 11.6746, "step": 52912 }, { "epoch": 1.1076153395294315, "grad_norm": 0.2983962893486023, "learning_rate": 0.00013998188164466346, "loss": 11.6651, "step": 52913 }, { "epoch": 1.1076362722933937, "grad_norm": 0.34188011288642883, "learning_rate": 0.00013997987197215419, "loss": 11.6725, "step": 52914 }, { "epoch": 1.1076572050573559, "grad_norm": 0.32498452067375183, "learning_rate": 0.00013997786228042583, "loss": 11.6726, "step": 52915 }, { "epoch": 1.1076781378213179, "grad_norm": 0.31101280450820923, "learning_rate": 0.0001399758525694793, "loss": 11.6658, "step": 52916 }, { "epoch": 1.10769907058528, "grad_norm": 0.3866993188858032, "learning_rate": 0.00013997384283931556, "loss": 11.6742, "step": 52917 }, { "epoch": 1.1077200033492423, "grad_norm": 0.25095486640930176, "learning_rate": 0.00013997183308993562, "loss": 11.674, "step": 52918 }, { "epoch": 1.1077409361132045, "grad_norm": 0.45929449796676636, "learning_rate": 0.0001399698233213404, "loss": 11.6695, "step": 52919 }, { "epoch": 1.1077618688771667, "grad_norm": 0.2926838994026184, "learning_rate": 0.0001399678135335309, "loss": 11.6703, "step": 52920 }, { "epoch": 1.1077828016411286, "grad_norm": 0.4432401657104492, "learning_rate": 0.00013996580372650804, "loss": 11.6688, "step": 52921 }, { "epoch": 1.1078037344050908, "grad_norm": 0.3553953468799591, "learning_rate": 0.00013996379390027286, "loss": 11.6739, "step": 52922 }, { "epoch": 1.107824667169053, "grad_norm": 0.31695619225502014, "learning_rate": 0.00013996178405482627, "loss": 11.6695, "step": 52923 }, { "epoch": 1.1078455999330152, "grad_norm": 0.346664160490036, "learning_rate": 0.00013995977419016924, "loss": 11.6836, "step": 52924 }, { "epoch": 1.1078665326969772, "grad_norm": 0.3025587201118469, "learning_rate": 0.00013995776430630275, "loss": 11.6693, "step": 52925 }, { "epoch": 1.1078874654609394, "grad_norm": 0.39372435212135315, "learning_rate": 0.00013995575440322775, "loss": 11.6682, "step": 52926 }, { "epoch": 1.1079083982249016, "grad_norm": 0.28671011328697205, "learning_rate": 0.00013995374448094521, "loss": 11.6656, "step": 52927 }, { "epoch": 1.1079293309888638, "grad_norm": 0.2977496385574341, "learning_rate": 0.00013995173453945614, "loss": 11.6877, "step": 52928 }, { "epoch": 1.107950263752826, "grad_norm": 0.3301906883716583, "learning_rate": 0.00013994972457876146, "loss": 11.6504, "step": 52929 }, { "epoch": 1.107971196516788, "grad_norm": 0.6562764048576355, "learning_rate": 0.00013994771459886215, "loss": 11.6769, "step": 52930 }, { "epoch": 1.1079921292807502, "grad_norm": 0.28836509585380554, "learning_rate": 0.00013994570459975914, "loss": 11.6351, "step": 52931 }, { "epoch": 1.1080130620447124, "grad_norm": 0.2903119921684265, "learning_rate": 0.00013994369458145345, "loss": 11.6748, "step": 52932 }, { "epoch": 1.1080339948086746, "grad_norm": 0.4306957423686981, "learning_rate": 0.00013994168454394603, "loss": 11.6859, "step": 52933 }, { "epoch": 1.1080549275726368, "grad_norm": 0.30581337213516235, "learning_rate": 0.00013993967448723785, "loss": 11.6539, "step": 52934 }, { "epoch": 1.1080758603365988, "grad_norm": 0.3037790060043335, "learning_rate": 0.00013993766441132985, "loss": 11.6591, "step": 52935 }, { "epoch": 1.108096793100561, "grad_norm": 0.34265872836112976, "learning_rate": 0.00013993565431622302, "loss": 11.6604, "step": 52936 }, { "epoch": 1.1081177258645232, "grad_norm": 0.33183762431144714, "learning_rate": 0.00013993364420191833, "loss": 11.6614, "step": 52937 }, { "epoch": 1.1081386586284854, "grad_norm": 0.3824891746044159, "learning_rate": 0.00013993163406841674, "loss": 11.649, "step": 52938 }, { "epoch": 1.1081595913924476, "grad_norm": 0.2698882818222046, "learning_rate": 0.0001399296239157192, "loss": 11.6606, "step": 52939 }, { "epoch": 1.1081805241564096, "grad_norm": 0.3194434344768524, "learning_rate": 0.0001399276137438267, "loss": 11.6708, "step": 52940 }, { "epoch": 1.1082014569203718, "grad_norm": 0.37577885389328003, "learning_rate": 0.00013992560355274017, "loss": 11.6642, "step": 52941 }, { "epoch": 1.108222389684334, "grad_norm": 0.42313751578330994, "learning_rate": 0.00013992359334246064, "loss": 11.6604, "step": 52942 }, { "epoch": 1.1082433224482962, "grad_norm": 0.40261438488960266, "learning_rate": 0.000139921583112989, "loss": 11.6674, "step": 52943 }, { "epoch": 1.1082642552122581, "grad_norm": 0.33458998799324036, "learning_rate": 0.00013991957286432627, "loss": 11.6669, "step": 52944 }, { "epoch": 1.1082851879762203, "grad_norm": 0.3319401443004608, "learning_rate": 0.0001399175625964734, "loss": 11.6629, "step": 52945 }, { "epoch": 1.1083061207401825, "grad_norm": 0.3413970470428467, "learning_rate": 0.00013991555230943138, "loss": 11.6676, "step": 52946 }, { "epoch": 1.1083270535041447, "grad_norm": 0.4032495319843292, "learning_rate": 0.00013991354200320114, "loss": 11.6841, "step": 52947 }, { "epoch": 1.108347986268107, "grad_norm": 0.3373487889766693, "learning_rate": 0.00013991153167778366, "loss": 11.6792, "step": 52948 }, { "epoch": 1.108368919032069, "grad_norm": 0.33414673805236816, "learning_rate": 0.0001399095213331799, "loss": 11.6714, "step": 52949 }, { "epoch": 1.1083898517960311, "grad_norm": 0.32378190755844116, "learning_rate": 0.00013990751096939086, "loss": 11.6737, "step": 52950 }, { "epoch": 1.1084107845599933, "grad_norm": 0.26127317547798157, "learning_rate": 0.00013990550058641746, "loss": 11.6774, "step": 52951 }, { "epoch": 1.1084317173239555, "grad_norm": 0.32737481594085693, "learning_rate": 0.00013990349018426067, "loss": 11.6682, "step": 52952 }, { "epoch": 1.1084526500879177, "grad_norm": 0.3059617578983307, "learning_rate": 0.00013990147976292148, "loss": 11.652, "step": 52953 }, { "epoch": 1.1084735828518797, "grad_norm": 0.2803017199039459, "learning_rate": 0.00013989946932240084, "loss": 11.6597, "step": 52954 }, { "epoch": 1.108494515615842, "grad_norm": 0.3185797929763794, "learning_rate": 0.00013989745886269976, "loss": 11.6585, "step": 52955 }, { "epoch": 1.108515448379804, "grad_norm": 0.3085678815841675, "learning_rate": 0.00013989544838381916, "loss": 11.6632, "step": 52956 }, { "epoch": 1.1085363811437663, "grad_norm": 0.5246484875679016, "learning_rate": 0.00013989343788576002, "loss": 11.6675, "step": 52957 }, { "epoch": 1.1085573139077285, "grad_norm": 0.3502921760082245, "learning_rate": 0.0001398914273685233, "loss": 11.6805, "step": 52958 }, { "epoch": 1.1085782466716905, "grad_norm": 0.3122944235801697, "learning_rate": 0.00013988941683210995, "loss": 11.677, "step": 52959 }, { "epoch": 1.1085991794356527, "grad_norm": 0.3264005184173584, "learning_rate": 0.00013988740627652097, "loss": 11.6872, "step": 52960 }, { "epoch": 1.1086201121996149, "grad_norm": 0.2967892587184906, "learning_rate": 0.00013988539570175735, "loss": 11.6578, "step": 52961 }, { "epoch": 1.108641044963577, "grad_norm": 0.2773084342479706, "learning_rate": 0.00013988338510781997, "loss": 11.6729, "step": 52962 }, { "epoch": 1.108661977727539, "grad_norm": 0.289783239364624, "learning_rate": 0.00013988137449470989, "loss": 11.6819, "step": 52963 }, { "epoch": 1.1086829104915013, "grad_norm": 0.30914106965065, "learning_rate": 0.000139879363862428, "loss": 11.6817, "step": 52964 }, { "epoch": 1.1087038432554635, "grad_norm": 0.26234889030456543, "learning_rate": 0.0001398773532109753, "loss": 11.6788, "step": 52965 }, { "epoch": 1.1087247760194257, "grad_norm": 0.28853195905685425, "learning_rate": 0.00013987534254035277, "loss": 11.6582, "step": 52966 }, { "epoch": 1.1087457087833879, "grad_norm": 0.28583836555480957, "learning_rate": 0.00013987333185056135, "loss": 11.6563, "step": 52967 }, { "epoch": 1.1087666415473498, "grad_norm": 0.42128974199295044, "learning_rate": 0.00013987132114160207, "loss": 11.6809, "step": 52968 }, { "epoch": 1.108787574311312, "grad_norm": 0.3985956907272339, "learning_rate": 0.0001398693104134758, "loss": 11.6613, "step": 52969 }, { "epoch": 1.1088085070752742, "grad_norm": 0.3551216423511505, "learning_rate": 0.00013986729966618356, "loss": 11.6581, "step": 52970 }, { "epoch": 1.1088294398392364, "grad_norm": 0.2863325774669647, "learning_rate": 0.00013986528889972633, "loss": 11.6644, "step": 52971 }, { "epoch": 1.1088503726031984, "grad_norm": 0.3426491320133209, "learning_rate": 0.00013986327811410504, "loss": 11.6681, "step": 52972 }, { "epoch": 1.1088713053671606, "grad_norm": 0.3265923857688904, "learning_rate": 0.00013986126730932067, "loss": 11.6564, "step": 52973 }, { "epoch": 1.1088922381311228, "grad_norm": 0.34864556789398193, "learning_rate": 0.0001398592564853742, "loss": 11.6606, "step": 52974 }, { "epoch": 1.108913170895085, "grad_norm": 0.33853036165237427, "learning_rate": 0.00013985724564226658, "loss": 11.6721, "step": 52975 }, { "epoch": 1.1089341036590472, "grad_norm": 0.29274123907089233, "learning_rate": 0.00013985523477999878, "loss": 11.6759, "step": 52976 }, { "epoch": 1.1089550364230094, "grad_norm": 0.3729344606399536, "learning_rate": 0.00013985322389857178, "loss": 11.646, "step": 52977 }, { "epoch": 1.1089759691869714, "grad_norm": 0.32924944162368774, "learning_rate": 0.00013985121299798654, "loss": 11.6632, "step": 52978 }, { "epoch": 1.1089969019509336, "grad_norm": 0.3897537887096405, "learning_rate": 0.00013984920207824403, "loss": 11.6639, "step": 52979 }, { "epoch": 1.1090178347148958, "grad_norm": 0.4687672257423401, "learning_rate": 0.00013984719113934517, "loss": 11.6876, "step": 52980 }, { "epoch": 1.109038767478858, "grad_norm": 0.3934028148651123, "learning_rate": 0.000139845180181291, "loss": 11.6742, "step": 52981 }, { "epoch": 1.10905970024282, "grad_norm": 0.3053024411201477, "learning_rate": 0.00013984316920408245, "loss": 11.6732, "step": 52982 }, { "epoch": 1.1090806330067822, "grad_norm": 0.33295738697052, "learning_rate": 0.00013984115820772045, "loss": 11.6663, "step": 52983 }, { "epoch": 1.1091015657707444, "grad_norm": 0.3319408595561981, "learning_rate": 0.00013983914719220607, "loss": 11.6537, "step": 52984 }, { "epoch": 1.1091224985347066, "grad_norm": 0.31185129284858704, "learning_rate": 0.00013983713615754018, "loss": 11.6641, "step": 52985 }, { "epoch": 1.1091434312986688, "grad_norm": 0.30423426628112793, "learning_rate": 0.0001398351251037238, "loss": 11.6824, "step": 52986 }, { "epoch": 1.1091643640626307, "grad_norm": 0.34143030643463135, "learning_rate": 0.00013983311403075786, "loss": 11.6678, "step": 52987 }, { "epoch": 1.109185296826593, "grad_norm": 0.3622746765613556, "learning_rate": 0.00013983110293864332, "loss": 11.6734, "step": 52988 }, { "epoch": 1.1092062295905551, "grad_norm": 0.3605794906616211, "learning_rate": 0.0001398290918273812, "loss": 11.6749, "step": 52989 }, { "epoch": 1.1092271623545173, "grad_norm": 0.29926761984825134, "learning_rate": 0.00013982708069697246, "loss": 11.6777, "step": 52990 }, { "epoch": 1.1092480951184793, "grad_norm": 0.26797130703926086, "learning_rate": 0.000139825069547418, "loss": 11.6682, "step": 52991 }, { "epoch": 1.1092690278824415, "grad_norm": 0.30291101336479187, "learning_rate": 0.00013982305837871884, "loss": 11.6891, "step": 52992 }, { "epoch": 1.1092899606464037, "grad_norm": 0.33519241213798523, "learning_rate": 0.00013982104719087595, "loss": 11.6765, "step": 52993 }, { "epoch": 1.109310893410366, "grad_norm": 0.40412768721580505, "learning_rate": 0.0001398190359838903, "loss": 11.6453, "step": 52994 }, { "epoch": 1.1093318261743281, "grad_norm": 0.31389403343200684, "learning_rate": 0.00013981702475776282, "loss": 11.6553, "step": 52995 }, { "epoch": 1.1093527589382903, "grad_norm": 0.3416426181793213, "learning_rate": 0.0001398150135124945, "loss": 11.6539, "step": 52996 }, { "epoch": 1.1093736917022523, "grad_norm": 0.30194950103759766, "learning_rate": 0.00013981300224808632, "loss": 11.6506, "step": 52997 }, { "epoch": 1.1093946244662145, "grad_norm": 0.32154813408851624, "learning_rate": 0.0001398109909645392, "loss": 11.6687, "step": 52998 }, { "epoch": 1.1094155572301767, "grad_norm": 0.397777259349823, "learning_rate": 0.00013980897966185416, "loss": 11.663, "step": 52999 }, { "epoch": 1.109436489994139, "grad_norm": 0.32101407647132874, "learning_rate": 0.00013980696834003212, "loss": 11.676, "step": 53000 }, { "epoch": 1.109436489994139, "eval_loss": 11.668341636657715, "eval_runtime": 34.3093, "eval_samples_per_second": 28.01, "eval_steps_per_second": 7.024, "step": 53000 }, { "epoch": 1.1094574227581009, "grad_norm": 0.34064018726348877, "learning_rate": 0.0001398049569990741, "loss": 11.6865, "step": 53001 }, { "epoch": 1.109478355522063, "grad_norm": 0.38167643547058105, "learning_rate": 0.00013980294563898106, "loss": 11.6729, "step": 53002 }, { "epoch": 1.1094992882860253, "grad_norm": 0.3194502294063568, "learning_rate": 0.00013980093425975393, "loss": 11.6787, "step": 53003 }, { "epoch": 1.1095202210499875, "grad_norm": 0.3846692144870758, "learning_rate": 0.00013979892286139367, "loss": 11.6675, "step": 53004 }, { "epoch": 1.1095411538139497, "grad_norm": 0.3312642574310303, "learning_rate": 0.00013979691144390128, "loss": 11.6564, "step": 53005 }, { "epoch": 1.1095620865779117, "grad_norm": 0.3227896988391876, "learning_rate": 0.00013979490000727774, "loss": 11.6688, "step": 53006 }, { "epoch": 1.1095830193418739, "grad_norm": 0.7951313853263855, "learning_rate": 0.00013979288855152396, "loss": 11.5855, "step": 53007 }, { "epoch": 1.109603952105836, "grad_norm": 0.3540557324886322, "learning_rate": 0.00013979087707664099, "loss": 11.6794, "step": 53008 }, { "epoch": 1.1096248848697983, "grad_norm": 0.3046894669532776, "learning_rate": 0.0001397888655826297, "loss": 11.6715, "step": 53009 }, { "epoch": 1.1096458176337602, "grad_norm": 0.3460521996021271, "learning_rate": 0.0001397868540694911, "loss": 11.6671, "step": 53010 }, { "epoch": 1.1096667503977224, "grad_norm": 0.45405811071395874, "learning_rate": 0.0001397848425372262, "loss": 11.6817, "step": 53011 }, { "epoch": 1.1096876831616846, "grad_norm": 0.40233704447746277, "learning_rate": 0.0001397828309858359, "loss": 11.6949, "step": 53012 }, { "epoch": 1.1097086159256468, "grad_norm": 0.28887003660202026, "learning_rate": 0.0001397808194153212, "loss": 11.6579, "step": 53013 }, { "epoch": 1.109729548689609, "grad_norm": 0.4683820605278015, "learning_rate": 0.0001397788078256831, "loss": 11.6653, "step": 53014 }, { "epoch": 1.1097504814535712, "grad_norm": 0.3513106405735016, "learning_rate": 0.00013977679621692248, "loss": 11.6672, "step": 53015 }, { "epoch": 1.1097714142175332, "grad_norm": 0.33699798583984375, "learning_rate": 0.0001397747845890404, "loss": 11.6727, "step": 53016 }, { "epoch": 1.1097923469814954, "grad_norm": 0.39988356828689575, "learning_rate": 0.00013977277294203772, "loss": 11.6878, "step": 53017 }, { "epoch": 1.1098132797454576, "grad_norm": 0.3408433198928833, "learning_rate": 0.00013977076127591554, "loss": 11.6617, "step": 53018 }, { "epoch": 1.1098342125094198, "grad_norm": 0.3436207175254822, "learning_rate": 0.0001397687495906747, "loss": 11.6676, "step": 53019 }, { "epoch": 1.1098551452733818, "grad_norm": 0.2806752324104309, "learning_rate": 0.00013976673788631627, "loss": 11.6661, "step": 53020 }, { "epoch": 1.109876078037344, "grad_norm": 0.3091173470020294, "learning_rate": 0.00013976472616284116, "loss": 11.6741, "step": 53021 }, { "epoch": 1.1098970108013062, "grad_norm": 0.3929811418056488, "learning_rate": 0.00013976271442025032, "loss": 11.6727, "step": 53022 }, { "epoch": 1.1099179435652684, "grad_norm": 0.8334019184112549, "learning_rate": 0.0001397607026585448, "loss": 11.6658, "step": 53023 }, { "epoch": 1.1099388763292306, "grad_norm": 0.2713417410850525, "learning_rate": 0.0001397586908777255, "loss": 11.6686, "step": 53024 }, { "epoch": 1.1099598090931926, "grad_norm": 0.35623273253440857, "learning_rate": 0.00013975667907779337, "loss": 11.6658, "step": 53025 }, { "epoch": 1.1099807418571548, "grad_norm": 0.2783859670162201, "learning_rate": 0.00013975466725874944, "loss": 11.6691, "step": 53026 }, { "epoch": 1.110001674621117, "grad_norm": 0.3702550232410431, "learning_rate": 0.0001397526554205946, "loss": 11.6513, "step": 53027 }, { "epoch": 1.1100226073850792, "grad_norm": 0.35431379079818726, "learning_rate": 0.00013975064356332991, "loss": 11.6697, "step": 53028 }, { "epoch": 1.1100435401490412, "grad_norm": 0.32529008388519287, "learning_rate": 0.00013974863168695627, "loss": 11.6581, "step": 53029 }, { "epoch": 1.1100644729130034, "grad_norm": 0.25561121106147766, "learning_rate": 0.00013974661979147468, "loss": 11.6559, "step": 53030 }, { "epoch": 1.1100854056769656, "grad_norm": 0.3363288342952728, "learning_rate": 0.00013974460787688606, "loss": 11.6679, "step": 53031 }, { "epoch": 1.1101063384409278, "grad_norm": 0.3657526969909668, "learning_rate": 0.0001397425959431914, "loss": 11.6759, "step": 53032 }, { "epoch": 1.11012727120489, "grad_norm": 0.3698444366455078, "learning_rate": 0.00013974058399039173, "loss": 11.6601, "step": 53033 }, { "epoch": 1.110148203968852, "grad_norm": 1.0624520778656006, "learning_rate": 0.00013973857201848795, "loss": 11.6173, "step": 53034 }, { "epoch": 1.1101691367328141, "grad_norm": 0.3082970976829529, "learning_rate": 0.00013973656002748105, "loss": 11.6575, "step": 53035 }, { "epoch": 1.1101900694967763, "grad_norm": 0.2812165319919586, "learning_rate": 0.00013973454801737196, "loss": 11.6554, "step": 53036 }, { "epoch": 1.1102110022607385, "grad_norm": 0.24350634217262268, "learning_rate": 0.00013973253598816167, "loss": 11.6545, "step": 53037 }, { "epoch": 1.1102319350247007, "grad_norm": 0.3784545361995697, "learning_rate": 0.00013973052393985119, "loss": 11.6686, "step": 53038 }, { "epoch": 1.1102528677886627, "grad_norm": 0.34744590520858765, "learning_rate": 0.00013972851187244142, "loss": 11.6826, "step": 53039 }, { "epoch": 1.110273800552625, "grad_norm": 0.2639317512512207, "learning_rate": 0.0001397264997859334, "loss": 11.6584, "step": 53040 }, { "epoch": 1.1102947333165871, "grad_norm": 0.35343503952026367, "learning_rate": 0.000139724487680328, "loss": 11.6528, "step": 53041 }, { "epoch": 1.1103156660805493, "grad_norm": 0.3471473455429077, "learning_rate": 0.00013972247555562627, "loss": 11.6748, "step": 53042 }, { "epoch": 1.1103365988445115, "grad_norm": 0.3655906915664673, "learning_rate": 0.00013972046341182917, "loss": 11.6723, "step": 53043 }, { "epoch": 1.1103575316084735, "grad_norm": 0.3478972315788269, "learning_rate": 0.00013971845124893762, "loss": 11.6666, "step": 53044 }, { "epoch": 1.1103784643724357, "grad_norm": 0.2819688022136688, "learning_rate": 0.00013971643906695265, "loss": 11.6783, "step": 53045 }, { "epoch": 1.110399397136398, "grad_norm": 0.29267019033432007, "learning_rate": 0.00013971442686587515, "loss": 11.6682, "step": 53046 }, { "epoch": 1.11042032990036, "grad_norm": 0.28814467787742615, "learning_rate": 0.00013971241464570615, "loss": 11.6818, "step": 53047 }, { "epoch": 1.110441262664322, "grad_norm": 0.3838036358356476, "learning_rate": 0.00013971040240644657, "loss": 11.6587, "step": 53048 }, { "epoch": 1.1104621954282843, "grad_norm": 0.29247555136680603, "learning_rate": 0.00013970839014809742, "loss": 11.6554, "step": 53049 }, { "epoch": 1.1104831281922465, "grad_norm": 0.2837042510509491, "learning_rate": 0.00013970637787065967, "loss": 11.6601, "step": 53050 }, { "epoch": 1.1105040609562087, "grad_norm": 0.366163969039917, "learning_rate": 0.00013970436557413424, "loss": 11.6791, "step": 53051 }, { "epoch": 1.1105249937201709, "grad_norm": 0.3874353766441345, "learning_rate": 0.00013970235325852216, "loss": 11.6397, "step": 53052 }, { "epoch": 1.1105459264841329, "grad_norm": 0.3392315208911896, "learning_rate": 0.00013970034092382434, "loss": 11.6661, "step": 53053 }, { "epoch": 1.110566859248095, "grad_norm": 0.328329861164093, "learning_rate": 0.00013969832857004178, "loss": 11.6743, "step": 53054 }, { "epoch": 1.1105877920120573, "grad_norm": 0.4187169373035431, "learning_rate": 0.00013969631619717543, "loss": 11.6667, "step": 53055 }, { "epoch": 1.1106087247760195, "grad_norm": 0.3171888589859009, "learning_rate": 0.00013969430380522625, "loss": 11.6819, "step": 53056 }, { "epoch": 1.1106296575399817, "grad_norm": 0.3043643534183502, "learning_rate": 0.00013969229139419525, "loss": 11.6721, "step": 53057 }, { "epoch": 1.1106505903039436, "grad_norm": 0.27015915513038635, "learning_rate": 0.00013969027896408335, "loss": 11.6523, "step": 53058 }, { "epoch": 1.1106715230679058, "grad_norm": 0.3650510609149933, "learning_rate": 0.00013968826651489152, "loss": 11.6726, "step": 53059 }, { "epoch": 1.110692455831868, "grad_norm": 0.2705058157444, "learning_rate": 0.0001396862540466208, "loss": 11.6498, "step": 53060 }, { "epoch": 1.1107133885958302, "grad_norm": 0.31806686520576477, "learning_rate": 0.00013968424155927207, "loss": 11.6575, "step": 53061 }, { "epoch": 1.1107343213597924, "grad_norm": 0.36615219712257385, "learning_rate": 0.00013968222905284635, "loss": 11.6856, "step": 53062 }, { "epoch": 1.1107552541237544, "grad_norm": 0.3220794200897217, "learning_rate": 0.00013968021652734455, "loss": 11.6587, "step": 53063 }, { "epoch": 1.1107761868877166, "grad_norm": 0.3004321753978729, "learning_rate": 0.00013967820398276768, "loss": 11.6664, "step": 53064 }, { "epoch": 1.1107971196516788, "grad_norm": 1.1184524297714233, "learning_rate": 0.00013967619141911673, "loss": 11.5917, "step": 53065 }, { "epoch": 1.110818052415641, "grad_norm": 0.3107532262802124, "learning_rate": 0.0001396741788363926, "loss": 11.6594, "step": 53066 }, { "epoch": 1.110838985179603, "grad_norm": 0.6070153713226318, "learning_rate": 0.00013967216623459635, "loss": 11.5922, "step": 53067 }, { "epoch": 1.1108599179435652, "grad_norm": 0.31663164496421814, "learning_rate": 0.00013967015361372886, "loss": 11.657, "step": 53068 }, { "epoch": 1.1108808507075274, "grad_norm": 0.28665080666542053, "learning_rate": 0.00013966814097379117, "loss": 11.6721, "step": 53069 }, { "epoch": 1.1109017834714896, "grad_norm": 0.33434414863586426, "learning_rate": 0.00013966612831478416, "loss": 11.652, "step": 53070 }, { "epoch": 1.1109227162354518, "grad_norm": 0.3168419897556305, "learning_rate": 0.00013966411563670888, "loss": 11.6695, "step": 53071 }, { "epoch": 1.1109436489994138, "grad_norm": 0.31004661321640015, "learning_rate": 0.00013966210293956625, "loss": 11.6848, "step": 53072 }, { "epoch": 1.110964581763376, "grad_norm": 0.3692494332790375, "learning_rate": 0.00013966009022335724, "loss": 11.6613, "step": 53073 }, { "epoch": 1.1109855145273382, "grad_norm": 0.2430240511894226, "learning_rate": 0.00013965807748808287, "loss": 11.6689, "step": 53074 }, { "epoch": 1.1110064472913004, "grad_norm": 0.37955477833747864, "learning_rate": 0.00013965606473374402, "loss": 11.6602, "step": 53075 }, { "epoch": 1.1110273800552626, "grad_norm": 0.25792422890663147, "learning_rate": 0.00013965405196034172, "loss": 11.668, "step": 53076 }, { "epoch": 1.1110483128192246, "grad_norm": 0.32406705617904663, "learning_rate": 0.00013965203916787696, "loss": 11.6699, "step": 53077 }, { "epoch": 1.1110692455831868, "grad_norm": 0.2791801989078522, "learning_rate": 0.00013965002635635062, "loss": 11.6585, "step": 53078 }, { "epoch": 1.111090178347149, "grad_norm": 0.3080807626247406, "learning_rate": 0.00013964801352576376, "loss": 11.6522, "step": 53079 }, { "epoch": 1.1111111111111112, "grad_norm": 0.3859730362892151, "learning_rate": 0.00013964600067611725, "loss": 11.6783, "step": 53080 }, { "epoch": 1.1111320438750734, "grad_norm": 0.3634651005268097, "learning_rate": 0.00013964398780741215, "loss": 11.6767, "step": 53081 }, { "epoch": 1.1111529766390353, "grad_norm": 0.3622210919857025, "learning_rate": 0.0001396419749196494, "loss": 11.6713, "step": 53082 }, { "epoch": 1.1111739094029975, "grad_norm": 0.30487871170043945, "learning_rate": 0.00013963996201282993, "loss": 11.665, "step": 53083 }, { "epoch": 1.1111948421669597, "grad_norm": 0.3078114986419678, "learning_rate": 0.00013963794908695477, "loss": 11.6655, "step": 53084 }, { "epoch": 1.111215774930922, "grad_norm": 0.3131316900253296, "learning_rate": 0.00013963593614202483, "loss": 11.6682, "step": 53085 }, { "epoch": 1.111236707694884, "grad_norm": 0.3085857033729553, "learning_rate": 0.0001396339231780411, "loss": 11.675, "step": 53086 }, { "epoch": 1.1112576404588461, "grad_norm": 0.3491156995296478, "learning_rate": 0.00013963191019500458, "loss": 11.6625, "step": 53087 }, { "epoch": 1.1112785732228083, "grad_norm": 0.3046233355998993, "learning_rate": 0.00013962989719291615, "loss": 11.6742, "step": 53088 }, { "epoch": 1.1112995059867705, "grad_norm": 0.26959428191185, "learning_rate": 0.0001396278841717769, "loss": 11.6679, "step": 53089 }, { "epoch": 1.1113204387507327, "grad_norm": 0.249691441655159, "learning_rate": 0.00013962587113158767, "loss": 11.6727, "step": 53090 }, { "epoch": 1.1113413715146947, "grad_norm": 0.3886045813560486, "learning_rate": 0.00013962385807234952, "loss": 11.6705, "step": 53091 }, { "epoch": 1.111362304278657, "grad_norm": 0.33890995383262634, "learning_rate": 0.0001396218449940634, "loss": 11.686, "step": 53092 }, { "epoch": 1.111383237042619, "grad_norm": 0.36881810426712036, "learning_rate": 0.00013961983189673022, "loss": 11.6548, "step": 53093 }, { "epoch": 1.1114041698065813, "grad_norm": 0.3085809051990509, "learning_rate": 0.00013961781878035103, "loss": 11.6533, "step": 53094 }, { "epoch": 1.1114251025705435, "grad_norm": 0.3300892412662506, "learning_rate": 0.00013961580564492675, "loss": 11.6698, "step": 53095 }, { "epoch": 1.1114460353345055, "grad_norm": 0.41298967599868774, "learning_rate": 0.00013961379249045837, "loss": 11.6519, "step": 53096 }, { "epoch": 1.1114669680984677, "grad_norm": 0.3528428077697754, "learning_rate": 0.00013961177931694682, "loss": 11.6517, "step": 53097 }, { "epoch": 1.1114879008624299, "grad_norm": 0.3676459789276123, "learning_rate": 0.0001396097661243931, "loss": 11.6757, "step": 53098 }, { "epoch": 1.111508833626392, "grad_norm": 1.2533693313598633, "learning_rate": 0.0001396077529127982, "loss": 11.6369, "step": 53099 }, { "epoch": 1.1115297663903543, "grad_norm": 0.37694692611694336, "learning_rate": 0.00013960573968216304, "loss": 11.6881, "step": 53100 }, { "epoch": 1.1115506991543163, "grad_norm": 0.3770528733730316, "learning_rate": 0.00013960372643248862, "loss": 11.6736, "step": 53101 }, { "epoch": 1.1115716319182785, "grad_norm": 0.29450616240501404, "learning_rate": 0.00013960171316377588, "loss": 11.6713, "step": 53102 }, { "epoch": 1.1115925646822407, "grad_norm": 0.2577849328517914, "learning_rate": 0.0001395996998760258, "loss": 11.6765, "step": 53103 }, { "epoch": 1.1116134974462029, "grad_norm": 0.3951014578342438, "learning_rate": 0.00013959768656923935, "loss": 11.6743, "step": 53104 }, { "epoch": 1.1116344302101648, "grad_norm": 0.353098064661026, "learning_rate": 0.0001395956732434175, "loss": 11.6585, "step": 53105 }, { "epoch": 1.111655362974127, "grad_norm": 0.38350164890289307, "learning_rate": 0.00013959365989856125, "loss": 11.6603, "step": 53106 }, { "epoch": 1.1116762957380892, "grad_norm": 0.2688208222389221, "learning_rate": 0.0001395916465346715, "loss": 11.6729, "step": 53107 }, { "epoch": 1.1116972285020514, "grad_norm": 0.30001357197761536, "learning_rate": 0.00013958963315174923, "loss": 11.6729, "step": 53108 }, { "epoch": 1.1117181612660136, "grad_norm": 0.26197901368141174, "learning_rate": 0.00013958761974979547, "loss": 11.6642, "step": 53109 }, { "epoch": 1.1117390940299756, "grad_norm": 0.3904739022254944, "learning_rate": 0.00013958560632881115, "loss": 11.6697, "step": 53110 }, { "epoch": 1.1117600267939378, "grad_norm": 0.34651410579681396, "learning_rate": 0.00013958359288879723, "loss": 11.6688, "step": 53111 }, { "epoch": 1.1117809595579, "grad_norm": 0.2598206102848053, "learning_rate": 0.00013958157942975467, "loss": 11.6617, "step": 53112 }, { "epoch": 1.1118018923218622, "grad_norm": 0.26722103357315063, "learning_rate": 0.00013957956595168447, "loss": 11.6691, "step": 53113 }, { "epoch": 1.1118228250858244, "grad_norm": 0.3778117895126343, "learning_rate": 0.00013957755245458755, "loss": 11.6487, "step": 53114 }, { "epoch": 1.1118437578497864, "grad_norm": 0.3299110233783722, "learning_rate": 0.00013957553893846492, "loss": 11.6602, "step": 53115 }, { "epoch": 1.1118646906137486, "grad_norm": 0.3879440724849701, "learning_rate": 0.00013957352540331755, "loss": 11.6806, "step": 53116 }, { "epoch": 1.1118856233777108, "grad_norm": 0.3875892162322998, "learning_rate": 0.00013957151184914636, "loss": 11.6679, "step": 53117 }, { "epoch": 1.111906556141673, "grad_norm": 0.3147922158241272, "learning_rate": 0.0001395694982759524, "loss": 11.6625, "step": 53118 }, { "epoch": 1.1119274889056352, "grad_norm": 0.3186245262622833, "learning_rate": 0.00013956748468373656, "loss": 11.6614, "step": 53119 }, { "epoch": 1.1119484216695972, "grad_norm": 0.2946482002735138, "learning_rate": 0.00013956547107249982, "loss": 11.6748, "step": 53120 }, { "epoch": 1.1119693544335594, "grad_norm": 0.3677119016647339, "learning_rate": 0.0001395634574422432, "loss": 11.6784, "step": 53121 }, { "epoch": 1.1119902871975216, "grad_norm": 0.40148723125457764, "learning_rate": 0.0001395614437929676, "loss": 11.6596, "step": 53122 }, { "epoch": 1.1120112199614838, "grad_norm": 0.31271183490753174, "learning_rate": 0.00013955943012467406, "loss": 11.6722, "step": 53123 }, { "epoch": 1.1120321527254458, "grad_norm": 0.3074202239513397, "learning_rate": 0.0001395574164373635, "loss": 11.6654, "step": 53124 }, { "epoch": 1.112053085489408, "grad_norm": 0.2885071039199829, "learning_rate": 0.00013955540273103686, "loss": 11.6541, "step": 53125 }, { "epoch": 1.1120740182533702, "grad_norm": 0.3350149691104889, "learning_rate": 0.0001395533890056952, "loss": 11.6803, "step": 53126 }, { "epoch": 1.1120949510173324, "grad_norm": 0.4224355220794678, "learning_rate": 0.00013955137526133936, "loss": 11.6654, "step": 53127 }, { "epoch": 1.1121158837812946, "grad_norm": 0.3266298472881317, "learning_rate": 0.00013954936149797046, "loss": 11.6844, "step": 53128 }, { "epoch": 1.1121368165452565, "grad_norm": 0.3227286636829376, "learning_rate": 0.00013954734771558935, "loss": 11.6985, "step": 53129 }, { "epoch": 1.1121577493092187, "grad_norm": 0.33194664120674133, "learning_rate": 0.00013954533391419705, "loss": 11.6495, "step": 53130 }, { "epoch": 1.112178682073181, "grad_norm": 0.34701478481292725, "learning_rate": 0.0001395433200937945, "loss": 11.6706, "step": 53131 }, { "epoch": 1.1121996148371431, "grad_norm": 0.41304561495780945, "learning_rate": 0.0001395413062543827, "loss": 11.6809, "step": 53132 }, { "epoch": 1.1122205476011053, "grad_norm": 0.32898595929145813, "learning_rate": 0.0001395392923959626, "loss": 11.6566, "step": 53133 }, { "epoch": 1.1122414803650673, "grad_norm": 0.38319575786590576, "learning_rate": 0.00013953727851853516, "loss": 11.6689, "step": 53134 }, { "epoch": 1.1122624131290295, "grad_norm": 0.3083727955818176, "learning_rate": 0.00013953526462210137, "loss": 11.6651, "step": 53135 }, { "epoch": 1.1122833458929917, "grad_norm": 0.32682493329048157, "learning_rate": 0.00013953325070666214, "loss": 11.6833, "step": 53136 }, { "epoch": 1.112304278656954, "grad_norm": 0.321917861700058, "learning_rate": 0.00013953123677221853, "loss": 11.6652, "step": 53137 }, { "epoch": 1.1123252114209161, "grad_norm": 0.3621346652507782, "learning_rate": 0.00013952922281877146, "loss": 11.6815, "step": 53138 }, { "epoch": 1.112346144184878, "grad_norm": 0.3428122401237488, "learning_rate": 0.00013952720884632188, "loss": 11.6644, "step": 53139 }, { "epoch": 1.1123670769488403, "grad_norm": 0.2848672568798065, "learning_rate": 0.0001395251948548708, "loss": 11.6788, "step": 53140 }, { "epoch": 1.1123880097128025, "grad_norm": 0.3393113613128662, "learning_rate": 0.00013952318084441915, "loss": 11.6789, "step": 53141 }, { "epoch": 1.1124089424767647, "grad_norm": 0.2905806303024292, "learning_rate": 0.00013952116681496793, "loss": 11.6603, "step": 53142 }, { "epoch": 1.1124298752407267, "grad_norm": 0.3535975217819214, "learning_rate": 0.0001395191527665181, "loss": 11.67, "step": 53143 }, { "epoch": 1.1124508080046889, "grad_norm": 0.3535895347595215, "learning_rate": 0.0001395171386990706, "loss": 11.6632, "step": 53144 }, { "epoch": 1.112471740768651, "grad_norm": 0.3727566599845886, "learning_rate": 0.00013951512461262645, "loss": 11.6822, "step": 53145 }, { "epoch": 1.1124926735326133, "grad_norm": 0.45140865445137024, "learning_rate": 0.00013951311050718654, "loss": 11.6429, "step": 53146 }, { "epoch": 1.1125136062965755, "grad_norm": 0.3587271571159363, "learning_rate": 0.00013951109638275192, "loss": 11.6647, "step": 53147 }, { "epoch": 1.1125345390605375, "grad_norm": 0.310014009475708, "learning_rate": 0.00013950908223932351, "loss": 11.6763, "step": 53148 }, { "epoch": 1.1125554718244997, "grad_norm": 0.30931827425956726, "learning_rate": 0.0001395070680769023, "loss": 11.6515, "step": 53149 }, { "epoch": 1.1125764045884619, "grad_norm": 0.3632136285305023, "learning_rate": 0.00013950505389548925, "loss": 11.6541, "step": 53150 }, { "epoch": 1.112597337352424, "grad_norm": 0.3128688931465149, "learning_rate": 0.00013950303969508535, "loss": 11.6715, "step": 53151 }, { "epoch": 1.1126182701163863, "grad_norm": 0.4075110852718353, "learning_rate": 0.00013950102547569148, "loss": 11.6476, "step": 53152 }, { "epoch": 1.1126392028803482, "grad_norm": 0.2932443916797638, "learning_rate": 0.00013949901123730873, "loss": 11.6587, "step": 53153 }, { "epoch": 1.1126601356443104, "grad_norm": 0.5301587581634521, "learning_rate": 0.000139496996979938, "loss": 11.6696, "step": 53154 }, { "epoch": 1.1126810684082726, "grad_norm": 0.2961888313293457, "learning_rate": 0.0001394949827035803, "loss": 11.6562, "step": 53155 }, { "epoch": 1.1127020011722348, "grad_norm": 0.2693122625350952, "learning_rate": 0.00013949296840823653, "loss": 11.6751, "step": 53156 }, { "epoch": 1.112722933936197, "grad_norm": 0.30557915568351746, "learning_rate": 0.00013949095409390773, "loss": 11.6754, "step": 53157 }, { "epoch": 1.112743866700159, "grad_norm": 0.3407342731952667, "learning_rate": 0.0001394889397605948, "loss": 11.6891, "step": 53158 }, { "epoch": 1.1127647994641212, "grad_norm": 0.3521949350833893, "learning_rate": 0.00013948692540829876, "loss": 11.6805, "step": 53159 }, { "epoch": 1.1127857322280834, "grad_norm": 0.25177502632141113, "learning_rate": 0.00013948491103702058, "loss": 11.6683, "step": 53160 }, { "epoch": 1.1128066649920456, "grad_norm": 0.38066890835762024, "learning_rate": 0.00013948289664676122, "loss": 11.6558, "step": 53161 }, { "epoch": 1.1128275977560076, "grad_norm": 0.34814387559890747, "learning_rate": 0.00013948088223752163, "loss": 11.6723, "step": 53162 }, { "epoch": 1.1128485305199698, "grad_norm": 0.2951260507106781, "learning_rate": 0.00013947886780930278, "loss": 11.6735, "step": 53163 }, { "epoch": 1.112869463283932, "grad_norm": 0.27153027057647705, "learning_rate": 0.00013947685336210562, "loss": 11.6659, "step": 53164 }, { "epoch": 1.1128903960478942, "grad_norm": 0.38715609908103943, "learning_rate": 0.00013947483889593118, "loss": 11.69, "step": 53165 }, { "epoch": 1.1129113288118564, "grad_norm": 0.3212530016899109, "learning_rate": 0.0001394728244107804, "loss": 11.6723, "step": 53166 }, { "epoch": 1.1129322615758184, "grad_norm": 0.30086034536361694, "learning_rate": 0.00013947080990665424, "loss": 11.668, "step": 53167 }, { "epoch": 1.1129531943397806, "grad_norm": 0.29600900411605835, "learning_rate": 0.00013946879538355366, "loss": 11.6703, "step": 53168 }, { "epoch": 1.1129741271037428, "grad_norm": 0.3071289658546448, "learning_rate": 0.00013946678084147965, "loss": 11.6672, "step": 53169 }, { "epoch": 1.112995059867705, "grad_norm": 0.29889336228370667, "learning_rate": 0.00013946476628043315, "loss": 11.6671, "step": 53170 }, { "epoch": 1.1130159926316672, "grad_norm": 0.34218963980674744, "learning_rate": 0.00013946275170041513, "loss": 11.6678, "step": 53171 }, { "epoch": 1.1130369253956292, "grad_norm": 0.38545098900794983, "learning_rate": 0.00013946073710142663, "loss": 11.6869, "step": 53172 }, { "epoch": 1.1130578581595914, "grad_norm": 0.3645497262477875, "learning_rate": 0.00013945872248346852, "loss": 11.667, "step": 53173 }, { "epoch": 1.1130787909235536, "grad_norm": 0.29947978258132935, "learning_rate": 0.0001394567078465418, "loss": 11.659, "step": 53174 }, { "epoch": 1.1130997236875158, "grad_norm": 0.34515807032585144, "learning_rate": 0.00013945469319064747, "loss": 11.6679, "step": 53175 }, { "epoch": 1.113120656451478, "grad_norm": 0.2856161892414093, "learning_rate": 0.0001394526785157865, "loss": 11.6541, "step": 53176 }, { "epoch": 1.11314158921544, "grad_norm": 0.36091190576553345, "learning_rate": 0.0001394506638219598, "loss": 11.6631, "step": 53177 }, { "epoch": 1.1131625219794021, "grad_norm": 0.3997952342033386, "learning_rate": 0.0001394486491091684, "loss": 11.6659, "step": 53178 }, { "epoch": 1.1131834547433643, "grad_norm": 0.33976638317108154, "learning_rate": 0.00013944663437741325, "loss": 11.6586, "step": 53179 }, { "epoch": 1.1132043875073265, "grad_norm": 0.3873588442802429, "learning_rate": 0.0001394446196266953, "loss": 11.6569, "step": 53180 }, { "epoch": 1.1132253202712885, "grad_norm": 0.3479382395744324, "learning_rate": 0.00013944260485701553, "loss": 11.6745, "step": 53181 }, { "epoch": 1.1132462530352507, "grad_norm": 0.32959094643592834, "learning_rate": 0.00013944059006837492, "loss": 11.6556, "step": 53182 }, { "epoch": 1.113267185799213, "grad_norm": 0.4083196520805359, "learning_rate": 0.00013943857526077443, "loss": 11.6762, "step": 53183 }, { "epoch": 1.113288118563175, "grad_norm": 0.3307240903377533, "learning_rate": 0.00013943656043421498, "loss": 11.6656, "step": 53184 }, { "epoch": 1.1133090513271373, "grad_norm": 0.3125448524951935, "learning_rate": 0.0001394345455886976, "loss": 11.6372, "step": 53185 }, { "epoch": 1.1133299840910993, "grad_norm": 0.31620991230010986, "learning_rate": 0.0001394325307242233, "loss": 11.6625, "step": 53186 }, { "epoch": 1.1133509168550615, "grad_norm": 0.3360300660133362, "learning_rate": 0.00013943051584079294, "loss": 11.6715, "step": 53187 }, { "epoch": 1.1133718496190237, "grad_norm": 0.3469371199607849, "learning_rate": 0.00013942850093840757, "loss": 11.6729, "step": 53188 }, { "epoch": 1.113392782382986, "grad_norm": 0.32855284214019775, "learning_rate": 0.00013942648601706812, "loss": 11.6789, "step": 53189 }, { "epoch": 1.113413715146948, "grad_norm": 0.9486753344535828, "learning_rate": 0.00013942447107677554, "loss": 11.6026, "step": 53190 }, { "epoch": 1.11343464791091, "grad_norm": 0.30917391180992126, "learning_rate": 0.00013942245611753086, "loss": 11.666, "step": 53191 }, { "epoch": 1.1134555806748723, "grad_norm": 0.2785235643386841, "learning_rate": 0.00013942044113933498, "loss": 11.6874, "step": 53192 }, { "epoch": 1.1134765134388345, "grad_norm": 0.3315402567386627, "learning_rate": 0.00013941842614218896, "loss": 11.6771, "step": 53193 }, { "epoch": 1.1134974462027967, "grad_norm": 0.3274903893470764, "learning_rate": 0.00013941641112609368, "loss": 11.6685, "step": 53194 }, { "epoch": 1.1135183789667589, "grad_norm": 0.3465971350669861, "learning_rate": 0.00013941439609105013, "loss": 11.679, "step": 53195 }, { "epoch": 1.1135393117307208, "grad_norm": 0.3302726447582245, "learning_rate": 0.0001394123810370593, "loss": 11.6761, "step": 53196 }, { "epoch": 1.113560244494683, "grad_norm": 0.3331802785396576, "learning_rate": 0.00013941036596412215, "loss": 11.6672, "step": 53197 }, { "epoch": 1.1135811772586452, "grad_norm": 0.3358379304409027, "learning_rate": 0.00013940835087223965, "loss": 11.6596, "step": 53198 }, { "epoch": 1.1136021100226074, "grad_norm": 0.319254070520401, "learning_rate": 0.00013940633576141277, "loss": 11.6699, "step": 53199 }, { "epoch": 1.1136230427865694, "grad_norm": 0.24520675837993622, "learning_rate": 0.00013940432063164247, "loss": 11.6553, "step": 53200 }, { "epoch": 1.1136439755505316, "grad_norm": 0.2914026975631714, "learning_rate": 0.00013940230548292973, "loss": 11.6713, "step": 53201 }, { "epoch": 1.1136649083144938, "grad_norm": 0.3390824496746063, "learning_rate": 0.0001394002903152755, "loss": 11.6616, "step": 53202 }, { "epoch": 1.113685841078456, "grad_norm": 0.3249858617782593, "learning_rate": 0.00013939827512868077, "loss": 11.6583, "step": 53203 }, { "epoch": 1.1137067738424182, "grad_norm": 0.3291686475276947, "learning_rate": 0.0001393962599231465, "loss": 11.6625, "step": 53204 }, { "epoch": 1.1137277066063802, "grad_norm": 0.38157179951667786, "learning_rate": 0.00013939424469867364, "loss": 11.6691, "step": 53205 }, { "epoch": 1.1137486393703424, "grad_norm": 0.29163751006126404, "learning_rate": 0.00013939222945526318, "loss": 11.6658, "step": 53206 }, { "epoch": 1.1137695721343046, "grad_norm": 0.3499913811683655, "learning_rate": 0.00013939021419291612, "loss": 11.6791, "step": 53207 }, { "epoch": 1.1137905048982668, "grad_norm": 0.30832675099372864, "learning_rate": 0.00013938819891163334, "loss": 11.6731, "step": 53208 }, { "epoch": 1.113811437662229, "grad_norm": 0.2412567287683487, "learning_rate": 0.00013938618361141592, "loss": 11.657, "step": 53209 }, { "epoch": 1.113832370426191, "grad_norm": 0.3464124798774719, "learning_rate": 0.00013938416829226475, "loss": 11.6679, "step": 53210 }, { "epoch": 1.1138533031901532, "grad_norm": 0.3538334369659424, "learning_rate": 0.00013938215295418083, "loss": 11.6645, "step": 53211 }, { "epoch": 1.1138742359541154, "grad_norm": 0.3641201853752136, "learning_rate": 0.0001393801375971651, "loss": 11.6648, "step": 53212 }, { "epoch": 1.1138951687180776, "grad_norm": 0.3541747033596039, "learning_rate": 0.00013937812222121853, "loss": 11.6722, "step": 53213 }, { "epoch": 1.1139161014820398, "grad_norm": 0.331133097410202, "learning_rate": 0.00013937610682634215, "loss": 11.664, "step": 53214 }, { "epoch": 1.1139370342460018, "grad_norm": 0.29263773560523987, "learning_rate": 0.00013937409141253685, "loss": 11.6598, "step": 53215 }, { "epoch": 1.113957967009964, "grad_norm": 0.4624785780906677, "learning_rate": 0.00013937207597980369, "loss": 11.6775, "step": 53216 }, { "epoch": 1.1139788997739262, "grad_norm": 0.3095998764038086, "learning_rate": 0.00013937006052814352, "loss": 11.6741, "step": 53217 }, { "epoch": 1.1139998325378884, "grad_norm": 0.3898126780986786, "learning_rate": 0.00013936804505755741, "loss": 11.673, "step": 53218 }, { "epoch": 1.1140207653018503, "grad_norm": 0.30413785576820374, "learning_rate": 0.0001393660295680463, "loss": 11.6691, "step": 53219 }, { "epoch": 1.1140416980658125, "grad_norm": 0.3217472732067108, "learning_rate": 0.00013936401405961112, "loss": 11.6617, "step": 53220 }, { "epoch": 1.1140626308297747, "grad_norm": 0.3358226418495178, "learning_rate": 0.00013936199853225288, "loss": 11.6665, "step": 53221 }, { "epoch": 1.114083563593737, "grad_norm": 0.37806886434555054, "learning_rate": 0.00013935998298597255, "loss": 11.6603, "step": 53222 }, { "epoch": 1.1141044963576991, "grad_norm": 0.376034677028656, "learning_rate": 0.00013935796742077107, "loss": 11.6686, "step": 53223 }, { "epoch": 1.1141254291216611, "grad_norm": 0.3204365670681, "learning_rate": 0.00013935595183664945, "loss": 11.6667, "step": 53224 }, { "epoch": 1.1141463618856233, "grad_norm": 0.3671562075614929, "learning_rate": 0.00013935393623360862, "loss": 11.6918, "step": 53225 }, { "epoch": 1.1141672946495855, "grad_norm": 0.3168194890022278, "learning_rate": 0.00013935192061164956, "loss": 11.6659, "step": 53226 }, { "epoch": 1.1141882274135477, "grad_norm": 0.3797396421432495, "learning_rate": 0.00013934990497077324, "loss": 11.6763, "step": 53227 }, { "epoch": 1.11420916017751, "grad_norm": 0.38735294342041016, "learning_rate": 0.00013934788931098066, "loss": 11.6927, "step": 53228 }, { "epoch": 1.114230092941472, "grad_norm": 0.27574822306632996, "learning_rate": 0.00013934587363227273, "loss": 11.6776, "step": 53229 }, { "epoch": 1.114251025705434, "grad_norm": 0.33626624941825867, "learning_rate": 0.00013934385793465046, "loss": 11.6599, "step": 53230 }, { "epoch": 1.1142719584693963, "grad_norm": 0.27710992097854614, "learning_rate": 0.00013934184221811485, "loss": 11.6726, "step": 53231 }, { "epoch": 1.1142928912333585, "grad_norm": 0.34647393226623535, "learning_rate": 0.00013933982648266677, "loss": 11.6568, "step": 53232 }, { "epoch": 1.1143138239973207, "grad_norm": 0.343732088804245, "learning_rate": 0.00013933781072830728, "loss": 11.6593, "step": 53233 }, { "epoch": 1.1143347567612827, "grad_norm": 0.277547687292099, "learning_rate": 0.00013933579495503727, "loss": 11.6595, "step": 53234 }, { "epoch": 1.1143556895252449, "grad_norm": 0.35073667764663696, "learning_rate": 0.0001393337791628578, "loss": 11.6799, "step": 53235 }, { "epoch": 1.114376622289207, "grad_norm": 0.2844766080379486, "learning_rate": 0.00013933176335176979, "loss": 11.6723, "step": 53236 }, { "epoch": 1.1143975550531693, "grad_norm": 0.2759813964366913, "learning_rate": 0.00013932974752177418, "loss": 11.6563, "step": 53237 }, { "epoch": 1.1144184878171313, "grad_norm": 0.3133040964603424, "learning_rate": 0.00013932773167287202, "loss": 11.6617, "step": 53238 }, { "epoch": 1.1144394205810935, "grad_norm": 0.35622766613960266, "learning_rate": 0.0001393257158050642, "loss": 11.6742, "step": 53239 }, { "epoch": 1.1144603533450557, "grad_norm": 0.26663997769355774, "learning_rate": 0.00013932369991835172, "loss": 11.6587, "step": 53240 }, { "epoch": 1.1144812861090179, "grad_norm": 0.26198652386665344, "learning_rate": 0.00013932168401273556, "loss": 11.6707, "step": 53241 }, { "epoch": 1.11450221887298, "grad_norm": 0.3322560489177704, "learning_rate": 0.0001393196680882167, "loss": 11.6534, "step": 53242 }, { "epoch": 1.114523151636942, "grad_norm": 0.3408885896205902, "learning_rate": 0.00013931765214479603, "loss": 11.6786, "step": 53243 }, { "epoch": 1.1145440844009042, "grad_norm": 0.31066077947616577, "learning_rate": 0.0001393156361824746, "loss": 11.6747, "step": 53244 }, { "epoch": 1.1145650171648664, "grad_norm": 0.42454442381858826, "learning_rate": 0.00013931362020125338, "loss": 11.6792, "step": 53245 }, { "epoch": 1.1145859499288286, "grad_norm": 0.29694947600364685, "learning_rate": 0.0001393116042011333, "loss": 11.6766, "step": 53246 }, { "epoch": 1.1146068826927908, "grad_norm": 0.3264179229736328, "learning_rate": 0.00013930958818211535, "loss": 11.6512, "step": 53247 }, { "epoch": 1.1146278154567528, "grad_norm": 0.3009536564350128, "learning_rate": 0.0001393075721442005, "loss": 11.6695, "step": 53248 }, { "epoch": 1.114648748220715, "grad_norm": 0.30103829503059387, "learning_rate": 0.0001393055560873897, "loss": 11.6409, "step": 53249 }, { "epoch": 1.1146696809846772, "grad_norm": 0.2742045521736145, "learning_rate": 0.00013930354001168393, "loss": 11.6594, "step": 53250 }, { "epoch": 1.1146906137486394, "grad_norm": 0.2820214331150055, "learning_rate": 0.00013930152391708415, "loss": 11.6686, "step": 53251 }, { "epoch": 1.1147115465126016, "grad_norm": 0.3731954097747803, "learning_rate": 0.00013929950780359137, "loss": 11.6654, "step": 53252 }, { "epoch": 1.1147324792765636, "grad_norm": 0.4520195722579956, "learning_rate": 0.00013929749167120652, "loss": 11.6773, "step": 53253 }, { "epoch": 1.1147534120405258, "grad_norm": 0.32749536633491516, "learning_rate": 0.00013929547551993053, "loss": 11.6579, "step": 53254 }, { "epoch": 1.114774344804488, "grad_norm": 0.3266196548938751, "learning_rate": 0.00013929345934976447, "loss": 11.6631, "step": 53255 }, { "epoch": 1.1147952775684502, "grad_norm": 0.40550440549850464, "learning_rate": 0.00013929144316070927, "loss": 11.6746, "step": 53256 }, { "epoch": 1.1148162103324122, "grad_norm": 0.5590006113052368, "learning_rate": 0.00013928942695276583, "loss": 11.6677, "step": 53257 }, { "epoch": 1.1148371430963744, "grad_norm": 0.3031412661075592, "learning_rate": 0.00013928741072593523, "loss": 11.6664, "step": 53258 }, { "epoch": 1.1148580758603366, "grad_norm": 0.28042343258857727, "learning_rate": 0.00013928539448021833, "loss": 11.6751, "step": 53259 }, { "epoch": 1.1148790086242988, "grad_norm": 0.34230783581733704, "learning_rate": 0.00013928337821561622, "loss": 11.6668, "step": 53260 }, { "epoch": 1.114899941388261, "grad_norm": 0.3320280909538269, "learning_rate": 0.00013928136193212974, "loss": 11.676, "step": 53261 }, { "epoch": 1.114920874152223, "grad_norm": 0.3074134886264801, "learning_rate": 0.00013927934562975994, "loss": 11.6775, "step": 53262 }, { "epoch": 1.1149418069161852, "grad_norm": 0.4220338463783264, "learning_rate": 0.0001392773293085078, "loss": 11.667, "step": 53263 }, { "epoch": 1.1149627396801474, "grad_norm": 0.2875254154205322, "learning_rate": 0.00013927531296837424, "loss": 11.6552, "step": 53264 }, { "epoch": 1.1149836724441096, "grad_norm": 0.28413525223731995, "learning_rate": 0.00013927329660936026, "loss": 11.6559, "step": 53265 }, { "epoch": 1.1150046052080718, "grad_norm": 0.2759033739566803, "learning_rate": 0.00013927128023146678, "loss": 11.6778, "step": 53266 }, { "epoch": 1.1150255379720337, "grad_norm": 0.2950718402862549, "learning_rate": 0.00013926926383469484, "loss": 11.6585, "step": 53267 }, { "epoch": 1.115046470735996, "grad_norm": 0.2655499279499054, "learning_rate": 0.00013926724741904537, "loss": 11.6737, "step": 53268 }, { "epoch": 1.1150674034999581, "grad_norm": 0.34150758385658264, "learning_rate": 0.00013926523098451937, "loss": 11.6818, "step": 53269 }, { "epoch": 1.1150883362639203, "grad_norm": 0.3100246489048004, "learning_rate": 0.00013926321453111776, "loss": 11.6613, "step": 53270 }, { "epoch": 1.1151092690278825, "grad_norm": 0.3423605263233185, "learning_rate": 0.00013926119805884155, "loss": 11.6644, "step": 53271 }, { "epoch": 1.1151302017918445, "grad_norm": 0.31430989503860474, "learning_rate": 0.0001392591815676917, "loss": 11.665, "step": 53272 }, { "epoch": 1.1151511345558067, "grad_norm": 0.2963186502456665, "learning_rate": 0.00013925716505766914, "loss": 11.6606, "step": 53273 }, { "epoch": 1.115172067319769, "grad_norm": 0.31456124782562256, "learning_rate": 0.0001392551485287749, "loss": 11.6574, "step": 53274 }, { "epoch": 1.1151930000837311, "grad_norm": 0.38049593567848206, "learning_rate": 0.00013925313198100996, "loss": 11.6749, "step": 53275 }, { "epoch": 1.115213932847693, "grad_norm": 0.29336994886398315, "learning_rate": 0.0001392511154143752, "loss": 11.6693, "step": 53276 }, { "epoch": 1.1152348656116553, "grad_norm": 0.26823922991752625, "learning_rate": 0.00013924909882887166, "loss": 11.6774, "step": 53277 }, { "epoch": 1.1152557983756175, "grad_norm": 0.2605089545249939, "learning_rate": 0.0001392470822245003, "loss": 11.6629, "step": 53278 }, { "epoch": 1.1152767311395797, "grad_norm": 0.37783679366111755, "learning_rate": 0.00013924506560126205, "loss": 11.647, "step": 53279 }, { "epoch": 1.115297663903542, "grad_norm": 0.352470338344574, "learning_rate": 0.00013924304895915793, "loss": 11.681, "step": 53280 }, { "epoch": 1.1153185966675039, "grad_norm": 0.32980456948280334, "learning_rate": 0.0001392410322981889, "loss": 11.6501, "step": 53281 }, { "epoch": 1.115339529431466, "grad_norm": 0.3114508092403412, "learning_rate": 0.00013923901561835593, "loss": 11.6663, "step": 53282 }, { "epoch": 1.1153604621954283, "grad_norm": 0.3799157738685608, "learning_rate": 0.00013923699891965993, "loss": 11.6868, "step": 53283 }, { "epoch": 1.1153813949593905, "grad_norm": 0.30037838220596313, "learning_rate": 0.00013923498220210195, "loss": 11.6764, "step": 53284 }, { "epoch": 1.1154023277233527, "grad_norm": 0.3246919810771942, "learning_rate": 0.00013923296546568294, "loss": 11.6589, "step": 53285 }, { "epoch": 1.1154232604873147, "grad_norm": 0.36146166920661926, "learning_rate": 0.00013923094871040384, "loss": 11.6878, "step": 53286 }, { "epoch": 1.1154441932512769, "grad_norm": 0.3524891436100006, "learning_rate": 0.00013922893193626563, "loss": 11.6738, "step": 53287 }, { "epoch": 1.115465126015239, "grad_norm": 0.29763004183769226, "learning_rate": 0.0001392269151432693, "loss": 11.6718, "step": 53288 }, { "epoch": 1.1154860587792013, "grad_norm": 0.29685038328170776, "learning_rate": 0.0001392248983314158, "loss": 11.661, "step": 53289 }, { "epoch": 1.1155069915431635, "grad_norm": 0.4190578758716583, "learning_rate": 0.0001392228815007061, "loss": 11.6699, "step": 53290 }, { "epoch": 1.1155279243071254, "grad_norm": 0.2451702356338501, "learning_rate": 0.00013922086465114117, "loss": 11.67, "step": 53291 }, { "epoch": 1.1155488570710876, "grad_norm": 0.2686420679092407, "learning_rate": 0.00013921884778272202, "loss": 11.6663, "step": 53292 }, { "epoch": 1.1155697898350498, "grad_norm": 0.2728310525417328, "learning_rate": 0.00013921683089544953, "loss": 11.664, "step": 53293 }, { "epoch": 1.115590722599012, "grad_norm": 0.3546938896179199, "learning_rate": 0.00013921481398932478, "loss": 11.6608, "step": 53294 }, { "epoch": 1.115611655362974, "grad_norm": 0.30034250020980835, "learning_rate": 0.00013921279706434865, "loss": 11.6691, "step": 53295 }, { "epoch": 1.1156325881269362, "grad_norm": 0.467288613319397, "learning_rate": 0.00013921078012052213, "loss": 11.6645, "step": 53296 }, { "epoch": 1.1156535208908984, "grad_norm": 0.2729724049568176, "learning_rate": 0.00013920876315784626, "loss": 11.668, "step": 53297 }, { "epoch": 1.1156744536548606, "grad_norm": 0.3050607442855835, "learning_rate": 0.00013920674617632188, "loss": 11.6685, "step": 53298 }, { "epoch": 1.1156953864188228, "grad_norm": 0.4623339772224426, "learning_rate": 0.0001392047291759501, "loss": 11.6602, "step": 53299 }, { "epoch": 1.1157163191827848, "grad_norm": 0.3079366683959961, "learning_rate": 0.00013920271215673178, "loss": 11.6717, "step": 53300 }, { "epoch": 1.115737251946747, "grad_norm": 0.30999988317489624, "learning_rate": 0.00013920069511866792, "loss": 11.6736, "step": 53301 }, { "epoch": 1.1157581847107092, "grad_norm": 0.3570573031902313, "learning_rate": 0.00013919867806175954, "loss": 11.6603, "step": 53302 }, { "epoch": 1.1157791174746714, "grad_norm": 0.3254498541355133, "learning_rate": 0.00013919666098600753, "loss": 11.662, "step": 53303 }, { "epoch": 1.1158000502386336, "grad_norm": 0.27567732334136963, "learning_rate": 0.0001391946438914129, "loss": 11.6815, "step": 53304 }, { "epoch": 1.1158209830025956, "grad_norm": 0.5009501576423645, "learning_rate": 0.00013919262677797664, "loss": 11.6567, "step": 53305 }, { "epoch": 1.1158419157665578, "grad_norm": 0.2954429090023041, "learning_rate": 0.00013919060964569967, "loss": 11.6832, "step": 53306 }, { "epoch": 1.11586284853052, "grad_norm": 0.2321656197309494, "learning_rate": 0.00013918859249458304, "loss": 11.6719, "step": 53307 }, { "epoch": 1.1158837812944822, "grad_norm": 0.36082667112350464, "learning_rate": 0.0001391865753246276, "loss": 11.6766, "step": 53308 }, { "epoch": 1.1159047140584444, "grad_norm": 0.2722671926021576, "learning_rate": 0.00013918455813583447, "loss": 11.6736, "step": 53309 }, { "epoch": 1.1159256468224064, "grad_norm": 0.29161137342453003, "learning_rate": 0.00013918254092820447, "loss": 11.6477, "step": 53310 }, { "epoch": 1.1159465795863686, "grad_norm": 0.3303779363632202, "learning_rate": 0.0001391805237017387, "loss": 11.6678, "step": 53311 }, { "epoch": 1.1159675123503308, "grad_norm": 0.26646387577056885, "learning_rate": 0.00013917850645643801, "loss": 11.6642, "step": 53312 }, { "epoch": 1.115988445114293, "grad_norm": 1.610352635383606, "learning_rate": 0.00013917648919230344, "loss": 11.6453, "step": 53313 }, { "epoch": 1.116009377878255, "grad_norm": 0.3215605318546295, "learning_rate": 0.00013917447190933596, "loss": 11.6728, "step": 53314 }, { "epoch": 1.1160303106422171, "grad_norm": 0.30647504329681396, "learning_rate": 0.00013917245460753653, "loss": 11.6572, "step": 53315 }, { "epoch": 1.1160512434061793, "grad_norm": 0.6597934365272522, "learning_rate": 0.0001391704372869061, "loss": 11.6481, "step": 53316 }, { "epoch": 1.1160721761701415, "grad_norm": 0.35469040274620056, "learning_rate": 0.00013916841994744567, "loss": 11.6755, "step": 53317 }, { "epoch": 1.1160931089341037, "grad_norm": 0.4263931214809418, "learning_rate": 0.00013916640258915616, "loss": 11.6745, "step": 53318 }, { "epoch": 1.1161140416980657, "grad_norm": 0.2942270338535309, "learning_rate": 0.00013916438521203862, "loss": 11.6647, "step": 53319 }, { "epoch": 1.116134974462028, "grad_norm": 0.31206050515174866, "learning_rate": 0.00013916236781609395, "loss": 11.6811, "step": 53320 }, { "epoch": 1.1161559072259901, "grad_norm": 0.2904747724533081, "learning_rate": 0.00013916035040132315, "loss": 11.6705, "step": 53321 }, { "epoch": 1.1161768399899523, "grad_norm": 0.27347517013549805, "learning_rate": 0.00013915833296772721, "loss": 11.6625, "step": 53322 }, { "epoch": 1.1161977727539145, "grad_norm": 0.4154583811759949, "learning_rate": 0.00013915631551530703, "loss": 11.6732, "step": 53323 }, { "epoch": 1.1162187055178765, "grad_norm": 0.277435302734375, "learning_rate": 0.00013915429804406366, "loss": 11.6794, "step": 53324 }, { "epoch": 1.1162396382818387, "grad_norm": 0.3259570300579071, "learning_rate": 0.00013915228055399803, "loss": 11.6621, "step": 53325 }, { "epoch": 1.116260571045801, "grad_norm": 0.3570854961872101, "learning_rate": 0.0001391502630451111, "loss": 11.6449, "step": 53326 }, { "epoch": 1.116281503809763, "grad_norm": 0.33220285177230835, "learning_rate": 0.00013914824551740384, "loss": 11.6748, "step": 53327 }, { "epoch": 1.1163024365737253, "grad_norm": 0.3562065362930298, "learning_rate": 0.00013914622797087727, "loss": 11.6578, "step": 53328 }, { "epoch": 1.1163233693376873, "grad_norm": 0.3549509048461914, "learning_rate": 0.0001391442104055323, "loss": 11.6856, "step": 53329 }, { "epoch": 1.1163443021016495, "grad_norm": 0.24230892956256866, "learning_rate": 0.00013914219282136993, "loss": 11.6498, "step": 53330 }, { "epoch": 1.1163652348656117, "grad_norm": 0.27907994389533997, "learning_rate": 0.00013914017521839115, "loss": 11.6825, "step": 53331 }, { "epoch": 1.1163861676295739, "grad_norm": 0.3100946545600891, "learning_rate": 0.00013913815759659685, "loss": 11.6671, "step": 53332 }, { "epoch": 1.1164071003935359, "grad_norm": 0.3382018208503723, "learning_rate": 0.00013913613995598806, "loss": 11.6775, "step": 53333 }, { "epoch": 1.116428033157498, "grad_norm": 0.32625073194503784, "learning_rate": 0.0001391341222965658, "loss": 11.6676, "step": 53334 }, { "epoch": 1.1164489659214603, "grad_norm": 0.2837143838405609, "learning_rate": 0.00013913210461833093, "loss": 11.6677, "step": 53335 }, { "epoch": 1.1164698986854225, "grad_norm": 0.4552460312843323, "learning_rate": 0.00013913008692128452, "loss": 11.688, "step": 53336 }, { "epoch": 1.1164908314493847, "grad_norm": 0.34998399019241333, "learning_rate": 0.00013912806920542744, "loss": 11.6738, "step": 53337 }, { "epoch": 1.1165117642133466, "grad_norm": 0.27759766578674316, "learning_rate": 0.00013912605147076075, "loss": 11.6608, "step": 53338 }, { "epoch": 1.1165326969773088, "grad_norm": 0.3765324354171753, "learning_rate": 0.00013912403371728536, "loss": 11.6686, "step": 53339 }, { "epoch": 1.116553629741271, "grad_norm": 0.26858457922935486, "learning_rate": 0.00013912201594500228, "loss": 11.669, "step": 53340 }, { "epoch": 1.1165745625052332, "grad_norm": 0.40027254819869995, "learning_rate": 0.00013911999815391246, "loss": 11.6733, "step": 53341 }, { "epoch": 1.1165954952691954, "grad_norm": 0.27799099683761597, "learning_rate": 0.00013911798034401684, "loss": 11.6593, "step": 53342 }, { "epoch": 1.1166164280331574, "grad_norm": 0.3651646375656128, "learning_rate": 0.00013911596251531646, "loss": 11.6595, "step": 53343 }, { "epoch": 1.1166373607971196, "grad_norm": 0.4071557819843292, "learning_rate": 0.00013911394466781224, "loss": 11.6726, "step": 53344 }, { "epoch": 1.1166582935610818, "grad_norm": 0.328551709651947, "learning_rate": 0.00013911192680150515, "loss": 11.6726, "step": 53345 }, { "epoch": 1.116679226325044, "grad_norm": 0.3314801752567291, "learning_rate": 0.00013910990891639622, "loss": 11.665, "step": 53346 }, { "epoch": 1.1167001590890062, "grad_norm": 0.3318396508693695, "learning_rate": 0.00013910789101248634, "loss": 11.6656, "step": 53347 }, { "epoch": 1.1167210918529682, "grad_norm": 0.3164549171924591, "learning_rate": 0.00013910587308977652, "loss": 11.6691, "step": 53348 }, { "epoch": 1.1167420246169304, "grad_norm": 0.342242568731308, "learning_rate": 0.0001391038551482677, "loss": 11.6598, "step": 53349 }, { "epoch": 1.1167629573808926, "grad_norm": 0.34117835760116577, "learning_rate": 0.0001391018371879609, "loss": 11.6652, "step": 53350 }, { "epoch": 1.1167838901448548, "grad_norm": 0.3722437918186188, "learning_rate": 0.00013909981920885706, "loss": 11.6837, "step": 53351 }, { "epoch": 1.1168048229088168, "grad_norm": 0.2936111092567444, "learning_rate": 0.00013909780121095712, "loss": 11.6676, "step": 53352 }, { "epoch": 1.116825755672779, "grad_norm": 0.3847556710243225, "learning_rate": 0.00013909578319426213, "loss": 11.6782, "step": 53353 }, { "epoch": 1.1168466884367412, "grad_norm": 0.3564140796661377, "learning_rate": 0.000139093765158773, "loss": 11.6718, "step": 53354 }, { "epoch": 1.1168676212007034, "grad_norm": 0.28781411051750183, "learning_rate": 0.0001390917471044907, "loss": 11.6706, "step": 53355 }, { "epoch": 1.1168885539646656, "grad_norm": 0.3166544437408447, "learning_rate": 0.00013908972903141625, "loss": 11.6666, "step": 53356 }, { "epoch": 1.1169094867286276, "grad_norm": 0.31104737520217896, "learning_rate": 0.00013908771093955053, "loss": 11.6698, "step": 53357 }, { "epoch": 1.1169304194925898, "grad_norm": 0.28938305377960205, "learning_rate": 0.0001390856928288946, "loss": 11.6678, "step": 53358 }, { "epoch": 1.116951352256552, "grad_norm": 0.38999173045158386, "learning_rate": 0.00013908367469944938, "loss": 11.6768, "step": 53359 }, { "epoch": 1.1169722850205142, "grad_norm": 0.3582402169704437, "learning_rate": 0.00013908165655121587, "loss": 11.6828, "step": 53360 }, { "epoch": 1.1169932177844764, "grad_norm": 0.3689862787723541, "learning_rate": 0.000139079638384195, "loss": 11.6691, "step": 53361 }, { "epoch": 1.1170141505484383, "grad_norm": 0.33991697430610657, "learning_rate": 0.00013907762019838777, "loss": 11.6641, "step": 53362 }, { "epoch": 1.1170350833124005, "grad_norm": 0.3978045880794525, "learning_rate": 0.00013907560199379518, "loss": 11.6832, "step": 53363 }, { "epoch": 1.1170560160763627, "grad_norm": 0.280409574508667, "learning_rate": 0.00013907358377041813, "loss": 11.6723, "step": 53364 }, { "epoch": 1.117076948840325, "grad_norm": 0.3335113823413849, "learning_rate": 0.00013907156552825763, "loss": 11.665, "step": 53365 }, { "epoch": 1.1170978816042871, "grad_norm": 0.29685261845588684, "learning_rate": 0.00013906954726731466, "loss": 11.6702, "step": 53366 }, { "epoch": 1.1171188143682491, "grad_norm": 0.34139835834503174, "learning_rate": 0.00013906752898759016, "loss": 11.6628, "step": 53367 }, { "epoch": 1.1171397471322113, "grad_norm": 0.3020316958427429, "learning_rate": 0.0001390655106890851, "loss": 11.6616, "step": 53368 }, { "epoch": 1.1171606798961735, "grad_norm": 0.3241049349308014, "learning_rate": 0.0001390634923718005, "loss": 11.66, "step": 53369 }, { "epoch": 1.1171816126601357, "grad_norm": 0.31653162837028503, "learning_rate": 0.0001390614740357373, "loss": 11.6768, "step": 53370 }, { "epoch": 1.1172025454240977, "grad_norm": 0.2769845128059387, "learning_rate": 0.00013905945568089642, "loss": 11.6589, "step": 53371 }, { "epoch": 1.11722347818806, "grad_norm": 0.3319985568523407, "learning_rate": 0.00013905743730727892, "loss": 11.667, "step": 53372 }, { "epoch": 1.117244410952022, "grad_norm": 0.7073844075202942, "learning_rate": 0.0001390554189148857, "loss": 11.6711, "step": 53373 }, { "epoch": 1.1172653437159843, "grad_norm": 0.3081057071685791, "learning_rate": 0.00013905340050371777, "loss": 11.6645, "step": 53374 }, { "epoch": 1.1172862764799465, "grad_norm": 0.3730328679084778, "learning_rate": 0.0001390513820737761, "loss": 11.7012, "step": 53375 }, { "epoch": 1.1173072092439085, "grad_norm": 0.35432422161102295, "learning_rate": 0.00013904936362506165, "loss": 11.6709, "step": 53376 }, { "epoch": 1.1173281420078707, "grad_norm": 0.35150983929634094, "learning_rate": 0.00013904734515757537, "loss": 11.6728, "step": 53377 }, { "epoch": 1.1173490747718329, "grad_norm": 0.3394763767719269, "learning_rate": 0.00013904532667131825, "loss": 11.6881, "step": 53378 }, { "epoch": 1.117370007535795, "grad_norm": 0.39138659834861755, "learning_rate": 0.00013904330816629125, "loss": 11.6568, "step": 53379 }, { "epoch": 1.1173909402997573, "grad_norm": 0.36760222911834717, "learning_rate": 0.00013904128964249538, "loss": 11.6622, "step": 53380 }, { "epoch": 1.1174118730637193, "grad_norm": 0.3018381893634796, "learning_rate": 0.00013903927109993155, "loss": 11.6425, "step": 53381 }, { "epoch": 1.1174328058276815, "grad_norm": 0.38116341829299927, "learning_rate": 0.0001390372525386008, "loss": 11.6658, "step": 53382 }, { "epoch": 1.1174537385916437, "grad_norm": 0.4023283123970032, "learning_rate": 0.00013903523395850402, "loss": 11.6804, "step": 53383 }, { "epoch": 1.1174746713556059, "grad_norm": 0.404723197221756, "learning_rate": 0.00013903321535964222, "loss": 11.6904, "step": 53384 }, { "epoch": 1.117495604119568, "grad_norm": 0.38794344663619995, "learning_rate": 0.0001390311967420164, "loss": 11.6716, "step": 53385 }, { "epoch": 1.11751653688353, "grad_norm": 0.29525306820869446, "learning_rate": 0.00013902917810562748, "loss": 11.6625, "step": 53386 }, { "epoch": 1.1175374696474922, "grad_norm": 0.2966277003288269, "learning_rate": 0.00013902715945047646, "loss": 11.6768, "step": 53387 }, { "epoch": 1.1175584024114544, "grad_norm": 0.3366146385669708, "learning_rate": 0.00013902514077656427, "loss": 11.6787, "step": 53388 }, { "epoch": 1.1175793351754166, "grad_norm": 0.3949243724346161, "learning_rate": 0.00013902312208389193, "loss": 11.6808, "step": 53389 }, { "epoch": 1.1176002679393786, "grad_norm": 0.31465238332748413, "learning_rate": 0.00013902110337246044, "loss": 11.6778, "step": 53390 }, { "epoch": 1.1176212007033408, "grad_norm": 0.38165974617004395, "learning_rate": 0.00013901908464227067, "loss": 11.6727, "step": 53391 }, { "epoch": 1.117642133467303, "grad_norm": 0.35352060198783875, "learning_rate": 0.0001390170658933237, "loss": 11.6772, "step": 53392 }, { "epoch": 1.1176630662312652, "grad_norm": 0.5026628971099854, "learning_rate": 0.0001390150471256204, "loss": 11.6903, "step": 53393 }, { "epoch": 1.1176839989952274, "grad_norm": 0.30039718747138977, "learning_rate": 0.00013901302833916178, "loss": 11.6671, "step": 53394 }, { "epoch": 1.1177049317591894, "grad_norm": 0.2958417534828186, "learning_rate": 0.00013901100953394883, "loss": 11.6681, "step": 53395 }, { "epoch": 1.1177258645231516, "grad_norm": 0.33595362305641174, "learning_rate": 0.0001390089907099825, "loss": 11.6605, "step": 53396 }, { "epoch": 1.1177467972871138, "grad_norm": 0.2985418736934662, "learning_rate": 0.00013900697186726378, "loss": 11.6555, "step": 53397 }, { "epoch": 1.117767730051076, "grad_norm": 0.29337987303733826, "learning_rate": 0.0001390049530057936, "loss": 11.6563, "step": 53398 }, { "epoch": 1.1177886628150382, "grad_norm": 0.44887620210647583, "learning_rate": 0.000139002934125573, "loss": 11.659, "step": 53399 }, { "epoch": 1.1178095955790002, "grad_norm": 0.2984134554862976, "learning_rate": 0.00013900091522660288, "loss": 11.674, "step": 53400 }, { "epoch": 1.1178305283429624, "grad_norm": 0.2715907394886017, "learning_rate": 0.00013899889630888422, "loss": 11.6571, "step": 53401 }, { "epoch": 1.1178514611069246, "grad_norm": 0.3287741541862488, "learning_rate": 0.00013899687737241805, "loss": 11.6566, "step": 53402 }, { "epoch": 1.1178723938708868, "grad_norm": 0.30138131976127625, "learning_rate": 0.00013899485841720527, "loss": 11.6635, "step": 53403 }, { "epoch": 1.117893326634849, "grad_norm": 0.4035007953643799, "learning_rate": 0.0001389928394432469, "loss": 11.6741, "step": 53404 }, { "epoch": 1.117914259398811, "grad_norm": 0.3087151050567627, "learning_rate": 0.00013899082045054387, "loss": 11.6533, "step": 53405 }, { "epoch": 1.1179351921627732, "grad_norm": 0.2840732932090759, "learning_rate": 0.0001389888014390972, "loss": 11.6628, "step": 53406 }, { "epoch": 1.1179561249267354, "grad_norm": 0.3255687654018402, "learning_rate": 0.0001389867824089078, "loss": 11.6527, "step": 53407 }, { "epoch": 1.1179770576906976, "grad_norm": 0.3097142279148102, "learning_rate": 0.00013898476335997667, "loss": 11.663, "step": 53408 }, { "epoch": 1.1179979904546595, "grad_norm": 0.3130170702934265, "learning_rate": 0.00013898274429230482, "loss": 11.6502, "step": 53409 }, { "epoch": 1.1180189232186217, "grad_norm": 0.48969554901123047, "learning_rate": 0.00013898072520589317, "loss": 11.6452, "step": 53410 }, { "epoch": 1.118039855982584, "grad_norm": 0.3249768018722534, "learning_rate": 0.00013897870610074268, "loss": 11.6669, "step": 53411 }, { "epoch": 1.1180607887465461, "grad_norm": 0.31167277693748474, "learning_rate": 0.00013897668697685438, "loss": 11.6633, "step": 53412 }, { "epoch": 1.1180817215105083, "grad_norm": 0.2841757833957672, "learning_rate": 0.00013897466783422918, "loss": 11.6588, "step": 53413 }, { "epoch": 1.1181026542744703, "grad_norm": 0.2987578511238098, "learning_rate": 0.0001389726486728681, "loss": 11.6541, "step": 53414 }, { "epoch": 1.1181235870384325, "grad_norm": 0.2690562903881073, "learning_rate": 0.00013897062949277204, "loss": 11.6712, "step": 53415 }, { "epoch": 1.1181445198023947, "grad_norm": 0.3078412711620331, "learning_rate": 0.00013896861029394207, "loss": 11.6854, "step": 53416 }, { "epoch": 1.118165452566357, "grad_norm": 0.40347856283187866, "learning_rate": 0.00013896659107637906, "loss": 11.6735, "step": 53417 }, { "epoch": 1.118186385330319, "grad_norm": 0.3916972875595093, "learning_rate": 0.0001389645718400841, "loss": 11.6808, "step": 53418 }, { "epoch": 1.118207318094281, "grad_norm": 0.3570001423358917, "learning_rate": 0.00013896255258505802, "loss": 11.6685, "step": 53419 }, { "epoch": 1.1182282508582433, "grad_norm": 0.48665133118629456, "learning_rate": 0.0001389605333113019, "loss": 11.5891, "step": 53420 }, { "epoch": 1.1182491836222055, "grad_norm": 0.3935531675815582, "learning_rate": 0.00013895851401881664, "loss": 11.6526, "step": 53421 }, { "epoch": 1.1182701163861677, "grad_norm": 0.34338319301605225, "learning_rate": 0.00013895649470760323, "loss": 11.6824, "step": 53422 }, { "epoch": 1.11829104915013, "grad_norm": 0.3716099262237549, "learning_rate": 0.00013895447537766272, "loss": 11.6749, "step": 53423 }, { "epoch": 1.1183119819140919, "grad_norm": 0.4917055666446686, "learning_rate": 0.00013895245602899597, "loss": 11.6808, "step": 53424 }, { "epoch": 1.118332914678054, "grad_norm": 0.41626420617103577, "learning_rate": 0.000138950436661604, "loss": 11.6654, "step": 53425 }, { "epoch": 1.1183538474420163, "grad_norm": 0.35328105092048645, "learning_rate": 0.00013894841727548775, "loss": 11.6581, "step": 53426 }, { "epoch": 1.1183747802059785, "grad_norm": 0.2717554271221161, "learning_rate": 0.00013894639787064825, "loss": 11.6638, "step": 53427 }, { "epoch": 1.1183957129699404, "grad_norm": 0.3988364338874817, "learning_rate": 0.00013894437844708642, "loss": 11.6651, "step": 53428 }, { "epoch": 1.1184166457339026, "grad_norm": 0.2716696262359619, "learning_rate": 0.00013894235900480323, "loss": 11.663, "step": 53429 }, { "epoch": 1.1184375784978648, "grad_norm": 0.3200875222682953, "learning_rate": 0.0001389403395437997, "loss": 11.6736, "step": 53430 }, { "epoch": 1.118458511261827, "grad_norm": 0.3755178153514862, "learning_rate": 0.00013893832006407674, "loss": 11.6697, "step": 53431 }, { "epoch": 1.1184794440257892, "grad_norm": 0.32646864652633667, "learning_rate": 0.00013893630056563538, "loss": 11.6688, "step": 53432 }, { "epoch": 1.1185003767897512, "grad_norm": 0.2866532504558563, "learning_rate": 0.00013893428104847652, "loss": 11.6626, "step": 53433 }, { "epoch": 1.1185213095537134, "grad_norm": 0.3127591609954834, "learning_rate": 0.0001389322615126012, "loss": 11.6953, "step": 53434 }, { "epoch": 1.1185422423176756, "grad_norm": 0.2803073525428772, "learning_rate": 0.00013893024195801038, "loss": 11.6484, "step": 53435 }, { "epoch": 1.1185631750816378, "grad_norm": 0.2640852928161621, "learning_rate": 0.00013892822238470497, "loss": 11.6772, "step": 53436 }, { "epoch": 1.1185841078456, "grad_norm": 0.3605911433696747, "learning_rate": 0.000138926202792686, "loss": 11.6769, "step": 53437 }, { "epoch": 1.118605040609562, "grad_norm": 0.32938098907470703, "learning_rate": 0.00013892418318195443, "loss": 11.6689, "step": 53438 }, { "epoch": 1.1186259733735242, "grad_norm": 0.29253169894218445, "learning_rate": 0.00013892216355251125, "loss": 11.6609, "step": 53439 }, { "epoch": 1.1186469061374864, "grad_norm": 0.35240283608436584, "learning_rate": 0.00013892014390435735, "loss": 11.6771, "step": 53440 }, { "epoch": 1.1186678389014486, "grad_norm": 0.3019205927848816, "learning_rate": 0.0001389181242374938, "loss": 11.6828, "step": 53441 }, { "epoch": 1.1186887716654108, "grad_norm": 0.3076705038547516, "learning_rate": 0.0001389161045519215, "loss": 11.6796, "step": 53442 }, { "epoch": 1.1187097044293728, "grad_norm": 0.35922378301620483, "learning_rate": 0.00013891408484764146, "loss": 11.6742, "step": 53443 }, { "epoch": 1.118730637193335, "grad_norm": 0.28594809770584106, "learning_rate": 0.00013891206512465465, "loss": 11.6716, "step": 53444 }, { "epoch": 1.1187515699572972, "grad_norm": 0.3698531687259674, "learning_rate": 0.00013891004538296202, "loss": 11.67, "step": 53445 }, { "epoch": 1.1187725027212594, "grad_norm": 0.2761549949645996, "learning_rate": 0.00013890802562256454, "loss": 11.646, "step": 53446 }, { "epoch": 1.1187934354852214, "grad_norm": 0.3101826608181, "learning_rate": 0.0001389060058434632, "loss": 11.6733, "step": 53447 }, { "epoch": 1.1188143682491836, "grad_norm": 0.3672488033771515, "learning_rate": 0.00013890398604565898, "loss": 11.6839, "step": 53448 }, { "epoch": 1.1188353010131458, "grad_norm": 0.2978714406490326, "learning_rate": 0.00013890196622915282, "loss": 11.6617, "step": 53449 }, { "epoch": 1.118856233777108, "grad_norm": 0.3764522969722748, "learning_rate": 0.00013889994639394571, "loss": 11.6866, "step": 53450 }, { "epoch": 1.1188771665410702, "grad_norm": 0.27352654933929443, "learning_rate": 0.00013889792654003862, "loss": 11.6703, "step": 53451 }, { "epoch": 1.1188980993050321, "grad_norm": 0.37208256125450134, "learning_rate": 0.0001388959066674325, "loss": 11.673, "step": 53452 }, { "epoch": 1.1189190320689943, "grad_norm": 0.3580394983291626, "learning_rate": 0.00013889388677612834, "loss": 11.6832, "step": 53453 }, { "epoch": 1.1189399648329565, "grad_norm": 0.530231773853302, "learning_rate": 0.00013889186686612712, "loss": 11.6717, "step": 53454 }, { "epoch": 1.1189608975969187, "grad_norm": 0.34534522891044617, "learning_rate": 0.00013888984693742978, "loss": 11.6748, "step": 53455 }, { "epoch": 1.118981830360881, "grad_norm": 0.3536924719810486, "learning_rate": 0.00013888782699003738, "loss": 11.6712, "step": 53456 }, { "epoch": 1.119002763124843, "grad_norm": 0.313681423664093, "learning_rate": 0.00013888580702395075, "loss": 11.6765, "step": 53457 }, { "epoch": 1.1190236958888051, "grad_norm": 0.37692490220069885, "learning_rate": 0.00013888378703917094, "loss": 11.6508, "step": 53458 }, { "epoch": 1.1190446286527673, "grad_norm": 0.2709329128265381, "learning_rate": 0.00013888176703569894, "loss": 11.6678, "step": 53459 }, { "epoch": 1.1190655614167295, "grad_norm": 0.39006850123405457, "learning_rate": 0.00013887974701353567, "loss": 11.6702, "step": 53460 }, { "epoch": 1.1190864941806917, "grad_norm": 0.3147518038749695, "learning_rate": 0.00013887772697268214, "loss": 11.6773, "step": 53461 }, { "epoch": 1.1191074269446537, "grad_norm": 0.3832090497016907, "learning_rate": 0.0001388757069131393, "loss": 11.656, "step": 53462 }, { "epoch": 1.119128359708616, "grad_norm": 0.2818315327167511, "learning_rate": 0.00013887368683490817, "loss": 11.6608, "step": 53463 }, { "epoch": 1.119149292472578, "grad_norm": 0.27616655826568604, "learning_rate": 0.00013887166673798963, "loss": 11.6756, "step": 53464 }, { "epoch": 1.1191702252365403, "grad_norm": 0.3283948004245758, "learning_rate": 0.00013886964662238472, "loss": 11.6763, "step": 53465 }, { "epoch": 1.1191911580005023, "grad_norm": 0.31721171736717224, "learning_rate": 0.0001388676264880944, "loss": 11.6714, "step": 53466 }, { "epoch": 1.1192120907644645, "grad_norm": 0.35801172256469727, "learning_rate": 0.0001388656063351196, "loss": 11.6656, "step": 53467 }, { "epoch": 1.1192330235284267, "grad_norm": 0.40880724787712097, "learning_rate": 0.00013886358616346139, "loss": 11.6579, "step": 53468 }, { "epoch": 1.1192539562923889, "grad_norm": 0.2920529842376709, "learning_rate": 0.0001388615659731206, "loss": 11.6575, "step": 53469 }, { "epoch": 1.119274889056351, "grad_norm": 0.3491787910461426, "learning_rate": 0.0001388595457640983, "loss": 11.6696, "step": 53470 }, { "epoch": 1.119295821820313, "grad_norm": 0.2872912883758545, "learning_rate": 0.00013885752553639545, "loss": 11.6738, "step": 53471 }, { "epoch": 1.1193167545842753, "grad_norm": 0.31173819303512573, "learning_rate": 0.00013885550529001302, "loss": 11.6834, "step": 53472 }, { "epoch": 1.1193376873482375, "grad_norm": 0.2745421230792999, "learning_rate": 0.00013885348502495196, "loss": 11.6431, "step": 53473 }, { "epoch": 1.1193586201121997, "grad_norm": 0.3337724506855011, "learning_rate": 0.00013885146474121323, "loss": 11.6567, "step": 53474 }, { "epoch": 1.1193795528761619, "grad_norm": 0.37858232855796814, "learning_rate": 0.00013884944443879786, "loss": 11.6758, "step": 53475 }, { "epoch": 1.1194004856401238, "grad_norm": 0.3048025965690613, "learning_rate": 0.00013884742411770674, "loss": 11.6711, "step": 53476 }, { "epoch": 1.119421418404086, "grad_norm": 0.3654642403125763, "learning_rate": 0.0001388454037779409, "loss": 11.6696, "step": 53477 }, { "epoch": 1.1194423511680482, "grad_norm": 0.28575149178504944, "learning_rate": 0.00013884338341950132, "loss": 11.6678, "step": 53478 }, { "epoch": 1.1194632839320104, "grad_norm": 0.3722537159919739, "learning_rate": 0.00013884136304238894, "loss": 11.6686, "step": 53479 }, { "epoch": 1.1194842166959726, "grad_norm": 0.29461246728897095, "learning_rate": 0.00013883934264660472, "loss": 11.6579, "step": 53480 }, { "epoch": 1.1195051494599346, "grad_norm": 0.311902791261673, "learning_rate": 0.00013883732223214965, "loss": 11.6685, "step": 53481 }, { "epoch": 1.1195260822238968, "grad_norm": 0.3213631510734558, "learning_rate": 0.0001388353017990247, "loss": 11.6741, "step": 53482 }, { "epoch": 1.119547014987859, "grad_norm": 0.36006686091423035, "learning_rate": 0.00013883328134723085, "loss": 11.6604, "step": 53483 }, { "epoch": 1.1195679477518212, "grad_norm": 0.36418968439102173, "learning_rate": 0.00013883126087676907, "loss": 11.6635, "step": 53484 }, { "epoch": 1.1195888805157832, "grad_norm": 0.3666290044784546, "learning_rate": 0.00013882924038764033, "loss": 11.671, "step": 53485 }, { "epoch": 1.1196098132797454, "grad_norm": 0.34651950001716614, "learning_rate": 0.00013882721987984556, "loss": 11.6868, "step": 53486 }, { "epoch": 1.1196307460437076, "grad_norm": 0.29190120100975037, "learning_rate": 0.0001388251993533858, "loss": 11.6704, "step": 53487 }, { "epoch": 1.1196516788076698, "grad_norm": 0.3676007390022278, "learning_rate": 0.000138823178808262, "loss": 11.6698, "step": 53488 }, { "epoch": 1.119672611571632, "grad_norm": 0.3532552123069763, "learning_rate": 0.00013882115824447508, "loss": 11.6412, "step": 53489 }, { "epoch": 1.119693544335594, "grad_norm": 0.4964596927165985, "learning_rate": 0.00013881913766202608, "loss": 11.6795, "step": 53490 }, { "epoch": 1.1197144770995562, "grad_norm": 0.2746464014053345, "learning_rate": 0.00013881711706091593, "loss": 11.6738, "step": 53491 }, { "epoch": 1.1197354098635184, "grad_norm": 0.35690435767173767, "learning_rate": 0.0001388150964411456, "loss": 11.6696, "step": 53492 }, { "epoch": 1.1197563426274806, "grad_norm": 0.3280889391899109, "learning_rate": 0.00013881307580271613, "loss": 11.6735, "step": 53493 }, { "epoch": 1.1197772753914428, "grad_norm": 0.3161267936229706, "learning_rate": 0.00013881105514562838, "loss": 11.6829, "step": 53494 }, { "epoch": 1.1197982081554048, "grad_norm": 0.3030959665775299, "learning_rate": 0.0001388090344698834, "loss": 11.6685, "step": 53495 }, { "epoch": 1.119819140919367, "grad_norm": 0.295866996049881, "learning_rate": 0.00013880701377548213, "loss": 11.6723, "step": 53496 }, { "epoch": 1.1198400736833292, "grad_norm": 0.32994502782821655, "learning_rate": 0.00013880499306242558, "loss": 11.6683, "step": 53497 }, { "epoch": 1.1198610064472914, "grad_norm": 0.33510786294937134, "learning_rate": 0.00013880297233071466, "loss": 11.6563, "step": 53498 }, { "epoch": 1.1198819392112536, "grad_norm": 0.2924644351005554, "learning_rate": 0.0001388009515803504, "loss": 11.6617, "step": 53499 }, { "epoch": 1.1199028719752155, "grad_norm": 0.39885783195495605, "learning_rate": 0.00013879893081133373, "loss": 11.6695, "step": 53500 }, { "epoch": 1.1199238047391777, "grad_norm": 0.2638039290904999, "learning_rate": 0.0001387969100236656, "loss": 11.6604, "step": 53501 }, { "epoch": 1.11994473750314, "grad_norm": 0.26694148778915405, "learning_rate": 0.00013879488921734708, "loss": 11.6642, "step": 53502 }, { "epoch": 1.1199656702671021, "grad_norm": 0.3789808750152588, "learning_rate": 0.00013879286839237905, "loss": 11.6545, "step": 53503 }, { "epoch": 1.1199866030310641, "grad_norm": 0.34319600462913513, "learning_rate": 0.0001387908475487625, "loss": 11.6537, "step": 53504 }, { "epoch": 1.1200075357950263, "grad_norm": 0.31322532892227173, "learning_rate": 0.00013878882668649843, "loss": 11.6769, "step": 53505 }, { "epoch": 1.1200284685589885, "grad_norm": 0.4577501714229584, "learning_rate": 0.0001387868058055878, "loss": 11.6712, "step": 53506 }, { "epoch": 1.1200494013229507, "grad_norm": 0.399245947599411, "learning_rate": 0.00013878478490603156, "loss": 11.6843, "step": 53507 }, { "epoch": 1.120070334086913, "grad_norm": 0.3628813326358795, "learning_rate": 0.0001387827639878307, "loss": 11.6865, "step": 53508 }, { "epoch": 1.120091266850875, "grad_norm": 0.33434993028640747, "learning_rate": 0.0001387807430509862, "loss": 11.6631, "step": 53509 }, { "epoch": 1.120112199614837, "grad_norm": 0.276170015335083, "learning_rate": 0.000138778722095499, "loss": 11.6597, "step": 53510 }, { "epoch": 1.1201331323787993, "grad_norm": 0.2441968023777008, "learning_rate": 0.0001387767011213701, "loss": 11.6698, "step": 53511 }, { "epoch": 1.1201540651427615, "grad_norm": 0.32919105887413025, "learning_rate": 0.00013877468012860048, "loss": 11.6701, "step": 53512 }, { "epoch": 1.1201749979067237, "grad_norm": 0.30384671688079834, "learning_rate": 0.00013877265911719106, "loss": 11.6816, "step": 53513 }, { "epoch": 1.1201959306706857, "grad_norm": 0.38179704546928406, "learning_rate": 0.00013877063808714286, "loss": 11.6673, "step": 53514 }, { "epoch": 1.1202168634346479, "grad_norm": 0.34757018089294434, "learning_rate": 0.00013876861703845684, "loss": 11.6602, "step": 53515 }, { "epoch": 1.12023779619861, "grad_norm": 0.37854763865470886, "learning_rate": 0.00013876659597113398, "loss": 11.673, "step": 53516 }, { "epoch": 1.1202587289625723, "grad_norm": 0.3166128695011139, "learning_rate": 0.00013876457488517523, "loss": 11.6746, "step": 53517 }, { "epoch": 1.1202796617265345, "grad_norm": 0.3555954396724701, "learning_rate": 0.00013876255378058157, "loss": 11.668, "step": 53518 }, { "epoch": 1.1203005944904965, "grad_norm": 0.26184946298599243, "learning_rate": 0.00013876053265735397, "loss": 11.6629, "step": 53519 }, { "epoch": 1.1203215272544587, "grad_norm": 0.4036923050880432, "learning_rate": 0.00013875851151549343, "loss": 11.6539, "step": 53520 }, { "epoch": 1.1203424600184209, "grad_norm": 0.3386712074279785, "learning_rate": 0.00013875649035500085, "loss": 11.6632, "step": 53521 }, { "epoch": 1.120363392782383, "grad_norm": 0.4251787066459656, "learning_rate": 0.0001387544691758773, "loss": 11.6693, "step": 53522 }, { "epoch": 1.120384325546345, "grad_norm": 0.36029350757598877, "learning_rate": 0.00013875244797812366, "loss": 11.674, "step": 53523 }, { "epoch": 1.1204052583103072, "grad_norm": 0.33926236629486084, "learning_rate": 0.00013875042676174095, "loss": 11.6634, "step": 53524 }, { "epoch": 1.1204261910742694, "grad_norm": 0.3190590739250183, "learning_rate": 0.00013874840552673015, "loss": 11.662, "step": 53525 }, { "epoch": 1.1204471238382316, "grad_norm": 0.3042196035385132, "learning_rate": 0.0001387463842730922, "loss": 11.6801, "step": 53526 }, { "epoch": 1.1204680566021938, "grad_norm": 0.4296500086784363, "learning_rate": 0.0001387443630008281, "loss": 11.6712, "step": 53527 }, { "epoch": 1.1204889893661558, "grad_norm": 0.39588847756385803, "learning_rate": 0.0001387423417099388, "loss": 11.6674, "step": 53528 }, { "epoch": 1.120509922130118, "grad_norm": 0.2855693995952606, "learning_rate": 0.00013874032040042526, "loss": 11.6508, "step": 53529 }, { "epoch": 1.1205308548940802, "grad_norm": 0.2714442014694214, "learning_rate": 0.0001387382990722885, "loss": 11.6601, "step": 53530 }, { "epoch": 1.1205517876580424, "grad_norm": 0.3866778016090393, "learning_rate": 0.00013873627772552943, "loss": 11.6694, "step": 53531 }, { "epoch": 1.1205727204220046, "grad_norm": 0.29771289229393005, "learning_rate": 0.00013873425636014912, "loss": 11.6703, "step": 53532 }, { "epoch": 1.1205936531859666, "grad_norm": 0.32820528745651245, "learning_rate": 0.0001387322349761484, "loss": 11.6554, "step": 53533 }, { "epoch": 1.1206145859499288, "grad_norm": 0.3088547885417938, "learning_rate": 0.00013873021357352838, "loss": 11.6665, "step": 53534 }, { "epoch": 1.120635518713891, "grad_norm": 0.2644750773906708, "learning_rate": 0.00013872819215228995, "loss": 11.68, "step": 53535 }, { "epoch": 1.1206564514778532, "grad_norm": 0.3254227936267853, "learning_rate": 0.00013872617071243408, "loss": 11.6678, "step": 53536 }, { "epoch": 1.1206773842418154, "grad_norm": 0.2982805371284485, "learning_rate": 0.0001387241492539618, "loss": 11.6727, "step": 53537 }, { "epoch": 1.1206983170057774, "grad_norm": 0.2673529088497162, "learning_rate": 0.00013872212777687402, "loss": 11.6628, "step": 53538 }, { "epoch": 1.1207192497697396, "grad_norm": 0.317082941532135, "learning_rate": 0.00013872010628117174, "loss": 11.6827, "step": 53539 }, { "epoch": 1.1207401825337018, "grad_norm": 0.2888141870498657, "learning_rate": 0.00013871808476685594, "loss": 11.6565, "step": 53540 }, { "epoch": 1.120761115297664, "grad_norm": 0.32559239864349365, "learning_rate": 0.00013871606323392758, "loss": 11.6792, "step": 53541 }, { "epoch": 1.120782048061626, "grad_norm": 0.34158408641815186, "learning_rate": 0.0001387140416823876, "loss": 11.6692, "step": 53542 }, { "epoch": 1.1208029808255882, "grad_norm": 0.4079569876194, "learning_rate": 0.00013871202011223703, "loss": 11.6905, "step": 53543 }, { "epoch": 1.1208239135895504, "grad_norm": 0.34385383129119873, "learning_rate": 0.0001387099985234768, "loss": 11.6693, "step": 53544 }, { "epoch": 1.1208448463535126, "grad_norm": 0.31791016459465027, "learning_rate": 0.00013870797691610792, "loss": 11.6458, "step": 53545 }, { "epoch": 1.1208657791174748, "grad_norm": 0.3092597424983978, "learning_rate": 0.00013870595529013135, "loss": 11.6647, "step": 53546 }, { "epoch": 1.1208867118814367, "grad_norm": 0.34033748507499695, "learning_rate": 0.00013870393364554803, "loss": 11.6878, "step": 53547 }, { "epoch": 1.120907644645399, "grad_norm": 0.2663060128688812, "learning_rate": 0.00013870191198235896, "loss": 11.6621, "step": 53548 }, { "epoch": 1.1209285774093611, "grad_norm": 0.329764187335968, "learning_rate": 0.00013869989030056512, "loss": 11.6773, "step": 53549 }, { "epoch": 1.1209495101733233, "grad_norm": 0.2653234302997589, "learning_rate": 0.00013869786860016742, "loss": 11.6589, "step": 53550 }, { "epoch": 1.1209704429372855, "grad_norm": 0.25513774156570435, "learning_rate": 0.00013869584688116696, "loss": 11.6755, "step": 53551 }, { "epoch": 1.1209913757012475, "grad_norm": 0.3113231062889099, "learning_rate": 0.00013869382514356454, "loss": 11.6744, "step": 53552 }, { "epoch": 1.1210123084652097, "grad_norm": 0.30702200531959534, "learning_rate": 0.00013869180338736125, "loss": 11.6821, "step": 53553 }, { "epoch": 1.121033241229172, "grad_norm": 0.36951473355293274, "learning_rate": 0.00013868978161255807, "loss": 11.6694, "step": 53554 }, { "epoch": 1.1210541739931341, "grad_norm": 0.2659468650817871, "learning_rate": 0.0001386877598191559, "loss": 11.6719, "step": 53555 }, { "epoch": 1.1210751067570963, "grad_norm": 0.30685827136039734, "learning_rate": 0.00013868573800715579, "loss": 11.6624, "step": 53556 }, { "epoch": 1.1210960395210583, "grad_norm": 0.3346293866634369, "learning_rate": 0.00013868371617655864, "loss": 11.6518, "step": 53557 }, { "epoch": 1.1211169722850205, "grad_norm": 0.3660484254360199, "learning_rate": 0.00013868169432736544, "loss": 11.6662, "step": 53558 }, { "epoch": 1.1211379050489827, "grad_norm": 0.36328136920928955, "learning_rate": 0.0001386796724595772, "loss": 11.6655, "step": 53559 }, { "epoch": 1.121158837812945, "grad_norm": 0.2967834770679474, "learning_rate": 0.00013867765057319487, "loss": 11.6702, "step": 53560 }, { "epoch": 1.1211797705769069, "grad_norm": 0.3631212115287781, "learning_rate": 0.00013867562866821942, "loss": 11.6658, "step": 53561 }, { "epoch": 1.121200703340869, "grad_norm": 0.3120001554489136, "learning_rate": 0.00013867360674465177, "loss": 11.669, "step": 53562 }, { "epoch": 1.1212216361048313, "grad_norm": 0.2737964987754822, "learning_rate": 0.000138671584802493, "loss": 11.6605, "step": 53563 }, { "epoch": 1.1212425688687935, "grad_norm": 0.3182792663574219, "learning_rate": 0.000138669562841744, "loss": 11.6732, "step": 53564 }, { "epoch": 1.1212635016327557, "grad_norm": 0.3007907271385193, "learning_rate": 0.00013866754086240576, "loss": 11.6668, "step": 53565 }, { "epoch": 1.1212844343967177, "grad_norm": 0.3101861774921417, "learning_rate": 0.00013866551886447927, "loss": 11.6621, "step": 53566 }, { "epoch": 1.1213053671606799, "grad_norm": 0.37083670496940613, "learning_rate": 0.0001386634968479655, "loss": 11.6846, "step": 53567 }, { "epoch": 1.121326299924642, "grad_norm": 0.4703519344329834, "learning_rate": 0.0001386614748128654, "loss": 11.6708, "step": 53568 }, { "epoch": 1.1213472326886043, "grad_norm": 0.2693931758403778, "learning_rate": 0.00013865945275917993, "loss": 11.663, "step": 53569 }, { "epoch": 1.1213681654525665, "grad_norm": 0.25169193744659424, "learning_rate": 0.0001386574306869101, "loss": 11.6752, "step": 53570 }, { "epoch": 1.1213890982165284, "grad_norm": 0.379646897315979, "learning_rate": 0.0001386554085960569, "loss": 11.6762, "step": 53571 }, { "epoch": 1.1214100309804906, "grad_norm": 0.3268827199935913, "learning_rate": 0.00013865338648662125, "loss": 11.6525, "step": 53572 }, { "epoch": 1.1214309637444528, "grad_norm": 0.36315107345581055, "learning_rate": 0.00013865136435860413, "loss": 11.6674, "step": 53573 }, { "epoch": 1.121451896508415, "grad_norm": 0.3392677307128906, "learning_rate": 0.00013864934221200653, "loss": 11.6787, "step": 53574 }, { "epoch": 1.1214728292723772, "grad_norm": 0.2887893319129944, "learning_rate": 0.00013864732004682944, "loss": 11.6703, "step": 53575 }, { "epoch": 1.1214937620363392, "grad_norm": 0.3203192353248596, "learning_rate": 0.0001386452978630738, "loss": 11.6813, "step": 53576 }, { "epoch": 1.1215146948003014, "grad_norm": 0.3144443929195404, "learning_rate": 0.00013864327566074055, "loss": 11.6565, "step": 53577 }, { "epoch": 1.1215356275642636, "grad_norm": 0.3153704106807709, "learning_rate": 0.00013864125343983076, "loss": 11.6654, "step": 53578 }, { "epoch": 1.1215565603282258, "grad_norm": 0.36633414030075073, "learning_rate": 0.00013863923120034533, "loss": 11.6726, "step": 53579 }, { "epoch": 1.1215774930921878, "grad_norm": 0.3891792893409729, "learning_rate": 0.0001386372089422852, "loss": 11.6722, "step": 53580 }, { "epoch": 1.12159842585615, "grad_norm": 0.3372955918312073, "learning_rate": 0.00013863518666565146, "loss": 11.6739, "step": 53581 }, { "epoch": 1.1216193586201122, "grad_norm": 0.4213675856590271, "learning_rate": 0.00013863316437044495, "loss": 11.6806, "step": 53582 }, { "epoch": 1.1216402913840744, "grad_norm": 0.3484746217727661, "learning_rate": 0.00013863114205666673, "loss": 11.6688, "step": 53583 }, { "epoch": 1.1216612241480366, "grad_norm": 0.320939302444458, "learning_rate": 0.00013862911972431776, "loss": 11.6787, "step": 53584 }, { "epoch": 1.1216821569119986, "grad_norm": 0.3259515166282654, "learning_rate": 0.000138627097373399, "loss": 11.6584, "step": 53585 }, { "epoch": 1.1217030896759608, "grad_norm": 0.3855727016925812, "learning_rate": 0.0001386250750039114, "loss": 11.6802, "step": 53586 }, { "epoch": 1.121724022439923, "grad_norm": 0.34546446800231934, "learning_rate": 0.00013862305261585592, "loss": 11.6767, "step": 53587 }, { "epoch": 1.1217449552038852, "grad_norm": 0.4984036684036255, "learning_rate": 0.00013862103020923362, "loss": 11.6694, "step": 53588 }, { "epoch": 1.1217658879678474, "grad_norm": 0.3182845115661621, "learning_rate": 0.00013861900778404542, "loss": 11.6702, "step": 53589 }, { "epoch": 1.1217868207318094, "grad_norm": 0.33266720175743103, "learning_rate": 0.00013861698534029228, "loss": 11.6851, "step": 53590 }, { "epoch": 1.1218077534957716, "grad_norm": 0.2896438241004944, "learning_rate": 0.00013861496287797515, "loss": 11.6568, "step": 53591 }, { "epoch": 1.1218286862597338, "grad_norm": 0.35289812088012695, "learning_rate": 0.00013861294039709505, "loss": 11.6574, "step": 53592 }, { "epoch": 1.121849619023696, "grad_norm": 0.29874861240386963, "learning_rate": 0.00013861091789765293, "loss": 11.6765, "step": 53593 }, { "epoch": 1.1218705517876582, "grad_norm": 0.30840450525283813, "learning_rate": 0.0001386088953796498, "loss": 11.6645, "step": 53594 }, { "epoch": 1.1218914845516201, "grad_norm": 0.33055973052978516, "learning_rate": 0.00013860687284308657, "loss": 11.6689, "step": 53595 }, { "epoch": 1.1219124173155823, "grad_norm": 0.28781482577323914, "learning_rate": 0.00013860485028796427, "loss": 11.6646, "step": 53596 }, { "epoch": 1.1219333500795445, "grad_norm": 0.31863024830818176, "learning_rate": 0.0001386028277142838, "loss": 11.6805, "step": 53597 }, { "epoch": 1.1219542828435067, "grad_norm": 0.29578396677970886, "learning_rate": 0.00013860080512204622, "loss": 11.6761, "step": 53598 }, { "epoch": 1.1219752156074687, "grad_norm": 0.3470560908317566, "learning_rate": 0.0001385987825112524, "loss": 11.6625, "step": 53599 }, { "epoch": 1.121996148371431, "grad_norm": 0.3619844615459442, "learning_rate": 0.00013859675988190344, "loss": 11.6623, "step": 53600 }, { "epoch": 1.1220170811353931, "grad_norm": 0.3815003037452698, "learning_rate": 0.00013859473723400022, "loss": 11.6699, "step": 53601 }, { "epoch": 1.1220380138993553, "grad_norm": 0.3019440174102783, "learning_rate": 0.00013859271456754372, "loss": 11.6802, "step": 53602 }, { "epoch": 1.1220589466633175, "grad_norm": 0.3299426734447479, "learning_rate": 0.00013859069188253495, "loss": 11.659, "step": 53603 }, { "epoch": 1.1220798794272795, "grad_norm": 0.2787221670150757, "learning_rate": 0.00013858866917897483, "loss": 11.673, "step": 53604 }, { "epoch": 1.1221008121912417, "grad_norm": 0.319134384393692, "learning_rate": 0.00013858664645686442, "loss": 11.6657, "step": 53605 }, { "epoch": 1.122121744955204, "grad_norm": 0.4816722273826599, "learning_rate": 0.0001385846237162046, "loss": 11.6594, "step": 53606 }, { "epoch": 1.122142677719166, "grad_norm": 0.29485198855400085, "learning_rate": 0.00013858260095699637, "loss": 11.6712, "step": 53607 }, { "epoch": 1.1221636104831283, "grad_norm": 0.3485284149646759, "learning_rate": 0.00013858057817924072, "loss": 11.6707, "step": 53608 }, { "epoch": 1.1221845432470903, "grad_norm": 0.3405538499355316, "learning_rate": 0.0001385785553829386, "loss": 11.6833, "step": 53609 }, { "epoch": 1.1222054760110525, "grad_norm": 0.38985416293144226, "learning_rate": 0.000138576532568091, "loss": 11.6636, "step": 53610 }, { "epoch": 1.1222264087750147, "grad_norm": 0.35111868381500244, "learning_rate": 0.00013857450973469893, "loss": 11.6851, "step": 53611 }, { "epoch": 1.1222473415389769, "grad_norm": 0.2738005816936493, "learning_rate": 0.00013857248688276327, "loss": 11.668, "step": 53612 }, { "epoch": 1.122268274302939, "grad_norm": 0.26058080792427063, "learning_rate": 0.00013857046401228505, "loss": 11.656, "step": 53613 }, { "epoch": 1.122289207066901, "grad_norm": 0.3587256669998169, "learning_rate": 0.00013856844112326527, "loss": 11.6796, "step": 53614 }, { "epoch": 1.1223101398308633, "grad_norm": 0.3419073522090912, "learning_rate": 0.00013856641821570484, "loss": 11.682, "step": 53615 }, { "epoch": 1.1223310725948255, "grad_norm": 0.35424092411994934, "learning_rate": 0.00013856439528960474, "loss": 11.6586, "step": 53616 }, { "epoch": 1.1223520053587877, "grad_norm": 0.33484792709350586, "learning_rate": 0.000138562372344966, "loss": 11.6662, "step": 53617 }, { "epoch": 1.1223729381227496, "grad_norm": 0.3139248490333557, "learning_rate": 0.00013856034938178953, "loss": 11.6709, "step": 53618 }, { "epoch": 1.1223938708867118, "grad_norm": 0.4625345766544342, "learning_rate": 0.00013855832640007634, "loss": 11.678, "step": 53619 }, { "epoch": 1.122414803650674, "grad_norm": 0.3924509882926941, "learning_rate": 0.0001385563033998274, "loss": 11.6678, "step": 53620 }, { "epoch": 1.1224357364146362, "grad_norm": 0.33447036147117615, "learning_rate": 0.00013855428038104365, "loss": 11.6733, "step": 53621 }, { "epoch": 1.1224566691785984, "grad_norm": 0.3361239731311798, "learning_rate": 0.0001385522573437261, "loss": 11.6711, "step": 53622 }, { "epoch": 1.1224776019425604, "grad_norm": 0.47026196122169495, "learning_rate": 0.00013855023428787567, "loss": 11.6849, "step": 53623 }, { "epoch": 1.1224985347065226, "grad_norm": 0.43437647819519043, "learning_rate": 0.00013854821121349342, "loss": 11.6688, "step": 53624 }, { "epoch": 1.1225194674704848, "grad_norm": 0.3283880650997162, "learning_rate": 0.00013854618812058026, "loss": 11.6618, "step": 53625 }, { "epoch": 1.122540400234447, "grad_norm": 0.3974786400794983, "learning_rate": 0.00013854416500913715, "loss": 11.6815, "step": 53626 }, { "epoch": 1.1225613329984092, "grad_norm": 0.2965918481349945, "learning_rate": 0.00013854214187916512, "loss": 11.6786, "step": 53627 }, { "epoch": 1.1225822657623712, "grad_norm": 0.3592735528945923, "learning_rate": 0.0001385401187306651, "loss": 11.6864, "step": 53628 }, { "epoch": 1.1226031985263334, "grad_norm": 0.35333606600761414, "learning_rate": 0.0001385380955636381, "loss": 11.6737, "step": 53629 }, { "epoch": 1.1226241312902956, "grad_norm": 0.3151627779006958, "learning_rate": 0.000138536072378085, "loss": 11.6733, "step": 53630 }, { "epoch": 1.1226450640542578, "grad_norm": 0.3373869061470032, "learning_rate": 0.00013853404917400685, "loss": 11.6769, "step": 53631 }, { "epoch": 1.12266599681822, "grad_norm": 0.3243747055530548, "learning_rate": 0.00013853202595140465, "loss": 11.6644, "step": 53632 }, { "epoch": 1.122686929582182, "grad_norm": 0.3537006974220276, "learning_rate": 0.00013853000271027928, "loss": 11.6771, "step": 53633 }, { "epoch": 1.1227078623461442, "grad_norm": 0.30762097239494324, "learning_rate": 0.00013852797945063182, "loss": 11.6674, "step": 53634 }, { "epoch": 1.1227287951101064, "grad_norm": 0.32923826575279236, "learning_rate": 0.00013852595617246317, "loss": 11.6716, "step": 53635 }, { "epoch": 1.1227497278740686, "grad_norm": 0.3112839460372925, "learning_rate": 0.0001385239328757743, "loss": 11.66, "step": 53636 }, { "epoch": 1.1227706606380305, "grad_norm": 0.26692187786102295, "learning_rate": 0.00013852190956056623, "loss": 11.6504, "step": 53637 }, { "epoch": 1.1227915934019927, "grad_norm": 0.3271597921848297, "learning_rate": 0.00013851988622683994, "loss": 11.6631, "step": 53638 }, { "epoch": 1.122812526165955, "grad_norm": 0.3629395365715027, "learning_rate": 0.00013851786287459632, "loss": 11.6731, "step": 53639 }, { "epoch": 1.1228334589299171, "grad_norm": 0.29066067934036255, "learning_rate": 0.00013851583950383637, "loss": 11.6772, "step": 53640 }, { "epoch": 1.1228543916938794, "grad_norm": 0.24523073434829712, "learning_rate": 0.00013851381611456112, "loss": 11.6535, "step": 53641 }, { "epoch": 1.1228753244578413, "grad_norm": 0.24338097870349884, "learning_rate": 0.0001385117927067715, "loss": 11.6715, "step": 53642 }, { "epoch": 1.1228962572218035, "grad_norm": 0.25408288836479187, "learning_rate": 0.0001385097692804685, "loss": 11.6641, "step": 53643 }, { "epoch": 1.1229171899857657, "grad_norm": 0.2800559103488922, "learning_rate": 0.00013850774583565304, "loss": 11.659, "step": 53644 }, { "epoch": 1.122938122749728, "grad_norm": 0.26830747723579407, "learning_rate": 0.00013850572237232618, "loss": 11.6599, "step": 53645 }, { "epoch": 1.1229590555136901, "grad_norm": 0.2870360314846039, "learning_rate": 0.00013850369889048883, "loss": 11.6704, "step": 53646 }, { "epoch": 1.122979988277652, "grad_norm": 0.28403419256210327, "learning_rate": 0.00013850167539014197, "loss": 11.6388, "step": 53647 }, { "epoch": 1.1230009210416143, "grad_norm": 0.3623047471046448, "learning_rate": 0.00013849965187128662, "loss": 11.6678, "step": 53648 }, { "epoch": 1.1230218538055765, "grad_norm": 0.322998046875, "learning_rate": 0.00013849762833392367, "loss": 11.6726, "step": 53649 }, { "epoch": 1.1230427865695387, "grad_norm": 0.368974506855011, "learning_rate": 0.0001384956047780542, "loss": 11.6584, "step": 53650 }, { "epoch": 1.123063719333501, "grad_norm": 0.32010871171951294, "learning_rate": 0.00013849358120367905, "loss": 11.6771, "step": 53651 }, { "epoch": 1.1230846520974629, "grad_norm": 0.5233447551727295, "learning_rate": 0.00013849155761079933, "loss": 11.6941, "step": 53652 }, { "epoch": 1.123105584861425, "grad_norm": 0.3616674244403839, "learning_rate": 0.0001384895339994159, "loss": 11.6728, "step": 53653 }, { "epoch": 1.1231265176253873, "grad_norm": 0.28605085611343384, "learning_rate": 0.00013848751036952978, "loss": 11.671, "step": 53654 }, { "epoch": 1.1231474503893495, "grad_norm": 0.49198639392852783, "learning_rate": 0.00013848548672114197, "loss": 11.6913, "step": 53655 }, { "epoch": 1.1231683831533115, "grad_norm": 0.3241472840309143, "learning_rate": 0.0001384834630542534, "loss": 11.6833, "step": 53656 }, { "epoch": 1.1231893159172737, "grad_norm": 0.29159122705459595, "learning_rate": 0.00013848143936886505, "loss": 11.6659, "step": 53657 }, { "epoch": 1.1232102486812359, "grad_norm": 0.3708442449569702, "learning_rate": 0.00013847941566497791, "loss": 11.6475, "step": 53658 }, { "epoch": 1.123231181445198, "grad_norm": 0.4399915933609009, "learning_rate": 0.00013847739194259296, "loss": 11.6897, "step": 53659 }, { "epoch": 1.1232521142091603, "grad_norm": 0.29330411553382874, "learning_rate": 0.0001384753682017111, "loss": 11.6641, "step": 53660 }, { "epoch": 1.1232730469731222, "grad_norm": 0.27941590547561646, "learning_rate": 0.00013847334444233342, "loss": 11.6807, "step": 53661 }, { "epoch": 1.1232939797370844, "grad_norm": 0.3869852125644684, "learning_rate": 0.00013847132066446081, "loss": 11.6768, "step": 53662 }, { "epoch": 1.1233149125010466, "grad_norm": 0.2852829694747925, "learning_rate": 0.00013846929686809427, "loss": 11.6609, "step": 53663 }, { "epoch": 1.1233358452650088, "grad_norm": 0.34176015853881836, "learning_rate": 0.00013846727305323478, "loss": 11.6471, "step": 53664 }, { "epoch": 1.123356778028971, "grad_norm": 0.30075350403785706, "learning_rate": 0.00013846524921988325, "loss": 11.6634, "step": 53665 }, { "epoch": 1.123377710792933, "grad_norm": 0.32796210050582886, "learning_rate": 0.00013846322536804076, "loss": 11.665, "step": 53666 }, { "epoch": 1.1233986435568952, "grad_norm": 0.32861238718032837, "learning_rate": 0.0001384612014977082, "loss": 11.6979, "step": 53667 }, { "epoch": 1.1234195763208574, "grad_norm": 0.26828262209892273, "learning_rate": 0.0001384591776088866, "loss": 11.6468, "step": 53668 }, { "epoch": 1.1234405090848196, "grad_norm": 0.301662415266037, "learning_rate": 0.00013845715370157686, "loss": 11.6739, "step": 53669 }, { "epoch": 1.1234614418487818, "grad_norm": 0.27895429730415344, "learning_rate": 0.00013845512977578002, "loss": 11.6636, "step": 53670 }, { "epoch": 1.1234823746127438, "grad_norm": 0.3361305892467499, "learning_rate": 0.00013845310583149702, "loss": 11.6692, "step": 53671 }, { "epoch": 1.123503307376706, "grad_norm": 0.33687669038772583, "learning_rate": 0.00013845108186872883, "loss": 11.6722, "step": 53672 }, { "epoch": 1.1235242401406682, "grad_norm": 0.36855387687683105, "learning_rate": 0.00013844905788747646, "loss": 11.676, "step": 53673 }, { "epoch": 1.1235451729046304, "grad_norm": 0.32653579115867615, "learning_rate": 0.00013844703388774085, "loss": 11.6741, "step": 53674 }, { "epoch": 1.1235661056685924, "grad_norm": 0.32347849011421204, "learning_rate": 0.00013844500986952296, "loss": 11.6614, "step": 53675 }, { "epoch": 1.1235870384325546, "grad_norm": 0.46339118480682373, "learning_rate": 0.00013844298583282384, "loss": 11.6838, "step": 53676 }, { "epoch": 1.1236079711965168, "grad_norm": 0.36971962451934814, "learning_rate": 0.00013844096177764432, "loss": 11.6623, "step": 53677 }, { "epoch": 1.123628903960479, "grad_norm": 0.3075348436832428, "learning_rate": 0.00013843893770398554, "loss": 11.6762, "step": 53678 }, { "epoch": 1.1236498367244412, "grad_norm": 0.3083623945713043, "learning_rate": 0.00013843691361184834, "loss": 11.6617, "step": 53679 }, { "epoch": 1.1236707694884032, "grad_norm": 0.30488571524620056, "learning_rate": 0.00013843488950123376, "loss": 11.6669, "step": 53680 }, { "epoch": 1.1236917022523654, "grad_norm": 0.48955613374710083, "learning_rate": 0.00013843286537214278, "loss": 11.5984, "step": 53681 }, { "epoch": 1.1237126350163276, "grad_norm": 0.3122434616088867, "learning_rate": 0.0001384308412245763, "loss": 11.6717, "step": 53682 }, { "epoch": 1.1237335677802898, "grad_norm": 0.33440732955932617, "learning_rate": 0.0001384288170585354, "loss": 11.6664, "step": 53683 }, { "epoch": 1.123754500544252, "grad_norm": 0.24660257995128632, "learning_rate": 0.00013842679287402095, "loss": 11.6546, "step": 53684 }, { "epoch": 1.123775433308214, "grad_norm": 0.4279652535915375, "learning_rate": 0.000138424768671034, "loss": 11.6549, "step": 53685 }, { "epoch": 1.1237963660721761, "grad_norm": 0.4102265536785126, "learning_rate": 0.0001384227444495755, "loss": 11.6596, "step": 53686 }, { "epoch": 1.1238172988361383, "grad_norm": 0.2807953357696533, "learning_rate": 0.00013842072020964642, "loss": 11.6605, "step": 53687 }, { "epoch": 1.1238382316001005, "grad_norm": 0.38046368956565857, "learning_rate": 0.0001384186959512477, "loss": 11.6505, "step": 53688 }, { "epoch": 1.1238591643640627, "grad_norm": 0.3542974889278412, "learning_rate": 0.00013841667167438035, "loss": 11.6672, "step": 53689 }, { "epoch": 1.1238800971280247, "grad_norm": 0.30883315205574036, "learning_rate": 0.00013841464737904534, "loss": 11.6688, "step": 53690 }, { "epoch": 1.123901029891987, "grad_norm": 0.38158825039863586, "learning_rate": 0.00013841262306524366, "loss": 11.6588, "step": 53691 }, { "epoch": 1.1239219626559491, "grad_norm": 0.30264824628829956, "learning_rate": 0.00013841059873297623, "loss": 11.6747, "step": 53692 }, { "epoch": 1.1239428954199113, "grad_norm": 0.3082192540168762, "learning_rate": 0.00013840857438224407, "loss": 11.6722, "step": 53693 }, { "epoch": 1.1239638281838733, "grad_norm": 0.3073667287826538, "learning_rate": 0.00013840655001304814, "loss": 11.6701, "step": 53694 }, { "epoch": 1.1239847609478355, "grad_norm": 0.31160032749176025, "learning_rate": 0.00013840452562538942, "loss": 11.6687, "step": 53695 }, { "epoch": 1.1240056937117977, "grad_norm": 0.35672104358673096, "learning_rate": 0.00013840250121926884, "loss": 11.6685, "step": 53696 }, { "epoch": 1.12402662647576, "grad_norm": 0.3197556138038635, "learning_rate": 0.00013840047679468745, "loss": 11.6594, "step": 53697 }, { "epoch": 1.124047559239722, "grad_norm": 0.2850464880466461, "learning_rate": 0.00013839845235164616, "loss": 11.6737, "step": 53698 }, { "epoch": 1.124068492003684, "grad_norm": 0.33066806197166443, "learning_rate": 0.00013839642789014597, "loss": 11.6725, "step": 53699 }, { "epoch": 1.1240894247676463, "grad_norm": 0.3696809709072113, "learning_rate": 0.00013839440341018787, "loss": 11.6815, "step": 53700 }, { "epoch": 1.1241103575316085, "grad_norm": 0.33322906494140625, "learning_rate": 0.00013839237891177277, "loss": 11.6833, "step": 53701 }, { "epoch": 1.1241312902955707, "grad_norm": 0.3517390489578247, "learning_rate": 0.0001383903543949017, "loss": 11.6716, "step": 53702 }, { "epoch": 1.1241522230595329, "grad_norm": 0.275887668132782, "learning_rate": 0.00013838832985957564, "loss": 11.6585, "step": 53703 }, { "epoch": 1.1241731558234949, "grad_norm": 0.363431841135025, "learning_rate": 0.0001383863053057955, "loss": 11.6469, "step": 53704 }, { "epoch": 1.124194088587457, "grad_norm": 0.3406514823436737, "learning_rate": 0.00013838428073356234, "loss": 11.6641, "step": 53705 }, { "epoch": 1.1242150213514193, "grad_norm": 1.04581618309021, "learning_rate": 0.00013838225614287705, "loss": 11.7082, "step": 53706 }, { "epoch": 1.1242359541153815, "grad_norm": 0.32062411308288574, "learning_rate": 0.00013838023153374063, "loss": 11.6645, "step": 53707 }, { "epoch": 1.1242568868793437, "grad_norm": 0.266716331243515, "learning_rate": 0.0001383782069061541, "loss": 11.6616, "step": 53708 }, { "epoch": 1.1242778196433056, "grad_norm": 0.29114067554473877, "learning_rate": 0.00013837618226011838, "loss": 11.6824, "step": 53709 }, { "epoch": 1.1242987524072678, "grad_norm": 0.4484676420688629, "learning_rate": 0.00013837415759563446, "loss": 11.6879, "step": 53710 }, { "epoch": 1.12431968517123, "grad_norm": 0.33605390787124634, "learning_rate": 0.00013837213291270333, "loss": 11.6775, "step": 53711 }, { "epoch": 1.1243406179351922, "grad_norm": 0.4012940526008606, "learning_rate": 0.0001383701082113259, "loss": 11.6586, "step": 53712 }, { "epoch": 1.1243615506991542, "grad_norm": 0.2717801034450531, "learning_rate": 0.00013836808349150322, "loss": 11.675, "step": 53713 }, { "epoch": 1.1243824834631164, "grad_norm": 0.39731910824775696, "learning_rate": 0.00013836605875323625, "loss": 11.6536, "step": 53714 }, { "epoch": 1.1244034162270786, "grad_norm": 0.3313542902469635, "learning_rate": 0.00013836403399652592, "loss": 11.6608, "step": 53715 }, { "epoch": 1.1244243489910408, "grad_norm": 0.37922751903533936, "learning_rate": 0.00013836200922137326, "loss": 11.6641, "step": 53716 }, { "epoch": 1.124445281755003, "grad_norm": 0.35565099120140076, "learning_rate": 0.00013835998442777918, "loss": 11.6766, "step": 53717 }, { "epoch": 1.124466214518965, "grad_norm": 0.3761347532272339, "learning_rate": 0.00013835795961574473, "loss": 11.6601, "step": 53718 }, { "epoch": 1.1244871472829272, "grad_norm": 0.30514222383499146, "learning_rate": 0.0001383559347852708, "loss": 11.6699, "step": 53719 }, { "epoch": 1.1245080800468894, "grad_norm": 0.317648321390152, "learning_rate": 0.0001383539099363584, "loss": 11.6692, "step": 53720 }, { "epoch": 1.1245290128108516, "grad_norm": 0.3402605652809143, "learning_rate": 0.00013835188506900852, "loss": 11.6816, "step": 53721 }, { "epoch": 1.1245499455748136, "grad_norm": 0.4533306062221527, "learning_rate": 0.00013834986018322214, "loss": 11.6732, "step": 53722 }, { "epoch": 1.1245708783387758, "grad_norm": 0.2559451460838318, "learning_rate": 0.0001383478352790002, "loss": 11.6529, "step": 53723 }, { "epoch": 1.124591811102738, "grad_norm": 0.3137607276439667, "learning_rate": 0.00013834581035634366, "loss": 11.6676, "step": 53724 }, { "epoch": 1.1246127438667002, "grad_norm": 0.2931186556816101, "learning_rate": 0.00013834378541525356, "loss": 11.6621, "step": 53725 }, { "epoch": 1.1246336766306624, "grad_norm": 0.3153534233570099, "learning_rate": 0.00013834176045573083, "loss": 11.6752, "step": 53726 }, { "epoch": 1.1246546093946246, "grad_norm": 0.30535298585891724, "learning_rate": 0.00013833973547777644, "loss": 11.6596, "step": 53727 }, { "epoch": 1.1246755421585866, "grad_norm": 0.34479212760925293, "learning_rate": 0.00013833771048139134, "loss": 11.6616, "step": 53728 }, { "epoch": 1.1246964749225488, "grad_norm": 0.3813090920448303, "learning_rate": 0.00013833568546657655, "loss": 11.6713, "step": 53729 }, { "epoch": 1.124717407686511, "grad_norm": 0.306007981300354, "learning_rate": 0.00013833366043333308, "loss": 11.6666, "step": 53730 }, { "epoch": 1.1247383404504732, "grad_norm": 0.3525991141796112, "learning_rate": 0.00013833163538166178, "loss": 11.6689, "step": 53731 }, { "epoch": 1.1247592732144351, "grad_norm": 0.3523148000240326, "learning_rate": 0.00013832961031156376, "loss": 11.6694, "step": 53732 }, { "epoch": 1.1247802059783973, "grad_norm": 0.323675274848938, "learning_rate": 0.00013832758522303986, "loss": 11.667, "step": 53733 }, { "epoch": 1.1248011387423595, "grad_norm": 0.2679122984409332, "learning_rate": 0.00013832556011609114, "loss": 11.6639, "step": 53734 }, { "epoch": 1.1248220715063217, "grad_norm": 0.31872913241386414, "learning_rate": 0.0001383235349907186, "loss": 11.6394, "step": 53735 }, { "epoch": 1.124843004270284, "grad_norm": 0.3475922644138336, "learning_rate": 0.00013832150984692312, "loss": 11.68, "step": 53736 }, { "epoch": 1.124863937034246, "grad_norm": 0.43831661343574524, "learning_rate": 0.00013831948468470576, "loss": 11.6649, "step": 53737 }, { "epoch": 1.1248848697982081, "grad_norm": 0.41684433817863464, "learning_rate": 0.00013831745950406742, "loss": 11.7013, "step": 53738 }, { "epoch": 1.1249058025621703, "grad_norm": 0.30086302757263184, "learning_rate": 0.0001383154343050091, "loss": 11.6686, "step": 53739 }, { "epoch": 1.1249267353261325, "grad_norm": 0.29953253269195557, "learning_rate": 0.00013831340908753182, "loss": 11.6676, "step": 53740 }, { "epoch": 1.1249476680900945, "grad_norm": 0.3399882912635803, "learning_rate": 0.00013831138385163652, "loss": 11.6593, "step": 53741 }, { "epoch": 1.1249686008540567, "grad_norm": 0.3772774338722229, "learning_rate": 0.00013830935859732418, "loss": 11.6675, "step": 53742 }, { "epoch": 1.124989533618019, "grad_norm": 0.2519489526748657, "learning_rate": 0.0001383073333245957, "loss": 11.6625, "step": 53743 }, { "epoch": 1.125010466381981, "grad_norm": 0.28107503056526184, "learning_rate": 0.00013830530803345215, "loss": 11.6718, "step": 53744 }, { "epoch": 1.1250313991459433, "grad_norm": 0.3350384533405304, "learning_rate": 0.0001383032827238945, "loss": 11.6714, "step": 53745 }, { "epoch": 1.1250523319099055, "grad_norm": 0.35171833634376526, "learning_rate": 0.00013830125739592366, "loss": 11.6651, "step": 53746 }, { "epoch": 1.1250732646738675, "grad_norm": 0.29450440406799316, "learning_rate": 0.00013829923204954065, "loss": 11.6488, "step": 53747 }, { "epoch": 1.1250941974378297, "grad_norm": 0.31601476669311523, "learning_rate": 0.00013829720668474642, "loss": 11.6677, "step": 53748 }, { "epoch": 1.1251151302017919, "grad_norm": 0.39709189534187317, "learning_rate": 0.00013829518130154198, "loss": 11.6799, "step": 53749 }, { "epoch": 1.125136062965754, "grad_norm": 0.33765149116516113, "learning_rate": 0.00013829315589992827, "loss": 11.663, "step": 53750 }, { "epoch": 1.125156995729716, "grad_norm": 0.2779832184314728, "learning_rate": 0.00013829113047990625, "loss": 11.6559, "step": 53751 }, { "epoch": 1.1251779284936783, "grad_norm": 0.31010881066322327, "learning_rate": 0.00013828910504147694, "loss": 11.6568, "step": 53752 }, { "epoch": 1.1251988612576405, "grad_norm": 0.3311915099620819, "learning_rate": 0.0001382870795846413, "loss": 11.6617, "step": 53753 }, { "epoch": 1.1252197940216027, "grad_norm": 0.4326685667037964, "learning_rate": 0.00013828505410940026, "loss": 11.684, "step": 53754 }, { "epoch": 1.1252407267855649, "grad_norm": 0.950551450252533, "learning_rate": 0.00013828302861575484, "loss": 11.6004, "step": 53755 }, { "epoch": 1.1252616595495268, "grad_norm": 0.3128608167171478, "learning_rate": 0.00013828100310370599, "loss": 11.6539, "step": 53756 }, { "epoch": 1.125282592313489, "grad_norm": 0.29142385721206665, "learning_rate": 0.00013827897757325472, "loss": 11.6857, "step": 53757 }, { "epoch": 1.1253035250774512, "grad_norm": 0.3939889073371887, "learning_rate": 0.00013827695202440197, "loss": 11.6746, "step": 53758 }, { "epoch": 1.1253244578414134, "grad_norm": 0.30513685941696167, "learning_rate": 0.00013827492645714873, "loss": 11.6674, "step": 53759 }, { "epoch": 1.1253453906053754, "grad_norm": 0.34296151995658875, "learning_rate": 0.00013827290087149594, "loss": 11.6839, "step": 53760 }, { "epoch": 1.1253663233693376, "grad_norm": 0.36989787220954895, "learning_rate": 0.0001382708752674446, "loss": 11.6572, "step": 53761 }, { "epoch": 1.1253872561332998, "grad_norm": 0.3965684473514557, "learning_rate": 0.0001382688496449957, "loss": 11.6891, "step": 53762 }, { "epoch": 1.125408188897262, "grad_norm": 0.22876346111297607, "learning_rate": 0.0001382668240041502, "loss": 11.6583, "step": 53763 }, { "epoch": 1.1254291216612242, "grad_norm": 0.2755540907382965, "learning_rate": 0.0001382647983449091, "loss": 11.6593, "step": 53764 }, { "epoch": 1.1254500544251864, "grad_norm": 0.32493147253990173, "learning_rate": 0.0001382627726672733, "loss": 11.67, "step": 53765 }, { "epoch": 1.1254709871891484, "grad_norm": 0.29798489809036255, "learning_rate": 0.00013826074697124383, "loss": 11.6659, "step": 53766 }, { "epoch": 1.1254919199531106, "grad_norm": 0.3383982479572296, "learning_rate": 0.00013825872125682165, "loss": 11.6699, "step": 53767 }, { "epoch": 1.1255128527170728, "grad_norm": 0.3216758966445923, "learning_rate": 0.00013825669552400773, "loss": 11.6556, "step": 53768 }, { "epoch": 1.125533785481035, "grad_norm": 0.2604678273200989, "learning_rate": 0.00013825466977280306, "loss": 11.6527, "step": 53769 }, { "epoch": 1.125554718244997, "grad_norm": 0.32084783911705017, "learning_rate": 0.0001382526440032086, "loss": 11.6674, "step": 53770 }, { "epoch": 1.1255756510089592, "grad_norm": 0.3071231544017792, "learning_rate": 0.00013825061821522535, "loss": 11.6687, "step": 53771 }, { "epoch": 1.1255965837729214, "grad_norm": 0.32988810539245605, "learning_rate": 0.00013824859240885423, "loss": 11.6825, "step": 53772 }, { "epoch": 1.1256175165368836, "grad_norm": 0.29856592416763306, "learning_rate": 0.00013824656658409623, "loss": 11.6728, "step": 53773 }, { "epoch": 1.1256384493008458, "grad_norm": 0.41202327609062195, "learning_rate": 0.00013824454074095235, "loss": 11.6742, "step": 53774 }, { "epoch": 1.1256593820648078, "grad_norm": 0.3113384544849396, "learning_rate": 0.00013824251487942358, "loss": 11.6832, "step": 53775 }, { "epoch": 1.12568031482877, "grad_norm": 0.42779865860939026, "learning_rate": 0.00013824048899951086, "loss": 11.6709, "step": 53776 }, { "epoch": 1.1257012475927322, "grad_norm": 0.34528133273124695, "learning_rate": 0.00013823846310121514, "loss": 11.6783, "step": 53777 }, { "epoch": 1.1257221803566944, "grad_norm": 0.3405764698982239, "learning_rate": 0.00013823643718453748, "loss": 11.6559, "step": 53778 }, { "epoch": 1.1257431131206563, "grad_norm": 0.2702476382255554, "learning_rate": 0.00013823441124947874, "loss": 11.645, "step": 53779 }, { "epoch": 1.1257640458846185, "grad_norm": 0.2718884348869324, "learning_rate": 0.00013823238529604, "loss": 11.6603, "step": 53780 }, { "epoch": 1.1257849786485807, "grad_norm": 0.3548433482646942, "learning_rate": 0.00013823035932422216, "loss": 11.6599, "step": 53781 }, { "epoch": 1.125805911412543, "grad_norm": 0.3004958927631378, "learning_rate": 0.00013822833333402624, "loss": 11.6517, "step": 53782 }, { "epoch": 1.1258268441765051, "grad_norm": 0.3158594071865082, "learning_rate": 0.00013822630732545317, "loss": 11.6673, "step": 53783 }, { "epoch": 1.1258477769404673, "grad_norm": 0.3521173894405365, "learning_rate": 0.00013822428129850396, "loss": 11.6519, "step": 53784 }, { "epoch": 1.1258687097044293, "grad_norm": 0.3357079327106476, "learning_rate": 0.00013822225525317956, "loss": 11.6704, "step": 53785 }, { "epoch": 1.1258896424683915, "grad_norm": 0.37227410078048706, "learning_rate": 0.00013822022918948097, "loss": 11.6809, "step": 53786 }, { "epoch": 1.1259105752323537, "grad_norm": 0.36427226662635803, "learning_rate": 0.00013821820310740913, "loss": 11.6758, "step": 53787 }, { "epoch": 1.125931507996316, "grad_norm": 0.3003414571285248, "learning_rate": 0.00013821617700696508, "loss": 11.6756, "step": 53788 }, { "epoch": 1.125952440760278, "grad_norm": 0.385952889919281, "learning_rate": 0.00013821415088814971, "loss": 11.6879, "step": 53789 }, { "epoch": 1.12597337352424, "grad_norm": 0.2689456641674042, "learning_rate": 0.000138212124750964, "loss": 11.6705, "step": 53790 }, { "epoch": 1.1259943062882023, "grad_norm": 0.30903008580207825, "learning_rate": 0.00013821009859540904, "loss": 11.6543, "step": 53791 }, { "epoch": 1.1260152390521645, "grad_norm": 0.3284243047237396, "learning_rate": 0.00013820807242148565, "loss": 11.6634, "step": 53792 }, { "epoch": 1.1260361718161267, "grad_norm": 0.2917432487010956, "learning_rate": 0.00013820604622919494, "loss": 11.6692, "step": 53793 }, { "epoch": 1.1260571045800887, "grad_norm": 0.3632246255874634, "learning_rate": 0.00013820402001853775, "loss": 11.6652, "step": 53794 }, { "epoch": 1.1260780373440509, "grad_norm": 0.3368183672428131, "learning_rate": 0.00013820199378951513, "loss": 11.6652, "step": 53795 }, { "epoch": 1.126098970108013, "grad_norm": 0.32674166560173035, "learning_rate": 0.00013819996754212808, "loss": 11.6711, "step": 53796 }, { "epoch": 1.1261199028719753, "grad_norm": 0.4087684750556946, "learning_rate": 0.0001381979412763775, "loss": 11.6484, "step": 53797 }, { "epoch": 1.1261408356359373, "grad_norm": 0.3007805049419403, "learning_rate": 0.00013819591499226447, "loss": 11.6896, "step": 53798 }, { "epoch": 1.1261617683998995, "grad_norm": 0.32994577288627625, "learning_rate": 0.00013819388868978983, "loss": 11.6674, "step": 53799 }, { "epoch": 1.1261827011638617, "grad_norm": 0.295380175113678, "learning_rate": 0.00013819186236895464, "loss": 11.684, "step": 53800 }, { "epoch": 1.1262036339278239, "grad_norm": 0.3120083510875702, "learning_rate": 0.00013818983602975987, "loss": 11.6629, "step": 53801 }, { "epoch": 1.126224566691786, "grad_norm": 0.2575138211250305, "learning_rate": 0.0001381878096722065, "loss": 11.6424, "step": 53802 }, { "epoch": 1.1262454994557483, "grad_norm": 0.3309801518917084, "learning_rate": 0.00013818578329629545, "loss": 11.6676, "step": 53803 }, { "epoch": 1.1262664322197102, "grad_norm": 0.5014107823371887, "learning_rate": 0.00013818375690202774, "loss": 11.6836, "step": 53804 }, { "epoch": 1.1262873649836724, "grad_norm": 0.3363341689109802, "learning_rate": 0.00013818173048940431, "loss": 11.663, "step": 53805 }, { "epoch": 1.1263082977476346, "grad_norm": 0.2563451826572418, "learning_rate": 0.0001381797040584262, "loss": 11.6586, "step": 53806 }, { "epoch": 1.1263292305115968, "grad_norm": 0.29655951261520386, "learning_rate": 0.0001381776776090943, "loss": 11.6621, "step": 53807 }, { "epoch": 1.1263501632755588, "grad_norm": 0.3549223840236664, "learning_rate": 0.00013817565114140966, "loss": 11.6651, "step": 53808 }, { "epoch": 1.126371096039521, "grad_norm": 0.31114712357521057, "learning_rate": 0.0001381736246553732, "loss": 11.6773, "step": 53809 }, { "epoch": 1.1263920288034832, "grad_norm": 0.39737141132354736, "learning_rate": 0.00013817159815098594, "loss": 11.673, "step": 53810 }, { "epoch": 1.1264129615674454, "grad_norm": 0.35998573899269104, "learning_rate": 0.0001381695716282488, "loss": 11.6701, "step": 53811 }, { "epoch": 1.1264338943314076, "grad_norm": 0.35073667764663696, "learning_rate": 0.00013816754508716276, "loss": 11.6619, "step": 53812 }, { "epoch": 1.1264548270953696, "grad_norm": 0.462887167930603, "learning_rate": 0.00013816551852772885, "loss": 11.6798, "step": 53813 }, { "epoch": 1.1264757598593318, "grad_norm": 0.37286365032196045, "learning_rate": 0.00013816349194994802, "loss": 11.6814, "step": 53814 }, { "epoch": 1.126496692623294, "grad_norm": 0.3522842526435852, "learning_rate": 0.0001381614653538212, "loss": 11.6585, "step": 53815 }, { "epoch": 1.1265176253872562, "grad_norm": 0.3378894031047821, "learning_rate": 0.00013815943873934943, "loss": 11.6641, "step": 53816 }, { "epoch": 1.1265385581512182, "grad_norm": 0.35486483573913574, "learning_rate": 0.00013815741210653363, "loss": 11.6742, "step": 53817 }, { "epoch": 1.1265594909151804, "grad_norm": 0.3332309126853943, "learning_rate": 0.00013815538545537483, "loss": 11.674, "step": 53818 }, { "epoch": 1.1265804236791426, "grad_norm": 0.32890093326568604, "learning_rate": 0.00013815335878587394, "loss": 11.6646, "step": 53819 }, { "epoch": 1.1266013564431048, "grad_norm": 0.33442673087120056, "learning_rate": 0.00013815133209803198, "loss": 11.6675, "step": 53820 }, { "epoch": 1.126622289207067, "grad_norm": 0.2847937047481537, "learning_rate": 0.0001381493053918499, "loss": 11.6514, "step": 53821 }, { "epoch": 1.1266432219710292, "grad_norm": 0.3394688367843628, "learning_rate": 0.0001381472786673287, "loss": 11.674, "step": 53822 }, { "epoch": 1.1266641547349912, "grad_norm": 0.2930799126625061, "learning_rate": 0.00013814525192446932, "loss": 11.6778, "step": 53823 }, { "epoch": 1.1266850874989534, "grad_norm": 0.3116927146911621, "learning_rate": 0.00013814322516327276, "loss": 11.666, "step": 53824 }, { "epoch": 1.1267060202629156, "grad_norm": 0.2934437394142151, "learning_rate": 0.00013814119838374, "loss": 11.6718, "step": 53825 }, { "epoch": 1.1267269530268778, "grad_norm": 0.31042465567588806, "learning_rate": 0.00013813917158587196, "loss": 11.6681, "step": 53826 }, { "epoch": 1.1267478857908397, "grad_norm": 0.3461572229862213, "learning_rate": 0.0001381371447696697, "loss": 11.6541, "step": 53827 }, { "epoch": 1.126768818554802, "grad_norm": 0.3314625918865204, "learning_rate": 0.00013813511793513413, "loss": 11.6764, "step": 53828 }, { "epoch": 1.1267897513187641, "grad_norm": 0.34931424260139465, "learning_rate": 0.00013813309108226627, "loss": 11.6636, "step": 53829 }, { "epoch": 1.1268106840827263, "grad_norm": 0.2871773838996887, "learning_rate": 0.00013813106421106704, "loss": 11.6696, "step": 53830 }, { "epoch": 1.1268316168466885, "grad_norm": 0.2693120241165161, "learning_rate": 0.00013812903732153745, "loss": 11.6789, "step": 53831 }, { "epoch": 1.1268525496106505, "grad_norm": 0.4090609550476074, "learning_rate": 0.00013812701041367848, "loss": 11.6767, "step": 53832 }, { "epoch": 1.1268734823746127, "grad_norm": 0.3098733425140381, "learning_rate": 0.0001381249834874911, "loss": 11.6688, "step": 53833 }, { "epoch": 1.126894415138575, "grad_norm": 0.3251407742500305, "learning_rate": 0.00013812295654297625, "loss": 11.6665, "step": 53834 }, { "epoch": 1.1269153479025371, "grad_norm": 0.2933349609375, "learning_rate": 0.00013812092958013494, "loss": 11.6551, "step": 53835 }, { "epoch": 1.126936280666499, "grad_norm": 0.29925858974456787, "learning_rate": 0.00013811890259896815, "loss": 11.6594, "step": 53836 }, { "epoch": 1.1269572134304613, "grad_norm": 0.3793950080871582, "learning_rate": 0.00013811687559947683, "loss": 11.6705, "step": 53837 }, { "epoch": 1.1269781461944235, "grad_norm": 0.2855895757675171, "learning_rate": 0.00013811484858166194, "loss": 11.6784, "step": 53838 }, { "epoch": 1.1269990789583857, "grad_norm": 0.4572063684463501, "learning_rate": 0.00013811282154552448, "loss": 11.6724, "step": 53839 }, { "epoch": 1.127020011722348, "grad_norm": 0.3496881127357483, "learning_rate": 0.00013811079449106546, "loss": 11.6659, "step": 53840 }, { "epoch": 1.12704094448631, "grad_norm": 0.30309632420539856, "learning_rate": 0.0001381087674182858, "loss": 11.6636, "step": 53841 }, { "epoch": 1.127061877250272, "grad_norm": 0.3008211851119995, "learning_rate": 0.0001381067403271865, "loss": 11.6647, "step": 53842 }, { "epoch": 1.1270828100142343, "grad_norm": 0.3028705418109894, "learning_rate": 0.0001381047132177685, "loss": 11.6815, "step": 53843 }, { "epoch": 1.1271037427781965, "grad_norm": 0.29913368821144104, "learning_rate": 0.0001381026860900328, "loss": 11.6622, "step": 53844 }, { "epoch": 1.1271246755421587, "grad_norm": 0.34614723920822144, "learning_rate": 0.0001381006589439804, "loss": 11.6658, "step": 53845 }, { "epoch": 1.1271456083061206, "grad_norm": 0.292721152305603, "learning_rate": 0.00013809863177961223, "loss": 11.6556, "step": 53846 }, { "epoch": 1.1271665410700829, "grad_norm": 0.3074434697628021, "learning_rate": 0.0001380966045969293, "loss": 11.664, "step": 53847 }, { "epoch": 1.127187473834045, "grad_norm": 0.38466304540634155, "learning_rate": 0.00013809457739593256, "loss": 11.6568, "step": 53848 }, { "epoch": 1.1272084065980073, "grad_norm": 0.4536283016204834, "learning_rate": 0.00013809255017662302, "loss": 11.6671, "step": 53849 }, { "epoch": 1.1272293393619695, "grad_norm": 0.42695754766464233, "learning_rate": 0.0001380905229390016, "loss": 11.6662, "step": 53850 }, { "epoch": 1.1272502721259314, "grad_norm": 0.3083970248699188, "learning_rate": 0.0001380884956830693, "loss": 11.6657, "step": 53851 }, { "epoch": 1.1272712048898936, "grad_norm": 0.3537205159664154, "learning_rate": 0.00013808646840882713, "loss": 11.6697, "step": 53852 }, { "epoch": 1.1272921376538558, "grad_norm": 0.330340713262558, "learning_rate": 0.00013808444111627598, "loss": 11.6568, "step": 53853 }, { "epoch": 1.127313070417818, "grad_norm": 0.30675938725471497, "learning_rate": 0.00013808241380541692, "loss": 11.6578, "step": 53854 }, { "epoch": 1.12733400318178, "grad_norm": 0.3343648910522461, "learning_rate": 0.00013808038647625086, "loss": 11.6888, "step": 53855 }, { "epoch": 1.1273549359457422, "grad_norm": 0.31262561678886414, "learning_rate": 0.00013807835912877877, "loss": 11.6615, "step": 53856 }, { "epoch": 1.1273758687097044, "grad_norm": 0.31239086389541626, "learning_rate": 0.0001380763317630017, "loss": 11.645, "step": 53857 }, { "epoch": 1.1273968014736666, "grad_norm": 0.307264506816864, "learning_rate": 0.00013807430437892055, "loss": 11.6689, "step": 53858 }, { "epoch": 1.1274177342376288, "grad_norm": 0.32116836309432983, "learning_rate": 0.00013807227697653634, "loss": 11.6638, "step": 53859 }, { "epoch": 1.127438667001591, "grad_norm": 0.3098521828651428, "learning_rate": 0.00013807024955585, "loss": 11.6816, "step": 53860 }, { "epoch": 1.127459599765553, "grad_norm": 0.27614879608154297, "learning_rate": 0.00013806822211686253, "loss": 11.6495, "step": 53861 }, { "epoch": 1.1274805325295152, "grad_norm": 0.3485625982284546, "learning_rate": 0.0001380661946595749, "loss": 11.6678, "step": 53862 }, { "epoch": 1.1275014652934774, "grad_norm": 0.33559614419937134, "learning_rate": 0.0001380641671839881, "loss": 11.6669, "step": 53863 }, { "epoch": 1.1275223980574396, "grad_norm": 0.25656577944755554, "learning_rate": 0.0001380621396901031, "loss": 11.6633, "step": 53864 }, { "epoch": 1.1275433308214016, "grad_norm": 0.30578312277793884, "learning_rate": 0.0001380601121779208, "loss": 11.677, "step": 53865 }, { "epoch": 1.1275642635853638, "grad_norm": 0.27216947078704834, "learning_rate": 0.00013805808464744233, "loss": 11.652, "step": 53866 }, { "epoch": 1.127585196349326, "grad_norm": 0.32773804664611816, "learning_rate": 0.00013805605709866852, "loss": 11.6624, "step": 53867 }, { "epoch": 1.1276061291132882, "grad_norm": 0.2686663866043091, "learning_rate": 0.00013805402953160045, "loss": 11.6474, "step": 53868 }, { "epoch": 1.1276270618772504, "grad_norm": 0.34769558906555176, "learning_rate": 0.00013805200194623899, "loss": 11.6704, "step": 53869 }, { "epoch": 1.1276479946412123, "grad_norm": 0.2854265570640564, "learning_rate": 0.0001380499743425852, "loss": 11.6655, "step": 53870 }, { "epoch": 1.1276689274051745, "grad_norm": 0.3056991696357727, "learning_rate": 0.00013804794672064, "loss": 11.664, "step": 53871 }, { "epoch": 1.1276898601691367, "grad_norm": 0.3361715078353882, "learning_rate": 0.00013804591908040442, "loss": 11.6943, "step": 53872 }, { "epoch": 1.127710792933099, "grad_norm": 0.3306812644004822, "learning_rate": 0.0001380438914218794, "loss": 11.672, "step": 53873 }, { "epoch": 1.127731725697061, "grad_norm": 0.3044206202030182, "learning_rate": 0.0001380418637450659, "loss": 11.6892, "step": 53874 }, { "epoch": 1.1277526584610231, "grad_norm": 0.2915840744972229, "learning_rate": 0.00013803983604996492, "loss": 11.6636, "step": 53875 }, { "epoch": 1.1277735912249853, "grad_norm": 0.2385156750679016, "learning_rate": 0.00013803780833657745, "loss": 11.672, "step": 53876 }, { "epoch": 1.1277945239889475, "grad_norm": 0.2981753349304199, "learning_rate": 0.00013803578060490442, "loss": 11.6724, "step": 53877 }, { "epoch": 1.1278154567529097, "grad_norm": 0.40320342779159546, "learning_rate": 0.00013803375285494684, "loss": 11.6616, "step": 53878 }, { "epoch": 1.127836389516872, "grad_norm": 0.274906188249588, "learning_rate": 0.00013803172508670568, "loss": 11.6672, "step": 53879 }, { "epoch": 1.127857322280834, "grad_norm": 0.3360411822795868, "learning_rate": 0.00013802969730018189, "loss": 11.6626, "step": 53880 }, { "epoch": 1.127878255044796, "grad_norm": 0.30454447865486145, "learning_rate": 0.00013802766949537647, "loss": 11.6738, "step": 53881 }, { "epoch": 1.1278991878087583, "grad_norm": 0.3179413378238678, "learning_rate": 0.0001380256416722904, "loss": 11.6603, "step": 53882 }, { "epoch": 1.1279201205727205, "grad_norm": 0.383603572845459, "learning_rate": 0.00013802361383092462, "loss": 11.6473, "step": 53883 }, { "epoch": 1.1279410533366825, "grad_norm": 0.417035847902298, "learning_rate": 0.00013802158597128012, "loss": 11.6571, "step": 53884 }, { "epoch": 1.1279619861006447, "grad_norm": 0.2869521379470825, "learning_rate": 0.00013801955809335793, "loss": 11.6777, "step": 53885 }, { "epoch": 1.1279829188646069, "grad_norm": 0.3403966724872589, "learning_rate": 0.0001380175301971589, "loss": 11.6776, "step": 53886 }, { "epoch": 1.128003851628569, "grad_norm": 0.26670658588409424, "learning_rate": 0.00013801550228268413, "loss": 11.6574, "step": 53887 }, { "epoch": 1.1280247843925313, "grad_norm": 0.2975500226020813, "learning_rate": 0.00013801347434993456, "loss": 11.6772, "step": 53888 }, { "epoch": 1.1280457171564933, "grad_norm": 0.37174472212791443, "learning_rate": 0.00013801144639891114, "loss": 11.6826, "step": 53889 }, { "epoch": 1.1280666499204555, "grad_norm": 0.3606935143470764, "learning_rate": 0.00013800941842961482, "loss": 11.6849, "step": 53890 }, { "epoch": 1.1280875826844177, "grad_norm": 0.3177192807197571, "learning_rate": 0.00013800739044204664, "loss": 11.663, "step": 53891 }, { "epoch": 1.1281085154483799, "grad_norm": 0.28466176986694336, "learning_rate": 0.00013800536243620756, "loss": 11.6702, "step": 53892 }, { "epoch": 1.1281294482123418, "grad_norm": 0.328693687915802, "learning_rate": 0.00013800333441209853, "loss": 11.6825, "step": 53893 }, { "epoch": 1.128150380976304, "grad_norm": 0.32038819789886475, "learning_rate": 0.00013800130636972054, "loss": 11.6589, "step": 53894 }, { "epoch": 1.1281713137402662, "grad_norm": 0.29620984196662903, "learning_rate": 0.00013799927830907452, "loss": 11.666, "step": 53895 }, { "epoch": 1.1281922465042284, "grad_norm": 0.4476529359817505, "learning_rate": 0.00013799725023016152, "loss": 11.6704, "step": 53896 }, { "epoch": 1.1282131792681906, "grad_norm": 0.3000246286392212, "learning_rate": 0.0001379952221329825, "loss": 11.6649, "step": 53897 }, { "epoch": 1.1282341120321528, "grad_norm": 0.3610839545726776, "learning_rate": 0.00013799319401753835, "loss": 11.6597, "step": 53898 }, { "epoch": 1.1282550447961148, "grad_norm": 0.3855489194393158, "learning_rate": 0.00013799116588383018, "loss": 11.668, "step": 53899 }, { "epoch": 1.128275977560077, "grad_norm": 0.33634793758392334, "learning_rate": 0.00013798913773185883, "loss": 11.6772, "step": 53900 }, { "epoch": 1.1282969103240392, "grad_norm": 0.39507800340652466, "learning_rate": 0.0001379871095616254, "loss": 11.6635, "step": 53901 }, { "epoch": 1.1283178430880014, "grad_norm": 0.3329138457775116, "learning_rate": 0.00013798508137313078, "loss": 11.6633, "step": 53902 }, { "epoch": 1.1283387758519634, "grad_norm": 0.2852499783039093, "learning_rate": 0.00013798305316637598, "loss": 11.6767, "step": 53903 }, { "epoch": 1.1283597086159256, "grad_norm": 0.2835591435432434, "learning_rate": 0.00013798102494136196, "loss": 11.6714, "step": 53904 }, { "epoch": 1.1283806413798878, "grad_norm": 0.2828633785247803, "learning_rate": 0.0001379789966980897, "loss": 11.66, "step": 53905 }, { "epoch": 1.12840157414385, "grad_norm": 0.4575608968734741, "learning_rate": 0.00013797696843656016, "loss": 11.6899, "step": 53906 }, { "epoch": 1.1284225069078122, "grad_norm": 0.3312711715698242, "learning_rate": 0.00013797494015677434, "loss": 11.6749, "step": 53907 }, { "epoch": 1.1284434396717742, "grad_norm": 0.3295227289199829, "learning_rate": 0.00013797291185873323, "loss": 11.671, "step": 53908 }, { "epoch": 1.1284643724357364, "grad_norm": 0.3525940477848053, "learning_rate": 0.00013797088354243774, "loss": 11.6738, "step": 53909 }, { "epoch": 1.1284853051996986, "grad_norm": 0.32685261964797974, "learning_rate": 0.00013796885520788889, "loss": 11.6645, "step": 53910 }, { "epoch": 1.1285062379636608, "grad_norm": 0.2682519853115082, "learning_rate": 0.00013796682685508768, "loss": 11.6769, "step": 53911 }, { "epoch": 1.1285271707276228, "grad_norm": 0.3572924733161926, "learning_rate": 0.00013796479848403502, "loss": 11.6706, "step": 53912 }, { "epoch": 1.128548103491585, "grad_norm": 0.36086925864219666, "learning_rate": 0.00013796277009473198, "loss": 11.681, "step": 53913 }, { "epoch": 1.1285690362555472, "grad_norm": 0.41635239124298096, "learning_rate": 0.0001379607416871794, "loss": 11.6716, "step": 53914 }, { "epoch": 1.1285899690195094, "grad_norm": 0.2801326811313629, "learning_rate": 0.00013795871326137838, "loss": 11.6642, "step": 53915 }, { "epoch": 1.1286109017834716, "grad_norm": 1.735469102859497, "learning_rate": 0.00013795668481732983, "loss": 11.6374, "step": 53916 }, { "epoch": 1.1286318345474338, "grad_norm": 0.28968098759651184, "learning_rate": 0.00013795465635503472, "loss": 11.6622, "step": 53917 }, { "epoch": 1.1286527673113957, "grad_norm": 0.3271254301071167, "learning_rate": 0.0001379526278744941, "loss": 11.6525, "step": 53918 }, { "epoch": 1.128673700075358, "grad_norm": 0.35335832834243774, "learning_rate": 0.00013795059937570886, "loss": 11.6678, "step": 53919 }, { "epoch": 1.1286946328393201, "grad_norm": 0.37478387355804443, "learning_rate": 0.00013794857085868003, "loss": 11.6744, "step": 53920 }, { "epoch": 1.1287155656032823, "grad_norm": 0.30770379304885864, "learning_rate": 0.00013794654232340852, "loss": 11.6798, "step": 53921 }, { "epoch": 1.1287364983672443, "grad_norm": 0.25078126788139343, "learning_rate": 0.00013794451376989534, "loss": 11.647, "step": 53922 }, { "epoch": 1.1287574311312065, "grad_norm": 0.2990522086620331, "learning_rate": 0.0001379424851981415, "loss": 11.6645, "step": 53923 }, { "epoch": 1.1287783638951687, "grad_norm": 0.3152318298816681, "learning_rate": 0.00013794045660814796, "loss": 11.672, "step": 53924 }, { "epoch": 1.128799296659131, "grad_norm": 0.3112824559211731, "learning_rate": 0.00013793842799991567, "loss": 11.6662, "step": 53925 }, { "epoch": 1.1288202294230931, "grad_norm": 0.30536016821861267, "learning_rate": 0.0001379363993734456, "loss": 11.6674, "step": 53926 }, { "epoch": 1.128841162187055, "grad_norm": 0.3438185751438141, "learning_rate": 0.00013793437072873876, "loss": 11.6761, "step": 53927 }, { "epoch": 1.1288620949510173, "grad_norm": 0.3569568991661072, "learning_rate": 0.0001379323420657961, "loss": 11.688, "step": 53928 }, { "epoch": 1.1288830277149795, "grad_norm": 0.36851540207862854, "learning_rate": 0.0001379303133846186, "loss": 11.6682, "step": 53929 }, { "epoch": 1.1289039604789417, "grad_norm": 0.2789979577064514, "learning_rate": 0.00013792828468520726, "loss": 11.6691, "step": 53930 }, { "epoch": 1.1289248932429037, "grad_norm": 0.5375877618789673, "learning_rate": 0.000137926255967563, "loss": 11.6868, "step": 53931 }, { "epoch": 1.1289458260068659, "grad_norm": 0.36572134494781494, "learning_rate": 0.00013792422723168685, "loss": 11.6626, "step": 53932 }, { "epoch": 1.128966758770828, "grad_norm": 0.30530601739883423, "learning_rate": 0.00013792219847757976, "loss": 11.6657, "step": 53933 }, { "epoch": 1.1289876915347903, "grad_norm": 0.35130369663238525, "learning_rate": 0.0001379201697052427, "loss": 11.6592, "step": 53934 }, { "epoch": 1.1290086242987525, "grad_norm": 0.29210710525512695, "learning_rate": 0.00013791814091467669, "loss": 11.6591, "step": 53935 }, { "epoch": 1.1290295570627147, "grad_norm": 0.3974757194519043, "learning_rate": 0.00013791611210588265, "loss": 11.6787, "step": 53936 }, { "epoch": 1.1290504898266767, "grad_norm": 0.3156868815422058, "learning_rate": 0.00013791408327886156, "loss": 11.6528, "step": 53937 }, { "epoch": 1.1290714225906389, "grad_norm": 0.3069523870944977, "learning_rate": 0.00013791205443361443, "loss": 11.6713, "step": 53938 }, { "epoch": 1.129092355354601, "grad_norm": 0.3199699819087982, "learning_rate": 0.00013791002557014218, "loss": 11.6641, "step": 53939 }, { "epoch": 1.1291132881185633, "grad_norm": 0.7530558705329895, "learning_rate": 0.00013790799668844588, "loss": 11.6934, "step": 53940 }, { "epoch": 1.1291342208825252, "grad_norm": 0.34088531136512756, "learning_rate": 0.0001379059677885264, "loss": 11.6738, "step": 53941 }, { "epoch": 1.1291551536464874, "grad_norm": 0.3061467707157135, "learning_rate": 0.00013790393887038477, "loss": 11.674, "step": 53942 }, { "epoch": 1.1291760864104496, "grad_norm": 0.2579041123390198, "learning_rate": 0.00013790190993402196, "loss": 11.6758, "step": 53943 }, { "epoch": 1.1291970191744118, "grad_norm": 0.38872721791267395, "learning_rate": 0.00013789988097943892, "loss": 11.6711, "step": 53944 }, { "epoch": 1.129217951938374, "grad_norm": 0.29174816608428955, "learning_rate": 0.0001378978520066367, "loss": 11.6764, "step": 53945 }, { "epoch": 1.129238884702336, "grad_norm": 0.25944018363952637, "learning_rate": 0.00013789582301561618, "loss": 11.6584, "step": 53946 }, { "epoch": 1.1292598174662982, "grad_norm": 0.3248864710330963, "learning_rate": 0.0001378937940063784, "loss": 11.6672, "step": 53947 }, { "epoch": 1.1292807502302604, "grad_norm": 0.2689329981803894, "learning_rate": 0.00013789176497892432, "loss": 11.6764, "step": 53948 }, { "epoch": 1.1293016829942226, "grad_norm": 0.314255952835083, "learning_rate": 0.00013788973593325488, "loss": 11.681, "step": 53949 }, { "epoch": 1.1293226157581846, "grad_norm": 0.3571151793003082, "learning_rate": 0.00013788770686937112, "loss": 11.6846, "step": 53950 }, { "epoch": 1.1293435485221468, "grad_norm": 0.3198911249637604, "learning_rate": 0.00013788567778727395, "loss": 11.6552, "step": 53951 }, { "epoch": 1.129364481286109, "grad_norm": 0.2556760907173157, "learning_rate": 0.0001378836486869644, "loss": 11.6765, "step": 53952 }, { "epoch": 1.1293854140500712, "grad_norm": 0.3084714114665985, "learning_rate": 0.0001378816195684434, "loss": 11.6711, "step": 53953 }, { "epoch": 1.1294063468140334, "grad_norm": 0.2705395817756653, "learning_rate": 0.00013787959043171192, "loss": 11.6587, "step": 53954 }, { "epoch": 1.1294272795779956, "grad_norm": 0.30467545986175537, "learning_rate": 0.00013787756127677103, "loss": 11.6585, "step": 53955 }, { "epoch": 1.1294482123419576, "grad_norm": 0.31614455580711365, "learning_rate": 0.00013787553210362158, "loss": 11.6793, "step": 53956 }, { "epoch": 1.1294691451059198, "grad_norm": 0.28304171562194824, "learning_rate": 0.00013787350291226465, "loss": 11.657, "step": 53957 }, { "epoch": 1.129490077869882, "grad_norm": 0.38322028517723083, "learning_rate": 0.00013787147370270114, "loss": 11.6758, "step": 53958 }, { "epoch": 1.1295110106338442, "grad_norm": 0.2579883635044098, "learning_rate": 0.00013786944447493203, "loss": 11.6518, "step": 53959 }, { "epoch": 1.1295319433978062, "grad_norm": 0.30657172203063965, "learning_rate": 0.00013786741522895835, "loss": 11.6737, "step": 53960 }, { "epoch": 1.1295528761617684, "grad_norm": 0.2936704456806183, "learning_rate": 0.00013786538596478106, "loss": 11.661, "step": 53961 }, { "epoch": 1.1295738089257306, "grad_norm": 0.33568230271339417, "learning_rate": 0.00013786335668240112, "loss": 11.665, "step": 53962 }, { "epoch": 1.1295947416896928, "grad_norm": 0.33160921931266785, "learning_rate": 0.00013786132738181946, "loss": 11.67, "step": 53963 }, { "epoch": 1.129615674453655, "grad_norm": 0.33059853315353394, "learning_rate": 0.0001378592980630371, "loss": 11.6535, "step": 53964 }, { "epoch": 1.129636607217617, "grad_norm": 0.3117356300354004, "learning_rate": 0.00013785726872605507, "loss": 11.6897, "step": 53965 }, { "epoch": 1.1296575399815791, "grad_norm": 0.32487475872039795, "learning_rate": 0.00013785523937087426, "loss": 11.6463, "step": 53966 }, { "epoch": 1.1296784727455413, "grad_norm": 0.2522028982639313, "learning_rate": 0.0001378532099974957, "loss": 11.6619, "step": 53967 }, { "epoch": 1.1296994055095035, "grad_norm": 0.41757526993751526, "learning_rate": 0.0001378511806059203, "loss": 11.6712, "step": 53968 }, { "epoch": 1.1297203382734655, "grad_norm": 0.33401989936828613, "learning_rate": 0.00013784915119614913, "loss": 11.6673, "step": 53969 }, { "epoch": 1.1297412710374277, "grad_norm": 0.24882249534130096, "learning_rate": 0.0001378471217681831, "loss": 11.6495, "step": 53970 }, { "epoch": 1.12976220380139, "grad_norm": 0.46317577362060547, "learning_rate": 0.00013784509232202317, "loss": 11.6883, "step": 53971 }, { "epoch": 1.1297831365653521, "grad_norm": 0.31625837087631226, "learning_rate": 0.0001378430628576704, "loss": 11.6715, "step": 53972 }, { "epoch": 1.1298040693293143, "grad_norm": 0.2951779365539551, "learning_rate": 0.00013784103337512567, "loss": 11.6654, "step": 53973 }, { "epoch": 1.1298250020932765, "grad_norm": 0.33097830414772034, "learning_rate": 0.00013783900387439, "loss": 11.6607, "step": 53974 }, { "epoch": 1.1298459348572385, "grad_norm": 0.321498304605484, "learning_rate": 0.00013783697435546435, "loss": 11.668, "step": 53975 }, { "epoch": 1.1298668676212007, "grad_norm": 0.26119306683540344, "learning_rate": 0.0001378349448183497, "loss": 11.6546, "step": 53976 }, { "epoch": 1.129887800385163, "grad_norm": 0.3478969633579254, "learning_rate": 0.00013783291526304708, "loss": 11.6695, "step": 53977 }, { "epoch": 1.129908733149125, "grad_norm": 0.3697284758090973, "learning_rate": 0.0001378308856895574, "loss": 11.6658, "step": 53978 }, { "epoch": 1.129929665913087, "grad_norm": 0.24813427031040192, "learning_rate": 0.00013782885609788168, "loss": 11.6419, "step": 53979 }, { "epoch": 1.1299505986770493, "grad_norm": 0.36601296067237854, "learning_rate": 0.00013782682648802084, "loss": 11.6561, "step": 53980 }, { "epoch": 1.1299715314410115, "grad_norm": 0.3706125020980835, "learning_rate": 0.00013782479685997586, "loss": 11.655, "step": 53981 }, { "epoch": 1.1299924642049737, "grad_norm": 0.3079054057598114, "learning_rate": 0.0001378227672137478, "loss": 11.6507, "step": 53982 }, { "epoch": 1.1300133969689359, "grad_norm": 0.3024742007255554, "learning_rate": 0.00013782073754933753, "loss": 11.6845, "step": 53983 }, { "epoch": 1.1300343297328979, "grad_norm": 0.4078061878681183, "learning_rate": 0.00013781870786674612, "loss": 11.681, "step": 53984 }, { "epoch": 1.13005526249686, "grad_norm": 1.966847538948059, "learning_rate": 0.00013781667816597446, "loss": 11.6339, "step": 53985 }, { "epoch": 1.1300761952608223, "grad_norm": 0.2783757150173187, "learning_rate": 0.00013781464844702355, "loss": 11.6689, "step": 53986 }, { "epoch": 1.1300971280247845, "grad_norm": 0.3204309344291687, "learning_rate": 0.00013781261870989445, "loss": 11.6657, "step": 53987 }, { "epoch": 1.1301180607887464, "grad_norm": 0.363432377576828, "learning_rate": 0.00013781058895458802, "loss": 11.6688, "step": 53988 }, { "epoch": 1.1301389935527086, "grad_norm": 0.4123704135417938, "learning_rate": 0.0001378085591811053, "loss": 11.6818, "step": 53989 }, { "epoch": 1.1301599263166708, "grad_norm": 0.322346568107605, "learning_rate": 0.00013780652938944724, "loss": 11.6685, "step": 53990 }, { "epoch": 1.130180859080633, "grad_norm": 0.289264976978302, "learning_rate": 0.00013780449957961482, "loss": 11.6657, "step": 53991 }, { "epoch": 1.1302017918445952, "grad_norm": 0.2777012288570404, "learning_rate": 0.000137802469751609, "loss": 11.6849, "step": 53992 }, { "epoch": 1.1302227246085574, "grad_norm": 0.33190441131591797, "learning_rate": 0.00013780043990543078, "loss": 11.6721, "step": 53993 }, { "epoch": 1.1302436573725194, "grad_norm": 0.34876808524131775, "learning_rate": 0.00013779841004108115, "loss": 11.6697, "step": 53994 }, { "epoch": 1.1302645901364816, "grad_norm": 0.2776780426502228, "learning_rate": 0.00013779638015856108, "loss": 11.671, "step": 53995 }, { "epoch": 1.1302855229004438, "grad_norm": 0.36741575598716736, "learning_rate": 0.0001377943502578715, "loss": 11.6677, "step": 53996 }, { "epoch": 1.130306455664406, "grad_norm": 0.3167799115180969, "learning_rate": 0.00013779232033901342, "loss": 11.6524, "step": 53997 }, { "epoch": 1.130327388428368, "grad_norm": 0.406156450510025, "learning_rate": 0.00013779029040198782, "loss": 11.6502, "step": 53998 }, { "epoch": 1.1303483211923302, "grad_norm": 0.3140547275543213, "learning_rate": 0.0001377882604467957, "loss": 11.6784, "step": 53999 }, { "epoch": 1.1303692539562924, "grad_norm": 0.28335249423980713, "learning_rate": 0.00013778623047343798, "loss": 11.6766, "step": 54000 }, { "epoch": 1.1303692539562924, "eval_loss": 11.66841983795166, "eval_runtime": 34.2707, "eval_samples_per_second": 28.041, "eval_steps_per_second": 7.032, "step": 54000 }, { "epoch": 1.1303901867202546, "grad_norm": 0.35113757848739624, "learning_rate": 0.0001377842004819157, "loss": 11.6505, "step": 54001 }, { "epoch": 1.1304111194842168, "grad_norm": 0.3694671094417572, "learning_rate": 0.00013778217047222973, "loss": 11.6861, "step": 54002 }, { "epoch": 1.1304320522481788, "grad_norm": 0.3102453649044037, "learning_rate": 0.00013778014044438114, "loss": 11.6635, "step": 54003 }, { "epoch": 1.130452985012141, "grad_norm": 0.34771403670310974, "learning_rate": 0.0001377781103983709, "loss": 11.6581, "step": 54004 }, { "epoch": 1.1304739177761032, "grad_norm": 0.326265424489975, "learning_rate": 0.00013777608033419995, "loss": 11.6587, "step": 54005 }, { "epoch": 1.1304948505400654, "grad_norm": 0.31880202889442444, "learning_rate": 0.0001377740502518693, "loss": 11.6723, "step": 54006 }, { "epoch": 1.1305157833040274, "grad_norm": 0.34814271330833435, "learning_rate": 0.00013777202015137987, "loss": 11.6575, "step": 54007 }, { "epoch": 1.1305367160679896, "grad_norm": 0.3218432068824768, "learning_rate": 0.00013776999003273268, "loss": 11.6514, "step": 54008 }, { "epoch": 1.1305576488319518, "grad_norm": 0.3258248567581177, "learning_rate": 0.00013776795989592873, "loss": 11.668, "step": 54009 }, { "epoch": 1.130578581595914, "grad_norm": 0.30917513370513916, "learning_rate": 0.00013776592974096893, "loss": 11.6675, "step": 54010 }, { "epoch": 1.1305995143598762, "grad_norm": 0.28449422121047974, "learning_rate": 0.00013776389956785432, "loss": 11.6563, "step": 54011 }, { "epoch": 1.1306204471238381, "grad_norm": 0.23921935260295868, "learning_rate": 0.00013776186937658583, "loss": 11.6661, "step": 54012 }, { "epoch": 1.1306413798878003, "grad_norm": 0.3121386170387268, "learning_rate": 0.00013775983916716446, "loss": 11.673, "step": 54013 }, { "epoch": 1.1306623126517625, "grad_norm": 0.38668444752693176, "learning_rate": 0.0001377578089395912, "loss": 11.6699, "step": 54014 }, { "epoch": 1.1306832454157247, "grad_norm": 0.46382445096969604, "learning_rate": 0.00013775577869386694, "loss": 11.6796, "step": 54015 }, { "epoch": 1.130704178179687, "grad_norm": 0.3269175887107849, "learning_rate": 0.0001377537484299928, "loss": 11.6731, "step": 54016 }, { "epoch": 1.130725110943649, "grad_norm": 0.27139991521835327, "learning_rate": 0.0001377517181479696, "loss": 11.6643, "step": 54017 }, { "epoch": 1.1307460437076111, "grad_norm": 0.3212091028690338, "learning_rate": 0.00013774968784779844, "loss": 11.6558, "step": 54018 }, { "epoch": 1.1307669764715733, "grad_norm": 0.32529106736183167, "learning_rate": 0.0001377476575294802, "loss": 11.6805, "step": 54019 }, { "epoch": 1.1307879092355355, "grad_norm": 0.31438517570495605, "learning_rate": 0.00013774562719301596, "loss": 11.6657, "step": 54020 }, { "epoch": 1.1308088419994977, "grad_norm": 0.32168132066726685, "learning_rate": 0.00013774359683840662, "loss": 11.6721, "step": 54021 }, { "epoch": 1.1308297747634597, "grad_norm": 0.3823481798171997, "learning_rate": 0.0001377415664656532, "loss": 11.6585, "step": 54022 }, { "epoch": 1.130850707527422, "grad_norm": 0.3367629945278168, "learning_rate": 0.00013773953607475664, "loss": 11.663, "step": 54023 }, { "epoch": 1.130871640291384, "grad_norm": 0.3151356875896454, "learning_rate": 0.0001377375056657179, "loss": 11.6489, "step": 54024 }, { "epoch": 1.1308925730553463, "grad_norm": 0.2927524149417877, "learning_rate": 0.000137735475238538, "loss": 11.6689, "step": 54025 }, { "epoch": 1.1309135058193083, "grad_norm": 0.3087892532348633, "learning_rate": 0.00013773344479321793, "loss": 11.6784, "step": 54026 }, { "epoch": 1.1309344385832705, "grad_norm": 0.3203635513782501, "learning_rate": 0.0001377314143297586, "loss": 11.6526, "step": 54027 }, { "epoch": 1.1309553713472327, "grad_norm": 0.3184250593185425, "learning_rate": 0.00013772938384816103, "loss": 11.671, "step": 54028 }, { "epoch": 1.1309763041111949, "grad_norm": 0.2923976182937622, "learning_rate": 0.00013772735334842618, "loss": 11.6601, "step": 54029 }, { "epoch": 1.130997236875157, "grad_norm": 0.3083982765674591, "learning_rate": 0.00013772532283055507, "loss": 11.6759, "step": 54030 }, { "epoch": 1.131018169639119, "grad_norm": 0.34782856702804565, "learning_rate": 0.00013772329229454861, "loss": 11.6771, "step": 54031 }, { "epoch": 1.1310391024030813, "grad_norm": 0.5143877267837524, "learning_rate": 0.00013772126174040782, "loss": 11.6603, "step": 54032 }, { "epoch": 1.1310600351670435, "grad_norm": 0.36089980602264404, "learning_rate": 0.00013771923116813367, "loss": 11.6718, "step": 54033 }, { "epoch": 1.1310809679310057, "grad_norm": 0.30784299969673157, "learning_rate": 0.00013771720057772713, "loss": 11.6742, "step": 54034 }, { "epoch": 1.1311019006949679, "grad_norm": 0.3000766932964325, "learning_rate": 0.00013771516996918917, "loss": 11.6776, "step": 54035 }, { "epoch": 1.1311228334589298, "grad_norm": 0.27270248532295227, "learning_rate": 0.00013771313934252076, "loss": 11.6714, "step": 54036 }, { "epoch": 1.131143766222892, "grad_norm": 0.37602412700653076, "learning_rate": 0.00013771110869772288, "loss": 11.6569, "step": 54037 }, { "epoch": 1.1311646989868542, "grad_norm": 0.3302326202392578, "learning_rate": 0.00013770907803479655, "loss": 11.6809, "step": 54038 }, { "epoch": 1.1311856317508164, "grad_norm": 0.33101147413253784, "learning_rate": 0.0001377070473537427, "loss": 11.6787, "step": 54039 }, { "epoch": 1.1312065645147786, "grad_norm": 0.3147676885128021, "learning_rate": 0.00013770501665456232, "loss": 11.6789, "step": 54040 }, { "epoch": 1.1312274972787406, "grad_norm": 0.3374026119709015, "learning_rate": 0.00013770298593725633, "loss": 11.6633, "step": 54041 }, { "epoch": 1.1312484300427028, "grad_norm": 0.29657500982284546, "learning_rate": 0.00013770095520182583, "loss": 11.6556, "step": 54042 }, { "epoch": 1.131269362806665, "grad_norm": 0.3051241338253021, "learning_rate": 0.00013769892444827169, "loss": 11.658, "step": 54043 }, { "epoch": 1.1312902955706272, "grad_norm": 0.2870519757270813, "learning_rate": 0.00013769689367659492, "loss": 11.6692, "step": 54044 }, { "epoch": 1.1313112283345892, "grad_norm": 0.2991538345813751, "learning_rate": 0.00013769486288679652, "loss": 11.6613, "step": 54045 }, { "epoch": 1.1313321610985514, "grad_norm": 0.2848963737487793, "learning_rate": 0.00013769283207887742, "loss": 11.6577, "step": 54046 }, { "epoch": 1.1313530938625136, "grad_norm": 0.3145105838775635, "learning_rate": 0.00013769080125283862, "loss": 11.6659, "step": 54047 }, { "epoch": 1.1313740266264758, "grad_norm": 0.3746292293071747, "learning_rate": 0.00013768877040868112, "loss": 11.6558, "step": 54048 }, { "epoch": 1.131394959390438, "grad_norm": 0.3189907670021057, "learning_rate": 0.00013768673954640584, "loss": 11.673, "step": 54049 }, { "epoch": 1.1314158921544, "grad_norm": 0.28806647658348083, "learning_rate": 0.00013768470866601384, "loss": 11.6693, "step": 54050 }, { "epoch": 1.1314368249183622, "grad_norm": 0.3107885718345642, "learning_rate": 0.00013768267776750602, "loss": 11.6801, "step": 54051 }, { "epoch": 1.1314577576823244, "grad_norm": 0.31969910860061646, "learning_rate": 0.00013768064685088335, "loss": 11.6706, "step": 54052 }, { "epoch": 1.1314786904462866, "grad_norm": 0.3912142217159271, "learning_rate": 0.00013767861591614688, "loss": 11.6707, "step": 54053 }, { "epoch": 1.1314996232102488, "grad_norm": 0.330462783575058, "learning_rate": 0.00013767658496329751, "loss": 11.658, "step": 54054 }, { "epoch": 1.1315205559742108, "grad_norm": 0.3011908531188965, "learning_rate": 0.00013767455399233628, "loss": 11.6851, "step": 54055 }, { "epoch": 1.131541488738173, "grad_norm": 0.40369269251823425, "learning_rate": 0.00013767252300326413, "loss": 11.6575, "step": 54056 }, { "epoch": 1.1315624215021352, "grad_norm": 0.28594592213630676, "learning_rate": 0.00013767049199608203, "loss": 11.6678, "step": 54057 }, { "epoch": 1.1315833542660974, "grad_norm": 0.3347427546977997, "learning_rate": 0.00013766846097079097, "loss": 11.6648, "step": 54058 }, { "epoch": 1.1316042870300596, "grad_norm": 0.35789066553115845, "learning_rate": 0.00013766642992739194, "loss": 11.6802, "step": 54059 }, { "epoch": 1.1316252197940215, "grad_norm": 0.2858177423477173, "learning_rate": 0.0001376643988658859, "loss": 11.6656, "step": 54060 }, { "epoch": 1.1316461525579837, "grad_norm": 0.3244367241859436, "learning_rate": 0.0001376623677862738, "loss": 11.6604, "step": 54061 }, { "epoch": 1.131667085321946, "grad_norm": 0.3157458007335663, "learning_rate": 0.0001376603366885567, "loss": 11.6798, "step": 54062 }, { "epoch": 1.1316880180859081, "grad_norm": 0.29816675186157227, "learning_rate": 0.0001376583055727355, "loss": 11.6755, "step": 54063 }, { "epoch": 1.13170895084987, "grad_norm": 0.3286593556404114, "learning_rate": 0.00013765627443881114, "loss": 11.6726, "step": 54064 }, { "epoch": 1.1317298836138323, "grad_norm": 0.37071457505226135, "learning_rate": 0.00013765424328678473, "loss": 11.6733, "step": 54065 }, { "epoch": 1.1317508163777945, "grad_norm": 0.3497229814529419, "learning_rate": 0.00013765221211665715, "loss": 11.6836, "step": 54066 }, { "epoch": 1.1317717491417567, "grad_norm": 0.37265700101852417, "learning_rate": 0.0001376501809284294, "loss": 11.6784, "step": 54067 }, { "epoch": 1.131792681905719, "grad_norm": 0.27000895142555237, "learning_rate": 0.00013764814972210243, "loss": 11.666, "step": 54068 }, { "epoch": 1.131813614669681, "grad_norm": 0.35492417216300964, "learning_rate": 0.00013764611849767725, "loss": 11.6548, "step": 54069 }, { "epoch": 1.131834547433643, "grad_norm": 0.3443671464920044, "learning_rate": 0.00013764408725515484, "loss": 11.672, "step": 54070 }, { "epoch": 1.1318554801976053, "grad_norm": 0.40940114855766296, "learning_rate": 0.00013764205599453615, "loss": 11.6737, "step": 54071 }, { "epoch": 1.1318764129615675, "grad_norm": 0.34873276948928833, "learning_rate": 0.00013764002471582218, "loss": 11.6778, "step": 54072 }, { "epoch": 1.1318973457255297, "grad_norm": 0.31049010157585144, "learning_rate": 0.00013763799341901388, "loss": 11.6702, "step": 54073 }, { "epoch": 1.1319182784894917, "grad_norm": 0.25030404329299927, "learning_rate": 0.00013763596210411225, "loss": 11.6748, "step": 54074 }, { "epoch": 1.1319392112534539, "grad_norm": 0.43621405959129333, "learning_rate": 0.00013763393077111826, "loss": 11.6841, "step": 54075 }, { "epoch": 1.131960144017416, "grad_norm": 0.2766675353050232, "learning_rate": 0.00013763189942003287, "loss": 11.6635, "step": 54076 }, { "epoch": 1.1319810767813783, "grad_norm": 0.29310283064842224, "learning_rate": 0.0001376298680508571, "loss": 11.6708, "step": 54077 }, { "epoch": 1.1320020095453405, "grad_norm": 0.28036656975746155, "learning_rate": 0.0001376278366635919, "loss": 11.6864, "step": 54078 }, { "epoch": 1.1320229423093024, "grad_norm": 0.47617918252944946, "learning_rate": 0.00013762580525823823, "loss": 11.6801, "step": 54079 }, { "epoch": 1.1320438750732646, "grad_norm": 0.3864101469516754, "learning_rate": 0.00013762377383479707, "loss": 11.6788, "step": 54080 }, { "epoch": 1.1320648078372268, "grad_norm": 0.33699002861976624, "learning_rate": 0.0001376217423932694, "loss": 11.6736, "step": 54081 }, { "epoch": 1.132085740601189, "grad_norm": 0.3563130795955658, "learning_rate": 0.00013761971093365625, "loss": 11.6605, "step": 54082 }, { "epoch": 1.132106673365151, "grad_norm": 0.29525014758110046, "learning_rate": 0.00013761767945595852, "loss": 11.6588, "step": 54083 }, { "epoch": 1.1321276061291132, "grad_norm": 0.31968215107917786, "learning_rate": 0.00013761564796017724, "loss": 11.6607, "step": 54084 }, { "epoch": 1.1321485388930754, "grad_norm": 0.32053902745246887, "learning_rate": 0.00013761361644631332, "loss": 11.6607, "step": 54085 }, { "epoch": 1.1321694716570376, "grad_norm": 0.33932384848594666, "learning_rate": 0.0001376115849143678, "loss": 11.6877, "step": 54086 }, { "epoch": 1.1321904044209998, "grad_norm": 0.4044620096683502, "learning_rate": 0.00013760955336434167, "loss": 11.6762, "step": 54087 }, { "epoch": 1.1322113371849618, "grad_norm": 0.3398403823375702, "learning_rate": 0.00013760752179623584, "loss": 11.6594, "step": 54088 }, { "epoch": 1.132232269948924, "grad_norm": 0.33983710408210754, "learning_rate": 0.00013760549021005132, "loss": 11.6773, "step": 54089 }, { "epoch": 1.1322532027128862, "grad_norm": 0.2807399034500122, "learning_rate": 0.00013760345860578912, "loss": 11.6728, "step": 54090 }, { "epoch": 1.1322741354768484, "grad_norm": 0.3221387267112732, "learning_rate": 0.00013760142698345014, "loss": 11.6546, "step": 54091 }, { "epoch": 1.1322950682408106, "grad_norm": 0.4055774211883545, "learning_rate": 0.00013759939534303543, "loss": 11.6573, "step": 54092 }, { "epoch": 1.1323160010047726, "grad_norm": 0.3248705565929413, "learning_rate": 0.00013759736368454593, "loss": 11.6763, "step": 54093 }, { "epoch": 1.1323369337687348, "grad_norm": 0.27053913474082947, "learning_rate": 0.00013759533200798262, "loss": 11.6509, "step": 54094 }, { "epoch": 1.132357866532697, "grad_norm": 0.3495728373527527, "learning_rate": 0.00013759330031334647, "loss": 11.6633, "step": 54095 }, { "epoch": 1.1323787992966592, "grad_norm": 0.3035641610622406, "learning_rate": 0.0001375912686006385, "loss": 11.6665, "step": 54096 }, { "epoch": 1.1323997320606214, "grad_norm": 0.3681876063346863, "learning_rate": 0.00013758923686985964, "loss": 11.6439, "step": 54097 }, { "epoch": 1.1324206648245834, "grad_norm": 0.37907904386520386, "learning_rate": 0.00013758720512101088, "loss": 11.6571, "step": 54098 }, { "epoch": 1.1324415975885456, "grad_norm": 0.5644365549087524, "learning_rate": 0.00013758517335409317, "loss": 11.6878, "step": 54099 }, { "epoch": 1.1324625303525078, "grad_norm": 0.3447853624820709, "learning_rate": 0.00013758314156910753, "loss": 11.6648, "step": 54100 }, { "epoch": 1.13248346311647, "grad_norm": 0.37218421697616577, "learning_rate": 0.00013758110976605495, "loss": 11.6617, "step": 54101 }, { "epoch": 1.132504395880432, "grad_norm": 0.3170030415058136, "learning_rate": 0.00013757907794493635, "loss": 11.6794, "step": 54102 }, { "epoch": 1.1325253286443941, "grad_norm": 0.2901628017425537, "learning_rate": 0.00013757704610575276, "loss": 11.6373, "step": 54103 }, { "epoch": 1.1325462614083563, "grad_norm": 0.30425184965133667, "learning_rate": 0.00013757501424850507, "loss": 11.6542, "step": 54104 }, { "epoch": 1.1325671941723185, "grad_norm": 0.33525440096855164, "learning_rate": 0.00013757298237319438, "loss": 11.6692, "step": 54105 }, { "epoch": 1.1325881269362807, "grad_norm": 0.3888615071773529, "learning_rate": 0.00013757095047982156, "loss": 11.6643, "step": 54106 }, { "epoch": 1.1326090597002427, "grad_norm": 0.3463326096534729, "learning_rate": 0.00013756891856838766, "loss": 11.6831, "step": 54107 }, { "epoch": 1.132629992464205, "grad_norm": 0.37483444809913635, "learning_rate": 0.00013756688663889364, "loss": 11.6787, "step": 54108 }, { "epoch": 1.1326509252281671, "grad_norm": 0.3057079315185547, "learning_rate": 0.00013756485469134042, "loss": 11.6595, "step": 54109 }, { "epoch": 1.1326718579921293, "grad_norm": 0.33724865317344666, "learning_rate": 0.00013756282272572908, "loss": 11.6669, "step": 54110 }, { "epoch": 1.1326927907560915, "grad_norm": 0.2800031900405884, "learning_rate": 0.00013756079074206048, "loss": 11.6624, "step": 54111 }, { "epoch": 1.1327137235200535, "grad_norm": 0.3137739896774292, "learning_rate": 0.00013755875874033567, "loss": 11.6519, "step": 54112 }, { "epoch": 1.1327346562840157, "grad_norm": 0.309865266084671, "learning_rate": 0.0001375567267205556, "loss": 11.6752, "step": 54113 }, { "epoch": 1.132755589047978, "grad_norm": 0.3402308225631714, "learning_rate": 0.0001375546946827213, "loss": 11.6696, "step": 54114 }, { "epoch": 1.13277652181194, "grad_norm": 0.316681832075119, "learning_rate": 0.0001375526626268337, "loss": 11.6632, "step": 54115 }, { "epoch": 1.1327974545759023, "grad_norm": 0.3201984465122223, "learning_rate": 0.00013755063055289374, "loss": 11.6835, "step": 54116 }, { "epoch": 1.1328183873398643, "grad_norm": 0.41242876648902893, "learning_rate": 0.00013754859846090248, "loss": 11.6724, "step": 54117 }, { "epoch": 1.1328393201038265, "grad_norm": 0.3682076036930084, "learning_rate": 0.00013754656635086085, "loss": 11.6738, "step": 54118 }, { "epoch": 1.1328602528677887, "grad_norm": 0.26236969232559204, "learning_rate": 0.00013754453422276983, "loss": 11.6692, "step": 54119 }, { "epoch": 1.1328811856317509, "grad_norm": 0.319959431886673, "learning_rate": 0.0001375425020766304, "loss": 11.6677, "step": 54120 }, { "epoch": 1.1329021183957129, "grad_norm": 0.3688852787017822, "learning_rate": 0.00013754046991244353, "loss": 11.6468, "step": 54121 }, { "epoch": 1.132923051159675, "grad_norm": 0.40930378437042236, "learning_rate": 0.0001375384377302102, "loss": 11.6643, "step": 54122 }, { "epoch": 1.1329439839236373, "grad_norm": 0.3607238233089447, "learning_rate": 0.00013753640552993142, "loss": 11.6713, "step": 54123 }, { "epoch": 1.1329649166875995, "grad_norm": 0.3199062943458557, "learning_rate": 0.0001375343733116081, "loss": 11.6723, "step": 54124 }, { "epoch": 1.1329858494515617, "grad_norm": 0.3717951774597168, "learning_rate": 0.00013753234107524126, "loss": 11.6661, "step": 54125 }, { "epoch": 1.1330067822155236, "grad_norm": 0.37265458703041077, "learning_rate": 0.0001375303088208319, "loss": 11.6609, "step": 54126 }, { "epoch": 1.1330277149794858, "grad_norm": 0.4269435405731201, "learning_rate": 0.00013752827654838097, "loss": 11.658, "step": 54127 }, { "epoch": 1.133048647743448, "grad_norm": 0.3340364992618561, "learning_rate": 0.0001375262442578894, "loss": 11.6681, "step": 54128 }, { "epoch": 1.1330695805074102, "grad_norm": 0.45864880084991455, "learning_rate": 0.00013752421194935824, "loss": 11.6899, "step": 54129 }, { "epoch": 1.1330905132713724, "grad_norm": 0.34574633836746216, "learning_rate": 0.00013752217962278843, "loss": 11.6584, "step": 54130 }, { "epoch": 1.1331114460353344, "grad_norm": 0.3589317798614502, "learning_rate": 0.000137520147278181, "loss": 11.6646, "step": 54131 }, { "epoch": 1.1331323787992966, "grad_norm": 0.36621540784835815, "learning_rate": 0.00013751811491553682, "loss": 11.6701, "step": 54132 }, { "epoch": 1.1331533115632588, "grad_norm": 0.28336554765701294, "learning_rate": 0.00013751608253485698, "loss": 11.652, "step": 54133 }, { "epoch": 1.133174244327221, "grad_norm": 0.37913843989372253, "learning_rate": 0.00013751405013614235, "loss": 11.669, "step": 54134 }, { "epoch": 1.1331951770911832, "grad_norm": 0.39182981848716736, "learning_rate": 0.00013751201771939402, "loss": 11.6691, "step": 54135 }, { "epoch": 1.1332161098551452, "grad_norm": 0.3302517533302307, "learning_rate": 0.0001375099852846129, "loss": 11.6815, "step": 54136 }, { "epoch": 1.1332370426191074, "grad_norm": 0.3414818346500397, "learning_rate": 0.00013750795283179996, "loss": 11.6745, "step": 54137 }, { "epoch": 1.1332579753830696, "grad_norm": 0.4065760374069214, "learning_rate": 0.0001375059203609562, "loss": 11.6454, "step": 54138 }, { "epoch": 1.1332789081470318, "grad_norm": 0.2687430679798126, "learning_rate": 0.0001375038878720826, "loss": 11.6696, "step": 54139 }, { "epoch": 1.1332998409109938, "grad_norm": 0.32851096987724304, "learning_rate": 0.00013750185536518013, "loss": 11.6721, "step": 54140 }, { "epoch": 1.133320773674956, "grad_norm": 0.3945326805114746, "learning_rate": 0.00013749982284024976, "loss": 11.6739, "step": 54141 }, { "epoch": 1.1333417064389182, "grad_norm": 0.3022674322128296, "learning_rate": 0.00013749779029729247, "loss": 11.6819, "step": 54142 }, { "epoch": 1.1333626392028804, "grad_norm": 0.3297118842601776, "learning_rate": 0.00013749575773630926, "loss": 11.6742, "step": 54143 }, { "epoch": 1.1333835719668426, "grad_norm": 0.32528775930404663, "learning_rate": 0.00013749372515730105, "loss": 11.6743, "step": 54144 }, { "epoch": 1.1334045047308046, "grad_norm": 0.296371191740036, "learning_rate": 0.0001374916925602689, "loss": 11.6629, "step": 54145 }, { "epoch": 1.1334254374947668, "grad_norm": 0.3255869150161743, "learning_rate": 0.0001374896599452137, "loss": 11.6727, "step": 54146 }, { "epoch": 1.133446370258729, "grad_norm": 0.5364335179328918, "learning_rate": 0.0001374876273121365, "loss": 11.6733, "step": 54147 }, { "epoch": 1.1334673030226912, "grad_norm": 0.36842331290245056, "learning_rate": 0.00013748559466103825, "loss": 11.6539, "step": 54148 }, { "epoch": 1.1334882357866534, "grad_norm": 0.26304325461387634, "learning_rate": 0.0001374835619919199, "loss": 11.6669, "step": 54149 }, { "epoch": 1.1335091685506153, "grad_norm": 0.3165642023086548, "learning_rate": 0.00013748152930478247, "loss": 11.6592, "step": 54150 }, { "epoch": 1.1335301013145775, "grad_norm": 0.3326045572757721, "learning_rate": 0.0001374794965996269, "loss": 11.6589, "step": 54151 }, { "epoch": 1.1335510340785397, "grad_norm": 0.3614528477191925, "learning_rate": 0.0001374774638764542, "loss": 11.6637, "step": 54152 }, { "epoch": 1.133571966842502, "grad_norm": 0.3854731023311615, "learning_rate": 0.00013747543113526534, "loss": 11.6723, "step": 54153 }, { "epoch": 1.1335928996064641, "grad_norm": 0.3460739850997925, "learning_rate": 0.00013747339837606127, "loss": 11.6512, "step": 54154 }, { "epoch": 1.1336138323704261, "grad_norm": 0.3046630322933197, "learning_rate": 0.000137471365598843, "loss": 11.6818, "step": 54155 }, { "epoch": 1.1336347651343883, "grad_norm": 0.3252606987953186, "learning_rate": 0.00013746933280361147, "loss": 11.647, "step": 54156 }, { "epoch": 1.1336556978983505, "grad_norm": 0.2797549366950989, "learning_rate": 0.00013746729999036767, "loss": 11.6779, "step": 54157 }, { "epoch": 1.1336766306623127, "grad_norm": 0.3011327385902405, "learning_rate": 0.00013746526715911262, "loss": 11.6715, "step": 54158 }, { "epoch": 1.1336975634262747, "grad_norm": 0.32056212425231934, "learning_rate": 0.00013746323430984726, "loss": 11.6812, "step": 54159 }, { "epoch": 1.133718496190237, "grad_norm": 0.28951340913772583, "learning_rate": 0.0001374612014425726, "loss": 11.6615, "step": 54160 }, { "epoch": 1.133739428954199, "grad_norm": 0.32113662362098694, "learning_rate": 0.00013745916855728956, "loss": 11.654, "step": 54161 }, { "epoch": 1.1337603617181613, "grad_norm": 0.3066962659358978, "learning_rate": 0.00013745713565399914, "loss": 11.6782, "step": 54162 }, { "epoch": 1.1337812944821235, "grad_norm": 0.2967415452003479, "learning_rate": 0.00013745510273270231, "loss": 11.6552, "step": 54163 }, { "epoch": 1.1338022272460855, "grad_norm": 0.2613266706466675, "learning_rate": 0.0001374530697934001, "loss": 11.6774, "step": 54164 }, { "epoch": 1.1338231600100477, "grad_norm": 0.34099331498146057, "learning_rate": 0.00013745103683609344, "loss": 11.6616, "step": 54165 }, { "epoch": 1.1338440927740099, "grad_norm": 0.28310632705688477, "learning_rate": 0.0001374490038607833, "loss": 11.6687, "step": 54166 }, { "epoch": 1.133865025537972, "grad_norm": 0.3184910714626312, "learning_rate": 0.0001374469708674707, "loss": 11.6748, "step": 54167 }, { "epoch": 1.1338859583019343, "grad_norm": 0.3046059012413025, "learning_rate": 0.00013744493785615655, "loss": 11.6782, "step": 54168 }, { "epoch": 1.1339068910658963, "grad_norm": 0.30676954984664917, "learning_rate": 0.00013744290482684191, "loss": 11.6525, "step": 54169 }, { "epoch": 1.1339278238298585, "grad_norm": 0.32007360458374023, "learning_rate": 0.0001374408717795277, "loss": 11.6831, "step": 54170 }, { "epoch": 1.1339487565938207, "grad_norm": 0.2792918086051941, "learning_rate": 0.0001374388387142149, "loss": 11.6583, "step": 54171 }, { "epoch": 1.1339696893577829, "grad_norm": 0.39047256112098694, "learning_rate": 0.00013743680563090452, "loss": 11.6638, "step": 54172 }, { "epoch": 1.133990622121745, "grad_norm": 0.37972211837768555, "learning_rate": 0.0001374347725295975, "loss": 11.6728, "step": 54173 }, { "epoch": 1.134011554885707, "grad_norm": 0.37326115369796753, "learning_rate": 0.00013743273941029481, "loss": 11.6927, "step": 54174 }, { "epoch": 1.1340324876496692, "grad_norm": 0.35300952196121216, "learning_rate": 0.0001374307062729975, "loss": 11.6836, "step": 54175 }, { "epoch": 1.1340534204136314, "grad_norm": 0.29053524136543274, "learning_rate": 0.0001374286731177065, "loss": 11.6701, "step": 54176 }, { "epoch": 1.1340743531775936, "grad_norm": 0.3167509138584137, "learning_rate": 0.00013742663994442277, "loss": 11.6629, "step": 54177 }, { "epoch": 1.1340952859415556, "grad_norm": 0.3272870182991028, "learning_rate": 0.0001374246067531473, "loss": 11.6726, "step": 54178 }, { "epoch": 1.1341162187055178, "grad_norm": 0.31772086024284363, "learning_rate": 0.00013742257354388106, "loss": 11.6813, "step": 54179 }, { "epoch": 1.13413715146948, "grad_norm": 0.31449776887893677, "learning_rate": 0.00013742054031662507, "loss": 11.6641, "step": 54180 }, { "epoch": 1.1341580842334422, "grad_norm": 0.37488481402397156, "learning_rate": 0.00013741850707138024, "loss": 11.6556, "step": 54181 }, { "epoch": 1.1341790169974044, "grad_norm": 0.2672605514526367, "learning_rate": 0.00013741647380814762, "loss": 11.6777, "step": 54182 }, { "epoch": 1.1341999497613664, "grad_norm": 0.29390281438827515, "learning_rate": 0.00013741444052692812, "loss": 11.6581, "step": 54183 }, { "epoch": 1.1342208825253286, "grad_norm": 0.35316240787506104, "learning_rate": 0.00013741240722772277, "loss": 11.6605, "step": 54184 }, { "epoch": 1.1342418152892908, "grad_norm": 0.25711357593536377, "learning_rate": 0.00013741037391053253, "loss": 11.6619, "step": 54185 }, { "epoch": 1.134262748053253, "grad_norm": 0.31081512570381165, "learning_rate": 0.00013740834057535833, "loss": 11.6681, "step": 54186 }, { "epoch": 1.1342836808172152, "grad_norm": 0.33382266759872437, "learning_rate": 0.00013740630722220126, "loss": 11.6745, "step": 54187 }, { "epoch": 1.1343046135811772, "grad_norm": 0.37672489881515503, "learning_rate": 0.00013740427385106218, "loss": 11.6532, "step": 54188 }, { "epoch": 1.1343255463451394, "grad_norm": 0.3225831389427185, "learning_rate": 0.00013740224046194214, "loss": 11.6587, "step": 54189 }, { "epoch": 1.1343464791091016, "grad_norm": 0.415458619594574, "learning_rate": 0.00013740020705484208, "loss": 11.6739, "step": 54190 }, { "epoch": 1.1343674118730638, "grad_norm": 0.31454023718833923, "learning_rate": 0.00013739817362976297, "loss": 11.6631, "step": 54191 }, { "epoch": 1.134388344637026, "grad_norm": 0.30150851607322693, "learning_rate": 0.00013739614018670586, "loss": 11.6764, "step": 54192 }, { "epoch": 1.134409277400988, "grad_norm": 0.2645419239997864, "learning_rate": 0.00013739410672567164, "loss": 11.6631, "step": 54193 }, { "epoch": 1.1344302101649502, "grad_norm": 0.3141108751296997, "learning_rate": 0.00013739207324666136, "loss": 11.6642, "step": 54194 }, { "epoch": 1.1344511429289124, "grad_norm": 0.2662806808948517, "learning_rate": 0.0001373900397496759, "loss": 11.6619, "step": 54195 }, { "epoch": 1.1344720756928746, "grad_norm": 0.302807092666626, "learning_rate": 0.00013738800623471634, "loss": 11.6615, "step": 54196 }, { "epoch": 1.1344930084568365, "grad_norm": 0.3427751064300537, "learning_rate": 0.0001373859727017836, "loss": 11.6664, "step": 54197 }, { "epoch": 1.1345139412207987, "grad_norm": 0.33564192056655884, "learning_rate": 0.00013738393915087868, "loss": 11.6671, "step": 54198 }, { "epoch": 1.134534873984761, "grad_norm": 0.3700059950351715, "learning_rate": 0.00013738190558200257, "loss": 11.6719, "step": 54199 }, { "epoch": 1.1345558067487231, "grad_norm": 0.39861077070236206, "learning_rate": 0.00013737987199515618, "loss": 11.6585, "step": 54200 }, { "epoch": 1.1345767395126853, "grad_norm": 0.32382798194885254, "learning_rate": 0.00013737783839034057, "loss": 11.6758, "step": 54201 }, { "epoch": 1.1345976722766473, "grad_norm": 0.32103967666625977, "learning_rate": 0.0001373758047675567, "loss": 11.6566, "step": 54202 }, { "epoch": 1.1346186050406095, "grad_norm": 0.2991729974746704, "learning_rate": 0.0001373737711268055, "loss": 11.6554, "step": 54203 }, { "epoch": 1.1346395378045717, "grad_norm": 0.3260866105556488, "learning_rate": 0.000137371737468088, "loss": 11.6673, "step": 54204 }, { "epoch": 1.134660470568534, "grad_norm": 0.3014521598815918, "learning_rate": 0.00013736970379140515, "loss": 11.666, "step": 54205 }, { "epoch": 1.1346814033324961, "grad_norm": 0.3975423574447632, "learning_rate": 0.00013736767009675792, "loss": 11.6836, "step": 54206 }, { "epoch": 1.134702336096458, "grad_norm": 0.26083844900131226, "learning_rate": 0.00013736563638414733, "loss": 11.6599, "step": 54207 }, { "epoch": 1.1347232688604203, "grad_norm": 0.31262099742889404, "learning_rate": 0.0001373636026535743, "loss": 11.6624, "step": 54208 }, { "epoch": 1.1347442016243825, "grad_norm": 0.3495093286037445, "learning_rate": 0.00013736156890503987, "loss": 11.6493, "step": 54209 }, { "epoch": 1.1347651343883447, "grad_norm": 0.2814880609512329, "learning_rate": 0.00013735953513854495, "loss": 11.6652, "step": 54210 }, { "epoch": 1.134786067152307, "grad_norm": 0.28898900747299194, "learning_rate": 0.0001373575013540906, "loss": 11.6676, "step": 54211 }, { "epoch": 1.1348069999162689, "grad_norm": 0.43575945496559143, "learning_rate": 0.0001373554675516777, "loss": 11.684, "step": 54212 }, { "epoch": 1.134827932680231, "grad_norm": 0.3000098466873169, "learning_rate": 0.0001373534337313073, "loss": 11.6954, "step": 54213 }, { "epoch": 1.1348488654441933, "grad_norm": 0.350999116897583, "learning_rate": 0.00013735139989298035, "loss": 11.6629, "step": 54214 }, { "epoch": 1.1348697982081555, "grad_norm": 0.3862999975681305, "learning_rate": 0.00013734936603669785, "loss": 11.6596, "step": 54215 }, { "epoch": 1.1348907309721175, "grad_norm": 0.33940768241882324, "learning_rate": 0.00013734733216246077, "loss": 11.6775, "step": 54216 }, { "epoch": 1.1349116637360797, "grad_norm": 0.2976839244365692, "learning_rate": 0.00013734529827027004, "loss": 11.6638, "step": 54217 }, { "epoch": 1.1349325965000419, "grad_norm": 0.382158100605011, "learning_rate": 0.00013734326436012667, "loss": 11.6701, "step": 54218 }, { "epoch": 1.134953529264004, "grad_norm": 0.2750028073787689, "learning_rate": 0.0001373412304320317, "loss": 11.6601, "step": 54219 }, { "epoch": 1.1349744620279663, "grad_norm": 0.2937152087688446, "learning_rate": 0.000137339196485986, "loss": 11.6623, "step": 54220 }, { "epoch": 1.1349953947919282, "grad_norm": 0.4068496525287628, "learning_rate": 0.00013733716252199062, "loss": 11.6642, "step": 54221 }, { "epoch": 1.1350163275558904, "grad_norm": 0.2867811620235443, "learning_rate": 0.00013733512854004654, "loss": 11.6714, "step": 54222 }, { "epoch": 1.1350372603198526, "grad_norm": 0.36192527413368225, "learning_rate": 0.00013733309454015468, "loss": 11.6631, "step": 54223 }, { "epoch": 1.1350581930838148, "grad_norm": 0.2989879250526428, "learning_rate": 0.00013733106052231608, "loss": 11.6725, "step": 54224 }, { "epoch": 1.135079125847777, "grad_norm": 0.3030935823917389, "learning_rate": 0.00013732902648653166, "loss": 11.6611, "step": 54225 }, { "epoch": 1.135100058611739, "grad_norm": 0.3965662717819214, "learning_rate": 0.00013732699243280248, "loss": 11.6581, "step": 54226 }, { "epoch": 1.1351209913757012, "grad_norm": 0.39253178238868713, "learning_rate": 0.00013732495836112942, "loss": 11.6709, "step": 54227 }, { "epoch": 1.1351419241396634, "grad_norm": 0.3249194920063019, "learning_rate": 0.0001373229242715135, "loss": 11.6823, "step": 54228 }, { "epoch": 1.1351628569036256, "grad_norm": 0.282909095287323, "learning_rate": 0.00013732089016395572, "loss": 11.6684, "step": 54229 }, { "epoch": 1.1351837896675878, "grad_norm": 0.33223503828048706, "learning_rate": 0.00013731885603845707, "loss": 11.6709, "step": 54230 }, { "epoch": 1.1352047224315498, "grad_norm": 0.2791617810726166, "learning_rate": 0.00013731682189501847, "loss": 11.6547, "step": 54231 }, { "epoch": 1.135225655195512, "grad_norm": 0.3501529097557068, "learning_rate": 0.00013731478773364094, "loss": 11.6752, "step": 54232 }, { "epoch": 1.1352465879594742, "grad_norm": 0.336542546749115, "learning_rate": 0.00013731275355432541, "loss": 11.6769, "step": 54233 }, { "epoch": 1.1352675207234364, "grad_norm": 0.3231000304222107, "learning_rate": 0.0001373107193570729, "loss": 11.654, "step": 54234 }, { "epoch": 1.1352884534873984, "grad_norm": 0.294124960899353, "learning_rate": 0.0001373086851418844, "loss": 11.6623, "step": 54235 }, { "epoch": 1.1353093862513606, "grad_norm": 0.30065709352493286, "learning_rate": 0.00013730665090876088, "loss": 11.6729, "step": 54236 }, { "epoch": 1.1353303190153228, "grad_norm": 0.29416537284851074, "learning_rate": 0.00013730461665770327, "loss": 11.6745, "step": 54237 }, { "epoch": 1.135351251779285, "grad_norm": 0.47020381689071655, "learning_rate": 0.0001373025823887126, "loss": 11.6559, "step": 54238 }, { "epoch": 1.1353721845432472, "grad_norm": 0.40450820326805115, "learning_rate": 0.00013730054810178982, "loss": 11.6669, "step": 54239 }, { "epoch": 1.1353931173072092, "grad_norm": 0.30302831530570984, "learning_rate": 0.00013729851379693593, "loss": 11.6794, "step": 54240 }, { "epoch": 1.1354140500711714, "grad_norm": 0.31496676802635193, "learning_rate": 0.0001372964794741519, "loss": 11.6573, "step": 54241 }, { "epoch": 1.1354349828351336, "grad_norm": 0.2716256082057953, "learning_rate": 0.00013729444513343867, "loss": 11.6608, "step": 54242 }, { "epoch": 1.1354559155990958, "grad_norm": 0.2852536737918854, "learning_rate": 0.0001372924107747973, "loss": 11.6442, "step": 54243 }, { "epoch": 1.1354768483630577, "grad_norm": 0.30886563658714294, "learning_rate": 0.0001372903763982287, "loss": 11.6624, "step": 54244 }, { "epoch": 1.13549778112702, "grad_norm": 0.27058446407318115, "learning_rate": 0.00013728834200373385, "loss": 11.666, "step": 54245 }, { "epoch": 1.1355187138909821, "grad_norm": 0.35563981533050537, "learning_rate": 0.0001372863075913138, "loss": 11.6673, "step": 54246 }, { "epoch": 1.1355396466549443, "grad_norm": 0.32441461086273193, "learning_rate": 0.00013728427316096943, "loss": 11.6747, "step": 54247 }, { "epoch": 1.1355605794189065, "grad_norm": 0.34407487511634827, "learning_rate": 0.00013728223871270178, "loss": 11.674, "step": 54248 }, { "epoch": 1.1355815121828687, "grad_norm": 0.27252477407455444, "learning_rate": 0.0001372802042465118, "loss": 11.6675, "step": 54249 }, { "epoch": 1.1356024449468307, "grad_norm": 0.3014008700847626, "learning_rate": 0.00013727816976240046, "loss": 11.6589, "step": 54250 }, { "epoch": 1.135623377710793, "grad_norm": 0.27037304639816284, "learning_rate": 0.00013727613526036883, "loss": 11.6673, "step": 54251 }, { "epoch": 1.1356443104747551, "grad_norm": 0.3278372883796692, "learning_rate": 0.00013727410074041774, "loss": 11.663, "step": 54252 }, { "epoch": 1.1356652432387173, "grad_norm": 0.24864211678504944, "learning_rate": 0.0001372720662025483, "loss": 11.661, "step": 54253 }, { "epoch": 1.1356861760026793, "grad_norm": 0.4038837254047394, "learning_rate": 0.00013727003164676138, "loss": 11.6857, "step": 54254 }, { "epoch": 1.1357071087666415, "grad_norm": 0.3884880244731903, "learning_rate": 0.00013726799707305801, "loss": 11.6696, "step": 54255 }, { "epoch": 1.1357280415306037, "grad_norm": 0.36810994148254395, "learning_rate": 0.0001372659624814392, "loss": 11.6867, "step": 54256 }, { "epoch": 1.135748974294566, "grad_norm": 0.3101585805416107, "learning_rate": 0.00013726392787190587, "loss": 11.6618, "step": 54257 }, { "epoch": 1.135769907058528, "grad_norm": 0.3414410948753357, "learning_rate": 0.00013726189324445907, "loss": 11.6662, "step": 54258 }, { "epoch": 1.13579083982249, "grad_norm": 0.24909687042236328, "learning_rate": 0.00013725985859909968, "loss": 11.675, "step": 54259 }, { "epoch": 1.1358117725864523, "grad_norm": 0.25866302847862244, "learning_rate": 0.00013725782393582872, "loss": 11.6663, "step": 54260 }, { "epoch": 1.1358327053504145, "grad_norm": 0.26756927371025085, "learning_rate": 0.0001372557892546472, "loss": 11.6752, "step": 54261 }, { "epoch": 1.1358536381143767, "grad_norm": 0.2642662525177002, "learning_rate": 0.00013725375455555608, "loss": 11.6593, "step": 54262 }, { "epoch": 1.1358745708783387, "grad_norm": 0.3114488422870636, "learning_rate": 0.00013725171983855635, "loss": 11.655, "step": 54263 }, { "epoch": 1.1358955036423009, "grad_norm": 0.30617275834083557, "learning_rate": 0.00013724968510364893, "loss": 11.6824, "step": 54264 }, { "epoch": 1.135916436406263, "grad_norm": 0.28316575288772583, "learning_rate": 0.00013724765035083488, "loss": 11.6599, "step": 54265 }, { "epoch": 1.1359373691702253, "grad_norm": 0.2771475911140442, "learning_rate": 0.00013724561558011513, "loss": 11.6699, "step": 54266 }, { "epoch": 1.1359583019341875, "grad_norm": 0.2738001048564911, "learning_rate": 0.00013724358079149064, "loss": 11.6781, "step": 54267 }, { "epoch": 1.1359792346981497, "grad_norm": 0.3057832717895508, "learning_rate": 0.00013724154598496245, "loss": 11.6515, "step": 54268 }, { "epoch": 1.1360001674621116, "grad_norm": 0.27996525168418884, "learning_rate": 0.00013723951116053146, "loss": 11.6643, "step": 54269 }, { "epoch": 1.1360211002260738, "grad_norm": 0.39482301473617554, "learning_rate": 0.0001372374763181987, "loss": 11.699, "step": 54270 }, { "epoch": 1.136042032990036, "grad_norm": 0.3088972866535187, "learning_rate": 0.00013723544145796516, "loss": 11.6718, "step": 54271 }, { "epoch": 1.1360629657539982, "grad_norm": 0.29638683795928955, "learning_rate": 0.0001372334065798318, "loss": 11.6644, "step": 54272 }, { "epoch": 1.1360838985179602, "grad_norm": 0.2493756264448166, "learning_rate": 0.00013723137168379957, "loss": 11.6491, "step": 54273 }, { "epoch": 1.1361048312819224, "grad_norm": 0.26235198974609375, "learning_rate": 0.0001372293367698695, "loss": 11.6733, "step": 54274 }, { "epoch": 1.1361257640458846, "grad_norm": 0.3451341986656189, "learning_rate": 0.00013722730183804256, "loss": 11.6987, "step": 54275 }, { "epoch": 1.1361466968098468, "grad_norm": 0.4418959617614746, "learning_rate": 0.00013722526688831965, "loss": 11.6513, "step": 54276 }, { "epoch": 1.136167629573809, "grad_norm": 0.36721256375312805, "learning_rate": 0.00013722323192070184, "loss": 11.681, "step": 54277 }, { "epoch": 1.136188562337771, "grad_norm": 0.35204771161079407, "learning_rate": 0.00013722119693519009, "loss": 11.6624, "step": 54278 }, { "epoch": 1.1362094951017332, "grad_norm": 0.24095386266708374, "learning_rate": 0.00013721916193178534, "loss": 11.6551, "step": 54279 }, { "epoch": 1.1362304278656954, "grad_norm": 0.3242948353290558, "learning_rate": 0.00013721712691048863, "loss": 11.6701, "step": 54280 }, { "epoch": 1.1362513606296576, "grad_norm": 0.3125092089176178, "learning_rate": 0.00013721509187130084, "loss": 11.6479, "step": 54281 }, { "epoch": 1.1362722933936196, "grad_norm": 0.3507530987262726, "learning_rate": 0.00013721305681422307, "loss": 11.6848, "step": 54282 }, { "epoch": 1.1362932261575818, "grad_norm": 0.31261125206947327, "learning_rate": 0.00013721102173925619, "loss": 11.6791, "step": 54283 }, { "epoch": 1.136314158921544, "grad_norm": 0.3748065233230591, "learning_rate": 0.00013720898664640126, "loss": 11.6676, "step": 54284 }, { "epoch": 1.1363350916855062, "grad_norm": 0.5578004121780396, "learning_rate": 0.0001372069515356592, "loss": 11.6799, "step": 54285 }, { "epoch": 1.1363560244494684, "grad_norm": 0.3471944034099579, "learning_rate": 0.00013720491640703103, "loss": 11.6611, "step": 54286 }, { "epoch": 1.1363769572134306, "grad_norm": 0.3320075273513794, "learning_rate": 0.00013720288126051772, "loss": 11.6617, "step": 54287 }, { "epoch": 1.1363978899773926, "grad_norm": 0.30022671818733215, "learning_rate": 0.0001372008460961202, "loss": 11.6509, "step": 54288 }, { "epoch": 1.1364188227413548, "grad_norm": 0.31717461347579956, "learning_rate": 0.00013719881091383952, "loss": 11.681, "step": 54289 }, { "epoch": 1.136439755505317, "grad_norm": 0.33605486154556274, "learning_rate": 0.00013719677571367663, "loss": 11.6776, "step": 54290 }, { "epoch": 1.1364606882692792, "grad_norm": 0.3175072968006134, "learning_rate": 0.0001371947404956325, "loss": 11.674, "step": 54291 }, { "epoch": 1.1364816210332411, "grad_norm": 0.3258779048919678, "learning_rate": 0.00013719270525970812, "loss": 11.6539, "step": 54292 }, { "epoch": 1.1365025537972033, "grad_norm": 0.26969391107559204, "learning_rate": 0.00013719067000590441, "loss": 11.6665, "step": 54293 }, { "epoch": 1.1365234865611655, "grad_norm": 0.3735181391239166, "learning_rate": 0.00013718863473422243, "loss": 11.6716, "step": 54294 }, { "epoch": 1.1365444193251277, "grad_norm": 0.3707241117954254, "learning_rate": 0.00013718659944466318, "loss": 11.6886, "step": 54295 }, { "epoch": 1.13656535208909, "grad_norm": 0.29441559314727783, "learning_rate": 0.0001371845641372275, "loss": 11.6565, "step": 54296 }, { "epoch": 1.136586284853052, "grad_norm": 0.28888261318206787, "learning_rate": 0.00013718252881191653, "loss": 11.6509, "step": 54297 }, { "epoch": 1.136607217617014, "grad_norm": 0.3605319857597351, "learning_rate": 0.0001371804934687311, "loss": 11.6589, "step": 54298 }, { "epoch": 1.1366281503809763, "grad_norm": 0.31187525391578674, "learning_rate": 0.00013717845810767233, "loss": 11.6667, "step": 54299 }, { "epoch": 1.1366490831449385, "grad_norm": 0.5146878361701965, "learning_rate": 0.0001371764227287411, "loss": 11.6809, "step": 54300 }, { "epoch": 1.1366700159089005, "grad_norm": 0.3305475115776062, "learning_rate": 0.0001371743873319384, "loss": 11.6767, "step": 54301 }, { "epoch": 1.1366909486728627, "grad_norm": 0.32212135195732117, "learning_rate": 0.00013717235191726526, "loss": 11.6636, "step": 54302 }, { "epoch": 1.136711881436825, "grad_norm": 0.31822770833969116, "learning_rate": 0.00013717031648472261, "loss": 11.679, "step": 54303 }, { "epoch": 1.136732814200787, "grad_norm": 0.31163540482521057, "learning_rate": 0.00013716828103431148, "loss": 11.6798, "step": 54304 }, { "epoch": 1.1367537469647493, "grad_norm": 0.34506362676620483, "learning_rate": 0.00013716624556603274, "loss": 11.6629, "step": 54305 }, { "epoch": 1.1367746797287115, "grad_norm": 0.3750501573085785, "learning_rate": 0.0001371642100798875, "loss": 11.6765, "step": 54306 }, { "epoch": 1.1367956124926735, "grad_norm": 0.32628849148750305, "learning_rate": 0.00013716217457587665, "loss": 11.6643, "step": 54307 }, { "epoch": 1.1368165452566357, "grad_norm": 0.44344425201416016, "learning_rate": 0.0001371601390540012, "loss": 11.6516, "step": 54308 }, { "epoch": 1.1368374780205979, "grad_norm": 0.5303942561149597, "learning_rate": 0.00013715810351426215, "loss": 11.6683, "step": 54309 }, { "epoch": 1.13685841078456, "grad_norm": 0.32121041417121887, "learning_rate": 0.00013715606795666044, "loss": 11.6713, "step": 54310 }, { "epoch": 1.136879343548522, "grad_norm": 0.3825502395629883, "learning_rate": 0.00013715403238119707, "loss": 11.6699, "step": 54311 }, { "epoch": 1.1369002763124842, "grad_norm": 0.24111278355121613, "learning_rate": 0.00013715199678787302, "loss": 11.6547, "step": 54312 }, { "epoch": 1.1369212090764464, "grad_norm": 0.3287949860095978, "learning_rate": 0.00013714996117668922, "loss": 11.658, "step": 54313 }, { "epoch": 1.1369421418404086, "grad_norm": 0.34582096338272095, "learning_rate": 0.00013714792554764672, "loss": 11.6651, "step": 54314 }, { "epoch": 1.1369630746043708, "grad_norm": 0.2684502899646759, "learning_rate": 0.00013714588990074647, "loss": 11.6548, "step": 54315 }, { "epoch": 1.1369840073683328, "grad_norm": 0.34851208329200745, "learning_rate": 0.00013714385423598942, "loss": 11.6605, "step": 54316 }, { "epoch": 1.137004940132295, "grad_norm": 0.2965170741081238, "learning_rate": 0.0001371418185533766, "loss": 11.681, "step": 54317 }, { "epoch": 1.1370258728962572, "grad_norm": 0.30407431721687317, "learning_rate": 0.00013713978285290893, "loss": 11.6564, "step": 54318 }, { "epoch": 1.1370468056602194, "grad_norm": 0.28169190883636475, "learning_rate": 0.00013713774713458747, "loss": 11.6683, "step": 54319 }, { "epoch": 1.1370677384241814, "grad_norm": 0.3154779374599457, "learning_rate": 0.0001371357113984131, "loss": 11.6731, "step": 54320 }, { "epoch": 1.1370886711881436, "grad_norm": 0.37872037291526794, "learning_rate": 0.00013713367564438693, "loss": 11.6699, "step": 54321 }, { "epoch": 1.1371096039521058, "grad_norm": 0.34183943271636963, "learning_rate": 0.0001371316398725098, "loss": 11.6797, "step": 54322 }, { "epoch": 1.137130536716068, "grad_norm": 0.30542054772377014, "learning_rate": 0.00013712960408278276, "loss": 11.6752, "step": 54323 }, { "epoch": 1.1371514694800302, "grad_norm": 0.3370092213153839, "learning_rate": 0.00013712756827520678, "loss": 11.6781, "step": 54324 }, { "epoch": 1.1371724022439924, "grad_norm": 0.4258006513118744, "learning_rate": 0.0001371255324497828, "loss": 11.6585, "step": 54325 }, { "epoch": 1.1371933350079544, "grad_norm": 0.3950825333595276, "learning_rate": 0.00013712349660651183, "loss": 11.6714, "step": 54326 }, { "epoch": 1.1372142677719166, "grad_norm": 0.2557046115398407, "learning_rate": 0.00013712146074539488, "loss": 11.6636, "step": 54327 }, { "epoch": 1.1372352005358788, "grad_norm": 0.34426918625831604, "learning_rate": 0.00013711942486643291, "loss": 11.6763, "step": 54328 }, { "epoch": 1.137256133299841, "grad_norm": 0.3409072160720825, "learning_rate": 0.0001371173889696269, "loss": 11.6627, "step": 54329 }, { "epoch": 1.137277066063803, "grad_norm": 0.5113263130187988, "learning_rate": 0.00013711535305497777, "loss": 11.6851, "step": 54330 }, { "epoch": 1.1372979988277652, "grad_norm": 0.2877841889858246, "learning_rate": 0.0001371133171224866, "loss": 11.6625, "step": 54331 }, { "epoch": 1.1373189315917274, "grad_norm": 0.41753607988357544, "learning_rate": 0.00013711128117215425, "loss": 11.6774, "step": 54332 }, { "epoch": 1.1373398643556896, "grad_norm": 0.26717957854270935, "learning_rate": 0.0001371092452039818, "loss": 11.6705, "step": 54333 }, { "epoch": 1.1373607971196518, "grad_norm": 0.4031375050544739, "learning_rate": 0.00013710720921797018, "loss": 11.6639, "step": 54334 }, { "epoch": 1.1373817298836137, "grad_norm": 0.3220871686935425, "learning_rate": 0.00013710517321412042, "loss": 11.6698, "step": 54335 }, { "epoch": 1.137402662647576, "grad_norm": 0.3713109791278839, "learning_rate": 0.0001371031371924334, "loss": 11.6677, "step": 54336 }, { "epoch": 1.1374235954115381, "grad_norm": 0.34114161133766174, "learning_rate": 0.0001371011011529102, "loss": 11.6941, "step": 54337 }, { "epoch": 1.1374445281755003, "grad_norm": 0.3486427068710327, "learning_rate": 0.00013709906509555176, "loss": 11.6577, "step": 54338 }, { "epoch": 1.1374654609394623, "grad_norm": 0.33700278401374817, "learning_rate": 0.000137097029020359, "loss": 11.6627, "step": 54339 }, { "epoch": 1.1374863937034245, "grad_norm": 0.34833824634552, "learning_rate": 0.00013709499292733303, "loss": 11.686, "step": 54340 }, { "epoch": 1.1375073264673867, "grad_norm": 0.3260454535484314, "learning_rate": 0.0001370929568164747, "loss": 11.6633, "step": 54341 }, { "epoch": 1.137528259231349, "grad_norm": 0.30821725726127625, "learning_rate": 0.00013709092068778507, "loss": 11.6605, "step": 54342 }, { "epoch": 1.1375491919953111, "grad_norm": 0.3956184387207031, "learning_rate": 0.00013708888454126508, "loss": 11.6737, "step": 54343 }, { "epoch": 1.1375701247592733, "grad_norm": 0.3335348963737488, "learning_rate": 0.00013708684837691575, "loss": 11.6718, "step": 54344 }, { "epoch": 1.1375910575232353, "grad_norm": 0.24486114084720612, "learning_rate": 0.000137084812194738, "loss": 11.6572, "step": 54345 }, { "epoch": 1.1376119902871975, "grad_norm": 0.3497864007949829, "learning_rate": 0.0001370827759947328, "loss": 11.6652, "step": 54346 }, { "epoch": 1.1376329230511597, "grad_norm": 0.2637932598590851, "learning_rate": 0.0001370807397769012, "loss": 11.6621, "step": 54347 }, { "epoch": 1.137653855815122, "grad_norm": 0.3471871614456177, "learning_rate": 0.00013707870354124417, "loss": 11.663, "step": 54348 }, { "epoch": 1.1376747885790839, "grad_norm": 0.3817348778247833, "learning_rate": 0.00013707666728776264, "loss": 11.6608, "step": 54349 }, { "epoch": 1.137695721343046, "grad_norm": 0.2995477616786957, "learning_rate": 0.00013707463101645761, "loss": 11.6776, "step": 54350 }, { "epoch": 1.1377166541070083, "grad_norm": 0.3454253375530243, "learning_rate": 0.0001370725947273301, "loss": 11.6678, "step": 54351 }, { "epoch": 1.1377375868709705, "grad_norm": 0.28558772802352905, "learning_rate": 0.000137070558420381, "loss": 11.6638, "step": 54352 }, { "epoch": 1.1377585196349327, "grad_norm": 0.2834356129169464, "learning_rate": 0.00013706852209561133, "loss": 11.6691, "step": 54353 }, { "epoch": 1.1377794523988947, "grad_norm": 0.29152220487594604, "learning_rate": 0.00013706648575302215, "loss": 11.6644, "step": 54354 }, { "epoch": 1.1378003851628569, "grad_norm": 0.3703083097934723, "learning_rate": 0.00013706444939261432, "loss": 11.6619, "step": 54355 }, { "epoch": 1.137821317926819, "grad_norm": 0.3350214958190918, "learning_rate": 0.00013706241301438888, "loss": 11.6727, "step": 54356 }, { "epoch": 1.1378422506907813, "grad_norm": 0.3204955756664276, "learning_rate": 0.00013706037661834677, "loss": 11.6673, "step": 54357 }, { "epoch": 1.1378631834547432, "grad_norm": 0.2998967468738556, "learning_rate": 0.00013705834020448903, "loss": 11.6609, "step": 54358 }, { "epoch": 1.1378841162187054, "grad_norm": 0.36053231358528137, "learning_rate": 0.00013705630377281656, "loss": 11.6566, "step": 54359 }, { "epoch": 1.1379050489826676, "grad_norm": 0.4204886257648468, "learning_rate": 0.00013705426732333042, "loss": 11.655, "step": 54360 }, { "epoch": 1.1379259817466298, "grad_norm": 0.3426128029823303, "learning_rate": 0.00013705223085603153, "loss": 11.6586, "step": 54361 }, { "epoch": 1.137946914510592, "grad_norm": 0.34908929467201233, "learning_rate": 0.0001370501943709209, "loss": 11.6743, "step": 54362 }, { "epoch": 1.1379678472745542, "grad_norm": 0.38468530774116516, "learning_rate": 0.0001370481578679995, "loss": 11.651, "step": 54363 }, { "epoch": 1.1379887800385162, "grad_norm": 0.32736194133758545, "learning_rate": 0.0001370461213472683, "loss": 11.6702, "step": 54364 }, { "epoch": 1.1380097128024784, "grad_norm": 0.36783069372177124, "learning_rate": 0.0001370440848087283, "loss": 11.6721, "step": 54365 }, { "epoch": 1.1380306455664406, "grad_norm": 0.2490149587392807, "learning_rate": 0.00013704204825238048, "loss": 11.654, "step": 54366 }, { "epoch": 1.1380515783304028, "grad_norm": 0.3490259647369385, "learning_rate": 0.00013704001167822574, "loss": 11.6777, "step": 54367 }, { "epoch": 1.1380725110943648, "grad_norm": 0.34653180837631226, "learning_rate": 0.00013703797508626519, "loss": 11.681, "step": 54368 }, { "epoch": 1.138093443858327, "grad_norm": 0.4006091058254242, "learning_rate": 0.00013703593847649972, "loss": 11.6786, "step": 54369 }, { "epoch": 1.1381143766222892, "grad_norm": 0.3927997946739197, "learning_rate": 0.00013703390184893033, "loss": 11.6601, "step": 54370 }, { "epoch": 1.1381353093862514, "grad_norm": 0.3218362629413605, "learning_rate": 0.000137031865203558, "loss": 11.6616, "step": 54371 }, { "epoch": 1.1381562421502136, "grad_norm": 0.38949307799339294, "learning_rate": 0.0001370298285403837, "loss": 11.6822, "step": 54372 }, { "epoch": 1.1381771749141756, "grad_norm": 0.3469213545322418, "learning_rate": 0.00013702779185940846, "loss": 11.6691, "step": 54373 }, { "epoch": 1.1381981076781378, "grad_norm": 0.39040327072143555, "learning_rate": 0.00013702575516063317, "loss": 11.6529, "step": 54374 }, { "epoch": 1.1382190404421, "grad_norm": 0.3476276397705078, "learning_rate": 0.0001370237184440589, "loss": 11.6662, "step": 54375 }, { "epoch": 1.1382399732060622, "grad_norm": 0.33457478880882263, "learning_rate": 0.00013702168170968656, "loss": 11.6707, "step": 54376 }, { "epoch": 1.1382609059700242, "grad_norm": 0.3822413384914398, "learning_rate": 0.00013701964495751716, "loss": 11.6534, "step": 54377 }, { "epoch": 1.1382818387339864, "grad_norm": 0.2747609615325928, "learning_rate": 0.0001370176081875517, "loss": 11.6672, "step": 54378 }, { "epoch": 1.1383027714979486, "grad_norm": 0.30824220180511475, "learning_rate": 0.00013701557139979108, "loss": 11.6679, "step": 54379 }, { "epoch": 1.1383237042619108, "grad_norm": 0.2990872859954834, "learning_rate": 0.00013701353459423638, "loss": 11.6914, "step": 54380 }, { "epoch": 1.138344637025873, "grad_norm": 0.3786865472793579, "learning_rate": 0.0001370114977708885, "loss": 11.6455, "step": 54381 }, { "epoch": 1.1383655697898352, "grad_norm": 0.2742268741130829, "learning_rate": 0.00013700946092974847, "loss": 11.6503, "step": 54382 }, { "epoch": 1.1383865025537971, "grad_norm": 0.4545963704586029, "learning_rate": 0.00013700742407081725, "loss": 11.6112, "step": 54383 }, { "epoch": 1.1384074353177593, "grad_norm": 0.3360515534877777, "learning_rate": 0.00013700538719409582, "loss": 11.6683, "step": 54384 }, { "epoch": 1.1384283680817215, "grad_norm": 0.28115981817245483, "learning_rate": 0.00013700335029958515, "loss": 11.6445, "step": 54385 }, { "epoch": 1.1384493008456837, "grad_norm": 0.3721102774143219, "learning_rate": 0.00013700131338728624, "loss": 11.6803, "step": 54386 }, { "epoch": 1.1384702336096457, "grad_norm": 0.2568434476852417, "learning_rate": 0.00013699927645720002, "loss": 11.6718, "step": 54387 }, { "epoch": 1.138491166373608, "grad_norm": 0.28874439001083374, "learning_rate": 0.00013699723950932758, "loss": 11.6684, "step": 54388 }, { "epoch": 1.1385120991375701, "grad_norm": 0.33078253269195557, "learning_rate": 0.00013699520254366977, "loss": 11.6649, "step": 54389 }, { "epoch": 1.1385330319015323, "grad_norm": 0.3265024423599243, "learning_rate": 0.00013699316556022763, "loss": 11.6728, "step": 54390 }, { "epoch": 1.1385539646654945, "grad_norm": 0.30576881766319275, "learning_rate": 0.00013699112855900217, "loss": 11.6675, "step": 54391 }, { "epoch": 1.1385748974294565, "grad_norm": 0.31823304295539856, "learning_rate": 0.0001369890915399943, "loss": 11.675, "step": 54392 }, { "epoch": 1.1385958301934187, "grad_norm": 0.48752662539482117, "learning_rate": 0.00013698705450320502, "loss": 11.6676, "step": 54393 }, { "epoch": 1.138616762957381, "grad_norm": 0.29405468702316284, "learning_rate": 0.00013698501744863534, "loss": 11.6654, "step": 54394 }, { "epoch": 1.138637695721343, "grad_norm": 0.2781704068183899, "learning_rate": 0.00013698298037628624, "loss": 11.66, "step": 54395 }, { "epoch": 1.138658628485305, "grad_norm": 0.32145455479621887, "learning_rate": 0.00013698094328615864, "loss": 11.6742, "step": 54396 }, { "epoch": 1.1386795612492673, "grad_norm": 0.3431553542613983, "learning_rate": 0.00013697890617825358, "loss": 11.6419, "step": 54397 }, { "epoch": 1.1387004940132295, "grad_norm": 0.3568767309188843, "learning_rate": 0.000136976869052572, "loss": 11.667, "step": 54398 }, { "epoch": 1.1387214267771917, "grad_norm": 0.2983161509037018, "learning_rate": 0.0001369748319091149, "loss": 11.6732, "step": 54399 }, { "epoch": 1.1387423595411539, "grad_norm": 0.3086344599723816, "learning_rate": 0.00013697279474788332, "loss": 11.6704, "step": 54400 }, { "epoch": 1.138763292305116, "grad_norm": 0.2994506061077118, "learning_rate": 0.0001369707575688781, "loss": 11.6667, "step": 54401 }, { "epoch": 1.138784225069078, "grad_norm": 0.3804547190666199, "learning_rate": 0.00013696872037210036, "loss": 11.684, "step": 54402 }, { "epoch": 1.1388051578330403, "grad_norm": 0.30468395352363586, "learning_rate": 0.00013696668315755096, "loss": 11.672, "step": 54403 }, { "epoch": 1.1388260905970025, "grad_norm": 0.2794245481491089, "learning_rate": 0.00013696464592523094, "loss": 11.6499, "step": 54404 }, { "epoch": 1.1388470233609647, "grad_norm": 0.3757229149341583, "learning_rate": 0.0001369626086751413, "loss": 11.6687, "step": 54405 }, { "epoch": 1.1388679561249266, "grad_norm": 0.43603548407554626, "learning_rate": 0.00013696057140728297, "loss": 11.6837, "step": 54406 }, { "epoch": 1.1388888888888888, "grad_norm": 0.2667413055896759, "learning_rate": 0.000136958534121657, "loss": 11.6646, "step": 54407 }, { "epoch": 1.138909821652851, "grad_norm": 0.3163650631904602, "learning_rate": 0.00013695649681826424, "loss": 11.686, "step": 54408 }, { "epoch": 1.1389307544168132, "grad_norm": 0.3182964324951172, "learning_rate": 0.00013695445949710581, "loss": 11.6717, "step": 54409 }, { "epoch": 1.1389516871807754, "grad_norm": 0.36840298771858215, "learning_rate": 0.00013695242215818265, "loss": 11.6664, "step": 54410 }, { "epoch": 1.1389726199447374, "grad_norm": 0.2861146628856659, "learning_rate": 0.00013695038480149568, "loss": 11.6612, "step": 54411 }, { "epoch": 1.1389935527086996, "grad_norm": 0.2855009436607361, "learning_rate": 0.00013694834742704594, "loss": 11.6716, "step": 54412 }, { "epoch": 1.1390144854726618, "grad_norm": 0.34275850653648376, "learning_rate": 0.00013694631003483435, "loss": 11.6861, "step": 54413 }, { "epoch": 1.139035418236624, "grad_norm": 0.2842520475387573, "learning_rate": 0.00013694427262486198, "loss": 11.6699, "step": 54414 }, { "epoch": 1.139056351000586, "grad_norm": 0.374307245016098, "learning_rate": 0.0001369422351971297, "loss": 11.6808, "step": 54415 }, { "epoch": 1.1390772837645482, "grad_norm": 0.42783015966415405, "learning_rate": 0.0001369401977516386, "loss": 11.6761, "step": 54416 }, { "epoch": 1.1390982165285104, "grad_norm": 0.41332077980041504, "learning_rate": 0.0001369381602883896, "loss": 11.6899, "step": 54417 }, { "epoch": 1.1391191492924726, "grad_norm": 0.38902175426483154, "learning_rate": 0.0001369361228073837, "loss": 11.6482, "step": 54418 }, { "epoch": 1.1391400820564348, "grad_norm": 0.3007003664970398, "learning_rate": 0.0001369340853086218, "loss": 11.6696, "step": 54419 }, { "epoch": 1.139161014820397, "grad_norm": 0.3304904103279114, "learning_rate": 0.000136932047792105, "loss": 11.6743, "step": 54420 }, { "epoch": 1.139181947584359, "grad_norm": 0.4072444438934326, "learning_rate": 0.0001369300102578342, "loss": 11.6705, "step": 54421 }, { "epoch": 1.1392028803483212, "grad_norm": 0.26854443550109863, "learning_rate": 0.00013692797270581041, "loss": 11.6492, "step": 54422 }, { "epoch": 1.1392238131122834, "grad_norm": 0.3925406336784363, "learning_rate": 0.00013692593513603461, "loss": 11.6903, "step": 54423 }, { "epoch": 1.1392447458762456, "grad_norm": 0.40182796120643616, "learning_rate": 0.0001369238975485078, "loss": 11.6764, "step": 54424 }, { "epoch": 1.1392656786402076, "grad_norm": 0.8204267621040344, "learning_rate": 0.00013692185994323088, "loss": 11.6777, "step": 54425 }, { "epoch": 1.1392866114041698, "grad_norm": 0.28922683000564575, "learning_rate": 0.00013691982232020487, "loss": 11.6679, "step": 54426 }, { "epoch": 1.139307544168132, "grad_norm": 0.28413158655166626, "learning_rate": 0.00013691778467943085, "loss": 11.6655, "step": 54427 }, { "epoch": 1.1393284769320942, "grad_norm": 0.42127445340156555, "learning_rate": 0.00013691574702090963, "loss": 11.6801, "step": 54428 }, { "epoch": 1.1393494096960564, "grad_norm": 0.33907487988471985, "learning_rate": 0.0001369137093446423, "loss": 11.6674, "step": 54429 }, { "epoch": 1.1393703424600183, "grad_norm": 0.3655618131160736, "learning_rate": 0.0001369116716506298, "loss": 11.6712, "step": 54430 }, { "epoch": 1.1393912752239805, "grad_norm": 0.28623107075691223, "learning_rate": 0.00013690963393887313, "loss": 11.6528, "step": 54431 }, { "epoch": 1.1394122079879427, "grad_norm": 0.39124608039855957, "learning_rate": 0.0001369075962093733, "loss": 11.6687, "step": 54432 }, { "epoch": 1.139433140751905, "grad_norm": 0.39580395817756653, "learning_rate": 0.00013690555846213116, "loss": 11.6595, "step": 54433 }, { "epoch": 1.139454073515867, "grad_norm": 0.323079913854599, "learning_rate": 0.00013690352069714785, "loss": 11.6736, "step": 54434 }, { "epoch": 1.1394750062798291, "grad_norm": 0.32868731021881104, "learning_rate": 0.00013690148291442425, "loss": 11.6612, "step": 54435 }, { "epoch": 1.1394959390437913, "grad_norm": 0.3190084993839264, "learning_rate": 0.00013689944511396135, "loss": 11.6699, "step": 54436 }, { "epoch": 1.1395168718077535, "grad_norm": 0.4152095913887024, "learning_rate": 0.00013689740729576016, "loss": 11.6774, "step": 54437 }, { "epoch": 1.1395378045717157, "grad_norm": 0.3397059440612793, "learning_rate": 0.00013689536945982167, "loss": 11.6702, "step": 54438 }, { "epoch": 1.139558737335678, "grad_norm": 0.25867173075675964, "learning_rate": 0.00013689333160614683, "loss": 11.6787, "step": 54439 }, { "epoch": 1.13957967009964, "grad_norm": 0.3485364317893982, "learning_rate": 0.00013689129373473658, "loss": 11.6685, "step": 54440 }, { "epoch": 1.139600602863602, "grad_norm": 0.30977973341941833, "learning_rate": 0.000136889255845592, "loss": 11.6689, "step": 54441 }, { "epoch": 1.1396215356275643, "grad_norm": 0.2537821829319, "learning_rate": 0.00013688721793871398, "loss": 11.677, "step": 54442 }, { "epoch": 1.1396424683915265, "grad_norm": 0.299620121717453, "learning_rate": 0.00013688518001410356, "loss": 11.6892, "step": 54443 }, { "epoch": 1.1396634011554885, "grad_norm": 0.31572791934013367, "learning_rate": 0.0001368831420717617, "loss": 11.6744, "step": 54444 }, { "epoch": 1.1396843339194507, "grad_norm": 0.3251931965351105, "learning_rate": 0.00013688110411168934, "loss": 11.6725, "step": 54445 }, { "epoch": 1.1397052666834129, "grad_norm": 0.3141253888607025, "learning_rate": 0.00013687906613388755, "loss": 11.6651, "step": 54446 }, { "epoch": 1.139726199447375, "grad_norm": 0.4084301292896271, "learning_rate": 0.00013687702813835722, "loss": 11.6937, "step": 54447 }, { "epoch": 1.1397471322113373, "grad_norm": 0.27200978994369507, "learning_rate": 0.00013687499012509933, "loss": 11.6511, "step": 54448 }, { "epoch": 1.1397680649752993, "grad_norm": 0.2705433666706085, "learning_rate": 0.00013687295209411496, "loss": 11.666, "step": 54449 }, { "epoch": 1.1397889977392615, "grad_norm": 0.36849769949913025, "learning_rate": 0.00013687091404540497, "loss": 11.6941, "step": 54450 }, { "epoch": 1.1398099305032237, "grad_norm": 0.34370094537734985, "learning_rate": 0.00013686887597897044, "loss": 11.6812, "step": 54451 }, { "epoch": 1.1398308632671859, "grad_norm": 0.460697740316391, "learning_rate": 0.00013686683789481226, "loss": 11.667, "step": 54452 }, { "epoch": 1.1398517960311478, "grad_norm": 0.4834199547767639, "learning_rate": 0.00013686479979293145, "loss": 11.6533, "step": 54453 }, { "epoch": 1.13987272879511, "grad_norm": 0.3446224331855774, "learning_rate": 0.00013686276167332903, "loss": 11.6646, "step": 54454 }, { "epoch": 1.1398936615590722, "grad_norm": 0.3744746148586273, "learning_rate": 0.0001368607235360059, "loss": 11.6623, "step": 54455 }, { "epoch": 1.1399145943230344, "grad_norm": 0.28601980209350586, "learning_rate": 0.00013685868538096312, "loss": 11.6494, "step": 54456 }, { "epoch": 1.1399355270869966, "grad_norm": 0.3295336365699768, "learning_rate": 0.00013685664720820162, "loss": 11.6635, "step": 54457 }, { "epoch": 1.1399564598509588, "grad_norm": 0.35624533891677856, "learning_rate": 0.00013685460901772238, "loss": 11.6787, "step": 54458 }, { "epoch": 1.1399773926149208, "grad_norm": 0.2844374179840088, "learning_rate": 0.00013685257080952638, "loss": 11.6586, "step": 54459 }, { "epoch": 1.139998325378883, "grad_norm": 0.3242539167404175, "learning_rate": 0.00013685053258361465, "loss": 11.6711, "step": 54460 }, { "epoch": 1.1400192581428452, "grad_norm": 0.4212028682231903, "learning_rate": 0.0001368484943399881, "loss": 11.6795, "step": 54461 }, { "epoch": 1.1400401909068074, "grad_norm": 0.37674951553344727, "learning_rate": 0.00013684645607864773, "loss": 11.6698, "step": 54462 }, { "epoch": 1.1400611236707694, "grad_norm": 0.3536170721054077, "learning_rate": 0.00013684441779959457, "loss": 11.6558, "step": 54463 }, { "epoch": 1.1400820564347316, "grad_norm": 0.3251650929450989, "learning_rate": 0.00013684237950282953, "loss": 11.6694, "step": 54464 }, { "epoch": 1.1401029891986938, "grad_norm": 0.40344342589378357, "learning_rate": 0.0001368403411883536, "loss": 11.6799, "step": 54465 }, { "epoch": 1.140123921962656, "grad_norm": 0.2732636630535126, "learning_rate": 0.00013683830285616784, "loss": 11.6682, "step": 54466 }, { "epoch": 1.1401448547266182, "grad_norm": 0.24144786596298218, "learning_rate": 0.00013683626450627313, "loss": 11.6723, "step": 54467 }, { "epoch": 1.1401657874905802, "grad_norm": 0.2795935273170471, "learning_rate": 0.0001368342261386705, "loss": 11.6756, "step": 54468 }, { "epoch": 1.1401867202545424, "grad_norm": 0.3511239290237427, "learning_rate": 0.0001368321877533609, "loss": 11.6651, "step": 54469 }, { "epoch": 1.1402076530185046, "grad_norm": 0.44464120268821716, "learning_rate": 0.00013683014935034535, "loss": 11.67, "step": 54470 }, { "epoch": 1.1402285857824668, "grad_norm": 0.2627239525318146, "learning_rate": 0.00013682811092962483, "loss": 11.6804, "step": 54471 }, { "epoch": 1.1402495185464288, "grad_norm": 0.3832915723323822, "learning_rate": 0.00013682607249120024, "loss": 11.6624, "step": 54472 }, { "epoch": 1.140270451310391, "grad_norm": 0.3808514177799225, "learning_rate": 0.00013682403403507266, "loss": 11.6771, "step": 54473 }, { "epoch": 1.1402913840743532, "grad_norm": 0.34151339530944824, "learning_rate": 0.00013682199556124303, "loss": 11.6699, "step": 54474 }, { "epoch": 1.1403123168383154, "grad_norm": 0.2573256492614746, "learning_rate": 0.0001368199570697123, "loss": 11.6529, "step": 54475 }, { "epoch": 1.1403332496022776, "grad_norm": 0.37153688073158264, "learning_rate": 0.00013681791856048152, "loss": 11.6792, "step": 54476 }, { "epoch": 1.1403541823662398, "grad_norm": 0.309396892786026, "learning_rate": 0.0001368158800335516, "loss": 11.6602, "step": 54477 }, { "epoch": 1.1403751151302017, "grad_norm": 0.2911333739757538, "learning_rate": 0.00013681384148892355, "loss": 11.6661, "step": 54478 }, { "epoch": 1.140396047894164, "grad_norm": 0.2978394627571106, "learning_rate": 0.00013681180292659834, "loss": 11.6732, "step": 54479 }, { "epoch": 1.1404169806581261, "grad_norm": 0.2507574260234833, "learning_rate": 0.00013680976434657697, "loss": 11.6506, "step": 54480 }, { "epoch": 1.1404379134220883, "grad_norm": 0.2432958483695984, "learning_rate": 0.00013680772574886042, "loss": 11.6616, "step": 54481 }, { "epoch": 1.1404588461860503, "grad_norm": 0.37392857670783997, "learning_rate": 0.00013680568713344966, "loss": 11.6578, "step": 54482 }, { "epoch": 1.1404797789500125, "grad_norm": 0.3036283254623413, "learning_rate": 0.00013680364850034567, "loss": 11.6662, "step": 54483 }, { "epoch": 1.1405007117139747, "grad_norm": 0.3193393349647522, "learning_rate": 0.0001368016098495494, "loss": 11.6576, "step": 54484 }, { "epoch": 1.140521644477937, "grad_norm": 0.32847514748573303, "learning_rate": 0.00013679957118106187, "loss": 11.6701, "step": 54485 }, { "epoch": 1.1405425772418991, "grad_norm": 0.27020594477653503, "learning_rate": 0.00013679753249488407, "loss": 11.6531, "step": 54486 }, { "epoch": 1.140563510005861, "grad_norm": 0.3099590837955475, "learning_rate": 0.00013679549379101694, "loss": 11.6681, "step": 54487 }, { "epoch": 1.1405844427698233, "grad_norm": 0.3085559904575348, "learning_rate": 0.0001367934550694615, "loss": 11.6571, "step": 54488 }, { "epoch": 1.1406053755337855, "grad_norm": 0.2684389650821686, "learning_rate": 0.00013679141633021866, "loss": 11.6727, "step": 54489 }, { "epoch": 1.1406263082977477, "grad_norm": 0.3116244971752167, "learning_rate": 0.00013678937757328948, "loss": 11.6755, "step": 54490 }, { "epoch": 1.1406472410617097, "grad_norm": 0.3381907641887665, "learning_rate": 0.0001367873387986749, "loss": 11.6643, "step": 54491 }, { "epoch": 1.1406681738256719, "grad_norm": 0.3207962214946747, "learning_rate": 0.00013678530000637591, "loss": 11.6718, "step": 54492 }, { "epoch": 1.140689106589634, "grad_norm": 0.35835281014442444, "learning_rate": 0.0001367832611963935, "loss": 11.6582, "step": 54493 }, { "epoch": 1.1407100393535963, "grad_norm": 0.45001477003097534, "learning_rate": 0.00013678122236872865, "loss": 11.6924, "step": 54494 }, { "epoch": 1.1407309721175585, "grad_norm": 0.2569904625415802, "learning_rate": 0.00013677918352338232, "loss": 11.6653, "step": 54495 }, { "epoch": 1.1407519048815207, "grad_norm": 0.26991257071495056, "learning_rate": 0.0001367771446603555, "loss": 11.67, "step": 54496 }, { "epoch": 1.1407728376454827, "grad_norm": 0.3617772161960602, "learning_rate": 0.00013677510577964915, "loss": 11.667, "step": 54497 }, { "epoch": 1.1407937704094449, "grad_norm": 0.447566956281662, "learning_rate": 0.0001367730668812643, "loss": 11.6708, "step": 54498 }, { "epoch": 1.140814703173407, "grad_norm": 0.2906445264816284, "learning_rate": 0.00013677102796520187, "loss": 11.6615, "step": 54499 }, { "epoch": 1.1408356359373693, "grad_norm": 0.2915176451206207, "learning_rate": 0.00013676898903146288, "loss": 11.679, "step": 54500 }, { "epoch": 1.1408565687013312, "grad_norm": 0.31844285130500793, "learning_rate": 0.00013676695008004832, "loss": 11.6552, "step": 54501 }, { "epoch": 1.1408775014652934, "grad_norm": 0.3008694052696228, "learning_rate": 0.0001367649111109591, "loss": 11.6521, "step": 54502 }, { "epoch": 1.1408984342292556, "grad_norm": 0.31876295804977417, "learning_rate": 0.00013676287212419628, "loss": 11.6661, "step": 54503 }, { "epoch": 1.1409193669932178, "grad_norm": 0.30997002124786377, "learning_rate": 0.00013676083311976083, "loss": 11.6497, "step": 54504 }, { "epoch": 1.14094029975718, "grad_norm": 0.2783697247505188, "learning_rate": 0.00013675879409765372, "loss": 11.664, "step": 54505 }, { "epoch": 1.140961232521142, "grad_norm": 0.328551709651947, "learning_rate": 0.00013675675505787587, "loss": 11.6523, "step": 54506 }, { "epoch": 1.1409821652851042, "grad_norm": 0.2895074188709259, "learning_rate": 0.00013675471600042835, "loss": 11.6671, "step": 54507 }, { "epoch": 1.1410030980490664, "grad_norm": 0.4288688004016876, "learning_rate": 0.00013675267692531208, "loss": 11.6496, "step": 54508 }, { "epoch": 1.1410240308130286, "grad_norm": 0.3240976929664612, "learning_rate": 0.00013675063783252805, "loss": 11.6835, "step": 54509 }, { "epoch": 1.1410449635769906, "grad_norm": 0.32097145915031433, "learning_rate": 0.0001367485987220773, "loss": 11.6734, "step": 54510 }, { "epoch": 1.1410658963409528, "grad_norm": 0.3036063015460968, "learning_rate": 0.00013674655959396072, "loss": 11.6682, "step": 54511 }, { "epoch": 1.141086829104915, "grad_norm": 0.310579776763916, "learning_rate": 0.00013674452044817938, "loss": 11.6616, "step": 54512 }, { "epoch": 1.1411077618688772, "grad_norm": 0.3162006139755249, "learning_rate": 0.00013674248128473418, "loss": 11.6777, "step": 54513 }, { "epoch": 1.1411286946328394, "grad_norm": 0.305088609457016, "learning_rate": 0.0001367404421036261, "loss": 11.6707, "step": 54514 }, { "epoch": 1.1411496273968016, "grad_norm": 0.2872721552848816, "learning_rate": 0.0001367384029048562, "loss": 11.6682, "step": 54515 }, { "epoch": 1.1411705601607636, "grad_norm": 0.2698664963245392, "learning_rate": 0.0001367363636884254, "loss": 11.6784, "step": 54516 }, { "epoch": 1.1411914929247258, "grad_norm": 0.34597331285476685, "learning_rate": 0.0001367343244543347, "loss": 11.6858, "step": 54517 }, { "epoch": 1.141212425688688, "grad_norm": 0.35869109630584717, "learning_rate": 0.00013673228520258505, "loss": 11.6675, "step": 54518 }, { "epoch": 1.1412333584526502, "grad_norm": 0.28612348437309265, "learning_rate": 0.00013673024593317747, "loss": 11.6547, "step": 54519 }, { "epoch": 1.1412542912166121, "grad_norm": 0.42887771129608154, "learning_rate": 0.00013672820664611295, "loss": 11.6609, "step": 54520 }, { "epoch": 1.1412752239805743, "grad_norm": 0.36580246686935425, "learning_rate": 0.00013672616734139242, "loss": 11.6656, "step": 54521 }, { "epoch": 1.1412961567445365, "grad_norm": 0.353771835565567, "learning_rate": 0.0001367241280190169, "loss": 11.6884, "step": 54522 }, { "epoch": 1.1413170895084987, "grad_norm": 0.36261361837387085, "learning_rate": 0.0001367220886789873, "loss": 11.6588, "step": 54523 }, { "epoch": 1.141338022272461, "grad_norm": 0.25037410855293274, "learning_rate": 0.00013672004932130472, "loss": 11.6771, "step": 54524 }, { "epoch": 1.141358955036423, "grad_norm": 0.3135067820549011, "learning_rate": 0.00013671800994597005, "loss": 11.652, "step": 54525 }, { "epoch": 1.1413798878003851, "grad_norm": 0.30488231778144836, "learning_rate": 0.00013671597055298432, "loss": 11.6616, "step": 54526 }, { "epoch": 1.1414008205643473, "grad_norm": 0.319173663854599, "learning_rate": 0.00013671393114234848, "loss": 11.6549, "step": 54527 }, { "epoch": 1.1414217533283095, "grad_norm": 0.3674784302711487, "learning_rate": 0.00013671189171406346, "loss": 11.6714, "step": 54528 }, { "epoch": 1.1414426860922715, "grad_norm": 0.34626904129981995, "learning_rate": 0.00013670985226813039, "loss": 11.6737, "step": 54529 }, { "epoch": 1.1414636188562337, "grad_norm": 0.3152979016304016, "learning_rate": 0.00013670781280455008, "loss": 11.6628, "step": 54530 }, { "epoch": 1.141484551620196, "grad_norm": 0.3646213412284851, "learning_rate": 0.0001367057733233236, "loss": 11.6784, "step": 54531 }, { "epoch": 1.141505484384158, "grad_norm": 0.29246100783348083, "learning_rate": 0.00013670373382445198, "loss": 11.6719, "step": 54532 }, { "epoch": 1.1415264171481203, "grad_norm": 0.34513935446739197, "learning_rate": 0.0001367016943079361, "loss": 11.6681, "step": 54533 }, { "epoch": 1.1415473499120825, "grad_norm": 0.3832530975341797, "learning_rate": 0.00013669965477377697, "loss": 11.6688, "step": 54534 }, { "epoch": 1.1415682826760445, "grad_norm": 0.3683047592639923, "learning_rate": 0.0001366976152219756, "loss": 11.6522, "step": 54535 }, { "epoch": 1.1415892154400067, "grad_norm": 0.2817422151565552, "learning_rate": 0.00013669557565253293, "loss": 11.671, "step": 54536 }, { "epoch": 1.1416101482039689, "grad_norm": 0.3144935965538025, "learning_rate": 0.00013669353606545, "loss": 11.6667, "step": 54537 }, { "epoch": 1.141631080967931, "grad_norm": 0.3200761377811432, "learning_rate": 0.0001366914964607277, "loss": 11.6853, "step": 54538 }, { "epoch": 1.141652013731893, "grad_norm": 0.31647002696990967, "learning_rate": 0.00013668945683836708, "loss": 11.6769, "step": 54539 }, { "epoch": 1.1416729464958553, "grad_norm": 0.2561149597167969, "learning_rate": 0.00013668741719836911, "loss": 11.6629, "step": 54540 }, { "epoch": 1.1416938792598175, "grad_norm": 0.323980450630188, "learning_rate": 0.00013668537754073476, "loss": 11.6496, "step": 54541 }, { "epoch": 1.1417148120237797, "grad_norm": 0.33767715096473694, "learning_rate": 0.00013668333786546504, "loss": 11.671, "step": 54542 }, { "epoch": 1.1417357447877419, "grad_norm": 0.3317458927631378, "learning_rate": 0.00013668129817256088, "loss": 11.6698, "step": 54543 }, { "epoch": 1.1417566775517038, "grad_norm": 0.297782838344574, "learning_rate": 0.0001366792584620233, "loss": 11.6668, "step": 54544 }, { "epoch": 1.141777610315666, "grad_norm": 0.26606443524360657, "learning_rate": 0.00013667721873385325, "loss": 11.6856, "step": 54545 }, { "epoch": 1.1417985430796282, "grad_norm": 0.354753315448761, "learning_rate": 0.00013667517898805173, "loss": 11.6757, "step": 54546 }, { "epoch": 1.1418194758435904, "grad_norm": 0.3243666887283325, "learning_rate": 0.0001366731392246197, "loss": 11.6728, "step": 54547 }, { "epoch": 1.1418404086075524, "grad_norm": 0.3862014412879944, "learning_rate": 0.00013667109944355817, "loss": 11.6688, "step": 54548 }, { "epoch": 1.1418613413715146, "grad_norm": 0.2899082899093628, "learning_rate": 0.0001366690596448681, "loss": 11.6463, "step": 54549 }, { "epoch": 1.1418822741354768, "grad_norm": 0.32147642970085144, "learning_rate": 0.0001366670198285505, "loss": 11.6613, "step": 54550 }, { "epoch": 1.141903206899439, "grad_norm": 0.3368929326534271, "learning_rate": 0.00013666497999460632, "loss": 11.6681, "step": 54551 }, { "epoch": 1.1419241396634012, "grad_norm": 0.43430987000465393, "learning_rate": 0.00013666294014303654, "loss": 11.649, "step": 54552 }, { "epoch": 1.1419450724273634, "grad_norm": 0.38823169469833374, "learning_rate": 0.00013666090027384217, "loss": 11.682, "step": 54553 }, { "epoch": 1.1419660051913254, "grad_norm": 0.3884943127632141, "learning_rate": 0.00013665886038702414, "loss": 11.6683, "step": 54554 }, { "epoch": 1.1419869379552876, "grad_norm": 0.44382786750793457, "learning_rate": 0.00013665682048258346, "loss": 11.6759, "step": 54555 }, { "epoch": 1.1420078707192498, "grad_norm": 0.332162469625473, "learning_rate": 0.00013665478056052114, "loss": 11.6677, "step": 54556 }, { "epoch": 1.142028803483212, "grad_norm": 0.38805273175239563, "learning_rate": 0.00013665274062083811, "loss": 11.6639, "step": 54557 }, { "epoch": 1.142049736247174, "grad_norm": 0.2565208375453949, "learning_rate": 0.0001366507006635354, "loss": 11.6774, "step": 54558 }, { "epoch": 1.1420706690111362, "grad_norm": 0.35825082659721375, "learning_rate": 0.00013664866068861396, "loss": 11.6804, "step": 54559 }, { "epoch": 1.1420916017750984, "grad_norm": 0.44006669521331787, "learning_rate": 0.00013664662069607476, "loss": 11.6602, "step": 54560 }, { "epoch": 1.1421125345390606, "grad_norm": 0.3037140667438507, "learning_rate": 0.00013664458068591878, "loss": 11.6663, "step": 54561 }, { "epoch": 1.1421334673030228, "grad_norm": 0.3313349485397339, "learning_rate": 0.00013664254065814702, "loss": 11.6792, "step": 54562 }, { "epoch": 1.1421544000669848, "grad_norm": 0.3848041296005249, "learning_rate": 0.0001366405006127605, "loss": 11.6732, "step": 54563 }, { "epoch": 1.142175332830947, "grad_norm": 0.2911997139453888, "learning_rate": 0.0001366384605497601, "loss": 11.6571, "step": 54564 }, { "epoch": 1.1421962655949092, "grad_norm": 0.32452288269996643, "learning_rate": 0.0001366364204691469, "loss": 11.6703, "step": 54565 }, { "epoch": 1.1422171983588714, "grad_norm": 0.2733415961265564, "learning_rate": 0.00013663438037092183, "loss": 11.6673, "step": 54566 }, { "epoch": 1.1422381311228333, "grad_norm": 0.3469473421573639, "learning_rate": 0.00013663234025508588, "loss": 11.6695, "step": 54567 }, { "epoch": 1.1422590638867955, "grad_norm": 0.2845800518989563, "learning_rate": 0.00013663030012164002, "loss": 11.6657, "step": 54568 }, { "epoch": 1.1422799966507577, "grad_norm": 0.30689185857772827, "learning_rate": 0.00013662825997058523, "loss": 11.642, "step": 54569 }, { "epoch": 1.14230092941472, "grad_norm": 0.28155720233917236, "learning_rate": 0.00013662621980192253, "loss": 11.6589, "step": 54570 }, { "epoch": 1.1423218621786821, "grad_norm": 0.35790279507637024, "learning_rate": 0.00013662417961565285, "loss": 11.6594, "step": 54571 }, { "epoch": 1.1423427949426443, "grad_norm": 0.3338566720485687, "learning_rate": 0.0001366221394117772, "loss": 11.6789, "step": 54572 }, { "epoch": 1.1423637277066063, "grad_norm": 0.3807401955127716, "learning_rate": 0.00013662009919029655, "loss": 11.6666, "step": 54573 }, { "epoch": 1.1423846604705685, "grad_norm": 0.32720017433166504, "learning_rate": 0.00013661805895121188, "loss": 11.663, "step": 54574 }, { "epoch": 1.1424055932345307, "grad_norm": 0.43827158212661743, "learning_rate": 0.0001366160186945242, "loss": 11.6672, "step": 54575 }, { "epoch": 1.142426525998493, "grad_norm": 0.3571443259716034, "learning_rate": 0.00013661397842023443, "loss": 11.6624, "step": 54576 }, { "epoch": 1.142447458762455, "grad_norm": 0.5257154703140259, "learning_rate": 0.00013661193812834361, "loss": 11.6862, "step": 54577 }, { "epoch": 1.142468391526417, "grad_norm": 0.4309994876384735, "learning_rate": 0.00013660989781885268, "loss": 11.6704, "step": 54578 }, { "epoch": 1.1424893242903793, "grad_norm": 0.4578285217285156, "learning_rate": 0.00013660785749176267, "loss": 11.6731, "step": 54579 }, { "epoch": 1.1425102570543415, "grad_norm": 0.4846424460411072, "learning_rate": 0.0001366058171470745, "loss": 11.6653, "step": 54580 }, { "epoch": 1.1425311898183037, "grad_norm": 0.4594530761241913, "learning_rate": 0.0001366037767847892, "loss": 11.6737, "step": 54581 }, { "epoch": 1.1425521225822657, "grad_norm": 0.2821056544780731, "learning_rate": 0.00013660173640490773, "loss": 11.6569, "step": 54582 }, { "epoch": 1.1425730553462279, "grad_norm": 0.29828330874443054, "learning_rate": 0.00013659969600743102, "loss": 11.6823, "step": 54583 }, { "epoch": 1.14259398811019, "grad_norm": 0.4405038058757782, "learning_rate": 0.00013659765559236018, "loss": 11.671, "step": 54584 }, { "epoch": 1.1426149208741523, "grad_norm": 0.2780522108078003, "learning_rate": 0.00013659561515969607, "loss": 11.6828, "step": 54585 }, { "epoch": 1.1426358536381143, "grad_norm": 0.4125579595565796, "learning_rate": 0.00013659357470943974, "loss": 11.6733, "step": 54586 }, { "epoch": 1.1426567864020765, "grad_norm": 0.3684632182121277, "learning_rate": 0.00013659153424159213, "loss": 11.6763, "step": 54587 }, { "epoch": 1.1426777191660387, "grad_norm": 0.2785075604915619, "learning_rate": 0.00013658949375615424, "loss": 11.6598, "step": 54588 }, { "epoch": 1.1426986519300009, "grad_norm": 0.2909826934337616, "learning_rate": 0.00013658745325312703, "loss": 11.645, "step": 54589 }, { "epoch": 1.142719584693963, "grad_norm": 0.2974793016910553, "learning_rate": 0.00013658541273251152, "loss": 11.658, "step": 54590 }, { "epoch": 1.1427405174579253, "grad_norm": 0.3224996328353882, "learning_rate": 0.00013658337219430865, "loss": 11.6653, "step": 54591 }, { "epoch": 1.1427614502218872, "grad_norm": 0.2948748469352722, "learning_rate": 0.00013658133163851942, "loss": 11.6543, "step": 54592 }, { "epoch": 1.1427823829858494, "grad_norm": 0.4219552278518677, "learning_rate": 0.00013657929106514484, "loss": 11.675, "step": 54593 }, { "epoch": 1.1428033157498116, "grad_norm": 0.3779911994934082, "learning_rate": 0.00013657725047418583, "loss": 11.6692, "step": 54594 }, { "epoch": 1.1428242485137738, "grad_norm": 0.3222889304161072, "learning_rate": 0.00013657520986564342, "loss": 11.6861, "step": 54595 }, { "epoch": 1.1428451812777358, "grad_norm": 0.3336784243583679, "learning_rate": 0.00013657316923951855, "loss": 11.6521, "step": 54596 }, { "epoch": 1.142866114041698, "grad_norm": 0.2858508825302124, "learning_rate": 0.00013657112859581224, "loss": 11.6704, "step": 54597 }, { "epoch": 1.1428870468056602, "grad_norm": 0.3210632801055908, "learning_rate": 0.00013656908793452548, "loss": 11.6732, "step": 54598 }, { "epoch": 1.1429079795696224, "grad_norm": 0.34857505559921265, "learning_rate": 0.0001365670472556592, "loss": 11.6713, "step": 54599 }, { "epoch": 1.1429289123335846, "grad_norm": 0.29230183362960815, "learning_rate": 0.00013656500655921444, "loss": 11.6596, "step": 54600 }, { "epoch": 1.1429498450975466, "grad_norm": 0.5106084942817688, "learning_rate": 0.00013656296584519208, "loss": 11.6748, "step": 54601 }, { "epoch": 1.1429707778615088, "grad_norm": 0.353034645318985, "learning_rate": 0.00013656092511359321, "loss": 11.655, "step": 54602 }, { "epoch": 1.142991710625471, "grad_norm": 0.2947573661804199, "learning_rate": 0.0001365588843644188, "loss": 11.6616, "step": 54603 }, { "epoch": 1.1430126433894332, "grad_norm": 0.2987007200717926, "learning_rate": 0.00013655684359766977, "loss": 11.6681, "step": 54604 }, { "epoch": 1.1430335761533952, "grad_norm": 0.27951228618621826, "learning_rate": 0.00013655480281334713, "loss": 11.667, "step": 54605 }, { "epoch": 1.1430545089173574, "grad_norm": 0.34361425042152405, "learning_rate": 0.00013655276201145187, "loss": 11.6751, "step": 54606 }, { "epoch": 1.1430754416813196, "grad_norm": 0.44949907064437866, "learning_rate": 0.00013655072119198494, "loss": 11.6657, "step": 54607 }, { "epoch": 1.1430963744452818, "grad_norm": 0.3092949688434601, "learning_rate": 0.00013654868035494738, "loss": 11.6581, "step": 54608 }, { "epoch": 1.143117307209244, "grad_norm": 0.32330238819122314, "learning_rate": 0.0001365466395003401, "loss": 11.6566, "step": 54609 }, { "epoch": 1.1431382399732062, "grad_norm": 0.28124362230300903, "learning_rate": 0.00013654459862816418, "loss": 11.661, "step": 54610 }, { "epoch": 1.1431591727371682, "grad_norm": 0.34824201464653015, "learning_rate": 0.00013654255773842052, "loss": 11.6674, "step": 54611 }, { "epoch": 1.1431801055011304, "grad_norm": 0.30471837520599365, "learning_rate": 0.00013654051683111007, "loss": 11.6734, "step": 54612 }, { "epoch": 1.1432010382650926, "grad_norm": 0.3973849415779114, "learning_rate": 0.0001365384759062339, "loss": 11.6765, "step": 54613 }, { "epoch": 1.1432219710290548, "grad_norm": 0.31154176592826843, "learning_rate": 0.00013653643496379294, "loss": 11.6634, "step": 54614 }, { "epoch": 1.1432429037930167, "grad_norm": 0.33334067463874817, "learning_rate": 0.0001365343940037882, "loss": 11.6735, "step": 54615 }, { "epoch": 1.143263836556979, "grad_norm": 0.4116821587085724, "learning_rate": 0.00013653235302622062, "loss": 11.6766, "step": 54616 }, { "epoch": 1.1432847693209411, "grad_norm": 0.3265521228313446, "learning_rate": 0.00013653031203109125, "loss": 11.6489, "step": 54617 }, { "epoch": 1.1433057020849033, "grad_norm": 0.31116223335266113, "learning_rate": 0.000136528271018401, "loss": 11.6698, "step": 54618 }, { "epoch": 1.1433266348488655, "grad_norm": 0.4495384097099304, "learning_rate": 0.00013652622998815085, "loss": 11.6973, "step": 54619 }, { "epoch": 1.1433475676128275, "grad_norm": 0.3894522488117218, "learning_rate": 0.00013652418894034187, "loss": 11.6616, "step": 54620 }, { "epoch": 1.1433685003767897, "grad_norm": 0.34565895795822144, "learning_rate": 0.00013652214787497494, "loss": 11.6544, "step": 54621 }, { "epoch": 1.143389433140752, "grad_norm": 0.31520041823387146, "learning_rate": 0.0001365201067920511, "loss": 11.6627, "step": 54622 }, { "epoch": 1.1434103659047141, "grad_norm": 0.37964969873428345, "learning_rate": 0.0001365180656915713, "loss": 11.6593, "step": 54623 }, { "epoch": 1.143431298668676, "grad_norm": 0.2960466742515564, "learning_rate": 0.0001365160245735365, "loss": 11.677, "step": 54624 }, { "epoch": 1.1434522314326383, "grad_norm": 0.4118116796016693, "learning_rate": 0.00013651398343794778, "loss": 11.6824, "step": 54625 }, { "epoch": 1.1434731641966005, "grad_norm": 0.3721519112586975, "learning_rate": 0.000136511942284806, "loss": 11.6727, "step": 54626 }, { "epoch": 1.1434940969605627, "grad_norm": 0.32718512415885925, "learning_rate": 0.00013650990111411225, "loss": 11.6581, "step": 54627 }, { "epoch": 1.143515029724525, "grad_norm": 0.29170235991477966, "learning_rate": 0.0001365078599258674, "loss": 11.6772, "step": 54628 }, { "epoch": 1.143535962488487, "grad_norm": 0.34055638313293457, "learning_rate": 0.00013650581872007254, "loss": 11.671, "step": 54629 }, { "epoch": 1.143556895252449, "grad_norm": 0.49065715074539185, "learning_rate": 0.00013650377749672857, "loss": 11.6838, "step": 54630 }, { "epoch": 1.1435778280164113, "grad_norm": 0.32270482182502747, "learning_rate": 0.00013650173625583652, "loss": 11.6635, "step": 54631 }, { "epoch": 1.1435987607803735, "grad_norm": 0.3132859170436859, "learning_rate": 0.00013649969499739736, "loss": 11.6588, "step": 54632 }, { "epoch": 1.1436196935443357, "grad_norm": 0.3239586055278778, "learning_rate": 0.00013649765372141206, "loss": 11.6784, "step": 54633 }, { "epoch": 1.1436406263082977, "grad_norm": 0.34126412868499756, "learning_rate": 0.0001364956124278816, "loss": 11.6546, "step": 54634 }, { "epoch": 1.1436615590722599, "grad_norm": 0.3776565194129944, "learning_rate": 0.00013649357111680696, "loss": 11.6795, "step": 54635 }, { "epoch": 1.143682491836222, "grad_norm": 0.2712189555168152, "learning_rate": 0.00013649152978818913, "loss": 11.659, "step": 54636 }, { "epoch": 1.1437034246001843, "grad_norm": 0.30236729979515076, "learning_rate": 0.00013648948844202912, "loss": 11.6726, "step": 54637 }, { "epoch": 1.1437243573641465, "grad_norm": 0.40720421075820923, "learning_rate": 0.00013648744707832786, "loss": 11.6679, "step": 54638 }, { "epoch": 1.1437452901281084, "grad_norm": 0.33834096789360046, "learning_rate": 0.00013648540569708638, "loss": 11.6729, "step": 54639 }, { "epoch": 1.1437662228920706, "grad_norm": 0.39219558238983154, "learning_rate": 0.00013648336429830557, "loss": 11.6732, "step": 54640 }, { "epoch": 1.1437871556560328, "grad_norm": 0.34302040934562683, "learning_rate": 0.0001364813228819865, "loss": 11.6599, "step": 54641 }, { "epoch": 1.143808088419995, "grad_norm": 0.3048847019672394, "learning_rate": 0.00013647928144813016, "loss": 11.6762, "step": 54642 }, { "epoch": 1.143829021183957, "grad_norm": 0.30614566802978516, "learning_rate": 0.00013647723999673746, "loss": 11.6625, "step": 54643 }, { "epoch": 1.1438499539479192, "grad_norm": 0.2851449251174927, "learning_rate": 0.00013647519852780948, "loss": 11.6693, "step": 54644 }, { "epoch": 1.1438708867118814, "grad_norm": 0.33679404854774475, "learning_rate": 0.00013647315704134707, "loss": 11.6615, "step": 54645 }, { "epoch": 1.1438918194758436, "grad_norm": 0.28211668133735657, "learning_rate": 0.0001364711155373513, "loss": 11.6653, "step": 54646 }, { "epoch": 1.1439127522398058, "grad_norm": 0.3340556025505066, "learning_rate": 0.00013646907401582316, "loss": 11.6564, "step": 54647 }, { "epoch": 1.143933685003768, "grad_norm": 0.28417736291885376, "learning_rate": 0.0001364670324767636, "loss": 11.6714, "step": 54648 }, { "epoch": 1.14395461776773, "grad_norm": 0.29192936420440674, "learning_rate": 0.00013646499092017362, "loss": 11.6608, "step": 54649 }, { "epoch": 1.1439755505316922, "grad_norm": 0.3153674602508545, "learning_rate": 0.00013646294934605414, "loss": 11.6823, "step": 54650 }, { "epoch": 1.1439964832956544, "grad_norm": 0.3409840166568756, "learning_rate": 0.0001364609077544062, "loss": 11.6711, "step": 54651 }, { "epoch": 1.1440174160596166, "grad_norm": 0.31468454003334045, "learning_rate": 0.00013645886614523081, "loss": 11.658, "step": 54652 }, { "epoch": 1.1440383488235786, "grad_norm": 0.34726959466934204, "learning_rate": 0.0001364568245185289, "loss": 11.6847, "step": 54653 }, { "epoch": 1.1440592815875408, "grad_norm": 0.30673566460609436, "learning_rate": 0.00013645478287430146, "loss": 11.6649, "step": 54654 }, { "epoch": 1.144080214351503, "grad_norm": 0.3382391035556793, "learning_rate": 0.00013645274121254944, "loss": 11.684, "step": 54655 }, { "epoch": 1.1441011471154652, "grad_norm": 0.3577377200126648, "learning_rate": 0.0001364506995332739, "loss": 11.6626, "step": 54656 }, { "epoch": 1.1441220798794274, "grad_norm": 0.30959296226501465, "learning_rate": 0.00013644865783647575, "loss": 11.6707, "step": 54657 }, { "epoch": 1.1441430126433894, "grad_norm": 0.27984514832496643, "learning_rate": 0.000136446616122156, "loss": 11.6706, "step": 54658 }, { "epoch": 1.1441639454073516, "grad_norm": 0.32115840911865234, "learning_rate": 0.0001364445743903157, "loss": 11.6668, "step": 54659 }, { "epoch": 1.1441848781713138, "grad_norm": 0.3095357120037079, "learning_rate": 0.00013644253264095567, "loss": 11.6657, "step": 54660 }, { "epoch": 1.144205810935276, "grad_norm": 0.3426092267036438, "learning_rate": 0.00013644049087407702, "loss": 11.6564, "step": 54661 }, { "epoch": 1.144226743699238, "grad_norm": 0.33042797446250916, "learning_rate": 0.0001364384490896807, "loss": 11.6639, "step": 54662 }, { "epoch": 1.1442476764632001, "grad_norm": 0.36030447483062744, "learning_rate": 0.00013643640728776767, "loss": 11.6543, "step": 54663 }, { "epoch": 1.1442686092271623, "grad_norm": 0.4500155746936798, "learning_rate": 0.00013643436546833897, "loss": 11.6546, "step": 54664 }, { "epoch": 1.1442895419911245, "grad_norm": 0.4015966057777405, "learning_rate": 0.0001364323236313955, "loss": 11.6535, "step": 54665 }, { "epoch": 1.1443104747550867, "grad_norm": 0.3031463623046875, "learning_rate": 0.0001364302817769383, "loss": 11.6623, "step": 54666 }, { "epoch": 1.144331407519049, "grad_norm": 0.3078981935977936, "learning_rate": 0.00013642823990496833, "loss": 11.6523, "step": 54667 }, { "epoch": 1.144352340283011, "grad_norm": 0.32257944345474243, "learning_rate": 0.00013642619801548656, "loss": 11.6626, "step": 54668 }, { "epoch": 1.1443732730469731, "grad_norm": 0.36856555938720703, "learning_rate": 0.000136424156108494, "loss": 11.6792, "step": 54669 }, { "epoch": 1.1443942058109353, "grad_norm": 0.32045307755470276, "learning_rate": 0.0001364221141839916, "loss": 11.6722, "step": 54670 }, { "epoch": 1.1444151385748975, "grad_norm": 0.3234970271587372, "learning_rate": 0.0001364200722419804, "loss": 11.6644, "step": 54671 }, { "epoch": 1.1444360713388595, "grad_norm": 0.33268991112709045, "learning_rate": 0.00013641803028246132, "loss": 11.6662, "step": 54672 }, { "epoch": 1.1444570041028217, "grad_norm": 0.26234856247901917, "learning_rate": 0.00013641598830543535, "loss": 11.6616, "step": 54673 }, { "epoch": 1.144477936866784, "grad_norm": 0.36302030086517334, "learning_rate": 0.00013641394631090352, "loss": 11.6507, "step": 54674 }, { "epoch": 1.144498869630746, "grad_norm": 0.3546193540096283, "learning_rate": 0.00013641190429886674, "loss": 11.6755, "step": 54675 }, { "epoch": 1.1445198023947083, "grad_norm": 0.4041610062122345, "learning_rate": 0.00013640986226932605, "loss": 11.6795, "step": 54676 }, { "epoch": 1.1445407351586703, "grad_norm": 0.2797848582267761, "learning_rate": 0.0001364078202222824, "loss": 11.6686, "step": 54677 }, { "epoch": 1.1445616679226325, "grad_norm": 0.2784171998500824, "learning_rate": 0.0001364057781577368, "loss": 11.6546, "step": 54678 }, { "epoch": 1.1445826006865947, "grad_norm": 0.31482070684432983, "learning_rate": 0.0001364037360756902, "loss": 11.6708, "step": 54679 }, { "epoch": 1.1446035334505569, "grad_norm": 0.31999504566192627, "learning_rate": 0.00013640169397614358, "loss": 11.6846, "step": 54680 }, { "epoch": 1.1446244662145189, "grad_norm": 0.3414512872695923, "learning_rate": 0.00013639965185909794, "loss": 11.6611, "step": 54681 }, { "epoch": 1.144645398978481, "grad_norm": 0.35235562920570374, "learning_rate": 0.00013639760972455425, "loss": 11.6622, "step": 54682 }, { "epoch": 1.1446663317424433, "grad_norm": 0.3776834309101105, "learning_rate": 0.00013639556757251352, "loss": 11.6466, "step": 54683 }, { "epoch": 1.1446872645064055, "grad_norm": 0.5103427171707153, "learning_rate": 0.00013639352540297672, "loss": 11.6753, "step": 54684 }, { "epoch": 1.1447081972703677, "grad_norm": 0.3744368255138397, "learning_rate": 0.0001363914832159448, "loss": 11.6653, "step": 54685 }, { "epoch": 1.1447291300343299, "grad_norm": 0.34175121784210205, "learning_rate": 0.00013638944101141876, "loss": 11.6565, "step": 54686 }, { "epoch": 1.1447500627982918, "grad_norm": 0.3506440818309784, "learning_rate": 0.0001363873987893996, "loss": 11.6661, "step": 54687 }, { "epoch": 1.144770995562254, "grad_norm": 0.5050452947616577, "learning_rate": 0.00013638535654988832, "loss": 11.6815, "step": 54688 }, { "epoch": 1.1447919283262162, "grad_norm": 0.41137978434562683, "learning_rate": 0.0001363833142928858, "loss": 11.6859, "step": 54689 }, { "epoch": 1.1448128610901784, "grad_norm": 0.4749943017959595, "learning_rate": 0.00013638127201839314, "loss": 11.6539, "step": 54690 }, { "epoch": 1.1448337938541404, "grad_norm": 0.3145467936992645, "learning_rate": 0.00013637922972641126, "loss": 11.6685, "step": 54691 }, { "epoch": 1.1448547266181026, "grad_norm": 0.29763004183769226, "learning_rate": 0.00013637718741694118, "loss": 11.6707, "step": 54692 }, { "epoch": 1.1448756593820648, "grad_norm": 0.35618647933006287, "learning_rate": 0.00013637514508998383, "loss": 11.6508, "step": 54693 }, { "epoch": 1.144896592146027, "grad_norm": 0.2940438687801361, "learning_rate": 0.0001363731027455402, "loss": 11.6575, "step": 54694 }, { "epoch": 1.1449175249099892, "grad_norm": 0.4648072421550751, "learning_rate": 0.00013637106038361132, "loss": 11.665, "step": 54695 }, { "epoch": 1.1449384576739512, "grad_norm": 0.30072149634361267, "learning_rate": 0.00013636901800419816, "loss": 11.6569, "step": 54696 }, { "epoch": 1.1449593904379134, "grad_norm": 0.3440392017364502, "learning_rate": 0.00013636697560730164, "loss": 11.6918, "step": 54697 }, { "epoch": 1.1449803232018756, "grad_norm": 0.30519524216651917, "learning_rate": 0.00013636493319292282, "loss": 11.67, "step": 54698 }, { "epoch": 1.1450012559658378, "grad_norm": 0.2991369366645813, "learning_rate": 0.0001363628907610626, "loss": 11.6483, "step": 54699 }, { "epoch": 1.1450221887297998, "grad_norm": 0.3397887647151947, "learning_rate": 0.00013636084831172206, "loss": 11.6673, "step": 54700 }, { "epoch": 1.145043121493762, "grad_norm": 0.2592887580394745, "learning_rate": 0.00013635880584490212, "loss": 11.6733, "step": 54701 }, { "epoch": 1.1450640542577242, "grad_norm": 0.45634305477142334, "learning_rate": 0.00013635676336060378, "loss": 11.6676, "step": 54702 }, { "epoch": 1.1450849870216864, "grad_norm": 0.34088870882987976, "learning_rate": 0.00013635472085882802, "loss": 11.6631, "step": 54703 }, { "epoch": 1.1451059197856486, "grad_norm": 0.2601509690284729, "learning_rate": 0.0001363526783395758, "loss": 11.6615, "step": 54704 }, { "epoch": 1.1451268525496108, "grad_norm": 0.38983672857284546, "learning_rate": 0.0001363506358028481, "loss": 11.6678, "step": 54705 }, { "epoch": 1.1451477853135728, "grad_norm": 0.30090898275375366, "learning_rate": 0.00013634859324864596, "loss": 11.6688, "step": 54706 }, { "epoch": 1.145168718077535, "grad_norm": 0.3480837345123291, "learning_rate": 0.00013634655067697028, "loss": 11.6752, "step": 54707 }, { "epoch": 1.1451896508414972, "grad_norm": 0.2852459251880646, "learning_rate": 0.0001363445080878221, "loss": 11.6643, "step": 54708 }, { "epoch": 1.1452105836054594, "grad_norm": 0.3253690302371979, "learning_rate": 0.0001363424654812024, "loss": 11.6591, "step": 54709 }, { "epoch": 1.1452315163694213, "grad_norm": 0.34124991297721863, "learning_rate": 0.00013634042285711216, "loss": 11.6701, "step": 54710 }, { "epoch": 1.1452524491333835, "grad_norm": 0.43361520767211914, "learning_rate": 0.00013633838021555233, "loss": 11.665, "step": 54711 }, { "epoch": 1.1452733818973457, "grad_norm": 0.28920552134513855, "learning_rate": 0.00013633633755652391, "loss": 11.6665, "step": 54712 }, { "epoch": 1.145294314661308, "grad_norm": 0.32401183247566223, "learning_rate": 0.00013633429488002792, "loss": 11.688, "step": 54713 }, { "epoch": 1.1453152474252701, "grad_norm": 0.3336152732372284, "learning_rate": 0.00013633225218606524, "loss": 11.6513, "step": 54714 }, { "epoch": 1.1453361801892321, "grad_norm": 0.34712061285972595, "learning_rate": 0.00013633020947463698, "loss": 11.6593, "step": 54715 }, { "epoch": 1.1453571129531943, "grad_norm": 0.308893084526062, "learning_rate": 0.00013632816674574402, "loss": 11.6652, "step": 54716 }, { "epoch": 1.1453780457171565, "grad_norm": 0.32229480147361755, "learning_rate": 0.0001363261239993874, "loss": 11.6737, "step": 54717 }, { "epoch": 1.1453989784811187, "grad_norm": 0.3663638234138489, "learning_rate": 0.00013632408123556808, "loss": 11.6754, "step": 54718 }, { "epoch": 1.1454199112450807, "grad_norm": 0.30221328139305115, "learning_rate": 0.00013632203845428706, "loss": 11.6305, "step": 54719 }, { "epoch": 1.145440844009043, "grad_norm": 0.3357568085193634, "learning_rate": 0.0001363199956555453, "loss": 11.6768, "step": 54720 }, { "epoch": 1.145461776773005, "grad_norm": 0.35060298442840576, "learning_rate": 0.00013631795283934378, "loss": 11.6592, "step": 54721 }, { "epoch": 1.1454827095369673, "grad_norm": 0.3107055425643921, "learning_rate": 0.0001363159100056835, "loss": 11.6686, "step": 54722 }, { "epoch": 1.1455036423009295, "grad_norm": 0.30968624353408813, "learning_rate": 0.00013631386715456543, "loss": 11.6605, "step": 54723 }, { "epoch": 1.1455245750648917, "grad_norm": 0.4109967350959778, "learning_rate": 0.00013631182428599055, "loss": 11.6639, "step": 54724 }, { "epoch": 1.1455455078288537, "grad_norm": 0.30846747756004333, "learning_rate": 0.00013630978139995986, "loss": 11.6634, "step": 54725 }, { "epoch": 1.1455664405928159, "grad_norm": 0.30639591813087463, "learning_rate": 0.00013630773849647435, "loss": 11.6507, "step": 54726 }, { "epoch": 1.145587373356778, "grad_norm": 0.3204803168773651, "learning_rate": 0.00013630569557553497, "loss": 11.6723, "step": 54727 }, { "epoch": 1.1456083061207403, "grad_norm": 0.352640837430954, "learning_rate": 0.00013630365263714268, "loss": 11.6677, "step": 54728 }, { "epoch": 1.1456292388847023, "grad_norm": 0.5115729570388794, "learning_rate": 0.00013630160968129852, "loss": 11.6779, "step": 54729 }, { "epoch": 1.1456501716486645, "grad_norm": 0.2767227292060852, "learning_rate": 0.00013629956670800346, "loss": 11.6618, "step": 54730 }, { "epoch": 1.1456711044126267, "grad_norm": 0.31973007321357727, "learning_rate": 0.00013629752371725846, "loss": 11.6603, "step": 54731 }, { "epoch": 1.1456920371765889, "grad_norm": 0.3893236219882965, "learning_rate": 0.00013629548070906452, "loss": 11.6605, "step": 54732 }, { "epoch": 1.145712969940551, "grad_norm": 0.29615435004234314, "learning_rate": 0.00013629343768342263, "loss": 11.6742, "step": 54733 }, { "epoch": 1.145733902704513, "grad_norm": 0.41800254583358765, "learning_rate": 0.0001362913946403337, "loss": 11.6646, "step": 54734 }, { "epoch": 1.1457548354684752, "grad_norm": 0.3951979875564575, "learning_rate": 0.00013628935157979883, "loss": 11.691, "step": 54735 }, { "epoch": 1.1457757682324374, "grad_norm": 0.33918413519859314, "learning_rate": 0.0001362873085018189, "loss": 11.655, "step": 54736 }, { "epoch": 1.1457967009963996, "grad_norm": 0.3508119285106659, "learning_rate": 0.00013628526540639498, "loss": 11.6615, "step": 54737 }, { "epoch": 1.1458176337603616, "grad_norm": 0.2652237117290497, "learning_rate": 0.00013628322229352797, "loss": 11.6548, "step": 54738 }, { "epoch": 1.1458385665243238, "grad_norm": 0.34465911984443665, "learning_rate": 0.00013628117916321888, "loss": 11.6687, "step": 54739 }, { "epoch": 1.145859499288286, "grad_norm": 0.28674301505088806, "learning_rate": 0.00013627913601546872, "loss": 11.6708, "step": 54740 }, { "epoch": 1.1458804320522482, "grad_norm": 0.39161452651023865, "learning_rate": 0.00013627709285027843, "loss": 11.6752, "step": 54741 }, { "epoch": 1.1459013648162104, "grad_norm": 0.3775031268596649, "learning_rate": 0.00013627504966764905, "loss": 11.6775, "step": 54742 }, { "epoch": 1.1459222975801726, "grad_norm": 0.3156266212463379, "learning_rate": 0.0001362730064675815, "loss": 11.6709, "step": 54743 }, { "epoch": 1.1459432303441346, "grad_norm": 0.25832316279411316, "learning_rate": 0.0001362709632500768, "loss": 11.6678, "step": 54744 }, { "epoch": 1.1459641631080968, "grad_norm": 0.31938838958740234, "learning_rate": 0.00013626892001513592, "loss": 11.6645, "step": 54745 }, { "epoch": 1.145985095872059, "grad_norm": 0.43522951006889343, "learning_rate": 0.00013626687676275985, "loss": 11.6816, "step": 54746 }, { "epoch": 1.1460060286360212, "grad_norm": 0.30180105566978455, "learning_rate": 0.00013626483349294955, "loss": 11.6755, "step": 54747 }, { "epoch": 1.1460269613999832, "grad_norm": 0.27503541111946106, "learning_rate": 0.00013626279020570602, "loss": 11.6806, "step": 54748 }, { "epoch": 1.1460478941639454, "grad_norm": 0.30566126108169556, "learning_rate": 0.00013626074690103023, "loss": 11.659, "step": 54749 }, { "epoch": 1.1460688269279076, "grad_norm": 0.3666193187236786, "learning_rate": 0.0001362587035789232, "loss": 11.6575, "step": 54750 }, { "epoch": 1.1460897596918698, "grad_norm": 0.37907281517982483, "learning_rate": 0.0001362566602393859, "loss": 11.6618, "step": 54751 }, { "epoch": 1.146110692455832, "grad_norm": 0.26001426577568054, "learning_rate": 0.00013625461688241924, "loss": 11.6759, "step": 54752 }, { "epoch": 1.146131625219794, "grad_norm": 0.34890472888946533, "learning_rate": 0.00013625257350802428, "loss": 11.6762, "step": 54753 }, { "epoch": 1.1461525579837561, "grad_norm": 0.22331605851650238, "learning_rate": 0.00013625053011620203, "loss": 11.6591, "step": 54754 }, { "epoch": 1.1461734907477183, "grad_norm": 0.29600027203559875, "learning_rate": 0.00013624848670695336, "loss": 11.6767, "step": 54755 }, { "epoch": 1.1461944235116805, "grad_norm": 0.23687103390693665, "learning_rate": 0.00013624644328027934, "loss": 11.6664, "step": 54756 }, { "epoch": 1.1462153562756425, "grad_norm": 0.3097939193248749, "learning_rate": 0.00013624439983618095, "loss": 11.6573, "step": 54757 }, { "epoch": 1.1462362890396047, "grad_norm": 0.2961373031139374, "learning_rate": 0.0001362423563746591, "loss": 11.66, "step": 54758 }, { "epoch": 1.146257221803567, "grad_norm": 0.32316672801971436, "learning_rate": 0.00013624031289571485, "loss": 11.6733, "step": 54759 }, { "epoch": 1.1462781545675291, "grad_norm": 0.3070906400680542, "learning_rate": 0.00013623826939934916, "loss": 11.6659, "step": 54760 }, { "epoch": 1.1462990873314913, "grad_norm": 0.35503000020980835, "learning_rate": 0.000136236225885563, "loss": 11.6745, "step": 54761 }, { "epoch": 1.1463200200954533, "grad_norm": 0.3149591088294983, "learning_rate": 0.00013623418235435736, "loss": 11.6856, "step": 54762 }, { "epoch": 1.1463409528594155, "grad_norm": 0.3121671676635742, "learning_rate": 0.00013623213880573323, "loss": 11.682, "step": 54763 }, { "epoch": 1.1463618856233777, "grad_norm": 0.35687515139579773, "learning_rate": 0.00013623009523969158, "loss": 11.6735, "step": 54764 }, { "epoch": 1.14638281838734, "grad_norm": 0.46250081062316895, "learning_rate": 0.0001362280516562334, "loss": 11.6711, "step": 54765 }, { "epoch": 1.146403751151302, "grad_norm": 0.33386269211769104, "learning_rate": 0.0001362260080553597, "loss": 11.6735, "step": 54766 }, { "epoch": 1.146424683915264, "grad_norm": 0.31788405776023865, "learning_rate": 0.0001362239644370714, "loss": 11.6506, "step": 54767 }, { "epoch": 1.1464456166792263, "grad_norm": 0.45956167578697205, "learning_rate": 0.0001362219208013695, "loss": 11.6735, "step": 54768 }, { "epoch": 1.1464665494431885, "grad_norm": 0.3083702325820923, "learning_rate": 0.00013621987714825502, "loss": 11.6663, "step": 54769 }, { "epoch": 1.1464874822071507, "grad_norm": 0.31328675150871277, "learning_rate": 0.0001362178334777289, "loss": 11.6768, "step": 54770 }, { "epoch": 1.1465084149711129, "grad_norm": 0.3682880401611328, "learning_rate": 0.00013621578978979213, "loss": 11.6774, "step": 54771 }, { "epoch": 1.1465293477350749, "grad_norm": 0.41367512941360474, "learning_rate": 0.00013621374608444573, "loss": 11.6768, "step": 54772 }, { "epoch": 1.146550280499037, "grad_norm": 0.3963225185871124, "learning_rate": 0.00013621170236169066, "loss": 11.6844, "step": 54773 }, { "epoch": 1.1465712132629993, "grad_norm": 0.31958651542663574, "learning_rate": 0.0001362096586215279, "loss": 11.6751, "step": 54774 }, { "epoch": 1.1465921460269615, "grad_norm": 0.31478235125541687, "learning_rate": 0.00013620761486395842, "loss": 11.656, "step": 54775 }, { "epoch": 1.1466130787909234, "grad_norm": 0.32551342248916626, "learning_rate": 0.0001362055710889832, "loss": 11.6643, "step": 54776 }, { "epoch": 1.1466340115548856, "grad_norm": 0.3079689145088196, "learning_rate": 0.00013620352729660327, "loss": 11.6631, "step": 54777 }, { "epoch": 1.1466549443188478, "grad_norm": 0.2694455683231354, "learning_rate": 0.00013620148348681958, "loss": 11.6768, "step": 54778 }, { "epoch": 1.14667587708281, "grad_norm": 0.42545464634895325, "learning_rate": 0.0001361994396596331, "loss": 11.6702, "step": 54779 }, { "epoch": 1.1466968098467722, "grad_norm": 0.39566853642463684, "learning_rate": 0.00013619739581504479, "loss": 11.6781, "step": 54780 }, { "epoch": 1.1467177426107342, "grad_norm": 0.3053864538669586, "learning_rate": 0.0001361953519530557, "loss": 11.6568, "step": 54781 }, { "epoch": 1.1467386753746964, "grad_norm": 0.31409627199172974, "learning_rate": 0.00013619330807366676, "loss": 11.6824, "step": 54782 }, { "epoch": 1.1467596081386586, "grad_norm": 0.37071019411087036, "learning_rate": 0.000136191264176879, "loss": 11.6746, "step": 54783 }, { "epoch": 1.1467805409026208, "grad_norm": 0.3374473452568054, "learning_rate": 0.00013618922026269336, "loss": 11.6736, "step": 54784 }, { "epoch": 1.146801473666583, "grad_norm": 0.33739253878593445, "learning_rate": 0.00013618717633111083, "loss": 11.6648, "step": 54785 }, { "epoch": 1.146822406430545, "grad_norm": 0.3606218993663788, "learning_rate": 0.00013618513238213242, "loss": 11.6741, "step": 54786 }, { "epoch": 1.1468433391945072, "grad_norm": 0.28743237257003784, "learning_rate": 0.00013618308841575907, "loss": 11.6737, "step": 54787 }, { "epoch": 1.1468642719584694, "grad_norm": 0.3288560211658478, "learning_rate": 0.0001361810444319918, "loss": 11.6711, "step": 54788 }, { "epoch": 1.1468852047224316, "grad_norm": 0.2444322109222412, "learning_rate": 0.00013617900043083156, "loss": 11.6673, "step": 54789 }, { "epoch": 1.1469061374863938, "grad_norm": 0.2909988462924957, "learning_rate": 0.00013617695641227938, "loss": 11.6712, "step": 54790 }, { "epoch": 1.1469270702503558, "grad_norm": 0.302494078874588, "learning_rate": 0.00013617491237633616, "loss": 11.6596, "step": 54791 }, { "epoch": 1.146948003014318, "grad_norm": 0.35949793457984924, "learning_rate": 0.000136172868323003, "loss": 11.6707, "step": 54792 }, { "epoch": 1.1469689357782802, "grad_norm": 0.2523043751716614, "learning_rate": 0.00013617082425228073, "loss": 11.6809, "step": 54793 }, { "epoch": 1.1469898685422424, "grad_norm": 0.47441399097442627, "learning_rate": 0.00013616878016417048, "loss": 11.6765, "step": 54794 }, { "epoch": 1.1470108013062044, "grad_norm": 0.33211982250213623, "learning_rate": 0.0001361667360586732, "loss": 11.6906, "step": 54795 }, { "epoch": 1.1470317340701666, "grad_norm": 0.727203905582428, "learning_rate": 0.00013616469193578979, "loss": 11.6943, "step": 54796 }, { "epoch": 1.1470526668341288, "grad_norm": 0.3058239817619324, "learning_rate": 0.0001361626477955213, "loss": 11.685, "step": 54797 }, { "epoch": 1.147073599598091, "grad_norm": 0.30653971433639526, "learning_rate": 0.0001361606036378687, "loss": 11.66, "step": 54798 }, { "epoch": 1.1470945323620532, "grad_norm": 0.4133957028388977, "learning_rate": 0.00013615855946283301, "loss": 11.6575, "step": 54799 }, { "epoch": 1.1471154651260151, "grad_norm": 0.33995598554611206, "learning_rate": 0.00013615651527041514, "loss": 11.6694, "step": 54800 }, { "epoch": 1.1471363978899773, "grad_norm": 0.36885714530944824, "learning_rate": 0.0001361544710606161, "loss": 11.6647, "step": 54801 }, { "epoch": 1.1471573306539395, "grad_norm": 0.46803686022758484, "learning_rate": 0.0001361524268334369, "loss": 11.6643, "step": 54802 }, { "epoch": 1.1471782634179017, "grad_norm": 0.36734360456466675, "learning_rate": 0.0001361503825888785, "loss": 11.6791, "step": 54803 }, { "epoch": 1.147199196181864, "grad_norm": 0.3030419945716858, "learning_rate": 0.00013614833832694187, "loss": 11.6711, "step": 54804 }, { "epoch": 1.147220128945826, "grad_norm": 0.2947213649749756, "learning_rate": 0.000136146294047628, "loss": 11.6602, "step": 54805 }, { "epoch": 1.1472410617097881, "grad_norm": 0.30623704195022583, "learning_rate": 0.00013614424975093794, "loss": 11.6499, "step": 54806 }, { "epoch": 1.1472619944737503, "grad_norm": 0.27844688296318054, "learning_rate": 0.00013614220543687258, "loss": 11.672, "step": 54807 }, { "epoch": 1.1472829272377125, "grad_norm": 0.2927616834640503, "learning_rate": 0.00013614016110543293, "loss": 11.6566, "step": 54808 }, { "epoch": 1.1473038600016747, "grad_norm": 0.32155776023864746, "learning_rate": 0.00013613811675662, "loss": 11.6661, "step": 54809 }, { "epoch": 1.1473247927656367, "grad_norm": 0.38013336062431335, "learning_rate": 0.00013613607239043474, "loss": 11.682, "step": 54810 }, { "epoch": 1.147345725529599, "grad_norm": 0.2599378526210785, "learning_rate": 0.00013613402800687814, "loss": 11.6717, "step": 54811 }, { "epoch": 1.147366658293561, "grad_norm": 0.40322747826576233, "learning_rate": 0.00013613198360595122, "loss": 11.6831, "step": 54812 }, { "epoch": 1.1473875910575233, "grad_norm": 0.39603519439697266, "learning_rate": 0.00013612993918765492, "loss": 11.659, "step": 54813 }, { "epoch": 1.1474085238214853, "grad_norm": 0.3212137818336487, "learning_rate": 0.00013612789475199018, "loss": 11.6631, "step": 54814 }, { "epoch": 1.1474294565854475, "grad_norm": 0.29079288244247437, "learning_rate": 0.0001361258502989581, "loss": 11.6746, "step": 54815 }, { "epoch": 1.1474503893494097, "grad_norm": 0.36967432498931885, "learning_rate": 0.00013612380582855957, "loss": 11.6586, "step": 54816 }, { "epoch": 1.1474713221133719, "grad_norm": 0.33935824036598206, "learning_rate": 0.00013612176134079562, "loss": 11.6836, "step": 54817 }, { "epoch": 1.147492254877334, "grad_norm": 0.3501451313495636, "learning_rate": 0.00013611971683566722, "loss": 11.6785, "step": 54818 }, { "epoch": 1.147513187641296, "grad_norm": 0.29526135325431824, "learning_rate": 0.00013611767231317532, "loss": 11.6835, "step": 54819 }, { "epoch": 1.1475341204052583, "grad_norm": 0.42806488275527954, "learning_rate": 0.00013611562777332095, "loss": 11.6722, "step": 54820 }, { "epoch": 1.1475550531692205, "grad_norm": 0.310455322265625, "learning_rate": 0.00013611358321610507, "loss": 11.6613, "step": 54821 }, { "epoch": 1.1475759859331827, "grad_norm": 0.3828903138637543, "learning_rate": 0.0001361115386415287, "loss": 11.68, "step": 54822 }, { "epoch": 1.1475969186971449, "grad_norm": 0.31879889965057373, "learning_rate": 0.00013610949404959275, "loss": 11.6746, "step": 54823 }, { "epoch": 1.1476178514611068, "grad_norm": 0.4020099937915802, "learning_rate": 0.00013610744944029824, "loss": 11.6677, "step": 54824 }, { "epoch": 1.147638784225069, "grad_norm": 0.2915492355823517, "learning_rate": 0.00013610540481364617, "loss": 11.669, "step": 54825 }, { "epoch": 1.1476597169890312, "grad_norm": 0.3543381690979004, "learning_rate": 0.0001361033601696375, "loss": 11.6614, "step": 54826 }, { "epoch": 1.1476806497529934, "grad_norm": 0.25149229168891907, "learning_rate": 0.0001361013155082732, "loss": 11.664, "step": 54827 }, { "epoch": 1.1477015825169556, "grad_norm": 0.2994636297225952, "learning_rate": 0.00013609927082955433, "loss": 11.6797, "step": 54828 }, { "epoch": 1.1477225152809176, "grad_norm": 0.33029428124427795, "learning_rate": 0.00013609722613348177, "loss": 11.6728, "step": 54829 }, { "epoch": 1.1477434480448798, "grad_norm": 0.31466156244277954, "learning_rate": 0.00013609518142005658, "loss": 11.6706, "step": 54830 }, { "epoch": 1.147764380808842, "grad_norm": 0.44342535734176636, "learning_rate": 0.00013609313668927969, "loss": 11.6547, "step": 54831 }, { "epoch": 1.1477853135728042, "grad_norm": 0.31497788429260254, "learning_rate": 0.00013609109194115212, "loss": 11.676, "step": 54832 }, { "epoch": 1.1478062463367662, "grad_norm": 0.3354884088039398, "learning_rate": 0.00013608904717567484, "loss": 11.6797, "step": 54833 }, { "epoch": 1.1478271791007284, "grad_norm": 0.3306334614753723, "learning_rate": 0.0001360870023928488, "loss": 11.6566, "step": 54834 }, { "epoch": 1.1478481118646906, "grad_norm": 0.26631656289100647, "learning_rate": 0.00013608495759267504, "loss": 11.6723, "step": 54835 }, { "epoch": 1.1478690446286528, "grad_norm": 0.36971810460090637, "learning_rate": 0.00013608291277515452, "loss": 11.6522, "step": 54836 }, { "epoch": 1.147889977392615, "grad_norm": 0.3125988841056824, "learning_rate": 0.00013608086794028823, "loss": 11.6589, "step": 54837 }, { "epoch": 1.147910910156577, "grad_norm": 0.2611640393733978, "learning_rate": 0.00013607882308807714, "loss": 11.6726, "step": 54838 }, { "epoch": 1.1479318429205392, "grad_norm": 0.3612525165081024, "learning_rate": 0.0001360767782185222, "loss": 11.6566, "step": 54839 }, { "epoch": 1.1479527756845014, "grad_norm": 0.3012305796146393, "learning_rate": 0.00013607473333162448, "loss": 11.6735, "step": 54840 }, { "epoch": 1.1479737084484636, "grad_norm": 0.2788010835647583, "learning_rate": 0.0001360726884273849, "loss": 11.6623, "step": 54841 }, { "epoch": 1.1479946412124258, "grad_norm": 0.2765044867992401, "learning_rate": 0.00013607064350580446, "loss": 11.6607, "step": 54842 }, { "epoch": 1.1480155739763878, "grad_norm": 0.34426265954971313, "learning_rate": 0.00013606859856688409, "loss": 11.6713, "step": 54843 }, { "epoch": 1.14803650674035, "grad_norm": 0.30018648505210876, "learning_rate": 0.00013606655361062488, "loss": 11.659, "step": 54844 }, { "epoch": 1.1480574395043122, "grad_norm": 0.311012864112854, "learning_rate": 0.00013606450863702774, "loss": 11.6717, "step": 54845 }, { "epoch": 1.1480783722682744, "grad_norm": 0.24588541686534882, "learning_rate": 0.00013606246364609366, "loss": 11.6741, "step": 54846 }, { "epoch": 1.1480993050322366, "grad_norm": 0.3667415678501129, "learning_rate": 0.00013606041863782365, "loss": 11.6775, "step": 54847 }, { "epoch": 1.1481202377961985, "grad_norm": 0.3522345721721649, "learning_rate": 0.00013605837361221863, "loss": 11.6591, "step": 54848 }, { "epoch": 1.1481411705601607, "grad_norm": 0.36045780777931213, "learning_rate": 0.00013605632856927965, "loss": 11.6638, "step": 54849 }, { "epoch": 1.148162103324123, "grad_norm": 0.4683670997619629, "learning_rate": 0.00013605428350900767, "loss": 11.6455, "step": 54850 }, { "epoch": 1.1481830360880851, "grad_norm": 0.2766887843608856, "learning_rate": 0.00013605223843140367, "loss": 11.6665, "step": 54851 }, { "epoch": 1.1482039688520471, "grad_norm": 0.28839027881622314, "learning_rate": 0.00013605019333646866, "loss": 11.6694, "step": 54852 }, { "epoch": 1.1482249016160093, "grad_norm": 0.38636571168899536, "learning_rate": 0.00013604814822420358, "loss": 11.6721, "step": 54853 }, { "epoch": 1.1482458343799715, "grad_norm": 0.3083641529083252, "learning_rate": 0.00013604610309460945, "loss": 11.6687, "step": 54854 }, { "epoch": 1.1482667671439337, "grad_norm": 0.324884831905365, "learning_rate": 0.0001360440579476872, "loss": 11.6533, "step": 54855 }, { "epoch": 1.148287699907896, "grad_norm": 0.29365041851997375, "learning_rate": 0.00013604201278343787, "loss": 11.6808, "step": 54856 }, { "epoch": 1.148308632671858, "grad_norm": 0.3797298073768616, "learning_rate": 0.00013603996760186245, "loss": 11.6726, "step": 54857 }, { "epoch": 1.14832956543582, "grad_norm": 0.3381297290325165, "learning_rate": 0.00013603792240296186, "loss": 11.6739, "step": 54858 }, { "epoch": 1.1483504981997823, "grad_norm": 0.3638373017311096, "learning_rate": 0.00013603587718673713, "loss": 11.6633, "step": 54859 }, { "epoch": 1.1483714309637445, "grad_norm": 0.4213322103023529, "learning_rate": 0.0001360338319531892, "loss": 11.664, "step": 54860 }, { "epoch": 1.1483923637277067, "grad_norm": 0.3863813281059265, "learning_rate": 0.00013603178670231912, "loss": 11.681, "step": 54861 }, { "epoch": 1.1484132964916687, "grad_norm": 0.26727011799812317, "learning_rate": 0.00013602974143412782, "loss": 11.6662, "step": 54862 }, { "epoch": 1.1484342292556309, "grad_norm": 0.3084879219532013, "learning_rate": 0.00013602769614861634, "loss": 11.672, "step": 54863 }, { "epoch": 1.148455162019593, "grad_norm": 0.3454509377479553, "learning_rate": 0.0001360256508457856, "loss": 11.6584, "step": 54864 }, { "epoch": 1.1484760947835553, "grad_norm": 0.3049401640892029, "learning_rate": 0.0001360236055256366, "loss": 11.6731, "step": 54865 }, { "epoch": 1.1484970275475175, "grad_norm": 0.3789343237876892, "learning_rate": 0.00013602156018817032, "loss": 11.679, "step": 54866 }, { "epoch": 1.1485179603114795, "grad_norm": 0.2884688973426819, "learning_rate": 0.00013601951483338778, "loss": 11.6412, "step": 54867 }, { "epoch": 1.1485388930754417, "grad_norm": 0.31449094414711, "learning_rate": 0.00013601746946128993, "loss": 11.6811, "step": 54868 }, { "epoch": 1.1485598258394039, "grad_norm": 0.3825416564941406, "learning_rate": 0.00013601542407187776, "loss": 11.6697, "step": 54869 }, { "epoch": 1.148580758603366, "grad_norm": 0.34395942091941833, "learning_rate": 0.00013601337866515225, "loss": 11.6831, "step": 54870 }, { "epoch": 1.148601691367328, "grad_norm": 0.36274808645248413, "learning_rate": 0.00013601133324111437, "loss": 11.6706, "step": 54871 }, { "epoch": 1.1486226241312902, "grad_norm": 0.3461702764034271, "learning_rate": 0.00013600928779976517, "loss": 11.6533, "step": 54872 }, { "epoch": 1.1486435568952524, "grad_norm": 0.31192469596862793, "learning_rate": 0.00013600724234110554, "loss": 11.6815, "step": 54873 }, { "epoch": 1.1486644896592146, "grad_norm": 0.34071084856987, "learning_rate": 0.00013600519686513652, "loss": 11.6414, "step": 54874 }, { "epoch": 1.1486854224231768, "grad_norm": 0.37938612699508667, "learning_rate": 0.00013600315137185908, "loss": 11.6595, "step": 54875 }, { "epoch": 1.1487063551871388, "grad_norm": 0.4715850055217743, "learning_rate": 0.0001360011058612742, "loss": 11.6539, "step": 54876 }, { "epoch": 1.148727287951101, "grad_norm": 0.2872946262359619, "learning_rate": 0.00013599906033338289, "loss": 11.6759, "step": 54877 }, { "epoch": 1.1487482207150632, "grad_norm": 0.34377238154411316, "learning_rate": 0.00013599701478818606, "loss": 11.6819, "step": 54878 }, { "epoch": 1.1487691534790254, "grad_norm": 0.3876703381538391, "learning_rate": 0.0001359949692256848, "loss": 11.6787, "step": 54879 }, { "epoch": 1.1487900862429876, "grad_norm": 0.28113049268722534, "learning_rate": 0.00013599292364587998, "loss": 11.6638, "step": 54880 }, { "epoch": 1.1488110190069496, "grad_norm": 0.313095360994339, "learning_rate": 0.0001359908780487727, "loss": 11.6646, "step": 54881 }, { "epoch": 1.1488319517709118, "grad_norm": 0.3257686197757721, "learning_rate": 0.00013598883243436384, "loss": 11.6708, "step": 54882 }, { "epoch": 1.148852884534874, "grad_norm": 0.385536253452301, "learning_rate": 0.00013598678680265444, "loss": 11.6834, "step": 54883 }, { "epoch": 1.1488738172988362, "grad_norm": 0.42127782106399536, "learning_rate": 0.0001359847411536455, "loss": 11.6794, "step": 54884 }, { "epoch": 1.1488947500627984, "grad_norm": 0.3275498151779175, "learning_rate": 0.00013598269548733793, "loss": 11.6707, "step": 54885 }, { "epoch": 1.1489156828267604, "grad_norm": 0.2953230142593384, "learning_rate": 0.00013598064980373278, "loss": 11.675, "step": 54886 }, { "epoch": 1.1489366155907226, "grad_norm": 0.3186991214752197, "learning_rate": 0.000135978604102831, "loss": 11.6666, "step": 54887 }, { "epoch": 1.1489575483546848, "grad_norm": 0.28600215911865234, "learning_rate": 0.00013597655838463356, "loss": 11.6626, "step": 54888 }, { "epoch": 1.148978481118647, "grad_norm": 0.34757745265960693, "learning_rate": 0.00013597451264914153, "loss": 11.6728, "step": 54889 }, { "epoch": 1.148999413882609, "grad_norm": 0.29264217615127563, "learning_rate": 0.0001359724668963558, "loss": 11.6754, "step": 54890 }, { "epoch": 1.1490203466465712, "grad_norm": 0.2698681056499481, "learning_rate": 0.0001359704211262774, "loss": 11.6713, "step": 54891 }, { "epoch": 1.1490412794105334, "grad_norm": 0.37898018956184387, "learning_rate": 0.00013596837533890726, "loss": 11.6435, "step": 54892 }, { "epoch": 1.1490622121744956, "grad_norm": 0.4069180190563202, "learning_rate": 0.00013596632953424639, "loss": 11.6936, "step": 54893 }, { "epoch": 1.1490831449384578, "grad_norm": 0.3593764901161194, "learning_rate": 0.00013596428371229585, "loss": 11.6764, "step": 54894 }, { "epoch": 1.1491040777024197, "grad_norm": 0.2765408158302307, "learning_rate": 0.0001359622378730565, "loss": 11.6743, "step": 54895 }, { "epoch": 1.149125010466382, "grad_norm": 0.3291119337081909, "learning_rate": 0.00013596019201652943, "loss": 11.6794, "step": 54896 }, { "epoch": 1.1491459432303441, "grad_norm": 0.2943434715270996, "learning_rate": 0.00013595814614271554, "loss": 11.6654, "step": 54897 }, { "epoch": 1.1491668759943063, "grad_norm": 0.30593180656433105, "learning_rate": 0.00013595610025161586, "loss": 11.6735, "step": 54898 }, { "epoch": 1.1491878087582685, "grad_norm": 0.2774520516395569, "learning_rate": 0.00013595405434323134, "loss": 11.6622, "step": 54899 }, { "epoch": 1.1492087415222305, "grad_norm": 0.36611348390579224, "learning_rate": 0.000135952008417563, "loss": 11.6604, "step": 54900 }, { "epoch": 1.1492296742861927, "grad_norm": 0.31268516182899475, "learning_rate": 0.00013594996247461185, "loss": 11.6615, "step": 54901 }, { "epoch": 1.149250607050155, "grad_norm": 0.2837231457233429, "learning_rate": 0.00013594791651437877, "loss": 11.6704, "step": 54902 }, { "epoch": 1.1492715398141171, "grad_norm": 0.2735162675380707, "learning_rate": 0.00013594587053686482, "loss": 11.6504, "step": 54903 }, { "epoch": 1.1492924725780793, "grad_norm": 0.4195539355278015, "learning_rate": 0.000135943824542071, "loss": 11.6711, "step": 54904 }, { "epoch": 1.1493134053420413, "grad_norm": 0.3419404625892639, "learning_rate": 0.00013594177852999825, "loss": 11.664, "step": 54905 }, { "epoch": 1.1493343381060035, "grad_norm": 0.32644546031951904, "learning_rate": 0.00013593973250064755, "loss": 11.6794, "step": 54906 }, { "epoch": 1.1493552708699657, "grad_norm": 0.3315591514110565, "learning_rate": 0.0001359376864540199, "loss": 11.6776, "step": 54907 }, { "epoch": 1.149376203633928, "grad_norm": 0.38107576966285706, "learning_rate": 0.00013593564039011632, "loss": 11.6852, "step": 54908 }, { "epoch": 1.1493971363978899, "grad_norm": 0.2858637571334839, "learning_rate": 0.00013593359430893776, "loss": 11.6671, "step": 54909 }, { "epoch": 1.149418069161852, "grad_norm": 0.3203212320804596, "learning_rate": 0.00013593154821048514, "loss": 11.6767, "step": 54910 }, { "epoch": 1.1494390019258143, "grad_norm": 0.3305646777153015, "learning_rate": 0.00013592950209475956, "loss": 11.676, "step": 54911 }, { "epoch": 1.1494599346897765, "grad_norm": 0.4963005483150482, "learning_rate": 0.0001359274559617619, "loss": 11.5985, "step": 54912 }, { "epoch": 1.1494808674537387, "grad_norm": 0.3707285225391388, "learning_rate": 0.00013592540981149324, "loss": 11.6625, "step": 54913 }, { "epoch": 1.1495018002177007, "grad_norm": 0.3675210475921631, "learning_rate": 0.0001359233636439545, "loss": 11.6458, "step": 54914 }, { "epoch": 1.1495227329816629, "grad_norm": 0.3886130154132843, "learning_rate": 0.00013592131745914667, "loss": 11.6731, "step": 54915 }, { "epoch": 1.149543665745625, "grad_norm": 0.3688908815383911, "learning_rate": 0.00013591927125707076, "loss": 11.6646, "step": 54916 }, { "epoch": 1.1495645985095873, "grad_norm": 0.28374478220939636, "learning_rate": 0.00013591722503772773, "loss": 11.6519, "step": 54917 }, { "epoch": 1.1495855312735495, "grad_norm": 0.3574768602848053, "learning_rate": 0.00013591517880111854, "loss": 11.6684, "step": 54918 }, { "epoch": 1.1496064640375114, "grad_norm": 0.3198339343070984, "learning_rate": 0.00013591313254724424, "loss": 11.6685, "step": 54919 }, { "epoch": 1.1496273968014736, "grad_norm": 0.3933385908603668, "learning_rate": 0.00013591108627610576, "loss": 11.6723, "step": 54920 }, { "epoch": 1.1496483295654358, "grad_norm": 0.3091997504234314, "learning_rate": 0.00013590903998770412, "loss": 11.6734, "step": 54921 }, { "epoch": 1.149669262329398, "grad_norm": 0.27927273511886597, "learning_rate": 0.00013590699368204025, "loss": 11.6673, "step": 54922 }, { "epoch": 1.1496901950933602, "grad_norm": 0.31928956508636475, "learning_rate": 0.0001359049473591152, "loss": 11.6645, "step": 54923 }, { "epoch": 1.1497111278573222, "grad_norm": 0.31625035405158997, "learning_rate": 0.00013590290101892993, "loss": 11.6714, "step": 54924 }, { "epoch": 1.1497320606212844, "grad_norm": 0.28561297059059143, "learning_rate": 0.00013590085466148538, "loss": 11.6665, "step": 54925 }, { "epoch": 1.1497529933852466, "grad_norm": 0.29038599133491516, "learning_rate": 0.0001358988082867826, "loss": 11.6684, "step": 54926 }, { "epoch": 1.1497739261492088, "grad_norm": 0.3741726577281952, "learning_rate": 0.00013589676189482253, "loss": 11.6549, "step": 54927 }, { "epoch": 1.1497948589131708, "grad_norm": 0.4681086242198944, "learning_rate": 0.00013589471548560618, "loss": 11.6467, "step": 54928 }, { "epoch": 1.149815791677133, "grad_norm": 0.3209705948829651, "learning_rate": 0.0001358926690591345, "loss": 11.6798, "step": 54929 }, { "epoch": 1.1498367244410952, "grad_norm": 0.29645124077796936, "learning_rate": 0.0001358906226154085, "loss": 11.6679, "step": 54930 }, { "epoch": 1.1498576572050574, "grad_norm": 0.28945499658584595, "learning_rate": 0.00013588857615442915, "loss": 11.6752, "step": 54931 }, { "epoch": 1.1498785899690196, "grad_norm": 0.3580963611602783, "learning_rate": 0.00013588652967619745, "loss": 11.6674, "step": 54932 }, { "epoch": 1.1498995227329816, "grad_norm": 0.3102859854698181, "learning_rate": 0.0001358844831807144, "loss": 11.6721, "step": 54933 }, { "epoch": 1.1499204554969438, "grad_norm": 0.32288414239883423, "learning_rate": 0.00013588243666798095, "loss": 11.6515, "step": 54934 }, { "epoch": 1.149941388260906, "grad_norm": 0.3639492094516754, "learning_rate": 0.00013588039013799809, "loss": 11.6656, "step": 54935 }, { "epoch": 1.1499623210248682, "grad_norm": 0.3141385316848755, "learning_rate": 0.0001358783435907668, "loss": 11.6514, "step": 54936 }, { "epoch": 1.1499832537888304, "grad_norm": 0.35111844539642334, "learning_rate": 0.00013587629702628806, "loss": 11.676, "step": 54937 }, { "epoch": 1.1500041865527924, "grad_norm": 0.2382081001996994, "learning_rate": 0.00013587425044456287, "loss": 11.6612, "step": 54938 }, { "epoch": 1.1500251193167546, "grad_norm": 0.32452136278152466, "learning_rate": 0.00013587220384559222, "loss": 11.6727, "step": 54939 }, { "epoch": 1.1500460520807168, "grad_norm": 0.36157575249671936, "learning_rate": 0.0001358701572293771, "loss": 11.6734, "step": 54940 }, { "epoch": 1.150066984844679, "grad_norm": 0.319273978471756, "learning_rate": 0.00013586811059591844, "loss": 11.6822, "step": 54941 }, { "epoch": 1.1500879176086412, "grad_norm": 0.32017403841018677, "learning_rate": 0.0001358660639452173, "loss": 11.6639, "step": 54942 }, { "epoch": 1.1501088503726031, "grad_norm": 0.3151192367076874, "learning_rate": 0.00013586401727727458, "loss": 11.6665, "step": 54943 }, { "epoch": 1.1501297831365653, "grad_norm": 0.31743133068084717, "learning_rate": 0.00013586197059209132, "loss": 11.6698, "step": 54944 }, { "epoch": 1.1501507159005275, "grad_norm": 0.36541226506233215, "learning_rate": 0.00013585992388966852, "loss": 11.6835, "step": 54945 }, { "epoch": 1.1501716486644897, "grad_norm": 0.3230442702770233, "learning_rate": 0.00013585787717000712, "loss": 11.6751, "step": 54946 }, { "epoch": 1.1501925814284517, "grad_norm": 0.3044709861278534, "learning_rate": 0.0001358558304331081, "loss": 11.67, "step": 54947 }, { "epoch": 1.150213514192414, "grad_norm": 0.2959796190261841, "learning_rate": 0.0001358537836789725, "loss": 11.6783, "step": 54948 }, { "epoch": 1.150234446956376, "grad_norm": 0.2746388018131256, "learning_rate": 0.00013585173690760125, "loss": 11.6667, "step": 54949 }, { "epoch": 1.1502553797203383, "grad_norm": 0.32203271985054016, "learning_rate": 0.00013584969011899535, "loss": 11.667, "step": 54950 }, { "epoch": 1.1502763124843005, "grad_norm": 0.30003786087036133, "learning_rate": 0.00013584764331315579, "loss": 11.6755, "step": 54951 }, { "epoch": 1.1502972452482625, "grad_norm": 0.36140429973602295, "learning_rate": 0.00013584559649008356, "loss": 11.6757, "step": 54952 }, { "epoch": 1.1503181780122247, "grad_norm": 0.3240716755390167, "learning_rate": 0.0001358435496497796, "loss": 11.6707, "step": 54953 }, { "epoch": 1.150339110776187, "grad_norm": 0.3132610321044922, "learning_rate": 0.00013584150279224492, "loss": 11.6661, "step": 54954 }, { "epoch": 1.150360043540149, "grad_norm": 0.2751900851726532, "learning_rate": 0.00013583945591748057, "loss": 11.6601, "step": 54955 }, { "epoch": 1.1503809763041113, "grad_norm": 0.41931450366973877, "learning_rate": 0.00013583740902548742, "loss": 11.658, "step": 54956 }, { "epoch": 1.1504019090680733, "grad_norm": 0.2851881980895996, "learning_rate": 0.00013583536211626655, "loss": 11.6759, "step": 54957 }, { "epoch": 1.1504228418320355, "grad_norm": 0.30964356660842896, "learning_rate": 0.00013583331518981888, "loss": 11.6567, "step": 54958 }, { "epoch": 1.1504437745959977, "grad_norm": 0.27970534563064575, "learning_rate": 0.00013583126824614542, "loss": 11.6645, "step": 54959 }, { "epoch": 1.1504647073599599, "grad_norm": 0.35575923323631287, "learning_rate": 0.00013582922128524714, "loss": 11.6683, "step": 54960 }, { "epoch": 1.150485640123922, "grad_norm": 0.38622328639030457, "learning_rate": 0.00013582717430712506, "loss": 11.6786, "step": 54961 }, { "epoch": 1.150506572887884, "grad_norm": 0.37583333253860474, "learning_rate": 0.00013582512731178013, "loss": 11.6696, "step": 54962 }, { "epoch": 1.1505275056518462, "grad_norm": 0.27431294322013855, "learning_rate": 0.00013582308029921335, "loss": 11.6648, "step": 54963 }, { "epoch": 1.1505484384158084, "grad_norm": 0.3523891568183899, "learning_rate": 0.00013582103326942568, "loss": 11.6764, "step": 54964 }, { "epoch": 1.1505693711797707, "grad_norm": 0.2913726568222046, "learning_rate": 0.00013581898622241814, "loss": 11.6698, "step": 54965 }, { "epoch": 1.1505903039437326, "grad_norm": 0.33802810311317444, "learning_rate": 0.00013581693915819167, "loss": 11.6642, "step": 54966 }, { "epoch": 1.1506112367076948, "grad_norm": 0.2888921797275543, "learning_rate": 0.00013581489207674732, "loss": 11.6754, "step": 54967 }, { "epoch": 1.150632169471657, "grad_norm": 0.2892228662967682, "learning_rate": 0.000135812844978086, "loss": 11.681, "step": 54968 }, { "epoch": 1.1506531022356192, "grad_norm": 0.31060805916786194, "learning_rate": 0.00013581079786220874, "loss": 11.6954, "step": 54969 }, { "epoch": 1.1506740349995814, "grad_norm": 0.4109382629394531, "learning_rate": 0.0001358087507291165, "loss": 11.6697, "step": 54970 }, { "epoch": 1.1506949677635434, "grad_norm": 0.3104046583175659, "learning_rate": 0.0001358067035788103, "loss": 11.6709, "step": 54971 }, { "epoch": 1.1507159005275056, "grad_norm": 0.3359355628490448, "learning_rate": 0.00013580465641129107, "loss": 11.674, "step": 54972 }, { "epoch": 1.1507368332914678, "grad_norm": 0.3057062327861786, "learning_rate": 0.00013580260922655985, "loss": 11.6735, "step": 54973 }, { "epoch": 1.15075776605543, "grad_norm": 0.3282983601093292, "learning_rate": 0.0001358005620246176, "loss": 11.6789, "step": 54974 }, { "epoch": 1.1507786988193922, "grad_norm": 0.3555383086204529, "learning_rate": 0.00013579851480546528, "loss": 11.6708, "step": 54975 }, { "epoch": 1.1507996315833542, "grad_norm": 0.33529189229011536, "learning_rate": 0.0001357964675691039, "loss": 11.6519, "step": 54976 }, { "epoch": 1.1508205643473164, "grad_norm": 0.31028592586517334, "learning_rate": 0.00013579442031553445, "loss": 11.6819, "step": 54977 }, { "epoch": 1.1508414971112786, "grad_norm": 0.3018004894256592, "learning_rate": 0.0001357923730447579, "loss": 11.682, "step": 54978 }, { "epoch": 1.1508624298752408, "grad_norm": 0.3185877203941345, "learning_rate": 0.00013579032575677523, "loss": 11.6606, "step": 54979 }, { "epoch": 1.150883362639203, "grad_norm": 0.34959468245506287, "learning_rate": 0.00013578827845158747, "loss": 11.67, "step": 54980 }, { "epoch": 1.150904295403165, "grad_norm": 0.3045620620250702, "learning_rate": 0.0001357862311291955, "loss": 11.6497, "step": 54981 }, { "epoch": 1.1509252281671272, "grad_norm": 0.26695379614830017, "learning_rate": 0.00013578418378960044, "loss": 11.66, "step": 54982 }, { "epoch": 1.1509461609310894, "grad_norm": 0.3307904303073883, "learning_rate": 0.0001357821364328032, "loss": 11.658, "step": 54983 }, { "epoch": 1.1509670936950516, "grad_norm": 0.3110657036304474, "learning_rate": 0.00013578008905880471, "loss": 11.665, "step": 54984 }, { "epoch": 1.1509880264590135, "grad_norm": 0.36893877387046814, "learning_rate": 0.00013577804166760605, "loss": 11.6807, "step": 54985 }, { "epoch": 1.1510089592229757, "grad_norm": 0.3696364462375641, "learning_rate": 0.00013577599425920817, "loss": 11.6759, "step": 54986 }, { "epoch": 1.151029891986938, "grad_norm": 0.3545561730861664, "learning_rate": 0.00013577394683361206, "loss": 11.6729, "step": 54987 }, { "epoch": 1.1510508247509001, "grad_norm": 0.34354835748672485, "learning_rate": 0.00013577189939081868, "loss": 11.6757, "step": 54988 }, { "epoch": 1.1510717575148623, "grad_norm": 0.3591194152832031, "learning_rate": 0.00013576985193082907, "loss": 11.6613, "step": 54989 }, { "epoch": 1.1510926902788243, "grad_norm": 0.2817166745662689, "learning_rate": 0.00013576780445364415, "loss": 11.6664, "step": 54990 }, { "epoch": 1.1511136230427865, "grad_norm": 0.3483864367008209, "learning_rate": 0.0001357657569592649, "loss": 11.661, "step": 54991 }, { "epoch": 1.1511345558067487, "grad_norm": 0.3598285913467407, "learning_rate": 0.00013576370944769236, "loss": 11.6802, "step": 54992 }, { "epoch": 1.151155488570711, "grad_norm": 0.3334222435951233, "learning_rate": 0.0001357616619189275, "loss": 11.6589, "step": 54993 }, { "epoch": 1.151176421334673, "grad_norm": 0.3237048089504242, "learning_rate": 0.00013575961437297128, "loss": 11.6706, "step": 54994 }, { "epoch": 1.151197354098635, "grad_norm": 0.31172406673431396, "learning_rate": 0.0001357575668098247, "loss": 11.6696, "step": 54995 }, { "epoch": 1.1512182868625973, "grad_norm": 0.3102432191371918, "learning_rate": 0.00013575551922948874, "loss": 11.6713, "step": 54996 }, { "epoch": 1.1512392196265595, "grad_norm": 0.3820693790912628, "learning_rate": 0.0001357534716319644, "loss": 11.669, "step": 54997 }, { "epoch": 1.1512601523905217, "grad_norm": 0.41096606850624084, "learning_rate": 0.0001357514240172526, "loss": 11.6503, "step": 54998 }, { "epoch": 1.151281085154484, "grad_norm": 0.314386785030365, "learning_rate": 0.00013574937638535442, "loss": 11.6664, "step": 54999 }, { "epoch": 1.1513020179184459, "grad_norm": 0.34621137380599976, "learning_rate": 0.0001357473287362708, "loss": 11.6837, "step": 55000 }, { "epoch": 1.1513020179184459, "eval_loss": 11.667974472045898, "eval_runtime": 34.3815, "eval_samples_per_second": 27.951, "eval_steps_per_second": 7.01, "step": 55000 } ], "logging_steps": 1, "max_steps": 143316, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2270424771624960.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }